def select_data_set(self, name, size_pyramid=9, factor=3):
    """Load the pyramid and prepare the output folder and result file paths.

    The pyramid is built from ``self.base_folder`` and stored in
    ``self.hic_pyr``.  ``self.output_folder`` is then extended to
    ``<output_folder>/<self.data_set>/test_mcmc_<self.str_level>``; every
    level of that hierarchy is created if it does not exist yet.  Finally
    the paths of the files written into that folder are stored on ``self``.

    Args:
        name: Not used by this implementation; kept so existing callers
            keep working.
        size_pyramid: Forwarded to ``pyr.build_and_filter`` (defaults to
            the previously hard-coded value 9).
        factor: Forwarded to ``pyr.build_and_filter`` (defaults to the
            previously hard-coded value 3).

    Raises:
        OSError: If one of the output directories cannot be created.
    """
    self.hic_pyr = pyr.build_and_filter(
        self.base_folder,
        size_pyramid,
        factor,
        thresh_factor=self.thresh_factor,
    )
    print("pyramid loaded")

    # exist_ok avoids the check-then-create race of exists() + mkdir() and
    # makedirs also copes with a missing parent directory.
    os.makedirs(self.output_folder, exist_ok=True)
    for sub_folder in (self.data_set, "test_mcmc_" + self.str_level):
        self.output_folder = os.path.join(self.output_folder, sub_folder)
        os.makedirs(self.output_folder, exist_ok=True)

    out = self.output_folder
    self.new_fasta = os.path.join(out, "genome.fasta")
    self.info_frags = os.path.join(out, "info_frags.txt")
    self.output_matrix_em = os.path.join(out, "post_em.tiff")
    self.output_matrix_mcmc = os.path.join(out, "post_mcmc.tiff")
    self.input_matrix = os.path.join(out, "pre_simu.tiff")
    self.scrambled_input_matrix = os.path.join(out, "scrambled_simu.tiff")
def run(self):
    """Run the worker thread: build the pyramid and post it as a result.

    The pyramid is built from ``self.base_folder``, ``self.size_pyramid``
    and ``self.factor``; it is then wrapped in a ``ResultEvent`` and posted
    to ``self._notify_window``.
    """
    hic_pyramid = pyr.build_and_filter(
        self.base_folder, self.size_pyramid, self.factor
    )

    # Level 2 is requested but its return value is not used here.
    # NOTE(review): presumably called for a side effect (e.g. loading the
    # level) - confirm before removing.
    pyr.level(hic_pyramid, 2)

    # Hand the finished pyramid over to the window waiting for the result.
    target_window = self._notify_window
    wx.PostEvent(target_window, ResultEvent(hic_pyramid))
def select_data_set(self, name):
    """Configure host-specific paths for data set *name* and load its pyramid.

    The short host name (the part of ``socket.gethostname()`` before the
    first dot) selects hard-coded values for ``self.data_set_root``,
    ``self.dir_home`` and ``self.fasta``.  The pyramid is then built from
    ``<data_set_root>/<data set folder>/analysis`` and stored in
    ``self.hic_pyr``.  Finally the output folder
    ``<data_set_root>/results/<data set folder>/test_mcmc_<self.str_level>``
    (plus ``sub_sampling/<self.fact_sub_sampling>`` when that factor is
    positive) is created and stored in ``self.output_folder``.

    Args:
        name: Key of the data set; must be one of the keys registered in
            ``self.data_set`` below.

    Raises:
        KeyError: If *name* is not a registered data set.
        OSError: If the output directory cannot be created.

    Note:
        On a host that is not listed below, ``self.data_set_root``,
        ``self.dir_home`` and ``self.fasta`` are left untouched, so they
        must already have been set elsewhere.
    """
    tricho_sets = ('tricho', 'tricho_rutc30', 'tricho_qm6a')
    community_sets = ('community_0', 'community_24', 'community_33',
                      'community_75', 'community_axel')

    hostname = socket.gethostname()
    # Single-argument form prints the same text on Python 2 and Python 3.
    print("Host name: %s" % hostname)
    ordi = hostname.split('.')[0]

    # Pyramid size per data set; anything not listed uses 4.
    six_level_sets = tricho_sets + community_sets + (
        'ykf1246_new_hq', 'ykf1246_new_ref_hq', 'ykf1246_axel',
        'yvette_comm_0', 'yvette_comm_156', 'yvette_comm_0_156',
        'meta_ecoli', '3bacts', 'com2_3bacts',
    )
    if name in six_level_sets:
        size_pyramid = 6
    elif name == 'amibes_full_2014':
        size_pyramid = 7
    elif name == 'yvette_comm_2':
        size_pyramid = 5
    else:
        size_pyramid = 4

    factor = 2 if name == 'ykf1246_new_ref_hq_2' else 3

    # Data set name -> folder name below the data set root.
    # NOTE(review): 'ykf1246_new_ref_hq' has a pyramid size above but no
    # entry here (only 'ykf1246_new_ref_hq_2' does), so selecting it raises
    # KeyError - confirm which spelling is intended.
    self.data_set = {
        'malesian': 'malesian/',
        'community_0': 'community_0/',
        'community_33': 'community_33/',
        'community_75': 'community_75',
        'community_24': 'community_24',
        'community_axel': 'community_axel',
        'S1': 'S1/',
        '3bacts': '3bacts/',
        'meta_ecoli': 'meta_ecoli/',
        'com2_3bacts': 'com2_3bacts/',
        'G1': 'G1/',
        'tricho': 'tricho/',
        'tricho_rutc30': 'tricho_rutc30',
        'tricho_qm6a': 'tricho_qm6a',
        'ykf1246': 'ykf1246',
        'ykf1246_new': 'ykf1246_new',
        'ykf1246_new_hq': 'ykf1246_new_hq',
        'ykf1246_new_ref_hq_2': 'ykf1246_new_ref_hq_2',
        'ykf1246_axel': 'ykf1246_axel',
        'ykf175n': 'ykf175n',
        'amibes_full_2014': 'amibes_full_2014',
        'yvette_comm_0': 'yvette_comm_0',
        'yvette_comm_156': 'yvette_comm_156',
        'yvette_comm_0_156': 'yvette_comm_0_156',
        'yvette_comm_2': 'yvette_comm_2',
    }
    selected = name

    if ordi == 'matisse':
        self.dir_home = '/home/hervemn/'
        if selected in tricho_sets:
            self.data_set_root = '/media/hervemn/LaCie/data_hic/data_set_assembly/'
            self.fasta = ('/media/hervemn/LaCie/data_hic/fasta_genomes/'
                          'trichoderma/trichoderma_new.fa')
        else:
            self.data_set_root = '/media/hervemn/data/data_set_assembly/'
            # The original tested ``self.name`` here while every other
            # branch tests the ``name`` argument; use the argument.
            if selected in ('community_33', 'community_75',
                            'community_24', 'community_axel'):
                self.fasta = ('/media/hervemn/data/genome_fasta/%s/%s.fasta'
                              % (selected, selected))
            else:
                self.fasta = ('/media/hervemn/LaCie/data_hic/fasta_genomes/'
                              'cerevisiae_classic/new_ref_genome.fsa')

    elif ordi == 'rv-retina':
        self.data_set_root = '/Volumes/VeryBigData/HiC/data_set_assembly'
        self.dir_home = '/Users/hervemarie-nelly/'
        genomes = '/Volumes/VeryBigData/HiC/fasta_genomes/'
        if selected in tricho_sets:
            self.fasta = genomes + 'trichoderma/trichoderma_new.fa'
        else:
            self.fasta = genomes + 'cerevisiae_classic/new_ref_genome.fsa'

    elif ordi == 'loopkin':
        assembly_root = '/data/hervemn/data_set_assembly/'
        genomes = '/data/hervemn/alignment_toolbox/fasta_genomes/'
        self.data_set_root = assembly_root
        self.dir_home = '/home/hervemn/'
        # Data sets whose fasta lives in their own analysis folder:
        # data set name -> fasta file stem.
        fasta_stems = {community: community for community in community_sets}
        fasta_stems['yvette_comm_0'] = 'community_0'
        if selected in tricho_sets:
            self.fasta = genomes + 'trichoderma/trichoderma_new.fa'
        elif selected == 'amibes_full_2014':
            self.fasta = genomes + 'amoeba/EHI_v13.fa'
        elif selected in fasta_stems:
            self.fasta = (assembly_root + selected + '/analysis/'
                          + fasta_stems[selected] + '.fasta')
        else:
            self.fasta = genomes + 'cerevisiae_classic/new_ref_genome.fsa'

    elif ordi == 'duvel':
        assembly_root = '/media/hervemn/data/HiC/data_set_assembly/'
        genomes = '/media/hervemn/data/HiC/fasta_genomes/'
        self.data_set_root = assembly_root
        self.dir_home = '/home/hervemn/'
        # Data sets whose fasta lives in their own analysis folder:
        # data set name -> fasta file stem.
        fasta_stems = {community: community for community in community_sets}
        fasta_stems.update({
            '3bacts': 'contigs_3bacts',
            'meta_ecoli': 'community_1',
            'com2_3bacts': 'community_2',
            'yvette_comm_0': 'community_0',
            'yvette_comm_156': 'community_156',
            'yvette_comm_0_156': 'community_0_156',
            'yvette_comm_2': 'community_2',
        })
        if selected in tricho_sets:
            self.fasta = genomes + 'trichoderma/trichoderma_new.fa'
        elif selected in fasta_stems:
            self.fasta = (assembly_root + selected + '/analysis/'
                          + fasta_stems[selected] + '.fasta')
        elif selected == 'amibes_full_2014':
            self.fasta = genomes + 'amoeba/EHI_v13.fa'
        else:
            self.fasta = genomes + 'cerevisiae_classic/new_ref_genome.fsa'

    self.base_folder = os.path.join(self.data_set_root,
                                    self.data_set[selected], 'analysis')
    self.hic_pyr = pyr.build_and_filter(self.base_folder, size_pyramid, factor)
    print("pyramid loaded")

    self.output_folder = os.path.join(self.data_set_root, 'results',
                                      self.data_set[selected],
                                      'test_mcmc_' + self.str_level)
    if self.fact_sub_sampling > 0:
        self.folder_sub_sampling = os.path.join(self.output_folder,
                                                'sub_sampling')
        self.output_folder = os.path.join(self.folder_sub_sampling,
                                          str(self.fact_sub_sampling))

    # Create the whole hierarchy in one go.  Try-then-check avoids the race
    # of exists() + mkdir() and works on both Python 2 and Python 3.
    try:
        os.makedirs(self.output_folder)
    except OSError:
        if not os.path.isdir(self.output_folder):
            raise
def select_data_set(self, name, size_pyramid=7):
    """Configure paths for data set *name*, load its pyramid, prepare outputs.

    By default everything is located relative to the directory containing
    this file: the fasta is ``<dir>/fasta/<name>.fa`` and the data set root
    is ``<dir>``.  Known data sets get a dedicated fasta path instead, and
    the host ``duvel`` and the data set ``mosquito`` use hard-coded
    locations.  The pyramid is built from
    ``<data_set_root>/<name>/analysis`` and stored in ``self.hic_pyr``; the
    output folder ``<data_set_root>/results/<name>/test_mcmc_<self.str_level>``
    is created and the paths of the result files are stored on ``self``.

    Args:
        name: Name of the data set; also used as its folder name.
        size_pyramid: Forwarded to ``pyr.build_and_filter``.

    Raises:
        OSError: If the output directory cannot be created.
    """
    factor = 3
    selected = name

    # Fasta location of the known data sets, relative to the genome root of
    # the current host.
    known_fasta = {
        'tricho_qm6a_sparse': 'fasta_genomes/trichoderma/trichoderma_new.fa',
        'S1': 'fasta_genomes/cerevisiae_classic/new_ref_genome.fsa',
        'amoeba': 'fasta_genomes/amoeba/EHI_v13.fa',
        'mosquito': 'fasta_genomes/aedes_aegypti/Aedes_aegypti.fa.parsed',
        'de_novo_chr14': 'fasta_genomes/de_novo_chr14/genome.ctg.fasta',
        'de_novo_chr14_jff': 'fasta_genomes/de_novo_chr14/genome.ctg.fasta',
        'human_chr4_chr14': 'fasta_genomes/human_chr4_chr14/human_4_14.fa',
        'human_chr19_to_22':
            'fasta_genomes/human_chr19_to_22/chr19_chr20_chr21_chr22.fa',
        'human_chr7_chr22':
            'fasta_genomes/human_chr7_chr22/chr7_chr17_chr19_chr22.fa',
        's1_de_novo': 'fasta_genomes/cerevisiae_de_novo/cerevisiae_de_novo.fa',
        's1_de_novo_0_1':
            'fasta_genomes/cerevisiae_de_novo/cerevisiae_de_novo.fa',
    }

    # Defaults: everything lives next to this source file.
    toolbox_directory = os.path.dirname(os.path.abspath(__file__))
    self.fasta = os.path.join(toolbox_directory, 'fasta', selected + '.fa')
    # Only the selected data set is registered, mapped to a folder of the
    # same name.
    self.data_set = {selected: selected}
    self.data_set_root = toolbox_directory

    # NOTE(review): ``hostname`` is not defined in this function; it is
    # presumably a module-level name - confirm it exists.
    on_duvel = hostname == 'duvel'
    if on_duvel:
        self.dir_home = '/mirror/'
        self.data_set_root = '/mirror/data/'
    else:
        self.dir_home = toolbox_directory
        self.data_set_root = toolbox_directory

    if selected == 'mosquito':
        self.dir_home = '/media/hervemn/data/HiC/'
        self.data_set_root = '/media/hervemn/data/HiC/data_set_assembly'

    relative_fasta = known_fasta.get(selected)
    if on_duvel:
        if relative_fasta is not None:
            self.fasta = os.path.join(self.dir_home, relative_fasta)
    elif selected != 'mosquito' and relative_fasta is not None:
        # NOTE(review): off 'duvel' the original's mosquito fasta branch was
        # unreachable (an ``elif`` after the mosquito directory override),
        # so 'mosquito' keeps the default fasta here.  That behaviour is
        # preserved - confirm whether it is intended.
        self.fasta = os.path.join(self.dir_home, 'alignment_toolbox',
                                  relative_fasta)

    self.base_folder = os.path.join(self.data_set_root,
                                    self.data_set[selected], 'analysis')
    self.hic_pyr = pyr.build_and_filter(self.base_folder, size_pyramid, factor)
    # Single-argument form prints the same text on Python 2 and Python 3.
    print("pyramid loaded")

    self.output_folder = os.path.join(self.data_set_root, 'results',
                                      self.data_set[selected],
                                      'test_mcmc_' + self.str_level)
    # Create the whole hierarchy in one go.  Try-then-check avoids the race
    # of exists() + mkdir() and works on both Python 2 and Python 3.
    try:
        os.makedirs(self.output_folder)
    except OSError:
        if not os.path.isdir(self.output_folder):
            raise

    out = self.output_folder
    self.new_fasta = os.path.join(out, 'genome.fasta')
    self.info_frags = os.path.join(out, 'info_frags.txt')
    self.output_matrix_em = os.path.join(out, 'post_em.tiff')
    self.output_matrix_mcmc = os.path.join(out, 'post_mcmc.tiff')
    self.input_matrix = os.path.join(out, 'pre_simu.tiff')
    self.scrambled_input_matrix = os.path.join(out, 'scrambled_simu.tiff')