def novoplasty_run(project, step_data, params):
    """Write a NOVOPlasty config into a fresh assembly step and run the assembler.

    The config text is rendered from ``_config_data``, with one
    ``_dataset_data`` section per entry yielded by the sequence-reads
    description. NOVOPlasty is then executed inside the step directory with
    its standard output discarded, and the step is saved.

    Returns the created ``AssemblyStep``.
    """
    assembly_step = AssemblyStep(project, step_data, remove_data=True)

    # Create config file
    reads = SequenceReads.from_file(params.sequence_reads, relative_dir='..')
    dataset_sections = []
    for position, (read_type, read_info) in enumerate(reads, start=1):
        # Single-end ('SE') entries keep their file under 'file', others under 'file_1'
        first_file_key = 'file' if read_type == 'SE' else 'file_1'
        dataset_sections.append(_dataset_data.format(
            num=position,
            read_length=read_info.get('read_length', ''),
            insert_size=read_info.get('insert_length', ''),
            platform=read_info.get('platform', '').lower() or 'illumina',  # illumina or ion
            read_1=read_info.get(first_file_key),
            read_2=read_info.get('file_2', ''),
            single_paired=read_type))

    organelle_key = params.organelle_type[0].lower()
    default_range = _organele_range[organelle_key]
    # Explicit (truthy) parameters win over the per-organelle defaults
    config_text = _config_data.format(
        project_name=params.project_name or step_data['step_name'],
        organelle_type=_organele_types[organelle_key],
        genome_range_from=params.genome_range_from or default_range[0],
        genome_range_to=params.genome_range_to or default_range[1],
        k_mer=params.k_mer,
        seed=os.path.join('..', params.seed),
        datasets='\n\n'.join(dataset_sections),
    )
    write_str_in_file(assembly_step.step_file(_config_file), config_text)

    # Run NOVOPlasty
    # ToDo: make this more general: run here or on a server, locating the executable, ...
    print(f"Command: cd {assembly_step.directory}; NOVOPlasty -c {_config_file} > /dev/null")
    command = ['NOVOPlasty', '-c', _config_file]
    subprocess.run(command, cwd=assembly_step.directory, stdout=subprocess.DEVNULL)

    assembly_step.save()
    return assembly_step
def get_consensus_file(self):
    """Return the path of the step's 'consensus.newick' file, creating it if missing.

    When the newick file does not exist yet, 'result.con.tre' is read,
    every ``[&...]`` comment is stripped from its text, the cleaned text is
    stored as 'consensus.nexus', and that file is converted from nexus to
    newick with the module returned by ``import_bio_phylo()``.
    """
    newick_path = self.step_file('consensus.newick')
    if os.path.isfile(newick_path):
        # Already converted earlier; reuse it
        return newick_path

    # Note: Bio.Phylo can't handle nexus file with more than one comment in a value part!
    # So the problematic comments are removed before the conversion.
    nexus_path = self.step_file('consensus.nexus')
    raw_tree = read_file_as_str(self.step_file('result.con.tre'))
    cleaned_tree = re.sub(r'\[&[^\]]*]', '', raw_tree)
    write_str_in_file(nexus_path, cleaned_tree)

    phylo = import_bio_phylo()
    phylo.convert(nexus_path, 'nexus', newick_path, 'newick')
    return newick_path
def fetch_genome_assemblies(project, step_data):
    """Create and save an empty table step, then drop an INSTRUCTIONS.txt into it.

    The instructions text is ``_instructions`` formatted with the step's
    directory as ``step_name``. Returns the created ``TableStep``.
    """
    # Create table step data
    table_step = TableStep(project, step_data, remove_data=True)
    table_step.save()  # Takes care of the complete status!

    # Set instructions
    instructions_path = table_step.step_file('INSTRUCTIONS.txt')
    instructions_text = _instructions.format(step_name=table_step.directory)
    write_str_in_file(instructions_path, instructions_text)
    return table_step
def create_ogdraw(step_data, image_format, annotations_step, common_db, sequences=None):
    """Create an images step holding OGDraw images for annotated sequences.

    Args:
        step_data: description of the step to create (its 'step_name' is used
            in the instructions text).
        image_format: requested image format; for 'jpg' the images are first
            looked up in the annotation step's 'job-results-*.zip' archives.
        annotations_step: step providing the sequences and their GenBank data.
        common_db: passed through to ``step.get_common_db_records``.
        sequences: optional ';'-separated list of sequence identifiers; when
            empty, all sequences of ``annotations_step`` are used.

    Returns:
        The created ``ImagesStep``; it is saved as completed only when no
        sequence is left to be processed.
    """
    step = ImagesStep(annotations_step.project, step_data, remove_data=True)
    if sequences:
        all_images = sorted(sequences.split(';'))
    else:
        all_images = sorted(annotations_step.all_sequences())

    # Fetch common db sequences
    to_fetch = step.get_common_db_records(common_db, all_images, info=True)

    # If OGDraw is done on GeSeq data, then jpg images are already in
    if image_format == 'jpg':
        # Extract jpg files job-results-<num>/GeSeqJob-<num>-<num>_<seq_ident>_OGDRAW.jpg
        for zip_name in annotations_step.step_files(matches='^job-results-[0-9]*.zip'):
            with ZipFile(annotations_step.step_file(zip_name), 'r') as archive:
                for member in archive.infolist():
                    found = _re_zip_jpg.search(member.filename)
                    if not found:
                        continue
                    seq_ident = found.group(1)
                    if seq_ident not in to_fetch:
                        continue
                    # Image is available locally, so it no longer has to be produced
                    to_fetch.remove(seq_ident)
                    extract_from_zip(archive, member.filename, step.step_file(seq_ident + '.jpg'))

    # Store sequence
    if to_fetch:
        # Note: it is important that file has extension gbff (multiple sequence data)
        batches = {}
        for batch_no, batch in enumerate(split_list(to_fetch, 30), start=1):
            gbff_path = step.step_file(f'sequences_{batch_no}.gbff')
            annotations_step.concatenate_seqs_genbank(gbff_path, batch)
            batches[batch_no] = batch

        # Store instructions
        instructions_text = _instructions.format(
            step_name=step_data['step_name'], image_format=image_format)
        write_str_in_file(step.step_file('INSTRUCTIONS.txt'), instructions_text)
        # Store image format used
        finish_data = dict(image_format=image_format, sequences=batches)
        write_yaml(finish_data, step.step_file('finish.yml'))

    #
    step.set_images(all_images)
    step.save(completed=not to_fetch)
    return step
def fetch_sequences(step_data, table_step, common_db, column_name=None):
    """Create a sequences step for the sequence identifiers listed in a table step.

    Args:
        step_data: description of the step to create.
        table_step: table step whose 'seq_ident' column values are fetched.
        common_db: passed to ``do_fetch_sequences`` and reported in the
            instructions text as the sequence database.
        column_name: optional column name forwarded to
            ``table_step.get_column_values_by_type``.

    Returns:
        The created ``SequencesStep``. It is saved as completed only when
        ``do_fetch_sequences`` reports nothing left to fetch; otherwise an
        INSTRUCTIONS.txt listing the remaining identifiers is written.
    """
    step = SequencesStep(table_step.project, step_data, remove_data=True)
    table_step.propagate_step_name_prefix(step)
    seq_idents = table_step.get_column_values_by_type('seq_ident', column_name=column_name)
    to_fetch = do_fetch_sequences(step, seq_idents, common_db)

    # ToDo: remove not referenced sequences

    # Store step data
    # step._check_data()
    step.save(completed=not to_fetch)
    if to_fetch:
        # The original referenced an undefined name `sequence_db` here, which
        # raised NameError exactly when instructions were needed; the database
        # this function works with is `common_db`.
        missing = ', '.join(sorted(to_fetch))
        write_str_in_file(
            step.step_file('INSTRUCTIONS.txt'),
            _instructions_no_data.format(sequence_db=common_db, seqs=missing))
    return step
def create_ge_seq_data(step_data, sequences_step, common_db, num_sequences_in_file):
    """Create an annotations step and prepare FASTA input files for GeSeq.

    Sequences not returned by ``step.get_common_db_records`` as still missing
    need no further work. The remaining ones are written into
    'sequences_<n>.fa' files holding at most ``num_sequences_in_file``
    sequences each, together with an INSTRUCTIONS.txt.

    Returns the created ``AnnotationsStep``, saved as completed only when
    nothing is left to process.
    """
    step = AnnotationsStep(sequences_step.project, step_data, remove_data=True)
    sequences_step.propagate_step_name_prefix(step)
    all_sequences = list(sequences_step.all_sequences())

    # Fetch common DB sequences
    to_fetch = step.get_common_db_records(common_db, all_sequences, info=True)

    # Store sequence
    if to_fetch:
        batches = split_list(to_fetch, num_sequences_in_file)
        for file_no, batch in enumerate(batches, start=1):
            fasta_path = step.step_file(f'sequences_{file_no}.fa')
            sequences_step.concatenate_seqs_fa(fasta_path, batch)

        # Store instructions
        instructions_text = _instructions.format(step_name=step_data['step_name'])
        write_str_in_file(step.step_file('INSTRUCTIONS.txt'), instructions_text)

    #
    step.set_sequences(all_sequences)
    step.save(completed=not to_fetch)
    return step
def create_permutations(project, step_data, raw_file, permutations, num_traits=None, run=False):
    """Prepare (and optionally run) a QTL Cartographer permutation step.

    Args:
        project: project the step belongs to.
        step_data: description of the step to create.
        raw_file: path of the MapMaker '.raw' file; the matching '.map' file
            and the 'tmp.00m', 'tmp.00c', 'tmp.00r' files are expected in the
            same directory.
        permutations: number of permutations, stored in the step data and in
            'finish.yml'.
        num_traits: number of traits; one trait directory with its own
            'qtlcart.rc' is created per trait. Must be a positive number.
        run: when true the permutations are run immediately, otherwise run
            instructions are stored in the step.

    Returns:
        The created ``QTLCartStep``.

    Raises:
        ZCItoolsValueError: if an input file is missing or ``num_traits`` is
            not a positive number.
    """
    # Check input files
    map_file = raw_file.replace('.raw', '.map')
    data_dir = os.path.dirname(raw_file)
    tmp_files = ('tmp.00m', 'tmp.00c', 'tmp.00r')
    for mf in (raw_file, map_file):
        if not os.path.isfile(mf):
            raise ZCItoolsValueError(
                f"Input MapMaker file {mf} doesn't exist!")
    for qf in tmp_files:
        if not os.path.isfile(os.path.join(data_dir, qf)):
            raise ZCItoolsValueError(
                f"Input Windows QTL Cartographer file {qf} doesn't exist!")

    # Validate the trait count before the step is created: creating the step
    # removes existing step data, so a bad argument must be rejected first.
    # An explicit raise also keeps the check active under `python -O`.
    # ToDo: find max traits and fix it/set default
    if not num_traits or num_traits <= 0:
        raise ZCItoolsValueError(
            f"Number of traits has to be a positive number, got {num_traits!r}!")

    #
    step = QTLCartStep(project, step_data, remove_data=True)
    step.set_data(num_traits, permutations)

    # Copy input files
    files_to_zip = []
    for qf in tmp_files:
        files_to_zip.append(step.step_file(qf))
        copy_file(os.path.join(data_dir, qf), files_to_zip[-1])

    # Create trait directories
    trait_dirs = []
    for t_idx in range(1, num_traits + 1):
        trait_dirs.append(step.trait_dir(t_idx))
        t_dir = step.step_file(trait_dirs[-1])
        ensure_directory(t_dir)
        files_to_zip.append(os.path.join(t_dir, 'qtlcart.rc'))
        write_str_in_file(
            files_to_zip[-1],
            _qtlcart_rc.format(trait=t_idx, num_traits=num_traits))

    #
    files_to_zip.append(step.step_file('finish.yml'))
    write_yaml(dict(permutations=permutations, trait_dirs=trait_dirs), files_to_zip[-1])

    # Stores description.yml
    step.save(completed=run)

    # Run or set instructions
    if run:
        run_module_script(run_qtl_cart_perm, step)
    else:
        set_run_instructions(run_qtl_cart_perm, step, files_to_zip, _instructions)
    #
    return step
def cmd_summary(self):
    """Print the summary text and store it in 'workflow_summary.txt'.

    Nothing is printed or written when the summary has no (non-empty) 'text'.
    """
    text = self.get_summary().get('text')
    if not text:
        return
    print(text)
    write_str_in_file('workflow_summary.txt', text)
def create_circos_correlation(project, step_data, params):
    """Draw a correlation matrix as a Circos image inside a new images step.

    The matrix is read from ``params.input_filename``. Every column becomes a
    Circos group ("chromosome"); each group is divided into ``num_c - 1``
    tiles of width ``params.one_width`` separated by
    ``params.gap_correlations``, one tile per other column. Correlations are
    written as links between the matching tiles of two groups, with a width
    proportional to ``abs(correlation)``.

    Files written into the step directory: 'data/karyotype.txt',
    'data/tiles.txt', 'data/links.txt' and 'etc/circos.conf'. Afterwards
    ``circos`` is run in the step directory and, if ``params.show_image`` is
    set and an 'image_viewer' setting exists, the viewer is started on
    'circos.png'.

    Raises:
        ZCItoolsValueError: if no correlation data could be read or the
            matrix has fewer than two columns.

    NOTE(review): in the code visible here the step is neither saved nor
    returned -- confirm whether that is intentional.
    """
    # Read correlation data
    cm = None
    if params.input_filename:
        cm = CorrelationMatrix.from_file(params.input_filename)

    if not cm:
        raise ZCItoolsValueError('No correlation input data!')
    num_c = cm.num_columns()
    if num_c < 2:
        raise ZCItoolsValueError('Not much of a matrix!')

    step = ImagesStep(project, step_data, remove_data=True)
    one_width = params.one_width
    gap_correlations = params.gap_correlations
    ow_2 = one_width // 2  # half of a tile width, used to find a tile's centre
    one_plus_gap = one_width + gap_correlations  # distance between starts of neighbouring tiles

    # Note: column lowercase names are used as column identifiers
    data_dir = step.step_file('data')
    etc_dir = step.step_file('etc')
    ensure_directory(data_dir)
    ensure_directory(etc_dir)

    # Colour table: every column defaults to green; 'plus_' / 'minus_' are the
    # colours of links with non-negative / negative correlation.
    colors = dict((lc, 'green') for lc in cm._columns_lower)  # ToDo: some defaults
    colors['plus_'] = 'blue'
    colors['minus_'] = 'red'
    # Each group colour definition has the form '<column>,<color>'
    for col_def in params.group_color:
        col_fields = col_def.split(',', 1)
        if len(col_fields) == 2 and cm.check_column(col_fields[0]):
            colors[cm.check_column(col_fields[0])] = col_fields[1]
        else:
            print(f"Warning: '{col_def}' is not column color definition!")

    # data directory
    # karyotype.txt: defines groups (as chromosomes)
    # chr - <name> <label> <start> <end> <color>
    # ...
    # A group holds (num_c - 1) tiles with (num_c - 2) gaps between them
    gl = (num_c - 1) * one_width + (num_c - 2) * gap_correlations  # group length
    write_str_in_file(
        os.path.join(data_dir, 'karyotype.txt'),
        '\n'.join(f"chr - {lc} {c} 0 {gl} color_{lc}" for lc, c in zip(cm._columns_lower, cm._columns)))

    # tiles.txt: defines abs(correlation) == 1 interval, as tiles
    # <name> <start> <end> [options]
    with open(os.path.join(data_dir, 'tiles.txt'), 'w') as out:
        for idx1, c1 in enumerate(cm._columns_lower):
            for idx2, c2 in enumerate(cm._columns_lower):
                if idx1 == idx2:
                    continue
                # Tile slot (0 .. num_c - 2) of column c2 inside the group of column c1
                pos = (idx1 - idx2 - 1) if idx1 > idx2 else (idx1 - idx2 + (num_c - 1))
                start = pos * one_plus_gap
                out.write(
                    f"{c1} {start} {start + one_width} fill_color=color_{c2}\n"
                )

    # links.txt: defines correlations as links (two lines per link, sharing one cell id)
    # <cell_idx> <group_1> <start_1> <end_1> color=color_{plus|minus}_,dist={int}
    # <cell_idx> <group_2> <start_2> <end_2> color=color_{plus|minus}_,dist={int}
    # ...
    with open(os.path.join(data_dir, 'links.txt'), 'w') as out:
        cell_idx = 0
        for idx1, c1 in enumerate(cm._columns_lower):
            # Only columns after c1 are paired with it, so each pair is visited once
            rest_c = cm._columns_lower[idx1 + 1:]
            for idx2, c2 in enumerate(rest_c):
                corr = cm.get(c1, c2)
                if corr is not None:
                    # Link width scales with abs(correlation); it is split into
                    # two halves placed around the tile centre
                    w = round(abs(corr) * one_width)
                    w_1 = w // 2
                    w_2 = w - w_1  # - 1?
                    centar = ow_2 + idx2 * one_plus_gap  # tile centre, measured from the group start
                    color = 'plus_' if corr >= 0 else 'minus_'
                    dist = min(idx2 + 1, idx1 + (len(rest_c) - idx2))
                    atts = f"color=color_{color},dist={dist}"
                    # End in group c1 is mirrored: measured back from the group end
                    out.write(
                        f"cell_{cell_idx} {c1} {gl - centar - w_2} {gl - centar + w_1} {atts}\n"
                    )
                    out.write(
                        f"cell_{cell_idx} {c2} {centar - w_1} {centar + w_2} {atts}\n"
                    )
                    cell_idx += 1

    # etc directory
    write_str_in_file(
        os.path.join(etc_dir, 'circos.conf'),
        _circos_conf.format(colors='\n'.join(f"color_{lc} = {c}" for lc, c in colors.items())))

    # Run circos from the step directory, so the relative config path resolves there
    subprocess.run(['circos', '-conf', 'etc/circos.conf'], cwd=step.directory)

    # View it
    if params.show_image:
        image_viewer = get_settings().get('image_viewer')
        if image_viewer:
            subprocess.Popen([image_viewer, step.step_file('circos.png')])