def _run_align(seq_ident, seq_data, close_data, seq_2_result_object, match_length=100, first_nice=True):
    # In input dictionary seq_2_result_object, for key seq_ident, set value (align_seq_ident, ira, irb)
    # Store fasta
    f_dir = seq_data._analyse.step.step_file('find_irs', seq_ident)
    ensure_directory(f_dir)
    seq_fasta = os.path.join(f_dir, f"{seq_ident}.fa")
    write_fasta(seq_fasta, [(seq_ident, seq_data._seq.seq)])

    # It is (probably) better to prefer newer sequences first!
    close_data = sorted(close_data, reverse=True, key=lambda d: d.first_date)
    all_aligns = []
    for d in close_data:
        ira = d._partition.get_part_by_name('ira')
        rec = ira.extract(d._seq)
        qry_fasta = os.path.join(f_dir, f"qry_{d.seq_ident}.fa")
        write_fasta(qry_fasta, [('end1', rec.seq[:match_length]), ('end2', rec.seq[-match_length:])])
        align = run_align_cmd(seq_fasta, qry_fasta, f"res_{d.seq_ident}")
        if first_nice and (irs := _get_nice_irs(align)):
            seq_2_result_object[seq_ident] = (d.seq_ident, *irs)
            return
        align.seq_ident = d.seq_ident
        all_aligns.append(align)
def run(self):
    import os.path
    from collections import defaultdict
    from ..utils.import_methods import import_bio_seq_io
    from ..utils.helpers import get_bio_io_type
    from common_utils.file_utils import ensure_directory, write_fasta

    args = self.args
    od = args.output_directory
    SeqIO = import_bio_seq_io()
    ensure_directory(od)

    genes = defaultdict(dict)  # gene -> dict(species -> data)
    for i_filename in args.input_files:
        for seq in SeqIO.parse(i_filename, get_bio_io_type(i_filename, args.input_format)):
            name = seq.id
            split_on = name.index('_')
            gene = name[:split_on]
            species = name[(split_on + 1):]
            genes[gene][species] = seq.seq

    for gene, data in genes.items():
        write_fasta(os.path.join(od, f'{gene}.fasta'), sorted(data.items()))
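# A minimal, self-contained sketch of the ID convention assumed above
# (hypothetical IDs): name.index('_') finds the first '_' only, so a species
# name that itself contains underscores stays intact.
def _split_gene_species_example():
    name = 'rbcL_Arabidopsis_thaliana'
    split_on = name.index('_')
    assert name[:split_on] == 'rbcL'
    assert name[split_on + 1:] == 'Arabidopsis_thaliana'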
def create_irs_data(step_data, annotation_step, params):
    SeqIO = import_bio_seq_io()
    seq_idents = annotation_step.all_sequences()  # set
    ref_ident = find_referent_genome(seq_idents, params.referent_genome)

    step = annotation_step.project.new_step(ChloroplastSSCBlast, step_data)
    ref_seq_rec = annotation_step.get_sequence_record(ref_ident)
    ssc_location = step.get_type_description_elem('ssc_location', default=dict())
    ensure_directory(step.step_file('run_dir'))

    # Store query data
    query_file = step.step_file('run_dir', 'query.fa')
    if not os.path.isfile(query_file):
        irs = find_chloroplast_irs(ref_seq_rec)
        if not irs:
            raise ZCItoolsValueError(f"Referent genome ({ref_ident}) doesn't have IRS!")
        write_fasta(query_file, [('ira', str(irs[0].extract(ref_seq_rec).seq))])

    files_to_zip = [query_file]
    calc_seq_idents = []
    # All sequences to create a database from
    for seq_ident in sorted(seq_idents):
        if not os.path.isfile(step.step_file('run_dir', f'{seq_ident}.xml')):
            fa_file = step.step_file('run_dir', f'{seq_ident}.fa')
            files_to_zip.append(fa_file)
            calc_seq_idents.append(seq_ident)
            if not os.path.isfile(fa_file):
                seq_rec = annotation_step.get_sequence_record(seq_ident)
                SeqIO.write([seq_rec], fa_file, 'fasta')
                # Store SSC position as [sequence_length, ira_end, irb_start]; -1 means no IRS found
                irs = find_chloroplast_irs(seq_rec)
                ssc_location[seq_ident] = [len(seq_rec), int(irs[0].location.end), irb_start(irs[1])] \
                    if irs else [len(seq_rec), -1, -1]

    if calc_seq_idents:
        # Store finish.yml
        finish_f = step.step_file('finish.yml')
        write_yaml(dict(calc_seq_idents=calc_seq_idents), finish_f)

        run = True  # ToDo: ...
        step.save(dict(ssc_location=ssc_location), completed=False)
        if run:
            run_module_script(run_irs_blast, step)
            finish_irs_data(step)
        else:
            files_to_zip.append(finish_f)
            set_run_instructions(run_irs_blast, step, files_to_zip, _instructions)
    #
    elif params.force_blast_parse:
        finish_irs_data(step)
    return step
def set_group_rows(self, group, rows):
    # ToDo: check rows?
    if rows:
        self._rows = None  # Remove get_rows() cache
        data_dir = self._data_subdirectory()
        ensure_directory(data_dir)
        write_csv(os.path.join(data_dir, group), self._columns[1:], rows)
    else:
        append_line_to_file(self._no_data_filename(), group)
def calculate_and_add_irs_to_seq_rec(step, seq_ident, seq_rec):
    SeqIO = import_bio_seq_io()
    # Store input fasta file
    ensure_directory(step.step_file('run_dir'))
    input_filename = step.step_file('run_dir', f'{seq_ident}.fa')
    SeqIO.write([seq_rec], input_filename, 'fasta')
    # Run MUMmer
    run_one(input_filename)
    m_res = _MUMmerResult(step.step_file('run_dir', f'{seq_ident}.out'), seq_ident, len(seq_rec))
    return m_res.set_annotation(seq_ident, seq_rec, step.step_file(f'{seq_ident}.gb'))
def create_new_hybrids_data(project, step_data, params):
    # Check input files
    if not os.path.isfile(params.data_file):
        raise ZCItoolsValueError(f"Input data file {params.data_file} doesn't exist!")
    if not os.path.isfile(params.gtyp_cat_file):
        raise ZCItoolsValueError(f"Input genotype category probabilities file {params.gtyp_cat_file} doesn't exist!")
    data_file = os.path.basename(params.data_file)
    gtyp_cat_file = os.path.basename(params.gtyp_cat_file)

    step = NewHybridsStep(project, step_data, remove_data=True)
    step.set_data(data_file, gtyp_cat_file, params.theta_prior, params.pi_prior, params.burn_in, params.num_sweeps)

    # Copy input files
    files_to_zip = [step.step_file(data_file), step.step_file(gtyp_cat_file)]
    copy_file(params.data_file, files_to_zip[0])
    copy_file(params.gtyp_cat_file, files_to_zip[1])

    # Create run directories
    seeds = random.sample(list(itertools.product(range(1, _MAX_SMALL_NUMBER + 1), repeat=2)), params.num_runs)
    for seed in seeds:
        files_to_zip.append(step.step_file(step.seed_dir(seed)))
        ensure_directory(files_to_zip[-1])

    files_to_zip.append(step.step_file('finish.yml'))
    write_yaml(dict(data_file=data_file, gtyp_cat_file=gtyp_cat_file,
                    theta_prior=params.theta_prior, pi_prior=params.pi_prior,
                    burn_in=params.burn_in, num_sweeps=params.num_sweeps),
               files_to_zip[-1])

    # Stores description.yml
    step.save(completed=params.run)

    # Run or set instructions
    if params.run:
        run_module_script(run_new_hybrids, step)
    else:
        set_run_instructions(run_new_hybrids, step, files_to_zip, _instructions)
    #
    return step
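# Sketch of the seed sampling used above: random.sample over the full
# Cartesian product draws without replacement, so the (seed1, seed2) pairs
# are guaranteed unique. Toy numbers stand in for _MAX_SMALL_NUMBER/num_runs.
def _seed_sampling_example(max_small_number=10, num_runs=3):
    import itertools
    import random
    pairs = random.sample(list(itertools.product(range(1, max_small_number + 1), repeat=2)), num_runs)
    assert len(set(pairs)) == num_runs  # No duplicate seed pairs
    return pairs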
def create_irs_data(step_data, input_step, params, common_db):  # , run):
    # Creates Annotations step from input sequences/annotations.
    # Step's subdirectory 'run_dir' contains input and output calculation files.
    SeqIO = import_bio_seq_io()
    files_to_zip = []
    calc_seq_idents = []

    step = input_step.project.new_step(AnnotationsStep, step_data)
    # Set sequences
    step.set_sequences(input_step.all_sequences())
    ensure_directory(step.step_file('run_dir'))
    for seq_ident in input_step.all_sequences():
        out_file = step.step_file('run_dir', f'{seq_ident}.out')
        if not os.path.isfile(out_file):
            seq_rec = input_step.get_sequence_record(seq_ident)
            # Set fasta file for calculation
            files_to_zip.append(step.step_file('run_dir', f'{seq_ident}.fa'))
            SeqIO.write([seq_rec], files_to_zip[-1], 'fasta')
            calc_seq_idents.append(seq_ident)
        elif not os.path.isfile(step.step_file(f'{seq_ident}.gb')):
            calc_seq_idents.append(seq_ident)

    if files_to_zip:
        # Store finish.yml
        finish_f = step.step_file('finish.yml')
        write_yaml(dict(fa_files=files_to_zip), finish_f)

        run = True  # ToDo: ...
        step.save(completed=False)
        if run:
            run_module_script(run_irs_mummer, step)
            finish_irs_data(step, common_db, calc_seq_idents=calc_seq_idents)
        else:
            files_to_zip.append(finish_f)
            set_run_instructions(run_irs_mummer, step, files_to_zip, _instructions)
    #
    elif calc_seq_idents:
        finish_irs_data(step, common_db, calc_seq_idents=calc_seq_idents)
    elif params.force_mummer_parse:
        finish_irs_data(step, common_db)
    #
    return step
def __init__(self, project, step_data, remove_data=False, update_mode=False, no_check=False, step_directory=None):
    assert project.__class__.__name__ == 'RunCommand', self.__class__.__name__  # For now
    self.project = project
    self._step_data = step_data
    # step_data['step_name'] is a string or a list of strings for substeps
    self._step_name_list = step_directory or \
        ([step_data['step_name']] if isinstance(step_data['step_name'], str) else step_data['step_name'])
    self.directory = os.path.join(*self._step_name_list)
    self._update_mode = update_mode

    # Call init data method
    if remove_data:
        remove_directory(self.directory, create=True)
        d = None
    else:
        d = self.get_description()
        if not d:
            ensure_directory(self.directory)

    if d:
        if d['data_type'] != self._STEP_TYPE:
            raise ZCItoolsValueError(
                f"Step class of type '{self._STEP_TYPE}' created with data of type '{d['data_type']}'!")
        type_desc = d['data']
        # Update project data
        self._step_data.update((k, v) for k, v in d['project'].items() if k not in self._step_data)
    else:
        type_desc = None
    #
    self._init_data(type_desc)

    # Check data if it exists and the step is not set in update mode
    if type_desc and not self._update_mode and self.is_completed() and not no_check:
        self._check_data()
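# Minimal illustration of the project-data merge in __init__ above: update()
# with a filtered generator copies only keys that step_data does not already
# define, so current values win over stored ones (toy dicts, hypothetical keys).
def _merge_project_data_example():
    step_data = {'step_name': 's1'}
    stored = {'step_name': 'old_name', 'created': '2020-01-01'}
    step_data.update((k, v) for k, v in stored.items() if k not in step_data)
    assert step_data == {'step_name': 's1', 'created': '2020-01-01'}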
def run(self):
    import os.path
    from ..utils.import_methods import import_bio_seq_io
    from ..utils.helpers import get_bio_io_type, feature_qualifiers_to_desc
    from common_utils.file_utils import ensure_directory, write_fasta, basename_no_ext

    args = self.args
    od = args.output_directory
    SeqIO = import_bio_seq_io()
    ensure_directory(od)
    type_ = args.filter_type

    for i_filename in args.input_files:
        # Note: one sequence in one file!
        seq_rec = SeqIO.read(i_filename, get_bio_io_type(i_filename, args.input_format))
        # ToDo: filter by something?
        # ToDo: sort by something?
        write_fasta(
            os.path.join(od, f"extract_{basename_no_ext(i_filename)}.fasta"),
            ((feature_qualifiers_to_desc(f), str(f.extract(seq_rec).seq))
             for f in seq_rec.features
             if f.location and f.type == type_ and 'gene' in f.qualifiers))
def init_project(project, dirname, project_desc, workflow, workflow_parameters):
    if os.path.isfile('project_log.yml'):
        print('Warning: init project called on an existing project!')
        print(f'Warning: project {dirname} was not created!')
    elif ensure_directory(dirname, check_empty=True):
        # Add settings file
        settings = dict(settings_defaults)
        if workflow:
            if workflow_parameters:
                w_pars = dict(x.split('=') for x in workflow_parameters.split(';'))
            else:
                w_pars = dict()
            wf_cls = project.get_workflow_cls(workflow)
            if (not_in := [p for p in wf_cls.required_parameters() if p not in w_pars]):
                raise ZCItoolsValueError(f"Workflow's parameters not specified: {', '.join(not_in)}!")
            settings['workflow'] = workflow
            settings['workflow_parameters'] = wf_cls.format_parameters(w_pars)
        write_yaml(settings, os.path.join(dirname, 'settings.yml'))

        # Create empty project_log.yml file
        with open(os.path.join(dirname, 'project_log.yml'), 'w') as r:
            pass

        # Set README.txt file
        with open(os.path.join(dirname, 'README.txt'), 'w') as r:
            if project_desc:
                r.write(f'Project description:\n{project_desc}\n')
            r.write(_readme)
            if workflow:
                r.write(_wf_readme.format(workflow=workflow))
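# Sketch of the workflow_parameters parsing above: a ';'-separated list of
# 'key=value' pairs becomes a dict of strings (hypothetical parameter names).
def _parse_workflow_parameters_example():
    workflow_parameters = 'referent_genome=NC_000001;length=25000'
    w_pars = dict(x.split('=') for x in workflow_parameters.split(';'))
    assert w_pars == {'referent_genome': 'NC_000001', 'length': '25000'}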
def create_permutations(project, step_data, raw_file, permutations, num_traits=None, run=False):
    # Check input files
    map_file = raw_file.replace('.raw', '.map')
    data_dir, base_raw_file = os.path.split(raw_file)
    tmp_files = ('tmp.00m', 'tmp.00c', 'tmp.00r')
    for mf in (raw_file, map_file):
        if not os.path.isfile(mf):
            raise ZCItoolsValueError(f"Input MapMaker file {mf} doesn't exist!")
    for qf in tmp_files:
        f = os.path.join(data_dir, qf)
        if not os.path.isfile(f):
            raise ZCItoolsValueError(f"Input Windows QTL Cartographer file {qf} doesn't exist!")
    #
    step = QTLCartStep(project, step_data, remove_data=True)
    step.set_data(num_traits, permutations)

    # Copy input files
    files_to_zip = []
    for qf in tmp_files:
        files_to_zip.append(step.step_file(qf))
        copy_file(os.path.join(data_dir, qf), files_to_zip[-1])

    # Create trait directories
    # ToDo: find max traits and fix it/set default
    assert num_traits and num_traits > 0, num_traits
    trait_dirs = []
    for t_idx in range(1, num_traits + 1):
        trait_dirs.append(step.trait_dir(t_idx))
        t_dir = step.step_file(trait_dirs[-1])
        ensure_directory(t_dir)
        files_to_zip.append(os.path.join(t_dir, 'qtlcart.rc'))
        write_str_in_file(files_to_zip[-1], _qtlcart_rc.format(trait=t_idx, num_traits=num_traits))
        # # Create links to input files
        # for qf in tmp_files:
        #     link_file(os.path.join('..', qf), os.path.join(t_dir, qf))
    #
    files_to_zip.append(step.step_file('finish.yml'))
    write_yaml(dict(permutations=permutations, trait_dirs=trait_dirs), files_to_zip[-1])

    # Stores description.yml
    step.save(completed=run)

    # Run or set instructions
    if run:
        run_module_script(run_qtl_cart_perm, step)
    else:
        set_run_instructions(run_qtl_cart_perm, step, files_to_zip, _instructions)
    #
    return step
def create_circos_correlation(project, step_data, params):
    # Read correlation data
    cm = None
    if params.input_filename:
        cm = CorrelationMatrix.from_file(params.input_filename)
    if not cm:
        raise ZCItoolsValueError('No correlation input data!')
    num_c = cm.num_columns()
    if num_c < 2:
        raise ZCItoolsValueError('Not much of a matrix!')

    step = ImagesStep(project, step_data, remove_data=True)
    one_width = params.one_width
    gap_correlations = params.gap_correlations
    ow_2 = one_width // 2
    one_plus_gap = one_width + gap_correlations

    # Note: column lowercase names are used as column identifiers
    data_dir = step.step_file('data')
    etc_dir = step.step_file('etc')
    ensure_directory(data_dir)
    ensure_directory(etc_dir)

    colors = dict((lc, 'green') for lc in cm._columns_lower)  # ToDo: some defaults
    colors['plus_'] = 'blue'
    colors['minus_'] = 'red'
    for col_def in params.group_color:
        col_fields = col_def.split(',', 1)
        if len(col_fields) == 2 and cm.check_column(col_fields[0]):
            colors[cm.check_column(col_fields[0])] = col_fields[1]
        else:
            print(f"Warning: '{col_def}' is not a column color definition!")

    # data directory
    # karyotype.txt: defines groups (as chromosomes)
    #   chr - <name> <label> <start> <end> <color>
    gl = (num_c - 1) * one_width + (num_c - 2) * gap_correlations  # group length
    write_str_in_file(os.path.join(data_dir, 'karyotype.txt'),
                      '\n'.join(f"chr - {lc} {c} 0 {gl} color_{lc}"
                                for lc, c in zip(cm._columns_lower, cm._columns)))

    # tiles.txt: defines abs(correlation) == 1 intervals, as tiles
    #   <name> <start> <end> [options]
    with open(os.path.join(data_dir, 'tiles.txt'), 'w') as out:
        for idx1, c1 in enumerate(cm._columns_lower):
            for idx2, c2 in enumerate(cm._columns_lower):
                if idx1 == idx2:
                    continue
                pos = (idx1 - idx2 - 1) if idx1 > idx2 else (idx1 - idx2 + (num_c - 1))
                start = pos * one_plus_gap
                out.write(f"{c1} {start} {start + one_width} fill_color=color_{c2}\n")

    # links.txt: defines correlations as links
    #   <cell_idx> <group_1> <start_1> <end_1> color=color_{plus|minus}_,dist={int}
    #   <cell_idx> <group_2> <start_2> <end_2> color=color_{plus|minus}_,dist={int}
    with open(os.path.join(data_dir, 'links.txt'), 'w') as out:
        cell_idx = 0
        for idx1, c1 in enumerate(cm._columns_lower):
            rest_c = cm._columns_lower[idx1 + 1:]
            for idx2, c2 in enumerate(rest_c):
                corr = cm.get(c1, c2)
                if corr is not None:
                    w = round(abs(corr) * one_width)
                    w_1 = w // 2
                    w_2 = w - w_1  # - 1?
                    centar = ow_2 + idx2 * one_plus_gap
                    color = 'plus_' if corr >= 0 else 'minus_'
                    dist = min(idx2 + 1, idx1 + (len(rest_c) - idx2))
                    atts = f"color=color_{color},dist={dist}"
                    out.write(f"cell_{cell_idx} {c1} {gl - centar - w_2} {gl - centar + w_1} {atts}\n")
                    out.write(f"cell_{cell_idx} {c2} {centar - w_1} {centar + w_2} {atts}\n")
                    cell_idx += 1

    # etc directory
    write_str_in_file(os.path.join(etc_dir, 'circos.conf'),
                      _circos_conf.format(colors='\n'.join(f"color_{lc} = {c}"
                                                           for lc, c in colors.items())))

    subprocess.run(['circos', '-conf', 'etc/circos.conf'], cwd=step.directory)

    # View it
    if params.show_image:
        image_viewer = get_settings().get('image_viewer')
        if image_viewer:
            subprocess.Popen([image_viewer, step.step_file('circos.png')])
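# Worked example of the tile-position arithmetic above, with num_c = 3 columns.
# Each group's axis holds (num_c - 1) tile slots; the branching formula assigns
# every other column exactly one slot in 0..num_c-2 on each group's axis.
def _tile_positions_example(num_c=3):
    positions = {}
    for idx1 in range(num_c):
        for idx2 in range(num_c):
            if idx1 != idx2:
                pos = (idx1 - idx2 - 1) if idx1 > idx2 else (idx1 - idx2 + (num_c - 1))
                positions[(idx1, idx2)] = pos
    # Every group gets each slot 0..num_c-2 exactly once
    assert all(sorted(pos for (i1, _), pos in positions.items() if i1 == i)
               == list(range(num_c - 1)) for i in range(num_c))
    return positions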
            ]
            rows.append(row)
            taxid = ncbi_2_taxid[seq_ident]
            search_in = set(all_taxids)
            search_in.discard(taxid)
            close_taxids = ncbi_tax.find_close_taxids(taxid, ncbi_2_max_taxid[seq_ident], search_in)
            if not close_taxids:
                print(f"Warning: sequence {seq_ident} doesn't have a close relative in the accession set!")
                continue

            f_dir = step.step_file('repair_ns', seq_ident)
            ensure_directory(f_dir)
            # seq_fasta = os.path.join(f_dir, f"{seq_ident}.fa")
            # write_fasta(seq_fasta, [(seq_ident, seq_data['_seq'].seq)])
            # executor.submit(_run_manage_ns, seq_ident, sequences, ns, f_dir, [taxid_2_ncbi[t] for t in close_taxids])
            _run_manage_ns(seq_ident, sequences, ns, f_dir, [taxid_2_ncbi[t] for t in close_taxids])
        #
        columns = [('seq_ident', 'seq_ident'), ('length', 'int'), ('num_ns_parts', 'int'),
                   ('ns_length', 'int'), ('close_seq_idents', 'str'), ('fix', 'str')]
        step.set_table_data(rows, columns)
    else:
        step.set_columns([('seq_ident', 'seq_ident')])  # Dummy table
    step.save()
def create_irs_data(step_data, input_step, params):
    # Creates Annotations step from input sequences/annotations.
    # Step's subdirectory 'run_dir' contains input and output calculation files.
    SeqIO = import_bio_seq_io()
    seq_idents = input_step.all_sequences()
    step = input_step.project.new_step(AnnotationsStep, step_data)
    step.set_sequences(seq_idents)
    # seq_ident -> mummer data ([length, start_1, start_2])
    mummer_results = step.get_type_description_elem('mummer_results', default=dict())
    #
    ensure_directory(step.step_file('run_dir'))
    calc_mummer = []  # Tuples (seq_ident, fasta file, mummer output file)

    # MUMmer
    for seq_ident in sorted(seq_idents - set(mummer_results)):
        fa_file = step.step_file('run_dir', f'{seq_ident}.fa')
        mummer_res_file = step.step_file('run_dir', f'{seq_ident}.out')
        if not os.path.isfile(fa_file):
            seq_rec = input_step.get_sequence_record(seq_ident)
            SeqIO.write([seq_rec], fa_file, 'fasta')
            calc_mummer.append((seq_ident, fa_file, mummer_res_file))
        elif not os.path.isfile(mummer_res_file):
            calc_mummer.append((seq_ident, fa_file, mummer_res_file))

    # Run mummer
    if calc_mummer:
        mummer_exe = 'repeat-match'  # ToDo:
        n = 3000
        threads = multiprocessing.cpu_count()
        with ThreadPoolExecutor(max_workers=threads) as executor:
            for seq_ident, fa_file, mummer_res_file in calc_mummer:
                executor.submit(_run_single, mummer_exe, n, fa_file, mummer_res_file)

        for seq_ident, _, mummer_res_file in calc_mummer:
            rep = _read_mummer_repeat(mummer_res_file)
            if not rep:
                raise ZCItoolsValueError(f'No repeat for sequence {seq_ident}!')
            mummer_results[seq_ident] = rep

    # Find sequences to extend with alignment
    files_to_zip = []
    calc_mafft = []
    for seq_ident in sorted(seq_idents):
        length, s1, s2 = mummer_results[seq_ident]
        if length >= 23000:
            continue
        if step.is_file('run_dir', f'{seq_ident}_right_align.fa') and \
                step.is_file('run_dir', f'{seq_ident}_left_align.fa'):
            continue
        #
        calc_mafft.append(seq_ident)
        _seq = input_step.get_sequence_record(seq_ident).seq
        seq = str(_seq)
        comp_seq = str(_seq.complement())
        missing = 26000 - length
        # Right side
        p1 = _extract_subseq_plus(seq, s1 + length, missing)
        p2 = _extract_subseq_minus(comp_seq, s2 - length, missing)
        assert len(p1) == len(p2), (length, s1, s2, missing, (len(p1), len(p2)))
        files_to_zip.append(step.step_file('run_dir', f'{seq_ident}_right.fa'))
        write_fasta(files_to_zip[-1], [('p1', p1), ('p2', p2)])
        # Left side
        p1 = _extract_subseq_minus(comp_seq, s1 - 1, missing)
        p2 = _extract_subseq_plus(seq, s2 + 1, missing)
        assert len(p1) == len(p2), (length, s1, s2, missing, (len(p1), len(p2)))
        files_to_zip.append(step.step_file('run_dir', f'{seq_ident}_left.fa'))
        write_fasta(files_to_zip[-1], [('p1', p1), ('p2', p2)])

    # Mafft
    if calc_mafft:
        finish_f = step.step_file('finish.yml')
        write_yaml(dict(calc_seq_idents=calc_mafft), finish_f)

        run = True  # ToDo: ...
        step.save(additional_data=dict(mummer_results=mummer_results), completed=False)
        if run:
            run_module_script(run_mafft_irs, step)
            finish_irs_data(step)
        else:
            files_to_zip.append(finish_f)
            set_run_instructions(run_mafft_irs, step, files_to_zip, _instructions)
    #
    elif params.force_parse:
        finish_irs_data(step)
    return step
def orientate_chloroplast_start(step_data, annotation_step, params):
    # Find referent genome.
    # For each sequence different than the referent, a directory named <seq_ident> is created.
    # It contains files:
    #  - {lsc|ira|ss}_{plus|minus}.fa       : input alignment files, containing 2 sequences.
    #  - align_{lsc|ira|ss}_{plus|minus}.fa : result alignment files.
    seq_idents = annotation_step.all_sequences()  # set
    ref_ident = find_referent_genome(seq_idents, params.referent_genome)
    #
    length = params.length_to_check
    step = annotation_step.project.new_step(ChloroplastOrientateStep, step_data, remove_data=False)
    sequence_data = step.get_type_description_elem('sequence_data', default=dict())
    #
    seq_rec = annotation_step.get_sequence_record(ref_ident)
    partition = find_chloroplast_partition(seq_rec)
    ref_parts = [str(partition.get_part_by_name(n).extract(seq_rec).seq)[:length] for n in _part_names]

    files_to_zip = []
    align_files = []
    #
    all_versions = ('plus', 'minus', 'plus_c', 'minus_c') if params.complement else ('plus', 'minus')
    for seq_ident in sorted(seq_idents):
        seq_rec = None
        if seq_ident not in sequence_data:
            seq_rec = annotation_step.get_sequence_record(seq_ident)
            partition = find_chloroplast_partition(seq_rec)

            # Count gene orientation
            l_seq = len(seq_rec)
            in_parts = partition.put_features_in_parts(
                Feature(l_seq, feature=f) for f in seq_rec.features if f.type == 'gene')

            lsc_count = sum(f.feature.strand if any(x in f.name for x in ('rpl', 'rps')) else 0
                            for f in in_parts.get('lsc', []))
            ssc_count = sum(f.feature.strand for f in in_parts.get('ssc', []))
            ira_count = sum(f.feature.strand if 'rrn' in f.name else 0 for f in in_parts.get('ira', []))

            sequence_data[seq_ident] = dict(
                length=len(seq_rec),
                lsc=(lsc_count <= 0), lsc_count=lsc_count, lsc_length=len(partition.get_part_by_name('lsc')),
                ssc=(ssc_count <= 0), ssc_count=ssc_count, ssc_length=len(partition.get_part_by_name('ssc')),
                ira=(ira_count >= 0), ira_count=ira_count, ira_length=len(partition.get_part_by_name('ira')))

        if all(all(step.is_file(seq_ident, f'align_{n}_{v}.fa') for v in all_versions) for n in _part_names):
            continue
        #
        if seq_rec is None:
            seq_rec = annotation_step.get_sequence_record(seq_ident)
            partition = find_chloroplast_partition(seq_rec)

        for n, ref_p in zip(_part_names, ref_parts):
            # Find missing output files
            _num = len(align_files)
            for x in all_versions:
                if not step.is_file(seq_ident, f'align_{n}_{x}.fa'):
                    files_to_zip.append(step.step_file(seq_ident, f'{n}_{x}.fa'))
                    align_files.append((seq_ident, n, x))
            if _num == len(align_files):
                continue

            # Store input files
            if all(step.is_file(seq_ident, f'align_{n}_{v}.fa') for v in all_versions):
                continue
            ensure_directory(step.step_file(seq_ident))
            part_s = partition.get_part_by_name(n).extract(seq_rec)

            f_p = step.step_file(seq_ident, f'{n}_plus.fa')
            f_p_c = step.step_file(seq_ident, f'{n}_plus_c.fa')
            if not os.path.isfile(f_p):
                write_fasta(f_p, [(ref_ident, ref_p), (seq_ident, str(part_s.seq)[:length])])
            if not os.path.isfile(f_p_c):
                write_fasta(f_p_c, [(ref_ident, ref_p),
                                    (seq_ident, str(part_s.reverse_complement().seq)[:(-length-1):-1])])

            f_m = step.step_file(seq_ident, f'{n}_minus.fa')
            f_m_c = step.step_file(seq_ident, f'{n}_minus_c.fa')
            if not os.path.isfile(f_m):
                write_fasta(f_m, [(ref_ident, ref_p),
                                  (seq_ident, str(part_s.reverse_complement().seq)[:length])])
            if not os.path.isfile(f_m_c):
                write_fasta(f_m_c, [(ref_ident, ref_p), (seq_ident, str(part_s.seq)[:(-length-1):-1])])

    #
    output_file = f"{params.output_file_prefix}_{length}{'_c' if params.complement else ''}.xlsx"
    data = dict(sequence_data=sequence_data,
                check_length=length,
                output_file=output_file,
                complement=params.complement)

    if align_files:
        # Store finish.yml
        finish_f = step.step_file('finish.yml')
        write_yaml(dict(align_files=align_files), finish_f)

        run = True  # ToDo: ...
        step.save(data, completed=False)
        if run:
            run_module_script(run_orientate, step)
            orientate_chloroplast_finish(step)  # , common_db, calc_seq_idents=calc_seq_idents)
        else:
            files_to_zip.append(finish_f)
            set_run_instructions(run_orientate, step, files_to_zip, _instructions)
    #
    elif params.force_parse:
        step.save(data)
        orientate_chloroplast_finish(step)  # , common_db, calc_seq_idents=calc_seq_idents)
    #
    else:
        step.save(data, completed=False)
    return step
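# Toy illustration of the strand-count convention used above (hypothetical
# strand values): plus-strand genes contribute +1, minus-strand genes -1, so
# the sign of the sum summarizes a region's dominant gene orientation. The
# exact meaning of the stored booleans is project-specific.
def _strand_count_example():
    ssc_strands = [+1, -1, -1]    # E.g. three genes located in the SSC
    ssc_count = sum(ssc_strands)  # -> -1
    return ssc_count <= 0         # Matches ssc=(ssc_count <= 0) above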