def main(in_path, out_folder, fasta_path, deselected_scaffolds_path):
    """Write one EMBL file per selected scaffold into ``out_folder``.

    Scaffold coordinates are looked up first, then the EMBL file at
    ``in_path`` is parsed. For every coordinate row whose ``header`` is not
    deselected, the matching slice of the first record's sequence is wrapped
    in a new ``SeqRecord``, passed through ``process_record_features`` and
    written to ``<out_folder>/<id>.embl``.

    Args:
        in_path: Path of the input file, parsed with ``SeqIO.parse`` in
            "embl" format.
        out_folder: Output directory; created (including parents) when it
            does not exist yet.
        fasta_path: When an empty string, coordinates come from
            ``get_scaffold_coords_by_source_features(in_path)``; otherwise
            from ``get_scaffold_coords_by_fasta(in_path, fasta_path)``.
        deselected_scaffolds_path: When not an empty string, it is loaded
            with ``hpf.l`` and scaffolds whose ``header`` value is contained
            in the result are skipped.

    Raises:
        SystemExit: With exit code 1 when the output folder cannot be
            created.
    """
    if fasta_path == "":
        coords_df = get_scaffold_coords_by_source_features(in_path)
    else:
        coords_df = get_scaffold_coords_by_fasta(in_path, fasta_path)

    deselected_scaffolds = []
    if deselected_scaffolds_path != "":
        deselected_scaffolds = hpf.l(deselected_scaffolds_path)

    records = list(SeqIO.parse(in_path, "embl"))

    # Create the directory through the os module instead of a shell string,
    # so paths containing spaces or shell metacharacters are handled safely.
    try:
        os.makedirs(out_folder)
    except OSError as err:
        # An already-existing directory is not an error (mirrors `mkdir -p`).
        if not os.path.isdir(out_folder):
            sys.stderr.write(
                "Error occurred when checking for the presence of output "
                "folder or creating the output folder (" + out_folder
                + "): " + str(err) + "\n")
            sys.exit(1)

    # The full sequence string is identical for every scaffold; build it
    # lazily on first use so it is converted at most once.
    union_seq = None

    for _, coords_df_entry in coords_df.iterrows():
        scaff_name = coords_df_entry["header"]
        scaff_id = coords_df_entry["id"]
        if scaff_name in deselected_scaffolds:
            continue

        query_start_coord = int(coords_df_entry.start_coord)
        query_end_coord = int(coords_df_entry.end_coord)
        out_path = out_folder + "/" + scaff_id + ".embl"

        if union_seq is None:
            union_seq = str(records[0].seq)

        # NOTE(review): the slice stops at end_coord - 1, so if the
        # coordinates are 1-based inclusive the base at end_coord is left
        # out -- confirm against the coordinate helpers before changing.
        seq = union_seq[query_start_coord - 1:query_end_coord - 1]

        my_sequence_record = SeqRecord(
            Seq(seq),
            id=scaff_id,
            name=scaff_name,
            description="unknown_description",
            dbxrefs=[])
        my_sequence_record.seq.alphabet = generic_dna
        my_sequence_record.accession = "unknown_accession"
        my_sequence_record = process_record_features(
            records, coords_df, query_start_coord, query_end_coord,
            my_sequence_record)
        SeqIO.write(my_sequence_record, out_path, "embl")