# --- ORF unit: resolve input/output paths and load the DNA sequence ---
# `args`, `unit_name`, and `utils` are defined earlier in the file (not shown here).
if args.sampledata:
    data_file_name = "sample_data"
else:
    data_file_name = "rosalind_{0}.txt".format(unit_name.lower())
output_file_name = "output"
print("unit_name:", unit_name)
print("data_file_name:", data_file_name)

# Data and output files live inside the per-unit directory.
data_file = os.path.join(unit_name, data_file_name)
output_file = os.path.join(unit_name, output_file_name)
print("data file:", data_file)

# Collect all sequences from the FASTA file; only the first one is used below.
# (debug alternative: utils.ifasta_file("CONS/rosalind_cons.txt"))
seqs = [seq for seq_id, seq in utils.ifasta_file(data_file)]
dna_seq = seqs[0]

# NOTE(review): a better way would be to 'cut' the sequence at all stop
# codons, then parallelize the search for open-reading-frame substrings
# (which begin with a start codon) within each fragment. By 'cut' it would
# be better to use indexes and iterators. (Was a stray no-op string
# literal in the original; converted to a comment.)
print("\ndna_seq:", dna_seq)
# --- CONS unit: parse CLI args, load FASTA sequences, profile nucleotides ---
# `parser`, `unit_name`, `utils`, and `nts_profiler` are defined earlier in
# the file (not shown here).
args = parser.parse_args()
if args.sampledata:
    data_file_name = "sample_data"
else:
    data_file_name = "rosalind_{0}.txt".format(unit_name.lower())
output_file_name = "output"
data_file = os.path.join(unit_name, data_file_name)
output_file = os.path.join(unit_name, output_file_name)
print("data file:", data_file)

# FIX: the original had the computed `data_file` call commented out and read
# from a hard-coded debug path ("CONS/rosalind_cons.txt"), which silently
# ignored --sampledata and the per-unit path. Restored the real input file.
seqs = [seq for seq_id, seq in utils.ifasta_file(data_file)]
print("\n\n")
print(seqs)

# map(nts_profiler, *seqs) feeds one character from each sequence per call,
# i.e. profiles the sequences column by column.
profs = list(map(nts_profiler, *seqs))

# Union of all nucleotides seen across every column profile.
nts_reduction = set()
for prof in profs:
    nts_reduction.update(prof)
print(nts_reduction)

# Re-profile each column against the full nucleotide alphabet so every
# profile covers the same key set.
full_nts_profiler = partial(nts_profiler, nts_set=nts_reduction)
full_profs = list(map(full_nts_profiler, *seqs))