Example no. 1
0
File: main.py Project: giphahne/gen
    # Choose the input file: a fixed sample file when --sampledata is set,
    # otherwise the Rosalind download named after this unit
    # (e.g. "rosalind_orf.txt" for unit "ORF").
    if args.sampledata:
        data_file_name = "sample_data"
    else:
        data_file_name = "rosalind_{0}.txt".format(unit_name.lower())
        
    output_file_name = "output"
    
    print("unit_name:", unit_name)
    print("data_file_name:", data_file_name)
    # Input and output files live inside a per-unit directory named after the unit.
    data_file = os.path.join(unit_name, data_file_name)
    output_file = os.path.join(unit_name, output_file_name)
    print("data file:", data_file)
    
    # Collect every sequence from the FASTA file; the ids are discarded.
    # NOTE(review): assumes utils.ifasta_file yields (seq_id, seq) pairs -- confirm.
    seqs = []
    for seq_id, seq in utils.ifasta_file(data_file):
    #for seq_id, seq in utils.ifasta_file("CONS/rosalind_cons.txt"):
        seqs.append(seq)
        
    # Only the first sequence is used by the code below.
    dna_seq = seqs[0]
    
    """
    A better way to do this would be to 'cut' the seq at 
    all stop codons, and then parallelize the search for
    open reading frame substrings, which will begin with
    start codon, within each ORF substring. (by 'cut' it
    would be better to use indexes and iterators.)
    """
    
    
    print("\ndna_seq:", dna_seq)
Example no. 2
0
File: main.py Project: giphahne/gen
    # Parse CLI flags (the parser is built earlier, outside this fragment).
    args = parser.parse_args()

    # Choose the input file: a fixed sample file when --sampledata is set,
    # otherwise the Rosalind download named after this unit.
    if args.sampledata:
        data_file_name = "sample_data"
    else:
        data_file_name = "rosalind_{0}.txt".format(unit_name.lower())

    output_file_name = "output"

    # Input and output files live inside a per-unit directory named after the unit.
    data_file = os.path.join(unit_name, data_file_name)
    output_file = os.path.join(unit_name, output_file_name)
    print("data file:", data_file)

    # Collect every sequence from the FASTA file; the ids are discarded.
    # NOTE(review): the computed data_file is ignored here -- the hard-coded
    # CONS path below looks like a leftover debugging swap (the mirror image
    # of the commented-out line in the sibling fragment); confirm intent.
    seqs = []
    # for seq_id, seq in utils.ifasta_file(data_file):
    for seq_id, seq in utils.ifasta_file("CONS/rosalind_cons.txt"):
        seqs.append(seq)

    print("\n\n")
    print(seqs)

    # map(nts_profiler, *seqs) unpacks the sequences, so nts_profiler is called
    # once per alignment column with one character from each sequence (as with
    # zip(*seqs)), and iteration stops at the shortest sequence.
    # NOTE(review): if a per-sequence profile was intended this should be
    # map(nts_profiler, seqs) -- confirm against nts_profiler's signature.
    profs = list(map(nts_profiler, *seqs))

    # Union of everything seen across the per-column profiles (for dict-like
    # profiles, set.update collects their keys).
    nts_reduction = set()
    for prof in profs:
        nts_reduction.update(prof)

    print(nts_reduction)

    # Re-run the column-wise profiling with the full nucleotide set pinned via
    # partial, so every column's profile covers the same symbols.
    full_nts_profiler = partial(nts_profiler, nts_set=nts_reduction)
    full_profs = list(map(full_nts_profiler, *seqs))