def findBestSeq(seqobject):
    """Pick the forward reading frame with the longest peptide before a stop.

    seqobject: record exposing ``.seq`` (converted with ``str``) and ``.id``;
               presumably a Biopython-style SeqRecord -- confirm with callers.

    The sequence is translated with ``standard_code.sixframes`` and each
    translation is truncated at its first '*' (stop). Only the first three
    translations are compared; they are treated as the forward frames at
    offsets 0, 1 and 2, so the reverse-complement frames are never selected.

    Returns a tuple ``(longestseq, correctdnaseq)``: the longest truncated
    peptide and the DNA sequence sliced from the winning frame offset. Ties
    go to the lowest offset. If every forward frame is empty before its
    first stop, ``('', <sequence from offset 0>)`` is returned.
    """
    dna_seq = str(seqobject.seq)
    my_seq = DNA.makeSequence(dna_seq, seqobject.id)

    all_six = standard_code.sixframes(my_seq)
    # Keep only the peptide that precedes the first stop codon in each frame.
    seqlist = [frame.split('*')[0] for frame in all_six]

    longestseq = ''
    # Seed with frame 0 so the return value is always defined, even when no
    # frame produces a non-empty peptide.
    correctdnaseq = my_seq[0:]
    for offset, peptide in enumerate(seqlist[:3]):
        # Strict comparison: the earliest frame wins a tie.
        if len(longestseq) < len(peptide):
            longestseq = peptide
            correctdnaseq = my_seq[offset:]
    return longestseq, correctdnaseq
예제 #2
0
def translateSixFrame(seq):
    """Print the six-frame translation of seq.

    seq: passed unchanged to the default genetic code's ``sixframes``;
         presumably a cogent DNA sequence object -- confirm with callers.

    The translations are written to stdout. Nothing is returned.
    """
    from cogent.core.genetic_code import DEFAULT as standard_code
    translations = standard_code.sixframes(seq)
    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print(translations)
    return
예제 #3
0
def translateSixFrame(seq):
    """Print the six-frame translation of seq.

    seq: passed unchanged to the default genetic code's ``sixframes``;
         presumably a cogent DNA sequence object -- confirm with callers.

    The translations are written to stdout. Nothing is returned.
    """
    from cogent.core.genetic_code import DEFAULT as standard_code
    translations = standard_code.sixframes(seq)
    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print(translations)
    return
예제 #4
0
def assign_dna_reads_to_protein_database(query_fasta_fp, database_fasta_fp, 
                        output_fp, temp_dir = "/tmp", params = None):
    """Assign DNA reads to a database fasta of protein sequences.

    Wraps assign_reads_to_database, setting database and query types. All
    parameters are set to default unless params is passed. A temporary
    file must be written containing the translated sequences from the input
    query fasta file because BLAT cannot do this automatically.

    query_fasta_fp: absolute path to the query fasta file containing DNA
                   sequences.
    database_fasta_fp: absolute path to the database fasta file containing
                      protein sequences.
    output_fp: absolute path where the output file will be generated.
    temp_dir: optional. Change the location where the translated sequences
              will be written before being used as the query. Defaults to 
              /tmp. Must be an absolute path.
    params: optional. dict containing parameter settings to be used
                  instead of default values. Cannot change database or query
                  file types ('-t' and '-q').

    Raises ApplicationError if temp_dir is not absolute or if params tries
    to set '-t' or '-q'.

    Returns the result of assign_reads_to_database, described upstream as an
    open file object. The output format defaults to blast9 and should be
    parsable by the PyCogent BLAST parsers.

    The temporary file of translated reads is removed before returning,
    including when translation or the database search raises.
    """
    if params is None:
        params = {}

    # Both sides are protein: the DNA query is translated below before it is
    # handed to assign_reads_to_database.
    my_params = {'-t': 'prot',
              '-q': 'prot'
             }

    # make sure temp_dir specifies an absolute path
    if not isabs(temp_dir):
        raise ApplicationError("temp_dir must be an absolute path.")

    # if the user specified parameters other than default, then use them.
    # However, if they try to change the database or query types, raise an
    # application error.
    if '-t' in params or '-q' in params:
        raise ApplicationError("Cannot change database or query types " + \
                                "when using " + \
                                "assign_dna_reads_to_protein_database. " + \
                                "Use assign_reads_to_database instead.")
    
    my_params.update(params)

    # get six-frame translation of the input DNA sequences and write them to
    # temporary file.
    tmp = get_tmp_filename(tmp_dir=temp_dir, result_constructor=str)
    tmp_out = open(tmp, 'w')

    # Frame labels, in the order the translations are zipped against below.
    frames = [1,2,3,-1,-2,-3]

    # From here on the temporary file exists on disk, so make sure it is
    # cleaned up on every exit path.
    try:
        with tmp_out:
            with open(query_fasta_fp) as query_file:
                for label, sequence in MinimalFastaParser(query_file):
                    seq_id = label.split()[0]

                    s = DNA.makeSequence(sequence)
                    translations = standard_code.sixframes(s)

                    # Frame numbers are unique, so sorting the pairs orders
                    # them by frame alone: -3, -2, -1, 1, 2, 3.
                    for frame, translation in sorted(zip(frames, translations)):
                        entry = '>{seq_id}_frame_{frame}\n{trans}\n'
                        entry = entry.format(seq_id=seq_id, frame=frame,
                                             trans=translation)
                        tmp_out.write(entry)

        result = assign_reads_to_database(tmp, database_fasta_fp, output_fp, \
                                          params = my_params)
    finally:
        remove(tmp)

    return result