Python get_uniprot_subkeysの例

プログラミング言語: Python

名前空間/パッケージ名: index_unitprot_db

メソッド/関数: get_uniprot_subkeys

hotexamples.comのコード掲載数: 4

Python get_uniprot_subkeys - 4件のコード例が見つかりました。すべてオープンソースプロジェクトから抽出されたPythonのindex_unitprot_db.get_uniprot_subkeysの実例で、最も評価が高いものを厳選しています。コード例の評価を行っていただくことで、より質の高いコード例が表示されるようになります。

コード例 #1

ファイルを表示

ファイル: annotate_genes_with_swissprot.py プロジェクト: jakeyeung/alternative-splicing

def write_annotations_to_output(output_dic, output_file, summary_file):
    '''
    Write summary file rows, extended with their uniprot annotations, to a
    tab-delimited output file.

    Every row of the summary file is looked up in output_dic by its amino
    acid sequence. Rows without an entry are skipped; rows with an entry
    produce one output line per annotation stored for that sequence, so a
    summary row may yield 0 or more output lines.

    Args:
        output_dic: dict keyed by amino acid sequence. Each value maps every
            annotation column name ('feature', the three uniprot subkeys,
            'exon_start', 'exon_end') to a list; the lists are read in
            parallel by position.
        output_file: path of the tab-delimited file to create.
        summary_file: path of the tab-delimited summary file; its first row
            must be the header.

    Returns:
        Number of annotation rows written (the header row is not counted).

    Raises:
        ValueError: if the summary header lacks an expected column.
        StopIteration: if the summary file has no header row at all.
    '''
    # summary file colnames (unpacked so the expected count is enforced)
    gene_name_colname, miso_event_colname, reading_frame_colname, \
    nucleotide_seq_colname, amino_acid_seq_colname, gene_id_colname, \
    transcript_id_colname, exon_number_colname = \
        get_summary_file_colnames()
    summary_colnames = [gene_name_colname,
                        miso_event_colname,
                        reading_frame_colname,
                        nucleotide_seq_colname,
                        amino_acid_seq_colname,
                        gene_id_colname,
                        transcript_id_colname,
                        exon_number_colname]

    # annotation colnames
    start_colname, end_colname, descript_colname = get_uniprot_subkeys()
    exon_start_colname = 'exon_start'
    exon_end_colname = 'exon_end'
    feature_colname = 'feature'
    annotation_colnames = [feature_colname,
                           start_colname,
                           end_colname,
                           descript_colname,
                           exon_start_colname,
                           exon_end_colname]

    writecount = 0

    # NOTE(review): the binary modes are what the Python 2 csv module expects;
    # on Python 3 these would need text mode with newline='' -- confirm the
    # target interpreter before changing them.
    # The with-blocks guarantee both files are closed even if a row fails.
    with open(output_file, 'wb') as outfile:
        mywriter = csv.writer(outfile, delimiter='\t')

        # Write header to output file. Order matters.
        mywriter.writerow(summary_colnames + annotation_colnames)

        with open(summary_file, 'rb') as readfile:
            myreader = csv.reader(readfile, delimiter='\t')
            readheader = next(myreader)

            # Resolve column positions once instead of searching the header
            # again for every row and every annotation.
            aa_seq_index = readheader.index(amino_acid_seq_colname)
            summary_indices = [readheader.index(colname)
                               for colname in summary_colnames]

            for row in myreader:
                # amino acid seq is the key into output_dic
                aa_seq = row[aa_seq_index]

                # if no associated annotations, skip to next.
                if aa_seq not in output_dic:
                    continue
                annotations = output_dic[aa_seq]

                # One output line per annotation: the summary columns
                # followed by the i-th value of each annotation column.
                for i in range(len(annotations[feature_colname])):
                    row_to_write = [row[index] for index in summary_indices]
                    row_to_write.extend(annotations[colname][i]
                                        for colname in annotation_colnames)
                    mywriter.writerow(row_to_write)
                    writecount += 1
    return writecount

コード例 #2

ファイルを表示

def write_annotations_to_output(output_dic, output_file, summary_file):
    '''
    Copy the summary file to a tab-delimited output file, appending the
    uniprot annotation columns to each row that has annotations.

    The amino acid sequence of each summary row is used as the key into
    output_dic. A row whose sequence is not a key is skipped; otherwise one
    output line is written per annotation held for that sequence, so one
    summary row may produce 0 or more output lines.

    Args:
        output_dic: dict keyed by amino acid sequence. Each value maps every
            annotation column name ('feature', the three uniprot subkeys,
            'exon_start', 'exon_end') to a list; the lists are indexed in
            parallel.
        output_file: path of the tab-delimited file to create.
        summary_file: path of the tab-delimited summary file, header first.

    Returns:
        Count of annotation rows written, excluding the header row.

    Raises:
        ValueError: if an expected column is missing from the summary header.
        StopIteration: if the summary file contains no header row.
    '''
    # summary file colnames (unpacked so the expected count is enforced)
    gene_name_colname, miso_event_colname, reading_frame_colname, \
    nucleotide_seq_colname, amino_acid_seq_colname, gene_id_colname, \
    transcript_id_colname, exon_number_colname = \
        get_summary_file_colnames()
    summary_colnames = [
        gene_name_colname, miso_event_colname, reading_frame_colname,
        nucleotide_seq_colname, amino_acid_seq_colname, gene_id_colname,
        transcript_id_colname, exon_number_colname
    ]

    # annotation colnames
    start_colname, end_colname, descript_colname = get_uniprot_subkeys()
    exon_start_colname = 'exon_start'
    exon_end_colname = 'exon_end'
    feature_colname = 'feature'
    annotation_colnames = [
        feature_colname, start_colname, end_colname, descript_colname,
        exon_start_colname, exon_end_colname
    ]

    writecount = 0

    # NOTE(review): binary file modes match what the Python 2 csv module
    # expects; Python 3 would need text mode with newline='' -- confirm the
    # target interpreter before changing them.
    # Context managers close both files even when a row raises.
    with open(output_file, 'wb') as outfile:
        mywriter = csv.writer(outfile, delimiter='\t')

        # Write header to output file. Order matters.
        mywriter.writerow(summary_colnames + annotation_colnames)

        with open(summary_file, 'rb') as readfile:
            myreader = csv.reader(readfile, delimiter='\t')
            readheader = next(myreader)

            # Header positions are fixed for the whole file, so look them
            # up once rather than per row and per annotation.
            aa_seq_index = readheader.index(amino_acid_seq_colname)
            summary_indices = [
                readheader.index(colname) for colname in summary_colnames
            ]

            for row in myreader:
                # amino acid seq is the key into output_dic
                aa_seq = row[aa_seq_index]

                # if no associated annotations, skip to next.
                if aa_seq not in output_dic:
                    continue
                annotations = output_dic[aa_seq]

                # One output line per annotation: summary columns first,
                # then the i-th value of every annotation column.
                for i in range(len(annotations[feature_colname])):
                    row_to_write = [row[index] for index in summary_indices]
                    row_to_write.extend(
                        annotations[colname][i]
                        for colname in annotation_colnames)
                    mywriter.writerow(row_to_write)
                    writecount += 1
    return writecount

コード例 #3

ファイルを表示

ファイル: annotate_genes_with_swissprot.py プロジェクト: jakeyeung/alternative-splicing

def append_dic_if_feature_within_start_end(exon_start, exon_end,
                                           amino_acid_seq,
                                           uniprot_dic, gene_key, feature,
                                           output_dic):
    '''
    Record every annotation of one uniprot feature that overlaps an exon.

    The start, end and description lists stored under
    uniprot_dic[gene_key][feature] are walked in parallel. An annotation is
    kept unless it lies entirely outside the exon range, i.e. unless
    exon_start > feature_end or exon_end < feature_start.

    Kept annotations are appended to output_dic under amino_acid_seq, which
    is the amino acid sequence of the exon. The entry is created on first
    use and has the layout:
    {amino_acid_seq: {start_subkey: [...], end_subkey: [...],
                      descript_subkey: [...], 'exon_start': [...],
                      'exon_end': [...], 'feature': [...]}}
    where all six lists grow together, one element per kept annotation.

    Returns a tuple (output_dic, match_count): the same dictionary object
    that was passed in (modified in place) and the number of annotations
    appended by this call.
    '''
    # subkeys for the feature starts, ends and descriptions
    start_subkey, end_subkey, descript_subkey = get_uniprot_subkeys()
    # all subkeys of an output entry, in the order their values are stored
    subkeys = [start_subkey, end_subkey, descript_subkey,
               'exon_start', 'exon_end', 'feature']

    # parallel annotation lists of the requested feature
    feature_record = uniprot_dic[gene_key][feature]
    starts = feature_record[start_subkey]
    ends = feature_record[end_subkey]
    descriptions = feature_record[descript_subkey]

    match_count = 0
    for feature_start, feature_end, descript in zip(starts, ends,
                                                    descriptions):
        # feature outside of the exon range: nothing to store
        if exon_start > feature_end or exon_end < feature_start:
            continue

        # create the entry with empty lists the first time this sequence
        # gets a match
        if amino_acid_seq not in output_dic:
            output_dic[amino_acid_seq] = dict((subkey, [])
                                              for subkey in subkeys)
        entry = output_dic[amino_acid_seq]

        # append one value per subkey, keeping the lists aligned
        subvals = [feature_start, feature_end, descript,
                   exon_start, exon_end, feature]
        for subkey, subval in zip(subkeys, subvals):
            entry[subkey].append(subval)
        match_count += 1
    return output_dic, match_count

コード例 #4

ファイルを表示

def append_dic_if_feature_within_start_end(exon_start, exon_end,
                                           amino_acid_seq, uniprot_dic,
                                           gene_key, feature, output_dic):
    '''
    Collect the annotations of a single uniprot feature that fall within
    an exon's start/end range.

    uniprot_dic[gene_key][feature] holds three parallel lists (starts, ends,
    descriptions). Each annotation is skipped when it lies wholly outside
    the exon, meaning exon_start > feature_end or exon_end < feature_start;
    every other annotation counts as a match.

    Matches are stored in output_dic under amino_acid_seq (the amino acid
    sequence of the exon). The structure built is:
    {amino_acid_seq: {start_subkey: [...], end_subkey: [...],
                      descript_subkey: [...], 'exon_start': [...],
                      'exon_end': [...], 'feature': [...]}}
    with one element added to each of the six lists per match.

    Returns (output_dic, match_count), where output_dic is the dictionary
    passed in, updated in place, and match_count is the number of matches
    found by this call.
    '''
    # subkeys used by uniprot_dic for starts, ends and descriptions
    start_subkey, end_subkey, descript_subkey = get_uniprot_subkeys()
    # full, ordered set of subkeys of an output entry
    output_subkeys = (start_subkey, end_subkey, descript_subkey,
                      'exon_start', 'exon_end', 'feature')

    # annotation lists for this gene and feature
    annotations = uniprot_dic[gene_key][feature]
    feature_start_list = annotations[start_subkey]
    feature_end_list = annotations[end_subkey]
    descript_list = annotations[descript_subkey]

    match_count = 0
    triples = zip(feature_start_list, feature_end_list, descript_list)
    for feature_start, feature_end, descript in triples:
        outside_exon = exon_start > feature_end or exon_end < feature_start
        if outside_exon:
            # not in the relevant range, go to the next start/end
            continue

        if amino_acid_seq not in output_dic:
            # first match for this sequence: start every list empty
            fresh_entry = {}
            for subkey in output_subkeys:
                fresh_entry[subkey] = []
            output_dic[amino_acid_seq] = fresh_entry

        # store the values under their subkeys, in matching order
        stored = output_dic[amino_acid_seq]
        matched_values = (feature_start, feature_end, descript,
                          exon_start, exon_end, feature)
        for subkey, subval in zip(output_subkeys, matched_values):
            stored[subkey].append(subval)
        match_count += 1
    return output_dic, match_count