Ejemplo n.º 1
0
def run(filename, query_gene_name):
    '''Run every pipeline stage on one input file and return the final result.

    The stages are executed strictly in this order: biogrid_uniprot_conv,
    id_parser, id_converter, uniprot_gene_name_conv, clean_file and
    calc_weighted_score. Each stage receives the output of the previous one
    as its ``df`` argument (the stages live in other modules; presumably
    these are dataframes, confirm against those modules).

    filename: passed unchanged to id_parser.run.
    query_gene_name: passed unchanged to uniprot_gene_name_conv.run.

    Returns whatever calc_weighted_score.run returns.
    '''
    # The return value of this first stage is not used by the pipeline.
    biogrid_uniprot_conv.run()

    parsed = id_parser.run(filename)
    converted = id_converter.run(df=parsed)
    named = uniprot_gene_name_conv.run(
        df=converted,
        query_gene_name=query_gene_name,
    )
    return calc_weighted_score.run(df=clean_file.run(df=named))
Ejemplo n.º 2
0
        df['Interactor name'])
    df['Interactor name'] = np.where(
        (df['Parsed A gene name'] == query_gene_name) &
        (df['Parsed B gene name'] == query_gene_name),
        df['Parsed A gene name'], df['Interactor name'])
    return df


def run(df, query_gene_name):
    '''Add gene name columns and an 'Interactor name' column to a dataframe.

    find_gene_name is applied first with the label 'Parsed A' and then, on
    that result, with the label 'Parsed B'. The outcome is handed to
    interactor_column together with query_gene_name. Rows of the resulting
    dataframe that contain NaN are dropped and the index is renumbered
    before the dataframe is returned.

    Progress messages are printed before and after each of the two steps.
    '''
    print('Generating common gene names...')
    with_gene_names = df
    # Order matters: the 'Parsed B' pass runs on the output of 'Parsed A'.
    for label in ('Parsed A', 'Parsed B'):
        with_gene_names = find_gene_name(df=with_gene_names, label=label)
    print('Common gene names generated')

    print('Generating Interactor column...')
    with_interactor = interactor_column(
        df=with_gene_names, query_gene_name=query_gene_name)
    # Drop incomplete rows, then rebuild a contiguous index.
    result = with_interactor.dropna(axis=0).reset_index(drop=True)
    print('Interactor column generated')
    return result


if __name__ == '__main__':
    # Script entry point: parse and convert a fixed sample file, then print
    # the result of run() for the matching query gene.
    source_file = 'clusteredQuery_MST1R.txt'
    query = 'MST1R'
    converted = id_converter.run(df=id_parser.run(filename=source_file))
    print(run(df=converted, query_gene_name=query))