def run(filename, query_gene_name):
    '''Run the interaction-scoring pipeline for one input file.

    The stages are executed strictly in this order, each one receiving
    the dataframe produced by the previous stage:

    1. ``biogrid_uniprot_conv.run()`` -- called with no arguments and its
       return value is discarded (presumably it prepares conversion data
       used by later stages; TODO confirm against that module).
    2. ``id_parser.run(filename)``
    3. ``id_converter.run(df=...)``
    4. ``uniprot_gene_name_conv.run(df=..., query_gene_name=...)``
    5. ``clean_file.run(df=...)``
    6. ``calc_weighted_score.run(df=...)``

    Parameters:
        filename: passed positionally to ``id_parser.run``.
        query_gene_name: forwarded to ``uniprot_gene_name_conv.run``.

    Returns:
        Whatever ``calc_weighted_score.run`` returns for the cleaned
        dataframe.
    '''
    # Must run before the parsing stages; invoked only for its side effects.
    biogrid_uniprot_conv.run()

    parsed = id_parser.run(filename)
    converted = id_converter.run(df=parsed)
    named = uniprot_gene_name_conv.run(
        df=converted,
        query_gene_name=query_gene_name,
    )
    cleaned = clean_file.run(df=named)

    return calc_weighted_score.run(df=cleaned)
df['Interactor name']) df['Interactor name'] = np.where( (df['Parsed A gene name'] == query_gene_name) & (df['Parsed B gene name'] == query_gene_name), df['Parsed A gene name'], df['Interactor name']) return df def run(df, query_gene_name): '''Calls the find_gene_name function once to create a dataframe column of common gene names of protein A and again to create a column for the common gene names of protein B. An 'Interactor name' column is then generated for the dataframe with the interactor_column function. NaN values are dropped from the returned function, which will mostly be interactors that are not proteins or not human proteins. ''' print('Generating common gene names...') gene_name_a_df = find_gene_name(df=df, label='Parsed A') gene_name_b_df = find_gene_name(df=gene_name_a_df, label='Parsed B') print('Common gene names generated') print('Generating Interactor column...') interactor_column_df = interactor_column( df=gene_name_b_df, query_gene_name=query_gene_name).dropna(axis=0).reset_index(drop=True) print('Interactor column generated') return interactor_column_df if __name__ == '__main__': id_parsed_df = id_parser.run(filename='clusteredQuery_MST1R.txt') id_converted_df = id_converter.run(df=id_parsed_df) print(run(df=id_converted_df, query_gene_name='MST1R'))