def test_run_with_kegg_returns_dicts_of_dataframes_with_shape(self):
    """Default run returns a dict whose three score tables are 324x3 DataFrames."""
    results = _.all(expression_table=self.expression_table)
    self.assertIsInstance(results, dict)
    self.assertIsInstance(results['harmonic'], pd.DataFrame)
    # All three scoring methods should cover the same pathways and samples.
    expected_shape = (324, 3)
    for score_type in ('harmonic', 'geometric', 'min_p_val'):
        self.assertEqual(results[score_type].shape, expected_shape)
def test_pathway_assessor_returns_none_if_statistic_is_set_to_false(self):
    """Statistics switched off via keyword flags come back as None entries."""
    results = _.all(
        expression_table=self.expression_table,
        pathways=self.user_pathway_db,
        geometric=False,
        min_p_val=False,
    )
    # Both disabled statistics should be present as keys but hold None.
    for disabled_stat in ('geometric', 'min_p_val'):
        self.assertIsNone(results[disabled_stat])
def test_pathway_assessor_returns_three_dataframes_of_expected_values(
        self):
    """Spot-check all three scores for one user-defined pathway per sample."""
    user_pathways = {
        'Sample_pathway': ['SLC2A6', 'PHOSPHO1', 'PIKFYVE', 'VHL']
    }
    results = _.all(expression_table=self.expression_table,
                    pathways=user_pathways)
    # Expected values per scoring method, keyed the same way as `results`.
    expected = {
        'harmonic': {
            'Sample_A': 8.610084236222475,
            'Sample_B': 9.519349644266763,
            'Sample_C': 6.02244237008846
        },
        'geometric': {
            'Sample_A': 5.916555748731008,
            'Sample_B': 7.153859966001466,
            'Sample_C': 4.75439878004548
        },
        'min_p_val': {
            'Sample_A': 9.690912952571226,
            'Sample_B': 10.576744476960645,
            'Sample_C': 6.695180679017199
        },
    }
    self.assertEqual(len(results), 3)
    # Compare every sample of every method against its expected score.
    for score_type, expected_row in expected.items():
        actual_row = results[score_type].loc['Sample_pathway'].to_dict()
        for sample in ('Sample_A', 'Sample_B', 'Sample_C'):
            self.assertAlmostEqual(actual_row[sample], expected_row[sample])
def write_table(score_type):
    """Write one score table as TSV under the examples scores/ directory.

    Relies on the module-level names ``scores``, ``ascending``,
    ``examples_dir``, ``tumor`` and ``pw_name`` assigned in the
    ``__main__`` block below.
    """
    direction = 'suppression' if ascending else 'activation'
    output_f = '{}/scores/{}_{}_{}_{}'.format(
        examples_dir, tumor, pw_name, direction, score_type)
    scores[score_type].to_csv(output_f, sep='\t')
    print('Finished: {}'.format(output_f))


if __name__ == '__main__':
    # Run configuration for this example.
    pa_home = os.getenv('PATHWAY_ASSESSOR_HOME')
    tumor = 'blca_normal_slim'
    pw_name = 'kegg_immune'
    db_dir = '{}/pathway_assessor/databases/as_tables'.format(pa_home)
    ascending = True

    # Inputs live next to this script; pathway tables under the package.
    examples_dir = os.path.dirname(os.path.abspath(__file__))
    expression_table_f = '{}/{}'.format(examples_dir, tumor)
    pathways_f = '{}/{}.tsv'.format(db_dir, pw_name)

    expression_table = pd.read_csv(expression_table_f, sep='\t', index_col=0)
    pathways = pathways_dict(pathways_f)
    scores = pa.all(expression_table=expression_table,
                    ascending=ascending,
                    pathways=pathways)

    # Emit one TSV per scoring method returned by pa.all.
    for method in scores:
        write_table(method)
# NOTE(review): this chunk continues an `if` whose head is outside this view;
# `user_pw_db_f` is presumably assigned in the missing branch — confirm upstream.
else:
    user_pw_db_f = None

# Build a run-specific output directory from the run parameters and ensure
# it exists before any tables are written.
output_dir = output_dir_path(base_dir, expression_table_f, pathway_db_choice, ascending, rank_method)
os.makedirs(output_dir, exist_ok=True)

expression_df = pd.read_csv(expression_table_f, sep='\t', index_col=0)

# Three ways to obtain pathway definitions, in priority order:
# 1) named collections shipped as pickles under databases/,
# 2) a built-in database name passed straight to pa.all via `db=`,
# 3) a user-supplied pathway table parsed by user_pathways_dict.
if pathway_db_choice == 'xcell' \
        or pathway_db_choice == 'xcell_complete_signatures' \
        or pathway_db_choice == 'all' \
        or pathway_db_choice == 'wikipathways' \
        or pathway_db_choice == 'immune_all':
    # NOTE(review): pickle.load on a bundled file — safe only because the
    # pickles ship with the package; never point this at untrusted input.
    user_pathways = pickle.load(open('databases/{}.pkl'.format(pathway_db_choice), 'rb'))
    scores = pa.all(expression_table=expression_df, pathways=user_pathways, ascending=ascending, rank_method=rank_method)
elif not user_pw_db_f:
    scores = pa.all(expression_table=expression_df, db=pathway_db_choice, ascending=ascending, rank_method=rank_method)
else:
    user_pathways = user_pathways_dict(user_pw_db_f)
    scores = pa.all(expression_table=expression_df, pathways=user_pathways, ascending=ascending, rank_method=rank_method)

# Write one table per scoring method returned by pa.all.
for score_type in scores:
    write_score_table(output_dir_path=output_dir, score_type=score_type, df=scores[score_type])
import pandas as pd
import pathway_assessor as pa

# Path to the tab-separated expression matrix (genes x samples).
expression_table = "C:\\Users\\Boris\\Desktop\\PW_A_Local\\ucec.normal.txt"
expression_df = pd.read_csv(expression_table, sep='\t', index_col=0)

# Score every hallmark pathway with ascending ranks and 'max' tie handling.
scores = pa.all(
    expression_table=expression_df,
    db='hallmark',
    ascending=True,
    rank_method='max',
)
print(scores)