Ejemplo n.º 1
0
	def test_read(self):
		"""Check gmt.read against a valid fixture and two malformed ones."""
		expected = self.example_gmt
		parsed = gmt.read(os.path.join(FUNCTIONAL_TESTS_DIR, "test.gmt"))

		# Same number of entries, and the first two entries match exactly.
		self.assertEqual(len(expected), len(parsed))
		for position in (0, 1):
			self.assertEqual(expected[position], parsed[position])

		# Each malformed fixture must raise an AssertionError whose message
		# contains the given fragment (including the offending line number).
		malformed_fixtures = (
			("test_bad.gmt", "3 tab-delimited items. line_num: 0"),
			("test_bad2.gmt", "same set. line_num: 1"),
		)
		for fixture_name, message_fragment in malformed_fixtures:
			with self.assertRaises(AssertionError) as caught:
				gmt.read(os.path.join(FUNCTIONAL_TESTS_DIR, fixture_name))
			self.assertIn(message_fragment, str(caught.exception))
Ejemplo n.º 2
0
	def test_write(self):
		"""Write the example GMT to disk and check that it reads back unchanged.

		The file is written to ``test_write.gmt`` inside FUNCTIONAL_TESTS_DIR
		and removed afterwards, even when one of the assertions fails, so a
		failing run does not leave a stale output file next to the fixtures.
		"""
		out_path = os.path.join(FUNCTIONAL_TESTS_DIR, "test_write.gmt")

		try:
			gmt.write(self.example_gmt, out_path)
			self.assertTrue(os.path.exists(out_path))

			read_back_in = gmt.read(out_path)
			self.assertEqual(len(self.example_gmt), len(read_back_in))
			self.assertEqual(self.example_gmt[0], read_back_in[0])
			self.assertEqual(self.example_gmt[1], read_back_in[1])
		finally:
			# Cleanup. Guarded so that a write that never produced the file
			# surfaces its own error instead of a missing-file error here.
			if os.path.exists(out_path):
				os.remove(out_path)
Ejemplo n.º 3
0
def _wtks_column_scores(frame, rids):
    """Return one enrichment score per usable column of ``frame``.

    For every column, NaN rows are dropped, the positions of the remaining
    rows whose index label is in ``rids`` are collected, and both are passed
    to ``compute_wtcs.compute_wtcs``. Only the first element of its result
    (the score) is kept.
    """
    scores = []
    for column in frame:
        values = frame[column].dropna()
        hit_positions = np.where(values.index.isin(rids))[0].tolist()

        # Skip the column unless at least one non-NaN row lies outside the set.
        if len(values) - len(hit_positions) < 1:
            continue

        score, _ = compute_wtcs.compute_wtcs(values, pd.Series(hit_positions))
        scores.append(score)
    return scores


def _wtks_save_ecdf_plot(ecdf, poscon_scores, sensitivity_score, mark,
                         pert_iname, dose, set_size, outfile):
    """Draw the ECDF with the poscon and sensitivity points and save it."""
    # Height of each poscon score: the ECDF value one step to the left of
    # the score's insertion point in ecdf.x.
    poscon_heights = [
        ecdf.y[bisect.bisect_left(ecdf.x, score) - 1]
        for score in poscon_scores
    ]

    fig = plt.figure()
    try:
        plt.plot(ecdf.x, ecdf.y)
        plt.scatter(poscon_scores,
                    poscon_heights,
                    marker='o',
                    color='g',
                    label='Bortezomib')
        plt.scatter(sensitivity_score,
                    ecdf.y[mark],
                    marker='o',
                    color='r',
                    label='Sensitivity {}_dose'.format(dose))
        plt.xlabel(
            'Enrichment Score for {} Sensitivities Compound Rank = {}'.format(
                pert_iname, mark))
        plt.ylabel('Fraction of Compounds')
        plt.title(
            'ECDF of Enrichment Score by Compound, Set Size = {}'.format(
                set_size))

        axes = plt.gca()
        # ecdf.x may contain -inf; exclude it so the x-axis has a finite
        # lower bound.
        axes.set_xlim(
            [np.nanmin(ecdf.x[ecdf.x != -np.inf]),
             max(ecdf.x)])
        axes.set_ylim([0, 1])
        axes.legend(bbox_to_anchor=(0., 0.8, 0.8, .102),
                    loc=3,
                    borderaxespad=0.)
        plt.savefig(outfile)
    finally:
        # Close (not just clear) the figure: one figure is opened per
        # compound, and unclosed figures accumulate in pyplot's registry.
        plt.close(fig)


def wtks(gct,
         metadata,
         outfolder,
         gmt_path='/Users/elemire/Workspace/merino/full_sensitivities.gmt',
         group_col='weave_prefix'):
    """Score expected sensitivities per replicate group and write plots/tables.

    The GMT file at ``gmt_path`` is read and passed through ``reformat_gmt``
    to obtain a mapping that is looked up by compound id. ``metadata`` is
    restricted to the columns of ``gct.data_df``; then, for every distinct
    non-null value ``rep`` of ``metadata[group_col]``:

    * ``make_sqi_map`` is called on that group's data and metadata;
    * for each returned key whose first ``'_'``-separated token is one of the
      group's ``pert_id`` values and whose two entries are both non-empty, an
      enrichment score is computed with ``compute_wtcs.compute_wtcs`` and a
      mountain plot is saved via ``plot_enrichment_score``;
    * the same set is scored against every column of the group to build an
      ECDF, and the insertion index of the compound's own score in that ECDF
      is recorded and plotted together with the ``trt_poscon`` columns.

    Files written (folders are created when missing, ``outfolder`` itself
    must already exist):

    * ``<outfolder>/<rep>/mountain_plots/<pert_iname>_<token>_dose.png``
    * ``<outfolder>/<rep>/enrichment_ecdf/<pert_iname>_Competitive_Enrichment_ECDF_<token>_dose.png``
    * ``<outfolder>/<rep>/<rep>_expected_sensitivity_ranks.txt``
    * ``<outfolder>/expected_sensitivity_ranks.txt``

    Args:
        gct: object exposing a ``data_df`` DataFrame.
        metadata: DataFrame indexed by the column ids of ``gct.data_df``;
            the columns ``group_col``, ``pert_id``, ``pert_iname`` and
            ``pert_type`` are read.
        outfolder: directory that receives all output.
        gmt_path: path of the GMT file to read.
        group_col: metadata column whose values define the replicate groups.

    Returns:
        None. All results are written to disk.

    NOTE(review): ``invariants`` is not defined in this function and is
    expected to exist at module scope -- confirm.
    """
    sensitivity_map = reformat_gmt(gmt.read(gmt_path))

    metadata = metadata.loc[gct.data_df.columns]

    expected_sensitivity_ranks = []

    for rep in metadata[group_col].dropna().unique():
        rep_folder = os.path.join(outfolder, rep)
        ecdf_folder = os.path.join(rep_folder, 'enrichment_ecdf')
        mountain_folder = os.path.join(rep_folder, 'mountain_plots')

        # Check each folder on its own so a pre-existing replicate folder
        # that lacks one of the sub-folders does not break savefig later.
        for folder in (rep_folder, ecdf_folder, mountain_folder):
            if not os.path.exists(folder):
                os.mkdir(folder)

        ids = metadata[metadata[group_col] == rep].index
        data = gct.data_df[ids]
        data.index = [str(x) for x in data.index]
        col_meta = metadata.loc[ids]

        group_pert_ids = set(col_meta['pert_id'])

        marks = {}

        s_qi_map = make_sqi_map(sensitivity_map, data=data, col_meta=col_meta)

        for key in s_qi_map:
            brd = key.split('_')[0]
            if brd not in group_pert_ids:
                continue

            entry = s_qi_map[key]
            if not (len(entry[0]) > 0 and len(entry[1]) > 0):
                continue

            # Enrichment score of this compound's own profile.
            sensitivity_score, cumsum = compute_wtcs.compute_wtcs(
                pd.Series(entry[0]), pd.Series(entry[1]))

            # First pert_iname listed for this compound; .iloc makes the
            # positional lookup explicit on the label-indexed Series.
            pert_iname = col_meta.loc[col_meta['pert_id'] == brd,
                                      'pert_iname'].iloc[0]
            # Second token of the key; the output names label it "dose".
            dose = key.split('_')[1]
            set_size = len(entry[1])

            # Mountain plot of the enrichment score.
            plot_enrichment_score.plot_enrichment_score(
                sensitivity_score,
                cumsum,
                title='Enrichment Score of {}, Set Size = {}'.format(
                    pert_iname, set_size),
                outfile=os.path.join(
                    mountain_folder,
                    '{}_{}_dose'.format(pert_iname, dose) + '.png'))

            rids = sensitivity_map[brd]
            pos_dex = col_meta[col_meta['pert_type'] ==
                               'trt_poscon'].index.tolist()

            # NOTE(review): the poscon columns are scored on all rows, while
            # the ECDF background below excludes the `invariants` rows.
            # Kept as found -- confirm this asymmetry is intended.
            bortezomib_scores = _wtks_column_scores(data[pos_dex], rids)

            # Background distribution: the same set scored in every column
            # of the group, invariant rows removed.
            keep_rows = ~data.index.isin(invariants)
            all_scores = _wtks_column_scores(data[keep_rows], rids)

            # NOTE: ctl_vehicle (DMSO) columns are not scored; they are not
            # drawn on the ECDF plot and do not affect any written output.

            ecdf = ECDF(all_scores)
            mark = bisect.bisect_left(ecdf.x, sensitivity_score)
            if mark == len(ecdf.y):
                # Score lies beyond the last ECDF point; clamp to the end.
                mark -= 1
            marks[pert_iname + '_' + str(dose)] = mark

            _wtks_save_ecdf_plot(
                ecdf,
                bortezomib_scores,
                sensitivity_score,
                mark,
                pert_iname,
                dose,
                set_size,
                os.path.join(
                    ecdf_folder,
                    '{}_Competitive_Enrichment_ECDF_{}_dose.png'.format(
                        pert_iname, dose)))

        # Per-group table: one row per compound/dose, one column named rep.
        plate_summary = pd.Series(marks)
        plate_summary.rename(rep, inplace=True)
        pd.DataFrame(plate_summary).to_csv(os.path.join(
            rep_folder, '{}_expected_sensitivity_ranks.txt'.format(rep)),
                                           sep='\t')

        # 'det_plate' is added only after the per-group table is written so
        # it appears in the combined summary but not in the per-group file.
        marks['det_plate'] = rep
        expected_sensitivity_ranks.append(marks)

    summary = pd.DataFrame(expected_sensitivity_ranks)
    summary.set_index('det_plate', inplace=True)

    summary.to_csv(os.path.join(outfolder, 'expected_sensitivity_ranks.txt'),
                   sep='\t')