def test_output_tsv_score_across_conditions(tmpdir):
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 2)),
                    conditions=['c1', 'c2'])

    bwm = get_janggu(inputs, outputs)

    dummy_eval = Scorer('score', lambda y_true, y_pred: 0.15,
                        exporter=ExportTsv())
    dummy_evalacross = Scorer('scoreacross', lambda y_true, y_pred: 0.15,
                              exporter=ExportTsv(), percondition=False)

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval, dummy_evalacross])

    # percondition=True: one column per condition
    assert pandas.read_csv(os.path.join(tmpdir.strpath, "evaluation",
                                        bwm.name, "score.tsv"),
                           sep='\t', header=[0]).shape == (1, 2)

    # percondition=False: a single score across all conditions
    val = pandas.read_csv(os.path.join(tmpdir.strpath, "evaluation",
                                       bwm.name, "scoreacross.tsv"),
                          sep='\t', header=[0])
    assert val['across'][0] == .15
    assert val.shape == (1, 1)
def get_scorer(scorer):
    """Maps a string name to the corresponding Scorer object.

    This function takes a scorer by name or an existing Scorer object
    and returns an instantiated Scorer object.
    """
    if isinstance(scorer, Scorer):
        pass
    elif scorer in ['ROC', 'roc']:
        scorer = Scorer(scorer, wrap_roc_,
                        exporter=ExportScorePlot(xlabel='FPR', ylabel='TPR'))
    elif scorer in ['PRC', 'prc']:
        scorer = Scorer(scorer, wrap_prc_,
                        exporter=ExportScorePlot(xlabel='Recall',
                                                 ylabel='Precision'))
    elif scorer in ['auc', 'AUC', 'auROC', 'auroc']:
        scorer = Scorer(scorer, roc_auc_score, exporter=ExportTsv())
    elif scorer in ['auprc', 'auPRC', 'ap', 'AP']:
        scorer = Scorer(scorer, average_precision_score, exporter=ExportTsv())
    elif scorer in ['cor', 'pearson']:
        scorer = Scorer(scorer, wrap_cor_, exporter=ExportTsv())
    elif scorer in ['var_explained']:
        scorer = Scorer(scorer, explained_variance_score, exporter=ExportTsv())
    elif scorer in ['mse', 'MSE']:
        scorer = Scorer(scorer, mean_squared_error, exporter=ExportTsv())
    elif scorer in ['mae', 'MAE']:
        scorer = Scorer(scorer, mean_absolute_error, exporter=ExportTsv())
    else:
        raise ValueError("scoring callback {} unknown.".format(scorer))
    return scorer
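# Hedged usage sketch (not part of the library code): get_scorer either
# passes an existing Scorer through unchanged or builds one from a known
# metric name; unknown names raise a ValueError.
auc_scorer = get_scorer('auc')  # wraps sklearn's roc_auc_score with a TSV exporter
custom = Scorer('score', lambda y_true, y_pred: 0.15, exporter=ExportTsv())
assert get_scorer(custom) is custom  # Scorer instances are returned as-is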
def test_output_tsv_score(tmpdir):
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    bwm = get_janggu(inputs, outputs)

    dummy_eval = Scorer('score', lambda y_true, y_pred: 0.15,
                        exporter=ExportTsv())

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    assert pandas.read_csv(os.path.join(tmpdir.strpath, "evaluation",
                                        bwm.name, "score.tsv"),
                           sep='\t', header=[0]).iloc[0, 0] == 0.15
DNA_TEST = Bioseq.create_from_seq('dna', fastafile=[SAMPLE_1, SAMPLE_2],
                                  order=args.order, cache=True)

Y = np.asarray([[1] for _ in range(nseqs(SAMPLE_1))] +
               [[0] for _ in range(nseqs(SAMPLE_2))])
LABELS_TEST = Array('y', Y, conditions=['TF-binding'])
annot_test = pd.DataFrame(Y[:], columns=LABELS_TEST.conditions).applymap(
    lambda x: 'Oct4' if x == 1 else 'Mafk').to_dict(orient='list')

# clustering plots based on hidden features
heatmap_eval = Scorer('heatmap', exporter=ExportClustermap(annot=annot_test,
                                                           z_score=1.))

# output the predictions as tables or json files
pred_tsv = Scorer('pred', exporter=ExportTsv(annot=annot_test,
                                             row_names=DNA_TEST.gindexer.chrs))

# Evaluate on the independent test data.
# After evaluation and prediction have been performed, the callbacks
# further process the results, automatically generating summary statistics
# or figures in the JANGGU_OUTPUT directory.
model.evaluate(DNA_TEST, LABELS_TEST, datatags=['test'],
               callbacks=['auc', 'auprc', 'roc', 'auroc'])

pred = model.predict(DNA_TEST, datatags=['test'],
                     callbacks=[pred_tsv, heatmap_eval],
                     layername='motif')
DNA_TEST = Bioseq.create_from_seq('dna', fastafile=[SAMPLE_1, SAMPLE_2],
                                  order=args.order, datatags=['test'],
                                  cache=True)

Y = np.asarray([1 for _ in range(nseqs(SAMPLE_1))] +
               [0 for _ in range(nseqs(SAMPLE_2))])
LABELS_TEST = Array('y', Y, conditions=['TF-binding'])
annot_test = pd.DataFrame(Y[:], columns=LABELS_TEST.conditions).applymap(
    lambda x: 'Oct4' if x == 1 else 'Mafk').to_dict(orient='list')

# instantiate various evaluation callback objects

# score metrics
auc_eval = Scorer('auROC', roc_auc_score, exporter=ExportTsv())
prc_eval = Scorer('PRC', wrap_prc, exporter=ExportScorePlot())
roc_eval = Scorer('ROC', wrap_roc, exporter=ExportScorePlot())
auprc_eval = Scorer('auPRC', average_precision_score, exporter=ExportTsv())

# clustering plots based on hidden features
heatmap_eval = Scorer('heatmap', exporter=ExportClustermap(annot=annot_test,
                                                           z_score=1.))
tsne_eval = Scorer('tsne', exporter=ExportTsne())

# output the predictions as tables or json files
pred_tsv = Scorer('pred', exporter=ExportTsv(annot=annot_test,
                                             row_names=DNA_TEST.gindexer.chrs))
pred_json = Scorer('pred', exporter=ExportJson(annot=annot_test,
                                               row_names=DNA_TEST.gindexer.chrs))
                                       roi=ROI_FILE, binsize=200,
                                       order=args.order, datatags=['ref'])

LABELS_TEST = Cover.create_from_bed('peaks', bedfiles=PEAK_FILE,
                                    roi=ROI_FILE,
                                    binsize=200, resolution=200,
                                    datatags=['test'])

# instantiate various evaluation callback objects

# score metrics
auc_eval = Scorer('auROC', roc_auc_score, exporter=ExportTsv())
prc_eval = Scorer('PRC', wrap_prc, exporter=ExportScorePlot())
roc_eval = Scorer('ROC', wrap_roc, exporter=ExportScorePlot())
auprc_eval = Scorer('auPRC', average_precision_score, exporter=ExportTsv())

# clustering plots based on hidden features
heatmap_eval = Scorer('heatmap', exporter=ExportClustermap(z_score=1.))
tsne_eval = Scorer('tsne', exporter=ExportTsne())

# output the predictions as tables or json files
pred_tsv = Scorer('pred', exporter=ExportTsv(row_names=DNA_TEST.gindexer.chrs))
pred_json = Scorer('pred', exporter=ExportJson(row_names=DNA_TEST.gindexer.chrs))

# plotly will export a special table that is used for interactive browsing
# of the results
pred_plotly = Scorer('pred', exporter=ExportTsv(row_names=DNA_TEST.gindexer.chrs,
              metrics=['acc'])

hist = model.fit(DNA, LABELS, epochs=100)

print('#' * 40)
print('loss: {}, acc: {}'.format(hist.history['loss'][-1],
                                 hist.history['acc'][-1]))
print('#' * 40)

# clustering plots based on hidden features
heatmap_eval = Scorer('heatmap', exporter=ExportClustermap(z_score=1.))
tsne_eval = Scorer('tsne', exporter=ExportTsne())

# output the predictions as tables or json files
pred_tsv = Scorer('pred', exporter=ExportTsv(row_names=DNA_TEST.gindexer.chrs))

# do the evaluation on the independent test data
model.evaluate(DNA_TEST, LABELS_TEST, datatags=['test'],
               callbacks=['auc', 'prc', 'roc', 'auprc'])

pred = model.predict(DNA_TEST)

cov_pred = Cover.create_from_array('BindingProba', pred, LABELS_TEST.gindexer)

print('Oct4 prediction scores should be greater than Mafk scores:')
print('Prediction score examples for Oct4')
for i in range(4):
    print('{}.: {}'.format(i, pred[i]))
print('Prediction score examples for Mafk')
for i in range(1, 5):
    print('{}.: {}'.format(i, pred[-i]))
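# Hedged follow-up sketch (not part of the original example): the predicted
# Cover object can also be written out as a genome browser track. The
# export_to_bigwig call is an assumption about the janggu Cover API and may
# need to be adapted to the installed version.
cov_pred.export_to_bigwig(output_dir=os.environ['JANGGU_OUTPUT'])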