Example #1
def test_output_tsv_score_across_conditions(tmpdir):
    # route all janggu output into the test's temporary directory
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 2)),
                    conditions=['c1', 'c2'])

    # build a small test model via the get_janggu helper
    bwm = get_janggu(inputs, outputs)

    dummy_eval = Scorer('score', lambda y_true, y_pred: 0.15,
                        exporter=ExportTsv())
    dummy_evalacross = Scorer('scoreacross',
                              lambda y_true, y_pred: 0.15,
                              exporter=ExportTsv(),
                              percondition=False)

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval, dummy_evalacross])

    # percondition=True
    assert pandas.read_csv(os.path.join(tmpdir.strpath, "evaluation", bwm.name,
                                        "score.tsv"),
                           sep='\t', header=[0]).shape == (1, 2)
    # percondition=False
    val = pandas.read_csv(os.path.join(tmpdir.strpath, "evaluation", bwm.name,
                                       "scoreacross.tsv"),
                          sep='\t', header=[0])
    assert val['across'][0] == 0.15
    assert val.shape == (1, 1)
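The percondition flag above controls whether the score function is applied to
each output condition separately or to all conditions jointly. A minimal sketch
of that distinction outside of janggu, substituting sklearn's roc_auc_score for
the dummy lambda (the random data is purely illustrative):

import numpy
from sklearn.metrics import roc_auc_score

y_true = numpy.random.randint(2, size=(100, 2))  # two conditions, c1 and c2
y_pred = numpy.random.random((100, 2))

# percondition=True: one score per condition column
per_condition = [roc_auc_score(y_true[:, i], y_pred[:, i]) for i in range(2)]

# percondition=False: a single score across the flattened conditions
across = roc_auc_score(y_true.ravel(), y_pred.ravel())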
Example #2
def get_scorer(scorer):
    """Function maps string names to the Scorer objects.

    This function takes a scorer by name or a Scorer object
    and returns an instantiation of a Scorer object.
    """
    if isinstance(scorer, Scorer):
        pass
    elif scorer in ['ROC', 'roc']:
        scorer = Scorer(scorer,
                        wrap_roc_,
                        exporter=ExportScorePlot(xlabel='FPR', ylabel='TPR'))
    elif scorer in ['PRC', 'prc']:
        scorer = Scorer(scorer,
                        wrap_prc_,
                        exporter=ExportScorePlot(xlabel='Recall',
                                                 ylabel='Precision'))
    elif scorer in ['auc', 'AUC', 'auROC', 'auroc']:
        scorer = Scorer(scorer, roc_auc_score, exporter=ExportTsv())
    elif scorer in ['auprc', 'auPRC', 'ap', 'AP']:
        scorer = Scorer(scorer, average_precision_score, exporter=ExportTsv())
    elif scorer in ['cor', 'pearson']:
        scorer = Scorer(scorer, wrap_cor_, exporter=ExportTsv())
    elif scorer in ['var_explained']:
        scorer = Scorer(scorer, explained_variance_score, exporter=ExportTsv())
    elif scorer in ['mse', 'MSE']:
        scorer = Scorer(scorer, mean_squared_error, exporter=ExportTsv())
    elif scorer in ['mae', 'MAE']:
        scorer = Scorer(scorer, mean_absolute_error, exporter=ExportTsv())
    else:
        raise ValueError("scoring callback {} unknown.".format(scorer))
    return scorer
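A short usage sketch for the mapping above; the string shorthands come straight
from the branches of get_scorer:

# resolve string shorthands into Scorer callbacks
auc = get_scorer('auc')   # wraps sklearn's roc_auc_score with a TSV exporter
roc = get_scorer('ROC')   # wraps wrap_roc_ with a score-plot exporter

# an existing Scorer instance passes through unchanged
assert get_scorer(auc) is auc

# unrecognized names raise a ValueError
try:
    get_scorer('f1')
except ValueError as err:
    print(err)  # scoring callback f1 unknown.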
Example #3
def test_output_tsv_score(tmpdir):
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    inputs = Array("x", numpy.random.random((100, 10)))
    outputs = Array('y', numpy.random.randint(2, size=(100, 1)),
                    conditions=['random'])

    bwm = get_janggu(inputs, outputs)

    dummy_eval = Scorer('score', lambda y_true, y_pred: 0.15, exporter=ExportTsv())

    bwm.evaluate(inputs, outputs, callbacks=[dummy_eval])

    assert pandas.read_csv(os.path.join(tmpdir.strpath, "evaluation", bwm.name,
                                        "score.tsv"),
                           sep='\t', header=[0]).iloc[0, 0] == 0.15
Example #4
# test sequences loaded from fasta files; the head of this call is
# restored from the parallel call in Example #5
DNA_TEST = Bioseq.create_from_seq('dna',
                                  fastafile=[SAMPLE_1, SAMPLE_2],
                                  order=args.order,
                                  cache=True)

Y = np.asarray([[1] for _ in range(nseqs(SAMPLE_1))] +
               [[0] for _ in range(nseqs(SAMPLE_2))])
LABELS_TEST = Array('y', Y, conditions=['TF-binding'])
annot_test = pd.DataFrame(Y[:], columns=LABELS_TEST.conditions).applymap(
    lambda x: 'Oct4' if x == 1 else 'Mafk').to_dict(orient='list')

# clustering plots based on hidden features
heatmap_eval = Scorer('heatmap',
                      exporter=ExportClustermap(annot=annot_test, z_score=1.))

# output the predictions as tables or json files
pred_tsv = Scorer('pred',
                  exporter=ExportTsv(annot=annot_test,
                                     row_names=DNA_TEST.gindexer.chrs))

# do the evaluation on the independent test data.
# after evaluation and prediction have been performed, the
# callbacks further process the results, automatically generating
# summary statistics or figures in the JANGGU_OUTPUT directory.
model.evaluate(DNA_TEST,
               LABELS_TEST,
               datatags=['test'],
               callbacks=['auc', 'auprc', 'roc', 'auroc'])

pred = model.predict(DNA_TEST,
                     datatags=['test'],
                     callbacks=[pred_tsv, heatmap_eval],
                     layername='motif')
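The string callbacks accepted by evaluate resolve via get_scorer (Example #2);
hand-rolled Scorer objects work the same way. A hedged sketch adding an F1
score, where sklearn's f1_score is real but the 0.5 decision threshold is an
assumption of this illustration:

from sklearn.metrics import f1_score

# f1_score expects hard labels, so binarize the predicted probabilities;
# the 0.5 cutoff is an arbitrary choice for this sketch
f1_eval = Scorer('f1',
                 lambda y_true, y_pred: f1_score(y_true, y_pred > 0.5),
                 exporter=ExportTsv())

model.evaluate(DNA_TEST, LABELS_TEST,
               datatags=['test'],
               callbacks=[f1_eval])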
Example #5
DNA_TEST = Bioseq.create_from_seq('dna',
                                  fastafile=[SAMPLE_1, SAMPLE_2],
                                  order=args.order,
                                  datatags=['test'],
                                  cache=True)

Y = np.asarray([1 for _ in range(nseqs(SAMPLE_1))] +
               [0 for _ in range(nseqs(SAMPLE_2))])
LABELS_TEST = Array('y', Y, conditions=['TF-binding'])
annot_test = pd.DataFrame(Y[:], columns=LABELS_TEST.conditions).applymap(
    lambda x: 'Oct4' if x == 1 else 'Mafk').to_dict(orient='list')

# instantiate various evaluation callback objects
# score metrics
auc_eval = Scorer('auROC', roc_auc_score, exporter=ExportTsv())
prc_eval = Scorer('PRC', wrap_prc, exporter=ExportScorePlot())
roc_eval = Scorer('ROC', wrap_roc, exporter=ExportScorePlot())
auprc_eval = Scorer('auPRC', average_precision_score, exporter=ExportTsv())

# clustering plots based on hidden features
heatmap_eval = Scorer('heatmap',
                      exporter=ExportClustermap(annot=annot_test, z_score=1.))
tsne_eval = Scorer('tsne', exporter=ExportTsne())

# output the predictions as tables or json files
pred_tsv = Scorer('pred',
                  exporter=ExportTsv(annot=annot_test,
                                     row_names=DNA_TEST.gindexer.chrs))
pred_json = Scorer('pred',
                   exporter=ExportJson(annot=annot_test,
                                       row_names=DNA_TEST.gindexer.chrs))
Example #6
# test DNA extracted from a reference genome over the ROI; the head of
# this call (including REFGENOME) is an assumed reconstruction, since
# the snippet begins mid-call
DNA_TEST = Bioseq.create_from_refgenome('dna', refgenome=REFGENOME,
                                        roi=ROI_FILE,
                                        binsize=200,
                                        order=args.order,
                                        datatags=['ref'])

LABELS_TEST = Cover.create_from_bed('peaks',
                                    bedfiles=PEAK_FILE,
                                    roi=ROI_FILE,
                                    binsize=200,
                                    resolution=200,
                                    datatags=['test'])


# instantiate various evaluation callback objects
# score metrics
auc_eval = Scorer('auROC', roc_auc_score, exporter=ExportTsv())
prc_eval = Scorer('PRC', wrap_prc, exporter=ExportScorePlot())
roc_eval = Scorer('ROC', wrap_roc, exporter=ExportScorePlot())
auprc_eval = Scorer('auPRC', average_precision_score, exporter=ExportTsv())

# clustering plots based on hidden features
heatmap_eval = Scorer('heatmap', exporter=ExportClustermap(z_score=1.))
tsne_eval = Scorer('tsne', exporter=ExportTsne())

# output the predictions as tables or json files
pred_tsv = Scorer('pred', exporter=ExportTsv(row_names=DNA_TEST.gindexer.chrs))
pred_json = Scorer('pred', exporter=ExportJson(row_names=DNA_TEST.gindexer.chrs))

# plotly will export a special table that is used for interactive
# browsing of the results (the remaining ExportTsv arguments are
# truncated in the original snippet)
pred_plotly = Scorer('pred', exporter=ExportTsv(row_names=DNA_TEST.gindexer.chrs))
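All of these exporters write into a model-specific folder underneath
JANGGU_OUTPUT, as the path assertions in Examples #1 and #3 show. A small
sketch for locating the artifacts, assuming a fitted Janggu model named
model as in Example #4:

import os

# exported artifacts land under JANGGU_OUTPUT/evaluation/<model name>/,
# e.g. pred.tsv and pred.json from the Scorers above
eval_dir = os.path.join(os.environ['JANGGU_OUTPUT'], 'evaluation', model.name)
print(sorted(os.listdir(eval_dir)))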
Example #7
# the optimizer and loss below are assumptions; only metrics=['acc']
# appears in the original snippet
model.compile(optimizer='adadelta',
              loss='binary_crossentropy',
              metrics=['acc'])

hist = model.fit(DNA, LABELS, epochs=100)

print('#' * 40)
print('loss: {}, acc: {}'.format(hist.history['loss'][-1],
                                 hist.history['acc'][-1]))
print('#' * 40)


# clustering plots based on hidden features
heatmap_eval = Scorer('heatmap', exporter=ExportClustermap(z_score=1.))
tsne_eval = Scorer('tsne', exporter=ExportTsne())

# output the predictions as tables or json files
pred_tsv = Scorer('pred', exporter=ExportTsv(row_names=DNA_TEST.gindexer.chrs))

# do the evaluation on the independent test data
model.evaluate(DNA_TEST, LABELS_TEST, datatags=['test'],
               callbacks=['auc', 'prc', 'roc', 'auprc'])

pred = model.predict(DNA_TEST)
cov_pred = Cover.create_from_array('BindingProba', pred, LABELS_TEST.gindexer)

print('Oct4 prediction scores should be greater than Mafk scores:')
print('Prediction score examples for Oct4')
for i in range(4):
    print('{}.: {}'.format(i, pred[i]))
print('Prediction score examples for Mafk')
for i in range(1, 5):
    print('{}.: {}'.format(i, pred[-i]))
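To back the printed claim with a number, a small sketch comparing the mean
prediction score of the two groups (nseqs, SAMPLE_1 and SAMPLE_2 are the same
helpers used above):

n_oct4 = nseqs(SAMPLE_1)

# mean predicted probability per group; Oct4 should come out higher
print('mean Oct4 score: {:.3f}'.format(pred[:n_oct4].mean()))
print('mean Mafk score: {:.3f}'.format(pred[n_oct4:].mean()))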