Esempio n. 1
0
def run(rs_model=False,
        rs_structure=True,
        filename='',
        maxstruct_train=3,
        maxstruct_predict=3):
    '''
    Leave-one-out reactivity prediction over the data from ``get_test_data()``.

    For every entry, a model is trained on all the other entries and used to
    predict the reactivity of the held-out one. All predictions are written
    to ``filename``.

    :param rs_model: if true, train with ``sn.make_model`` (passing
        ``maxstruct_train`` as ``maxstruct``); otherwise use
        ``ss.make_model``.
    :param rs_structure: if true, predict with ``sn.predict`` from the
        sequence (passing ``maxstruct_predict`` as ``maxstruct``); otherwise
        use ``ss.predict2`` with the entry's own structure.
    :param filename: path of the output file.
    :param maxstruct_train: forwarded to ``sn.make_model``.
    :param maxstruct_predict: forwarded to ``sn.predict``.
    :return: None

    Output layout per entry: a ``>name`` line, one ``index<TAB>value`` line
    per predicted value (4 decimals), then an empty line.
    '''
    dataset = get_test_data()
    lines = []
    for held_out in dataset:
        # Train on everything except the entry being predicted.
        training_keys = list(dataset.keys())
        training_keys.remove(held_out)
        model = (sn.make_model(dataset, training_keys,
                               maxstruct=maxstruct_train)
                 if rs_model else ss.make_model(dataset, training_keys))

        # Entry layout: index 1 is the sequence, index 2 the structure.
        target_sequence = dataset[held_out][1]
        target_struct = dataset[held_out][2]

        if rs_structure:
            predicted = sn.predict(model, target_sequence,
                                   maxstruct=maxstruct_predict)
        else:
            predicted = ss.predict2(model, target_sequence, target_struct)
        my_react = np.array(predicted)

        # Progress marker, one per processed entry.
        print('x', end=' ')

        rows = []
        for position, value in enumerate(my_react):
            rows.append("%s\t%.4f" % (position, value))
        lines.extend([">%s" % held_out, '\n'.join(rows), ''])

    with open(filename, 'w') as handle:
        handle.write('\n'.join(lines))
def main(args):
    '''
    Leave-one-out evaluation of the SHAKER reactivity model.

    Records are loaded from ``args.input``. For each record a model is
    trained on all the other records with ``sim.make_model``, the held-out
    record's reactivity is predicted from its sequence/structure graph, and
    two outputs are produced:

    * ``args.reactivity``: for each record a ``>name`` line followed by one
      line of comma-separated predictions rounded to 3 decimals;
    * ``args.performance`` (or standard output when it is ``"-"``): a
      tab-separated table with one row per record holding the Spearman
      correlation, its p-value, the AUC of the observed and of the predicted
      reactivity, and the RMSE.

    :param args: object with ``input``, ``performance`` and ``reactivity``
        attributes (paths; ``performance`` may be ``"-"``).
    :return: None
    '''
    print("Load input data ...")
    records = loadRecords(args.input, order="sequence,structure,reactivity")
    data = {}
    for name in records.keys():
        record = records[name]
        # Missing measurements (NaN) are stored as None in the training data.
        reactivity = [None if np.isnan(x) else x
                      for x in record["reactivity"]]
        # data[name][0] reactivity
        # data[name][1] sequence
        # data[name][2] structure
        data[name] = [reactivity, record["sequence"], record["structure"]]
    print("Done .")
    print("Train SHAKER model ...")

    # Only close the performance stream when this function opened it;
    # closing sys.stdout would break any later printing in the process.
    use_stdout = args.performance == "-"
    fperformance = sys.stdout if use_stdout else open(args.performance, "w")
    try:
        fperformance.write("\t".join([
            "name", "spearmanr", "p-value", "AUROC-observed-reactivity",
            "AUROC-predicted-reactivity", "RMSE"
        ]) + "\n")

        with open(args.reactivity, "w") as fout:
            for name in data.keys():
                print(name)
                # Train on every record except the one being evaluated.
                keys = set(data.keys())
                keys.remove(name)
                model = sim.make_model(data, list(keys))

                graph = util.sequence_dotbracket_to_graph(
                    data[name][1], data[name][2])
                embedding = eg.vertex_vectorize([graph])[0]
                reactivity_pred = model.predict(embedding).reshape(-1)

                fout.write(">" + name + "\n")
                fout.write(
                    ",".join(np.round(reactivity_pred, 3).astype(str)) + "\n")

                # astype(float) turns the None placeholders back into NaN.
                reactivity = np.array(data[name][0]).astype(float)
                structure = data[name][2]
                # NOTE(review): AUC receives the observed reactivities with
                # NaNs still present - confirm AUC handles missing values.
                auc = AUC(structure, reactivity)
                auc_pred = AUC(structure, reactivity_pred)

                # Correlation and RMSE use only positions with a measurement.
                nan_mask = np.isnan(reactivity)
                reactivity = reactivity[~nan_mask]
                reactivity_pred = reactivity_pred[~nan_mask]
                corr, p = spearmanr(reactivity_pred, reactivity)
                rmse = RMSE(reactivity_pred, reactivity)

                fperformance.write("\t".join(
                    [name, str(corr),
                     str(p),
                     str(auc),
                     str(auc_pred),
                     str(rmse)]) + "\n")
    finally:
        if not use_stdout:
            fperformance.close()
def main(args):
    '''
    Train a SHAKER model on all available data and pickle it to disk.

    The data is read with ``rio.get_all_data`` from ``args.reactivity`` and
    ``args.dot_bracket``, the model is trained on every entry, and the
    result is serialized to ``args.model``.

    :param args: object with ``reactivity``, ``dot_bracket`` and ``model``
        attributes.
    :return: None
    '''
    print("Load input data ...")
    data = rio.get_all_data(args.reactivity, args.dot_bracket)
    print("Done .")
    print("Train SHAKER model ...")
    # Pass a concrete list of keys: on Python 3 ``dict.keys()`` is a view,
    # whereas the other callers of make_model hand it a plain list.
    model = sim.make_model(data, list(data.keys()))
    print("Done .")
    print("Saving the model...")
    with open(args.model, 'wb') as f:
        pickle.dump(model, f)
    print("Done .")
Esempio n. 4
0
def run2(rs_structure=True,
         filename='',
         maxstruct_predict=3,
         train_data=None,
         test_data=None):
    '''
    Predict reactivities for every test entry using separately supplied
    training data, and write the predictions to ``filename``.

    :param rs_structure: if true, predict with ``sn.predict`` from the
        sequence (passing ``maxstruct_predict`` as ``maxstruct``); otherwise
        use ``ss.predict2`` with the entry's own structure.
    :param filename: path of the output file.
    :param maxstruct_predict: forwarded to ``sn.predict``.
    :param train_data: mapping of name to entry used for training; defaults
        to ``get_train_data()`` when None.
    :param test_data: mapping of name to entry to predict (index 1 is the
        sequence, index 2 the structure); defaults to ``get_test_data()``
        when None.
    :return: None

    The idea is to split train and test data: a test entry that also occurs
    in the training data is left out of the training set for its own
    prediction.

    Output layout per entry: a ``>name`` line, one ``index<TAB>value`` line
    per predicted value (4 decimals), then an empty line.
    '''

    # Identity checks: ``== None`` would invoke the container's own __eq__.
    if test_data is None:
        test_data = get_test_data()
    if train_data is None:
        train_data = get_train_data()

    res = []
    for name in test_data:
        train = list(train_data.keys())
        if name in train:
            train.remove(name)
        model = ss.make_model(train_data, train)

        target_sequence = test_data[name][1]
        target_struct = test_data[name][2]

        if rs_structure:
            my_react = np.array(
                sn.predict(model, target_sequence,
                           maxstruct=maxstruct_predict))
        else:
            my_react = np.array(
                ss.predict2(model, target_sequence, target_struct))

        # Progress marker, one per processed entry.
        print('x', end=' ')
        res.append(">%s" % name)
        res.append('\n'.join(
            ["%s\t%.4f" % (i, value) for i, value in enumerate(my_react)]))
        res.append('')

    with open(filename, 'w') as f:
        f.write('\n'.join(res))