def main(args):
    """Run a leave-one-out evaluation of the SHAKER reactivity model.

    Records are loaded from ``args.input``.  For every record a model is
    built from all *other* records, the held-out record's reactivity is
    predicted from its sequence/structure graph, and the result is scored
    against the observed reactivity.

    Outputs:
      * ``args.reactivity``: for each record a ``>name`` line followed by
        the comma-separated predictions rounded to three decimals.
      * ``args.performance``: tab-separated table (name, Spearman
        correlation, p-value, AUROC of observed and predicted reactivity,
        RMSE).  The value ``"-"`` sends the table to standard output.

    The single-argument ``print(...)`` calls behave identically under
    Python 2 and Python 3.
    """
    print("Load input data ...")
    records = loadRecords(args.input, order="sequence,structure,reactivity")
    data = {}
    for name, record in records.items():
        # Missing measurements arrive as NaN; they are stored as None in the
        # per-record list handed to sim.make_model.
        reactivity = [None if np.isnan(x) else x for x in record["reactivity"]]
        # data[name][0] reactivity
        # data[name][1] sequence
        # data[name][2] structure
        data[name] = [reactivity, record["sequence"], record["structure"]]
    print("Done .")
    print("Train SHAKER model ...")

    # sys.stdout is borrowed, not owned: it must never be closed here.
    to_stdout = args.performance == "-"
    fperformance = sys.stdout if to_stdout else open(args.performance, "w")
    try:
        fperformance.write("\t".join([
            "name", "spearmanr", "p-value", "AUROC-observed-reactivity",
            "AUROC-predicted-reactivity", "RMSE"
        ]) + "\n")

        with open(args.reactivity, "w") as fout:
            for name in data:
                print(name)
                # Train on every record except the one being evaluated.
                keys = set(data.keys())
                keys.remove(name)
                model = sim.make_model(data, list(keys))

                graph = util.sequence_dotbracket_to_graph(
                    data[name][1], data[name][2])
                embedding = eg.vertex_vectorize([graph])[0]
                reactivity_pred = model.predict(embedding).reshape(-1)

                fout.write(">" + name + "\n")
                fout.write(
                    ",".join(np.round(reactivity_pred, 3).astype(str)) + "\n")

                # astype(float) turns the None placeholders back into NaN.
                reactivity = np.array(data[name][0]).astype(float)
                structure = data[name][2]
                # The AUC helper receives the unmasked arrays (NaN included),
                # exactly as before; only correlation and RMSE are computed
                # on the positions that have an observed value.
                auc = AUC(structure, reactivity)
                auc_pred = AUC(structure, reactivity_pred)

                observed = ~np.isnan(reactivity)
                reactivity = reactivity[observed]
                reactivity_pred = reactivity_pred[observed]
                corr, p = spearmanr(reactivity_pred, reactivity)
                rmse = RMSE(reactivity_pred, reactivity)

                fperformance.write("\t".join(
                    [name, str(corr),
                     str(p),
                     str(auc),
                     str(auc_pred),
                     str(rmse)]) + "\n")
    finally:
        if not to_stdout:
            fperformance.close()
Ejemplo n.º 2
0
def draw_seq_rea(sequence, react_list, stru=None, **kwargs):
    '''Draw a graph row showing reactivity for a sequence.

    sequence   -- the sequence to draw
    react_list -- reactivity values, passed on to draw3
    stru       -- optional dot-bracket structure; when omitted the structure
                  is taken from rna_tools.shape(sequence)[0][0]
    kwargs     -- forwarded unchanged to draw3
    '''
    # Identity test: `!= None` would dispatch to __ne__, which is wrong for
    # array-like arguments and is not the idiomatic None check.
    if stru is not None:
        brack = stru
    else:
        print("FIXME, i should use rnafold here")
        brack = rna_tools.shape(sequence)[0][0]
    graph = util.sequence_dotbracket_to_graph(sequence, brack)
    # Keep the dot-bracket string on the graph alongside the topology.
    graph.graph['structure'] = brack
    draw3(graph, react_list, **kwargs)
Ejemplo n.º 3
0
def draw_print(seq, shape, file_name='lol.svg', stru=None):
    '''Render seq + shape as an image written to file_name.

    seq       -- the sequence to draw
    shape     -- per-position values handed to annotate()
    file_name -- output path passed to ed.draw_graph
    stru      -- optional dot-bracket structure; when omitted the sequence
                 is folded with rnafold.fold
    '''
    # Identity test: `!= None` would dispatch to __ne__, which is wrong for
    # array-like arguments and is not the idiomatic None check.
    if stru is not None:
        brack = stru
    else:
        brack = rnafold.fold(seq)

    graph = util.sequence_dotbracket_to_graph(seq, brack)
    graph.graph['structure'] = brack
    # annotate() works on a copy, so the graph built above is not modified.
    graph = annotate(graph.copy(), shape)

    ed.draw_graph(graph, size=5, layout="RNA", vertex_color='col',
                  vertex_label=None, edge_alpha=0.4, vertex_size=150,
                  vertex_border=False, file_name=file_name)
Ejemplo n.º 4
0
def predict(model, sequence, seq_to_db_function=rnasubopt):
    """Predict reactivity for ``sequence`` using ``model``.

    ``seq_to_db_function(sequence)`` supplies the candidate dot-bracket
    structures.  With exactly one candidate the prediction for that
    structure is returned directly; otherwise each candidate is predicted
    separately and the results are combined by ``weighted_average`` using
    the weights from ``probabilities_of_structures``.
    """
    candidates = seq_to_db_function(sequence)

    if len(candidates) != 1:
        # Pair every structure with its probability, then split the pairs.
        weighted = probabilities_of_structures(sequence, candidates)
        structures, weights = zip(*weighted)

        def _graph_for(structure):
            return getgraph(sequence, structure)

        # Vectorize all candidate graphs and predict each one on its own.
        # NOTE(review): this branch passes r=3, d=3 while the single-structure
        # branch relies on vertex_vectorize's defaults -- confirm they match.
        graphs = map(_graph_for, structures)
        vectors = list(eg.vertex_vectorize(graphs, r=3, d=3))
        per_structure = []
        for vector in vectors:
            per_structure.append(model.predict(vector))

        # Blend the per-structure predictions by structure probability.
        return weighted_average(weights, per_structure)

    only_graph = util.sequence_dotbracket_to_graph(sequence, candidates[0])
    embedding = eg.vertex_vectorize([only_graph])[0]
    return model.predict(embedding)
Ejemplo n.º 5
0
def getgraph(sequence, structure):
    """Build the graph for ``sequence`` with dot-bracket ``structure``.

    Thin wrapper around ``util.sequence_dotbracket_to_graph``; the result
    is returned unchanged (a networkx graph, per the original docstring).
    """
    graph = util.sequence_dotbracket_to_graph(sequence, structure)
    return graph
                    '-m',
                    help="Trained model for simulation",
                    default="data/reactivity/shaker-model.pkl")
args = parser.parse_args()

print("Load model ...")
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only point --model at model files from a trusted source.
with open(args.model, 'rb') as fmdl:
    model = pickle.load(fmdl)
print("Done .")

first_entry = True  # NOTE(review): never read in the visible code

# Input is read as three-line records: ">name", sequence, dot-bracket
# (anything after the first space on the structure line is ignored).
# Output is one tab-separated line per record: name, then the predictions.
# Both files are managed by the with-statement so the output is flushed and
# closed even if a record fails; the output is still opened first.
with open(args.output, "w") as fout, open(args.input) as fin:
    for line in fin:
        line = line.strip()
        if line.startswith(">"):
            name = line.replace(">", "")
            print("Processing {} ...".format(name))
            # The two lines after a header belong to the same record.
            line = next(fin)
            sequence = line.strip()
            line = next(fin)
            dbn = line.split(" ")[0].strip()
            graph = util.sequence_dotbracket_to_graph(sequence, dbn)
            embedding = eg.vertex_vectorize([graph])[0]
            reactivity = model.predict(embedding).reshape(-1)
            data = [name] + list(reactivity.astype(str))
            fout.write("\t".join(data) + "\n")
Ejemplo n.º 7
0
 def getgraph(self, seq):
     """Return the graph for ``seq`` folded with ``self.getstruct``.

     Returns ``None`` (after printing a notice) when the structure's length
     does not match the sequence, so that the bad entry is actually skipped
     instead of being turned into a graph from inconsistent input.
     """
     struct = self.getstruct(seq)
     if len(seq) != len(struct):
         print("skipping seq, bad stru")
         # Previously execution fell through and built the graph anyway,
         # contradicting the message above.
         return None
     return util.sequence_dotbracket_to_graph(seq, struct)