def get_linreg_optimal_codons(linreg_dir, aa_seq, maximum=False):
    """Compute the optimal codon sequence for aa_seq under a linear model.

    Reads "linreg_data.txt" and "wts.pkl" from ``linreg_dir`` and forwards
    the loaded weights and relevant codon indices to
    ``opt.get_optimal_codons_linreg``.

    Args:
        linreg_dir: directory containing the linear-regression output files.
        aa_seq: sequence passed through unchanged to the optimizer.
        maximum: forwarded to the optimizer as its ``maximum`` keyword.

    Returns:
        The pair (opt_total_seq, opt_vit_score) produced by the optimizer.
    """
    # Metadata file supplies the relevant codon indices for the model.
    data_path = linreg_dir + "/linreg_data.txt"
    linreg_data = proc.load_linreg_data_file(data_path)
    codon_idxs = linreg_data["rel_cod_idxs"]

    # Fitted weights are stored as a separate pickle.
    weights = proc.load_obj(linreg_dir + "/wts.pkl")

    best_seq, best_score = opt.get_optimal_codons_linreg(
        aa_seq, weights, codon_idxs, maximum=maximum)
    return best_seq, best_score
def get_protein_score_dist(nn_dir, epoch, aa_seq, num_samples,
                           nt_feats=False):
    """Obtain a sorted score distribution for aa_seq from a saved network.

    Loads the init-data pickle under ``nn_dir`` for its relevant codon
    indices, reloads the feedforward network at ``epoch``, and delegates to
    ``opt.get_score_dist``.

    Args:
        nn_dir: directory of the saved network (contains init_data/).
        epoch: epoch identifier handed to the network loader.
        aa_seq: sequence passed through unchanged to ``opt.get_score_dist``.
        num_samples: forwarded to ``opt.get_score_dist``.
        nt_feats: forwarded to ``opt.get_score_dist`` as ``nt_feats``.

    Returns:
        The pair (scores_sorted, cod_seqs_sorted) from ``opt.get_score_dist``.
    """
    init_params = proc.load_obj(nn_dir + "/init_data/init_data.pkl")
    codon_idxs = init_params["rel_cod_idxs"]

    network = load_lasagne_feedforward_nn(nn_dir, epoch)

    sorted_scores, sorted_cod_seqs = opt.get_score_dist(
        aa_seq, network, codon_idxs, num_samples, nt_feats=nt_feats)
    return sorted_scores, sorted_cod_seqs
def load_lasagne_adjacency_nn(nn_dir, epoch):
    """Rebuild a saved AdjacencyMLP and restore its state for one epoch.

    Loads the init-data pickle under ``nn_dir``, re-creates the train/test
    matrices with ``proc.load_lasagne_data``, constructs
    ``lasagnenn.AdjacencyMLP`` with ``reloaded=True``, and calls
    ``unpickle_epoch(epoch)`` on it.

    Args:
        nn_dir: directory of the saved network (contains init_data/).
        epoch: epoch identifier passed to ``unpickle_epoch``.

    Returns:
        The reloaded AdjacencyMLP instance.
    """
    init_data_pkl = nn_dir + "/init_data/init_data.pkl"
    params = proc.load_obj(init_data_pkl)

    # These two keys may be absent from the stored parameters; default to
    # None in that case.  The previous try/except form ended each assignment
    # with a trailing comma, which bound a 1-tuple instead of the stored
    # value and passed that tuple on to the data loader.
    max_struc_start_idx = params.get("max_struc_start_idx")
    max_struc_width = params.get("max_struc_width")

    X_tr, y_tr, X_te, y_te = proc.load_lasagne_data(
        params["gene_len_fname"],
        params["gene_seq_fname"],
        params["tr_codons_fname"],
        params["te_codons_fname"],
        params["outputs_fname"],
        rel_cod_idxs=params["rel_cod_idxs"],
        rel_nt_idxs=params["rel_nt_idxs"],
        rel_struc_idxs=params["rel_struc_idxs"],
        struc_fname=params["struc_fname"],
        max_struc_start_idx=max_struc_start_idx,
        max_struc_width=max_struc_width,
        filter_pct=params["filter_pct"])

    my_nn = lasagnenn.AdjacencyMLP(
        X_tr, y_tr, X_te, y_te,
        name=params["name"],
        out_dir=params["out_dir"],
        rel_cod_idxs=params["rel_cod_idxs"],
        cod_adj_idxs=params["cod_adj_idxs"],
        rel_nt_idxs=params["rel_nt_idxs"],
        nt_adj_idxs=params["nt_adj_idxs"],
        learning_rate=params["learning_rate"],
        update_method=params["update_method"],
        widths=params["widths"],
        nonlinearity=params["nonlinearity"],
        input_drop_rate=params["input_drop_rate"],
        hidden_drop_rate=params["hidden_drop_rate"],
        num_outputs=params["num_outputs"],
        momentum=params["momentum"],
        batch_size=params["batch_size"],
        reloaded=True)

    # Restore the state saved for the requested epoch.
    my_nn.unpickle_epoch(epoch)
    return my_nn
def load_lasagne_feedforward_nn(nn_dir, epoch):
    """Rebuild a saved FeedforwardMLP and restore its state for one epoch.

    Loads the init-data pickle under ``nn_dir``, fills in defaults for a few
    optional parameters, re-creates the train/test input matrices with
    ``proc.load_lasagne_data`` (the outputs come from the stored ``y_tr`` /
    ``y_te`` entries instead), constructs ``lasagnenn.FeedforwardMLP`` with
    ``reloaded=True``, and calls ``unpickle_epoch(epoch)`` on it.

    Args:
        nn_dir: directory of the saved network (contains init_data/).
        epoch: epoch identifier passed to ``unpickle_epoch``.

    Returns:
        The reloaded FeedforwardMLP instance.
    """
    params = proc.load_obj(nn_dir + "/init_data/init_data.pkl")

    # Kludge, take this out in the future.
    # Any of these entries that is missing *or falsy* is overwritten with
    # the listed fallback value.
    fallbacks = (
        ("max_struc_start_idx", None),
        ("max_struc_width", None),
        ("aa_feats", False),
        ("nonnegative", False),
    )
    for key, fallback in fallbacks:
        if not params.get(key, False):
            params[key] = fallback

    # Positional file-name arguments, looked up in call order.
    fname_keys = (
        "gene_len_fname",
        "gene_seq_fname",
        "tr_codons_fname",
        "te_codons_fname",
        "outputs_fname",
    )
    fnames = [params[key] for key in fname_keys]

    # Only the input matrices are needed from the loader.
    X_tr, _, X_te, _ = proc.load_lasagne_data(
        fnames[0], fnames[1], fnames[2], fnames[3], fnames[4],
        rel_cod_idxs=params["rel_cod_idxs"],
        rel_nt_idxs=params["rel_nt_idxs"],
        rel_struc_idxs=params["rel_struc_idxs"],
        struc_fname=params["struc_fname"],
        max_struc_start_idx=params["max_struc_start_idx"],
        max_struc_width=params["max_struc_width"],
        aa_feats=params["aa_feats"],
        filter_pct=params["filter_pct"])

    network = lasagnenn.FeedforwardMLP(
        X_tr, params["y_tr"], X_te, params["y_te"],
        name=params["name"],
        out_dir=params["out_dir"],
        learning_rate=params["learning_rate"],
        update_method=params["update_method"],
        widths=params["widths"],
        nonlinearity=params["nonlinearity"],
        input_drop_rate=params["input_drop_rate"],
        hidden_drop_rate=params["hidden_drop_rate"],
        num_outputs=params["num_outputs"],
        momentum=params["momentum"],
        batch_size=params["batch_size"],
        nonnegative=params["nonnegative"],
        reloaded=True)

    # Restore the state saved for the requested epoch.
    network.unpickle_epoch(epoch)
    return network
def get_lasagne_optimal_codons(nn_dir, epoch, aa_seq, nn_type="Feedforward",
                               maximum=False, nt_feats=False):
    """Compute the optimal codon sequence for aa_seq using a saved network.

    Reloads the network of the requested type at ``epoch`` and delegates to
    ``opt.get_optimal_codons_lasagne`` together with the relevant codon
    indices stored in the init-data pickle under ``nn_dir``.

    Args:
        nn_dir: directory of the saved network (contains init_data/).
        epoch: epoch identifier handed to the network loader.
        aa_seq: sequence passed through unchanged to the optimizer.
        nn_type: one of "Feedforward", "Adjacency" or "Split"; selects which
            loader is used.
        maximum: forwarded to the optimizer as its ``maximum`` keyword.
        nt_feats: forwarded to the optimizer as its ``nt_feats`` keyword.

    Returns:
        The pair (opt_total_seq, opt_vit_score) produced by the optimizer.

    Raises:
        ValueError: if ``nn_type`` is not one of the supported names.
    """
    # Validate up front.  Previously an unknown nn_type only printed a
    # message and then failed later with an unrelated unbound-variable error.
    if nn_type not in ("Feedforward", "Adjacency", "Split"):
        raise ValueError(
            "nn_type must be in [Feedforward, Adjacency, Split], got %r"
            % (nn_type,))

    init_data_pkl = nn_dir + "/init_data/init_data.pkl"
    params = proc.load_obj(init_data_pkl)

    if nn_type == "Feedforward":
        my_nn = load_lasagne_feedforward_nn(nn_dir, epoch)
    elif nn_type == "Adjacency":
        my_nn = load_lasagne_adjacency_nn(nn_dir, epoch)
    else:
        # NOTE(review): load_lasagne_split_nn is not defined in the visible
        # part of this file; confirm it exists elsewhere in the module.
        my_nn = load_lasagne_split_nn(nn_dir, epoch)

    rel_cod_idxs = params["rel_cod_idxs"]
    opt_total_seq, opt_vit_score = opt.get_optimal_codons_lasagne(
        aa_seq, my_nn, rel_cod_idxs, maximum=maximum, nt_feats=nt_feats)
    return opt_total_seq, opt_vit_score