Example #1
0
File: interface.py  Project: rtunney/iXnos
def load_lasagne_feedforward_nn(nn_dir, epoch):
    """Reconstruct a pickled FeedforwardMLP and restore its weights at an epoch.

    Args:
        nn_dir (str): model directory containing ``init_data/init_data.pkl``
        epoch (int): training epoch whose pickled parameters to load

    Returns:
        lasagnenn.FeedforwardMLP: rebuilt network with the epoch's weights
    """
    init_data_pkl = nn_dir + "/init_data/init_data.pkl"
    params = proc.load_obj(init_data_pkl)
    # Backward compatibility: older init_data pickles predate these keys.
    # setdefault fills in ONLY missing keys; the previous
    # `if not params.get(key, False)` test also clobbered legitimately-stored
    # falsy values (e.g. max_struc_start_idx == 0) with the fallback.
    params.setdefault("max_struc_start_idx", None)
    params.setdefault("max_struc_width", None)
    params.setdefault("aa_feats", False)
    params.setdefault("nonnegative", False)
    # Rebuild the training/test design matrices; y vectors come from the
    # pickle itself (params["y_tr"], params["y_te"]), so they are discarded.
    X_tr, _, X_te, _ = proc.load_lasagne_data(
        params["gene_len_fname"],
        params["gene_seq_fname"],
        params["tr_codons_fname"],
        params["te_codons_fname"],
        params["outputs_fname"],
        rel_cod_idxs=params["rel_cod_idxs"],
        rel_nt_idxs=params["rel_nt_idxs"],
        rel_struc_idxs=params["rel_struc_idxs"],
        struc_fname=params["struc_fname"],
        max_struc_start_idx=params["max_struc_start_idx"],
        max_struc_width=params["max_struc_width"],
        aa_feats=params["aa_feats"],
        filter_pct=params["filter_pct"])

    # reloaded=True tells the model not to re-initialize from scratch.
    my_nn = lasagnenn.FeedforwardMLP(
        X_tr,
        params["y_tr"],
        X_te,
        params["y_te"],
        name=params["name"],
        out_dir=params["out_dir"],
        learning_rate=params["learning_rate"],
        update_method=params["update_method"],
        widths=params["widths"],
        nonlinearity=params["nonlinearity"],
        input_drop_rate=params["input_drop_rate"],
        hidden_drop_rate=params["hidden_drop_rate"],
        num_outputs=params["num_outputs"],
        momentum=params["momentum"],
        batch_size=params["batch_size"],
        nonnegative=params["nonnegative"],
        reloaded=True)

    # Restore the trained weights for the requested epoch.
    my_nn.unpickle_epoch(epoch)

    return my_nn
Example #2
0
File: interface.py  Project: rtunney/iXnos
def load_lasagne_adjacency_nn(nn_dir, epoch):
    """Reconstruct a pickled AdjacencyMLP and restore its weights at an epoch.

    Args:
        nn_dir (str): model directory containing ``init_data/init_data.pkl``
        epoch (int): training epoch whose pickled parameters to load

    Returns:
        lasagnenn.AdjacencyMLP: rebuilt network with the epoch's weights
    """
    init_data_pkl = nn_dir + "/init_data/init_data.pkl"
    params = proc.load_obj(init_data_pkl)
    # Bug fix: the original try/except lookups had trailing commas
    # (`... = params["max_struc_start_idx"],`) which silently wrapped the
    # stored values in 1-tuples before passing them downstream. dict.get
    # yields the bare value, or None when an older pickle lacks the key.
    max_struc_start_idx = params.get("max_struc_start_idx", None)
    max_struc_width = params.get("max_struc_width", None)
    X_tr, y_tr, X_te, y_te = proc.load_lasagne_data(
        params["gene_len_fname"],
        params["gene_seq_fname"],
        params["tr_codons_fname"],
        params["te_codons_fname"],
        params["outputs_fname"],
        rel_cod_idxs=params["rel_cod_idxs"],
        rel_nt_idxs=params["rel_nt_idxs"],
        rel_struc_idxs=params["rel_struc_idxs"],
        struc_fname=params["struc_fname"],
        max_struc_start_idx=max_struc_start_idx,
        max_struc_width=max_struc_width,
        filter_pct=params["filter_pct"])

    # reloaded=True tells the model not to re-initialize from scratch.
    my_nn = lasagnenn.AdjacencyMLP(X_tr,
                                   y_tr,
                                   X_te,
                                   y_te,
                                   name=params["name"],
                                   out_dir=params["out_dir"],
                                   rel_cod_idxs=params["rel_cod_idxs"],
                                   cod_adj_idxs=params["cod_adj_idxs"],
                                   rel_nt_idxs=params["rel_nt_idxs"],
                                   nt_adj_idxs=params["nt_adj_idxs"],
                                   learning_rate=params["learning_rate"],
                                   update_method=params["update_method"],
                                   widths=params["widths"],
                                   nonlinearity=params["nonlinearity"],
                                   input_drop_rate=params["input_drop_rate"],
                                   hidden_drop_rate=params["hidden_drop_rate"],
                                   num_outputs=params["num_outputs"],
                                   momentum=params["momentum"],
                                   batch_size=params["batch_size"],
                                   reloaded=True)

    # Restore the trained weights for the requested epoch.
    my_nn.unpickle_epoch(epoch)

    return my_nn
Example #3
0
File: interface.py  Project: rtunney/iXnos
def make_lasagne_feedforward_nn(name,
                                expt_dir,
                                gene_seq_fname,
                                gene_len_fname,
                                tr_codons_fname,
                                te_codons_fname,
                                outputs_fname,
                                rel_cod_idxs=[],
                                rel_nt_idxs=[],
                                nonlinearity="tanh",
                                widths=[200],
                                input_drop_rate=0,
                                hidden_drop_rate=0,
                                num_outputs=1,
                                update_method="sgd",
                                filter_max=False,
                                filter_test=False,
                                filter_pct=False,
                                rel_struc_idxs=False,
                                struc_fname=False,
                                max_struc_start_idx=None,
                                max_struc_width=None,
                                aa_feats=False,
                                learning_rate=0.01,
                                lr_decay=16,
                                momentum=0.9,
                                batch_size=500,
                                log_y=False,
                                scaled_psct=0,
                                raw_psct=False,
                                loss_fn="L2",
                                drop_zeros=False,
                                nonnegative=True):
    """
    Sets up neural network model directory, 
        initializes neural network model, 
        saves initial parameters, 
        and returns neural network model

    Args:
        name (str) - name of neural network model
        expt_dir (str) - name of experiment directory
        gene_seq_fname (str) - name of transcriptome fasta file
        gene_len_fname (str) - name of gene lengths file
        tr_codons_fname (str) - name of training set codons file
        te_codons_fname (str) - name of test set codons file
        outputs_fname (str) - name of outputs file
        rel_cod_idxs (list of ints) - indices of codon features in model
        rel_nt_idxs (list of ints) - indices of nucleotide features in model
        nonlinearity (str) - name of nonlinearity fn [tanh|rectify|linear]
        widths (list of ints) - # of units in each hidden layer, in order
        input_drop_rate (float) - dropout rate for inputs
        hidden_drop_rate (float) - dropout rate for hidden unit
        num_outputs (int) - number of units in output layer
        update_method (str) - name of update method [sgd|momentum|nesterov]
        filter_max, filter_test, filter_pct - forwarded to
            proc.load_lasagne_data as data-filtering options
        rel_struc_idxs, struc_fname, max_struc_start_idx, max_struc_width,
            aa_feats - forwarded to proc.load_lasagne_data as structure/amino
            acid feature options
        learning_rate (float) - initial learning rate for training
        lr_decay - forwarded to lasagnenn.FeedforwardMLP
        momentum (float) - momentum coefficient for the update method
        batch_size (int) - minibatch size for training
        log_y (bool) - if True, log-transform the y values (see below)
        scaled_psct (float) - pseudocount added to y before log transform
        raw_psct (bool) - flags that a raw pseudocount was already added
            when the outputs file was made
        loss_fn (str) - name of loss function (default "L2")
        drop_zeros (bool) - if True (and no pseudocount), drop zero-valued
            training outputs before the log transform
        nonnegative (bool) - forwarded to lasagnenn.FeedforwardMLP

    NOTE(review): rel_cod_idxs, rel_nt_idxs, and widths use mutable list
        defaults. They appear to be read-only here, but they are also pickled
        into init_data.pkl below — left as-is to avoid changing the saved
        params; confirm before refactoring.

    Returns: 
        my_nn (lasagnenn.FeedforwardMLP) - neural network object
    """
    # Initialize neural network directories
    setup_lasagne_nn(name, expt_dir)

    # Load neural network data matrices
    X_tr, y_tr, X_te, y_te = proc.load_lasagne_data(
        gene_len_fname,
        gene_seq_fname,
        tr_codons_fname,
        te_codons_fname,
        outputs_fname,
        rel_cod_idxs=rel_cod_idxs,
        rel_nt_idxs=rel_nt_idxs,
        rel_struc_idxs=rel_struc_idxs,
        struc_fname=struc_fname,
        max_struc_start_idx=max_struc_start_idx,
        max_struc_width=max_struc_width,
        aa_feats=aa_feats,
        filter_max=filter_max,
        filter_pct=filter_pct,
        filter_test=filter_test)

    # NOTE: Should I remove this?
    # Handle log transformation of y values
    if log_y:
        #Must have either a scaled psct to add, or a raw psct that has already
        #been put in the counts_by_codon when making the outputs file
        # Maybe change this scheme for raw pscts in the future?
        # NOTE(review): the condition requires scaled_psct > 0 (or raw_psct /
        # drop_zeros), but the message says ">= 0" — message looks off by one;
        # confirm intended contract before changing.
        if scaled_psct <= 0 and not raw_psct and not drop_zeros:
            raise ValueError("Pseudocount must be >= 0 for log y")
        if scaled_psct > 0:
            # Scaled pseudocount: shift both sets before taking logs.
            y_tr = np.log(y_tr + scaled_psct)
            y_te = np.log(y_te + scaled_psct)
        if (not scaled_psct > 0) and raw_psct:
            # Raw pseudocount was already baked into the outputs file.
            y_tr = np.log(y_tr)
            y_te = np.log(y_te)
        if ((not scaled_psct > 0) and not raw_psct) and drop_zeros:
            # No pseudocount at all: drop zero outputs from the TRAINING set
            # only, so the log is defined. Test set is left untouched.
            positive = (y_tr > 0).ravel()
            y_tr = y_tr[positive]
            X_tr = X_tr[positive]

    # Save initial parameters
    out_dir = expt_dir + "/lasagne_nn"
    # inspect.getargvalues(currentframe()) captures ALL locals at this point
    # as the params dict — including y_tr/y_te, which the companion
    # load_lasagne_*_nn functions read back from the pickle. Because of this,
    # any new local variable introduced above this line would silently end up
    # in init_data.pkl. The large X matrices are deleted to keep the pickle
    # small; they are rebuilt from the data files on reload.
    _, _, _, params = inspect.getargvalues(inspect.currentframe())
    del params["X_tr"]
    del params["X_te"]
    proc.pickle_obj(params,
                    out_dir + "/{0}/init_data/init_data.pkl".format(name))

    # Make neural network object
    my_nn = lasagnenn.FeedforwardMLP(X_tr,
                                     y_tr,
                                     X_te,
                                     y_te,
                                     name=name,
                                     out_dir=out_dir,
                                     learning_rate=learning_rate,
                                     lr_decay=lr_decay,
                                     update_method=update_method,
                                     widths=widths,
                                     nonlinearity=nonlinearity,
                                     input_drop_rate=input_drop_rate,
                                     hidden_drop_rate=hidden_drop_rate,
                                     num_outputs=num_outputs,
                                     momentum=momentum,
                                     batch_size=batch_size,
                                     loss_fn=loss_fn,
                                     nonnegative=nonnegative)

    # Return neural network object
    return my_nn