コード例 #1
0
def test_encode_data_method():
    """Check that ``encode_data`` raises ``AttributeError`` when ``'random'`` is passed as its second argument."""
    # only the first rows of the tab-separated training file are needed
    sample = pd.read_csv('data/train_data.csv', sep='\t', nrows=10)
    unsupported = 'random'
    with pytest.raises(AttributeError):
        encode_data(sample, unsupported)
コード例 #2
0
def test_encode_data():
    """Check the sizes of the two values returned by ``encode_data`` for the first rows of the training file."""
    expected_rows = 10
    sample = pd.read_csv('data/train_data.csv', sep='\t', nrows=expected_rows)
    encoded, labels = encode_data(sample)
    # one encoded row of width 280 and one label per input row
    assert encoded.shape == (expected_rows, 280)
    assert len(labels) == expected_rows
コード例 #3
0
def evaluate_model():
    """
    Use this function to validate the DeepMiRNA model.

    Training parameters and file locations are read from ``gv``. The one-hot
    encoded training set and its labels are loaded from disk when both cache
    files exist; otherwise the training set is read, encoded with
    ``encode_data`` and the result is written to disk for the next run. The
    model is then built with ``_create_mlp_model``, fitted through
    ``train_eval`` and its history is passed to ``_plot_model_history``.

    :return: keras history object with the results
    """

    # get training parameters
    model_name = gv.TRAIN_FINAL_MODEL_NAME
    train_set_fp = os.path.join(gv.ROOT_DIR, gv.TRAIN_SET_LOCATION)
    true_labels_fp = os.path.join(gv.ROOT_DIR, gv.TRUE_LABELS)
    ohe_duplexes_fp = os.path.join(gv.ROOT_DIR, gv.ONE_HOT_ENCODED_DUPLEXES)
    batch_size = gv.BATCH_SIZE
    n_epochs = gv.N_EPOCHS
    keep_prob = gv.KEEP_PROB

    # the cache is only usable when both the encoded duplexes and their
    # labels are on disk; otherwise fall back to re-encoding
    if Path(ohe_duplexes_fp).exists() and Path(true_labels_fp).exists():
        # load data
        _logger.info(' One-hot encoded training set found. Loading data ...')
        ytrain = np.loadtxt(true_labels_fp)
        with h5py.File(ohe_duplexes_fp, 'r') as hf:
            xtrain = hf['encoded_training_set'][:]

    else:
        _logger.info(' Encoding the training set. This might take some time')
        # read the raw training set only when it actually has to be encoded
        training_df = pd.read_csv(train_set_fp, sep='\t', usecols=gv.TRAIN_SET_COLUMNS)
        xtrain, ytrain = encode_data(training_df)
        # save for next computation
        _logger.info(' Saving encoded data to disk.')
        np.savetxt(true_labels_fp, ytrain)
        with h5py.File(ohe_duplexes_fp, 'w') as hf:
            hf.create_dataset('encoded_training_set', data=xtrain)

    _logger.info(' Building and compiling the model')
    model = _create_mlp_model(xtrain.shape[1], keep_prob)

    _logger.info(' Training started')
    history = train_eval(model, model_name, xtrain, ytrain, batch_size, n_epochs)

    _plot_model_history(history)

    _logger.info(' model {} saved.'.format(model_name))
    # the message describes the validation set, so report the validation
    # accuracy ('val_acc') rather than the training accuracy ('acc')
    _logger.info(' Best model achieved {:.2f} accuracy and {:.2f} validation loss on the validation set.'
                 .format(max(history.history['val_acc']), min(history.history['val_loss'])))

    return history
コード例 #4
0
def train_model():
    """
    Train DeepMiRNA model over the whole training set and obtain the final model.

    Training parameters and file locations are read from ``gv``. The one-hot
    encoded training set and its labels are loaded from disk when both cache
    files exist; otherwise the training set is read, encoded with
    ``encode_data`` and the result is written to disk for the next run. The
    model is then built with ``_create_mlp_model`` and fitted through ``train``.

    :return: keras history object
    """

    # get training parameters
    model_name = gv.TRAIN_FINAL_MODEL_NAME
    train_set_fp = os.path.join(gv.ROOT_DIR, gv.TRAIN_SET_LOCATION)
    true_labels_fp = os.path.join(gv.ROOT_DIR, gv.TRUE_LABELS)
    ohe_duplexes_fp = os.path.join(gv.ROOT_DIR, gv.ONE_HOT_ENCODED_DUPLEXES)
    batch_size = gv.BATCH_SIZE
    n_epochs = gv.N_EPOCHS
    keep_prob = gv.KEEP_PROB

    # the cache is only usable when both the encoded duplexes and their
    # labels are on disk; otherwise fall back to re-encoding
    if Path(ohe_duplexes_fp).exists() and Path(true_labels_fp).exists():
        # load data
        _logger.info(' One-hot encoded training set found. Loading data ...')
        ytrain = np.loadtxt(true_labels_fp)
        with h5py.File(ohe_duplexes_fp, 'r') as hf:
            xtrain = hf['encoded_training_set'][:]

    else:
        _logger.info(' Encoding the training set. This might take some time')
        # read the raw training set only when it actually has to be encoded
        training_df = pd.read_csv(train_set_fp, sep='\t', usecols=gv.TRAIN_SET_COLUMNS)
        xtrain, ytrain = encode_data(training_df)
        # save for next computation
        _logger.info(' Saving encoded data to disk.')
        np.savetxt(true_labels_fp, ytrain)
        with h5py.File(ohe_duplexes_fp, 'w') as hf:
            hf.create_dataset('encoded_training_set', data=xtrain)

    _logger.info(' Building and compiling the model')
    model = _create_mlp_model(xtrain.shape[1], keep_prob)

    _logger.info(' Training started')
    history = train(model, model_name, xtrain, ytrain, batch_size, n_epochs)

    return history
コード例 #5
0
def test_encode_data_header():
    """Check that ``encode_data`` raises ``SystemExit`` for a frame whose columns are ``a``, ``b`` and ``c``."""
    frame = pd.DataFrame({'a': [0], 'b': [1], 'c': [2]})
    with pytest.raises(SystemExit):
        encode_data(frame)