Example #1
import numpy as np

np.random.seed(1337)  # for reproducibility
from sklearn.metrics import accuracy_score

from dbn.tensorflow import SupervisedDBNClassification
from Rafd import Rafd

# Load the data and split it into train/test sets
rafd = Rafd("entrenamiento/")
X_train, X_test, Y_train, Y_test = rafd.getData()

# Training
classifier = SupervisedDBNClassification(hidden_layers_structure=[256, 256],
                                         learning_rate_rbm=0.05,
                                         learning_rate=0.001,
                                         n_epochs_rbm=15,
                                         n_iter_backprop=100,
                                         batch_size=32,
                                         activation_function='sigmoid',
                                         dropout_p=0.2)
classifier.fit(X_train, Y_train)

# Save the model
classifier.save('model.pkl')

# Restore it
classifier = SupervisedDBNClassification.load('model.pkl')

# Test
Y_pred = classifier.predict(X_test)
print('Done.\nAccuracy: %f' % accuracy_score(Y_test, Y_pred))
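
Note: Rafd is a project-specific loader rather than part of the dbn package. A minimal, purely hypothetical sketch of the interface the example assumes (random placeholder data instead of the real RaFD images) could look like this:

import numpy as np
from sklearn.model_selection import train_test_split


class Rafd:
    """Hypothetical stand-in for the project's RaFD image loader."""

    def __init__(self, folder):
        self.folder = folder  # e.g. "entrenamiento/"

    def getData(self):
        # The real loader reads face images and labels from self.folder;
        # random data is used here only to illustrate the expected interface.
        X = np.random.rand(120, 256).astype(np.float32)
        Y = np.random.randint(0, 8, size=120)
        return train_test_split(X, Y, test_size=0.25, random_state=0)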
Example #2
# classifier = SupervisedDBNClassification(  # opening of the call, cut off in this excerpt
#                   learning_rate_rbm=0.05, # learning rate for RBM pretraining
#                   learning_rate=0.1, # learning rate for backpropagation of the nodes
#                   n_epochs_rbm=10, # epochs for RBM pretraining (n epochs per layer)
#                   n_iter_backprop=100, # backpropagation iterations
#                   batch_size=46, # batch size
#                   activation_function='sigmoid', # activation function
#                   dropout_p=0.1) # node dropout to avoid overfitting

# classifier.fit(X_train, y_train) # train the model on the data prepared in 3.2.2

#%% [markdown]
# ##### 3.2.4 Obtaining results with the already trained model

#%%
# First, load the trained model from disk
# (DBNC is an alias for SupervisedDBNClassification, imported in an earlier cell)
tclassifier = DBNC.load('model.pkl')

# Inspect the trained classifier and its hidden-layer structure
tclassifier, tclassifier.unsupervised_dbn.hidden_layers_structure


#%%
# Predictions on the test set
y_pred = tclassifier.predict(X_test)


#%%
# Analyse the results
from sklearn.metrics import classification_report, confusion_matrix

print(classification_report(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
Example #3
import glob

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from dbn.tensorflow import SupervisedDBNClassification

# Hypothetical placeholder; the original script defines Model_Location elsewhere.
Model_Location = './models/'


def get_dataset(tz):
    # Hypothetical glob pattern for the per-zone .npy files; the original
    # pattern is not shown in this excerpt.
    files = 'zone_{}/*.npy'.format(tz)
    print('Loading in ' + files)
    f = glob.glob(files)
    full_Y = np.genfromtxt('converted_stage1_labels.csv', delimiter=',')
    Y = full_Y[:, tz - 1]
    X = np.empty([len(f), 62500])
    for i in range(len(f)):
        tmp = np.load(f[i])
        tmp = np.reshape(tmp, [1, 62500])
        X[i, :] = tmp

    return X, Y


f = open(Model_Location + 'Output_Acc_Sum.txt', 'w')
for tz in range(0, 17):
    print('\nChecking Accuracy of NN for zone {}'.format(tz + 1))
    filename = Model_Location + 'Matt_Net_Zone_{}.pkl'.format(tz + 1)
    print('Loading ' + filename)
    My_Net = SupervisedDBNClassification.load(filename)
    X, Y = get_dataset(tz + 1)
    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=0)
    Yp = My_Net.predict(X_test)
    score = accuracy_score(Y_test, Yp)
    print('NN for Zone {} accuracy == {}'.format(tz + 1, score))
    f.write('Zone, {}, accuracy, {}\n'.format(tz + 1, score))

f.close()
Example #4
if __name__ == "__main__":
    # modelname, testfastafilename and minprobaforBlast are defined earlier
    # in the original script (not shown in this excerpt).
    path = sys.argv[0]
    # keep only the directory portion of the script path, trailing "/" included
    path = path[:path.rindex("/") + 1]
    #load config of the model
    jsonfilename, jsonvariationfilename, classifiername, classificationfilename, classificationlevel, k, data_max = LoadConfig(
        modelname)
    #represent sequences of the test dataset as k-mer vector
    testfilename = GetBase(testfastafilename)
    matrixfilename = testfilename + "." + str(k) + ".matrix"
    command = path + "fasta2matrix.py " + str(
        k) + " " + testfastafilename + " " + matrixfilename
    os.system(command)
    testseqIDs, testinputs, testlabels, classes, nb_classes, input_length, level = loadData(
        matrixfilename, data_max, classificationfilename, classificationlevel)
    #load model
    model = SupervisedDBNClassification.load(classifiername)
    #predict labels for test dataset
    pred_labels = model.predict(testinputs)
    probas = model.predict_proba(testinputs)
    #load classes with sequences
    classeswithsequences = {}
    variation = {}
    testseqrecords = []
    if minprobaforBlast <= 1.0:
        testseqrecords = list(SeqIO.parse(testfastafilename, "fasta"))
        #load classes
        with open(jsonfilename) as json_file:
            classeswithsequences = json.load(json_file)
        #load variation
#		if not os.path.exists(jsonvariationfilename):
#			basename=modelname
Example #5
def run(params):

    # ##################### get parameters and define logger ################

    # device
    os.environ['CUDA_VISIBLE_DEVICES'] = str(params.gpu)

    # get parameters
    data_name = params.data.data_name
    data_dir = params.data.data_dir
    target_dir = params.data.target_dir
    train_prop = params.data.train_prop
    val_prop = params.data.val_prop

    train_params = params.train
    method_name = params.method_name
    result_dir = params.result_dir
    folder_level = params.folder_level

    # values < 1 are treated as fractions of the dataset, values >= 1 as absolute sample counts
    train_prop = train_prop if train_prop < 1 else int(train_prop)
    val_prop = val_prop if val_prop < 1 else int(val_prop)

    # build a nested result directory from the local variables named in folder_level
    result_root = result_dir
    local_v = locals()
    for s in folder_level:
        result_dir = check_path(os.path.join(result_dir, str(local_v[s])))

    # define output dirs
    acc_dir = os.path.join(result_root, 'accuracy.csv')
    log_dir = os.path.join(result_dir, 'train.log')
    model_dir = os.path.join(result_dir, 'weights.pkl')
    # soft_dir = os.path.join(result_dir, 'soft_label.mat')
    # loss_dir = os.path.join(result_dir, 'loss_curve.png')

    # define logger
    logger = define_logger(log_dir)

    # print parameters
    num1 = 25
    num2 = 100
    logger.info('%s begin a new training: %s %s' %
                ('#' * num1, method_name, '#' * num1))
    params_str = recur_str_dict_for_show(params, total_space=num2)
    logger.info('show parameters ... \n%s' % params_str)

    # ########################### get data, train ############################

    logger.info('get data ...')
    mask_dir = os.path.dirname(data_dir)
    data, target = read_data(data_dir, target_dir)
    train_mask, val_mask, test_mask = load_masks(mask_dir, target, train_prop,
                                                 val_prop)
    x_train, y_train = get_vector_samples(data, target, train_mask)

    logger.info('get model ...')
    from dbn.tensorflow import SupervisedDBNClassification
    classifier = SupervisedDBNClassification(**train_params)

    logger.info('begin to train ...')
    s = time.time()
    classifier.fit(x_train, y_train)
    e = time.time()
    train_time = e - s
    logger.info('training time: %.4fs' % train_time)

    logger.info('save model ...')
    classifier.save(model_dir)

    # ########################### predict, output ###########################

    all_data = data.reshape(-1, data.shape[1] * data.shape[2]).T

    classifier = SupervisedDBNClassification.load(model_dir)

    logger.info('begin to predict ...')
    s = time.time()
    pred = classifier.predict(all_data)
    pred = np.array(pred)
    pred = pred.reshape(target.shape) + 1
    e = time.time()
    pred_time = e - s
    logger.info('prediction time: %.4fs' % pred_time)

    # output predicted map(png/mat), accuracy table and other records
    logger.info('save classification maps etc. ...')
    train_records = {
        'train_time': '%.4f' % train_time,
        'pred_time': '%.4f' % pred_time
    }

    ro = ResultOutput(pred,
                      data,
                      target,
                      train_mask,
                      val_mask,
                      test_mask,
                      result_dir,
                      acc_dir,
                      hyper_params=params,
                      train_records=train_records)
    ro.output()
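
For reference, train_params above is unpacked straight into the SupervisedDBNClassification constructor. A hypothetical configuration reusing the keyword arguments from Example #1 (not taken from this project's actual settings) might look like this:

from dbn.tensorflow import SupervisedDBNClassification

# Hypothetical contents of params.train; the keyword names mirror the
# constructor call shown in Example #1.
train_params = {
    'hidden_layers_structure': [256, 256],
    'learning_rate_rbm': 0.05,
    'learning_rate': 0.001,
    'n_epochs_rbm': 15,
    'n_iter_backprop': 100,
    'batch_size': 32,
    'activation_function': 'sigmoid',
    'dropout_p': 0.2,
}
classifier = SupervisedDBNClassification(**train_params)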