import torch
import torch.utils.data as data_util


def run_training():
    # Driver; delegates to the train(model, config, loaders) training loop.
    # (Naming the driver itself train() would shadow that helper.)
    config = Config()
    datasets = Datasets(config)
    train_txts, train_labels = get_dataset(config.filename_train_txt,
                                           config.filename_train_label,
                                           config.filename_vocab)
    valid_txts, valid_labels = get_dataset(config.filename_valid_txt,
                                           config.filename_valid_label,
                                           config.filename_vocab)
    train_dataset = data_util.TensorDataset(train_txts, train_labels)
    valid_dataset = data_util.TensorDataset(valid_txts, valid_labels)
    # Distinct loader variables, so the validation loader does not
    # overwrite the training loader.
    train_loader = data_util.DataLoader(train_dataset, config.batch_size,
                                        shuffle=True, num_workers=2)
    valid_loader = data_util.DataLoader(valid_dataset, config.batch_size,
                                        shuffle=False, num_workers=2)
    model = Model(config)
    if torch.cuda.is_available():
        model = model.cuda()
    train(model, config, (train_loader, valid_loader))
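# A minimal sketch of the train(model, config, loaders) helper that
# run_training() above delegates to. The Adam optimizer, cross-entropy loss,
# and the config.lr / config.num_epochs attributes are assumptions, not the
# original implementation.
import torch
import torch.nn as nn


def train(model, config, loaders):
    train_loader, valid_loader = loaders
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
    for epoch in range(config.num_epochs):
        model.train()
        for txts, labels in train_loader:
            if torch.cuda.is_available():
                txts, labels = txts.cuda(), labels.cuda()
            optimizer.zero_grad()
            loss = criterion(model(txts), labels)
            loss.backward()
            optimizer.step()
        # Hypothetical hook: evaluate on the validation loader each epoch.
        valid(model, config, valid_loader)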
from os import path


def load_dir(directory, config=defaults):
    """Load train, devel and test data from directory, return Datasets."""
    # Datasets are assumed to be named {train,devel,test}.tsv.
    datasets = []
    for dset in ('train', 'devel', 'test'):
        name = '{}-{}'.format(path.basename(directory.rstrip('/')), dset)
        datasets.append(load(path.join(directory, dset + '.tsv'), config, name))
    return Datasets(*datasets)
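# Datasets(*datasets) above suggests a simple three-field container; a minimal
# sketch, assuming the (train, devel, test) ordering used in load_dir:
from collections import namedtuple

Datasets = namedtuple('Datasets', ['train', 'devel', 'test'])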
def make_recovery_lists(data):
    lfns = []
    for line in data.split("\n"):
        f = line.split(" ")
        if len(f) > 0 and f[0].find("/store/user") > -1:
            lfn = Lfn(f[0])
            lfns.append(lfn)
    print(" Found %d entries." % len(lfns))

    datasets = Datasets()
    for lfn in lfns:
        dataset_name = "%s/%s/%s" % (lfn.config, lfn.version, lfn.dataset)
        dataset = datasets.add_dataset(dataset_name)
        # Block and size are unknown here, but that is not an issue for the
        # recovery lists.
        block_name = "INVALID"
        file_size = -1
        # Add the file.
        print(" Add LFN: %s" % lfn.lfn)
        dataset.add_block_file(block_name, lfn.lfn, file_size)
    return datasets
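# A minimal sketch of the Lfn helper used above. The exact path layout is an
# assumption inferred from how make_recovery_lists reads the attributes, e.g.
# /store/user/<user>/<config>/<version>/<dataset>/<file>.
class Lfn(object):
    def __init__(self, lfn):
        self.lfn = lfn
        parts = lfn.strip('/').split('/')
        # parts: ['store', 'user', <user>, <config>, <version>, <dataset>, <file>]
        self.config = parts[3]
        self.version = parts[4]
        self.dataset = parts[5]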
def test():
    config = Config()
    datasets = Datasets(config)
    test_txts, test_labels = get_dataset(config.filename_test_txt,
                                         config.filename_test_label,
                                         config.filename_vocab)
    test_dataset = data_util.TensorDataset(test_txts, test_labels)
    # No shuffling for evaluation, so results are deterministic.
    test_loader = data_util.DataLoader(test_dataset, config.batch_size,
                                       shuffle=False, num_workers=2)
    model = Model(config)
    # map_location must be a valid device string such as 'cuda' or 'cpu';
    # filename is the checkpoint path, assumed to be defined elsewhere.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.load_state_dict(torch.load(filename, map_location=device))
    valid(model, config, test_loader)
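# A minimal sketch of the valid(model, config, loader) helper assumed above;
# it reports plain accuracy, which may differ from the original metric.
import torch


def valid(model, config, loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for txts, labels in loader:
            if torch.cuda.is_available():
                txts, labels = txts.cuda(), labels.cuda()
            preds = model(txts).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    print("accuracy: %.4f" % (correct / total))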
def run_forrest_run(dataset_list, activation_list, modelname):
    for dataset_name in dataset_list:
        for name in activation_list:
            for model in modelname:
                if model == "DNN":
                    dataset = Datasets()
                    if dataset_name == 'MNIST':
                        x_train, x_test, y_train, y_test = dataset.get_mnist("DNN")
                    elif dataset_name == 'Fashion-MNIST':
                        x_train, x_test, y_train, y_test = dataset.get_fashion_mnist("DNN")
                    num_classes = dataset.num_classes
                    input_shape = dataset.input_shape
                    dnn = DNN(name)
                    score, history = dnn.run_model(input_shape, x_train, x_test,
                                                   y_train, y_test, 1)
                else:
                    dataset = Datasets()
                    if dataset_name == 'MNIST':
                        x_train, x_test, y_train, y_test = dataset.get_mnist("CNN")
                    elif dataset_name == 'Fashion-MNIST':
                        x_train, x_test, y_train, y_test = dataset.get_fashion_mnist("CNN")
                    num_classes = dataset.num_classes
                    input_shape = dataset.input_shape
                    if model == "CNN":
                        cnn = CNN(name)
                        score, history = cnn.run_model(input_shape, x_train, x_test,
                                                       y_train, y_test)
                    elif model == "CAE":
                        # Run the CAE only; rerunning the CNN here would
                        # overwrite the CAE results.
                        cae = CAE(name)
                        score, history = cae.run_model(input_shape, x_train, x_test,
                                                       y_train, y_test)
                plot_model(history, name, model, dataset_name)
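# Example invocation; the activation names are placeholders for whatever the
# DNN/CNN/CAE constructors accept.
run_forrest_run(['MNIST', 'Fashion-MNIST'], ['relu', 'tanh'], ['DNN', 'CNN', 'CAE'])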
import random
from os import path


def load_dir(directory, config=defaults):
    """Load train, devel and test data from directory, return Datasets.

    Expects each of the training, development and test datasets to be
    contained in two files with basenames 'train', 'devel' and 'test'
    (resp.) in the given directory and suffixes '.pos' for positive and
    '.neg' for negative examples.
    """
    if config.random_seed is not None:
        random.seed(config.random_seed)
    datasets = []
    for dset in ('train', 'devel', 'test'):
        fname = lambda l: path.join(directory, '{}.{}'.format(dset, l))
        pos_doc = load_documents(fname('pos'), 'pos', config)
        neg_doc = load_documents(fname('neg'), 'neg', config)
        if config.oversample and dset == 'train':
            pos_doc, neg_doc = balance_examples([pos_doc, neg_doc])
        documents = pos_doc + neg_doc
        # Shuffle the list of documents to avoid training batches consisting
        # of only positive or only negative examples.
        random.shuffle(documents)
        dname = '{}-{}'.format(path.basename(directory.rstrip('/')), dset)
        datasets.append(Dataset(documents=documents, name=dname))
    return Datasets(*datasets)
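# A minimal sketch of the balance_examples helper used above: oversample each
# smaller list by repeating random examples until all lists match the largest.
# This is an assumption; the original may balance differently.
import random


def balance_examples(doc_lists):
    target = max(len(docs) for docs in doc_lists)
    balanced = []
    for docs in doc_lists:
        extra = [random.choice(docs) for _ in range(target - len(docs))]
        balanced.append(docs + extra)
    return balanced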
import os
import sys

from data import Datasets
from utils import configuration, create_logger, attach_exception_hook
from trainer import QuasiSiameseNetwork

if __name__ == '__main__':
    args = configuration()
    logger = create_logger(__name__)
    sys.excepthook = attach_exception_hook(logger)

    logger.info('START with Configuration:')
    for k, v in sorted(vars(args).items()):
        logger.info('{0}: {1}'.format(k, v))

    qsn = QuasiSiameseNetwork(args)
    datasets = Datasets(args, qsn.transforms)
    save_path = os.path.join(args.checkpointPath, 'best_model_wts.pkl')

    if not args.test:
        qsn.train(args.numberOfEpochs, datasets, args.device, save_path)

    logger.info('Evaluation on test dataset')
    qsn.test(datasets, args.device, save_path)

    logger.info('END')
import numpy as np


def neural_net(X_train, Y_train, X_test, Y_test, lr, act_f, epochs, prt):
    # 1. Count the number of attributes (input layer).
    atts = X_train.shape[1]
    # 2. Count the number of distinct classes (output layer).
    clss = len(np.unique(Y_train))
    top = [atts, atts - 1, clss]
    Y_pred = rna.fit(X_train, Y_train, X_test, Y_test, top, lr, act_f, epochs, prt)
    accuracy = rna.accuracy_score(Y_test, Y_pred)
    return accuracy


dts = Datasets()
S = 2
dts.remove_data(S)

### ABALONE ###
x_abalone = dts.X_abalone
y_abalone = dts.Y_abalone
x_abalone_r = dts.X_rem_abalone
y_abalone_r = dts.Y_rem_abalone

X_train_abalone, Y_train_abalone, X_test_abalone, Y_test_abalone = separate(
    0.3, x_abalone, y_abalone, S)
X_train_r_abalone, Y_train_r_abalone, X_test_r_abalone, Y_test_r_abalone = separate(
    0.3, x_abalone_r, y_abalone_r, S)

print("\nAbalone original:")
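# A minimal sketch of the separate(test_fraction, X, Y, seed) helper assumed
# above, returning (X_train, Y_train, X_test, Y_test) in the order the calls
# unpack.
import numpy as np


def separate(test_fraction, X, Y, seed):
    rng = np.random.RandomState(seed)
    idx = rng.permutation(len(X))
    cut = int(len(X) * (1 - test_fraction))
    return X[idx[:cut]], Y[idx[:cut]], X[idx[cut:]], Y[idx[cut:]]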
# -*- coding: utf-8 -*-
"""
Created on Tue Apr 14 21:54:40 2020

@author: 4PF41LA_RS6
"""
from data import Datasets
import numpy as np

dts = Datasets()
# dts.remove_data(10)
dts.remove_data(1)

y_abalone = dts.Y_abalone
y_abalone_r = dts.Y_rem_abalone
print("Abalone:")
print(dts.data_info(y_abalone), dts.data_info(y_abalone_r), dts.reduce)

y_digits = dts.Y_digits
y_digits_r = dts.Y_rem_digits
print("\nDigits:")
print(dts.data_info(y_digits), dts.data_info(y_digits_r), dts.reduce)

y_cancer = dts.Y_cancer
y_cancer_r = dts.Y_rem_cancer
print("\nCancer:")
print(dts.data_info(y_cancer), dts.data_info(y_cancer_r), dts.reduce)

y_human = dts.Y_human
y_human_r = dts.Y_rem_human