def process_dataset(self):
    if self.dataset:
        logging.debug("Processing dataset.")
        X, Y = preprocess.read_dataset(self.dataset, balanced=True)
        print(X.shape)
        Y = to_categorical(Y)
        logging.debug("X example: %s\ny example: %s" % (X[0], Y[0]))
        X_train, X_val, X_test, y_train, y_val, y_test = preprocess.split_dataset(X, Y)
        self.num_steps = X.shape[1]
    elif self.train_path:
        X_train, y_train = preprocess.read_set(self.train_path)
        X_train, X_val, y_train, y_val = preprocess.split_dataset(
            X_train, y_train, test_size=0.2, validation=False)
        X_test, y_test = preprocess.read_set(self.test_path)
        self.num_steps = X_train.shape[1]
    return X_train, X_val, X_test, y_train, y_val, y_test
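# preprocess.split_dataset itself is not shown in this excerpt. A minimal
# sketch of a three-way split with the same return shape, assuming
# scikit-learn and a 70/15/15 ratio (both are assumptions, not taken from
# the original code):
from sklearn.model_selection import train_test_split

def split_dataset_sketch(X, Y, val_size=0.15, test_size=0.15, seed=42):
    # Carve off the test set first, then split the remainder into train/val.
    X_rest, X_test, y_rest, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=seed)
    rel_val = val_size / (1.0 - test_size)  # val_size as a fraction of the remainder
    X_train, X_val, y_train, y_val = train_test_split(
        X_rest, y_rest, test_size=rel_val, random_state=seed)
    return X_train, X_val, X_test, y_train, y_val, y_test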
def train(self, app, ui, window):
    if not self.x.size or not self.y.size:
        QtWidgets.QMessageBox.critical(window, "Error", "No training data loaded")
    else:
        x_training, x_validation, y_training, y_validation = pp.split_dataset(
            self.x, self.y, self.data_split)
        ui.netOutput.setText("Training dataset size: {}".format(x_training.shape[0]))
        ui.netOutput.append("Validation dataset size: {}".format(x_validation.shape[0]))
        ui.netOutput.append("")
        self.model.train(x_training, y_training, x_validation, y_validation,
                         self.learning_rate, self.epochs, self.batch_size, app, ui)
import multiprocessing

import torch

LEARNING_RATE = 1e-3
USE_CUDA = True
PRINT_INTERVAL = 100
LOG_PATH = './logs/log.pkl'
MODEL_PATH = './checkpoints/'
COMPARE_PATH = './comparisons/'

use_cuda = USE_CUDA and torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print('Using device', device)
print('num cpus:', multiprocessing.cpu_count())
print(torch.cuda.is_available())

# training code
train_ids, test_ids = prep.split_dataset()
print('num train_images:', len(train_ids))
print('num test_images:', len(test_ids))

data_train = prep.ImageDiskLoader(train_ids)
data_test = prep.ImageDiskLoader(test_ids)
print(data_train)

kwargs = {'num_workers': multiprocessing.cpu_count(),
          'pin_memory': True} if use_cuda else {}

# BATCH_SIZE is assumed to be defined elsewhere in the original file.
train_loader = torch.utils.data.DataLoader(data_train, batch_size=BATCH_SIZE,
                                           shuffle=True, **kwargs)
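# prep.split_dataset() takes no arguments here and returns two id lists, so it
# presumably shuffles a fixed index of image files internally. A minimal sketch
# of that behavior; the image directory, 90/10 ratio, and seed are all
# assumptions, not taken from the original code:
import os
import random

def split_ids_sketch(image_dir='./images', train_frac=0.9, seed=0):
    ids = sorted(os.listdir(image_dir))
    random.Random(seed).shuffle(ids)  # deterministic shuffle
    cut = int(len(ids) * train_frac)
    return ids[:cut], ids[cut:]       # (train_ids, test_ids)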
import torch
import torch.nn as nn

input_size = 1
hidden_size = 128
num_layers = 1
batch_size = 128
num_classes = 5
out = "./result"
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
root_dir = './data'

if __name__ == "__main__":
    train_dataset_dict, test_dataset_dict = load_ECG_dataset(root_dir)
    train_dataset, validation_dataset = split_dataset(train_dataset_dict,
                                                      val_num=100, seed=0)
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    val_dataloader = torch.utils.data.DataLoader(validation_dataset,
                                                 batch_size=16, shuffle=False)
    dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

    model = LSTM(num_classes, input_size, hidden_size, num_layers, device)
    model = model.to(device)
    criterion = nn.CrossEntropyLoss().to(device)
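# split_dataset above takes a fixed validation count and a seed. A minimal
# sketch using torch.utils.data.random_split, assuming the input is (or wraps)
# a torch Dataset (an assumption; load_ECG_dataset is not shown):
import torch
from torch.utils.data import random_split

def split_dataset_sketch(dataset, val_num=100, seed=0):
    generator = torch.Generator().manual_seed(seed)  # reproducible split
    train_len = len(dataset) - val_num
    return random_split(dataset, [train_len, val_num], generator=generator)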
def process_dataset(self):
    X, Y = preprocess.read_dataset(self.dataset, balanced=True)
    return preprocess.split_dataset(X, Y, validation=False)
import torch as th

# Load dictionaries (commented out; csv_to_matrix below returns them).
# index_to_word = load_obj("data/index_to_word")
# word_to_index = load_obj("data/word_to_index")

FNAME = "../data/spam.csv"  # training set file
MIN_COUNT = 5
SEQ_LENGTH = 200
SPLIT_FRAC = 0.8
BATCH_SIZE = 32

# Get matrix of features.
word_to_index, index_to_word, matrix, labels = csv_to_matrix(
    fname=FNAME, min_count=MIN_COUNT, seq_length=SEQ_LENGTH)

# Get loaders.
(train_loader, valid_loader, test_loader,
 tensor_train, tensor_validation, tensor_test) = split_dataset(
    matrix, labels, split_frac=SPLIT_FRAC, batch_size=BATCH_SIZE)

# MODEL PARAMETERS
# Instantiate the model with hyperparameters.
vocab_size = len(index_to_word)
output_size = cfg['output_size']
embedding_dim = cfg['embedding_dim']  # 200
hidden_dim = cfg['hidden_dim']  # 128
n_layers = cfg['n_layers']

DEVICE = th.device('cuda' if th.cuda.is_available() else 'cpu')

# First check whether a GPU is available.
train_on_gpu = th.cuda.is_available()
print("Train on GPU:", train_on_gpu)
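# split_dataset above returns three loaders plus the underlying TensorDatasets.
# A minimal sketch of that contract, assuming matrix and labels are numpy
# arrays and that the non-train remainder is halved into valid/test (an
# assumption, not taken from the original code):
import torch as th
from torch.utils.data import TensorDataset, DataLoader

def split_dataset_sketch(matrix, labels, split_frac=0.8, batch_size=32):
    n_train = int(len(matrix) * split_frac)
    n_valid = n_train + (len(matrix) - n_train) // 2
    datasets, loaders = [], []
    for lo, hi in [(0, n_train), (n_train, n_valid), (n_valid, len(matrix))]:
        ds = TensorDataset(th.from_numpy(matrix[lo:hi]),
                           th.from_numpy(labels[lo:hi]))
        datasets.append(ds)
        # Only the training loader is shuffled.
        loaders.append(DataLoader(ds, batch_size=batch_size, shuffle=(lo == 0)))
    return (*loaders, *datasets)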
import pandas as pd
import tensorflow as tf

import load_dataset as load
import preprocess
from categorizing_model import CModel
import plotting

# Load dataset.
FILENAME = 'Dataset.txt'
dataset = load.load_dataset(FILENAME)

# Tokenize titles.
tensor, vocab_size = preprocess.tokenize(dataset['titles'].to_list())

# Split and shuffle dataset.
X_train, X_test, Y_train, Y_test = preprocess.split_dataset(
    tensor, dataset['labels'].to_list())

# Make dataset pairs (title, labels).
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, Y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test))

# Split datasets into batches.
BATCH_SIZE = 64
train_dataset = train_dataset.batch(BATCH_SIZE)
test_dataset = test_dataset.batch(BATCH_SIZE)

HIDDEN_UNITS = 64
FINAL_OUTPUT_UNITS = 6
DROPOUT_RATE = 0.4
DENSE_LAYERS = 1
L2_RATE = 0.01
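# preprocess.split_dataset above both shuffles and splits. A minimal sketch
# using scikit-learn's train_test_split; the 80/20 ratio, stratification, and
# seed are assumptions, not taken from the original code:
from sklearn.model_selection import train_test_split

def split_dataset_sketch(tensor, labels, test_size=0.2, seed=42):
    # Returns (X_train, X_test, Y_train, Y_test) in the order used above.
    return train_test_split(tensor, labels, test_size=test_size,
                            shuffle=True, stratify=labels, random_state=seed)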