def main():
    """Load the GCommands train/valid/test splits, build a convnet sized to
    the training vocabulary, and run training.

    Side effects: reads ./data/{train,valid,test}, prints progress, and
    delegates training/evaluation to the network object.
    """
    train_path = './data/train'
    valid_set = GCommandLoader('./data/valid')
    test_set = GCommandLoaderTest('./data/test')
    train_set = GCommandLoader(train_path)
    print("train_path: {}".format(train_path))
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=10,
        pin_memory=False, sampler=None, drop_last=True)
    # Materialize all batches once so every epoch reuses the same shuffled
    # batches without re-spawning workers.  NOTE(review): this holds the
    # whole training set in memory — confirm it fits.
    train_loader = list(train_loader)
    # BUG FIX: `shuffle` expects a bool; pass False explicitly instead of
    # None for the evaluation loaders (same falsy behavior, correct type).
    valid_loader = torch.utils.data.DataLoader(
        valid_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=0,
        pin_memory=False, sampler=None)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=0,
        pin_memory=False, sampler=None)
    net = convnet(train_set.n_chars)
    print("n chars {}".format(train_set.n_chars))
    net.perform_training(train_loader, valid_loader, test_loader,
                         train_set.class_to_idx)
def main():
    """Train a model on ./data/train, evaluate on ./data/valid, and save
    the learned parameters to model.pt."""
    # Load data.
    training_set = GCommandLoader('./data/train')
    validation_set = GCommandLoader('./data/valid')
    training_loader = torch.utils.data.DataLoader(
        training_set, batch_size=100, shuffle=True,
        num_workers=20, pin_memory=True, sampler=None)
    validation_loader = torch.utils.data.DataLoader(
        validation_set, batch_size=100, shuffle=None,
        num_workers=20, pin_memory=True, sampler=None)
    # Prefer the first GPU when one is present.
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    # Train and evaluate the model.
    net = train(training_loader, validation_loader, device,
                lr=1e-4, wd=1e-5, epochs=150)
    # Persist only the learned parameters, not the whole module.
    torch.save({'model_state_dict': net.state_dict()}, 'model.pt')
def get_inputs(cnn_model):
    """Build the three DataLoaders and an Adam optimizer for *cnn_model*.

    Returns (training_set, validation_set, testing_set, optimizer); only
    the training loader shuffles.
    """
    training_loader = torch.utils.data.DataLoader(
        GCommandLoader('./ML4_dataset/data/train'), batch_size=100,
        shuffle=True, num_workers=20, pin_memory=True, sampler=None)
    validation_loader = torch.utils.data.DataLoader(
        GCommandLoader('./ML4_dataset/data/valid'), batch_size=100,
        shuffle=False, num_workers=20, pin_memory=True, sampler=None)
    testing_loader = torch.utils.data.DataLoader(
        GCommandLoader('./ML4_dataset/data/test1'), batch_size=100,
        shuffle=False, num_workers=20, pin_memory=True, sampler=None)
    adam = optim.Adam(cnn_model.parameters(), lr=LEARNING_RATE)
    return training_loader, validation_loader, testing_loader, adam
def load_data(batch_size):
    """Return (train, valid, test) DataLoaders over the ./gcommands splits.

    Train and validation batches are shuffled; the test order is kept
    stable so predictions can be matched back to files.
    """
    def make_loader(split, shuffle):
        # All three loaders share the same configuration except the path
        # suffix and the shuffle flag.
        return torch.utils.data.DataLoader(
            GCommandLoader('./gcommands/' + split), batch_size=batch_size,
            shuffle=shuffle, num_workers=20, pin_memory=True, sampler=None)

    return (make_loader('train', True),
            make_loader('valid', True),
            make_loader('test', False))
def main():
    """Train a ResNet on GCommands, log per-epoch metrics, and write test
    predictions.

    Relies on module-level globals: batch_size, num_blocks, num_epochs,
    learning_rate, device, ResNet, ResidualBlock, train, val,
    test_file_output.
    """
    total_t = time.time()
    # Load data.
    trainset = GCommandLoader('./train')
    testset = GCommandLoader('./test')
    validationset = GCommandLoader('./valid')
    train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=20,
        pin_memory=True, sampler=None)
    validation_loader = torch.utils.data.DataLoader(
        validationset, batch_size=batch_size, shuffle=False,
        num_workers=20, pin_memory=True, sampler=None)
    test_loader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=20,
        pin_memory=True, sampler=None)
    train_loss, val_loss = [], []
    train_acc, val_acc = [], []
    model = ResNet(ResidualBlock, num_blocks).to(device)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    for e in range(num_epochs):
        t = time.time()
        train_avg_loss, train_avg_acc = train(train_loader, model, optimizer)
        train_loss.append(train_avg_loss)
        train_acc.append(train_avg_acc)
        val_avg_loss, val_avg_acc = val(validation_loader, model)
        val_loss.append(val_avg_loss)
        val_acc.append(val_avg_acc)
        # BUG FIX: the original divided the *formatted string* by 60
        # ("...".format(time.time() - t) / 60 -> TypeError on every
        # epoch); divide the elapsed seconds before formatting instead.
        print("Epoch: {}/{}".format(e + 1, num_epochs),
              "Train loss: {:.3f}".format(train_avg_loss),
              "Train acc: {:.3f}".format(train_avg_acc),
              "Val loss: {:.3f}".format(val_avg_loss),
              "Val acc: {:.3f}".format(val_avg_acc),
              "time in min: {:.3f}".format((time.time() - t) / 60))
    test_file_output(testset, test_loader, model)
    print("total time", (time.time() - total_t) / 60)
def main():
    """Train a model on GCommands data, then classify every test sample.

    NOTE(review): `model` is referenced below but never defined in this
    function — the `# model = net` line is commented out — so this relies
    on a module-level `model`, otherwise `optimizer = ...` raises
    NameError.  TODO confirm.
    """
    torch.multiprocessing.freeze_support()
    device = torch.device("cpu")
    # model = net
    # NOTE(review): '.data/train' has no separator after the dot — the
    # sibling loaders below use './valid' and './test'; presumably this
    # should be './train' or './data/train'.  TODO confirm before fixing.
    dataset = GCommandLoader('.data/train')
    train_set = torch.utils.data.DataLoader(
        dataset, batch_size=100, shuffle=True,
        num_workers=20, pin_memory=True, sampler=None)
    validation = GCommandLoader('./valid')
    validation_set = torch.utils.data.DataLoader(
        validation, batch_size=100, shuffle=None,
        num_workers=20, pin_memory=True, sampler=None)
    # Despite the name, this is the raw dataset (not a DataLoader); it is
    # iterated sample-by-sample in the prediction loop below.
    test_loader = GCommandLoader('./test')
    # testSet = torch.utils.data.DataLoader(
    #     test_loader, batch_size=100, shuffle=None,
    #     num_workers=20, pin_memory=True, sampler=None)
    # data, train_set, validation, testSet = dataset.to(device), train_set.to(device), testSet.to(device)
    # Phase-keyed loaders and sizes, as expected by train_model.
    dataLoader = {'train': train_set, 'val': validation_set}
    dataset_sizes = {
        'train': len(dataset.spects),
        'val': len(validation.spects)
    }
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0005)
    train_model(criterion, optimizer, dataLoader, dataset_sizes, 25)
    model.eval()  # Set model to evaluate mode
    outputs = []
    # Classify each test sample individually: prepend a batch dimension
    # of 1 before the forward pass and record the argmax class index.
    for x in test_loader:
        outputs.append(
            torch.argmax(
                model(x[0].reshape(
                    [1, x[0].shape[0], x[0].shape[1], x[0].shape[2]])),
                1).item())  # x[0] photo and size
    spects = test_loader.spects
    newTest(spects, outputs)
def validate(batch_size_validate):
    """Evaluate the global `net` on ./data/valid.

    Prints the accuracy as a percentage and returns it as a fraction in
    [0, 1].  Uses the module-level `device` and `net`.
    """
    validation_loader = torch.utils.data.DataLoader(
        GCommandLoader('./data/valid'), batch_size=batch_size_validate,
        shuffle=None, num_workers=20, pin_memory=True, sampler=None)
    correct = 0
    total = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for batch_inputs, batch_labels in validation_loader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            scores = net(batch_inputs)
            # Predicted class is the index of the highest score.
            _, predicted = torch.max(scores.data, 1)
            total += batch_labels.size(0)
            correct += (predicted == batch_labels).sum().item()
    print('Accuracy of the network on the ' + str(batch_size_validate) +
          ' Validation audios: %.4f ' % (100 * correct / total))
    return correct / total
def main():
    """Train a ConvNet on pre-batched GCommands data, then validate."""
    dataSetTest = GCommandLoader("/home/herold55/PycharmProjects/ex4ML/Test")
    dataSetTrain = GCommandLoader(
        "/home/herold55/PycharmProjects/ex4ML/Train2")
    dataSetValid = GCommandLoader("/home/herold55/PycharmProjects/ex4ML/Valid")
    print(len(dataSetTest))
    print(len(dataSetTrain))
    print(len(dataSetValid))
    x_test = loadData(dataSetTest, 100)
    x_train = loadData(dataSetTrain, 100)
    x_valid = loadData(dataSetValid, 100)

    epochs = 5
    learning_rate = np.exp(-23)  # extremely small LR (~1e-10)
    model = ConvNet()
    # Loss and optimizer.
    criterion = nn.NLLLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # Train the model.
    total_step = len(x_train)
    loss_list = []
    acc_list = []
    for epoch in range(epochs):
        for images, labels in x_train:
            # Forward pass and loss.
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss_list.append(loss.item())
            # Backprop and Adam step.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Track per-batch accuracy.
            batch_size = labels.size(0)
            _, predicted = torch.max(outputs.data, 1)
            batch_correct = (predicted == labels).sum().item()
            acc_list.append(batch_correct / batch_size)
    model.validation_model(x_valid)
def data_loading(address, is_shuffle):
    """Wrap the GCommands dataset at *address* in a batched DataLoader.

    :param address: path to the dataset directory.
    :param is_shuffle: whether batches are shuffled each epoch.
    :return: the configured DataLoader.
    """
    return torch.utils.data.DataLoader(
        GCommandLoader(address), batch_size=100, shuffle=is_shuffle,
        num_workers=20, pin_memory=True, sampler=None)
def main():
    """Train a convolutional net on GCommands, evaluating on the
    validation split each epoch and reporting on the test split."""
    cuda = torch.cuda.is_available()
    device = torch.device("cuda" if cuda else "cpu")
    train_set = GCommandLoader('./data/train')
    validation_set = GCommandLoader('./data/valid')
    test_set = GCommandLoader('./data/test')

    def make_loader(dataset, shuffle):
        # Shared loader configuration for all three splits.
        return torch.utils.data.DataLoader(
            dataset, batch_size=BATCH_SIZE, shuffle=shuffle,
            num_workers=0, pin_memory=True, sampler=None)

    train_loader = make_loader(train_set, True)
    validation_loader = make_loader(validation_set, None)
    test_loader = make_loader(test_set, None)

    model = ConvolutionalNN().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=ETA)
    for epoch in range(EPOCH_NUM):
        print("epoch " + str(epoch))
        train(model, train_loader, optimizer, cuda)
        # Per-epoch evaluation on the validation split.
        test(model, validation_loader, cuda)
    print_report(model, test_loader, cuda)
def load_data_for_test(directory):
    """Create a non-shuffled DataLoader over data/<directory>.

    Returns (loader, dataset) so callers can also reach the raw file
    list on the dataset object.
    """
    dataset = GCommandLoader("data/" + directory)
    loader = torch.utils.data.DataLoader(
        dataset, batch_size=100, shuffle=None, num_workers=20,
        pin_memory=True, sampler=None)
    return loader, dataset
def load_set(path, batch_size=100, num_workers=20, shuffle=True):
    """Load the GCommands dataset at *path* and wrap it in a DataLoader
    with the given batching/shuffling configuration."""
    return torch.utils.data.DataLoader(
        GCommandLoader(path), batch_size=batch_size, shuffle=shuffle,
        num_workers=num_workers, pin_memory=True, sampler=None)
def load():
    """Return a shuffled DataLoader over the ./data/train split."""
    return torch.utils.data.DataLoader(
        GCommandLoader('./data/train'), batch_size=100, shuffle=True,
        num_workers=20, pin_memory=True)
def main():
    """Load the training split and hand its batches to dataTrain."""
    loader = torch.utils.data.DataLoader(
        GCommandLoader('./data/train'), batch_size=100, shuffle=True,
        num_workers=20, pin_memory=True, sampler=None)
    dataTrain(loader)
def load(directory):
    """Return a shuffled DataLoader over the dataset at data/<directory>."""
    return torch.utils.data.DataLoader(
        GCommandLoader("data/" + directory), batch_size=100, shuffle=True,
        num_workers=20, pin_memory=True, sampler=None)
def load_data(data_dource):
    """Wrap the dataset at *data_dource* in a non-shuffled DataLoader.

    (Parameter name — a typo for "data_source" — is kept because it is
    caller-visible.)
    """
    return torch.utils.data.DataLoader(
        GCommandLoader(data_dource), batch_size=100, shuffle=None,
        num_workers=20, pin_memory=True, sampler=None)
def main():
    """Train the CNN, evaluate it on validation, then collect test-set
    predictions."""
    cnn_model = Cnn.ConvolutionNN()
    training_set, validation_set, testing_set, optimizer = get_inputs(
        cnn_model)
    criterion = nn.CrossEntropyLoss()
    train_conv(training_set, optimizer, EPOCHS, criterion, cnn_model)
    test_conv(cnn_model, validation_set)
    # The raw dataset is needed alongside the batched loader so
    # get_predictions can recover the test file names.
    loader = GCommandLoader('./ML4_dataset/data/test1')
    prediction_list, file_names = get_predictions(testing_set, loader,
                                                  cnn_model)
def main():
    """Train a model on ./data/train, validating on ./data/valid."""
    train_data = GCommandLoader('./data/train')
    dev_data = GCommandLoader('./data/valid')
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=100, shuffle=True, num_workers=20,
        pin_memory=True, sampler=None)
    dev_loader = torch.utils.data.DataLoader(
        dev_data, batch_size=100, shuffle=None, num_workers=20,
        pin_memory=True, sampler=None)
    # BUG FIX: the device was hard-coded to 'cuda:0', which crashes on
    # CPU-only machines; fall back to CPU when CUDA is unavailable
    # (same guard used by the other training entry points in this file).
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = train(train_loader, dev_loader, device, epochs=100)
def loadData(dir, batch_size, shuffle):
    """Load the dataset under *dir* into a batched DataLoader.

    :param dir: dataset directory (shadows the builtin; name kept for
        caller compatibility).
    :param batch_size: samples per batch.
    :param shuffle: whether batches are shuffled each epoch.
    """
    return torch.utils.data.DataLoader(
        GCommandLoader(dir), batch_size=batch_size, shuffle=shuffle,
        num_workers=20, pin_memory=True, sampler=None)
def load_data():
    """Iterate the train split once, printing each batch's input shape
    and label count — a quick sanity check of the loader."""
    loader = torch.utils.data.DataLoader(
        GCommandLoader('./gcommands/train/train'), batch_size=100,
        shuffle=None, num_workers=20, pin_memory=True, sampler=None)
    for k, (batch, labels) in enumerate(loader):
        print(batch.size(), len(labels))
def main():
    """Train a convnet on ./data/train, validating on ./data/valid."""
    training_data = GCommandLoader('./data/train')
    validation_data = GCommandLoader('./data/valid')
    shuffled_loader = torch.utils.data.DataLoader(
        training_data, batch_size=100, shuffle=True, num_workers=30,
        pin_memory=False, sampler=None)
    # Materialize the batches once so later epochs reuse them without
    # re-spawning worker processes.
    training_batches = list(shuffled_loader)
    validation_loader = torch.utils.data.DataLoader(
        validation_data, batch_size=100, shuffle=None, num_workers=5,
        pin_memory=False, sampler=None)
    net = convnet()
    net.perform_training(training_batches, validation_loader)
def main():
    """Train a ConvNet with RMSprop on GCommands, then run validation
    and test evaluation."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    validset = GCommandLoader('./ML4_dataset/data/valid')
    trainset = GCommandLoader('./ML4_dataset/data/train')
    testset = GCommandLoader('./ML4_dataset/data/test')

    def make_loader(dataset, batch, shuffle):
        # Shared configuration for the three loaders.
        return torch.utils.data.DataLoader(
            dataset, batch_size=batch, shuffle=shuffle, num_workers=20,
            pin_memory=True, sampler=None)

    valid_loader = make_loader(validset, 100, False)
    # Test batch size is 1 so each sound gets its own classification.
    test_loader = make_loader(testset, 1, False)
    train_loader = make_loader(trainset, 100, True)

    model = ConvNet().to(device)
    # Loss and optimizer.
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
    train(device, optimizer, 5, model, train_loader, criterion)
    valid(model, valid_loader, device)
    test(model, test_loader, device)
def load_data():
    """Build train/valid/test DataLoaders and collect test file names.

    Returns (train_loader, valid_loader, test_loader, test_file_names)
    where test_file_names[i] is the base file name of test sample i.
    Uses the module-level `batch_size`.
    """
    import os

    train_set = GCommandLoader('./data/train')
    valid_set = GCommandLoader('./data/valid')
    test_set = GCommandLoader('./data/test')
    # BUG FIX: the original used spects[i][0].split("\\")[2], which only
    # works with Windows separators at a fixed directory depth and
    # raises IndexError on POSIX paths; os.path.basename extracts the
    # file name portably.
    test_file_names = [os.path.basename(entry[0]) for entry in test_set.spects]
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=20,
        pin_memory=True, sampler=None)
    valid_loader = torch.utils.data.DataLoader(
        valid_set, batch_size=batch_size, shuffle=True, num_workers=20,
        pin_memory=True, sampler=None)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=None, num_workers=20,
        pin_memory=True, sampler=None)
    return train_loader, valid_loader, test_loader, test_file_names
def main():
    """Train MyCNN on the ./sample splits and write test predictions."""
    trainset = GCommandLoader('./sample/train')
    testset = GCommandLoader('./sample/test')
    validationset = GCommandLoader('./sample/valid')

    def make_loader(dataset, shuffle):
        # Common loader settings for all three splits.
        return torch.utils.data.DataLoader(
            dataset, batch_size=100, shuffle=shuffle, num_workers=20,
            pin_memory=True, sampler=None)

    data_loader = make_loader(trainset, True)
    test_loader = make_loader(testset, None)
    valid_loader = make_loader(validationset, True)

    model = MyCNN(image_size=161 * 101)
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=ETA)
    trainer = ModelTrainer(data_loader, valid_loader, test_loader, model,
                           optimizer)
    all_pred = trainer.run()
    write_test(testset.spects, all_pred)
def load_data(train_path=r'./data/train', valid_path=r'./data/valid',
              test_path=r'./data/test'):
    '''
    This method gets paths for .wav files arranged in folders of
    training, validation and test. Each folder contains .wav files of
    speech commands grouped into folders named after the correct labels.
    :param train_path: training set path
    :param valid_path: validation set path
    :param test_path: test set path
    :return: train, validation and test loader objects
    '''
    train_set = GCommandLoader(train_path)
    valid_set = GCommandLoader(valid_path)
    test_set = GCommandLoader(test_path)
    # BUG FIX: the training loader was created with shuffle=None, so the
    # model would see the samples in label-grouped directory order every
    # epoch; training data must be shuffled.  Validation/test order is
    # deliberately kept stable.
    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=100, shuffle=True, num_workers=20,
        pin_memory=True, sampler=None)
    valid_loader = torch.utils.data.DataLoader(
        valid_set, batch_size=100, shuffle=None, num_workers=20,
        pin_memory=True, sampler=None)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=100, shuffle=None, num_workers=20,
        pin_memory=True, sampler=None)
    return train_loader, valid_loader, test_loader
def main():
    """Build data loaders, train the convolution model, then validate
    and report test predictions."""
    # Loaders for the three splits (train/valid shuffled, test not).
    train_loader = data_loading(TRAIN_PATH, True)
    valid_loader = data_loading(VALID_PATH, True)
    test_loader = data_loading(TEST_PATH, False)
    # Model, loss and optimizer.
    model = ConvolutionModel().to(device)
    loss_func = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=eta)
    # Train, validate and test.
    train(model, train_loader, loss_func, optimizer)
    validate(model, valid_loader)
    # The raw dataset is passed so test() can recover file names.
    test(model, test_loader, GCommandLoader(TEST_PATH))
def load(dir, batch_size, shuffle):
    """Load and preprocess the data under *dir*.

    The audio files are converted into spectrogram images by
    GCommandLoader and served in batches.

    Args:
        dir: dataset directory path (shadows the builtin; name kept for
            caller compatibility).
        batch_size (int): number of samples per batch.
        shuffle (bool): whether the DataLoader reshuffles each epoch.
    """
    return torch.utils.data.DataLoader(
        GCommandLoader(dir), batch_size=batch_size, shuffle=shuffle,
        num_workers=0, pin_memory=True, sampler=None)
def load_dataset(fname, dataset_params, loader_params, is_test=False):
    """Build a DataLoader over the data at *fname*.

    dataset_params keys: win_size, win_stride, win_type, normalize.
    loader_params keys: batch_size, shuffle, num_of_workers, cuda,
    sampler.  When *is_test* is true the test loader class is used
    instead of the training one.
    """
    dataset_cls = GCommandTestLoader if is_test else GCommandLoader
    dataset = dataset_cls(fname,
                          window_size=dataset_params["win_size"],
                          window_stride=dataset_params["win_stride"],
                          window_type=dataset_params["win_type"],
                          normalize=dataset_params["normalize"])
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=loader_params["batch_size"],
        shuffle=loader_params["shuffle"],
        num_workers=loader_params["num_of_workers"],
        pin_memory=loader_params["cuda"],
        sampler=loader_params["sampler"])
def predict():
    '''
    Predict the test data and append the results to the file 'test_y'.

    Each output line is '<wav file name>, <predicted class index>'.
    Loads model weights from 'model.pt'.
    '''
    # Load test data.
    test_data = GCommandLoader('./data/test')
    test_loader = torch.utils.data.DataLoader(
        test_data, batch_size=100, shuffle=None, num_workers=20,
        pin_memory=True, sampler=None)
    # File names in dataset order; the loader preserves this order
    # because shuffling is disabled.
    filenames = [s[0].split('/')[-1] for s in test_data.spects]
    # Set GPU if available.
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    model = GCommandClassifier(device)
    # Load model parameters from file.
    checkpoint = torch.load('model.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    index = 0
    # BUG FIX: the original re-opened 'test_y' in append mode for every
    # single prediction inside the inner loop; open it once.  Inference
    # also runs under no_grad so no autograd state is accumulated.
    with open('test_y', 'a+') as out_file, torch.no_grad():
        for batch, _ in test_loader:
            batch = batch.to(device)
            probs = model(batch)
            preds = torch.argmax(probs, dim=1)
            # Write all predictions in the batch, in order.
            for pred in preds:
                out_file.write(filenames[index] + ', ' +
                               str(pred.item()) + '\n')
                index += 1
from gcommand_loader import GCommandLoader
import torch

# Build a non-shuffled DataLoader over the GCommands test split.
dataset = GCommandLoader('./gcommands/test')
test_loader = torch.utils.data.DataLoader(
    dataset, batch_size=100, shuffle=None, num_workers=20,
    pin_memory=True, sampler=None)