# Super-resolution GAN training script (tail of the entry point): build the
# trainer, optionally restore a checkpoint, then run generator-only
# pretraining and/or full adversarial training.
# NOTE: the start of this constructor call is truncated in the source; the
# receiver `trainer = Trainer(` is reconstructed from the usage below.
trainer = Trainer(log_dir=log_dir, num_resblock=args.num_resblock)
trainer.summary()

try:
    if weights_path is not None:
        print('loading weights')
        trainer.load_checkpoint(weights_path)
    else:
        print('no weights for initialization are available')
except Exception as e:
    print(e)

if args.train_generator:
    # Pretrain the generator alone before adversarial training.
    trainer.fit(train_dataset=train_ds, valid_dataset=valid_ds,
                epochs=args.epochs, valid_lr=valid_lr, valid_hr=valid_hr)
    print('training finished, saving model now')
    trainer.save_model('_only_generator')

if args.train_gan:
    # Full adversarial training of generator and discriminator.
    trainer.train_gan(train_dataset=train_ds, valid_dataset=valid_ds,
                      epochs=args.epochs, valid_lr=valid_lr, valid_hr=valid_hr)
    print('training finished, saving model now')
    trainer.save_model()
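# The snippet above reads num_resblock, epochs, train_generator, and
# train_gan from an `args` namespace. A minimal argparse sketch that would
# supply them (flag names and defaults here are assumptions, not taken from
# the source):
import argparse

parser = argparse.ArgumentParser(description='super-resolution training')
parser.add_argument('--num_resblock', type=int, default=16,
                    help='number of residual blocks in the generator')
parser.add_argument('--epochs', type=int, default=100)
parser.add_argument('--train_generator', action='store_true',
                    help='run generator-only pretraining')
parser.add_argument('--train_gan', action='store_true',
                    help='run full adversarial training')
args = parser.parse_args()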
# DCGAN training entry point. `Trainer`, `weights_init`, `log_batch`,
# `sample_batch`, and `checkpoint_epoch` are assumed to be defined elsewhere
# in the module.
import logging
import os

import torch
import torch.distributed as dist
import torch.nn.functional as F
import torch.optim as optim

logger = logging.getLogger(__name__)


def train(hps, device, batch_size, test_batch_size, epochs, learning_rate,
          num_gpus, hosts, backend, current_host, model_dir, output_dir,
          seed, log_interval, beta1, nz, nc, ngf, ndf, dataloader):
    trainer = Trainer(nz, nc, ngf, ndf, weights_init, device=device,
                      num_gpus=num_gpus)
    # Fixed noise vector used to visualize generator progress across epochs.
    trainer.fixed_noise = torch.randn(batch_size, nz, 1, 1, device=device)

    # Set up the Adam optimizers for discriminator and generator.
    trainer.optimizerD = optim.Adam(trainer.netD.parameters(),
                                    lr=learning_rate, betas=(beta1, 0.999))
    trainer.optimizerG = optim.Adam(trainer.netG.parameters(),
                                    lr=learning_rate, betas=(beta1, 0.999))

    for epoch in range(epochs):
        trainer.train(epoch=epoch, epochs=epochs, log_batch=log_batch,
                      sample_batch=sample_batch, dataloader=dataloader,
                      log_interval=log_interval, output_dir=output_dir)
        # Checkpoint after every epoch.
        checkpoint_epoch(trainer, epoch, output_dir)

    trainer.save_model(model_dir)


# In the source, the code below followed an unconditional `return` inside
# train() and was therefore unreachable; it belongs to a separate
# classifier training routine, so it is split out here. The function name and
# signature are reconstructed from the variables it uses; `Net`, `test`,
# `save_model`, and `_average_gradients` are assumed module-level helpers.
def train_classifier(hosts, backend, current_host, seed, num_gpus, device,
                     train_loader, test_loader, learning_rate, epochs,
                     log_interval, model_dir):
    # `device` is expected to be the string "cuda" or "cpu".
    use_cuda = device == "cuda"

    is_distributed = len(hosts) > 1 and backend is not None
    logger.debug("Distributed training - {}".format(is_distributed))

    if is_distributed:
        # Initialize the distributed environment.
        world_size = len(hosts)
        os.environ['WORLD_SIZE'] = str(world_size)
        host_rank = hosts.index(current_host)
        os.environ['RANK'] = str(host_rank)
        dist.init_process_group(backend=backend, rank=host_rank,
                                world_size=world_size)
        logger.info(
            'Initialized the distributed environment: \'{}\' backend on {} nodes. '
            .format(backend, dist.get_world_size()) +
            'Current host rank is {}. Number of gpus: {}'.format(
                dist.get_rank(), num_gpus))

    # Set the seed for generating random numbers.
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)

    logging.getLogger().setLevel(logging.DEBUG)
    logger.debug("Processes {}/{} ({:.0f}%) of train data".format(
        len(train_loader.sampler), len(train_loader.dataset),
        100. * len(train_loader.sampler) / len(train_loader.dataset)))
    logger.debug("Processes {}/{} ({:.0f}%) of test data".format(
        len(test_loader.sampler), len(test_loader.dataset),
        100. * len(test_loader.sampler) / len(test_loader.dataset)))

    model = Net().to(device)
    if is_distributed and use_cuda:
        # Multi-machine multi-GPU case.
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        # Single-machine multi-GPU, single-machine, or multi-machine CPU case.
        model = torch.nn.DataParallel(model)

    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    for epoch in range(1, epochs + 1):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader, 1):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            if is_distributed and not use_cuda:
                # Average gradients manually for the multi-machine CPU case only.
                _average_gradients(model)
            optimizer.step()
            if batch_idx % log_interval == 0:
                logger.info(
                    'Train Epoch: {} [{}/{} ({:.0f}%)] Loss: {:.6f}'.format(
                        epoch, batch_idx * len(data),
                        len(train_loader.sampler),
                        100. * batch_idx / len(train_loader), loss.item()))
        test(model, test_loader, device)
    save_model(model_dir, model)
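# `_average_gradients` is called above but not defined in this snippet. A
# minimal sketch of the usual implementation for the multi-machine CPU case:
# all-reduce each parameter's gradient across processes, then divide by the
# world size (the function name matches the call above; the body is an
# assumption).
def _average_gradients(model):
    world_size = float(dist.get_world_size())
    for param in model.parameters():
        # Sum this parameter's gradient across all processes, then average
        # in place so every replica steps with the same gradient.
        dist.all_reduce(param.grad.data, op=dist.ReduceOp.SUM)
        param.grad.data /= world_size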
# SVM classifier training script. `readTrainCSV`, `filecsv`, and the Trainer
# wrapper are assumed to be defined/imported elsewhere.
import numpy as np
from sklearn.model_selection import train_test_split

dataset, target = readTrainCSV(filecsv, fea_sel=0)

# Split into train and test sets (80/20).
X_train, X_test, y_train, y_test = train_test_split(
    dataset, target, test_size=0.2, random_state=0)

# np.float is deprecated (removed in NumPy 1.24); use the builtin float.
X_train = np.asarray(X_train).astype(float)
X_test = np.asarray(X_test).astype(float)
y_train = np.asarray(y_train).astype(float)
y_test = np.asarray(y_test).astype(float)

model_name = 'SVM'
classifier = Trainer(model_name, kernel='rbf', C=1, gamma='scale')
# classifier.load_model('kNN_classifier.txt')
# parameters = {'kernel': ('linear', 'rbf'), 'C': [1, 2]}
# classifier.gridsearchCV(parameters)

classifier.fit(X_train, y_train)
classifier.predict(X_test)
classifier.report(y_test)
print(classifier.model)
classifier.save_model('SVM_classifier.sav')
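# The commented-out load_model call above suggests Trainer can restore a
# serialized classifier. A minimal reload sketch (only the method name comes
# from the source; the exact load semantics of Trainer.load_model are an
# assumption):
restored = Trainer(model_name, kernel='rbf', C=1, gamma='scale')
restored.load_model('SVM_classifier.sav')
print(restored.model)  # the fitted estimator, ready for further predict calls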