def train(self, epoch_num=10, step_one_epoch=20, save_frq=1000, evl_frq=500):
    """Train the classifier, optionally with mixup augmentation.

    Periodically evaluates on the validation set, logs metric history to a
    CSV file, and checkpoints the best model.

    Args:
        epoch_num: total number of epochs to run.
        step_one_epoch: optimizer steps per epoch.
        save_frq: checkpoint every `save_frq` global steps (best model only).
        evl_frq: evaluate/log every `evl_frq` global steps.
    """
    self.model.train()
    # Class-weighted cross entropy (class 0 slightly down-weighted).
    Loss = CrossEntropyLoss(weight=torch.Tensor([0.9, 1, 1])).to(self.device)
    if self.load_pretrain == True:
        # Resume: the checkpoint directory name encodes the saved global step.
        global_step = int(os.listdir('./model' + str(self.m))[0])
        epoch = global_step // step_one_epoch
        _, max_acc = self.evaluate()
        # Truncate metric histories to the resumed step so appends line up.
        self.train_loss = self.train_loss[0:global_step // evl_frq + 1]
        self.train_acc = self.train_acc[0:global_step // evl_frq + 1]
        self.verify_loss = self.verify_loss[0:global_step // evl_frq + 1]
        self.verify_acc = self.verify_acc[0:global_step // evl_frq + 1]
        self.lr = self.lr[0:global_step // evl_frq + 1]
    else:
        global_step = 0
        epoch = 1
        max_acc = 0.0
    while (epoch < epoch_num):
        if (epoch > 20):
            # NOTE(review): recreates the LambdaLR scheduler on every epoch
            # past 20, resetting its internal state each time — confirm intent.
            self.scheduler = lr_scheduler.LambdaLR(self.optimizer,
                                                   lr_lambda=self.lambda1)
            self.scheduler.step(epoch)
        for i in range(step_one_epoch):
            if self.mixup == True:
                # Mixup: blend two batches and their losses with a Beta-sampled
                # coefficient.
                images1, labels1 = next(self.train_generator)
                images2, labels2 = next(self.train_generator)
                lam = np.random.beta(0.4, 0.4)
                images1 = images1.to(self.device)
                images2 = images2.to(self.device)
                images = images1 * lam + images2 * (1 - lam)
                images = images.to(self.device)
                labels1 = labels1.to(self.device)
                labels2 = labels2.to(self.device)
                self.optimizer.zero_grad()
                logits = self.model(images)
                loss = Loss(logits, labels1) * lam + Loss(
                    logits, labels2) * (1 - lam)
                loss.backward()
                self.optimizer.step()
            else:
                # Plain supervised step.
                images, labels = next(self.train_generator)
                images = images.to(self.device)
                labels = labels.to(self.device)
                self.optimizer.zero_grad()
                logits = self.model(images)
                loss = Loss(logits, labels)
                loss.backward()
                self.optimizer.step()
            if global_step % evl_frq == 0:
                if self.mixup == True:
                    # For logging, score the two un-mixed batches separately.
                    with torch.no_grad():
                        logits1 = self.model(images1)
                        logits2 = self.model(images2)
                        train_loss = (Loss(logits1, labels1) +
                                      Loss(logits2, labels2)) / 2
                        train_acc = (accuracy(logits1, labels1) +
                                     accuracy(logits2, labels2)) / 2
                else:
                    train_loss = loss
                    train_acc = accuracy(logits, labels)
                verify_loss, verify_acc = self.evaluate()
                print('step: {:} learning rate: {:6f}'.format(
                    global_step, self.scheduler.get_lr()[0]))
                print('train_loss: {:4f} train_acc: {:4f}'.format(
                    train_loss, train_acc))
                print('verify_loss: {:4f} verify_acc: {:4f}'.format(
                    verify_loss, verify_acc))
                # Append metrics and rewrite the whole history CSV each time.
                self.train_loss.append(float(train_loss))
                self.train_acc.append(train_acc)
                self.verify_loss.append(float(verify_loss))
                self.verify_acc.append(verify_acc)
                self.lr.append(float(self.scheduler.get_lr()[0]))
                train_data = [
                    self.lr, self.train_loss, self.train_acc,
                    self.verify_loss, self.verify_acc
                ]
                train_data = np.array(train_data).T
                with open('train_data' + str(self.m) + '.csv', 'w') as csvfile:
                    writer = csv.writer(csvfile)
                    writer.writerows(train_data)
                if global_step % save_frq == 0:
                    # Checkpoint only when validation accuracy improves.
                    if verify_acc >= max_acc:
                        save_model(self.model, self.optimizer, self.scheduler,
                                   global_step, self.m)
                        max_acc = verify_acc
                        # Early stop once accuracy is high enough.
                        if max_acc > 0.92:
                            epoch = epoch_num
            global_step += 1
        epoch += 1
def main():
    """Train a deep autoencoder on MNIST and evaluate embedding quality.

    The quality of the produced encodings is tested by fitting a logistic
    regression classifier on the encoded images and reporting its test
    accuracy.
    """
    skip_training = False
    n_components = 10
    n_epochs = 4
    # device = torch.device('cuda:0')
    device = torch.device('cpu')

    data_dir = tools.select_data_dir()
    transform = transforms.Compose([
        transforms.ToTensor(),  # Transform to tensor
        transforms.Normalize((0.5,), (0.5,))  # Minmax normalization to [-1, 1]
    ])
    trainset = torchvision.datasets.MNIST(root=data_dir, train=True,
                                          download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                              shuffle=True)

    # Create a deep autoencoder
    encoder = Encoder(n_components)
    encoder.to(device)
    decoder = Decoder(n_components)
    decoder.to(device)

    # Training loop
    if not skip_training:
        encoder_optimizer = torch.optim.Adam(encoder.parameters(), lr=0.001)
        decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=0.001)
        loss_method = nn.MSELoss()
        for epoch in range(n_epochs):
            for i, data in enumerate(trainloader, 0):
                images, labels = data
                # FIX: move the batch to the training device; the original code
                # only worked because `device` was hard-coded to CPU above.
                images = images.to(device)
                encoder_optimizer.zero_grad()
                decoder_optimizer.zero_grad()
                # Call the modules directly (not .forward()) so hooks run.
                encoder_output = encoder(images)
                decoder_output = decoder(encoder_output)
                loss = loss_method(decoder_output, images)
                loss.backward()
                encoder_optimizer.step()
                decoder_optimizer.step()
            # Loss of the last mini-batch of the epoch.
            print('Train Epoch {}: Loss: {:.6f}'.format(epoch + 1, loss.item()))
        print('training is finished.')
        tools.save_model(encoder, 'ae_encoder.pth')
        tools.save_model(decoder, 'ae_decoder.pth')
    else:
        device = torch.device("cpu")
        encoder = Encoder(n_components=10)
        tools.load_model(encoder, 'ae_encoder.pth', device)
        decoder = Decoder(n_components=10)
        tools.load_model(decoder, 'ae_decoder.pth', device)

    # Test the quality of the produced embeddings by classification
    print('start testing the quality of the produced embeddings by classification')
    testset = torchvision.datasets.MNIST(root=data_dir, train=False,
                                         download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False)

    traincodes, trainlabels = encode(trainset, encoder)  # traincodes is (60000, 10)
    testcodes, testlabels = encode(testset, encoder)  # testcodes is (10000, 10)

    # Fit a linear classifier on the embeddings and score it on the test set.
    logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')
    logreg.fit(traincodes.cpu(), trainlabels.cpu())
    predicted_labels = logreg.predict(testcodes.cpu())  # (10000,)
    accuracy = np.sum(testlabels.cpu().numpy() == predicted_labels) / predicted_labels.size
    print('Accuracy with a linear classifier: %.2f%%' % (accuracy * 100))
def main():
    """
    train and test the quality of the produced encodings by training a
    classifier using the encoded images
    """
    skip_training = False
    n_components = 10
    n_epochs = 4  # NOTE(review): overridden to 10 below before training — confirm which value is intended
    # device = torch.device('cuda:0')
    device = torch.device('cpu')
    data_dir = tools.select_data_dir()
    transform = transforms.Compose([
        transforms.ToTensor(),  # Transform to tensor
        # NOTE(review): multiplies every pixel by independent N(0, 1) noise —
        # presumably an input-corruption step for the VAE; confirm intent.
        transforms.Lambda(lambda x: x * torch.randn_like(x))
    ])
    trainset = torchvision.datasets.MNIST(root=data_dir, train=True,
                                          download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=32,
                                              shuffle=True)
    encoder = Encoder(n_components=n_components)
    decoder = Decoder(n_components=n_components)
    encoder = encoder.to(device)
    decoder = decoder.to(device)
    # Training loop
    if not skip_training:
        en_optimizer = torch.optim.Adam(encoder.parameters(), lr=0.001)
        de_optimizer = torch.optim.Adam(decoder.parameters(), lr=0.001)
        n_epochs = 10  # overrides the value set at the top of the function
        for epoch in range(n_epochs):
            for i, data in enumerate(trainloader, 0):
                images, labels = data
                images, labels = images.to(device), labels.to(device)
                en_optimizer.zero_grad()
                de_optimizer.zero_grad()
                # Encoder emits (mu, logvar); sample via the encoder's
                # reparameterization helper; decoder emits (mu, logvar) of
                # the reconstruction.
                z_mu, z_logvar = encoder.forward(images)
                sample = encoder.sample(z_mu, z_logvar)
                y_mu, y_logvar = decoder.forward(sample)
                # Objective = KL term + log-likelihood term (presumably a
                # negative-ELBO; confirm sign conventions of the helpers).
                loss = loss_kl(z_mu, z_logvar) + loss_loglik(y_mu, y_logvar, images)
                loss.backward()
                en_optimizer.step()
                de_optimizer.step()
            # Loss of the last mini-batch of the epoch.
            print('Train Epoch {}: Loss: {:.6f}'.format(epoch + 1, loss.item()))
        tools.save_model(encoder, 'vae_encoder.pth')
        tools.save_model(decoder, 'vae_decoder.pth')
    else:
        encoder = Encoder(n_components=10)
        tools.load_model(encoder, 'vae_encoder.pth', device)
        decoder = Decoder(n_components=10)
        tools.load_model(decoder, 'vae_decoder.pth', device)
    # Test the quality of the produced embeddings by classification
    print('start testing the quality of the produced embeddings by classification')
    testset = torchvision.datasets.MNIST(root=data_dir, train=False,
                                         download=True, transform=transform)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                             shuffle=False)
    traincodes, trainlabels = encode(trainset, encoder)  # traincodes is (60000, 10)
    testcodes, testlabels = encode(testset, encoder)  # testcodes is (10000, 10)
    # Train a simple linear classifier
    logreg = LogisticRegression(C=1e5, solver='lbfgs',
                                multi_class='multinomial', max_iter=400)
    logreg.fit(traincodes.cpu(), trainlabels.cpu())
    predicted_labels = logreg.predict(testcodes.cpu())  # (10000,)
    # Compute accuracy of the linear classifier
    accuracy = np.sum(testlabels.cpu().numpy() == predicted_labels) / predicted_labels.size
    print('Accuracy with a linear classifier: %.2f%%' % (accuracy * 100))
def train(self, epoch_num = 10, step_one_epoch = 20, save_frq = 1000, evl_frq = 500):
    """Train the detector, optionally with UDA adversarial domain adaptation.

    When `self.uda` is enabled, training alternates between (a) the domain
    discriminator `self.UDA_model` distinguishing source (train) features
    from target (test) features, and (b) the main model fooling the
    discriminator. The flag `D` selects which side is currently trained, and
    flips whenever the active side reaches >0.9 domain accuracy.

    Args:
        epoch_num: total number of epochs.
        step_one_epoch: optimizer steps per epoch.
        save_frq: checkpoint every `save_frq` global steps (best model only).
        evl_frq: evaluate/log every `evl_frq` global steps.
    """
    self.model.train()
    if self.load_pretrain == True:
        # Resume: the checkpoint directory name encodes the saved global step.
        global_step = int(os.listdir('./model' + str(self.m))[0])
        epoch = global_step//step_one_epoch
        _, max_acc = self.evaluate()
        # Truncate metric histories to the resumed step so appends line up.
        self.train_loss = self.train_loss[0:global_step//evl_frq+1]
        self.train_acc = self.train_acc[0:global_step//evl_frq+1]
        self.verify_loss = self.verify_loss[0:global_step//evl_frq+1]
        self.verify_acc = self.verify_acc[0:global_step//evl_frq+1]
        self.lr = self.lr[0:global_step//evl_frq+1]
    else:
        global_step = 0
        epoch = 1
        max_acc = 0.0
    if self.uda == True:
        # Domain labels: 0 = source (training) domain, 1 = target (test) domain.
        Label1 = torch.LongTensor([0 for i in range(self.batch)]).to(self.device)
        Label2 = torch.LongTensor([1 for i in range(self.batch)]).to(self.device)
        UDA_loss = torch.nn.CrossEntropyLoss(reduction='sum').to(self.device)
        D = True
    else:
        D = False
    while(epoch < epoch_num):
        for i in range(step_one_epoch):
            self.optimizer.zero_grad()
            if self.uda == True:
                self.optimizer1.zero_grad()
            if i%10 == 0:
                images, Offset, Judge1, Judge2, true_box_coors = next(self.train_generator2)  # also train on validation-set batches
            else:
                images, Offset, Judge1, Judge2, true_box_coors = next(self.train_generator)
            images, Offset, Judge1, Judge2 = images.to(self.device), Offset.to(self.device), Judge1.to(self.device), Judge2.to(self.device)
            if self.uda == False or D == False:
                # Supervised detection step: weighted localization +
                # classification loss.
                class_sum, loc_sum = self.model(images)
                loss_l, loss_c = self.Loss(class_sum, loc_sum, Offset, Judge1, Judge2)
                loss = 10*loss_l + loss_c
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(),10)
                self.optimizer.step()
                # flag marks that supervised metrics (loss, logits) exist for
                # this step and may be logged below.
                flag = True
            if self.uda == True:
                '''
                UDA adversarial domain adaptation
                '''
                self.optimizer.zero_grad()
                self.optimizer1.zero_grad()
                if D == True:
                    '''
                    Train the domain discriminator:
                    test images get domain label 1, training-set images get
                    domain label 0.
                    '''
                    test_imgs = np.random.choice(self.UDA_img, self.batch)
                    test_images = []
                    for img_path in test_imgs:
                        img = cv.imread(img_path)
                        img = cv.resize(img, (480, 270))
                        test_images.append(img)
                    test_images = np.array(test_images)/255.
                    test_images = torch.Tensor(test_images).permute(0, 3, 1, 2).to(self.device)
                    feature1 = self.model.feature1(images)
                    feature2 = self.model.feature1(test_images)
                    feature = torch.cat((feature1, feature2), dim=0)
                    logits = self.UDA_model(feature)
                    uda_acc = accuracy_uda(logits, torch.cat((Label1, Label2), dim=0))
                    loss_uda = UDA_loss(logits, torch.cat((Label1, Label2), dim=0))
                    loss_uda.backward()
                    self.optimizer1.step()
                    flag = False
                    # Discriminator good enough — switch to training the
                    # main model.
                    if uda_acc > 0.9:
                        D = False
                elif D == False:
                    '''
                    Train the main model: assign domain label 1 to training
                    images so their features are pushed toward the real test
                    domain.
                    '''
                    self.optimizer.zero_grad()
                    self.optimizer1.zero_grad()
                    feature1 = self.model.feature1(images)
                    logits = self.UDA_model(feature1)
                    uda_acc = accuracy_uda(logits, Label2)
                    loss_uda = UDA_loss(logits, Label2)
                    loss_uda.backward()
                    self.optimizer.step()
                    # Main model fools the discriminator — switch back.
                    if uda_acc > 0.9:
                        D = True
            if global_step%evl_frq == 0:
                # Only log when a supervised step ran (metrics exist).
                if flag == True:
                    train_loss = loss
                    train_acc = accuracy(class_sum, loc_sum, true_box_coors)
                    verify_loss, verify_acc = self.evaluate()
                    print('step: {:} learning rate: {:6f}'.format(global_step, self.scheduler.get_lr()[0]))
                    print('loss_l: {:4f} loss_c: {:4f}'.format(loss_l, loss_c))
                    print('train_loss: {:4f} train_acc: {:4f}'.format(train_loss, train_acc))
                    print('verify_loss: {:4f} verify_acc: {:4f}'.format(verify_loss, verify_acc))
                    # Append metrics and rewrite the history CSV each time.
                    self.train_loss.append(float(train_loss))
                    self.train_acc.append(train_acc)
                    self.verify_loss.append(float(verify_loss))
                    self.verify_acc.append(verify_acc)
                    self.lr.append(float(self.scheduler.get_lr()[0]))
                    train_data = [self.lr, self.train_loss, self.train_acc, self.verify_loss, self.verify_acc]
                    train_data = np.array(train_data).T
                    with open('train_data'+str(self.m)+'.csv', 'w') as csvfile:
                        writer = csv.writer(csvfile)
                        writer.writerows(train_data)
                    if global_step%save_frq == 0:
                        # Checkpoint only when validation accuracy improves.
                        if verify_acc >= max_acc:
                            save_model(self.model, self.optimizer, self.scheduler, global_step, self.m)
                            max_acc = verify_acc
                            # Early stop once accuracy is high enough.
                            if max_acc > 0.96:
                                epoch = epoch_num
            global_step += 1
        epoch += 1
        self.scheduler.step(epoch)
        if self.uda == True:
            self.scheduler1.step(epoch)
def train_and_test(args: argparse.Namespace):
    """Train and evaluate the multimodal Medusa network with triplet loss.

    Builds concatenated datasets for the inertial, sdfdi and (optionally)
    skeleton modalities, trains with a class-balanced batch sampler, then
    evaluates with a kNN classifier over the learned embeddings.

    Returns:
        The test accuracy (float).
    """
    param_config = load_yaml(args.param_file, append=False)

    # Select device
    cuda_device = 'cuda:%d' % args.gpu
    device = torch.device(cuda_device if torch.cuda.is_available() else 'cpu')

    # Generic arguments (CLI flags override the yaml values)
    num_epochs = param_config.get('general').get(
        'num_epochs') if args.epochs is None else args.epochs
    num_neighbors = param_config.get('general').get('num_neighbors')

    # Load the selected dataset
    selected_dataset = getattr(datasets,
                               param_config.get('dataset').get('class_name'))

    # Initiate datasets and loaders for each modality
    train_inertial, val_inertial, test_inertial = get_train_val_test_datasets(
        selected_dataset, 'inertial', param_config)
    train_sdfdi, val_sdfdi, test_sdfdi = get_train_val_test_datasets(
        selected_dataset, 'sdfdi', param_config)
    if param_config.get('modalities').get('skeleton'):
        train_skeleton, val_skeleton, test_skeleton = get_train_val_test_datasets(
            selected_dataset, 'skeleton', param_config)

    train_datasets = [train_inertial, train_sdfdi]
    val_datasets = [val_inertial, val_sdfdi]
    test_datasets = [test_inertial, test_sdfdi]
    if param_config.get('modalities').get('skeleton'):
        train_datasets.append(train_skeleton)
        val_datasets.append(val_skeleton)
        test_datasets.append(test_skeleton)

    # Prepare concat datasets and loaders
    train_dataset = ConcatDataset(*train_datasets)
    val_dataset = ConcatDataset(*val_datasets)
    test_dataset = ConcatDataset(*test_datasets)
    num_actions = len(train_dataset.datasets[0].actions)
    batch_size = param_config.get('general').get('batch_size')
    shuffle = param_config.get('general').get('shuffle')
    # Balanced sampler yields n_samples per class per batch (needed for
    # triplet mining).
    train_loader = DataLoader(
        dataset=train_dataset,
        batch_sampler=BalancedSampler(
            labels=train_dataset.labels,
            n_classes=num_actions,
            n_samples=param_config.get('general').get('num_samples')))
    val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size,
                            shuffle=shuffle)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size,
                             shuffle=shuffle)
    class_names = train_dataset.get_class_names()

    # Load medusa network: per-branch kwargs from the yaml config.
    n1_kwargs = param_config.get('modalities').get('inertial').get(
        'model').get('kwargs')
    n2_kwargs = param_config.get('modalities').get('sdfdi').get('model').get(
        'kwargs')
    n3_kwargs = None
    if param_config.get('modalities').get('skeleton'):
        n3_kwargs = param_config.get('modalities').get('skeleton').get(
            'model').get('kwargs')
    mlp_kwargs = param_config.get('general').get('mlp_kwargs')
    if args.out_size:
        n1_kwargs['out_size'] = args.out_size
        n2_kwargs['out_size'] = args.out_size
        if param_config.get('modalities').get('skeleton'):
            n3_kwargs['out_size'] = args.out_size
        mlp_kwargs['out_size'] = args.out_size
        # Also adjust the input of the mlp due to the change in out_size
        mlp_kwargs['input_size'] = 3 * args.out_size
    if args.dr:
        mlp_kwargs['dropout_rate'] = args.dr
    if args.mlp_hidden_size:
        mlp_kwargs['hidden_size'] = args.mlp_hidden_size
    model = Medusa(mlp_kwargs, n1_kwargs, n2_kwargs, n3_kwargs)
    if args.test:
        model.load_state_dict(torch.load(args.saved_state))
    model = model.to(device)

    # Criterion, optimizer: resolved dynamically from module/class names in
    # the yaml, with CLI overrides applied to their kwargs first.
    criterion = param_config.get('general').get('criterion').get('class_name')
    criterion_from = param_config.get('general').get('criterion').get(
        'from_module')
    criterion_kwargs = param_config.get('general').get('criterion').get(
        'kwargs')
    optimizer = param_config.get('general').get('optimizer').get('class_name')
    optimizer_from = param_config.get('general').get('optimizer').get(
        'from_module')
    optimizer_kwargs = param_config.get('general').get('optimizer').get(
        'kwargs')
    if args.margin:
        criterion_kwargs['margin'] = args.margin
    if args.semi_hard is not None:
        criterion_kwargs['semi_hard'] = args.semi_hard
    if args.lr:
        optimizer_kwargs['lr'] = args.lr
    criterion = getattr(importlib.import_module(criterion_from),
                        criterion)(**criterion_kwargs)
    optimizer = getattr(importlib.import_module(optimizer_from),
                        optimizer)(model.parameters(), **optimizer_kwargs)

    if not args.test:
        if args.experiment is None:
            # NOTE(review): this local shadows the common `datetime` module
            # name; harmless here but confusing.
            datetime = time.strftime("%Y%m%d_%H%M", time.localtime())
            experiment = '%s_medusa' % datetime
        else:
            experiment = args.experiment
        writer = SummaryWriter('../logs/' + experiment)
        train_losses, val_losses, val_accuracies, train_accuracies = train_triplet_loss(
            model, criterion, optimizer, class_names, train_loader,
            val_loader, num_epochs, device, experiment, num_neighbors, writer,
            verbose=True, skip_accuracy=args.skip_accuracy)
        # Save last state of model
        save_model(model, '%s_last_state.pt' % experiment)

    # Evaluate embeddings with a kNN classifier fitted on the train loader.
    cm, test_acc, test_scores, test_labels = get_predictions_with_knn(
        n_neighbors=num_neighbors, train_loader=train_loader,
        test_loader=test_loader, model=model, device=device)

    cm_image = plot_confusion_matrix(cm=cm,
                                     title='Confusion Matrix- Test Loader',
                                     normalize=False, save=False,
                                     show_figure=False,
                                     classes=test_dataset.get_class_names())
    if not args.test:
        writer.add_images('ConfusionMatrix/Test', cm_image,
                          dataformats='CHW', global_step=num_epochs - 1)
        writer.add_embedding(
            test_scores,
            metadata=[class_names[idx] for idx in test_labels.int().tolist()],
            tag="test (%f%%)" % test_acc)
        writer.add_text('config', json.dumps(param_config, indent=2))
        writer.add_text('args', json.dumps(args.__dict__, indent=2))
        writer.flush()
        writer.close()

    if args.print_tsne or args.save_tsne:
        train_scores, train_labels = get_predictions(train_loader, model,
                                                     device,
                                                     apply_softmax=False)
        if device.type == 'cuda':
            train_scores = train_scores.cpu()
            train_labels = train_labels.cpu()
        run_tsne(train_scores, train_labels.argmax(1), class_names,
                 filename='train_medusa_embeddings.png', save=args.save_tsne,
                 show=args.print_tsne)
        run_tsne(test_scores, test_labels, class_names,
                 filename='test_medusa_embeddings.png', save=args.save_tsne,
                 show=args.print_tsne)
    print('Test acc: %.5f' % test_acc)
    return test_acc
# Add epochs arguemnt parser.add_argument('-e', '--epochs', type=int, default=30, help='Number of epochs. (default: 30)') # Add batch_size arguemnt parser.add_argument('-b', '--batch_size', type=int, default=64, help='Number of batch size. (default: 64)') # Parse arguments args = parser.parse_args() # Train model model = train_model(inputs=importJSON(args.inputs), outputs=importJSON(args.outputs), evaluation_split=args.evaluation_split, validation_split=args.validation_split, epochs=args.epochs, batch_size=args.batch_size) # Save model process if input('Save model?[y/N]: ').lower() in ('y', 'yes'): model_path = input('Path for model?[model/]: ') if model_path == '': model_path = './model' save_model(model, model_path) print('Model saved') else: print('Model not saved')
def train_and_test(args: argparse.Namespace):
    """Train (or load) a single-modality model with triplet loss, then
    evaluate it with a kNN classifier over the learned embeddings.

    Args:
        args: parsed CLI arguments; CLI values override the yaml config.

    Returns:
        dict with hyper-parameters and achieved accuracies/losses when
        training; ``{'test_acc': ...}`` in ``--test`` mode.
    """
    if args.test and args.saved_state is None:
        print(
            'You have to use --saved_state when using --test, to specify the weights of the model'
        )
        sys.exit(0)

    # Select device
    cuda_device = 'cuda:%d' % args.gpu
    device = torch.device(cuda_device if torch.cuda.is_available() else 'cpu')

    # Load parameters from yaml file.
    param_config = load_yaml(args.param_file)

    # Basic parameters
    modality = args.modality
    modality_config = param_config.get('modalities').get(modality)

    # Hyper params (CLI overrides yaml)
    num_neighbors = modality_config.get(
        'num_neighbors') if args.num_neighbors is None else args.num_neighbors
    batch_size = modality_config.get('batch_size')
    num_epochs = modality_config.get(
        'num_epochs') if args.epochs is None else args.epochs
    shuffle = param_config.get('dataset').get('shuffle')

    # Criterion, optimizer and scheduler class names/kwargs from the config.
    model_class_name = modality_config.get('model').get('class_name')
    criterion = modality_config.get('criterion').get('class_name')
    criterion_from = modality_config.get('criterion').get('from_module')
    criterion_kwargs = modality_config.get('criterion').get('kwargs')
    if args.margin:
        criterion_kwargs['margin'] = args.margin
    if args.semi_hard is not None:
        criterion_kwargs['semi_hard'] = args.semi_hard
    optimizer = modality_config.get('optimizer').get('class_name')
    optimizer_from = modality_config.get('optimizer').get('from_module')
    optimizer_kwargs = modality_config.get('optimizer').get('kwargs')
    if args.lr:
        optimizer_kwargs['lr'] = args.lr
    if args.optimizer:
        optimizer = args.optimizer

    # Dataset config
    selected_dataset = getattr(datasets,
                               param_config.get('dataset').get('class_name'))
    transforms, test_transforms = get_transforms_from_config(
        param_config.get('modalities').get(modality).get('transforms'))
    train_dataset_kwargs = param_config.get('dataset').get('train_kwargs')
    validation_dataset_kwargs = param_config.get('dataset').get(
        'validation_kwargs')
    test_dataset_kwargs = param_config.get('dataset').get('test_kwargs')

    # Load Data. The train loader uses a class-balanced batch sampler
    # (required for triplet mining); val/test use plain batching.
    train_dataset = selected_dataset(modality=modality, transform=transforms,
                                     **train_dataset_kwargs)
    num_actions = len(train_dataset.actions)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_sampler=BalancedSampler(
                                  labels=train_dataset.labels,
                                  n_classes=num_actions,
                                  n_samples=modality_config['num_samples']))
    validation_dataset = selected_dataset(modality=modality,
                                          transform=test_transforms,
                                          **validation_dataset_kwargs)
    validation_loader = DataLoader(dataset=validation_dataset,
                                   batch_size=batch_size, shuffle=shuffle)
    test_dataset = selected_dataset(modality=modality,
                                    transform=test_transforms,
                                    **test_dataset_kwargs)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size,
                             shuffle=shuffle)
    class_names = train_dataset.get_class_names()

    # Initiate the model (CLI overrides applied to its kwargs first).
    model_kwargs = modality_config.get('model').get('kwargs')
    if args.pretrained is not None and model_class_name == 'MobileNetV2':
        model_kwargs['pretrained'] = args.pretrained
    if args.out_size is not None:
        model_kwargs['out_size'] = args.out_size
    if args.dr is not None:
        model_kwargs['dropout_rate'] = args.dr
    model = getattr(models, model_class_name)(
        *modality_config.get('model').get('args'), **model_kwargs)
    if args.test:
        model.load_state_dict(torch.load(args.saved_state))
    model = model.to(device)

    # Loss, optimizer and scheduler instantiation (dynamic import by name).
    criterion = getattr(importlib.import_module(criterion_from),
                        criterion)(**criterion_kwargs)
    optimizer = getattr(importlib.import_module(optimizer_from),
                        optimizer)(model.parameters(), **optimizer_kwargs)
    scheduler = None
    if not args.no_scheduler:
        scheduler_class_name = modality_config.get('scheduler').get(
            'class_name')
        scheduler_from = modality_config.get('scheduler').get('from_module')
        scheduler_kwargs = modality_config.get('scheduler').get('kwargs')
        scheduler = getattr(importlib.import_module(scheduler_from),
                            scheduler_class_name)(optimizer,
                                                  **scheduler_kwargs)

    # Training procedure:
    # 1. Instantiate tensorboard writer
    # 2. Run training with triplet loss
    max_val_acc = -1
    max_train_acc = -1
    min_train_loss = -1
    min_val_loss = -1
    if not args.test:
        if args.experiment is None:
            print('Specify an experiment name by using --experiment argument')
            sys.exit(0)
        elif args.experiment == 'auto':
            experiment = '%s_%s_TL_A%s_M%s_LR%s_%s_%sep' % (
                model.name, modality, str(num_actions),
                str(criterion_kwargs['margin']), str(optimizer_kwargs['lr']),
                'semi_hard' if criterion_kwargs['semi_hard'] else 'hard',
                num_epochs)
        else:
            experiment = args.experiment
        if args.verbose:
            print('Experiment: %s' % experiment)
        writer = SummaryWriter('../logs/' + experiment)
        train_losses, val_losses, val_accs, train_accs = train_triplet_loss(
            model=model, criterion=criterion, optimizer=optimizer,
            scheduler=scheduler, class_names=class_names,
            train_loader=train_loader, val_loader=validation_loader,
            num_epochs=num_epochs, device=device, experiment=experiment,
            writer=writer, n_neighbors=num_neighbors, verbose=args.verbose)
        # Save last state of model
        save_model(model, '%s_last_state.pt' % experiment)
        max_val_acc = max(val_accs) if len(val_accs) > 0 else max_val_acc
        max_train_acc = max(
            train_accs) if len(train_accs) > 0 else max_train_acc
        # FIX: the minimum losses were computed with max(); use min().
        min_train_loss = min(
            train_losses) if len(train_losses) > 0 else min_train_loss
        min_val_loss = min(val_losses) if len(val_losses) > 0 else min_val_loss

    # Evaluate embeddings with a kNN classifier fitted on the train loader.
    cm, test_acc, test_scores, test_labels = get_predictions_with_knn(
        n_neighbors=num_neighbors, train_loader=train_loader,
        test_loader=test_loader, model=model, device=device)

    cm_image = plot_confusion_matrix(cm=cm,
                                     title='Confusion Matrix- Test Loader',
                                     normalize=False, save=False,
                                     show_figure=False,
                                     classes=test_dataset.get_class_names())
    if not args.test:
        writer.add_hparams(
            {
                'learning_rate': optimizer_kwargs['lr'],
                'margin': criterion_kwargs['margin'],
                'semi_hard': criterion_kwargs['semi_hard'],
                'out_size': model_kwargs['out_size']
            }, {
                'hparam/val_acc': max_val_acc,
                'hparam/test_acc': test_acc,
                'hparam/train_acc': max_train_acc
            },
            run_name='hparams')
        writer.add_images('ConfusionMatrix/Test', cm_image,
                          dataformats='CHW', global_step=num_epochs - 1)
        writer.add_embedding(
            test_scores,
            metadata=[class_names[idx] for idx in test_labels.int().tolist()],
            tag="test (%f%%)" % test_acc)
        writer.add_text('config', json.dumps(param_config, indent=2))
        writer.add_text('args', json.dumps(args.__dict__, indent=2))
        writer.flush()
        writer.close()
        return {
            'lr': optimizer_kwargs['lr'],
            'margin': criterion_kwargs['margin'],
            'semi_hard': criterion_kwargs['semi_hard'],
            'out_size': model_kwargs['out_size'],
            'test_acc': test_acc,
            'max_train_acc': max_train_acc,
            'max_val_acc': max_val_acc,
            'min_train_loss': min_train_loss,
            'min_val_loss': min_val_loss
        }
    return {'test_acc': test_acc}
def main():
    """
    function to train model, plot generated samples, compute training score,
    save train model, load train model, and evaluate model
    """
    # device = torch.device('cuda:0')
    device = torch.device('cpu')
    skip_training = False
    batch_size = 100
    n_epochs = 20
    scorer = Scorer()
    scorer.to(device)
    nz = 10  # size of the generator's latent vector
    netG = Generator(nz=nz, ngf=64, nc=1)
    netD = Discriminator(nc=1, ndf=64)
    netD = netD.to(device)
    netG = netG.to(device)
    if not skip_training:
        d_optimizer = torch.optim.Adam(netD.parameters(), lr=0.0002,
                                       betas=(0.5, 0.999))
        g_optimizer = torch.optim.Adam(netG.parameters(), lr=0.0002,
                                       betas=(0.5, 0.999))
        for epoch in range(n_epochs):
            # NOTE(review): `trainloader` is not defined in this function —
            # presumably a module-level global; confirm.
            for i, data in enumerate(trainloader, 0):
                images, _ = data
                images = images.to(device)
                # --- Discriminator update (real + fake losses) ---
                netD.train()
                netD.zero_grad()
                d_optimizer.zero_grad()
                noise = torch.randn(batch_size, nz, 1, 1, device=device)
                fake_images = netG(noise)
                d_loss_real, D_real, d_loss_fake, D_fake = discriminator_loss(
                    netD, images, fake_images)
                # retain_graph=True keeps the graph alive for the generator
                # update, which reuses `fake_images` below.
                d_loss_real.backward(retain_graph=True)
                d_loss_fake.backward(retain_graph=True)
                d_loss = d_loss_real + d_loss_fake
                d_optimizer.step()
                # --- Generator update ---
                netG.train()
                netG.zero_grad()
                g_optimizer.zero_grad()
                g_loss = generator_loss(netD, fake_images)
                g_loss.backward(retain_graph=True)
                g_optimizer.step()
            with torch.no_grad():
                # Plot generated images
                z = torch.randn(144, nz, 1, 1, device=device)
                samples = netG(z)
                tools.plot_generated_samples(samples)
                # Compute score
                z = torch.randn(1000, nz, 1, 1, device=device)
                samples = netG(z)
                samples = (samples + 1) / 2  # Re-normalize to [0, 1]
                score = scorer(samples)
            print('Train Epoch {}: D_real {}: D_fake{}: score {}'.format(
                epoch + 1, D_real, D_fake, score))
        tools.save_model(netG, '11_dcgan_g.pth')
        tools.save_model(netD, '11_dcgan_d.pth')
    else:
        nz = 10
        netG = Generator(nz=nz, ngf=64, nc=1)
        netD = Discriminator(nc=1, ndf=64)
        tools.load_model(netG, '11_dcgan_g.pth', device)
        tools.load_model(netD, '11_dcgan_d.pth', device)
    # Final evaluation of the (trained or loaded) generator.
    with torch.no_grad():
        z = torch.randn(1000, nz, 1, 1, device=device)
        samples = (netG(z) + 1) / 2
        score = scorer(samples)
    print(f'The trained DCGAN achieves a score of {score:.5f}')
def save_model(self, path):
    """Persist this object's tree to *path* via the tools helper."""
    tree_to_store = self.tree
    tools.save_model(path, tree_to_store)
# Training configuration for this run.
# NOTE(review): `id` below shadows the builtin — presumably a run identifier
# defined earlier in the file; confirm.
epochs = 25
batch_size = 70
validation_split = 0.1
# Read the raw data and build a balanced 80/20 train/test split.
X, y, X_final = dataread()
X_train, X_test, y_train, y_test, prop_HF = datatreat_A1(
    X, y, train_size=0.8, Shuffle=True, preprocess="None", ratio="50/50",
    balancing_method="SMOTEENN")
# Train the CNN.
model = cnn_1()
history = model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                    validation_split=validation_split)
save_model(model, id)
# Evaluate, persist metrics and plot the training curves.
accuracy, roc, f1_macro, f1_wei = test_1(model, X_test, y_test, id)
save_results(id, 'datatreat_A1', 'cnn_1', epochs, batch_size, accuracy, roc,
             f1_macro, f1_wei, validation_split)
plot_loss_acc_history(history, id, validation_split)
# Parse CLI arguments and bind them to local names.
pa = ap.parse_args()
where = pa.data_dir
path = pa.save_dir
lr = pa.learning_rate
save_dir = pa.save_dir
dropout = pa.dropout
power = pa.gpu
epochs = pa.epochs
architecture = pa.pretrained_model
hiddenl = pa.hidden_units

# Load the data and build the chosen pretrained backbone.
trainloader, validloader, testloader, train_data, valid_data, test_data = load_data(where)
pretr_model = pa.pretrained_model
model = getattr(models, pretr_model)(pretrained=True)
# FIX: build_classifier(model) was called twice in a row; one call suffices.
build_classifier(model)

criterion = nn.NLLLoss()
# NOTE(review): the optimizer's learning rate is hard-coded to 0.0001 even
# though --learning_rate is parsed into `lr` and forwarded to train_model;
# confirm which value train_model actually uses.
optimizer = optim.Adam(model.classifier.parameters(), lr=0.0001)

# Train, test, and checkpoint the model together with its training config.
model, optimizer = train_model(model, epochs, trainloader, validloader,
                               criterion, optimizer, power, lr, hiddenl,
                               dropout)
test_model(model, testloader)
save_model(model, train_data, optimizer, save_dir, epochs, lr, architecture,
           hiddenl, dropout)
print("The Model is trained")
def main(args):
    """Train (or load) a single-modality classifier and report accuracies.

    Configuration comes from a yaml file, with selected values overridable
    from the CLI. In training mode, metrics and confusion matrices are
    logged to TensorBoard.

    Args:
        args: parsed CLI arguments.

    Returns:
        dict with test accuracy and best/worst training statistics.
    """
    if args.test and args.saved_state is None:
        print(
            'You have to use --saved_state when using --test, to specify the weights of the model'
        )
        sys.exit(0)

    # Select device
    cuda_device = 'cuda:%d' % args.gpu
    device = torch.device(cuda_device if torch.cuda.is_available() else 'cpu')

    # Load parameters from yaml file.
    param_config = load_yaml(args.param_file)

    # Assign parameters (CLI overrides yaml)
    modality = args.modality
    modality_config = param_config.get('modalities').get(modality)
    selected_dataset = getattr(datasets,
                               param_config.get('dataset').get('class_name'))
    transforms, test_transforms = get_transforms_from_config(
        modality_config.get('transforms'))
    batch_size = modality_config.get(
        'batch_size') if args.bs is None else args.bs
    num_epochs = modality_config.get(
        'num_epochs') if args.epochs is None else args.epochs
    shuffle = param_config.get('dataset').get('shuffle')
    model_class_name = modality_config.get('model').get('class_name')
    criterion = modality_config.get('criterion').get('class_name')
    criterion_from = modality_config.get('criterion').get('from_module')
    optimizer = modality_config.get('optimizer').get('class_name')
    optimizer_from = modality_config.get('optimizer').get('from_module')
    optimizer_kwargs = modality_config.get('optimizer').get('kwargs')
    if args.lr:
        optimizer_kwargs['lr'] = args.lr
    train_dataset_kwargs = param_config.get('dataset').get('train_kwargs')
    validation_dataset_kwargs = param_config.get('dataset').get(
        'validation_kwargs')
    test_dataset_kwargs = param_config.get('dataset').get('test_kwargs')

    # Load Data
    train_dataset = selected_dataset(modality=modality, transform=transforms,
                                     **train_dataset_kwargs)
    train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size,
                              shuffle=shuffle)
    validation_dataset = selected_dataset(modality=modality,
                                          transform=test_transforms,
                                          **validation_dataset_kwargs)
    validation_loader = DataLoader(dataset=validation_dataset,
                                   batch_size=batch_size, shuffle=shuffle)
    test_dataset = selected_dataset(modality=modality,
                                    transform=test_transforms,
                                    **test_dataset_kwargs)
    test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size,
                             shuffle=shuffle)

    # Initiate the model
    model_kwargs = modality_config.get('model').get('kwargs')
    if args.dr is not None:
        model_kwargs['dropout_rate'] = args.dr
    # FIX: construct with the patched model_kwargs so the --dr override is
    # explicit (the original re-read the raw config dict here).
    model = getattr(models, model_class_name)(
        *modality_config.get('model').get('args'), **model_kwargs)
    if args.test:
        model.load_state_dict(torch.load(args.saved_state))
    model = model.to(device)

    # Loss and optimizer (resolved dynamically by module/class name)
    criterion = getattr(importlib.import_module(criterion_from), criterion)()
    optimizer = getattr(importlib.import_module(optimizer_from),
                        optimizer)(model.parameters(), **optimizer_kwargs)

    # Training procedure
    max_val_acc = -1
    max_train_acc = -1
    min_train_loss = -1
    min_val_loss = -1
    if not args.test:
        # Initiate Tensorboard writer with the given experiment name or
        # generate an automatic one
        experiment = '%s_%s_%s_%s' % (
            selected_dataset.__name__, modality,
            args.param_file.split('/')[-1],
            time.strftime("%Y%m%d_%H%M", time.localtime())
        ) if args.experiment is None else args.experiment
        writer_name = '../logs/%s' % experiment
        writer = SummaryWriter(writer_name)

        # Print parameters
        print_table({
            'param_file': args.param_file,
            'experiment': experiment,
            'tensorboard_folder': writer_name,
            'dataset': selected_dataset.__name__,
            'criterion': type(criterion).__name__,
            'optimizer': type(optimizer).__name__,
            'modality': modality,
            'model': model.name,
            'learning_rate': optimizer_kwargs['lr'],
            'batch_size': batch_size,
            'num_epochs': num_epochs,
        })

        # Start training
        train_accs, val_accs, train_losses, val_losses = train(
            model=model, criterion=criterion, optimizer=optimizer,
            train_loader=train_loader, validation_loader=validation_loader,
            num_epochs=num_epochs, device=device, experiment=experiment,
            writer=writer)

        # Save last state of model
        save_model(model, '%s_last_state.pt' % experiment)
        max_val_acc = max(val_accs) if len(val_accs) > 0 else max_val_acc
        max_train_acc = max(
            train_accs) if len(train_accs) > 0 else max_train_acc
        # FIX: the minimum losses were computed with max(); use min().
        min_train_loss = min(
            train_losses) if len(train_losses) > 0 else min_train_loss
        min_val_loss = min(val_losses) if len(val_losses) > 0 else min_val_loss

        cm_image_train = plot_confusion_matrix(
            cm=get_confusion_matrix(train_loader, model, device),
            title='Confusion Matrix - Training', normalize=False, save=False,
            classes=train_dataset.get_class_names(), show_figure=False)
        cm_image_validation = plot_confusion_matrix(
            cm=get_confusion_matrix(validation_loader, model, device),
            title='Confusion Matrix - Validation', normalize=False,
            save=False, classes=validation_dataset.get_class_names(),
            show_figure=False)
        cm_image_test = plot_confusion_matrix(
            cm=get_confusion_matrix(test_loader, model, device),
            title='Confusion Matrix - Test', normalize=False, save=False,
            classes=test_dataset.get_class_names(), show_figure=False)

        # Add confusion matrices for each dataset, mark it for the last step
        # which is num_epochs - 1
        writer.add_images('ConfusionMatrix/Train', cm_image_train,
                          dataformats='CHW', global_step=num_epochs - 1)
        writer.add_images('ConfusionMatrix/Validation', cm_image_validation,
                          dataformats='CHW', global_step=num_epochs - 1)
        writer.add_images('ConfusionMatrix/Test', cm_image_test,
                          dataformats='CHW', global_step=num_epochs - 1)
        print('Best validation accuracy: %f' % max(val_accs))
        writer.add_text('config', json.dumps(param_config, indent=2))
        writer.add_text('args', json.dumps(args.__dict__, indent=2))
        writer.flush()
        writer.close()

    test_accuracy = get_accuracy(test_loader, model, device)
    print('Test accuracy (not based on val): %f' % test_accuracy)
    return {
        'test_acc': test_accuracy,
        'max_train_acc': max_train_acc,
        'max_val_acc': max_val_acc,
        'min_train_loss': min_train_loss,
        'min_val_loss': min_val_loss
    }
def main():
    """Train (or load) a GNN Sudoku solver, visualize its graph iterations
    on one test puzzle, and report the fraction of solved test puzzles.

    Relies on module-level names: ``parser``, ``tools``, ``Sudoku``,
    ``GNN``, ``collate`` and ``fraction_of_solved_puzzles``.
    """
    args = parser.parse_args()
    device = torch.device("cuda:0") if args.cuda else torch.device("cpu")

    data_dir = tools.select_data_dir()
    trainset = Sudoku(data_dir, train=True)
    testset = Sudoku(data_dir, train=False)
    trainloader = DataLoader(trainset, batch_size=args.batch_size, collate_fn=collate)
    testloader = DataLoader(testset, batch_size=args.batch_size, collate_fn=collate)

    # Create network
    gnn = GNN(device)

    if not args.skip_training:
        optimizer = torch.optim.Adam(gnn.parameters(), lr=args.learning_rate)
        loss_method = nn.CrossEntropyLoss(reduction="mean")
        for epoch in range(args.n_epochs):
            for i, data in enumerate(trainloader, 0):
                inputs, targets, src_ids, dst_ids = data
                inputs, targets = inputs.to(device), targets.to(device)
                src_ids, dst_ids = src_ids.to(device), dst_ids.to(device)
                optimizer.zero_grad()
                gnn.zero_grad()
                output = gnn.forward(inputs, src_ids, dst_ids)
                output = output.to(device)
                # Flatten [n_iters, n_nodes, 9] into one batch of logits.
                output = output.view(-1, output.shape[2])
                # Repeat targets once per graph iteration (7 presumably equals
                # gnn.n_iters -- TODO confirm) so the loss covers every iteration.
                targets = targets.repeat(7, 1)
                targets = targets.view(-1)
                loss = loss_method(output, targets)
                loss.backward()
                optimizer.step()
            fraction = fraction_of_solved_puzzles(gnn, testloader, device)
            print("Train Epoch {}: Loss: {:.6f} Fraction: {}".format(epoch + 1, loss.item(), fraction))
        tools.save_model(gnn, "7_gnn.pth")
    else:
        # Load weights into the network created above (the original also
        # redundantly re-instantiated GNN here).
        tools.load_model(gnn, "7_gnn.pth", device)

    # Evaluate the trained model: get graph iterations for some test puzzles.
    with torch.no_grad():
        # Bug fix: `iter(testloader).next()` is Python-2 style and raises
        # AttributeError on Python 3 -- use the builtin next().
        inputs, targets, src_ids, dst_ids = next(iter(testloader))
        inputs, targets = inputs.to(device), targets.to(device)
        src_ids, dst_ids = src_ids.to(device), dst_ids.to(device)
        batch_size = inputs.size(0) // 81  # 81 nodes (cells) per puzzle
        outputs = gnn(inputs, src_ids, dst_ids).to(device)  # [n_iters, n_nodes, 9]
        solution = outputs.view(gnn.n_iters, batch_size, 9, 9, 9).to(device)
        final_solution = solution[-1].argmax(dim=3).to(device)
        print("Solved puzzles in the current mini-batch:")
        print((final_solution.view(-1, 81) == targets.view(batch_size, 81)).all(dim=1))

    # Visualize graph iterations for one of the puzzles.
    for i in range(gnn.n_iters):
        tools.draw_sudoku(solution[i, 0], logits=True)

    fraction_solved = fraction_of_solved_puzzles(gnn, testloader, device)
    print(f"Accuracy {fraction_solved}")
import tools if __name__ == '__main__': args = tools.parse_args_fit() model = tools.LinearRegression(args['learning_rate'], args['verbose']) data, target = tools.read_data(args['data_path']) model.fit(data, target) tools.save_model(model, args['save_path'])
def main(): """ function to train model, plot generated samples, compute training score, save train model, load train model, and evaluate model """ # device = torch.device('cuda:0') device = torch.device('cpu') batch_size=32 n_epochs = 15 lambda_n = 10 scorer = Scorer() scorer.to(device) nz = 10 netG = Generator(nz=nz, ngf=128, nc=1).to(device) netD = Critic(nc=1, ndf=128).to(device) if not skip_training: d_optimizer = torch.optim.Adam(netD.parameters(),lr=0.0001) g_optimizer = torch.optim.Adam(netG.parameters(),lr=0.0001) for epoch in range(n_epochs): for i, data in enumerate(trainloader, 0): images, _= data images= images.to(device) netD.train() netD.zero_grad() noise = torch.randn(batch_size, nz, 1, 1, device=device) fake_images = netG(noise) d_loss = critic_loss(netD, images, fake_images) grad_penalty,x_hat = gradient_penalty(netD, images, fake_images.detach()) critic_loss_total = d_loss + grad_penalty*lambda_n critic_loss_total.sum().backward(retain_graph=True) d_optimizer.step() netG.train() netG.zero_grad() g_loss = generator_loss(netD, fake_images) g_loss.backward(retain_graph=True) g_optimizer.step() with torch.no_grad(): # Plot generated images z = torch.randn(144, nz, 1, 1, device=device) samples = netG(z) tools.plot_generated_samples(samples) # Compute score z = torch.randn(1000, nz, 1, 1, device=device) samples = netG(z) samples = (samples + 1) / 2 # Re-normalize to [0, 1] score = scorer(samples) print('Train Epoch {}: score {}'.format(epoch +1,score)) tools.save_model(netG, 'wgan_g.pth') tools.save_model(netD, 'wgan_d.pth') else: nz = 10 netG = Generator(nz=nz, ngf=128, nc=1) netD = Critic(nc=1, ndf=128) tools.load_model(netG, 'wgan_g.pth', device) tools.load_model(netD, 'wgan_d.pth', device) with torch.no_grad(): z = torch.randn(2000, nz, 1, 1, device=device) samples = (netG(z) + 1) / 2 score = scorer(samples) print(f'The trained WGAN-GP achieves a score of {score:.5f}')
def main():
    """
    Train a denoising autoencoder (DAE) on noise-corrupted MNIST, then test
    the quality of the produced encodings by training a logistic-regression
    classifier on the encoded images and printing its accuracy.
    """
    skip_training = False
    n_components = 10
    # Bug fix: n_epochs was assigned 4 here and then unconditionally
    # overwritten with 5 inside the training branch; keep the one value
    # that was actually used.
    n_epochs = 5

    # device = torch.device('cuda:0')
    device = torch.device('cpu')

    data_dir = tools.select_data_dir()

    transform = transforms.Compose([
        transforms.ToTensor(),  # Transform to tensor
        # Multiplicative Gaussian corruption of the input images.
        transforms.Lambda(lambda x: x * torch.randn_like(x))
    ])
    trainset = torchvision.datasets.MNIST(root=data_dir, train=True,
                                          download=True, transform=transform)
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True)

    dae = DAE(n_components)
    dae.to(device)

    # Training loop
    if not skip_training:
        optimizer = torch.optim.Adam(dae.parameters(), lr=0.001)
        loss_method = nn.MSELoss()
        for epoch in range(n_epochs):
            for i, data in enumerate(trainloader, 0):
                images, _ = data
                # Additive Gaussian noise on top of the transform's corruption.
                noise = torch.randn(*images.shape) * 0.2
                noisy_images = images + noise
                optimizer.zero_grad()
                _, output = dae.forward(noisy_images)
                # NOTE(review): the reconstruction is multiplied by the noisy
                # input before the MSE -- presumably intentional gating;
                # confirm with the author.
                loss = loss_method(output * noisy_images, images)
                loss.backward()
                optimizer.step()
            print('Train Epoch {}: Loss: {:.6f}'.format(epoch + 1, loss.item()))
        tools.save_model(dae, 'dae.pth')
    else:
        device = torch.device('cpu')
        dae = DAE(n_components=10)
        tools.load_model(dae, 'dae.pth', device)

    # Test the quality of the produced embeddings by classification
    print('start testing the quality of the produced embeddings by classification')
    testset = torchvision.datasets.MNIST(root=data_dir, train=False,
                                         download=True, transform=transform)

    traincodes, trainlabels = encode(trainset, dae)  # traincodes is (60000, 10)
    testcodes, testlabels = encode(testset, dae)     # testcodes is (10000, 10)

    # Train a simple linear classifier on the codes.
    logreg = LogisticRegression(C=1e5, solver='lbfgs',
                                multi_class='multinomial', max_iter=200)
    logreg.fit(traincodes.cpu(), trainlabels.cpu())

    predicted_labels = logreg.predict(testcodes.cpu())  # (10000,)
    accuracy = np.sum(testlabels.cpu().numpy() == predicted_labels) / predicted_labels.size
    print('Accuracy with a linear classifier: %.2f%%' % (accuracy * 100))
root='./data', train=False, download=True, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.2010))] ) ) } loaders = { 'train': DataLoader(data['train'], batch_size=128, shuffle=True, num_workers=10, pin_memory=True, drop_last=True), 'valid': DataLoader(data['valid'], batch_size=128, num_workers=10, pin_memory=True, drop_last=False) } best_acc = 0.0 for epoch in range(1, epochs + 1): train_log = train(loaders['train']) valid_log = test(loaders['valid']) logs = dict(train_log, **valid_log) show_info = f'\nEpoch: {epoch} - ' + "-".join([f' {key}: {value:.4f} ' for key, value in logs.items()]) print(show_info) train_monitor.epoch_step(logs) if logs['valid_acc'] >=best_acc: print(f"Epoch {epoch}: valid_acc improved from {best_acc:.5f} to {logs['valid_acc']:.5f}") best_acc = logs['valid_acc'] save_model(model,f'./checkpoints/{arch}.bin')
def main():
    """
    Train a Transformer encoder/decoder for translation, save or load the
    models, then print sample translations from the training and test sets.
    """
    device = torch.device('cuda:0')
    n_features = 256
    n_epochs = 40
    batch_size = 64
    skip_training = False

    # Prepare data first: model construction below needs the vocabulary
    # sizes.  (Bug fix: `trainset` was previously referenced to build the
    # encoder/decoder before it was defined, raising NameError.)
    data_dir = tools.select_data_dir()
    trainset = TranslationDataset(data_dir, train=True)
    trainloader = DataLoader(dataset=trainset,
                             batch_size=batch_size,  # was a hard-coded 64
                             shuffle=True,
                             collate_fn=collate,
                             pin_memory=True)

    # Create the transformer model
    encoder = Encoder(src_vocab_size=trainset.input_lang.n_words,
                      n_blocks=3, n_features=n_features, n_heads=16, n_hidden=1024)
    decoder = Decoder(tgt_vocab_size=trainset.output_lang.n_words,
                      n_blocks=3, n_features=n_features, n_heads=16, n_hidden=1024)
    encoder.to(device)
    decoder.to(device)

    # One optimizer over both sub-models, wrapped in the Noam LR schedule
    # (the Adam lr of 0 is overridden by the schedule).
    parameters = list(encoder.parameters()) + list(decoder.parameters())
    adam = torch.optim.Adam(parameters, lr=0, betas=(0.9, 0.98), eps=1e-9)
    optimizer = NoamOptimizer(n_features, 2, 10000, adam)
    loss_method = nn.NLLLoss(ignore_index=0, reduction='mean')  # index 0 = padding

    # training
    if not skip_training:
        for epoch in range(n_epochs):
            loss = training_loop(encoder, decoder, optimizer, loss_method, trainloader)
            print(f'Train Epoch {epoch+1}: Loss: {loss}')
        # save and load trained model
        tools.save_model(encoder, 'tr_encoder.pth')
        tools.save_model(decoder, 'tr_decoder.pth')
    else:
        encoder = Encoder(src_vocab_size=trainset.input_lang.n_words,
                          n_blocks=3, n_features=n_features, n_heads=16, n_hidden=1024)
        tools.load_model(encoder, 'tr_encoder.pth', device)
        decoder = Decoder(tgt_vocab_size=trainset.output_lang.n_words,
                          n_blocks=3, n_features=n_features, n_heads=16, n_hidden=1024)
        tools.load_model(decoder, 'tr_decoder.pth', device)

    # Generate translations with the trained model:
    # translate sentences from the training set
    print('Translate training data:')
    print('-----------------------------')
    for i in range(5):
        src_sentence, tgt_sentence = trainset[np.random.choice(len(trainset))]
        print('>', ' '.join(trainset.input_lang.index2word[i.item()] for i in src_sentence))
        print('=', ' '.join(trainset.output_lang.index2word[i.item()] for i in tgt_sentence))
        out_sentence = translate(encoder, decoder, src_sentence)
        print('<', ' '.join(trainset.output_lang.index2word[i.item()] for i in out_sentence), '\n')

    # translate sentences from the test set
    testset = TranslationDataset(data_dir, train=False)
    print('Translate test data:')
    print('-----------------------------')
    for i in range(5):
        input_sentence, target_sentence = testset[np.random.choice(len(testset))]
        print('>', ' '.join(testset.input_lang.index2word[i.item()] for i in input_sentence))
        print('=', ' '.join(testset.output_lang.index2word[i.item()] for i in target_sentence))
        output_sentence = translate(encoder, decoder, input_sentence)
        print('<', ' '.join(testset.output_lang.index2word[i.item()] for i in output_sentence), '\n')
def main(): """ train and test the quality of the produced encodings by training a classifier using the encoded images """ args = parser.parse_args() if args.cuda: device = torch.device('cuda:0') else: device = torch.device('cpu') data_dir = tools.select_data_dir() transform = transforms.Compose([ transforms.ToTensor(), ]) trainset = torchvision.datasets.MNIST(root=data_dir, train=True, download=True, transform=transform) trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size, shuffle=True) net = PixelCNN(n_channels=args.n_channels, kernel_size=args.kernel_size) net.to(device) if not args.skip_training: optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate) for epoch in range(args.n_epochs): for i, data in enumerate(trainloader, 0): images, _ = data images = images.to(device) net.train() optimizer.zero_grad() y = net(images) y = y.to(device) loss = loss_fn(y, images) loss = loss.to(device) loss.backward() optimizer.step() with torch.no_grad(): samples = generate(net, n_samples=args.n_samples, device=device) tools.plot_generated_samples(samples) print('Train Epoch {}: Loss: {:.6f}'.format( epoch + 1, loss.item())) # Save the model to disk tools.save_model(net, '10_pixelcnn.pth') else: net = PixelCNN(n_channels=args.n_channels, kernel_size=args.kernel_size) tools.load_model(net, '10_pixelcnn.pth', device) # Generate samples print('Generate samples with trained model') with torch.no_grad(): samples = generate(net, n_samples=args.n_samples, device=device) tools.plot_generated_samples(samples)
def main():
    """
    Architecture search with a frozen teacher network, a searchable student
    and a discriminator: sets up logging, data, models, optimizers and
    schedulers, optionally resumes from a checkpoint, then alternates
    train/validate epochs while logging genotypes, sparsity and a
    performance scoreboard, checkpointing periodically.
    """
    args = get_args()

    # ---- logging / experiment directory ----
    args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S"))
    tools.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logger = logging.getLogger('Train Search')
    logger.addHandler(fh)

    # Progress monitors (console + TensorBoard).
    pymonitor = ProgressMonitor(logger)
    tbmonitor = TensorBoardMonitor(logger, args.save)
    monitors = [pymonitor, tbmonitor]

    # ---- reproducibility / device selection ----
    if args.seed is None:
        args.seed = random.randint(1, 10000)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    args.use_cuda = args.gpus > 0 and torch.cuda.is_available()
    args.multi_gpu = args.gpus > 1 and torch.cuda.is_available()
    args.device = torch.device('cuda:0' if args.use_cuda else 'cpu')
    if args.use_cuda:
        torch.cuda.manual_seed(args.seed)
        cudnn.enabled = True
        cudnn.benchmark = True
    setting = {k: v for k, v in args._get_kwargs()}
    logger.info(setting)
    with open(os.path.join(args.save, "args.yaml"), "w") as yaml_file:
        # dump experiment config
        yaml.dump(args, yaml_file)

    # ---- dataloaders ----
    if args.dataset_name == "cifar10":
        train_transform, valid_transform = tools._data_transforms_cifar10(args)
        traindata = dset.CIFAR10(root=args.dataset, train=True,
                                 download=False, transform=train_transform)
        valdata = dset.CIFAR10(root=args.dataset, train=False,
                               download=False, transform=valid_transform)
    else:
        train_transform, valid_transform = tools._data_transforms_mnist(args)
        traindata = dset.MNIST(root=args.dataset, train=True,
                               download=False, transform=train_transform)
        valdata = dset.MNIST(root=args.dataset, train=False,
                             download=False, transform=valid_transform)
    trainLoader = torch.utils.data.DataLoader(traindata, batch_size=args.batch_size,
                                              pin_memory=True, shuffle=True,
                                              num_workers=args.workers)
    valLoader = torch.utils.data.DataLoader(valdata, batch_size=args.batch_size,
                                            pin_memory=True, num_workers=args.workers)

    # ---- teacher model (pretrained, frozen) ----
    model_t = Network(C=args.init_channels, num_classes=args.class_num,
                      layers=args.layers, steps=args.nodes, multiplier=args.nodes,
                      stem_multiplier=args.stem_multiplier, group=args.group)
    model_t, _, _ = loadCheckpoint(args.model_path, model_t, args)
    model_t.freeze_arch_parameters()  # freeze the teacher network
    # Freeze all teacher weights except the last two parameter tensors
    # (presumably the classifier head -- TODO confirm).
    for para in list(model_t.parameters())[:-2]:
        para.requires_grad = False

    # ---- student model (architecture being searched) ----
    model_s = Network(C=args.init_channels, num_classes=args.class_num,
                      layers=args.layers, steps=args.nodes, multiplier=args.nodes,
                      stem_multiplier=args.stem_multiplier, group=args.group)
    model_s, _, _ = loadCheckpoint(args.model_path, model_s, args)
    model_s._initialize_alphas()
    criterion = nn.CrossEntropyLoss().to(args.device)
    model_d = Discriminator().to(args.device)
    model_s = model_s.to(args.device)
    logger.info("param size = %fMB", tools.count_parameters_in_MB(model_s))

    # ---- optimizers and schedulers ----
    optimizer_d = optim.SGD(model_d.parameters(), lr=args.learning_rate,
                            momentum=args.momentum, weight_decay=args.weight_decay)
    optimizer_s = optim.SGD(model_s.weight_parameters(), lr=args.learning_rate,
                            momentum=args.momentum, weight_decay=args.weight_decay)
    # FISTA drives the architecture parameters toward sparsity.
    optimizer_m = FISTA(model_s.arch_parameters(), lr=args.learning_rate,
                        gamma=args.sparse_lambda)
    scheduler_d = StepLR(optimizer_d, step_size=args.lr_decay_step, gamma=0.1)
    scheduler_s = StepLR(optimizer_s, step_size=args.lr_decay_step, gamma=0.1)
    scheduler_m = StepLR(optimizer_m, step_size=args.lr_decay_step, gamma=0.1)

    perf_scoreboard = PerformanceScoreboard(args.num_best_scores)

    # Bug fix: `start_epoch` was undefined unless resuming (NameError at the
    # epoch loop) and `is_best` (used at checkpoint time) was never assigned.
    start_epoch = 0
    best_top1 = 0.0

    if args.resume:
        logger.info('=> Resuming from ckpt {}'.format(args.resume_path))
        ckpt = torch.load(args.resume_path, map_location=args.device)
        start_epoch = ckpt['epoch']
        model_s.load_state_dict(ckpt['state_dict_s'])
        model_d.load_state_dict(ckpt['state_dict_d'])
        optimizer_d.load_state_dict(ckpt['optimizer_d'])
        optimizer_s.load_state_dict(ckpt['optimizer_s'])
        optimizer_m.load_state_dict(ckpt['optimizer_m'])
        scheduler_d.load_state_dict(ckpt['scheduler_d'])
        scheduler_s.load_state_dict(ckpt['scheduler_s'])
        scheduler_m.load_state_dict(ckpt['scheduler_m'])
        perf_scoreboard = ckpt['perf_scoreboard']
        logger.info('=> Continue from epoch {}...'.format(start_epoch))

    models = [model_t, model_s, model_d]
    optimizers = [optimizer_d, optimizer_s, optimizer_m]
    schedulers = [scheduler_d, scheduler_s, scheduler_m]

    for epoch in range(start_epoch, args.num_epochs):
        for s in schedulers:
            logger.info('epoch %d lr %e ', epoch, s.get_lr()[0])
        _, _, _ = train(trainLoader, models, epoch, optimizers, monitors, args, logger)
        v_top1, v_top5, v_loss = validate(valLoader, model_s, criterion,
                                          epoch, monitors, args, logger)

        l, board = perf_scoreboard.update(v_top1, v_top5, epoch)
        for idx in range(l):
            score = board[idx]
            logger.info(
                'Scoreboard best %d ==> Epoch [%d][Top1: %.3f Top5: %.3f]',
                idx + 1, score['epoch'], score['top1'], score['top5'])

        logger.info("normal: \n{}".format(model_s.alphas_normal.data.cpu().numpy()))
        logger.info("reduce: \n{}".format(model_s.alphas_reduce.data.cpu().numpy()))
        logger.info('Genotypev1: {}'.format(model_s.genotypev1()))
        logger.info('Genotypev2: {}'.format(model_s.genotypev2()))
        logger.info('Genotypev3: {}'.format(model_s.genotypev3()))

        # Count zeroed architecture parameters (sparsity induced by FISTA).
        pruned = 0
        num = 0
        for param in model_s.arch_parameters():
            weight_copy = param.clone()
            param_array = np.array(weight_copy.detach().cpu())
            pruned += sum(w == 0 for w in param_array)
            num += len(param_array)
        logger.info("Epoch:{} Pruned {} / {}".format(epoch, pruned, num))

        # Track whether this epoch achieved the best validation top-1 so far
        # (bug fix: `is_best` below was previously an undefined name).
        is_best = v_top1 > best_top1
        best_top1 = max(best_top1, v_top1)

        if epoch % args.save_freq == 0:
            # Bug fix: the original `len(args.gpus) > 1` crashes because
            # args.gpus is an int (see `args.gpus > 0` above); use the
            # precomputed multi-GPU flag instead.
            model_state_dict = model_s.module.state_dict() if args.multi_gpu \
                else model_s.state_dict()
            state = {
                'state_dict_s': model_state_dict,
                'state_dict_d': model_d.state_dict(),
                'optimizer_d': optimizer_d.state_dict(),
                'optimizer_s': optimizer_s.state_dict(),
                'optimizer_m': optimizer_m.state_dict(),
                'scheduler_d': scheduler_d.state_dict(),
                'scheduler_s': scheduler_s.state_dict(),
                'scheduler_m': scheduler_m.state_dict(),
                "perf_scoreboard": perf_scoreboard,
                'epoch': epoch + 1
            }
            tools.save_model(state, epoch + 1, is_best,
                             path=os.path.join(args.save, "ckpt"))

        # update learning rate
        for s in schedulers:
            s.step(epoch)
recall_score(y, y_pred, average='macro'))) df_l.loc[epoch].Train_Loss = avg_train_loss df_acc.loc[epoch].Train_Acc = accuracy_score(y, y_pred) df_f1.loc[epoch].Train_F1 = f1_score(y, y_pred, average="macro") avg_val_loss, (y, y_pred) = eval_dataset(loader_val, model, criterion) print("\tEval: loss={:.4f}, acc={:.4f}, f1={:.4f}, p={:.4f}, r={:.4f}". format(avg_val_loss, accuracy_score(y, y_pred), f1_score(y, y_pred, average="macro"), precision_score(y, y_pred, average="micro"), recall_score(y, y_pred, average='macro'))) df_l.loc[epoch].Val_Loss = avg_val_loss df_acc.loc[epoch].Val_Acc = accuracy_score(y, y_pred) df_f1.loc[epoch].Val_F1 = f1_score(y, y_pred, average="macro") # Print curves acc_curve(df_acc, EPOCHS) loss_curve(df_l, EPOCHS) f1_curve(df_f1, EPOCHS) ############################################################# # Save Model ############################################################# save_model(model, 'TSA_model.pt')
def train():
    """
    Train a TensorFlow (1.x graph-mode) NER model: loads/tags the data,
    builds or restores tag maps and model config, then runs the training
    loop with periodic loss logging, dev-set evaluation and checkpointing.
    Relies on module-level FLAGS and helper functions.
    """
    tf.io.gfile.mkdir(FLAGS.output)
    log_path = os.path.join(FLAGS.output, 'model.log')
    logger = get_logger(log_path)

    # load data sets
    train_sentences = load_sentences(os.path.join(FLAGS.data, "train.txt"), FLAGS.zeros)
    dev_sentences = load_sentences(os.path.join(FLAGS.data, "dev.txt"), FLAGS.zeros)
    test_sentences = load_sentences(os.path.join(FLAGS.data, "test.txt"), FLAGS.zeros)

    # create maps if not exist
    map_file = os.path.join(FLAGS.output, 'maps.pkl')
    if not os.path.isfile(map_file):
        # Create a dictionary and a mapping for tags from the training data,
        # and cache them so later runs reuse the same ids.
        _t, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        with open(map_file, "wb") as f:
            pickle.dump([tag_to_id, id_to_tag], f)
    else:
        with open(map_file, "rb") as f:
            tag_to_id, id_to_tag = pickle.load(f)

    # prepare data, get a collection of list containing index
    train_data = prepare_dataset(train_sentences, FLAGS.max_seq_len, tag_to_id)
    dev_data = prepare_dataset(dev_sentences, FLAGS.max_seq_len, tag_to_id)
    test_data = prepare_dataset(test_sentences, FLAGS.max_seq_len, tag_to_id)
    logger.info("%i / %i / %i sentences in train / dev / test."
                % (len(train_data), len(dev_data), len(test_data)))

    train_manager = BatchManager(train_data, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, FLAGS.batch_size)
    test_manager = BatchManager(test_data, FLAGS.batch_size)

    # make path for store log and model if not exist; reuse an existing
    # config so resumed runs keep identical hyper-parameters.
    config_file = os.path.join(FLAGS.output, 'config.json')
    if os.path.isfile(config_file):
        config = load_config(config_file)
    else:
        config = config_model(tag_to_id)
        save_config(config, config_file)
    print_config(config, logger)

    # limit GPU memory
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    steps_per_epoch = train_manager.len_data
    with tf.Session(config=tf_config) as sess:
        model = create_model(sess, Model, os.path.join(FLAGS.output, 'checkpoint'),
                             config, logger)
        logger.info("start training")
        loss = []  # batch losses since the last logging checkpoint
        # NOTE(review): epoch count is hard-coded to 100 rather than a FLAG.
        for i in range(100):
            for batch in train_manager.iter_batch(shuffle=True):
                step, batch_loss = model.run_step(sess, True, batch)
                loss.append(batch_loss)
                if step % FLAGS.steps_check == 0:
                    iteration = step // steps_per_epoch + 1
                    logger.info("iteration:{} step:{}/{}, "
                                "NER loss:{:>9.6f}".format(
                                    iteration, step % steps_per_epoch,
                                    steps_per_epoch, np.mean(loss)))
                    loss = []  # reset the running-loss window
            # Save a checkpoint only when dev-set evaluation reports a new best.
            best = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
            if best:
                save_model(sess, model, os.path.join(FLAGS.output, 'checkpoint'),
                           logger, global_steps=step)
            evaluate(sess, model, "test", test_manager, id_to_tag, logger)