def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(testloader):
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        test_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        progress_bar(batch_idx, len(testloader),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (test_loss / (batch_idx + 1), 100. * correct / total,
                        correct, total))

    # Save checkpoint.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.module if use_cuda else net,
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/ckpt.t7')
        best_acc = acc
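# The snippets in this collection call a `progress_bar` helper without
# defining it, and its signature varies between projects. A minimal sketch
# of the (current, total, msg) print-style variant used in the snippet above,
# written here as an assumption rather than the original utility:
import sys

def progress_bar(current, total, msg=None, bar_len=40):
    """Redraw a single-line text progress bar like [====>....] on stderr."""
    filled = int(bar_len * (current + 1) / total)
    bar = '=' * filled + '>' + '.' * (bar_len - filled - 1)
    line = '[%s] %d/%d' % (bar, current + 1, total)
    if msg:
        line += ' | ' + msg
    sys.stderr.write('\r' + line)
    if current + 1 == total:
        sys.stderr.write('\n')
    sys.stderr.flush()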
def progress(self, section_num, n, total):
    sprog = pprog = ''
    if section_progress:
        sprog = progress_bar(section_num, len(self.sections), 45)
    if pause_progress:
        pprog = progress_bar(n, total, 20)
    printne(stat_tpl % (pprog, sprog))
def fit(self, training_data): """Train the network. Parameters ---------- training_data : list of pairs In each pair, the first element should be a list of items from the vocabulary (for the NLI task, this is the concatenation of the premise and hypothesis), and the second element should be the one-hot label vector. Attributes ---------- self.output_dim : int Set based on the length of the labels in `training_data`. self.W_xh : np.array Dense connections between the word representations and the hidden layers self.W_hh : np.array Dense connections between the hidden representations. self.W_hy : np.array Dense connections from the final hidden layer to the output layer. """ self.output_dim = len(training_data[0][1]) self.W_xh = randmatrix(self.word_dim, self.hidden_dim) self.W_hh = randmatrix(self.hidden_dim, self.hidden_dim) self.W_hy = randmatrix(self.hidden_dim, self.output_dim) # SGD: iteration = 0 error = sys.float_info.max while error > self.epsilon and iteration < self.maxiter: error = 0.0 random.shuffle(training_data) for seq, labels in training_data: self._forward_propagation(seq) # Cross-entropy error reduces to log(prediction-for-correct-label): error += -np.log(self.y[np.argmax(labels)]) # Back-prop: d_W_hy, d_W_hh, d_W_xh = self._backward_propagation(seq, labels) # Updates: self.W_hy -= self.eta * d_W_hy self.W_hh -= self.eta * d_W_hh self.W_xh -= self.eta * d_W_xh iteration += 1 if self.display_progress: # Report the average error: error /= len(training_data) progress_bar("Finished epoch %s of %s; error is %s" % (iteration, self.maxiter, error)) if self.display_progress: sys.stderr.write('\n')
def fit(self, training_data): """The training algorithm. Parameters ---------- training_data : list A list of (example, label) pairs, where `example` and `label` are both np.array instances. Attributes ---------- self.x : the input layer self.h : the hidden layer self.y : the output layer self.W1 : dense weight connection from self.x to self.h self.W2 : dense weight connection from self.h to self.y Both self.W1 and self.W2 have the bias as their final column. The following attributes are created here for efficiency but used only in `backward_propagation`: self.y_err : vector of output errors self.x_err : vector of input errors """ # Dimensions determined by the data: self.input_dim = len(training_data[0][0]) self.output_dim = len(training_data[0][1]) # Parameter initialization: self.x = np.ones(self.input_dim+1) # +1 for the bias self.h = np.ones(self.hidden_dim+1) # +1 for the bias self.y = np.ones(self.output_dim) self.W1 = utils.randmatrix(self.input_dim+1, self.hidden_dim) self.W2 = utils.randmatrix(self.hidden_dim+1, self.output_dim) self.y_err = np.zeros(self.output_dim) self.x_err = np.zeros(self.input_dim+1) # SGD: iteration = 0 error = sys.float_info.max while error > self.epsilon and iteration < self.maxiter: error = 0.0 random.shuffle(training_data) for ex, labels in training_data: self.forward_propagation(ex) error += self.backward_propagation(labels) iteration += 1 if self.display_progress: utils.progress_bar('completed iteration %s; error is %s' % (iteration, error)) if self.display_progress: sys.stderr.write('\n')
def get_progress_bar(self):
    '''
    Returns the current progress of the download as a string containing
    a progress bar.

    .. NOTE::
        That's an alias for pySmartDL.utils.progress_bar(obj.get_progress()).

    :rtype: string
    '''
    return utils.progress_bar(self.get_progress())
def get_progress_bar(self, length=20):
    '''
    Returns the current progress of the download as a string containing
    a progress bar.

    .. NOTE::
        That's an alias for pySmartDL.utils.progress_bar(obj.get_progress()).

    :param length: The length of the progress bar in chars. Default is 20.
    :type length: int
    :rtype: string
    '''
    return utils.progress_bar(self.get_progress(), length)
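# Unlike the print-style helper sketched earlier, the `utils.progress_bar`
# aliased above returns a string. A minimal sketch of such a helper, assuming
# `progress` is a float in [0.0, 1.0]; the real pySmartDL utility may render
# differently:
def string_progress_bar(progress, length=20):
    """Return a textual bar such as '[##########----------]'."""
    filled = int(round(progress * length))
    return '[' + '#' * filled + '-' * (length - filled) + ']'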
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        progress_bar(batch_idx, len(trainloader),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (batch_idx + 1), 100. * correct / total,
                        correct, total))
def test(epoch):
    global best_acc
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    batch_time_total = 0
    with torch.no_grad():
        end = time.time()
        for batch_idx, (inputs, targets) in enumerate(testloader):
            data_time = time.time() - end
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            batch_time = time.time() - end
            test_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            batch_time_total += batch_time
            end = time.time()
            progress_bar(batch_idx, len(testloader),
                         'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                         % (test_loss / (batch_idx + 1),
                            100. * correct / total, correct, total))

    # Save checkpoint.
    acc = 100. * correct / total
    is_best = acc > best_acc
    best_acc = max(acc, best_acc)
    if is_best:
        try:
            torch.save(model, os.path.join(model_dir, "model.pth"))
        except Exception:
            print("WARNING: Unable to save model.pth")
        try:
            torch.save(model.state_dict(),
                       os.path.join(model_dir, "weights.pth"))
        except Exception:
            print("WARNING: Unable to save weights.pth")
    save_checkpoint(
        {
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': net.state_dict(),
            'best_acc1': best_acc,
            'optimizer': optimizer,
            # 'lr_scheduler': lr_scheduler,
        }, is_best, model_dir)
    return (test_loss / (batch_idx + 1), 100. * correct / total,
            batch_time_total / (batch_idx + 1))
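# `save_checkpoint` above is not defined in this snippet. A common pattern,
# sketched here as an assumption (filename and layout are illustrative):
# write the state dict, then duplicate it when it is the best so far.
import os
import shutil
import torch

def save_checkpoint(state, is_best, model_dir, filename='checkpoint.pth.tar'):
    """Persist `state`; copy it to model_best.pth.tar when `is_best` is set."""
    path = os.path.join(model_dir, filename)
    torch.save(state, path)
    if is_best:
        shutil.copyfile(path, os.path.join(model_dir, 'model_best.pth.tar'))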
pred, _ = model(points)
pred_choice = pred.data.max(1)[1]
for cat in np.unique(label.cpu()):
    classacc = pred_choice[label == cat].eq(
        label[label == cat].long().data).cpu().sum()
    class_acc[cat, 0] += classacc.item() / float(points[label == cat].size()[0])
    class_acc[cat, 1] += 1
pred_choice = pred.data.max(1)[1]
correct += pred_choice.eq(label.data).cpu().sum()
total += label.size(0)

cur_batch_data_adv = drop_points(points, label, model, loss, num_drop, num_steps)
cur_batch_data_adv = cur_batch_data_adv.cuda()
pred_adv, _ = model(cur_batch_data_adv)
pred_choice_adv = pred_adv.data.max(1)[1]
for cat in np.unique(label.cpu()):
    classacc_adv = pred_choice_adv[label == cat].eq(
        label[label == cat].long().data).cpu().sum()
    class_acc_adv[cat, 0] += classacc_adv.item() / float(
        cur_batch_data_adv[label == cat].size()[0])
    class_acc_adv[cat, 1] += 1
correct_adv += pred_choice_adv.eq(label.long().data).cpu().sum()
progress_bar(j, len(test_loader),
             'Test Acc: %.3f%% (%d/%d) | Adv test acc: %.3f%% (%d/%d)'
             % (100. * correct.item() / total, correct, total,
                100. * correct_adv / total, correct_adv, total))

class_acc[:, 2] = class_acc[:, 0] / class_acc[:, 1]
class_acc_adv[:, 2] = class_acc_adv[:, 0] / class_acc_adv[:, 1]
for i, name in enumerate(SHAPE_NAMES):
    print('%10s:\t%0.3f' % (name, class_acc[:, 2][i]))
    print('%10s:\t%0.3f' % (name, class_acc_adv[:, 2][i]))
if model.epoch > 30:
    break
if model.epoch == 10:
    model.lr = 0.01
if model.epoch == 20:
    model.lr = 0.001
model.global_step = 0
model.next_batch()
model.train_net()
model.calacc()
model.calmeanloss()
if model.epoch > 0:
    progress_bar(model.data_point, model.one_epoch_iter_num,
                 'epoch:%d, loss:%.5f, acc:%.5f, var:%.5f, entr:%.5f, lr:%.5f'
                 % (model.epoch, model.v_meanloss, model.v_acc, model.v_var,
                    model.v_entropy, model.lr))
if model.epoch_final:
    model.eval_weight()
    weight.append(model.v_weight)
    # model.save_model('exp1')
if model.data_point % (model.one_epoch_iter_num // 2) == 0:
    model.dp = 1
    if epoch < 2:
        model.evaluate_train()
        train_vrloss.append(model.v_vrloss)
        train_meanloss.append(model.v_meanloss)
val_images = dict()
class_number = 0
class_label = dict()
for c in classes:
    class_label[c] = class_number
    class_number += 1
    images = glob.glob(path + c + "/*")
    no_samples_per_class[c] = len(images)
    val_samples = int(test_proportion * no_samples_per_class[c])
    val_images[c] = np.zeros((val_samples, *batch_size[1:4]))
    tr_images[c] = np.zeros(
        (no_samples_per_class[c] - val_samples, *batch_size[1:4]))
    print(c, ":")
    bar = ut.progress_bar(steps=no_samples_per_class[c])
    for i in range(len(images)):
        bar.tick()
        if i < val_samples:
            val_images[c][i] = load_patch(images[i])[:, :, np.newaxis]
        else:
            tr_images[c][i - val_samples] = load_patch(images[i])[:, :, np.newaxis]

tx, ty = make_tx_ty()
class_weights = make_class_weights()


def test(experiment_name, func_model):
    epochs = 200
    iterations_per_epoch = 100
def validation(self):
    self.D.eval()
    self.G.eval()
    val_loss = []
    val_dis_loss = []
    val_dis_real_loss = []
    val_dis_fake_loss = []
    val_gen_loss = []
    with torch.no_grad():
        for batch_idx, inputs in enumerate(self.validloader):
            inputs = inputs.to(self.device)
            noise = torch.autograd.Variable(
                torch.randn(size=inputs.size())).to(self.device)

            # Generator
            gen_outputs, _ = self.G(noise)
            gen_outputs = gen_outputs.detach()
            # gen loss
            gen_loss = self.gen_criterion(gen_outputs, inputs)

            # Discriminator
            dis_real_outputs, _ = self.D(inputs)
            dis_fake_outputs, _ = self.D(gen_outputs)
            dis_real_outputs = dis_real_outputs.detach()
            dis_fake_outputs = dis_fake_outputs.detach()
            # dis loss
            dis_real_loss = self.dis_criterion(
                dis_real_outputs,
                torch.full(dis_real_outputs.size(), 1,
                           dtype=torch.float, device=self.device))
            dis_fake_loss = self.dis_criterion(
                dis_fake_outputs,
                torch.full(dis_fake_outputs.size(), 0,
                           dtype=torch.float, device=self.device))
            dis_loss = dis_real_loss + dis_fake_loss

            # loss
            val_dis_loss.append(dis_loss.item())
            val_dis_real_loss.append(dis_real_loss.item())
            val_dis_fake_loss.append(dis_fake_loss.item())
            val_gen_loss.append(gen_loss.item())
            loss = gen_loss + dis_loss
            val_loss.append(loss.item())

            progress_bar(
                current=batch_idx,
                total=len(self.validloader),
                name='VALID',
                msg='Total Loss: %.3f | Gen Loss: %.3f | Dis Loss: %.3f (Real %.3f / Fake %.3f)'
                % (np.mean(val_loss), np.mean(val_gen_loss),
                   np.mean(val_dis_loss), np.mean(val_dis_real_loss),
                   np.mean(val_dis_fake_loss)))

    val_dis_loss = np.mean(val_dis_loss)
    val_dis_real_loss = np.mean(val_dis_real_loss)
    val_dis_fake_loss = np.mean(val_dis_fake_loss)
    val_gen_loss = np.mean(val_gen_loss)
    val_loss = np.mean(val_loss)

    return val_dis_loss, val_dis_real_loss, val_dis_fake_loss, val_gen_loss, val_loss
def test():
    global best_acc
    net1.eval()
    net2.eval()
    net3.eval()
    net4.eval()
    net5.eval()
    net6.eval()
    net7.eval()
    test_loss = 0
    correct = 0
    total = 0
    idx = args.testsel * 10000
    count_net1 = 0
    count_net2 = 0
    count_net3 = 0
    count_net4 = 0
    count_net5 = 0
    count_net6 = 0
    count_net7 = 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        # Route each test image to one of the seven nets by its FS score.
        if FS_array[idx] < bar1:
            outputs = net1(inputs)
            count_net1 += 1
        elif FS_array[idx] < bar2:
            outputs = net2(inputs)
            count_net2 += 1
        elif FS_array[idx] < bar3:
            outputs = net3(inputs)
            count_net3 += 1
        elif FS_array[idx] < bar4:
            outputs = net4(inputs)
            count_net4 += 1
        elif FS_array[idx] < bar5:
            outputs = net5(inputs)
            count_net5 += 1
        elif FS_array[idx] < bar6:
            outputs = net6(inputs)
            count_net6 += 1
        else:
            outputs = net7(inputs)
            count_net7 += 1
        idx = idx + 1
        loss = criterion(outputs, targets)
        test_loss += loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        progress_bar(batch_idx, len(test_loader),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (test_loss / (batch_idx + 1), 100. * correct / total,
                        correct, total))

    acc = 100. * correct / total
    print(count_net1, count_net2, count_net3, count_net4,
          count_net5, count_net6, count_net7)
    return acc
def test(epoch, all_res, filename):
    global best_acc
    net.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    entropy = []
    var = []
    allloss = []
    for batch_idx, (inputs, targets) in enumerate(testloader):
        one_hot_t = torch.FloatTensor(targets.size()[0], 10)
        one_hot_t = one_hot_t.zero_()
        targets_col = targets.view([-1, 1])
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
            one_hot_t = one_hot_t.cuda()
            targets_col = targets_col.cuda()
        one_hot_t = one_hot_t.scatter_(1, targets_col, 1)
        one_hot_t = Variable(one_hot_t)
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        outputs = net(inputs)
        log_softmax = torch.nn.LogSoftmax()(outputs)
        softmax = torch.nn.Softmax()(outputs)
        # Per-example cross-entropy against the one-hot targets:
        loss = -1 * torch.sum(log_softmax * one_hot_t, 1)
        allloss.append(loss)
        entropy.append(-1 * torch.mean(torch.sum(softmax * log_softmax, 1)))
        variance = torch.var(loss)
        test_loss += loss.data[0][0]
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        progress_bar(batch_idx, len(testloader),
                     '(%d/%d) Loss: %.3f,Acc:%.3f%%,Lr %.4f,Var:%.4f,Entr:%.4f'
                     % (correct, total, test_loss / (batch_idx + 1),
                        100. * correct / total,
                        optimizer.param_groups[0]['lr'],
                        variance.data[0], entropy[-1].data[0]))

    allloss = torch.cat(allloss)
    allentropy = torch.cat(entropy)
    v_meanloss = torch.mean(allloss)
    variance = torch.var(allloss)
    entropy = torch.mean(allentropy)
    all_res['test_meanloss'].append(v_meanloss.data[0])
    all_res['test_var'].append(variance.data[0])
    all_res['test_entropy'].append(entropy.data[0])
    all_res['test_acc'].append(1.0 * correct / total)

    # Save checkpoint.
    acc = 100. * correct / total
    if acc > best_acc:
        print('Saving..')
        state = {
            'net': net.module if use_cuda else net,
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('checkpoint'):
            os.mkdir('checkpoint')
        torch.save(state, './checkpoint/' + filename + '_ckpt.t7')
        best_acc = acc
def train(net, epoch):
    global glob_gau
    global glob_blur
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    mask_channel = torch.load('mask_null.dat')
    mask_channel = utils.setMask(utils.setMask(mask_channel, 0, 1), 2, 0)

    # First pass over the training set with glob_gau = 0.
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        glob_gau = 0
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        net = utils.netMaskMul(net, mask_channel)
        net = utils.addNetwork(net, net2)
        if args.fixed == 1:
            net = utils.quantize(net, args.pprec)
        optimizer.step()

        train_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += float(predicted.eq(targets.data).cpu().sum())
        progress_bar(batch_idx, len(train_loader),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (batch_idx + 1), 100. * correct / total,
                        correct, total))

    # Second pass over the training set with glob_gau = 1.
    for batch_idx, (inputs, targets) in enumerate(train_loader):
        glob_gau = 1
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        net = utils.netMaskMul(net, mask_channel)
        net = utils.addNetwork(net, net2)
        if args.fixed == 1:
            net = utils.quantize(net, args.pprec)
        optimizer.step()

        train_loss += loss.data.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += float(predicted.eq(targets.data).cpu().sum())
        progress_bar(batch_idx, len(train_loader),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (batch_idx + 1), 100. * correct / total,
                        correct, total))
def train(epoch):
    net.train()
    (batch_norm1, batch_norm2, classifier1, classifier2,
     classifier, batch_norm_bottleneck) = classifier_blocks
    for bn in [batch_norm1, batch_norm2, batch_norm_bottleneck]:
        if bn is not None:
            bn.train()
    train_loss = 0
    total = 0
    correct = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()

        patches_shape = kernel_convolution_2.shape
        operator = (V * ev_rescale) @ V.t()
        # operator = (V * torch.exp(ev_rescale)) @ V.t()
        zca_patches = kernel_convolution_2.view(patches_shape[0], -1) @ operator
        zca_patches_normalized = zca_patches / (
            zca_patches.norm(dim=1, keepdim=True) + 1e-8)
        # zca_patches_normalized = zca_patches
        kernel_conv = zca_patches_normalized.view(patches_shape)

        outputs1, outputs2 = net(inputs, kernel_conv)
        outputs, targets = compute_classifier_outputs(
            outputs1, outputs2, targets, args, batch_norm1, batch_norm2,
            classifier1, classifier2, classifier, batch_norm_bottleneck,
            train=True)
        loss = criterion(outputs, targets)
        if args.lambda_1 > 0.:
            group_sparsity_norm = torch.norm(
                torch.cat([classifier1.weight, classifier2.weight], dim=0),
                dim=0, p=2).mean()
            loss += args.lambda_1 * group_sparsity_norm
        loss.backward()
        optimizer.step()

        if torch.isnan(loss):
            return False

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        progress_bar(
            batch_idx, len(trainloader),
            'Train, epoch: %i; Loss: %.3f | Acc: %.3f%% (%d/%d) ; threshold %.3f'
            % (epoch, train_loss / (batch_idx + 1), 100. * correct / total,
               correct, total, args.bias),
            hide=args.no_progress_bar)

    if args.no_progress_bar:
        print('Train, epoch: {}; Loss: {:.2f} | Acc: {:.1f} ; threshold {:.3f}'.format(
            epoch, train_loss / (batch_idx + 1), 100. * correct / total,
            args.bias))
    return True
def train(self, epoch, no_of_steps, trainloader):
    self.model.train()
    train_loss, correct, total = 0, 0, 0

    # For the first 5 epochs use warmup; afterwards hand over to the scheduler.
    if epoch < 5:
        lr = self.warmup_learning_rate(self.lr, no_of_steps, epoch,
                                       len(trainloader))
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr
    elif epoch == 5:
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.lr

    print('Learning Rate: %g' % (list(
        map(lambda group: group['lr'], self.optimizer.param_groups)))[0])

    # The loss criterion is kept in FP32.
    criterion = nn.CrossEntropyLoss()

    for idx, (inputs, targets) in enumerate(trainloader):
        if self.train_on_gpu:
            inputs, targets = inputs.cuda(), targets.cuda()
        self.model.zero_grad()
        outputs = self.model(inputs)
        # Compute the loss in FP32, since reduction ops can be
        # inaccurate when represented in FP16.
        loss = criterion(outputs, targets)
        if self.loss_scaling:
            # The loss may become too small to be represented in FP16,
            # so scale it by a large power of 2 (2**7 here).
            loss = loss * self._LOSS_SCALE
        # Calculate the gradients.
        loss.backward()
        if self.fp16_mode:
            # Move the calculated gradients to the master params so the
            # gradient update can be applied in FP32.
            self.model_grads_to_master_grads(self.model_params,
                                             self.master_params)
            if self.loss_scaling:
                # Undo the loss scaling now that the gradients are in FP32.
                for params in self.master_params:
                    params.grad.data = params.grad.data / self._LOSS_SCALE
            # Apply the weight update in FP32.
            self.optimizer.step()
            # Copy the updated weights back to the FP16 model weights.
            self.master_params_to_model_params(self.model_params,
                                               self.master_params)
        else:
            self.optimizer.step()

        train_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += (targets == predicted).sum().item()
        progress_bar(idx, len(trainloader),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (idx + 1), 100. * correct / total,
                        correct, total))

    if epoch >= 5:
        self.scheduler.step()
def train_model(model, data, optim, epoch, params):
    model.train()
    trainloader = data['trainloader']

    for src, tgt, src_len, tgt_len, original_src, original_tgt in trainloader:
        model.zero_grad()

        if config.use_cuda:
            src = src.cuda()
            tgt = tgt.cuda()
            src_len = src_len.cuda()

        lengths, indices = torch.sort(src_len, dim=0, descending=True)
        src = torch.index_select(src, dim=0, index=indices)
        tgt = torch.index_select(tgt, dim=0, index=indices)
        dec = tgt[:, :-1]
        targets = tgt[:, 1:]

        try:
            if config.schesamp:
                if epoch > 8:
                    e = epoch - 8
                    loss, outputs = model(src, lengths, dec, targets,
                                          teacher_ratio=0.9 ** e)
                else:
                    loss, outputs = model(src, lengths, dec, targets)
            else:
                loss, outputs = model(src, lengths, dec, targets)
            pred = outputs.max(2)[1]
            targets = targets.t()
            num_correct = pred.eq(targets).masked_select(
                targets.ne(utils.PAD)).sum().item()
            num_total = targets.ne(utils.PAD).sum().item()
            if config.max_split == 0:
                loss = torch.sum(loss) / num_total
                loss.backward()
            optim.step()

            params['report_loss'] += loss.item()
            params['report_correct'] += num_correct
            params['report_total'] += num_total

        except RuntimeError as e:
            if 'out of memory' in str(e):
                print('| WARNING: ran out of memory')
                if hasattr(torch.cuda, 'empty_cache'):
                    torch.cuda.empty_cache()
            else:
                raise e

        utils.progress_bar(params['updates'], config.eval_interval)
        params['updates'] += 1

        if params['updates'] % config.eval_interval == 0:
            params['log'](
                "epoch: %3d, loss: %6.3f, time: %6.3f, updates: %8d, accuracy: %2.2f\n"
                % (epoch, params['report_loss'],
                   time.time() - params['report_time'], params['updates'],
                   params['report_correct'] * 100.0 / params['report_total']))
            print('evaluating after %d updates...\r' % params['updates'])
            score = eval_model(model, data, params)
            for metric in config.metrics:
                params[metric].append(score[metric])
                if score[metric] >= max(params[metric]):
                    with codecs.open(
                            params['log_path'] + 'best_' + metric + '_prediction.txt',
                            'w', 'utf-8') as f:
                        f.write(codecs.open(
                            params['log_path'] + 'candidate.txt',
                            'r', 'utf-8').read())
                    save_model(
                        params['log_path'] + 'best_' + metric + '_checkpoint.pt',
                        model, optim, params['updates'])
            model.train()
            params['report_loss'], params['report_time'] = 0, time.time()
            params['report_correct'], params['report_total'] = 0, 0

        if params['updates'] % config.save_interval == 0:
            save_model(params['log_path'] + 'checkpoint.pt',
                       model, optim, params['updates'])

    optim.updateLearningRate(score=0, epoch=epoch)
def run(self):
    # Called by Qt once the thread environment has been set up.
    url = self.songObj.url
    filesize = self.songObj.filesize
    audio_path = os.path.join(self.dl_dir,
                              self.songObj.GetProperFilename('mp3'))  # final path
    video_path = os.path.join(self.dl_dir,
                              self.songObj.GetProperFilename())  # final path
    dest_audio_path = os.path.join(config.temp_dir,
                                   "%s.mp3" % utils.get_rand_string())

    if not self.isMultimediaFile:
        dest_path = os.path.join(config.temp_dir, utils.get_rand_string())
    elif self.songObj.ext == "mp3":  # no conversion needed
        dest_path = dest_audio_path
    else:
        dest_path = os.path.join(config.temp_dir,
                                 "%s.%s" % (utils.get_rand_string(),
                                            self.songObj.ext))

    dl_obj = Main.SmartDL(url, dest_path, logger=log)
    dl_obj.start()
    self.dl_obj = dl_obj

    while not dl_obj.isFinished():
        if dl_obj.status == 'combining':
            self.status.emit(tr("Combining Parts..."))
            break
        self.downloadProgress.emit(int(dl_obj.get_progress() * 100),
                                   dl_obj.get_speed(), dl_obj.get_eta(),
                                   dl_obj.get_downloaded_size(), filesize)
        time.sleep(0.1)
    while not dl_obj.isFinished():
        # If we broke out of the last loop, we are waiting for
        # parts to get combined. We shall wait.
        time.sleep(0.1)

    if dl_obj._failed:
        log.error("Got DownloadFailedException() for %s" % url)
        self.error.emit(Main.SmartDL.DownloadFailedException())
        self.terminate()
        return

    self.downloadProgress.emit(100, dl_obj.get_speed(), dl_obj.get_eta(),
                               filesize, filesize)

    if self.convertNeeded:
        t1 = time.time()
        log.debug("Encoding Audio...")
        self.status.emit(tr("Encoding Audio..."))
        est_final_filesize = self.songObj.final_filesize
        if est_final_filesize:
            print "Encoding: %s (%.2f MB) to %s" % (
                dest_audio_path, est_final_filesize / 1024.0 / 1024.0,
                self.dl_dir)
        else:
            print "Encoding: %s to %s" % (dest_audio_path, self.dl_dir)
        proc = Wrappers.FFMpeg(dest_path, dest_audio_path,
                               config.itag_audio_bitrates[self.songObj.itag.quality])
        self.encProgress.emit(0)
        for fs_counter in proc:
            if not est_final_filesize:
                continue
            status = r"Encoding: %.2f MB / %.2f MB %s [%3.2f%%]" % (
                fs_counter / 1024.0,
                est_final_filesize / 1024.0 ** 2,
                utils.progress_bar(1.0 * fs_counter * 1024 / est_final_filesize),
                fs_counter * 1024 * 100.0 / est_final_filesize)
            status = status + chr(8) * (len(status) + 1)
            print status,
            self.encProgress.emit(int(fs_counter * 1024 * 100.0 / est_final_filesize))
        self.encProgress.emit(100)
        t2 = time.time()
        self.encode_time += t2 - t1

        if not config.downloadVideo or not self.isVideo:
            log.debug("Removing %s..." % dest_path)
            os.unlink(dest_path)
    else:
        dest_audio_path = dest_path

    if config.downloadAudio and config.trimSilence:
        t1 = time.time()
        log.debug("Trimming Silence...")
        self.status.emit(tr("Trimming Silence from edges..."))
        temp_audio_trimmed_path = "%s.tmp.mp3" % dest_audio_path
        if os.path.exists(temp_audio_trimmed_path):
            os.unlink(temp_audio_trimmed_path)
        os.rename(dest_audio_path, temp_audio_trimmed_path)

        est_final_filesize = self.songObj.final_filesize
        print "Trimming Silence: %s (%.2f MB) to %s" % (
            dest_audio_path, est_final_filesize / 1024.0 ** 2, self.dl_dir)
        self.encProgress.emit(0)
        proc = Wrappers.SoX(temp_audio_trimmed_path, dest_audio_path)
        for progress in proc:
            status = r"Trimming Silence: %s" % utils.progress_bar(progress / 100.0)
            status = status + chr(8) * (len(status) + 1)
            print status,
            self.encProgress.emit(progress)
        self.encProgress.emit(100)
        t2 = time.time()
        self.encode_time += t2 - t1

        if not os.path.exists(dest_audio_path):
            log.error('SoX failed.')

    log.debug("Copying Files...")
    self.status.emit(tr("Copying Files..."))
    if self.isVideo:
        # IMPROVE: this crashes when the video is open in a media player;
        # os.unlink removes it, but it is still running in the media player.
        if config.downloadAudio:
            log.debug("Moving %s to %s" % (dest_audio_path, audio_path))
            shutil.move(dest_audio_path, audio_path)
        if config.downloadVideo:
            log.debug("Moving %s to %s" % (dest_path, video_path))
            shutil.move(dest_path, video_path)
    if self.isAudio:
        log.debug("Moving %s to %s" % (dest_path, audio_path))
        shutil.move(dest_path, audio_path)

    dl_time = dl_obj.get_dl_time()
    dl_time_s = int(dl_time) % 60
    dl_time_m = int(dl_time) / 60
    if filesize / dl_time / 1024 ** 2 > 1:  # if the rate is in MB/s
        if dl_time_m:
            stats_str = tr('Download: %d:%.2d (%.2f MB/s)') % (
                dl_time_m, dl_time_s, filesize / dl_time / 1024 ** 2)
        else:
            stats_str = tr('Download: %ds (%.2f MB/s)') % (
                dl_time, filesize / dl_time / 1024 ** 2)
    else:
        if dl_time_m:
            stats_str = tr('Download: %d:%.2d (%.2f KB/s)') % (
                dl_time_m, dl_time_s, filesize / dl_time / 1024)
        else:
            stats_str = tr('Download: %ds (%.2f KB/s)') % (
                dl_time, filesize / dl_time / 1024)
    if self.encode_time:
        stats_str += tr('; Encoded: %ds') % self.encode_time
    self.status.emit(stats_str)
# Label groupings for the three-level class hierarchy: each dict maps a
# fine-grained label to its group index at that level.
COARSE_MAP = {0: 0, 13: 0, 24: 0, 36: 0, 2: 0, 35: 0, 26: 0, 44: 0,
              8: 0, 11: 0, 41: 0, 9: 0,
              1: 1, 18: 1, 21: 1, 19: 1,
              3: 2,
              4: 3, 17: 3, 30: 3, 34: 3, 10: 3, 31: 3, 33: 3, 20: 3,
              27: 3, 42: 3, 37: 3, 6: 3, 16: 3,
              5: 4, 28: 4,
              7: 5,
              12: 6, 39: 6,
              14: 7,
              15: 8, 40: 8, 46: 8, 29: 8,
              22: 9, 43: 9,
              23: 10, 32: 10,
              25: 11,
              38: 12,
              45: 13}
ZERO_MAP = {0: 0, 24: 0,
            13: 1,
            36: 2,
            2: 3, 35: 3,
            26: 4,
            44: 5, 41: 5, 9: 5,
            8: 6,
            11: 7}
ZERO_ZERO_MAP = {0: 0, 24: 1}


def remap_labels(target, mapping, device):
    """Return a copy of `target` with every label replaced via `mapping`."""
    remapped = target.clone().to(device)
    for src_lbl, dst_lbl in mapping.items():
        remapped[target == src_lbl] = dst_lbl
    return remapped


def train(model, model_zero, model_zero_zero, optimizer, loss_fn,
          trainloader, valloader, device, maxi):
    max = maxi
    for epoch in range(10):
        model.eval()
        model_zero.eval()
        model_zero_zero.train()
        count = 0
        num_zero_zero = 0
        total = 0
        model_zero_zero.acc = 0
        for batch_num, (data, target) in enumerate(trainloader):
            data = data.to(device)
            target = target.to(device)
            target1 = remap_labels(target, COARSE_MAP, device)

            next_data, net_out = model(data)
            indices = (target1 == 0).nonzero()[:, 0]
            zero_data = next_data[indices]
            zero_target = target[indices]
            zero_target1 = remap_labels(zero_target, ZERO_MAP, device)

            next_data, net_out = model_zero(zero_data)
            if (zero_target1 == 0).nonzero().shape[0] != 0:
                indices = (zero_target1 == 0).nonzero()[:, 0]
                zero_zero_data = next_data[indices]
                zero_zero_target = zero_target[indices]
                zero_zero_target1 = remap_labels(zero_zero_target,
                                                 ZERO_ZERO_MAP, device)
                loss = train_mod(model_zero_zero, optimizer, loss_fn,
                                 zero_zero_data, zero_zero_target1, device)
                num_zero_zero += zero_zero_data.shape[0]
                progress_bar(batch_num, len(trainloader),
                             'Loss: %.4f | Acc: %.3f%% (%d/%d)'
                             % (loss / (batch_num + 1),
                                100. * model_zero_zero.acc / num_zero_zero,
                                model_zero_zero.acc, num_zero_zero))

        print("Epoch: ", epoch + 1,
              "\nTrain: ", model_zero_zero.acc / num_zero_zero)

        model.eval()
        count1 = 0
        model_zero.eval()
        model_zero_zero.train()
        model_zero_zero.acc = 0
        num_zero_zero = 0
        count_zero_zero = 0
        for data, target in valloader:
            data = data.to(device)
            target = target.to(device)
            target1 = remap_labels(target, COARSE_MAP, device)

            next_data, net_out = model(data)
            indices = (target1 == 0).nonzero()[:, 0]
            zero_data = next_data[indices]
            zero_target = target[indices]
            zero_target1 = remap_labels(zero_target, ZERO_MAP, device)

            next_data, net_out = model_zero(zero_data)
            if (zero_target1 == 0).nonzero().shape[0] != 0:
                indices = (zero_target1 == 0).nonzero()[:, 0]
                zero_zero_data = next_data[indices]
                zero_zero_target = zero_target[indices]
                zero_zero_target1 = remap_labels(zero_zero_target,
                                                 ZERO_ZERO_MAP, device)
                _, zero_zero_out = model_zero_zero(zero_zero_data)
                pred_zero_zero = zero_zero_out.max(1, keepdim=True)[1]
                count_zero_zero += pred_zero_zero.eq(
                    zero_zero_target1.view_as(pred_zero_zero)).sum().item()
                num_zero_zero += zero_zero_data.shape[0]

        print("Val: ", count_zero_zero / num_zero_zero)
        if count_zero_zero / num_zero_zero > max:
            print("checkpoint saved")
            max = count_zero_zero / num_zero_zero
            torch.save(model_zero_zero.state_dict(), "../Models/emnist_0_0.pth")
    return max
def begin_train(self):
    N_EPOCHS = 30
    N_BATCH = 20
    N_TRAIN_INS = len(self.train_ending)
    best_val_accuracy = 0
    best_test_accuracy = 0
    test_threshold = 1000.0
    prev_percetage = 0.0
    speed = 0.0
    batch_count = 0.0

    for epoch in range(N_EPOCHS):
        print "epoch ", epoch, ":"
        shuffled_index_list = utils.shuffle_index(N_TRAIN_INS)
        max_batch = N_TRAIN_INS / N_BATCH
        start_time = time.time()
        total_cost = 0.0
        total_err_count = 0.0
        for batch in range(max_batch):
            batch_index_list = [shuffled_index_list[i]
                                for i in range(batch * N_BATCH,
                                               (batch + 1) * N_BATCH)]
            train_story = [self.train_story[index] for index in batch_index_list]
            train_ending = [self.train_ending[index] for index in batch_index_list]

            neg_end_index_list = np.random.randint(N_TRAIN_INS, size=(N_BATCH,))
            while np.any((np.asarray(batch_index_list) - neg_end_index_list) == 0):
                neg_end_index_list = np.random.randint(N_TRAIN_INS, size=(N_BATCH,))
            neg_end1 = [self.train_ending[index] for index in neg_end_index_list]

            answer = np.random.randint(2, size=N_BATCH)
            answer_vec = np.concatenate((answer.reshape(-1, 1),
                                         (1 - answer).reshape(-1, 1)),
                                        axis=1).astype('int64')
            train_end1 = []
            train_end2 = []
            for i in range(N_BATCH):
                if answer[i] == 0:
                    train_end1.append(train_ending[i])
                    train_end2.append(neg_end1[i])
                else:
                    train_end1.append(neg_end1[i])
                    train_end2.append(train_ending[i])

            train_story_matrix = utils.padding(train_story)
            train_end1_matrix = utils.padding(train_end1)
            train_end2_matrix = utils.padding(train_end2)
            train_story_mask = utils.mask_generator(train_story)
            train_end1_mask = utils.mask_generator(train_end1)
            train_end2_mask = utils.mask_generator(train_end2)

            cost = self.train_func(train_story_matrix, train_story_mask,
                                   train_end1_matrix, train_end1_mask,
                                   train_end2_matrix, train_end2_mask,
                                   answer_vec)
            prediction = self.prediction(train_story_matrix, train_story_mask,
                                         train_end1_matrix, train_end1_mask,
                                         train_end2_matrix, train_end2_mask)
            total_err_count += (prediction - answer).sum()

            if batch_count != 0 and batch_count % 10 == 0:
                speed = N_BATCH * 10.0 / (time.time() - start_time)
                start_time = time.time()
            percetage = ((batch_count % test_threshold) + 1) / test_threshold * 100
            if percetage - prev_percetage >= 1:
                utils.progress_bar(percetage, speed)

            # peek on val set every 5000 instances (1000 batches)
            if batch_count % test_threshold == 0:
                if batch_count == 0:
                    print "initial test"
                else:
                    print " "
                print "test on valid set..."
                val_result, val_result_list = self.val_set_test()
                print "accuracy is: ", val_result * 100, "%"
                if val_result > best_val_accuracy:
                    print "new best! test on test set..."
                    best_val_accuracy = val_result
                    self.saving_model('val', best_val_accuracy)
                    pickle.dump(val_result_list,
                                open('./prediction/BLSTM_1neg_class_best_val_prediction.pkl', 'wb'))
                    test_accuracy, test_result_list = self.test_set_test()
                    print "test set accuracy: ", test_accuracy * 100, "%"
                    if test_accuracy > best_test_accuracy:
                        best_test_accuracy = test_accuracy
                        print "saving model..."
                        self.saving_model('test', best_test_accuracy)
                        pickle.dump(test_result_list,
                                    open('./prediction/BLSTM_1neg_class_best_test_prediction.pkl', 'wb'))
            batch_count += 1
            total_cost += cost

        accuracy = 1 - (total_err_count / (max_batch * N_BATCH))
        print ""
        print "total cost in this epoch: ", total_cost
        print "accuracy in this epoch: ", accuracy * 100, "%"

    print "reload best model for testing on test set"
    self.reload_model('val')
    print "test on test set..."
    test_result = self.test_set_test()
    print "accuracy is: ", test_result * 100, "%"
def train(self):
    step = 1
    avg_qa_loss = 0
    avg_dis_loss = 0
    iter_lst = [self.get_iter(self.features_lst, self.args)]
    num_batches = sum([len(iterator[0]) for iterator in iter_lst])

    for epoch in range(self.args.start_epoch,
                       self.args.start_epoch + self.args.epochs):
        start = time.time()
        self.model.train()
        batch_step = 1
        for data_loader, sampler in iter_lst:
            if self.args.distributed:
                sampler.set_epoch(epoch)
            for i, batch in enumerate(data_loader, start=1):
                (input_ids, input_mask, seg_ids,
                 start_positions, end_positions, labels) = batch
                # remove unnecessary pad tokens
                seq_len = torch.sum(torch.sign(input_ids), 1)
                max_len = torch.max(seq_len)
                input_ids = input_ids[:, :max_len].clone()
                input_mask = input_mask[:, :max_len].clone()
                seg_ids = seg_ids[:, :max_len].clone()
                start_positions = start_positions.clone()
                end_positions = end_positions.clone()

                if self.args.use_cuda:
                    input_ids = input_ids.cuda(self.args.gpu, non_blocking=True)
                    input_mask = input_mask.cuda(self.args.gpu, non_blocking=True)
                    seg_ids = seg_ids.cuda(self.args.gpu, non_blocking=True)
                    start_positions = start_positions.cuda(self.args.gpu,
                                                           non_blocking=True)
                    end_positions = end_positions.cuda(self.args.gpu,
                                                       non_blocking=True)

                qa_loss = self.model(input_ids, seg_ids, input_mask,
                                     start_positions, end_positions, labels,
                                     dtype="qa", global_step=step)
                qa_loss = qa_loss.mean()
                qa_loss.backward()

                # update qa model
                avg_qa_loss = self.cal_running_avg_loss(qa_loss.item(),
                                                        avg_qa_loss)
                self.qa_optimizer.step()
                self.qa_optimizer.zero_grad()

                # update discriminator
                dis_loss = self.model(input_ids, seg_ids, input_mask,
                                      start_positions, end_positions, labels,
                                      dtype="dis", global_step=step)
                dis_loss = dis_loss.mean()
                dis_loss.backward()
                avg_dis_loss = self.cal_running_avg_loss(dis_loss.item(),
                                                         avg_dis_loss)
                self.dis_optimizer.step()
                self.dis_optimizer.zero_grad()
                step += 1

                if epoch != 0 and i % 2000 == 0:
                    result_dict = self.evaluate_model(i)
                    for dev_file, f1 in result_dict.items():
                        print("GPU/CPU {} evaluated {}: {:.2f}".format(
                            self.args.gpu, dev_file, f1), end="\n")

                batch_step += 1
                msg = "{}/{} {} - ETA : {} - QA loss: {:.4f}, DIS loss: {:.4f}" \
                    .format(batch_step, num_batches,
                            progress_bar(batch_step, num_batches),
                            eta(start, batch_step, num_batches),
                            avg_qa_loss, avg_dis_loss)
                print(msg, end="\r")

        print("[GPU Num: {}, Epoch: {}, Final QA loss: {:.4f}, Final DIS loss: {:.4f}]"
              .format(self.args.gpu, epoch, avg_qa_loss, avg_dis_loss))

        # save model
        if not self.args.distributed or self.args.rank == 0:
            self.save_model(epoch, avg_qa_loss)
        if self.args.do_valid:
            result_dict = self.evaluate_model(epoch)
            for dev_file, f1 in result_dict.items():
                print("GPU/CPU {} evaluated {}: {:.2f}".format(
                    self.args.gpu, dev_file, f1), end="\n")
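# The status line above interpolates `eta(start, batch_step, num_batches)`,
# but `eta` is not defined in this snippet. A minimal sketch, assuming it
# returns a human-readable remaining-time estimate; the project's own helper
# may format differently:
import time

def eta(start, current_step, total_steps):
    """Estimate remaining wall-clock time from progress so far, as MM:SS."""
    elapsed = time.time() - start
    remaining = elapsed / max(current_step, 1) * (total_steps - current_step)
    return '%02d:%02d' % (remaining // 60, remaining % 60)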
print('done', flush=True)

print("Parsing and slicing artists...", end='', flush=True)
artists = get_artists(song_data)
slices = slice_artists(artists)
print('done\n', flush=True)

spotify = utils.get_spotify_client()
artist_data = defaultdict(list)
tot = len(slices)
for i, aslice in enumerate(slices):
    print("\033[K\033[1000D", utils.progress_frac(i + 1, tot),
          " \033[3mFetching Artist Data\033[0m ",
          utils.progress_bar(i + 1, tot, width=30),
          flush=True, sep='', end='')
    artist_data = utils.get_artist_info(spotify, aslice, artist_data)
print("done", flush=True)

print("Processing Genres...", end='', flush=True)
genres = process_genres(artist_data)
print("done")

print("Saving artist data to output/artists.json")
with open('./output/artists.json', 'w') as fp:
    json.dump(artist_data, fp)
def retrain(epoch, mask_conv0, mask_conv3, mask_conv7, mask_conv10,
            mask_conv14, mask_conv17, mask_conv20, mask_conv24, mask_conv27,
            mask_conv30, mask_conv34, mask_conv37, mask_conv40,
            mask_fc1, mask_fc4, mask_fc6):
    print('\nEpoch: %d' % epoch)
    global best_acc
    net.train()
    train_loss = 0
    total = 0
    correct = 0
    try:
        mask = torch.load('mask_{}.dat'.format(args.pr))
        mask_conv0 = mask['mask_conv0']
        mask_conv3 = mask['mask_conv3']
        mask_conv7 = mask['mask_conv7']
        mask_conv10 = mask['mask_conv10']
        mask_conv14 = mask['mask_conv14']
        mask_conv17 = mask['mask_conv17']
        mask_conv20 = mask['mask_conv20']
        mask_conv24 = mask['mask_conv24']
        mask_conv27 = mask['mask_conv27']
        mask_conv30 = mask['mask_conv30']
        mask_conv34 = mask['mask_conv34']
        mask_conv37 = mask['mask_conv37']
        mask_conv40 = mask['mask_conv40']
        mask_fc1 = mask['mask_fc1']
        mask_fc4 = mask['mask_fc4']
        mask_fc6 = mask['mask_fc6']
    except Exception:
        pass

    # (layer attribute, submodule index, pruning mask) triples used below.
    masked_layers = [
        ('conv1', 0, mask_conv0), ('conv2', 0, mask_conv3),
        ('conv3', 0, mask_conv7), ('conv4', 0, mask_conv10),
        ('conv5', 0, mask_conv14), ('conv6', 0, mask_conv17),
        ('conv7', 0, mask_conv20), ('conv8', 0, mask_conv24),
        ('conv9', 0, mask_conv27), ('conv10', 0, mask_conv30),
        ('conv11', 0, mask_conv34), ('conv12', 0, mask_conv37),
        ('conv13', 0, mask_conv40),
        ('fc1', 1, mask_fc1), ('fc2', 1, mask_fc4), ('fc3', 0, mask_fc6),
    ]

    for batch_idx, (inputs, targets) in enumerate(train_loader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()

        # Zero out pruned connections in both the gradients and the weights.
        for child in net.children():
            for attr, idx, layer_mask in masked_layers:
                for param in getattr(child, attr)[idx].parameters():
                    param.grad.data = torch.mul(param.grad.data, layer_mask)
                    param.data = torch.mul(param.data, layer_mask)

        optimizer.step()
        train_loss += loss.data[0]
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        progress_bar(batch_idx, len(train_loader),
                     'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (batch_idx + 1), 100. * correct / total,
                        correct, total))

    acc = 100. * correct / total
def begin_train(self):
    N_EPOCHS = 30
    N_BATCH = self.batchsize
    N_TRAIN_INS = len(self.train_ending)
    best_val_accuracy = 0
    best_test_accuracy = 0
    test_threshold = 5000 / N_BATCH
    prev_percetage = 0.0
    speed = 0.0
    batch_count = 0.0
    start_batch = 0.0

    for epoch in range(N_EPOCHS):
        print "epoch ", epoch, ":"
        shuffled_index_list = utils.shuffle_index(N_TRAIN_INS)
        max_batch = N_TRAIN_INS / N_BATCH
        start_time = time.time()
        total_cost = 0.0
        total_err_count = 0.0
        for batch in range(max_batch):
            batch_index_list = [shuffled_index_list[i]
                                for i in range(batch * N_BATCH,
                                               (batch + 1) * N_BATCH)]
            train_story = [self.train_story[index] for index in batch_index_list]
            train_ending = [self.train_ending[index] for index in batch_index_list]

            neg_end_index_list = np.random.randint(N_TRAIN_INS, size=(N_BATCH,))
            while np.any((np.asarray(batch_index_list) - neg_end_index_list) == 0):
                neg_end_index_list = np.random.randint(N_TRAIN_INS, size=(N_BATCH,))
            neg_end1 = [self.train_ending[index] for index in neg_end_index_list]

            answer = np.random.randint(2, size=N_BATCH)
            target1 = 1 - answer
            target2 = answer
            # answer_vec = np.concatenate(((1 - answer).reshape(-1,1), answer.reshape(-1,1)), axis=1)
            end1 = []
            end2 = []
            for i in range(N_BATCH):
                if answer[i] == 0:
                    end1.append(train_ending[i])
                    end2.append(neg_end1[i])
                else:
                    end1.append(neg_end1[i])
                    end2.append(train_ending[i])

            train_story_matrix = utils.padding(train_story)
            train_end1_matrix = utils.padding(end1)
            train_end2_matrix = utils.padding(end2)
            train_story_mask = utils.mask_generator(train_story)
            train_end1_mask = utils.mask_generator(end1)
            train_end2_mask = utils.mask_generator(end2)

            cost = self.train_func(train_story_matrix, train_story_mask,
                                   train_end1_matrix, train_end1_mask,
                                   train_end2_matrix, train_end2_mask,
                                   target1, target2)
            prediction1, prediction2 = self.prediction(train_story_matrix,
                                                       train_story_mask,
                                                       train_end1_matrix,
                                                       train_end1_mask,
                                                       train_end2_matrix,
                                                       train_end2_mask)
            prediction = np.concatenate(
                (np.max(prediction1, axis=1).reshape(-1, 1),
                 np.max(prediction2, axis=1).reshape(-1, 1)), axis=1)
            total_err_count += abs((np.argmax(prediction, axis=1) - answer)).sum()

            # master version print
            percetage = ((batch_count % test_threshold) + 1) / test_threshold * 100
            if percetage - prev_percetage >= 1:
                speed = N_BATCH * (batch_count - start_batch) / (time.time() - start_time)
                start_time = time.time()
                start_batch = batch_count
                utils.progress_bar(percetage, speed)
            # end of print

            # peek on val set every 5000 instances (1000 batches)
            if batch_count % test_threshold == 0:
                if batch_count == 0:
                    print "initial test"
                else:
                    print " "
                print "test on valid set..."
                val_result, val_result_list = self.val_set_test()
                print "accuracy is: ", val_result * 100, "%"
                if val_result > best_val_accuracy:
                    print "new best! test on test set..."
                    best_val_accuracy = val_result
                    self.saving_model('val', best_val_accuracy)
                    pickle.dump(val_result_list,
                                open('./prediction/BLSTMLP_' + self.blstmmlp_setting + '_bilinear_' +
                                     'dropout' + str(self.dropout_rate) + '_batch_' +
                                     str(self.batchsize) + '_best_val.pkl', 'wb'))
                    test_accuracy, test_result_list = self.test_set_test()
                    print "test set accuracy: ", test_accuracy * 100, "%"
                    if test_accuracy > best_test_accuracy:
                        best_test_accuracy = test_accuracy
                        print "saving model..."
                        self.saving_model('test', best_test_accuracy)
                        pickle.dump(test_result_list,
                                    open('./prediction/BLSTMLP_' + self.blstmmlp_setting + '_bilinear_' +
                                         'dropout' + str(self.dropout_rate) + '_batch_' +
                                         str(self.batchsize) + '_best_test.pkl', 'wb'))
            batch_count += 1
            total_cost += cost

        accuracy = 1.0 - (total_err_count / (max_batch * N_BATCH))
        speed = max_batch * N_BATCH / (time.time() - start_time)
        print "======================================="
        print "epoch summary:"
        print "average speed: ", speed, "instances/sec"
        print ""
        print "total cost in this epoch: ", total_cost
        print "accuracy in this epoch: ", accuracy * 100, "%"
        print "======================================="

    print "reload best model for testing on test set"
    self.reload_model('val')
    print "test on test set..."
    test_result = self.test_set_test()
    print "accuracy is: ", test_result * 100, "%"
def train(epoch, all_res, filename):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        one_hot_t = torch.FloatTensor(targets.size()[0], 10)
        one_hot_t = one_hot_t.zero_()
        targets_col = targets.view([-1, 1])
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
            one_hot_t = one_hot_t.cuda()
            targets_col = targets_col.cuda()
        one_hot_t = one_hot_t.scatter_(1, targets_col, 1)
        one_hot_t = Variable(one_hot_t)
        inputs, targets = Variable(inputs), Variable(targets)
        targets_col = Variable(targets_col)
        outputs = net(inputs)
        log_softmax = torch.nn.LogSoftmax()(outputs)
        softmax = torch.nn.Softmax()(outputs)
        loss = -1 * torch.sum(log_softmax * one_hot_t, 1)
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        mask = predicted.eq(targets.data)
        mask = Variable(mask.float())
        # Entropy is averaged over correctly classified examples only:
        entropy = -1 * torch.mean(mask * torch.sum(softmax * log_softmax, 1))
        variance = torch.var(loss)
        variance = torch.sqrt(variance)
        meanloss = (torch.mean(loss) + args.entropy * entropy
                    + args.variance * variance)
        optimizer.zero_grad()
        meanloss.backward()
        optimizer.step()

        train_loss += meanloss.data[0]
        progress_bar(batch_idx, len(trainloader),
                     '(%d/%d) Loss: %.3f,Acc:%.3f%%,Lr %.4f,Var:%.4f,Entr:%.4f'
                     % (correct, total, train_loss / (batch_idx + 1),
                        100. * correct / total,
                        optimizer.param_groups[0]['lr'],
                        variance.data[0], entropy.data[0]))
def fit(self, X, y, **kwargs):
    """Standard `fit` method.

    Parameters
    ----------
    X : np.array
    y : array-like
    kwargs : dict
        For passing other parameters. If 'X_dev' is included, then
        performance is monitored every 10 epochs; use `dev_iter` to
        control this number.

    Returns
    -------
    self

    """
    # Incremental performance:
    X_dev = kwargs.get('X_dev')
    if X_dev is not None:
        dev_iter = kwargs.get('dev_iter', 10)
    # Data prep:
    X = np.array(X)
    self.input_dim = X.shape[1]
    self.classes_ = sorted(set(y))
    self.n_classes_ = len(self.classes_)
    class2index = dict(zip(self.classes_, range(self.n_classes_)))
    y = [class2index[label] for label in y]
    # Dataset:
    X = torch.tensor(X, dtype=torch.float)
    y = torch.tensor(y, dtype=torch.long)
    dataset = torch.utils.data.TensorDataset(X, y)
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=self.batch_size, shuffle=True)
    # Graph:
    self.model = self.define_graph()
    self.model.to(self.device)
    # Optimization:
    loss = nn.CrossEntropyLoss()
    optimizer = self.optimizer(self.model.parameters(),
                               lr=self.eta,
                               weight_decay=self.l2_strength)
    # Train:
    for iteration in range(1, self.max_iter + 1):
        epoch_error = 0.0
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to(self.device)
            y_batch = y_batch.to(self.device)
            batch_preds = self.model(X_batch)
            err = loss(batch_preds, y_batch)
            epoch_error += err.item()
            optimizer.zero_grad()
            err.backward()
            optimizer.step()
        # Incremental predictions where possible:
        if X_dev is not None and iteration > 0 and iteration % dev_iter == 0:
            self.dev_predictions[iteration] = self.predict(X_dev)
        self.errors.append(epoch_error)
        progress_bar("Finished epoch {} of {}; error is {}".format(
            iteration, self.max_iter, epoch_error))
    return self
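# A hedged usage sketch for the sklearn-style `fit` above. `MyClassifier` is
# a hypothetical name for the enclosing class (it is not shown in this
# snippet), and the arrays are illustrative:
#
#     clf = MyClassifier(batch_size=64, max_iter=100)
#     clf.fit(X_train, y_train, X_dev=X_dev, dev_iter=5)
#     preds = clf.predict(X_test)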
def test_train(epoch, all_res, filename):
    net.eval()
    test_loss = 0.0
    correct = 0
    total = 0
    entropy = []
    var = []
    allloss = []
    for batch_idx, (inputs, targets) in enumerate(trainloader_test):
        one_hot_t = torch.FloatTensor(targets.size()[0], 10)
        one_hot_t = one_hot_t.zero_()
        targets_col = targets.view([-1, 1])
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
            one_hot_t = one_hot_t.cuda()
            targets_col = targets_col.cuda()
        one_hot_t = one_hot_t.scatter_(1, targets_col, 1)
        one_hot_t = Variable(one_hot_t)
        inputs, targets = Variable(inputs, volatile=True), Variable(targets)
        outputs = net(inputs)
        log_softmax = torch.nn.LogSoftmax()(outputs)
        softmax = torch.nn.Softmax()(outputs)
        loss = -1 * torch.sum(log_softmax * one_hot_t, 1)
        allloss.append(loss)
        entropy.append(-1 * torch.mean(torch.sum(softmax * log_softmax, 1)))
        variance = torch.var(loss)
        test_loss += loss.data[0][0]
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()
        progress_bar(batch_idx, len(trainloader_test),
                     '(%d/%d) Loss: %.3f,Acc:%.3f%%,Lr %.4f,Var:%.4f,Entr:%.4f'
                     % (correct, total, test_loss / (batch_idx + 1),
                        100. * correct / total,
                        optimizer.param_groups[0]['lr'],
                        variance.data[0], entropy[-1].data[0]))

    allloss = torch.cat(allloss)
    allentropy = torch.cat(entropy)
    v_meanloss = torch.mean(allloss)
    variance = torch.var(allloss)
    entropy = torch.mean(allentropy)
    all_res['train_meanloss'].append(v_meanloss.data[0])
    all_res['train_var'].append(variance.data[0])
    all_res['train_entropy'].append(entropy.data[0])
    all_res['train_acc'].append(1.0 * correct / total)
def linear_train(epoch, model, Linear, projector, loptim, attacker=None):
    Linear.train()
    if args.finetune:
        model.train()
        if args.ss:
            projector.train()
    else:
        model.eval()

    total_loss = 0
    correct = 0
    total = 0

    for batch_idx, (ori, inputs, inputs_2, target) in enumerate(trainloader):
        ori, inputs_1, inputs_2, target = (ori.cuda(), inputs.cuda(),
                                           inputs_2.cuda(), target.cuda())
        input_flag = False

        if args.trans:
            inputs = inputs_1
        else:
            inputs = ori

        if args.adv_img:
            advinputs = attacker.perturb(original_images=inputs,
                                         labels=target,
                                         random_start=args.random_start)

        if args.clean:
            total_inputs = inputs
            total_targets = target
            input_flag = True
            if args.ss:
                total_inputs = torch.cat((inputs, inputs_2))
                total_targets = torch.cat((target, target))

        if args.adv_img:
            if input_flag:
                total_inputs = torch.cat((total_inputs, advinputs))
                total_targets = torch.cat((total_targets, target))
            else:
                total_inputs = advinputs
                total_targets = target
                input_flag = True

        if not input_flag:
            raise AssertionError(
                'choose the linear evaluation data type (clean, adv_img)')

        feat = model(total_inputs)
        if args.ss:
            output_p = projector(feat)
            B = ori.size(0)
            similarity, _ = pairwise_similarity(output_p[:2 * B, :2 * B],
                                                temperature=args.temperature,
                                                multi_gpu=False,
                                                adv_type='None')
            simloss = NT_xent(similarity, 'None')

        output = Linear(feat)
        _, predx = torch.max(output.data, 1)
        loss = criterion(output, total_targets)
        if args.ss:
            loss += simloss

        correct += predx.eq(total_targets.data).cpu().sum().item()
        total += total_targets.size(0)
        acc = 100. * correct / total
        total_loss += loss.data

        loptim.zero_grad()
        loss.backward()
        loptim.step()

        progress_bar(batch_idx, len(trainloader),
                     'Loss: {:.4f} | Acc: {:.2f}'.format(
                         total_loss / (batch_idx + 1), acc))

    print("Epoch: {}, train accuracy: {}".format(epoch, acc))
    return acc, model, Linear, projector, loptim
def train(self):
    self.D.train()
    self.G.train()
    train_loss = []
    train_dis_loss = []
    train_dis_real_loss = []
    train_dis_fake_loss = []
    train_gen_loss = []
    # TODO gen, dis scheduler
    for batch_idx, inputs in enumerate(self.trainloader):
        inputs = inputs.to(self.device)
        # (torch.autograd.Variable is a deprecated no-op wrapper; plain
        # tensors carry autograd state now.)
        noise = torch.randn(size=inputs.size()).to(self.device)

        # TAnoGAN-style training: update the discriminator first.
        self.dis_optim.zero_grad()
        inputs_fake, _ = self.G(noise)
        dis_real_outputs, _ = self.D(inputs)
        dis_fake_outputs, _ = self.D(inputs_fake)
        dis_real_loss = self.dis_criterion(
            dis_real_outputs,
            torch.full(dis_real_outputs.size(), 1, dtype=torch.float,
                       device=self.device))
        dis_fake_loss = self.dis_criterion(
            dis_fake_outputs,
            torch.full(dis_fake_outputs.size(), 0, dtype=torch.float,
                       device=self.device))
        dis_loss = dis_real_loss + dis_fake_loss
        dis_loss.backward()
        self.dis_optim.step()

        # Generator update.
        self.gen_optim.zero_grad()
        fakes, _ = self.G(noise)
        gen_loss = self.gen_criterion(fakes, inputs)
        gen_loss.backward()
        self.gen_optim.step()

        # Bookkeeping.
        train_dis_loss.append(dis_loss.item())
        train_dis_real_loss.append(dis_real_loss.item())
        train_dis_fake_loss.append(dis_fake_loss.item())
        train_gen_loss.append(gen_loss.item())
        loss = dis_loss + gen_loss
        train_loss.append(loss.item())
        progress_bar(
            current=batch_idx,
            total=len(self.trainloader),
            name='TRAIN',
            msg='Total Loss: %.3f | Gen Loss: %.3f | Dis Loss: %.3f '
                '(Real %.3f / Fake %.3f)' %
            (np.mean(train_loss), np.mean(train_gen_loss),
             np.mean(train_dis_loss), np.mean(train_dis_real_loss),
             np.mean(train_dis_fake_loss)))
    train_dis_loss = np.mean(train_dis_loss)
    train_dis_real_loss = np.mean(train_dis_real_loss)
    train_dis_fake_loss = np.mean(train_dis_fake_loss)
    train_gen_loss = np.mean(train_gen_loss)
    train_loss = np.mean(train_loss)
    return (train_dis_loss, train_dis_real_loss, train_dis_fake_loss,
            train_gen_loss, train_loss)
outputs = orig_net(inputs) if ii == 0: X = F.softmax(outputs, dim=-1).cpu().numpy() y = targets.cpu().numpy() else: X = np.concatenate( (X, F.softmax(outputs, dim=-1).cpu().numpy()), axis=0) y = np.concatenate((y, targets.cpu().numpy()), axis=0) loss = criterion(outputs, targets) test_loss.append(loss.cpu().item()) _, predicted = outputs.max(1) total += targets.size(0) correct += predicted.eq(targets).sum().item() progress_bar( batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' % (np.mean(test_loss), 100. * correct / total, correct, total)) preds = np.argmax(X, axis=-1) confs = np.amax(X, axis=-1) bins = { 0.1: [], 0.2: [], 0.3: [], 0.4: [], 0.5: [], 0.6: [], 0.7: [], 0.8: [], 0.9: [],
def train(epoch, optimizer, compression_scheduler=None):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    interval_loss = 0  # loss accumulated since the last periodic log
    correct = 0
    total = 0
    start_time = time.time()
    # Hard-coded in the original; len(trainloader) is likely what was meant.
    minibatches_per_epoch = 128
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(
                epoch, minibatch_id=batch_idx,
                minibatches_per_epoch=minibatches_per_epoch)
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        if compression_scheduler:
            compression_scheduler.before_backward_pass(
                epoch, minibatch_id=batch_idx,
                minibatches_per_epoch=minibatches_per_epoch, loss=loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        interval_loss += loss.item()
        _, predicted = outputs.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (batch_idx + 1), 100. * correct / total, correct,
             total))
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(
                epoch, minibatch_id=batch_idx,
                minibatches_per_epoch=minibatches_per_epoch)
        if batch_idx % 200 == 0 and batch_idx > 0:
            # Average over the last 200 batches only (the original divided
            # the cumulative epoch loss by 200, overstating cur_loss):
            cur_loss = interval_loss / 200
            elapsed = time.time() - start_time
            lr = optimizer.param_groups[0]['lr']
            msglogger.info(
                '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.4f} | ms/batch {:5.2f} '
                '| loss {:5.2f} | ppl {:8.2f}'.format(
                    epoch, batch_idx, len(trainloader) // 200, lr,
                    elapsed * 1000 / 200, cur_loss, math.exp(cur_loss)))
            interval_loss = 0
            start_time = time.time()
            stats = ('Performance/Training/',
                     OrderedDict([('Loss', cur_loss),
                                  ('Perplexity', math.exp(cur_loss)),
                                  ('LR', lr),
                                  ('Batch Time', elapsed * 1000)]))
            steps_completed = batch_idx + 1
            distiller.log_training_progress(stats, net.named_parameters(),
                                            epoch, steps_completed,
                                            minibatches_per_epoch, 200,
                                            [tflogger])
def glove(mat, rownames, n=100, xmax=100, alpha=0.75, iterations=100, learning_rate=0.05, display_progress=True): """Basic GloVe. Parameters ---------- mat : 2d np.array This must be a square count matrix. rownames : list of str or None Not used; it's an argument only for consistency with other methods defined here. n : int (default: 100) The dimensionality of the output vectors. xmax : int (default: 100) Words with frequency greater than this are given weight 1.0. Words with frequency under this are given weight (c/xmax)**alpha where c is their count in mat (see the paper, eq. (9)). alpha : float (default: 0.75) Exponent in the weighting function (see the paper, eq. (9)). iterations : int (default: 100) Number of training epochs. learning_rate : float (default: 0.05) Controls the rate of SGD weight updates. display_progress : bool (default: True) Whether to print iteration number and current error to stdout. Returns ------- (np.array, list of str) The first member is the learned GloVe matrix and the second is `rownames` (unchanged). """ m = mat.shape[0] W = utils.randmatrix(m, n) # Word weights. C = utils.randmatrix(m, n) # Context weights. B = utils.randmatrix(2, m) # Word and context biases. indices = list(range(m)) for iteration in range(iterations): error = 0.0 random.shuffle(indices) for i, j in itertools.product(indices, indices): if mat[i,j] > 0.0: # Weighting function from eq. (9) weight = (mat[i,j] / xmax)**alpha if mat[i,j] < xmax else 1.0 # Cost is J' based on eq. (8) in the paper: diff = np.dot(W[i], C[j]) + B[0,i] + B[1,j] - np.log(mat[i,j]) fdiff = diff * weight # Gradients: wgrad = fdiff * C[j] cgrad = fdiff * W[i] wbgrad = fdiff wcgrad = fdiff # Updates: W[i] -= (learning_rate * wgrad) C[j] -= (learning_rate * cgrad) B[0,i] -= (learning_rate * wbgrad) B[1,j] -= (learning_rate * wcgrad) # One-half squared error term: error += 0.5 * weight * (diff**2) if display_progress: utils.progress_bar("iteration %s: error %s" % (iteration, error)) if display_progress: sys.stderr.write('\n') # Return the sum of the word and context matrices, per the advice # in section 4.2: return (W + C, rownames)
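# A quick worked example of the eq. (9) weighting used above: counts below
# xmax are damped by (c/xmax)**alpha; counts at or above xmax get weight 1.0.
xmax, alpha = 100, 0.75
for c in (1, 10, 100, 1000):
    weight = (c / xmax)**alpha if c < xmax else 1.0
    print(c, round(weight, 3))   # 1 -> 0.032, 10 -> 0.178, 100+ -> 1.0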
def test(epoch):
    global best_acc_top1
    global best_acc_top5
    global best_acc_top1_avg
    global best_acc_top5_avg
    global best_top1_index
    global best_top5_index
    global best_epoch
    global best_epoch_top1
    global best_epoch_top5
    losses = []
    top1 = []
    top5 = []
    for i in range(train_models_num):
        net[i].eval()
        losses.append(AverageMeter())
        top1.append(AverageMeter())
        top5.append(AverageMeter())
    losses_global = AverageMeter()
    top1_global = AverageMeter()
    top5_global = AverageMeter()
    with torch.no_grad():
        for batch_idx, (data_places, data_imagenet,
                        targets) in enumerate(testloader):
            data_places, data_imagenet, targets = data_places.to(
                device), data_imagenet.to(device), targets.to(device)
            outputs = []
            for i in range(train_models_num):
                outputs.append(net[i](data_places, data_imagenet))
            loss = []
            for i in range(train_models_num):
                loss.append(criterion[i](outputs[i], targets))
            for i in range(train_models_num):
                prec1, prec5 = accuracy_topk(outputs[i].data,
                                             targets,
                                             topk=(1, 5))
                losses[i].update(loss[i].item(), data_places.size(0))
                top1[i].update(prec1.item(), data_places.size(0))
                top5[i].update(prec5.item(), data_places.size(0))
                losses_global.update(loss[i].item(), data_places.size(0))
                top1_global.update(prec1.item(), data_places.size(0))
                top5_global.update(prec5.item(), data_places.size(0))
            progress_bar(
                batch_idx, len(testloader), 'Loss: {loss.avg:.4f} | '
                'Prec@1: {top1.avg:.3f} | '
                'Prec@5: {top5.avg:.3f}'.format(loss=losses_global,
                                                top1=top1_global,
                                                top5=top5_global))
    # Save checkpoint.
    acc_top1 = []
    acc_top5 = []
    current_best = []
    for i in range(train_models_num):
        acc_top1.append(top1[i].avg)
        acc_top5.append(top5[i].avg)
        current_best.append(False)
    for i in range(train_models_num):
        if best_acc_top1_models[i] < acc_top1[i]:
            best_acc_top1_models[i] = acc_top1[i]
            best_epoch_top1_models[i] = epoch
            current_best[i] = True
        if best_acc_top5_models[i] < acc_top5[i]:
            best_acc_top5_models[i] = acc_top5[i]
            best_epoch_top5_models[i] = epoch
    best_acc_top1_avg = sum(best_acc_top1_models) / float(
        len(best_acc_top1_models))
    best_acc_top5_avg = sum(best_acc_top5_models) / float(
        len(best_acc_top5_models))
    max_acc_top1_index = acc_top1.index(max(acc_top1))
    max_acc_top5_index = acc_top5.index(max(acc_top5))
    max_acc_top1_value = max(acc_top1)
    max_acc_top5_value = max(acc_top5)
    if ((epoch + 1) % model_save_period_epoch) == 0:
        for i in range(train_models_num):
            state = {
                'net': net[i].state_dict(),
                'acc_top1': acc_top1[i],
                'acc_top5': acc_top5[i],
                'epoch': epoch,
            }
            torch.save(
                state,
                os.path.join(model_save_path,
                             'weights_idx_%04d_lastest.pt' % (i)))
    if max_acc_top1_value > best_acc_top1:
        print('Saving... best test accuracy-top1')
        state = {
            'net': net[max_acc_top1_index].state_dict(),
            'acc_top1': max_acc_top1_value,
            'acc_top5': acc_top5[max_acc_top1_index],
            'epoch': epoch,
        }
        torch.save(state, os.path.join(model_save_path, 'ckpt.pt'))
        best_acc_top1 = max_acc_top1_value
        best_top1_index = max_acc_top1_index
        best_epoch_top1 = epoch
        best_epoch = epoch
    if max_acc_top5_value > best_acc_top5:
        print('Saving... best test accuracy-top5')
        # Use the top-5 winner here; the original saved
        # net[max_acc_top1_index] with stale loop-variable accuracies.
        state = {
            'net': net[max_acc_top5_index].state_dict(),
            'acc_top1': acc_top1[max_acc_top5_index],
            'acc_top5': max_acc_top5_value,
            'epoch': epoch,
        }
        torch.save(state, os.path.join(model_save_path, 'ckpt_top5.pt'))
        best_acc_top5 = max_acc_top5_value
        best_top5_index = max_acc_top5_index
        best_epoch_top5 = epoch
        best_epoch = epoch
    for i in range(train_models_num):
        if current_best[i]:
            print('Saving... best test accuracy-top1 model: %d' % (i))
            state = {
                'net': net[i].state_dict(),
                'acc_top1': acc_top1[i],  # was max_acc_top1_value (copy-paste slip)
                'acc_top5': acc_top5[i],
                'epoch': epoch,
            }
            torch.save(
                state,
                os.path.join(model_save_path, 'ckpt_top1_idx_%04d.pt' % (i)))
    print('The best test accuracy-top1: %f epoch: %d index: %d' %
          (best_acc_top1, best_epoch_top1, best_top1_index))
    print('The best test accuracy-top5: %f epoch: %d index: %d' %
          (best_acc_top5, best_epoch_top5, best_top5_index))
    print('The best test avg acc-top1: %f%% acc-top5: %f%%' %
          (best_acc_top1_avg, best_acc_top5_avg))
    for i in range(train_models_num):
        print('%03d_top1_%.3f%%_epoch_%d_top5_%.4f%%_epoch_%d' %
              (i, best_acc_top1_models[i], best_epoch_top1_models[i],
               best_acc_top5_models[i], best_epoch_top5_models[i]))
    print('')
    for i in range(train_models_num):
        print(
            'model number: %03d | loss: %.3f | best acc: (%.3f%%, %.3f%%) current acc: (%.3f%%, %.3f%%) | best epoch: (%d, %d)'
            % (i, losses[i].avg, best_acc_top1_models[i],
               best_acc_top5_models[i], acc_top1[i], acc_top5[i],
               best_epoch_top1_models[i], best_epoch_top5_models[i]))
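# The ensemble `test` above leans on an `AverageMeter` helper defined
# elsewhere. A minimal sketch matching the `.update(val, n)` / `.avg` usage
# (field names follow the common PyTorch-examples pattern and are assumed):
class AverageMeter(object):
    """Tracks a running weighted mean of per-batch values."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        # `val` is a per-batch average, weighted by the batch size `n`.
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count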
def train(epoch, opt, loss_list, local_curv_list, max_curv_list,
          min_curv_list, lr_list, lr_t_list, mu_t_list, dr_list, mu_list,
          dist_list, grad_var_list, lr_g_norm_list, lr_g_norm_squared_list,
          move_lr_g_norm_list, move_lr_g_norm_squared_list,
          lr_grad_norm_clamp_act_list, fast_view_act_list):
    logging.info('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    if epoch == 151:
        if args.opt_method == "YF":
            optimizer.set_lr_factor(optimizer.get_lr_factor() / 10.0)
        else:
            for group in optimizer.param_groups:
                group['lr'] /= 10.0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        # Record the current lr/momentum. The original appended the undefined
        # names `lr_curr`/`momentum_curr`; this restores the logic from its
        # own commented-out lines.
        if args.opt_method == "YF":
            lr_list.append(optimizer._optimizer.param_groups[0]['lr'])
            mu_list.append(optimizer._optimizer.param_groups[0]['momentum'])
        else:
            lr_list.append(optimizer.param_groups[0]['lr'])
            mu_list.append(optimizer.param_groups[0]['momentum'])
        loss_list.append(loss.item())
        train_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets).cpu().sum().item()
        progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (batch_idx + 1), 100. * float(correct) /
             float(total), correct, total))
    return (loss_list, local_curv_list, max_curv_list, min_curv_list, lr_list,
            lr_t_list, mu_t_list, dr_list, mu_list, dist_list, grad_var_list,
            lr_g_norm_list, lr_g_norm_squared_list, move_lr_g_norm_list,
            move_lr_g_norm_squared_list, lr_grad_norm_clamp_act_list,
            fast_view_act_list)
def begin_train(self):
    N_EPOCHS = 30
    N_BATCH = self.batchsize
    N_TRAIN_INS = len(self.train_ending)
    best_val_accuracy = 0
    best_test_accuracy = 0
    test_threshold = 5000 // N_BATCH
    prev_percentage = 0.0
    speed = 0.0
    batch_count = 0.0
    for epoch in range(N_EPOCHS):
        print("epoch ", epoch, ":")
        shuffled_index_list = utils.shuffle_index(N_TRAIN_INS)
        max_batch = N_TRAIN_INS // N_BATCH
        start_time = time.time()
        total_cost = 0.0
        total_err = 0.0
        for batch in range(max_batch):
            batch_index_list = [
                shuffled_index_list[i]
                for i in range(batch * N_BATCH, (batch + 1) * N_BATCH)
            ]
            train_story = [
                self.train_story[index] for index in batch_index_list
            ]
            train_ending = [
                self.train_ending[index] for index in batch_index_list
            ]
            neg_end_index_matrix = np.random.randint(N_TRAIN_INS,
                                                     size=(N_BATCH, ))
            while np.any(
                (np.asarray(batch_index_list) - neg_end_index_matrix) == 0):
                neg_end_index_matrix = np.random.randint(N_TRAIN_INS,
                                                         size=(N_BATCH, ))
            neg_end1 = [
                self.train_ending[index] for index in neg_end_index_matrix
            ]
            answer = np.random.randint(2, size=N_BATCH)
            target1 = 1 - answer
            target2 = answer
            end1 = []
            end2 = []
            for i in range(N_BATCH):
                if answer[i] == 0:
                    end1.append(train_ending[i])
                    end2.append(neg_end1[i])
                else:
                    end1.append(neg_end1[i])
                    end2.append(train_ending[i])
            story_sent1_feature = np.zeros((N_BATCH, self.ngram_feature_dim))
            story_sent2_feature = np.zeros((N_BATCH, self.ngram_feature_dim))
            story_sent3_feature = np.zeros((N_BATCH, self.ngram_feature_dim))
            story_sent4_feature = np.zeros((N_BATCH, self.ngram_feature_dim))
            train_end1_feature = np.zeros((N_BATCH, self.ngram_feature_dim))
            train_end2_feature = np.zeros((N_BATCH, self.ngram_feature_dim))
            for i in range(N_BATCH):
                story_sent1_feature[i] = self.ngram_feature_generator(
                    train_story[i][0], 4)
                story_sent2_feature[i] = self.ngram_feature_generator(
                    train_story[i][1], 4)
                story_sent3_feature[i] = self.ngram_feature_generator(
                    train_story[i][2], 4)
                story_sent4_feature[i] = self.ngram_feature_generator(
                    train_story[i][3], 4)
                train_end1_feature[i] = self.ngram_feature_generator(end1[i], 4)
                train_end2_feature[i] = self.ngram_feature_generator(end2[i], 4)
            cost, pred1, pred2 = self.train_func(
                story_sent1_feature, story_sent2_feature, story_sent3_feature,
                story_sent4_feature, train_end1_feature, train_end2_feature,
                target1, target2)
            total_cost += cost
            prediction = np.zeros(N_BATCH)
            # Take the argmax per row; the original argmax'ed the flattened
            # array and produced a single scalar for the whole batch.
            predict_vec = np.argmax(np.concatenate((pred1, pred2), axis=1),
                                    axis=1)
            for i in range(N_BATCH):
                # Columns 0 and 3 favor ending 1; columns 1 and 2 favor ending 2.
                prediction[i] = 1 if predict_vec[i] in (0, 3) else 0
            total_err += (abs(prediction - answer)).sum()
            if batch_count % test_threshold == 0:
                if batch_count == 0:
                    print("initial test")
                else:
                    print(" ")
                    print("training set accuracy: ",
                          (1 - (total_err / (batch * N_BATCH))) * 100, "%")
                print("test on valid set...")
                val_result = self.val_set_test()
                print("accuracy is: ", val_result * 100, "%")
                if val_result > best_val_accuracy:
                    print("new best! test on test set...")
                    best_val_accuracy = val_result
                    test_accuracy = self.test_set_test()
                    print("test set accuracy: ", test_accuracy * 100, "%")
                    if test_accuracy > best_test_accuracy:
                        best_test_accuracy = test_accuracy
            batch_count += 1
            if batch_count != 0 and batch_count % 10 == 0:
                speed = N_BATCH * 10.0 / (time.time() - start_time)
                start_time = time.time()
            percentage = ((batch_count % test_threshold) +
                          1) / test_threshold * 100
            if percentage - prev_percentage >= 1:
                utils.progress_bar(percentage, speed)
        print("")
        print("=======================================")
        print("epoch summary:")
        print("total cost in this epoch: ", total_cost)
        print("=======================================")
def glove(df, n=100, xmax=100, alpha=0.75, max_iter=100, eta=0.05, tol=1e-4,
          display_progress=True):
    """Basic GloVe. This is mainly here as a reference implementation.
    We recommend using `mittens.GloVe` instead.

    Parameters
    ----------
    df : pd.DataFrame or np.array
        This must be a square matrix.
    n : int (default: 100)
        The dimensionality of the output vectors.
    xmax : int (default: 100)
        Words with frequency greater than this are given weight 1.0.
        Words with frequency under this are given weight (c/xmax)**alpha,
        where c is their count in `df` (see the paper, eq. (9)).
    alpha : float (default: 0.75)
        Exponent in the weighting function (see the paper, eq. (9)).
    max_iter : int (default: 100)
        Number of training epochs.
    eta : float (default: 0.05)
        Controls the rate of SGD weight updates.
    tol : float (default: 1e-4)
        Stopping criterion for the loss.
    display_progress : bool (default: True)
        Whether to print iteration number and current error to stdout.

    Returns
    -------
    pd.DataFrame
        With dimension `(df.shape[0], n)`

    """
    X = df.values if isinstance(df, pd.DataFrame) else df
    m = X.shape[0]
    # Parameters:
    W = utils.randmatrix(m, n)  # Word weights.
    C = utils.randmatrix(m, n)  # Context weights.
    B = utils.randmatrix(2, m)  # Word and context biases.
    # Precomputable GloVe values:
    X_log = utils.log_of_array_ignoring_zeros(X)
    X_weights = (np.minimum(X, xmax) / xmax)**alpha  # eq. (9)
    # Learning:
    indices = list(range(m))
    for iteration in range(max_iter):
        error = 0.0
        random.shuffle(indices)
        for i, j in itertools.product(indices, indices):
            if X[i, j] > 0.0:
                weight = X_weights[i, j]
                # Cost is J' based on eq. (8) in the paper:
                diff = W[i].dot(C[j]) + B[0, i] + B[1, j] - X_log[i, j]
                fdiff = diff * weight
                # Gradients:
                wgrad = fdiff * C[j]
                cgrad = fdiff * W[i]
                wbgrad = fdiff
                wcgrad = fdiff
                # Updates:
                W[i] -= eta * wgrad
                C[j] -= eta * cgrad
                B[0, i] -= eta * wbgrad
                B[1, j] -= eta * wcgrad
                # One-half squared error term:
                error += 0.5 * weight * (diff**2)
        error /= m
        if display_progress:
            if error < tol:
                utils.progress_bar("Stopping at iteration {} with "
                                   "error {}".format(iteration, error))
                break
            else:
                utils.progress_bar("Iteration {}: error {}".format(
                    iteration, error))
    if display_progress:
        sys.stderr.write('\n')
    # Return the sum of the word and context matrices, per the advice
    # in section 4.2:
    G = W + C
    if isinstance(df, pd.DataFrame):
        G = pd.DataFrame(G, index=df.index)
    return G
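# Toy usage of the reference `glove` above on a tiny symmetric co-occurrence
# matrix (illustrative data only):
import numpy as np
import pandas as pd

counts = np.array([[10.,  2.,  0.,  1.],
                   [ 2., 15.,  3.,  0.],
                   [ 0.,  3.,  8.,  4.],
                   [ 1.,  0.,  4., 12.]])
df = pd.DataFrame(counts, index=list('abcd'), columns=list('abcd'))
G = glove(df, n=5, max_iter=50)
print(G.shape)   # (4, 5), indexed by the vocabulary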
def run(self):
    t1 = time.time()
    while not self.obj.pool.isFinished():
        self.dl_speed = self.calcDownloadSpeed(self.shared_var.value)
        if self.dl_speed > 0:
            self.eta = self.calcETA(
                (self.obj.filesize - self.shared_var.value) / self.dl_speed)
        if self.show_output:
            if self.obj.filesize:
                status = r"%.2f / %.2f MB @ %.2fKB/s %s [%3.2f%%, %ds left]   " % (
                    self.shared_var.value / 1024.0**2,
                    self.obj.filesize / 1024.0**2, self.dl_speed / 1024.0,
                    utils.progress_bar(
                        1.0 * self.shared_var.value / self.obj.filesize),
                    self.shared_var.value * 100.0 / self.obj.filesize,
                    self.eta)
            else:
                status = r"%.2f / ??? MB @ %.2fKB/s" % (
                    self.shared_var.value / 1024.0**2, self.dl_speed / 1024.0)
            # Rewind the cursor so the next status line overwrites this one:
            status = status + chr(8) * (len(status) + 1)
            print(status, end='', flush=True)
        time.sleep(0.1)
    if self.obj._killed:
        self.logger.debug("File download process has been stopped.")
        return
    if self.show_output:
        if self.obj.filesize:
            print(r"%.2f / %.2f MB @ %.2fKB/s %s [100%%, 0s left]   " %
                  (self.obj.filesize / 1024.0**2,
                   self.obj.filesize / 1024.0**2, self.dl_speed / 1024.0,
                   utils.progress_bar(1.0)))
        else:
            print(r"%.2f / %.2f MB @ %.2fKB/s" %
                  (self.shared_var.value / 1024.0**2,
                   self.shared_var.value / 1024.0**2, self.dl_speed / 1024.0))
    t2 = time.time()
    self.dl_time = float(t2 - t1)
    # Combining files and the "combining" status are handled on
    # post_threadpool_thread; just wait for it here.
    while self.obj.post_threadpool_thread.is_alive():
        time.sleep(0.1)
    self.obj.status = "finished"
    self.logger.debug("File downloaded within %.2f seconds." % self.dl_time)
def run(self):
    # Called by Qt once the thread environment has been set up.
    url = self.songObj.url
    filesize = self.songObj.filesize
    audio_path = r"%s\%s" % (self.dl_dir,
                             self.songObj.GetProperFilename('mp3'))
    video_path = r"%s\%s" % (self.dl_dir, self.songObj.GetProperFilename())
    dest_audio_path = r"%s\%s" % (config.temp_dir,
                                  "%s.mp3" % utils.get_rand_string())
    if not self.isMultimediaFile:
        dest_path = r"%s\%s" % (config.temp_dir, utils.get_rand_string())
    elif self.songObj.ext == "mp3":
        dest_path = dest_audio_path
    else:  # video
        dest_path = r"%s\%s" % (config.temp_dir,
                                "%s.vid" % utils.get_rand_string())
    dl_obj = Main.SmartDL(url, dest_path, logger=log)
    dl_obj.start()
    self.dl_obj = dl_obj
    while not dl_obj.isFinished():
        if dl_obj.status == 'combining':
            self.status.emit(tr("Combining Parts..."))
            break
        self.downloadProgress.emit(int(dl_obj.get_progress() * 100),
                                   dl_obj.get_speed(), dl_obj.get_eta(),
                                   dl_obj.get_downloaded_size(), filesize)
        time.sleep(0.1)
    while not dl_obj.isFinished():
        # If we broke out of the last loop, parts are still being combined;
        # wait for that to finish.
        time.sleep(0.1)
    if dl_obj._failed:
        log.error("Got DownloadFailedException() for %s" % url)
        self.error.emit(Main.SmartDL.DownloadFailedException())
        self.terminate()
        return
    self.downloadProgress.emit(100, dl_obj.get_speed(), dl_obj.get_eta(),
                               filesize, filesize)
    if self.isVideo:
        dest_video_path = dest_path
        t1 = time.time()
        if config.downloadAudio:  # if we want an audio file
            log.debug("Encoding Audio...")
            self.status.emit(tr("Encoding Audio..."))
            cmd = r'bin\ffmpeg -y -i "%s" -vn -ac 2 -b:a %d -f mp3 "%s"' % (
                dest_video_path,
                config.youtube_audio_bitrates[self.songObj.video_itag.quality],
                dest_audio_path)
            log.debug("Running '%s'" % cmd)
            est_final_filesize = self.songObj.final_filesize
            print("Encoding: %s (%.2f MB) to %s" %
                  (dest_audio_path, est_final_filesize / 1024.0**2,
                   self.dl_dir))
            self.encProgress.emit(0)
            proc = utils.launch_without_console(cmd)
            old_encoded_fs_counter = 0
            while True:
                # stderr is bytes under Python 3; decode before parsing.
                out = proc.stderr.read(54).decode('utf-8', 'ignore')
                if not out:
                    break
                # size=    2930kB time=00:03:07.49 bitrate= 128.0kbits/s
                if 'size=' in out and 'time=' in out:
                    encoded_fs_counter = out.split('size=')[1].split(
                        'kB')[0].strip()
                    if encoded_fs_counter.isdigit():
                        encoded_fs_counter = int(encoded_fs_counter)
                        if encoded_fs_counter > old_encoded_fs_counter:
                            status = r"Encoding: %.2f MB / %.2f MB %s [%3.2f%%]" % (
                                encoded_fs_counter / 1024.0,
                                est_final_filesize / 1024.0**2,
                                utils.progress_bar(
                                    1.0 * encoded_fs_counter * 1024 /
                                    est_final_filesize),
                                encoded_fs_counter * 1024 * 100.0 /
                                est_final_filesize)
                            status = status + chr(8) * (len(status) + 1)
                            print(status, end='', flush=True)
                            self.encProgress.emit(
                                int(encoded_fs_counter * 1024 * 100.0 /
                                    est_final_filesize))
                            old_encoded_fs_counter = encoded_fs_counter
                time.sleep(0.1)
            self.encProgress.emit(100)
            proc.wait()
            t2 = time.time()
            self.encode_time += t2 - t1
            if not config.downloadVideo:
                log.debug("Removing %s..." % dest_path)
                os.unlink(dest_path)
    if config.downloadAudio and config.trimSilence:
        t1 = time.time()
        log.debug("Trimming Silence...")
        self.status.emit(tr("Trimming Silence from edges..."))
        temp_audio_trimmed_path = "%s.tmp.mp3" % dest_audio_path
        if os.path.exists(temp_audio_trimmed_path):
            os.unlink(temp_audio_trimmed_path)
        os.rename(dest_audio_path, temp_audio_trimmed_path)
        cmd = r'bin\sox -S "%s" "%s" silence 1 0.1 1%% reverse silence 1 0.1 1%% reverse' % (
            temp_audio_trimmed_path, dest_audio_path)
        log.debug("Running '%s'" % cmd)
        est_final_filesize = self.songObj.final_filesize
        print("Trimming Silence: %s (%.2f MB) to %s" %
              (dest_audio_path, est_final_filesize / 1024.0**2, self.dl_dir))
        self.encProgress.emit(0)
        proc = utils.launch_without_console(cmd)
        samples = 1
        in_value = 0
        out_value = 0
        while True:
            out = proc.stderr.read(70).decode('utf-8', 'ignore')
            if not out:
                break
            # Duration : 00:04:24.06 = 11644870 samples = 19804.2 CDDA sectors
            if 'samples =' in out:
                samples = out.split('samples')[0].split('=')[-1].strip()
                if samples.isdigit():
                    samples = int(samples)
            # In:100%  00:04:23.96 [00:00:00.09] Out:11.6M ... Clip:400
            if 'In:' in out:
                t = out.split('In:')[1].split('.')[0].strip()
                if t.isdigit() and int(t) > in_value:
                    in_value = int(t)
            if 'Out:' in out:
                t = out.split('Out:')[1].split(' ')[0].strip()
                try:
                    if 'k' in t:
                        out_value = float(t.split('k')[0]) * 1000
                    elif 'M' in t:
                        out_value = float(t.split('M')[0]) * 1000000
                except ValueError:
                    pass
            progress = in_value * 0.3 + (out_value / samples * 100) * 0.7 + 1
            status = r"Trimming Silence: %s" % utils.progress_bar(
                progress / 100.0)
            status = status + chr(8) * (len(status) + 1)
            print(status, end='', flush=True)
            self.encProgress.emit(int(progress))
            time.sleep(0.1)
        self.encProgress.emit(100)
        proc.wait()
        t2 = time.time()
        self.encode_time += t2 - t1
        if not os.path.exists(dest_audio_path):
            log.error('SoX failed: %s' % out)
    log.debug("Copying Files...")
    self.status.emit(tr("Copying Files..."))
    if self.isVideo:
        # IMPROVE: this crashes when the video is open in a media player:
        # os.unlink removes the file while the player still holds it.
        if config.downloadAudio:
            log.debug("Moving %s to %s" % (dest_audio_path, audio_path))
            shutil.move(dest_audio_path, audio_path)
        if config.downloadVideo:
            log.debug("Moving %s to %s" % (dest_video_path, video_path))
            shutil.move(dest_video_path, video_path)
    if self.isAudio:
        log.debug("Moving %s to %s" % (dest_path, audio_path))
        shutil.move(dest_path, audio_path)
    dl_time = dl_obj.get_dl_time()
    dl_time_s = int(dl_time) % 60
    dl_time_m = int(dl_time) // 60
    if filesize / dl_time / 1024**2 > 1:  # if the rate is in MB/s
        if dl_time_m:
            stats_str = tr('Download: %d:%.2d (%.2f MB/s)') % (
                dl_time_m, dl_time_s, filesize / dl_time / 1024**2)
        else:
            stats_str = tr('Download: %ds (%.2f MB/s)') % (
                dl_time, filesize / dl_time / 1024**2)
    else:
        if dl_time_m:
            stats_str = tr('Download: %d:%.2d (%.2f KB/s)') % (
                dl_time_m, dl_time_s, filesize / dl_time / 1024)
        else:
            stats_str = tr('Download: %ds (%.2f KB/s)') % (
                dl_time, filesize / dl_time / 1024)
    if self.encode_time:
        stats_str += tr('; Encoded: %ds') % self.encode_time
    self.status.emit(stats_str)
def train(epoch):
    print('\nEpoch: %d' % epoch)
    netE.train()
    netG_Less.train()
    netG_More.train()
    netG_Total.train()
    netD.train()
    netZ2Y.train()
    netY2Z.train()
    train_loss = 0
    train_MLP = 0
    train_D = 0
    train_kl = 0
    train_less_rec = 0
    train_more_rec = 0
    train_total_rec = 0
    correct = 0
    total = 0
    for batch_idx, (x, targets) in enumerate(trainloader):
        x, targets = x.to(device), targets.to(device)

        ## Update MLP
        z, mu, logvar, y = netE(x)
        rec_y = netZ2Y(z)
        rec_z = netY2Z(y)
        # MLP loss. (A summed F.mse_loss divided by the batch size can be
        # used instead to head off gradient explosion / NaNs.)
        loss_MLP = gamma * (loss_MSE(rec_z, z.detach()) +
                            loss_MSE(rec_y, y.detach()))
        optimizerMLP.zero_grad()
        loss_MLP.backward()
        optimizerMLP.step()

        # D loss
        index = np.arange(x.size()[0])
        np.random.shuffle(index)
        y_shuffle = y.clone()[index, :]
        real_score = netD(torch.cat([y.detach(), y.detach()], dim=1))
        fake_score = netD(torch.cat([y.detach(), y_shuffle.detach()], dim=1))
        # (A binary_cross_entropy variant against ones/zeros targets is the
        # numerically safer alternative here as well.)
        loss_D = -alpha * torch.mean(
            torch.log(real_score) + torch.log(1 - fake_score))
        optimizerD.zero_grad()
        loss_D.backward()
        optimizerD.step()

        ## Update G
        z, mu, logvar, y = netE(x)
        rec_y = netZ2Y(z)
        rec_z = netY2Z(y)
        rec_less_x = netG_Less(z)
        rec_more_x = netG_More(y)
        rec_x = netG_Total(torch.cat([z, y], dim=1))
        loss_MLP = loss_MSE(rec_z, z.detach()) + loss_MSE(rec_y, y.detach())
        index = np.arange(x.size()[0])
        np.random.shuffle(index)
        y_shuffle = y.clone()[index, :]
        real_score = netD(torch.cat([y, y.detach()], dim=1))
        fake_score = netD(torch.cat([y, y_shuffle.detach()], dim=1))
        loss_D = -torch.mean(torch.log(real_score) + torch.log(1 - fake_score))
        loss_kl = loss_KL(mu, logvar)
        loss_less_rec = criterion(rec_less_x, targets)
        loss_more_rec = criterion(rec_more_x, targets)
        loss_total_rec = criterion(rec_x, targets)
        loss_rec = loss_less_rec + loss_more_rec + loss_total_rec
        # total loss
        loss_total = (rec * loss_rec + beta * loss_kl + alpha * loss_D -
                      gamma * loss_MLP)
        optimizerG.zero_grad()
        loss_total.backward()
        optimizerG.step()

        train_loss += loss_total.item()
        train_MLP += loss_MLP.item()
        train_D += loss_D.item()
        train_kl += loss_kl.item()
        train_less_rec += loss_less_rec.item()
        train_more_rec += loss_more_rec.item()
        train_total_rec += loss_total_rec.item()
        _, predicted = rec_x.max(1)
        total += targets.size(0)
        correct += predicted.eq(targets).sum().item()
        progress_bar(
            batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
            (train_loss / (batch_idx + 1), 100. * correct / total, correct,
             total))
    writer.add_scalar('train_loss', train_loss, epoch)
    writer.add_scalar('train_MLP', train_MLP, epoch)
    writer.add_scalar('train_D', train_D, epoch)
    writer.add_scalar('train_kl', train_kl, epoch)
    writer.add_scalar('train_less_rec', train_less_rec, epoch)
    writer.add_scalar('train_more_rec', train_more_rec, epoch)
    writer.add_scalar('train_total_rec', train_total_rec, epoch)
def test(epoch, loader=testloader, msg='Test'):
    global best_acc
    net.eval()
    (batch_norm1, batch_norm2, classifier1, classifier2, classifier,
     batch_norm_bottleneck) = classifier_blocks
    for bn in [batch_norm1, batch_norm2, batch_norm_bottleneck]:
        if bn is not None:
            bn.eval()
    test_loss = 0
    correct_top1, correct_top5 = 0, 0
    total = 0
    outputs_list = []
    with torch.no_grad():
        for batch_idx, (inputs, targets) in enumerate(loader):
            inputs, targets = inputs.to(device), targets.to(device)
            patches_shape = kernel_convolution_2.shape
            operator = (V * ev_rescale) @ V.t()
            zca_patches = kernel_convolution_2.view(patches_shape[0],
                                                    -1) @ operator
            # Normalize each patch; parenthesize so the epsilon guards the
            # norm (the original added 1e-8 to the quotient instead):
            zca_patches_normalized = zca_patches / (
                zca_patches.norm(dim=1, keepdim=True) + 1e-8)
            kernel_conv = zca_patches_normalized.view(
                patches_shape).contiguous()
            outputs1, outputs2 = net(inputs, kernel_conv)
            outputs, targets = compute_classifier_outputs(
                outputs1, outputs2, targets, args, batch_norm1, batch_norm2,
                classifier1, classifier2, classifier, batch_norm_bottleneck,
                train=False)
            loss = criterion(outputs, targets)
            outputs_list.append(outputs)
            test_loss += loss.item()
            cor_top1, cor_top5 = correct_topk(outputs, targets, topk=(1, 5))
            correct_top1 += cor_top1
            correct_top5 += cor_top5
            _, predicted = outputs.max(1)
            total += targets.size(0)
            progress_bar(batch_idx, len(loader),
                         'Test, epoch: %i; Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                         (epoch, test_loss / (batch_idx + 1),
                          100. * correct_top1 / total, correct_top1, total),
                         hide=args.no_progress_bar)
    test_loss /= (batch_idx + 1)
    acc1, acc5 = 100. * correct_top1 / total, 100. * correct_top5 / total
    if args.no_progress_bar:
        print(f'{msg}, epoch: {epoch}; Loss: {test_loss:.2f} | '
              f'Acc: {acc1:.1f} @1 {acc5:.1f} @5 ; threshold {args.bias:.3f}')
    outputs = torch.cat(outputs_list, dim=0).cpu()
    if args.lambda_1 > 0.:
        group_sparsity_norm = torch.norm(torch.cat(
            [classifier1.weight, classifier2.weight], dim=0),
                                         dim=0,
                                         p=2)
        print(f'non_zero groups {(group_sparsity_norm != 0).int().sum()}')
    return acc1, outputs
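# The whitening step above assumes an eigendecomposition `V`, `ev_rescale`
# computed elsewhere from the patch statistics. A sketch of one standard way
# to derive such an operator (the eps value and the inverse-sqrt rescale are
# assumptions, not this script's code):
import torch

def zca_operator(patches, eps=1e-3):
    """Build a ZCA-whitening operator V diag(1/sqrt(l+eps)) V^T."""
    flat = patches.view(patches.shape[0], -1)
    flat = flat - flat.mean(dim=0, keepdim=True)
    cov = flat.t() @ flat / (flat.shape[0] - 1)
    evals, V = torch.linalg.eigh(cov)            # ascending eigenvalues
    ev_rescale = 1.0 / torch.sqrt(evals + eps)   # whitening rescale
    return (V * ev_rescale) @ V.t()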
def begin_train(self):
    N_EPOCHS = 30
    N_BATCH = self.batchsize
    N_TRAIN_INS = len(self.val_answer)
    best_val_accuracy = 0
    best_test_accuracy = 0
    test_threshold = 1000 // N_BATCH
    prev_percentage = 0.0
    speed = 0.0
    batch_count = 0.0
    start_batch = 0.0
    for epoch in range(N_EPOCHS):
        print("epoch ", epoch, ":")
        shuffled_index_list = utils.shuffle_index(N_TRAIN_INS)
        max_batch = N_TRAIN_INS // N_BATCH
        start_time = time.time()
        total_cost = 0.0
        total_err_count = 0.0
        for batch in range(max_batch):
            batch_index_list = [
                shuffled_index_list[i]
                for i in range(batch * N_BATCH, (batch + 1) * N_BATCH)
            ]
            train_story = [[
                self.val_story[index][i] for index in batch_index_list
            ] for i in range(self.story_nsent)]
            train_ending = [
                self.val_ending1[index] for index in batch_index_list
            ]
            neg_end1 = [self.val_ending2[index] for index in batch_index_list]
            answer = np.asarray(
                [self.val_answer[index] for index in batch_index_list])
            target1 = 1 - answer
            target2 = answer
            end1 = []
            end2 = []
            for i in range(N_BATCH):
                end1.append(train_ending[i])
                end2.append(neg_end1[i])
            train_story_matrices = [
                utils.padding(batch_sent) for batch_sent in train_story
            ]
            train_end1_matrix = utils.padding(end1)
            train_end2_matrix = utils.padding(end2)
            train_story_mask = [
                utils.mask_generator(batch_sent) for batch_sent in train_story
            ]
            train_end1_mask = utils.mask_generator(end1)
            train_end2_mask = utils.mask_generator(end2)
            cost, prediction1, prediction2 = self.train_func(
                train_story_matrices[0], train_story_matrices[1],
                train_story_matrices[2], train_story_matrices[3],
                train_end1_matrix, train_end2_matrix, train_story_mask[0],
                train_story_mask[1], train_story_mask[2], train_story_mask[3],
                train_end1_mask, train_end2_mask, target1, target2)
            prediction = np.argmax(np.concatenate((prediction1, prediction2),
                                                  axis=1),
                                   axis=1)
            predict_answer = np.zeros((N_BATCH, ))
            for i in range(N_BATCH):
                # Columns 0 and 3 favor ending 1; columns 1 and 2 favor ending 2.
                predict_answer[i] = 1 if prediction[i] in (0, 3) else 0
            total_err_count += (abs(predict_answer - answer)).sum()
            # Peek on the val set every `test_threshold` batches:
            if batch_count % test_threshold == 0:
                if batch_count == 0:
                    print("initial test")
                else:
                    print(" ")
                    accuracy = 1.0 - (total_err_count /
                                      ((batch + 1) * N_BATCH))
                    print("training set accuracy: ", accuracy * 100, "%")
                print("test on valid set...")
                val_result = self.val_set_test()
                print("accuracy is: ", val_result * 100, "%")
                if val_result > best_val_accuracy:
                    print("new best! test on test set...")
                    best_val_accuracy = val_result
                    test_accuracy = self.test_set_test()
                    print("test set accuracy: ", test_accuracy * 100, "%")
                    if test_accuracy > best_test_accuracy:
                        best_test_accuracy = test_accuracy
            batch_count += 1.0
            percentage = ((batch_count % test_threshold) *
                          1.0) / test_threshold * 100
            if percentage - prev_percentage >= 1.0:
                speed = N_BATCH * (batch_count - start_batch) / (
                    time.time() - start_time)
                start_time = time.time()
                start_batch = batch_count
                utils.progress_bar(percentage, speed)
                prev_percentage = percentage
                if prev_percentage >= 99:
                    prev_percentage = 0.0
            total_cost += cost
        print("=======================================")
        print("epoch summary:")
        print("average cost in this epoch: ", total_cost)
        print("=======================================")
def test(epoch=0): global best_acc netE.eval() netG_Less.eval() netG_More.eval() netG_Total.eval() netD.eval() netZ2Y.eval() netY2Z.eval() # Total test_loss = 0 correct_total = 0 correct_less = 0 correct_more = 0 total = 0 with torch.no_grad(): for batch_idx, (x, targets) in enumerate(testloader): x, targets = x.to(device), targets.to(device) z, mu, logvar, y = netE(x) outputs = netG_Total(torch.cat([mu, y], dim=1)) loss = criterion(outputs, targets) test_loss += loss.item() _, predicted = outputs.max(1) total += targets.size(0) correct_total += predicted.eq(targets).sum().item() outputs_less = netG_Less(mu) _, predicted_less = outputs_less.max(1) correct_less += predicted_less.eq(targets).sum().item() outputs_more = netG_More(y) _, predicted_more = outputs_more.max(1) correct_more += predicted_more.eq(targets).sum().item() progress_bar( batch_idx, len(testloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)' % (test_loss / (batch_idx + 1), 100. * correct_total / total, correct_total, total)) # Save checkpoint. acc_total = 100. * correct_total / total acc_less = 100. * correct_less / total acc_more = 100. * correct_more / total print('Error Rate: ', 100.0 - acc_total) if args.train: writer.add_scalar('accuracy', acc_total, epoch) writer.add_scalar('accuracy_less', acc_less, epoch) writer.add_scalar('accuracy_more', acc_more, epoch) if acc_total > best_acc: print('Saving..') state = { 'netE': netE.state_dict(), 'netG_Less': netG_Less.state_dict(), 'netG_More': netG_More.state_dict(), 'netG_Total': netG_Total.state_dict(), 'netD': netD.state_dict(), 'netZ2Y': netZ2Y.state_dict(), 'netY2Z': netY2Z.state_dict(), 'acc_total': acc_total, 'acc_less': acc_less, 'acc_more': acc_more, 'epoch': epoch, } if not os.path.isdir('checkpoints'): os.mkdir('checkpoints') torch.save( state, './checkpoints/ICP_{}_{}.t7'.format(args.dataset, args.model)) best_acc = acc_total
def run(self):
    t1 = time.time()
    while not self.obj.pool.done():
        self.dl_speed = self.calcDownloadSpeed(self.shared_var.value)
        if self.dl_speed > 0:
            self.eta = self.calcETA(
                (self.obj.filesize - self.shared_var.value) / self.dl_speed)
        if self.progress_bar:
            if self.obj.filesize:
                status = r"[*] %s / %s @ %s/s %s [%3.1f%%, %s left]   " % (
                    utils.sizeof_human(self.shared_var.value),
                    utils.sizeof_human(self.obj.filesize),
                    utils.sizeof_human(self.dl_speed),
                    utils.progress_bar(
                        1.0 * self.shared_var.value / self.obj.filesize),
                    self.shared_var.value * 100.0 / self.obj.filesize,
                    utils.time_human(self.eta, fmt_short=True))
            else:
                status = r"[*] %s / ??? MB @ %s/s " % (
                    utils.sizeof_human(self.shared_var.value),
                    utils.sizeof_human(self.dl_speed))
            # Rewind the cursor so the next status line overwrites this one:
            status = status + chr(8) * (len(status) + 1)
            print(status, end='', flush=True)
        time.sleep(0.1)
    if self.obj._killed:
        self.logger.debug("File download process has been stopped.")
        return
    if self.progress_bar:
        if self.obj.filesize:
            print(r"[*] %s / %s @ %s/s %s [100%%, 0s left]    " %
                  (utils.sizeof_human(self.obj.filesize),
                   utils.sizeof_human(self.obj.filesize),
                   utils.sizeof_human(self.dl_speed),
                   utils.progress_bar(1.0)))
        else:
            # Both size fields go through sizeof_human; the original passed
            # a raw byte count as the second field.
            print(r"[*] %s / %s @ %s/s " %
                  (utils.sizeof_human(self.shared_var.value),
                   utils.sizeof_human(self.shared_var.value),
                   utils.sizeof_human(self.dl_speed)))
    t2 = time.time()
    self.dl_time = float(t2 - t1)
    while self.obj.post_threadpool_thread.is_alive():
        time.sleep(0.1)
    self.obj.pool.shutdown()
    self.obj.status = "finished"
    if not self.obj.errors:
        self.logger.debug("File downloaded within %.2f seconds." %
                          self.dl_time)
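# Both downloader threads above render the bar via `utils.progress_bar(frac)`,
# a fraction-to-string helper (note the ML snippets in this file use a
# different progress_bar(batch_idx, total, msg) convention). A minimal sketch
# of the fraction-based variant; the implementation is assumed, not taken
# from this codebase:
def progress_bar(fraction, length=20):
    """Render a '[####----]'-style bar for a fraction in [0.0, 1.0]."""
    fraction = min(max(fraction, 0.0), 1.0)
    filled = int(round(length * fraction))
    return "[%s%s]" % ("#" * filled, "-" * (length - filled))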
def train(layer, logger, shapes, args, e, data_size, trainloader):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(layer.parameters(),
                          lr=0.01,
                          momentum=0.9,
                          weight_decay=5e-4)
    optimizer.zero_grad()
    layer.train()

    def backward_rank1():
        batch_idx = 0
        grad_recv1 = torch.zeros(shapes[1])
        dist.recv(tensor=grad_recv1, src=2)
        while True:
            print(" backward batch_idx:" + str(batch_idx))
            grad_recv1 = grad_recv1.cuda(1)
            try:
                inputs, outputs = outputs_queue.get(block=True, timeout=4)
            except Empty:
                print("empty........")
                break
            inputs.requires_grad_()
            outputs.backward(grad_recv1)
            if batch_idx % args.ac == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                transfer(3, inputs.grad.cpu(), None)
                print("backend In send..")
                break
            grad_recv1 = transfer(3, inputs.grad.cpu(), shapes[1])
            print("backward send.......")
        print("backward end....")

    def backward_rank0(semaphore):
        batch_idx = 0
        grad_recv = torch.zeros(shapes[0])
        dist.recv(tensor=grad_recv, src=1)
        while True:
            print(" backward batch_idx:" + str(batch_idx))
            grad_recv = grad_recv.cuda(0)
            try:
                loss = outputs_queue.get(block=True, timeout=4)
                loss = loss.cuda(0)
            except Empty:
                print("empty........")
                break
            loss.backward(grad_recv)
            if batch_idx % args.ac == 0:
                optimizer.step()
                optimizer.zero_grad()
            batch_idx += 1
            if data_size == batch_idx:
                print("eq...")
                break
            grad_recv = transfer(4, None, shapes[0])
            print("backward send.....")
        print("backward end..")

    if dist.get_rank() == 0:
        outputs_queue = ThreadQueue(args.buffer_size)
        semaphore = Semaphore(args.buffer_size)
        back_process = Process(target=backward_rank0, args=(semaphore, ))
        back_process.start()
        for batch_idx, (inputs, targets) in enumerate(trainloader):
            print("batch: " + str(batch_idx))
            inputs = inputs.cuda(0)
            outputs = layer(inputs)
            outputs_queue.put(outputs)
            transfer(dist.get_rank(), outputs.cpu(), None)
            print("send........")
        print("start to end....")
        back_process.join()
        e.set()
        print("end....")
    elif dist.get_rank() == 1:
        outputs_queue = ThreadQueue(args.buffer_size)
        back_process = Process(target=backward_rank1, args=())
        rec_val = torch.zeros(shapes[0])
        dist.recv(tensor=rec_val, src=0)
        back_process.start()
        for index, (_, targets) in enumerate(trainloader):
            print("batch_idx:" + str(index))
            rec_val = rec_val.cuda(1)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            outputs_queue.put([rec_val, outputs])
            if index == data_size - 1:
                transfer(dist.get_rank(), outputs.cpu(), None)
                print("the last send........")
                continue
            rec_val = transfer(dist.get_rank(), outputs.cpu(), shapes[0])
            print("send.................")
        print("start to end....")
        back_process.join()
        e.wait()
        print("end......")
    elif dist.get_rank() == 2:
        train_loss = 0
        correct = 0
        total = 0
        criterion.cuda(2)
        rec_val = torch.zeros(shapes[1])
        dist.recv(tensor=rec_val, src=1)
        for batch_idx, (_, targets) in enumerate(trainloader):
            rec_val = rec_val.cuda(2)
            rec_val.requires_grad_()
            outputs = layer(rec_val)
            # Start the backward pass from the loss on the last stage:
            targets = targets.cuda(2)
            loss = criterion(outputs, targets)
            loss.backward()
            # Accumulate stats every batch; the original only did so on
            # optimizer-step batches, which skewed the running averages.
            train_loss += loss.item()
            _, predicted = outputs.max(1)
            total += targets.size(0)
            correct += predicted.eq(targets).sum().item()
            if batch_idx % args.ac == 0:
                optimizer.step()
                optimizer.zero_grad()
            progress_bar(
                batch_idx, data_size, 'Loss: %.3f | Acc: %.3f%% (%d/%d)' %
                (train_loss / (batch_idx + 1), 100. * correct / total,
                 correct, total))
            logger.error("train:" + str(train_loss / (batch_idx + 1)))
            acc_str = "tacc: %.3f" % (100. * correct / total, )
            logger.error(acc_str)
            if batch_idx == data_size - 1:
                transfer(dist.get_rank(), rec_val.grad.cpu(), None)
                continue
            rec_val = transfer(dist.get_rank(), rec_val.grad.cpu(), shapes[1])
        e.wait()
        print("end....")
def begin_train(self):
    N_EPOCHS = 30
    N_BATCH = 5
    N_TRAIN_INS = len(self.train_ending)
    best_val_accuracy = 0
    best_test_accuracy = 0
    test_threshold = 2000.0
    prev_percentage = 0.0
    speed = 0.0
    batch_count = 0.0
    for epoch in range(N_EPOCHS):
        print("epoch ", epoch, ":")
        shuffled_index_list = utils.shuffle_index(N_TRAIN_INS)
        max_batch = N_TRAIN_INS // N_BATCH
        start_time = time.time()
        for batch in range(max_batch):
            batch_index_list = [
                shuffled_index_list[i]
                for i in range(batch * N_BATCH, (batch + 1) * N_BATCH)
            ]
            train_story = [
                self.train_story[index] for index in batch_index_list
            ]
            train_ending = [
                self.train_ending[index] for index in batch_index_list
            ]
            neg_end_index_list = np.random.randint(N_TRAIN_INS,
                                                   size=(N_BATCH, ))
            while np.any(
                (np.asarray(batch_index_list) - neg_end_index_list) == 0):
                neg_end_index_list = np.random.randint(N_TRAIN_INS,
                                                       size=(N_BATCH, ))
            neg_end1 = [
                self.train_ending[index] for index in neg_end_index_list
            ]
            train_story_matrix = utils.padding(train_story)
            train_ending_matrix = utils.padding(train_ending)
            neg_ending1_matrix = utils.padding(neg_end1)
            train_story_mask = utils.mask_generator(train_story)
            train_ending_mask = utils.mask_generator(train_ending)
            neg_ending1_mask = utils.mask_generator(neg_end1)
            self.train_func(train_story_matrix, train_story_mask,
                            train_ending_matrix, train_ending_mask,
                            neg_ending1_matrix, neg_ending1_mask)
            if batch_count != 0 and batch_count % 10 == 0:
                speed = N_BATCH * 10.0 / (time.time() - start_time)
                start_time = time.time()
            percentage = ((batch_count % test_threshold) +
                          1) / test_threshold * 100
            if percentage - prev_percentage >= 1:
                utils.progress_bar(percentage, speed)
            # Peek on the val set every `test_threshold` batches:
            if batch_count % test_threshold == 0:
                if batch_count == 0:
                    print("initial test")
                else:
                    print(" ")
                print("test on valid set...")
                val_result, val_result_list = self.val_set_test()
                print("accuracy is: ", val_result * 100, "%")
                if val_result > best_val_accuracy:
                    print("new best! test on test set...")
                    best_val_accuracy = val_result
                    self.saving_model('val', best_val_accuracy)
                    pickle.dump(
                        val_result_list,
                        open(
                            './prediction/LSTM_last_1neg_sharewemb_best_val_prediction.pkl',
                            'wb'))
                    test_accuracy, test_result_list = self.test_set_test()
                    print("test set accuracy: ", test_accuracy * 100, "%")
                    if test_accuracy > best_test_accuracy:
                        best_test_accuracy = test_accuracy
                        print("saving model...")
                        self.saving_model('test', best_test_accuracy)
                        pickle.dump(
                            test_result_list,
                            open(
                                './prediction/LSTM_last_1neg_sharewemb_best_test_prediction.pkl',
                                'wb'))
            batch_count += 1
    print("reload best model for testing on test set")
    self.reload_model('val')
    print("test on test set...")
    # test_set_test returns (accuracy, predictions); unpack before printing.
    test_result, _ = self.test_set_test()
    print("accuracy is: ", test_result * 100, "%")
def test(net, m, criterion, testloader, epoch, args, savemodelpath):
    global best_acc
    global best_acc_gbt
    net.eval()
    test_loss = 0
    correct = 0
    total = 0
    testfeat = np.zeros((len(testloader.dataset),
                         2560 + CROPSIZE * CROPSIZE * CROPSIZE + 1))
    testlabel = np.zeros((len(testloader.dataset), ))
    idx = 0
    with torch.no_grad():  # replaces the deprecated volatile=True Variables
        for batch_idx, (inputs, targets, feat) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs, dfeat = net(inputs)
            # Stack the deep features next to the hand-crafted ones:
            testfeat[idx:idx + len(targets), :2560] = dfeat.cpu().numpy()
            for i in range(len(targets)):
                testfeat[idx + i, 2560:] = feat[i].cpu().numpy()
                testlabel[idx + i] = targets[i].cpu().numpy()
            idx += len(targets)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).cpu().sum().item()
            progress_bar(
                batch_idx, len(testloader),
                'Test Loss: %.3f | Test Acc: %.3f%% (%d/%d)' %
                (test_loss / (batch_idx + 1), 100. * correct / total, correct,
                 total))
    gbtteacc = 100. * np.mean(m.predict(testfeat) == testlabel)
    if gbtteacc > best_acc_gbt:
        # 'gbtmodel.sav' has no placeholder, so the original `.format(fold)`
        # call was a no-op.
        pickle.dump(m, open(os.path.join(savemodelpath, 'gbtmodel.sav'), 'wb'))
        logging.info('Saving gbt ..')
        state_dict = net.module.state_dict()
        state_dict = {k: v.cpu() for k, v in state_dict.items()}
        state = {'epoch': epoch, 'save_dir': savemodelpath,
                 'state_dict': state_dict, 'args': args,
                 'lr': get_lr(epoch), 'best_acc': best_acc,
                 'best_acc_gbt': best_acc_gbt}
        torch.save(state, os.path.join(savemodelpath, 'ckptgbt.t7'))
        best_acc_gbt = gbtteacc
    # Save checkpoint of best_acc
    acc = 100. * correct / total
    if acc > best_acc:
        logging.info('Saving..')
        state_dict = net.module.state_dict()
        state_dict = {k: v.cpu() for k, v in state_dict.items()}
        state = {'epoch': epoch, 'save_dir': savemodelpath,
                 'state_dict': state_dict, 'args': args,
                 'lr': get_lr(epoch), 'best_acc': best_acc,
                 'best_acc_gbt': best_acc_gbt}
        torch.save(state, os.path.join(savemodelpath, 'ckpt.t7'))
        best_acc = acc
    # Save every 50 epochs
    logging.info('Saving..')
    state_dict = net.module.state_dict()
    state_dict = {k: v.cpu() for k, v in state_dict.items()}
    state = {'epoch': epoch, 'save_dir': savemodelpath,
             'state_dict': state_dict, 'args': args,
             'lr': get_lr(epoch), 'best_acc': best_acc,
             'best_acc_gbt': best_acc_gbt}
    if epoch % 50 == 0:
        torch.save(state, savemodelpath + 'ckpt' + str(epoch) + '.t7')
    # Show and log metrics
    print('teacc ' + str(acc) + ' bestacc ' + str(best_acc) + ' gbtteacc ' +
          str(gbtteacc) + ' bestgbt ' + str(best_acc_gbt))
    print()
    logging.info('teacc ' + str(acc) + ' bestacc ' + str(best_acc) +
                 ' gbtteacc ' + str(gbtteacc) + ' bestgbt ' +
                 str(best_acc_gbt))
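# The GBT model `m` scored above is trained elsewhere on the same stacked
# deep + hand-crafted features. A hypothetical sketch with scikit-learn
# (estimator choice and hyperparameters are assumptions, not this project's
# actual setup):
from sklearn.ensemble import GradientBoostingClassifier

# trainfeat/trainlabel are built the same way as testfeat/testlabel above.
m = GradientBoostingClassifier(n_estimators=200, max_depth=3)
m.fit(trainfeat, trainlabel)
print('train acc: %.3f' % m.score(trainfeat, trainlabel))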