import os
import sys

import numpy as np
import torch
import torch.backends.cudnn as cudnn
from torch.autograd import Variable

# Project-local modules/globals (cf, args, net, trainloader, pretrainloader,
# validloader, get_optim, get_dataLoder, pred_loss, consistency_loss,
# prepared_model, save_model, stack_or_create, function, t_k, u_t) are
# assumed to be defined or imported elsewhere in the repo.


def pretrain_fine(epoch, fine_id):
    net.fines[fine_id].train()
    optimizer, lr = get_optim(net.fines[fine_id].parameters(), args,
                              mode='preTrain', epoch=epoch)
    print('==> Epoch #%d, LR=%.4f' % (epoch, lr))
    required_train_loader = get_dataLoder(args, classes=net.class_set[fine_id],
                                          mode='preTrain')
    predictor = net.fines[fine_id]
    for batch_idx, (inputs, targets) in enumerate(required_train_loader):
        if cf.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets).long()
        outputs = predictor(net.share(inputs))  # Forward Propagation
        loss = pred_loss(outputs, targets)
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        num_ins = targets.size(0)
        _, predicted = torch.max(outputs, 1)
        correct = predicted.eq(targets.data).cpu().sum()
        acc = 100. * correct.item() / num_ins
        sys.stdout.write('\r')
        sys.stdout.write('Pre-train Epoch [%3d/%3d] Iter [%3d/%3d]\t\t Loss: %.4f Accuracy: %.3f%%'
                         % (epoch, args.num_epochs_pretrain, batch_idx + 1,
                            (required_train_loader.dataset.train_data.shape[0] // args.pretrain_batch_size) + 1,
                            loss.item(), acc))
        sys.stdout.flush()
def pretrain_coarse(epoch):
    net.share.train()
    net.coarse.train()
    param = list(net.share.parameters()) + list(net.coarse.parameters())
    optimizer, lr = get_optim(param, args, mode='preTrain', epoch=epoch)
    print('\n==> Epoch #%d, LR=%.4f' % (epoch, lr))
    for batch_idx, (inputs, targets) in enumerate(pretrainloader):
        if cf.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU setting
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net.coarse(net.share(inputs))  # Forward Propagation
        loss = pred_loss(outputs, targets)
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        _, predicted = torch.max(outputs.data, 1)
        num_ins = targets.size(0)
        correct = predicted.eq(targets.data).cpu().sum()
        sys.stdout.write('\r')
        sys.stdout.write('Pre-train Epoch [%3d/%3d] Iter [%3d/%3d]\t\t Loss: %.4f Accuracy: %.3f%%'
                         % (epoch, args.num_epochs_pretrain, batch_idx + 1,
                            (pretrainloader.dataset.train_data.shape[0] // args.pretrain_batch_size) + 1,
                            loss.item(), 100. * correct.item() / num_ins))
        sys.stdout.flush()
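# A minimal usage sketch (hypothetical driver, not part of the original repo):
# the shared trunk and coarse head are pre-trained first, after which each
# fine head is pre-trained on the classes of its own superclass. Epoch counts
# follow the print statements above.
#
# for epoch in range(1, args.num_epochs_pretrain + 1):
#     pretrain_coarse(epoch)
# for fine_id in range(args.num_superclasses):
#     for epoch in range(1, args.num_epochs_pretrain + 1):
#         pretrain_fine(epoch, fine_id)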
def train_branch(branch, clustering_result, classes):
    # clustering_result is a binary vector assigning each class to one of two
    # groups; the mapping matrix folds per-class outputs into those two groups.
    # It does not depend on the batch, so build it once up front.
    matrix = np.vstack((np.ones(np.shape(clustering_result)) - clustering_result,
                        clustering_result))
    matrix = torch.from_numpy(matrix.transpose().astype(np.float32))
    if cf.use_cuda:
        matrix = matrix.cuda()
    for epoch in range(args.num_epochs_train):
        required_train_loader = get_dataLoder(args, classes=classes, mode='Train',
                                              one_hot=True)
        param = list(branch.parameters())
        optimizer, lr = get_optim(param, args, mode='preTrain', epoch=epoch)
        for batch_idx, (inputs, targets) in enumerate(required_train_loader):
            if cf.use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
            optimizer.zero_grad()
            inputs, targets = Variable(inputs), Variable(targets).float()
            outputs = branch(inputs)
            # Fold class-level outputs and one-hot targets into group-level ones.
            outputs = outputs.mm(matrix)
            targets = targets.mm(matrix)
            loss = pred_loss(outputs, targets)
            loss.backward()  # Backward Propagation
            optimizer.step()  # Optimizer update
            sys.stdout.write('\r')
            sys.stdout.write('Train Branch with Epoch [%3d/%3d] Iter [%3d/%3d]\t\t Loss: %.4f'
                             % (epoch + 1, args.num_epochs_train, batch_idx + 1,
                                (required_train_loader.dataset.train_data.shape[0] // args.pretrain_batch_size) + 1,
                                loss.item()))
            sys.stdout.flush()
    return branch
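# A minimal numpy sketch (illustrative only, not part of the original repo) of
# the mapping matrix built in train_branch: for a binary clustering_result over
# 4 classes, column 0 collects the classes assigned to group 0 and column 1 the
# classes assigned to group 1, so multiplying class probabilities by the matrix
# sums them per group.
def _demo_group_matrix():
    clustering_result = np.array([0, 1, 1, 0])           # class -> group id
    matrix = np.vstack((np.ones(4) - clustering_result,  # group-0 indicator row
                        clustering_result)).T            # group-1 indicator row
    probs = np.array([[0.1, 0.2, 0.3, 0.4]])             # per-class probabilities
    print(probs.dot(matrix))                             # -> [[0.5, 0.5]]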
def enhance_expert(Expert, Superclass, c, mode='clone'):
    if mode == 'clone':
        print('\nThe new expert model is activated and waiting for another class to be added')
    elif mode == 'merge':
        for epoch in range(args.num_epochs_train):
            required_train_loader = get_dataLoder(args, classes=Superclass[c], mode='Train',
                                                  encoded=False, one_hot=True)
            if epoch == 0:
                # Build a fresh expert sized to the merged superclass.
                num = len(Superclass[c])
                Expert[c] = prepared_model(num, c)
                if cf.use_cuda:
                    Expert[c].cuda()
                    cudnn.benchmark = True
            param = list(Expert[c].parameters())
            optimizer, lr = get_optim(param, args, mode='preTrain', epoch=epoch)
            for batch_idx, (inputs, targets) in enumerate(required_train_loader):
                if batch_idx >= args.num_test:
                    break
                if cf.use_cuda:
                    inputs, targets = inputs.cuda(), targets.cuda()  # GPU setting
                optimizer.zero_grad()
                inputs, targets = Variable(inputs), Variable(targets).long()
                outputs = Expert[c](inputs)  # Forward Propagation
                loss = pred_loss(outputs, targets)
                loss.backward()  # Backward Propagation
                optimizer.step()  # Optimizer update

                _, predicted = torch.max(outputs.data, 1)
                num_ins = targets.size(0)
                # Targets are one-hot; compare against their argmax.
                correct = predicted.eq(torch.max(targets.data, 1)[1]).cpu().sum()
                acc = 100. * correct.item() / num_ins
                sys.stdout.write('\r')
                sys.stdout.write('Train expert model with Epoch [%3d/%3d] Iter [%3d]\t\t Loss: %.4f Accuracy: %.3f%%'
                                 % (epoch + 1, args.num_epochs_train, batch_idx + 1,
                                    loss.item(), acc))
                sys.stdout.flush()
        save_model(Expert[c], c)
    else:
        print('\nUnknown mode: %s' % mode)
    return Expert
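# Usage sketch (hypothetical call, not from the original repo): 'clone' only
# announces a placeholder expert awaiting a new class, while 'merge' rebuilds
# and retrains Expert[c] on the merged class set Superclass[c], e.g.
#
# Expert = enhance_expert(Expert, Superclass, c=0, mode='merge')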
def fine_tune(epoch):
    net.share.train()
    net.coarse.train()
    for i in range(args.num_superclasses):
        net.fines[i].train()
    # Jointly fine-tune the shared trunk, the coarse head, and all fine heads.
    param = list(net.share.parameters()) + list(net.coarse.parameters())
    for k in range(args.num_superclasses):
        param += list(net.fines[k].parameters())
    optimizer, lr = get_optim(param, args, mode='fineTune', epoch=epoch)
    print('\n==> fine-tune Epoch #%d, LR=%.4f' % (epoch, lr))
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if cf.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets).long()
        outputs, coarse_outputs = net(inputs, return_coarse=True)
        tloss = pred_loss(outputs, targets)
        # Consistency term keeps the coarse predictions close to the target
        # coarse distribution t_k (a module-level global).
        closs = consistency_loss(coarse_outputs, t_k, weight=args.weight_consistency)
        loss = tloss + closs
        loss.backward()  # Backward Propagation
        optimizer.step()

        _, predicted = torch.max(outputs.data, 1)
        num_ins = targets.size(0)
        correct = predicted.eq(targets.data).cpu().sum()
        acc = 100. * correct.item() / num_ins
        sys.stdout.write('\r')
        sys.stdout.write('Finetune Epoch [%3d/%3d] Iter [%3d/%3d]\t\t tloss: %.4f closs: %.4f Loss: %.4f Accuracy: %.3f%%'
                         % (epoch, args.num_epochs_train, batch_idx + 1,
                            (trainloader.dataset.train_data.shape[0] // args.train_batch_size) + 1,
                            tloss.item(), closs.item(), loss.item(), acc))
        sys.stdout.flush()
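# consistency_loss is defined elsewhere in the repo; the sketch below is one
# plausible (hypothetical) form, assuming t_k is a target coarse distribution
# and the loss penalizes the KL divergence between the batch-averaged coarse
# prediction and that target, scaled by `weight`.
def _consistency_loss_sketch(coarse_outputs, t_k, weight=1.0):
    import torch.nn.functional as F
    mean_pred = F.softmax(coarse_outputs, dim=1).mean(dim=0)  # average coarse prediction
    target = t_k / t_k.sum()                                  # normalize the target distribution
    return weight * torch.sum(target * (torch.log(target + 1e-12)
                                        - torch.log(mean_pred + 1e-12)))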
def pretrain_fine(epoch, cluster_result=None, u_kj=None):
    save_point = cf.model_dir + args.dataset
    net.share.train()
    net.croase.train()
    for i in range(args.num_superclass):
        net.fines[i].train()
    train_loss = 0
    optimizer_share, lr = get_optim(net.share, args, mode='preTrain', epoch=epoch)
    optimizer_croase, lr = get_optim(net.croase, args, mode='preTrain', epoch=epoch)
    optimizer_fine = {}
    for k in range(args.num_superclass):
        # Freeze everything but the last layers of each fine head.
        for para in list(net.fines[k].parameters())[:-9]:
            para.requires_grad = False
        optimizer_fine[k], lr = get_optim(net.fines[k], args, mode='preTrain', epoch=epoch)
    if epoch == 1:
        print('\nprevious model activated')
        net.share.load_state_dict(torch.load(save_point + '/share_params.pkl'))
        net.croase.load_state_dict(torch.load(save_point + '/croase_params.pkl'))
        # Each fine head starts from the pre-trained coarse head's weights.
        for i in range(args.num_superclass):
            net.fines[i].load_state_dict(torch.load(save_point + '/croase_params.pkl'))
    print('\ntrain data-loader activated')
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        if batch_idx >= args.num_test:
            break
        if cf.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer_share.zero_grad()
        optimizer_croase.zero_grad()
        for i in range(args.num_superclass):
            optimizer_fine[i].zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        share = net.share.encoder(inputs)
        outputs = net.croase.independent(share)  # Forward Propagation
        fine_out = {}
        fine_target = {}
        fine_result = {}
        # ==================== divide the fine result =====================
        # Route each sample to every superclass k that either hard-contains its
        # class (cluster_result) or whose soft membership u_kj reaches the
        # global threshold u_t.
        for k in range(args.num_superclass):
            fine_out[k] = []
            fine_target[k] = []
            fine_result[k] = net.fines[k].independent(share)
        labels = torch.max(targets, 1)[1]
        for i in range(targets.size(0)):
            j = labels[i].item()
            for k in range(args.num_superclass):
                if cluster_result[j] == k or u_kj[j, k] >= u_t:
                    fine_out[k].append(fine_result[k][i:i + 1, :])
                    fine_target[k].append(targets[i:i + 1, :])
        fine_loss = {}
        loss = None
        for k in range(args.num_superclass):
            if not fine_out[k]:  # no sample routed to this superclass in the batch
                continue
            fine_out[k] = torch.cat(fine_out[k], 0)
            fine_target[k] = torch.cat(fine_target[k], 0)
            fine_loss[k] = pred_loss(fine_out[k], fine_target[k])
            loss = fine_loss[k] if loss is None else loss + fine_loss[k]
        loss.backward()  # Backward Propagation
        # Only the fine heads are stepped here; trunk and coarse grads are
        # zeroed but never applied.
        for k in range(args.num_superclass):
            optimizer_fine[k].step()  # Optimizer update
        train_loss += loss.item()

        predicted, batch_targets = None, None
        for k in range(args.num_superclass):
            if k not in fine_loss:
                continue
            pred_k = torch.max(fine_out[k].data, 1)[1]
            targ_k = torch.max(fine_target[k].data, 1)[1]
            predicted = pred_k if predicted is None else torch.cat((predicted, pred_k), 0)
            batch_targets = targ_k if batch_targets is None else torch.cat((batch_targets, targ_k), 0)
        num_ins = batch_targets.size(0)
        correct = predicted.eq(batch_targets).cpu().sum()
        acc = 100. * correct.item() / num_ins
        sys.stdout.write('\r')
        sys.stdout.write('Pre-train Epoch [%3d/%3d] Iter [%3d/%3d]\t\t Loss: %.4f Accuracy: %.3f%%'
                         % (epoch, args.num_epochs_pretrain, batch_idx + 1,
                            (trainloader.dataset.train_data.shape[0] // args.train_batch_size) + 1,
                            loss.item(), acc))
        sys.stdout.flush()
    return acc
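# A minimal numpy sketch (illustrative only) of the routing rule above: class j
# belongs to superclass k if k is its hard cluster (cluster_result[j] == k) or
# its soft membership u_kj[j, k] reaches the threshold u_t, so one class can be
# shared by several overlapping superclasses.
def _demo_overlapping_superclasses():
    cluster_result = np.array([0, 0, 1])  # hard cluster of 3 classes
    u_kj = np.array([[0.9, 0.1],          # soft memberships (classes x superclasses)
                     [0.6, 0.5],
                     [0.2, 0.8]])
    u_t = 0.5
    for k in range(2):
        members = [j for j in range(3) if cluster_result[j] == k or u_kj[j, k] >= u_t]
        print('superclass %d: classes %s' % (k, members))
    # -> superclass 0: classes [0, 1]; superclass 1: classes [1, 2]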
def pretrain_clustering(epoch, mode, cluster_result=None):
    net.share.train()
    net.croase.train()
    train_loss = 0
    optimizer_share, lr = get_optim(net.share, args, mode='preTrain', epoch=epoch)
    optimizer_croase, lr = get_optim(net.croase, args, mode='preTrain', epoch=epoch)
    if mode == 'clustering':
        Data = enumerate(trainloader)
        print('\ntrain data-loader activated')
    else:
        print('---------------- Warning! no mode is activated ----------------\n')
        return None, None  # nothing to iterate without an active mode
    print('=> pre-train Epoch #%d, LR=%.4f' % (epoch, lr))
    for batch_idx, (inputs, targets) in Data:
        if batch_idx >= args.num_test:
            break
        if cf.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
        optimizer_share.zero_grad()
        optimizer_croase.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets)
        outputs = net.croase.independent(net.share.encoder(inputs))  # Forward Propagation
        if batch_idx == 0:
            total_outputs = outputs
            total_targets = targets
        else:
            total_outputs = torch.cat((total_outputs, outputs), 0)
            total_targets = torch.cat((total_targets, targets), 0)
        loss = pred_loss(outputs, targets)
        loss.backward()  # Backward Propagation
        optimizer_share.step()  # Optimizer update
        optimizer_croase.step()  # Optimizer update
        train_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        _, targets = torch.max(targets.data, 1)
        num_ins = targets.size(0)
        correct = predicted.eq(targets.data).cpu().sum()
        sys.stdout.write('\r')
        sys.stdout.write('Pre-train Epoch [%3d/%3d] Iter [%3d/%3d]\t\t Loss: %.4f Accuracy: %.3f%%'
                         % (epoch, args.num_epochs_pretrain, batch_idx + 1,
                            (trainloader.dataset.train_data.shape[0] // args.train_batch_size) + 1,
                            loss.item(), 100. * correct.item() / num_ins))
        sys.stdout.flush()

    print('\n=> valid epoch beginning for clustering')
    if mode == 'clustering':
        clustering_data = enumerate(validloader)
        print('valid data-loader activated')
        for batch_idx, (inputs, targets) in clustering_data:
            if batch_idx >= args.num_test:
                break
            if cf.use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
            optimizer_share.zero_grad()
            inputs, targets = Variable(inputs), Variable(targets)
            outputs = net.croase.independent(net.share.encoder(inputs))  # Forward Propagation
            # Clustering statistics are re-collected on the validation set.
            if batch_idx == 0:
                total_outputs = outputs
                total_targets = targets
            else:
                total_outputs = torch.cat((total_outputs, outputs), 0)
                total_targets = torch.cat((total_targets, targets), 0)
            loss = pred_loss(outputs, targets)
            train_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            _, targets = torch.max(targets.data, 1)
            num_ins = targets.size(0)
            correct = predicted.eq(targets.data).cpu().sum()
            acc = 100. * correct.item() / num_ins
            sys.stdout.write('\r')
            sys.stdout.write('valid epoch beginning [%3d/%3d] Iter [%3d/%3d]\t\t Accuracy: %.3f%%'
                             % (epoch, args.num_epochs_pretrain, batch_idx + 1,
                                (trainloader.dataset.train_data.shape[0] // args.train_batch_size) + 1,
                                acc))
            sys.stdout.flush()
    print('\nSaving model...\t\t\tTop1 = %.2f%%' % acc)
    share_params = net.share.state_dict()
    croase_params = net.croase.state_dict()
    save_point = cf.model_dir + args.dataset
    if not os.path.isdir(save_point):
        os.mkdir(save_point)
    torch.save(share_params, save_point + '/share_params.pkl')
    torch.save(croase_params, save_point + '/croase_params.pkl')
    return total_outputs, total_targets
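# A minimal sketch (assumption: function.confusion_matrix takes predicted and
# true label arrays, as in learn_and_clustering below) of how the returned
# (total_outputs, total_targets) can feed the superclass clustering step:
#
# outputs, targets = pretrain_clustering(epoch, mode='clustering')
# pred = torch.max(outputs.data, 1)[1].cpu().numpy()
# true = torch.max(targets.data, 1)[1].cpu().numpy()
# F = function.confusion_matrix(pred, true)  # class-confusion statistics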
def learn_and_clustering(args, L, epoch, classes, test=True, cluster=True, save=False):
    required_train_loader = get_dataLoder(args, classes=classes, mode='Train', one_hot=True)
    # L = leaf(args, np.shape(classes)[0])
    param = list(L.parameters())
    optimizer, lr = get_optim(param, args, mode='preTrain', epoch=epoch)
    print('\n==> Epoch %d, LR=%.4f' % (epoch + 1, lr))
    best_acc = 0
    required_data = []
    required_targets = []
    cluster_result = None
    for batch_idx, (inputs, targets) in enumerate(required_train_loader):
        if batch_idx >= args.num_test:
            break
        # targets = targets[:, sorted(list({}.fromkeys((torch.max(targets.data, 1)[1]).numpy()).keys()))]
        if cf.use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()  # GPU setting
        optimizer.zero_grad()
        inputs, targets = Variable(inputs), Variable(targets).long()
        outputs = L(inputs)  # Forward Propagation
        loss = pred_loss(outputs, targets)
        loss.backward()  # Backward Propagation
        optimizer.step()  # Optimizer update

        _, predicted = torch.max(outputs.data, 1)
        num_ins = targets.size(0)
        correct = predicted.eq(torch.max(targets.data, 1)[1]).cpu().sum()
        acc = 100. * correct.item() / num_ins
        sys.stdout.write('\r')
        sys.stdout.write('Train Epoch [%3d/%3d] Iter [%3d/%3d]\t\t Loss: %.4f Accuracy: %.3f%%'
                         % (epoch + 1, args.num_epochs_train, batch_idx + 1,
                            (required_train_loader.dataset.train_data.shape[0] // args.pretrain_batch_size) + 1,
                            loss.item(), acc))
        sys.stdout.flush()
    # ========================= saving the model ============================
    if epoch + 1 == args.num_epochs_train and acc > best_acc and save:
        print('\nSaving the best leaf model...\t\t\tTop1 = %.2f%%' % acc)
        save_point = cf.var_dir + args.dataset
        if not os.path.isdir(save_point):
            os.mkdir(save_point)
        torch.save(L.state_dict(), save_point + '/L0.pkl')
        best_acc = acc
    # ====================== validate the training result ===================
    if epoch + 1 == args.num_epochs_train and test:
        required_valid_loader = get_dataLoder(args, classes=classes, mode='Valid', one_hot=True)
        num_ins = 0
        correct = 0
        for batch_idx, (inputs, targets) in enumerate(required_valid_loader):
            if cf.use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()  # GPU settings
            inputs, targets = Variable(inputs), Variable(targets).long()
            outputs = L(inputs)
            # ================= prepare data for clustering ==================
            if cluster:
                batch_required_data = outputs.data.cpu().numpy() if cf.use_cuda else outputs.data.numpy()
                batch_required_targets = targets.data.cpu().numpy() if cf.use_cuda else targets.data.numpy()
                required_data = stack_or_create(required_data, batch_required_data, axis=0)
                required_targets = stack_or_create(required_targets, batch_required_targets, axis=0)
            targets = torch.argmax(targets, 1)
            _, predicted = torch.max(outputs.data, 1)
            num_ins += targets.size(0)
            correct += predicted.eq(targets.data).cpu().sum().item()
        # ============================ clustering ============================
        if cluster:
            print('\n==> Doing the spectral clustering')
            required_data = np.argmax(required_data, 1)
            required_targets = np.argmax(required_targets, 1)
            F = function.confusion_matrix(required_data, required_targets)
            # Symmetrized class-distance matrix derived from the confusion matrix.
            D = (1 / 2) * ((np.identity(np.shape(classes)[0]) - F)
                           + np.transpose(np.identity(np.shape(classes)[0]) - F))
            cluster_result = function.spectral_clustering(D, K=args.num_superclasses, gamma=10)
        acc = 100. * correct / num_ins
        print("\nValidation Epoch %d\t\tAccuracy: %.2f%%" % (epoch + 1, acc))
        if cluster:
            return L, cluster_result, acc
        else:
            return L, None, acc
    return L, None, None
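# function.spectral_clustering is project-local; the sketch below is a
# hypothetical stand-in showing one standard way to cluster classes from the
# distance matrix D built above: turn D into an RBF affinity with the given
# gamma and run scikit-learn's spectral clustering on it.
def _spectral_clustering_sketch(D, K, gamma=10):
    from sklearn.cluster import SpectralClustering
    affinity = np.exp(-gamma * D ** 2)  # distance -> similarity
    model = SpectralClustering(n_clusters=K, affinity='precomputed')
    return model.fit_predict(affinity)  # class index -> superclass id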