def main():
    logger = logging.getLogger('MAIN')
    parser = make_parser()
    args = parser.parse_args()
    utils.duplication_check(args)
    utils.display_args(args, logger)

    logger.info("Loading dataset")
    train_dataloader = dataset.get_dataloader(args.data, 'train', batchsize=args.bz)
    test_dataloader = dataset.get_dataloader(args.data, 'test',
                                             batchsize=args.test_bz,
                                             obj_pred=args.obj_pred)

    logger.info("Loading network and solver")
    network = importlib.import_module('network.' + args.network)
    net = network.Network(train_dataloader, args)

    with utils.create_session() as sess:
        sw = SolverWrapper(net, train_dataloader, test_dataloader, args)
        sw.trainval_model(sess, args.epoch)
def __init__(self, train_path, test_path, model, optimizer):
    # self.train_data_loader = dataset.loader(train_path)
    # self.test_data_loader = dataset.test_loader(test_path)
    self.train_data_loader = get_dataloader('CIFAR10', "train", 128)
    self.test_data_loader = get_dataloader('CIFAR10', "test", 128)
    self.optimizer = optimizer
    self.model = model
    self.criterion = torch.nn.CrossEntropyLoss()
    self.model.train()
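# A minimal sketch of how this trainer might run one epoch; the method is
# assumed to live on the same class as the __init__ above, and the name
# train_one_epoch is illustrative (it does not appear in the original code).
def train_one_epoch(self):
    device = next(self.model.parameters()).device
    running_loss = 0.0
    for inputs, labels in self.train_data_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        self.optimizer.zero_grad()
        loss = self.criterion(self.model(inputs), labels)
        loss.backward()
        self.optimizer.step()
        running_loss += loss.item()
    return running_loss / len(self.train_data_loader)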
def valid_epoch(model, criterion, epoch):
    """Validate the model for a single epoch."""
    # -- evaluation mode -- #
    model.eval()
    valid_loss_list = []
    valid_acc_list = []
    valid_f1_score_list = []
    with torch.no_grad():
        for file_name in config.file_name_list:
            batch_size = config.batch_size_dict[file_name]
            file_path = os.path.join(config.processed_valid, f'{file_name}.csv')
            valid_dataloader = get_dataloader(file_path, batch_size, False)
            step_grad = config.gradient_accumulation_step_dict[file_name]
            describe = f'valid epoch: {epoch}/{config.epochs} dataset: {file_name} '
            valid_loss, valid_correct, valid_num, valid_pred_list, valid_label_list = \
                normal_block(valid_dataloader, model, criterion, False, None,
                             step_grad, describe, file_name, {})
            # Must divide here: valid_loss is a running total over the whole set.
            epoch_valid_loss = valid_loss / valid_num
            epoch_valid_acc, epoch_valid_f1_score = classification_metrics(
                valid_label_list, valid_pred_list)
            valid_loss_list.append(epoch_valid_loss)
            valid_acc_list.append(epoch_valid_acc)
            valid_f1_score_list.append(epoch_valid_f1_score)
    return valid_loss_list, valid_acc_list, valid_f1_score_list
def train_enhance(model, criterion, optimizer, loss_weights_dict):
    """
    Enhancement training on the validation set, typically used as the final
    training pass: the model is trained on the validation data.
    :param model: the model
    :param criterion: the loss function
    :param optimizer: the optimizer driving gradient descent
    :param loss_weights_dict: provides the dynamic loss-weight coefficients
    """
    # training mode
    model.train()
    valid_file_num = len(
        os.listdir(
            os.path.join(config.valid_split_dir, config.file_name_list[0])))
    # Build indices over the sub-datasets
    data_iter = tqdm(range(1, valid_file_num + 1))
    for idx in data_iter:
        for file_name in config.file_name_list:
            file_path = os.path.join(config.valid_split_dir, file_name, f'{idx}.csv')
            batch_size = config.batch_size_dict2[file_name]  # dedicated batch_size
            step_grad = config.gradient_accumulation_step_dict2[file_name]  # dedicated step_grad
            sub_dataloader = get_dataloader(file_path, batch_size, False)
            describe = f'enhancement training, dataset: {file_name}: {idx}/{valid_file_num}'
            normal_block(sub_dataloader, model, criterion, True, data_iter,
                         step_grad, describe, file_name, loss_weights_dict,
                         optimizer)
    return model
def main():
    logger = logging.getLogger('MAIN')
    parser = make_parser()
    parser.add_argument("--test_set", type=str, default='test',
                        choices=['test', 'val'])
    args = parser.parse_args()

    logger.info("Loading dataset")
    test_dataloader = dataset.get_dataloader(args.data, args.test_set,
                                             batchsize=args.test_bz)

    logger.info("Loading network and solver")
    network = importlib.import_module('network.' + args.network)
    net = network.Network(test_dataloader, args)

    with utils.create_session() as sess:
        sw = SolverWrapper(net, test_dataloader, args)
        sw.trainval_model(sess)
def train_net(net,
              epochs=5,
              batch_size=1,
              lr=0.1,
              val_percent=0.2,
              save_cp=True,
              gpu=False):
    data_dir = r'E:\workspace\dataset\RVSC\TrainingSet'
    dir_checkpoint = ''
    trainloader = dataset.get_dataloader(data_dir,
                                         batch_size,
                                         trainset=True,
                                         validation_split=val_percent,
                                         mask='inner',
                                         shuffle=True,
                                         normalize_images=True)
    valloader = dataset.get_dataloader(data_dir,
                                       batch_size=1,
                                       trainset=False,
                                       validation_split=val_percent,
                                       mask='inner',
                                       shuffle=True,
                                       normalize_images=True)
    _imgs = trainloader.dataset.images
    _imgsv = valloader.dataset.images
    print('''
    Starting training:
        Epochs: {}
        Batch size: {}
        Learning rate: {}
        Training size: {}
        Validation size: {}
        Checkpoints: {}
        CUDA: {}
    '''.format(epochs, batch_size, lr, len(_imgs), len(_imgsv), str(save_cp),
               str(gpu)))

    # Optimizer
    optimizer = optim.Adam(net.parameters(), lr=lr)
    # Binary cross-entropy loss
    criterion = nn.BCELoss()

    N_train = len(_imgs)
    for epoch in range(epochs):
        print('Starting epoch {}/{}.'.format(epoch + 1, epochs))
        net.train()
        epoch_loss = 0
        for i, (images, masks) in enumerate(trainloader):
            images = images.type(torch.FloatTensor)
            masks = masks.type(torch.FloatTensor)
            if gpu:
                images = images.cuda()
                masks = masks.cuda()

            masks_pred = net(images)
            masks_probs_flat = masks_pred.view(-1)
            true_masks_flat = masks.view(-1)
            loss = criterion(masks_probs_flat, true_masks_flat)
            epoch_loss += loss.item()
            print('{0:.4f} --- loss: {1:.6f}'.format(i * batch_size / N_train,
                                                     loss.item()))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Average over the number of batches, not the last batch index.
        print('Epoch finished ! Loss: {}'.format(epoch_loss / len(trainloader)))

        val_dice = eval_net(net, valloader, gpu)
        print('Validation Dice Coeff: {}'.format(val_dice))

        if save_cp and epoch % 100 == 0:
            torch.save(net.state_dict(),
                       dir_checkpoint + 'CP{}.pth'.format(epoch + 1))
            print('Checkpoint {} saved !'.format(epoch + 1))
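# A minimal usage sketch for train_net, assuming a UNet-style `net` whose
# final layer applies a sigmoid (required by nn.BCELoss). The UNet class and
# its constructor arguments are illustrative, not taken from this repo.
if __name__ == '__main__':
    net = UNet(n_channels=1, n_classes=1)
    gpu = torch.cuda.is_available()
    if gpu:
        net.cuda()
    train_net(net, epochs=50, batch_size=4, lr=1e-3, gpu=gpu)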
args = parser.parse_args()

# ---------------------------------------------------------------
use_cuda = torch.cuda.is_available()
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
# model_name = 'ResNet20'
# model_name = 'convnet'
model_name = 'tucker_CIFARNet'
source_dataset = 'CIFAR9'
target_dataset = 'STL9'
# source_dataset = 'STL9'
# target_dataset = 'CIFAR9'
# ----------------------------------------------------------------

source_train_loader = get_dataloader(source_dataset, 'train', 128)
source_test_loader = get_dataloader(source_dataset, 'test', 128)
target_test_loader = get_dataloader(target_dataset, 'test', 128)

# Model
if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/%s_%s_ckpt.t7' % (model_name, source_dataset))
    net = checkpoint['net']
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']
else:
    net = CIFARNet()
    net.load_state_dict(torch.load('ft'))
pretrain_path = './checkpoint/tucker_CIFARNet9.pth'
source_dataset_name = 'CIFAR9'
target_dataset_name = 'STL9'
n_epoch = 10
# ---------------------------------

net = CIFARNet()
pretrain_param = torch.load(pretrain_path)
net.load_state_dict(pretrain_param)
if use_cuda:
    net.cuda()

# source_train_loader = get_dataloader(source_dataset_name, 'train', 128)
# source_test_loader = get_dataloader(source_dataset_name, 'test', 128)
target_train_loader = get_dataloader(target_dataset_name, 'train', 128)
target_test_loader = get_dataloader(target_dataset_name, 'test', 128)
# validate(net, target_test_loader)

# Select trainable parameters, skipping the listed decomposed-layer weights
trainable_parameters = list()
for name, param in net.named_parameters():
    if ('features.0.1.weight' in name or 'features.3.1.weight' in name
            or 'features.6.1.weight' in name):
        continue
    trainable_parameters.append(param)

optimizer = optim.Adam(trainable_parameters, lr=1e-3)
# optimizer = optim.Adam(net.parameters(), lr=1e-3)
pretrain_path = '%s/%s-%s-pretrain.pth' % (save_root, model_name, dataset_name)
net.load_state_dict(torch.load(pretrain_path), strict=False)

# Get layer name list
layer_name_list = net.layer_name_list
assert (len(layer_name_list) == gVar.meta_count)
print('Layer name list completed.')

if use_cuda:
    net.cuda()

################
# Load Dataset #
################
train_loader = get_dataloader(dataset_name, 'train', batch_size)
test_loader = get_dataloader(dataset_name, 'test', 100)

##########################
# Construct Meta-Network #
##########################
if meta_method == 'LSTMFC':
    meta_net = MetaLSTMFC(hidden_size=hidden_size)
    SummaryPath = '%s/runs-Quant/Meta-%s-Nonlinear-%s-' \
                  'hidden-size-%d-nlstm-1-%s-%s-%dbits-lr-%s' \
                  % (save_root, meta_method, args.meta_nonlinear, hidden_size,
                     quantized_type, optimizer_type, bitW, lr_adjust)
elif meta_method in ['FC-Grad']:
    meta_net = MetaFC(hidden_size=hidden_size,
                      use_nonlinear=args.meta_nonlinear)
    SummaryPath = '%s/runs-Quant/Meta-%s-Nonlinear-%s-' \
from torchvision import transforms

from utils.opt import Options
from utils.dataset import get_datasets, get_dataloader

opt = Options().parse()

transform = transforms.Compose([
    transforms.ToTensor()
])

datasets = get_datasets(opt.data_dir, transform=transform)
data = get_dataloader(data=datasets, batch_size=4, shuffle=True, num_workers=4)
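# A minimal sketch of consuming the loader built above, assuming it yields
# (inputs, targets) pairs as a standard torch DataLoader does.
for inputs, targets in data:
    print(inputs.shape, targets.shape)
    break  # inspect only the first batch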
                    type=str, default='CIFAR10', help='Dataset')
args = parser.parse_args()

use_cuda = torch.cuda.is_available()
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
model_name = args.model
dataset_name = args.dataset
pretrain_path = './Results/%s-%s/%s-%s-retrain.pth' % (
    model_name, dataset_name, model_name, dataset_name)

# Data
print('==> Preparing data..')
testloader = get_dataloader(dataset_name, 'test', 100)

if model_name == 'CIFARNet':
    if dataset_name in ['CIFAR10', 'STL10']:
        net = CIFARNet(num_classes=10)
    elif dataset_name in ['CIFAR9', 'STL9']:
        net = CIFARNet(num_classes=9)
    else:
        raise NotImplementedError('%s on %s has not been implemented'
                                  % (model_name, dataset_name))
else:
    raise NotImplementedError

net.load_state_dict(torch.load(pretrain_path))
if use_cuda:
    net.cuda()
    net = torch.nn.DataParallel(net,
        if batch_idx % 10 == 0:
            print('[%s] Now finish image No. %d / %d'
                  % (datetime.now(), batch_idx * batch_size,
                     n_batch_used * batch_size))
        if batch_idx == n_batch_used:
            break

    net.train()
    return (1.0 / n_batch_used) * layer_hessian


if __name__ == '__main__':
    use_cuda = torch.cuda.is_available()
    # hessian_loader = data_loading('/git/data', 'CIFAR10', 'val', 2)
    hessian_loader = get_dataloader("CIFAR10", 'val', batch_size=2)
    print('Length of hessian loader: %d' % (len(hessian_loader)))

    ################
    # Load Models ##
    ################
    net = NetWork()
    # net.load_state_dict(torch.load(pretrain_path))

    if use_cuda:
        print('Dispatch model in %d GPUs' % (torch.cuda.device_count()))
        net.cuda()
        net = torch.nn.DataParallel(net,
                                    device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = True
# # del state
# model = model.eval()
# find_best_threshold_binary(np.arange(0.05, 1, 0.05), model, dataloader)
# ------------------------------------------------------------------------------------------------------------------------------------------
df = read_dataset(
    '../dataset/train.csv',
    '../dataset/train_images',
)
# df = df.dropna(subset=[1, 2, 3, 4], how='all')
dataloader = get_dataloader(df,
                            transforms,
                            batch_size=2,
                            shuffle=False,
                            num_workers=6,
                            phase='valid',
                            catalyst=False,
                            pin_memory=False,
                            binary=False,
                            multi=False)

from stage_experiments.transforms_1_7.model import Model

# Load model
model = Model()
# model = smp.Unet('resnet50', encoder_weights=None, classes=4, activation=None)
state = torch.load(
    '/home/druzhinin/HDD/kaggle/kaggle_severstal/logdir/1.1.resnet50_stages_transforms/checkpoints/best.pth'
)
# model = UNet16(4, pretrained=True).cuda().eval()
# state = torch.load('/home/druzhinin/HDD/kaggle/kaggle_severstal/logdir/1.5.ternausnet/checkpoints/best.pth')
model.load_state_dict(state['model_state_dict'])
# train on device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if torch.cuda.is_available():
    print("Current device",
          torch.cuda.get_device_name(torch.cuda.current_device()))

try:
    artifact.add_dir(DATA_PATH)
    run.log_artifact(artifact)
except Exception:
    artifact = run.use_artifact('Spectrograms:latest', type='Dataset')
    artifact_dir = artifact.download(DATA_PATH)

# load dataset
dataset = SpeechDataset(DATA_PATH)
trainloader, validloader = get_dataloader(dataset=dataset,
                                          batch_size=args.batchsize)

# get model and define loss func, optimizer
n_classes = N_CLASSES
epochs = args.epoch
tag = 'Unet'

if args.model == 'Unet':
    model = UNet(start_fm=args.startfm).to(device)
else:
    tag = 'UnetRes'
    model = UNet_ResNet(dropout=args.dropout, start_fm=args.startfm).to(device)

run.tags = [tag]
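# A minimal sketch of the training loop this setup feeds into, assuming the
# loaders yield (spectrogram, mask) pairs and that `run` is the active wandb
# run; the criterion, optimizer, and logged keys are illustrative.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
for epoch in range(epochs):
    model.train()
    for specs, masks in trainloader:
        specs, masks = specs.to(device), masks.to(device)
        optimizer.zero_grad()
        loss = criterion(model(specs), masks)
        loss.backward()
        optimizer.step()
    run.log({'epoch': epoch, 'train_loss': loss.item()})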
def train_epoch(model, criterion, optimizer, epoch, epoch_loss_weights_dict):
    """
    Train the model for a single epoch.
    :param model: the model
    :param criterion: the loss function
    :param optimizer: the optimizer driving gradient descent
    :param epoch: the index of the current epoch
    :param epoch_loss_weights_dict: provides the dynamic loss-weight coefficients
    """
    # --- training mode --- #
    model.train()
    epoch_train_label_dict = {}  # all train labels collected over the epoch
    epoch_train_pred_label_dict = {}
    epoch_train_loss_dict = {}
    epoch_train_num_dict = {}
    # Initialize the per-epoch dictionaries
    for file_name in config.file_name_list:
        epoch_train_label_dict[file_name] = []
        epoch_train_pred_label_dict[file_name] = []
        epoch_train_loss_dict[file_name] = 0
        epoch_train_num_dict[file_name] = 0

    # Build indices over the sub-datasets
    train_file_num = len(
        os.listdir(
            os.path.join(config.train_split_dir, config.file_name_list[0])))
    index_list1 = list(range(1, train_file_num + 1))
    index_list2 = list(range(1, train_file_num + 1))
    index_list3 = list(range(1, train_file_num + 1))
    # Shuffle the indices of the three datasets so the training order
    # differs between epochs.
    random.shuffle(index_list1)
    random.shuffle(index_list2)
    random.shuffle(index_list3)

    # Train on the sub-datasets, then aggregate the results
    data_iter = tqdm(enumerate(zip(index_list1, index_list2, index_list3)),
                     total=len(index_list1))
    block_loss_weight = BlockLossWeight()
    for set_idx, index_list in data_iter:
        block_loss_weights_dict = block_loss_weight.run()
        result_loss_weight_dict = {}
        for file_name1 in config.file_name_list:
            value1 = epoch_loss_weights_dict[file_name1]
            value2 = block_loss_weights_dict[file_name1]
            result_loss_weight_dict[file_name1] = 0.5 * value1 + 0.5 * value2
        for file_name, i in zip(config.file_name_list, index_list):
            batch_size = config.batch_size_dict[file_name]
            file_path = os.path.join(config.train_split_dir, file_name, f'{i}.csv')
            sub_dataloader = get_dataloader(file_path, batch_size, True)
            describe = f'train epoch: {epoch}/{config.epochs} dataset: {file_name}: {set_idx}/{train_file_num}'
            step_grad = config.gradient_accumulation_step_dict[file_name]
            train_loss, train_correct, train_num, train_pred_list, train_label_list = \
                normal_block(sub_dataloader, model, criterion, True, data_iter,
                             step_grad, describe, file_name,
                             result_loss_weight_dict, optimizer)
            epoch_train_label_dict[file_name].extend(train_label_list)
            epoch_train_pred_label_dict[file_name].extend(train_pred_list)
            epoch_train_loss_dict[file_name] += train_loss
            epoch_train_num_dict[file_name] += train_num

            # --- Provisional: save per-block data and compute per-block loss weights
            block_train_loss = train_loss / train_num
            block_train_acc, block_train_f1_score = classification_metrics(
                train_label_list, train_pred_list)
            # Output path
            out_dir = os.path.join(config.out_data_dir, 'block_dir')
            if not os.path.exists(out_dir):
                os.mkdir(out_dir)
            out_path = os.path.join(out_dir, f'out_{file_name}.csv')
            save_block_csv(set_idx, epoch, block_train_loss, block_train_acc,
                           block_train_f1_score, out_path)
        # # Temporary early break for testing the pipeline and finding hidden bugs
        # break

    # Compute loss/acc/f1 for each dataset; the overall figures can be
    # obtained by averaging, and each per-dataset value is kept.
    train_loss_list = []
    train_acc_list = []
    train_f1_score_list = []
    for file_name in config.file_name_list:
        epoch_train_loss = epoch_train_loss_dict[file_name] / epoch_train_num_dict[file_name]
        epoch_train_acc, epoch_train_f1_score = classification_metrics(
            epoch_train_label_dict[file_name],
            epoch_train_pred_label_dict[file_name])
        train_loss_list.append(epoch_train_loss)
        train_acc_list.append(epoch_train_acc)
        train_f1_score_list.append(epoch_train_f1_score)
    return train_loss_list, train_acc_list, train_f1_score_list
from models_CIFAR9STL9.CIFARNet import CIFARNet
from utils.dataset import get_dataloader
from utils.train import progress_bar, train, validate

# --------------------------
use_cp = False
decomposed_path = './checkpoint/%s_CIFARNet9.p' % ('cp' if use_cp else 'tucker')
use_cuda = torch.cuda.is_available()
n_epoch = 10
# --------------------------

# net = CIFARNet()
net = torch.load(decomposed_path)
train_loader = get_dataloader('CIFAR9', 'train', 128)
test_loader = get_dataloader('CIFAR9', 'test', 128)
target_test_loader = get_dataloader('STL9', 'test', 128)

if use_cuda:
    net.cuda()

print('First validation')
validate(net, test_loader)

optimizer = optim.Adam(net.parameters(), lr=0.001)
# optimizer = optim.SGD(net.parameters(), lr=0.01)
train(net, train_loader, optimizer, n_epoch=n_epoch, val_loader=test_loader)
# for epoch in range(n_epoch):

torch.save(net, './checkpoint/ft_%s_CIFARNet9.p' % ('cp' if use_cp else 'tucker'))

print('Target test')
""" This code validate the *.pth file when generated """ import torch from utils.dataset import get_dataloader from utils.train import validate from models_CIFAR9STL9.tucker_CIFARNet_dual import CIFARNet # from models_CIFAR9STL9.tucker_CIFARNet_dual import CIFARNet2 as CIFARNet # Initial model net = CIFARNet() pretrain_param = torch.load('./checkpoint/tucker_CIFARNet9_dual.pth') # pretrain_param = torch.load('./checkpoint/tucker_CIFARNet9_dual_2.pth') net.load_state_dict(pretrain_param) # Load dataset test_loader = get_dataloader('STL9', 'test', 100) net.cuda() validate(net, test_loader)
def __init__(self, task_name, task_type='prune', optimizer_type='adam',
             save_root=None, SummaryPath=None, use_cuda=True, **kwargs):

    self.task_name = task_name
    self.task_type = task_type  # prune, soft-quantize
    self.model_name, self.dataset_name = task_name.split('-')
    self.ratio = 'sample' if self.dataset_name in ['CIFARS'] else -1

    #######
    # Net #
    #######
    if task_type == 'prune':
        if self.model_name == 'ResNet20':
            if self.dataset_name in ['CIFAR10', 'CIFARS']:
                self.net = resnet20_cifar()
            elif self.dataset_name == 'STL10':
                self.net = resnet20_stl()
            else:
                raise NotImplementedError
        elif self.model_name == 'ResNet32':
            if self.dataset_name in ['CIFAR10', 'CIFARS']:
                self.net = resnet32_cifar()
            elif self.dataset_name == 'STL10':
                self.net = resnet32_stl()
            else:
                raise NotImplementedError
        elif self.model_name == 'ResNet56':
            if self.dataset_name in ['CIFAR10', 'CIFARS']:
                self.net = resnet56_cifar()
            elif self.dataset_name == 'CIFAR100':
                self.net = resnet56_cifar(num_classes=100)
            elif self.dataset_name == 'STL10':
                self.net = resnet56_stl()
            else:
                raise NotImplementedError
        elif self.model_name == 'ResNet18':
            if self.dataset_name == 'ImageNet':
                self.net = resnet18()
            else:
                raise NotImplementedError
        elif self.model_name == 'vgg11':
            self.net = vgg11() if self.dataset_name == 'CIFAR10' else vgg11_stl10()
        else:
            print(self.model_name, self.dataset_name)
            raise NotImplementedError
    elif task_type == 'soft-quantize':
        if self.model_name == 'ResNet20':
            if self.dataset_name in ['CIFAR10', 'CIFARS']:
                self.net = soft_quantized_resnet20_cifar()
            elif self.dataset_name in ['STL10']:
                self.net = soft_quantized_resnet20_stl()
            else:
                raise NotImplementedError
    else:
        raise NotImplementedError('Task type not defined.')

    self.meta_opt_flag = True  # True for enabling meta learning

    ##############
    # Meta Prune #
    ##############
    self.mask_dict = dict()
    self.meta_grad_dict = dict()
    self.meta_hidden_state_dict = dict()

    ######################
    # Meta Soft Quantize #
    ######################
    self.quantized = 0  # Quantized type
    self.alpha_dict = dict()
    self.alpha_hidden_dict = dict()
    self.sq_rate = 0
    self.s_rate = 0
    self.q_rate = 0

    ##########
    # Record #
    ##########
    self.dataset_type = 'large' if self.dataset_name in ['ImageNet'] else 'small'
    self.SummaryPath = SummaryPath
    self.save_root = save_root
    self.recorder = Recorder(self.SummaryPath, self.dataset_name, self.task_name)

    ####################
    # Load Pre-trained #
    ####################
    self.pretrain_path = '%s/%s-pretrain.pth' % (self.save_root, self.task_name)
    self.net.load_state_dict(torch.load(self.pretrain_path))
    print('Load pre-trained model from %s' % self.pretrain_path)

    if use_cuda:
        self.net.cuda()

    # Optimizer for this task
    if optimizer_type in ['Adam', 'adam']:
        self.optimizer = Adam(self.net.parameters(), lr=1e-3)
    else:
        self.optimizer = SGD(self.net.parameters())

    if self.dataset_name == 'ImageNet':
        try:
            self.train_loader = get_lmdb_imagenet('train', 128)
            self.test_loader = get_lmdb_imagenet('test', 100)
        except Exception:
            self.train_loader = get_dataloader(self.dataset_name, 'train', 128)
            self.test_loader = get_dataloader(self.dataset_name, 'test', 100)
    else:
        self.train_loader = get_dataloader(self.dataset_name, 'train', 128,
                                           ratio=self.ratio)
        self.test_loader = get_dataloader(self.dataset_name, 'test', 128)
    self.iter_train_loader = yielder(self.train_loader)
if __name__ == '__main__':
    # hyper-parameters for Network
    lr = 1e-3
    b1 = 0.5
    b2 = 0.99
    weight_decay = 2.5 * 1e-5
    epoch = 500
    batch_size = 128
    data_name = 'cmn'
    hidden_size = 64
    num_layers = 1

    # get dataloader
    dataloader, input_size, input_decoder_size = get_dataloader(
        root=DATASETS_DIR, data_name=data_name, batch_size=batch_size)

    # initialization parameters
    translator = Translator(input_size=input_size,
                            input_decoder_size=input_decoder_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers).to(DEVICE)
    xe_loss = nn.CrossEntropyLoss().to(DEVICE)
    optimization = optim.Adam(translator.parameters(),
                              lr=lr,
                              betas=(b1, b2),
                              weight_decay=weight_decay)

    # begin training
    print(
        'translation system ==> data name is: {}, begin training......'.format(
    '3': [22, 22],
    '6': [48, 48]
}
'''
N = len(model.features._modules.keys())
for i, key in enumerate(model.features._modules.keys()):
    # if i >= N - 2:
    #     break
    if isinstance(model.features._modules[key], torch.nn.modules.conv.Conv2d):
        conv_layer = model.features._modules[key]
        if use_cp:
            rank = max(conv_layer.weight.data.numpy().shape) // 2
            decomposed = cp_decomposition_conv_layer(conv_layer, rank)
        else:
            decomposed = tucker_decomposition_conv_layer(conv_layer, None)
        model.features._modules[key] = decomposed

'''
for i, key in enumerate(model.classifier._modules.keys()):
    if isinstance(model.classifier._modules[key], nn.Linear):
        fc_linear = model.classifier._modules[key]
'''

# torch.save(model, './checkpoint/%s_CIFARNet9.p' % ('cp' if use_cp else 'tucker'))
test_loader = get_dataloader('CIFAR9', 'test', 128)
model.cuda()
validate(model, test_loader)
def test_model(model, out_dir, out_file):
    # -- test the model -- #
    model.eval()
    device = torch.device('cuda' if torch.cuda.is_available() else "cpu")
    # Directory for the prediction files to be submitted
    predict_dir = os.path.join(config.out_data_dir, out_dir)
    if not os.path.exists(predict_dir):
        os.mkdir(predict_dir)
    else:
        file_list = os.listdir(predict_dir)
        file_path_list = [os.path.join(predict_dir, file) for file in file_list]
        for file_path in file_path_list:
            os.remove(file_path)

    with torch.no_grad():
        for file_name in config.file_name_list:
            batch_size = config.batch_size_dict[file_name]
            file_path = os.path.join(config.processed_test, f'{file_name}.csv')
            test_dataloader = get_dataloader(file_path, batch_size, False)
            data_iter = tqdm(enumerate(test_dataloader),
                             total=len(test_dataloader))
            describe = f'testing the model, dataset: {file_name}\t'
            total_pred_list = []  # collect all predictions
            for idx, (input_id, segment_id, input_mask) in data_iter:
                input_id, segment_id, input_mask = input_id.to(device), \
                    segment_id.to(device), input_mask.to(device)
                label_num = config.label_num_dict[file_name]
                # Must be moved to the device, otherwise the forward pass fails
                label_num = torch.from_numpy(np.array([label_num])).to(device)
                outputs = model(label_num, input_id, segment_id, input_mask)
                pred_list = torch.argmax(outputs, 1)
                # collect all predictions
                total_pred_list.extend(pred_list.detach().cpu().data.numpy())
                data_iter.set_description(describe +
                                          f'{idx}/{len(test_dataloader)}')

            # Map the predicted ids back to labels
            with open(os.path.join(config.out_data_dir, 'id2label.json'),
                      'rt', encoding='utf-8') as f:
                id2label = json.load(f)
            pred_label_list = [
                id2label[file_name][str(pred)] for pred in total_pred_list
            ]

            # Write the predictions to json
            file_path = os.path.join(predict_dir,
                                     '{}_predict.json'.format(file_name.lower()))
            with open(file_path, 'wt', encoding='utf-8') as f1:
                for idx, pred_label in enumerate(pred_label_list):
                    label_dict = {"id": str(idx), "label": str(pred_label)}
                    f1.write(json.dumps(label_dict, ensure_ascii=False))
                    if idx < len(pred_label_list) - 1:
                        f1.write('\n')

    # Zip the output directory
    out_zip_file = os.path.join(config.out_data_dir, out_file)
    if os.path.exists(out_zip_file):
        os.remove(out_zip_file)
    zip_str = 'cd {}&&zip -q {} ./*'.format(predict_dir, out_zip_file)
    print('zipping: ', zip_str)
    os.system(zip_str)
    print('zip finished')
parser.add_argument('--lr', default=0.1, type=float, help='learning rate')
parser.add_argument('--resume', '-r', action='store_true',
                    help='resume from checkpoint')
args = parser.parse_args()

use_cuda = torch.cuda.is_available()
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
model_name = 'resnet56'

# Data
print('==> Preparing data..')
trainloader = get_dataloader('CIFAR10', 'train', 128)
testloader = get_dataloader('CIFAR10', 'test', 100)

# Model
if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/%s_ckpt.t7' % model_name)
    net = checkpoint['net']
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']
else:
    print('==> Building model..')
    # net = VGG(model_name)
    # net = ResNet18()
            args.optimizer, args.bitW, args.bitA, args.lr_adjust, args.max_epoch,
            '-%s' % args.exp_spec if args.exp_spec is not None else ''
        )
    )
else:
    ckpt_path = args.ckpt_path

save_root = './Results/%s-%s/dorefa/%s-bitW-%d-bitA-%d-lr-adjust-%d-epoch-%d%s%s' % (
    model_name, dataset_name, args.optimizer, args.bitW, args.bitA,
    args.lr_adjust, args.max_epoch,
    "-pretrain" if args.pretrain else "",
    '-%s' % args.exp_spec if args.exp_spec is not None else ''
)

# Data
print('==> Preparing data..')
train_loader = get_dataloader(dataset_name, 'train', 128)
test_loader = get_dataloader(dataset_name, 'test', 100)

if dataset_name in ['CIFAR10', 'STL10']:
    num_classes = 10
elif dataset_name in ['CIFAR100']:
    num_classes = 100
else:
    raise ValueError('Dataset %s not supported.' % dataset_name)

# Model
print('==> Building model..')
if model_name == 'ResNet20':
    net = resnet20_cifar(num_classes=num_classes, bitW=args.bitW, bitA=args.bitA)
elif model_name == 'ResNet32':
    net = resnet32_cifar(num_classes=num_classes, bitW=args.bitW, bitA=args.bitA)
    if name in state_dict:
        state_dict[name] = param
    else:
        split_name = name.split('.')
        new_name = split_name[0] + '_' + split_name[1] + '_' + \
            split_name[2] + '.' + split_name[3]
        if new_name in state_dict:
            state_dict[new_name] = param
        else:
            print('Param not found in state_dict: %s' % new_name)
            new_name_t = split_name[0] + '_' + split_name[1] + '_' + \
                split_name[2] + '_t.' + split_name[3]
            if new_name_t in state_dict:
                state_dict[new_name_t] = param

torch.save(state_dict, './checkpoint/tucker_CIFARNet9_dual_2.pth')
# torch.save(state_dict, './checkpoint/tucker_CIFARNet9_dual.pth')
# torch.save(state_dict, './checkpoint/tucker_CIFARNet9.pth')

# For verification
net.load_state_dict(state_dict)
net.cuda()
decompose_net.cuda()

source_test_loader = get_dataloader('CIFAR9', 'test', 100)
target_test_loader = get_dataloader('STL9', 'test', 100)

print('Test with target dataset')
test(net, True, target_test_loader)
print('Test with source dataset')
test(net, False, source_test_loader)
# test(decompose_net, test_loader)
def resnet1001_cifar(**kwargs):
    model = ResNet_Cifar(Bottleneck, [111, 111, 111], **kwargs)
    return model


def preact_resnet110_cifar(**kwargs):
    model = PreAct_ResNet_Cifar(PreActBasicBlock, [18, 18, 18], **kwargs)
    return model


def preact_resnet164_cifar(**kwargs):
    model = PreAct_ResNet_Cifar(PreActBottleneck, [18, 18, 18], **kwargs)
    return model


def preact_resnet1001_cifar(**kwargs):
    model = PreAct_ResNet_Cifar(PreActBottleneck, [111, 111, 111], **kwargs)
    return model


if __name__ == '__main__':
    # net = preact_resnet110_cifar()
    # net = resnet20_cifar()
    # y = net(torch.autograd.Variable(torch.randn(1, 3, 32, 32)))
    # print(net)
    # print(y.size())
    from utils.dataset import get_dataloader
    cifar100 = get_dataloader('CIFAR100', 'train', 128)
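    # A minimal smoke test for the factory functions above, assuming
    # CIFAR-shaped inputs (3x32x32); it mirrors the commented-out check
    # at the top of this __main__ block.
    net = preact_resnet110_cifar()
    y = net(torch.randn(1, 3, 32, 32))
    print(y.size())  # expected: torch.Size([1, num_classes])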
            break

    net.train()
    return (1.0 / n_batch_used) * layer_hessian


if __name__ == '__main__':
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    from models_ImageNet.resnet_layer_input import resnet18 as NetWork
    from utils.dataset import get_dataloader

    use_cuda = torch.cuda.is_available()
    hessian_loader = get_dataloader("ImageNet", 'val', batch_size=2, length=10000)
    print('Length of hessian loader: %d' % (len(hessian_loader)))

    ################
    # Load Models ##
    ################
    net = NetWork()
    # net.load_state_dict(torch.load(pretrain_path))

    if use_cuda:
        print('Dispatch model in %d GPUs' % (torch.cuda.device_count()))
        net.cuda()
        net = torch.nn.DataParallel(net,
                                    device_ids=range(torch.cuda.device_count()))
        cudnn.benchmark = True

    layer_collection_list = [('module.fc', '')]
source_pretrain_param = torch.load(source_pretrain_path)
target_pretrain_param = torch.load(target_pretrain_path)
source_net.load_state_dict(source_pretrain_param)
target_net.load_state_dict(target_pretrain_param)

overall_CR = get_sparse_param_analysis(target_net, CR_ratio)

if use_cuda:
    source_net.cuda()
    target_net.cuda()

################
# Load Dataset #
################
source_loader = get_dataloader(source_dataset_name, 'train', 128)
target_loader = get_dataloader(target_dataset_name, 'train', 128)
source_test_loader = get_dataloader(source_dataset_name, 'test', 100)
target_test_loader = get_dataloader(target_dataset_name, 'test', 100)

#####################
# Initial mask dict #
#####################
source_mask_dict = dict()
target_mask_dict = dict()

#####################
# Initial Recording #
#####################
source_summary_path = '%s/runs-%s/CR%.2f' % (save_root, source_dataset_name,
                                             100 * overall_CR)
target_summary_path = '%s/runs-%s/CR%.2f' % (save_root, target_dataset_name,
                                             100 * overall_CR)
kbits = args.kbits
trainable_names_record_root = './%s/trainable_names/LDNQ%s' % (model_name, exp_spec)
train_record = open('./%s/train_record/LDNQ%s.txt' % (model_name, exp_spec), 'w')
val_record = open('./%s/val_record/LDNQ%s.txt' % (model_name, exp_spec), 'w')
init_lr = 0.001
# --------------------------------------------------------------------
print('You are going to quantize model %s into %d bits, using dataset %s, '
      'with specification name %s' % (model_name, kbits, dataset_name, exp_spec))
input('Press any key to continue. Ctrl+C to break.')

################
# Load Dataset #
################
train_loader = get_dataloader(dataset_name, 'limited', batch_size=128, ratio=0.01)
print('Length of train loader: %d' % (len(train_loader)))
hessian_loader = get_dataloader(dataset_name, 'limited', batch_size=2)
print('Length of hessian loader: %d' % (len(hessian_loader)))
test_loader = get_dataloader(dataset_name, 'test', batch_size=100)

################
# Load Models ##
################
quantized_net = resnet20_cifar()
# quantized_net = resnet18()  # For quantization of ResNet18 using ImageNet
pretrain_param = torch.load(pretrain_path)
quantized_net.load_state_dict(pretrain_param)

original_net = resnet20_cifar()