def example2():
    """Train a ConvNet on preprocessed tensors loaded via ConfigManager.

    Loads `processed_x.pt` / `processed_y.pt` from the configured dataset
    path, trains with MSE loss for a fixed number of epochs, then saves the
    whole model object to disk.
    """
    cm = ConfigManager('testset')
    imgs = DataLoader.get_images_objects(cm.get_dataset_path(),
                                         'processed_x.pt',
                                         'processed_y.pt',
                                         to_tensor=True)
    dm = DatasetsManager(cm, imgs)

    n_output = 2
    net = ConvNet(n_output)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    loss_function = nn.MSELoss()

    EPOCHS = 10
    BATCH_SIZE = 128

    print('Start training')
    for epoch in range(EPOCHS):
        for k in tqdm(range(0, len(dm.train), BATCH_SIZE)):
            # get_x returns a list of tensors; concatenate into one batch.
            batch_x = torch.cat(dm.train.get_x(start=k, end=k + BATCH_SIZE),
                                dim=0)
            batch_y = torch.Tensor(dm.train.get_y(start=k, end=k + BATCH_SIZE))
            net.zero_grad()
            out = net(batch_x)
            loss = loss_function(out, batch_y)
            loss.backward()
            optimizer.step()
        print(f'Epoch: {epoch}. Loss: {loss}')

    # The evaluation loop was removed (it was commented out); guard the
    # division so an empty evaluation no longer raises ZeroDivisionError.
    correct = 0
    total = 0
    if total:
        print('Accuracy: ', round(correct / total, 3))
    torch.save(net, 'data/cnn_cats_dogs_model.pt')
def baseline_fitness(state_dict, num_epochs=600):
    """Rebuild a ConvNet from `state_dict`, retrain it, and log test accuracy.

    The network architecture is inferred from the state dict: 4-D weight
    tensors are conv layers, 2-D weight tensors are fully-connected layers.
    Per-round test accuracies are plotted and appended to
    `baseline_result.csv`.

    Args:
        state_dict: model state dict whose weights seed the new network.
        num_epochs: number of train/test rounds to run.
    """
    # Hyper parameters
    param = {
        'batch_size': 4,
        'test_batch_size': 50,
        'num_epochs': num_epochs,
        'learning_rate': 0.001,
        'weight_decay': 5e-4,
    }

    # Conv layers have 4-D weights, fully-connected layers have 2-D weights.
    num_cnn_layer = sum(int(len(v.size()) == 4) for v in state_dict.values())
    num_fc_layer = sum(int(len(v.size()) == 2) for v in state_dict.values())
    state_key = list(state_dict.keys())

    # cfg = [input channels of first layer, out channels of every layer].
    cfg = []
    first = True
    for v in state_dict.values():
        if len(v.data.size()) in (4, 2):
            if first:
                first = False
                cfg.append(v.data.size()[1])
            cfg.append(v.data.size()[0])

    assert num_cnn_layer + num_fc_layer == len(cfg) - 1

    net = ConvNet(cfg, num_cnn_layer)

    # Load the provided weights into the freshly built network
    # (parameter order is assumed to match the state dict's key order).
    for i, p in enumerate(net.parameters()):
        p.data = state_dict[state_key[i]]

    ## Retraining
    loader_train, loader_test = load_dataset()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(net.parameters(),
                                    lr=param['learning_rate'],
                                    weight_decay=param['weight_decay'])

    test_acc_list = []
    for _ in range(num_epochs):
        # NOTE(review): each round trains for 10 inner epochs regardless of
        # `num_epochs` — confirm this staging is intended.
        param['num_epochs'] = 10
        train(net, criterion, optimizer, param, loader_train)
        test_acc_list.append(test(net, loader_test))

    plt.plot(test_acc_list)
    with open('baseline_result.csv', 'a', newline='') as csvfile:
        writer = csv.writer(csvfile)
        for acc in test_acc_list:
            writer.writerow([acc])
def _prune_mask(p_np, threshold=1e-4):
    """Build a float32 pruning mask for one weight array.

    For 4-D conv weights, a filter connection (j, k) is pruned — its whole
    kernel zeroed — when the summed |weight| of that kernel is below
    `threshold`. For 2-D fully-connected weights, individual entries with
    |weight| below `threshold` are pruned. Returns None for other shapes
    (e.g. biases), which carry no mask.
    """
    if p_np.ndim == 4:
        # (out, in) saliency: summed |weight| over each spatial kernel.
        saliency = np.abs(p_np).sum(axis=(2, 3))
    elif p_np.ndim == 2:
        saliency = np.abs(p_np)
    else:
        return None
    mask = np.ones(p_np.shape).astype('float32')
    # Boolean-mask assignment: for 4-D weights this zeroes the whole
    # [j, k, :, :] kernel, matching the original nested-loop behavior.
    mask[saliency < threshold] = 0.
    return mask


def retrain(state_dict, part=1, num_epochs=5):
    """Rebuild a ConvNet from `state_dict`, prune near-zero weights,
    retrain, and return the updated state dict.

    The architecture is inferred from the state dict (4-D tensors = conv
    layers, 2-D tensors = fully-connected layers). Weights whose magnitude
    is effectively zero are masked out before retraining.

    Args:
        state_dict: model state dict; updated in place and returned.
        part: forwarded to the ConvNet constructor.
        num_epochs: training epochs for the retraining phase.
    """
    # Hyper parameters
    param = {
        'batch_size': 4,
        'test_batch_size': 50,
        'num_epochs': num_epochs,
        'learning_rate': 0.001,
        'weight_decay': 5e-4,
    }

    num_cnn_layer = sum(int(len(v.size()) == 4) for v in state_dict.values())
    num_fc_layer = sum(int(len(v.size()) == 2) for v in state_dict.values())
    state_key = list(state_dict.keys())

    # cfg = [input channels of first layer, out channels of every layer].
    cfg = []
    first = True
    for v in state_dict.values():
        if len(v.data.size()) in (4, 2):
            if first:
                first = False
                cfg.append(v.data.size()[1])
            cfg.append(v.data.size()[0])

    assert num_cnn_layer + num_fc_layer == len(cfg) - 1

    net = ConvNet(cfg, num_cnn_layer, part)

    # Load weights and build pruning masks (vectorized; replaces the
    # original O(out*in) Python nested loops with identical results).
    masks = []
    for i, p in enumerate(net.parameters()):
        p.data = state_dict[state_key[i]]
        mask = _prune_mask(p.data.cpu().numpy())
        if mask is not None:
            masks.append(mask)

    net.set_masks(masks)

    ## Retraining
    loader_train, loader_test = load_dataset()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(net.parameters(),
                                    lr=param['learning_rate'],
                                    weight_decay=param['weight_decay'])
    train(net, criterion, optimizer, param, loader_train)

    # Copy the retrained weights back into the caller's state dict.
    for i, p in enumerate(net.parameters()):
        state_dict[state_key[i]] = p.data

    return state_dict
def example1():
    """Train a ConvNet and save the model.

    Reads data paths and split sizes from the module-level CONFIG, splits
    the data into eval/test/train partitions, trains with MSE loss for a
    fixed number of epochs, reports accuracy on the test split, and saves
    the whole model object.
    """
    DATASETS_DICT = './data'
    IMG_SIZE = CONFIG['img_size']

    x_train = DataLoader.load_npy(CONFIG['data']['x_path'])
    y_train = DataLoader.load_npy(CONFIG['data']['y_path'])
    x_train = torch.Tensor(x_train).view(-1, IMG_SIZE, IMG_SIZE)
    y_train = torch.Tensor(y_train)

    N_TRAIN = CONFIG['n_train']
    N_EVAL = CONFIG['n_eval']
    N_TEST = CONFIG['n_test']
    if N_TRAIN + N_EVAL + N_TEST > len(x_train):
        raise Exception('Not enough data!')

    # resnet50 works with 224, 244 input size
    n_output = 2
    net = ConvNet(n_output)
    optimizer = optim.Adam(net.parameters(), lr=1e-3)
    loss_function = nn.MSELoss()

    # Split: [0, N_EVAL) eval, [N_EVAL, N_EVAL + N_TEST) test,
    # then the next N_TRAIN samples for training.
    x_eval = x_train[:N_EVAL]
    y_eval = y_train[:N_EVAL]
    x_test = x_train[N_EVAL:N_EVAL + N_TEST]
    y_test = y_train[N_EVAL:N_EVAL + N_TEST]
    x_train = x_train[N_EVAL + N_TEST:N_EVAL + N_TEST + N_TRAIN]
    y_oracle = y_train[N_EVAL + N_TEST:N_EVAL + N_TEST + N_TRAIN]

    EPOCHS = 10
    BATCH_SIZE = 128

    print('Start training')
    for epoch in range(EPOCHS):
        for k in tqdm(range(0, len(x_train), BATCH_SIZE)):
            batch_x = x_train[k:k + BATCH_SIZE].view(-1, 1, IMG_SIZE, IMG_SIZE)
            batch_y = y_oracle[k:k + BATCH_SIZE]
            net.zero_grad()
            out = net(batch_x)
            loss = loss_function(out, batch_y)
            loss.backward()
            optimizer.step()
        print(f'Epoch: {epoch}. Loss: {loss}')

    correct = 0
    total = 0
    with torch.no_grad():
        for k in tqdm(range(len(x_test))):
            real_class = torch.argmax(y_test[k])
            net_out = net(x_test[k].view(-1, 1, IMG_SIZE, IMG_SIZE))[0]  # returns list
            predicted_class = torch.argmax(net_out)
            if predicted_class == real_class:
                correct += 1
            total += 1
    # Guard against an empty test split (N_TEST == 0) to avoid
    # ZeroDivisionError.
    if total:
        print('Accuracy: ', round(correct / total, 3))
    torch.save(net, f'{DATASETS_DICT}/cnn_rps_model.pt')
# Load the pretrained model
net = ConvNet()
net.load_state_dict(torch.load('models/convnet_pretrained.pkl'))
if torch.cuda.is_available():
    # Typo fix: message read 'CUDA ensabled.'
    print('CUDA enabled.')
    net.cuda()
print("--- Pretrained network loaded ---")
test(net, loader_test)

# Prune the weights: mask out `pruning_perc` percent of the filters,
# then measure the accuracy drop before retraining.
masks = filter_prune(net, param['pruning_perc'])
net.set_masks(masks)
print("--- {}% parameters pruned ---".format(param['pruning_perc']))
test(net, loader_test)

# Retraining to recover accuracy after pruning.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(net.parameters(), lr=param['learning_rate'],
                                weight_decay=param['weight_decay'])
train(net, criterion, optimizer, param, loader_train)

# Check accuracy and nonzero weights in each layer
print("--- After retraining ---")
test(net, loader_test)
prune_rate(net)

# Save the pruned model's weights.
torch.save(net.state_dict(), 'models/convnet_pruned.pkl')
# w.shape (output_channels, reshaped_inputs ) # w (10, 3136=7*7*64) # = (output_channels, size*size*input_channels) m1.weight.data = m0.weight.data[:, idx0_new.tolist()].clone() #m1.weight.data = m0.weight.data[:, idx0].clone() m1.bias.data = m0.bias.data.clone() print('m1.weight.data shape ', m1.weight.data.size()) layer_id_in_cfg += 1 continue m1.weight.data = m0.weight.data.clone() m1.bias.data = m0.bias.data.clone() num_parameters = sum([param.nelement() for param in new_net.parameters()]) # prune the weights #masks = filter_prune(net, param['pruning_perc']) #net.set_masks(masks) #print("--- {}% parameters pruned ---".format(param['pruning_perc'])) test(new_net, loader_test) # Retraining criterion = nn.CrossEntropyLoss() optimizer = torch.optim.RMSprop(new_net.parameters(), lr=param['learning_rate'], weight_decay=param['weight_decay']) train(new_net, criterion, optimizer, param, loader_train)
def main():
    """Train a proxy-embedding model (backbone + linear projection).

    Builds data loaders from the module-level dataframes `df_train` /
    `df_gal`, instantiates the backbone/embedding and proxy networks from
    the global `args`, then trains with SGD + cosine-annealed LR and
    evaluates on the test loader at the end.
    """
    # data normalization (ImageNet statistics)
    input_size = 224
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # data loaders
    kwargs = {'num_workers': 8, 'pin_memory': True} if args.cuda else {}

    if args.da:
        # Custom data augmentation pipeline (random_transform) when enabled.
        train_transforms = transforms.Compose([
            random_transform,
            transforms.ToPILImage(),
            transforms.Resize((input_size, input_size)),
            transforms.ToTensor(),
            normalize
        ])
    else:
        train_transforms = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((input_size, input_size)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ])

    test_transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((input_size, input_size)),
        transforms.ToTensor(),
        normalize
    ])

    # `DataLoader` here is the project's dataset class wrapped by
    # torch's DataLoader — not torch.utils.data.DataLoader itself.
    train_loader = torch.utils.data.DataLoader(
        DataLoader(df_train, train_transforms,
                   root=args.data_dir, mode=args.mode),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    test_loader = torch.utils.data.DataLoader(
        DataLoader(df_gal, test_transforms,
                   root=args.data_dir, mode=args.mode),
        batch_size=args.batch_size, shuffle=False, **kwargs)

    # instantiate the models
    output_shape, backbone = get_backbone(args)
    embed = LinearProjection(output_shape, args.dim_embed)
    model = ConvNet(backbone, embed)

    # instantiate the proxies from precomputed semantic embeddings
    fsem = get_semantic_fname(args.word)
    path_semantic = os.path.join('aux', 'Semantic', args.dataset, fsem)
    train_proxies = get_proxies(path_semantic, df_train['cat'].cat.categories)
    test_proxies = get_proxies(path_semantic, df_gal['cat'].cat.categories)

    train_proxynet = ProxyNet(args.n_classes, args.dim_embed,
                              proxies=torch.from_numpy(train_proxies))
    test_proxynet = ProxyNet(args.n_classes_gal, args.dim_embed,
                             proxies=torch.from_numpy(test_proxies))

    # criterion
    criterion = ProxyLoss(args.temperature)

    if args.multi_gpu:
        model = nn.DataParallel(model)

    if args.cuda:
        backbone.cuda()
        embed.cuda()
        model.cuda()
        train_proxynet.cuda()
        test_proxynet.cuda()

    # Two parameter groups: backbone at a scaled-down LR, embedding at
    # the full LR.
    parameters_set = []
    low_layers = []
    upper_layers = []
    for c in backbone.children():
        low_layers.extend(list(c.parameters()))
    for c in embed.children():
        upper_layers.extend(list(c.parameters()))
    parameters_set.append({
        'params': low_layers,
        'lr': args.lr * args.factor_lower
    })
    parameters_set.append({'params': upper_layers, 'lr': args.lr * 1.})

    optimizer = optim.SGD(parameters_set, lr=args.lr,
                          momentum=0.9, nesterov=True,
                          weight_decay=args.wd)

    n_parameters = sum([p.data.nelement() for p in model.parameters()])
    print(' + Number of params: {}'.format(n_parameters))

    # T_max is in optimizer steps (epochs * batches per epoch).
    scheduler = CosineAnnealingLR(optimizer, args.epochs * len(train_loader),
                                  eta_min=3e-6)

    print('Starting training...')
    for epoch in range(args.start_epoch, args.epochs + 1):
        # update learning rate
        # NOTE(review): scheduler.step() is called before train(); the
        # scheduler is also passed into train(), so stepping may happen in
        # both places — confirm the intended LR schedule.
        scheduler.step()

        # train for one epoch
        train(train_loader, model, train_proxynet.proxies.weight,
              criterion, optimizer, epoch, scheduler)

        val_acc = evaluate(test_loader, model, test_proxynet.proxies.weight,
                           criterion)

        # saving: only the final epoch's weights are checkpointed.
        if epoch == args.epochs:
            save_checkpoint({'epoch': epoch,
                             'state_dict': model.state_dict()})

    print('\nResults on test set (end of training)')
    write_logs('\nResults on test set (end of training)')
    test_acc = evaluate(test_loader, model, test_proxynet.proxies.weight,
                        criterion)
import torch
import torch.nn.functional as F
from torch.autograd import Variable
import copy
import time

from models import ConvNet, nCrossEntropyLoss
from config import DefaultConfig
from data.dataset import data_loader, data, dataset_size
from utils.utils import equal

# Script-level training setup. NOTE: the epoch loop's body continues past
# this chunk of the file.
net = ConvNet()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
loss_func = nCrossEntropyLoss()

# Track the best weights / accuracy seen so far and the wall-clock start.
best_model_wts = copy.deepcopy(net.state_dict())
best_acc = 0.0
since = time.time()

for epoch in range(DefaultConfig.EPOCH):
    running_loss = 0.0
    running_corrects = 0
    for step, (inputs, label) in enumerate(data_loader):
        # Zero-filled LongTensor to collect per-character predictions.
        pred = torch.LongTensor(DefaultConfig.BATCH_SIZE, 1).zero_()

        inputs = Variable(inputs)  # (bs, 3, 60, 160)
        label = Variable(label)  # (bs, 4)

        # Clear accumulated gradients before the backward pass.
        optimizer.zero_grad()
test_loader = torch.utils.data.DataLoader(test_set, batch_size=1000) ################ initialize the model ################ if args.model == 'convnet': model = ConvNet() elif args.model == 'mymodel': model = MyModel() else: raise Exception('Incorrect model name') if args.cuda: model.cuda() ######## Define loss function and optimizer ########## ############## Write your code here ################## params = model.parameters() optimizer = optim.Adam(params, lr=args.lr, weight_decay=args.weight_decay) criterion = nn.CrossEntropyLoss() ###################################################### def train(epoch): """ Runs training for 1 epoch epoch: int, denotes the epoch number for printing """ ############# Write train function ############### mean_training_loss = 0.0 model.train() for i, batch in enumerate(train_loader): ############ Write your code here ############ # Get input and labels
def main(args):
    """Distributed (NCCL) training / validation / quantization driver.

    Loads a JSON config merged with CLI args, builds per-rank samplers and
    data loaders, optionally restores a checkpoint, and runs whichever
    phases (`training`, `validation`, `quantization`) the config enables.
    Intended to be launched with one process per GPU.
    """
    init_process_group(backend='nccl')

    with open(args.config) as file:
        config = json.load(file)
    # CLI arguments override values from the JSON file.
    config.update(vars(args))
    config = apply_dict(Dict, config)

    backends.cudnn.benchmark = True
    backends.cudnn.fastest = True

    # One GPU per process, selected by rank.
    cuda.set_device(distributed.get_rank() % cuda.device_count())

    train_dataset = ImageDataset(
        root=config.train_root,
        meta=config.train_meta,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, ) * 3, (0.5, ) * 3)
        ]))
    val_dataset = ImageDataset(
        root=config.val_root,
        meta=config.val_meta,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, ) * 3, (0.5, ) * 3)
        ]))

    # Distributed samplers shard the datasets across ranks.
    train_sampler = utils.data.distributed.DistributedSampler(train_dataset)
    val_sampler = utils.data.distributed.DistributedSampler(val_dataset)

    train_data_loader = utils.data.DataLoader(
        dataset=train_dataset,
        batch_size=config.local_batch_size,
        sampler=train_sampler,
        num_workers=config.num_workers,
        pin_memory=True)
    val_data_loader = utils.data.DataLoader(
        dataset=val_dataset,
        batch_size=config.local_batch_size,
        sampler=val_sampler,
        num_workers=config.num_workers,
        pin_memory=True)

    model = ConvNet(
        conv_params=[
            Dict(in_channels=3, out_channels=32, kernel_size=5,
                 padding=2, stride=2, bias=False),
            Dict(in_channels=32, out_channels=64, kernel_size=5,
                 padding=2, stride=2, bias=False),
        ],
        linear_params=[
            Dict(in_channels=3136, out_channels=1024, kernel_size=1,
                 bias=False),
            Dict(in_channels=1024, out_channels=10, kernel_size=1,
                 bias=True),
        ])

    # Scale the learning rate linearly with the global batch size.
    config.global_batch_size = (config.local_batch_size *
                                distributed.get_world_size())
    config.optimizer.lr *= config.global_batch_size / config.global_batch_denom

    optimizer = optim.Adam(model.parameters(), **config.optimizer)

    epoch = 0
    global_step = 0
    if config.checkpoint:
        # Resume model/optimizer state and counters from a checkpoint.
        checkpoint = Dict(torch.load(config.checkpoint))
        model.load_state_dict(checkpoint.model_state_dict)
        optimizer.load_state_dict(checkpoint.optimizer_state_dict)
        epoch = checkpoint.last_epoch + 1
        global_step = checkpoint.global_step

    def train(data_loader):
        # One full pass over the loader, averaging gradients across ranks
        # before each optimizer step.
        nonlocal global_step
        model.train()
        for images, labels in data_loader:
            images = images.cuda()
            labels = labels.cuda()
            optimizer.zero_grad()
            logits = model(images)
            loss = nn.functional.cross_entropy(logits, labels)
            # NOTE(review): retain_graph=True keeps the autograd graph after
            # backward — looks unnecessary for a single forward/backward per
            # batch and wastes memory; confirm before removing.
            loss.backward(retain_graph=True)
            average_gradients(model.parameters())
            optimizer.step()
            predictions = logits.topk(k=1, dim=1)[1].squeeze()
            accuracy = torch.mean((predictions == labels).float())
            # Synchronize the reported metrics across ranks.
            average_tensors([loss, accuracy])
            global_step += 1
            dprint(f'[training] epoch: {epoch} global_step: {global_step} '
                   f'loss: {loss:.4f} accuracy: {accuracy:.4f}')

    @torch.no_grad()
    def validate(data_loader):
        # Average loss/accuracy over the whole loader, synchronized across
        # ranks per batch.
        model.eval()
        losses = []
        accuracies = []
        for images, labels in data_loader:
            images = images.cuda()
            labels = labels.cuda()
            logits = model(images)
            loss = nn.functional.cross_entropy(logits, labels)
            predictions = logits.topk(k=1, dim=1)[1].squeeze()
            accuracy = torch.mean((predictions == labels).float())
            average_tensors([loss, accuracy])
            losses.append(loss)
            accuracies.append(accuracy)
        loss = torch.mean(torch.stack(losses)).item()
        accuracy = torch.mean(torch.stack(accuracies)).item()
        dprint(f'[validation] epoch: {epoch} global_step: {global_step} '
               f'loss: {loss:.4f} accuracy: {accuracy:.4f}')

    @torch.no_grad()
    def feed(data_loader):
        # Forward-only pass (presumably to refresh batch statistics);
        # outputs are discarded.
        model.eval()
        for images, _ in data_loader:
            images = images.cuda()
            logits = model(images)

    def save():
        # Only rank 0 writes the checkpoint.
        if not distributed.get_rank():
            os.makedirs('checkpoints', exist_ok=True)
            torch.save(
                dict(model_state_dict=model.state_dict(),
                     optimizer_state_dict=optimizer.state_dict(),
                     last_epoch=epoch,
                     global_step=global_step),
                os.path.join('checkpoints', f'epoch_{epoch}'))

    if config.training:
        model.cuda()
        # Ensure all ranks start from identical weights.
        broadcast_tensors(model.state_dict().values())
        for epoch in range(epoch, config.num_training_epochs):
            train_sampler.set_epoch(epoch)
            train(train_data_loader)
            validate(val_data_loader)
            save()

    if config.validation:
        model.cuda()
        broadcast_tensors(model.state_dict().values())
        validate(val_data_loader)

    if config.quantization:
        model.cuda()
        broadcast_tensors(model.state_dict().values())
        with QuantizationEnabler(model):
            with BatchStatsUser(model):
                for epoch in range(epoch, config.num_quantization_epochs):
                    train_sampler.set_epoch(epoch)
                    train(train_data_loader)
                    validate(val_data_loader)
                    save()
            with AverageStatsUser(model):
                # NOTE(review): `epoch` carries over from the loop above, so
                # this range is empty or nearly so — confirm the two-stage
                # quantization schedule is intended.
                for epoch in range(epoch, config.num_quantization_epochs):
                    train_sampler.set_epoch(epoch)
                    train(train_data_loader)
                    validate(val_data_loader)
                    save()
from models import ConvNet

# Pick the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
dataset = torchvision.datasets.ImageFolder("D:/PokeRapper/Pokemon",
                                           transform=transform)
# BUG FIX: `torch.utils.data.DataLoad` does not exist (AttributeError at
# runtime); the correct class is `DataLoader`.
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1024,
                                         shuffle=True, num_workers=4)

# build the model
# TODO: Add support for loading different models
model = ConvNet()
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# train the network (loop body is still a placeholder)
for epoch in range(100):
    for i, data in enumerate(dataloader, 0):
        print('')