# print(label) labels.append(1) # abnormal sample else: labels.append(0) # normal sample print("The number of abnormal in training set: " + str(count)) labels = numpy.array(labels) target = torch.tensor(labels).to(device) training_dataset_loader = torch.utils.data.DataLoader( training_dataset, batch_size=batch_size, shuffle=False, drop_last=True, sampler=BalancedBatchSampler(training_dataset, target), num_workers=0, pin_memory=True) epoch_restart = 0 dataiter = iter(training_dataset_loader) for epoch in range(epoch_restart + 1, 1101): batch = tqdm(training_dataset_loader, total=len(training_dataset) // batch_size) new_count = 0 for x, label in batch: x = x.to(device) l = [] for index in range(0, x.shape[0]):
import torch

from sampler import BalancedBatchSampler

# Minimal demonstration: draw class-balanced mini-batches from a tiny
# synthetic dataset and print what comes out of the loader.
epochs = 3
size = 20        # number of synthetic samples
features = 5     # feature dimensionality per sample

# Per-class probabilities used to draw the synthetic labels (3 classes).
classes_prob = torch.tensor([0.1, 0.4, 0.5])

# Random features; labels sampled independently per row from `classes_prob`.
dataset_X = torch.randn(size, features)
label_dist = torch.distributions.categorical.Categorical(
    classes_prob.repeat(size, 1))
dataset_Y = label_dist.sample()

dataset = torch.utils.data.TensorDataset(dataset_X, dataset_Y)

# The sampler re-balances class frequencies inside every batch of 6.
balanced_sampler = BalancedBatchSampler(dataset, dataset_Y)
train_loader = torch.utils.data.DataLoader(dataset,
                                           sampler=balanced_sampler,
                                           batch_size=6)

for epoch in range(epochs):
    for batch_x, batch_y in train_loader:
        print("epoch: %d labels: %s\ninputs: %s\n" % (epoch, batch_y, batch_x))
# Build the tb2020 validation pipeline and sanity-check the balanced loader.
valconfig = {"dataset": "tb2020", "subset": '0'}
val_config = dataconfig(**valconfig)

validation_data = DataGenerator(val_config,
                                transform=None,
                                type='1: slice_sampled')

# Balanced sampling over a multi-label dataset, 6 samples per batch.
balanced_sampler = BalancedBatchSampler(validation_data, type='multi_label')
val_loader = DataLoader(validation_data,
                        num_workers=1,
                        sampler=balanced_sampler,
                        batch_size=6)

# Print each batch index, its labels, and the image tensor shape.
for i, (images, labels) in enumerate(val_loader):
    print(i)
    print(labels)
    print(images.shape)
def train_eval(clf_model, train_meta, validation_dataloader, base_dir,
               batch_size, weights=None, lr=2e-5, epochs=4,
               eval_every_num_iters=40, seed_val=42):
    """train and evaluate a deep learning model

    :params[in]: clf_model, a classifier
    :params[in]: train_meta, training data: (input ids, attention masks, labels)
    :params[in]: validation_dataloader, validation data
    :params[in]: base_dir, output directory to create the directory to save results
    :params[in]: batch_size, mini-batch size used to build the train loader
    :params[in]: weights, optional per-class loss weights (sequence of floats)
    :params[in]: lr, the learning rate
    :params[in]: epochs, the number of training epochs
    :params[in]: eval_every_num_iters, the number of iterations to evaluate
    :params[in]: seed_val, set a random seed
    """
    # the 'W' stands for 'Warm up", AdamW is a class from the huggingface library
    optimizer = AdamW(
        clf_model.parameters(),
        lr=lr,     # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=1e-8   # args.adam_epsilon - default is 1e-8.
    )

    train_inputs, train_masks, train_labels = train_meta
    train_size = train_inputs.shape[0]  # training sample size

    # Total number of training steps is number of batches * number of epochs.
    total_steps = int(1. + train_size / batch_size) * epochs

    # Create the learning rate scheduler (linear warmup then decay).
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=1,  # Default value in run_glue.py
        num_training_steps=total_steps)

    # BUG FIX: the original wrote `if weights is None:` followed by a nested
    # `if weights != None:`, so the tensor conversion below was unreachable
    # and user-supplied class weights were silently passed through unconverted.
    if weights is not None:
        weights = torch.FloatTensor(weights)

    # Set the seed value all over the place to make this reproducible.
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    # Store the average loss after each epoch so we can plot them.
    loss_values = []

    for epoch_i in range(0, epochs):
        # ========================================
        #               Training
        # ========================================
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print('Training...')

        # Reshuffle the data and rebuild a class-balanced loader every epoch.
        perm0 = torch.randperm(train_size)
        tmp_X, tmp_mask, tmp_Y = (train_inputs[perm0, :],
                                  train_masks[perm0, :],
                                  train_labels[perm0])
        dataset = torch.utils.data.TensorDataset(tmp_X, tmp_mask, tmp_Y)
        train_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=BalancedBatchSampler(dataset, tmp_Y),
            batch_size=batch_size,
            drop_last=True)

        # Measure how long the training epoch takes.
        t0 = time.time()

        # Reset the total loss for this epoch.
        total_loss = 0

        # `train()` only switches the *mode* (dropout/batchnorm behavior);
        # it does not perform any training by itself.
        clf_model.train()

        for step, batch in enumerate(train_loader):
            # `batch` contains three pytorch tensors:
            #   [0]: input ids, [1]: attention masks, [2]: labels
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            # Clear any gradients accumulated by the previous step.
            clf_model.zero_grad()

            # Forward pass; providing `labels` makes the model return the loss.
            outputs = clf_model(b_input_ids,
                                token_type_ids=None,
                                attention_mask=b_input_mask,
                                labels=b_labels,
                                weights=weights)
            loss = outputs[0]

            # `loss` is a single-element tensor; `.item()` extracts the float.
            total_loss += loss.item()

            # Backward pass, then clip gradients to 1.0 to avoid explosion.
            loss.backward()
            torch.nn.utils.clip_grad_norm_(clf_model.parameters(), 1.0)

            # Parameter update, then learning-rate schedule update.
            optimizer.step()
            scheduler.step()

            # Evaluate the performance after some iterations.
            if step % eval_every_num_iters == 0 and not step == 0:
                elapsed = format_time(time.time() - t0)
                print(' Batch {:>5,} of {:>5,}. Elapsed: {:}.'.format(
                    step, len(train_loader), elapsed))
                tmp_dir = base_dir + '/epoch' + str(
                    epoch_i + 1) + 'iteration' + str(step)
                evaluate_model(clf_model, validation_dataloader, tmp_dir)
                clf_model.train()  # back to training mode after evaluation

        # BUG FIX: average over the batches actually run this epoch; the
        # original divided the per-epoch total by `total_steps`, which counts
        # batches across *all* epochs and understates the loss.
        avg_train_loss = total_loss / max(1, len(train_loader))
        loss_values.append(avg_train_loss)

        # Evaluate and save results at the end of every epoch.
        tmp_dir = base_dir + '/epoch' + str(epoch_i + 1) + '_done'
        evaluate_model(clf_model, validation_dataloader, tmp_dir)
        clf_model.train()  # model back to training mode
def train_net(base_path, size=32000, n_classes=4):
    """Train the recurrent audio classifier with balanced batches.

    Tracks train/validation loss and macro F1 per epoch, checkpoints the
    best model by validation loss, applies early stopping on train loss,
    and pickles the metric history.

    :param base_path: dataset root; its basename names the experiment folder
    :param size: nominal sample size (NOTE: get_raw_data is called with a
        hard-coded size=16000 below — `size` is currently unused; kept for
        interface compatibility)
    :param n_classes: number of target classes
    """
    experiments_path = os.path.join(
        r'C:\Users\kotov-d\Documents\TASKS\cross_inference',
        os.path.basename(base_path))
    Path(experiments_path).mkdir(parents=True, exist_ok=True)

    [x_train, x_val, x_test, y_train, y_val,
     y_test] = get_raw_data(base_path, experiments_path, n_classes, size=16000)

    # x_train = np.vstack((x_train, x_val))
    # y_train = np.hstack((y_train, y_val))

    config = Config(lr=0.00001,
                    batch_size=128,
                    num_epochs=1000,
                    n_classes=n_classes)
    net = torch_model(config, p_size=(3, 3, 3, 3), k_size=(64, 32, 16, 8))

    device = 'cuda' if cuda.is_available() else 'cpu'

    # Class-balanced batches for training; plain shuffling for validation.
    sampler = BalancedBatchSampler(My_Dataset(x_train, y_train), y_train)
    batcher_train = DataLoader(My_Dataset(x_train, y_train),
                               batch_size=config.batch_size,
                               sampler=sampler)
    batcher_val = DataLoader(My_Dataset(x_val, y_val),
                             batch_size=config.batch_size,
                             shuffle=True)

    start_time = time.time()
    train_loss = []
    valid_loss = []
    train_fscore = []
    valid_fscore = []

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config.lr)
    net.to(device)
    early_stopping = EarlyStopping()

    # BUG FIX: start from +inf so the first validation epoch always produces
    # a checkpoint; the original magic value 1000 could silently skip saving
    # if the loss never dropped below it.
    min_loss = float('inf')

    for epoch in range(config.num_epochs):
        iter_loss = 0.0
        correct = 0
        f_scores = 0
        iterations = 0

        net.train()
        h = net.init_hidden(config.batch_size)
        for i, (items, classes) in enumerate(batcher_train):
            # The recurrent hidden state is sized for full batches, so a
            # ragged final batch is skipped.
            if classes.shape[0] != config.batch_size:
                break
            items = items.to(device)
            classes = classes.to(device)

            optimizer.zero_grad()
            h = tuple([each.data for each in h])  # detach state from graph
            outputs, h = net(items, h)
            loss = criterion(outputs, classes.long())
            iter_loss += loss.item()
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == classes.data.long()).sum()
            # NOTE(review): sklearn's signature is f1_score(y_true, y_pred);
            # predictions are passed first here — confirm this is intended,
            # macro F1 is not symmetric in general.
            f_scores += f1_score(predicted.cpu().numpy(),
                                 classes.data.cpu().numpy(),
                                 average='macro')
            iterations += 1
            torch.cuda.empty_cache()

        train_loss.append(iter_loss / iterations)
        train_fscore.append(f_scores / iterations)

        # Early stopping is driven by the *training* loss trajectory.
        early_stopping.update_loss(train_loss[-1])
        if early_stopping.stop_training():
            break

        ############################
        # Validate
        ############################
        iter_loss = 0.0
        correct = 0
        f_scores = 0
        iterations = 0

        net.eval()  # Put the network into evaluate mode
        val_h = net.init_hidden(config.batch_size)
        for i, (items, classes) in enumerate(batcher_val):
            if classes.shape[0] != config.batch_size:
                break
            items = items.to(device)
            classes = classes.to(device)

            val_h = tuple([each.data for each in val_h])
            outputs, val_h = net(items, val_h)
            loss = criterion(outputs, classes.long())
            iter_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == classes.data.long()).sum()
            f_scores += f1_score(predicted.cpu().numpy(),
                                 classes.data.cpu().numpy(),
                                 average='macro')
            iterations += 1

        valid_loss.append(iter_loss / iterations)
        valid_fscore.append(f_scores / iterations)

        # Checkpoint whenever validation loss improves.
        if valid_loss[-1] < min_loss:
            # FIX: dropped a no-op `.format(n_classes)` on a filename that
            # contained no placeholder.
            torch.save(net, os.path.join(experiments_path, "net.pb"))
            min_loss = valid_loss[-1]

        print(
            'Epoch %d/%d, Tr Loss: %.4f, Tr Fscore: %.4f, Val Loss: %.4f, Val Fscore: %.4f'
            % (epoch + 1, config.num_epochs, train_loss[-1], train_fscore[-1],
               valid_loss[-1], valid_fscore[-1]))

    # Persist the full metric history for later plotting/analysis.
    with open(os.path.join(experiments_path, "loss_track.pkl"), 'wb') as f:
        pickle.dump([train_loss, train_fscore, valid_loss, valid_fscore], f)

    print(time.time() - start_time)
def __init__(self, args, s):
    """Load, clean and wrap the requested tabular dataset into DataLoaders.

    NOTE(review) — assumptions to confirm against callers:
    - `args.dataset` selects among german/bank/adult/home and their
      '-pre-dp' synthetic variants; `s` selects the synthetic file
      (s == 0 means the original, non-synthetic data).
    - `args.num_teachers > 0` switches to a teacher/student split
      (PATE-style), otherwise a single balanced train loader is built.
    """
    # Field separators differ per source file:
    # bank data --> sep=';'
    # adult, home data --> sep=','
    # german data --> sep=' '
    if args.dataset == 'german':
        train_path = 'german-data/german.train'
    elif args.dataset == 'german-pre-dp':
        if s == 0:
            train_path = 'german-data/german.train'
        else:
            train_path = 'german-data/synth/syth_data_correlated_{}.csv'.format(s)
    # NOTE(review): plain `if` here (not `elif`) breaks the chain started
    # above; harmless because the dataset names are mutually exclusive.
    if args.dataset == 'bank':
        train_path = 'bank-data/bank-additional-full.csv'
    elif args.dataset == 'bank-pre-dp':
        if s == 0:
            train_path = 'bank-data/bank-additional-full.csv'
        else:
            train_path = 'bank-data/synth/syth_data_correlated_ymod_{}.csv'.format(s)
    elif args.dataset == 'adult':
        train_path = 'adult-data/adult.data'
    elif args.dataset == 'adult-pre-dp':
        if s == 0:
            train_path = 'adult-data/adult.data'
        else:
            train_path = 'adult-data/synth/syth_data_correlated_ymod_{}.csv'.format(s)
    elif args.dataset == 'home':
        train_path = 'home-data/hcdf_train.csv'
    elif args.dataset == 'home-pre-dp':
        if s == 0:
            train_path = 'home-data/hcdf_train.csv'
        else:
            train_path = 'home-data/synth/syth_data_correlated_ymod_{}.csv'.format(s)

    # ---- Per-dataset loading and binary label normalisation (y in {0, 1}) ----
    if args.dataset == 'german' or args.dataset == 'german-pre-dp':
        cols = [
            'existing_checking', 'duration', 'credit_history', 'purpose',
            'credit_amount', 'savings', 'employment_since',
            'installment_rate', 'status_sex', 'other_debtors',
            'residence_since', 'property', 'age', 'other_installment_plans',
            'housing', 'existing_credits', 'job', 'people_liable',
            'telephone', 'foreign_worker', 'y'
        ]
        test_path = 'german-data/german.test'
        # Synthetic german files are comma-separated; originals use spaces.
        if args.dataset == 'german-pre-dp' and s > 0:
            sep = ','
        else:
            sep = ' '
        train_df = pd.read_csv(train_path, sep=sep, names=cols)
        print(train_df)
        test_df = pd.read_csv(test_path, sep=' ', names=cols)
        # Original coding: 1 = good credit, 2 = bad; remap to 1/0.
        train_df['y'] = train_df['y'].apply(lambda x: 0 if x == 2 else 1)
        test_df['y'] = test_df['y'].apply(lambda x: 0 if x == 2 else 1)
    if args.dataset == 'bank' or args.dataset == 'bank-pre-dp':
        cols = [
            'age', 'job', 'marital', 'education', 'default', 'housing',
            'loan', 'contact', 'month', 'day_of_week', 'duration',
            'campaign', 'pdays', 'previous', 'poutcome', 'emp.var.rate',
            'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed',
            'y'
        ]
        test_path = 'bank-data/bank-additional.csv'
        # NOTE(review): the bank CSVs ship with a header row; reading with
        # names= and no header/skiprows keeps that row as data — confirm.
        train_df = pd.read_csv(train_path, sep=';', names=cols)
        test_df = pd.read_csv(test_path, sep=';', names=cols)
    elif args.dataset == 'adult' or args.dataset == 'adult-pre-dp':
        test_path = 'adult-data/adult.test'
        cols = [
            'age', 'workclass', 'fnlwgt', 'education', 'education-num',
            'marital-status', 'occupation', 'relationship', 'race', 'sex',
            'capital-gain', 'capital-loss', 'hours-per-week',
            'native-country', 'y'
        ]
        train_df = pd.read_csv(train_path, sep=',', names=cols)
        test_df = pd.read_csv(test_path, sep=',', names=cols)
        # '?' marks missing values; turn them into NaN for the later dropna.
        train_df = train_df.replace({'?': np.nan})
        test_df = test_df.replace({'?': np.nan})
        # y = 0 for income >50K, 1 otherwise (substring match also covers
        # the '>50K.' spelling used in adult.test).
        train_df['y'] = train_df['y'].apply(lambda x: 0 if ">50K" in x else 1)
        test_df['y'] = test_df['y'].apply(lambda x: 0 if ">50K" in x else 1)
    elif args.dataset == 'home' or args.dataset == 'home-pre-dp':
        test_path = 'home-data/hcdf_test.csv'
        train_df = pd.read_csv(train_path, sep=',', header=0)
        test_df = pd.read_csv(test_path, sep=',', header=0)
        train_df = train_df.drop(columns=['FLAG_OWN_CAR'])
        test_df = test_df.drop(columns=['FLAG_OWN_CAR'])
        train_df = train_df.rename(columns={
            "TARGET": "y",
            "CODE_GENDER": "GENDER"
        })
        test_df = test_df.rename(columns={
            "TARGET": "y",
            "CODE_GENDER": "GENDER"
        })

    # Drop rows with missing values (this also removes the '?' rows NaN'd
    # above for adult). NOTE(review): indentation reconstructed — confirm
    # dropna is meant to apply to every dataset, not only 'home'.
    train_df = train_df.dropna()
    test_df = test_df.dropna()
    print(train_df.head())
    train_df = train_df.sample(frac=1).reset_index(drop=True)  # shuffle df
    #test_df = test_df.sample(frac=1).reset_index(drop=True)  # shuffle df

    if args.num_teachers == 0 or s == 0:
        # Single-model path: one balanced train loader + one test loader.
        train_data = LoadDataset(train_df, args.dataset, args.sensitive)
        test_data = LoadDataset(test_df, args.dataset, args.sensitive)
        self.sensitive_keys = train_data.getkeys()
        self.train_size = len(train_data)
        self.test_size = len(test_data)
        self.sensitive_col_idx = train_data.get_sensitive_idx()
        self.cat_emb_size = train_data.categorical_embedding_sizes  # size of categorical embedding
        print("***", self.cat_emb_size)
        self.num_conts = train_data.num_numerical_cols  # number of numerical variables
        print(train_df.head(40))
        print(train_df.y.value_counts())
        class_count = dict(train_df.y.value_counts())
        class_weights = [
            value / len(train_data) for _, value in class_count.items()
        ]
        train_batch = args.batch_size
        test_batch = len(test_data)  # single full-size test batch
        self.train_loader = DataLoader(dataset=train_data,
                                       sampler=BalancedBatchSampler(
                                           train_data, train_data.Y),
                                       batch_size=train_batch)
        self.test_loader = DataLoader(dataset=test_data,
                                      batch_size=test_batch,
                                      drop_last=True)
    else:
        # Teacher/student path: 30% of the train frame goes to the student,
        # the rest is sharded across `args.num_teachers` teacher loaders.
        student_train_size = int(len(train_df) * .3)
        teacher_train_df = train_df.iloc[student_train_size:, :]
        student_train_df = train_df.iloc[:student_train_size, :]
        train_data = LoadDataset(teacher_train_df, args.dataset,
                                 args.sensitive)
        student_train_data = LoadDataset(student_train_df, args.dataset,
                                         args.sensitive)
        test_data = LoadDataset(test_df, args.dataset, args.sensitive)
        self.sensitive_keys = train_data.getkeys()
        self.train_size = len(train_data)
        self.test_size = len(test_data)
        self.sensitive_col_idx = train_data.get_sensitive_idx()
        student_train_size = len(student_train_data)
        self.cat_emb_size = train_data.categorical_embedding_sizes  # size of categorical embedding
        #print(self.cat_emb_size)
        self.num_conts = train_data.num_numerical_cols  # number of numerical variables
        class_count = dict(train_df.y.value_counts())
        class_weights = [
            value / len(train_data) for _, value in class_count.items()
        ]
        train_batch = args.batch_size
        test_batch = len(test_data)
        self.teacher_loaders = []
        # Disjoint, equally sized index shards — one per teacher.
        data_size = self.train_size // args.num_teachers
        for i in range(args.num_teachers):
            indices = list(range(i * data_size, (i + 1) * data_size))
            subset_data = Subset(train_data, indices)
            # Each dataset item exposes its label at position 2.
            subset_data_Y = [i[2] for i in subset_data]
            subset_data_Y = torch.stack(subset_data_Y)
            loader = DataLoader(dataset=subset_data,
                                sampler=BalancedBatchSampler(
                                    subset_data, subset_data_Y),
                                batch_size=train_batch)
            self.teacher_loaders.append(loader)
        """
        indices = list(range(len(test_data)))
        indices = random.sample(indices, len(indices))
        student_split = int(len(test_data) * .7)
        student_train_data = Subset(test_data, indices[:student_split])
        student_test_data = Subset(test_data, indices[student_split+1:])
        """
        # Student sees its whole split in one batch; tests on the full test set.
        self.student_train_loader = torch.utils.data.DataLoader(
            student_train_data,
            #sampler=BalancedBatchSampler(student_train_data,
            #                             student_train_data.Y),
            batch_size=student_train_size)
        self.student_test_loader = torch.utils.data.DataLoader(
            test_data, batch_size=test_batch)
def train(args, model, train_dataset):
    """Pre-train `model` with the MTB or CP objective under apex mixed
    precision, optionally distributed (DDP when args.local_rank != -1).

    Saves the underlying BERT encoder's weights every `args.save_step`
    optimizer steps. NOTE(review): `train_dataset.label`, `args.model`,
    and the batch tensor layout are project conventions — confirmed only
    by their use here.
    """
    # Total optimizer steps across the whole run (accounting for gradient
    # accumulation and the number of GPUs).
    step_tot = (len(train_dataset) // args.gradient_accumulation_steps //
                args.batch_size_per_gpu // args.n_gpu) * args.max_epoch

    # Distributed runs shard the data per process; single-process runs use
    # the class-balanced sampler over `train_dataset.label`.
    train_sampler = data.distributed.DistributedSampler(
        train_dataset) if args.local_rank != -1 else BalancedBatchSampler(
            train_dataset, train_dataset.label)
    params = {"batch_size": args.batch_size_per_gpu, "sampler": train_sampler}
    train_dataloader = data.DataLoader(train_dataset, **params)

    # optimizer: no weight decay on biases and LayerNorm weights.
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay': args.weight_decay
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay': 0.0
    }]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.lr,
                      eps=args.adam_epsilon,
                      correct_bias=False)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=step_tot)

    # amp training (apex mixed precision, O1 = conservative mixed precision)
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    # distributed training
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    print("Begin train...")
    print("We will train model in %d steps" % step_tot)
    global_step = 0
    loss_record = []
    step_record = []
    for i in range(args.max_epoch):
        if args.local_rank != -1:  # Distributed training: reshuffle shards
            train_sampler.set_epoch(i)
        for step, batch in enumerate(train_dataloader):
            # Assemble model inputs for the chosen pre-training objective.
            if args.model == "MTB":
                # Matching-the-blanks: left/right sentence pair + label.
                inputs = {
                    "l_input": batch[0].to(args.device),
                    "l_mask": batch[1].to(args.device),
                    "l_ph": batch[2].to(args.device),
                    "l_pt": batch[3].to(args.device),
                    "r_input": batch[4].to(args.device),
                    "r_mask": batch[5].to(args.device),
                    "r_ph": batch[6].to(args.device),
                    "r_pt": batch[7].to(args.device),
                    "label": batch[8].to(args.device)
                }
            elif args.model == "CP":
                # Contrastive pre-training: sentence + head/tail positions.
                inputs = {
                    "input": batch[0].to(args.device),
                    "mask": batch[1].to(args.device),
                    "label": batch[2].to(args.device),
                    "h_pos": batch[3].to(args.device),
                    't_pos': batch[4].to(args.device)
                }
            model.train()
            # The model returns the masked-LM loss and the relation loss.
            m_loss, r_loss = model(**inputs)
            loss = m_loss + r_loss
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps
            # Scale the loss so fp16 gradients do not underflow.
            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()

            # Step the optimizer only every `gradient_accumulation_steps`.
            if step % args.gradient_accumulation_steps == 0:
                nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step()
                model.zero_grad()
                global_step += 1

                # Periodic checkpoint of the BERT encoder (rank 0 or
                # single-process only).
                if args.local_rank in [0, -1
                                       ] and global_step % args.save_step == 0:
                    if not os.path.exists("../ckpt"):
                        os.mkdir("../ckpt")
                    if not os.path.exists("../ckpt/" + args.save_dir):
                        os.mkdir("../ckpt/" + args.save_dir)
                    # Under DDP the real model is wrapped in `.module`.
                    if type(model
                            ) == torch.nn.parallel.DistributedDataParallel:
                        ckpt = {
                            'bert-base': model.module.model.bert.state_dict()
                        }
                    else:
                        ckpt = {'bert-base': model.model.bert.state_dict()}
                    torch.save(
                        ckpt,
                        os.path.join("../ckpt/" + args.save_dir,
                                     "ckpt_of_step_" + str(global_step)))

                # if args.local_rank in [0, -1] and global_step % 5 == 0:
                #     step_record.append(global_step)
                #     loss_record.append(loss)

                # if args.local_rank in [0, -1] and global_step % 500 == 0:
                #     log_loss(step_record, loss_record)

                # Inline progress line (carriage return keeps it on one line).
                if args.local_rank in [0, -1]:
                    sys.stdout.write(
                        "step: %d, shcedule: %.3f, mlm_loss: %.6f relation_loss: %.6f\r"
                        % (global_step, global_step / step_tot, m_loss, r_loss))
                    sys.stdout.flush()

        # Optionally resample the dataset between epochs.
        if args.train_sample:
            print("sampling...")
            train_dataloader.dataset.__sample__()
            print("sampled")
# Stanford-Cars training data: resize to 228, random 224x224 crop,
# horizontal flip, and ImageNet mean/std normalisation.
data = ImageFolderWithName(
    return_fn=False,
    root='/home/chk/cars_stanford/cars_train_labelled/train',
    transform=transforms.Compose([
        transforms.Resize(228),
        transforms.RandomCrop((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ]),
    loader=lambda x: Image.open(x).convert('RGB'))

# Balanced batches of 32 with 4 images per class (batch_k); 2000 batches
# per pass through the loader.
dataset = torch.utils.data.DataLoader(data,
                                      batch_sampler=BalancedBatchSampler(
                                          data,
                                          batch_size=32,
                                          batch_k=4,
                                          length=2000),
                                      num_workers=4)
# Alternative annotation-file based pipeline, kept for reference:
'''
data = MetricData(data_root='/home/chk/cars_stanford/cars_train', \
    anno_file='/home/chk/cars_stanford/devkit/cars_train_annos.mat', \
    idx_file='/home/chk/cars_stanford/devkit/cars_train_annos_idx.pkl', \
    return_fn=True)
sampler = SourceSampler(data)
print('Batch sampler len:', len(sampler))
dataset = torch.utils.data.DataLoader(data, batch_sampler=sampler)
'''
from model import MetricLearner
#model = MetricLearner()
beta = checkpoint['beta'] print("=> loaded checkpoint '{}' (epoch {})" .format(args.resume, checkpoint['epoch'])) else: print("=> no checkpoint found at '{}'".format(args.resume)) trn_dataset = dataset.load( name = args.dataset, root = data_root, mode = 'train', transform = dataset.utils.make_transform( is_train = True, is_inception = False )) batch_sampler = BalancedBatchSampler(trn_dataset, args.batch_size, args.batch_k, length=args.batch_num) train_loader = torch.utils.data.DataLoader( batch_sampler=batch_sampler, dataset=trn_dataset, num_workers=args.workers, pin_memory=True ) ev_dataset = dataset.load( name=args.dataset, root=data_root, mode='eval', transform=dataset.utils.make_transform( is_train=False, is_inception=False ))
def main(args):
    """Train a MarginNet embedding model with class-balanced batches.

    `args` is a dict of hyper-parameters (batch_size, batch_k, classes,
    seed, gpus, loss, lr, resume, data_path, ...). Checkpoints are written
    to 'checkpoints/' after every epoch.
    """
    # checking: each batch holds batch_size // batch_k distinct classes with
    # batch_k images each, so the sizes must divide cleanly and there must be
    # more classes than class-groups per batch.
    assert args['batch_size'] % args['batch_k'] == 0
    assert args['batch_size'] > 0 and args['batch_k'] > 0
    assert args['batch_size'] // args['batch_k'] < args['classes']

    # seed
    if args['seed'] is not None:
        random.seed(args['seed'])
        torch.manual_seed(args['seed'])
        cudnn.deterministic = True
        warnings.warn('''You have chosen to seed training.
            This will turn on the CUDNN deterministic setting,
            which can slow down your training considerably!
            You may see unexpected behavior when restarting
            from checkpoints.''')

    # gpus setting
    os.environ['CUDA_VISIBLE_DEVICES'] = args['gpus']

    # construct model: either a fresh resnet50 with the embedding dimension
    # as its output, or a pretrained one whose fc layer is replaced.
    if not args['use_pretrained']:
        model = resnet50(num_classes=args['feat_dim'])
    else:
        model = resnet50(pretrained=True)
    try:
        model.fc = nn.Linear(model.fc.in_features, args['feat_dim'])
    except AttributeError:
        # BUG FIX: accessing `model.fc` on a model without that layer raises
        # AttributeError, not NameError — the original `except NameError`
        # handler could never fire.
        print(
            "Error: current works only with model having fc layer as the last layer, try modify the code"
        )
        exit(-1)
    model = MarginNet(base_net=model,
                      emb_dim=args['embed_dim'],
                      batch_k=args['batch_k'],
                      feat_dim=args['feat_dim'],
                      normalize=args['normalize_weights'])
    print(model.state_dict().keys())
    model.cuda()

    if args['loss'] == 'margin':
        criterion = MarginLoss(margin=args['margin'], nu=args['nu'])
    elif args['loss'] == 'triplet':
        criterion = TripletLoss(margin=args['margin'], nu=args['nu'])
    else:
        raise NotImplementedError

    optimizer = torch.optim.SGD(model.parameters(),
                                args['lr'],
                                momentum=args['momentum'],
                                weight_decay=args['wd'])

    # Per-class beta (margin boundary) parameters get their own optimizer
    # with a separate learning rate.
    beta = Parameter(
        torch.ones((args['classes'], ),
                   dtype=torch.float32,
                   device=torch.device('cuda')) * args['beta'])
    optimizer_beta = torch.optim.SGD([beta],
                                     args['lr_beta'],
                                     momentum=args['momentum'],
                                     weight_decay=args['wd'])

    # Optionally resume model/optimizer/beta state from a checkpoint.
    if args['resume']:
        if os.path.isfile(args['resume']):
            print("=> loading checkpoint '{}'".format(args['resume']))
            checkpoint = torch.load(args['resume'])
            args['start_epoch'] = checkpoint['epoch']
            # Strip a leading 'module.' (DataParallel) prefix from keys.
            state_dict = {}
            for k, v in checkpoint['state_dict'].items():
                if k.startswith('module.'):
                    k = k[7:]
                state_dict[k] = v
            model.load_state_dict(state_dict)
            optimizer.load_state_dict(checkpoint['optimizer'])
            optimizer_beta.load_state_dict(checkpoint['optimizer_beta'])
            beta = checkpoint['beta']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args['resume'], checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args['resume']))

    # if len(args.gpus.split(',')) > 1:
    #     model = torch.nn.DataParallel(model)

    # dataset
    traindir = os.path.join(args['data_path'], 'train')
    valdir = os.path.join(args['data_path'], 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            # transforms.RandomResizedCrop(224),
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]))
    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            # transforms.RandomResizedCrop(224),
            transforms.Resize((224, 224)),
            # transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]))

    batch_sampler = BalancedBatchSampler(train_dataset,
                                         args['batch_size'],
                                         args['batch_k'],
                                         length=args['batch_num'])
    train_loader = torch.utils.data.DataLoader(batch_sampler=batch_sampler,
                                               dataset=train_dataset,
                                               num_workers=args['workers'],
                                               pin_memory=True)
    # BUG FIX: the original reused the *train* batch sampler for the
    # validation loader, so validation would iterate train-set indices over
    # the val dataset. Build a sampler over val_dataset instead.
    val_batch_sampler = BalancedBatchSampler(val_dataset,
                                             args['batch_size'],
                                             args['batch_k'],
                                             length=args['batch_num'])
    val_loader = torch.utils.data.DataLoader(batch_sampler=val_batch_sampler,
                                             dataset=val_dataset,
                                             num_workers=args['workers'],
                                             pin_memory=True)

    if not os.path.exists('checkpoints/'):
        os.mkdir('checkpoints/')

    for epoch in range(args['start_epoch'], args['epochs']):
        adjust_learning_rate(optimizer, epoch, args)
        adjust_learning_rate(optimizer_beta, epoch, args, beta=True)
        # print(optimizer.state_dict()['param_groups'][0]['lr'])
        # print(optimizer_beta.state_dict()['param_groups'][0]['lr'])

        # train for one epoch
        train(train_loader, model, criterion, optimizer, optimizer_beta, beta,
              epoch, args)

        # checkpoint after every epoch
        state = {
            'epoch': epoch + 1,
            'arch': args['model'],
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'optimizer_beta': optimizer_beta.state_dict(),
            'beta': beta
        }
        torch.save(
            state, 'checkpoints/%s_checkpoint_%d.pth.tar' %
            (args['save_prefix'], epoch + 1))
std=[0.229, 0.224, 0.225])]), loader=loader) return data args = get_args() if args.gpu_ids: device = torch.device('cuda:{}'.format(args.gpu_ids[0])) cudnn.benchmark = True else: device = torch.device('cpu') #data = MetricData(data_root=args.img_folder, anno_file=args.anno, idx_file=args.idx_file) data = imagefolder(args.img_folder) data_test = imagefolder(args.img_folder_test) dataset = torch.utils.data.DataLoader(data, batch_sampler=BalancedBatchSampler(data, batch_size=args.batch, batch_k=args.batch_k, length=args.num_batch), \ num_workers=args.num_workers) dataset_test = torch.utils.data.DataLoader(data_test, batch_sampler=BalancedBatchSampler(data_test, batch_size=args.batch, batch_k=args.batch_k, length=args.num_batch//2)) model = MetricLearner(pretrain=args.pretrain, normalize=True, batch_k=args.batch_k, att_heads=args.att_heads) if not os.path.exists(args.ckpt): os.makedirs(args.ckpt) print('Init ', args.ckpt) if args.resume: if args.ckpt.endswith('.pth'): state_dict = torch.load(args.ckpt) else: state_dict = torch.load(os.path.join(args.ckpt, 'best_performance.pth')) best_performace = state_dict['loss'] start_epoch = state_dict['epoch'] + 1 model.load_state_dict(state_dict['state_dict'], strict=False) print('Resume training. Start from epoch %d'%start_epoch)