def test_combine(name="combine"):
    # build dataset
    train_dl = dataset.DataLoader(
        [dataset.FuturesData(is_train=True, from_npz=True)], BATCH_SIZE)
    val_dl = dataset.DataLoader(
        [dataset.FuturesData(is_train=False, from_npz=True)], BATCH_SIZE)

    # control var
    lr = tf.placeholder(tf.float32, [], "lr")

    # build network
    x = tf.placeholder(tf.float32, [None, 4, INPUT_LEN], name="x")
    y = tf.placeholder(tf.float32, [None, 4, OUTPUT_LEN], name="y")
    est_y = mlp_combine(x)

    regression_loss_inst = tf.reduce_mean(tf.abs(est_y - y), axis=[0, 2])
    regression_loss = tf.reduce_mean(regression_loss_inst)
    optim = tf.train.MomentumOptimizer(learning_rate=lr,
                                       momentum=0.9).minimize(regression_loss)

    inst_summary = [
        tf.summary.scalar("regression/inst%s" % INST_TYPE[i],
                          regression_loss_inst[i]) for i in range(4)
    ]
    summary = tf.summary.merge(inst_summary)
    summary_writer = tf.summary.FileWriter("logs/" + name)
    saver = tf.train.Saver()

    # init
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        global_iter = 0
        for epoch_id in range(N_EPOCH):
            # staircase learning-rate decay over epochs
            LR = 0.1 ** (epoch_id // STAIRCASE)
            train_dl.reset_state()
            for idx, sample in enumerate(train_dl.generator()):
                sample = sample[0]
                train_seq = np.array([
                    np.stack(sample[i, 0, :]) for i in range(sample.shape[0])
                ])
                label_seq = np.array([
                    np.stack(sample[i, 1, :]) for i in range(sample.shape[0])
                ])
                sum_, _ = sess.run([summary, optim], {
                    x: train_seq,
                    y: label_seq,
                    lr: LR
                })
                summary_writer.add_summary(sum_, global_iter)
                global_iter += 1
        save_path = saver.save(sess, pj("model", name + ".ckpt"))
        print("Model saved in %s" % save_path)

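# `mlp_combine` is referenced above but not defined in this snippet. A minimal
# sketch of a network with the required shapes, mapping [None, 4, INPUT_LEN]
# to [None, 4, OUTPUT_LEN]; the hidden size and layer count are assumptions,
# not the original architecture:
def mlp_combine_sketch(x, hidden=256):
    with tf.variable_scope("mlp_combine", reuse=tf.AUTO_REUSE):
        # dense layers apply to the last axis, so the per-instrument
        # axis (size 4) is preserved
        h = tf.layers.dense(x, hidden, activation=tf.nn.relu)
        h = tf.layers.dense(h, hidden, activation=tf.nn.relu)
        return tf.layers.dense(h, OUTPUT_LEN)
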
class TestDatasetandDataLoader(unittest.TestCase):
    dir_env = dict()
    dir_env["ext"] = ".png"
    dir_env["root"] = "./sample_data/"
    dir_env["train_good_dir"] = "train/good"
    dir_env["test_good_dir"] = "test/good"
    dir_env["test_bad_dir"] = None

    mvtec_dataset_train = dataset.MVTecDataset(is_train=True, dir_env=dir_env)
    mvtec_dataset_test = dataset.MVTecDataset(is_train=False, dir_env=dir_env)
    dataloader_test = dataset.DataLoader(
        mvtec_dataset_test,
        batch_size=2,
        shuffle=True,
        drop_last=False,
    )

    def test_dataset(self):
        self.assertEqual(len(self.mvtec_dataset_train), 10)
        self.assertEqual(len(self.mvtec_dataset_test), 2)
        self.assertEqual(len(self.mvtec_dataset_test[0]), 3)

    def test_dataloader(self):
        self.assertEqual(len(self.dataloader_test), 2)
        ret = 0
        for _ in self.dataloader_test:
            ret += 1
        self.assertEqual(ret, 2 // 2)

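# The test case above can be run with the standard unittest runner, assuming
# this file is executed as a script:
if __name__ == '__main__':
    unittest.main()
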
def _load():
    loader = dataset.DataLoader(input_file=train_file,
                                word_embed_file=embed_file)
    train_dataset = loader.load(train_file, train=True, bucketing=True)
    test_dataset = (loader.load(test_file, train=False, bucketing=True)
                    if test_file is not None else None)
    return loader, train_dataset, test_dataset

def create_data_loaders(fold, batch_size, workers):
    train_ids, val_ids = dataset.get_split(fold)
    labeled_size = len(train_ids)
    unlabeled_size = 18000
    sampler = dataset.TwoStreamBatchSampler(
        range(labeled_size),  # labeled ids
        list(range(labeled_size,
                   labeled_size + unlabeled_size)),  # unlabeled ids
        batch_size,  # total batch size (labeled + unlabeled)
        LABELED_BATCH_SIZE)  # labeled batch size  # TODO: was .5
    train_loader = dataset.DataLoader(
        dataset=dataset.MeanTeacherTGSDataset(
            train_ids, transform=dataset.train_transform(), mode='train'),
        num_workers=workers,
        batch_sampler=sampler,
        pin_memory=torch.cuda.is_available())
    valid_loader = dataset.make_loader(val_ids,
                                       transform=dataset.val_transform(),
                                       shuffle=False,
                                       batch_size=batch_size,
                                       workers=workers)
    return train_loader, valid_loader

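# `dataset.TwoStreamBatchSampler` above is not shown here. A minimal sketch of
# a mean-teacher-style two-stream sampler: each batch draws labeled_batch_size
# indices from the labeled pool and fills the rest from the unlabeled pool.
# The class name and details are assumptions, not the original implementation:
import random

from torch.utils.data.sampler import Sampler


class TwoStreamBatchSamplerSketch(Sampler):
    def __init__(self, labeled_idxs, unlabeled_idxs, batch_size,
                 labeled_batch_size):
        self.labeled_idxs = list(labeled_idxs)
        self.unlabeled_idxs = list(unlabeled_idxs)
        self.labeled_bs = labeled_batch_size
        self.unlabeled_bs = batch_size - labeled_batch_size

    def __iter__(self):
        random.shuffle(self.labeled_idxs)
        random.shuffle(self.unlabeled_idxs)
        for i in range(len(self)):
            # one mixed batch: labeled slice first, unlabeled slice after
            yield (self.labeled_idxs[i * self.labeled_bs:
                                     (i + 1) * self.labeled_bs] +
                   self.unlabeled_idxs[i * self.unlabeled_bs:
                                       (i + 1) * self.unlabeled_bs])

    def __len__(self):
        return min(len(self.labeled_idxs) // self.labeled_bs,
                   len(self.unlabeled_idxs) // self.unlabeled_bs)
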
def train_one_epoch(self, epoch):
    log('================== start to train ===================')
    self.optimizer.zero_grad()
    hist = np.zeros((self.label_num, self.label_num))

    # prepare data loader
    train_loader = dataset.DataLoader(self.datasets['train'],
                                      batch_size=self.batch_size,
                                      shuffle=True,
                                      num_workers=8)
    total_batch_num = len(self.datasets['train']) // self.batch_size

    # train through dataset
    for batch_idx, batch in enumerate(train_loader):
        image, label = batch['image'], batch['label']
        image, label = image.cuda(self.GPU), label.cuda(self.GPU)

        out = self.model(image)[0]
        out = nn.UpsamplingBilinear2d(size=image.shape[-2:])(out)  # N C H W
        loss = self.criterion(out, label)

        # gradient accumulation: average the loss over iter_size batches
        # and step the optimizer only every iter_size iterations
        iter_size = int(args['--iterSize'])
        loss = loss / iter_size
        loss.backward()
        if batch_idx % iter_size == 0:
            self.optimizer.step()
            self.optimizer.zero_grad()

        # accumulate confusion matrix and calculate IoU
        pred = torch.argmax(out, dim=1)
        hist += fast_hist(pred.cpu().data.numpy().flatten(),
                          label.cpu().data.numpy().flatten(),
                          self.label_num)
        ious = per_class_iu(hist) * 100
        mean_iou = mean_iu(ious)

        # output result
        print('epoch %d | %d/%d complete | loss: %.4f | IoU: %.4f' %
              (epoch, batch_idx + 1, total_batch_num,
               loss.item() * iter_size, mean_iou))
        summary_idx = int(total_batch_num * epoch + batch_idx)
        self.writer.add_scalar('training_loss', loss.item() * iter_size,
                               summary_idx)
        self.writer.add_scalar('training_iou', mean_iou, summary_idx)

        if batch_idx % 100 == 0:
            id_map = logits2trainId(out[0])
            save_img = image[0].cpu().data.numpy().transpose(1, 2, 0) + 70
            save_img = save_img.astype('uint8')
            # self.writer.add_image('train_pred %d' % batch_idx,
            #                       trainId2color(id_map), summary_idx)
            self.writer.add_image('train_image %d' % batch_idx, save_img,
                                  summary_idx)

def check_grammar(test_file, limit=-1, grammar_type=1):
    loader = dataset.DataLoader(filter_coord=True)
    test_dataset = loader.load(test_file,
                               train=True,
                               bucketing=False,
                               size=None if limit < 0 else limit)
    word_vocab = loader.get_processor('word').vocab

    from models.gold import GoldModel
    model = GoldModel()
    if grammar_type == 1:
        cfg = parsers.Grammar.CFG_COORD_1 + parsers.Grammar.CFG
    elif grammar_type == 2:
        cfg = parsers.Grammar.CFG_COORD_2 + parsers.Grammar.CFG
    else:
        raise ValueError("Invalid grammar type: {}".format(grammar_type))
    grammar = parsers.Grammar(word_vocab, cfg)
    parser = parsers.CkyParser(model, grammar)
    evaluator = eval_module.Evaluator(parser,
                                      logger=logging,
                                      report_details=False)

    n_corrects = 0
    pbar = tqdm(total=len(test_dataset))
    for batch in test_dataset.batch(size=20, colwise=True, shuffle=False):
        xs, ts = batch[:-1], batch[-1]
        true_coords_batch = ts
        model.set_gold(true_coords_batch)
        pred_coords_batch = evaluator._parser.parse(*xs, n_best=1)
        for i, (pred_coord_entries, true_coords) in \
                enumerate(zip(pred_coords_batch, true_coords_batch)):
            pred_coords, _score = pred_coord_entries[0]
            true_coords = {
                ckey: coord
                for ckey, coord in true_coords.items() if coord is not None
            }
            if pred_coords == true_coords:
                n_corrects += 1
            else:
                sentence = ' '.join(
                    [word_vocab.lookup(word_id) for word_id in xs[0][i]])
                print("SENTENCE: {}\nPRED: {}\nTRUE: {}\n-".format(
                    sentence, pred_coords, true_coords))
            evaluator.add(pred_coords, true_coords)
        pbar.update(len(ts))
    pbar.close()
    evaluator.report()
    logging.info("Number of correct trees: {}/{}".format(
        n_corrects, len(test_dataset)))

def eval_one_epoch(self, epoch):
    log('================== start to val ===================')
    self.optimizer.zero_grad()
    hist = np.zeros((self.label_num, self.label_num))

    # prepare data loader
    val_loader = dataset.DataLoader(self.datasets['val'],
                                    batch_size=self.batch_size,
                                    shuffle=True,
                                    num_workers=8)
    total_batch_num = len(self.datasets['val']) / self.batch_size
    loss_sum = 0

    # run through dataset
    for batch_idx, batch in enumerate(val_loader):
        image, label = batch['image'], batch['label']
        image, label = image.cuda(self.GPU), label.cuda(self.GPU)

        out = self.model(image)[0]
        out = nn.UpsamplingBilinear2d(size=image.shape[-2:])(out)
        loss = self.criterion(out, label)
        loss_sum += loss.item()

        # cumulate confusion matrix
        pred = torch.argmax(out, dim=1)
        hist += fast_hist(pred.cpu().data.numpy().flatten(),
                          label.cpu().data.numpy().flatten(),
                          self.label_num)

        # save sample result to tensorboard
        if batch_idx % 100 == 0:
            id_map = logits2trainId(out[0])  # save one image
            summary_idx = int(total_batch_num * epoch + batch_idx)
            save_img = image[0].cpu().data.numpy().transpose(1, 2, 0) + 70
            save_img = save_img.astype('uint8')
            # self.writer.add_image('val_pred %d' % batch_idx,
            #                       trainId2color(id_map), summary_idx)
            self.writer.add_image('val_image %d' % batch_idx, save_img,
                                  summary_idx)

    # output result
    avg_loss = loss_sum / total_batch_num
    ious = per_class_iu(hist) * 100
    mean_iou = mean_iu(ious)
    self.writer.add_scalar('val_loss', avg_loss, epoch)
    self.writer.add_scalar('val_iou', mean_iou, epoch)
    log("avg_loss is %.4f" % avg_loss)
    log('mean IoU: %.3f' % mean_iou)
    print(' '.join('{:.03f}'.format(i) for i in ious))

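# `fast_hist`, `per_class_iu` and `mean_iu` are used by train_one_epoch and
# eval_one_epoch but not defined here. They appear to be the standard
# confusion-matrix IoU helpers (as in the Cityscapes evaluation scripts);
# a sketch, assuming integer label maps with ignore labels outside [0, n):
import numpy as np


def fast_hist(pred, label, n):
    # n x n confusion matrix over pixels whose label is a valid class
    k = (label >= 0) & (label < n)
    return np.bincount(n * label[k].astype(int) + pred[k],
                       minlength=n ** 2).reshape(n, n)


def per_class_iu(hist):
    # per-class IoU: diagonal / (row sum + column sum - diagonal)
    return np.diag(hist) / (hist.sum(1) + hist.sum(0) - np.diag(hist))


def mean_iu(ious):
    # mean over classes, ignoring NaNs from classes absent in the batch
    return float(np.nanmean(ious))
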
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='Supervised training')
    parser.add_argument(
        '--autoaugment', action='store_true', default=False,
        help='Use autoaugment policy, only for CIFAR10 (default: False)')
    parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                        help='Input batch size for training (default: 128)')
    parser.add_argument('--dataset', type=str, default='cifar10',
                        help='Dataset name (default: cifar10)')
    parser.add_argument('--epochs', type=int, default=200, metavar='N',
                        help='Number of epochs to train (default: 200)')
    parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
                        help='Learning rate (default: 0.1)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument(
        '--network', type=str, default='ResNet-18',
        help='Network model (default: ResNet-18), choose between '
             '(ResNet-18, TempEns, RevNet-18)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='Disables CUDA training')
    parser.add_argument('--num_workers', type=int, default=4,
                        help='Number of data loading workers')
    parser.add_argument('--rotnet_dir', type=str, default='',
                        help='RotNet saved directory')
    parser.add_argument('--save_dir', type=str, default='./data/supervised/',
                        help='Directory to save models')
    parser.add_argument('--seed', type=int, default=1,
                        help='Random seed (default: 1)')
    args = parser.parse_args()
    args.name = 'supervised_%s_%s_seed%u' % (args.dataset.lower(),
                                             args.network.lower(), args.seed)

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    dataset_train = dataset.GenericDataset(dataset_name=args.dataset,
                                           split='train',
                                           autoaugment=args.autoaugment)
    dataset_test = dataset.GenericDataset(dataset_name=args.dataset,
                                          split='test')
    dloader_train = dataset.DataLoader(dataset=dataset_train,
                                       batch_size=args.batch_size,
                                       num_workers=args.num_workers,
                                       shuffle=True)
    dloader_test = dataset.DataLoader(dataset=dataset_test,
                                      batch_size=args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False)

    # Load model
    model = models.load_net(args.network, dataset_train.n_classes)

    # Use RotNet pretraining
    if args.rotnet_dir:
        # Load RotNet model, manually delete layers > 2
        state_dict_rotnet = torch.load(
            os.path.join(
                args.rotnet_dir,
                'rotNet_%s_%s_lr_best.pth' % (args.dataset,
                                              args.network.lower())))
        for key in state_dict_rotnet.copy().keys():
            if 'fc' in key or 'layer3' in key or 'layer4' in key:
                del state_dict_rotnet[key]
        model.load_state_dict(state_dict_rotnet, strict=False)
        # Only finetune the upper layers (>2); freeze the rest
        for name, param in model.named_parameters():
            if 'fc' not in name and 'layer3' not in name and 'layer4' not in name:
                param.requires_grad = False
    model = model.to(device)

    # Init optimizer and loss
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=5e-4,
                          nesterov=True)
    exp_lr_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[60, 120, 160, 200], gamma=0.2)
    criterion = nn.CrossEntropyLoss()

    best_acc = 0
    for epoch in range(args.epochs + 1):
        loss_record = train(epoch, model, device, dloader_train, optimizer,
                            exp_lr_scheduler, criterion, args)
        acc_record = test(model, device, dloader_test, args)
        is_best = acc_record.avg > best_acc
        best_acc = max(acc_record.avg, best_acc)
        utils.save_checkpoint(
            model.state_dict(), is_best, args.save_dir,
            checkpoint=args.name + 'supervised_training_ckpt.pth',
            best_model=args.name + 'supervised_training_best.pth')

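# `utils.save_checkpoint` (used above and in the RotNet script below) is not
# shown in this snippet. A minimal sketch of what such a helper typically
# does, assuming the signature implied by the call sites; the real
# implementation may differ:
import os
import shutil

import torch


def save_checkpoint_sketch(state, is_best, save_dir,
                           checkpoint='checkpoint.pth',
                           best_model='best.pth'):
    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, checkpoint)
    torch.save(state, path)  # always save the latest state dict
    if is_best:
        # keep a separate copy of the best-performing model
        shutil.copyfile(path, os.path.join(save_dir, best_model))
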
""" CONSTANTS """ MAX_SRC_SEQ_LEN = 45 MAX_TGT_SEQ_LEN = 45 """ MODEL AND DATA LOADER """ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # Initialize Tokenizer object with input and output vocabulary files myTokenizer = tokenizer.Tokenizer(src_vocab_file, tgt_vocab_file, device=device) # Load model from checkpoint in evaluation mode model = utils.build_model(args.arch, myTokenizer.src_vocab_size, myTokenizer.tgt_vocab_size, args.embed_dim, args.fcn_dim, args.num_heads, args.num_layers, args.dropout, myTokenizer.src_to_tgt_vocab_conversion_matrix) model = utils.load_model(model, args.model_checkpoint, logger) model.to(device) model.eval() # Initialize DataLoader object data_loader = dataset.DataLoader(myTokenizer, train_file_path=None, valid_file_path=None, test_file_path=test_file, device=device) """ FUNCTIONS """ def prdeict_word(src, max_seq_len): # Add batch dimension src = src.unsqueeze(dim=0) src_key_padding_mask = data_loader.get_padding_mask(src) memory = model.encode(src, src_key_padding_mask) outputs = torch.zeros(1, max_seq_len, dtype=torch.long, device=device) outputs[0] = myTokenizer.sos_id for j in range(1, max_seq_len): # Compute output of model tgt_key_padding_mask = data_loader.get_padding_mask(outputs[:, :j]) out = model.decode(memory, outputs[:, :j], tgt_key_padding_mask, src_key_padding_mask).squeeze() if \
import os

import functions
import train
import network
import save_results
import options
import dataset

if __name__ == '__main__':
    opt = options.Options().parser.parse_args()

    ratio_A = opt.ratio_A
    dataset_A = dataset.CustomDataset(ratio=ratio_A, train=True,
                                      dataset=opt.dataset)
    dataloader_A = dataset.DataLoader(dataset_A,
                                      batch_size=opt.batch_size_A,
                                      shuffle=True)

    ratio_B = opt.ratio_B
    dataset_B = dataset.CustomDataset(ratio=ratio_B, train=True,
                                      dataset=opt.dataset)
    dataloader_B = dataset.DataLoader(dataset_B,
                                      batch_size=opt.batch_size_B,
                                      shuffle=True)

    sampled_batch_size = opt.sampled_batch_size

    testset_A = dataset.CustomDataset(ratio=ratio_A, train=False)
    testloader_A = dataset.DataLoader(testset_A,
                                      batch_size=opt.batch_size_A,
# torch version 1.6.0 + cuda 10.1
import logging

import torch

import data_utils
import dataset
from dataset import FrEnDataset  # module location assumed; not shown here

logging.info('Torch Version {}'.format(torch.__version__))

# test path => TODO argparse
DATA_PATH = './data/fr-en'

if __name__ == '__main__':
    # Load Dataset
    data_utils.load_data_from_file(DATA_PATH, DATA_PATH)
    en_dataset = FrEnDataset(txt_files='./data/fr-en/fr-en.en.txt',
                             root_dir='./data/fr-en/')
    fr_dataset = FrEnDataset(txt_files='./data/fr-en/fr-en.fr.txt',
                             root_dir='./data/fr-en/')
    en_loader = dataset.DataLoader(en_dataset,
                                   batch_size=4,
                                   shuffle=True,
                                   num_workers=1)

    # for i_batch, sample_batched in enumerate(dataloader):
    #     print(i_batch, sample_batched.size())
    #     # observe 4th batch and stop.
    #     if i_batch == 3:
    #         show_text_batch(sample_batched)
    #         break

    # Train

    # Test

train_ds = dataset.Dataset(
    train_sentences,
    word_padding_idx=voc.padding_index,
    pos_padding_idx=const.POS_PADDING_IDX,
    chunk_padding_idx=const.CHUNK_PADDING_IDX,
    character_padding_idx=const.CHARACTER2INDEX['<PAD>'],
    tag_padding_idx=const.CHUNK_PADDING_IDX)
dev_ds = dataset.Dataset(
    dev_sentences,
    word_padding_idx=voc.padding_index,
    pos_padding_idx=const.POS_PADDING_IDX,
    chunk_padding_idx=const.CHUNK_PADDING_IDX,
    character_padding_idx=const.CHARACTER2INDEX['<PAD>'],
    tag_padding_idx=const.CHUNK_PADDING_IDX)
test_ds = dataset.Dataset(
    test_sentences,
    word_padding_idx=voc.padding_index,
    pos_padding_idx=const.POS_PADDING_IDX,
    chunk_padding_idx=const.CHUNK_PADDING_IDX,
    character_padding_idx=const.CHARACTER2INDEX['<PAD>'],
    tag_padding_idx=const.CHUNK_PADDING_IDX)

train_dl = dataset.DataLoader(train_ds, batch_size=args.batch_size)
dev_dl = dataset.DataLoader(dev_ds, batch_size=args.batch_size)
test_dl = dataset.DataLoader(test_ds, batch_size=args.batch_size)

# word_embedding_dim = 300
# momentum = 0.9
crf_loss_reduction = 'token_mean'
model_fn = 'data/model/200_300_token_mean/best_model.tar'
# model_fn = None
model, optimizer, epoch, all_losses, eval_losses, test_scores, \
    best_test_score = load_model(model_fn, voc,
                                 args.character_embedding_dim,
                                 args.character_hidden_dim,
                                 args.context_hidden_dim,
model.to(device)
criterion = nn.NLLLoss(reduction='mean', ignore_index=myTokenizer.pad_id)
optimizer = optim.Adam(model.parameters(), lr=args.lr,
                       betas=(args.beta, args.beta2))
scheduler = ReduceLROnPlateau(optimizer, 'min', min_lr=args.min_lr,
                              factor=args.discount_factor,
                              patience=args.patience_reduce) \
    if (args.scheduler == "ReduceLROnPlateau") \
    else utils.WarmupInverseSquareRootSchedule(optimizer, args.warmup_steps)

# Initialize DataLoader object
data_loader = dataset.DataLoader(myTokenizer,
                                 train_file_path=train_file,
                                 valid_file_path=valid_file,
                                 test_file_path=None,
                                 device=device,
                                 batch_size=args.batch_size,
                                 max_src_seq_len=MAX_SRC_SEQ_LEN,
                                 max_tgt_seq_len=MAX_TGT_SEQ_LEN)

"""
HELPER FUNCTIONS
"""
def get_lr():
    if isinstance(scheduler, ReduceLROnPlateau):
        return optimizer.param_groups[0]['lr']
    try:
        return scheduler.get_last_lr()[0]
    except AttributeError:
        # older schedulers only expose get_lr()
        return scheduler.get_lr()[0]

def main():
    # Training settings
    parser = argparse.ArgumentParser(
        description='Alternative Training for Semi-supervised learning')
    parser.add_argument(
        '--autoaugment', action='store_true', default=False,
        help='Use AutoAugment data augmentation (default: False)')
    parser.add_argument('--dataset', type=str, default='cifar10',
                        help='Dataset (default: cifar10)')
    parser.add_argument('--epochs_refine', type=int, default=100,
                        help='Refinement epochs on labelled set')
    parser.add_argument(
        '--log_interval', type=int, default=100, metavar='N',
        help='How many batches to wait before logging training status')
    parser.add_argument('--lr', type=float, default=0.01,
                        help='Learning rate (default: 0.01)')
    parser.add_argument('--milestones_outer', nargs='+', type=int,
                        default=[60, 100], help='Outer loop milestones')
    parser.add_argument(
        '--milestones_inner', nargs='+', type=int, default=[7, 10],
        help='Inner loop milestones (change of lr and number of epochs)')
    parser.add_argument('--momentum', type=float, default=0.9,
                        help='SGD momentum (default: 0.9)')
    parser.add_argument(
        '--nb_labels_per_class', type=int, default=10,
        help='Number of labelled samples per class (default: 10)')
    parser.add_argument('--network', type=str, default='ResNet-18',
                        help='Network (default: ResNet-18)')
    parser.add_argument('--no-cuda', action='store_true', default=False,
                        help='Disables CUDA training (default: False)')
    parser.add_argument('--proportion_CE', type=float, default=0.5,
                        help='Weight of cross entropy loss')
    parser.add_argument('--rotnet_dir', type=str, default='',
                        help='RotNet saved directory')
    parser.add_argument('--seed', type=int, default=1,
                        help='Random seed (default: 1)')
    parser.add_argument('--save_dir', type=str,
                        default='./data/alternative_supervised/',
                        help='Directory to save models')
    args = parser.parse_args()

    global logger_module
    logger_module = args
    logger_module.time_start = datetime.datetime.now().strftime(
        '%Y-%m-%d %H:%M:%S')

    # Path to file
    os.makedirs(args.save_dir, exist_ok=True)
    args.name = 'alternative_%s_%s_seed%u' % (logger_module.dataset.lower(),
                                              logger_module.network.lower(),
                                              args.seed)
    logger_module.net_path = os.path.join(args.save_dir, args.name + '.pth')
    logger_module.pkl_path = os.path.join(args.save_dir, args.name + '.pkl')
    logger_module.train_loss = []
    logger_module.train_acc = []
    logger_module.test_loss = []
    logger_module.test_acc = []
    logger_module.test_acc5 = []
    logger_module.percentage_correct_training = []
    logger_module.number_training = []

    train_data = 'train_data' if args.dataset != 'svhn' else 'data'
    train_labels = 'train_labels' if args.dataset != 'svhn' else 'labels'

    with open(logger_module.pkl_path, "wb") as output_file:
        pickle.dump(vars(logger_module), output_file)

    # Set up seed and GPU usage
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Initialize the dataset
    train_set = dataset.GenericDataset(args.dataset, 'train')
    test_set = dataset.GenericDataset(args.dataset, 'test')

    # Build meta set containing only the restricted labeled samples
    meta_set = dataset.GenericDataset(args.dataset, 'train')
    index_meta = []
    for target in range(train_set.n_classes):
        index_meta.extend(
            np.random.choice(
                np.argwhere(
                    np.array(getattr(train_set.data,
                                     train_labels)) == target)[:, 0],
                args.nb_labels_per_class, False))
    setattr(
        meta_set.data, train_labels,
        list(itemgetter(*index_meta)(getattr(train_set.data, train_labels))))
    setattr(
        meta_set.data, train_data,
        list(itemgetter(*index_meta)(getattr(train_set.data, train_data))))

    # Copy train set for future reassignment
    trainset_targets_save = np.copy(getattr(train_set.data, train_labels))
    trainset_data_save = np.copy(getattr(train_set.data, train_data))

    # Dataloader iterators  # TODO Autoaugment
    trainloader = dataset.DataLoader(train_set, batch_size=128, shuffle=True,
                                     num_workers=2)
    metaloader = dataset.DataLoader(meta_set, batch_size=128, shuffle=True,
                                    num_workers=2)
    testloader = dataset.DataLoader(test_set, batch_size=1000, shuffle=False,
                                    num_workers=1)

    # First network initialization
    model = models.load_net(logger_module.network, train_set.n_classes)

    # Load model
    if args.rotnet_dir:
        state_dict_rotnet = torch.load(
            os.path.join(
                args.rotnet_dir,
                'rotNet_%s_%s_lr_best.pth' % (logger_module.dataset.lower(),
                                              logger_module.network.lower())))
        del state_dict_rotnet['fc.weight']
        del state_dict_rotnet['fc.bias']
        model.load_state_dict(state_dict_rotnet, strict=False)
    model = model.to(device)

    global thought_targets
    global meta_labels_total
    for outer_loop in range(0, args.milestones_outer[1]):
        print('Entering outer loop %u' % outer_loop)

        # Step 1: Fine-tune network and assign labels
        fine_tune_and_assign_labels(args, model, metaloader, trainloader,
                                    testloader, device, train_set,
                                    trainset_data_save, trainset_targets_save,
                                    index_meta, outer_loop)

        # Self distillation starts from a uniform distribution
        meta_labels_total = torch.ones(
            len(trainloader.dataset),
            trainloader.dataset.n_classes) / float(
                trainloader.dataset.n_classes)

        # Step 1.5: Reinitialize net
        model = models.load_net(logger_module.network, train_set.n_classes)
        # Load model
        if args.rotnet_dir:
            state_dict_rotnet = torch.load(
                os.path.join(
                    args.rotnet_dir,
                    'rotNet_%s_%s_lr_best.pth' %
                    (logger_module.dataset.lower(),
                     logger_module.network.lower())))
            del state_dict_rotnet['fc.weight']
            del state_dict_rotnet['fc.bias']
            model.load_state_dict(state_dict_rotnet, strict=False)
        model = model.to(device)

        # Freeze net first two blocks
        for name, param in model.named_parameters():
            if 'fc' not in name and 'layer3' not in name and 'layer4' not in name:
                param.requires_grad = False

        # Optimizer and LR scheduler
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum, weight_decay=5e-4,
                              nesterov=True)
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[args.milestones_inner[0]], gamma=0.1)

        # Step 2: Training using predicted labels
        print('Labels assignment done. Entering inner loop')
        for epoch in range(args.milestones_inner[1]):
            scheduler.step()
            train(args, model, device, trainloader, optimizer, epoch,
                  'train', outer_loop)
            test(args, model, device, testloader)
            logger_module.epoch = epoch
            with open(logger_module.pkl_path, "wb") as output_file:
                pickle.dump(vars(logger_module), output_file)
            torch.save(model.state_dict(), logger_module.net_path)

    test(args, model, device, testloader, True)

def main():
    # Training settings
    parser = argparse.ArgumentParser(description='RotNet')
    parser.add_argument(
        '--autoaugment', action='store_true', default=False,
        help='Use autoaugment policy, only for CIFAR10 (default: False)')
    parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                        help='Input batch size for training (default: 128)')
    parser.add_argument('--dataset', type=str, default='cifar10',
                        help='Dataset name (default: cifar10)')
    parser.add_argument('--epochs', type=int, default=200, metavar='N',
                        help='Number of epochs to train (default: 200)')
    parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
                        help='Learning rate (default: 0.1)')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument(
        '--network', type=str, default='ResNet-18',
        help='Network model (default: ResNet-18), choose between '
             '(ResNet-18, TempEns, RevNet-18)')
    parser.add_argument('--no_cuda', action='store_true', default=False,
                        help='Disables CUDA training')
    parser.add_argument('--num_workers', type=int, default=4,
                        help='Number of data loading workers')
    parser.add_argument('--save_dir', type=str, default='./data/rotNet',
                        help='Directory to save models')
    parser.add_argument('--seed', type=int, default=1,
                        help='Random seed (default: 1)')
    args = parser.parse_args()

    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(args.seed)

    dataset_train = dataset.GenericDataset(dataset_name=args.dataset,
                                           split='train',
                                           autoaugment=args.autoaugment)
    dataset_test = dataset.GenericDataset(dataset_name=args.dataset,
                                          split='test')
    dloader_train = dataset.DataLoader(rotnet=True,
                                       dataset=dataset_train,
                                       batch_size=args.batch_size,
                                       num_workers=args.num_workers,
                                       shuffle=True)
    dloader_test = dataset.DataLoader(rotnet=True,
                                      dataset=dataset_test,
                                      batch_size=args.batch_size,
                                      num_workers=args.num_workers,
                                      shuffle=False)

    model = models.load_net(args.network)
    model = model.to(device)

    # follow the same setting as the RotNet paper
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum, weight_decay=5e-4,
                          nesterov=True)
    exp_lr_scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[60, 120, 160, 200], gamma=0.2)
    criterion = nn.CrossEntropyLoss()

    best_acc = 0
    for epoch in range(args.epochs + 1):
        loss_record = train(epoch, model, device, dloader_train, optimizer,
                            exp_lr_scheduler, criterion, args)
        acc_record = test(model, device, dloader_test, args)
        is_best = acc_record.avg > best_acc
        best_acc = max(acc_record.avg, best_acc)
        utils.save_checkpoint(model.state_dict(), is_best, args.save_dir,
                              checkpoint='rotNet_%s_%s_lr_checkpoint.pth' %
                              (args.dataset, args.network.lower()),
                              best_model='rotNet_%s_%s_lr_best.pth' %
                              (args.dataset, args.network.lower()))
        # Saving milestones only
        if epoch in [59, 119, 159, 199]:
            print('Saving model at milestone: %u' % epoch)
            utils.save_checkpoint(model.state_dict(), False, args.save_dir,
                                  checkpoint='rotNet_%s_%s_%u_checkpoint.pth' %
                                  (args.dataset, args.network.lower(), epoch))

is_stop = False
while not is_stop:
    paragraph = input('Enter a paragraph: ')
    if paragraph == 'n' or paragraph == 'N':
        is_stop = True
    elif paragraph == '':
        continue
    else:
        text_sentences = utils.get_sentences(paragraph)
        sentences = [Sentence(s, word_vocab=voc) for s in text_sentences]
        ds = dataset.Dataset(
            sentences,
            word_padding_idx=voc.padding_index,
            pos_padding_idx=const.POS_PADDING_IDX,
            chunk_padding_idx=const.CHUNK_PADDING_IDX,
            character_padding_idx=const.CHARACTER2INDEX['<PAD>'],
            tag_padding_idx=const.CHUNK_PADDING_IDX)
        dl = dataset.DataLoader(ds, batch_size=len(sentences))
        for ((batch_sentence_word_indexes, batch_sentence_pos_indexes,
              batch_sentence_chunk_indexes,
              batch_sentence_word_character_indexes),
             batch_sentence_tag_indexes, batch_sentence_lengths,
             batch_word_lengths) in dl:
            pred_seqs = model(batch_sentence_word_indexes,
                              batch_sentence_pos_indexes,
                              batch_sentence_chunk_indexes,
                              batch_sentence_word_character_indexes,
                              batch_sentence_lengths, batch_word_lengths,
                              None)

def main():
    parser = argparse.ArgumentParser(description='Anomaly Detection')
    parser.add_argument('split', nargs="?",
                        choices=["train", "test", "visualize"])
    parser.add_argument('ini', nargs='?', type=ini_file,
                        help='inifile name')
    args = parser.parse_args()

    with open(args.ini) as f:
        config = yaml.load(f, yaml.SafeLoader)
    numpy.random.seed(config['seed'])
    paths = config['paths']
    model_params = config['model_params']
    transform = config['transform']

    preprocesses = []
    preprocesses.append(preprocessor.Resize((256, 256)))
    if transform['gray2rgb']:
        preprocesses.append(preprocessor.Gray2RGB())
    preprocesses.append(preprocessor.HWC2CHW())
    # preprocesses.append(preprocessor.Normalize())
    preprocesses.append(preprocessor.DivideBy255())
    preprocesses.append(preprocessor.TransformForTorchModel())

    model_preprocesses = []
    model_preprocesses.append(preprocessor.ToTensor())
    model_preprocesses.append(
        preprocessor.VGG16ScaledFeatures(
            last_layer=22,
            cutoff_edge_width=model_params['cutoff_edge_width']))
    model_preprocesses.append(
        preprocessor.BatchSplitImg(patch_size=model_params['patch_size'],
                                   stride=model_params['stride'],
                                   data_format='CHW'))

    if args.split == 'train':
        train_dataset = dataset.MVTecDataset(root=paths['root'],
                                             ext=paths['ext'],
                                             train=True,
                                             neg_dir=paths['train_good_dir'],
                                             preprocessor=preprocesses)
        train_loader = dataset.DataLoader(
            train_dataset,
            batch_size=model_params['batch_size'],
            shuffle=True,
            drop_last=False)

        model = models.SparseCodingWithMultiDict(
            preprocesses=model_preprocesses,
            num_of_basis=model_params['num_of_basis'],
            alpha=model_params['alpha'],
            transform_algorithm=model_params['transform_algorithm'],
            transform_alpha=model_params['transform_alpha'],
            fit_algorithm=model_params['fit_algorithm'],
            n_iter=model_params['n_iter'],
            num_of_nonzero=model_params['num_of_nonzero'],
            train_loader=train_loader)
        model.train()
        model.save_dict(paths['dict_file'])

    elif args.split == 'test':
        test_neg_dataset = dataset.MVTecDataset(
            root=paths['root'], ext=paths['ext'], train=False, mode='neg',
            neg_dir=paths['test_good_dir'], preprocessor=preprocesses)
        if paths['test_bad_dir'] is None:
            test_pos_dataset = dataset.MVTecDataset(
                root=paths['root'], ext=paths['ext'], train=False,
                mode='pos', neg_dir=paths['test_good_dir'],
                preprocessor=preprocesses)
        else:
            test_pos_dataset = dataset.MVTecDataset(
                root=paths['root'], ext=paths['ext'], train=False,
                mode='pos', pos_dir=paths['test_bad_dir'],
                preprocessor=preprocesses)
        test_neg_loader = dataset.DataLoader(test_neg_dataset, batch_size=1,
                                             shuffle=False, drop_last=False)
        test_pos_loader = dataset.DataLoader(test_pos_dataset, batch_size=1,
                                             shuffle=False, drop_last=False)

        model = models.SparseCodingWithMultiDict(
            preprocesses=model_preprocesses,
            num_of_basis=model_params['num_of_basis'],
            alpha=model_params['alpha'],
            transform_algorithm=model_params['transform_algorithm'],
            transform_alpha=model_params['transform_alpha'],
            fit_algorithm=model_params['fit_algorithm'],
            n_iter=model_params['n_iter'],
            num_of_nonzero=model_params['num_of_nonzero'],
            test_neg_loader=test_neg_loader,
            test_pos_loader=test_pos_loader)
        model.load_dict(paths['dict_file'])
        model.test()

    elif args.split == 'visualize':
        test_neg_dataset = dataset.MVTecDataset(
            root=paths['root'], ext=paths['ext'], train=False, mode='neg',
            neg_dir=paths['test_good_dir'], preprocessor=preprocesses)
        if paths['test_bad_dir'] is None:
            test_pos_dataset = dataset.MVTecDataset(
                root=paths['root'], ext=paths['ext'], train=False,
                mode='pos', neg_dir=paths['test_good_dir'],
                preprocessor=preprocesses)
        else:
            test_pos_dataset = dataset.MVTecDataset(
                root=paths['root'], ext=paths['ext'], train=False,
                mode='pos', pos_dir=paths['test_bad_dir'],
                preprocessor=preprocesses)
        test_neg_loader = dataset.DataLoader(test_neg_dataset, batch_size=1,
                                             shuffle=False, drop_last=False)
        test_pos_loader = dataset.DataLoader(test_pos_dataset, batch_size=1,
                                             shuffle=False, drop_last=False)

        model = models.SparseCodingWithMultiDict(
            preprocesses=model_preprocesses,
            num_of_basis=model_params['num_of_basis'],
            alpha=model_params['alpha'],
            transform_algorithm=model_params['transform_algorithm'],
            transform_alpha=model_params['transform_alpha'],
            fit_algorithm=model_params['fit_algorithm'],
            n_iter=model_params['n_iter'],
            num_of_nonzero=model_params['num_of_nonzero'],
            test_neg_loader=test_neg_loader,
            test_pos_loader=test_pos_loader)
        model.load_dict(paths['dict_file'])
        model.visualize(
            ch=model_params['visualized_ch'],
            org_H=int(256 / 8.) - model_params['cutoff_edge_width'] * 2,
            org_W=int(256 / 8.) - model_params['cutoff_edge_width'] * 2,
            patch_size=model_params['patch_size'],
            stride=model_params['stride'])

def eval(self, eval_data, batch_size):
    self.model.eval()
    losses = []
    recalls = []
    mrrs = []
    dataloader = dataset.DataLoader(eval_data, batch_size)

    def reset_hidden(hidden, mask):
        # zero out hidden states of sessions that just ended
        if len(mask) != 0:
            hidden[:, mask, :] = 0
        return hidden

    with torch.no_grad():
        hidden = self.model.init_hidden()
        for input, target, mask in dataloader:
            input = input.to(self.device)
            target = target.to(self.device)
            hidden = reset_hidden(hidden, mask).detach()
            logit, hidden = self.model(input, hidden)
            # restrict scores to the batch's target items
            # (in-batch negative sampling)
            logit_sampled = logit[:, target.view(-1)]
            loss = self.loss_func(logit_sampled)
            recall, mrr = evaluate(logit, target, k=self.topk)
            losses.append(loss.item())
            recalls.append(recall)
            mrrs.append(mrr.item())

    mean_losses = np.mean(losses)
    mean_recall = np.mean(recalls)
    mean_mrr = np.mean(mrrs)
    return mean_losses, mean_recall, mean_mrr

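# `evaluate(logit, target, k)` above is assumed to return recall@k and MRR@k
# over a batch of next-item logits. A minimal sketch of that metric pair
# (the name and return types here are assumptions, not the original code):
import torch


def evaluate_sketch(logit, target, k=20):
    _, topk = logit.topk(k, dim=-1)               # top-k item indices per row
    hits = topk == target.view(-1, 1)             # (B, k) boolean hit matrix
    recall = hits.any(dim=1).float().mean()       # hit rate @ k
    ranks = hits.float().argmax(dim=1) + 1        # 1-based rank of each hit
    rr = hits.any(dim=1).float() / ranks.float()  # reciprocal rank, 0 if miss
    return recall.item(), rr.mean()               # caller calls mrr.item()
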
def train(
        train_file,
        test_file=None,
        embed_file=None,
        embed_size=100,
        n_epoch=20,
        batch_size=32,
        lr=0.001,
        l2_lambda=0.0,
        grad_clip=5.0,
        tasks='tp',
        gpu=-1,
        save_to=None,
        seed=None):
    if seed is not None:
        utils.set_random_seed(seed, gpu)
        Log.i("random seed: {}".format(seed))
    framework_utils.set_debug(App.debug)

    # Select task
    with_tagging_task = False
    with_parsing_task = False
    for char in tasks:
        if char == 't':
            with_tagging_task = True
        elif char == 'p':
            with_parsing_task = True
        else:
            raise ValueError("Invalid task specified: {}".format(char))
    if not any([with_tagging_task, with_parsing_task]):
        raise RuntimeError("No valid task specified")
    Log.i('Task: tagging={}, parsing={}'
          .format(with_tagging_task, with_parsing_task))

    # Transition system
    transition_system = transition.ArcStandard
    if with_parsing_task:
        Log.i('Transition System: {}'.format(transition_system))

    # Load files
    Log.i('initialize DataLoader with embed_file={} and embed_size={}'
          .format(embed_file, embed_size))
    loader = dataset.DataLoader(word_embed_file=embed_file,
                                word_embed_size=embed_size,
                                char_embed_size=10,
                                transition_system=transition_system)
    Log.i('load train dataset from {}'.format(train_file))
    train_dataset = loader.load(train_file, train=True,
                                size=120 if utils.is_dev() else None)
    if test_file:
        Log.i('load test dataset from {}'.format(test_file))
        test_dataset = loader.load(test_file, train=False,
                                   size=16 if utils.is_dev() else None)
    else:
        test_dataset = None

    Log.v('')
    Log.v("initialize ...")
    Log.v('--------------------------------')
    Log.i('# Minibatch-size: {}'.format(batch_size))
    Log.i('# epoch: {}'.format(n_epoch))
    Log.i('# gpu: {}'.format(gpu))
    Log.i('# tagset size: {}'.format(len(loader.tag_map)))
    Log.v('--------------------------------')
    Log.v('')

    # Set up a neural network model
    layers = [
        models.Input(
            word_embeddings=loader.get_embeddings('word'),
            char_embeddings=loader.get_embeddings('char'),
            char_feature_size=50,
            dropout=0.5,
        ),
        models.Recurrent(
            n_layers=2,
            in_size=loader.get_embeddings('word').shape[1] + 50,
            out_size=400,
            dropout=0.5),
        models.Tagger(
            in_size=400 * 2,
            out_size=len(loader.tag_map),
            units=100,
            dropout=0.5) if with_tagging_task else
        models.GoldTagger(out_size=len(loader.tag_map)),
    ]
    if with_parsing_task:
        layers.extend([
            models.Connection(
                tagset_size=len(loader.tag_map),
                tag_embed_size=50,
                dropout=0.5),
            models.Parser(
                in_size=850,
                n_deprels=len(loader.rel_map),
                n_blstm_layers=2,
                lstm_hidden_size=400,
                parser_mlp_units=800,
                dropout=0.50,
                transition_system=transition_system),
        ])
    model = models.MTL(*layers)
    if gpu >= 0:
        framework_utils.set_model_to_device(model, device_id=gpu)

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam(
        alpha=lr, beta1=0.9, beta2=0.999, eps=1e-08)
    optimizer.setup(model)
    if l2_lambda > 0.0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(l2_lambda))
    else:
        l2_lambda = False
    if grad_clip > 0.0:
        optimizer.add_hook(chainer.optimizer.GradientClipping(grad_clip))
    else:
        grad_clip = False
    # optimizer.add_hook(
    #     framework_utils.optimizers.ExponentialDecayAnnealing(
    #         initial_lr=lr, decay_rate=0.75, decay_step=5000,
    #         lr_key='alpha'))
    Log.i('optimizer: Adam(alpha={}, beta1=0.9, '
          'beta2=0.999, eps=1e-08), grad_clip={}, '
          'regularization: WeightDecay(lambda={})'
          .format(lr, grad_clip, l2_lambda))

    # Setup a trainer
    trainer = Trainer(optimizer, model,
                      loss_func=model.compute_loss,
                      accuracy_func=model.compute_accuracy)
    trainer.configure(framework_utils.config)
    if test_dataset:
        evaluator = models.Evaluator(loader, test_file, save_to)
        evaluator.add_target(model)
        trainer.attach_callback(evaluator)
    if save_to is not None:
        accessid = Log.getLogger().accessid
        date = Log.getLogger().accesstime.strftime('%Y%m%d')
        trainer.attach_callback(
            framework_utils.callbacks.Saver(
                model,
                basename="{}-{}".format(date, accessid),
                directory=save_to,
                context=dict(App.context,
                             models=[type(layer) for layer in layers],
                             loader=loader)))

    # Start training
    trainer.fit(train_dataset, None,
                batch_size=batch_size,
                epochs=n_epoch,
                validation_data=test_dataset,
                verbose=App.verbose)

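# Example invocation of the train() entry point above; the file paths and
# hyper-parameter values here are purely illustrative:
if __name__ == '__main__':
    train('data/train.conll',
          test_file='data/test.conll',
          embed_size=100,
          n_epoch=20,
          batch_size=32,
          tasks='tp',
          gpu=-1,
          save_to='models/',
          seed=1)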