def main(data_path: str, text_path: str, /):
    make_deterministic()

    ru_args, en_args, model_data = torch.load(data_path)
    ru_lang = Language(*ru_args)
    en_lang = Language(*en_args)

    model = Seq2Seq.from_data(model_data).to(Device)

    evaluate(model, ru_lang, en_lang, text_path)
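# make_deterministic() above is a project-local utility. As an illustration
# only, here is a minimal sketch of what such a helper usually does; this is
# an assumption, not the repo's actual implementation:
import random
import numpy as np
import torch

def make_deterministic_sketch(seed: int = 0) -> None:
    """Seed all RNGs in play and force deterministic cuDNN kernels."""
    random.seed(seed)                          # Python's built-in RNG
    np.random.seed(seed)                       # NumPy RNG
    torch.manual_seed(seed)                    # CPU and CUDA RNGs
    torch.cuda.manual_seed_all(seed)           # explicit for multi-GPU setups
    torch.backends.cudnn.deterministic = True  # pick deterministic kernels
    torch.backends.cudnn.benchmark = False     # disable non-deterministic autotuning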
def evaluate(dataloader, model, criterion, accuracy,
             static_augmentations=[], device=None, random_seed=123):
    """Evaluate an episodic model; return (avg loss, mean acc, 95% CI)."""
    print("evaluating...")
    if random_seed is not None:
        # Fixing the seed on every call is fine here: we want the same
        # order of test episodes each time so results are comparable.
        utils.make_deterministic(random_seed)
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    losses = AverageMeter()
    accs = []
    nway = dataloader.batch_sampler.n_way
    nshot = dataloader.batch_sampler.n_shot
    nquery = dataloader.batch_sampler.n_query
    with torch.no_grad():
        for i, data in enumerate(tqdm(dataloader)):
            inputs = data["input"].to(device)
            labels = data["label"].to(device)
            inputs_generated = None
            if model.mixer is not None or "generated" in static_augmentations:
                inputs_generated = data["generated"].to(device)
            print_final_nshot = (i == 0)  # only log the shot count once
            outputs, query_labels = model(inputs, labels, nway, nshot, nquery,
                                          inputs_generated=inputs_generated,
                                          print_final_nshot=print_final_nshot,
                                          augmentations=static_augmentations)
            loss = criterion(outputs, query_labels)
            acc = accuracy(outputs, query_labels)
            losses.update(loss.item(), outputs.size(0))
            accs.append(acc.item())

    print("eval loss: %0.5f" % losses.avg)
    acc = float(np.mean(accs))
    conf = float(1.96 * np.std(accs) / np.sqrt(len(accs)))
    print("eval acc: %0.5f +- %0.5f" % (acc, conf))
    return float(losses.avg), acc, conf
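# The interval printed above is the normal-approximation 95% confidence
# interval of the mean episode accuracy, 1.96 * std / sqrt(n_episodes).
# A self-contained sketch of that computation (the helper name is mine,
# not part of the repo):
import numpy as np

def mean_with_ci95(values):
    """Return (mean, half-width of the 95% confidence interval)."""
    values = np.asarray(values, dtype=np.float64)
    ci95 = 1.96 * values.std() / np.sqrt(len(values))
    return float(values.mean()), float(ci95)

# mean_with_ci95([0.81, 0.79, 0.84]) -> (approx. 0.813, approx. 0.023)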
    epochs_paths.append((epoch_num, model_weight_path))
    # Rotate the first ("inf") entry to the end of the list
    inf_entry = epochs_paths[0]
    epochs_paths = epochs_paths[1:]
    epochs_paths.append(inf_entry)
    return epochs_paths


if __name__ == '__main__':
    args = parse_args()
    if args.pdb:
        import pdb
        pdb.set_trace()
    make_deterministic(args.cuda)
    sns.set_style('darkgrid')
    device = torch.device(
        'cpu' if args.cuda is None else 'cuda:{}'.format(args.cuda))
    if not osp.exists(args.run):
        os.makedirs(args.run)
    ckpt_dir = osp.join(args.run, 'ckpt')
    images_dir = osp.join(args.run, 'images')
    log_dir = osp.join(args.run, 'logs')
    if not osp.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    if not osp.exists(images_dir):
        os.makedirs(images_dir)
def main():
    make_deterministic()

    # region Prepare data
    with Timer('\nData preparation time: %s\n'):
        ru_lang = Language()
        en_lang = Language()

        yandex = Yandex(
            'datasets/yandex/corpus.en_ru.1m.ru',
            'datasets/yandex/corpus.en_ru.1m.en',
            ru_lang,
            en_lang,
            data_slice=H.dataset_slice,
        )
        paracrawl = ParaCrawl(
            'datasets/paracrawl/en-ru.txt',
            ru_lang,
            en_lang,
            data_slice=slice(0),
        )

        low = ru_lang.lower_than(H.ru_word_count_minimum)
        infrequent_words_n = max(
            ceil(ru_lang.words_n * H.infrequent_words_percent), len(low))
        if infrequent_words_n > 0:
            ru_lang.drop_words(ru_lang.lowk(infrequent_words_n))
            print(f'{infrequent_words_n:,} infrequent Russian words are dropped')

        low = en_lang.lower_than(H.en_word_count_minimum)
        if len(low) > 0:
            en_lang.drop_words(*low)
            print(f'{len(low):,} infrequent English words are dropped')

        print(f'Russian language: {ru_lang.words_n:,} words, '
              f'{ru_lang.sentence_length:,} words in a sentence')
        print(f'English language: {en_lang.words_n:,} words, '
              f'{en_lang.sentence_length:,} words in a sentence')

        batch = H.batch_size
        dataset = ConcatDataset((yandex, paracrawl))
        loader = DataLoader(dataset, batch, shuffle=True)
    # endregion

    # region Models and optimizers
    model = Seq2Seq(
        Encoder(ru_lang.words_n, H.encoder_embed_dim, H.encoder_hidden_dim,
                H.encoder_bi, H.decoder_hd),
        Attention(H.encoder_hd, H.decoder_hd),
        Decoder(en_lang.words_n, H.decoder_embed_dim, H.decoder_hidden_dim,
                H.encoder_hd),
    ).to(Device).train()

    optimizer = Adam(model.parameters(), lr=H.learning_rate)
    criterion = CrossEntropyLoss(ignore_index=Token_PAD, reduction='sum')
    # endregion

    # region Training
    teaching_percent = H.teaching_percent
    total = len(dataset)
    log_interval = max(5, round(total / batch / 1000))

    for epoch in range(1, H.epochs + 1):
        with Printer() as printer:
            printer.print(f'Train epoch {epoch}: starting...')

            for i, ((ru, ru_l), en_sos, en_eos) in enumerate(loader, 1):
                # Zero the parameter gradients
                optimizer.zero_grad()

                # Run data through the model
                predictions = model(ru, ru_l, en_sos, teaching_percent)

                # Calculate loss
                loss = criterion(predictions, en_eos)

                # Back-propagate and perform optimization
                loss.backward()
                clip_grad_norm_(model.parameters(), H.gradient_norm_clip)
                optimizer.step()

                # Print log
                if i % log_interval == 0:
                    printer.print(f'Train epoch {epoch}: {i * batch / total:.1%} '
                                  f'[{i * batch:,}/{total:,}]')

            printer.print(f'Train epoch {epoch}: completed')
    # endregion

    torch.save(
        (
            ru_lang.__getnewargs__(),
            en_lang.__getnewargs__(),
            model.cpu().eval().data,
        ),
        'data/data.pt',
    )

    evaluate(model.to(Device), ru_lang, en_lang,
             'datasets/yandex/corpus.en_ru.1m.ru',
             slice(H.dataset_slice.stop + 1, H.dataset_slice.stop + 1 + 100))
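# Timer and Printer above are project-local helpers. For Timer, a plausible
# minimal sketch, assuming the constructor takes a format string with a %s
# placeholder for the elapsed time (an assumption, not the repo's code):
import time
from datetime import timedelta

class TimerSketch:
    """Context manager that prints how long its block took."""

    def __init__(self, fmt: str = 'Elapsed: %s'):
        self.fmt = fmt

    def __enter__(self):
        self.start = time.monotonic()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        print(self.fmt % timedelta(seconds=time.monotonic() - self.start))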
parser.add_argument('--ckpt_path', type=str, default='ckpts')
parser.add_argument('--samples_path', type=str, default='out')
parser.add_argument('--summary_path', type=str, default='logs')
parser.add_argument('--loss', type=str,
                    default='original')  # original, official_nll, nll, sum, min
parser.add_argument('--id', type=int, default=-1)
parser.add_argument('--note', type=str, default='')
parser.add_argument('--debug', action='store_true')
# --debug: use few samples, make the model deterministic
conf = parser.parse_args()
print("Configs: ", conf)

if conf.debug:
    utils.make_deterministic()
    np.set_printoptions(threshold=sys.maxsize)

# Get data
if conf.debug:
    conf.batch_size = 3
if conf.data == 'mnist_bw':  # pixels in range (0, 1)
    from tensorflow.examples.tutorials.mnist import input_data
    if not os.path.exists(conf.data_path):
        os.makedirs(conf.data_path)
    data = input_data.read_data_sets(conf.data_path)
    conf.num_classes = 10
    conf.img_height = 28
    conf.img_width = 28
    conf.channels = 1
    conf.bins = 2
    conf.num_batches = data.train.num_examples // conf.batch_size
def train_one_epoch(dataloader, model, criterion, optimizer,
                    accuracy=accuracy, device=None, print_freq=100,
                    random_seed=None):
    if random_seed is not None:
        # Be careful with this! Fixing the seed on every call is fine for
        # evaluate(), where we want exactly the same order of test images.
        # HOWEVER, during training we want a different order of training
        # images in each epoch; to get that, set the seed to the epoch
        # number, for example.
        utils.make_deterministic(random_seed)
    since = time.time()
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.train()  # Set model to training mode

    losses = AverageMeter()
    accs = AverageMeter()

    supervised_baseline = False
    if hasattr(dataloader.batch_sampler, "n_way"):
        nway = dataloader.batch_sampler.n_way
        nshot = dataloader.batch_sampler.n_shot
        nquery = dataloader.batch_sampler.n_query
    else:
        supervised_baseline = True

    for i, data in enumerate(tqdm(dataloader)):
        inputs = data["input"].to(device)
        labels = data["label"].to(device)
        if supervised_baseline:
            # This is a baseline without meta-learning
            inputs = model.embed_samples(inputs)
            outputs = model.classifier(inputs)
            query_labels = labels
        else:
            inputs_generated = None
            if model.mixer is not None:
                inputs_generated = data["generated"].to(device)
            print_final_nshot = (i == 0)  # only log the shot count once
            outputs, query_labels = model(inputs, labels, nway, nshot, nquery,
                                          inputs_generated=inputs_generated,
                                          print_final_nshot=print_final_nshot)
        loss = criterion(outputs, query_labels)
        acc = accuracy(outputs, query_labels)

        # Compute gradient and take an optimization step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Measure accuracy and record loss
        losses.update(loss.item(), outputs.size(0))
        accs.update(acc.item(), outputs.size(0))
        if i % print_freq == 0 or i == len(dataloader) - 1:
            temp = "current loss: %0.5f " % loss.item()
            temp += "acc %0.5f " % acc.item()
            temp += "| running average loss %0.5f " % losses.avg
            temp += "acc %0.5f " % accs.avg
            print(i, temp)

    time_elapsed = time.time() - since
    print('this epoch took {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                   time_elapsed % 60))
    return float(losses.avg), float(accs.avg)
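# Following the warning in the comments above: if random_seed is used during
# training, vary it per epoch so every epoch sees a different data order.
# A hypothetical driver loop (all names here are placeholders):
#
#     for epoch in range(num_epochs):
#         train_one_epoch(train_loader, model, criterion, optimizer,
#                         device=device, random_seed=epoch)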
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

# Set up the image backend: prefer accimage when it is available
from torchvision import get_image_backend, set_image_backend
try:
    import accimage
    set_image_backend("accimage")
except ImportError:
    print("accimage is not available")
print("image backend: %s" % get_image_backend())

# Imports from my own scripts
import utils
utils.make_deterministic(123)
from dataloaders.ImagePandasDataset import ImagePandasDataset
from dataloaders.NShotTaskSampler import NShotTaskSampler
from dataloaders.WrapImagePandasDataset import WrapImagePandasDataset
from metrics.AverageMeter import AverageMeter
from metrics.accuracy import accuracy
from modules.layers.Flatten import Flatten
from modules.layers.Identity import Identity
from modules.metamodels.Baselines import Baselines
from modules.metamodels.ProtoNet import ProtoNet
from modules.metamodels.MetaModel import MetaModel
from modules.fusionnets.ImageFusionNet import ImageFusionNet
from modules.fusionnets.ImageMixer import ImageMixer
from modules.fusionnets.Mixup import Mixup
from modules.backbones.Conv4 import Conv4
def main(args):
    since = time.time()
    print(args)

    # Set seed
    args.seed = utils.setup_seed(args.seed)
    utils.make_deterministic(args.seed)

    # Set up the directory to save the experiment log and trained models
    log_dir = utils.setup_savedir(prefix=args.saveprefix, basedir=args.saveroot,
                                  args=args, append_args=args.saveargs)
    # Save args
    utils.save_args(log_dir, args)

    # Set up device
    device = utils.setup_device(args.gpu)

    # Set up dataset and dataloaders
    dataset_dict = setup_dataset(args)
    dataloader_dict = setup_dataloader(args, dataset_dict)

    # Set up backbone CNN
    num_classes = dataset_dict["train"].num_classes
    model = setup_backbone(args.backbone,
                           pretrained=args.backbone_pretrained,
                           num_classes=num_classes)
    # Resume model if needed
    if args.resume is not None:
        model = utils.resume_model(model, args.resume, state_dict_key="model")

    # Set up loss
    criterion = torch.nn.CrossEntropyLoss().to(device)
    if args.loss_balanced:
        print("using balanced loss")
        # If this option is true, weight the loss inversely proportional
        # to class frequency
        weight = torch.FloatTensor(dataset_dict["train"].inverse_label_freq)
        criterion = torch.nn.CrossEntropyLoss(weight=weight).to(device)

    # Set up optimizer
    if args.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(), lr=args.lr,
                                     amsgrad=True)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError()
    if args.resume_optimizer is not None:
        optimizer = utils.resume_model(optimizer, args.resume_optimizer,
                                       state_dict_key="optimizer")
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=args.patience, factor=args.step_facter,
        verbose=True)

    # Main training loop
    log = {}
    log["git"] = utils.check_gitstatus()
    log["timestamp"] = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    log["train"] = []
    log["val"] = []
    log["lr"] = []
    log_save_path = os.path.join(log_dir, "log.json")
    utils.save_json(log, log_save_path)
    valacc = 0
    best_val_acc = 0
    bestmodel = model
    for epoch in range(args.epochs):
        print("epoch: %d (starts at 0, ends at most at %d)" %
              (epoch, args.epochs - 1))
        loss, acc = train_one_epoch(dataloader_dict["train"], model, criterion,
                                    optimizer, accuracy=accuracy, device=device,
                                    print_freq=args.print_freq)
        log["train"].append({'epoch': epoch, "loss": loss, "acc": acc})
        valloss, valacc = evaluate(dataloader_dict["val"], model, criterion,
                                   accuracy=accuracy, device=device)
        log["val"].append({'epoch': epoch, "loss": valloss, "acc": valacc})
        lr_scheduler.step(valloss)

        # If this is the best model so far, keep it on CPU and save it
        if valacc > best_val_acc:
            best_val_acc = valacc
            log["best_epoch"] = epoch
            log["best_acc"] = best_val_acc
            bestmodel = deepcopy(model)
            bestmodel.cpu()
            if args.savemodel:
                save_path = os.path.join(log_dir, "bestmodel.pth")
                utils.save_checkpoint(save_path, bestmodel, key="model")
                save_path = os.path.join(log_dir, "bestmodel_optimizer.pth")
                utils.save_checkpoint(save_path, optimizer, key="optimizer")
        utils.save_json(log, log_save_path)

        max_lr_now = max([group['lr'] for group in optimizer.param_groups])
        log["lr"].append(max_lr_now)
        if max_lr_now < args.lr_min:
            break

    # Use the best model to evaluate on the test set
    print("test started")
    loss, acc = evaluate(dataloader_dict["test"], bestmodel, criterion,
                         accuracy=accuracy, device=device)
    log["test"] = {"loss": loss, "acc": acc}
    time_elapsed = time.time() - since
    log["time_elapsed"] = time_elapsed

    # Save the final log
    utils.save_json(log, log_save_path)
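# The loop above halts once ReduceLROnPlateau has decayed the learning rate
# below args.lr_min. A self-contained sketch of that stopping pattern
# (the toy model, losses, and threshold are made up for illustration):
import torch

toy_model = torch.nn.Linear(4, 2)
toy_optimizer = torch.optim.SGD(toy_model.parameters(), lr=0.1)
toy_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    toy_optimizer, patience=0, factor=0.1)
lr_min = 1e-3
for epoch, val_loss in enumerate([1.0, 1.1, 1.2, 1.3, 1.4]):  # loss plateaus
    toy_scheduler.step(val_loss)  # no improvement => lr shrinks by factor 0.1
    max_lr_now = max(group['lr'] for group in toy_optimizer.param_groups)
    print(epoch, max_lr_now)
    if max_lr_now < lr_min:
        break  # learning rate has bottomed out; stop training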