Code example #1
def main(data_path: str, text_path: str, /):
    make_deterministic()
    ru_args, en_args, model_data = torch.load(data_path)

    ru_lang = Language(*ru_args)
    en_lang = Language(*en_args)

    model = Seq2Seq.from_data(model_data).to(Device)

    evaluate(model, ru_lang, en_lang, text_path)
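
Every example on this page calls a make_deterministic() helper before touching data or models; the helper itself is not included in these excerpts. As a rough sketch only, such a helper in PyTorch projects usually follows the standard reproducibility recipe below; the signature and defaults are assumptions, not the original implementation.

# Hypothetical sketch of a seeding helper; the real make_deterministic /
# utils.make_deterministic used in these examples is not shown in the excerpts.
import random

import numpy as np
import torch


def make_deterministic(seed: int = 0) -> None:
    random.seed(seed)                          # Python's built-in RNG
    np.random.seed(seed)                       # NumPy RNG
    torch.manual_seed(seed)                    # CPU (and current CUDA) RNG
    torch.cuda.manual_seed_all(seed)           # all CUDA devices
    torch.backends.cudnn.deterministic = True  # force deterministic cuDNN kernels
    torch.backends.cudnn.benchmark = False     # disable non-deterministic autotuning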
Code example #2
def evaluate(dataloader,
             model,
             criterion,
             accuracy,
             static_augmentations=[],
             device=None,
             random_seed=123):
    print("evaluating...")
    if random_seed is not None:
        utils.make_deterministic(random_seed)

    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    losses = AverageMeter()
    accs = []
    nway = dataloader.batch_sampler.n_way
    nshot = dataloader.batch_sampler.n_shot
    nquery = dataloader.batch_sampler.n_query
    with torch.no_grad():
        for i, data in enumerate(tqdm(dataloader)):
            inputs = data["input"].to(device)
            labels = data["label"].to(device)
            inputs_generated = None
            if model.mixer is not None or "generated" in static_augmentations:
                inputs_generated = data["generated"].to(device)
            print_final_nshot = False
            if i == 0:
                print_final_nshot = True
            outputs, query_labels = model(inputs,
                                          labels,
                                          nway,
                                          nshot,
                                          nquery,
                                          inputs_generated=inputs_generated,
                                          print_final_nshot=print_final_nshot,
                                          augmentations=static_augmentations)
            loss = criterion(outputs, query_labels)
            acc = accuracy(outputs, query_labels)

            losses.update(loss.item(), outputs.size(0))
            accs.append(acc.item())

    print("eval loss: %0.5f " % losses.avg)
    acc = float(np.mean(accs))
    conf = float(1.96 * np.std(accs) / np.sqrt(len(accs)))
    print("eval acc :%0.5f +- %0.5f" % (acc, conf))
    return float(losses.avg), acc, conf
Code example #3
        epochs_paths.append((epoch_num, model_weight_path))

    inf_entry = epochs_paths[0]
    epochs_paths = epochs_paths[1:]
    epochs_paths.append(inf_entry)

    return epochs_paths


if __name__ == '__main__':

    args = parse_args()
    if args.pdb:
        import pdb
        pdb.set_trace()
    make_deterministic(args.cuda)
    sns.set_style('darkgrid')
    device = torch.device(
        'cpu' if args.cuda is None else 'cuda:{}'.format(args.cuda))

    if not osp.exists(args.run):
        os.makedirs(args.run)

    ckpt_dir = osp.join(args.run, 'ckpt')
    images_dir = osp.join(args.run, 'images')
    log_dir = osp.join(args.run, 'logs')

    if not osp.exists(ckpt_dir):
        os.makedirs(ckpt_dir)
    if not osp.exists(images_dir):
        os.makedirs(images_dir)
Code example #4
def main():
    make_deterministic()

    # region Prepare data
    with Timer('\nData preparation time: %s\n'):
        ru_lang = Language()
        en_lang = Language()

        yandex = Yandex(
            'datasets/yandex/corpus.en_ru.1m.ru',
            'datasets/yandex/corpus.en_ru.1m.en',
            ru_lang,
            en_lang,
            data_slice=H.dataset_slice,
        )

        paracrawl = ParaCrawl(
            'datasets/paracrawl/en-ru.txt',
            ru_lang,
            en_lang,
            data_slice=slice(0),
        )

        low = ru_lang.lower_than(H.ru_word_count_minimum)
        infrequent_words_n = max(
            ceil(ru_lang.words_n * H.infrequent_words_percent), len(low))
        if infrequent_words_n > 0:
            ru_lang.drop_words(ru_lang.lowk(infrequent_words_n))
            print(
                f'{infrequent_words_n:,} infrequent Russian words are dropped')

        low = en_lang.lower_than(H.en_word_count_minimum)
        if len(low) > 0:
            en_lang.drop_words(*low)
            print(f'{len(low):,} infrequent English words are dropped')

        print(
            f'Russian language: {ru_lang.words_n:,} words, {ru_lang.sentence_length:,} words in a sentence'
        )
        print(
            f'English language: {en_lang.words_n:,} words, {en_lang.sentence_length:,} words in a sentence'
        )

        batch = H.batch_size
        dataset = ConcatDataset((yandex, paracrawl))
        loader = DataLoader(dataset, batch, shuffle=True)
    # endregion

    # region Models and optimizers
    model = Seq2Seq(
        Encoder(ru_lang.words_n, H.encoder_embed_dim, H.encoder_hidden_dim,
                H.encoder_bi, H.decoder_hd),
        Attention(H.encoder_hd, H.decoder_hd),
        Decoder(en_lang.words_n, H.decoder_embed_dim, H.decoder_hidden_dim,
                H.encoder_hd),
    ).to(Device).train()

    optimizer = Adam(model.parameters(), lr=H.learning_rate)
    criterion = CrossEntropyLoss(ignore_index=Token_PAD, reduction='sum')
    # endregion

    # region Training
    teaching_percent = H.teaching_percent
    total = len(dataset)
    log_interval = max(5, round(total / batch / 1000))

    for epoch in range(1, H.epochs + 1):
        with Printer() as printer:
            printer.print(f'Train epoch {epoch}: starting...')
            for i, ((ru, ru_l), en_sos, en_eos) in enumerate(loader, 1):
                # Zero the parameter gradients
                optimizer.zero_grad()
                # Run data through model
                predictions = model(ru, ru_l, en_sos, teaching_percent)
                # Calculate loss
                loss = criterion(predictions, en_eos)
                # Back propagate and perform optimization
                loss.backward()
                clip_grad_norm_(model.parameters(), H.gradient_norm_clip)
                optimizer.step()

                # Print log
                if i % log_interval == 0:
                    printer.print(
                        f'Train epoch {epoch}: {i * batch / total:.1%} [{i * batch:,}/{total:,}]'
                    )

            printer.print(f'Train epoch {epoch}: completed')
    # endregion

    torch.save(
        (
            ru_lang.__getnewargs__(),
            en_lang.__getnewargs__(),
            model.cpu().eval().data,
        ),
        'data/data.pt',
    )

    evaluate(model.to(Device), ru_lang, en_lang,
             'datasets/yandex/corpus.en_ru.1m.ru',
             slice(H.dataset_slice.stop + 1, H.dataset_slice.stop + 1 + 100))
Code example #5
parser.add_argument('--ckpt_path', type=str, default='ckpts')
parser.add_argument('--samples_path', type=str, default='out')
parser.add_argument('--summary_path', type=str, default='logs')
parser.add_argument(
    '--loss', type=str,
    default='original')  # original, official_nll, nll, sum, min
parser.add_argument('--id', type=int, default=-1)
parser.add_argument('--note', type=str, default='')
parser.add_argument(
    '--debug',
    action='store_true')  # Use few samples, make model deterministic, run
conf = parser.parse_args()
print("Configs: ", conf)

if conf.debug:
    utils.make_deterministic()
    np.set_printoptions(threshold=sys.maxsize)

# Get data
if conf.debug: conf.batch_size = 3
if conf.data == 'mnist_bw':  # pixels in range (0,1)
    from tensorflow.examples.tutorials.mnist import input_data
    if not os.path.exists(conf.data_path):
        os.makedirs(conf.data_path)
    data = input_data.read_data_sets(conf.data_path)
    conf.num_classes = 10
    conf.img_height = 28
    conf.img_width = 28
    conf.channels = 1
    conf.bins = 2
    conf.num_batches = data.train.num_examples // conf.batch_size
Code example #6
def train_one_epoch(dataloader,
                    model,
                    criterion,
                    optimizer,
                    accuracy=accuracy,
                    device=None,
                    print_freq=100,
                    random_seed=None):
    if random_seed is not None:
        # Be careful when using this!
        # It is okay to fix the seed every time evaluate() is called, because we want
        # exactly the same order of test images on every run.
        # HOWEVER, during training we want a different order of training images for
        # each epoch. To achieve this, we can pass the epoch number as the seed, for
        # example (see the usage sketch after this example).
        utils.make_deterministic(random_seed)

    since = time.time()
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.train()  # Set model to training mode

    losses = AverageMeter()
    accs = AverageMeter()

    supervised_baseline = False
    if hasattr(dataloader.batch_sampler, "n_way"):
        nway = dataloader.batch_sampler.n_way
        nshot = dataloader.batch_sampler.n_shot
        nquery = dataloader.batch_sampler.n_query
    else:
        supervised_baseline = True

    for i, data in enumerate(tqdm(dataloader)):
        inputs = data["input"].to(device)
        labels = data["label"].to(device)

        if supervised_baseline:
            # this is a baseline without meta-learning
            inputs = model.embed_samples(inputs)
            outputs = model.classifier(inputs)
            query_labels = labels
        else:
            inputs_generated = None
            if model.mixer is not None:
                inputs_generated = data["generated"].to(device)
            print_final_nshot = False
            if i == 0:
                print_final_nshot = True
            outputs, query_labels = model(inputs,
                                          labels,
                                          nway,
                                          nshot,
                                          nquery,
                                          inputs_generated=inputs_generated,
                                          print_final_nshot=print_final_nshot)

        loss = criterion(outputs, query_labels)
        acc = accuracy(outputs, query_labels)
        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure acc and record loss
        losses.update(loss.item(), outputs.size(0))
        accs.update(acc.item(), outputs.size(0))

        if i % print_freq == 0 or i == len(dataloader) - 1:
            temp = "current loss: %0.5f " % loss.item()
            temp += "acc %0.5f " % acc.item()
            temp += "| running average loss %0.5f " % losses.avg
            temp += "acc %0.5f " % accs.avg
            print(i, temp)

    time_elapsed = time.time() - since
    print('this epoch took {:.0f}m {:.0f}s'.format(time_elapsed // 60,
                                                   time_elapsed % 60))
    return float(losses.avg), float(accs.avg)
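
The comment at the top of train_one_epoch suggests re-seeding with a different value each epoch so that training batches are reshuffled every epoch, while evaluate() keeps a fixed seed so the test-image order never changes. Below is a minimal usage sketch of that idea; train_loader, val_loader, model, criterion, optimizer, accuracy, and num_epochs are placeholders assumed to be built elsewhere, as in the other examples on this page.

# Hypothetical driver loop illustrating per-epoch seeding (not part of the original code).
for epoch in range(num_epochs):
    # A different seed each epoch -> a different shuffling of the training batches.
    train_loss, train_acc = train_one_epoch(train_loader, model, criterion,
                                            optimizer, random_seed=epoch)
    # evaluate() fixes its own seed (123 by default), so the test order is identical
    # on every call and results stay comparable across epochs.
    val_loss, val_acc, val_conf = evaluate(val_loader, model, criterion, accuracy)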
Code example #7
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

## set up the image backend
from torchvision import get_image_backend, set_image_backend
try:
    import accimage
    set_image_backend("accimage")
except ImportError:
    print("accimage is not available")
print("image backend: %s" % get_image_backend())

# imports from my own script
import utils
utils.make_deterministic(123)
from dataloaders.ImagePandasDataset import ImagePandasDataset
from dataloaders.NShotTaskSampler import NShotTaskSampler
from dataloaders.WrapImagePandasDataset import WrapImagePandasDataset
from metrics.AverageMeter import AverageMeter
from metrics.accuracy import accuracy
from modules.layers.Flatten import Flatten
from modules.layers.Identity import Identity
from modules.metamodels.Baselines import Baselines
from modules.metamodels.ProtoNet import ProtoNet
from modules.metamodels.MetaModel import MetaModel
from modules.fusionnets.ImageFusionNet import ImageFusionNet
from modules.fusionnets.ImageMixer import ImageMixer
from modules.fusionnets.Mixup import Mixup
from modules.backbones.Conv4 import Conv4
Code example #8
def main(args):
    since = time.time()
    print(args)
    #set seed
    args.seed = utils.setup_seed(args.seed)
    utils.make_deterministic(args.seed)

    #setup the directory to save the experiment log and trained models
    log_dir = utils.setup_savedir(prefix=args.saveprefix,
                                  basedir=args.saveroot,
                                  args=args,
                                  append_args=args.saveargs)

    #save args
    utils.save_args(log_dir, args)
    #setup device
    device = utils.setup_device(args.gpu)

    #setup dataset and dataloaders
    dataset_dict = setup_dataset(args)
    dataloader_dict = setup_dataloader(args, dataset_dict)

    #setup backbone cnn
    num_classes = dataset_dict["train"].num_classes
    model = setup_backbone(args.backbone,
                           pretrained=args.backbone_pretrained,
                           num_classes=num_classes)

    #resume model if needed
    if args.resume is not None:
        model = utils.resume_model(model, args.resume, state_dict_key="model")

    #setup loss
    criterion = torch.nn.CrossEntropyLoss().to(device)
    if args.loss_balanced:
        print("using balanced loss")
        # if this option is true, weight the loss inversely proportional to class
        # frequency (see the sketch after this example)
        weight = torch.FloatTensor(dataset_dict["train"].inverse_label_freq)
        criterion = torch.nn.CrossEntropyLoss(weight=weight).to(device)

    #setup optimizer
    if args.optimizer == "adam":
        optimizer = torch.optim.Adam(model.parameters(),
                                     lr=args.lr,
                                     amsgrad=True)
    elif args.optimizer == "sgd":
        optimizer = torch.optim.SGD(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError()
    if args.resume_optimizer is not None:
        optimizer = utils.resume_model(optimizer,
                                       args.resume_optimizer,
                                       state_dict_key="optimizer")
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=args.patience,
        factor=args.step_facter,
        verbose=True)

    #main training
    log = {}
    log["git"] = utils.check_gitstatus()
    log["timestamp"] = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    log["train"] = []
    log["val"] = []
    log["lr"] = []
    log_save_path = os.path.join(log_dir, "log.json")
    utils.save_json(log, log_save_path)
    valacc = 0
    best_val_acc = 0
    bestmodel = model
    for epoch in range(args.epochs):
        print("epoch: %d --start from 0 and at most end at %d" %
              (epoch, args.epochs - 1))
        loss, acc = train_one_epoch(dataloader_dict["train"],
                                    model,
                                    criterion,
                                    optimizer,
                                    accuracy=accuracy,
                                    device=device,
                                    print_freq=args.print_freq)
        log["train"].append({'epoch': epoch, "loss": loss, "acc": acc})

        valloss, valacc = evaluate(dataloader_dict["val"],
                                   model,
                                   criterion,
                                   accuracy=accuracy,
                                   device=device)
        log["val"].append({'epoch': epoch, "loss": valloss, "acc": valacc})
        lr_scheduler.step(valloss)

        #if this is the best model so far, keep it on cpu and save it
        if valacc > best_val_acc:
            best_val_acc = valacc
            log["best_epoch"] = epoch
            log["best_acc"] = best_val_acc
            bestmodel = deepcopy(model)
            bestmodel.cpu()
            if args.savemodel:
                save_path = os.path.join(log_dir, "bestmodel.pth")
                utils.save_checkpoint(save_path, bestmodel, key="model")
                save_path = os.path.join(log_dir, "bestmodel_optimizer.pth")
                utils.save_checkpoint(save_path, optimizer, key="optimizer")

        utils.save_json(log, log_save_path)
        max_lr_now = max([group['lr'] for group in optimizer.param_groups])
        log["lr"].append(max_lr_now)
        if max_lr_now < args.lr_min:
            break

    #use the best model to evaluate on test set
    print("test started")
    loss, acc = evaluate(dataloader_dict["test"],
                         bestmodel,
                         criterion,
                         accuracy=accuracy,
                         device=device)
    log["test"] = {"loss": loss, "acc": acc}

    time_elapsed = time.time() - since
    log["time_elapsed"] = time_elapsed
    #save the final log
    utils.save_json(log, log_save_path)
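
The balanced-loss branch near the top of this example weights CrossEntropyLoss inversely to class frequency via dataset_dict["train"].inverse_label_freq, which is not defined in the excerpt. Below is a hedged sketch of how such per-class weights are commonly derived from the training labels; train_labels and num_classes are placeholders, not names from the original code.

# Hypothetical computation of inverse class-frequency weights for CrossEntropyLoss.
import numpy as np

counts = np.bincount(train_labels, minlength=num_classes)            # samples per class
inverse_label_freq = counts.sum() / np.maximum(counts, 1)            # rarer class -> larger weight
inverse_label_freq = inverse_label_freq / inverse_label_freq.sum()   # optional: normalize to sum to 1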