def train_and_eval_IS(csv_fp,
                      image_fp,
                      label_colname,
                      eval_key,
                      dataset_name,
                      group_key,
                      all_group_colnames=[],
                      data_dir=DATA_DIR,
                      num_workers=64,
                      seed=None,
                      num_classes=2,
                      num_epochs=20,
                      return_model=False,
                      model_name='resnet18',
                      **sgd_params):

    print("Training and evaluating IS")
    # set up the dataloaders
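    # IS: sample by group and reweight training toward the eval set's group distribution (see kwargs below)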
    dataloaders = get_data_loaders(data_dir=data_dir,
                                   csv_fp=csv_fp,
                                   image_fp=image_fp,
                                   label_colname=label_colname,
                                   eval_key=eval_key,
                                   dataset_name=dataset_name,
                                   all_group_colnames=all_group_colnames,
                                   this_group_key=group_key,
                                   sample_by_groups=True,
                                   weight_to_eval_set_distribution=True,
                                   num_workers=num_workers)

    x = train_and_eval(dataloaders, eval_key, seed, num_classes, num_epochs,
                       return_model, model_name, **sgd_params)

    return x
Example #2
def main():
    # Constants
    epochs = 100
    train_batchsize = 128
    valid_batchsize = 4
    log_interval = 50
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Components needed for training
    model = Net(num_class=10)
    train_loader, valid_loader = get_data_loaders(train_batchsize,
                                                  valid_batchsize)
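    # F.nll_loss expects log-probabilities, so Net is assumed to apply log_softmax in its forward pass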
    criterion = F.nll_loss
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    log_writer = SummaryWriter('./log')

    # Start training
    train(epochs=epochs,
          model=model,
          train_loader=train_loader,
          valid_loader=valid_loader,
          criterion=criterion,
          optimizer=optimizer,
          writer=log_writer,
          device=device,
          log_interval=log_interval)

    # Save the model
    torch.save(model.state_dict(), './checkpoints/final_weights.pt')

    log_writer.close()
Example #3
def learning_rate_validation(num_of_epochs, dataset_name, learning_rate_list,
                             batch_size, model_type, num_of_topics,
                             hidden_size, topic_hidden_size, drop_out_prob):
    result = dict()
    dump_file_name = './val_results/learning_rate_valid_result_' + model_type + '_' + dataset_name
    train_loader, validation_loader, test_loader = get_data_loaders(
        0.1, dataset_name=dataset_name)
    for learning_rate in learning_rate_list:
        net = Net(num_of_epochs,
                  train_loader,
                  test_loader,
                  validation_loader,
                  learning_rate,
                  model_type,
                  early_stopping_mode,
                  early_stopping_min_delta,
                  early_stopping_patience,
                  num_of_topics=num_of_topics,
                  hidden_size=hidden_size,
                  input_size=300,
                  topic_hidden_size=topic_hidden_size,
                  drop_out_prob=drop_out_prob)
        gc.collect()
        train_loss, validation_loss = net.train(batch_size=batch_size,
                                                validate=True)
        result[learning_rate] = [train_loss, validation_loss]
    with open(dump_file_name, 'wb') as fp:
        pickle.dump(result, fp)
Example #4
def main():
    # setup the device for running
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    train_loader, val_loader = dataset.get_data_loaders(100)
    classes = val_loader.dataset.classes
    # load model and set to evaluation mode
    model = load_model('ResNet')
    model.to(device)
    model.eval()

    paths = load_imagepaths_from_folder('data/test/999/')
    # open the output file that will receive one prediction line per image
    f = open("OneMoreSecond.txt", "w")
    for path in paths:
        image = imread(path)
        image = base_transform(image)
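        # add a batch dimension: the -1 lets view infer a batch of 1 for the single 3x128x128 image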
        image = image.view(-1, 3, 128, 128)
        image = image.to(device)
        # run the forward process
        prediction = model(image)
        prediction = prediction.to('cpu')
        _, cls = torch.topk(prediction, dim=1, k=5)
        output = path
        for i in cls.data[0]:
            output = output + " "
            x = classes[i.item()]
            output = output + str(x)
        a = output.split("/")
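        # after splitting on '/', a[1] is the split name ('test') and a[3] is the filename plus the predicted class names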
        output = a[1] + "/" + a[3]
        print(output)
        f.write(output + "\n")
        #os.system("echo %s > text1/output_file.txt" %output)

    f.close()
Example #5
def run():
    # Parameters
    num_epochs = 10
    output_period = 100
    batch_size = 100

    # setup the device for running
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = resnet_18()
    model = model.to(device)

    train_loader, val_loader = dataset.get_data_loaders(batch_size)
    num_train_batches = len(train_loader)

    criterion = nn.CrossEntropyLoss().to(device)
    # TODO: optimizer is currently unoptimized
    # there's a lot of room for improvement/different optimizers
    optimizer = optim.SGD(model.parameters(), lr=1e-3)

    epoch = 1
    while epoch <= num_epochs:
        running_loss = 0.0
        for param_group in optimizer.param_groups:
            print('Current learning rate: ' + str(param_group['lr']))
        model.train()

        for batch_num, (inputs, labels) in enumerate(train_loader, 1):
            print('I am working')
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            optimizer.step()
            running_loss += loss.item()

            if batch_num % output_period == 0:
                print('[%d:%.2f] loss: %.3f' %
                      (epoch, batch_num * 1.0 / num_train_batches,
                       running_loss / output_period))
                running_loss = 0.0
                gc.collect()

        gc.collect()
        # save after every epoch
        torch.save(model.state_dict(), "models/model_sgd.%d" % epoch)

        # TODO: Calculate classification error and Top-5 Error
        # on training and validation datasets here

        gc.collect()
        epoch += 1
    return
Example #6
def validate(learning_rate, batch_size, dataset_name, model_type):
    train_loader, validation_loader, test_loader = get_data_loaders(
        validation_percentage, dataset_name)
    net = Net(250,
              train_loader,
              test_loader,
              validation_loader,
              learning_rate,
              model_type=model_type)
    train_loss, test_loss = net.train(batch_size=batch_size, validate=True)
Example #7
def run():
    # Parameters
    num_epochs = 10
    output_period = 100
    batch_size = 100

    # setup the device for running
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = resnet_18()
    model = model.to(device)

    train_loader, val_loader = dataset.get_data_loaders(batch_size)
    num_train_batches = len(train_loader)

    criterion = nn.CrossEntropyLoss().to(device)
    # optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-2)
    # optimizer = optim.Adagrad(model.parameters(), lr=1e-3, weight_decay=1e-2)
    # optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-3)
    # optimizer = optim.SGD(model.parameters(), lr=1e-3);
    optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-2)

    trainAccLog = []
    valAccLog = []
    epoch = 1
    while epoch <= num_epochs:
        running_loss = 0.0
        for param_group in optimizer.param_groups:
            print('Current learning rate: ' + str(param_group['lr']))
        model.train()

        for batch_num, (inputs, labels) in enumerate(train_loader, 1):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            optimizer.step()
            running_loss += loss.item()

            if batch_num % output_period == 0:
                print('[%d:%.2f] loss: %.3f' % (
                    epoch, batch_num*1.0/num_train_batches,
                    running_loss/output_period
                    ))
                running_loss = 0.0
                gc.collect()

        gc.collect()
        # save after every epoch
        torch.save(model.state_dict(), "models/model.%d" % epoch)
        epoch += 1
Example #8
def run(num_epochs, out_period, batch_size, model):
    # setup the device for running
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    train_loader, val_loader = dataset.get_data_loaders(batch_size)

    if len(sys.argv) > 1:  #output file for val set
        epoch = sys.argv[1]  #take number
        print("loading models/model.%s" % epoch)
        model.load_state_dict(torch.load("models/model.%s" % epoch))
        model.eval()

        # Opens file to write results to, will overwrite existing files
        out_file = open("resultsVAL.txt", "w")
        total = 0
        for (inputs, labels) in tqdm(val_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
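            # torch.topk returns (values, indices); [1] keeps the indices of the top-5 classes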
            top5 = torch.topk(outputs, 5)[1]
            # path = "test/" + '{0:08d}'.format(i) + ".jpg"
            for i in range(len(inputs)):
                filename = val_loader.dataset.samples[total][0]
                # formats string in the structure of "val/39/00000132.jpg 1 3 5 6 9"
                path_top5 = filename

                for j in top5[i]:
                    path_top5 = path_top5 + " " + str(j.item())
                out_file.write(path_top5 + "\n")
                # print(labels[i], "TOP5:", top5[i])
                # print(path_top5)
                total += 1

            gc.collect()
        #remove final newline
        out_file.seek(out_file.tell() - 2)
        out_file.truncate()
    else:  #print accuracy for all epochs on val set
        epoch = 1
        while epoch <= num_epochs:
            print("loading models/model.%s" % epoch)
            model.load_state_dict(torch.load("models/model.%s" % epoch))
            model.eval()

            # Calculate classification error and Top-5 Error
            # on training and validation datasets here
            printAccuracy(val_loader, device, model, "VALSET", epoch)

            gc.collect()
            epoch += 1
def train_and_eval_GDRO(csv_fp,
                        image_fp,
                        label_colname,
                        eval_key,
                        dataset_name,
                        group_key,
                        gdro_params,
                        all_group_colnames=[],
                        data_dir=DATA_DIR,
                        num_workers=64,
                        seed=None,
                        num_classes=2,
                        num_epochs=20,
                        return_model=False,
                        model_name='resnet18',
                        **sgd_params):
    print("Training and Evaluating GDRO")
    # set up the dataloaders - GDRO needs sample_by_groups = True
    dataloaders = get_data_loaders(data_dir=data_dir,
                                   csv_fp=csv_fp,
                                   image_fp=image_fp,
                                   label_colname=label_colname,
                                   eval_key=eval_key,
                                   dataset_name=dataset_name,
                                   all_group_colnames=all_group_colnames,
                                   this_group_key=group_key,
                                   sample_by_groups=True,
                                   weight_to_eval_set_distribution=False,
                                   num_workers=num_workers)

    # update gdro params with dataset-specific parameters
    dataloader_train = dataloaders[0]
    gdro_params['group_key'] = group_key
    gdro_params['num_groups'] = len(dataloader_train.dataset.group_counts)
    gdro_params['group_sizes'] = dataloader_train.dataset.group_counts

    x = train_and_eval(dataloaders,
                       eval_key,
                       seed,
                       num_classes,
                       num_epochs,
                       return_model,
                       model_name,
                       gdro=True,
                       gdro_params=gdro_params,
                       **sgd_params)

    return x
Example #10
def main():
    config = get_config()
    if config['train'] and not config['resume']:
        for key in ['folder_log', 'folder_out']:
            if os.path.exists(config[key]):
                raise FileExistsError(config[key])
            os.makedirs(config[key])
        with open(os.path.join(config['folder_out'], 'config.yaml'), 'w') as f:
            yaml.safe_dump(config, f)
    data_loaders, image_shape = get_data_loaders(config)
    config['image_shape'] = image_shape
    net = get_model(config)
    if config['train']:
        train_model(config, data_loaders, net)
    test_model(config, data_loaders, net)
    return
Example #11
def main(args, seed):
    torch.random.manual_seed(seed)

    torch.random.manual_seed(seed)
    train_loader, val_loader, shape = get_data_loaders(
        config.Training.batch_size,
        start_idx=args.start_idx,
        test_batch_size=args.horizon,
    )
    n, d, t = shape
    model = models.ConvNet(d, seq_len=t)
    if args.ckpt is not None:
        state_dict = torch.load(args.ckpt)
        model.load_state_dict(state_dict)

    out = ar(val_loader, model)
    plot_output(*out)
    plt.show()
    plt.close()
Example #12
def main():
    gpus = tf.config.experimental.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    config = get_config()
    if config['train'] and not config['resume']:
        for key in ['folder_log', 'folder_out']:
            if os.path.exists(config[key]):
                raise FileExistsError(config[key])
            os.makedirs(config[key])
        with open(os.path.join(config['folder_out'], 'config.yaml'), 'w') as f:
            yaml.safe_dump(config, f)
    strategy = tf.distribute.MirroredStrategy()
    data_loaders = get_data_loaders(strategy, config)
    net = get_model(config)
    if config['train']:
        train_model(strategy, config, data_loaders, net)
    test_model(strategy, config, data_loaders, net)
    return
Example #13
def main():
    config = get_config()
    if config['train'] and not config['resume']:
        for key in ['folder_log', 'folder_out']:
            if os.path.exists(config[key]):
                raise FileExistsError(config[key])
            os.makedirs(config[key])
        with open(os.path.join(config['folder_out'], 'config.yaml'), 'w') as f:
            yaml.safe_dump(config, f)
    data_loaders, image_shape = get_data_loaders(config)
    config['image_shape'] = image_shape
    if 'crop_shape' not in config:
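        # default crop: keep the first entry of image_shape and halve the remaining dimensions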
        config['crop_shape'] = [
            val if idx == 0 else val // 2
            for idx, val in enumerate(image_shape)
        ]
    net = get_model(config)
    net_gen = None if config['path_pretrain'] is None else get_model(config)
    if config['train']:
        train_model(config, data_loaders, net, net_gen)
    test_model(config, data_loaders, net)
    return
Example #14
def test(learning_rate, batch_size, dataset_name, model_type,
         early_stopping_mode, early_stopping_min_delta,
         early_stopping_patience):
    train_loader, validation_loader, test_loader, embeddings = get_data_loaders(
        validation_percentage, dataset_name)
    net = Net(300,
              train_loader,
              test_loader,
              validation_loader,
              learning_rate,
              model_type,
              early_stopping_mode,
              early_stopping_min_delta,
              early_stopping_patience,
              input_size=input_size,
              num_of_topics=num_of_topics,
              hidden_size=hidden_size,
              topic_hidden_size=topic_hidden_size,
              drop_out_prob=drop_out_prob,
              embeddings=embeddings)
    result = net.train(batch_size=batch_size, validate=False)
    print(result)
Example #15
    def __init__(self, args):

        # Training configurations
        self.method = args.method
        self.dataset = args.dataset
        self.dim = args.dim
        self.lr = args.lr
        self.batch_size = args.batch_size
        self.val_batch_size = self.batch_size // 2
        self.iteration = args.iteration
        self.evaluation = args.evaluation
        self.show_iter = 1000
        self.update_epoch = 10
        self.balanced = args.balanced
        self.instances = args.instances
        self.cm = args.cm
        self.device = torch.device(
            'cuda:0' if torch.cuda.is_available() else 'cpu')

        self.file_name = '{}_{}_{}'.format(
            self.method,
            self.dataset,
            self.lr,
        )
        print('========================================')
        print(json.dumps(vars(args), indent=2))
        print(self.file_name)

        # Paths

        self.root_dir = os.path.join('/', 'home', 'lyz')
        self.data_dir = os.path.join(self.root_dir, 'datasets', self.dataset)
        self.model_dir = self._get_path('./trained_model')
        self.code_dir = self._get_path(os.path.join('codes', self.dataset))
        self.fig_dir = self._get_path(
            os.path.join('fig', self.dataset, self.file_name))

        # Preparing data
        self.transforms = get_transform()
        self.datasets = get_datasets(dataset=self.dataset,
                                     data_dir=self.data_dir,
                                     transforms=self.transforms)
        self.cm_sampler = ClassMiningSampler(self.datasets['train'],
                                             batch_size=self.batch_size,
                                             n_instance=self.instances,
                                             balanced=self.balanced)
        self.data_loaders = get_data_loaders(
            datasets=self.datasets,
            batch_size=self.batch_size,
            val_batch_size=self.val_batch_size,
            n_instance=self.instances,
            balanced=self.balanced,
            cm=self.cm_sampler if self.cm else None)
        self.dataset_sizes = {
            x: len(self.datasets[x])
            for x in ['train', 'test']
        }

        # Set up model
        self.model = get_model(self.device, self.dim)

        self.optimizer = optim.SGD(
            [{
                'params': self.model.google_net.parameters()
            }, {
                'params': self.model.linear.parameters(),
                'lr': self.lr * 10,
                'momentum': 0.9
            }],
            lr=self.lr,
            momentum=0.9)
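        # the linear layer's parameter group uses 10x the base learning rate of the google_net backbone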
        self.scheduler = lr_scheduler.StepLR(self.optimizer,
                                             step_size=2000,
                                             gamma=0.5)
Example #16
n_class = 9

plt.ion()  # interactive mode

#def run_cv(img_size, pre_trained, target):
if __name__ == '__main__':
    image_files = get_img_files(data_dir)
    kf = KFold(n_splits=N_CV, random_state=RANDOM_STATE, shuffle=True)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for n, (train_idx, val_idx) in enumerate(kf.split(image_files)):
        #----Prepare Data-------------------------------------------------------------------------------
        train_files = image_files[train_idx]
        val_files = image_files[val_idx]
        data_loaders = get_data_loaders(train_files, val_files, img_size)
        dataset_sizes = [len(train_files), len(val_files)]
        print('dataset_sizes:', dataset_sizes)
        inputs, classes = next(iter(data_loaders[0]))
        #out = torchvision.utils.make_grid(inputs)
        #imshow(out, title=[x for x in classes])
        #----Prepare Model-------------------------------------------------------------------------------
        model_mobnet2 = MobileNetV2()
        model_mobnet2.load_state_dict(torch.load(pre_trained_mobnet2))
        model_mobnet2.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(model_mobnet2.last_channel, n_class),
        )
        model_mobnet2 = model_mobnet2.to(device)
        criterion = nn.CrossEntropyLoss()
        # Observe that all parameters are being optimized
def run(config):
    # Update the config dict as necessary
    # This is for convenience, to add settings derived from the user-specified
    # configuration into the config-dict (e.g. inferring the number of classes
    # and size of the images from the dataset, passing in a pytorch object
    # for the activation specified as a string)
    config['resolution'] = 64
    config['n_classes'] = 120
    config['G_activation'] = utils.activation_dict[config['G_nl']]
    config['D_activation'] = utils.activation_dict[config['D_nl']]
    # By default, skip init if resuming training.
    if config['resume']:
        print('Skipping initialization for training resumption...')
        config['skip_init'] = True
    config = utils.update_config_roots(config)
    device = 'cuda'
    # Seed RNG
    utils.seed_rng(config['seed'])
    # Prepare root folders if necessary
    utils.prepare_root(config)
    # Setup cudnn.benchmark for free speed
    torch.backends.cudnn.benchmark = True

    experiment_name = (config['experiment_name'] if config['experiment_name']
                       else 'generative_dog_images')
    print('Experiment name is %s' % experiment_name)

    G = BigGAN.Generator(**config).to(device)
    D = BigGAN.Discriminator(**config).to(device)

    # If using EMA, prepare it
    if config['ema']:
        print('Preparing EMA for G with decay of {}'.format(
            config['ema_decay']))
        G_ema = BigGAN.Generator(**{
            **config, 'skip_init': True,
            'no_optim': True
        }).to(device)
        ema = utils.ema(G, G_ema, config['ema_decay'], config['ema_start'])
    else:
        G_ema, ema = None, None

    GD = BigGAN.G_D(G, D)
    print(G)
    print(D)
    print('Number of params in G: {} D: {}'.format(
        *[sum([p.data.nelement() for p in net.parameters()])
          for net in [G, D]]))
    # Prepare state dict, which holds things like epoch # and itr #
    state_dict = {'itr': 0, 'epoch': 0, 'save_num': 0, 'config': config}

    # If loading from a pre-trained model, load weights
    if config['resume']:
        print('Loading weights...')
        utils.load_weights(
            G, D, state_dict, config['weights_root'], experiment_name,
            config['load_weights'] if config['load_weights'] else None,
            G_ema if config['ema'] else None)

    # Prepare data; the Discriminator's batch size is all that needs to be passed
    # to the dataloader, as G doesn't require dataloading.
    # Note that at every loader iteration we pass in enough data to complete
    # a full D iteration (regardless of number of D steps and accumulations)
    D_batch_size = (config['batch_size'] * config['num_D_steps'] *
                    config['num_D_accumulations'])
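    # e.g. batch_size=32, num_D_steps=2, num_D_accumulations=8 gives D_batch_size=512 samples per loader iteration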
    loaders = dataset.get_data_loaders(data_root=config['data_root'],
                                       label_root=config['label_root'],
                                       batch_size=D_batch_size,
                                       num_workers=config['num_workers'],
                                       shuffle=config['shuffle'],
                                       pin_memory=config['pin_memory'],
                                       drop_last=True)

    # Prepare noise and randomly sampled label arrays
    # Allow for different batch sizes in G
    G_batch_size = max(config['G_batch_size'], config['batch_size'])
    z_, y_ = utils.prepare_z_y(G_batch_size,
                               G.dim_z,
                               config['n_classes'],
                               device=device,
                               fp16=config['G_fp16'])
    # Prepare a fixed z & y to see individual sample evolution throughout training
    fixed_z, fixed_y = utils.prepare_z_y(G_batch_size,
                                         G.dim_z,
                                         config['n_classes'],
                                         device=device,
                                         fp16=config['G_fp16'])
    fixed_z.sample_()
    fixed_y.sample_()
    # Loaders are loaded, prepare the training function
    train = train_fns.create_train_fn(G, D, GD, z_, y_, ema, state_dict,
                                      config)

    print('Beginning training at epoch %d...' % state_dict['epoch'])
    start_time = time.perf_counter()
    total_iters = config['num_epochs'] * len(loaders[0])

    # Train for specified number of epochs, although we mostly track G iterations.
    for epoch in range(state_dict['epoch'], config['num_epochs']):
        for i, (x, y) in enumerate(loaders[0]):
            # Increment the iteration counter
            state_dict['itr'] += 1
            # Make sure G and D are in training mode, just in case they got set to eval
            # For D, which typically doesn't have BN, this shouldn't matter much.
            G.train()
            D.train()
            if config['ema']:
                G_ema.train()
            x, y = x.to(device), y.to(device)
            metrics = train(x, y)

            if not (state_dict['itr'] % config['log_interval']):
                curr_time = time.perf_counter()
                curr_time_str = datetime.datetime.fromtimestamp(
                    curr_time).strftime('%H:%M:%S')
                elapsed = str(
                    datetime.timedelta(seconds=(curr_time - start_time)))
                log = ("[{}] [{}] [{} / {}] Ep {}, ".format(
                    curr_time_str, elapsed, state_dict['itr'], total_iters,
                    epoch) + ', '.join([
                        '%s : %+4.3f' % (key, metrics[key]) for key in metrics
                    ]))
                print(log)

            # Save weights and copies as configured at specified interval
            if not (state_dict['itr'] % config['save_every']):
                if config['G_eval_mode']:
                    print('Switching G to eval mode...')
                    G.eval()
                    # if config['ema']:
                    # G_ema.eval()
                train_fns.save_and_sample(G, D, G_ema, z_, y_, fixed_z,
                                          fixed_y, state_dict, config,
                                          experiment_name)

        # Increment epoch counter at end of epoch
        state_dict['epoch'] += 1
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # Load model.
    model = load_fcn(num_classes=1)
    model.to(device)

    # Load optimizer and loss
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=config["lr"])

    ## Set up Data Loaders.
    epochs = config["epochs"]
    train_loader, val_loader, _ = get_data_loaders(
        args.id_path,
        args.im_path,
        args.label_path,
        batch_size=config["batch_size"])

    ## Begin training
    best_val_iou = -np.inf
    for epoch in range(epochs):
        print(f"Starting epoch {epoch+1}:")

        ## Metrics
        train_loss, val_loss = 0, 0
        train_iou, val_iou = 0, 0
        train_prec, val_prec = 0, 0
        train_recall, val_recall = 0, 0

        ## Training.
Example #19
def run():
    # Parameters
    num_epochs = 10
    output_period = 100
    batch_size = 100

    # setup the device for running
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = resnet_18()
    model = model.to(device)

    train_loader, val_loader = dataset.get_data_loaders(batch_size)
    num_train_batches = len(train_loader)
    num_val_batches = len(val_loader)

    criterion = nn.CrossEntropyLoss().to(device)
    # TODO: optimizer is currently unoptimized
    # there's a lot of room for improvement/different optimizers
    optimizer = optim.SGD(
        model.parameters(),
        lr=1e-3)  # we could try stochastic gradient descent, adagrad, adadelta,
    # optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=0.05)

    epoch = 1
    with open(cwd + "/resnet_18-" + datestr + ".txt", "w") as outptfile:

        while epoch <= num_epochs:
            running_loss = 0.0
            epoch_train_loss = 0.0
            epoch_val_loss = 0.0
            correctInTrainEpoch = 0
            top5InTrainEpoch = 0
            epoch_samples = 0
            val_samples = 0

            for param_group in optimizer.param_groups:
                print('Current learning rate: ' + str(param_group['lr']))
            model.train()

            for batch_num, (inputs, labels) in enumerate(train_loader, 1):
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

                # with torch.no_grad():
                #     _, prediction = torch.max(outputs, dim=1)
                #     prediction = prediction.cpu()
                #     outputs = outputs.cpu()
                #     labels = labels.cpu()
                #     numCorrect = prediction.numpy() == labels.numpy()
                #     correctInTrainEpoch += sum(numCorrect)
                #     top5 = torch.topk(outputs, 5, dim=1)
                #     top5 = top5[:][1]
                #     accuracy = [1 if int(labels[ind]) in x else 0 for ind, x in enumerate(top5.numpy())]
                #     top5InTrainEpoch += sum(accuracy)

                # if batch_num % output_period == 0:
                #     print('[%d:%.2f] loss: %.3f' % (
                #         epoch, batch_num*1.0/num_train_batches,
                #         running_loss/output_period
                #         ))
                #     epoch_train_loss += running_loss
                #     running_loss = 0.0
                #     gc.collect()
                # if batch_num > 5:
                #     print('[%d:%.2f] loss: %.3f' % (
                #         epoch, batch_num*1.0/num_train_batches,
                #         running_loss/output_period
                #         ))
                #     epoch_train_loss += running_loss
                #     running_loss = 0.0
                #     gc.collect()
                #     print(outputs)
                #     _, prediction = torch.max(outputs, dim=1)
                #     print('predicted class: ', prediction)
                #     print('actual class: ', labels)
                #     numCorrect = prediction == labels
                #     print('classification error: ', sum(numCorrect)/len(labels))
                #     top5 = torch.topk(outputs, 5, dim=1)
                #     print('Top 5 classes were: ', top5[:][1])
                #     top5 = top5[:][1]
                #     # print(labels.repeat(1,5).view(5,-1))
                #     print(labels.transpose(0,-1))
                #     accuracy = [1 if int(labels[ind]) in x else 0 for ind, x in enumerate(top5.numpy())]
                #     print('top5 accuracy: ', sum(accuracy)/len(labels))
                #     break
                acc1, acc5 = accuracy(outputs, labels, topk=(1, 5))
                n = outputs.size(0)
                correctInTrainEpoch += acc1[0]
                top5InTrainEpoch += acc5[0]
                epoch_samples += n
                if batch_num % output_period == 0:
                    print('[%d:%.2f] loss: %.3f' %
                          (epoch, batch_num * 1.0 / num_train_batches,
                           running_loss / output_period))
                    epoch_train_loss += running_loss
                    running_loss = 0.0
                    gc.collect()

                # if batch_num > 5:
                #     break

            gc.collect()
            # save after every epoch
            torch.save(model.state_dict(), "models/model.%d" % epoch)

            # TODO: Calculate classification error and Top-5 Error
            # on training and validation datasets here

            correctInValEpoch = 0
            top5InValEpoch = 0

            model.eval()
            with torch.no_grad():
                for batch_num, (inputs, labels) in enumerate(val_loader):
                    inputs = inputs.to(device)
                    labels = labels.to(device)

                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    acc1, acc5 = accuracy(outputs, labels, topk=(1, 5))
                    n = outputs.size(0)

                    correctInValEpoch += acc1[0]
                    top5InValEpoch += acc5[0]
                    epoch_val_loss += loss.item()
                    val_samples += n
                    # if batch_num > 5:
                    #     break

                    # _, prediction = torch.max(outputs, dim=1)
                    # prediction = prediction.cpu()
                    # outputs = outputs.cpu()
                    # labels = labels.cpu()
                    # numCorrect = prediction.numpy() == labels.numpy()
                    # correctInValEpoch += sum(numCorrect)
                    # top5 = torch.topk(outputs, 5, dim=1)
                    # top5 = top5[:][1]
                    # accuracy = [1 if int(labels[ind]) in x else 0 for ind, x in enumerate(top5.numpy())]
                    # top5InValEpoch += sum(accuracy)
                    # if batch_num > 5:
                    #     print('[%d:%.2f] loss: %.3f' % (
                    #     epoch, batch_num*1.0/num_train_batches,
                    #     running_loss/output_period
                    #     ))
                    #     epoch_train_loss += running_loss
                    #     running_loss = 0.0
                    #     gc.collect()
                    #     print(outputs)
                    #     _, prediction = torch.max(outputs, dim=1)
                    #     print('predicted class: ', prediction)
                    #     print('actual class: ', labels)
                    #     numCorrect = prediction == labels
                    #     print('classification error: ', sum(numCorrect)/len(labels))
                    #     top5 = torch.topk(outputs, 5, dim=1)
                    #     print('Top 5 classes were: ', top5[:][1])
                    #     top5 = top5[:][1]
                    #     # print(labels.repeat(1,5).view(5,-1))
                    #     print(labels.transpose(0,-1))
                    #     accuracy = [1 if int(labels[ind]) in x else 0 for ind, x in enumerate(top5.numpy())]
                    #     print('top5 accuracy: ', sum(accuracy)/len(labels))
                    #     break

            accuracyString = 'Epoch %d Train: T1  %.2f, T5 %.2f, Loss %.2f \nEpoch %d Val: V1 %.2f, V5 %.2f, Loss %.2f\n' % (
                epoch,
                100.0 - correctInTrainEpoch / (epoch_samples) * 100,
                100.0 - top5InTrainEpoch / epoch_samples * 100,
                epoch_train_loss / (num_train_batches),
                epoch,
                100.0 - correctInValEpoch / (val_samples) * 100,
                100.0 - top5InValEpoch / (val_samples) * 100,
                epoch_val_loss / (num_val_batches),
            )

            print(accuracyString)

            outptfile.write(accuracyString)
            outptfile.write("\n")

            gc.collect()
            epoch += 1
Example #20
def main(args):

    # Store name of experiment
    exp_name = args.exp_name
    exp_name = '{}_r{}_p{}_n{}_i{}_k{}'.format(exp_name, args.rho,
                                               args.pos_reward,
                                               args.neg_reward,
                                               args.class_imbalance,
                                               args.kldiv_lambda)

    # Create a directory for the output path
    args.output_path = os.path.join(args.output_path, args.exp_name)
    os.makedirs(args.output_path, exist_ok=True)

    utils.LOG_FILE = os.path.join(args.output_path, 'log.txt')

    LEARNING_PROFILE_FILE = os.path.join(args.output_path,
                                         'learning_curve.txt')
    lpf = open(LEARNING_PROFILE_FILE, 'a')
    args.lpf = lpf
    # Set logging
    logging.basicConfig(filename=utils.LOG_FILE,
                        filemode='a',
                        format='%(levelname)s :: %(asctime)s - %(message)s',
                        level=args.log_level,
                        datefmt='%d/%m/%Y %I:%M:%S %p')
    console = logging.StreamHandler()
    console.setLevel(args.log_level)
    formatter = logging.Formatter('%(levelname)s :: %(asctime)s - %(message)s',
                                  datefmt='%d/%m/%Y %I:%M:%S %p')
    console.setFormatter(formatter)
    logging.getLogger().addHandler(console)

    logging.info(
        'Beginning code for experiment {} and storing stuff in {}'.format(
            exp_name, args.output_path))
    logging.info('Loaded arguments as \n{}'.format(str(pprint.pformat(args))))

    # Begin of main code

    train_loader, val_loader, labelled_train_loader = dataset.get_data_loaders(
        args)
    model = models.select_model(args)
    my_eval_fn = compute.get_evaluation_function(args)

    if args.optim == 'sgd':
        optimizer = optim.SGD(filter(lambda p: p.requires_grad,
                                     model.parameters()),
                              momentum=args.momentum,
                              lr=args.lr,
                              weight_decay=args.decay)
    else:
        optimizer = optim.Adam(filter(lambda p: p.requires_grad,
                                      model.parameters()),
                               lr=args.lr,
                               weight_decay=args.decay)

    checkpoint_file = os.path.join(args.output_path,
                                   '{}_checkpoint.pth'.format(exp_name))
    best_checkpoint_file = os.path.join(
        args.output_path, '{}_best_checkpoint.pth'.format(exp_name))
    logging.info('Saving checkpoints at {} and best checkpoint at : {}'.format(
        checkpoint_file, best_checkpoint_file))

    start_epoch = 0
    best_score = -9999999

    # Load checkpoint if present in input arguments
    if args.checkpoint != '':
        logging.info('Starting from checkpoint: {}'.format(args.checkpoint))
        cp = torch.load(args.checkpoint)
        start_epoch = cp['epoch'] + 1
        model.load_state_dict(cp['model'])
        # optimizer.load_state_dict(cp['optimizer']) TODO: - Why not do this?
        best_score = cp['best_score']
        for param_group in optimizer.param_groups:
            param_group['lr'] = args.lr
            param_group['weight_decay'] = args.decay

    num_epochs = args.num_epochs
    logging.info('Beginning train/validate cycle')

    time1 = time.time()
    if val_loader is not None:
        record, metric_idx, headers = compute.compute(start_epoch - 1,
                                                      model,
                                                      val_loader,
                                                      optimizer,
                                                      'eval',
                                                      eval_fn=my_eval_fn,
                                                      args=args)
        if (args.log_eval is not None):
            handler = open(args.log_eval, "a")
            print(','.join([
                str(round(x, 6)) if isinstance(x, float) else str(x)
                for x in record
            ]),
                  file=handler)
            handler.close()
    print("Time taken:", time.time() - time1)
    if (args.only_eval):
        logging.info('Ran only eval mode, now exiting')
        exit(0)

    # Start TRAINING
    for epoch in range(start_epoch, num_epochs):
        logging.info('Beginning epoch {}'.format(epoch))

        if labelled_train_loader is not None:
            record, metric_idx, _ = compute.compute(epoch,
                                                    model,
                                                    labelled_train_loader,
                                                    optimizer,
                                                    'train_sup',
                                                    eval_fn=my_eval_fn,
                                                    args=args)

        if train_loader is not None:
            record, metric_idx, _ = compute.compute(
                epoch,
                model,
                train_loader,
                optimizer,
                'train_un',
                eval_fn=my_eval_fn,
                args=args,
                labelled_train_loader=labelled_train_loader)

        if val_loader is not None:
            record, metric_idx, _ = compute.compute(epoch,
                                                    model,
                                                    val_loader,
                                                    None,
                                                    'eval',
                                                    eval_fn=my_eval_fn,
                                                    args=args)

        is_best = False
        logging.info('Best score: {}, This score: {}'.format(
            best_score, record[metric_idx]))

        if record[metric_idx] > best_score:
            best_score = record[metric_idx]
            is_best = True

        utils.save_checkpoint(
            {
                'epoch': epoch,
                'best_score': best_score,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'is_best': is_best
            }, epoch, is_best, checkpoint_file, best_checkpoint_file)

    args.lpf.close()
Example #21
def run():
    # Parameters
    num_epochs = 10
    output_period = 100
    batch_size = 100

    # setup the device for running
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = resnet_18()
    model = model.to(device)

    train_loader, val_loader = dataset.get_data_loaders(batch_size)
    num_train_batches = len(train_loader)

    criterion = nn.CrossEntropyLoss().to(device)
    # TODO: optimizer is currently unoptimized
    # there's a lot of room for improvement/different optimizers
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.001)
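    # step_size=2 with gamma=0.001 cuts the learning rate by a factor of 1000 every 2 epochs;
    # recent PyTorch versions expect scheduler.step() to be called after optimizer.step(), not at the top of the epoch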

    epoch = 1
    while epoch <= num_epochs:
        scheduler.step()
        running_loss = 0.0
        for param_group in optimizer.param_groups:
            print('Current learning rate: ' + str(param_group['lr']))
        model.train()

        for batch_num, (inputs, labels) in enumerate(train_loader, 1):
            print(labels)
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()

            optimizer.step()
            running_loss += loss.item()

            if batch_num % output_period == 0:
                print('[%d:%.2f] loss: %.3f' %
                      (epoch, batch_num * 1.0 / num_train_batches,
                       running_loss / output_period))
                running_loss = 0.0
                gc.collect()

        gc.collect()
        # save after every epoch
        torch.save(model.state_dict(), "models/model.%d" % epoch)

        # TODO: Calculate classification error and Top-5 Error
        # on training and validation datasets here
        # count = 0
        # accuracy_top1 = 0
        # accuracy_top5 = 0
        # for batch_num, (inputs, labels) in enumerate(train_loader, 1):
        #     prediction = model(inputs)
        #     prediction = prediction.to('cpu')
        #     _, cls = torch.max(prediction, dim=1)
        #     _, top5 = torch.topk(prediction, k=5, dim=1)
        #     for i in range(len(cls)):
        #         accuracy_top1 += int(cls[i] == labels[i])
        #         count += 1
        #     for i in range(len(top5)):
        #         accuracy_top5 += int(labels[i] in top5[i])

        # accuracy_top1 /= count
        # accuracy_top5 /= count
        # print(accuracy_top1, accuracy_top5)

        # accuracy_top1 = 0
        # accuracy_top5 = 0
        # for batch_num, (inputs, labels) in enumerate(val_loader, 1):
        #     prediction = model(inputs)
        #     prediction = prediction.to('cpu')
        #     _, cls = torch.max(prediction, dim=1)
        #     _, top5 = torch.topk(prediction, k=5, dim=1)
        #     for i in range(len(cls)):
        #         accuracy_top1 += int(cls[i] == labels[i])
        #     for i in range(len(top5)):
        #         accuracy_top5 += int(labels[i] in top5[i])
        #     count += 1

        # accuracy_top1 /= count
        # accuracy_top5 /= count
        # print(accuracy_top1, accuracy_top5)

        gc.collect()
        epoch += 1
Example #22
    def train_ignite(self):
        train_loader, validation_loader = get_data_loaders(self.config)
        writer = create_summary_writer(self.Model, train_loader,
                                       self.logs_save_dir)

        self.optimizer = Adam(self.Model.parameters(),
                              lr=self.learning_rate,
                              betas=(0.9, 0.999))
        self.learning_rate_scheduler()
        loss = UNetCrossEntropyLoss().cuda()

        trainer = create_trainer(model=self.Model,
                                 optimizer=self.optimizer,
                                 criterion=loss,
                                 device=self.device)
        evaluator = create_evaluator(self.Model,
                                     metrics={
                                         'CrossEntropy': Loss(loss),
                                         'PrecisionRecall': PrecisionRecall()
                                     },
                                     device=self.device)

        desc = "ITERATION - loss: {:.2f}"
        pbar = tqdm(initial=0,
                    leave=False,
                    total=len(train_loader),
                    desc=desc.format(0))
        log_interval = 2

        @trainer.on(Events.ITERATION_COMPLETED)
        def log_training_loss(engine):
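            # 1-based index of the current iteration within this epoch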
            iter = (engine.state.iteration - 1) % len(train_loader) + 1

            if iter % log_interval == 0:
                pbar.desc = desc.format(engine.state.output)
                pbar.update(log_interval)
            writer.add_scalar("training/logs", engine.state.output,
                              engine.state.iteration)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_training_results(engine):
            self.scheduler.step()
            pbar.refresh()
            evaluator.run(train_loader)
            metrics = evaluator.state.metrics
            cross_entropy_loss = metrics['CrossEntropy']
            tqdm.write(
                "Current Learning Rate:{:.10f}: Training Results - Epoch: {}  Cross Entropy Loss: {:.2f}"
                .format(self.optimizer.param_groups[0]['lr'],
                        engine.state.epoch, cross_entropy_loss))
            writer.add_scalar("training/cross_entropy_loss",
                              cross_entropy_loss, engine.state.epoch)

        @trainer.on(Events.EPOCH_COMPLETED)
        def log_validation_results(engine):
            pbar.refresh()
            evaluator.run(validation_loader)
            metrics = evaluator.state.metrics
            cross_entropy_loss = metrics['CrossEntropy']
            precision_recall_loss = metrics['PrecisionRecall']
            tqdm.write(
                "Validation Results - Epoch: {}  Cross Entropy Loss: {:.2f} \n Precision: {:.4f},"
                " Recall: {:.4f}, Mean Euclidean Distance: {:.2f}".format(
                    engine.state.epoch, cross_entropy_loss,
                    precision_recall_loss['precision'],
                    precision_recall_loss['recall'],
                    precision_recall_loss['mean_euclidean_dist']))
            pbar.n = pbar.last_print_n = 0

            input = evaluator.state.batch['image']

            output = evaluator.state.output
            pred = output[0]
            mask = output[1]

            input_grid = torchvision.utils.make_grid(torch.stack(
                [img.cpu() for img in input], dim=0),
                                                     normalize=True)
            pred_grid = torchvision.utils.make_grid(
                torch.stack([img.cpu() for img in pred]))
            mask_grid = torchvision.utils.make_grid(
                torch.stack([img.cpu() for img in mask]))
            # torchvision.utils.save_image(pred_grid, "pred/pred_grid_" + str(engine.state.epoch) + ".png")
            # torchvision.utils.save_image(mask_grid, "pred/mask_grid_" + str(engine.state.epoch) + ".png")
            writer.add_image("Input", input_grid, engine.state.epoch)
            writer.add_image("Result", pred_grid, engine.state.epoch)
            writer.add_image("Ground Truth", mask_grid, engine.state.epoch)
            writer.add_scalar("validation/precision",
                              precision_recall_loss['precision'],
                              engine.state.epoch)
            writer.add_scalar("validation/recall",
                              precision_recall_loss['recall'],
                              engine.state.epoch)
            writer.add_scalar("validation/mean_euclidean_dist",
                              precision_recall_loss['mean_euclidean_dist'],
                              engine.state.epoch)
            writer.add_scalar("validation/cross_entropy_loss",
                              cross_entropy_loss, engine.state.epoch)

        checkpointer = ModelCheckpoint(self.model_save_path,
                                       'unet_v_1_',
                                       save_interval=1,
                                       n_saved=50,
                                       require_empty=False,
                                       save_as_state_dict=True)
        # early_stopping = EarlyStopping(patience=5, score_function=self.score_function, trainer=trainer)

        trainer.add_event_handler(Events.EPOCH_COMPLETED, checkpointer,
                                  {'epoch': self.Model})
        # trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan())
        # evaluator.add_event_handler(Events.COMPLETED, early_stopping)

        trainer.run(train_loader, max_epochs=self.epochs)
        pbar.close()
        writer.close()
Example #23
# Set random seed for reproducibility
manualSeed = 999
#manualSeed = random.randint(1, 10000) # use if you want new results
print("Random Seed: ", manualSeed)
random.seed(manualSeed)
torch.manual_seed(manualSeed)

parser = utils.prepare_parser()
config = vars(parser.parse_args())

loaders = dataset.get_data_loaders(
    data_root=config['data_root'],
    label_root=config['label_root'],
    batch_size=config['batch_size'],
    num_workers=config['num_workers'],
    shuffle=config['shuffle'],
    pin_memory=config['pin_memory'],
    drop_last=True,
    load_in_mem=config['load_in_mem'],
    mask_out=True,
)

image_size = IMG_SIZE

nc = 3

nz = config['dim_z']

# Size of feature maps in generator
ngf = config['G_ch']
Example #24
args = parser.parse_args()
config = {}
config.update(vars(args))
args = utils.Map(config)
o2n, n2o = utils.get_template_id_maps(args.num_templates, args.exclude_t_ids)
args.o2n = o2n
args.n2o = n2o

for key in ['train_labels_path', 'val_labels_path']:
    if args[key] == 'None':
        args[key] = None

settings.set_settings(args)

train_loader, val_loader, labelled_train_loader = dataset.get_data_loaders(
    args)


def SIDX(template_id):
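    # start column of the 7-value block for template_id (blocks start at column 3)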
    return (3 + (template_id - 1) * 7)


def EIDX(template_id):
    return (3 + (template_id - 1) * 7 + 7)


# for i in val_loader.dataset.raw_data[:,SIDX(6)]:
# 	print (i)
# print (val_loader.dataset.raw_data[:,SIDX(2)])

#my_score, max_score, similarity, rank, conditional_rank, mean, std
Example #25
def main(args):
    # make sure that model dir exists
    if not os.path.exists(args.model_dir):
        os.makedirs(args.model_dir)

    use_multi_gpu = args.multi_gpu
    gpu_index = args.gpu
    # if the given index is not available we fall back to index 0;
    # index 0 must also be used when running on multiple GPUs
    if gpu_index + 1 > torch.cuda.device_count() or use_multi_gpu:
        gpu_index = 0

    logging.info('using gpu cuda:{}, script PID {}'.format(
        gpu_index, os.getpid()))
    device = torch.device('cuda:{}'.format(gpu_index))

    # get the configuration file
    config = Config(args.config_type).create_config()
    if args.state:
        # if we provide a saved state then load config from there
        logging.info('loading config from {}'.format(args.state))
        best_state = torch.load(args.state)
        config = best_state['config']

    # sanity check to make sure old configs still work with new format
    config = backward_compatible_config(config)
    # size of input depends on sequence types, either difference or orientation
    input_size = 3
    if config['seq_type'] == 'orient':
        input_size = 4
    model = MortonNet(input_size=input_size,
                      conv_layers=config['conv_layers'],
                      rnn_layers=config['rnn_layers'],
                      hidden_size=config['hidden_size'])
    # we use MSE loss
    criterion = nn.MSELoss()

    model.to(device)
    # if using multi-GPU then wrap the model in DataParallel
    if use_multi_gpu:
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    # we reduce learning rate when validation doesn't improve after some patience
    scheduler = ReduceLROnPlateau(optimizer,
                                  mode='min',
                                  factor=config['lr_decay'],
                                  patience=config['lr_patience'],
                                  verbose=True)

    logging.info('Config {}'.format(config))

    phases = ['train', 'valid']
    dataloaders, datasets = get_data_loaders(
        root_dir=args.root_dir,
        phases=phases,
        shuffle=True,
        cluster=config['cluster'],
        batch_size=args.bs,
        chunk_size=config['chunk_size'],
        seq_len=config['seq_len'],
        random_sequence=config['random_sequence'],
        ratio=config['ratio'],
        seq_type=config['seq_type'])

    model_dir = generate_experiment_dir(args.model_dir, config)
    logging.info(
        'TB logs and checkpoint will be saved in {}'.format(model_dir))

    # get TensorboardX writer
    writer = SummaryWriter(log_dir=model_dir)

    train(config=config,
          model=model,
          criterion=criterion,
          optimizer=optimizer,
          dataloaders=dataloaders,
          device=device,
          model_dir=model_dir,
          phases=phases,
          scheduler=scheduler,
          writer=writer,
          print_every=args.print,
          plot_every=args.plot,
          save_every=args.save)

    writer.close()
Example #26
    total_f1, total_pr, total_rc = f1_score(P, G, S)
    total_loss = cum_loss / total_sample

    return total_loss, total_f1, total_pr, total_rc


if __name__ == '__main__':
    EP = 100
    SAVING_DIR = '../models/'
    tokenizer = BertTokenizer.from_pretrained(
        '/home/zydq/.torch/models/bert/chinese-bert_chinese_wwm_pytorch',
        do_lower_case=True)
    train_loader, val_loader = get_data_loaders(
        rv_path='../data/TRAIN/Train_reviews.csv',
        lb_path='../data/TRAIN/Train_labels.csv',
        tokenizer=tokenizer,
        batch_size=12,
        val_split=0.15)

    model = OpinioNet.from_pretrained(
        '/home/zydq/.torch/models/bert/chinese-bert_chinese_wwm_pytorch')
    model.cuda()
    optimizer = Adam(model.parameters(), lr=5e-6)
    scheduler = GradualWarmupScheduler(optimizer, total_epoch=2)
    best_val_f1 = 0
    best_val_loss = float('inf')
    for e in range(EP):

        print('Epoch [%d/%d] train:' % (e, EP))
        train_loss, train_f1, train_pr, train_rc = train_epoch(
            model, train_loader, optimizer, scheduler)
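The f1_score helper used in this example is not shown; from the call f1_score(P, G, S) it appears to compute micro precision, recall and F1 from counts of predicted items (P), gold items (G) and correct matches (S). A sketch under that assumption, not the original implementation:

def f1_score(P, G, S):
    # micro-averaged scores from predicted (P), gold (G) and matched (S) counts
    precision = S / P if P else 0.0
    recall = S / G if G else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return f1, precision, recall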
Example #27
0
def run(num_epochs, out_period, batch_size, model):
    # setup the device for running
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = model.to(device)

    train_loader, val_loader = dataset.get_data_loaders(batch_size)
    num_train_batches = len(train_loader)

    criterion = nn.CrossEntropyLoss().to(device)
    # the optimizer has not been tuned; other choices may work better,
    # e.g. SGD with Nesterov momentum:
    # optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, nesterov=True)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # printAccuracy(train_loader, device, model, "TRAINSET", 1)

    epoch = 1
    while epoch <= num_epochs:
        running_loss = 0.0
        for param_group in optimizer.param_groups:
            param_group['lr'] = max(param_group['lr'] * 0.97, 1e-4)
            tqdm.write('Current learning rate: ' + str(param_group['lr']))
        model.train()

        for batch_num, (inputs, labels) in enumerate(tqdm(train_loader), 1):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            # outputs, aux = model(inputs)  # for models that return auxiliary logits (e.g. Inception)
            loss = criterion(outputs, labels)
            loss.backward()

            optimizer.step()
            running_loss += loss.item()

            if batch_num % out_period == 0:
                tqdm.write('[%d:%.2f] loss: %.3f' %
                      (epoch, batch_num * 1.0 / num_train_batches,
                       running_loss / out_period))
                running_loss = 0.0
                gc.collect()

        gc.collect()
        # save after every epoch
        torch.save(model.state_dict(), "models/model.%d" % epoch)

        # Calculate classification error and Top-5 Error
        # on training and validation datasets here
        model.eval()
        try:
            printAccuracy(train_loader, device, model, "TRAINSET", epoch)
            printAccuracy(val_loader, device, model, "VALSET", epoch)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            # don't let a failure in accuracy reporting abort training
            pass


        gc.collect()
        epoch += 1
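The loop above decays the learning rate by hand, multiplying it by 0.97 at the start of every epoch and flooring it at 1e-4. With the example's initial Adam learning rate of 1e-3, roughly the same schedule can be written with a standard scheduler; this is a sketch, not part of the original code:

from torch.optim import lr_scheduler

# multiply the initial 1e-3 lr by 0.97 per epoch, never dropping below 1e-4 (a factor of 0.1)
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda epoch: max(0.97 ** epoch, 0.1))

for epoch in range(num_epochs):
    ...  # training and evaluation as above
    scheduler.step()  # replaces the manual param_group['lr'] update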
Example #28
0
def train(seed, depth, maxlen, batch_size, accumulation_steps, model_name):

    config.seed = seed
    config.max_sequence_length = maxlen
    config.batch_size = batch_size
    config.accumulation_steps = accumulation_steps
    if depth != 24:
        config.bert_weight = f"../bert_weight/uncased_L-{depth}_H-768_A-12/"
    else:
        config.bert_weight = f"../bert_weight/uncased_L-{depth}_H-1024_A-16/"
    if model_name == 'bert':
        config.features = f"../bert_features_{maxlen}/"
    elif model_name == 'gpt2':
        config.features = f"../features_{maxlen}_gpt/"
    else:
        config.features = f"../features_{maxlen}_xlnet/"
    config.experiment = f"{depth}layers"
    config.checkpoint = f"{config.logdir}/{config.today}/{model_name}_{config.experiment}_" \
                        f"{config.batch_size}bs_{config.accumulation_steps}accum_{config.seed}seed_{config.max_sequence_length}/"

    print_config(config)

    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)
    torch.backends.cudnn.deterministic = True

    # Data loaders
    train_loader, valid_loader, valid_df, loss_weight = get_data_loaders(
        config)
    loaders = {"train": train_loader, "valid": valid_loader}

    # Criterion
    criterion = CustomLoss(loss_weight)

    # Model and optimizer
    if model_name == 'bert':
        print("BERT MODEL")
        model = BertForTokenClassificationMultiOutput2.from_pretrained(
            config.bert_weight,
            cache_dir=None,
            num_aux_labels=config.n_aux_targets)

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]
        num_train_optimization_steps = np.ceil(
            len(train_loader.dataset) / config.batch_size /
            config.accumulation_steps) * config.epochs
        optimizer = BertAdam(optimizer_grouped_parameters,
                             lr=config.lr,
                             warmup=0.01,
                             t_total=num_train_optimization_steps)

    elif model_name == 'gpt2':
        print("GPT2 MODEL")
        model = GPT2ClassificationMultioutput.from_pretrained(
            config.gpt2_weight,
            cache_dir=None,
            num_aux_labels=config.n_aux_targets)

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]
        num_train_optimization_steps = np.ceil(
            len(train_loader.dataset) / config.batch_size /
            config.accumulation_steps) * config.epochs
        optimizer = OpenAIAdam(optimizer_grouped_parameters,
                               lr=config.lr,
                               warmup=0.01,
                               t_total=num_train_optimization_steps)
    elif model_name == 'xlnet':
        model = XLNetWithMultiOutput.from_pretrained(
            config.xlnet_weight,
            clf_dropout=0.4,
            n_class=6
            # num_aux_labels=config.n_aux_targets
        )

        param_optimizer = list(model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [{
            'params': [
                p for n, p in param_optimizer
                if not any(nd in n for nd in no_decay)
            ],
            'weight_decay':
            0.01
        }, {
            'params':
            [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
            'weight_decay':
            0.0
        }]
        num_train_optimization_steps = np.ceil(
            len(train_loader.dataset) / config.batch_size /
            config.accumulation_steps) * config.epochs
        optimizer = OpenAIAdam(optimizer_grouped_parameters,
                               lr=config.lr,
                               warmup=0.01,
                               t_total=num_train_optimization_steps)
    else:
        raise ("Model is not implemented")

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
    model = model.cuda()

    from apex import amp
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    # if distributed_rank > -1:
    # from apex.parallel import DistributedDataParallel
    # model = DistributedDataParallel(model)
    model = torch.nn.DataParallel(model)

    if config.resume:
        checkpoint = torch.load(config.checkpoint + "/checkpoints/best.pth")
        new_state_dict = {}
        old_state_dict = checkpoint['model_state_dict']
        for k, v in old_state_dict.items():
            new_state_dict["module." + k] = v
        model.load_state_dict(new_state_dict)
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        criterion.load_state_dict(checkpoint['criterion_state_dict'])
        print("!!! Loaded checkpoint ",
              config.checkpoint + "/checkpoints/best.pth")

    identity_valid = valid_df[config.identity_columns].copy()
    target_valid = valid_df.target.values
    auc_callback = AucCallback(identity=identity_valid, target=target_valid)

    checkpoint_callback = IterationCheckpointCallback(
        save_n_last=2000,
        num_iters=10000,
    )

    # model runner
    runner = ModelRunner()

    # model training
    runner.train(model=model,
                 criterion=criterion,
                 optimizer=optimizer,
                 scheduler=scheduler,
                 loaders=loaders,
                 main_metric='auc',
                 minimize_metric=False,
                 logdir=config.checkpoint,
                 num_epochs=config.epochs,
                 verbose=True,
                 fp16={"opt_level": "O1"},
                 callbacks=[auc_callback, checkpoint_callback])
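The weight-decay grouping above is repeated verbatim for the BERT, GPT-2 and XLNet branches. It could be factored into a small helper; group_weight_decay below is our name for such a helper, not something defined in the original code:

def group_weight_decay(model, weight_decay=0.01,
                       no_decay=('bias', 'LayerNorm.bias', 'LayerNorm.weight')):
    # split parameters into a decayed group and a non-decayed group, as in the branches above
    decay, skip = [], []
    for name, param in model.named_parameters():
        (skip if any(nd in name for nd in no_decay) else decay).append(param)
    return [{'params': decay, 'weight_decay': weight_decay},
            {'params': skip, 'weight_decay': 0.0}]

# usage in, e.g., the BERT branch:
# optimizer = BertAdam(group_weight_decay(model), lr=config.lr, warmup=0.01,
#                      t_total=num_train_optimization_steps)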
Example #29
0
    correct = 0
    examples = 0
    for (inputs, labels) in data_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        _, top = torch.max(outputs, dim=1)
        for i in range(len(outputs)):
            if labels[i] == top[i]:
                correct += 1
        examples += len(outputs)
    return float(correct) / examples


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

batch_size = 100
train_loader, val_loader, test_loader = dataset.get_data_loaders(batch_size)
model = NNmodel()
model = model.to(device)
model.load_state_dict(torch.load('models/model.101'))
model.eval()
print(classification_accuracy(model, test_loader, device))
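The accuracy loop above compares predictions item by item and keeps autograd enabled. A vectorized sketch of the same computation, wrapped in torch.no_grad() to avoid building the graph during evaluation (an alternative formulation, not the original function):

@torch.no_grad()
def classification_accuracy(model, data_loader, device):
    correct, examples = 0, 0
    for inputs, labels in data_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        top = model(inputs).argmax(dim=1)
        correct += (top == labels).sum().item()
        examples += labels.size(0)
    return correct / examples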
Example #30
0
    def __init__(self, args):

        # Training configurations
        self.method = args.method
        self.dataset = args.dataset
        self.dim = args.dim
        self.lr_init = args.lr_init
        self.gamma_m = args.gamma_m
        self.gamma_s = args.gamma_s
        self.batch_size = args.batch_size
        self.val_batch_size = self.batch_size // 2
        self.iteration = args.iteration
        self.evaluation = args.evaluation
        self.show_iter = 1000
        self.update_epoch = args.update_epoch
        self.balanced = args.balanced
        self.instances = args.instances
        self.inter_test = args.intertest
        self.cm = args.cm
        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        self.n_class = args.batch_size // args.instances
        self.classes = args.classes
        self.pretrained = args.pretrained
        self.model_save_interval = args.model_save_interval


        self.file_name = '{}_{}_{}'.format(
            self.method,
            self.dataset,
            self.iteration,
        )
        print('========================================')
        print(json.dumps(vars(args), indent=2))
        print(self.file_name)

        # Paths

        self.root_dir = os.path.join('/', 'data')
        self.data_dir = os.path.join(self.root_dir, self.dataset)
        self.model_dir = self._get_path('./trained_model')
        self.plot_dir = self._get_path('./plot_model')
        self.code_dir = self._get_path(os.path.join('codes', self.dataset))
        self.fig_dir = self._get_path(os.path.join('fig', self.dataset, self.file_name))

        # Preparing data
        self.transforms = get_transform()
        self.datasets = get_datasets(dataset=self.dataset, data_dir=self.data_dir, transforms=self.transforms)

        self.data_loaders = get_data_loaders(
            datasets=self.datasets,
            batch_size=self.batch_size,
            val_batch_size=self.val_batch_size,
            n_instance=self.instances,
            balanced=self.balanced,
            #cm=self.cm_sampler if self.cm else None
        )
        self.dataset_sizes = {x: len(self.datasets[x]) for x in ['train', 'test']}


        # mean is 1.5 for different-class pairs and 0.5 on the diagonal (same-class pairs);
        # std is a constant 0.15 for every pair
        self.mean = (torch.zeros((self.classes, self.classes)).add(1.5)
                     - 1.0 * torch.eye(self.classes)).to(self.device)
        self.std = torch.zeros((self.classes, self.classes)).add(0.15).to(self.device)
        self.last_delta_mean = torch.zeros((self.classes, self.classes)).to(self.device)
        self.last_delta_std = torch.zeros((self.classes, self.classes)).to(self.device)

        self.ndmodel = nd.NDfdml(n_class=self.n_class,
                                 batch_size=self.batch_size,
                                 instances=self.instances,
                                 pretrained=self.pretrained).to(self.device)

        optimizer_c = optim.SGD(
            [
                {'params': self.ndmodel.googlelayer.parameters()},
                {'params': self.ndmodel.embedding_layer.parameters(), 'lr': self.lr_init * 10, 'momentum': 0.9}
            ],
            lr=self.lr_init, momentum=0.9
        )


        self.scheduler = lr_scheduler.StepLR(optimizer_c, step_size=4000, gamma=0.9)