Code Example #1
            # print(label)
            labels.append(1)
            # abnormal sample
        else:
            labels.append(0)
            # normal sample
    print("The number of abnormal in training set: " + str(count))
    labels = numpy.array(labels)
    target = torch.tensor(labels).to(device)

    training_dataset_loader = torch.utils.data.DataLoader(
        training_dataset,
        batch_size=batch_size,
        shuffle=False,
        drop_last=True,
        sampler=BalancedBatchSampler(training_dataset, target),
        num_workers=0,
        pin_memory=True)

    epoch_restart = 0
    dataiter = iter(training_dataset_loader)

    for epoch in range(epoch_restart + 1, 1101):
        batch = tqdm(training_dataset_loader,
                     total=len(training_dataset) // batch_size)
        new_count = 0
        for x, label in batch:
            x = x.to(device)
            l = []
            for index in range(0, x.shape[0]):
Code Example #2
import torch
from sampler import BalancedBatchSampler

epochs = 3
size = 20
features = 5
classes_prob = torch.tensor([0.1, 0.4, 0.5])

dataset_X = torch.randn(size, features)
dataset_Y = torch.distributions.categorical.Categorical(classes_prob.repeat(size, 1)).sample()

dataset = torch.utils.data.TensorDataset(dataset_X, dataset_Y)

train_loader = torch.utils.data.DataLoader(dataset, sampler=BalancedBatchSampler(dataset, dataset_Y), batch_size=6)

for epoch in range(0, epochs):
    for batch_x, batch_y in train_loader:
        print("epoch: %d labels: %s\ninputs: %s\n" % (epoch, batch_y, batch_x))
Code Example #3
    # criterion = torch.nn.BCELoss().cuda()
    #
    # # print(train_dataloader.batch_size)
    #
    # for i, (images,labels) in enumerate(train_dataloader):
    #     # print(sample['image'])
    #     outputs = model(images)
    #     labels = labels.float().reshape(-1,1)
    #     print(outputs.shape,labels.shape)
    #     loss = criterion(outputs,labels)
    #     print('loss: ',loss)

    valconfig = {"dataset": "tb2020", "subset": '0'}
    val_config = dataconfig(**valconfig)
    validation_data = DataGenerator(val_config,
                                    transform=None,
                                    type='1: slice_sampled')
    #val_loader = DataLoader(validation_data, batch_size=12, num_workers=1, shuffle=True)
    #batch_sampler = batch_sampler(batch_size=6,class_list=range(6))

    val_loader = DataLoader(validation_data,
                            num_workers=1,
                            sampler=BalancedBatchSampler(validation_data,
                                                         type='multi_label'),
                            batch_size=6)

    for i, (images, labels) in enumerate(val_loader):
        print(i)
        print(labels)
        print(images.shape)
Code Example #4
def train_eval(clf_model,
               train_meta,
               validation_dataloader,
               base_dir,
               batch_size,
               weights=None,
               lr=2e-5,
               epochs=4,
               eval_every_num_iters=40,
               seed_val=42):
    """train and evaluate a deep learning model
    :params[in]: clf_model, a classifier
    :params[in]: train_meta, training data: data in ids, masks, and labels
    :params[in]: validation_dataloader, validation data
    :params[in]: base_dir, output directory to create the directory to save results
    :params[in]: lr, the learning rate
    :params[in]: epochs, the number of training epochs
    :params[in]: eval_every_num_iters, the number of iterations to evaluate
    :params[in]: seed_val, set a random seed
    """
    # AdamW is a class from the huggingface library; the 'W' stands for the decoupled weight-decay fix
    optimizer = AdamW(
        clf_model.parameters(),
        lr=lr,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
        eps=1e-8  # args.adam_epsilon  - default is 1e-8.
    )
    # Number of training epochs (authors recommend between 2 and 4).
    train_inputs, train_masks, train_labels = train_meta
    train_size = train_inputs.shape[0]  # training sample size
    # Total number of training steps is number of batches * number of epochs.
    total_steps = int(1. + train_size / batch_size) * epochs
    # Create the learning rate scheduler.
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=1,  # Default value in run_glue.py
        num_training_steps=total_steps)
    # apply class weights to the loss only if they were provided
    if weights is not None:
        weights = torch.FloatTensor(weights)
    # Set the seed value all over the place to make this reproducible.
    random.seed(seed_val)
    np.random.seed(seed_val)
    torch.manual_seed(seed_val)
    torch.cuda.manual_seed_all(seed_val)

    # Store the average loss after each epoch so we can plot them.
    loss_values = []

    ## reconstruct a dataloader
    for epoch_i in range(0, epochs):

        # ========================================
        #               Training
        # ========================================
        print("")
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        print('Training...')
        perm0 = torch.randperm(train_size)
        tmp_X, tmp_mask, tmp_Y = train_inputs[perm0, :], train_masks[
            perm0, :], train_labels[perm0]
        dataset = torch.utils.data.TensorDataset(tmp_X, tmp_mask, tmp_Y)
        train_loader = torch.utils.data.DataLoader(
            dataset,
            sampler=BalancedBatchSampler(dataset, tmp_Y),
            batch_size=batch_size,
            drop_last=True)
        # Measure how long the training epoch takes.
        t0 = time.time()

        # Reset the total loss for this epoch.
        total_loss = 0

        # Put the model into training mode. Don't be misled--the call to
        # `train` just changes the *mode*, it doesn't *perform* the training.
        # `dropout` and `batchnorm` layers behave differently during training
        # vs. test (source: https://stackoverflow.com/questions/51433378/what-does-model-train-do-in-pytorch)
        clf_model.train()  ## model training mode

        # For each batch of training data...
        for step, batch in enumerate(train_loader):

            # Unpack this training batch from our dataloader.
            #
            # As we unpack the batch, we'll also copy each tensor to the GPU using the
            # `to` method.
            #
            # `batch` contains three pytorch tensors:
            #   [0]: input ids
            #   [1]: attention masks
            #   [2]: labels
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)

            # Always clear any previously calculated gradients before performing a
            # backward pass. PyTorch doesn't do this automatically because
            # accumulating the gradients is "convenient while training RNNs".
            # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
            clf_model.zero_grad()

            # Perform a forward pass (evaluate the model on this training batch).
            # This will return the loss (rather than the model output) because we
            # have provided the `labels`.
            # The documentation for this `model` function is here:
            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
            outputs = clf_model(b_input_ids,
                                token_type_ids=None,
                                attention_mask=b_input_mask,
                                labels=b_labels,
                                weights=weights)
            #weights=torch.FloatTensor([100/127,100/191,100/34]))

            # The call to `model` always returns a tuple, so we need to pull the
            # loss value out of the tuple.
            loss = outputs[0]

            # Accumulate the training loss over all of the batches so that we can
            # calculate the average loss at the end. `loss` is a Tensor containing a
            # single value; the `.item()` function just returns the Python value
            # from the tensor.
            total_loss += loss.item()

            # Perform a backward pass to calculate the gradients.
            loss.backward()

            # Clip the norm of the gradients to 1.0.
            # This is to help prevent the "exploding gradients" problem.
            torch.nn.utils.clip_grad_norm_(clf_model.parameters(), 1.0)

            # Update parameters and take a step using the computed gradient.
            # The optimizer dictates the "update rule"--how the parameters are
            # modified based on their gradients, the learning rate, etc.
            optimizer.step()
            # Update the learning rate.
            scheduler.step()
            # evaluate the performance every eval_every_num_iters iterations
            if step % eval_every_num_iters == 0 and not step == 0:
                # Calculate elapsed time in minutes.
                elapsed = format_time(time.time() - t0)
                # Report progress.
                print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(
                    step, len(train_loader), elapsed))
                tmp_dir = base_dir + '/epoch' + str(
                    epoch_i + 1) + 'iteration' + str(step)
                ## save pretrained model
                evaluate_model(clf_model, validation_dataloader, tmp_dir)
                clf_model.train()  ## model training mode
        # Calculate the average loss over this epoch's training batches.
        avg_train_loss = total_loss / len(train_loader)

        # Store the loss value for plotting the learning curve.
        loss_values.append(avg_train_loss)
        # save the data after epochs
        tmp_dir = base_dir + '/epoch' + str(epoch_i + 1) + '_done'
        ## save pretrained model
        evaluate_model(clf_model, validation_dataloader, tmp_dir)
        clf_model.train()  ## model training mode
Code Example #5
def train_net(base_path, size=32000, n_classes=4):
    experiments_path = os.path.join(
        r'C:\Users\kotov-d\Documents\TASKS\cross_inference',
        os.path.basename(base_path))
    Path(experiments_path).mkdir(parents=True, exist_ok=True)

    [x_train, x_val, x_test, y_train, y_val,
     y_test] = get_raw_data(base_path, experiments_path, n_classes, size=16000)
    # x_train = np.vstack((x_train, x_val))
    # y_train = np.hstack((y_train, y_val))

    config = Config(lr=0.00001,
                    batch_size=128,
                    num_epochs=1000,
                    n_classes=n_classes)
    net = torch_model(config, p_size=(3, 3, 3, 3), k_size=(64, 32, 16, 8))

    if cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    sampler = BalancedBatchSampler(My_Dataset(x_train, y_train), y_train)

    batcher_train = DataLoader(My_Dataset(x_train, y_train),
                               batch_size=config.batch_size,
                               sampler=sampler)
    batcher_val = DataLoader(My_Dataset(x_val, y_val),
                             batch_size=config.batch_size,
                             shuffle=True)
    start_time = time.time()

    train_loss = []
    valid_loss = []
    train_fscore = []
    valid_fscore = []

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=config.lr)
    net.to(device)
    early_stopping = EarlyStopping()
    min_loss = 1000

    for epoch in range(config.num_epochs):
        iter_loss = 0.0
        correct = 0
        f_scores = 0
        iterations = 0

        net.train()

        h = net.init_hidden(config.batch_size)

        for i, (items, classes) in enumerate(batcher_train):
            if classes.shape[0] != config.batch_size:
                break

            items = items.to(device)
            classes = classes.to(device)
            optimizer.zero_grad()

            h = tuple([each.data for each in h])
            outputs, h = net(items, h)

            loss = criterion(outputs, classes.long())
            iter_loss += loss.item()
            loss.backward()
            optimizer.step()

            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == classes.data.long()).sum()

            f_scores += f1_score(predicted.cpu().numpy(),
                                 classes.data.cpu().numpy(),
                                 average='macro')

            iterations += 1

            torch.cuda.empty_cache()

        train_loss.append(iter_loss / iterations)
        train_fscore.append(f_scores / iterations)

        early_stopping.update_loss(train_loss[-1])
        if early_stopping.stop_training():
            break

        ############################
        # Validate
        ############################
        iter_loss = 0.0
        correct = 0
        f_scores = 0
        iterations = 0

        net.eval()  # Put the network into evaluate mode
        val_h = net.init_hidden(config.batch_size)

        for i, (items, classes) in enumerate(batcher_val):
            if classes.shape[0] != config.batch_size:
                break

            items = items.to(device)
            classes = classes.to(device)

            val_h = tuple([each.data for each in val_h])
            outputs, val_h = net(items, val_h)
            loss = criterion(outputs, classes.long())
            iter_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            correct += (predicted == classes.data.long()).sum()

            f_scores += f1_score(predicted.cpu().numpy(),
                                 classes.data.cpu().numpy(),
                                 average='macro')

            iterations += 1

        valid_loss.append(iter_loss / iterations)
        valid_fscore.append(f_scores / iterations)

        if valid_loss[-1] < min_loss:
            torch.save(
                net, os.path.join(experiments_path, "net.pb"))
            min_loss = valid_loss[-1]

        print(
            'Epoch %d/%d, Tr Loss: %.4f, Tr Fscore: %.4f, Val Loss: %.4f, Val Fscore: %.4f'
            % (epoch + 1, config.num_epochs, train_loss[-1], train_fscore[-1],
               valid_loss[-1], valid_fscore[-1]))

        with open(os.path.join(experiments_path, "loss_track.pkl"), 'wb') as f:
            pickle.dump([train_loss, train_fscore, valid_loss, valid_fscore],
                        f)

    print(time.time() - start_time)
Code Example #6
    def __init__(self, args, s):

        # bank data --> sep=';'
        # adult, home data --> sep=','
        # german data --> sep=' '
        if args.dataset == 'german':
            train_path = 'german-data/german.train'

        elif args.dataset == 'german-pre-dp':
            if s == 0:
                train_path = 'german-data/german.train'
            else:
                train_path = 'german-data/synth/syth_data_correlated_{}.csv'.format(
                    s)

        elif args.dataset == 'bank':
            train_path = 'bank-data/bank-additional-full.csv'

        elif args.dataset == 'bank-pre-dp':
            if s == 0:
                train_path = 'bank-data/bank-additional-full.csv'
            else:
                train_path = 'bank-data/synth/syth_data_correlated_ymod_{}.csv'.format(
                    s)

        elif args.dataset == 'adult':
            train_path = 'adult-data/adult.data'

        elif args.dataset == 'adult-pre-dp':
            if s == 0:
                train_path = 'adult-data/adult.data'
            else:
                train_path = 'adult-data/synth/syth_data_correlated_ymod_{}.csv'.format(
                    s)

        elif args.dataset == 'home':
            train_path = 'home-data/hcdf_train.csv'

        elif args.dataset == 'home-pre-dp':
            if s == 0:
                train_path = 'home-data/hcdf_train.csv'
            else:
                train_path = 'home-data/synth/syth_data_correlated_ymod_{}.csv'.format(
                    s)

        if args.dataset == 'german' or args.dataset == 'german-pre-dp':
            cols = [
                'existing_checking', 'duration', 'credit_history', 'purpose',
                'credit_amount', 'savings', 'employment_since',
                'installment_rate', 'status_sex', 'other_debtors',
                'residence_since', 'property', 'age',
                'other_installment_plans', 'housing', 'existing_credits',
                'job', 'people_liable', 'telephone', 'foreign_worker', 'y'
            ]

            test_path = 'german-data/german.test'
            if args.dataset == 'german-pre-dp' and s > 0:
                sep = ','
            else:
                sep = ' '
            train_df = pd.read_csv(train_path, sep=sep, names=cols)
            print(train_df)
            test_df = pd.read_csv(test_path, sep=' ', names=cols)

            train_df['y'] = train_df['y'].apply(lambda x: 0 if x == 2 else 1)
            test_df['y'] = test_df['y'].apply(lambda x: 0 if x == 2 else 1)

        if args.dataset == 'bank' or args.dataset == 'bank-pre-dp':
            cols = [
                'age', 'job', 'marital', 'education', 'default', 'housing',
                'loan', 'contact', 'month', 'day_of_week', 'duration',
                'campaign', 'pdays', 'previous', 'poutcome', 'emp.var.rate',
                'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed',
                'y'
            ]
            test_path = 'bank-data/bank-additional.csv'
            train_df = pd.read_csv(train_path, sep=';', names=cols)
            test_df = pd.read_csv(test_path, sep=';', names=cols)

        elif args.dataset == 'adult' or args.dataset == 'adult-pre-dp':
            test_path = 'adult-data/adult.test'

            cols = [
                'age', 'workclass', 'fnlwgt', 'education', 'education-num',
                'marital-status', 'occupation', 'relationship', 'race', 'sex',
                'capital-gain', 'capital-loss', 'hours-per-week',
                'native-country', 'y'
            ]

            train_df = pd.read_csv(train_path, sep=',', names=cols)
            test_df = pd.read_csv(test_path, sep=',', names=cols)

            train_df = train_df.replace({'?': np.nan})
            test_df = test_df.replace({'?': np.nan})

            train_df['y'] = train_df['y'].apply(lambda x: 0
                                                if ">50K" in x else 1)
            test_df['y'] = test_df['y'].apply(lambda x: 0
                                              if ">50K" in x else 1)

        elif args.dataset == 'home' or args.dataset == 'home-pre-dp':
            test_path = 'home-data/hcdf_test.csv'

            train_df = pd.read_csv(train_path, sep=',', header=0)
            test_df = pd.read_csv(test_path, sep=',', header=0)

            train_df = train_df.drop(columns=['FLAG_OWN_CAR'])
            test_df = test_df.drop(columns=['FLAG_OWN_CAR'])

            train_df = train_df.rename(columns={
                "TARGET": "y",
                "CODE_GENDER": "GENDER"
            })
            test_df = test_df.rename(columns={
                "TARGET": "y",
                "CODE_GENDER": "GENDER"
            })

        train_df = train_df.dropna()
        test_df = test_df.dropna()
        print(train_df.head())

        train_df = train_df.sample(frac=1).reset_index(drop=True)  # shuffle df
        #test_df = test_df.sample(frac=1).reset_index(drop=True)  # shuffle df

        if args.num_teachers == 0 or s == 0:
            train_data = LoadDataset(train_df, args.dataset, args.sensitive)
            test_data = LoadDataset(test_df, args.dataset, args.sensitive)

            self.sensitive_keys = train_data.getkeys()
            self.train_size = len(train_data)
            self.test_size = len(test_data)
            self.sensitive_col_idx = train_data.get_sensitive_idx()
            self.cat_emb_size = train_data.categorical_embedding_sizes  # size of categorical embedding
            print("***", self.cat_emb_size)
            self.num_conts = train_data.num_numerical_cols  # number of numerical variables

            print(train_df.head(40))
            print(train_df.y.value_counts())
            class_count = dict(train_df.y.value_counts())
            class_weights = [
                value / len(train_data) for _, value in class_count.items()
            ]

            train_batch = args.batch_size
            test_batch = len(test_data)
            self.train_loader = DataLoader(dataset=train_data,
                                           sampler=BalancedBatchSampler(
                                               train_data, train_data.Y),
                                           batch_size=train_batch)

            self.test_loader = DataLoader(dataset=test_data,
                                          batch_size=test_batch,
                                          drop_last=True)
        else:
            student_train_size = int(len(train_df) * .3)
            teacher_train_df = train_df.iloc[student_train_size:, :]
            student_train_df = train_df.iloc[:student_train_size, :]

            train_data = LoadDataset(teacher_train_df, args.dataset,
                                     args.sensitive)
            student_train_data = LoadDataset(student_train_df, args.dataset,
                                             args.sensitive)
            test_data = LoadDataset(test_df, args.dataset, args.sensitive)

            self.sensitive_keys = train_data.getkeys()
            self.train_size = len(train_data)
            self.test_size = len(test_data)
            self.sensitive_col_idx = train_data.get_sensitive_idx()

            student_train_size = len(student_train_data)

            self.cat_emb_size = train_data.categorical_embedding_sizes  # size of categorical embedding
            #print(self.cat_emb_size)
            self.num_conts = train_data.num_numerical_cols  # number of numerical variables

            class_count = dict(train_df.y.value_counts())
            class_weights = [
                value / len(train_data) for _, value in class_count.items()
            ]

            train_batch = args.batch_size
            test_batch = len(test_data)

            self.teacher_loaders = []
            data_size = self.train_size // args.num_teachers

            for i in range(args.num_teachers):
                indices = list(range(i * data_size, (i + 1) * data_size))

                subset_data = Subset(train_data, indices)
                subset_data_Y = [sample[2] for sample in subset_data]

                subset_data_Y = torch.stack(subset_data_Y)

                loader = DataLoader(dataset=subset_data,
                                    sampler=BalancedBatchSampler(
                                        subset_data, subset_data_Y),
                                    batch_size=train_batch)

                self.teacher_loaders.append(loader)
            """
            indices = list(range(len(test_data)))
            indices = random.sample(indices, len(indices))
            student_split = int(len(test_data) * .7)
            
            student_train_data = Subset(test_data, indices[:student_split])
            student_test_data = Subset(test_data, indices[student_split+1:])
            """
            self.student_train_loader = torch.utils.data.DataLoader(
                student_train_data,
                #sampler=BalancedBatchSampler(student_train_data,
                #                             student_train_data.Y),
                batch_size=student_train_size)
            self.student_test_loader = torch.utils.data.DataLoader(
                test_data, batch_size=test_batch)
Code Example #7
def train(args, model, train_dataset):
    # total step
    step_tot = (len(train_dataset) // args.gradient_accumulation_steps //
                args.batch_size_per_gpu // args.n_gpu) * args.max_epoch

    train_sampler = data.distributed.DistributedSampler(
        train_dataset) if args.local_rank != -1 else BalancedBatchSampler(
            train_dataset, train_dataset.label)
    params = {"batch_size": args.batch_size_per_gpu, "sampler": train_sampler}
    train_dataloader = data.DataLoader(train_dataset, **params)

    # optimizer
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        args.weight_decay
    }, {
        'params': [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay':
        0.0
    }]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=args.lr,
                      eps=args.adam_epsilon,
                      correct_bias=False)
    scheduler = get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=args.warmup_steps,
        num_training_steps=step_tot)

    # amp training
    model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

    # distributed training
    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[args.local_rank],
            output_device=args.local_rank,
            find_unused_parameters=True)

    print("Begin train...")
    print("We will train model in %d steps" % step_tot)
    global_step = 0
    loss_record = []
    step_record = []
    for i in range(args.max_epoch):
        if args.local_rank != -1:  # Distributed training
            train_sampler.set_epoch(i)
        for step, batch in enumerate(train_dataloader):
            if args.model == "MTB":
                inputs = {
                    "l_input": batch[0].to(args.device),
                    "l_mask": batch[1].to(args.device),
                    "l_ph": batch[2].to(args.device),
                    "l_pt": batch[3].to(args.device),
                    "r_input": batch[4].to(args.device),
                    "r_mask": batch[5].to(args.device),
                    "r_ph": batch[6].to(args.device),
                    "r_pt": batch[7].to(args.device),
                    "label": batch[8].to(args.device)
                }
            elif args.model == "CP":
                inputs = {
                    "input": batch[0].to(args.device),
                    "mask": batch[1].to(args.device),
                    "label": batch[2].to(args.device),
                    "h_pos": batch[3].to(args.device),
                    't_pos': batch[4].to(args.device)
                }
            model.train()
            m_loss, r_loss = model(**inputs)
            loss = m_loss + r_loss
            if args.gradient_accumulation_steps > 1:
                loss = loss / args.gradient_accumulation_steps

            with amp.scale_loss(loss, optimizer) as scaled_loss:
                scaled_loss.backward()

            if step % args.gradient_accumulation_steps == 0:
                nn.utils.clip_grad_norm_(amp.master_params(optimizer),
                                         args.max_grad_norm)
                optimizer.step()
                scheduler.step()
                model.zero_grad()
                global_step += 1

                if args.local_rank in [0, -1
                                       ] and global_step % args.save_step == 0:
                    if not os.path.exists("../ckpt"):
                        os.mkdir("../ckpt")
                    if not os.path.exists("../ckpt/" + args.save_dir):
                        os.mkdir("../ckpt/" + args.save_dir)
                    if type(model
                            ) == torch.nn.parallel.DistributedDataParallel:
                        ckpt = {
                            'bert-base': model.module.model.bert.state_dict()
                        }
                    else:
                        ckpt = {'bert-base': model.model.bert.state_dict()}
                    torch.save(
                        ckpt,
                        os.path.join("../ckpt/" + args.save_dir,
                                     "ckpt_of_step_" + str(global_step)))

                # if args.local_rank in [0, -1] and global_step % 5 == 0:
                #     step_record.append(global_step)
                #     loss_record.append(loss)

                # if args.local_rank in [0, -1] and global_step % 500 == 0:
                #     log_loss(step_record, loss_record)

                if args.local_rank in [0, -1]:
                    sys.stdout.write(
                        "step: %d, shcedule: %.3f, mlm_loss: %.6f relation_loss: %.6f\r"
                        %
                        (global_step, global_step / step_tot, m_loss, r_loss))
                    sys.stdout.flush()

        if args.train_sample:
            print("sampling...")
            train_dataloader.dataset.__sample__()
            print("sampled")
Code Example #8
    data = ImageFolderWithName(
        return_fn=False,
        root='/home/chk/cars_stanford/cars_train_labelled/train',
        transform=transforms.Compose([
            transforms.Resize(228),
            transforms.RandomCrop((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ]),
        loader=lambda x: Image.open(x).convert('RGB'))
    dataset = torch.utils.data.DataLoader(data,
                                          batch_sampler=BalancedBatchSampler(
                                              data,
                                              batch_size=32,
                                              batch_k=4,
                                              length=2000),
                                          num_workers=4)
    '''
    data = MetricData(data_root='/home/chk/cars_stanford/cars_train', \
                                    anno_file='/home/chk/cars_stanford/devkit/cars_train_annos.mat', \
                                    idx_file='/home/chk/cars_stanford/devkit/cars_train_annos_idx.pkl', \
                                    return_fn=True)
    sampler = SourceSampler(data)
    print('Batch sampler len:', len(sampler))
    dataset = torch.utils.data.DataLoader(data, batch_sampler=sampler)
    '''

    from model import MetricLearner
    #model = MetricLearner()
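Code Examples #8-#11 use a different constructor, `BalancedBatchSampler(dataset, batch_size, batch_k, length)`, and pass the result to the `DataLoader` through `batch_sampler=` rather than `sampler=`. That implementation is not shown either; a hedged sketch of a batch sampler with this interface, assuming an ImageFolder-style dataset that exposes a `.samples` list of `(path, label)` pairs, could look like the following.

import random
from collections import defaultdict
from torch.utils.data import Sampler


class BalancedBatchSampler(Sampler):
    # Hypothetical sketch: each of the `length` batches draws
    # `batch_size // batch_k` random classes and `batch_k` samples from each,
    # the layout that margin/triplet-style losses typically expect.

    def __init__(self, dataset, batch_size, batch_k, length):
        assert batch_size % batch_k == 0
        self.batch_size, self.batch_k, self.length = batch_size, batch_k, length
        # Group dataset indices by label (`.samples` is an assumption about the dataset).
        self.by_class = defaultdict(list)
        for idx, (_, label) in enumerate(dataset.samples):
            self.by_class[label].append(idx)
        # Only classes with at least `batch_k` samples can fill a slot in a batch.
        self.classes = [c for c, idxs in self.by_class.items()
                        if len(idxs) >= batch_k]

    def __iter__(self):
        classes_per_batch = self.batch_size // self.batch_k
        for _ in range(self.length):
            batch = []
            for c in random.sample(self.classes, classes_per_batch):
                batch.extend(random.sample(self.by_class[c], self.batch_k))
            yield batch

    def __len__(self):
        return self.length

Because the object is passed as `batch_sampler`, each item it yields is the complete index list for one batch, which is why the surrounding examples do not set `batch_size`, `shuffle`, or `drop_last` on the `DataLoader` itself.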
Code Example #9
        beta = checkpoint['beta']
        print("=> loaded checkpoint '{}' (epoch {})"
              .format(args.resume, checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(args.resume))

trn_dataset = dataset.load(
        name = args.dataset,
        root = data_root,
        mode = 'train',
        transform = dataset.utils.make_transform(
            is_train = True,
            is_inception = False
        ))

batch_sampler = BalancedBatchSampler(trn_dataset, args.batch_size, args.batch_k, length=args.batch_num)
train_loader = torch.utils.data.DataLoader(
    batch_sampler=batch_sampler,
    dataset=trn_dataset,
    num_workers=args.workers,
    pin_memory=True
    )

ev_dataset = dataset.load(
    name=args.dataset,
    root=data_root,
    mode='eval',
    transform=dataset.utils.make_transform(
        is_train=False,
        is_inception=False
    ))
Code Example #10
def main(args):
    # checking
    assert args['batch_size'] % args['batch_k'] == 0
    assert args['batch_size'] > 0 and args['batch_k'] > 0
    assert args['batch_size'] // args['batch_k'] < args['classes']

    # seed
    if args['seed'] is not None:
        random.seed(args['seed'])
        torch.manual_seed(args['seed'])
        cudnn.deterministic = True
        warnings.warn(
            '''You have chosen to seed training. This will turn on the CUDNN deterministic setting, which can slow down your training considerably! You may see unexpected behavior when restarting from checkpoints.'''
        )

    # gpus setting
    os.environ['CUDA_VISIBLE_DEVICES'] = args['gpus']

    # construct model

    if not args['use_pretrained']:
        model = resnet50(num_classes=args['feat_dim'])
    else:
        model = resnet50(pretrained=True)
        try:
            model.fc = nn.Linear(model.fc.in_features, args['feat_dim'])
        except AttributeError:
            print(
                "Error: this works only with models whose final layer is named `fc`; modify the code for other architectures"
            )
            exit(-1)

    model = MarginNet(base_net=model,
                      emb_dim=args['embed_dim'],
                      batch_k=args['batch_k'],
                      feat_dim=args['feat_dim'],
                      normalize=args['normalize_weights'])

    print(model.state_dict().keys())
    model.cuda()

    if args['loss'] == 'margin':
        criterion = MarginLoss(margin=args['margin'], nu=args['nu'])
    elif args['loss'] == 'triplet':
        criterion = TripletLoss(margin=args['margin'], nu=args['nu'])
    else:
        raise NotImplementedError

    optimizer = torch.optim.SGD(model.parameters(),
                                args['lr'],
                                momentum=args['momentum'],
                                weight_decay=args['wd'])

    beta = Parameter(
        torch.ones((args['classes'], ),
                   dtype=torch.float32,
                   device=torch.device('cuda')) * args['beta'])

    optimizer_beta = torch.optim.SGD([beta],
                                     args['lr_beta'],
                                     momentum=args['momentum'],
                                     weight_decay=args['wd'])

    if args['resume']:
        if os.path.isfile(args['resume']):
            print("=> loading checkpoint '{}'".format(args['resume']))
            checkpoint = torch.load(args['resume'])
            args['start_epoch'] = checkpoint['epoch']
            state_dict = {}
            for k, v in checkpoint['state_dict'].items():
                if k.startswith('module.'):
                    k = k[7:]
                state_dict[k] = v
            model.load_state_dict(state_dict)
            optimizer.load_state_dict(checkpoint['optimizer'])
            optimizer_beta.load_state_dict(checkpoint['optimizer_beta'])
            beta = checkpoint['beta']
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args['resume'], checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args['resume']))

    # if len(args.gpus.split(',')) > 1:
    #    model = torch.nn.DataParallel(model)

    # dataset
    traindir = os.path.join(args['data_path'], 'train')
    valdir = os.path.join(args['data_path'], 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            # transforms.RandomResizedCrop(224),
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]))
    val_dataset = datasets.ImageFolder(
        valdir,
        transforms.Compose([
            # transforms.RandomResizedCrop(224),
            transforms.Resize((224, 224)),
            # transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]))

    batch_sampler = BalancedBatchSampler(train_dataset,
                                         args['batch_size'],
                                         args['batch_k'],
                                         length=args['batch_num'])

    train_loader = torch.utils.data.DataLoader(batch_sampler=batch_sampler,
                                               dataset=train_dataset,
                                               num_workers=args['workers'],
                                               pin_memory=True)

    # note: the balanced sampler above indexes into train_dataset, so the
    # validation loader needs its own sampler built from val_dataset
    val_sampler = BalancedBatchSampler(val_dataset, args['batch_size'],
                                       args['batch_k'], length=args['batch_num'])
    val_loader = torch.utils.data.DataLoader(batch_sampler=val_sampler,
                                             dataset=val_dataset,
                                             num_workers=args['workers'],
                                             pin_memory=True)

    if not os.path.exists('checkpoints/'):
        os.mkdir('checkpoints/')
    for epoch in range(args['start_epoch'], args['epochs']):
        adjust_learning_rate(optimizer, epoch, args)
        adjust_learning_rate(optimizer_beta, epoch, args, beta=True)
        # print(optimizer.state_dict()['param_groups'][0]['lr'])
        # print(optimizer_beta.state_dict()['param_groups'][0]['lr'])

        # train for one epoch
        train(train_loader, model, criterion, optimizer, optimizer_beta, beta,
              epoch, args)

        # evaluate
        #

        state = {
            'epoch': epoch + 1,
            'arch': args['model'],
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'optimizer_beta': optimizer_beta.state_dict(),
            'beta': beta
        }
        torch.save(
            state, 'checkpoints/%s_checkpoint_%d.pth.tar' %
            (args['save_prefix'], epoch + 1))
Code Example #11
                                        std=[0.229, 0.224, 0.225])]),
                                        loader=loader)
    return data   

args = get_args()

if args.gpu_ids:
    device = torch.device('cuda:{}'.format(args.gpu_ids[0]))
    cudnn.benchmark = True
else:
    device = torch.device('cpu')

#data = MetricData(data_root=args.img_folder, anno_file=args.anno, idx_file=args.idx_file)
data = imagefolder(args.img_folder)
data_test = imagefolder(args.img_folder_test)
dataset = torch.utils.data.DataLoader(data, batch_sampler=BalancedBatchSampler(data, batch_size=args.batch, batch_k=args.batch_k, length=args.num_batch), \
                                        num_workers=args.num_workers)
dataset_test = torch.utils.data.DataLoader(data_test, batch_sampler=BalancedBatchSampler(data_test, batch_size=args.batch, batch_k=args.batch_k, length=args.num_batch//2))
model = MetricLearner(pretrain=args.pretrain, normalize=True, batch_k=args.batch_k, att_heads=args.att_heads)
if not os.path.exists(args.ckpt):
    os.makedirs(args.ckpt)
    print('Init ', args.ckpt)
if args.resume:
    if args.ckpt.endswith('.pth'):
        state_dict = torch.load(args.ckpt)
    else:
        state_dict = torch.load(os.path.join(args.ckpt, 'best_performance.pth'))
    best_performace = state_dict['loss']
    start_epoch = state_dict['epoch'] + 1
    model.load_state_dict(state_dict['state_dict'], strict=False)
    print('Resume training. Start from epoch %d'%start_epoch)