Example #1
    def __getitem__(
            self,
            idx):  # default dtype: torch uses float32, numpy uses float64 (double)
        return torch.as_tensor(self.X[idx].toarray()).float(), torch.as_tensor(
            self.y[idx].toarray()).view(1, 1).float()


ds = PrepareData(X=data[:, :-1], y=data[:, -1])

# Do not shuffle: the data is a sequence!
# We cannot batch as with a feed-forward net because of h_prev; the hidden state
# has to be propagated manually inside the training loop.
ds = DataLoader(ds, batch_size=1, shuffle=False)

rnnlm = MikolovRNNLM(params.rnnlm)

criterion = torch.nn.NLLLoss()
optimizer = optim.SGD(rnnlm.parameters(), lr=params.rnnlm['lr'])
scheduler = ReduceLROnPlateau(optimizer,
                              mode='min',
                              factor=0.1,
                              patience=10,
                              verbose=False)
h_prev = torch.randn(params.rnnlm['h']).view(1, params.rnnlm['h'])

test_loss = []
running_loss = []
e = 1
while True:  # alternatively: for e in range(params.rnnlm['e'])  # epochs
    epoch_loss = 0
    l_prev = sys.maxsize
    for i, (X, y) in enumerate(ds):
        X = X.view(X.shape[0], X.shape[2])
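
# A minimal, self-contained sketch of the pattern the comments above describe:
# carrying the hidden state across steps by hand with batch_size=1. The names
# (toy_rnn, head, V, H) are illustrative only; the real MikolovRNNLM interface
# is not shown in this snippet.
import torch
import torch.nn as nn
import torch.optim as optim

V, H = 50, 16                              # toy vocab and hidden sizes
toy_rnn = nn.RNN(V, H, batch_first=True)
head = nn.Linear(H, V)
opt = optim.SGD(list(toy_rnn.parameters()) + list(head.parameters()), lr=0.1)
nll = nn.NLLLoss()

h = torch.zeros(1, 1, H)                   # (num_layers, batch=1, H)
for t in range(5):                         # pretend stream of 5 one-hot tokens
    x = torch.eye(V)[torch.randint(V, (1,))].view(1, 1, V)
    target = torch.randint(V, (1,))
    out, h_next = toy_rnn(x, h)
    log_probs = torch.log_softmax(head(out.view(1, H)), dim=1)
    loss = nll(log_probs, target)
    opt.zero_grad()
    loss.backward()
    opt.step()
    h = h_next.detach()                    # carry the state forward, cut the graph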
Example #2
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True),
        )

    def forward(self, x):
        x = self.model(x)

        return x


device = torch.device('cuda:0')
net = MLP().to(device)
optimizer = optim.SGD(net.parameters(), lr=learning_rate)
criteon = nn.CrossEntropyLoss().to(device)

for epoch in range(epochs):

    for batch_idx, (data, target) in enumerate(train_loader):
        data = data.view(-1, 28 * 28)
        data, target = data.to(device), target.to(device)

        logits = net(data)
        loss = criteon(logits, target)

        optimizer.zero_grad()
        loss.backward()
        # print(w1.grad.norm(), w2.grad.norm())
        optimizer.step()
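
# A hedged reconstruction of the truncated MLP definition used above. The first
# Linear layer is assumed to be 784 -> 200, consistent with the
# data.view(-1, 28 * 28) call in the training loop; the original class may differ.
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(784, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 200),
            nn.LeakyReLU(inplace=True),
            nn.Linear(200, 10),
            nn.LeakyReLU(inplace=True),
        )

    def forward(self, x):
        return self.model(x)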
Example #3
def run():
    batch_size = 32

    train_transform = transforms.Compose([
        transforms.Resize(144, interpolation=3),
        transforms.RandomCrop((256, 128)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    test_transform = transforms.Compose([
        transforms.Resize((288, 144), interpolation=3),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    test_flip_transform = transforms.Compose([
        transforms.Resize((288, 144), interpolation=3),
        functional.hflip,
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    train_dataset = Market1501(root + '/bounding_box_train',
                               transform=train_transform)
    train_loader = DataLoader(train_dataset,
                              batch_size=batch_size,
                              shuffle=True)

    query_dataset = Market1501(root + '/query', transform=test_transform)
    query_flip_dataset = Market1501(root + '/query',
                                    transform=test_flip_transform)
    query_loader = DataLoader(query_dataset,
                              batch_size=batch_size,
                              shuffle=False)
    query_flip_loader = DataLoader(query_flip_dataset,
                                   batch_size=batch_size,
                                   shuffle=False)

    test_dataset = Market1501(root + '/bounding_box_test',
                              transform=test_transform)
    test_flip_dataset = Market1501(root + '/bounding_box_test',
                                   transform=test_flip_transform)
    test_loader = DataLoader(test_dataset,
                             batch_size=batch_size,
                             shuffle=False)
    test_flip_loader = DataLoader(test_flip_dataset,
                                  batch_size=batch_size,
                                  shuffle=False)

    ide = IDE(num_classes=len(train_dataset.unique_ids)).to(DEVICE)
    criterion = nn.CrossEntropyLoss()

    params = [
        {
            'params': ide.backbone.parameters(),
            'lr': 0.01
        },
        {
            'params': ide.classifier.parameters(),
            'lr': 0.1
        },
    ]
    optimizer = optim.SGD(params,
                          momentum=0.9,
                          weight_decay=5e-4,
                          nesterov=True)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

    epochs = 50
    for epoch in range(epochs):
        ide.train()
        scheduler.step()
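        # NOTE: since PyTorch 1.1 the recommended order is optimizer.step() first
        # and scheduler.step() at the end of the epoch; calling it here keeps the
        # snippet's original schedule but triggers a UserWarning.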

        running_loss = 0.0
        for i, data in enumerate(train_loader):
            inputs, labels = data
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)

            optimizer.zero_grad()

            outputs = ide(inputs)
            loss = criterion(outputs[1], labels)
            loss.backward()

            optimizer.step()

            running_loss += loss.item()
            print('%d/%d - %d/%d - loss: %f' %
                  (epoch, epochs, i, len(train_loader), loss.item()))
        print('epoch: %d/%d - loss: %f' %
              (epoch, epochs, running_loss / len(train_loader)))

        if epoch % 10 == 9:
            ide.eval()

            query = np.concatenate([
                ide(inputs.to(DEVICE))[0].detach().cpu().numpy()
                for inputs, _ in query_loader
            ])
            query_flip = np.concatenate([
                ide(inputs.to(DEVICE))[0].detach().cpu().numpy()
                for inputs, _ in query_flip_loader
            ])

            test = np.concatenate([
                ide(inputs.to(DEVICE))[0].detach().cpu().numpy()
                for inputs, _ in test_loader
            ])
            test_flip = np.concatenate([
                ide(inputs.to(DEVICE))[0].detach().cpu().numpy()
                for inputs, _ in test_flip_loader
            ])

            # dist = cdist((query + query_flip) / 2., (test + test_flip) / 2.)
            dist = cdist(normalize(query + query_flip),
                         normalize(test + test_flip))
            r = cmc(dist,
                    query_dataset.ids,
                    test_dataset.ids,
                    query_dataset.cameras,
                    test_dataset.cameras,
                    separate_camera_set=False,
                    single_gallery_shot=False,
                    first_match_break=True)
            m_ap = mean_ap(dist, query_dataset.ids, test_dataset.ids,
                           query_dataset.cameras, test_dataset.cameras)
            print('epoch[%d]: mAP=%f, r@1=%f, r@3=%f, r@5=%f, r@10=%f' %
                  (epoch + 1, m_ap, r[0], r[2], r[4], r[9]))
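
# A hedged helper that factors out the repeated feature-extraction pattern used
# above; it assumes the same IDE interface, i.e. model(x)[0] is the embedding.
import numpy as np
import torch

def extract_features(model, loader, device):
    feats = []
    with torch.no_grad():  # no gradients are needed at evaluation time
        for inputs, _ in loader:
            feats.append(model(inputs.to(device))[0].cpu().numpy())
    return np.concatenate(feats)

# Usage sketch: query = extract_features(ide, query_loader, DEVICE), etc., then
# dist = cdist(normalize(query + query_flip), normalize(test + test_flip)).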
Example #4
    def forward(self, x):
        in_size = x.size(0)  # one batch
        # x: 64*10*12*12
        x = F.relu(self.mp(self.conv1(x)))
        # x: 64*20*4*4
        x = F.relu(self.mp(self.conv2(x)))
        # x: 64*320
        x = x.view(in_size, -1)  # flatten the tensor
        # x: 64*10
        x = self.fc(x)
        return F.log_softmax(x, dim=1)


model = Net()
if torch.cuda.is_available():
    model = model.cuda()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)


def train(epoch):
    loss_all = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = Variable(data), Variable(target)
        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        loss_all += loss.item()  # accumulate a Python float rather than a tensor
        if batch_idx % 200 == 0:
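
# A hedged reconstruction of the Net definition used above, inferred from the
# shape comments in forward() (assumes MNIST 28x28 inputs, 5x5 convolutions and
# a shared 2x2 max-pool); the original class may differ.
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)   # 28x28 -> 24x24, pooled to 12x12
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)  # 12x12 -> 8x8, pooled to 4x4
        self.mp = nn.MaxPool2d(2)
        self.fc = nn.Linear(320, 10)                   # 20 * 4 * 4 = 320

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.mp(self.conv1(x)))
        x = F.relu(self.mp(self.conv2(x)))
        x = x.view(in_size, -1)
        x = self.fc(x)
        return F.log_softmax(x, dim=1)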
Example #5
def main():
    """Do stuff."""
    args = parser.parse_args()
    # Don't uncomment the line below, and don't scale the learning rate linearly
    # with the number of GPUs either; it lowers accuracy.
    # args.batch_size = args.batch_size * torch.cuda.device_count()

    if args.mode == 'prune':
        args.save_folder = os.path.join(args.save_folder,
                                        str(args.target_sparsity))
        if args.initial_sparsity != 0.0:
            args.load_folder = os.path.join(args.load_folder,
                                            str(args.initial_sparsity))

    if args.save_folder and not os.path.isdir(args.save_folder):
        os.makedirs(args.save_folder)

    if args.log_path:
        set_logger(args.log_path)

    if args.pruning_ratio_to_acc_record_file and not os.path.isdir(
            args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0]):
        os.makedirs(args.pruning_ratio_to_acc_record_file.rsplit('/', 1)[0])

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        args.cuda = False

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    cudnn.benchmark = True

    # If set > 0, will resume training from a given checkpoint.
    resume_from_epoch = 0
    resume_folder = args.load_folder
    for try_epoch in range(200, 0, -1):
        if os.path.exists(
                args.checkpoint_format.format(save_folder=resume_folder,
                                              epoch=try_epoch)):
            resume_from_epoch = try_epoch
            break

    if args.restore_epoch:
        resume_from_epoch = args.restore_epoch

    # Set default train and test path if not provided as input.
    utils.set_dataset_paths(args)
    if resume_from_epoch:
        filepath = args.checkpoint_format.format(save_folder=resume_folder,
                                                 epoch=resume_from_epoch)
        checkpoint = torch.load(filepath)
        checkpoint_keys = checkpoint.keys()
        dataset_history = checkpoint['dataset_history']
        dataset2num_classes = checkpoint['dataset2num_classes']
        masks = checkpoint['masks']
        shared_layer_info = checkpoint['shared_layer_info']
        piggymask_floats = checkpoint['piggymask_floats']
        piggymask_task_tags = checkpoint['piggymask_task_tags']

        if 'num_for_construct' in checkpoint_keys:
            num_for_construct = checkpoint['num_for_construct']
        if args.mode == 'inference' and 'network_width_multiplier' in shared_layer_info[
                args.dataset]:  # TODO, temporary solution
            args.network_width_multiplier = shared_layer_info[
                args.dataset]['network_width_multiplier']
    else:
        dataset_history = []
        dataset2num_classes = {}
        masks = {}
        shared_layer_info = {}
        piggymask_floats = {}
        piggymask_task_tags = {}

    if args.baseline_acc_file is None or not os.path.isfile(
            args.baseline_acc_file):
        sys.exit(3)
    with open(args.baseline_acc_file, 'r') as jsonfile:
        json_data = json.load(jsonfile)
        baseline_acc = float(json_data[args.dataset])

    if args.mode == 'prune' and not args.pruning_ratio_to_acc_record_file:
        sys.exit(-1)

    if args.arch == 'resnet50':
        num_for_construct = [
            64, 64, 64 * 4, 128, 128 * 4, 256, 256 * 4, 512, 512 * 4
        ]
        model = models.__dict__[args.arch](pretrained=True,
                                           num_for_construct=num_for_construct,
                                           threshold=args.threshold)
    elif 'vgg' in args.arch:
        custom_cfg = [
            64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M',
            512, 512, 512, 'M'
        ]
        model = models.__dict__[args.arch](
            custom_cfg,
            dataset_history=dataset_history,
            dataset2num_classes=dataset2num_classes,
            network_width_multiplier=args.network_width_multiplier,
            shared_layer_info=shared_layer_info,
            groups=int(args.network_width_multiplier))
    else:
        print('Error!')
        sys.exit(1)

    # Add and set the model dataset.
    model.add_dataset(args.dataset, args.num_classes)
    model.set_dataset(args.dataset)

    model = nn.DataParallel(model)
    model = model.cuda()

    NEED_ADJUST_MASK = False
    task_id = model.module.datasets.index(args.dataset) + 1
    if not masks:
        for name, module in model.module.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(
                    module, nl.SharableLinear):
                mask = torch.ByteTensor(module.weight.data.size()).fill_(0)
                if 'cuda' in module.weight.data.type():
                    mask = mask.cuda()
                masks[name] = mask
                module.packnet_mask = mask
    else:
        # When the network has been expanded, new masks must be allocated.
        for name, module in model.module.named_modules():
            if isinstance(module, nl.SharableConv2d):
                if masks[name].size(0) < module.weight.data.size(0):
                    assert args.mode == 'finetune'
                    NEED_ADJUST_MASK = True
                elif masks[name].size(0) > module.weight.data.size(0):
                    assert args.mode == 'inference'
                    NEED_ADJUST_MASK = True

        if NEED_ADJUST_MASK:
            if args.mode == 'finetune':
                for name, module in model.module.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(task_id)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:masks[name].size(0), :, :, :].copy_(masks[name])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(task_id)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:masks[name].size(0), :masks[name].size(1)].copy_(
                            masks[name])
                        masks[name] = mask
            elif args.mode == 'inference':
                for name, module in model.module.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(task_id)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:, :, :, :].copy_(
                            masks[name][:mask.size(0), :, :, :])
                        masks[name] = mask
                    elif isinstance(module, nl.SharableLinear):
                        mask = torch.ByteTensor(
                            module.weight.data.size()).fill_(task_id)
                        if 'cuda' in module.weight.data.type():
                            mask = mask.cuda()
                        mask[:, :].copy_(
                            masks[name][:mask.size(0), :mask.size(1)])
                        masks[name] = mask

        for name, module in model.module.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(
                    module, nl.SharableLinear):
                module.packnet_mask = masks[name]

    if args.dataset not in shared_layer_info:

        shared_layer_info[args.dataset] = {
            'bias': {},
            'bn_layer_running_mean': {},
            'bn_layer_running_var': {},
            'bn_layer_weight': {},
            'bn_layer_bias': {}
        }

    NEED_ADJUST_MASK = False
    if task_id == 1:
        for name, module in model.module.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(
                    module, nl.SharableLinear):
                module.inference_task_id = task_id

    elif task_id == 2 and not piggymask_floats:
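        # First time a second task appears: create piggyback masks over weights
        # already claimed by earlier tasks (the float mask is initialised to 0.01
        # and tagged with the current task id wherever masks[name] != 0).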
        for name, module in model.module.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(
                    module, nl.SharableLinear):
                piggymask_floats[name] = torch.zeros_like(masks[name],
                                                          dtype=torch.float32)
                piggymask_task_tags[name] = torch.zeros_like(masks[name])
                piggymask_floats[name] = torch.where(
                    masks[name] != 0,
                    torch.full_like(piggymask_floats[name], 0.01),
                    piggymask_floats[name])
                piggymask_task_tags[name] = torch.where(
                    masks[name] != 0,
                    torch.full_like(piggymask_task_tags[name], task_id),
                    piggymask_task_tags[name])
                piggymask_floats[name] = Parameter(piggymask_floats[name])
                module.piggymask_float = piggymask_floats[name]
                module.piggymask_task_tag = piggymask_task_tags[name]
                module.inference_task_id = task_id
    elif task_id >= 2:
        # When the network has been expanded, new piggymasks must be allocated.
        for name, module in model.module.named_modules():
            if isinstance(module, nl.SharableConv2d):
                if piggymask_floats[name].size(0) < module.weight.data.size(0):
                    assert args.mode == 'finetune'
                    NEED_ADJUST_MASK = True
                elif piggymask_floats[name].size(0) > module.weight.data.size(
                        0):
                    assert args.mode == 'inference'
                    NEED_ADJUST_MASK = True

        if NEED_ADJUST_MASK:
            if args.mode == 'finetune':
                for name, module in model.module.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        piggymask_float = torch.zeros_like(masks[name],
                                                           dtype=torch.float32)
                        piggymask_task_tag = torch.zeros_like(masks[name])
                        piggymask_float[:piggymask_floats[name].
                                        size(0), :, :, :].copy_(
                                            piggymask_floats[name])
                        piggymask_task_tag[:piggymask_task_tags[name].
                                           size(0), :, :, :].copy_(
                                               piggymask_task_tags[name])
                        piggymask_floats[name] = Parameter(piggymask_float)
                        piggymask_task_tags[name] = piggymask_task_tag
                    elif isinstance(module, nl.SharableLinear):
                        piggymask_float = torch.zeros_like(masks[name],
                                                           dtype=torch.float32)
                        piggymask_task_tag = torch.zeros_like(masks[name])
                        piggymask_float[:piggymask_floats[name].size(0), :
                                        piggymask_floats[name].size(1)].copy_(
                                            piggymask_floats[name])
                        piggymask_task_tag[:piggymask_task_tags[name].size(
                            0), :piggymask_task_tags[name].size(1)].copy_(
                                piggymask_task_tags[name])
                        piggymask_floats[name] = Parameter(piggymask_float)
                        piggymask_task_tags[name] = piggymask_task_tag
            elif args.mode == 'inference':
                for name, module in model.module.named_modules():
                    if isinstance(module, nl.SharableConv2d):
                        piggymask_float = torch.zeros_like(masks[name],
                                                           dtype=torch.float32)
                        piggymask_task_tag = torch.zeros_like(masks[name])
                        piggymask_float[:, :, :, :].copy_(
                            piggymask_floats[name]
                            [:piggymask_float.size(0), :, :, :])
                        piggymask_floats[name] = Parameter(piggymask_float)
                        piggymask_task_tag[:, :, :, :].copy_(
                            piggymask_task_tags[name]
                            [:piggymask_task_tag.size(0), :, :, :])
                        piggymask_task_tags[name] = piggymask_task_tag
                    elif isinstance(module, nl.SharableLinear):
                        piggymask_float = torch.zeros_like(masks[name],
                                                           dtype=torch.float32)
                        piggymask_task_tag = torch.zeros_like(masks[name])
                        piggymask_float[:, :].copy_(
                            piggymask_floats[name][:piggymask_float.size(0), :
                                                   piggymask_float.size(1)])
                        piggymask_floats[name] = Parameter(piggymask_float)
                        piggymask_task_tag[:, :].copy_(
                            piggymask_task_tags[name][:piggymask_task_tag.size(
                                0), :piggymask_task_tag.size(1)])
                        piggymask_task_tags[name] = piggymask_task_tag

        for name, module in model.module.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(
                    module, nl.SharableLinear):
                if args.mode == 'finetune' and not args.finetune_again:
                    piggymask_task_tags[name].data[
                        piggymask_task_tags[name].data.eq(0)
                        & (masks[name] != 0)] = task_id
                    piggymask_floats[name].data[
                        piggymask_task_tags[name].data.eq(task_id)] = 0.01

                module.piggymask_float = piggymask_floats[name]
                module.piggymask_task_tag = piggymask_task_tags[name]
                module.inference_task_id = task_id

    shared_layer_info[args.dataset][
        'network_width_multiplier'] = args.network_width_multiplier

    if args.num_classes == 2:
        train_loader = dataset.cifar100_train_loader_two_class(
            args.dataset, args.batch_size)
        val_loader = dataset.cifar100_val_loader_two_class(
            args.dataset, args.val_batch_size)
    elif args.num_classes == 5:
        train_loader = dataset.cifar100_train_loader(args.dataset,
                                                     args.batch_size)
        val_loader = dataset.cifar100_val_loader(args.dataset,
                                                 args.val_batch_size)
    else:
        print("num_classes should be either 2 or 5")
        sys.exit(1)

    # If checkpoints are saved to a folder other than the one they are loaded from, restart the epoch count.
    if args.save_folder != args.load_folder:
        start_epoch = 0
    else:
        start_epoch = resume_from_epoch

    curr_prune_step = begin_prune_step = start_epoch * len(train_loader)
    end_prune_step = curr_prune_step + args.pruning_interval * len(
        train_loader)
    manager = Manager(args, model, shared_layer_info, masks, train_loader,
                      val_loader, begin_prune_step, end_prune_step)
    if args.mode == 'inference':
        manager.load_checkpoint_only_for_evaluate(resume_from_epoch,
                                                  resume_folder)
        manager.validate(resume_from_epoch - 1)
        return

    # manager.inference_dataset_idx
    lr = args.lr
    lr_mask = args.lr_mask
    # update all layers
    named_params = dict(model.named_parameters())
    params_to_optimize_via_SGD = []
    named_of_params_to_optimize_via_SGD = []
    masks_to_optimize_via_Adam = []
    named_of_masks_to_optimize_via_Adam = []

    for name, param in named_params.items():
        if 'classifiers' in name:
            if '.{}.'.format(model.module.datasets.index(
                    args.dataset)) in name:
                params_to_optimize_via_SGD.append(param)
                named_of_params_to_optimize_via_SGD.append(name)
            continue
        elif 'piggymask' in name:
            masks_to_optimize_via_Adam.append(param)
            named_of_masks_to_optimize_via_Adam.append(name)
        else:
            params_to_optimize_via_SGD.append(param)
            named_of_params_to_optimize_via_SGD.append(name)

    optimizer_network = optim.SGD(params_to_optimize_via_SGD,
                                  lr=lr,
                                  weight_decay=0.0,
                                  momentum=0.9,
                                  nesterov=True)
    optimizers = Optimizers()
    optimizers.add(optimizer_network, lr)

    if masks_to_optimize_via_Adam:
        optimizer_mask = optim.Adam(masks_to_optimize_via_Adam, lr=lr_mask)
        optimizers.add(optimizer_mask, lr_mask)

    manager.load_checkpoint(optimizers, resume_from_epoch, resume_folder,
                            NEED_ADJUST_MASK)

    #   k = int(args.network_width_multiplier)
    #   assert k >= 2
    #   for name, module in model.module.named_modules():
    #       if isinstance(module, nl.SharableConv2d):
    #           n = len(module.weight)
    #           n = int((n // k * (k-1)) * 0.1)
    #           # module.weight.data[:n, :, :, :] = 0.0
    #           module.packnet_mask[:n, :, :, :] = 255

    #       if isinstance(module, nl.SharableLinear):
    #           n = len(module.bias)
    #           n = int((n // k * (k-1)) * 0.1)
    #           # module.weight.data[:n, :] = 0.0
    #           # module.bias.data[:n] = 0.0
    #           module.packnet_mask[:n, :] = 255

    #       if isinstance(module, nn.BatchNorm2d):
    #           n = len(module.weight)
    #           n = int((n // k * (k-1)) * 0.1)
    #           # module.weight.data[:n] = 0.0
    """Performs training."""
    curr_lrs = []
    for optimizer in optimizers:
        for param_group in optimizer.param_groups:
            curr_lrs.append(param_group['lr'])
            break

    if args.mode == 'prune':
        if 'gradual_prune' in args.load_folder and args.save_folder == args.load_folder:
            args.epochs = 20 + resume_from_epoch
        logging.info('')
        logging.info('Before pruning: ')
        logging.info('Sparsity range: {} -> {}'.format(args.initial_sparsity,
                                                       args.target_sparsity))

        must_pruning_ratio_for_curr_task = 0.0

        json_data = {}
        if os.path.isfile(args.pruning_ratio_to_acc_record_file):
            with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file:
                json_data = json.load(json_file)

        if args.network_width_multiplier == args.max_allowed_network_width_multiplier and json_data[
                '0.0'] < baseline_acc:
            # If we have reached the width upper bound and still have not met the
            # target accuracy on the current task, we prune anyway.
            logging.info(
                'Reached the network-width upper bound without meeting the target accuracy on the current task'
            )
            remain_num_tasks = args.total_num_tasks - len(dataset_history)
            logging.info('remain_num_tasks: {}'.format(remain_num_tasks))
            ratio_allow_for_curr_task = round(1.0 / (remain_num_tasks + 1), 1)
            logging.info('ratio_allow_for_curr_task: {:.4f}'.format(
                ratio_allow_for_curr_task))
            must_pruning_ratio_for_curr_task = 1.0 - ratio_allow_for_curr_task
            if args.initial_sparsity >= must_pruning_ratio_for_curr_task:
                sys.exit(6)

        manager.validate(start_epoch - 1)
        logging.info('')
    elif args.mode == 'finetune':
        if not args.finetune_again:
            manager.pruner.make_finetuning_mask()
            logging.info('Finetune stage...')
        else:
            logging.info('Piggymask Retrain...')
            history_best_avg_val_acc_when_retraining = manager.validate(
                start_epoch - 1)
            num_epochs_that_criterion_does_not_get_better = 0

        stop_lr_mask = True
        if manager.pruner.calculate_curr_task_ratio() == 0.0:
            logging.info(
                'There is no left space in convolutional layer for curr task'
                ', we will try to use prior experience as long as possible')
            stop_lr_mask = False

    for epoch_idx in range(start_epoch, args.epochs):
        avg_train_acc, curr_prune_step = manager.train(optimizers, epoch_idx,
                                                       curr_lrs,
                                                       curr_prune_step)

        avg_val_acc = manager.validate(epoch_idx)

        # if args.mode == 'prune' and (epoch_idx+1) >= (args.pruning_interval + start_epoch) and (
        #     avg_val_acc > history_best_avg_val_acc_when_prune):
        #     pass
        if args.finetune_again:
            if avg_val_acc > history_best_avg_val_acc_when_retraining:
                history_best_avg_val_acc_when_retraining = avg_val_acc

                num_epochs_that_criterion_does_not_get_better = 0
                if args.save_folder is not None:
                    for path in os.listdir(args.save_folder):
                        if '.pth.tar' in path:
                            os.remove(os.path.join(args.save_folder, path))
                else:
                    print('Something is wrong! Block the program with pdb')
                    pdb.set_trace()

                history_best_avg_val_acc = avg_val_acc
                manager.save_checkpoint(optimizers, epoch_idx,
                                        args.save_folder)
            else:
                num_epochs_that_criterion_does_not_get_better += 1

            if args.finetune_again and num_epochs_that_criterion_does_not_get_better == 5:
                logging.info("stop retraining")
                sys.exit(0)

        if args.mode == 'finetune':
            if epoch_idx + 1 == 50 or epoch_idx + 1 == 80:
                for param_group in optimizers[0].param_groups:
                    param_group['lr'] *= 0.1
                curr_lrs[0] = param_group['lr']
            if len(optimizers.lrs) == 2:
                if epoch_idx + 1 == 50:
                    for param_group in optimizers[1].param_groups:
                        param_group['lr'] *= 0.2
                if stop_lr_mask and epoch_idx + 1 == 70:
                    for param_group in optimizers[1].param_groups:
                        param_group['lr'] *= 0.0

                curr_lrs[1] = param_group['lr']

    if args.save_folder is not None:
        pass
    #     paths = os.listdir(args.save_folder)
    #     if paths and '.pth.tar' in paths[0]:
    #         for checkpoint_file in paths:
    #             os.remove(os.path.join(args.save_folder, checkpoint_file))
    else:
        print('Something is wrong! Block the program with pdb')

    if task_id >= 2:
        for name, module in model.module.named_modules():
            if isinstance(module, nl.SharableConv2d) or isinstance(
                    module, nl.SharableLinear):
                if args.mode == 'finetune':
                    module.piggymask_task_tag[module.piggymask_float.le(
                        0.005)] = 0

    if avg_train_acc > 0.95:
        manager.save_checkpoint(optimizers, epoch_idx, args.save_folder)

    logging.info('-' * 16)

    if args.pruning_ratio_to_acc_record_file:
        json_data = {}
        if os.path.isfile(args.pruning_ratio_to_acc_record_file):
            with open(args.pruning_ratio_to_acc_record_file, 'r') as json_file:
                json_data = json.load(json_file)

        if args.mode == 'finetune' and not args.test_piggymask:
            json_data[0.0] = round(avg_val_acc, 4)
            with open(args.pruning_ratio_to_acc_record_file, 'w') as json_file:
                json.dump(json_data, json_file)
            if avg_train_acc > 0.95 and avg_val_acc >= baseline_acc:
                pass
            else:
                logging.info("It's time to expand the Network")
                logging.info('Auto expand network')
                sys.exit(2)

            if manager.pruner.calculate_curr_task_ratio() == 0.0:
                logging.info(
                    'There is no left space in convolutional layer for curr task, so needless to prune'
                )
                sys.exit(5)

        elif args.mode == 'prune':
            if avg_train_acc > 0.95:
                json_data[args.target_sparsity] = round(avg_val_acc, 4)
                with open(args.pruning_ratio_to_acc_record_file,
                          'w') as json_file:
                    json.dump(json_data, json_file)
            else:
                sys.exit(6)

            must_pruning_ratio_for_curr_task = 0.0

            if args.network_width_multiplier == args.max_allowed_network_width_multiplier and json_data[
                    '0.0'] < baseline_acc:
                # If we have reached the width upper bound and still have not met
                # the target accuracy on the current task, we prune anyway.
                logging.info(
                    'Reached the network-width upper bound without meeting the target accuracy on the current task'
                )
                remain_num_tasks = args.total_num_tasks - len(dataset_history)
                logging.info('remain_num_tasks: {}'.format(remain_num_tasks))
                ratio_allow_for_curr_task = round(1.0 / (remain_num_tasks + 1),
                                                  1)
                logging.info('ratio_allow_for_curr_task: {:.4f}'.format(
                    ratio_allow_for_curr_task))
                must_pruning_ratio_for_curr_task = 1.0 - ratio_allow_for_curr_task
                if args.target_sparsity >= must_pruning_ratio_for_curr_task:
                    sys.exit(6)
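
# A hedged sketch of the small Optimizers container the snippet above relies on;
# it has to be iterable, indexable, and expose .lrs. The project's real class
# may differ in its details.
class Optimizers(object):
    def __init__(self):
        self.optimizers = []
        self.lrs = []

    def add(self, optimizer, lr):
        self.optimizers.append(optimizer)
        self.lrs.append(lr)

    def __getitem__(self, index):
        return self.optimizers[index]

    def __iter__(self):
        return iter(self.optimizers)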
Example #6
	#############################

	net = load_model(args.checkpoint, 'cpu')

	#############################
	#
	# Loss and optimizer 
	#
	#############################
	miner = miners.BatchHardMiner().to(device)
	loss_func = losses.TripletMarginLoss(margin=0.3).to(device)

	if args.optimizer == 'ADAM':
		optimizer = optim.Adam(net.parameters(), lr=learning_rate_)#1e-3)
	elif args.optimizer == 'SGD':
		optimizer = optim.SGD(net.parameters(), lr=learning_rate_, momentum=0.9, weight_decay=5e-4, nesterov=True)
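	# A sketch of how the miner and loss are typically combined inside a training
	# step (standard pytorch-metric-learning usage; the actual loop is not shown
	# in this snippet):
	#   embeddings = net(images)
	#   hard_pairs = miner(embeddings, labels)
	#   loss = loss_func(embeddings, labels, hard_pairs)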


	#############################
	#
	# Resume 
	#
	#############################
	if dataset_name =='DeepFashion':
		checkpoint_file_name = '{}/{}/{}_{}_{}'.format(checkpoint_path,dataset_name,dataset_name,args.optimizer,combinations_type)

	if args.resume:
		# Load checkpoint.
		load_epoch_num = args.load_epoch_num - 1
		checkpoint_number = '{}_{}_ckpt.t7'.format(checkpoint_file_name,load_epoch_num)
		print(checkpoint_number)
Example #7

# if y_hat.requires_grad:
def log_hook(grad_input):
    print("logging", grad_input.shape)
    grads[0] = grad_input
    # torch.cat((grad_input.detach().cpu(), y_hat.detach().cpu()), dim=0)
    # grad_input_batch = torch.cat(tuple(torch.cat(tuple(vis(e_0[c]) for c in range(e_0.shape[0])), dim=1) for e_0 in grad_input), dim=2)
    # self.logger.experiment.add_image(f'train_regression_grad', grad_input_batch, self.global_step)
    # handle.remove()


handle = embedding.register_hook(log_hook)
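# register_hook on a tensor fires during backward() with the gradient of the
# loss w.r.t. embedding, so log_hook captures it for the visualisation below.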

optimizer = optim.SGD(list(sslt.parameters()) + [
    embedding,
], lr=1e1)

for it in range(0, iterations):
    optimizer.zero_grad()
    loss, _, _ = sslt.loss_CPCshift(None, embedding, (4, 2))
    loss.backward()
    vis_grad = grads[0].detach()
    vis_grad = vis_grad / (vis_grad.abs().max() + 1e-8)
    vis_grad = vis_grad.detach().cpu().numpy()

    vis_emb = embedding.detach()
    vis_emb = vis_emb / (vis_emb.abs().max() + 1e-8)
    vis_emb = vis_emb.detach().cpu().numpy()

    for c in range(1):
Example #8
elif args.model == 'resnet18':
    model = resnet18(num_classes=10)
elif args.model == 'resnet34':
    model = resnet34(num_classes=10)
elif args.model == 'resnet50':
    model = resnet50(num_classes=10)
elif args.model == 'vgg16':
    model = VGG(vgg_name='vgg16', num_classes=10)
elif args.model == 'MLP':
    model = MLP()
else:
    raise ValueError('Unrecognized training model')

if args.optim == 'SGD':
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
elif args.optim == 'LBFGS':
    optimizer = optim.LBFGS(model.parameters(), lr=args.lr)
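    # Note: unlike SGD, LBFGS expects optimizer.step(closure) with a closure
    # that re-evaluates the model and returns the loss.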

num_epochs = args.epoch
lr = args.lr
print_itr = args.print_frq
criterion = nn.CrossEntropyLoss()

start_epoch = 0
if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/{}/{}_best.pth'.format(
Example #9
def train():
    if args.dataset == 'COCO':
        if args.dataset_root == VOC_ROOT:
            if not os.path.exists(COCO_ROOT):
                parser.error('Must specify dataset_root if specifying dataset')
            print("WARNING: Using default COCO dataset_root because " +
                  "--dataset_root was not specified.")
            args.dataset_root = COCO_ROOT
        cfg = coco
        dataset = COCODetection(root=args.dataset_root,
                                transform=SSDAugmentation(cfg['min_dim'],
                                                          MEANS))
    elif args.dataset == 'VOC':
        if args.dataset_root == COCO_ROOT:
            parser.error('Must specify dataset if specifying dataset_root')
        cfg = voc
        dataset = VOCDetection(root=args.dataset_root,
                               transform=SSDAugmentation(cfg['min_dim'],
                                                         MEANS))

    if args.visdom:
        import visdom
        global viz
        viz = visdom.Visdom()

    ssd_net = build_ssd('train', cfg['min_dim'], cfg['num_classes'])
    net = ssd_net

    if args.cuda:
        net = torch.nn.DataParallel(ssd_net)
        cudnn.benchmark = True

    if args.resume:
        print('Resuming training, loading {}...'.format(args.resume))
        ssd_net.load_weights(args.resume)
    else:
        vgg_weights = torch.load(args.save_folder + args.basenet)
        print('Loading base network...')
        ssd_net.vgg.load_state_dict(vgg_weights)

    if args.cuda:
        net = net.cuda()

    if not args.resume:
        print('Initializing weights...')
        # initialize newly added layers' weights with xavier method
        ssd_net.extras.apply(weights_init)
        ssd_net.loc.apply(weights_init)
        ssd_net.conf.apply(weights_init)

    optimizer = optim.SGD(net.parameters(), lr=args.lr, momentum=args.momentum,
                          weight_decay=args.weight_decay)
    criterion = MultiBoxLoss(cfg['num_classes'], 0.5, True, 0, True, 3, 0.5,
                             False, args.cuda)

    net.train()
    # loss counters
    loc_loss = 0
    conf_loss = 0
    epoch = 0
    print('Loading the dataset...')

    epoch_size = len(dataset) // args.batch_size
    print('Training SSD on:', dataset.name)
    print('Using the specified args:')
    print(args)

    step_index = 0

    if args.visdom:
        vis_title = 'SSD.PyTorch on ' + dataset.name
        vis_legend = ['Loc Loss', 'Conf Loss', 'Total Loss']
        iter_plot = create_vis_plot('Iteration', 'Loss', vis_title, vis_legend)
        epoch_plot = create_vis_plot('Epoch', 'Loss', vis_title, vis_legend)

    data_loader = data.DataLoader(dataset, args.batch_size,
                                  num_workers=args.num_workers,
                                  shuffle=True, collate_fn=detection_collate,
                                  pin_memory=True)
    # create batch iterator
    batch_iterator = iter(data_loader)
    for iteration in range(args.start_iter, cfg['max_iter']):
        if args.visdom and iteration != 0 and (iteration % epoch_size == 0):
            update_vis_plot(epoch, loc_loss, conf_loss, epoch_plot, None,
                            'append', epoch_size)
            # reset epoch loss counters
            loc_loss = 0
            conf_loss = 0
            epoch += 1

        if iteration in cfg['lr_steps']:
            step_index += 1
            adjust_learning_rate(optimizer, args.gamma, step_index)

        # load train data
        # images, targets = next(batch_iterator)
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(data_loader)
            images, targets = next(batch_iterator)

        if args.cuda:
            images = Variable(images.cuda())
            targets = [Variable(ann.cuda(), volatile=True) for ann in targets]
        else:
            images = Variable(images)
            targets = [Variable(ann, volatile=True) for ann in targets]
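        # Note: Variable and volatile are deprecated no-ops since PyTorch 0.4;
        # plain tensors (with torch.no_grad() where gradients are unwanted) suffice.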
        # forward
        t0 = time.time()
        out = net(images)
        # backprop
        optimizer.zero_grad()
        loss_l, loss_c = criterion(out, targets)
        loss = loss_l + loss_c
        loss.backward()
        optimizer.step()
        t1 = time.time()
        #loc_loss += loss_l.data[0]
        #conf_loss += loss_c.data[0]
        loc_loss += loss_l.item()
        conf_loss += loss_c.item()

        if iteration % 10 == 0:
            print('timer: %.4f sec.' % (t1 - t0))
            #print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.data[0]), end=' ')
            print('iter ' + repr(iteration) + ' || Loss: %.4f ||' % (loss.item()), end=' ')

        if args.visdom:
            # update_vis_plot(iteration, loss_l.data[0], loss_c.data[0],
            update_vis_plot(iteration, loss_l.item(), loss_c.item(),
                            iter_plot, epoch_plot, 'append')

        if iteration != 0 and iteration % 5000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(ssd_net.state_dict(), 'weights/ssd300_COCO_' +
                       repr(iteration) + '.pth')
    torch.save(ssd_net.state_dict(),
               args.save_folder + '' + args.dataset + '.pth')
Example #10
y_test = np.zeros((25000, ))
y_test[0:12500] = 1

#calling the model
vocab_size += 1
model = BOW_model(vocab_size, 500)
model.cuda()

# opt = 'sgd'
# LR = 0.01
opt = 'adam'
LR = 0.001
if (opt == 'adam'):
    optimizer = optim.Adam(model.parameters(), lr=LR)
elif (opt == 'sgd'):
    optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

batch_size = 200
no_of_epochs = 6
L_Y_train = len(y_train)
L_Y_test = len(y_test)

model.train()

train_loss = []
train_accu = []
test_accu = []

for epoch in range(no_of_epochs):

    # training

net = Net()


# 3. Define a Loss function and optimizer
# ---------------------------------------
# 
# Let's use a Classification Cross-Entropy loss and SGD with momentum

# In[4]:

import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)


# 4. Train the network
# --------------------
# 
# This is when things start to get interesting.
# We simply have to loop over our data iterator, feed the inputs to the
# network, and optimize.

# In[11]:
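
# A minimal sketch of the training loop the comments above describe (the classic
# CIFAR-10 tutorial loop); trainloader is assumed to have been defined earlier in
# the notebook and is not shown in this snippet.
for epoch in range(2):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0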

start_time = time.time()
fileName = "NetworkConfiguration_1_2.txt"
file = open(fileName, 'w')
file.close()
def create_optimizer(net, lr, mom):
    optimizer = optim.SGD(net.parameters(), lr, mom)
    return optimizer
Example #13
        torch.set_num_threads(os.cpu_count())
        print(f'Using {device}: {torch.get_num_threads()} threads')

    # load data
    train_loader, test_loader = get_data_loader(opt, im_size=32)

    # model + loss function + optimizer + scheduler
    net = VGGAttention(mode=opt.attention_mode)
    criterion = nn.CrossEntropyLoss()
    if torch.cuda.is_available():
        model = nn.DataParallel(
            net, device_ids=list(range(torch.cuda.device_count()))).to(device)
    else:
        model = net.to(device)
    criterion.to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=opt.lr,
                          momentum=0.9,
                          weight_decay=5e-4)
    scheduler = lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda epoch: np.power(0.5, int(epoch / 25)))
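    # (LambdaLR multiplies the base lr by 0.5 ** (epoch // 25), i.e. the learning
    # rate is halved every 25 epochs)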

    # time to train/validate
    obj = AttentionNetwork(opt=opt,
                           model=model,
                           criterion=criterion,
                           optimizer=optimizer,
                           scheduler=scheduler,
                           device=device)
    obj.train_validate(train_loader=train_loader, test_loader=test_loader)
Example #14
def train():
    # initialize the model
    model_path = os.path.join(constant.MODEL_DIR, constant.PRETRAINED_MODEL)
    c3d = model.C3D(constant.NUM_CLASSES)

    print(model_path)

    device = get_default_device()

    if device == torch.device('cpu'):
        pretrained_param = torch.load(model_path, map_location='cpu')
    else:
        pretrained_param = torch.load(model_path)

    to_load = {}
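
    # Keep the pretrained weights only for the conv layers; the fc layers retain
    # the new model's freshly initialised parameters.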

    for key in pretrained_param.keys():
        if 'conv' in key:
            to_load[key] = pretrained_param[key]
        else:
            to_load[key] = c3d.state_dict()[key]

    c3d.load_state_dict(to_load)

    print(c3d.state_dict())

    train_params = [{'params': c3d.get_conv_1x_lr_param(), 'weight_decay': constant.WEIGHT_DECAY},
                    {'params': c3d.get_conv_2x_lr_param(), 'lr': constant.BASE_LR * 2},
                    {'params': c3d.get_fc_1x_lr_param(), 'weight_decay': constant.WEIGHT_DECAY},
                    {'params': c3d.get_fc_2x_lr_param(), 'lr': constant.BASE_LR * 2}]

    # import input data
    trainset = UCF101DataSet(framelist_file=constant.TRAIN_LIST, clip_len=constant.CLIP_LENGTH,
                             crop_size=constant.CROP_SIZE, split="training")
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=constant.TRAIN_BATCH_SIZE,
                                              shuffle=True, num_workers=10)

    c3d.to(device, non_blocking=True, dtype=torch.float)
    c3d.train()

    # define loss function (Cross Entropy loss)
    criterion = nn.CrossEntropyLoss()
    criterion.to(device)

    # define optimizer
    optimizer = optim.SGD(train_params, lr=constant.BASE_LR,
                          momentum=constant.MOMENTUM, weight_decay=0)

    print(optimizer.state_dict())
    # define lr schedule

    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=constant.LR_DECAY_STEP_SIZE,
                                          gamma=constant.LR_DECAY_GAMMA)
    writer = SummaryWriter()

    for epoch in range(constant.NUM_EPOCHES):

        running_loss = 0.0
        running_accuracy = 0.0
        scheduler.step()

        for i, data in enumerate(trainloader, 0):
            step = epoch * len(trainloader) + i
            inputs, labels = data['clip'].to(device, dtype=torch.float), data['label'].to(
                device=device, dtype=torch.int64)
            optimizer.zero_grad()

            outputs = c3d(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            print('Step %d, loss: %.3f' % (i, loss.item()))
            writer.add_scalar('Train/Loss', loss.item(), step)

            outputs = nn.Softmax(dim=1)(outputs)
            _, predict_label = outputs.max(1)
            correct = (predict_label == labels).sum().item()
            accuracy = float(correct) / float(constant.TRAIN_BATCH_SIZE)
            running_accuracy += accuracy
            writer.add_scalar('Train/Accuracy', accuracy, step)

            print("iteration %d, accuracy = %.3f" % (i, accuracy))

            if i % 100 == 99:
                print('[%d, %5d] loss: %.3f' %
                      (epoch + 1, i + 1, running_loss / 100))
                print('[%d, %5d] accuracy: %.3f' %
                      (epoch + 1, i + 1, running_accuracy / 100))
                running_loss = 0.0
                running_accuracy = 0.0
            if step % 10000 == 9999:
                torch.save(c3d.state_dict(), os.path.join(
                    constant.MODEL_DIR, '%s-%s-%d' % (constant.TRAIN_MODEL_NAME, datetime.date.today(), step + 1)))

    print('Finished Training')
    writer.close()
Example #15
    parser.add_argument('--dataRoot', type=str, default='data')
    opt = parser.parse_args()
    config = None
    if opt.config == 'transformer':
        config = TransformerConfig()
    if not os.path.exists('weight'):
        os.mkdir('weight')
    d_model = config.d_model
    n_layers = config.n_layers
    heads = config.heads
    dropout = config.dropout
    rtnet = RTNet(d_model, n_layers, heads, dropout)
    rtnet = rtnet.cuda()
    opti = None
    if opt.opti == 'SGD':
        opti = optim.SGD(rtnet.parameters(), lr=0.01)

    # Load the embedding vectors
    f = open('config/embedding.txt', 'r')
    op_embedding_str = f.read()
    op_embedding_Dict = json.loads(op_embedding_str)
    f.close()

    # Load the operation vectors
    f = open('config/opDict.txt', 'r')
    op_str = f.read()
    op_Dict = json.loads(op_str)
    f.close()

    # Loss function
    # loss_func = F.cross_entropy
Example #16
        if args.dataset == 'mnist':
            net = fc(width=args.width,
                     depth=args.depth,
                     num_classes=num_classes).to(args.device)
        elif args.dataset == 'cifar10':
            net = fc(width=args.width,
                     depth=args.depth,
                     num_classes=num_classes,
                     input_dim=3 * 32 * 32).to(args.device)
    elif args.model == 'alexnet':
        net = alexnet(ch=args.scale, num_classes=num_classes).to(args.device)

    print(net)

    opt = optim.SGD(net.parameters(),
                    lr=args.lr,
                    momentum=args.mom,
                    weight_decay=args.wd)

    if args.lr_schedule:
        milestone = int(args.iterations / 3)
        scheduler = optim.lr_scheduler.MultiStepLR(
            opt, milestones=[milestone, 2 * milestone], gamma=0.5)

    if args.criterion == 'NLL':
        crit = nn.CrossEntropyLoss().to(args.device)
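        # ('NLL' maps to CrossEntropyLoss here, i.e. log-softmax and NLL combined)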
    elif args.criterion == 'linear_hinge':
        crit = linear_hinge_loss

    def cycle_loader(dataloader):
        while True:
            for data in dataloader:
                yield data  # yield batches forever (assumed completion of the truncated generator)
def train_network():
    network = Stage1CountingNet()
    model_save_dir = './models_stage_1'
    model_save_path = os.path.join(model_save_dir, 'train2')
    if not os.path.exists(model_save_path):
        os.makedirs(model_save_path)
        os.makedirs(os.path.join(model_save_path, 'snapshots'))
    global f
    snapshot_path = os.path.join(model_save_path, 'snapshots')
    f = open(os.path.join(model_save_path, 'train0.log'), 'w')

    # -- Logging Parameters
    log(f, 'args: ' + str(args))
    log(f, 'model: ' + str(network), False)
    log(f, 'Stage1..')
    log(f, 'LR: %.12f.' % (args.lr))

    start_epoch = 0
    num_epochs = args.epochs
    valid_losses = {}
    train_losses = {}
    for metric in ['loss1', 'new_mae']:
        valid_losses[metric] = []

    for metric in ['loss1']:
        train_losses[metric] = []

    batch_size = args.batch_size
    num_train_images = len(dataset.data_files['train'])
    num_patches_per_image = args.patches
    num_batches_per_epoch = num_patches_per_image * num_train_images // batch_size

    optimizer = optim.SGD(filter(lambda p: p.requires_grad, network.parameters()),
                          lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    # -- Main Training Loop
    all_epoch_test_valid_accs = []
    all_epoch_test_valid_per_rot_accs = []
    for e_i, epoch in enumerate(range(start_epoch, num_epochs)):
        avg_loss = [0.0 for _ in range(1)]

        # b_i - batch index
        total_match_count = 0
        total_count = 0
        total_per_angle_count = np.zeros(num_rotations)
        total_per_angle_match_count = np.zeros(num_rotations)
        for b_i in range(num_batches_per_epoch):
            # Generate next training sample
            Xs, _ = dataset.train_get_data(batch_size=args.batch_size)

            # 1. Crop image to 112x112 . Xs shape: (B,3,h,w)
            image_size = Xs.shape[-1]
            crop_start_loc = [image_size // 4, image_size // 4]

            Xs = Xs[:, :, crop_start_loc[0]: crop_start_loc[0] + image_new_crop_size,
                    crop_start_loc[1]: crop_start_loc[1] + image_new_crop_size]

            # 2. Randomly rotate each image
            new_images_input = np.zeros_like(Xs, dtype=Xs.dtype)  # (B,3,h',w')
            new_image_rotation_gt = np.zeros(
                (Xs.shape[0], ), dtype=np.int32)  # (B,)
            images = np.transpose(Xs, (0, 2, 3, 1))  # (B,h',w',3)
            for i in range(images.shape[0]):
                image = images[i]  # (h',w',3)
                chosen_index = np.random.choice(num_rotations, 1)[0]
                chosen_angle = rotation_angles[chosen_index]
                if chosen_angle != 0:
                    image = cv2.rotate(
                        image, rotation_angles_cv2[chosen_index])
                new_images_input[i, :, :, :] = np.transpose(image, (2, 0, 1))
                new_image_rotation_gt[i] = chosen_index
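            # Each image receives one of the num_rotations possible rotations; the
            # chosen rotation index serves as the self-supervised label.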

            losses, matches, actual_angle_dist, matches_by_angle = train_function(new_images_input,
                                                                                  new_image_rotation_gt,
                                                                                  network, optimizer)
            total_match_count += matches
            total_count += args.batch_size
            assert(total_match_count <= total_count)

            total_per_angle_count += actual_angle_dist
            total_per_angle_match_count += matches_by_angle

            assert(np.sum(total_per_angle_count) == total_count)
            for scale_idx in range(1):
                avg_loss[scale_idx] = avg_loss[scale_idx] + losses[scale_idx]

            # Log losses every 100 iterations.
            if b_i % 100 == 0:
                log(f, 'Epoch %d [%d]: %s loss: %s.' %
                    (epoch, b_i, [network.name], losses))
                log(f, 'Epoch %d [%d]: %s rot acc: %s.' % (
                    epoch, b_i, [network.name], (total_match_count/total_count)))
                log(f, 'Epoch %d [%d]: %s rot acc(0,90,180,270): %s.' % (epoch, b_i, [network.name],
                                                                         (total_per_angle_match_count / total_per_angle_count)))

        # -- Stats update
        avg_loss = [al / num_batches_per_epoch for al in avg_loss]
        avg_loss = [av for av in avg_loss]

        train_losses['loss1'].append(avg_loss)

        torch.cuda.empty_cache()
        log(f, 'Validating...')

        epoch_val_losses, txt, rot_acc_valid, per_rot_acc_valid = test_network(
            dataset, 'test_valid', network, False)
        log(f, 'Valid epoch: ' + str(epoch) + ' ' + txt)
        log(f, 'Valid epoch: ' + str(epoch) +
            'total rotation acc:' + str(rot_acc_valid))
        log(f, 'Valid epoch: ' + str(epoch) +
            'per rotation acc:' + str(per_rot_acc_valid))
        all_epoch_test_valid_accs.append(rot_acc_valid)
        all_epoch_test_valid_per_rot_accs.append(per_rot_acc_valid)

        best_epoch = np.argmax(np.array(all_epoch_test_valid_accs))
        best_valid_test_acc = np.array(all_epoch_test_valid_accs).max()
        log(f, 'Best valid rot acc so far epoch : {} , acc : {}'.format(
            best_epoch, best_valid_test_acc))

        for metric in ['loss1', 'new_mae']:
            valid_losses[metric].append(epoch_val_losses[metric])

        min_valid_epoch = np.argmin(valid_losses['new_mae'])

        # Save networks
        save_checkpoint({
            'epoch': epoch + 1,
            'state_dict': network.state_dict(),
            'optimizer': optimizer.state_dict(),
        }, snapshot_path, get_filename(network.name, epoch + 1))

        print('saving graphs...')
        with open(os.path.join(snapshot_path, 'losses.pkl'), 'wb') as lossfile:
            pickle.dump((train_losses, valid_losses),
                        lossfile, protocol=2)

        for metric in train_losses.keys():
            if "maxima_split" not in metric:
                if isinstance(train_losses[metric][0], list):
                    for i in range(len(train_losses[metric][0])):
                        plt.plot([a[i] for a in train_losses[metric]])
                        plt.savefig(os.path.join(snapshot_path,
                                                 'train_%s_%d.png' % (metric, i)))
                        plt.clf()
                        plt.close()
                plt.plot(train_losses[metric])
                plt.savefig(os.path.join(
                    snapshot_path, 'train_%s.png' % metric))
                plt.clf()
                plt.close()

        for metric in valid_losses.keys():
            if isinstance(valid_losses[metric][0], list):
                for i in range(len(valid_losses[metric][0])):
                    plt.plot([a[i] for a in valid_losses[metric]])
                    plt.savefig(os.path.join(snapshot_path,
                                             'valid_%s_%d.png' % (metric, i)))
                    plt.clf()
                    plt.close()
            plt.plot(valid_losses[metric])
            plt.savefig(os.path.join(snapshot_path, 'valid_%s.png' % metric))
            plt.clf()
            plt.close()

    all_epoch_test_valid_accs = np.array(all_epoch_test_valid_accs)
    best_epoch = np.argmax(all_epoch_test_valid_accs)
    best_valid_test_acc = all_epoch_test_valid_accs.max()

    log(f, 'Best valid rot acc epoch : {} , acc : {}'.format(
        best_epoch, best_valid_test_acc))

    # Plotting the valid accuracies
    plt.plot(np.array(all_epoch_test_valid_accs))
    for i in range(num_rotations):
        plt.plot(np.array(all_epoch_test_valid_per_rot_accs)[:, i])
    plt.legend(['overall acc', '0 deg acc', '90 deg acc',
                '180 deg acc', '270 deg acc'], loc='upper right')
    plt.savefig(os.path.join(snapshot_path, 'test_valid_all_rot_acc.png'))
    plt.clf()
    plt.close()

    # use the same filename format as the checkpoints saved above
    filename = get_filename(network.name, best_epoch + 1)
    with open(os.path.join(snapshot_path, 'unsup_vgg_best_model_meta.pkl'), 'wb') as unsup_file:
        pickle.dump(filename, unsup_file, protocol=2)
    log(f, 'Exiting train...')
    f.close()
    return
Example #18
0
def main():
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    chunk_size = torch.tensor(0)
    dist.recv(chunk_size, src=0)
    num_data = chunk_size.item()

    is_train = True

    alpha = args.alpha

    pattern_list = ['random', 'lowbias', 'midbias', 'highbias']

    datanum_list = ['balance', 'lowimbalance', 'highimbalance']

    checkpoint_dir = '/data/jcliu/FL/RE-AFL/client_result/client_' + str(
        args.rank) + '/'
    print("Create client dir success")
    fl_utils.create_dir(checkpoint_dir)

    fig_dir = checkpoint_dir + 'figure/'
    fl_utils.create_dir(fig_dir)

    MODEL_PATH = checkpoint_dir + 'model/'
    fl_utils.create_dir(MODEL_PATH)

    # Shared experiment tag used in the checkpoint and log paths below.
    data_tag = ('_dataset-type' + args.dataset_type +
                '_batch-size' + str(args.batch_size) + '_tx2nums' + str(args.world_size) +
                '_' + pattern_list[args.pattern_idx] + 'data-pattern' +
                datanum_list[args.datanum_idx] + 'data' +
                '_exit-loss' + str(exit_loss_threshold) + '_lr' + str(args.lr) +
                '_epoch' + str(args.epochs) + '_local' + str(args.local_iters))

    # LOAD and SAVE currently point to the same checkpoint file.
    LOAD_MODEL_PATH = (MODEL_PATH + 'alpha_' + str(alpha) +
                       '_model-type_' + args.model_type + data_tag + '.pth')
    SAVE_MODEL_PATH = (MODEL_PATH + 'alpha_' + str(alpha) +
                       '_model-type_' + args.model_type + data_tag + '.pth')

    LOG_ROOT_PATH = (checkpoint_dir + 'log/alpha_' + str(alpha) +
                     '/model-type_' + args.model_type + data_tag + '/')

    fl_utils.create_dir(LOG_ROOT_PATH)

    LOG_PATH = LOG_ROOT_PATH + 'model_acc_loss.txt'

    log_out = open(LOG_PATH, 'w+')
    # if args.epoch_start == 0:
    #     log_out.write("%s\n" % LOG_PATH)

    # if not args.epoch_start == 0:
    #     model.load_state_dict(torch.load(LOAD_MODEL_PATH))

    # log_out = dict()
    # log_out["model_acc_loss"] = open(os.path.join(LOG_ROOT_PATH, "model_acc_loss.txt"), 'w+')

    # <--Load datasets
    train_dataset, test_dataset = fl_datasets.load_datasets(args.dataset_type)

    # train_dataset, test_dataset = load_data()
    # train_loader = torch.utils.data.DataLoader(train_dataset,
    #     batch_size=args.batch_size, shuffle=True, **kwargs)
    # test_loader = torch.utils.data.DataLoader(test_dataset,
    #     batch_size=args.test_batch_size, shuffle=True, **kwargs)
    pattern_idx = args.pattern_idx
    datanum_idx = args.datanum_idx

    # <--Create federated train/test loaders for virtrual machines
    if pattern_idx == 0:  # random data (IID)
        if datanum_idx != 0:  # imbalance data
            is_train = True
            tx2_train_loader = fl_utils.create_random_loader(
                args, kwargs, args.rank, num_data, is_train, train_dataset)
            is_train = False
            tx2_test_loader = fl_utils.create_random_loader(
                args, kwargs, args.rank, num_data, is_train, test_dataset)
        else:  # balance data
            is_train = True
            tx2_train_loader = fl_utils.create_segment_loader(
                args, kwargs, args.world_size, args.rank, is_train,
                train_dataset)
            is_train = False
            tx2_test_loader = fl_utils.create_segment_loader(
                args, kwargs, args.world_size, args.rank, is_train,
                test_dataset)

    else:  # bias data partition (Non-IID)
        if pattern_idx == 1:  # lowbias
            label_clusters = ((0, 1, 2, 3, 4), (5, 6, 7, 8, 9))
        elif pattern_idx == 2:  # midbias
            label_clusters = ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9))
        elif pattern_idx == 3:  # highbias
            label_clusters = ((0, ), (1, ), (2, ), (3, ), (4, ), (5, ), (6, ),
                              (7, ), (8, ), (9, ))
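        # Sketch of the assumed partitioning: each tuple in label_clusters is a group
        # of class labels, and fl_utils.create_bias_selected_data (defined elsewhere)
        # is assumed to return only the samples whose labels fall in that group.
        # For example, with pattern_idx == 2 ("midbias") the ten classes are split
        # into five 2-label groups, so the stacked train_data below is the full
        # dataset re-ordered by cluster before it is segmented across clients,
        # which gives each client a label-biased (Non-IID) slice.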

        class_num = len(train_dataset.classes)
        cluster_len = len(label_clusters)

        for idx in range(cluster_len):
            train_data_tmp, train_targets_tmp = fl_utils.create_bias_selected_data(
                args, label_clusters[idx], train_dataset)
            test_data_tmp, test_targets_tmp = fl_utils.create_bias_selected_data(
                args, label_clusters[idx], test_dataset)
            if idx == 0:
                train_data = train_data_tmp
                train_targets = train_targets_tmp

                test_data = test_data_tmp
                test_targets = test_targets_tmp
            else:
                train_data = np.vstack((train_data, train_data_tmp))
                train_targets = np.hstack((train_targets, train_targets_tmp))

                test_data = np.vstack((test_data, test_data_tmp))
                test_targets = np.hstack((test_targets, test_targets_tmp))

        new_train_dataset = fl_datasets.train_test_dataset(
            train_data, train_targets, class_num)

        new_test_dataset = fl_datasets.train_test_dataset(
            test_data, test_targets, class_num)

        is_train = True
        tx2_train_loader = fl_utils.create_segment_loader(
            args, kwargs, args.world_size, args.rank, is_train,
            new_train_dataset)
        is_train = False
        tx2_test_loader = fl_utils.create_segment_loader(
            args, kwargs, args.world_size, args.rank, is_train,
            new_test_dataset)
    del train_dataset
    del test_dataset
    #test loader

    # self.test_loader = fl_utils.create_ps_test_loader(
    #     args, kwargs, self.param_server, test_dataset)

    # pattern_list = ['bias', 'partition', 'random']
    # pattern_idx = args.pattern_idx

    # # <--Create federated train/test loaders for virtrual machines
    # if pattern_idx == 0:
    #     # class_num = len(train_dataset.classes)
    #     # step = np.int32(np.floor(class_num / args.vm_num))
    #     if args.world_size == 5:
    #         # <--the number of items must equals to args.vm_num
    #         self.selected_idxs = ((0, 1), (2, 3), (4, 5), (6, 7), (8, 9))
    #     elif args.world_size == 10:
    #         # <--the number of items must equals to args.vm_num
    #         self.selected_idxs = (
    #             (0,), (1,), (2,), (3,), (4,), (5,), (6,), (7,), (8,), (9,))
    #     else:
    #         class_num = len(train_dataset.classes)
    #         step = np.int32(np.floor(class_num / args.vm_num))
    #         self.selected_idxs = [
    #             [idx + n for n in range(step)] for idx in range(0, class_num - step + 1, step)]

    #     is_train = True
    #     self.vm_train_loaders = fl_utils.create_bias_federated_loader(
    #         args, kwargs, self.vm_list, is_train, train_dataset, self.selected_idxs)
    #     is_train = False
    #     self.vm_test_loaders = fl_utils.create_bias_federated_loader(
    #         args, kwargs, self.vm_list, is_train, test_dataset, self.selected_idxs)

    # elif pattern_idx == 1:
    #     # <--the number of items must equals to args.vm_num
    #     partition_ratios = [1/2, 1/4, 1/8, 1/16, 1/16]
    #     is_train = True
    #     self.vm_train_loaders = fl_utils.create_labelwise_federated_loader(
    #         args, kwargs, self.vm_list, is_train, train_dataset, partition_ratios)
    #     is_train = False
    #     self.vm_test_loaders = fl_utils.create_labelwise_federated_loader(
    #         args, kwargs, self.vm_list, is_train, test_dataset, partition_ratios)

    # else:
    #     is_train = True
    #     self.vm_train_loaders = fl_utils.create_segment_federated_loader(
    #         args, kwargs, self.vm_list, is_train, train_dataset)
    #     is_train = False
    #     self.vm_test_loaders = fl_utils.create_segment_federated_loader(
    #         args, kwargs, self.vm_list, is_train, test_dataset)

    # <--Create Neural Network model instance
    if args.dataset_type == 'FashionMNIST':
        if args.model_type == 'LR':
            model = fl_models.MNIST_LR_Net().to(device)
        else:
            model = fl_models.MNIST_Net().to(device)

    elif args.dataset_type == 'MNIST':
        if args.model_type == 'LR':
            model = fl_models.MNIST_LR_Net().to(device)
        else:
            model = fl_models.MNIST_Small_Net().to(device)

    elif args.dataset_type == 'CIFAR10':

        if args.model_type == 'Deep':
            model = fl_models.CIFAR10_Deep_Net().to(device)
            args.decay_rate = 0.98
        else:
            model = fl_models.CIFAR10_Net().to(device)
            args.decay_rate = 0.98

    elif args.dataset_type == 'Sent140':
        # Both model_type settings currently map to the same network.
        model = fl_models.Sent140_Net().to(device)
        args.decay_rate = 0.99
    else:
        pass

    model_layers_num = len(list(model.named_parameters()))

    if args.epoch_start != 0:
        model.load_state_dict(torch.load(LOAD_MODEL_PATH))

    print("Model and Dataset ok")
    #model = Net().to(device)
    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)
    # global_para  =  model.state_dict().copy()
    # global_para = list(model.state_dict())
    global_para = [para[1].data for para in model.named_parameters()]

    start = time.time()

    # Receive the initial global parameters from the server (rank 0).
    for j in range(len(global_para)):
        temp = global_para[j].to('cpu')
        dist.recv(temp, src=0)
        global_para[j] = temp.to(device)
    global_epoch = torch.tensor(0)
    dist.recv(global_epoch, src=0)

    # print("Recev global para from the server")
    apply_global_para(model, global_para)

    for epoch in range(1, args.epochs + 1):
        print("Epoch %d" % epoch)
        # plt.ioff()
        train(args, start, model, device, tx2_train_loader, tx2_test_loader,
              optimizer, epoch, log_out)
        print("train ok")
        global_para = [para[1].data for para in model.named_parameters()]
        # local_para = [para[1].data for para in model.named_parameters()]

        for j in range(len(global_para)):
            dist.send(global_para[j].to('cpu'), dst=0)
        # print("Send para to the server")
        # Receive the aggregated global parameters back from the server.
        for j in range(len(global_para)):
            temp = global_para[j].to('cpu')
            dist.recv(temp, src=0)
            global_para[j] = temp.to(device)
        dist.recv(global_epoch, src=0)
        # print("recved server epoch: ", global_epoch)
        if global_epoch == args.epochs:
            break
        apply_global_para(model, global_para)
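    # Per-round protocol (as far as this snippet shows): the client trains locally,
    # sends its parameter tensors to the server (rank 0) one by one, then blocks on
    # dist.recv until the aggregated global parameters and the server's epoch
    # counter come back; apply_global_para (defined elsewhere) is assumed to copy
    # those tensors into the local model before the next local epoch. Training
    # stops once the server reports args.epochs rounds.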
    # cudnn.enabled = False

    cudnn.benchmark = True
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    train_dataset = datasets.MNIST(
        './data', train=True, download=True,
        transform=transforms.Compose([transforms.ToTensor(),
                                      transforms.Normalize((0.1307,), (0.3081,))]))
    test_dataset = datasets.MNIST(
        './data', train=False,
        transform=transforms.Compose([transforms.ToTensor(),
                                      transforms.Normalize((0.1307,), (0.3081,))]))

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=args.batch_size, shuffle=True, **kwargs)

    optimizer = optim.SGD(model.parameters(),
                          lr=args.lr,
                          momentum=args.momentum)

    if not os.path.isdir(args.snapshot):
        os.mkdir(args.snapshot)
    # else:
    # 	files = glob.glob(args.snapshot+'/*')
    # 	for f in files:
    # 		os.remove(f)

    start_epoch = 1

    if args.resume:
        if os.path.isfile(args.resume):
            print("Loading snapshot '{}'".format(args.resume))
            snapshot = torch.load(args.resume)
def main(config):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_transform = transforms.Compose([
        transforms.Resize(256),  # transforms.Scale is deprecated; Resize is the current name
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()])

    val_transform = transforms.Compose([
        transforms.Resize(256),  # transforms.Scale is deprecated; Resize is the current name
        transforms.RandomCrop(224),
        transforms.ToTensor()])
    test_transform = transforms.Compose([
        transforms.ToTensor()])
    trainset = AVADataset(csv_file=config.train_csv_file, root_dir=config.train_img_path, transform=train_transform)
    valset = AVADataset(csv_file=config.val_csv_file, root_dir=config.val_img_path, transform=val_transform)

    train_loader = torch.utils.data.DataLoader(trainset, batch_size=config.train_batch_size,
        shuffle=True, num_workers=config.num_workers)
    val_loader = torch.utils.data.DataLoader(valset, batch_size=config.val_batch_size,
        shuffle=False, num_workers=config.num_workers)

    base_model = models.vgg16(pretrained=True)
#    base_model = models.resnet18(pretrained=True)
#    base_model = models.inception_v3(pretrained=True)
    model = NIMA(base_model)
#    model = NIMA()
    if config.warm_start:
        model.load_state_dict(torch.load(os.path.join(config.ckpt_path, 'epoch-%d.pkl' % config.warm_start_epoch)))
        print('Successfully loaded model epoch-%d.pkl' % config.warm_start_epoch)

    if config.multi_gpu:
        model.features = torch.nn.DataParallel(model.features, device_ids=config.gpu_ids)
        model = model.to(device)
    else:
        model = model.to(device)

    conv_base_lr = config.conv_base_lr
    dense_lr = config.dense_lr
    optimizer = optim.SGD([
        {'params': model.features.parameters(), 'lr': conv_base_lr},
        {'params': model.classifier.parameters(), 'lr': dense_lr}],
        momentum=0.6
        )
    criterion = torch.nn.L1Loss()
    # send hyperparams
    lrs.send({
        'title': 'EMD Loss',
        'train_batch_size': config.train_batch_size,
        'val_batch_size': config.val_batch_size,
        'optimizer': 'SGD',
        'conv_base_lr': config.conv_base_lr,
        'dense_lr': config.dense_lr,
        'momentum': 0.6  # keep the logged value in sync with the optimizer above
        })

    param_num = 0
    for param in model.parameters():
        param_num += int(np.prod(param.shape))
    print('Trainable params: %.2f million' % (param_num / 1e6))

    if config.train:
        # for early stopping
        count = 0
        init_val_loss = float('inf')
        train_losses = []
        val_losses = []
        for epoch in range(config.warm_start_epoch, config.epochs):
            lrs.send('epoch', epoch)
            batch_losses = []
            for i, data in enumerate(train_loader):
                images = data['image'].to(device)
                labels = data['annotations'].to(device).float()
                outputs = model(images)
                outputs = outputs.view(-1, 1, 1)

                optimizer.zero_grad()
                loss = criterion(outputs, labels)
#                loss = emd_loss(labels, outputs)
                batch_losses.append(loss.item())

                loss.backward()

                optimizer.step()

                lrs.send('train_emd_loss', loss.item())

#                print('Epoch: %d/%d | Step: %d/%d | Training EMD loss: %.4f' % (epoch + 1, config.epochs, i + 1, len(trainset) // config.train_batch_size + 1, loss.data[0]))

            avg_loss = sum(batch_losses) / (len(trainset) // config.train_batch_size + 1)
            train_losses.append(avg_loss)
            print('Epoch %d averaged training EMD loss: %.4f' % (epoch + 1, avg_loss))

            # exponential learning rate decay
            if (epoch + 1) % 10 == 0:
                conv_base_lr = conv_base_lr * config.lr_decay_rate ** ((epoch + 1) / config.lr_decay_freq)
                dense_lr = dense_lr * config.lr_decay_rate ** ((epoch + 1) / config.lr_decay_freq)
                optimizer = optim.SGD([
                    {'params': model.features.parameters(), 'lr': conv_base_lr},
                    {'params': model.classifier.parameters(), 'lr': dense_lr}],
                    momentum=0.6
                )
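                # Worked example (assuming, say, config.lr_decay_rate = 0.95 and
                # config.lr_decay_freq = 10; neither value appears in this snippet):
                # at epoch + 1 == 10 the current lr is multiplied by 0.95 ** 1, at
                # epoch + 1 == 20 by a further 0.95 ** 2, and so on. Because
                # conv_base_lr / dense_lr are updated in place, the decay compounds:
                # after 20 epochs the lr is base * 0.95 ** 3, not base * 0.95 ** 2.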

                # send the decayed learning rates
                lrs.send({
                    'lr_decay_rate': config.lr_decay_rate,
                    'lr_decay_freq': config.lr_decay_freq,
                    'conv_base_lr': conv_base_lr,
                    'dense_lr': dense_lr
                    })

            # do validation after each epoch
            batch_val_losses = []
            for data in val_loader:
                images = data['image'].to(device)
                labels = data['annotations'].to(device).float()
                with torch.no_grad():
                    outputs = model(images)
                val_outputs = outputs.view(-1, 1, 1)
                val_loss = criterion(val_outputs, labels)               
#                val_loss = emd_loss(labels, outputs)
                batch_val_losses.append(val_loss.item())
            avg_val_loss = sum(batch_val_losses) / (len(valset) // config.val_batch_size + 1)
            val_losses.append(avg_val_loss)

            lrs.send('val_emd_loss', avg_val_loss)

            print('Epoch %d completed. Averaged loss on val set: %.4f. Best val loss so far: %.4f.' % (epoch + 1, avg_val_loss, init_val_loss))
            # Use early stopping to monitor training
            if avg_val_loss < init_val_loss:
                init_val_loss = avg_val_loss
                # save model weights if val loss decreases
                print('Saving model...')
                torch.save(model.state_dict(), os.path.join(config.ckpt_path, 'epoch-%d.pkl' % (epoch + 1)))
                print('Done.\n')
                # reset count
                count = 0
            elif avg_val_loss >= init_val_loss:
                count += 1
                if count == config.early_stopping_patience:
                    print('Val loss has not decreased in %d epochs. Training terminated.' % config.early_stopping_patience)
                    break

        print('Training completed.')

        if config.save_fig:
            # plot train and val loss
            epochs = range(1, epoch + 2)
            plt.plot(epochs, train_losses, 'b-', label='train loss')
            plt.plot(epochs, val_losses, 'g-', label='val loss')
            plt.title('EMD loss')
            plt.legend()
            plt.savefig('./loss.png')

    if config.test:
        # start/end are assumed to be CUDA timing events; they are created here so
        # the snippet runs standalone.
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
        print('Testing')
        # compute mean score (the test set reuses val_transform, which resizes and crops)
        testset = AVADataset(csv_file=config.test_csv_file, root_dir=config.test_img_path, transform=val_transform)
        test_loader = torch.utils.data.DataLoader(testset, batch_size=config.test_batch_size, shuffle=False, num_workers=config.num_workers)

        mean_preds =  np.zeros(45)
        mean_labels =  np.zeros(45)
#        std_preds = []
        count = 0
        for data in test_loader:
            im_id = data['img_id']
           
            image = data['image'].to(device)
            labels = data['annotations'].to(device).float()
            output = model(image)
            output = output.view(1, 1)
            bpred = output.to(torch.device("cpu"))
            cpred = bpred.data.numpy()
            blabel = labels.to(torch.device("cpu"))
            clabel = blabel.data.numpy()
#            predicted_mean, predicted_std = 0.0, 0.0
#            for i, elem in enumerate(output, 1):
#                predicted_mean += i * elem
#            for j, elem in enumerate(output, 1):
#                predicted_std += elem * (i - predicted_mean) ** 2
            mean_preds[count] = cpred
            mean_labels[count] = clabel
            print(im_id, mean_preds[count])
            count += 1
#            std_preds.append(predicted_std)
        # Do what you want with predicted and std...
        end.record()
Example #21
0
middle_shape = 30
num_data_point = 256
buffer_size = 256
num_epoch = 8000
batch_size = 256
k = 4
data_x = np.random.randn(num_data_point, data_shape)
data_x = data_x / np.linalg.norm(data_x, axis=1, keepdims=True)
data_y = np.random.rand(num_data_point, 1) * 2 - 1
buffer_index = np.random.choice(num_data_point, buffer_size, replace=False)
buffer_y = data_y[buffer_index]
repr_model = SimpleNet(num_data_point, data_shape, middle_shape)
# repr_model_target = SimpleNet(num_data_point,data_shape, middle_shape)

# training
optimizer = optim.SGD(repr_model.parameters(), lr=1e-4, weight_decay=0)

torch_data_x = torch.tensor(data_x, dtype=torch.float32)
torch_data_y = torch.tensor(data_y, dtype=torch.float32)
rep_buffer = np.array([
    repr_model(onehot([x])).detach().numpy().reshape(-1)
    for x in range(len(data_x))
])
for epoch in range(num_epoch):
    repr_model.train()
    rep_buffer = np.array([
        repr_model(onehot([x])).detach().numpy().reshape(-1)
        for x in range(len(data_x))
    ])
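    # rep_buffer is rebuilt from scratch every epoch: each data point is fed to
    # repr_model as a one-hot index (onehot is assumed to be defined above this
    # snippet), and the detached outputs form the current embedding table. The same
    # buffer is also computed once before the loop, so the pre-loop copy is
    # effectively just the epoch-0 snapshot.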
    # if epoch % 12000 == 0:
    #     plot(rep_buffer, buffer_y, "rep_buffer")
Example #22
0
def final_training(log_dirs, config):
    for c, experiment in enumerate(config["experiments"]):
        log_dir = log_dirs[c]
        print("using logs from: ", log_dir)
        basic_settings = experiment["basic_settings"]
        # data_manager
        iD = basic_settings.get("iD", "Cifar10")
        OoD = basic_settings.get("OoD", ["Fashion_MNIST"])
        labelled_size = basic_settings.get("labelled_size", 3000)
        pool_size = basic_settings.get("pool_size", 20000)
        OOD_ratio = basic_settings.get("OOD_ratio", 0.0)
        # training settings
        epochs = 130  # basic_settings.get("epochs", 200)
        batch_size = basic_settings.get("batch_size", 128)
        weight_decay = basic_settings.get("weight_decay", 1e-4)

        lr = basic_settings.get("lr", 0.1)
        nesterov = basic_settings.get("nesterov", False)
        momentum = basic_settings.get("momentum", 0.9)
        num_classes = basic_settings.get("num_classes", 10)

        # criterion = basic_settings.get("criterion", "crossentropy")

        metric = basic_settings.get("metric", "accuracy")
        # logging
        verbose = basic_settings.get("verbose", 1)
        criterion = nn.CrossEntropyLoss()
        subclass = basic_settings.get("subclass", {"do_subclass": False})

        # Write the CSV header once; the per-experiment rows appended below use
        # these same seven columns.
        with open(os.path.join(log_dir, "final_result.csv"),
                  "w",
                  encoding="utf-8") as result_file:
            result_file.write(
                "exp_name,trainsize,OOD_ratio,avg_train_acc,avg_train_loss,avg_test_acc,avg_test_loss\n"
            )
        for exp_setting in experiment["exp_settings"]:
            exp_name = exp_setting.get("exp_name", "standard_name")
            data_manager = Data_manager(
                iD_datasets=[iD],
                OoD_datasets=OoD,
                labelled_size=labelled_size,
                pool_size=pool_size,
                OoD_ratio=OOD_ratio,
                test_iD_size=None,
                subclass=subclass,
            )

            if not exp_setting.get("perform_experiment", True):
                continue
            else:
                print("performing final training for: ", exp_name)

            try:
                # data_manager.create_merged_data() TODO load the statusmanager from the path
                check_path = os.path.join(
                    log_dir, "status_manager_dir",
                    f"{exp_name}-result-statusmanager.csv")
                exp_type = exp_setting.get("exp_type", "baseline")
                if exp_type == "max_disc":
                    max_disc = True
                else:
                    max_disc = False

                print("loading statusmanager: ", check_path)
                if os.path.exists(check_path):
                    data_manager.status_manager = pd.read_csv(check_path,
                                                              index_col=0)
                    # self.data_manager.reset_pool()
                    data_manager.iter = 19
                    print("loaded statusmanager from file")
                else:
                    print("couldn't load statusmanager aborting: f{exp_name}")
                    break
                result_tup = create_dataloader(data_manager,
                                               batch_size,
                                               0.1,
                                               validation_source=None)
                train_loader = result_tup[0]
                test_loader = result_tup[1]
                # val_loader = result_tup[3]

                device = torch.device(
                    "cuda:0" if torch.cuda.is_available() else "cpu")

                if not max_disc:
                    model = get_model("base", num_classes=num_classes)
                else:
                    model = get_model("maximum_discrepancy",
                                      num_classes=num_classes)

                model.to(device)
                optimizer = optim.SGD(
                    model.parameters(),
                    weight_decay=weight_decay,
                    lr=lr,
                    momentum=momentum,
                    nesterov=nesterov,
                )

                if device == "cuda":
                    torch.backends.cudnn.benchmark = True

                model, avg_train_loss, avg_train_acc = train(
                    train_loader=train_loader,
                    val_loader=None,
                    optimizer=optimizer,
                    criterion=criterion,
                    device=device,
                    epochs=epochs,
                    model=model,
                    verbose=verbose,
                    max_disc=max_disc)
                avg_test_acc, avg_test_loss = test(model,
                                                   test_loader,
                                                   device,
                                                   criterion,
                                                   max_disc=max_disc)

                print(f"""Experiment: {exp_name},
                      Final_trainingset size: {len(train_loader)},
                      OOD_ratio: {OOD_ratio},
                      Train-Accuracy: {avg_train_acc},
                      Train-Loss: {avg_train_loss},
                      Test-Accuracy: {avg_test_acc},
                      Test-Loss: {avg_test_loss}""")

                with open(os.path.join(log_dir, "final_result.csv"),
                          "a",
                          encoding="utf-8") as result_file:
                    result_file.write(
                        f"{exp_name},{len(train_loader.dataset)},{OOD_ratio},{avg_train_acc},{avg_train_loss},{avg_test_acc},{avg_test_loss}\n"
                    )
            except Exception as e:
                print(f"{exp_name} failed with Exceptopm {e}")
Example #23
0
if device == 'cuda':
    net = torch.nn.DataParallel(net)
    cudnn.benchmark = True

if args.resume:
    # Load checkpoint.
    print('==> Resuming from checkpoint..')
    assert os.path.isdir('checkpoint'), 'Error: no checkpoint directory found!'
    checkpoint = torch.load('./checkpoint/ckpt.pth')
    net.load_state_dict(checkpoint['net'])
    best_acc = checkpoint['acc']
    start_epoch = checkpoint['epoch']

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(),
                      lr=args.lr,
                      momentum=0.9,
                      weight_decay=5e-4)


# Training
def train(epoch):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, targets)
Example #24
0
valid_data_gen = torch.utils.data.DataLoader(valid,batch_size=64,num_workers=3)
dataset_sizes = {'train':len(train_data_gen.dataset),'valid':len(valid_data_gen.dataset)}
dataloaders = {'train':train_data_gen,'valid':valid_data_gen}

model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 2)

if torch.cuda.is_available():
    model_ft = model_ft.cuda()

# Loss and Optimizer
learning_rate = 0.001
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=learning_rate, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

def train_model(model, criterion, optimizer, scheduler, num_epochs=5):
    since = time.time()

    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'valid']:
def train():
    # Model parameters
    g_input_size = 1  # Random noise dimension coming into generator, per output vector
    g_hidden_size = 5  # Generator complexity
    g_output_size = 1  # Size of generated output vector
    d_input_size = 500  # Minibatch size - cardinality of distributions
    d_hidden_size = 10  # Discriminator complexity
    d_output_size = 1  # Single dimension for 'real' vs. 'fake' classification
    minibatch_size = d_input_size

    d_learning_rate = 1e-3
    g_learning_rate = 1e-3
    sgd_momentum = 0.9

    num_epochs = 5000
    print_interval = 100
    d_steps = 20
    g_steps = 20

    dfe, dre, ge = 0, 0, 0
    d_real_data, d_fake_data, g_fake_data = None, None, None

    discriminator_activation_function = torch.sigmoid
    generator_activation_function = torch.tanh

    d_sampler = get_distribution_sampler(data_mean, data_stddev)
    gi_sampler = get_generator_input_sampler()
    G = Generator(input_size=g_input_size,
                  hidden_size=g_hidden_size,
                  output_size=g_output_size,
                  f=generator_activation_function)
    D = Discriminator(input_size=d_input_func(d_input_size),
                      hidden_size=d_hidden_size,
                      output_size=d_output_size,
                      f=discriminator_activation_function)
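    # preprocess and d_input_func are assumed to be helpers defined elsewhere in the
    # script (this looks like the classic vanilla-GAN demo): preprocess maps a batch
    # of raw samples to the features the discriminator actually sees, and
    # d_input_func(d_input_size) returns the matching discriminator input width, so
    # the two must be kept consistent.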
    criterion = nn.BCELoss(
    )  # Binary cross entropy: http://pytorch.org/docs/nn.html#bceloss
    d_optimizer = optim.SGD(D.parameters(),
                            lr=d_learning_rate,
                            momentum=sgd_momentum)
    g_optimizer = optim.SGD(G.parameters(),
                            lr=g_learning_rate,
                            momentum=sgd_momentum)

    for epoch in range(num_epochs):
        for d_index in range(d_steps):
            # 1. Train D on real+fake
            D.zero_grad()

            #  1A: Train D on real
            d_real_data = Variable(d_sampler(d_input_size))
            d_real_decision = D(preprocess(d_real_data))
            d_real_error = criterion(d_real_decision,
                                     Variable(torch.ones([1,
                                                          1])))  # ones = true
            d_real_error.backward(
            )  # compute/store gradients, but don't change params

            #  1B: Train D on fake
            d_gen_input = Variable(gi_sampler(minibatch_size, g_input_size))
            d_fake_data = G(d_gen_input).detach(
            )  # detach to avoid training G on these labels
            d_fake_decision = D(preprocess(d_fake_data.t()))
            d_fake_error = criterion(d_fake_decision,
                                     Variable(torch.zeros([1, 1
                                                           ])))  # zeros = fake
            d_fake_error.backward()
            d_optimizer.step(
            )  # Only optimizes D's parameters; changes based on stored gradients from backward()

            dre, dfe = extract(d_real_error)[0], extract(d_fake_error)[0]

        for g_index in range(g_steps):
            # 2. Train G on D's response (but DO NOT train D on these labels)
            G.zero_grad()

            gen_input = Variable(gi_sampler(minibatch_size, g_input_size))
            g_fake_data = G(gen_input)
            dg_fake_decision = D(preprocess(g_fake_data.t()))
            g_error = criterion(dg_fake_decision, Variable(torch.ones(
                [1, 1])))  # Train G to pretend it's genuine

            g_error.backward()
            g_optimizer.step()  # Only optimizes G's parameters
            ge = extract(g_error)[0]

        if epoch % print_interval == 0:
            print(
                "Epoch %s: D (%s real_err, %s fake_err) G (%s err); Real Dist (%s),  Fake Dist (%s) "
                % (epoch, dre, dfe, ge, stats(
                    extract(d_real_data)), stats(extract(d_fake_data))))

    if matplotlib_is_available:
        print("Plotting the generated distribution...")
        values = extract(g_fake_data)
        print(" Values: %s" % (str(values)))
        plt.hist(values, bins=50)
        plt.xlabel('Value')
        plt.ylabel('Count')
        plt.title('Histogram of Generated Distribution')
        plt.grid(True)
        plt.show()
def adversarial_learning(best_cla_model_path):
    # Device configuration
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    # print(device)

    parser = argparse.ArgumentParser("Image classifical!")
    parser.add_argument('--input_dir_trainSet', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/Joint_Training/ResNet18/cifar10/train/train.pkl',
                        help='data set dir path')
    parser.add_argument('--input_dir_testSet', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/Joint_Training/ResNet18/cifar10/test/test.pkl',
                        help='data set dir path')
    parser.add_argument('--epochs', type=int, default=300, help='Epoch default:50.')
    parser.add_argument('--image_size', type=int, default=32, help='Image Size default:28.')
    parser.add_argument('--batch_size', type=int, default=512, help='Batch_size default:256.')
    parser.add_argument('--lr', type=float, default=0.01, help='learing_rate. Default=0.01')
    parser.add_argument('--num_classes', type=int, default=10, help='num classes')
    parser.add_argument('--model_path', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/AdversarialLearning/ResNet18/cifar10/model/',
                        help='Save model path')
    parser.add_argument('--acc_file_path', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/AdversarialLearning/ResNet18/cifar10/acc.txt',
                        help='Save accuracy file')
    parser.add_argument('--best_acc_file_path', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/'
                                'AdversarialLearning/ResNet18/cifar10/best_acc.txt',
                        help='Save best accuracy file')
    parser.add_argument('--log_file_path', type=str,
                        default='D:/python_workplace/resnet-AE/checkpoint/AdversarialLearning/ResNet18/cifar10/log.txt',
                        help='Save log file')

    args = parser.parse_args()

    # Load model
    model = resnet_cifar.resnet18(pretrained=False)
    model.to(device)
    # summary(model,(3,32,32))
    # print(model)

    # Load pre-trained weights
    model.load_state_dict(torch.load(best_cla_model_path))
    model.to(device)

    # criterion
    criterion = nn.CrossEntropyLoss().to(device)

    # batch_shape
    batch_shape = [args.batch_size, 3, args.image_size, args.image_size]

    best_acc_clean = 0  # initialize best clean test set accuracy
    best_acc_adv = 0  # initialize best adv test set accuracy
    best_epoch = 0  # initialize best epoch
    time_k = time.time()
    print("Start Adversarial Training, Resnet-18!")
    with open(args.acc_file_path, "w") as f1:
        with open(args.log_file_path, "w") as f2:
            for epoch in range(0, args.epochs):
                if epoch + 1 <= 100:
                    args.lr = 0.1
                elif 100 < epoch + 1 <= 200:
                    args.lr = 0.01
                elif 200 < epoch + 1 <= 250:
                    args.lr = 0.001
                else:
                    args.lr = 0.0001

                # Optimization
                optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
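                # Constructing a fresh SGD optimizer every epoch applies the staircase
                # lr above (0.1 / 0.01 / 0.001 / 0.0001) but also discards the momentum
                # buffers at each epoch boundary; a torch.optim.lr_scheduler (e.g.
                # MultiStepLR) on a single optimizer would keep them.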

                print('Epoch: %d' % (epoch + 1))
                sum_loss = 0.0
                correct = 0.0
                total = 0.0
                batchId = 1
                for batchSize, images_train, labels_train in load_train_set(args.input_dir_trainSet, batch_shape):
                    start = time.time()

                    # data prepare
                    images_train = torch.from_numpy(images_train).type(torch.FloatTensor).to(device)
                    labels_train = torch.from_numpy(labels_train).type(torch.LongTensor).to(device)

                    model.to(device)
                    model.train()
                    optimizer.zero_grad()

                    # forward + backward
                    outputs = model(images_train)
                    loss = criterion(outputs, labels_train)
                    loss.backward()
                    optimizer.step()

                    # print loss and accuracy after every training batch
                    sum_loss += loss.item()
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels_train.size(0)
                    correct += predicted.eq(labels_train.data).cpu().sum().item()
                    # print(100.* correct / total)

                    end = time.time()

                    print('[Epoch:%d/%d] | [Batch:%d/%d] | Loss: %.03f | Acc: %.2f%% | Lr: %.04f | Time: %.03fs'
                          % (epoch + 1, args.epochs, batchId, (100000 / args.batch_size) + 1, sum_loss / batchId,
                             correct / total * 100, args.lr, (end - start)))
                    f2.write('[Epoch:%d/%d] | [Batch:%d/%d] | Loss: %.03f | Acc: %.2f%% | Lr: %.4f | Time: %.3fs'
                          % (epoch + 1, args.epochs, batchId, (100000 / args.batch_size) + 1, sum_loss / batchId,
                             correct / total * 100, args.lr, (end - start)))
                    f2.write('\n')
                    f2.flush()
                    batchId += 1

                # evaluate test accuracy every 50 epochs
                if (epoch + 1) % 50 == 0:
                    print("Waiting for Testing!")
                    with torch.no_grad():
                        # evaluate on the clean test set
                        correct_clean = 0
                        total_clean = 0
                        for batchSize, images_test_clean, labels_test_clean in load_test_set_clean(args.input_dir_testSet,
                                                                                                   batch_shape):
                            model.eval()

                            # data prepare
                            images_test_clean = torch.from_numpy(images_test_clean).type(torch.FloatTensor).to(device)
                            labels_test_clean = torch.from_numpy(labels_test_clean).type(torch.LongTensor).to(device)

                            model.to(device)

                            outputs = model(images_test_clean)
                            # take the class with the highest score (index into outputs.data)
                            _, predicted = torch.max(outputs.data, 1)
                            total_clean += labels_test_clean.size(0)
                            correct_clean += (predicted == labels_test_clean).sum().item()
                        print('Clean Test Set Accuracy:%.2f%%' % (correct_clean / total_clean * 100))
                        acc_clean = correct_clean / total_clean * 100

                        # evaluate on the adversarial test set
                        correct_adv = 0
                        total_adv = 0
                        for batchSize, images_test_adv, labels_test_adv in load_test_set_adv(args.input_dir_testSet,
                                                                                                   batch_shape):
                            model.eval()

                            # data prepare
                            images_test_adv = torch.from_numpy(images_test_adv).type(torch.FloatTensor).to(device)
                            labels_test_adv = torch.from_numpy(labels_test_adv).type(torch.LongTensor).to(device)

                            model.to(device)

                            outputs = model(images_test_adv)
                            # take the class with the highest score (index into outputs.data)
                            _, predicted = torch.max(outputs.data, 1)
                            total_adv += labels_test_adv.size(0)
                            correct_adv += (predicted == labels_test_adv).sum().item()
                        print('Adv Test Set Accuracy:%.2f%%' % (correct_adv / total_adv * 100))
                        acc_adv = correct_adv / total_adv * 100

                        # save test set accuracies to acc.txt
                        f1.write("Epoch=%03d,Clean Test Set Accuracy= %.2f%%" % (epoch + 1, acc_clean))
                        f1.write('\n')
                        f1.write("Epoch=%03d,Adv Test Set Accuracy= %.2f%%" % (epoch + 1, acc_adv))
                        f1.write('\n')
                        f1.flush()
                        # record the best test accuracies, write them to best_acc.txt, and save the qualifying model
                        if acc_clean > best_acc_clean and acc_adv > best_acc_adv:
                            if epoch != 49:
                                os.remove(args.model_path + "model_" + str(best_epoch) + ".pth")
                            best_acc_clean = acc_clean
                            best_acc_adv = acc_adv
                            print('Saving model!')
                            torch.save(model.state_dict(), '%s/model_%d.pth' % (args.model_path, epoch + 1))
                            print('Model saved!')
                            f3 = open(args.best_acc_file_path, "w")
                            f3.write("Epoch=%d,Best Accuracy of Clean Set = %.2f%%,Best Accuracy of Adv Set = %.2f%%"
                                     % (epoch + 1, best_acc_clean, best_acc_adv))
                            f3.close()
                            best_epoch = epoch + 1
            time_j = time.time()
            print("Training Finished, Total Epoch = %d, Best Epoch = %d, Best Accuracy of Clean Set = %.2f%%, "
                  "Best Accuracy of Adv Set = %.2f%%, Total Time = %.2f" % (args.epochs, best_epoch, best_acc_clean,
                                                                            best_acc_adv, (time_j - time_k)/3600))
Example #27
0
        self.module = nn.Sequential(
            layer1,
            activation1,
            layer2
        )
        
    def forward(self, x):
        out = self.module(x)
        result = F.softmax(out, dim=1)
        return result    


# training setup
criterion = nn.CrossEntropyLoss()
learning_rate = 1e-5
optimizer = optim.SGD(model.parameters(), lr=learning_rate)
num_epochs = 2
num_batches = len(train_loader)

for epoch in range(num_epochs):
    for i, data in enumerate(train_loader):
        x, x_labels = data  # x.size() = [batch, channel, x, y]
        # init grad
        optimizer.zero_grad()  # think of zero_grad() and step() as a matched pair
        # forward
        pred = model(x)
        # calculate loss
        loss = criterion(pred, x_labels)
        # backpropagation
        loss.backward()
        # weight update
Example #28
0
if __name__ == '__main__':
    #rootdir = '../../../data/office_caltech_10/'
    torch.manual_seed(1)
    i = 0
    data_src = DataLoader(dataset = MyTrainData_src(i),batch_size=BATCH_SIZE[0],shuffle=True, drop_last= True)
    data_tar = DataLoader(dataset = MyTrainData_tar(i),batch_size=BATCH_SIZE[1],shuffle=True, drop_last= True)
    '''
    data_src = data_loader.load_data(
        root_dir=rootdir, domain='amazon', batch_size=BATCH_SIZE[0])
    data_tar = data_loader.load_test(
        root_dir=rootdir, domain='webcam', batch_size=BATCH_SIZE[1])
    '''
    model = DaNN.DaNN(n_input=2048, n_hidden=256, n_class=65)
    model = model.to(DEVICE)
    optimizer = optim.SGD(
        model.parameters(),
        lr=LEARNING_RATE,
        momentum=MOMEMTUN,
        weight_decay=L2_WEIGHT
    )
    for e in tqdm(range(1, N_EPOCH + 1)):
        model = train(model=model, optimizer=optimizer,
                      epoch=e, data_src=data_src, data_tar=data_tar)
        test(model, data_tar, e)
    torch.save(model, 'model_dann.pkl')
    log_train.close()
    log_test.close()
    res_train = np.asarray(RESULT_TRAIN)
    res_test = np.asarray(RESULT_TEST)
    np.savetxt('res_train_a-w.csv', res_train, fmt='%.6f', delimiter=',')
    np.savetxt('res_test_a-w.csv', res_test, fmt='%.6f', delimiter=',')
if __name__ == '__main__':
    # train_middle_shot(saved=True)
    # model_path = 'G:/model/20210319_model.pt'
    # model_eval(model_path)
    ground_dir = '../'
    video_list = ['01_From_Pole_to_Pole','02_Mountains','03_Ice_Worlds','04_Great_Plains','05_Jungles','06_Seasonal_Forests','07_Fresh_Water',
                  '08_Ocean_Deep','09_Shallow_Seas','10_Caves','11_Deserts']
    
    transcript_path = os.path.join(ground_dir,'transcript')
    gt_path = os.path.join(ground_dir,'annotations/scenes/annotator_1/')
    cuda = False
    check_file(video_list,ground_dir+'bbc_dataset_video')
    device = torch.device('cuda' if cuda else 'cpu')
    model = MyTransformer(4096,4,6)
    lossfun = nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(),lr=0.1)
    scheduler = optim.lr_scheduler.StepLR(optimizer, 5)
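    # StepLR(optimizer, 5) uses the default gamma of 0.1, so the lr drops from 0.1
    # to 0.01 after epoch 5 and to 0.001 after epoch 10 (assuming scheduler.step()
    # is called once per epoch later in the loop, which this snippet truncates).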
    epoches = 30
    eval_rate = 5
    nshots = 540
    f_score = 0
    for epoch in range(epoches):
        loss = 0
        # training, testing = train_test_split(video_list)
        print('Epoch :{}...'.format(epoch))
        for i in range(len(video_list)):
            video_name = video_list[i]
            model.train()
            visual_feature_dir = os.path.join(ground_dir,'parse_data',video_name)
            print("{} Training Start...".format(video_name))
        
Example #30
0
def main():
    start = time.time()
    parser = args.parse_args()

    # run some checks on arguments
    check_args(parser)

    # format logging
    log_name = os.path.join(
        parser.run_log,
        '{}_run_log_{}.log'.format(parser.experiment,
                                   dt.now().strftime("%Y%m%d_%H%M")))

    log.basicConfig(filename=log_name,
                    format='%(asctime)s | %(name)s -- %(message)s',
                    level=log.INFO)
    os.chmod(log_name, parser.access_mode)

    # use the GPU if available, otherwise fall back to CPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))
    log.info("Starting experiment {} VN -> EN NMT on {}.".format(
        parser.experiment, device))

    # set seed for replication
    random.seed(parser.seed)
    np.random.seed(parser.seed)
    torch.manual_seed(parser.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(parser.seed)
    log.info("For reproducibility, the seed is set to {}.".format(parser.seed))

    # set file paths
    source_name = parser.source_name
    target_name = parser.target_name

    # get saved models dir
    base_saved_models_dir = parser.save_dir
    saved_models_dir = os.path.join(base_saved_models_dir,
                                    source_name + '2' + target_name)
    plots_dir = parser.plots_dir

    log.info("We will save the models in this directory: {}".format(
        saved_models_dir))
    log.info("We will save the plots in this directory: {}".format(plots_dir))

    # get data dir
    main_data_path = parser.data_dir
    path_to_train_data = {
        'source': main_data_path + 'train.tok.' + source_name,
        'target': main_data_path + 'train.tok.' + target_name
    }
    path_to_dev_data = {
        'source': main_data_path + 'dev.tok.' + source_name,
        'target': main_data_path + 'dev.tok.' + target_name
    }
    path_to_test_data = {
        'source': main_data_path + 'test.tok.' + source_name,
        'target': main_data_path + 'test.tok.' + target_name
    }

    # Configuration
    bs = parser.batch_size
    log.info("Batch size = {}.".format(bs))

    enc_emb = parser.enc_emb
    enc_hidden = parser.enc_hidden
    enc_layers = parser.enc_layers
    rnn_type = parser.rnn_type

    dec_emb = parser.dec_emb
    dec_hidden = parser.dec_hidden
    dec_layers = parser.dec_layers

    learning_rate = parser.learning_rate
    num_epochs = parser.epochs
    attn_flag = parser.attn
    log.info("The attention flag is set to {}.".format(attn_flag))
    beam_size = parser.beam_size
    log.info("We evaluate using beam size of {}.".format(beam_size))

    train, val, test, en_lang, vi_lang = dataset_helper.train_val_load(
        "", main_data_path)

    # get vocab sizes
    log.info('English has vocab size of: {} words.'.format(en_lang.n_words))
    log.info('Vietnamese has vocab size of: {} words.'.format(vi_lang.n_words))

    # get max sentence length by 95% percentile
    MAX_LEN = int(train['en_len'].quantile(0.95))
    log.info(
        'We will have a max sentence length of {} (95 percentile).'.format(
            MAX_LEN))

    # set data loaders
    bs_dict = {'train': bs, 'validate': 1, 'test': 1}
    shuffle_dict = {'train': True, 'validate': False, 'test': False}

    train_used = train
    val_used = val

    collate_fn_dict = {
        'train': partial(dataset_helper.vocab_collate_func, MAX_LEN=MAX_LEN),
        'validate': dataset_helper.vocab_collate_func_val,
        'test': dataset_helper.vocab_collate_func_val
    }

    transformed_dataset = {
        'train': dataset_helper.Vietnamese(train_used),
        'validate': dataset_helper.Vietnamese(val_used, val=True),
        'test': dataset_helper.Vietnamese(test, val=True)
    }

    dataloader = {
        x: DataLoader(transformed_dataset[x],
                      batch_size=bs_dict[x],
                      collate_fn=collate_fn_dict[x],
                      shuffle=shuffle_dict[x],
                      num_workers=0)
        for x in ['train', 'validate', 'test']
    }

    # instantiate encoder/decoder
    encoder_w_att = nnet_models.EncoderRNN(input_size=vi_lang.n_words,
                                           embed_dim=enc_emb,
                                           hidden_size=enc_hidden,
                                           n_layers=enc_layers,
                                           rnn_type=rnn_type).to(device)
    decoder_w_att = nnet_models.AttentionDecoderRNN(
        output_size=en_lang.n_words,
        embed_dim=dec_emb,
        hidden_size=dec_hidden,
        n_layers=dec_layers,
        attention=attn_flag).to(device)

    # instantiate optimizer
    if parser.optimizer == 'sgd':
        encoder_optimizer = optim.SGD(encoder_w_att.parameters(),
                                      lr=learning_rate,
                                      nesterov=True,
                                      momentum=0.99)
        decoder_optimizer = optim.SGD(decoder_w_att.parameters(),
                                      lr=learning_rate,
                                      nesterov=True,
                                      momentum=0.99)
    elif parser.optimizer == 'adam':
        encoder_optimizer = optim.Adam(encoder_w_att.parameters(), lr=5e-3)
        decoder_optimizer = optim.Adam(decoder_w_att.parameters(), lr=5e-3)
    else:
        raise ValueError('Invalid optimizer!')

    # instantiate scheduler
    enc_scheduler = ReduceLROnPlateau(encoder_optimizer,
                                      min_lr=1e-4,
                                      factor=0.5,
                                      patience=0)
    dec_scheduler = ReduceLROnPlateau(decoder_optimizer,
                                      min_lr=1e-4,
                                      factor=0.5,
                                      patience=0)
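    # With factor=0.5 and patience=0, ReduceLROnPlateau halves the encoder/decoder
    # lr after the first epoch in which the monitored metric fails to improve, and
    # never reduces it below min_lr=1e-4.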
    criterion = nn.NLLLoss(ignore_index=global_variables.PAD_IDX)

    log.info(
        "Seq2Seq Model with the following parameters: batch_size = {}, learning_rate = {}, rnn_type = {}, enc_emb = {}, enc_hidden = {}, enc_layers = {}, dec_emb = {}, dec_hidden = {}, dec_layers = {}, num_epochs = {}, source_name = {}, target_name = {}"
        .format(bs, learning_rate, rnn_type, enc_emb, enc_hidden, enc_layers,
                dec_emb, dec_hidden, dec_layers, num_epochs, source_name,
                target_name))

    # do we want to train again?
    train_again = False
    encoder_save = '{}_att_{}bs_{}hs_{}_{}beam_enc_{}_layer'.format(
        rnn_type, bs, enc_hidden, parser.optimizer, beam_size, enc_layers)
    decoder_save = '{}_att_{}bs_{}hs_{}_{}beam_dec_{}_layer'.format(
        rnn_type, bs, enc_hidden, parser.optimizer, beam_size, dec_layers)

    if os.path.exists(utils.get_full_filepath(
            saved_models_dir, encoder_save)) and os.path.exists(
                utils.get_full_filepath(saved_models_dir,
                                        decoder_save)) and (not train_again):
        log.info("Retrieving saved encoder from {}".format(
            utils.get_full_filepath(saved_models_dir, encoder_save)))
        log.info("Retrieving saved decoder from {}".format(
            utils.get_full_filepath(saved_models_dir, decoder_save)))
        encoder_w_att.load_state_dict(
            torch.load(utils.get_full_filepath(saved_models_dir,
                                               encoder_save)))
        decoder_w_att.load_state_dict(
            torch.load(utils.get_full_filepath(saved_models_dir,
                                               decoder_save)))
    else:
        log.info("Check if encoder path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, encoder_save)))
        log.info("Check if decoder path exists: {}".format(
            utils.get_full_filepath(saved_models_dir, decoder_save)))
        log.info("Encoder and Decoder do not exist! Starting to train...")
        encoder_w_att, decoder_w_att, loss_hist, acc_hist = train_utilities.train_model(
            encoder_optimizer,
            decoder_optimizer,
            encoder_w_att,
            decoder_w_att,
            criterion,
            "attention",
            dataloader,
            en_lang,
            vi_lang,
            saved_models_dir,
            encoder_save,
            decoder_save,
            num_epochs=num_epochs,
            rm=0.95,
            enc_scheduler=enc_scheduler,
            dec_scheduler=dec_scheduler)
        log.info("Total time is: {} min : {} s".format(
            (time.time() - start) // 60, (time.time() - start) % 60))
        log.info(
            "We will save the encoder/decoder in this directory: {}".format(
                saved_models_dir))

    # BLEU with beam size
    bleu_no_unk, att_score_wo, pred_wo, src_wo = train_utilities.validation_beam_search(
        encoder_w_att,
        decoder_w_att,
        dataloader['validate'],
        en_lang,
        vi_lang,
        'attention',
        beam_size,
        verbose=False)

    log.info("Bleu-{} Score (No UNK): {}".format(beam_size, bleu_no_unk))
    print("Bleu-{} Score (No UNK): {}".format(beam_size, bleu_no_unk))

    bleu_unk, att_score_wo, pred_wo, src_wo = train_utilities.validation_beam_search(
        encoder_w_att,
        decoder_w_att,
        dataloader['validate'],
        en_lang,
        vi_lang,
        'attention',
        beam_size,
        verbose=False,
        replace_unk=True)

    log.info("Bleu-{} Score (UNK): {}".format(beam_size, bleu_unk))
    print("Bleu-{} Score (UNK): {}".format(beam_size, bleu_unk))

    # generate 5 random predictions
    indexes = range(len(pred_wo))
    for i in np.random.choice(indexes, 5):
        print('Source: {} \nPrediction: {}\n---'.format(src_wo[i], pred_wo[i]))
        log.info('Source: {} \nPrediction: {}\n---'.format(
            src_wo[i], pred_wo[i]))

    log.info("Exported Binned Bleu Score Plot to {}!".format(plots_dir))
    _, _, fig = utils.get_binned_bl_score(
        encoder=encoder_w_att,
        decoder=decoder_w_att,
        val_dataset=transformed_dataset['validate'],
        attn_flag=attn_flag,
        beam_size=beam_size,
        location=plots_dir,
        collate=collate_fn_dict['validate'],
        lang_en=en_lang,
        lang_vi=vi_lang)