Example #1
0
def initial(args, dimension, data_home, n_party, train_dimensions):
    """Set up per-client local epochs and feature splits, then load the dataset.

    Args:
        args: parsed arguments; reads ``args.dimensions`` ("AVG" or other)
            and ``args.fix_epochs``.
        dimension: total number of features to split across clients.
        data_home: dataset root directory, forwarded to ``get_dataset``.
        n_party: number of participating clients.
        train_dimensions: pre-set per-client feature counts, or a falsy
            value to have them computed here.

    Returns:
        Tuple ``(local_epoch, feature, label, index)`` where ``local_epoch``
        is a list of per-client training epoch counts and the remaining
        values come from ``get_dataset``.
    """
    if train_dimensions:
        if args.dimensions == "AVG":
            local_epoch = [1] * n_party
        else:
            # Fixed number of local epochs per client. (The original code
            # rebuilt this same list inside a loop n_party times; building
            # it once is equivalent.)
            local_epoch = [args.fix_epochs] * n_party
            print("local_epoch", local_epoch)

        print("train_dimensions=", train_dimensions)
        # Copy so get_dataset cannot mutate the caller's list through the
        # reference it receives.
        train_dimensions_split = train_dimensions.copy()
        print('data_home:', data_home)
        feature, label, index = get_dataset(
            args, n_party, data_home, train_dimensions_split)
    else:
        if args.dimensions == "AVG":
            # Split features evenly; the last client absorbs the remainder.
            last = dimension % n_party
            avg = dimension // n_party
            train_dimensions = [avg] * n_party
            train_dimensions[n_party - 1] = avg + last
            local_epoch = [1] * n_party
        else:
            local_epoch = []
            # Randomize each client's feature count within +/-10 of the
            # even split.
            low = dimension // n_party - 10
            high = dimension // n_party + 10
            y0 = np.random.randint(low, high, size=n_party - 1)
            ratio = sum(y0) / dimension
            if n_party > 10:
                # Rescale so the counts stay close to the total dimension.
                train_dimensions = y0 // ratio
            else:
                train_dimensions = y0
            train_dimensions = train_dimensions.tolist()
            # The last client takes whatever features remain.
            train_dimensions.append(dimension - sum(train_dimensions))
            for i in range(n_party):
                # Cast floats to int; also draw each client's local epoch
                # count uniformly from 1-9.
                train_dimensions[i] = int(train_dimensions[i])
                local_epoch.append(np.random.randint(1, 10))
            print("local_epoch", local_epoch)

        print("train_dimensions=", train_dimensions)
        # Copy so get_dataset cannot mutate the list through the reference.
        train_dimensions_split = train_dimensions.copy()
        feature, label, index = get_dataset(
            args, n_party, data_home, train_dimensions_split)

    return local_epoch, feature, label, index
def annotation_parse(annotation_file, class_names):
    """
    parse annotation file to get image dict and ground truth class dict

    Args:
        annotation_file: test annotation txt file
        class_names: list of class names

    Return:
        image dict would be like:
        annotation_records = {
            '/path/to/000001.jpg': {'100,120,200,235':'dog', '85,63,156,128':'car', ...},
            ...
        }
        ground truth class dict would be like:
        classes_records = {
            'car': [
                    ['000001.jpg','100,120,200,235'],
                    ['000002.jpg','85,63,156,128'],
                    ...
                   ],
            ...
        }
    """
    annotation_records = OrderedDict()
    # Pre-seed every known class with an empty record list.
    classes_records = OrderedDict((name, []) for name in class_names)

    # Each line: '<image path> x1,y1,x2,y2,cls x1,y1,x2,y2,cls ...'
    annotation_lines = get_dataset(annotation_file, shuffle=False)
    for line in annotation_lines:
        tokens = line.split(' ')
        image_name, boxes = tokens[0], tokens[1:]
        box_records = {}
        for box in boxes:
            # Last comma-separated field is the class index; the rest is
            # the coordinate string.
            fields = box.split(',')
            class_name = class_names[int(fields[-1])]
            coordinate = ','.join(fields[:-1])
            box_records[coordinate] = class_name
            # Register this ground-truth box under its class.
            entry = [os.path.basename(image_name), coordinate]
            classes_records.setdefault(class_name, []).append(entry)
        annotation_records[image_name] = box_records

    return annotation_records, classes_records
Example #3
0
def build_dataloader(args, tokenizer, logger):
    """ Prepare the dataset for training and evaluation.

    Builds tokenized model inputs from the persona-chat-style dataset,
    pads them, wraps them in DialoImageDataset instances, and returns
    train/validation DataLoaders plus their (optional) distributed
    samplers.
    """
    personachat = get_dataset(tokenizer, args.dataset_path, args.dataset_cache, logger)
    logger.info("Build inputs and labels")
    datasets = {"train": defaultdict(list), "dev": defaultdict(list)}
    for dataset_name, dataset in personachat.items():
        # Number of reply candidates per utterance, optionally capped by args.
        num_candidates = len(dataset[0]["utterances"][0]["candidates"])
        if args.num_candidates > 0: # and dataset_name == 'train':
            num_candidates = min(args.num_candidates, num_candidates)
        for dialog in tqdm(dataset):
            persona = dialog["personality"].copy()
            for utterance in dialog["utterances"]:
                history = utterance["history"][-(2*args.max_history+1):]    # +1 as question
                img_list = utterance["img_list"]
                # The last candidate is treated as the gold reply
                # (lm_labels True only for it; mc_labels points at it).
                for j, candidate in enumerate(utterance["candidates"][-num_candidates:]):
                    lm_labels = bool(j == num_candidates-1)
                    instance = build_input_from_segments(persona, history, candidate, img_list, tokenizer, args, lm_labels)
                    for input_name, input_array in instance.items():
                        datasets[dataset_name][input_name].append(input_array)
                datasets[dataset_name]["mc_labels"].append(num_candidates - 1)
                datasets[dataset_name]["n_candidates"] = num_candidates

    logger.info("Pad inputs and convert to Tensor")
    data = {}
    for dataset_name, dataset in datasets.items():
        # Pad with the id of the last special token.
        dataset = pad_dataset(dataset, logger, padding=tokenizer.convert_tokens_to_ids(SPECIAL_TOKENS[-1]))
        data[dataset_name] = dataset
    logger.info("Build train and validation dataloaders")
    train_dataset, valid_dataset = DialoImageDataset(data["train"], args.images_feature_path, "train"), DialoImageDataset(data["dev"], args.images_feature_path, "dev")
    # Distributed samplers replace shuffling when running multi-process.
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) if args.distributed else None
    valid_sampler = torch.utils.data.distributed.DistributedSampler(valid_dataset) if args.distributed else None
    train_loader = DataLoader(train_dataset,
                              sampler=train_sampler,
                              batch_size=args.train_batch_size,
                              collate_fn=train_dataset.collate_fn,
                              num_workers=args.num_workers,
                              shuffle=(not args.distributed))
    valid_loader = DataLoader(valid_dataset,
                              sampler=valid_sampler,
                              batch_size=args.valid_batch_size,
                              collate_fn=valid_dataset.collate_fn,
                              num_workers=args.num_workers,
                              shuffle=False)
    logger.info("Train dataset (Batch, Seq length): {}".format(np.array(train_dataset.dataset["input_ids"]).shape))
    logger.info("Valid dataset (Batch, Seq length): {}".format(np.array(valid_dataset.dataset["input_ids"]).shape))
    return train_loader, valid_loader, train_sampler, valid_sampler
Example #4
0
    log_value('test_variance', variance, epoch)
    log_value('test_unique_policies', len(policy_set), epoch)

    # save the model --- agent
    agent_state_dict = agent.module.state_dict(
    ) if args.parallel else agent.state_dict()
    state = {
        'agent': agent_state_dict,
        'epoch': epoch,
        'reward': reward,
    }
    torch.save(state, args.cv_dir + '/ckpt_E_%d_R_%.2E' % (epoch, reward))


#--------------------------------------------------------------------------------------------------------#
# Script setup: build train/test dataloaders and the agent model, then
# optionally resume the agent from a saved checkpoint.
trainset, testset = utils.get_dataset(args.img_size, args.data_dir)
trainloader = torchdata.DataLoader(trainset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers)
testloader = torchdata.DataLoader(testset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers)
agent = utils.get_model(num_actions)

# ---- Load the pre-trained model ----------------------
start_epoch = 0
if args.load is not None:
    # Restore agent weights from the checkpoint saved by torch.save above.
    checkpoint = torch.load(args.load)
    agent.load_state_dict(checkpoint['agent'])
Example #5
0
    rnet_hr_state_dict = rnet_hr.module.state_dict(
    ) if args.parallel else rnet_hr.state_dict()
    state = {
        'agent': agent_state_dict,
        'resnet': rnet_hr_state_dict,
        'epoch': epoch,
        'reward': reward,
        'acc': accuracy
    }
    torch.save(
        state,
        args.cv_dir + '/ckpt_E_%d_A_%.3f_R_%.2E' % (epoch, accuracy, reward))


#--------------------------------------------------------------------------------------------------------#
# Script setup: build dataloaders and the high-/low-resolution networks
# plus the agent, move them to GPU, and start the tensorboard logger.
trainset, testset = utils.get_dataset(args.model, args.data_dir)
trainloader = torchdata.DataLoader(trainset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=8)
testloader = torchdata.DataLoader(testset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=8)
rnet_hr, rnet_lr, agent = utils.get_model(args.model)
rnet_hr.cuda()
# Low-resolution net is frozen to eval mode (no training updates here).
rnet_lr.eval().cuda()
agent.cuda()

# Save the configurations into the output folder
configure(args.cv_dir + '/log', flush_secs=5)
Example #6
0
def lr_scheduler(optim, iter):
    """Set the learning rate of optim's first param group for this iteration.

    Linear warmup over the first 10 iterations, the base ``args.lr`` through
    iteration 30, then a 30/iter decay afterwards.
    """
    base_lr = args.lr
    if iter < 10:
        new_lr = base_lr / 10 * iter
    elif iter > 30:
        new_lr = base_lr * (30 / iter)
    else:
        new_lr = base_lr
    optim.param_groups[0]['lr'] = new_lr

# Script setup: create log/model directories, the tensorboard writer,
# dataset, network, optimizer, and dataloaders for training.
make_dir(args.log_path + 'unique_object/' + args.model_type + '/')
make_dir(args.models_path + 'unique_object/' + args.model_type + '/')
logger = SummaryWriter(args.log_path + 'unique_object/' + args.model_type + '/' + args.name)

# logger.add_hparams(args.get_dict(), {})

trSet, valSet = get_dataset()

net = get_net()

# Pick the optimizer by name; SGD is the fallback.
if args.optimizer == 'Ranger':
    optimizer = Ranger(net.parameters(), lr=args.lr, alpha=0.5, k=5)
elif args.optimizer == 'Adam':
    optimizer = torch.optim.Adam(net.parameters(), lr=args.lr)
else:
    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr)

trDataloader = DataLoader(trSet, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=trSet.collate_fn)
valDataloader = DataLoader(valSet, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=valSet.collate_fn)

# torch.autograd.set_detect_anomaly(True)
iter_num = 0
Example #7
0
def main_per_worker(process_index, ngpus_per_node, args):
    """Per-process training entry point (one worker per GPU).

    Builds model, optimizer, scheduler, and datasets, optionally wraps the
    model for distributed training, then loops over Trainer.train forever.

    Args:
        process_index: local GPU / process index on this machine.
        ngpus_per_node: number of GPUs per machine.
        args: CLI args carrying rank, world size, dist_url, etc.
    """
    update_config(cfg, args)

    # torch seed
    torch.cuda.manual_seed(random.random())

    # cudnn
    cudnn.benchmark = cfg.CUDNN.BENCHMARK
    torch.backends.cudnn.enabled = cfg.CUDNN.ENABLED

    # Global rank of this process across all machines.
    proc_rank = args.rank * ngpus_per_node + process_index

    #create logger
    logger, output_dir = create_logger(cfg, proc_rank)
    # logger.info(pprint.pformat(args))
    # logger.info(cfg)

    model = get_model(cfg, cfg.MODEL.FILE, cfg.MODEL.NAME)
    optimizer = get_optimizer(cfg, model)
    # Resume model/optimizer state if a checkpoint exists.
    model, optimizer, last_iter = load_checkpoint(cfg, model, optimizer)
    lr_scheduler = get_lr_scheduler(cfg, optimizer, last_iter)
    train_dataset, eval_dataset = get_dataset(cfg)

    # distribution
    if args.distributed:
        logger.info(f'Init process group: dist_url: {args.dist_url},  '
                    f'world_size: {args.world_size}, '
                    f'machine: {args.rank}, '
                    f'rank:{proc_rank}')
        dist.init_process_group(backend=cfg.DIST_BACKEND,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=proc_rank)
        torch.cuda.set_device(process_index)
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[process_index])
        # Each distributed worker sees a disjoint shard of the data.
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
        batch_size = cfg.DATASET.IMG_NUM_PER_GPU

    else:
        # Single-process path: DataParallel over all local GPUs, so the
        # effective batch is per-GPU batch times GPU count.
        assert proc_rank == 0, ('proc_rank != 0, it will influence '
                                'the evaluation procedure')
        model = torch.nn.DataParallel(model).cuda()
        train_sampler = None
        batch_size = cfg.DATASET.IMG_NUM_PER_GPU * ngpus_per_node

    print('BATCH_SIZE: ', batch_size)
    # Shuffle only when no sampler is used (sampler already shuffles).
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=(train_sampler is None),
                                               drop_last=True,
                                               collate_fn=objtrack_collect,
                                               num_workers=cfg.WORKERS,
                                               pin_memory=True,
                                               sampler=train_sampler)

    eval_loader = torch.utils.data.DataLoader(eval_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              drop_last=False,
                                              collate_fn=objtrack_collect,
                                              num_workers=cfg.WORKERS)

    criterion = get_det_criterion(cfg)

    Trainer = get_trainer(
        cfg,
        model,
        optimizer,
        lr_scheduler,
        criterion,
        output_dir,
        last_iter,
        proc_rank,
    )

    # Training loop runs until the process is killed or Trainer stops it.
    while True:
        Trainer.train(train_loader, eval_loader)
Example #8
0
    dest='yaml_file',
    default=None,
    help='experiment configure file name, e.g. configs/fcos_detector.yaml',
    type=str)
# Positional catch-all: any remaining CLI tokens override config options.
parser.add_argument(
    'opts',
    help="Modify config options using the command-line",
    default=None,
    nargs=argparse.REMAINDER)
args = parser.parse_args()

if __name__ == '__main__':
    update_config(cfg, args)
    model = get_model(cfg, cfg.MODEL.FILE, cfg.MODEL.NAME)
    resume_path = cfg.MODEL.RESUME_PATH
    _, eval_dataset = get_dataset(cfg)
    eval_loader = torch.utils.data.DataLoader(
        eval_dataset,
        batch_size=cfg.DATASET.IMG_NUM_PER_GPU,
        shuffle=False,
        drop_last=False,
        collate_fn=objtrack_collect,
    )
    criterion = get_det_criterion(cfg)
    
    model = load_eval_model(resume_path, model)

    model.cuda()
    model.eval()

    mAP, aps, pr_curves = eval_fcos_det(cfg,
Example #9
0
def main():
    """Train (or distill) a face-recognition model.

    Loads train/validation data, then either trains a student model with an
    optional pre-trained teacher (knowledge distillation) or trains the
    teacher itself, evaluating the best checkpoint afterwards.
    """
    parser = get_parser()
    args = parser.parse_args()
    setup(args)
    # read train data
    train_set = utils.get_dataset(args.train_data_dir)
    nrof_classes = len(train_set)

    # read validation data
    print('unit test directory: %s' % args.unit_test_dir)
    unit_test_paths, unit_actual_issame = utils.get_val_paths(
        os.path.expanduser(args.unit_test_dir))
    nrof_test_img = len(unit_test_paths)
    unit_issame_label = np.zeros(nrof_test_img)
    # Each is-same flag covers a pair of consecutive images, so duplicate
    # it onto both members of the pair.
    for i in range(len(unit_actual_issame)):
        unit_issame_label[2 * i] = unit_actual_issame[i]
        unit_issame_label[2 * i + 1] = unit_actual_issame[i]
    unit_issame_label = np.asarray(unit_issame_label, dtype=np.int32)
    # Get a list of image paths and their labels
    image_list, label_list = utils.get_image_paths_and_labels(train_set)
    assert len(image_list) > 0, 'The dataset should not be empty'

    print('Total number of train classes: %d' % nrof_classes)
    print('Total number of train examples: %d' % len(image_list))
    print("number of validation examples: %d" % nrof_test_img)
    #ipdb.set_trace()
    train_dataset = data_loader.DataLoader(image_list, label_list, [160, 160],
                                           nrof_classes)
    validation_dataset = data_loader.DataLoader(unit_test_paths,
                                                unit_issame_label, [160, 160])
    tf.reset_default_graph()
    if args.model_type == "student":
        # Distillation path: optionally load a frozen teacher to guide the
        # student.
        teacher_model = None
        if args.load_teacher_from_checkpoint:
            teacher_model = model.BigModel(args, "teacher", nrof_classes,
                                           nrof_test_img)
            teacher_model.start_session()
            teacher_model.load_model_from_file(
                args.load_teacher_checkpoint_dir)
            print("Verify Teacher State before Training Student")
            teacher_model.run_inference(validation_dataset, unit_actual_issame)
        student_model = model.SmallModel(args, "student", nrof_classes,
                                         nrof_test_img)
        student_model.start_session()
        student_model.train(train_dataset, validation_dataset,
                            unit_actual_issame, teacher_model)

        # Testing student model on the best model based on validation set
        student_model.load_model_from_file(args.checkpoint_dir)
        student_model.run_inference(validation_dataset, unit_actual_issame)

        if args.load_teacher_from_checkpoint:
            print("Verify Teacher State After Training student Model")
            teacher_model.run_inference(validation_dataset, unit_actual_issame)
            teacher_model.close_session()
        student_model.close_session()
    else:
        # Teacher path: train the big model from scratch, then evaluate the
        # best checkpoint.
        teacher_model = model.BigModel(args, "teacher", nrof_classes,
                                       nrof_test_img)
        teacher_model.start_session()
        teacher_model.train(train_dataset, validation_dataset,
                            unit_actual_issame)

        # Testing teacher model on the best model based on validation set
        teacher_model.load_model_from_file(args.checkpoint_dir)
        teacher_model.run_inference(validation_dataset, unit_actual_issame)
        teacher_model.close_session()