Example #1
def preliminaries(dataset_path, output_path, params_path, n_fold, n_run):
    """
    Runs a single instance of EEL.
    If an exception occurs during this execution, the program will write the exception message to a file in the
    output_path directory.

    :type dataset_path: str
    :param dataset_path: a full path (including file type, e.g. dataset.arff) to the dataset to be run.
    :type output_path: str
    :param output_path: path to output metadata regarding evolutionary process.
    :type params_path: str
    :param params_path: path to parameters file.
    :type n_fold: int
    :param n_fold: index of the fold to run in this execution. Must be smaller than the number of folds given in the params file.
    :type n_run: int
    :param n_run: number of the current run.
    """

    dataset_name = get_dataset_name(dataset_path)

    # try:
    eelem(
        dataset_path=dataset_path,
        output_path=output_path,
        params_path=params_path,
        n_fold=n_fold,
        n_run=n_run
    )
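The docstring above promises that exceptions are written to a file in output_path, but the corresponding try block is left commented out in this excerpt. A minimal sketch of how the call could be guarded; the error-file name is hypothetical, and os and traceback are standard-library imports:

import os
import traceback

def preliminaries_guarded(dataset_path, output_path, params_path, n_fold, n_run):
    dataset_name = get_dataset_name(dataset_path)
    try:
        eelem(dataset_path=dataset_path, output_path=output_path,
              params_path=params_path, n_fold=n_fold, n_run=n_run)
    except Exception:
        # hypothetical file name; the original naming scheme is not shown
        error_file = os.path.join(
            output_path, '{}_fold_{}_run_{}_error.txt'.format(dataset_name, n_fold, n_run))
        with open(error_file, 'w') as f:
            f.write(traceback.format_exc())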
Example #2
def verify_speed(traj, sequences):
    dataset_name = get_dataset_name(TEST_DATASET_PATH)
    max_speed = get_max_speed(dataset_name)
    traj = get_traj(traj, sequences)
    dist = get_distance(traj)
    speed = get_speed_from_distance(dist)
    # We calculate inverse sigmoid to verify the speed
    inverse_sigmoid(speed, max_speed)
Example #3
def get_dataset_location(dataset):
    "Return EOS location of the dataset."
    return XROOTD_DIR_BASE + \
        EXPERIMENT + '/' + \
        MCDIR_BASE + '/' + \
        get_dataset_runperiod(dataset) + '/' + \
        get_dataset_name(dataset) + '/' + \
        get_dataset_format(dataset) + '/' + \
        get_dataset_version(dataset)
Example #4
def get_dataset_index_file_base(dataset):
    "Return index file base for given dataset."
    filebase = EXPERIMENT.upper() + '_' + \
               MCDIR_BASE + '_' + \
               get_dataset_runperiod(dataset) + '_' + \
               get_dataset_name(dataset) + '_' + \
               get_dataset_format(dataset) + '_' + \
               get_dataset_version(dataset)
    return filebase
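A quick usage sketch of the two helpers above; the dataset string is a placeholder, and XROOTD_DIR_BASE, EXPERIMENT, MCDIR_BASE and the remaining get_dataset_* helpers are assumed to be defined elsewhere in the same module:

dataset = '/SomeSample/Run2011A-12Oct2013-v1/AOD'    # hypothetical dataset identifier
print(get_dataset_location(dataset))        # EOS path: <base>/<experiment>/<mcdir>/<runperiod>/<name>/<format>/<version>
print(get_dataset_index_file_base(dataset))  # <EXPERIMENT>_<mcdir>_<runperiod>_<name>_<format>_<version>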
Example #5
def eelem(dataset_path, output_path, params_path, n_fold, n_run, verbose=True):
    """
    Runs a single instance of EEL.

    :type dataset_path: str
    :param dataset_path: a full path (including file type, e.g. dataset.arff) to the dataset to be run.
    :type output_path: str
    :param output_path: path to output metadata regarding evolutionary process.
    :type params_path: str
    :param params_path: path to parameters file.
    :type n_fold: int
    :param n_fold: index of the fold to run in this execution. Must be smaller than the number of folds given in the params file.
    :type n_run: int
    :param n_run: number of the current run.
    :type verbose: bool
    :param verbose: whether to output metadata to console. Defaults to True.
    """

    params = json.load(open(params_path))

    dataset_name = get_dataset_name(dataset_path)

    X_train, X_test, y_train, y_test = __get_fold__(params=params, dataset_path=dataset_path, n_fold=n_fold)

    n_classes = len(np.unique(y_train))

    reporter = EDAReporter(
        Xs=[X_train, X_test],
        ys=[y_train, y_test],
        set_names=['train', 'test'],
        output_path=output_path,
        dataset_name=dataset_name,
        n_fold=n_fold,
        n_run=n_run,
        n_classifiers=params['n_base_classifiers'],
        n_classes=n_classes,
    )

    ensemble = Ensemble.from_adaboost(
        X_train=X_train, y_train=y_train,
        data_normalizer_class=DataNormalizer,
        n_classifiers=params['n_base_classifiers'],
    )  # type: Ensemble

    ensemble = integrate(
        ensemble=ensemble,
        n_individuals=params['n_individuals'],
        n_generations=params['n_generations'],
        reporter=reporter,
        verbose=verbose
    )
    return ensemble
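A hedged usage sketch of the call documented above; all paths are placeholders (the params file is JSON, since it is read with json.load):

ensemble = eelem(
    dataset_path='/path/to/dataset.arff',   # placeholder
    output_path='/path/to/output_dir',      # placeholder
    params_path='/path/to/params.json',     # placeholder
    n_fold=0,
    n_run=0,
    verbose=True
)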
Example #6
def verify_speed(traj, sequences, labels=None):
    if MULTI_CONDITIONAL_MODEL:
        traj, label = get_traj(traj, sequences, labels=labels)
    else:
        dataset_name = get_dataset_name(SINGLE_TEST_DATASET_PATH)
        traj = get_traj(traj, sequences, labels=None)
    dist = get_distance(traj)
    speed = get_speed_from_distance(dist)
    # We calculate inverse sigmoid to verify the speed
    if MULTI_CONDITIONAL_MODEL:
        inverse_sigmoid(speed, labels=label)
    else:
        max_speed = SINGLE_AGENT_MAX_SPEED
        inverse_sigmoid(speed, max_speed=max_speed)
Example #7
def main():
    checkpoint = torch.load(CHECKPOINT_NAME)
    generator = TrajectoryGenerator()
    generator.load_state_dict(checkpoint['g_state'])
    if USE_GPU:
        generator.cuda()
    generator.train()

    dataset_name = get_dataset_name(TEST_DATASET_PATH)
    _, loader = data_loader(TEST_DATASET_PATH, TEST_METRIC)
    if TEST_METRIC == 1:
        num_samples = 1
    else:
        num_samples = NUM_SAMPLES
    ade, fde = evaluate(loader, generator, num_samples)
    print('Dataset: {}, Pred Len: {}, ADE: {:.2f}, FDE: {:.2f}'.format(dataset_name, PRED_LEN, ade, fde))
Example #8
def main():
    args = parser.parse_args()
    print(args)
    config = configure(args.config)

    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    # define model name
    setup_list = [
        args.src, args.tgt, args.network, f"contrast_dim_{args.contrast_dim}",
        f"temperature_{args.temperature}", f"alpha_{args.alpha}",
        f"cw_{args.cw}", f"thresh_{args.thresh}",
        f"max_key_size_{args.max_key_size}",
        f"min_conf_samples_{args.min_conf_samples}", f"gpu_{args.gpu}"
    ]
    model_name = "_".join(setup_list)
    print(colored(f"Model name: {model_name}", 'green'))
    model_dir = os.path.join(args.log_dir, model_name)

    if os.path.isdir(model_dir):
        shutil.rmtree(model_dir)
    os.mkdir(model_dir)
    summary_writer = SummaryWriter(model_dir)

    # save parsed arguments
    with open(os.path.join(model_dir, 'parsed_args.txt'), 'w') as f:
        json.dump(args.__dict__, f, indent=2)

    dataset_name = get_dataset_name(args.src, args.tgt)
    dataset_config = config.data.dataset[dataset_name]
    src_file = os.path.join(args.dataset_root, dataset_name,
                            args.src + '_list.txt')
    tgt_file = os.path.join(args.dataset_root, dataset_name,
                            args.tgt + '_list.txt')

    model = Model(base_net=args.network,
                  num_classes=dataset_config.num_classes,
                  contrast_dim=args.contrast_dim,
                  frozen_layer=args.frozen_layer)
    model_ema = Model(base_net=args.network,
                      num_classes=dataset_config.num_classes,
                      contrast_dim=args.contrast_dim,
                      frozen_layer=args.frozen_layer)

    moment_update(model, model_ema, 0)

    model = model.cuda()
    model_ema = model_ema.cuda()

    contrast_loss = InfoNCELoss(temperature=args.temperature).cuda()
    src_memory = KeyMemory(args.max_key_size, args.contrast_dim).cuda()
    tgt_memory = KeyMemory(args.max_key_size, args.contrast_dim).cuda()

    tgt_pseudo_labeler = KMeansPseudoLabeler(
        num_classes=dataset_config.num_classes,
        batch_size=args.pseudo_batch_size)

    parameters = model.get_parameter_list()
    group_ratios = [parameter['lr'] for parameter in parameters]

    optimizer = torch.optim.SGD(parameters,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    assert args.lr_scheduler == 'inv'
    lr_scheduler = InvScheduler(gamma=args.gamma,
                                decay_rate=args.decay_rate,
                                group_ratios=group_ratios,
                                init_lr=args.lr)

    trainer = Train(model,
                    model_ema,
                    optimizer,
                    lr_scheduler,
                    model_dir,
                    summary_writer,
                    src_file,
                    tgt_file,
                    contrast_loss,
                    src_memory,
                    tgt_memory,
                    tgt_pseudo_labeler,
                    cw=args.cw,
                    thresh=args.thresh,
                    min_conf_samples=args.min_conf_samples,
                    num_classes=dataset_config.num_classes,
                    batch_size=args.batch_size,
                    eval_batch_size=args.eval_batch_size,
                    num_workers=args.num_workers,
                    max_iter=args.max_iterations,
                    iters_per_epoch=dataset_config.iterations_per_epoch,
                    log_summary_interval=args.log_summary_interval,
                    log_image_interval=args.log_image_interval,
                    num_proj_samples=args.num_project_samples,
                    acc_metric=dataset_config.acc_metric,
                    alpha=args.alpha)

    tgt_best_acc = trainer.train()

    # write to text file
    # with open(args.acc_file, 'a') as f:
    #     f.write(model_name + '     ' + str(tgt_best_acc) + '\n')
    #     f.close()

    # write to xlsx file
    write_list = [
        args.src, args.tgt, args.network, args.contrast_dim, args.temperature,
        args.alpha, args.cw, args.thresh, args.max_key_size,
        args.min_conf_samples, args.gpu, tgt_best_acc
    ]
    with open(args.acc_file, 'a') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(write_list)
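moment_update(model, model_ema, 0) is called above but not defined in this excerpt. A minimal sketch of the usual momentum/EMA update such a helper performs (with m=0 it simply copies the online model's parameters into the EMA model); the actual implementation in this codebase may differ:

import torch

def moment_update(model, model_ema, m):
    """Parameter-wise update: p_ema = m * p_ema + (1 - m) * p."""
    with torch.no_grad():
        for p, p_ema in zip(model.parameters(), model_ema.parameters()):
            p_ema.mul_(m).add_(p.detach(), alpha=1 - m)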
Example #9
training_group = parser.add_argument_group('Training parameters')
training_group.add_argument('-ts', '--train_set_size', default=70000, type=int, help='training set size')
training_group.add_argument('-vs', '--valid_set_size', default=30000, type=int, help='validation set size')
training_group.add_argument('-vp', '--valid_partitions', default=100, type=int, help='validation set partitions number')
training_group.add_argument('-tp', '--test_partitions', default=100, type=int, help='test set partitions number')
training_group.add_argument('-b', '--batch_size', default=70, type=int, help='batch size for SGD')
training_group.add_argument('-l', '--learning_rate', default=1e-4, type=float, help='learning rate for SGD')
training_group.add_argument('-dr', '--decay_rate', default=0.1, type=float, help='learning rate decay rate')
training_group.add_argument('-ds', '--decay-steps', default=1000, type=int, help='learning rate decay steps')
training_group.add_argument('-c', '--gradient_clip', default=40.0, type=float, help='clip at this max norm of gradient')
training_group.add_argument('-m', '--max_steps', default=10000, type=int, help='max number of iterations for training')
training_group.add_argument('-s', '--save', action='store_true', help='save the model every epoch')
training_group.add_argument('-ens', '--ensemble', default=1, type=int, help='Number of the model in the ensemble')
args = parser.parse_args()

pickle_filename = utils.get_dataset_name(args.time_window, args.time_aggregation, args.forecast_window,
                                         args.forecast_aggregation, args.train_set_size, args.valid_set_size)

dataset = utils.get_dataset(pickle_filename, args, parser)
(train_set, train_labels,
 valid_set, valid_labels,
 valid_set2, valid_labels2,
 test_set, test_labels,
 mean, stddev) = dataset
del dataset

print('Training set', train_set.shape, train_labels.shape)
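The mean and stddev at the end of the dataset tuple are presumably the statistics used to standardize the features; a minimal sketch of applying and undoing that standardization (this convention is an assumption, it is not stated in the excerpt):

import numpy as np

def standardize(x, mean, stddev):
    # map raw values into the normalized space the model sees
    return (np.asarray(x) - mean) / stddev

def destandardize(x, mean, stddev):
    # map model outputs back to the original units
    return np.asarray(x) * stddev + mean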
Example #10
    Xs.append(X), ys.append(y)

k = 30
cv = model_selection.StratifiedKFold(n_splits=k)

id3_measures = []
cart_measures = []

for dataset_index in range(0, len(datasets)):
    fold = 0
    id3_fold_measures = []
    cart_fold_measures = []
    for train_indexes, test_indexes in cv.split(Xs[dataset_index],
                                                ys[dataset_index]):
        print('processing fold {} of dataset {}...'.format(
            fold, utils.get_dataset_name(dataset_index)))
        fold_sets = [[], [], [], []]

        fold_sets[SplitPartNames['X_train']] = Xs[dataset_index][train_indexes]
        fold_sets[SplitPartNames['y_train']] = ys[dataset_index][train_indexes]

        fold_sets[SplitPartNames['X_test']] = Xs[dataset_index][test_indexes]
        fold_sets[SplitPartNames['y_test']] = ys[dataset_index][test_indexes]

        id3_fold_measures.append(trees_algorithms.measures_of_id3(fold_sets))
        cart_fold_measures.append(trees_algorithms.measures_of_cart(fold_sets))

        fold += 1

    id3_measures.append(id3_fold_measures)
    cart_measures.append(cart_fold_measures)
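To compare the two algorithms per dataset, the per-fold measures collected above can be averaged over the k folds; a minimal sketch, assuming each entry returned by measures_of_id3/measures_of_cart is a scalar or a fixed-length numeric vector:

import numpy as np

for dataset_index in range(len(datasets)):
    id3_mean = np.mean(id3_measures[dataset_index], axis=0)
    cart_mean = np.mean(cart_measures[dataset_index], axis=0)
    print('{}: ID3 mean {} vs CART mean {}'.format(
        utils.get_dataset_name(dataset_index), id3_mean, cart_mean))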
Example #11
def main():
    args = parser.parse_args()
    print(args)
    config = configure(args.config)

    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    print(colored(f"Model directory: {args.model_dir}", 'green'))

    assert os.path.isfile(args.model_dir)

    dataset_name = get_dataset_name(args.src, args.tgt)
    dataset_config = config.data.dataset[dataset_name]
    src_file = os.path.join(args.dataset_root, dataset_name,
                            args.src + '_list.txt')
    tgt_file = os.path.join(args.dataset_root, dataset_name,
                            args.tgt + '_list.txt')

    model = Model(base_net=args.network,
                  num_classes=dataset_config.num_classes,
                  frozen_layer='')
    del model.classifier_layer
    del model.contrast_layer

    model_state_dict = model.state_dict()
    trained_state_dict = torch.load(args.model_dir)['weights']

    keys = set(model_state_dict.keys())
    trained_keys = set(trained_state_dict.keys())

    shared_keys = keys.intersection(trained_keys)
    to_load_state_dict = {key: trained_state_dict[key] for key in shared_keys}
    model.load_state_dict(to_load_state_dict)
    model = model.cuda()

    # source classifier and domain classifier
    src_classifier = nn.Sequential(
        nn.Dropout(0.5),
        nn.Linear(model.base_network.out_dim, dataset_config.num_classes))
    initialize_layer(src_classifier)
    parameter_list = [{"params": src_classifier.parameters(), "lr": 1}]
    src_classifier = src_classifier.cuda()

    domain_classifier = nn.Sequential(nn.Dropout(0.5),
                                      nn.Linear(model.base_network.out_dim, 2))
    initialize_layer(domain_classifier)
    parameter_list += [{"params": domain_classifier.parameters(), "lr": 1}]
    domain_classifier = domain_classifier.cuda()

    group_ratios = [parameter['lr'] for parameter in parameter_list]
    optimizer = torch.optim.SGD(parameter_list,
                                lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay,
                                nesterov=args.nesterov)

    assert args.lr_scheduler == 'inv'
    lr_scheduler = InvScheduler(gamma=args.gamma,
                                decay_rate=args.decay_rate,
                                group_ratios=group_ratios,
                                init_lr=args.lr)

    # split into train and validation sets
    src_size = len(open(src_file).readlines())
    src_train_size = int(args.train_portion * src_size)
    src_train_indices, src_test_indices = np.split(
        np.random.permutation(src_size), [src_train_size])

    tgt_size = len(open(tgt_file).readlines())
    tgt_train_size = int(args.train_portion * tgt_size)
    tgt_train_indices, tgt_test_indices = np.split(
        np.random.permutation(tgt_size), [tgt_train_size])

    # define data loaders
    train_data_loader_kwargs = {
        'shuffle': True,
        'drop_last': True,
        'batch_size': args.batch_size,
        'num_workers': args.num_workers
    }
    test_data_loader_kwargs = {
        'shuffle': False,
        'drop_last': False,
        'batch_size': args.eval_batch_size,
        'num_workers': args.num_workers
    }

    train_transformer = get_transform(training=True)
    test_transformer = get_transform(training=False)

    data_loader = {}
    data_iterator = {}
    src_train_dataset = IndicesDataset(src_file,
                                       list(src_train_indices),
                                       transform=train_transformer)
    data_loader['src_train'] = DataLoader(src_train_dataset,
                                          **train_data_loader_kwargs)
    src_test_dataset = IndicesDataset(src_file,
                                      list(src_test_indices),
                                      transform=test_transformer)
    data_loader['src_test'] = DataLoader(src_test_dataset,
                                         **test_data_loader_kwargs)

    tgt_train_dataset = IndicesDataset(tgt_file,
                                       list(tgt_train_indices),
                                       transform=train_transformer)
    data_loader['tgt_train'] = DataLoader(tgt_train_dataset,
                                          **train_data_loader_kwargs)
    tgt_test_dataset = IndicesDataset(tgt_file,
                                      list(tgt_test_indices),
                                      transform=test_transformer)
    data_loader['tgt_test'] = DataLoader(tgt_test_dataset,
                                         **test_data_loader_kwargs)

    for key in data_loader:
        data_iterator[key] = iter(data_loader[key])

    # start training
    total_progress_bar = tqdm.tqdm(desc='Iterations',
                                   total=args.max_iterations,
                                   ascii=True,
                                   smoothing=0.01)
    class_criterion = nn.CrossEntropyLoss()
    model.base_network.eval()
    src_classifier.train()
    domain_classifier.train()
    iteration = 0
    while iteration < args.max_iterations:
        lr_scheduler.adjust_learning_rate(optimizer, iteration)
        optimizer.zero_grad()

        src_data = get_sample(data_loader, data_iterator, 'src_train')
        src_inputs = src_data['image_1'].cuda()
        src_labels = src_data['true_label'].cuda()

        tgt_data = get_sample(data_loader, data_iterator, 'tgt_train')
        tgt_inputs = tgt_data['image_1'].cuda()

        model.set_bn_domain(domain=0)
        with torch.no_grad():
            src_features = model.base_network(src_inputs)
            src_features = F.normalize(src_features, p=2, dim=1)
        src_class_logits = src_classifier(src_features)
        src_domain_logits = domain_classifier(src_features)

        model.set_bn_domain(domain=1)
        with torch.no_grad():
            tgt_features = model.base_network(tgt_inputs)
            tgt_features = F.normalize(tgt_features, p=2, dim=1)
        tgt_domain_logits = domain_classifier(tgt_features)

        src_classification_loss = class_criterion(src_class_logits, src_labels)

        domain_logits = torch.cat([src_domain_logits, tgt_domain_logits],
                                  dim=0)
        domain_labels = torch.tensor([0] * src_inputs.size(0) +
                                     [1] * tgt_inputs.size(0)).cuda()
        domain_classification_loss = class_criterion(domain_logits,
                                                     domain_labels)

        if iteration % args.print_acc_interval == 0:
            compute_accuracy(src_class_logits,
                             src_labels,
                             acc_metric=dataset_config.acc_metric,
                             print_result=True)
            compute_accuracy(domain_logits,
                             domain_labels,
                             acc_metric='total_mean',
                             print_result=True)

        total_loss = src_classification_loss + domain_classification_loss
        total_loss.backward()
        optimizer.step()

        iteration += 1
        total_progress_bar.update(1)

    # test
    model.base_network.eval()
    src_classifier.eval()
    domain_classifier.eval()
    with torch.no_grad():
        src_all_class_logits = []
        src_all_labels = []
        src_all_domain_logits = []
        model.set_bn_domain(domain=0)
        for src_test_data in tqdm.tqdm(data_loader['src_test'],
                                       desc='src_test',
                                       leave=False,
                                       ascii=True):
            src_test_inputs = src_test_data['image_1'].cuda()
            src_test_labels = src_test_data['true_label'].cuda()
            src_test_features = model.base_network(src_test_inputs)
            src_test_features = F.normalize(src_test_features, p=2, dim=1)
            src_test_class_logits = src_classifier(src_test_features)
            src_test_domain_logits = domain_classifier(src_test_features)

            src_all_class_logits += [src_test_class_logits]
            src_all_labels += [src_test_labels]
            src_all_domain_logits += [src_test_domain_logits]

        src_all_class_logits = torch.cat(src_all_class_logits, dim=0)
        src_all_labels = torch.cat(src_all_labels, dim=0)
        src_all_domain_logits = torch.cat(src_all_domain_logits, dim=0)

        src_test_class_acc = compute_accuracy(
            src_all_class_logits,
            src_all_labels,
            acc_metric=dataset_config.acc_metric,
            print_result=True)
        src_test_domain_acc = compute_accuracy(
            src_all_domain_logits,
            torch.zeros(src_all_domain_logits.size(0)).cuda(),
            acc_metric='total_mean',
            print_result=True)

        tgt_all_domain_logits = []
        model.set_bn_domain(domain=1)
        for tgt_test_data in tqdm.tqdm(data_loader['tgt_test'],
                                       desc='tgt_test',
                                       leave=False,
                                       ascii=True):
            tgt_test_inputs = tgt_test_data['image_1'].cuda()
            tgt_test_features = model.base_network(tgt_test_inputs)
            tgt_test_features = F.normalize(tgt_test_features, p=2, dim=1)
            tgt_test_domain_logits = domain_classifier(tgt_test_features)

            tgt_all_domain_logits += [tgt_test_domain_logits]

        tgt_all_domain_logits = torch.cat(tgt_all_domain_logits, dim=0)

        tgt_test_domain_acc = compute_accuracy(
            tgt_all_domain_logits,
            torch.ones(tgt_all_domain_logits.size(0)).cuda(),
            acc_metric='total_mean',
            print_result=True)

    write_list = [
        args.model_dir, src_test_class_acc, src_test_domain_acc,
        tgt_test_domain_acc
    ]
    # with open('hyper_search_office_home.csv', 'a') as f:
    with open(args.output_file, 'a') as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(write_list)
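get_sample is used in the training loop above but not shown; a minimal sketch of the usual pattern it presumably implements, restarting the iterator whenever the loader is exhausted:

def get_sample(data_loader, data_iterator, key):
    """Return the next batch for `key`, re-creating the iterator on StopIteration."""
    try:
        sample = next(data_iterator[key])
    except StopIteration:
        data_iterator[key] = iter(data_loader[key])
        sample = next(data_iterator[key])
    return sample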
Example #12
def main():
    args = parser.parse_args()
    print(args)
    config = configure(args.config)

    os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    print(colored(f"Model directory: {args.model_dir}", 'green'))

    assert os.path.isfile(args.model_dir)

    dataset_name = get_dataset_name(args.src, args.tgt)
    dataset_config = config.data.dataset[dataset_name]
    tgt_file = os.path.join(args.dataset_root, dataset_name,
                            args.tgt + '_list.txt')

    # tgt classification
    model = Model(base_net=args.network,
                  num_classes=dataset_config.num_classes,
                  frozen_layer='')
    del model.classifier_layer
    del model.contrast_layer

    model_state_dict = model.state_dict()
    trained_state_dict = torch.load(args.model_dir)['weights']

    keys = set(model_state_dict.keys())
    trained_keys = set(trained_state_dict.keys())

    shared_keys = keys.intersection(trained_keys)
    to_load_state_dict = {key: trained_state_dict[key] for key in shared_keys}
    model.load_state_dict(to_load_state_dict)
    model = model.cuda()

    # define data loader
    test_data_loader_kwargs = {
        'shuffle': False,
        'drop_last': False,
        'batch_size': args.batch_size,
        'num_workers': args.num_workers
    }

    test_transformer = get_transform(training=False)

    data_loader = {}
    data_iterator = {}
    tgt_test_dataset = DefaultDataset(tgt_file, transform=test_transformer)
    data_loader['tgt_test'] = DataLoader(tgt_test_dataset,
                                         **test_data_loader_kwargs)

    for key in data_loader:
        data_iterator[key] = iter(data_loader[key])

    # test
    model.base_network.eval()
    with torch.no_grad():
        tgt_all_features = []
        tgt_all_labels = []
        model.set_bn_domain(domain=1)
        for tgt_test_data in tqdm.tqdm(data_loader['tgt_test'],
                                       desc='tgt_test',
                                       leave=False,
                                       ascii=True):
            tgt_test_inputs = tgt_test_data['image_1'].cuda()
            tgt_test_labels = tgt_test_data['true_label'].cuda()
            tgt_test_features = model.base_network(tgt_test_inputs)
            # tgt_test_features = F.normalize(tgt_test_features, p=2, dim=1)
            tgt_all_features += [tgt_test_features]
            tgt_all_labels += [tgt_test_labels]

        tgt_all_features = torch.cat(tgt_all_features, dim=0)
        tgt_all_labels = torch.cat(tgt_all_labels, dim=0)

    tgt_all_features = tgt_all_features.cpu().numpy()
    tgt_all_labels = tgt_all_labels.cpu().numpy()

    features_pickle_file = os.path.join('features', args.src + '_' + args.tgt,
                                        'tgt_features.pkl')
    labels_pickle_file = os.path.join('features', args.src + '_' + args.tgt,
                                      'tgt_labels.pkl')
    pickle.dump(tgt_all_features, open(features_pickle_file, 'wb'))
    pickle.dump(tgt_all_labels, open(labels_pickle_file, 'wb'))
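The pickle.dump calls above assume that features/<src>_<tgt>/ already exists; a small guard one could run before the dumps (hypothetical helper, not part of the original script):

import os

def ensure_feature_dir(src, tgt, root='features'):
    """Create features/<src>_<tgt>/ if needed and return its path."""
    feature_dir = os.path.join(root, src + '_' + tgt)
    os.makedirs(feature_dir, exist_ok=True)
    return feature_dir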