Exemple #1
0
def get_loaders(dataset_name, dataset_dir, batch_size, num_workers=8):
    """Load a dataset and wrap its train/validation/test splits in DataLoaders.

    Args:
        dataset_name: ``'movielens1m'`` or ``'criteo'`` (case-sensitive).
        dataset_dir: Root directory; the data file is read from
            ``ml-1m/ratings.dat`` or ``criteo/train.txt`` beneath it.
        batch_size: Batch size passed to every DataLoader.
        num_workers: Worker count passed to every DataLoader. Defaults to 8,
            the value that used to be hard-coded.

    Returns:
        A pair ``(field_dims, (train_loader, validation_loader, test_loader))``
        where ``field_dims`` is the dataset's ``field_dims`` attribute.

    Raises:
        ValueError: If ``dataset_name`` is not one of the names above.
    """
    if dataset_name == 'movielens1m':
        dataset_path = path.join(dataset_dir, 'ml-1m', 'ratings.dat')
        dataset = MovieLens1MDataset(dataset_path)
    elif dataset_name == 'criteo':
        dataset_path = path.join(dataset_dir, 'criteo', 'train.txt')
        # The cache location is ``.criteo`` inside the user's home directory.
        dataset = CriteoDataset(dataset_path,
                                cache_path=str(Path.home() / '.criteo'))
    else:
        raise ValueError(f"Unknown dataset {dataset_name}!")

    # 80% train, 10% validation; the test split takes whatever is left so the
    # three lengths always add up to the dataset size despite int truncation.
    total_length = len(dataset)
    train_length = int(total_length * 0.8)
    validation_length = int(total_length * 0.1)
    test_length = total_length - train_length - validation_length
    train_dataset, validation_dataset, test_dataset = random_split(
        dataset, (train_length, validation_length, test_length))

    # All three loaders share the same settings; no ``shuffle`` argument is
    # passed, so each loader uses the DataLoader default ordering.
    train_data_loader, validation_data_loader, test_data_loader = (
        DataLoader(subset, batch_size=batch_size, num_workers=num_workers)
        for subset in (train_dataset, validation_dataset, test_dataset))

    features_dimension = dataset.field_dims

    return features_dimension, (train_data_loader, validation_data_loader,
                                test_data_loader)
Exemple #2
0
def get_dataset(name, path):
    """Build the dataset object selected by ``name``.

    Args:
        name: Dataset identifier; one of ``'movielens1M'``,
            ``'movielens20M'``, ``'criteo'`` or ``'avazu'`` (case-sensitive).
        path: Passed unchanged as the only argument to the dataset class.

    Returns:
        The instance produced by the matching dataset class.

    Raises:
        ValueError: If ``name`` is not one of the identifiers above.
    """
    if name == 'movielens1M':
        return MovieLens1MDataset(path)
    if name == 'movielens20M':
        return MovieLens20MDataset(path)
    if name == 'criteo':
        return CriteoDataset(path)
    if name == 'avazu':
        return AvazuDataset(path)
    # Format instead of concatenating so that a non-string ``name`` (e.g.
    # None) still yields the documented ValueError rather than a TypeError.
    raise ValueError(f'unknown dataset name: {name}')
Exemple #3
0
def get_dataset(name, path):
    """Build the dataset object selected by ``name``.

    Args:
        name: Dataset identifier; one of ``'movielens1M'``,
            ``'movielens20M'``, ``'flow'``, ``'criteo'`` or ``'avazu'``
            (case-sensitive).
        path: Passed unchanged as the first argument to the dataset class.

    Returns:
        The instance produced by the matching dataset class.

    Raises:
        ValueError: If ``name`` is not one of the identifiers above.
    """
    if name == 'movielens1M':
        return MovieLens1MDataset(path)
    if name == 'movielens20M':
        return MovieLens20MDataset(path)
    if name == 'flow':
        return FlowDataset(path)
    if name == 'criteo':
        # Criteo is the only dataset given extra arguments: a ``.criteo``
        # cache path and ``predict=False``.
        return CriteoDataset(path, cache_path='.criteo', predict=False)
    if name == 'avazu':
        return AvazuDataset(path)
    # Format instead of concatenating so that a non-string ``name`` (e.g.
    # None) still yields the documented ValueError rather than a TypeError.
    raise ValueError(f'unknown dataset name: {name}')
Exemple #4
0
def get_dataset(name, path):
    """Ensure the parent directory of ``path`` exists, then build the dataset.

    Args:
        name: Dataset identifier; one of ``'movielens1M'``,
            ``'movielens20M'``, ``'criteo'`` or ``'avazu'`` (case-sensitive).
        path: Passed unchanged as the only argument to the dataset class.
            Its parent directory is created first if it is missing.

    Returns:
        The instance produced by the matching dataset class.

    Raises:
        ValueError: If ``name`` is not one of the identifiers above.
    """
    parent_dir = os.path.dirname(path)
    # A bare file name has no directory component, so there is nothing to
    # create (and os.makedirs('') would fail).
    if parent_dir:
        try:
            # exist_ok removes the check-then-create race of a separate
            # os.path.exists() test.
            os.makedirs(parent_dir, exist_ok=True)
        except OSError:
            # Directory creation stays best-effort, as before, but only
            # filesystem errors are ignored; KeyboardInterrupt, SystemExit
            # and programming errors now propagate.
            pass

    if name == 'movielens1M':
        return MovieLens1MDataset(path)
    if name == 'movielens20M':
        return MovieLens20MDataset(path)
    if name == 'criteo':
        return CriteoDataset(path)
    if name == 'avazu':
        return AvazuDataset(path)
    # Format instead of concatenating so that a non-string ``name`` still
    # yields the documented ValueError rather than a TypeError.
    raise ValueError(f'unknown dataset name: {name}')