Beispiel #1
0
"""
Test different data operations.
"""

import dev.util as util
from preprocess.DataIO import load_sequences_csv, load_seq_features_csv, load_event_features_csv
from preprocess.DataOperation import stitching, superposing, aggregating, data_info

# Exercise the three CSV loading helpers in turn.
# Every loader below reads the same file, so its path is built once here.
linkedin_csv = '{}/{}/Linkedin.csv'.format(util.POPPY_PATH, util.DATA_DIR)

# Step 1: event sequences.
# presumably maps each loader field (key) to a CSV column name (value);
# verify against load_sequences_csv.
domain_names = {
    'seq_id': 'id',
    'time': 'time',
    'event': 'event',
}
database = load_sequences_csv(linkedin_csv, domain_names=domain_names)
data_info(database)

# Step 2: features of the sequences.
# The database loaded above is handed to the loader and the returned
# object replaces it.
domain_dict = {
    'time': 'numerical',
    'option1': 'categorical',
}
database = load_seq_features_csv(
    linkedin_csv,
    seq_domain='id',
    domain_dict=domain_dict,
    database=database,
)
data_info(database)

# Step 3: features of the event types, using the same domain_dict and
# again passing in (and rebinding) the database.
database = load_event_features_csv(
    linkedin_csv,
    event_domain='event',
    domain_dict=domain_dict,
    database=database,
)
Beispiel #2
0
if __name__ == '__main__':
    # hyper-parameters
    memory_size = 400
    batch_size = 128
    seed = 2

    # CUDA is requested, but only used when torch reports it as available.
    use_cuda = True
    use_cuda = use_cuda and torch.cuda.is_available()

    # Seed the CPU generator always; seed the CUDA generator and pick the
    # extra DataLoader options only when running on CUDA.
    torch.manual_seed(seed)
    if use_cuda:
        torch.cuda.manual_seed(seed)
        kwargs = {'num_workers': 1, 'pin_memory': True}
    else:
        kwargs = {}

    # load event sequences from csv file
    iptv_csv = '{}/{}/IPTV_DATA.csv'.format(util.POPPY_PATH, util.DATA_DIR)
    database = load_sequences_csv(
        iptv_csv,
        domain_names={'seq_id': 'seq_id', 'time': 'time', 'event': 'event'},
        upperlimit=50000,
    )
    data_info(database)

    # sample batches from database:
    # two shuffled training loaders built on different samplers ...
    thinning_dataset = ThinningSampler(database=database, length=memory_size)
    trainloader_thinning = DataLoader(
        thinning_dataset,
        batch_size=batch_size,
        shuffle=True,
        **kwargs
    )

    interval_dataset = EventSampler(database=database, memorysize=memory_size)
    trainloader_interval = DataLoader(
        interval_dataset,
        batch_size=batch_size,
        shuffle=True,
        **kwargs
    )

    # ... and a validation loader over FullData with DataLoader defaults.
    validationloader = DataLoader(FullData(database=database))