Esempio n. 1
0
def main():
    """Compile the DREAM model's RNN into a TVM Relay graph."""
    # Route TVM compilation to the CUDA GPU backend, with LLVM as host.
    torch_tvm.enable(opt_level=3,
                     device_type="gpu",
                     device="cuda",
                     host="llvm")

    checkpoint_path = argv[1]
    config = Config(constants.DREAM_CONFIG)
    with open(checkpoint_path, 'rb') as fh:
        model = torch.load(fh)

    constructor = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)
    prior_baskets = constructor.get_baskets('prior', reconstruct=False)

    # to_relay needs one concrete sample batch to infer tensor types
    baskets, lens, users = Dataset(prior_baskets)[0:config.batch_size]
    baskets, lens, users = sort_batch_of_lists(baskets, lens, users)
    baskets = pad_batch_of_lists(baskets, lens[0])

    hidden = model.init_hidden(config.batch_size)

    # users' basket sequences: embed each user's baskets, then stack into a batch
    per_user = [torch.cat(model.embed_baskets(user_baskets), 0).unsqueeze(0)
                for user_baskets in baskets]
    batch = torch.cat(per_user, 0)

    sample_args = [batch, hidden]

    relay_graph = torch_tvm.to_relay(model.rnn, sample_args)
Esempio n. 2
0
def main():
    """Export the DREAM model's RNN component to an ONNX file."""
    checkpoint_path = argv[1]
    config = Config(constants.DREAM_CONFIG)
    with open(checkpoint_path, 'rb') as fh:
        model = torch.load(fh)

    constructor = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)
    prior_baskets = constructor.get_baskets('prior', reconstruct=False)

    # the exporter needs a sample input to infer types
    baskets, lens, users = Dataset(prior_baskets)[0:config.batch_size]
    baskets, lens, users = sort_batch_of_lists(baskets, lens, users)
    baskets = pad_batch_of_lists(baskets, lens[0])

    hidden = model.init_hidden(config.batch_size)

    # users' basket sequences: embed each user's baskets, then stack into a batch
    sequences = [torch.cat(model.embed_baskets(user_baskets), 0).unsqueeze(0)
                 for user_baskets in baskets]
    batch = torch.cat(sequences, 0)

    rnn = model.rnn

    print("Converting model of type: " + str(type(model)))
    sample_input = (batch, hidden)
    sample_output = rnn(*sample_input)

    torch.onnx.export(rnn, batch, './models/model.onnx', verbose=True)
Esempio n. 3
0
def run():
    """Run the KNN-DTW baseline on the validation split.

    Returns:
        (preds_all, distances) as produced by KnnDtw.predict.
    """
    embeddings = EmbeddingWrapper('product')
    constructor = BasketConstructor('./data/', './data/')
    ub_basket = constructor.get_baskets('prior', reconstruct=False)
    # keep the fixed 20% slice of users (seeded split for reproducibility)
    ok, ub_basket = train_test_split(ub_basket, test_size=0.20, random_state=0)
    #embedding_wrapper = EmbeddingWrapper('tafeng_products')
    print(ub_basket)

    all_baskets = ub_basket.basket.values
    print(all_baskets)
    # cast every item id to string
    print("nested change")
    all_baskets = nested_change(list(all_baskets), str)
    print("embedding_wrapper.remove_products_wo_embeddings(all_baskets)")
    all_baskets = embeddings.remove_products_wo_embeddings(all_baskets)
    print("uncommon products")
    all_baskets = remove_products_which_are_uncommon(all_baskets)
    print("short baskets")
    medium_baskets, all_baskets = remove_short_baskets(all_baskets)
    print(medium_baskets , all_baskets)
    print("nested change")
    all_baskets = nested_change(all_baskets, embeddings.lookup_ind_f)
    print("split_data")
    splits = split_data(all_baskets)
    train_ub, val_ub_input, val_ub_target, test_ub_input, test_ub_target = splits
    print('knndtw')
    model = KnnDtw(n_neighbors=[5])
    preds_all, distances = model.predict(train_ub, val_ub_input,
                                         embeddings.basket_dist_EMD,
                                         embeddings.basket_dist_REMD)
    print(preds_all)
    print(distances)
    #print("Wasserstein distance", sum(distances)/len(distances))
    return preds_all, distances
Esempio n. 4
0
def main():
    """Convert an ONNX export of the DREAM RNN into a TensorFlow graph file."""
    dream_path = argv[1]
    config = Config(constants.DREAM_CONFIG)

    with open(dream_path, 'rb') as fh:
        model = torch.load(fh)

    constructor = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)
    prior_baskets = constructor.get_baskets('prior', reconstruct=False)

    # a sample batch is needed to infer types
    baskets, lens, users = Dataset(prior_baskets)[0:config.batch_size]
    baskets, lens, users = sort_batch_of_lists(baskets, lens, users)
    baskets = pad_batch_of_lists(baskets, lens[0])

    # users' basket sequences: embed each user's baskets, then stack into a batch
    sequences = [torch.cat(model.embed_baskets(user_baskets), 0).unsqueeze(0)
                 for user_baskets in baskets]
    batch = torch.cat(sequences, 0)

    onnx_path = argv[2]

    onnx_model = onnx.load(onnx_path)
    tf_rep = onnx_tf.backend.prepare(onnx_model)

    tf_rep.export_graph('./models/model.pb')
Esempio n. 5
0
def association_rules():
    """Load prior baskets and run the embedding-based preprocessing steps."""
    embeddings = EmbeddingWrapper('product')
    constructor = BasketConstructor('./data/', './data/')
    ub_basket = constructor.get_baskets('prior', reconstruct=False)
    # keep the fixed 20% slice of users (seeded split for reproducibility)
    ok, ub_basket = train_test_split(ub_basket, test_size=0.20, random_state=0)
    # embedding_wrapper = EmbeddingWrapper('tafeng_products')

    all_baskets = ub_basket.basket.values
    # cast every item id to string
    print("nested change")
    all_baskets = nested_change(list(all_baskets), str)
    print("embedding_wrapper.remove_products_wo_embeddings(all_baskets)")
    all_baskets = embeddings.remove_products_wo_embeddings(all_baskets)
Esempio n. 6
0
def run():
    """KNN-DTW baseline: preprocess prior baskets, then predict on validation.

    Returns:
        (preds_all, distances) as produced by KnnDtw.predict.
    """
    embedding_wrapper = EmbeddingWrapper('product')
    bc = BasketConstructor('./data/', './data/')
    ub_basket = bc.get_baskets('prior', reconstruct=False)

    all_baskets = ub_basket.basket.values
    # cast every item id to string
    all_baskets = nested_change(list(all_baskets), str)

    all_baskets = embedding_wrapper.remove_products_wo_embeddings(all_baskets)
    all_baskets = remove_products_which_are_uncommon(all_baskets)
    # remove_short_baskets returns (medium_baskets, all_baskets) in the sibling
    # pipelines; keep only the filtered baskets here instead of binding the
    # whole tuple to all_baskets, which would corrupt the steps below.
    # NOTE(review): assumes the same two-value return as the other callers —
    # confirm against remove_short_baskets' definition.
    _, all_baskets = remove_short_baskets(all_baskets)
    all_baskets = nested_change(all_baskets, embedding_wrapper.lookup_ind_f)

    train_ub, val_ub_input, val_ub_target, test_ub_input, test_ub_target = split_data(
        all_baskets)

    knndtw = KnnDtw(n_neighbors=[5])
    preds_all, distances = knndtw.predict(train_ub, val_ub_input,
                                          embedding_wrapper.basket_dist_EMD,
                                          embedding_wrapper.basket_dist_REMD)
    return preds_all, distances
Esempio n. 7
0
        total_loss += loss.data

    # Logging
    elapsed = (time() - start_time) * 1000 / num_batchs
    total_loss = total_loss / num_batchs / dr_config.batch_size
    print('[Evaluation]| Epochs {:3d} | Elapsed {:02.2f} | Loss {:05.2f} |'.
          format(epoch, elapsed, total_loss))
    return total_loss


# Pin GPU enumeration to PCI bus order so device indices are stable across runs.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"

# Wall-clock reference used by the progress messages below.
start = datetime.now()

# Prepare input
bc = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)
# Users' baskets (from the 'prior' order set; cached, not reconstructed)
ub_basket = bc.get_baskets('prior', reconstruct=False)
print('{} - constructed user baskets'.format(datetime.now() - start))

# The reorder variant additionally needs reordered baskets and item histories.
if constants.REORDER:
    # Users' reordered baskets
    ub_rbks = bc.get_baskets('prior', reconstruct=False, reordered=True)
    # User's item history
    ub_ihis = bc.get_item_history('prior', reconstruct=False)
    # Train test split
    train_ub, test_ub, train_rbks, test_rbks, train_ihis, test_ihis = train_test_split(
        ub_basket, ub_rbks, ub_ihis, test_size=0.2)
    del ub_basket, ub_rbks, ub_ihis  # memory saving
    train_ub, test_ub = Dataset(train_ub, train_rbks,
                                train_ihis), Dataset(test_ub, test_rbks,
Esempio n. 8
0
File: eval.py Progetto: yul091/DREAM
    '''
        get item's embedding
        pid can be a integer or a torch.cuda.LongTensor
    '''
    if isinstance(pid, torch.cuda.LongTensor) or isinstance(
            pid, torch.LongTensor):
        return dr_model.encode.weight[pid]
    elif isinstance(pid, int):
        return dr_model.encode.weight[pid].unsqueeze(0)
    else:
        print('Unsupported Index Type %s' % type(pid))
        return None


if __name__ == '__main__':
    # Build the evaluation dataset from the users' prior-order baskets.
    constructor = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)
    ub_basket = constructor.get_baskets('prior', reconstruct=False)
    ub = Dataset(ub_basket)
    up = constructor.get_users_products('prior')

    # Load the trained DREAM checkpoint and switch to evaluation mode.
    dr_config = Config(constants.DREAM_CONFIG)
    with open(dr_config.checkpoint_dir, 'rb') as checkpoint:
        dr_model = torch.load(checkpoint)
    dr_model.eval()

    # score_ub, id_u = eval_pred(dr_model, ub)
    # (u,p) score
    # uid, pid = 1, 196
    # ub = Dataset(ub_basket[ub_basket.user_id == 1])
    # dr_hidden = dr_model.init_hidden(1) # init hidden for batch_size = 1 prediction
    # score_up = eval_up(uid, pid, dr_model, ub, dr_hidden)
Esempio n. 9
0
def runtopncustomers():
    """For sample customers, rank their purchased products by frequency and
    collect each customer's top-N most bought products.

    Side effects: writes the preprocessed baskets to data/testprint.txt.
    """
    embedding_wrapper = EmbeddingWrapper('product')
    bc = BasketConstructor('./data/', './data/')
    ub_basket = bc.get_baskets('prior', reconstruct=False)
    # keep the fixed 20% slice of users (seeded split for reproducibility)
    ok, ub_basket = train_test_split(ub_basket, test_size=0.20, random_state=0)

    all_baskets = ub_basket.basket.values
    # cast every item id to string
    print("nested change")
    all_baskets = nested_change(list(all_baskets), str)
    print("embedding_wrapper.remove_products_wo_embeddings(all_baskets)")
    all_baskets = embedding_wrapper.remove_products_wo_embeddings(all_baskets)

    # per-customer top-N sizes; hoisted out of the loop (does not depend on s)
    top_nc = get_top_nc(all_baskets, 2)
    for s in range(2):
        print(all_baskets[s])
        # flatten this customer's baskets into a single list of item ids
        itemperklant = []
        for basket in all_baskets[s]:
            itemperklant.extend(basket)
        print(itemperklant)

        unique_items = np.unique(itemperklant)
        print(unique_items)
        # column 0: item id, column 1: purchase count
        arrayklant = np.zeros((len(unique_items), 2))
        arrayklant[:, 0] = unique_items
        for idx in range(len(unique_items)):
            arrayklant[idx, 1] = itemperklant.count(unique_items[idx])
        print(arrayklant)

        # ascending by count; renamed so the builtin `sorted` is not shadowed
        sorted_counts = arrayklant[np.argsort(arrayklant[:, 1])]
        print('sorted', sorted_counts)
        product = np.array([])
        print('average length', top_nc[s])
        for reverse in range(int(top_nc[s])):
            print('test', sorted_counts[-reverse - 1, :])
            # take the (reverse+1)-th most frequent row; the original appended
            # sorted[-reverse, :], which for reverse == 0 picked the LEAST
            # frequent item instead of the most frequent one
            product = np.append(product, sorted_counts[-reverse - 1, :])

    train_ub, val_ub_input, val_ub_target, test_ub_input, test_ub_target = split_data(all_baskets)

    write_path = 'data/testprint'
    # `with` closes the file on exit; the explicit close() was redundant
    with open(write_path + '.txt', 'w') as results:
        results.write('All baskets test ' + str(all_baskets) + '\n')
Esempio n. 10
0
def main():
    """Train the DREAM model: build datasets, run the epoch loop, and
    checkpoint whenever validation loss improves.

    NOTE(review): this chunk may be truncated after the KeyboardInterrupt
    handler; code is left unchanged, only comments were added.
    """
    # Pin GPU enumeration to PCI bus order and restrict visible devices.
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = constants.GPUS

    # Route TVM compilation to the CUDA GPU backend, with LLVM as host.
    torch_tvm.enable(opt_level=3,
                     device_type="gpu",
                     device="cuda",
                     host="llvm")

    # Prepare input
    bc = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)
    # Users' baskets
    ub_basket = bc.get_baskets('prior', reconstruct=False)

    # The reorder variant additionally needs reordered baskets and item histories.
    if constants.REORDER:
        # Users' reordered baskets
        ub_rbks = bc.get_baskets('prior', reconstruct=False, reordered=True)
        # User's item history
        ub_ihis = bc.get_item_history('prior', reconstruct=False)
        # Train test split
        train_ub, test_ub, train_rbks, test_rbks, train_ihis, test_ihis = train_test_split(
            ub_basket, ub_rbks, ub_ihis, test_size=0.2)
        del ub_basket, ub_rbks, ub_ihis  # memory saving
        train_ub, test_ub = Dataset(train_ub, train_rbks, train_ihis), Dataset(
            test_ub, test_rbks, test_ihis)
        del train_rbks, test_rbks, train_ihis, test_ihis  # memory saving
    else:
        train_ub, test_ub = train_test_split(ub_basket, test_size=0.2)
        del ub_basket
        train_ub, test_ub = Dataset(train_ub), Dataset(test_ub)

    # Model config
    dr_config = Config(constants.DREAM_CONFIG)
    dr_model = DreamModel(dr_config)
    if dr_config.cuda:
        dr_model.cuda()

    # Optimizer
    optim = torch.optim.Adam(dr_model.parameters(), lr=dr_config.learning_rate)
    # optim = torch.optim.Adadelta(dr_model.parameters())
    # optim = torch.optim.SGD(dr_model.parameters(), lr=dr_config.learning_rate, momentum=0.9)
    writer = SummaryWriter(log_dir='runs/{}'.format(
        dr_config.alias))  # tensorboard writer
    writer.add_text('config', str(dr_config))
    best_val_loss = None

    try:
        # Echo the full configuration before training starts.
        for k, v in constants.DREAM_CONFIG.items():
            print(k, v)

        # training
        for epoch in range(dr_config.epochs):
            if constants.REORDER:
                train_reorder_dream()
            else:
                train_dream()
            print('-' * 89)
            if constants.REORDER:
                val_loss = evaluate_reorder_dream()
            else:
                val_loss = evaluate_dream()
            print('-' * 89)
            # checkpoint: save the whole model whenever validation loss improves
            if not best_val_loss or val_loss < best_val_loss:
                with open(
                        dr_config.checkpoint_dir.format(epoch=epoch,
                                                        loss=val_loss),
                        'wb') as f:
                    torch.save(dr_model, f)
                best_val_loss = val_loss
            else:
                # Manual SGD slow down lr if no improvement in val_loss
                # dr_config.learning_rate = dr_config.learning_rate / 4
                pass

    except KeyboardInterrupt:
        print('*' * 89)
        print('Got keyboard Interrupt and stopped early')