def main():
    """Load a trained DREAM checkpoint and convert its RNN to a TVM Relay graph.

    argv[1] is the path to the pickled DREAM model.
    """
    torch_tvm.enable(opt_level=3, device_type="gpu", device="cuda", host="llvm")
    checkpoint_path = argv[1]
    dr_config = Config(constants.DREAM_CONFIG)
    with open(checkpoint_path, 'rb') as checkpoint:
        dr_model = torch.load(checkpoint)
    basket_constructor = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)
    user_baskets = basket_constructor.get_baskets('prior', reconstruct=False)
    # to_relay needs a concrete sample input to infer tensor types,
    # so build exactly one batch the same way the training code does.
    baskets, lens, users = Dataset(user_baskets)[0:dr_config.batch_size]
    baskets, lens, users = sort_batch_of_lists(baskets, lens, users)
    baskets = pad_batch_of_lists(baskets, lens[0])
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)
    # embed each user's basket sequence and stack them into one batch tensor
    ub_seqs = torch.cat(
        [torch.cat(dr_model.embed_baskets(user_sequence), 0).unsqueeze(0)
         for user_sequence in baskets], 0)
    relay_graph = torch_tvm.to_relay(dr_model.rnn, [ub_seqs, dr_hidden])
def main():
    """Export the trained DREAM RNN to ONNX.

    argv[1] is the path to a pickled DREAM checkpoint; the traced graph is
    written to ./models/model.onnx.
    """
    path = argv[1]
    dr_config = Config(constants.DREAM_CONFIG)
    with open(path, 'rb') as f:
        dr_model = torch.load(f)
    bc = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)
    ub_basket = bc.get_baskets('prior', reconstruct=False)
    # the exporter needs a sample input to infer types; build one batch
    baskets, lens, users = Dataset(ub_basket)[0: dr_config.batch_size]
    baskets, lens, users = sort_batch_of_lists(baskets, lens, users)
    baskets = pad_batch_of_lists(baskets, lens[0])
    dr_hidden = dr_model.init_hidden(dr_config.batch_size)
    ub_seqs = []  # users' basket sequence
    for ubaskets in baskets:
        x = dr_model.embed_baskets(ubaskets)
        ub_seqs.append(torch.cat(x, 0).unsqueeze(0))
    ub_seqs = torch.cat(ub_seqs, 0)
    r_model = dr_model.rnn
    print("Converting model of type: " + str(type(dr_model)))
    m_in = (ub_seqs, dr_hidden)
    # sanity-check that the model runs on the sample input before tracing
    m_out = r_model(*m_in)
    # BUG FIX: trace with the same argument tuple the model is called with;
    # previously only ub_seqs was passed, silently dropping the hidden state
    # from the exported graph's inputs.
    torch.onnx.export(r_model, m_in, './models/model.onnx', verbose=True)
def run():
    """Run the basket-preprocessing pipeline and kNN-DTW prediction.

    Returns the (predictions, distances) pair produced by ``KnnDtw.predict``
    on the validation split.
    """
    wrapper = EmbeddingWrapper('product')
    constructor = BasketConstructor('./data/', './data/')
    user_baskets = constructor.get_baskets('prior', reconstruct=False)
    # keep the second split (20% of users) — matches the sibling pipelines
    _, user_baskets = train_test_split(user_baskets, test_size=0.20, random_state=0)
    print(user_baskets)
    baskets = user_baskets.basket.values
    print(baskets)
    # every item id becomes a string
    print("nested change")
    baskets = nested_change(list(baskets), str)
    print("embedding_wrapper.remove_products_wo_embeddings(all_baskets)")
    baskets = wrapper.remove_products_wo_embeddings(baskets)
    print("uncommon products")
    baskets = remove_products_which_are_uncommon(baskets)
    print("short baskets")
    medium_baskets, baskets = remove_short_baskets(baskets)
    print(medium_baskets, baskets)
    # map item ids to embedding indices
    print("nested change")
    baskets = nested_change(baskets, wrapper.lookup_ind_f)
    print("split_data")
    train_ub, val_ub_input, val_ub_target, test_ub_input, test_ub_target = split_data(baskets)
    print('knndtw')
    knndtw = KnnDtw(n_neighbors=[5])
    preds_all, distances = knndtw.predict(
        train_ub, val_ub_input,
        wrapper.basket_dist_EMD, wrapper.basket_dist_REMD)
    print(preds_all)
    print(distances)
    return preds_all, distances
def main():
    """Convert an exported ONNX model to a TensorFlow graph.

    argv[1] (the DREAM checkpoint path) is still part of the CLI contract but
    is no longer read: unlike ONNX tracing, ``onnx_tf.backend.prepare`` works
    directly from the ONNX graph and needs no sample input, so the previous
    basket-loading / embedding work here was dead code and has been removed.
    argv[2] is the path to the ONNX model; output goes to ./models/model.pb.
    """
    model_path = argv[2]
    onnx_model = onnx.load(model_path)
    tf_model = onnx_tf.backend.prepare(onnx_model)
    tf_model.export_graph('./models/model.pb')
def association_rules():
    """Preprocess user baskets up to the embedding-filter step.

    Prefix of the full pipeline (see ``run``): split off 20% of users,
    stringify item ids, and drop products without embeddings. Returns None;
    presumably a stub kept for association-rule experiments.
    """
    wrapper = EmbeddingWrapper('product')
    constructor = BasketConstructor('./data/', './data/')
    user_baskets = constructor.get_baskets('prior', reconstruct=False)
    _, user_baskets = train_test_split(user_baskets, test_size=0.20, random_state=0)
    baskets = user_baskets.basket.values
    # every item id becomes a string
    print("nested change")
    baskets = nested_change(list(baskets), str)
    print("embedding_wrapper.remove_products_wo_embeddings(all_baskets)")
    baskets = wrapper.remove_products_wo_embeddings(baskets)
def run():
    """Quiet variant of the basket pipeline + kNN-DTW prediction.

    Same steps as the verbose ``run`` pipeline but without the train/test
    user split and without debug prints. Returns (predictions, distances).
    """
    wrapper = EmbeddingWrapper('product')
    constructor = BasketConstructor('./data/', './data/')
    user_baskets = constructor.get_baskets('prior', reconstruct=False)
    baskets = nested_change(list(user_baskets.basket.values), str)
    baskets = wrapper.remove_products_wo_embeddings(baskets)
    baskets = remove_products_which_are_uncommon(baskets)
    # NOTE(review): a sibling pipeline unpacks this call as
    # `medium_baskets, all_baskets = remove_short_baskets(...)`; if the helper
    # returns a pair here too, this keeps the whole tuple — verify.
    baskets = remove_short_baskets(baskets)
    baskets = nested_change(baskets, wrapper.lookup_ind_f)
    train_ub, val_ub_input, val_ub_target, test_ub_input, test_ub_target = split_data(baskets)
    knndtw = KnnDtw(n_neighbors=[5])
    preds_all, distances = knndtw.predict(
        train_ub, val_ub_input,
        wrapper.basket_dist_EMD, wrapper.basket_dist_REMD)
    return preds_all, distances
total_loss += loss.data # Logging elapsed = (time() - start_time) * 1000 / num_batchs total_loss = total_loss / num_batchs / dr_config.batch_size print('[Evaluation]| Epochs {:3d} | Elapsed {:02.2f} | Loss {:05.2f} |'. format(epoch, elapsed, total_loss)) return total_loss os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" start = datetime.now() # Prepare input bc = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR) # Users' baskets ub_basket = bc.get_baskets('prior', reconstruct=False) print('{} - constructed user baskets'.format(datetime.now() - start)) if constants.REORDER: # Users' reordered baskets ub_rbks = bc.get_baskets('prior', reconstruct=False, reordered=True) # User's item history ub_ihis = bc.get_item_history('prior', reconstruct=False) # Train test split train_ub, test_ub, train_rbks, test_rbks, train_ihis, test_ihis = train_test_split( ub_basket, ub_rbks, ub_ihis, test_size=0.2) del ub_basket, ub_rbks, ub_ihis # memory saving train_ub, test_ub = Dataset(train_ub, train_rbks, train_ihis), Dataset(test_ub, test_rbks,
# NOTE(review): fragment — the enclosing `def` line for this embedding-lookup
# helper is not visible in this chunk; tokens are kept exactly as found.
'''
get item's embedding
pid can be an integer or a torch.cuda.LongTensor
'''
# Tensor index: fancy-indexing the embedding matrix returns one row per id.
if isinstance(pid, torch.cuda.LongTensor) or isinstance(
        pid, torch.LongTensor):
    return dr_model.encode.weight[pid]
elif isinstance(pid, int):
    # single id: unsqueeze so the caller always gets a 2-D (1, dim) tensor
    return dr_model.encode.weight[pid].unsqueeze(0)
else:
    print('Unsupported Index Type %s' % type(pid))
    return None


if __name__ == '__main__':
    # Script entry: load baskets and the trained DREAM model for ad-hoc scoring.
    bc = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)
    ub_basket = bc.get_baskets('prior', reconstruct=False)
    ub = Dataset(ub_basket)
    up = bc.get_users_products('prior')
    dr_config = Config(constants.DREAM_CONFIG)
    # NOTE(review): checkpoint_dir is opened directly — presumably it holds a
    # concrete file path here, not a directory template; verify against Config.
    with open(dr_config.checkpoint_dir, 'rb') as f:
        dr_model = torch.load(f)
    dr_model.eval()  # inference mode: disables dropout etc.
    # score_ub, id_u = eval_pred(dr_model, ub)
    # (u,p) score
    # uid, pid = 1, 196
    # ub = Dataset(ub_basket[ub_basket.user_id == 1])
    # dr_hidden = dr_model.init_hidden(1)  # init hidden for batch_size = 1 prediction
    # score_up = eval_up(uid, pid, dr_model, ub, dr_hidden)
def runtopncustomers():
    """For the first two customers, rank their products by purchase frequency
    and collect the top-N rows (N = ``get_top_nc`` per customer), then dump
    the preprocessed baskets to data/testprint.txt.

    Debug prints are kept intentionally; this is an exploratory script.
    """
    embedding_wrapper = EmbeddingWrapper('product')
    bc = BasketConstructor('./data/', './data/')
    ub_basket = bc.get_baskets('prior', reconstruct=False)
    _, ub_basket = train_test_split(ub_basket, test_size=0.20, random_state=0)
    all_baskets = ub_basket.basket.values
    # every item id becomes a string
    print("nested change")
    all_baskets = nested_change(list(all_baskets), str)
    print("embedding_wrapper.remove_products_wo_embeddings(all_baskets)")
    all_baskets = embedding_wrapper.remove_products_wo_embeddings(all_baskets)
    # per-customer top-N sizes; loop-invariant, so computed once (was inside loop)
    top_nc = get_top_nc(all_baskets, 2)
    # every customer sequence (first two customers only)
    for s in range(2):
        print(all_baskets[s])
        # flatten this customer's baskets into one list of item ids
        itemperklant = []
        for basket in all_baskets[s]:
            for item in basket:
                itemperklant.append(item)
        print(itemperklant)
        unique_items = np.unique(itemperklant)
        print(unique_items)
        # column 0: item id, column 1: purchase count
        arrayklant = np.zeros((int(len(unique_items)), 2))
        arrayklant[:, 0] = unique_items
        for ding in range(len(unique_items)):
            arrayklant[ding, 1] = itemperklant.count(unique_items[ding])
        print(arrayklant)
        # BUG FIX: the original shadowed the builtin `sorted` and appended
        # row [-reverse] (at reverse == 0 that is row 0 — the *least* frequent)
        # while printing row [-reverse - 1]; both now take the same top row.
        counts_ascending = arrayklant[np.argsort(arrayklant[:, 1])]
        print('sorted', counts_ascending)
        product = np.array([])
        print('average length', top_nc[s])
        for reverse in range(int(top_nc[s])):
            print('test', counts_ascending[-reverse - 1, :])
            product = np.append(product, counts_ascending[-reverse - 1, :])
    train_ub, val_ub_input, val_ub_target, test_ub_input, test_ub_target = split_data(all_baskets)
    # dump the preprocessed baskets for manual inspection
    write_path = 'data/testprint'
    with open(write_path + '.txt', 'w') as results:
        results.write('All baskets test ' + str(all_baskets) + '\n')
def main():
    """Train the DREAM next-basket model.

    Builds train/test Datasets from prior baskets (optionally with reorder
    features), then runs the epoch loop, checkpointing whenever validation
    loss improves. Relies on module-level helpers (train_dream,
    evaluate_dream, and their *_reorder_* variants) defined elsewhere in
    this file.
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = constants.GPUS
    torch_tvm.enable(opt_level=3, device_type="gpu", device="cuda", host="llvm")
    # Prepare input
    bc = BasketConstructor(constants.RAW_DATA_DIR, constants.FEAT_DATA_DIR)
    # Users' baskets
    ub_basket = bc.get_baskets('prior', reconstruct=False)
    if constants.REORDER:
        # Users' reordered baskets
        ub_rbks = bc.get_baskets('prior', reconstruct=False, reordered=True)
        # User's item history
        ub_ihis = bc.get_item_history('prior', reconstruct=False)
        # Train test split (all three frames split in lockstep)
        train_ub, test_ub, train_rbks, test_rbks, train_ihis, test_ihis = train_test_split(
            ub_basket, ub_rbks, ub_ihis, test_size=0.2)
        del ub_basket, ub_rbks, ub_ihis  # memory saving
        train_ub, test_ub = Dataset(train_ub, train_rbks, train_ihis), Dataset(
            test_ub, test_rbks, test_ihis)
        del train_rbks, test_rbks, train_ihis, test_ihis  # memory saving
    else:
        train_ub, test_ub = train_test_split(ub_basket, test_size=0.2)
        del ub_basket
        train_ub, test_ub = Dataset(train_ub), Dataset(test_ub)
    # Model config
    dr_config = Config(constants.DREAM_CONFIG)
    dr_model = DreamModel(dr_config)
    if dr_config.cuda:
        dr_model.cuda()
    # Optimizer
    optim = torch.optim.Adam(dr_model.parameters(), lr=dr_config.learning_rate)
    # optim = torch.optim.Adadelta(dr_model.parameters())
    # optim = torch.optim.SGD(dr_model.parameters(), lr=dr_config.learning_rate, momentum=0.9)
    writer = SummaryWriter(log_dir='runs/{}'.format(
        dr_config.alias))  # tensorboard writer
    writer.add_text('config', str(dr_config))
    best_val_loss = None
    try:
        # echo the full config into the log before training starts
        for k, v in constants.DREAM_CONFIG.items():
            print(k, v)
        # training
        for epoch in range(dr_config.epochs):
            if constants.REORDER:
                train_reorder_dream()
            else:
                train_dream()
            print('-' * 89)
            if constants.REORDER:
                val_loss = evaluate_reorder_dream()
            else:
                val_loss = evaluate_dream()
            print('-' * 89)
            # checkpoint only on improvement; `not best_val_loss` also covers
            # the first epoch (best_val_loss is still None)
            if not best_val_loss or val_loss < best_val_loss:
                with open(
                        dr_config.checkpoint_dir.format(epoch=epoch,
                                                        loss=val_loss),
                        'wb') as f:
                    torch.save(dr_model, f)
                best_val_loss = val_loss
            else:
                # Manual SGD slow down lr if no improvement in val_loss
                # dr_config.learning_rate = dr_config.learning_rate / 4
                pass
    except KeyboardInterrupt:
        # allow Ctrl-C to end training gracefully, keeping the best checkpoint
        print('*' * 89)
        print('Got keyboard Interrupt and stopped early')