prev_reversed_item_dict, w_behavior, i) list_prev_entries.append(prev_entry_dict) list_freq_entries.append(prev_freq_dict) list_reversed_entries.append(prev_reversed_item_dict) list_trans_matrix.append(transition_matrix) sp_matrix_path = model_name + '_transition_matrix_MC.npz' # nb_item = len(item_dict) # print('Density : %.6f' % (transition_matrix.nnz * 1.0 / nb_item / nb_item)) if not os.path.exists(o_dir): os.makedirs(o_dir) saved_file = os.path.join(o_dir, sp_matrix_path) print("Save model in ", saved_file) sp.save_npz(saved_file, transition_matrix) mc_model = MarkovChain(item_dict, list_prev_entries[:-1], list_freq_entries[:-1], list_reversed_entries[:-1], list_trans_matrix, w_behavior, mc_order) topk = 50 print('Predict to outfile') predict_file = os.path.join(o_dir, 'predict_' + model_name + '.txt') MC_utils.write_predict(predict_file, test_instances, topk, mc_model) print('Predict done') ground_truth, predict = MC_utils.read_predict(predict_file) for topk in [5, 10, 15]: print("Top : ", topk) # hit_rate = MC_hit_ratio(test_instances, topk, mc_model) # recall = MC_recall(test_instances, topk, mc_model) hit_rate = MC_utils.hit_ratio(ground_truth, predict, topk) recall = MC_utils.recall(ground_truth, predict, topk) print("hit ratio: ", hit_rate) print("recall: ", recall)
# NOTE(review): the line below is a whitespace-collapsed excerpt of a script.
# Because it begins with `#`, Python treats the ENTIRE line as a comment as
# written; the original line breaks must be restored before any of it runs.
#
# What the collapsed text describes, in order:
#   1. Assign `transition_pair_dicts` from
#      MC_utils.multicore_calculate_transition_matrix(...).
#   2. For each dict in `transition_pair_dicts`, print its size and extend
#      `row` / `col` with the two components of every key and `data` with the
#      matching values.
#   3. Build `transition_matrix` with sp.csr_matrix from (data, (row, col)),
#      shape (NB_ITEMS, NB_ITEMS), dtype "float32", where
#      NB_ITEMS = len(item_dict). `sp` is presumably scipy.sparse; confirm
#      against the file's imports.
#   4. Print the density, computed as getnnz() / NB_ITEMS / NB_ITEMS.
#   5. Create `o_dir` if it does not exist and save the matrix there with
#      sp.save_npz as model_name + '_transition_matrix_MC.npz'.
#   6. Construct a MarkovChain from the dictionaries, weights, matrix and
#      `mc_order`, then print MC_hit_ratio and MC_recall on `test_instances`
#      for topk in 5, 10 and 15.
# transition_pair_dicts = MC_utils.multicore_calculate_transition_matrix(train_instances, item_dict, item_freq_dict, reversed_item_dict, w_behavior, mc_order) row = [] col = [] data = [] for pair_dict in transition_pair_dicts: print('Number pair in core: ', len(pair_dict)) row.extend([p[0] for p in pair_dict]) col.extend([p[1] for p in pair_dict]) data.extend([pair_dict[p] for p in pair_dict]) NB_ITEMS = len(item_dict) transition_matrix = sp.csr_matrix((data, (row, col)), shape=(NB_ITEMS, NB_ITEMS), dtype="float32") # transition_matrix nb_nonzero = transition_matrix.getnnz() density = nb_nonzero * 1.0 / NB_ITEMS / NB_ITEMS print("Density of matrix: {:.6f}".format(density)) sp_matrix_path = model_name+'_transition_matrix_MC.npz' # nb_item = len(item_dict) # print('Density : %.6f' % (transition_matrix.nnz * 1.0 / nb_item / nb_item)) if not os.path.exists(o_dir): os.makedirs(o_dir) saved_file = os.path.join(o_dir, sp_matrix_path) print("Save model in ", saved_file) sp.save_npz(saved_file, transition_matrix) mc_model = MarkovChain(item_dict, reversed_item_dict, item_freq_dict, w_behavior, transition_matrix, mc_order) for topk in [5, 10, 15]: print("Top : ", topk) hit_rate = MC_hit_ratio(test_instances, topk, mc_model) recall = MC_recall(test_instances, topk, mc_model) print("hit ratio: ", hit_rate) print("recall: ", recall)
from MC import MarkovChain
from bitarray import bitarray
from cryptography.fernet import Fernet
import sys

# The recursion limit is raised before the chain is trained and queried.
sys.setrecursionlimit(150000)

# Load the training corpus as a list of lines (line endings kept).
with open("../books/3001.txt", "r", encoding="utf-8") as myfile:
    data = myfile.readlines()

# Feed every corpus line to the Markov chain, one call per line.
m = MarkovChain()
for i in data:
    m.learn(i)

# Query the trained chain; whatever babble() returns is discarded here.
length = 10000
m.babble(length)


def Encrypt(data):
    """Encrypt ``data`` under a freshly generated Fernet key.

    Args:
        data: Payload handed unchanged to ``Fernet.encrypt``.

    Returns:
        A ``(key, ciphertext)`` tuple; the key is required to decrypt later.
    """
    fresh_key = Fernet.generate_key()
    token = Fernet(fresh_key).encrypt(data)
    return fresh_key, token


def Decrypt(key_ciphertext):
    """Decrypt a ``(key, ciphertext)`` pair as produced by ``Encrypt``.

    Args:
        key_ciphertext: Indexable pair whose element 0 is the Fernet key and
            whose element 1 is the ciphertext token.

    Returns:
        The value returned by ``Fernet.decrypt`` for that token.
    """
    cipher = Fernet(key_ciphertext[0])
    return cipher.decrypt(key_ciphertext[1])
w_behavior = {'buy': 1, 'cart': 0.5, 'fav': 0.5, 'pv': 0.5} else: with open(w_behavior_file, 'r') as fp: w_behavior = json.load(fp) # print(nb_test) print( "---------------------@Build knowledge-------------------------------") MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs, item_freq_dict, user_dict = MC_utils.build_knowledge( train_instances + test_instances, w_behavior) if not os.path.exists(o_dir): os.makedirs(o_dir) saved_file = os.path.join(o_dir, 'transition_matrix_MC.npz') # print("Save model in ", saved_file) transition_matrix = sp.load_npz(saved_file) mc_model = MarkovChain(item_dict, reversed_item_dict, item_freq_dict, w_behavior, transition_matrix, mc_order) if ex_file is not None: ex_instances = MC_utils.read_instances_lines_from_file(ex_file) else: ex_instances = test_instances for i in random.sample(ex_instances, nb_predict): elements = i.split('|') b_seq = elements[-mc_model.mc_order - 1:-1] # prev_basket = [item for item in re.split('[\\s]+',b_seq[-2].strip())] prev_item = [] for prev_basket in b_seq[:-1]: prev_item += [ p.split(':')[0] for p in re.split('[\\s]+', prev_basket.strip()) ]