def main(train_file, user_item_side_information_file, hierarchy_file, test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_item_side_information_file)

    with open(hierarchy_file) as f:
        hierarchy = json.loads(f.read())

    lda = LDAHierarquical(B, hierarchy, topics=15)

    ##### REMOVE_IT: debug block measuring topic overlap between each user
    ##### and their test cities; not part of the training pipeline
    def important_topics(x, topics):
        if not x:
            return x
        # Sort topics by weight, highest first, and keep the top `topics`.
        # (The original used an ascending Python 2 `cmp` sort, which returned
        # the *least* important topics.)
        transf = sorted(enumerate(x), key=lambda t: t[1], reverse=True)
        return [i for i, _ in transf[:topics]]

    topics = 3
    coincidencias = []
    T = tsv_to_matrix(test_file)  # load once, not on every loop iteration

    for user in range(1, 101):
        # Topics of the current user
        user_topics = important_topics(lda.model['users'][user], topics)

        # Topics of the user's test cities
        cities = T[user].nonzero()[0]
        cities_topics = [important_topics(lda.model['cities'].get(city, []), topics)
                         for city in cities]

        total = 0
        topics_compared = 0
        coinc = 0
        for city_topic in cities_topics:
            if city_topic:
                coinc += len(set(user_topics) & set(city_topic))
                topics_compared += len(user_topics)
                total += 1

        perc = coinc / float(topics_compared) if total else -1
        coincidencias.append([coinc, topics_compared, perc])

    with open('/tmp/coincidencias.json', 'w') as f:
        f.write(json.dumps(coincidencias))
    #####

    W = slim_train(A)
    recommendations = slim_lda_recommender(A, W, lda)
    compute_precision(recommendations, test_file)
def main(train_file, test_file):
    A = tsv_to_matrix(train_file)
    W = slim_train(A)
    recommendations = slim_recommender(A, W)
    compute_precision(recommendations, test_file)
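# All of these scripts rely on a `tsv_to_matrix` helper that is not shown in
# this section. A minimal sketch of what it might look like, assuming a
# `user<TAB>item<TAB>value` TSV with integer ids (the exact file format and
# the function body are assumptions, not confirmed by the source; only the
# name and the optional dimension arguments match the call sites above):
from scipy.sparse import lil_matrix

def tsv_to_matrix_sketch(path, n_rows=None, n_cols=None):
    """Hypothetical reader: builds a sparse user x item matrix from a TSV."""
    rows, cols, vals = [], [], []
    with open(path) as f:
        for line in f:
            u, i, v = line.rstrip('\n').split('\t')
            rows.append(int(u))
            cols.append(int(i))
            vals.append(float(v))
    n_rows = n_rows or max(rows) + 1
    n_cols = n_cols or max(cols) + 1
    M = lil_matrix((n_rows, n_cols))
    for r, c, v in zip(rows, cols, vals):
        M[r, c] = v
    return M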
def main(train_file, part_file, test_file):
    AG = tsv_to_matrix(train_file, 942, 1682)
    AP = tsv_to_matrix(part_file, 942, 1682)

    W1 = slim_train(AG)
    W2 = slim_train(AP)

    # Blend the global model (W1) and the partition model (W2), sweeping the
    # mixing weight gu from 0.0 to 1.0 in steps of 0.1.
    for i in range(0, 11):
        gu = i / 10.0  # explicit float division (the original breaks on Python 2)
        W = gu * W1 + (1 - gu) * W2
        print(gu)
        recommendations = slim_recommender(AP, W)
        compute_precision(recommendations, test_file)
def main(train_file, part_file):
    AG = tsv_to_matrix(train_file, 942, 1682)
    AP = tsv_to_matrix(part_file, 942, 1682)

    W1 = slim_train(AG)
    W2 = slim_train(AP)

    # gu = 0: use only the partition model W2. Note that precision is computed
    # against part_file, i.e. the same data the model was trained on.
    W = 0 * W1 + 1 * W2

    recommendations = slim_recommender(AP, W)
    compute_precision(recommendations, part_file)
def main(train_file, user_sideinformation_file, test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_sideinformation_file)

    """
    from util import mm2csr
    mm2csr(A, '/tmp/train.mat')
    mm2csr(useritem_featureitem, '/tmp/train_feature.mat')
    C = tsv_to_matrix(test_file)
    mm2csr(C, '/tmp/test.mat')
    """

    W = sslim_train(A, B)
    recommendations = slim_recommender(A, W)
    compute_precision(recommendations, test_file)
def main(train_file, user_sideinformation_file, hierarchy_file, test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_sideinformation_file, A.shape[0], A.shape[1])
    hierarchy = hierarchy_factory(hierarchy_file)

    # Learning using SLIM.
    # We handle user bias only in B, because B holds explicit evaluations.
    K = slim_train(handle_user_bias(B))
    W = slim_train(A)

    Wline = generate_subitem_hierarchy(K, W, hierarchy)
    WlineNorm = normalize_wline(Wline)

    #recommendations = slim_recommender(A, W + 0.2 * WlineNorm)
    recommendations = slim_recommender(A, WlineNorm)

    # Debug check: see if the predictor is fair or not
    #user_cities = np.array([map(hierarchy, B[i].nonzero()[0].tolist()) for i in range(B.shape[0])])
    #G = tsv_to_matrix(test_file)
    #print('MUST BE EMPTY: ', set(G[1].nonzero()[0]) & set(user_cities[1]))
    ### ---- END REMOVE_ME

    compute_precision(recommendations, test_file)
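# `handle_user_bias` is called above but not defined in this section. A common
# way to handle user bias for explicit ratings is to subtract each user's mean
# rating from their nonzero entries; the sketch below assumes that behaviour
# (the name matches the call above, the body is an assumption):
from scipy.sparse import lil_matrix

def handle_user_bias_sketch(B):
    """Hypothetical: mean-center each user's explicit ratings."""
    B = B.tocsr()
    out = lil_matrix(B.shape)
    for u in range(B.shape[0]):
        cols = B[u].nonzero()[1]
        if len(cols) == 0:
            continue
        mean = B[u, cols].toarray().mean()
        for c in cols:
            out[u, c] = B[u, c] - mean
    return out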
    # (tail of sslim_train: Balpha is presumably B scaled by the side-information
    # weight alpha; requires `import multiprocessing` and
    # `from scipy.sparse import vstack`)
    Mline = vstack((A, Balpha), format='lil')

    # Fit each column of W separately. We put something in each position of W
    # to allow direct indexing of each position in parallel.
    total_columns = Mline.shape[1]
    ranges = generate_slices(total_columns)
    separated_tasks = []

    for from_j, to_j in ranges:
        separated_tasks.append([from_j, to_j, Mline, model])

    pool = multiprocessing.Pool()
    pool.map(work, separated_tasks)
    pool.close()
    pool.join()

    return shared_array


W = sslim_train(A, B)
recommendations = slim_recommender(A, W)
compute_precision(recommendations, test_file)

"""
main('data/atracoes/10/usuarios_atracoes_train.tsv',
     'data/atracoes/10/palavras_atracoes.tsv',
     'data/atracoes/10/usuarios_atracoes_test.tsv')
"""
""" alpha = l1_reg+l2_reg l1_ratio = l1_reg/alpha model = SGDRegressor( penalty='elasticnet', fit_intercept=False, alpha=alpha, l1_ratio=l1_ratio, ) total_columns = A.shape[1] ranges = generate_slices(total_columns) separated_tasks = [] for from_j, to_j in ranges: separated_tasks.append([from_j, to_j, A, model]) pool = multiprocessing.Pool() pool.map(work, separated_tasks) pool.close() pool.join() return shared_array W = slim_train(A) recommendations = slim_recommender(A, W) compute_precision(recommendations, test_file)
# (fragment begins mid-call in the original; the unpacking order below is
# inferred from the fetch list and the print statement that follows)
loss, train_accuracy, summaries_string, triplet_loss = sess.run(
    [cost, accuracy, summaries, sialoss])

print("Iter=%d/epoch=%d, Loss=%.6f, Triplet loss=%.6f, "
      "Training Accuracy=%.6f, lr=%f"
      % (step * batch_size, epoch1, loss, triplet_loss, train_accuracy, lr))
writer.add_summary(summaries_string, step)

if step > 0 and step % validation_interval == 0:
    valacc = []
    a2, preds, vlbls = sess.run(
        [validation_accuracy, tf.argmax(valpred, 1), val_labels])
    valacc.append(a2)

    conf_mat = tf.math.confusion_matrix(vlbls, preds)
    conf_mat = conf_mat.eval(session=sess)
    precision = compute_precision(conf_mat)
    recall = compute_recall(conf_mat)

    print("Iter=%d/epoch=%d, Validation Accuracy=%.6f"
          % (step * batch_size, epoch1, np.mean(valacc)))

    valaccs.append(np.mean(valacc))
    precisions.append(precision)
    recalls.append(recall)

    # Early stopping
    if np.mean(valacc) >= val_threshold and epoch1 >= 15:
        break
    if np.mean(valacc) >= 0.89 and lr == 1e-5:
        lr /= 10
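# `compute_precision` and `compute_recall` above take a confusion matrix
# (rows = true labels, columns = predictions, per tf.math.confusion_matrix).
# The standard definitions are per-class precision = diagonal / column sum and
# per-class recall = diagonal / row sum; whether the original macro-averages
# them as sketched here is an assumption:
import numpy as np

def compute_precision_sketch(conf_mat):
    """Hypothetical: macro-averaged precision from a confusion matrix."""
    conf_mat = np.asarray(conf_mat, dtype=float)
    col_sums = conf_mat.sum(axis=0)
    with np.errstate(divide='ignore', invalid='ignore'):
        per_class = np.where(col_sums > 0, np.diag(conf_mat) / col_sums, 0.0)
    return per_class.mean()

def compute_recall_sketch(conf_mat):
    """Hypothetical: macro-averaged recall from a confusion matrix."""
    conf_mat = np.asarray(conf_mat, dtype=float)
    row_sums = conf_mat.sum(axis=1)
    with np.errstate(divide='ignore', invalid='ignore'):
        per_class = np.where(row_sums > 0, np.diag(conf_mat) / row_sums, 0.0)
    return per_class.mean()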
def main(train_file, part_file, test_file):
    AG = tsv_to_matrix(train_file, 942, 1682)
    AP = tsv_to_matrix(part_file, 942, 1682)

    W1 = slim_train(AG)
    W2 = slim_train(AP)

    # Each row of matrix_N holds (HR, ARHR) at top-N for one value of gu.
    k = 2
    matrix_5 = np.zeros((21, k))
    matrix_10 = np.zeros((21, k))
    matrix_15 = np.zeros((21, k))
    matrix_20 = np.zeros((21, k))

    for i in range(0, 105, 5):
        gu = i / 100.0  # explicit float division (the original breaks on Python 2)
        W = gu * W1 + (1 - gu) * W2
        print("gu: " + str(gu))

        recommendations = slim_recommender(AP, W)
        top5, top10, top15, top20 = compute_precision(recommendations, test_file)

        for j in range(2):
            matrix_5[i // 5][j] = top5[j]
            matrix_10[i // 5][j] = top10[j]
            matrix_15[i // 5][j] = top15[j]
            matrix_20[i // 5][j] = top20[j]

    hr_values = []
    hr_values1 = []
    index1, value1 = max(enumerate(matrix_5[:, 0]), key=operator.itemgetter(1))
    index2, value2 = max(enumerate(matrix_10[:, 0]), key=operator.itemgetter(1))
    index3, value3 = max(enumerate(matrix_15[:, 0]), key=operator.itemgetter(1))
    index4, value4 = max(enumerate(matrix_20[:, 0]), key=operator.itemgetter(1))
    hr_values.extend([index1 * 0.05, value1, index2 * 0.05, value2,
                      index3 * 0.05, value3, index4 * 0.05, value4])
    hr_values1.extend([matrix_5[20][0], matrix_10[20][0],
                       matrix_15[20][0], matrix_20[20][0]])

    arhr_values = []
    arhr_values1 = []
    index1, value1 = max(enumerate(matrix_5[:, 1]), key=operator.itemgetter(1))
    index2, value2 = max(enumerate(matrix_10[:, 1]), key=operator.itemgetter(1))
    index3, value3 = max(enumerate(matrix_15[:, 1]), key=operator.itemgetter(1))
    index4, value4 = max(enumerate(matrix_20[:, 1]), key=operator.itemgetter(1))
    arhr_values.extend([index1 * 0.05, value1, index2 * 0.05, value2,
                        index3 * 0.05, value3, index4 * 0.05, value4])
    arhr_values1.extend([matrix_5[20][1], matrix_10[20][1],
                         matrix_15[20][1], matrix_20[20][1]])

    print('k8 top5: %s' % matrix_5)
    print('k8 top10: %s' % matrix_10)
    print('k8 top15: %s' % matrix_15)
    print('k8 top20: %s' % matrix_20)
    print('Max HR: %s' % hr_values)
    print('HR at gu = 1: %s' % hr_values1)
    print('Max ARHR: %s' % arhr_values)
    print('ARHR at gu = 1: %s' % arhr_values1)
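# The eight max() extractions above repeat one pattern: find the gu grid index
# with the best metric, then convert it back to a gu value. A small helper
# (hypothetical, not in the original) makes the pattern explicit:
import operator

def best_gu_sketch(column, step=0.05):
    """Return (best_gu, best_value) for one metric column of the gu grid."""
    index, value = max(enumerate(column), key=operator.itemgetter(1))
    return index * step, value

# e.g. hr_values could then be built as:
#   for m in (matrix_5, matrix_10, matrix_15, matrix_20):
#       hr_values.extend(best_gu_sketch(m[:, 0]))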
def main(args):
    # tensorboard
    logger_tb = logger.Logger(log_dir=args.experiment_name)
    #augmenter = get_augmenter(args)

    # train dataloader and val dataset
    train_dataset = NucleiDataset(args.train_data, 'train', transform=True)
    val_dataset = NucleiDataset(args.val_data, 'val', transform=True)

    train_params = {
        'batch_size': args.batch_size,
        'shuffle': False,
        'num_workers': args.num_workers,
    }
    train_dataloader = DataLoader(train_dataset, **train_params)

    # device
    device = torch.device(args.device)

    # model
    if args.model == "fusion":
        model = FusionNet(args, train_dataset.dim)
    elif args.model == "dilation":
        model = DilationCNN(train_dataset.dim)
    elif args.model == "unet":
        model = UNet(args.num_kernel, args.kernel_size, train_dataset.dim)

    if args.device == "cuda":
        # parse gpu_ids for data parallel
        if ',' in args.gpu_ids:
            gpu_ids = [int(ids) for ids in args.gpu_ids.split(',')]
        else:
            gpu_ids = int(args.gpu_ids)

        # parallelize computation
        if type(gpu_ids) is not int:
            model = nn.DataParallel(model, gpu_ids)

    model.to(device)

    # optimizer
    parameters = model.parameters()
    if args.optimizer == "adam":
        optimizer = torch.optim.Adam(parameters, args.lr)
    else:
        optimizer = torch.optim.SGD(parameters, args.lr)

    # loss
    loss_function = dice_loss

    # train model
    for epoch in range(args.epoch):
        model.train()
        with tqdm.tqdm(total=len(train_dataloader.dataset),
                       unit=f"epoch {epoch} itr") as progress_bar:
            total_loss = []
            total_iou = []

            for i, (x_train, y_train) in enumerate(train_dataloader):
                with torch.set_grad_enabled(True):
                    # send data and label to device
                    x = x_train.float().to(device)
                    y = y_train.float().to(device)

                    # predict segmentation
                    pred = model(x)

                    # calculate loss
                    loss = loss_function(pred, y)
                    total_loss.append(loss.item())

                    # calculate IoU
                    predictions = pred.clone().squeeze().detach().cpu().numpy()
                    gt = y.clone().squeeze().detach().cpu().numpy()
                    ious = [metrics.get_ious(p, g, 0.5)
                            for p, g in zip(predictions, gt)]
                    total_iou.append(np.mean(ious))

                    # back prop
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # log loss and iou
                logger_tb.update_value('train loss', np.mean(total_loss), epoch)
                logger_tb.update_value('train iou', np.mean(total_iou), epoch)
                progress_bar.update(len(x))

        # validation
        model.eval()
        # accumulate across the whole validation set (the original reset these
        # lists inside the loop, so only the last sample was averaged)
        total_precision = []
        total_iou = []
        total_loss = []

        for idx in range(len(val_dataset)):
            x_val, y_val, mask_val = val_dataset[idx]

            with torch.no_grad():
                # send data and label to device
                x_val = np.expand_dims(x_val, axis=0)
                x = torch.tensor(x_val).float().to(device)
                y = torch.tensor(y_val).float().to(device)

                # predict segmentation
                pred = model(x)

                # calculate loss
                loss = loss_function(pred, y)
                total_loss.append(loss.item())

                # calculate IoU
                prediction = pred.clone().squeeze().detach().cpu().numpy()
                gt = y.clone().squeeze().detach().cpu().numpy()
                iou = metrics.get_ious(prediction, gt, 0.5)
                total_iou.append(iou)

                # calculate precision
                precision = metrics.compute_precision(prediction, mask_val, 0.5)
                total_precision.append(precision)

                # display segmentation on tensorboard
                if idx == 1:
                    original = x_val
                    truth = np.expand_dims(y_val, axis=0)
                    seg = pred.cpu().squeeze().detach().numpy()
                    seg = np.expand_dims(seg, axis=0)

                    logger_tb.update_image("original", original, 0)
                    logger_tb.update_image("ground truth", truth, 0)
                    logger_tb.update_image("segmentation", seg, epoch)

        # log metrics
        logger_tb.update_value('val loss', np.mean(total_loss), epoch)
        logger_tb.update_value('val iou', np.mean(total_iou), epoch)
        logger_tb.update_value('val precision', np.mean(total_precision), epoch)

    # save model
    ckpt_dict = {
        'model_name': model.__class__.__name__,
        'model_args': model.args_dict(),
        'model_state': model.to('cpu').state_dict(),
    }
    ckpt_path = os.path.join(args.save_dir, f"{model.__class__.__name__}.pth")
    torch.save(ckpt_dict, ckpt_path)
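# `dice_loss` above is not defined in this section. The usual soft Dice
# objective for binary segmentation is 1 - 2|P∩G| / (|P| + |G|), computed on
# predicted probabilities; this is a sketch of that standard form, with the
# smoothing term `eps` an assumption:
import torch

def dice_loss_sketch(pred, target, eps=1e-7):
    """Hypothetical soft Dice loss; assumes pred holds probabilities in [0, 1]."""
    pred = pred.contiguous().view(pred.size(0), -1)
    target = target.contiguous().view(target.size(0), -1)
    intersection = (pred * target).sum(dim=1)
    denom = pred.sum(dim=1) + target.sum(dim=1)
    dice = (2.0 * intersection + eps) / (denom + eps)
    return 1.0 - dice.mean()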