Example #1
def main(train_file, user_item_side_information_file, hierarchy_file,
         test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_item_side_information_file)
    with open(hierarchy_file) as f:
        hierarchy = json.load(f)

    lda = LDAHierarquical(B, hierarchy, topics=15)

    #####REMOVE_IT
    def important_topics(x, topics):
        if not x:
            return x
        transf = sorted(enumerate(x), key=lambda pair: pair[1])
        return [pair[0] for pair in transf[:topics]]

    topics = 3

    coincidencias = []
    # load the test matrix once instead of on every loop iteration
    T = tsv_to_matrix(test_file)
    for user in range(1, 101):
        # Topics of the current user
        user_topics = important_topics(lda.model['users'][user], topics)

        # Topics of the user's test cities
        cities = T[user].nonzero()[0]

        cities_topics = [
            important_topics(lda.model['cities'].get(city, []), topics)
            for city in cities
        ]

        total = 0
        topics_compared = 0
        coinc = 0
        for city_topic in cities_topics:
            if city_topic:
                coinc += len(set(user_topics) & set(city_topic))
                topics_compared += len(user_topics)
                total += 1

        if total:
            perc = (coinc / float(topics_compared))
        else:
            perc = -1

        coincidencias.append([coinc, topics_compared, perc])

    with open('/tmp/coincidencias.json', 'w') as f:
        f.write(json.dumps(coincidencias))
    #####

    W = slim_train(A)

    recommendations = slim_lda_recommender(A, W, lda)

    compute_precision(recommendations, test_file)
Example #2
def main(train_file, test_file):
    A = tsv_to_matrix(train_file)

    W = slim_train(A)

    recommendations = slim_recommender(A, W)

    compute_precision(recommendations, test_file)
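
The examples above all rely on a tsv_to_matrix helper that is not shown. A minimal sketch of such a loader, assuming a user<TAB>item<TAB>rating TSV with integer ids and a SciPy sparse result (the repository's real helper may instead return a dense array or use a different signature):

import numpy as np
from scipy.sparse import coo_matrix

def tsv_to_matrix(path, rows=None, cols=None):
    # Read user<TAB>item<TAB>rating triples into a sparse user-item matrix.
    data = np.loadtxt(path, delimiter='\t')
    users = data[:, 0].astype(int)
    items = data[:, 1].astype(int)
    ratings = data[:, 2]
    n_rows = rows if rows is not None else users.max() + 1
    n_cols = cols if cols is not None else items.max() + 1
    return coo_matrix((ratings, (users, items)), shape=(n_rows, n_cols)).tolil()
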
Example #3
def main(train_file, test_file):
    A = tsv_to_matrix(train_file)

    W = slim_train(A)

    recommendations = slim_recommender(A, W)

    compute_precision(recommendations, test_file)
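
Several examples also call slim_recommender(A, W) and score the result with compute_precision. A hypothetical sketch of what such a recommender could do, assuming A is a sparse user-item matrix and W a dense item-item weight matrix (the real function's output format may differ):

import numpy as np

def slim_recommender(A, W):
    # Score every item for every user as A . W, hide items the user already
    # has in A, and return a dict mapping each user to a ranked item list.
    scores = np.asarray(A.dot(W))
    recommendations = {}
    for u in range(A.shape[0]):
        seen = set(A[u].nonzero()[1])       # columns already rated by user u
        ranked = np.argsort(-scores[u])
        recommendations[u] = [int(i) for i in ranked if i not in seen]
    return recommendations
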
Example #4
def main(train_file, user_item_side_information_file, hierarchy_file, test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_item_side_information_file)
    with open(hierarchy_file) as f:
        hierarchy = json.load(f)

    lda = LDAHierarquical(B, hierarchy, topics=15)

    #####REMOVE_IT
    def important_topics(x, topics):
        if not x:
            return x
        transf = sorted(enumerate(x), key=lambda pair: pair[1])
        return [pair[0] for pair in transf[:topics]]

    topics = 3

    coincidencias = []
    # load the test matrix once instead of on every loop iteration
    T = tsv_to_matrix(test_file)
    for user in range(1, 101):
        # Topics of the current user
        user_topics = important_topics(lda.model['users'][user], topics)

        # Topics of the user's test cities
        cities = T[user].nonzero()[0]

        cities_topics = [
            important_topics(lda.model['cities'].get(city, []), topics)
            for city in cities
        ]

        total = 0
        topics_compared = 0
        coinc = 0
        for city_topic in cities_topics:
            if city_topic:
                coinc += len(set(user_topics) & set(city_topic))
                topics_compared += len(user_topics)
                total += 1

        if total:
            perc = (coinc/float(topics_compared))
        else:
            perc = -1

        coincidencias.append([coinc, topics_compared, perc])

    with open('/tmp/coincidencias.json', 'w') as f:
        f.write(json.dumps(coincidencias))
    #####

    W = slim_train(A)

    recommendations = slim_lda_recommender(A, W, lda)

    compute_precision(recommendations, test_file)
Example #5
def main(train_file, part_file, test_file):

    AG = tsv_to_matrix(train_file, 942, 1682)
    AP = tsv_to_matrix(part_file, 942, 1682)

    W1 = slim_train(AG)
    W2 = slim_train(AP)
    for i in range(0, 11):
        W = (i / 10) * W1 + (1 - i / 10) * W2
        print(i / 10)
        recommendations = slim_recommender(AP, W)
        compute_precision(recommendations, test_file)
Example #6
def main(train_file, part_file):

    AG = tsv_to_matrix(train_file, 942, 1682)
    AP = tsv_to_matrix(part_file, 942, 1682)

    W1 = slim_train(AG)
    W2 = slim_train(AP)

    W = 0 * W1 + 1 * W2

    recommendations = slim_recommender(AP, W)

    compute_precision(recommendations, part_file)
Example #7
def main(train_file, user_sideinformation_file, test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_sideinformation_file)
    """
    from util import mm2csr
    mm2csr(A, '/tmp/train.mat')
    mm2csr(useritem_featureitem, '/tmp/train_feature.mat')
    C = tsv_to_matrix(test_file)
    mm2csr(C, '/tmp/test.mat')
    """

    W = sslim_train(A, B)

    recommendations = slim_recommender(A, W)

    compute_precision(recommendations, test_file)
Example #8
def main(train_file, user_sideinformation_file, test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_sideinformation_file)

    """
    from util import mm2csr
    mm2csr(A, '/tmp/train.mat')
    mm2csr(useritem_featureitem, '/tmp/train_feature.mat')
    C = tsv_to_matrix(test_file)
    mm2csr(C, '/tmp/test.mat')
    """

    W = sslim_train(A, B)

    recommendations = slim_recommender(A, W)

    compute_precision(recommendations, test_file)
Example #9
def main(train_file, user_sideinformation_file, hierarchy_file, test_file):
    A = tsv_to_matrix(train_file)
    B = tsv_to_matrix(user_sideinformation_file, A.shape[0], A.shape[1])
    hierarchy = hierarchy_factory(hierarchy_file)

    # Learning using SLIM
    # We handle user bias only in B, because B contains explicit ratings
    K = slim_train(handle_user_bias(B))
    W = slim_train(A)

    Wline = generate_subitem_hierarchy(K, W, hierarchy)
    WlineNorm = normalize_wline(Wline)

    #recommendations = slim_recommender(A, W + 0.2 * WlineNorm)
    recommendations = slim_recommender(A, WlineNorm)

    # See if the predictor is just or not
    #user_cities = np.array([ map(hierarchy, B[i].nonzero()[0].tolist()) for i in range(B.shape[0]) ])
    #G = tsv_to_matrix(test_file)
    #print 'MUST BE EMPTY: ', set(G[1].nonzero()[0]) & set(user_cities[1])
    ### ---- END REMOVE ME

    compute_precision(recommendations, test_file)
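
Example #9 runs B through handle_user_bias before training because B holds explicit ratings. The helper is not shown; a plausible sketch, assuming it simply mean-centers each user's observed ratings (the repository's actual definition may differ):

import numpy as np

def handle_user_bias(B):
    # Hypothetical: subtract each user's mean rating from that user's
    # observed (nonzero) entries, leaving unobserved cells at zero.
    B = B.tolil(copy=True)
    for u in range(B.shape[0]):
        if not B.rows[u]:
            continue
        mean = np.mean(B.data[u])
        B.data[u] = [r - mean for r in B.data[u]]
    return B
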
Example #10
    Mline = vstack((A, Balpha), format='lil')

    # Fit each column of W separately. We put something in every position of W
    # so that each position can be indexed directly in parallel
    total_columns = Mline.shape[1]
    ranges = generate_slices(total_columns)
    separated_tasks = []

    for from_j, to_j in ranges:
        separated_tasks.append([from_j, to_j, Mline, model])

    pool = multiprocessing.Pool()
    pool.map(work, separated_tasks)
    pool.close()
    pool.join()

    return shared_array


W = sslim_train(A, B)

recommendations = slim_recommender(A, W)

compute_precision(recommendations, test_file)

"""
main('data/atracoes/10/usuarios_atracoes_train.tsv',
     'data/atracoes/10/palavras_atracoes.tsv',
     'data/atracoes/10/usuarios_atracoes_test.tsv')
"""
Example #11
    """
    alpha = l1_reg + l2_reg
    l1_ratio = l1_reg / alpha

    model = SGDRegressor(
        penalty='elasticnet',
        fit_intercept=False,
        alpha=alpha,
        l1_ratio=l1_ratio,
    )

    total_columns = A.shape[1]
    ranges = generate_slices(total_columns)
    separated_tasks = []

    for from_j, to_j in ranges:
        separated_tasks.append([from_j, to_j, A, model])

    pool = multiprocessing.Pool()
    pool.map(work, separated_tasks)
    pool.close()
    pool.join()

    return shared_array

W = slim_train(A)

recommendations = slim_recommender(A, W)

compute_precision(recommendations, test_file)
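
The work function mapped over the pool is not shown in either snippet. A hypothetical sketch of a per-column worker, assuming a module-level shared_array for the learned weights and ignoring how A and shared_array are shared between processes:

import numpy as np

def work(task):
    # Fit columns [from_j, to_j) of W: column j of A is the regression target,
    # the remaining item columns are the features, and the fitted elastic-net
    # coefficients become column j of W.
    from_j, to_j, A, model = task
    A = A.tolil()
    for j in range(from_j, to_j):
        target = np.asarray(A[:, j].todense()).ravel()
        backup = A[:, j].copy()
        A[:, j] = 0            # exclude item j itself so that diag(W) = 0
        model.fit(A.tocsr(), target)
        A[:, j] = backup
        shared_array[:, j] = model.coef_
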
Example #12
            [cost, accuracy, summaries, sialoss])
        print(
            "Iter=%d/epoch=%d, Loss=%.6f, Triplet loss=%.6f, Training Accuracy=%.6f, lr=%f"
            % (step * batch_size, epoch1, loss, triplet_loss, train_accuracy,
               lr))
        writer.add_summary(summaries_string, step)

    if step > 0 and step % validation_interval == 0:
        valacc = []
        a2, preds, vlbls = sess.run(
            [validation_accuracy,
             tf.argmax(valpred, 1), val_labels])
        valacc.append(a2)
        conf_mat = tf.math.confusion_matrix(vlbls, preds)
        conf_mat = conf_mat.eval(session=sess)
        precision = compute_precision(conf_mat)
        recall = compute_recall(conf_mat)

        print("Iter=%d/epoch=%d, Validation Accuracy=%.6f" %
              (step * batch_size, epoch1, np.mean(valacc)))
        valaccs.append(np.mean(valacc))
        precisions.append(precision)
        recalls.append(recall)

        # Implement early stopping
        if np.mean(valacc) >= val_threshold and epoch1 >= 15:
            break

        if np.mean(valacc) >= 0.89 and lr == 1e-5:
            lr /= 10
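
Here compute_precision and compute_recall work on a confusion matrix rather than on recommendation lists. A minimal sketch of macro-averaged versions, assuming true labels on the rows and predictions on the columns (the project's own definitions may differ):

import numpy as np

def compute_precision(conf_mat):
    # Per-class precision TP / (TP + FP) = diagonal / column sums, averaged.
    conf_mat = np.asarray(conf_mat, dtype=float)
    col_sums = conf_mat.sum(axis=0)
    per_class = np.divide(np.diag(conf_mat), col_sums,
                          out=np.zeros_like(col_sums), where=col_sums > 0)
    return per_class.mean()

def compute_recall(conf_mat):
    # Per-class recall TP / (TP + FN) = diagonal / row sums, averaged.
    conf_mat = np.asarray(conf_mat, dtype=float)
    row_sums = conf_mat.sum(axis=1)
    per_class = np.divide(np.diag(conf_mat), row_sums,
                          out=np.zeros_like(row_sums), where=row_sums > 0)
    return per_class.mean()
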
Example #13
def main(train_file, part_file, test_file):

    AG = tsv_to_matrix(train_file, 942, 1682)
    AP = tsv_to_matrix(part_file, 942, 1682)

    W1 = slim_train(AG)
    W2 = slim_train(AP)
    # total_precision = []
    k = 2
    matrix_5 = np.zeros((21, k))
    matrix_10 = np.zeros((21, k))
    matrix_15 = np.zeros((21, k))
    matrix_20 = np.zeros((21, k))

    for i in range(0, 105, 5):
        gu = i / 100
        W = gu * W1 + (1 - gu) * W2
        print("gu: " + str(gu))
        recommendations = slim_recommender(AP, W)
        top5, top10, top15, top20 = compute_precision(recommendations,
                                                      test_file)
        for j in range(2):
            matrix_5[int(i / 5)][j] = top5[j]
            matrix_10[int(i / 5)][j] = top10[j]
            matrix_15[int(i / 5)][j] = top15[j]
            matrix_20[int(i / 5)][j] = top20[j]

    hr_values = []
    hr_values1 = []
    index1, value1 = max(enumerate(matrix_5[:, 0]), key=operator.itemgetter(1))
    index2, value2 = max(enumerate(matrix_10[:, 0]),
                         key=operator.itemgetter(1))
    index3, value3 = max(enumerate(matrix_15[:, 0]),
                         key=operator.itemgetter(1))
    index4, value4 = max(enumerate(matrix_20[:, 0]),
                         key=operator.itemgetter(1))
    hr_values.append(index1 * 0.05)
    hr_values.append(value1)
    hr_values.append(index2 * 0.05)
    hr_values.append(value2)
    hr_values.append(index3 * 0.05)
    hr_values.append(value3)
    hr_values.append(index4 * 0.05)
    hr_values.append(value4)
    hr_values1.append(matrix_5[20][0])
    hr_values1.append(matrix_10[20][0])
    hr_values1.append(matrix_15[20][0])
    hr_values1.append(matrix_20[20][0])

    arhr_values = []
    arhr_values1 = []
    index1, value1 = max(enumerate(matrix_5[:, 1]), key=operator.itemgetter(1))
    index2, value2 = max(enumerate(matrix_10[:, 1]),
                         key=operator.itemgetter(1))
    index3, value3 = max(enumerate(matrix_15[:, 1]),
                         key=operator.itemgetter(1))
    index4, value4 = max(enumerate(matrix_20[:, 1]),
                         key=operator.itemgetter(1))

    arhr_values.append(index1 * 0.05)
    arhr_values.append(value1)
    arhr_values.append(index2 * 0.05)
    arhr_values.append(value2)
    arhr_values.append(index3 * 0.05)
    arhr_values.append(value3)
    arhr_values.append(index4 * 0.05)
    arhr_values.append(value4)

    arhr_values1.append(matrix_5[20][1])
    arhr_values1.append(matrix_10[20][1])
    arhr_values1.append(matrix_15[20][1])
    arhr_values1.append(matrix_20[20][1])

    print('k8 top5: %s' % (matrix_5))
    print('k8 top10: %s' % (matrix_10))
    print('k8 top15: %s' % (matrix_15))
    print('k8 top20: %s' % (matrix_20))

    print('Max HR: %s' % (hr_values))
    print('HR at gu = 1: %s' % (hr_values1))
    print('Max ARHR: %s' % (arhr_values))
    print('ARHR at gu = 1: %s' % (arhr_values1))
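
This example reads each returned top-N entry as a (hit rate, ARHR) pair. A sketch of a compute_precision that behaves this way, assuming recommendations maps each user to a ranked item list and the test file holds one relevant user<TAB>item pair per line (the project's real evaluator may aggregate differently):

def compute_precision(recommendations, test_file):
    # For each user's held-out item, record whether it appears in the top-N
    # list (hit rate, HR) and at which rank (average reciprocal hit rank,
    # ARHR), for N in {5, 10, 15, 20}.
    test_items = {}
    with open(test_file) as f:
        for line in f:
            user, item = line.split('\t')[:2]
            test_items[int(user)] = int(item)

    results = []
    for n in (5, 10, 15, 20):
        hits, rr_sum = 0, 0.0
        for user, item in test_items.items():
            top_n = recommendations.get(user, [])[:n]
            if item in top_n:
                hits += 1
                rr_sum += 1.0 / (top_n.index(item) + 1)
        results.append((hits / float(len(test_items)),
                        rr_sum / float(len(test_items))))
    return results
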
Example #14
def main(args):

    # tensorboard
    logger_tb = logger.Logger(log_dir=args.experiment_name)

    #augmenter = get_augmenter(args)

    # train dataloader and val dataset
    train_dataset = NucleiDataset(args.train_data, 'train', transform=True)
    val_dataset = NucleiDataset(args.val_data, 'val', transform=True)

    train_params = {
        'batch_size': args.batch_size,
        'shuffle': False,
        'num_workers': args.num_workers
    }

    train_dataloader = DataLoader(train_dataset, **train_params)

    # device
    device = torch.device(args.device)

    # model
    if args.model == "fusion":
        model = FusionNet(args, train_dataset.dim)
    elif args.model == "dilation":
        model = DilationCNN(train_dataset.dim)
    elif args.model == "unet":
        model = UNet(args.num_kernel, args.kernel_size, train_dataset.dim)

    if args.device == "cuda":
        # parse gpu_ids for data parallel
        if ',' in args.gpu_ids:
            gpu_ids = [int(ids) for ids in args.gpu_ids.split(',')]
        else:
            gpu_ids = int(args.gpu_ids)

        # parallelize computation
        if type(gpu_ids) is not int:
            model = nn.DataParallel(model, gpu_ids)
    model.to(device)

    # optimizer
    parameters = model.parameters()
    if args.optimizer == "adam":
        optimizer = torch.optim.Adam(parameters, args.lr)
    else:
        optimizer = torch.optim.SGD(parameters, args.lr)

    # loss
    loss_function = dice_loss

    # train model
    for epoch in range(args.epoch):
        model.train()

        with tqdm.tqdm(total=len(train_dataloader.dataset),
                       unit=f"epoch {epoch} itr") as progress_bar:
            total_loss = []
            total_iou = []
            total_precision = []
            for i, (x_train, y_train) in enumerate(train_dataloader):

                with torch.set_grad_enabled(True):

                    # send data and label to device
                    x = x_train.float().to(device)
                    y = y_train.float().to(device)

                    # predict segmentation
                    pred = model.forward(x)

                    # calculate loss
                    loss = loss_function(pred, y)
                    total_loss.append(loss.item())

                    # calculate IoU precision
                    predictions = pred.clone().squeeze().detach().cpu().numpy()
                    gt = y.clone().squeeze().detach().cpu().numpy()
                    ious = [
                        metrics.get_ious(p, g, 0.5)
                        for p, g in zip(predictions, gt)
                    ]
                    total_iou.append(np.mean(ious))

                    # back prop
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # log loss and iou
                avg_loss = np.mean(total_loss)
                avg_iou = np.mean(total_iou)

                logger_tb.update_value('train loss', avg_loss, epoch)
                logger_tb.update_value('train iou', avg_iou, epoch)

                progress_bar.update(len(x))

        # validation
        model.eval()
        # accumulate validation metrics over the whole validation set
        total_precision = []
        total_iou = []
        total_loss = []
        for idx in range(len(val_dataset)):
            x_val, y_val, mask_val = val_dataset[idx]

            with torch.no_grad():

                # send data and label to device
                x_val = np.expand_dims(x_val, axis=0)
                x = torch.tensor(x_val, dtype=torch.float32).to(device)
                y = torch.tensor(y_val, dtype=torch.float32).to(device)

                # predict segmentation
                pred = model.forward(x)

                # calculate loss
                loss = loss_function(pred, y)
                total_loss.append(loss.item())

                # calculate IoU
                prediction = pred.clone().squeeze().detach().cpu().numpy()
                gt = y.clone().squeeze().detach().cpu().numpy()
                iou = metrics.get_ious(prediction, gt, 0.5)
                total_iou.append(iou)

                # calculate precision
                precision = metrics.compute_precision(prediction, mask_val,
                                                      0.5)
                total_precision.append(precision)

                # display segmentation on tensorboard
                if idx == 1:
                    original = x_val
                    truth = np.expand_dims(y_val, axis=0)
                    seg = pred.cpu().squeeze().detach().numpy()
                    seg = np.expand_dims(seg, axis=0)

                    logger_tb.update_image("original", original, 0)
                    logger_tb.update_image("ground truth", truth, 0)
                    logger_tb.update_image("segmentation", seg, epoch)

        # log metrics
        logger_tb.update_value('val loss', np.mean(total_loss), epoch)
        logger_tb.update_value('val iou', np.mean(total_iou), epoch)
        logger_tb.update_value('val precision', np.mean(total_precision),
                               epoch)

    # save model
    ckpt_dict = {
        'model_name': model.__class__.__name__,
        'model_args': model.args_dict(),
        'model_state': model.to('cpu').state_dict()
    }
    ckpt_path = os.path.join(args.save_dir, f"{model.__class__.__name__}.pth")
    torch.save(ckpt_dict, ckpt_path)
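
Example #14 plugs in dice_loss as its loss function but never defines it. A minimal soft-Dice sketch, assuming the model outputs raw logits that still need a sigmoid (the project's version may handle activation and smoothing differently):

import torch

def dice_loss(pred, target, eps=1.0):
    # Soft Dice loss: 1 - (2 * overlap + eps) / (|pred| + |target| + eps),
    # computed per sample on flattened masks and then averaged over the batch.
    pred = torch.sigmoid(pred)
    pred = pred.contiguous().view(pred.size(0), -1)
    target = target.contiguous().view(target.size(0), -1)
    intersection = (pred * target).sum(dim=1)
    denom = pred.sum(dim=1) + target.sum(dim=1)
    return (1.0 - (2.0 * intersection + eps) / (denom + eps)).mean()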