Code Example #1
def test_small_cross():
    ds = DataSet('../datasets/', 'test', 'small-cross')
    print('DS: {}; iterations: {}'.format(ds.name, ds.set_count))
    for i in range(1, ds.set_count + 1):
        print("ITER #{}".format(i))
        trn, tst = ds.get_dataset(i)
        print('\tTRAIN: {}'.format(trn))
        print('\tTEST:  {}'.format(tst))

        trns, tsts = utils.get_edges_set(trn), utils.get_edges_set(tst)
        scores = get_small_scores()

        auc_res_tot = mtr.auc(ds.vx_count, trns, tsts, scores)
        auc_res_010 = mtr.auc(ds.vx_count, trns, tsts, scores, 10)
        auc_res_100 = mtr.auc(ds.vx_count, trns, tsts, scores, 100)
        auc_res_01k = mtr.auc(ds.vx_count, trns, tsts, scores, 1000)
        #        auc_res_10k = mtr.auc(ds.vx_count, trns, tsts, scores, 10000)
        #        auc_res_1ck = mtr.auc(ds.vx_count, trns, tsts, scores, 100000)
        #        auc_res_01m = mtr.auc(ds.vx_count, trns, tsts, scores, 1000000)
        prc_res_002 = mtr.precision(ds.vx_count, trns, tsts, scores, 2)

        print('\tMETRICS:')
        print('\t\t-> AUC___TOT: {:.04}'.format(auc_res_tot))  # expected: 0.67
        print('\t\t-> AUC____10: {:.04}'.format(auc_res_010))
        print('\t\t-> AUC___100: {:.04}'.format(auc_res_100))
        print('\t\t-> AUC____1K: {:.04}'.format(auc_res_01k))
        #        print('\t\t-> AUC___10K: {:.04}'.format(auc_res_10k))
        #        print('\t\t-> AUC__100K: {:.04}'.format(auc_res_1ck))
        #        print('\t\t-> AUC____1M: {:.04}'.format(auc_res_01m))
        print('\t\t-> PREC____2: {:.04}'.format(prc_res_002))  # expected: 0.50

    print()
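Note: `mtr.auc(vx_count, train_edges, test_edges, scores, n_samples)` itself is not shown here; its signature suggests the standard sampled link-prediction AUC, which compares the score of a random held-out edge against the score of a random non-edge. A minimal sketch under that assumption (the function name and details are hypothetical, not the project's actual implementation):

import random

def sampled_link_auc(vx_count, train_edges, test_edges, scores, n_samples=10000):
    # Assumed contract: `scores` is indexable as scores[u, v];
    # edge sets contain (u, v) tuples. Undirected handling omitted.
    test = list(test_edges)
    hits = 0.0
    for _ in range(n_samples):
        u, v = random.choice(test)  # a true missing link
        while True:  # draw a random non-link
            a, b = random.randrange(vx_count), random.randrange(vx_count)
            if a != b and (a, b) not in train_edges and (a, b) not in test_edges:
                break
        if scores[u, v] > scores[a, b]:
            hits += 1.0
        elif scores[u, v] == scores[a, b]:
            hits += 0.5  # ties count as half, per the usual definition
    return hits / n_samples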
Code Example #2
def __experiment_02(data_set, set_no=1, aucn=2000, category='math.GN'):
    print('Category: ', category)
    data = dataset.DataSet('../datasets/', category, data_set)
    matrix = sparse.csc_matrix(
        data.get_training_set(mode='adjacency_matrix_lil', ds_index=set_no), dtype='d')
    training = data.get_training_set()  # metrics.get_edges_set(data.get_training_set())
    test = data.get_test_edges()  # metrics.get_edges_set(data.get_test_edges())
    print('Graph size =', data.vx_count)

    print('Computing MERW and GRW...')
    Pgrw, sd = merw.compute_grw(matrix)
    Pmerw, vekt, eigval, stat = merw.compute_merw_matrix(matrix)  # renamed: `eval` shadows the builtin
    for a in [.1, .5, .9]:
        print('alpha=', a)
        p_dist = merw.compute_P_distance(Pgrw, alpha=a)
        print('   PD effectiveness (AUC {}):'.format(aucn),
              metrics.auc(data.vx_count, training, test, p_dist, aucn))
        p_dist = merw.compute_P_distance(Pmerw, alpha=a)
        print(' MEPD effectiveness (AUC {}):'.format(aucn),
              metrics.auc(data.vx_count, training, test, p_dist, aucn))
        # NOTE: the two blocks below recompute exactly the same distances as
        # above; the PDM/MEPDM variants were presumably meant to call a
        # different distance function (likely a copy-paste slip).
        p_dist = merw.compute_P_distance(Pgrw, alpha=a)
        print('  PDM effectiveness (AUC {}):'.format(aucn),
              metrics.auc(data.vx_count, training, test, p_dist, aucn))
        p_dist = merw.compute_P_distance(Pmerw, alpha=a)
        print('MEPDM effectiveness (AUC {}):'.format(aucn),
              metrics.auc(data.vx_count, training, test, p_dist, aucn))
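`merw.compute_P_distance` is not shown. In the MERW link-prediction setting, a "P-distance" is plausibly the resolvent of the transition matrix, (I - alpha*P)^-1, i.e. the alpha-discounted sum of walk probabilities over all walk lengths. A hedged sketch under that assumption only, not the project's actual definition:

import scipy.sparse as sparse
import scipy.sparse.linalg as sla

def compute_P_distance_sketch(P, alpha=0.5):
    # Assumed semantics: sum_k (alpha*P)^k = (I - alpha*P)^-1.
    n = P.shape[0]
    return sla.inv(sparse.identity(n, format='csc') - alpha * sparse.csc_matrix(P))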
Code Example #3
def my_incremental_evaluate(sess, model, minibatch_iter, size, test=False):
    val_losses = []
    val_preds = []
    labels = []
    iter_num = 0
    finished = False

    while not finished:
        feed_dict_val, batch_labels, finished, _ = \
            minibatch_iter.incremental_node_val_feed_dict(
                size, iter_num, test=test)

        node_outs_val = sess.run([model.preds, model.loss],
                                 feed_dict=feed_dict_val)

        val_preds.append(node_outs_val[0])
        labels.append(batch_labels)
        val_losses.append(node_outs_val[1])
        iter_num += 1

    val_preds = np.vstack(val_preds)
    labels = np.vstack(labels)

    precision, recall, thresholds = precision_recall_curve(
        labels[:, 1], val_preds[:, 1])
    area = auc(recall, precision)

    return area
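This follows scikit-learn's convention, where `auc(x, y)` integrates an arbitrary curve by the trapezoidal rule; here it is applied to the class-1 column of the stacked predictions. A self-contained check with synthetic scores:

import numpy as np
from sklearn.metrics import precision_recall_curve, auc

rng = np.random.default_rng(0)
labels = rng.integers(0, 2, size=200)
scores = labels * 0.5 + rng.normal(scale=0.5, size=200)  # informative but noisy
precision, recall, _ = precision_recall_curve(labels, scores)
print('PR-AUC: %.3f' % auc(recall, precision))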
Code Example #4
 def supervised_eval(self, train_or_valid):
     data = self.dataset.get_labeled_data(train_or_valid)
     if data is None:
         raise ValueError('no labeled examples present in dataset')
     X_labeled, y_true, _ = data
     y_pred = self.model.predict(X_labeled)
     p = metrics.precision(y_true, y_pred)
     r = metrics.recall(y_true, y_pred)
     ac = metrics.accuracy(y_true, y_pred)
     g = metrics.g_means(y_true, y_pred)
     auc = metrics.auc(y_true, y_pred)
     self.metrics[train_or_valid].append((p, r, ac, g, auc))
Code Example #5
def test(test_out_filename, clf, test_df, y_true):
    y_prob = clf.predict_proba(test_df)
    y_score = y_prob[:, 1]
    uids = test_df['did'].values

    with open(test_out_filename, 'w') as e_out:
        auc = metrics.auc(y_true, y_score)
        e_out.write("auc: %s\n" % str(auc))
        logging.info("auc: %s", str(auc))
        gauc = metrics.gauc(y_true, y_score, uids)
        # e_out.write("ndcg: %s\n" % str(ndcg))
        e_out.write("gauc: %s\n" % str(gauc))
        logging.info("gauc: %s", str(gauc))
Code Example #6
def plot_cmc_curve(os_scores, oaa_scores, extra_name=None):
    """
    The CMC curve shows how often the correct subject template appears within the top ranks (1, 5, 10, 100, etc.), based on the match rate.
    It is the standard way of reporting the measured accuracy of a biometric system operating in the closed-set identification task.
    Templates are compared and ranked by their similarity.
    """

    # Compute mean values
    os_mean = np.mean(os_scores, axis=0)
    oaa_mean = np.mean(oaa_scores, axis=0)
    x_axis = range(len(os_mean))
    os_auc = auc(x_axis, os_mean)
    oaa_auc = auc(x_axis, oaa_mean)  # renamed from the misspelled ooa_auc

    # Plot Cumulative Matching Characteristic curve
    plt.clf()
    plt.plot(x_axis,
             os_mean,
             color='blue',
             linestyle='--',
             label='Open-set HPLS (%0.3f)' % (os_auc / len(os_scores[0])))
    plt.plot(x_axis,
             oaa_mean,
             color='red',
             linestyle='-',
             label='Closed-set OAA-PLS (%0.3f)' %
             (oaa_auc / len(os_scores[0])))
    plt.xlim([0, len(os_scores[0])])
    plt.ylim([0.0, 1.05])
    plt.xlabel('Rank')
    plt.ylabel('Accuracy Rate')
    plt.title('Cumulative Matching Characteristic')
    plt.legend(loc="lower right")
    plt.grid()
    if extra_name is None:
        plt.show()
    else:
        plt.savefig('./plots/CMC_' + extra_name + '.pdf')
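A minimal invocation, assuming each input is a (trials x ranks) array of cumulative match rates; synthetic data here, purely to exercise the plot:

import numpy as np

os_scores = np.sort(np.random.rand(5, 50), axis=1)   # monotone CMC-like curves
oaa_scores = np.sort(np.random.rand(5, 50), axis=1)
plot_cmc_curve(os_scores, oaa_scores)                # extra_name=None shows the plot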
Code Example #7
File: batch_test.py Project: liangtt677/JTCN
def get_auc(item_score, user_pos_test):
    # rank items by score, descending
    item_score = sorted(item_score.items(), key=lambda kv: kv[1], reverse=True)
    item_sort = [x[0] for x in item_score]
    posterior = [x[1] for x in item_score]

    # binary relevance in rank order: 1 if the item is a held-out positive
    r = [1 if i in user_pos_test else 0 for i in item_sort]
    auc = metrics.auc(ground_truth=r, prediction=posterior)
    return auc
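With that contract (binary relevance in rank order plus the matching scores), the result should agree with an ordinary ROC-AUC. A quick cross-check against scikit-learn on a toy ranking:

from sklearn.metrics import roc_auc_score

item_score = {'a': 0.9, 'b': 0.2, 'c': 0.7, 'd': 0.1}
user_pos_test = {'a', 'c'}
ranked = sorted(item_score.items(), key=lambda kv: kv[1], reverse=True)
r = [1 if item in user_pos_test else 0 for item, _ in ranked]
posterior = [score for _, score in ranked]
print(roc_auc_score(r, posterior))  # 1.0: both positives outrank both negatives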
Code Example #8
 def active_simulation_eval(self):
     data = self.dataset.get_unlabeled_data()
     if data is None:
         # the original built a UserWarning but never raised or emitted it;
         # warnings.warn actually reports it (requires `import warnings`)
         warnings.warn(
             'all examples have been labeled; this eval mode works '
             'if there is an unlabeled pool of data in `simulate` mode'
         )
         return
     X_unlabeled, unlabeled_indexes = data
     # get the unlabeled examples' labels in simulation via `y_ideal`
     y_true = self.dataset.y_ideal[unlabeled_indexes]
     y_pred = self.model.predict(X_unlabeled)
     p = metrics.precision(y_true, y_pred)
     r = metrics.recall(y_true, y_pred)
     ac = metrics.accuracy(y_true, y_pred)
     g = metrics.g_means(y_true, y_pred)
     auc = metrics.auc(y_true, y_pred)
     self.metrics['simulate'].append((p, r, ac, g, auc))
Code Example #9
def test(data_set, model, data_loader, show_auc = False, use_dummy_gcn=False, use_struc=None):
    with torch.no_grad():
        logging.info('----- start_test -----')
        model.eval()
        precision = []
        recall = []
        ndcg_score = []
        auc_score = []
        for user_ids, _, __ in data_loader:
            user_ids = user_ids.to(device)
            ratings = model.get_users_ratings(user_ids, use_dummy_gcn, use_struc)
            ground_truths = []
            for i, user_id_t in enumerate(user_ids):
                user_id = user_id_t.item()
                ground_truths.append(data_set.test_user_dict[user_id])
                train_pos = data_set.train_user_dict[user_id]
                for pos_item in train_pos:
                    ratings[i][pos_item] = -1  # mask training items out of the ratings
            # Precision, Recall, NDCG
            ___, index_k = torch.topk(ratings, k=TOPK) # index_k.shape = (batch_size, TOPK), dtype=torch.int
            batch_predict_items = index_k.cpu().tolist()
            batch_precision, batch_recall = precision_and_recall(batch_predict_items, ground_truths)
            batch_ndcg = ndcg(batch_predict_items, ground_truths)
            # AUC
            if show_auc:
                ratings = ratings.cpu().numpy()
                batch_auc = auc(ratings, data_set.get_item_num(), ground_truths)
                auc_score.append(batch_auc)

            precision.append(batch_precision)
            recall.append(batch_recall)
            ndcg_score.append(batch_ndcg)
        precision = np.mean(precision)
        recall = np.mean(recall)
        ndcg_score = np.mean(ndcg_score)
        if show_auc:  # calculating AUC scores takes a long time
            auc_score = np.mean(auc_score)
            logging.info('test result: precision ' + str(precision) + '; recall ' + str(recall) + '; ndcg ' + str(ndcg_score) + '; auc ' + str(auc_score))
        else:
            logging.info('test result: precision ' + str(precision) + '; recall ' + str(recall) + '; ndcg ' + str(ndcg_score))
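The `auc(ratings, item_num, ground_truths)` helper is not shown; plausibly it averages a per-user ROC-AUC over the batch, treating the user's held-out items as positives and everything unmasked as negatives. A sketch under that assumption:

import numpy as np
from sklearn.metrics import roc_auc_score

def batch_auc_sketch(ratings, item_num, ground_truths):
    # Assumed contract: ratings is a (batch, item_num) score matrix with
    # training items masked to -1; ground_truths holds one list of positive
    # item ids per user.
    per_user = []
    for user_ratings, pos_items in zip(ratings, ground_truths):
        labels = np.zeros(item_num)
        labels[pos_items] = 1
        keep = user_ratings != -1  # drop the masked training items
        if labels[keep].min() != labels[keep].max():
            per_user.append(roc_auc_score(labels[keep], user_ratings[keep]))
    return float(np.mean(per_user)) if per_user else float('nan')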
Code Example #10
def pr_curve(y_true, scores, stage, show_var=False):
    precision, recall, thresholds = precision_recall_curve(y_true, scores)
    y_pred = (scores > 0).astype(int)  # np.int was removed in NumPy 1.24
    auc_val = auc(y_true, y_pred)  # note: AUC of thresholded predictions, reported as "AP" in the title below
    plt.figure(figsize=(20, 10))
    plt.plot(recall, precision, color='r')
    if show_var:
        precision_std = np.std(precision)
        precision_upper = precision + precision_std
        precision_lower = precision - precision_std
        plt.fill_between(recall,
                         precision_upper,
                         precision_lower,
                         color='r',
                         alpha=0.1)
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])
    plt.title('%s Precision-Recall curve: AP=%0.2f' % (stage.title(), auc_val))
    plt.grid(True)
    plt.show()
Code Example #11
def __experiment_01(data_set, skipSimRank=False, set_no=1, a=0.5, aucn=2000, simrank_iter=10, category='math.GN'):
    print('Category: ', category)
    data = dataset.DataSet('../datasets/', category, data_set)
    matrix = sparse.csc_matrix(
        data.get_training_set(mode='adjacency_matrix_csc', ds_index=set_no), dtype='d')
    training = data.get_training_set()  # metrics.get_edges_set(data.get_training_set())
    test = data.get_test_edges()  # metrics.get_edges_set(data.get_test_edges())

    print('Set', set_no, ' N=', data.vx_count)
    # print('Computing: MERW transition matrix...', end=' ')
    # print(vekt)
    # print(Pmerw.get_shape()[0])
    # print('"distance" matrix...')
    # print('Computing: GRW transition matrix... ', end=' ')
    Pgrw, sd = merw.compute_grw(matrix)
    # print('"distance" matrix...')
    p_dist_grw = merw.compute_P_distance(Pgrw, alpha=a)
    print('   PD effectiveness (AUC {}):'.format(aucn),
          metrics.auc(data.vx_count, training, test, p_dist_grw, aucn))
    Pmerw, vekt, eigval, stat = merw.compute_merw_matrix(matrix)  # renamed: `eval` shadows the builtin
    p_dist_merw = merw.compute_P_distance(Pmerw, alpha=a)
    print(' MEPD effectiveness (AUC {}):'.format(aucn),
          metrics.auc(data.vx_count, training, test, p_dist_merw, aucn))
    # NOTE: the two computations below duplicate the ones above; the
    # PDM/MEPDM variants were presumably meant to use a different distance.
    ep_dist_grw = merw.compute_P_distance(Pgrw, alpha=a)
    print('  PDM effectiveness (AUC {}):'.format(aucn),
          metrics.auc(data.vx_count, training, test, ep_dist_grw, aucn))
    ep_dist_merw = merw.compute_P_distance(Pmerw, alpha=a)
    print('MEPDM effectiveness (AUC {}):'.format(aucn),  # was mislabeled 'PDM' a second time
          metrics.auc(data.vx_count, training, test, ep_dist_merw, aucn))

    if skipSimRank:
        return
    graph = merw.matrix_to_graph(matrix)
    # print(graph)
    print('SimRank...', end='')
    sr, eps = merw.compute_basic_simrank(graph, a, maxiter=simrank_iter)
    print(' Accuracy:', eps)
    print('   SR effectiveness (AUC {}):'.format(aucn),
          metrics.auc(data.vx_count, training, test, sr, aucn))

    print('MERW SimRank...', end='')
    sr, eps = merw.compute_merw_simrank_ofmatrix(matrix, a, maxiter=simrank_iter)
    print(' Accuracy:', eps)
    print(' MESR effectiveness (AUC {}):'.format(aucn),
          metrics.auc(data.vx_count, training, test, sr, aucn))
Code Example #12
def generate_det_curve(y_label_list, y_score_list):
    """
    DET curves typically feature missed detection rate on the Y axis, and false positive rate on the X axis. 
    This means that the bottom left corner of the plot is the ideal point - a false positive rate of zero, and a missed detection rate of zero as well. 
    This is not very realistic, but it does mean that a smaller area under the curve (AUC) is usually better.
    """
    # Prepare input data
    label_list = []
    score_list = []
    for line in y_label_list:
        temp_list = [item[1] for item in line]
        label_list.append(temp_list)
    for line in y_score_list:
        temp_list = [item[1] for item in line]
        score_list.append(temp_list)
    label_array = np.array(label_list)
    score_array = np.array(score_list)

    # Compute micro-average DET curve and DET area
    det = dict()
    det['fpr'], det['fnr'], det['thresh'] = detection_error_tradeoff(
        label_array.ravel(), score_array.ravel())
    det['auc'] = auc(det['fpr'], det['fnr'])
    return det
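`detection_error_tradeoff` is not a current scikit-learn name (modern versions expose `det_curve`); the same quantities also follow directly from `roc_curve`, since the missed detection rate is 1 - TPR. A hedged equivalent, in case the original helper is unavailable:

from sklearn.metrics import roc_curve

def detection_error_tradeoff_sketch(y_true, y_score):
    # FNR (miss rate) is the complement of TPR along the ROC curve.
    fpr, tpr, thresholds = roc_curve(y_true, y_score)
    return fpr, 1.0 - tpr, thresholds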
Code Example #13
def generate_roc_curve(y_label_list, y_score_list):
    """
    ROC curves typically feature true positive rate on the Y axis, and false positive rate on the X axis. 
    This means that the top left corner of the plot is the ideal point - a false positive rate of zero, and a true positive rate of one. 
    This is not very realistic, but it does mean that a larger area under the curve (AUC) is usually better.
    """
    # Prepare input data
    label_list = []
    score_list = []
    for line in y_label_list:
        temp_list = [item[1] for item in line]
        label_list.append(temp_list)
    for line in y_score_list:
        temp_list = [item[1] for item in line]
        score_list.append(temp_list)
    label_array = np.array(label_list)
    score_array = np.array(score_list)

    # Compute micro-average ROC curve and ROC area
    roc = dict()
    roc['fpr'], roc['tpr'], roc['thresh'] = roc_curve(label_array.ravel(),
                                                      score_array.ravel())
    roc['auc'] = auc(roc['fpr'], roc['tpr'])
    return roc
Code Example #14
def run():
    while True:
        trial = pull_pending()

        if trial is None:
            break

        params = eval(trial['Parameters'])  # trusted input assumed; ast.literal_eval would be safer

        logging.info(trial)

        dataset = load(trial['Dataset'])
        fold = int(trial['Fold']) - 1

        (X_train, y_train), (X_test,
                             y_test) = dataset[fold][0], dataset[fold][1]

        n_minority = Counter(y_train).most_common()[1][1]
        n_majority = Counter(y_train).most_common()[0][1]

        imblearn_ratios = [
            ((n_majority - n_minority) * ratio + n_minority) / n_majority
            for ratio in [0.5, 0.75, 1.0]
        ]

        clf = {
            'NB': NB(),
            'KNN': KNN(),
            'SVM': SVM(gamma='scale'),
            'CART': CART()
        }[params['classifier']]

        if (trial['Algorithm'] is None) or (trial['Algorithm'] == 'None'):
            algorithm = None
        else:
            algorithms = {
                'AKNN':
                ResamplingCV(AKNN, clf, n_neighbors=[1, 3, 5, 7]),
                'Bord':
                ResamplingCV(SMOTE,
                             clf,
                             kind=['borderline1'],
                             k_neighbors=[1, 3, 5, 7, 9],
                             m_neighbors=[5, 10, 15],
                             sampling_strategy=imblearn_ratios),
                'CC':
                ResamplingCV(CC, clf, sampling_strategy=imblearn_ratios),
                'CNN':
                ResamplingCV(CNN, clf, n_neighbors=[1, 3, 5, 7]),
                'ENN':
                ResamplingCV(ENN, clf, n_neighbors=[1, 3, 5, 7]),
                'IHT':
                ResamplingCV(IHT,
                             clf,
                             sampling_strategy=imblearn_ratios,
                             cv=[2]),
                'NCL':
                ResamplingCV(NCL, clf, n_neighbors=[1, 3, 5, 7]),
                'NM':
                ResamplingCV(NM, clf, n_neighbors=[1, 3, 5, 7]),
                'OSS':
                ResamplingCV(OSS, clf, n_neighbors=[1, 3, 5, 7]),
                'RBO':
                ResamplingCV(RBO,
                             clf,
                             gamma=[0.01, 0.1, 1.0, 10.0],
                             ratio=[0.5, 0.75, 1.0]),
                'RBU':
                ResamplingCV(RBU,
                             clf,
                             gamma=params.get('gamma'),
                             ratio=params.get('ratio')),
                'RENN':
                ResamplingCV(RENN, clf, n_neighbors=[1, 3, 5, 7]),
                'ROS':
                ResamplingCV(ROS, clf, sampling_strategy=imblearn_ratios),
                'RUS':
                ResamplingCV(RUS, clf, sampling_strategy=imblearn_ratios),
                'SMOTE':
                ResamplingCV(SMOTE,
                             clf,
                             k_neighbors=[1, 3, 5, 7, 9],
                             sampling_strategy=imblearn_ratios),
                'SMOTE+ENN':
                ResamplingCV(
                    SMOTEENN,
                    clf,
                    smote=[SMOTE(k_neighbors=k) for k in [1, 3, 5, 7, 9]],
                    sampling_strategy=imblearn_ratios),
                'SMOTE+TL':
                ResamplingCV(
                    SMOTETomek,
                    clf,
                    smote=[SMOTE(k_neighbors=k) for k in [1, 3, 5, 7, 9]],
                    sampling_strategy=imblearn_ratios),
                'TL':
                TL(),
            }

            algorithm = algorithms.get(trial['Algorithm'])

            if algorithm is None:
                raise NotImplementedError

        if algorithm is not None:
            X_train, y_train = algorithm.fit_sample(X_train, y_train)

        clf = clf.fit(X_train, y_train)
        predictions = clf.predict(X_test)

        scores = {
            'Precision': metrics.precision(y_test, predictions),
            'Recall': metrics.recall(y_test, predictions),
            'F-measure': metrics.f_measure(y_test, predictions),
            'AUC': metrics.auc(y_test, predictions),
            'G-mean': metrics.g_mean(y_test, predictions)
        }

        submit_result(trial, scores)
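The `imblearn_ratios` expression converts a desired minority-to-majority balance after resampling into imbalanced-learn's `sampling_strategy` value (minority count over majority count). A worked check with hypothetical class counts:

n_minority, n_majority = 20, 80  # hypothetical counts, for illustration only
for ratio in [0.5, 0.75, 1.0]:
    s = ((n_majority - n_minority) * ratio + n_minority) / n_majority
    print(ratio, '->', s)  # 0.5 -> 0.625, 0.75 -> 0.8125, 1.0 -> 1.0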
Code Example #15
File: test_auc.py Project: 0416354917/isml15-thu
def test_u1234567():
    y_true = [0., 0., 1., 1.]
    y_score = [-20., 0.2, 0.1, 0.9]
    print('Expected AUC   = 0.75')
    print('Calculated AUC = %f' % auc(y_true, y_score))
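The expected value can be reproduced with the rank-statistic form of ROC-AUC, which is equivalent to counting correctly ordered positive/negative score pairs. A self-contained check, independent of whichever `auc` the test imports:

import numpy as np
from scipy.stats import rankdata

def rank_auc(y_true, y_score):
    # AUC = (sum of positive ranks - n_pos*(n_pos+1)/2) / (n_pos * n_neg)
    y_true, y_score = np.asarray(y_true), np.asarray(y_score)
    ranks = rankdata(y_score)  # average ranks, so ties are handled
    n_pos = int(y_true.sum())
    n_neg = len(y_true) - n_pos
    return (ranks[y_true == 1].sum() - n_pos * (n_pos + 1) / 2) / (n_pos * n_neg)

print(rank_auc([0., 0., 1., 1.], [-20., 0.2, 0.1, 0.9]))  # 0.75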
Code Example #16
def _run_base_model_dfm(dfTrain, dfTest, folds, dfm_params):
    if os.path.exists(config.DF_FILE):
        print("FD EXISTS")
        with open(config.DF_FILE, 'rb') as fd_f:
            fd = pickle.load(fd_f)
    else:
        print("FD DOES NOT EXIST")
        fd = FeatureDictionary(dfTrain=dfTrain, dfTest=dfTest,
                               numeric_cols=config.NUMERIC_COLS,
                               ignore_cols=config.IGNORE_COLS)
        with open(config.DF_FILE, 'wb') as fd_f:
            pickle.dump(fd, fd_f)

    data_parser = DataParser(feat_dict=fd)
    Xi_train, Xv_train, y_train = data_parser.parse(df=dfTrain, has_label=True)
    Xi_test, Xv_test, y_test = data_parser.parse(df=dfTest, has_label=True)  # the test set also has labels
    # print(y_test)
    # print(Xi_train)
    # print(Xv_train)
    # print(y_train)

    dfm_params["feature_size"] = fd.feat_dim
    dfm_params["field_size"] = len(Xi_train[0])
    print(dfm_params)
    # print(dfm_params)

    y_train_meta = np.zeros((dfTrain.shape[0], 1), dtype=float)
    y_test_meta = np.zeros((dfTest.shape[0], 1), dtype=float)
    _get = lambda x, l: [x[i] for i in l]
    auc_results_cv = np.zeros(len(folds), dtype=float)
    test_auc_results_cv = np.zeros(len(folds), dtype=float)
    auc_results_epoch_train = np.zeros((len(folds), dfm_params["epoch"]), dtype=float)
    auc_results_epoch_valid = np.zeros((len(folds), dfm_params["epoch"]), dtype=float)
    # best_test_res = 0.0
    for i, (train_idx, valid_idx) in enumerate(folds):
        print(f"Fold {i}:")
        Xi_train_, Xv_train_, y_train_ = _get(Xi_train, train_idx), _get(Xv_train, train_idx), _get(y_train, train_idx)
        Xi_valid_, Xv_valid_, y_valid_ = _get(Xi_train, valid_idx), _get(Xv_train, valid_idx), _get(y_train, valid_idx)
        # print(Xi_train_)
        # print(Xv_train_)
        # print(y_train_)
        # print(Xi_valid_)
        # print(Xv_valid_)
        # print(y_valid_)
        dfm = DeepFM(**dfm_params)
        dfm.fit(Xi_train_, Xv_train_, y_train_, Xi_valid_, Xv_valid_, y_valid_, i)

        y_train_meta[valid_idx,0] = dfm.predict(Xi_valid_, Xv_valid_)
        y_test_meta[:,0] = dfm.predict(Xi_test, Xv_test)
        auc_results_cv[i] = auc(y_valid_, y_train_meta[valid_idx])
        test_auc_results = auc(y_test, y_test_meta)
        # if test_auc_results > best_test_res:
        #     MODEL_PATH = config.MODEL_PATH % (i, )
        #     dfm.save_model(config.MODEL_PATH)  # a save path can be written here

        test_auc_results_cv[i] = test_auc_results
        auc_results_epoch_train[i] = dfm.train_result
        auc_results_epoch_valid[i] = dfm.valid_result

    y_test_meta /= float(len(folds))

    # save result
    if dfm_params["use_fm"] and dfm_params["use_deep"]:
        clf_str = "DeepFM"
    elif dfm_params["use_fm"]:
        clf_str = "FM"
    elif dfm_params["use_deep"]:
        clf_str = "DNN"
    print("%s: %.5f (%.5f)"%(clf_str, auc_results_cv.mean(), auc_results_cv.std()))
    print("test auc: ", test_auc_results_cv)
    filename = "%s_Mean%.5f_Std%.5f.csv"%(clf_str, auc_results_cv.mean(), auc_results_cv.std())
    # _make_submission(ids_test, y_test_meta, filename)

    # _plot_fig(auc_results_epoch_train, auc_results_epoch_valid, clf_str)

    return y_train_meta, y_test_meta
Code Example #17
File: eval.py Project: yunshengb/SimGNN
 def _eval(self, models, rs, true_r, loss_list, metrics, plot,
           node_embs_list=None, graph_embs_mat=None, attentions=None,
           eps_dir=None):
     rtn = OrderedDict()
     for metric in metrics:
         if (metric in ('mrr', 'mse', 'time', 'kendalls_tau', 'spearmans_rho')
                 or 'acc' in metric):
             d = plot_single_number_metric(
                 FLAGS.dataset, models, rs, true_r, metric,
                 self.norms,
                 sim_kernel=get_flags('sim_kernel'),
                 yeta=get_flags('yeta'),
                 scale=get_flags('scale'),
                 thresh_poss=[get_flags('thresh_val_test_pos')],
                 thresh_negs=[get_flags('thresh_val_test_neg')],
                 thresh_poss_sim=[0.5],
                 thresh_negs_sim=[0.5],
                 plot_results=plot, eps_dir=eps_dir)
             rtn.update(d)
         elif metric == 'draw_gt_rk':
             comb_gt_rk(FLAGS.dataset, FLAGS.dist_algo,
                        rs[FLAGS.model], eps_dir + '/gt_rk')
         elif metric == 'groundtruth':
             pass
         elif metric == 'draw_heat_hist':
             if node_embs_list is not None:
                 draw_emb_hist_heat(
                     FLAGS.dataset, node_embs_list, FLAGS.dist_norm,
                     max_nodes=FLAGS.max_nodes,
                     apply_sigmoid=True,
                     eps_dir=eps_dir + '/mne')
         elif metric == 'emb_vis_gradual':
             if graph_embs_mat is not None:
                 visualize_embeddings_gradual(
                     FLAGS.dataset,
                     graph_embs_mat,
                     eps_dir=eps_dir + '/emb_vis_gradual')
         elif metric == 'ranking':
             pass
             # ranking(
             #     FLAGS.dataset, FLAGS.dist_algo, rs[FLAGS.model],
             #     eps_dir=eps_dir + '/ranking'
             # )
         elif metric == 'attention':
             if attentions is not None:
                 draw_attention(
                     FLAGS.dataset, FLAGS.dist_algo, attentions,
                     eps_dir=eps_dir + '/attention')
         elif metric == 'auc':
             auc_score = auc(
                 true_r, rs[FLAGS.model],
                 thresh_pos=
                 get_flags('thresh_val_test_pos'),
                 thresh_neg=
                 get_flags('thresh_val_test_neg'),
                 norm=FLAGS.dist_norm)
             print('auc', auc_score)
             rtn.update({'auc': auc_score})
         elif 'prec@k' in metric:
             d = plot_preck(
                 FLAGS.dataset, models, rs, true_r, metric,
                 self.norms, plot, eps_dir=eps_dir)
             rtn.update(d)
         elif metric == 'loss':
             rtn.update({metric: np.mean(loss_list)})
         elif metric == 'emb_vis_binary':
             if graph_embs_mat is not None:
                 visualize_embeddings_binary(
                     FLAGS.dataset, graph_embs_mat,
                     self.true_test_result,
                     thresh_pos=
                     get_flags('thresh_val_test_pos'),
                     thresh_neg=
                     get_flags('thresh_val_test_neg'),
                     thresh_pos_sim=0.5,
                     thresh_neg_sim=0.5,
                     norm=FLAGS.dist_norm,
                     eps_dir=eps_dir + '/emb_vis_binary')
         else:
             raise RuntimeError('Unknown metric {}'.format(metric))
     return rtn
Code Example #18
        X,
        y,
        cv=5,
        scoring=scoring)
    plt_handle.show()

# train and report test results
clf_supervised = models.default_model()
clf_supervised.fit(X, y)
sup_y_test_preds = clf_supervised.predict(X_test)
supervised_results = {
    'accuracy': metrics.accuracy(y_test, sup_y_test_preds),
    'precision': metrics.precision(y_test, sup_y_test_preds),
    'recall': metrics.recall(y_test, sup_y_test_preds),
    'gmeans': metrics.g_means(y_test, sup_y_test_preds),
    'auc': metrics.auc(y_test, sup_y_test_preds),
    'cohen-kappa': metrics.user_machine_agreement(y_test, sup_y_test_preds)
}

#============================================================
#                IV (a) - Active Learning
#============================================================
# Part IV of this demo is divided into two sub-parts:
#
#   (a) - Here we demonstrate the active "learning phase" of
#         a typical predictive coding life cycle; since the
#         demo runs in simulation mode, we will not
#         require an interactive session to get user labels
#
#   (b) - In this part we will simulate the review phase.
#
Code Example #20
            
        # Follow training by printing the loss and prediction for each image
        # print("Batch ", i, "/", training_generator.__len__(),
        #     ", Loss: ", loss_values.numpy()[0])
        # print("Prediction: ", logits.numpy()[0,:], ", Label: ", y_train[0])

        temp_loss_list.append(loss_values.numpy().mean())
        label_list.append(y_train[0])
        pred_list.append(logits.numpy()[0,1])
        
        grads = tape.gradient(loss_values, model.trainable_variables[-4:])
        optimizer.apply_gradients(zip(grads, model.trainable_variables[-4:]))

    # Compute metrics on the training set
    loss_train = np.mean(np.asarray(temp_loss_list))
    auc_train = auc(label_list, pred_list)
    print("Training loss: ", loss_train, ", AUC: ", auc_train)
    
    acc, sens, spec = conf(label_list, pred_list)
    print("Training accuracy: ", acc, ", sensitivity: ", sens, ", specificity: ", spec)
        
    # Evaluate on the validation set
    for i in range(validation_generator.__len__()):
        x_train,y_train=validation_generator.__getitem__(i)
        logits = model(x_train, training = False)
        loss_values = loss(y_train, logits)
        
        #print("Batch ", i, "/", validation_generator.__len__(), 
        #    ", Loss: ", loss_values.numpy()[0])
        #print("Prediction: ", logits.numpy()[0,:], ", Label: ", y_train[0])
Code Example #21
def dk_tests_1k():
    ds = DataSet('../datasets/', 'gr-qc', 'eg1k')
    trn, tst = ds.get_dataset()
    trns, tsts = utils.get_edges_set(trn), utils.get_edges_set(tst)

    # the train and test edge sets may overlap; alternately drop each shared
    # edge from one set or the other so they end up disjoint
    rmtrns, rmtsts = set(), set()
    toTest = True
    for x in tsts:
        if x in trns:
            if toTest:
                rmtrns.add(x)
            else:
                rmtsts.add(x)
        toTest = not toTest

    for x in rmtrns:
        trns.remove(x)

    for x in rmtsts:
        tsts.remove(x)

    # sanity check: no test edge may remain in the training set
    for x in tsts:
        if x in trns:
            print("NO!")

    A = lil_matrix((ds.vx_count, ds.vx_count))
    for v1, v2 in trns:
        A[v1, v2] = 1
        A[v2, v1] = 1

    A = csr_matrix(A, (ds.vx_count, ds.vx_count), 'd')

    ls, vs = sla.eigsh(A, 1, which='LA')
    l_max = ls[0]
    v_max = vs[:, 0]

    # print("Values of AUC (1000 samples) and precision (K=30) " +
    #       "for heat diffusion kernel variants:")

    print("Values of AUC (10000 samples) for heat diffusion kernel variants:")

    auc_sampl = 10000
    prc_k = 30

    # DK
    DK = kern.heat_diffusion_kernel(kern.laplacian(A))

    auc = mtr.auc(ds.vx_count, trns, tsts, DK, auc_sampl)
    print("   DK - AUC: {:.4f}".format(auc))
    prc = mtr.precision(ds.vx_count, trns, tsts, DK, prc_k)
    print("   DK - PRC: {:.4f}".format(prc))

    # NDK
    warnings.filterwarnings("ignore")

    NDK = kern.heat_diffusion_kernel(kern.symmetric_normalized_laplacian(A))

    auc = mtr.auc(ds.vx_count, trns, tsts, NDK, auc_sampl)
    # prc = mtr.precision(ds.vx_count, trns, tsts, NDK, prc_k)
    print("  NDK - AUC: {:.4f}".format(auc))
    # print("  NDK - PREC: {:.4f}".format(prc))

    # MEDK
    MEDK = kern.heat_diffusion_kernel(kern.mecl(A, l_max, v_max))

    auc = mtr.auc(ds.vx_count, trns, tsts, MEDK, auc_sampl)
    # prc = mtr.precision(ds.vx_count, trns, tsts, MEDK, prc_k)
    print(" MEDK - AUC: {:.4f}".format(auc))
    # print(" MEDK - PREC: {:.4f}".format(prc))

    # NMEDK
    NMEDK = kern.heat_diffusion_kernel(kern.mecl(A, l_max, v_max, type='sym'))

    auc = mtr.auc(ds.vx_count, trns, tsts, NMEDK, auc_sampl)
    # prc = mtr.precision(ds.vx_count, trns, tsts, NMEDK, prc_k)
    print("NMEDK - AUC: {:.4f}".format(auc))
Code Example #22
# 0.99, 0.4, 0.01
try:
    with open("cascade.pkl", "rb") as f:
        cascade_classifier = pickle.load(f)
except Exception:  # narrowed from a bare except, which would also swallow KeyboardInterrupt
    cascade_classifier = cascade.train_cascade(train_f, train_y, 0.99, 0.4,
                                               0.01)
    with open("cascade.pkl", "wb") as f:
        pickle.dump(cascade_classifier, f, protocol=pickle.HIGHEST_PROTOCOL)

test_f, i_f = feature.get_features(test_x)

f_pred = classifier(test_f)
y_pred = classifier.predict(test_f)
print(metrics.tpr_fpr(test_y, y_pred))
print(metrics.auc(test_y, f_pred))

# Top 10 features
shape = (19, 19)
for i, (base, alpha) in enumerate(classifier):
    print(base.index)
    print("Feature {}: theta {:.2f}, alpha {:.2f}".format(
        i, base.theta, alpha))
    visulization.visualize_feature(shape,
                                   i_f[base.index],
                                   base.parity,
                                   save="feature_{}.png".format(i),
                                   show=False)

plt.figure()
for i in [1, 3, 5, 10]:
Code Example #23
# compare score to sliding box up to some width, up to last decile
max_box_width = np.sort(seq_lengths)[-len(seq_lengths)//10]

aucs = []  # accumulator; the original fragment assumed this already existed
for box_width in range(max_box_width):  # xrange -> range for Python 3
    if (box_width % 10) == 0:
        # select only unmasked and comparable datapoints:
        m = ~np.isnan(y[:,:,1])
        # keep points that are uncensored or censored beyond box_width
        m[m] = (y[:,:,1][m]==1)|(box_width<y[:,:,1][m])

        actual = y[:,:,0][m].flatten()<=box_width
        pred   = weibull.cmf(a=predicted[:,:,0],b=predicted[:,:,1],t=box_width)[m].flatten()

        fpr,tpr,thresholds = metrics.roc_curve(actual,pred)
        auc = metrics.auc(fpr,tpr)
        print('auc: ',auc,' sliding box ',box_width)
        aucs.append(auc)
plt.plot(aucs)
plt.ylabel('AUC')
plt.xlabel('box width')

## Esoteric plots
# Animate predicted churn.
# Points whose alpha is higher than at their last step are drawn in red; the
# red stream flowing toward the right corner marks the predicted churners.

#### Walk through the timeline and look at the embedding.
# by day
padded = tr.right_pad_to_left_pad(predicted)
events_tmp = tr.right_pad_to_left_pad(events)
# by day since signup
Code Example #24
def train(model, train_loader, test_loader, criterion, optimizer, n_epochs,
          batches_per_epoch, model_name, batch_size):

    print('Start train')
    loss_plot = np.empty(int(batches_per_epoch * n_epochs))
    auc_plot = np.empty(int(batches_per_epoch * n_epochs))
    vloss_plot = np.zeros(int(batches_per_epoch * n_epochs))
    vauc_plot = np.zeros(int(batches_per_epoch * n_epochs))
    yhat_tosave = np.zeros((int(n_epochs), int(batches_per_epoch), batch_size))
    y_tosave = np.zeros((int(n_epochs), int(batches_per_epoch), batch_size))
    yhat_test_tosave = np.array([[]])
    y_test_tosave = np.array([[]])
    t0 = time.time()
    for epoch in range(n_epochs):

        cost = 0
        batch = 0

        for batch_ind, (x, y) in enumerate(train_loader):

            # x, y = x.to(device), y.to(device)
            # Train on batch
            # print('First Batch')
            optimizer.zero_grad()  # clear gradient
            z = model(x)  # make prediction
            loss = criterion(z, y)  # calculate loss
            loss.backward()  # calculate gradients
            optimizer.step()  # update parameters
            cost += loss.item()

            # Save aucs and loss
            plot_index = int(batch + batches_per_epoch * epoch)
            metrics_text = ''
            if show_auc:
                y_np = y.detach().numpy()
                z_np = z.detach().numpy()
                sz_yhat, inter_yhat = met.split_yhat(y_np, z_np)
                a, _, _ = met.auc(sz_yhat, inter_yhat)
                auc_plot[plot_index] = a
                metrics_text += 'AUC: %.2g ' % a
            if show_loss:
                loss_plot[plot_index] = loss.item()
                metrics_text += 'loss: %.3g ' % loss.item()
            if save_forecasts:
                y_np = y.detach().numpy()
                yhat_np = z.detach().numpy()
                yhat_tosave[epoch,
                            batch_ind, :y_np.shape[0]] = yhat_np.flatten()
                y_tosave[epoch, batch_ind, :y_np.shape[0]] = y_np.flatten()

            # print
            batch += 1
            t = time.time() - t0
            percent_done = batch / (batches_per_epoch *
                                    n_epochs) + epoch / n_epochs
            print(
                'Epoch %d of %d, Batch %d of %d, %0.1f%% done, %0.2f of %0.2f seconds. '
                % (epoch + 1, n_epochs, batch, batches_per_epoch,
                   percent_done * 100, t, t / percent_done) + metrics_text)
            # sys.stdout.write('\rBatch %d of %d, %0.1f done, %0.2f of %0.2f seconds. ' % (
            #     batch, batches_per_epoch, percent_done * 100, t, t / percent_done) + metrics_text)

        if plot_loss:
            val_loss = 0
            print('Calculating Test Loss')

            z_np_ = np.array([])
            y_np_ = np.array([])

            for x_, y_ in test_loader:
                z_ = model(x_)

                l = criterion(z_, y_)
                vloss_ind = int(batches_per_epoch * (epoch + 1)) - 1

                sz_, inter_ = met.split_yhat(y_.detach().numpy(),
                                             z_.detach().numpy())
                a, _, _ = met.auc(sz_, inter_)

                z_np_ = np.append(z_np_, z_.detach().numpy().flatten())  # was z_np: appended to the training-scope array
                y_np_ = np.append(y_np_, y_.detach().numpy().flatten())  # was y_np: same copy-paste bug

            vloss_plot[vloss_ind] = l.item()
            vauc_plot[vloss_ind] = a
            vis.loss_and_auc(loss_plot, auc_plot, vloss_plot, vauc_plot,
                             model_name, batches_per_epoch, n_epochs)

            if save_forecasts:
                if epoch > 0:
                    yhat_test_tosave = np.append(yhat_test_tosave,
                                                 np.reshape(
                                                     z_np_, (1, z_np_.size)),
                                                 axis=0)
                    y_test_tosave = np.append(y_test_tosave,
                                              np.reshape(
                                                  y_np_, (1, y_np_.size)),
                                              axis=0)
                else:
                    yhat_test_tosave = np.append(yhat_test_tosave,
                                                 np.reshape(
                                                     z_np_, (1, z_np_.size)),
                                                 axis=1)
                    y_test_tosave = np.append(y_test_tosave,
                                              np.reshape(
                                                  y_np_, (1, y_np_.size)),
                                              axis=1)

        print('--')

    if save_forecasts:
        np.save(
            '/media/projects/daniel_lstm/forecasts_training/' + model_name +
            '_yhat', yhat_tosave)
        np.save(
            '/media/projects/daniel_lstm/forecasts_training/' + model_name +
            '_y', y_tosave)
        np.save(
            '/media/projects/daniel_lstm/forecasts_training/' + model_name +
            '_yhat_t', yhat_test_tosave)
        np.save(
            '/media/projects/daniel_lstm/forecasts_training/' + model_name +
            '_y_t', y_test_tosave)

    return model