Example #1
import os

import numpy as np
import torch
from tqdm import tqdm

# visualize_sum_testing_result and auc are assumed to be project-level
# helpers imported from the surrounding code base.


def val(path, model, test_loader, device, criterion, epoch, batch_size):
    model.eval()
    sum_running_loss = 0.0

    with torch.no_grad():
        for batch_idx, data in enumerate(tqdm(test_loader)):
            task = data['task'].to(device).float()
            task_label = data['task_label'].to(device).float()

            # All black
            # init = data['init']
            # init[:] = 0
            # init = init.to(device).float()

            # Normal
            init = data['init'].to(device).float()

            # print("init shape", init.shape)
            label = data['label'].to(device).float()
            # model prediction
            prediction = model(subx=task_label, mainx=init)

            # loss
            loss_mse = criterion(prediction, label.data)

            # accumulate loss
            sum_running_loss += loss_mse.item() * init.size(0)

            # visualize the sum testing result
            visualize_sum_testing_result(path, init, prediction, task_label,
                                         label.data, batch_idx, epoch,
                                         batch_size)
            if batch_idx == 0:
                prediction_output = prediction.cpu().detach().numpy()
                label_output = label.cpu().detach().numpy()
                init_output = init.cpu().detach().numpy()
            else:
                prediction_output = np.append(
                    prediction.cpu().detach().numpy(),
                    prediction_output,
                    axis=0)
                label_output = np.append(label.cpu().detach().numpy(),
                                         label_output,
                                         axis=0)
                init_output = np.append(init.cpu().detach().numpy(),
                                        init_output,
                                        axis=0)
    sum_running_loss = sum_running_loss / len(test_loader.dataset)
    print('\nTesting phase: epoch: {} Loss: {:.4f}\n'.format(
        epoch, sum_running_loss))
    auc_path = os.path.join(path, "epoch_" + str(epoch))
    auc(['flow'], [2, 4, 10, 100], [[label_output, prediction_output]],
        auc_path)
    return sum_running_loss, prediction_output, label_output, init_output
Example #2
import tensorflow as tf

# model, loss_object, test_loss, test_accuracy, test_auc and auc are assumed
# to be defined in the enclosing script. The confusion counts are passed in
# and returned in the order tp, tn, fp, fn.
def test_step(images, labels, tp, tn, fp, fn):
    output = model(images, training=False)
    t_loss = loss_object(labels, output)
    test_loss(t_loss)
    test_accuracy(labels, output)
    aucvalue = auc(labels,
                   output,
                   weights=None,
                   num_thresholds=200,
                   name=None,
                   summation_method='trapezoidal')
    test_auc(aucvalue)
    # print("output: ", output)
    # print("labels: ", labels)
    y_pred = tf.argmax(output, 1)
    y_true = tf.argmax(labels, 1)
    # print("y_pred: ", y_pred)
    # print("y_true: ", y_true)
    pred = tf.cast(y_pred, tf.int32)
    true = tf.cast(y_true, tf.int32)
    # print("pred: ", pred)
    # print("true: ", true)
    for i in range(len(pred)):
        if pred[i] == true[i] and true[i] == 1:
            tp += 1
        elif pred[i] == true[i] and true[i] == 0:
            tn += 1
        elif pred[i] == 1 and true[i] == 0:
            fp += 1
        else:
            fn += 1
    return tp, tn, fp, fn, output
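Threading the running counts through each call, a minimal evaluation driver could look like the sketch below (test_ds is a hypothetical tf.data.Dataset of (images, labels) batches; the names are illustrative):

tp = tn = fp = fn = 0
for images, labels in test_ds:
    tp, tn, fp, fn, _ = test_step(images, labels, tp, tn, fp, fn)
precision = tp / max(tp + fp, 1)  # guard against division by zero
recall = tp / max(tp + fn, 1)
print('precision: %.4f, recall: %.4f' % (precision, recall))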
Example #3
def cal_auc(ans, i, preds):
    preds['auc'] = 0.0
    for index, pred in preds.iterrows():
        truth = ans.loc[index]
        a = auc(pred[0:28], truth)
        preds.at[index, 'auc'] = round(a, 4)
    # Move the new 'auc' column to the front
    cols = preds.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    preds = preds[cols]
    # preds.to_csv('../auc/%s-%03d.csv' % (PREFIX, i))
    return preds
Example #4
def test_step(images, labels):
    output = model(images, training=False)
    t_loss = loss_object(labels, output)

    test_loss(t_loss)
    test_accuracy(labels, output)
    aucvalue = auc(labels,
                   output,
                   weights=None,
                   num_thresholds=200,
                   name=None,
                   summation_method='trapezoidal')
    test_auc(aucvalue)
Example #5
def train_step(images, labels):
    with tf.GradientTape() as tape:
        output = model(images, training=True)
        loss = loss_object(labels, output)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, output)
    aucvalue = auc(labels,
                   output,
                   weights=None,
                   num_thresholds=200,
                   name=None,
                   summation_method='trapezoidal')
    train_auc(aucvalue)
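A minimal epoch loop driving these step functions might look like the following sketch. It assumes the usual TF2-quickstart setup: train_ds / test_ds tf.data pipelines and tf.keras.metrics objects (train_loss, train_accuracy, train_auc and their test counterparts) defined elsewhere in the script.

EPOCHS = 5  # illustrative value
for epoch in range(EPOCHS):
    # Reset the stateful metrics at the start of each epoch
    # (reset_state() in newer Keras versions).
    for metric in (train_loss, train_accuracy, train_auc,
                   test_loss, test_accuracy, test_auc):
        metric.reset_states()
    for images, labels in train_ds:
        train_step(images, labels)
    for images, labels in test_ds:
        test_step(images, labels)
    print('Epoch %d: loss %.4f, acc %.4f, auc %.4f' %
          (epoch + 1, train_loss.result(),
           train_accuracy.result(), train_auc.result()))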
Example #6
def evaluate_c16_main():
    eva_score = get_testset_score()
    gt_score = get_ground_truth()
    save_testset_score(eva_score, gt_score)
    fig_name = "c16_auc_mean_cnt_%d.jpg" % (MEAN_COUNT)
    fig_path = os.path.join(dir_c16_outs, fig_name)

    auc_score = auc.auc(gt_score, eva_score, fig_path)
    print("Mean count: %d, C16 AUC is: %.4f" % (MEAN_COUNT, auc_score))
Example #7
def conv1d_auc(i):
    pred_data = pd.read_csv('../con/%03d.csv' % i, index_col='user_id')
    pred_data = scaler.transform(pred_data)

    pred = model.predict(reshape(pred_data))
    pred_df = pd.DataFrame(pred)
    pred_df['auc'] = 0.0
    ans = pd.read_csv('../public/label-%03d.csv' % i, index_col='user_id')
    pi = 0
    for index, ans_row in ans.iterrows():
        a = auc.auc(pred[pi], ans_row)
        pred_df.at[pi, 'auc'] = a
        pi += 1

    cols = pred_df.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    pred_df = pred_df[cols]
    pred_df.to_csv('../conv1d-auc/%s-%03d.csv' %
                   (datetime.now().isoformat(timespec='minutes'), i))
Example #8
# NumPy is used as N throughout; data_struct, kernelize, pd_check, train,
# test, auc, randperm and rp_path are project-level helpers assumed to be
# importable in this module.
def make_learning_curve(X,
                        Y,
                        min_repeat,
                        max_repeat,
                        ebar,
                        max_point_num,
                        debug=False,
                        useRPMat=False):
    """x, y, e = make_learning_curve(X, Y, min_repeat,
                                     max_repeat, ebar, max_point_num)

    Make the learning curve.

    Inputs:
        X -- data matrix
        Y -- labels
    Returns:
        x -- number of samples
        y -- performance (AUC)
        e -- error bar
    """
    if debug:
        print("ENTER MLC")
        print("Casting X to 64 bit")

    assert Y.any(axis=0).all(), \
        ("Some classes have no example, at least one should be present "
         "for each class")

    X = X.astype('float64')
    Y = Y.astype('float64')  # astype returns a copy

    # Verify dimensions and set target values
    p, n = X.shape

    if len(Y.shape) == 1:
        Y = (N.ones((1, 1)) * Y).T  # promote 1-D labels to a column vector

    pp, cn = Y.shape
    sep_num = cn

    if pp != p:
        raise Exception('Size mismatch. X has ' + str(p) +
                        ' examples but Y has ' + str(pp) + ' labels')

    # If only 2 classes, the 2nd col is the same as the first but opposite
    if cn == 2 and N.all(Y.sum(axis=1)):
        Y = (N.ones((1, 1)) * Y[:, 0]).T
        sep_num = 1

    Y[Y == 0] = -1

    if debug:
        time.sleep(2)
        print("Creating the data matrices")

    # Create the data matrices (Y at this stage is still multi-column)
    D = data_struct(X, Y)
    feat_num = D.X.shape[1]

    K = None

    # Kernelize small data sets whose X is not positive definite
    if not pd_check.pd_check(D.X) and D.X.shape[0] < 2000:
        D = kernelize.kernelize(D)

    # Load random splits (these are the same for everyone)
    RP = None
    if useRPMat and os.path.exists(rp_path) and os.path.isfile(rp_path):
        RP = io.loadmat(rp_path, struct_as_record=False)['RP']
        rp, mr = RP.shape

        if rp < p:
            if debug:
                print('make_learning_curve::warning: RP too small')
            RP = None
        else:
            max_repeat = min(max_repeat, mr)
            RP = N.ceil(RP.astype('float64') / (float(rp) / float(p)))
            RP = RP.astype(int)
            if debug:
                print('make_learning_curve: using RP of dim %dx%d min=%d '
                      'max=%d, max_repeat=%d' %
                      (rp, mr, RP.min(), RP.max(), max_repeat))
    else:
        print('make_learning_curve::warning: no RP file found\n')
    if debug:
        time.sleep(2)
        print("Computing sample sizes")

    # Sample sizes scaled in log2
    m = N.floor(math.log(p, 2))
    x = 2.**N.arange(0, int(m) + 1)

    if x[-1] != p:
        x = N.hstack((x, [p]))
    x = x[0:-1]  # need to remove the last point

    if max_point_num is None:
        point_num = x.shape[0]
    else:
        point_num = min(x.shape[0], max_point_num)

    # Loop over the sample sizes
    x = x[0:point_num]
    x = x.astype('uint32')
    y = N.zeros(x.shape)
    e = N.zeros(x.shape)

    for k in range(0, point_num):

        if debug:
            print('-------------------- Point %d ----------------------' % k)

        A = N.zeros((sep_num, 1))
        E = N.zeros((sep_num, 1))
        e[k] = N.inf
        # Loop over number of "1 vs all" separations
        for j in range(0, sep_num):

            if debug:
                print(' sep %d -- ' % j)
            repnum = 0
            area = []

            # Loop over repeats (floating number of repeats)
            while repnum < min_repeat or \
                  (E[j] > ebar and repnum < max_repeat):

                if debug:
                    print('repeat %d **' % repnum)
                    print('min_repeat: ' + str(min_repeat))
                    print('max_repeat: ' + str(max_repeat))

                if RP is None:
                    rp = randperm(p)
                else:
                    rp = RP[0:p, repnum] - 1

                tr_idx = rp[0:x[k]]
                te_idx = rp[x[k]:]

                if debug:
                    print("Obtaining sub arrays")
                    time.sleep(2)

                if pd_check.pd_check(D):  # kernelized version
                    if debug:
                        print('pd_check ok, using kernelized version')
                    Dtr = D.subdim(tr_idx, tr_idx, [j])
                    Dte = D.subdim(te_idx, tr_idx, [j])
                else:  # primal version
                    if debug:
                        print('using non-kernelized version')
                    Dtr = data_struct(D.X[tr_idx, :], D.Y[tr_idx, j])
                    Dte = data_struct(D.X[te_idx, :], D.Y[te_idx, j])

                if debug:
                    time.sleep(2)
                    print("Training classifier")
                d, m = train.train(Dtr)

                if debug:
                    time.sleep(2)
                    print("Computing test values")
                d1 = test.test(m, Dte)
                assert d1.X.shape[0] != 0, "d1.X.shape[0] == 0"
                assert repnum == len(area), "repnum == len(area)"
                if debug:
                    time.sleep(2)
                    print("Computing auc")
                area.append(auc.auc(d1.X, d1.Y, dosigma=False)[0])

                if debug:
                    time.sleep(2)
                    print("done")

                repnum += 1
                E[j] = N.asarray(area).std() / N.sqrt(repnum)
            # end repeat loop
            assert not N.any(N.isnan(area))
            A[j] = N.asarray(area).mean()
            if N.isnan(A[j]):
                assert False, "Invalid area: " + str(area)
        # end for j in range(sep_num)
        e[k] = E.mean()
        y[k] = A.mean()

        assert not N.isnan(y[k])

        if debug:
            print('==> %d repeats, auc=%s+-%s -----------------' %
                  (repnum, str(y[k]), str(e[k])))
    # end loop over k

    # Add point with 0 examples
    x = N.concatenate((N.asarray([0]), x))
    P = 0.5  # chance-level AUC for the zero-example point
    y = N.concatenate((N.asarray([P]), y))
    e = N.concatenate((N.asarray([N.sqrt(P * (1 - P) / p)]), e))
    return x, y, e
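A hypothetical call on toy data could look like the sketch below; it cannot run without the project helpers listed above, and the data and parameter values are purely illustrative.

import numpy as N

rng = N.random.RandomState(0)
X_toy = rng.randn(256, 10)             # 256 examples, 10 features
Y_toy = (X_toy[:, 0] > 0).astype(int)  # binary labels from feature 0

x, y, e = make_learning_curve(X_toy, Y_toy, min_repeat=3, max_repeat=10,
                              ebar=0.01, max_point_num=None)
for n_samples, area, err in zip(x, y, e):
    print('n=%5d  AUC=%.3f +- %.3f' % (n_samples, area, err))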
Example #9
        model.load_weights('model_save/deep_fm_sample-ep001-loss0.184-val_loss0.172.h5')

        # model = load_model('model_save/deep_fm_sample-ep001-loss0.192-val_loss0.176.h5')

        data = pd.read_csv("./data/sample/validation.txt")

        # 1.Label Encoding for sparse features,and do simple Transformation for dense features
        for feat in sparse_features:
            lbe = LabelEncoder()
            data[feat] = lbe.fit_transform(data[feat])
        # 2.count #unique features for each sparse field
        sparse_feature_dim = {feat: data[feat].nunique()
                              for feat in sparse_features}
        # 3.generate input data for model
        model_input = [data[feat].values for feat in sparse_feature_dim]

        pred = model.predict(model_input, batch_size, 1)
        label = data[target].values.flatten().tolist()
        pred = pred.flatten().tolist()
        with open('data/pctr', 'w') as fw:
            for i in range(len(pred)):
                if i % 10000 == 0:
                    print('label: %f, pred: %f' % (label[i], pred[i]))
                to_write = str(i+1)+','+str(label[i])+','+str(pred[i])+'\n'
                fw.write(to_write)
        AUC = auc.auc(label, pred)
        print('auc: %f' % AUC)

    print("demo done")
            s = "/valid/fluorescence_valid.txt"
            sn = None
            sp = "/valid/networkPositions_valid.txt"
            TV, _, PV = read(s, sn, sp, Knormal)
            print "reading test..."
            s = "/test/fluorescence_test.txt"
            sn = None
            sp = "/test/networkPositions_test.txt"
            TT, _, PT = read(s, sn, sp, Knormal)

            [RN2, RT, RV, RN3] = learnAndPredict(TN1, CN1, [TN2, TT, TV, TN3])
            suff = np.random.randint(10000)
            f = open("./res_ver" + str(VER) + ".csv", 'w')
            f.write("NET_neuronI_neuronJ,Strength\n")
            for i in range(1000):
                for j in range(1000):
                    f.write("valid_" + str(i + 1) + "_" + str(j + 1) + "," +
                            str(RV[i][j]) + "\n")
            for i in range(1000):
                for j in range(1000):
                    f.write("test_" + str(i + 1) + "_" + str(j + 1) + "," +
                            str(RT[i][j]) + "\n")
            f.close()
            print "Wrote solution of VER ==", str(VER)
            RN2_ = RN2.flatten().tolist()
            a = auc.auc(CN2.flatten().tolist(), RN2_)
            RN3_ = RN3.flatten().tolist()
            a2 = auc.auc(CN3.flatten().tolist(), RN3_)
            print("RES: %.2f learning (%.2f, %.2f)" %
                  ((a + a2) * 50, a * 100, a2 * 100))
                    batch[key + 1].values for key in np.hstack((
                        np.arange(0, 6),
                        np.arange(6, len(sparse_feature_dim))))
                ]
                labels.extend(batch[0].values)

                pred = model.predict_on_batch(X)
                preds.extend(pred.flatten().tolist())

                cnt += batch_size

                if cnt % (batch_size * 100) == 0:  # periodic progress print
                    print(pred[0])

        print('calculating auc ......')
        AUC = auc.auc(labels, preds)
        print('auc: %f' % AUC)

    elif mode == 'pred':
        # model.load_weights('model_save/deep_fm_fn_bs10000-ep001-loss0.155-val_loss0.153.h5')  # auc: 0.714774
        #model.load_weights('model_save/deep_fm_fn_bs15000-ep001-loss0.156-val_loss0.152.h5')  # auc: 0.717083
        #model.load_weights('model_save/deep_fm_fn-ep002-loss0.154-val_loss0.154-bs15000-ee20-hz[128, 128].h5')  # auc: 0.718581
        #model.load_weights('model_save/deep_fm_fn-ep020-loss0.153-val_loss0.153-bs15000-ee20-hz[5, 600].h5')  # auc: 0.719317
        model.load_weights(
            'model_save/deep_fm_fn-ep043-loss0.152-val_loss0.152-bs15000-ee20-hz[3, 600].h5'
        )  # auc: 0.722419

        ctr = []
        reader = pd.read_csv(
            '/home/yezhizi/Documents/python/2018DM_Project/track2/KDD_Track2_solution.csv',
            chunksize=chunk_size)
def auc_scorer(estimator, X, y):
    predicted = estimator.predict_proba(X)[:, 1]
    return auc.auc(y, predicted)
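Since auc_scorer follows scikit-learn's (estimator, X, y) scorer signature, it can be passed straight to model-selection utilities. A sketch, assuming a feature matrix X and labels y plus the custom auc module imported above:

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

clf = LogisticRegression(max_iter=1000)
scores = cross_val_score(clf, X, y, scoring=auc_scorer, cv=5)
print('mean AUC: %.4f' % scores.mean())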
                pred = model.predict_on_batch(X)
                preds.extend(pred.flatten().tolist())

                cnt += batch_size

                if cnt % (batch_size * 100) == 0:  # periodic progress print
                    print(pred[0])

        now_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        print(now_time)

        print('calculating auc ......')

        print('labels: %d' % len(click))
        print('preds: %d' % len(preds))
        AUC = auc.auc(np.array(click, dtype=float) / np.array(imp, dtype=float), preds)
        print('auc: %f' % AUC)
        AUC = auc.scoreClickAUC(click, imp, preds)
        print('scoreClickAUC: %f' % AUC)

        now_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
        print(now_time)

        # writing preds to csv
        with open('data/' + model_name + '.csv', 'w') as fw:
            for i in range(len(preds)):
                if i % 1000000 == 0:
                    print('label: %f, pred: %f' % (click[i]/imp[i], preds[i]))
                to_write = str(preds[i]) + '\n'
                fw.write(to_write)
Example #14
            print "reading valid..."
            s = "/valid/fluorescence_valid.txt"
            sn = None
            sp = "/valid/networkPositions_valid.txt"
            TV, _, PV = read(s,sn,sp, Knormal)
            print "reading test..."
            s = "/test/fluorescence_test.txt"
            sn = None
            sp = "/test/networkPositions_test.txt"
            TT, _, PT = read(s,sn,sp, Knormal)

            [RN2, RT, RV, RN3] = learnAndPredict(TN1, CN1, [TN2, TT, TV, TN3])
            suff = np.random.randint(10000)
            f = open("./res_ver"+str(VER)+".csv", 'w')
            f.write("NET_neuronI_neuronJ,Strength\n")
            for i in range (1000):
                for j in range (1000):
                    f.write("valid_" +str(i+1)+"_"+str(j+1)+","+str(RV[i][j])+"\n")
            for i in range (1000):
                for j in range (1000):
                    f.write("test_" +str(i+1)+"_"+str(j+1)+","+str(RT[i][j])+"\n")
            f.close()
            print "Wrote solution of VER ==", str(VER)        
            RN2_ = RN2.flatten().tolist()
            a = auc.auc(CN2.flatten().tolist(),RN2_)
            RN3_ = RN3.flatten().tolist()
            a2 = auc.auc(CN3.flatten().tolist(),RN3_)
            print ("RES: %.2f learning (%.2f, %.2f)" % ((a+a2)*50, a*100, a2*100 ))


Example #15
    sp = "/" + "small" + "/networkPositions_" + name + ".txt"
    print(name)
    TSmall5, CSmall5, PSmall5 = read(s, sn, sp, Knormal)
    gc.collect()

    name = "iNet1_Size100_CC06inh"
    s = "/" + "small" + "/fluorescence_" + name + ".txt"
    sn = "/" + "small" + "/network_" + name + ".txt"
    sp = "/" + "small" + "/networkPositions_" + name + ".txt"
    print(name)
    TSmall6, CSmall6, PSmall6 = read(s, sn, sp, Knormal)
    gc.collect()

    [RS4, RS6] = learnAndPredict(TSmall5, CSmall5, [TSmall4, TSmall6])
    RS4_ = RS4.flatten().tolist()
    a = auc.auc(CSmall4.flatten().tolist(), RS4_)
    RS6_ = RS6.flatten().tolist()
    a2 = auc.auc(CSmall6.flatten().tolist(), RS6_)
    print("RES: %.2f Small4, %.2f Small6" % (a * 100, a2 * 100))
    f = open("res_small_4_6.csv", 'w')
    f.write("NET_neuronI_neuronJ,Strength\n")
    for i in range(NN):
        for j in range(NN):
            f.write("valid_" + str(i + 1) + "_" + str(j + 1) + "," +
                    str(RS4[i][j]) + "\n")
    for i in range(NN):
        for j in range(NN):
            f.write("test_" + str(i + 1) + "_" + str(j + 1) + "," +
                    str(RS6[i][j]) + "\n")
    f.close()
    print "Wrote solution to ./res_Small_4_6.csv"
Example #16
import pandas as pd
from auc import auc

DATA_PATH = '../public/'

i = 1

ans = pd.read_csv(DATA_PATH + 'label-%03d.csv' % i, index_col='user_id')

preds = pd.read_csv('../predict-%03d.csv' % i, index_col='user_id')
preds['auc'] = 0.0

for index, pred in preds.iterrows():
    truth = ans.loc[index]
    a = auc(pred[0:28], truth)
    preds.at[index, 'auc'] = round(a, 4)

cols = preds.columns.tolist()
cols = cols[-1:] + cols[:-1]
preds = preds[cols]
preds.to_csv('../auc-%03d.csv' % i)
    sn = "/" + "small" + "/network_" + name + ".txt"
    sp = "/" + "small" + "/networkPositions_" + name + ".txt"
    print name
    TSmall5, CSmall5, PSmall5 = read(s, sn, sp, Knormal)
    gc.collect()

    name = "iNet1_Size100_CC06inh"
    s = "/" + "small" + "/fluorescence_" + name + ".txt"
    sn = "/" + "small" + "/network_" + name + ".txt"
    sp = "/" + "small" + "/networkPositions_" + name + ".txt"
    print name
    TSmall6, CSmall6, PSmall6 = read(s, sn, sp, Knormal)
    gc.collect()

    [RS4, RS6] = learnAndPredict(TSmall5, CSmall5, [TSmall4, TSmall6])
    RS4_ = RS4.flatten().tolist()
    a = auc.auc(CSmall4.flatten().tolist(), RS4_)
    RS6_ = RS6.flatten().tolist()
    a2 = auc.auc(CSmall6.flatten().tolist(), RS6_)
    print ("RES: %.2f Small4, %.2f Small6" % (a * 100, a2 * 100))
    f = open("res_small_4_6.csv", "w")
    f.write("NET_neuronI_neuronJ,Strength\n")
    for i in range(NN):
        for j in range(NN):
            f.write("valid_" + str(i + 1) + "_" + str(j + 1) + "," + str(RS4[i][j]) + "\n")
    for i in range(NN):
        for j in range(NN):
            f.write("test_" + str(i + 1) + "_" + str(j + 1) + "," + str(RS6[i][j]) + "\n")
    f.close()
    print "Wrote solution to ./res_Small_4_6.csv"