예제 #1
0
def get_CV_acc(X, Y, clf):
    """Return the mean accuracy of ``clf`` over a 10-fold cross-validation.

    ``X`` and ``Y`` are indexed with the index arrays produced by
    ``KFold.split``, so they must support that kind of indexing.
    ``clf`` is refitted in place on every fold and is left fitted on the
    last training split when the function returns.
    """

    def fold_accuracy(train_idx, test_idx):
        # Fit on the training part of the fold, score on the held-out part.
        clf.fit(X[train_idx], Y[train_idx])
        return ACC(clf.predict(X[test_idx]), Y[test_idx])

    fold_scores = [
        fold_accuracy(train_idx, test_idx)
        for train_idx, test_idx in KFold(n_splits=10).split(X)
    ]
    return np.mean(fold_scores)
예제 #2
0
def plot_confusion_matrix(test_label, pred):
    """Show a row-normalized confusion matrix for the given predictions.

    Every cell is annotated with ``fraction(count)``: the share of the true
    class (row) assigned to the predicted class (column) and the raw count.
    The plot title carries the overall accuracy.

    Args:
        test_label: true class ids.
        pred: predicted class ids, same length as ``test_label``.

    Class ids are translated to short names through the table below; an id
    that is not in the table is shown as its string form instead of raising
    ``KeyError``.
    """
    mapping = {
        1: 'co2',
        2: 'humidity',
        3: 'pressure',
        4: 'rmt',
        5: 'status',
        6: 'stpt',
        7: 'flow',
        8: 'HW sup',
        9: 'HW ret',
        10: 'CW sup',
        11: 'CW ret',
        12: 'SAT',
        13: 'RAT',
        17: 'MAT',
        18: 'C enter',
        19: 'C leave',
        21: 'occu',
        30: 'pos',
        31: 'power',
        32: 'ctrl',
        33: 'fan spd',
        34: 'timer'
    }
    counts = CM(test_label, pred)
    # Use the builtin float: the `np.float` alias was removed in NumPy 1.24.
    cm = normalize(counts.astype(float), axis=1, norm='l1')
    fig = pl.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(cm, cmap=Color.YlOrBr)
    fig.colorbar(cax)
    for (row, col), fraction in np.ndenumerate(cm):
        # matshow puts columns on the x axis, hence xy=(col, row).
        ax.annotate("%.3f(%d)" % (fraction, counts[row][col]),
                    xy=(col, row),
                    horizontalalignment='center',
                    verticalalignment='center',
                    fontsize=9)
    # Sorted union of the labels seen in truth and prediction, used as the
    # tick order for both axes.
    cm_cls = np.unique(np.hstack((test_label, pred)))
    cls = [mapping.get(c, str(c)) for c in cm_cls]
    pl.yticks(range(len(cls)), cls)
    pl.ylabel('True label')
    pl.xticks(range(len(cls)), cls)
    pl.xlabel('Predicted label')
    pl.title('Confusion Matrix (%.3f)' % (ACC(pred, test_label)))
    pl.show()
def metrics(label_list, pred_list, pos_prob_list):
    """Compute every metric listed in ``config['metric']``.

    Args:
        label_list: ground-truth labels.
        pred_list: predicted labels.
        pos_prob_list: scores passed to the 'auc' metric in place of the
            predicted labels.

    Returns:
        dict mapping each requested metric name to its value.

    Raises:
        NotImplementedError: if ``config['metric']`` names a metric other
            than 'fbs', 'acc' or 'auc'.  (Subclass of ``RuntimeError``, which
            is what the former bare ``raise`` produced.)
    """
    # Lazily evaluated so that only the requested metrics are computed.
    # NOTE(review): the third positional argument of FBS is presumably beta;
    # confirm it is still accepted positionally by the FBS implementation used.
    scorers = {
        'fbs': lambda: FBS(label_list, pred_list, 1),
        'acc': lambda: ACC(label_list, pred_list),
        'auc': lambda: AUC(label_list, pos_prob_list),
    }

    metric_dict = dict()
    for m in config['metric']:
        if m not in scorers:
            print('Error : No such metric. Implement it.')
            raise NotImplementedError('No such metric: %r' % (m,))
        metric_dict[m] = scorers[m]()

    return metric_dict
예제 #4
0
# Text-classification script: vectorize documents, train a naive Bayes
# model, report test accuracy, then classify sentences typed by the user.
# NOTE(review): `train`, `test` and `words` are defined outside this view;
# from their use below, index 0 presumably holds the texts and index 1 the
# labels - confirm against the loading code.
#print(type(train[0]))

vect = TfidfVectorizer()

# The vectorizer is fitted on the training texts only and then reused,
# unchanged, for the test texts and for the interactive input below.
trainfeat = vect.fit_transform(train[0])
testfeat = vect.transform(test[0])
#print(type(testfeat))
#print(trainfeat.shape)
#print(len(train[1]))
nb = MultinomialNB()
nb.fit(trainfeat, train[1])
predict = nb.predict(testfeat)
#print(predict)
#print(type(predict))

# ACC returns a value that is scaled by 100 here to print a percentage.
print("Accuracy:-{ACC}%".format(ACC=100 * ACC(test[1], predict)))

# Interactive loop: each entered line is wrapped in a one-element list,
# passed through `words`, vectorized and classified.
# NOTE(review): the rest of this loop (printing the result and the 'Quit'
# branch) is outside this view.
while True:
    review = []
    line = input("Enter a sentence('Quit' to quit):\n")
    if line != 'Quit':
        #print("Words Type:"+type(review).__name__)
        review.append(line)
        #print(review)
        # NOTE(review): `a` is not used in the visible code; `words(review)`
        # is called a second time two lines below.
        a = words(review)
        #print(a)
        #print("Words Type:"+type(a).__name__)
        data = vect.transform(words(review))
        #print(data)
        #print(type(data))
        # Rebinds the module-level `predict` that held the test predictions.
        predict = nb.predict(data)
예제 #5
0
    def run_auto(self):
        '''
        Label the target building by agreement-weighted transfer learning.

        First prints the accuracy of a random forest trained on
        (self.train_fd, self.train_label) and applied directly to
        self.test_fd, as a baseline. Then, after self.get_base_learners():

        1. self.test_fn is clustered with k-means; the "cluster neighbours"
           of an example are the other examples in its cluster.
        2. For each base learner in self.bl, the "prediction neighbours" of
           an example are the other examples given the same predicted class.
        3. For every example and learner, a similarity between both
           neighbour sets is computed and used as that learner's weight.
           Examples whose mean weight reaches self.agreement_threshold are
           labelled with the class maximizing the weighted sum of the
           learners' predict_proba outputs.

        Accuracy and coverage over the labelled examples are printed.

        Returns a tuple (labels, labeled_id, confidence):
            labels     -- predicted labels of the examples that were labelled
            labeled_id -- their indices into the test set
            confidence -- their mean learner weights, in the same order
        '''
        rf = RFC(n_estimators=100, criterion='entropy')
        rf.fit(self.train_fd, self.train_label)
        pred = rf.predict(self.test_fd)
        print('direct data feature-based transfer acc on tgt_bldg:',
              ACC(pred, self.test_label))
        #plot_confusion_matrix(self.test_label, pred)

        # step1: train base models from bldg1
        self.get_base_learners()

        # step2: TL with name feature on bldg2
        label = self.test_label
        class_ = np.unique(self.train_label)

        for b in self.bl:
            print(b.score(self.test_fd, label))

        n_class = 32
        c = KMeans(init='k-means++', n_clusters=n_class, n_init=10)
        c.fit(self.test_fn)
        ex_id = DD(list)  #example id for each C
        for j, cluster in enumerate(c.labels_):
            ex_id[cluster].append(j)

        #getting neighors for each ex
        nb_c = DD()  #nb from clustering results
        for exx in ex_id.values():
            exx = np.asarray(exx)
            for e in exx:
                nb_c[e] = exx[exx != e]

        # NOTE(review): three neighbour maps are hard-coded, so only the
        # first three entries of self.bl take part in the weighting below -
        # confirm get_base_learners() always yields exactly three.
        nb_f = [DD(), DD(), DD()]  #nb from classification results
        for b, n in zip(self.bl, nb_f):
            ex_ = DD(list)
            for j, predicted in enumerate(b.predict(self.test_fd)):
                ex_[predicted].append(j)
            for exx in ex_.values():
                exx = np.asarray(exx)
                for e in exx:
                    n[e] = exx[exx != e]

        #use base learners' predicitons
        unlabeled = 999  # sentinel for examples that did not get a label
        acc_ = []
        cov_ = []
        #for delta in np.linspace(0.1, 0.5, 5):
        for delta in np.linspace(self.agreement_threshold,
                                 self.agreement_threshold, 1):
            print('running TL with agreement threshold =', delta)

            labeled_id = []
            confidence = []
            output = DD()  # per-example diagnostics; not returned
            preds = np.full(len(self.test_fd), unlabeled)
            for i in range(len(self.test_fn)):
                #get the weight for each bl: by computing sim btw cluster and clf
                w = []
                v_c = set(nb_c[i])
                for n in nb_f:
                    v_f = set(n[i])
                    inter = v_c & v_f
                    union = v_c | v_f
                    if inter:
                        #original count based weight
                        cns = len(inter) / float(len(union))
                        # Summed distances from example i to the shared
                        # neighbours and to all neighbours.
                        d_i = sum(
                            np.linalg.norm(self.test_fn[i] - self.test_fn[it])
                            for it in inter)
                        d_u = sum(
                            np.linalg.norm(self.test_fn[i] - self.test_fn[u])
                            for u in union)
                        sim = 1 - (d_i / d_u) / cns
                    else:
                        # No shared neighbours: treat as no agreement. The
                        # previous code left `sim` unbound (first use) or
                        # stale from an earlier learner/example here, and
                        # divided by zero when both sets were empty.
                        sim = 0.0

                    output.setdefault(i, []).extend(
                        ['%s/%s' % (len(inter), len(union)), 1 - sim])
                    w.append(sim)
                mean_w = np.mean(w)
                output[i].append(mean_w)

                if mean_w >= delta:
                    confidence.append(mean_w)
                    # Normalize the weights so they sum to one.
                    total = sum(w)
                    w = [float(j) / total for j in w]
                    pred_pr = np.zeros(len(class_))
                    for wi, b in zip(w, self.bl):
                        pr = b.predict_proba(self.test_fd[i].reshape(1, -1))
                        pred_pr = pred_pr + wi * pr
                    preds[i] = class_[np.argmax(pred_pr)]
                    labeled_id.append(i)

            was_labeled = preds != unlabeled
            acc_.append(ACC(preds[was_labeled], label[was_labeled]))
            cov_.append(1.0 * len(preds[was_labeled]) / len(label))

        print('acc =', acc_, ';')
        print('cov =', cov_, ';')

        return preds[preds != unlabeled], labeled_id, confidence
예제 #6
0
            # NOTE(review): this is the tail of nested loops whose headers are
            # outside this view; the indices i, j, k used below presumably
            # index kernel, penalty and CV fold respectively - confirm.
            # scale continuous data
            # The scaler is fitted on the training split only and then applied
            # to both splits.
            scaler = StandardScaler()
            scaler.fit(x_train_cont)
            x_train_cont = scaler.transform(x_train_cont)
            x_test_cont = scaler.transform(x_test_cont)

            # fill scaled data
            # Column l of the scaled arrays is written back under the name
            # features_to_extract[l]; the chained-assignment warning is
            # silenced for the duration of the write-back.
            with pd.option_context('mode.chained_assignment', None):
                for l, f in enumerate(features_to_extract):
                    x_train.loc[:, f] = x_train_cont[:, l]
                    x_test.loc[:, f] = x_test_cont[:, l]

            model.fit(x_train, y_train)
            y_pred = model.predict(x_test)

            accuracy_kfold[k] = ACC(y_test, y_pred)
        # Aggregate the per-fold accuracies for this (i, j) cell.
        acc_mean[i, j] = accuracy_kfold.mean()
        acc_std[i, j] = accuracy_kfold.std()
    # One error-bar series per row i: mean accuracy against penalty, with the
    # fold standard deviation as the error bar.
    plt.errorbar(penalties,
                 acc_mean[i, :],
                 yerr=acc_std[i, :],
                 label=kernel,
                 fmt="o",
                 capsize=5,
                 markersize=7)
    # Keep the penalty with the highest mean accuracy for row i.
    best_penalties[i] = penalties[np.argmax(acc_mean[i, :])]

print("Best penalties:", best_penalties)

plt.legend()
plt.xscale("log")
# Leave-one-building-out transfer: train a random forest on every entry of
# X_fd except entry i, predict entry i, and start writing the labels to a file.
# NOTE(review): X_fd and X_fn are defined outside this view.
rf = RFC(n_estimators=100, criterion='entropy')
bldg = ['rice', 'sdh', 'soda']
# for i in range(len(X_fd)):
i = 0
# All entries except entry i are stacked into one training matrix.
source = [X_fd[j] for j in range(len(X_fd)) if j != i]
train = np.vstack(source)
# The last column is used as the label, all other columns as features.
train_fd = train[:, :-1]
train_label = train[:, -1]
test_fd, test_label = X_fd[i][:, :-1], X_fd[i][:, -1]
#print (train_fd.shape, train_label.shape, test_fd.shape, test_label.shape)

rf.fit(train_fd, train_label)
preds = rf.predict(test_fd)

print(ACC(preds, test_label))
# Sanity check: X_fn[i] must have one row per target example.
assert (len(test_label) == len(X_fn[i]))

# NOTE(review): the source name is fixed to bldg[1] although the model above
# was trained on every entry except i (two entries if X_fd has three) -
# confirm the file name is meant to mention only one source.
sourceName = bldg[1]
targetName = bldg[0]

dataDir = "../../dataset/sensorType/sdh_soda_rice"
transferLabelFileName = "transferLabel_" + sourceName + "--" + targetName + ".txt"
transferLabelFileName = os.path.join(dataDir, transferLabelFileName)
# NOTE(review): the file is opened without a `with` block and no close() is
# visible in this view - confirm it is closed further down.
f = open(transferLabelFileName, "w")

totalInstanceNum = len(test_label)
# Header row of the tab-separated output.
f.write("auditorLabel" + "\t" + "transferLabel" + "\t" + "trueLabel\n")
for instanceIndex in range(totalInstanceNum):
    transferLabel = preds[instanceIndex]
    trueLabel = test_label[instanceIndex]
예제 #8
0
                       penalty,
                       activation="tanh",
                       regression=False)
    NN.set_learning_params(a1, a2)
    NN.fit(X_train,
           Z_train,
           n_minibatches,
           n_epochs,
           std_W=std_W,
           const_b=const_b,
           track_cost=[X_test, Z_test])

    Z_pred = NN.classify(X_test)

    print(f"Neural Network with penalty lambda = {penalty}")
    print("  Accuracy score =", ACC(Z_test, Z_pred))

    plt.plot(np.arange(1, n_epochs + 1),
             NN.cost,
             label=f"$\lambda={penalty:.2f}$")

plt.xlabel("Number of epochs", fontsize=12)
plt.ylabel("Cost function", fontsize=12)
plt.title("Evolution of cost function", fontsize=15)
plt.legend()
plt.savefig("Figures/NNcla_sgd_cost_function.png", dpi=300)
plt.show()

# grid search learning parameters

n_hidden_layers = 5
예제 #9
0
 def score(self, X, Y):
     """Return ACC of this model's predictions for ``X`` against ``Y``.

     Both the predictions and ``Y`` are moved to the CPU via ``.cpu()``
     before being handed to ``ACC``.
     """
     predicted_on_cpu = self.predict(X).cpu()
     return ACC(predicted_on_cpu, Y.cpu())
예제 #10
0
            # NOTE(review): this is the body of a validation batch loop whose
            # header (and the creation of `data`, `mask`, `preds`, `labels`,
            # `valid_loss`) is outside this view.
            # Column 1 of the batch is used as the label.
            label = torch.tensor(list(batch[:, 1])).to(DEVICE)
            data, mask = data.to(DEVICE), mask.to(DEVICE)
            output = model(data, mask)
            logit, loss = classifier(output, label)
            # Predicted class = index of the largest softmax probability
            # along dim 1, converted to a NumPy array on the CPU.
            pred = torch.argmax(torch.softmax(logit, dim=1),
                                dim=1).data.cpu().numpy()
            label = label.data.cpu().numpy()

            # New batch results are placed in front of the accumulated ones;
            # predictions and labels are prepended the same way, so they stay
            # aligned with each other.
            preds = np.concatenate((pred, preds))
            labels = np.concatenate((label, labels))
            loss = loss.mean()

            valid_loss += loss.item()

        # Number of predicted positives vs. number of true positives labels.
        print(len(preds[preds == 1]), len(labels[labels == 1]))
        # NOTE(review): predictions are passed as the first argument. If
        # ACC/P/R/F1 are scikit-learn's scorers, their signature is
        # (y_true, y_pred), which would swap precision and recall here -
        # confirm.
        acc = ACC(preds, labels)
        pre = P(preds, labels)
        rec = R(preds, labels)
        f1 = F1(preds, labels)
        # Losses are reported as the per-batch average over each dataloader.
        print(
            'acc:{:.4f}, precision:{:.4f}, recall:{:.4f}, f1:{:.4f}, train_loss:{:.4f}, valid_loss:{:.4f}'
            .format(acc, pre, rec, f1, total_loss / len(train_dataloader),
                    valid_loss / len(valid_dataloader)))

    # Inference on the test set: evaluation mode, gradients disabled.
    model.eval()
    classifier.eval()
    with torch.no_grad():
        preds, ids = [], []
        for i, batch in enumerate(test_dataloader):
            data, mask = tensorized(batch[:, 0], vocab)
            # NOTE(review): `id` shadows the builtin of the same name; for
            # test batches column 1 is read as an id rather than a label.
            id = np.array(list(batch[:, 1]))