Esempio n. 1
0
def label_test():
    """Fit one classifier per labeler and save the per-segment "best labeler" vote.

    For every row of ``labelers.labels`` a fresh ``ObjectClassifier`` is fitted
    on the training sub-map, using as target whether that row equals the
    majority vote on the training segments.  Its ``predict_proba_segs`` output
    on the test sub-map fills one row of ``predictions``.  The row index with
    the highest value per test segment is then used to pick a label from
    ``labelers.labels``.

    Writes ``predictions.npy``, ``best.npy`` and ``vote.npy`` in the current
    working directory and prints progress along the way.

    Note: in this function the *test* region is columns ``[0, 2048)`` and the
    *train* region is columns ``[2048, 4096)``; both use rows ``[1365, 4096)``.
    """
    from labeler import Labelers
    # Not referenced again in this function; construction kept as-is.
    model = ObjectClassifier(NZ=False)
    labelers = Labelers()

    # Floor division keeps the bounds integral (4096 // 3 == 1365).
    test = np.ix_(np.arange(4096 // 3, 4096), np.arange(4096 // 2))
    train = np.ix_(np.arange(4096 // 3, 4096), np.arange(4096 // 2, 4096))

    haiti_map = map_overlay.haiti_setup()
    train_map = haiti_map.sub_map(train)
    test_map = haiti_map.sub_map(test)

    n_labelers = labelers.labels.shape[0]
    n_test_segs = test_map.unique_segs(20).shape[0]
    predictions = np.zeros((n_labelers, n_test_segs))

    # Boolean mask: does each labeler's label match the majority vote,
    # restricted to the training segments.
    matches_majority = labelers.labels == labelers.majority_vote()
    agreement = matches_majority[:, train_map.unique_segs(20)]

    for row in range(n_labelers):
        print(labelers.emails[row])
        labeler_model = ObjectClassifier(NZ=False)
        labeler_model.fit(train_map, agreement[row])
        predictions[row] = labeler_model.predict_proba_segs(test_map)
        print(predictions)

    best_labelers = predictions.argmax(axis=0)
    print(best_labelers)
    np.save('predictions.npy', predictions)
    np.save('best.npy', best_labelers)

    # One winning labeler index is expected per test segment.
    assert best_labelers.shape[0] == test_map.unique_segs(20).shape[0]
    model_labels = labelers.labels[best_labelers, test_map.unique_segs(20)]
    np.save('vote.npy', model_labels)
Esempio n. 2
0
 def setup_map_split(self):
     """Split the Haiti map into a training portion and a testing portion.

     Both portions use rows ``[4096 // 3, 4096)``.  The training portion
     takes columns ``[0, 4096 // 2)`` and the testing portion takes columns
     ``[4096 // 2, 4096)``.

     Sets ``self.train`` and ``self.test`` (``np.ix_`` index tuples),
     ``self.haiti_map`` (result of ``map_overlay.haiti_setup()``), and
     ``self.train_map`` / ``self.test_map`` (the corresponding sub-maps).
     """
     # Floor division keeps the bounds integral regardless of whether true
     # division is in effect; a float bound would make np.arange return
     # float arrays, which cannot be used as indices.
     first_row = 4096 // 3
     mid_col = 4096 // 2
     self.train = np.ix_(np.arange(first_row, 4096), np.arange(mid_col))
     self.test = np.ix_(np.arange(first_row, 4096), np.arange(mid_col, 4096))
     self.haiti_map = map_overlay.haiti_setup()
     self.train_map = self.haiti_map.sub_map(self.train)
     self.test_map = self.haiti_map.sub_map(self.test)
Esempio n. 3
0
def test():
    """Print labeler statistics and dump ``image_count`` to ``all_emails.csv``.

    Each CSV row pairs a key of ``labelers.image_count`` with the first
    column of its value (keys are presumably labeler emails, judging by the
    output file name -- confirm against ``Labelers``).  Every pair is also
    printed as it is collected.
    """
    # Result is not used below; the call is kept as in the original flow.
    haiti_map = map_overlay.haiti_setup()
    labelers = Labelers()
    labelers.print_stats()

    counts = labelers.image_count
    keys = counts.keys()
    # Stack the values into a 2-D array and keep only the first column.
    first_column = np.array(list(counts.values()))[:, 0]

    rows = []
    for key, count in zip(keys, first_column):
        rows.append([key, count])
        print("%s %s" % (key, count))

    np.savetxt('all_emails.csv', np.array(rows), delimiter=',', fmt='%s')
Esempio n. 4
0
def main_haiti():
    """Compare every labeler against the majority vote and plot them on the classifier ROC.

    Steps, as performed below:

    1. Take the majority vote of all labelers as the reference labelling.
    2. Split the Haiti map: rows ``[4096 // 3, 4096)``; training columns
       ``[0, 2048)``, testing columns ``[2048, 4096)``.
    3. For each labeler email, compute ``(FPR, TPR)`` of that labeler's
       labels against the reference on the test region.
    4. Fit the classifier on the training region, predict on the test
       region, print ``FPR_from_FNR`` at ``TPR=.95``, draw the probability
       heat map and the ROC curve, and annotate each labeler's point on it.

    Saves the figure to ``All_ROCs/Classifier_ROC.png`` and shows it.
    """
    # EM is only needed by the disabled alternative noted below; the import
    # is kept so the function's import-time behaviour is unchanged.
    from Xie import EM
    from labeler import Labelers
    model = ObjectClassifier(0, 1)
    labelers = Labelers()
    y = labelers.majority_vote()

    # Floor division keeps the index bounds integral.
    train = np.ix_(np.arange(4096 // 3, 4096), np.arange(4096 // 2))
    test = np.ix_(np.arange(4096 // 3, 4096), np.arange(4096 // 2, 4096))
    haiti_map = map_overlay.haiti_setup()
    train_map = haiti_map.sub_map(train)
    test_map = haiti_map.sub_map(test)

    # Disabled alternative reference: run EM(train_map, labelers) and
    # threshold em.G[:, 1] at 0.5 instead of using the majority vote.

    # Index array used to expand per-segment labels over the test region.
    test_index = test_map.segmentations[20]
    g_truth = y[test_index]

    FPRs = []
    TPRs = []
    for email in labelers.emails:
        print(email)
        labels = labelers.labeler(email)[test_index]
        FPR, TPR = analyze_results.confusion_analytics(g_truth.ravel(), labels.ravel())
        FPRs.append(FPR)
        TPRs.append(TPR)

    probs = model.fit_and_predict(train_map, test_map, y[train_map.unique_segs(20)])
    print(analyze_results.FPR_from_FNR(g_truth.ravel(), probs.ravel(), TPR=.95))
    analyze_results.probability_heat_map(test_map, probs.ravel(), '')
    fig, _, _, _, _, _ = analyze_results.ROC(g_truth.ravel(), probs.ravel(), 'Classifier')

    # Overlay one annotated point per labeler on the classifier's ROC plot.
    plt.scatter(FPRs, TPRs)
    for name, fpr, tpr in zip(labelers.emails, FPRs, TPRs):
        plt.annotate(name, (fpr, tpr))

    fig.savefig('All_ROCs/{}_ROC.png'.format('Classifier'), format='png')
    plt.show()
Esempio n. 5
0
    def E(self):
        """Recompute ``self.alpha`` and ``self.p`` from ``self.G`` and ``self.n``.

        ``self.alpha`` is the sum over axis 1 of ``n * G`` (after both are
        repeated along a new length-2 axis so they broadcast), normalised so
        that it sums to one along its last axis.  ``self.p`` is the column
        sum of ``self.G`` divided by ``self.I``.

        Assumes ``self.G`` is 2-D and ``self.n`` is 3-D with compatible
        sizes -- TODO confirm against the constructor.
        """
        # Repeat G along a new trailing axis of length 2.
        g_rep = np.tile(self.G, (2, 1, 1)).transpose((1, 2, 0))
        # Repeat n along a new axis of length 2 placed before its last axis.
        n_rep = np.tile(self.n, (2, 1, 1, 1)).transpose((1, 2, 0, 3))

        weighted = np.sum(n_rep * g_rep, axis=1)
        # keepdims lets the totals broadcast back over the last axis.
        totals = np.sum(weighted, axis=2, keepdims=True)
        self.alpha = weighted / totals
        self.p = np.sum(self.G, axis=0) / self.I

    def run(self, runs=15, v=2):
        """Alternate ``E()`` and ``M()`` until ``M()`` reports convergence.

        At most 50 iterations are performed.  ``runs`` is accepted but not
        consulted by this method.

        ``v`` controls verbosity: with ``v > 0`` ``self.p`` is printed after
        every iteration; with ``v > 1`` the method finally calls
        ``self.show_probs()``, prints ``self.p`` again, and prints each
        labeler email followed by its entry of ``self.alpha``.
        """
        for _ in range(50):
            self.E()
            converged = self.M()
            if v > 0:
                print(self.p)
            if converged:
                break

        if v > 1:
            self.show_probs()
            print(self.p)
            for email, labeler_alpha in zip(self.labelers.emails, list(self.alpha)):
                print(email)
                print(labeler_alpha)


if __name__ == "__main__":
    # Script entry point: run EM on rows [4096 // 3, 4096) and all 4096
    # columns of the Haiti map.
    region = np.ix_(np.arange(4096 // 3, 4096), np.arange(4096))
    haiti_map = map_overlay.haiti_setup().sub_map(region)
    labelers = Labelers()
    test = EM(haiti_map, labelers)
    test.run()