Exemple #1
0
    def classify(self):
        """Train the task's classifier on all but the last tile and score it.

        Reads the label and feature results referenced by ``self.task``,
        one-hot encodes the features with ``pd.get_dummies``, fits the
        classifier returned by ``get_classifier`` on every row except the
        trailing ``TILE_SIZE`` rows, and evaluates it on those held-out rows.
        The resulting ``ClassifyResult`` is stored under
        ``self.results[self.task.uuid]``.

        Returns:
            None. The outcome is published through ``self.results``.
        """
        label_result = self.get_result(self.task.label)
        feature_result = self.get_result(self.task.features)

        y = np.array(label_result.data).ravel()
        X = np.array(pd.get_dummies(feature_result.data))

        # Hold out the trailing TILE_SIZE rows for evaluation.
        X_train, X_test = X[:-TILE_SIZE], X[-TILE_SIZE:]
        y_train, y_test = y[:-TILE_SIZE], y[-TILE_SIZE:]

        classes = np.array([0, 1])
        # NOTE(review): presumably scikit-learn's compute_class_weight; 'auto'
        # is a legacy mode that newer releases replaced with 'balanced' --
        # confirm against the installed version before changing it.
        cw = compute_class_weight('auto', classes, y)
        cw = {0: cw[0], 1: cw[1]}

        b = get_classifier(self.task.classifier, cw)
        b.partial_fit(X_train, y_train, classes=classes)

        if self.task.classifier in ['perceptron', 'svm']:
            # These classifiers are scored through predict(); the hard label
            # is placed in column 1 so y_prob keeps a two-column layout.
            # The loop variable is deliberately not called ``y``: Python 2
            # list comprehensions leak their variable into this scope.
            y_pred = b.predict(X_test)
            y_prob = np.array([[0, label] for label in y_pred])
        else:
            y_prob = b.predict_proba(X_test)
            # Column 1 is the score for class 1 (columns follow the order of
            # ``classes`` given to partial_fit, assuming the scikit-learn
            # predict_proba convention). Thresholding column 0 would invert
            # every prediction.
            y_pred = [1 if row[1] >= 0.5 else 0 for row in y_prob]

        cm = confusion_matrix(y_test, y_pred)
        stats = classify_stats(cm, y_test, y_prob, TILE_SIZE)

        result = ClassifyResult(self.task, 1.0, b, stats)
        self.results[self.task.uuid] = result
Exemple #2
0
    def classify(self):
        """Fit the configured classifier and evaluate it on the final tile.

        The label and feature results named by ``self.task`` are fetched via
        ``self.get_result``; features are one-hot encoded with
        ``pd.get_dummies``. Every row except the last ``TILE_SIZE`` rows is
        used for a single ``partial_fit`` call, and the last ``TILE_SIZE``
        rows are used to build the confusion matrix and statistics. The
        ``ClassifyResult`` is written to ``self.results[self.task.uuid]``.

        Returns:
            None. The outcome is published through ``self.results``.
        """
        y_data = self.get_result(self.task.label)
        X_data = self.get_result(self.task.features)

        y = np.array(y_data.data).ravel()
        X = np.array(pd.get_dummies(X_data.data))

        # Train on everything before the last tile, test on the last tile.
        X_train = X[:-TILE_SIZE]
        y_train = y[:-TILE_SIZE]
        X_test = X[-TILE_SIZE:]
        y_test = y[-TILE_SIZE:]

        binary_classes = np.array([0, 1])
        # NOTE(review): presumably scikit-learn's compute_class_weight; the
        # 'auto' mode was superseded by 'balanced' in newer releases --
        # confirm against the installed version before changing it.
        cw = compute_class_weight('auto', binary_classes, y)
        cw = {0: cw[0], 1: cw[1]}

        b = get_classifier(self.task.classifier, cw)
        b.partial_fit(X_train, y_train, classes=binary_classes)

        if self.task.classifier in ['perceptron', 'svm']:
            # Scored through predict(); the hard label goes in column 1 so
            # y_prob has the same two-column layout as predict_proba output.
            # Avoid ``y`` as the loop name: Python 2 list comprehensions leak
            # their variable and would overwrite the label array.
            y_pred = b.predict(X_test)
            y_prob = np.array([[0, predicted] for predicted in y_pred])
        else:
            y_prob = b.predict_proba(X_test)
            # Threshold the class-1 column. Columns follow the order of
            # ``classes`` passed to partial_fit (assuming the scikit-learn
            # predict_proba convention), so column 0 is the class-0 score and
            # thresholding it would flip every prediction.
            y_pred = [1 if probs[1] >= 0.5 else 0 for probs in y_prob]

        cm = confusion_matrix(y_test, y_pred)
        stats = classify_stats(cm, y_test, y_prob, TILE_SIZE)

        result = ClassifyResult(self.task, 1.0, b, stats)
        self.results[self.task.uuid] = result
Exemple #3
0
    def classify(self):
        """Evaluate, then incrementally train, the classifier tile by tile.

        The label and feature data referenced by ``self.task`` are read from
        ``self.results`` and walked in consecutive tiles of 1000 rows. Each
        tile is first predicted with the current model, scored via
        ``confusion_matrix`` and ``classify_stats``, and the resulting
        ``ClassifyResult`` is stored under ``self.results[self.task.uuid]``
        (each tile overwrites the previous entry). The tile is then fed to
        ``partial_fit``. Rows beyond the last complete tile are not processed.

        The computed class weights are printed to stdout.

        Returns:
            None. The outcome is published through ``self.results``.
        """
        y_data = self.results[self.task.label].data
        X_data = self.results[self.task.features].data
        # Flatten so a single-column label frame yields 1-D label slices.
        y = np.array(y_data).ravel()
        X = np.array(X_data)

        classes = np.array([0, 1])
        # NOTE(review): presumably scikit-learn's compute_class_weight; 'auto'
        # is a legacy mode that newer releases replaced with 'balanced' --
        # confirm against the installed version before changing it.
        cw = compute_class_weight('auto', classes, y)
        cw = {0: cw[0], 1: cw[1]}
        print(cw)

        b = get_classifier(self.task.classifier, cw)

        tile_size = 1000
        # Floor division keeps this an int on both Python 2 and Python 3.
        num_tiles = y.size // tile_size
        for i in range(num_tiles):
            pos = i * tile_size
            X_sub = X[pos:pos + tile_size]
            y_sub = y[pos:pos + tile_size]

            # NOTE(review): the first tile is predicted before any
            # partial_fit call made here; this presumably relies on
            # get_classifier returning a model that can already predict --
            # confirm.
            if self.task.classifier == 'svm':
                y_pred = b.predict(X_sub)
                # The loop variable must not be ``y``: Python 2 list
                # comprehensions leak their variable, which would replace the
                # label array and corrupt every later tile.
                y_prob = np.array([[0, label] for label in y_pred])
            else:
                y_prob = b.predict_proba(X_sub)
                y_pred = [1 if row[1] >= 0.5 else 0 for row in y_prob]

            cm = confusion_matrix(y_sub, y_pred)
            # Score against this tile's labels.
            stats = classify_stats(cm, y_sub, y_prob)

            result = ClassifyResult(self.task, 1.0, b, stats)
            self.results[self.task.uuid] = result

            # ``classes`` is passed on every call so the first call is valid
            # for estimators that require it up front.
            b.partial_fit(X_sub, y_sub, classes=classes)
Exemple #4
0
    def classify(self):
        """Run a test-then-train pass over the data in fixed-size tiles.

        Label and feature data for ``self.task`` are taken from
        ``self.results`` and split into consecutive 1000-row tiles. For each
        tile the current model predicts first; the predictions are scored with
        ``confusion_matrix`` and ``classify_stats`` and wrapped in a
        ``ClassifyResult`` stored at ``self.results[self.task.uuid]``
        (overwritten on every tile). The tile is then passed to
        ``partial_fit``. A trailing partial tile is skipped.

        The computed class weights are printed to stdout.

        Returns:
            None. The outcome is published through ``self.results``.
        """
        y_data = self.results[self.task.label].data
        X_data = self.results[self.task.features].data
        # Flatten so a single-column label frame yields 1-D label slices.
        y = np.array(y_data).ravel()
        X = np.array(X_data)

        binary_classes = np.array([0, 1])
        # NOTE(review): presumably scikit-learn's compute_class_weight; the
        # 'auto' mode was superseded by 'balanced' in newer releases --
        # confirm against the installed version before changing it.
        cw = compute_class_weight('auto', binary_classes, y)
        cw = {0: cw[0], 1: cw[1]}
        print(cw)

        b = get_classifier(self.task.classifier, cw)

        tile_size = 1000
        # Integer division so range() receives an int on Python 3 as well.
        num_tiles = y.size // tile_size
        for tile_index in range(num_tiles):
            start = tile_index * tile_size
            stop = start + tile_size
            X_sub = X[start:stop]
            y_sub = y[start:stop]

            # NOTE(review): the first tile is predicted before any
            # partial_fit call made here; presumably get_classifier returns a
            # model that can already predict -- confirm.
            if self.task.classifier == 'svm':
                y_pred = b.predict(X_sub)
                # Do not reuse ``y`` as the loop name: in Python 2 the
                # comprehension variable leaks and would overwrite the label
                # array used to slice the following tiles.
                y_prob = np.array([[0, predicted] for predicted in y_pred])
            else:
                y_prob = b.predict_proba(X_sub)
                y_pred = [1 if probs[1] >= 0.5 else 0 for probs in y_prob]

            cm = confusion_matrix(y_sub, y_pred)
            # Score against the labels of the tile just predicted.
            stats = classify_stats(cm, y_sub, y_prob)

            result = ClassifyResult(self.task, 1.0, b, stats)
            self.results[self.task.uuid] = result

            # Supplying ``classes`` on every call keeps the first call valid
            # for estimators that need the full label set up front.
            b.partial_fit(X_sub, y_sub, classes=binary_classes)