Example no. 1
def partial_3():
    perceptron_3 = Perceptron()
    stop_3 = 0
    counter_3 = 0
    while stop_3 == 0:
        for x in range(1000):
            perceptron_3.partial_fit([data_3[x]], [labels_3[x]], classes=np.unique(labels_3))
            if perceptron_3.score(data_3, labels_3) == 1:
                counter_3 += 1
                stop_3 += 1
                break
            if counter_3 >= 100000:
                print('No Convergence')
                return
            else:
                counter_3 += 1
    weights = perceptron_3.coef_
    w1 = weights[0][0]
    w2 = weights[0][1]
    w0 = perceptron_3.intercept_[0]
    print("Weights was adjusted {} times".format(counter_3))
    print("Intercept (w0) is {}".format(w0))
    print("Final weight vector is : {} Hence:".format(weights))
    print("w1 is : {}  , w2 weight is: {}".format(w1, w2))
    #deriving the line based on HW Problem 1.2
    a = -1* (w1/w2)
    b = -1* (w0/w2)
    print("The equation of the decision boundary line is y = ({})x + ({})".format(a,b))
Example no. 2
def train_classifiers(models, train_data):
    classifiers = dict()
    for modelname, model in models.items():

        if settings["classifier"] == "Perceptron":
            classifier = Perceptron()
        if settings["classifier"] == "PassiveAggressive":
            classifier = PassiveAggressiveClassifier()

        for sample_no, (text, is_acq) in enumerate(train_data):
            bow = dictionary.doc2bow(simple_preprocess(text))

            model_features = sparse2full(model[bow], model.__out_size)
            label = np.array([is_acq])
            #ln.debug("%s, %s "% (model_features, label.shape))

            # sparse2full returns a 1-D vector; partial_fit needs a 2-D X
            classifier.partial_fit(model_features.reshape(1, -1),
                                   label,
                                   classes=np.array([True, False]))
            if sample_no % 500 == 0:
                ln.debug("Classifier for %s trained %s samples so far." %
                         (modelname, sample_no))

        classifiers[modelname] = classifier
        ln.info("Finished training classifier for %s" % modelname)

    return classifiers
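A recurring detail in these snippets: partial_fit expects X with shape (n_samples, n_features) even when updating on a single example, so a lone feature vector has to be wrapped or reshaped first, as the snippet above does with its 1-D gensim vector. A tiny illustration with made-up values:

import numpy as np
from sklearn.linear_model import Perceptron

clf = Perceptron()
sample = np.array([0.3, 1.2, -0.7])  # one sample, shape (3,)

# Either wrap it into a batch of one ...
clf.partial_fit([sample], np.array([True]), classes=np.array([True, False]))
# ... or reshape it explicitly to (1, n_features)
clf.partial_fit(sample.reshape(1, -1), np.array([False]))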
Example no. 3
def partial_2():
    perceptron_2 = Perceptron()
    stop_2 = 0
    counter_2 = 0
    while stop_2 == 0:
        for x in range(1000):
            perceptron_2.partial_fit([data_2[x]], [labels_2[x]], classes=np.unique(labels_2))
            if perceptron_2.score(data_2,labels_2) == 1:
                counter_2 +=1
                stop_2 +=1
                break
            else:
                counter_2 +=1
    weights = perceptron_2.coef_
    w1 = weights[0][0]
    w2 = weights[0][1]
    w0 = perceptron_2.intercept_[0]
    print("Weights was adjusted {} times".format(counter_2))
    print("Intercept (w0) is {}".format(w0))
    print("Final weight vector is : {} Hence:".format(weights))
    print("w1 is : {}  , w2 weight is: {}".format(w1, w2))
    #deriving the line based on HW Problem 1.2
    a = -1* (w1/w2)
    b = -1* (w0/w2)
    print("The equation of the decision boundary line is y = ({})x + ({})".format(a,b))
Example no. 4
def test_basic(self, single_chunk_classification):
    X, y = single_chunk_classification
    a = PartialPerceptron(classes=[0, 1], max_iter=1000, tol=1e-3)
    b = Perceptron(max_iter=1000, tol=1e-3)
    a.fit(X, y)
    b.partial_fit(X, y, classes=[0, 1])
    assert_estimator_equal(a.coef_, b.coef_)
Example no. 5
    def train(self, parsed_sentences, path, **kwargs):
        all_classes = [
            'O', 'B-per', 'I-per', 'B-gpe', 'I-gpe', 'B-geo', 'I-geo', 'B-org',
            'I-org', 'B-tim', 'I-tim', 'B-art', 'I-art', 'B-eve', 'I-eve',
            'B-nat', 'I-nat'
        ]

        X, y = self.get_minibatch(parsed_sentences,
                                  kwargs.get('batch_size', 500))
        vectorizer = DictVectorizer(sparse=False)
        vectorizer.fit(X)

        # sklearn later renamed Perceptron's n_iter to max_iter
        clf = Perceptron(verbose=10, n_jobs=-1, max_iter=kwargs.get('n_iter', 5))

        while len(X):
            X = vectorizer.transform(X)
            clf.partial_fit(X, y, all_classes)
            X, y = self.get_minibatch(parsed_sentences,
                                      kwargs.get('batch_size', 500))

        clf = Pipeline([('vectorizer', vectorizer), ('classifier', clf)])

        with open(path, 'wb') as model_pkl:
            pickle.dump(clf, model_pkl)

        self._classifier = clf
Example no. 6
class CalibratedPerceptron(BaseSKMObject, ClassifierMixin):
    """ Calibrated Perceptron classifier

    Wraps sklearn's Perceptron and adds probability estimates via
    CalibratedClassifierCV, since Perceptron has no predict_proba.
    """
    def __init__(self, nominal_attributes=None):
        super().__init__()
        self.perceptron = Perceptron()
        self.cc = None

    def fit(self, X, y, sample_weight=None):
        self.perceptron.fit(X, y)
        if self.cc is None:
            self.cc = CalibratedClassifierCV(self.perceptron,
                                             cv='prefit',
                                             method='isotonic')
        self.cc.fit(X, y, sample_weight=sample_weight)

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        self.perceptron.partial_fit(X,
                                    y,
                                    classes=classes,
                                    sample_weight=sample_weight)
        if self.cc is None:
            self.cc = CalibratedClassifierCV(self.perceptron,
                                             cv='prefit',
                                             method='sigmoid')
        self.cc.fit(X, y, sample_weight=sample_weight)

    def predict(self, X):
        return self.perceptron.predict(X)

    def predict_proba(self, X):
        return self.cc.predict_proba(X)
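Perceptron exposes no predict_proba of its own, which is why the class above refits CalibratedClassifierCV with cv='prefit' after every update. A minimal sketch of that calibration idea on synthetic data (everything here except the sklearn calls is illustrative):

import numpy as np
from sklearn.calibration import CalibratedClassifierCV
from sklearn.linear_model import Perceptron

rng = np.random.RandomState(0)
X = rng.randn(200, 4)
y = (X[:, 0] + X[:, 1] > 0).astype(int)

per = Perceptron().fit(X, y)
# cv='prefit' tells the calibrator to reuse the already-fitted perceptron
calibrated = CalibratedClassifierCV(per, cv='prefit', method='sigmoid')
calibrated.fit(X, y)

print(calibrated.predict_proba(X[:3]))  # class probabilities, shape (3, 2)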
Example no. 7
def training_per(X_train, y_train, X_test, classes, dataset):
    per = Perceptron(verbose=10, n_jobs=-1, max_iter=5)
    per.partial_fit(X_train, y_train.values.ravel(), classes)
    y_pred = per.predict(X_test)

    model_filename = os.getcwd() + '/models/' + dataset + '/per_model.pkl'
    with open(model_filename, 'wb') as file_model:
        pickle.dump(per, file_model)
    return y_pred
Example no. 8
class DrunkLearningOnline(DrunkLearningBatch):
    """drunk_learning class for online learning"""
    def __init__(self):
        super(DrunkLearningOnline, self).__init__()
        self.clf = Perceptron()
        self.filename = 'modelPerceptron.pkl'

    def partial_fit(self, X, y):
        X = np.array([X])
        y = np.array(y)
        self.clf.partial_fit(X, y, [0, 1])
        joblib.dump(self.clf, self.filename, compress=9)
Example no. 9
def train(cls, parsed_sentences, feature_detector, all_classes, **kwargs):
    X, y = cls.get_minibatch(parsed_sentences, feature_detector, kwargs.get('batch_size', 500))
    vectorizer = DictVectorizer(sparse=False)
    vectorizer.fit(X)
    # sklearn later renamed Perceptron's n_iter to max_iter
    clf = Perceptron(verbose=10, n_jobs=-1, max_iter=kwargs.get('n_iter', 5))
    while len(X):
        X = vectorizer.transform(X)
        clf.partial_fit(X, y, all_classes)
        X, y = cls.get_minibatch(parsed_sentences, feature_detector, kwargs.get('batch_size', 500))
    # Build the pipeline and return only after all minibatches are consumed
    clf = Pipeline([
        ('vectorizer', vectorizer),
        ('classifier', clf)
    ])
    return cls(clf, feature_detector)
Example no. 10
def train_and_test_with_perceptron(X_train, y_train, X_test, y_test):
    classes = np.unique(y_train)
    classes = classes.tolist()
    print(classes)

    per = Perceptron(verbose=10, n_jobs=-1)
    per.partial_fit(X_train, y_train, classes=classes)

    new_class = list(set(classes) - set(['O']))
    print(new_class)

    print(
        classification_report(y_pred=per.predict(X_test),
                              y_true=y_test,
                              labels=new_class))
Example no. 11
def incremental_train_scikit_classifier(
        sentences,
        feature_detector,
        batch_size,
        max_iterations):

    initial_corpus_iterator, sentences = itertools.tee(sentences)

    # compute all labels
    ALL_LABELS = set()

    for sentence in initial_corpus_iterator:
        for w, t in sentence:
            ALL_LABELS.add(t)

    ALL_LABELS = list(ALL_LABELS)

    batch = list(itertools.islice(sentences, batch_size))
    dataset = feature_detector(batch)

    # split the dataset into featuresets and the predicted labels
    featuresets, labels = zip(*dataset)

    # This vectorizer doesn't need to be fitted
    vectorizer = FeatureHasher(n_features=1000000)

    classifier = Perceptron(tol=0.00001, max_iter=25, n_jobs=-1)

    for _ in range(max_iterations):
        current_corpus_iterator, sentences = itertools.tee(sentences)
        batch_count = 0

        while True:
            batch_count += 1
            print("Training on batch={0}".format(batch_count))
            classifier.partial_fit(vectorizer.transform(featuresets), labels, ALL_LABELS)

            batch = list(itertools.islice(current_corpus_iterator, batch_size))
            if not batch:
                break

            dataset = feature_detector(batch)
            featuresets, labels = zip(*dataset)

    scikit_classifier = ScikitClassifier(classifier=classifier, vectorizer=vectorizer)

    return scikit_classifier
Example no. 12
def percey_demo(iris, column, epochs):
    def class_to_targets(target):
        if target == 'Iris-' + iris:
            return 1
        else:
            return 0

    percey = Perceptron()
    print("Training a Perceptron to classify Iris-" + iris + " using " +
          column + " width and " + column + " length.")
    iris_inputs = iris_data[[column + ' width', column + ' length']]
    iris_targets = iris_data['class'].apply(class_to_targets)

    xmin = min(iris_inputs[iris_inputs.columns[0]])
    xmax = max(iris_inputs[iris_inputs.columns[0]])
    xnums = np.arange(xmin, xmax, (xmax - xmin) / 100)

    for x in range(epochs):
        #print(np.unique(iris_targets))
        percey.partial_fit(iris_inputs,
                           iris_targets,
                           classes=np.unique(iris_targets))
        weights = percey.coef_[0]
        threshold = percey.intercept_

        # print(threshold)
        # print(weights)

        def makeline(xval):
            return (-threshold - weights[0] * xval) / weights[1]

        plt.scatter(iris_data[column + " width"],
                    iris_data[column + " length"],
                    c=iris_data['class'].apply(iris_to_color))
        plt.plot(xnums, makeline(xnums), c="orange")
        plt.xlabel(column + " width")
        plt.ylabel(column + " length")
        plt.axis(ymin=min(iris_inputs[iris_inputs.columns[1]]) - 0.5,
                 ymax=max(iris_inputs[iris_inputs.columns[1]]) + 0.5)
        plt.title(
            "Training Perceptron to identify Iris-" + iris + " on epoch=" +
            str(x) +
            "\nRed = Iris-setosa, Blue = Iris-versicolor, Green = Iris-virginica"
        )
        plt.show()
Example no. 13
def trainPerceptron():
    pathTrain = './corpus_train/train_set/'
    pathTest = './corpus_train/test_set/'
    trainDocs = os.listdir(pathTrain)
    testDocs = os.listdir(pathTest)
    classifier = Perceptron()

    for i in range(len(trainDocs)):
        trainDocString = getString(pathTrain + trainDocs[i])
        testDocString = getString(pathTest + testDocs[i])

        trainSentences = getSentences(trainDocString)
        testSentences = getSentences(testDocString)

        docFeatures = getFeatures(trainDocString, trainSentences)
        docTargets = getTargets(trainSentences, testSentences)

        classifier.partial_fit(docFeatures, docTargets, classes=[0, 1])
    return classifier
Example no. 14
class ClassifierBolt(Bolt):
    outputs = ['prediction', 'actual', 'id', 'training_count']

    def initialize(self, config, context):
        self.config = config["sgd_config"].copy()
        if self.config['model'] == 'SGD':
            self.clf = SGDClassifier(loss=self.config['loss'],
                                     penalty=self.config['penalty'])
        elif self.config['model'] == 'MLP':
            self.clf = MLPClassifier(
                hidden_layer_sizes=self.config['hidden_layer_sizes'])
        elif self.config['model'] == 'PassiveAggressive':
            self.clf = PassiveAggressiveClassifier()
        elif self.config['model'] == 'Perceptron':
            self.clf = Perceptron(penalty=self.config['penalty'])

        self.trained_count = 0
        self.pure_training_size = config["benchmark_config"][
            "pure_training_size"]
        self.results = []

    def process(self, tup):
        id, image_data, classification = tup.values

        x = [image_data]
        if self.trained_count >= self.pure_training_size:
            prediction = self.clf.predict(x)[0]
            self.results.append(prediction == classification)
            self.log(
                "{} prediction {} result: {} (predicted: {}, actual: {}) accuracy: {}%, last 32: {}%"
                .format(
                    self.config, len(self.results),
                    prediction == classification, prediction, classification,
                    100 * sum(self.results) // len(self.results),
                    100 * sum(self.results[-32:]) // len(self.results[-32:])))
            self.emit([prediction, classification, id, self.trained_count])

        y = [classification]
        self.clf.partial_fit(x, y, classes=['Active', 'Rest'])
        self.trained_count += 1
        self.log("trained {}".format(self.trained_count))
Example no. 15
def fit_perceptron(p: Perceptron, X, Y, times, error=0.1):
    for time in range(1, times + 1):
        p = p.partial_fit(X, Y, classes=np.unique(Y))
        Y_hat = p.predict(X)
        correct = np.equal(Y, Y_hat)
        vals, count = np.unique(correct, return_counts=True)
        index = 0 if not vals[0] else 1
        if len(vals) == 1:
            count = np.append(count, 0)
        false_num = count[index]
        err = false_num / len(correct)
        if err < error:
            return p, time

    return p, times
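A short usage sketch for the fit_perceptron helper above, with synthetic data from scikit-learn's make_classification (the dataset and the 10% error target here are illustrative; the helper itself is defined in the snippet above):

from sklearn.datasets import make_classification
from sklearn.linear_model import Perceptron

X, Y = make_classification(n_samples=300, n_features=5, random_state=0)
p, epochs = fit_perceptron(Perceptron(), X, Y, times=50, error=0.1)
print("stopped after {} epochs, accuracy {:.3f}".format(epochs, p.score(X, Y)))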
Example no. 17
class PerceptronClassifier(object):
    def __init__(self, classes):
        self.classes = classes
        self.model = Perceptron()
        self.w = None
        
    def predict(self, X):
        X = X.reshape(1, -1)
        try:
            return self.model.predict(X)[0]
        except Exception:
            # model not fitted yet: fall back to the first class
            return self.classes[0]

    def partial_fit(self, X, y, sample_weight=1.0):
        X = X.reshape(1, -1)
        y = y.reshape(1,)  # partial_fit expects a 1-D y
        return self.model.partial_fit(X, y, sample_weight=[sample_weight], classes=self.classes)
Example no. 18
class PerceptronMask(BaseSKMObject, ClassifierMixin):
    """ Mask for sklearn.linear_model.Perceptron.

    scikit-multiflow requires a few interfaces not present in scikit-learn,
    so this mask serves as a wrapper for the Perceptron classifier.

    """
    def __init__(self,
                 penalty=None,
                 alpha=0.0001,
                 fit_intercept=True,
                 max_iter=None,
                 tol=None,
                 shuffle=True,
                 verbose=0,
                 eta0=1.0,
                 n_jobs=None,
                 random_state=0,
                 early_stopping=False,
                 validation_fraction=0.1,
                 n_iter_no_change=5,
                 class_weight=None,
                 warm_start=False,
                 n_iter=None):
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.shuffle = shuffle
        self.verbose = verbose
        self.eta0 = eta0
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.early_stopping = early_stopping
        self.validation_fraction = validation_fraction
        self.n_iter_no_change = n_iter_no_change
        self.class_weight = class_weight
        self.warm_start = warm_start
        self.n_iter = n_iter  # accepted for backwards compatibility; not forwarded to Perceptron
        super().__init__()
        self.classifier = Perceptron(
            penalty=self.penalty,
            alpha=self.alpha,
            fit_intercept=self.fit_intercept,
            max_iter=self.max_iter,
            tol=self.tol,
            shuffle=self.shuffle,
            verbose=self.verbose,
            eta0=self.eta0,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
            early_stopping=self.early_stopping,
            validation_fraction=self.validation_fraction,
            n_iter_no_change=self.n_iter_no_change,
            class_weight=self.class_weight,
            warm_start=self.warm_start)

    def fit(self, X, y, classes=None, sample_weight=None):
        """ Calls the Perceptron fit function from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        sample_weight:
            Sample weights. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.fit(X=X, y=y, sample_weight=sample_weight)
        return self

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ partial_fit

        Calls the Perceptron partial_fit from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        sample_weight:
            Sample weights. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.partial_fit(X=X,
                                    y=y,
                                    classes=classes,
                                    sample_weight=sample_weight)
        return self

    def predict(self, X):
        """ predict

        Uses the current model to predict samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        Returns
        -------
        numpy.ndarray
            A numpy.ndarray containing the predicted labels for all instances in X.

        """
        return np.asarray(self.classifier.predict(X))

    def predict_proba(self, X):
        """ Predicts the probability of each sample belonging to each one of the known classes.
    
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.
    
        Returns
        -------
        numpy.ndarray
            An array of shape (n_samples, n_classes), where row i corresponds
            to the i-th entry of X and holds one probability per known class:
            the probability that the i-th sample of X belongs to that label.
    
        """
        return self.classifier._predict_proba_lr(X)
Example no. 19
class PerceptronMask(BaseSKMObject, ClassifierMixin):
    """ Mask for sklearn.linear_model.Perceptron.

    scikit-multiflow requires a few interfaces not present in scikit-learn,
    so this mask serves as a wrapper for the Perceptron classifier.

    Examples
    --------
    >>> # Imports
    >>> from skmultiflow.neural_networks import PerceptronMask
    >>> from skmultiflow.data import SEAGenerator
    >>>
    >>> # Setup a data stream
    >>> stream = SEAGenerator(random_state=1)
    >>>
    >>> # Setup the Perceptron Mask
    >>> perceptron = PerceptronMask()
    >>>
    >>> n_samples = 0
    >>> correct_cnt = 0
    >>> while n_samples < 5000 and stream.has_more_samples():
    >>>     X, y = stream.next_sample()
    >>>     my_pred = perceptron.predict(X)
    >>>     if y[0] == my_pred[0]:
    >>>         correct_cnt += 1
    >>>     perceptron.partial_fit(X, y, classes=stream.target_values)
    >>>     n_samples += 1
    >>>
    >>> # Display the results
    >>> print('Perceptron Mask usage example')
    >>> print('{} samples analyzed'.format(n_samples))
    >>> print("Perceptron's performance: {}".format(correct_cnt / n_samples))
    """
    def __init__(self,
                 penalty=None,
                 alpha=0.0001,
                 fit_intercept=True,
                 max_iter=1000,
                 tol=0.001,
                 shuffle=True,
                 verbose=0,
                 eta0=1.0,
                 n_jobs=None,
                 random_state=0,
                 early_stopping=False,
                 validation_fraction=0.1,
                 n_iter_no_change=5,
                 class_weight=None,
                 warm_start=False):
        self.penalty = penalty
        self.alpha = alpha
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.tol = tol
        self.shuffle = shuffle
        self.verbose = verbose
        self.eta0 = eta0
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.early_stopping = early_stopping
        self.validation_fraction = validation_fraction
        self.n_iter_no_change = n_iter_no_change
        self.class_weight = class_weight
        self.warm_start = warm_start
        super().__init__()
        self.classifier = Perceptron(
            penalty=self.penalty,
            alpha=self.alpha,
            fit_intercept=self.fit_intercept,
            max_iter=self.max_iter,
            tol=self.tol,
            shuffle=self.shuffle,
            verbose=self.verbose,
            eta0=self.eta0,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
            early_stopping=self.early_stopping,
            validation_fraction=self.validation_fraction,
            n_iter_no_change=self.n_iter_no_change,
            class_weight=self.class_weight,
            warm_start=self.warm_start)

    def fit(self, X, y, classes=None, sample_weight=None):
        """ Calls the Perceptron fit function from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        sample_weight:
            Sample weights. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.fit(X=X, y=y, sample_weight=sample_weight)
        return self

    def partial_fit(self, X, y, classes=None, sample_weight=None):
        """ partial_fit

        Calls the Perceptron partial_fit from sklearn.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        y: Array-like
            The class labels for all samples in X.

        classes: Not used.

        sample_weight:
            Sample weights. If not provided, uniform weights are assumed.

        Returns
        -------
        PerceptronMask
            self

        """
        self.classifier.partial_fit(X=X,
                                    y=y,
                                    classes=classes,
                                    sample_weight=sample_weight)
        return self

    def predict(self, X):
        """ predict

        Uses the current model to predict samples in X.

        Parameters
        ----------
        X: numpy.ndarray of shape (n_samples, n_features)
            The feature matrix.

        Returns
        -------
        numpy.ndarray
            A numpy.ndarray containing the predicted labels for all instances in X.

        """
        return np.asarray(self.classifier.predict(X))

    def predict_proba(self, X):
        """ Predicts the probability of each sample belonging to each one of the known classes.
    
        Parameters
        ----------
        X: Numpy.ndarray of shape (n_samples, n_features)
            A matrix of the samples we want to predict.
    
        Returns
        -------
        numpy.ndarray
            An array of shape (n_samples, n_classes), where row i corresponds
            to the i-th entry of X and holds one probability per known class:
            the probability that the i-th sample of X belongs to that label.
    
        """
        return self.classifier._predict_proba_lr(X)
Example no. 20
array = clones_test.values
X_test = array[:,3:30]
Y_test = array[:,2]
print("test loaded")

chunkSize = 1024
#clf=SGDClassifier()
#clf=PassiveAggressiveClassifier()
clf = Perceptron()
for chunk in pd.read_csv(path_train, names=colNames, chunksize=chunkSize):
    chunk = chunk.sample(frac=1).reset_index(drop=True)  # shuffle data
    array = chunk.values
    X_train = array[:, 3:30]
    Y_train = array[:, 2]
    start_time = time.time()
    # classes must be the full, fixed label set; computing it per chunk only
    # works if every chunk happens to contain both classes
    model = clf.partial_fit(X_train, Y_train, classes=numpy.unique(Y_train.astype(bool)))
    end_time = time.time()
    print("one chunk complete")

filename = 'sgd_model.sav'
pickle.dump(clf, open(filename, 'wb'))
print("model saved")

# load the model from disk
start_time = time.time()
loaded_model = pickle.load(open(filename, 'rb'))
# result = loaded_model.score(X_test, Y_test.astype(bool))
# print(result)
for chunk in pd.read_csv(path_test, names=colNames, chunksize=chunkSize):
    print("chunk read complete")
    array = chunk.values
Example no. 21
# For looping through chunks of data, set step size
step_size = 1000

percept = Perceptron(n_jobs = -1)

prev = 0

nxt = step_size

X_train = features_to_train[prev:nxt,:]
Y_train = targets_to_train[prev:nxt]

print(len(X_train))
print(len(Y_train))

percept.partial_fit(X_train, Y_train, classes=np.unique(targets_to_train))
prev += step_size
nxt += step_size

for i in range(len(features_to_train) // step_size - 1):

    X_train = features_to_train[prev:nxt,:]
    Y_train = targets_to_train[prev:nxt]

    percept.partial_fit(X_train, Y_train)

    predicted_targets = percept.predict(features_to_test)

    prev += step_size
    nxt += step_size
Example no. 22
#     return X

fh = FeatureHasher(n_features = 2**20, input_type="string", non_negative=True)
# ohe = OneHotEncoder(categorical_features=columns)

# Train classifier
clf = Perceptron()
train = pd.read_csv("testtrain.csv", chunksize = 50000, iterator = True)
all_classes = np.array([0, 1])
for chunk in train:
    y_train = chunk["click"]
    chunk = chunk[cols]
    chunk = chunk.join(pd.DataFrame([dayhour(x) for x in chunk.hour], columns=["wd", "hr"]))
    chunk.drop(["hour"], axis=1, inplace = True)
    Xcat = fh.transform(np.asarray(chunk.astype(str)))
    clf.partial_fit(Xcat, y_train, classes=all_classes)
    
# Create a submission file
usecols = cols + ["id"]
X_test = pd.read_csv("testtest.csv", usecols=usecols)
X_test = X_test.join(pd.DataFrame([dayhour(x) for x in X_test.hour], columns=["wd", "hr"]))
X_test.drop(["hour"], axis=1, inplace = True)

X_enc_test = fh.transform(np.asarray(X_test.astype(str)))

y_act = pd.read_csv("testtest.csv", usecols=['click'])
y_pred = clf.predict(X_enc_test)

with open('logloss.txt','a') as f:
    f.write('\n'+str(log_loss(y_act, y_pred))+'\tPerceptron')
Example no. 23
model = train_averaged_perceptron(y_train,
                                  X_train,
                                  y_vali,
                                  X_vali,
                                  num_iter=1000)
print("AP. Train-Accuracy: {:.3}".format(model.score(X_train, y_train)))
print("AP. Vali-Accuracy: {:.3}".format(model.score(X_vali, y_vali)))

# Note that Sci-Kit Learn's Perceptron uses an alternative method of training.
# Is it an averaged perceptron or a regular perceptron?
skP = Perceptron()
print("Train sklearn-Perceptron (skP)")
for iter in range(1000):
    # Note we use partial_fit rather than fit to expose the loop to our code!
    skP.partial_fit(X_train, y_train, classes=(0, 1))
    learning_curves["skPerceptron"].add_sample(skP, X_train, y_train, X_vali,
                                               y_vali)
print("skP. Train-Accuracy: {:.3}".format(skP.score(X_train, y_train)))
print("skP. Vali-Accuracy: {:.3}".format(skP.score(X_vali, y_vali)))

## TODO Exploration 1: use a loop around partial-fit to generate another graph!
#
## TODO Exploration 1A: Try a MLP (Multi-Layer Perceptron).
mlp = MLPClassifier(hidden_layer_sizes=(32, ))
print("Train MLPClassifier (mla)")
for iter in range(1000):
    # Note we use partial_fit rather than fit to expose the loop to our code!
    mlp.partial_fit(X_train, y_train, classes=(0, 1))
    learning_curves["MLPClassifier"].add_sample(mlp, X_train, y_train, X_vali,
                                                y_vali)
Example no. 24
labels = dataset[:, 2].reshape((numSamples,))
classif = Perceptron()

print('Fitting a', type(classif).__name__, 'model to the dataset')

## We have to reshape the data here because partial_fit expects a 2D array
## for the X input and an array for the Y input

reshapedData = data.reshape(1000,1,2)
reshapedLabel = labels.reshape(1000,1)
y = reshapedLabel
y_index, x_index, callsToPf, totalLoops, errorRate = 0,0,0,0,1
while errorRate != 0:
    errorRate = 0
    for x in reshapedData:
        classif.partial_fit(x, y[y_index], classes=[-1, 1])
        callsToPf += 1
        preds = classif.predict(data)
        errorRate = metrics.zero_one_loss(y, preds)
        print(errorRate)
        y_index += 1
        if errorRate == 0:
            break
    totalLoops += 1
    y_index = 0


print("It took ", callsToPf, " weight updates to get the errorRate to 0")
print("Final weight vector is ",classif.coef_)
# x1 and x2 are the weights
x1 = classif.coef_[0][0]
Example no. 25
#print x_df.head()

vectorizer = DictVectorizer(sparse=False)
x = vectorizer.fit_transform(x_df.to_dict("records"))

#print x.shape

#The output class
y = dframe.tag.values
all_classes = np.unique(y)
#print all_classes.shape
#print y.shape

x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=0)

print(x_train.shape)
print(y_train.shape)

# sklearn later renamed Perceptron's n_iter to max_iter
clf = Perceptron(verbose=10, n_jobs=-1, max_iter=5)
all_classes = list(set(y))
clf.partial_fit(x_train, y_train, all_classes)

joblib.dump(clf, 'clf.model')
print("Done")

clf = joblib.load('clf.model')

print(f1_score(clf.predict(x_test), y_test, average="micro"))
Example no. 26
class StreamingLearner(BaseListener):
    """
    Trains a Perceptron classifier on a stream of data
    (updates with every sample) using feature hashing
    (as you cannot know the vocabulary in advance).

    In this example only English tweets containing a happy
    :) or sad :( emoticon, which is used as the annotation
    for the sentiment of the message, are used as training
    and testing data. Every 5th tweet is used for evaluation
    of the model.
    """

    def __init__(self, zmq_sub_string, channel):

        self.classes = ["pos", "neg"]
        self.re_emoticons = re.compile(r":\)|:\(")
        self.vec = HashingVectorizer(n_features=2 ** 20, non_negative=True)
        self.clf = Perceptron()

        self.count = {
            "train": {
                "pos": 0,
                "neg": 0,
            },
            "test": {
                "pos": 0,
                "neg": 0,
            }
        }

        self.train = 1
        self.eval_count = {
            "pos": {"tp": 0, "fp": 0, "fn": 0},
            "neg": {"tp": 0, "fp": 0, "fn": 0},
        }

        super(StreamingLearner, self).__init__(zmq_sub_string, channel)

    def on_msg(self, tweet):
        print_tick()

        if tweet.get("lang") != "en":
            return  # skip non english tweets

        emoticons = self.re_emoticons.findall(tweet["text"])

        if not emoticons:
            return  # skip tweets without emoticons

        text = self.re_emoticons.sub("", tweet["text"].replace("\n", ""))

        X = self.vec.transform([text])

        # label for message
        last_emoticon = emoticons[-1]
        if last_emoticon == ":)":
            label = "pos"
        elif last_emoticon == ":(":
            label = "neg"
        y = np.asarray([label])

        if not self.train:
            # use every 5th message for evaluation

            print("")
            print("TEST %s |" % label, text)

            self.count["test"][label] += 1

            y_pred = self.clf.predict(X)
            pred_label, gold_label = y_pred[0], label

            print("PRED: ", pred_label)

            if pred_label == gold_label:
                self.eval_count[gold_label]["tp"] += 1
            else:
                self.eval_count[pred_label]["fp"] += 1
                self.eval_count[gold_label]["fn"] += 1

            pos_acc = (
                self.eval_count["pos"]["tp"] / self.count["test"]["pos"]
            ) if self.count["test"]["pos"] else 0

            neg_acc = (
                self.eval_count["neg"]["tp"] / self.count["test"]["neg"]
            ) if self.count["test"]["neg"] else 0

            print("*** CLF TESTED ON: %s :) samples (Acc %.3f),"
                  " %s :( samples (Acc %.3f)" %
                 (self.count["test"]["pos"], pos_acc,
                  self.count["test"]["neg"], neg_acc))
            print(json.dumps(self.eval_count, indent=2))
            print()

        else:
            self.count["train"][label] += 1

            # set higher sample weight for underrepresented class
            tc = self.count["train"]
            if label == "pos":
                sample_weight = min(3, max(1, tc["neg"] - tc["pos"]))
            elif label == "neg":
                sample_weight = min(3, max(1, tc["pos"] - tc["neg"]))
            else:
                sample_weight = 0

            print("\nTRAIN %s (weight %s) |" % (label, sample_weight), text)

            print(">>> CLF TRAINED ON: %s :) samples, %s :( samples" % (
                self.count["train"]["pos"], self.count["train"]["neg"]))

            self.clf.partial_fit(X, y, self.classes, [sample_weight])

        self.train += 1
        # use every 5th message for evaluation
        if not self.train % 5:
            self.train = 0
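Stripped of the ZeroMQ plumbing, the core pattern the docstring above describes is simply: hash each incoming text into a fixed-size feature space and feed it to partial_fit one sample at a time. A minimal sketch of that loop (the toy stream below is invented; note that newer scikit-learn versions drop the non_negative flag used above):

import numpy as np
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.linear_model import Perceptron

vec = HashingVectorizer(n_features=2 ** 20)  # stateless, so no fit() needed
clf = Perceptron()
classes = np.array(["pos", "neg"])

stream = [("what a great day", "pos"),
          ("this is awful", "neg"),
          ("really happy with it", "pos"),
          ("terrible, never again", "neg")]

for text, label in stream:
    X = vec.transform([text])  # hash one message at a time
    clf.partial_fit(X, [label], classes=classes)

print(clf.predict(vec.transform(["a great experience"])))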
Example no. 27
#nbrtest=int(X.shape[0]*0.33)
#nbrtrain=X.shape[0]-nbrtest

#X_train=X[:nbrtrain]
#X=np.delete(X,range(nbrtrain), 0)
#X_test=X
#del(X)

#y_train=y[:nbrtrain]
#y=np.delete(y, range(nbrtrain))
#y_test=y
#del(y)

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=0)
#print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

per = Perceptron(verbose=10, n_jobs=-1, max_iter=5)

per.partial_fit(X_train, y_train, classes)

new_classes = classes.copy()
print(new_classes.pop())  # drop (and show) the last class before reporting
print(new_classes)
print(
    classification_report(y_pred=per.predict(X_test),
                          y_true=y_test,
                          labels=new_classes))
Example no. 28
# Randomly initialize the current hand (an integer from 0 to 2)
j = np.random.randint(0, 3)

# Convert the past hands (the input data) into an array for scikit-learn
Jprev_set = np.array([Jprev])
# Convert the current hand (the target) into an array for scikit-learn
jnow_set = np.array([j])

# Define a three-layer neural network
#clf_janken = MLPClassifier(hidden_layer_sizes=(200, ), random_state=None)
# Define a simple perceptron
clf_janken = Perceptron(random_state=None)
# Perform one round of online learning on the random input.
# On the first call, the classifier must be told every possible target (0, 1, 2)
clf_janken.partial_fit(Jprev_set, jnow_set, classes=[0, 1, 2])

# Initialize the win/draw/lose counters
win = 0
draw = 0
lose = 0

# Flags for keeping track of state
appliStop = False
jankenLoop = False
recognizedHand = 0

# Check for a saved model file
if len(sys.argv) == 2:
    savefile = sys.argv[1]
    try:
Example no. 29
ch_prev_set = np.array([ch_prev])


# Randomly initialize this round's hand (an integer from 0 to 2)
j = np.random.randint(0, 3)
# Convert this round's hand (the target) into an array for scikit-learn
h_now_set = np.array([j])


# ==== 2. Run the machine learning step =============================
# Define a simple perceptron
clf = Perceptron(random_state=None)

# Perform one round of online learning on the random input.
# On the first call, the classifier must be told every possible target (0, 1, 2)
clf.partial_fit(ch_prev_set, h_now_set, classes=[0, 1, 2])


# ==== 3. Display and evaluate the learning results =============================
def janken_ml(h_choice, result):
    global ch_prev, ch_prev_set, total
    h_choice -= 1
    if h_choice < 0 or h_choice > 2:
        flash("Please enter 0, 1, or 2")

    # Convert the past janken hands (vector form) to scikit-learn format
    ch_prev_set = np.array([ch_prev])
    # Convert this round's janken hand (an integer 0-2) to scikit-learn format
    h_now_set = np.array([h_choice])

    # The computer predicts the human's next hand from the past hands
Example no. 30
class Neuron:
    __classes = []
    __savefile_per = ""
    __per = None

    def __init__(self, save_folder, classes):
        """
        :param save_folder: folder where the trained model is saved
        :param classes: list of all possible class labels
        """

        self.__classes = classes
        self.__savefile_per = save_folder + "per.joblib"
        self.__per = Perceptron()

    def train(self, X, y):
        """
        :param X: feature matrix
        :param y: class labels

        :return: None; the updated model is saved to disk
        """
        self.__per.partial_fit(X, y, classes=self.__classes)
        self.__save_model()

    def predict(self, X):
        """
        :param X: feature matrix

        :return: predicted labels for X
        """
        if self.__per is None:
            self.__load_model()
        return self.__per.predict(X)

    def update(self, X, y):
        """
        :param X: feature matrix
        :param y: class labels

        :return: None
        """
        self.train(X, y)
        # If no longer using train(), add save_model()

    def __save_model(self):
        """Persist the perceptron to disk with joblib."""
        dump(self.__per, self.__savefile_per)

    def __load_model(self):
        """Load the perceptron from disk if a saved model exists."""
        if os.path.exists(self.__savefile_per):
            self.__per = load(self.__savefile_per)
        else:
            print("ERROR: Perceptron not initialized. Run train() first.")
Example no. 31
class Parser(ParserI):

    @staticmethod
    def build_labels_dataset(parses, feature_extractor):
        """ Transform a list of parses to a labels dataset """
        labels_X, labels_y = [], []
        for gold_parse in parses:
            for child, head in enumerate(gold_parse.heads()[1:-1]):
                features = feature_extractor(gold_parse, head, child + 1)

                label = gold_parse.labels()[child + 1]
                labels_X.append(features)
                labels_y.append(label)

        return labels_X, labels_y

    @staticmethod
    def build_transition_dataset(parses, feature_extractor):
        """ Transform a list of parses to a transitions dataset """
        transitions_X, transitions_y = [], []
        for gold_parse in parses:
            # Init an empty parse
            dep_parse = DependencyParse(gold_parse.tagged_words()[1:-1])

            # Start from an empty state
            state = ParserState(dep_parse)

            while state.stack or (state.buffer_index + 1) < len(dep_parse):
                features = feature_extractor(state)
                gold_moves = state.next_gold(gold_parse)

                if not gold_moves:
                    # Something is wrong here ...
                    break

                # Pick one of the possible transitions
                t = random.choice(gold_moves)

                # Append the features and transition to the dataset
                transitions_X.append(features)
                transitions_y.append(t)

                # Apply the transition to the state
                state.apply(t)

        return transitions_X, transitions_y

    def __init__(self, feature_detector, label_feature_detector):
        self.feature_extractor = feature_detector
        self.label_feature_detector = label_feature_detector

        self._vectorizer = FeatureHasher()
        self._model = SGDClassifier(loss='modified_huber')

        self._label_vectorizer = FeatureHasher()
        self._label_model = Perceptron()

    def evaluate(self, parses):
        correct_heads, correct_labels, total = 0, 0, 0

        for parse in parses:
            predicted_parse = self.parse(parse.tagged_words()[1:-1])

            heads = np.array(parse.heads()[1:-1])
            predicted_heads = np.array(predicted_parse.heads()[1:-1])

            labels = np.array(parse.labels()[1:-1])

            # Relabel the gold parse with what our model would label
            self.label_parse(parse)
            predicted_labels = np.array(parse.labels()[1:-1])

            total += len(heads)
            correct_heads += np.sum(heads == predicted_heads)
            correct_labels += np.sum(labels == predicted_labels)

        return correct_heads / total, correct_labels / total

    def parse(self, sent, *args, **kwargs):
        """ Parse a tagged sentence """
        state = ParserState(DependencyParse(sent))
        while state.stack or (state.buffer_index + 1) < len(state.parse):
            # Extract the features of the current state
            features = self.feature_extractor(state)
            vectorized_features = self._vectorizer.transform([features])

            # Get probabilities for the next transitions
            predictions = self._model.predict_proba(vectorized_features)[0]
            scores = dict(zip(list(self._model.classes_), list(predictions)))

            # Check what moves are actually valid
            valid_moves = state.next_valid()

            # Get the most probable valid mode
            guess = max(valid_moves, key=lambda move: scores[move])

            # apply the transition to the state
            state.apply(guess)

        self.label_parse(state.parse)  # Add labels too ...

        return state.parse

    def label_parse(self, parse):
        """ Add labels to a dependency parse """
        label_features = []
        for child, head in enumerate(parse.heads()[1:-1]):
            features = self.label_feature_detector(parse, head, child + 1)
            label_features.append(features)

        vectorized_label_features = self._label_vectorizer.transform(label_features)
        predicted_labels = self._label_model.predict(vectorized_label_features)
        parse._labels = [None] + list(predicted_labels) + [None]

        return parse

    def train(self, corpus_iterator, n_iter=5, batch_size=100):
        """ Train a model on a given corpus """
        for _ in range(n_iter):
            # Fork the iterator
            corpus_iterator, parses = itertools.tee(corpus_iterator)
            batch_count = 0

            while True:
                batch_count += 1
                print("Training on batch={0}".format(batch_count))
                batch = list(itertools.islice(parses, batch_size))

                # No more batches
                if not batch:
                    break

                # Train the model on a batch
                self.train_batch(batch)

    def train_batch(self, gold_parses):
        """ Train the model on a single batch """
        t_X, t_Y = self.build_transition_dataset(
            gold_parses, self.feature_extractor)

        self._model.partial_fit(self._vectorizer.transform(t_X), t_Y,
                                classes=Transitions.ALL)

        l_X, l_Y = self.build_labels_dataset(
            gold_parses, self.label_feature_detector)

        self._label_model.partial_fit(self._label_vectorizer.transform(l_X), l_Y,
                                      classes=DEPENDENCY_LABELS)
Example no. 32
    def train(self):
        model = os.path.abspath('1server/nlp/data/model3.joblib')
        if os.path.exists(model):

            model = load(model)
            # train_file = open(self.file_path)
            # lines = [line for line in train_file.read().split("\n")]
            # # train_file.close()
            # train_ = []
            # for row in lines:
            #     if parseEntity(row):
            #         train_.append(parseEntity(row))

            # score = model.evaluate(
            #     train_[:1500]
            # )
            # data_test = [conlltags2tree(iobs) for iobs in train_[1500:]]

            self._chunk = model
            return model
        else:
            # train_file = open(self.file_path)
            # lines = [line for line in train_file.read().split("\n")]
            # # train_file.close()
            # train_ = []
            # word_feature = []
            # for row in lines:
            #     if parseEntity(row):
            #         train_.append(parseEntity(row))
            # for sentence in train_:
            #     history = []
            #     untagged_sentence, tags = zip(*sentence)
            #     pprint(sentence)
            #     for index in range(len(sentence)):
            #         featureset = features(untagged_sentence, index, history)
            #         featureset['label'] = tags[index]
            #         word_feature.append((featureset, tags[index]))
            #         history.append(tags[index])

            # feature_key = [k for k, v in word_feature]
            # feature_key_unique = [i for i in feature_key[0]]

            # with open(os.path.abspath(
            #         'server/nlp/data/list_data_features.csv'), 'w', encoding="utf8",newline="") as csv_feature:
            #     writer = csv.writer(csv_feature)
            #     writer.writerow(feature_key_unique)
            #     for k in feature_key:
            #         writer.writerow(list(k.values()))

            # NB: this source imports pandas as df and, confusingly,
            # names the resulting DataFrame pd
            pd = df.read_csv(
                os.path.abspath('server/nlp/data/list_data_features.csv'),
                encoding="ISO-8859-1",
                error_bad_lines=False)
            vectorizer = DictVectorizer(sparse=False)
            pd = pd[:2600]

            pd = pd.fillna(method='ffill')
            y = pd['label'].values

            x = pd.drop('label', axis=1)
            pd_dict = x.to_dict("records")
            print(pd.isnull().sum())
            # x = [vectorizer.fit_transform(i)[0].tolist() for i in pd_dict]

            # x = np.asarray(x)
            # pprint(x[0])
            # y = np.asarray(y)
            x = vectorizer.fit_transform(pd_dict)

            all_classes = np.unique(y)
            x_train, x_test, y_train, y_test = train_test_split(x,
                                                                y,
                                                                test_size=0.2,
                                                                random_state=0)

            clf = Perceptron(verbose=10, n_jobs=-1, max_iter=1000)
            all_classes = list(set(y))
            clf.partial_fit(x_train, y_train, all_classes)
            new_classes = all_classes.copy()
            new_classes.remove('O')

            print(
                classification_report(y_pred=clf.predict(x_test),
                                      y_true=y_test,
                                      labels=new_classes))

            self._chunk = clf

            # csv_feature.close()
            # vectorizer = DictVectorizer()
            # feature_select = [k for k, v in word_feature[:5000]]
            # le = LabelEncoder()
            # for i in feature_select:
            #     for k in i:
            #         feature_select[i][k] =
            # data_x = vectorizer.fit_transform(feature_select).toarray()
            # data_y = [ v for k, v in word_feature[:5000]]
            # all_classes = np.unique(data_y)

            # Y = np.asarray(data_y)
            # X = np.asarray(data_x)
            # clf = GaussianNB()
            # clf.fit(data_x, Y)

            # print(Y.shape)
            # data_x, Y = np.arange(len(all_classes)*2).reshape(
            #     (len(all_classes), 2)), range(len(all_classes))
            # X = np.asarray(data_x)

            # print(type(data_y))

            # x_train, x_test, y_train, y_test = train_test_split(
            #     data_x, Y, test_size=0.2, random_state=0)

            # print(data_x)
            # pprint(dict(word_feature))
            # x = vectorizer.fit_transform(dict(word_feature))
            # print(60*"=")
            # pprint(x)
            # self._chunk = NamedEntityChunker(train_)
            # score = self._chunk.evaluate(
            #     [conlltags2tree([(w, t, iob) for (w, t), iob in iobs]) for iobs in train_[1500:]])

            # dump(self._chunk, os.path.abspath(
            #      'server/nlp/data/model3.joblib'))
            return self._chunk
Example no. 33
    Jprev[3 * i:3 * i + 3] = janken_array[j]

# Randomly initialize the current hand (an integer from 0 to 2)
j = np.random.randint(0, 3)

# Convert the past hands (the input data) into an array for scikit-learn
Jprev_set = np.array([Jprev])
# Convert the current hand (the target) into an array for scikit-learn
jnow_set = np.array([j])

# Define a simple perceptron
clf = Perceptron(random_state=None)

# Perform one round of online learning on the random input.
# On the first call, the classifier must be told every possible target (0, 1, 2)
clf.partial_fit(Jprev_set, jnow_set, classes=[0, 1, 2])

# In the program rock, scissors and paper map to 0, 1 and 2,
# but keyboard input uses 1, 2 and 3 because they are easier to type
print('1: rock, 2: scissors, 3: paper')

# Initialize the match results
win = 0
draw = 0
lose = 0

try:
    while True:
        try:
            # Convert the entered number (1-3) to the range 0-2
            j = int(input()) - 1