Example #1
def copy_logistic_model(model: LogisticRegression,
                        max_iter=1000000,
                        penalty='none') -> LogisticRegression:
    copied_model = LogisticRegression(max_iter=max_iter, penalty=penalty)
    copied_model.coef_ = model.coef_.copy()
    copied_model.classes_ = model.classes_.copy()
    copied_model.intercept_ = model.intercept_.copy()
    return copied_model
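A brief usage sketch for the copy helper above; the toy data below is illustrative and not part of the original example.

# Hedged usage sketch: X_sample / y_sample are placeholder data.
import numpy as np
from sklearn.linear_model import LogisticRegression

X_sample = np.random.rand(20, 3)
y_sample = np.array([0, 1] * 10)

original = LogisticRegression(max_iter=1000).fit(X_sample, y_sample)
clone = copy_logistic_model(original)

# The copy carries the fitted attributes, so it reproduces the original predictions.
assert (clone.predict(X_sample) == original.predict(X_sample)).all()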
def deserialize_logistic_regression(model_dict):
    model = LogisticRegression(**model_dict['params'])

    model.classes_ = np.array(model_dict['classes_'])
    model.coef_ = np.array(model_dict['coef_'])
    model.intercept_ = np.array(model_dict['intercept_'])
    model.n_iter_ = np.array(model_dict['n_iter_'])

    return model
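The dictionary consumed by this deserializer can be produced by a matching serializer. A minimal sketch, assuming the key names mirror the ones read above and that get_params() supplies the constructor arguments:

def serialize_logistic_regression(model):
    # Inverse of deserialize_logistic_regression: numpy arrays are converted
    # to plain lists so the dictionary can be dumped as JSON.
    return {
        'params': model.get_params(),
        'classes_': model.classes_.tolist(),
        'coef_': model.coef_.tolist(),
        'intercept_': model.intercept_.tolist(),
        'n_iter_': model.n_iter_.tolist(),
    }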
Example #3
def deserialize_logistic_regression(model_dict):
    model = LogisticRegression(model_dict["params"])

    model.classes_ = np.array(model_dict["classes_"])
    model.coef_ = np.array(model_dict["coef_"])
    model.intercept_ = np.array(model_dict["intercept_"])
    model.n_iter_ = np.array(model_dict["intercept_"])

    return model
    def choose(self, pvalues, method, outcome):
        '''
        Randomly choose state of node from probability distribution conditioned on *pvalues*.
        This method has two parts: (1) determining the proper probability
        distribution, and (2) using that probability distribution to determine
        an outcome.
        Arguments:
            1. *pvalues* -- An array containing the assigned states of the node's parents. This must be in the same order as the parents appear in ``self.Vdataentry['parents']``.
        The discrete parent values select the matching entry of ``self.Vdataentry["hybcprob"]``; its parameters are loaded into a multinomial logistic regression model, which is evaluated on the continuous parent values, and the resulting class distribution is sampled to produce the returned outcome.
        
        '''
        warnings.filterwarnings("ignore", category=FutureWarning)
        rand = random.random()

        dispvals = []
        lgpvals = []
        for pval in pvalues:
            if (isinstance(pval, str)):
                dispvals.append(pval)
            else:
                lgpvals.append(pval)
        # find correct Gaussian
        lgdistribution = self.Vdataentry["hybcprob"][str(dispvals)]

        for pvalue in lgpvals:
            assert pvalue != 'default', "Graph skeleton was not topologically ordered."

        model = LogisticRegression(multi_class='multinomial',
                                   solver='newton-cg',
                                   max_iter=100)

        model.classes_ = np.array(lgdistribution["classes"], dtype=object)

        if len(lgdistribution["classes"]) > 1:
            model.coef_ = np.array(lgdistribution["mean_scal"],
                                   dtype=float).reshape(-1, len(lgpvals))
            model.intercept_ = np.array(lgdistribution["mean_base"],
                                        dtype=float)
            distribution = model.predict_proba(
                np.array(lgpvals).reshape(1, -1))[0]

            # choose
            rand = random.random()
            lbound = 0
            ubound = 0
            for interval in range(len(lgdistribution["classes"])):
                ubound += distribution[interval]
                if (lbound <= rand and rand < ubound):
                    rindex = interval
                    break
                else:
                    lbound = ubound

            return str(lgdistribution["classes"][rindex])

        else:
            return str(lgdistribution["classes"][0])
Example #5
def LogR_predict():
    X = json.loads(request.form['X'])
    params = json.loads(request.form['params'])
    reg = LogisticRegression()
    reg.coef_ = np.array(params['coef'])
    reg.intercept_ = np.array(params['inter'])
    reg.n_iter_ = np.array(params['niter'])
    reg.classes_ = np.array(params['classes'])
    y = reg.predict(X)
    return jsonify(pred=y.tolist())
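A possible client-side call for this Flask handler; the endpoint URL below is an assumption, only the form field names ('X', 'params') and the response key ('pred') come from the code above.

# Hedged client sketch; the URL is a placeholder.
import json
import requests

payload = {
    'X': json.dumps([[0.1, 0.2, 0.3]]),
    'params': json.dumps({
        'coef': [[0.5, -0.2, 0.1]],
        'inter': [0.0],
        'niter': [10],
        'classes': [0, 1],
    }),
}
response = requests.post('http://localhost:5000/LogR_predict', data=payload)
print(response.json()['pred'])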
Example #6
def examineModel(X_train, y_train):
    model = util.openPkl("../models/avg_logistic_model")
    avg_coefficients = model['coeff_']
    avg_intercepts = model['intercept_']

    clf = LogisticRegression()
    clf.coef_ = avg_coefficients
    clf.intercept_ = avg_intercepts
    clf.classes_ = util.classes
    print("Averaged model train accuracy:", clf.score(X_train, y_train))
    avg_preds = clf.predict(X_train)
Example #7
def set_initial_params(model: LogisticRegression):
    """Sets initial parameters as zeros Required since model params are
    uninitialized until model.fit is called.

    But server asks for initial parameters from clients at launch. Refer
    to sklearn.linear_model.LogisticRegression documentation for more
    information.
    """
    n_classes = 10  # MNIST has 10 classes
    n_features = 784  # Number of features in dataset
    model.classes_ = np.array([i for i in range(n_classes)])

    model.coef_ = np.zeros((n_classes, n_features))
    if model.fit_intercept:
        model.intercept_ = np.zeros((n_classes,))
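In a federated setting this initializer is typically paired with helpers that read the same attributes back out and write aggregated values in. A hedged sketch; the function names are illustrative and not part of the original code:

def get_model_parameters(model: LogisticRegression):
    # Collect the weights a server would aggregate; mirrors the attributes
    # initialized by set_initial_params above.
    if model.fit_intercept:
        return [model.coef_, model.intercept_]
    return [model.coef_]


def set_model_parameters(model: LogisticRegression, params):
    # Write aggregated weights back into the local model.
    model.coef_ = params[0]
    if model.fit_intercept:
        model.intercept_ = params[1]
    return model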
Example #8
def load_model_info(model_info):
    """Return a longform model from a model info JSON object.

    Parameters
    ----------
    model_info : dict
        The JSON object containing the attributes of a model.

    Returns
    -------
    longform_model : py:class:`adeft.classify.AdeftClassifier`
        The classifier that was loaded from the given JSON object.
    """
    shortforms = model_info['shortforms']
    pos_labels = model_info['pos_labels']
    longform_model = AdeftClassifier(shortforms=shortforms,
                                     pos_labels=pos_labels)
    ngram_range = model_info['tfidf']['ngram_range']
    tfidf = TfidfVectorizer(ngram_range=ngram_range, stop_words='english')
    logit = LogisticRegression(multi_class='auto')

    tfidf.vocabulary_ = model_info['tfidf']['vocabulary_']
    tfidf.idf_ = model_info['tfidf']['idf_']
    logit.classes_ = np.array(model_info['logit']['classes_'], dtype='<U64')
    logit.intercept_ = np.array(model_info['logit']['intercept_'])
    logit.coef_ = np.array(model_info['logit']['coef_'])

    estimator = Pipeline([('tfidf', tfidf), ('logit', logit)])
    longform_model.estimator = estimator
    # These attributes do not exist in older adeft models.
    # For backwards compatibility we check if they are present
    if 'stats' in model_info:
        longform_model.stats = model_info['stats']
    if 'std' in model_info:
        longform_model._std = np.array(model_info['std'])
    if 'timestamp' in model_info:
        longform_model.timestamp = model_info['timestamp']
    if 'training_set_digest' in model_info:
        longform_model.training_set_digest = model_info['training_set_digest']
    if 'params' in model_info:
        longform_model.params = model_info['params']
    if 'version' in model_info:
        longform_model.version = model_info['version']
    if 'confusion_info' in model_info:
        longform_model.confusion_info = model_info['confusion_info']
    if 'other_metadata' in model_info:
        longform_model.other_metadata = model_info['other_metadata']
    return longform_model
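A short usage sketch for load_model_info, assuming the attributes were previously dumped to a JSON file; the file name and input text are placeholders.

import json

with open('model_info.json') as f:
    model_info = json.load(f)

longform_model = load_model_info(model_info)
# The reconstructed Pipeline can score raw text directly.
probabilities = longform_model.estimator.predict_proba(['example input text'])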
Example #9
def fed_integrate_model_lr(model1, model2):

    coef_1 = model1.coef_
    coef_2 = model2.coef_

    intercept_1 = model1.intercept_
    intercept_2 = model2.intercept_

    classes = model1.classes_

    model = LogisticRegression(solver='sag')
    model.coef_ = mulkeys_add_2Darray(coef_1, coef_2) / 2
    model.intercept_ = mulkeys_add_array(intercept_1, intercept_2) / 2
    model.classes_ = classes

    return model
Example #10
    def convert(self, model_dict):
        param_obj = model_dict["HomoLogisticRegressionParam"]
        meta_obj = model_dict["HomoLogisticRegressionMeta"]

        sk_lr_model = LogisticRegression(penalty=meta_obj.penalty.lower(),
                                         tol=meta_obj.tol,
                                         fit_intercept=meta_obj.fit_intercept,
                                         max_iter=meta_obj.max_iter)

        coefficient = np.empty((1, len(param_obj.header)))
        for index in range(len(param_obj.header)):
            coefficient[0][index] = param_obj.weight[param_obj.header[index]]
        sk_lr_model.coef_ = coefficient
        sk_lr_model.intercept_ = np.array([param_obj.intercept])
        # hard-coded 0-1 classification as HomoLR only supports this for now
        sk_lr_model.classes_ = np.array([0., 1.])
        sk_lr_model.n_iter_ = [param_obj.iters]
        return sk_lr_model
Example #11
File: encdec.py Project: vipyoung/covid_fl
 def object_hook(self, json_msg):
     if '__type__' in json_msg:
         if json_msg['__type__'] == 'np.ndarray':
             return np.array(json_msg['data'])
         elif json_msg['__type__'] == 'LogisticRegression' or json_msg[
                 '__type__'] == 'SGDClassifier':
             if json_msg['__type__'] == 'LogisticRegression':
                 model = LogisticRegression()
             elif json_msg['__type__'] == 'SGDClassifier':
                 model = SGDClassifier()
             model.set_params(**json_msg['params'])
             if 'intercept_' in json_msg:
                 model.intercept_ = json_msg['intercept_']
             if 'classes_' in json_msg:
                 model.classes_ = json_msg['classes_']
             return model
         elif json_msg['__type__'] == 'pd.Series':
             return pd.Series(json_msg['data'])
         else:
             raise TypeError()
     return json_msg
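The decoder above relies on '__type__' tags in the JSON; a matching encoder could emit them as in the sketch below. The class name is an assumption, only the tag layout comes from object_hook.

import json
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression, SGDClassifier


class ModelEncoder(json.JSONEncoder):
    # Emits the '__type__' tags that object_hook above knows how to decode.
    def default(self, obj):
        if isinstance(obj, np.ndarray):
            return {'__type__': 'np.ndarray', 'data': obj.tolist()}
        if isinstance(obj, pd.Series):
            return {'__type__': 'pd.Series', 'data': obj.tolist()}
        if isinstance(obj, (LogisticRegression, SGDClassifier)):
            msg = {'__type__': type(obj).__name__, 'params': obj.get_params()}
            if hasattr(obj, 'intercept_'):
                msg['intercept_'] = np.asarray(obj.intercept_).tolist()
            if hasattr(obj, 'classes_'):
                msg['classes_'] = np.asarray(obj.classes_).tolist()
            return msg
        return super().default(obj)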
Example #12
def load_model(serialization_dir):
    with open(os.path.join(args.model, "best_hyperparameters.json"), 'r') as f:
        hyperparameters = json.load(f)
    if hyperparameters.pop('stopwords') == 1:
        stop_words = 'english'
    else:
        stop_words = None
    weight = hyperparameters.pop('weight')
    if weight == 'binary':
        binary = True
    else:
        binary = False
    ngram_range = hyperparameters.pop('ngram_range')
    ngram_range = tuple(sorted(int(x) for x in ngram_range.split()))
    if weight == 'tf-idf':
        vect = TfidfVectorizer(stop_words=stop_words,
                               lowercase=True,
                               ngram_range=ngram_range)
    else:
        vect = CountVectorizer(binary=binary,
                               stop_words=stop_words,
                               lowercase=True,
                               ngram_range=ngram_range)
    with open(os.path.join(args.model, "vocab.json"), 'r') as f:
        vocab = json.load(f)
    vect.vocabulary_ = vocab
    hyperparameters['C'] = float(hyperparameters['C'])
    hyperparameters['tol'] = float(hyperparameters['tol'])
    classifier = LogisticRegression(**hyperparameters)
    if os.path.exists(os.path.join(serialization_dir, "archive", "idf.npy")):
        vect.idf_ = np.load(
            os.path.join(serialization_dir, "archive", "idf.npy"))
    classifier.coef_ = np.load(
        os.path.join(serialization_dir, "archive", "coef.npy"))
    classifier.intercept_ = np.load(
        os.path.join(serialization_dir, "archive", "intercept.npy"))
    classifier.classes_ = np.load(
        os.path.join(serialization_dir, "archive", "classes.npy"))
    return classifier, vect
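A quick check of the loaded pair, assuming serialization_dir points at a directory containing the hyperparameter, vocabulary, and archive files referenced above; the path and sample text are placeholders.

classifier, vect = load_model('serialization_dir')
features = vect.transform(['some document to classify'])
print(classifier.predict(features))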
Example #13
def load_model_info(model_info):
    """Return a longform model from a model info JSON object.

    Parameters
    ----------
    model_info : dict
        The JSON object containing the attributes of a model.

    Returns
    -------
    longform_model : py:class:`adeft.classify.AdeftClassifier`
        The classifier that was loaded from the given JSON object.
    """
    shortforms = model_info['shortforms']
    pos_labels = model_info['pos_labels']
    longform_model = AdeftClassifier(shortforms=shortforms,
                                     pos_labels=pos_labels)
    ngram_range = model_info['tfidf']['ngram_range']
    tfidf = TfidfVectorizer(ngram_range=ngram_range, stop_words='english')
    logit = LogisticRegression(multi_class='auto')

    tfidf.vocabulary_ = model_info['tfidf']['vocabulary_']
    tfidf.idf_ = model_info['tfidf']['idf_']
    logit.classes_ = np.array(model_info['logit']['classes_'], dtype='<U64')
    logit.intercept_ = np.array(model_info['logit']['intercept_'])
    logit.coef_ = np.array(model_info['logit']['coef_'])

    estimator = Pipeline([('tfidf', tfidf), ('logit', logit)])
    longform_model.estimator = estimator
    # Load model statistics if they are available
    if 'stats' in model_info:
        longform_model.stats = model_info['stats']
    # Load standard deviations for calculating feature importances
    # if they are available
    if 'std' in model_info:
        longform_model._std = np.array(model_info['std'])
    return longform_model
classifiers = [
    LogisticRegression(multi_class='multinomial',
                       solver='sag',
                       max_iter=10000,
                       n_jobs=-1).fit(
                           train_images[i * n_samples:(i + 1) * n_samples],
                           train_labels[i * n_samples:(i + 1) * n_samples])
    for i in range(n_classifiers)
]
combined = LogisticRegression(multi_class='multinomial',
                              solver='sag',
                              max_iter=10000,
                              n_jobs=-1)
combined.coef_ = sum([c.coef_ for c in classifiers])
combined.intercept_ = sum([c.intercept_ for c in classifiers])
combined.classes_ = classifiers[0].classes_  # assumes the first classifier saw every class; simplest for now
end = time()
print("train completed in {} seconds".format(int(end - start)))

# Check accuracy
accuracy = combined.score(test_images, list(test_labels))
print("accuracy on test set is {} percent".format(accuracy * 100))

# save coefficients
np.savetxt(result_file_prefix + "coefficients.csv",
           combined.coef_,
           delimiter=",")
np.savetxt(result_file_prefix + "intercepts.csv",
           combined.intercept_,
           delimiter=",")
np.savetxt(result_file_prefix + "classes.csv",
Example #15
from numpy import array

_clf = LogisticRegression()
_clf.coef_ = array([[
    -3.09925498, -1.2233267, 0.11772731, 0.11320903, -0.61476615, 0.16965524,
    0.74448574, -0.04194057, -1.35630572, -0.24676336, 0.41220109, -0.35504167,
    0.92710423, -2.1255104, 0.08247018, 0.73824089, 0.31262326, 0.41799742,
    -0.60178821, -0.33976708, -0.03794846, 0., -0.91812391, -0.2121092,
    0.19453728, -0.60672439, -0.92668671, 0.12330509, 0.04666124, 0.,
    1.68901669, -0.67069155, -0.45640558, 1.05191066, 1.03483933, 0.73240274,
    0.17867271, -0.07407496, 2.06098403, 0.42573062, -0.38634422, 0.7109839,
    -0.02934711, 0.05490663, 0.02008552, 0.05069223, 0., 0.2016651,
    -0.28770706, -0.88722735, -0.26507582, 0.52628048, -1.28404466,
    -1.96447254, 0.07607324, 0.70359565, 0.35094977, 0.01376572
]])
_clf.classes_ = array([False, True])
_clf.intercept_ = [-2.75918545]

_v = DictVectorizer()
_v.feature_names_ = [
    'first_chars= ', 'first_chars="a', "first_chars=' ", "first_chars='A",
    'first_chars=(0', 'first_chars=(A', 'first_chars=(a', 'first_chars=)]',
    'first_chars=, ', 'first_chars=. ', 'first_chars=0', 'first_chars=0 ',
    'first_chars=0,', 'first_chars=0.', 'first_chars=00', 'first_chars=0:',
    'first_chars=0\\', 'first_chars=@', 'first_chars=A', 'first_chars=A ',
    'first_chars=A,', 'first_chars=A-', 'first_chars=A.', 'first_chars=A0',
    'first_chars=A=', 'first_chars=AA', 'first_chars=Aa', 'first_chars=[0',
    'first_chars=[A', 'first_chars=[a', 'first_chars=\\A', 'first_chars=a ',
    'first_chars=a(', 'first_chars=a-', 'first_chars=a.', 'first_chars=a0',
    'first_chars=aA', 'first_chars=a[', 'first_chars=aa', 'isalpha', 'isdigit',
    'islower', 'mean_len', 'prev_len', 'punct= ', 'punct="', 'punct=%',
Example #16
                            n_jobs=1)
total = LogisticRegression(multi_class='multinomial',
                           solver='newton-cg',
                           max_iter=1000,
                           n_jobs=1)
start = time()
part_A = part_A.fit(train_images[:100], train_labels[:100])
part_B = part_B.fit(train_images[100:], train_labels[100:])
total = total.fit(train_images[:], train_labels[:])
end = time()
print("train completed in {} seconds".format(int(end - start)))

# Check accuracy
accuracy_a = part_A.score(test_images, list(test_labels))
print("part A accuracy on test set is {} percent".format(accuracy_a * 100))
accuracy_b = part_B.score(test_images, list(test_labels))
print("part B accuracy on test set is {} percent".format(accuracy_b * 100))
accuracy_t = total.score(test_images, list(test_labels))
print("total accuracy on test set is {} percent".format(accuracy_t * 100))

# sum matrixes for sets
combined = LogisticRegression(multi_class='multinomial',
                              solver='newton-cg',
                              max_iter=1000,
                              n_jobs=1)
combined.coef_ = (part_A.coef_ + part_B.coef_) / 2
combined.intercept_ = (part_A.intercept_ + part_B.intercept_) / 2
combined.classes_ = part_A.classes_
accuracy_c = combined.score(test_images, list(test_labels))
print("summed coefficient accuracy on test set is {} percent".format(
    accuracy_c * 100))
Example #17
def main(seed=0,
         n_train=60000,
         n_test=10000,
         inhib=250,
         kernel_size=(16, ),
         stride=(2, ),
         time=100,
         n_filters=25,
         crop=0,
         lr=1e-2,
         lr_decay=0.99,
         dt=1,
         theta_plus=0.05,
         theta_decay=1e-7,
         intensity=5,
         norm=0.2,
         progress_interval=10,
         update_interval=250,
         train=True,
         plot=False,
         gpu=False):

    assert n_train % update_interval == 0 and n_test % update_interval == 0, \
        'No. examples must be divisible by update_interval'

    params = [
        seed, kernel_size, stride, n_filters, crop, lr, lr_decay, n_train,
        inhib, time, dt, theta_plus, theta_decay, intensity, norm,
        progress_interval, update_interval
    ]

    model_name = '_'.join([str(x) for x in params])

    if not train:
        test_params = [
            seed, kernel_size, stride, n_filters, crop, lr, lr_decay, n_train,
            n_test, inhib, time, dt, theta_plus, theta_decay, intensity, norm,
            progress_interval, update_interval
        ]

    np.random.seed(seed)

    if gpu:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.manual_seed_all(seed)
    else:
        torch.manual_seed(seed)

    side_length = 28 - crop * 2
    n_inpt = side_length**2
    n_examples = n_train if train else n_test
    n_classes = 10

    # Build network.
    if train:
        network = LocallyConnectedNetwork(
            n_inpt=n_inpt,
            input_shape=[side_length, side_length],
            kernel_size=kernel_size,
            stride=stride,
            n_filters=n_filters,
            inh=inhib,
            dt=dt,
            nu=[0, lr],
            theta_plus=theta_plus,
            theta_decay=theta_decay,
            wmin=0.0,
            wmax=1.0,
            norm=norm)

    else:
        network = load_network(os.path.join(params_path, model_name + '.pt'))
        network.connections['X', 'Y'].update_rule = NoOp(
            connection=network.connections['X', 'Y'],
            nu=network.connections['X', 'Y'].nu)
        network.layers['Y'].theta_decay = 0
        network.layers['Y'].theta_plus = 0

    conv_size = network.connections['X', 'Y'].conv_size
    locations = network.connections['X', 'Y'].locations
    conv_prod = int(np.prod(conv_size))
    n_neurons = n_filters * conv_prod

    # Voltage recording for excitatory and inhibitory layers.
    voltage_monitor = Monitor(network.layers['Y'], ['v'], time=time)
    network.add_monitor(voltage_monitor, name='output_voltage')

    # Load MNIST data.
    dataset = MNIST(path=data_path, download=True)

    if train:
        images, labels = dataset.get_train()
    else:
        images, labels = dataset.get_test()

    images *= intensity
    if crop > 0:
        images = images[:, crop:-crop, crop:-crop]

    # Record spikes during the simulation.
    spike_record = torch.zeros(update_interval, time, n_neurons)
    full_spike_record = torch.zeros(n_examples, n_neurons)

    # Neuron assignments and spike proportions.
    if train:
        logreg_model = LogisticRegression(warm_start=True,
                                          n_jobs=-1,
                                          solver='lbfgs',
                                          max_iter=1000,
                                          multi_class='multinomial')
        logreg_model.coef_ = np.zeros([n_classes, n_neurons])
        logreg_model.intercept_ = np.zeros(n_classes)
        logreg_model.classes_ = np.arange(n_classes)
    else:
        path = os.path.join(params_path,
                            '_'.join(['auxiliary', model_name]) + '.pt')
        logreg_coef, logreg_intercept = torch.load(open(path, 'rb'))
        logreg_model = LogisticRegression(warm_start=True,
                                          n_jobs=-1,
                                          solver='lbfgs',
                                          max_iter=1000,
                                          multi_class='multinomial')
        logreg_model.coef_ = logreg_coef
        logreg_model.intercept_ = logreg_intercept
        logreg_model.classes_ = np.arange(n_classes)

    if train:
        best_accuracy = 0

    # Sequence of accuracy estimates.
    curves = {'logreg': []}
    predictions = {scheme: torch.Tensor().long() for scheme in curves.keys()}

    spikes = {}
    for layer in set(network.layers):
        spikes[layer] = Monitor(network.layers[layer],
                                state_vars=['s'],
                                time=time)
        network.add_monitor(spikes[layer], name=f'{layer}_spikes')

    # Train the network.
    if train:
        print('\nBegin training.\n')
    else:
        print('\nBegin test.\n')

    spike_ims = None
    spike_axes = None
    weights_im = None
    weights2_im = None

    start = t()
    for i in range(n_examples):
        if i % progress_interval == 0:
            print(f'Progress: {i} / {n_examples} ({t() - start:.4f} seconds)')
            start = t()

        if i % update_interval == 0 and i > 0:
            if i % len(labels) == 0:
                current_labels = labels[-update_interval:]
                current_record = full_spike_record[-update_interval:]
            else:
                current_labels = labels[i % len(labels) - update_interval:i %
                                        len(labels)]
                current_record = full_spike_record[i % len(labels) -
                                                   update_interval:i %
                                                   len(labels)]

            # Update and print accuracy evaluations.
            curves, preds = update_curves(curves,
                                          current_labels,
                                          n_classes,
                                          full_spike_record=current_record,
                                          logreg=logreg_model)
            print_results(curves)

            for scheme in preds:
                predictions[scheme] = torch.cat(
                    [predictions[scheme], preds[scheme]], -1)

            # Save accuracy curves to disk.
            to_write = ['train'] + params if train else ['test'] + params
            f = '_'.join([str(x) for x in to_write]) + '.pt'
            torch.save((curves, update_interval, n_examples),
                       open(os.path.join(curves_path, f), 'wb'))

            if train:
                if any([x[-1] > best_accuracy for x in curves.values()]):
                    print(
                        'New best accuracy! Saving network parameters to disk.'
                    )

                    # Save network to disk.
                    network.save(os.path.join(params_path, model_name + '.pt'))
                    path = os.path.join(
                        params_path,
                        '_'.join(['auxiliary', model_name]) + '.pt')
                    torch.save((logreg_model.coef_, logreg_model.intercept_),
                               open(path, 'wb'))
                    best_accuracy = max([x[-1] for x in curves.values()])

                # Refit logistic regression model.
                logreg_model = logreg_fit(full_spike_record[:i], labels[:i],
                                          logreg_model)

            print()

        # Get next input sample.
        image = images[i % len(images)].contiguous().view(-1)
        sample = bernoulli(datum=image, time=time, dt=dt)
        inpts = {'X': sample}

        # Run the network on the input.
        network.run(inpts=inpts, time=time)

        retries = 0
        while spikes['Y'].get('s').sum() < 5 and retries < 3:
            retries += 1
            image *= 2
            sample = bernoulli(datum=image, time=time, dt=dt)
            inpts = {'X': sample}
            network.run(inpts=inpts, time=time)

        # Add to spikes recording.
        spike_record[i % update_interval] = spikes['Y'].get('s').view(time, -1)
        full_spike_record[i] = spikes['Y'].get('s').view(time, -1).sum(0)

        if plot:
            # Optionally plot various simulation information.
            _spikes = {
                'X': spikes['X'].get('s').view(side_length**2, time),
                'Y': spikes['Y'].get('s').view(n_filters * conv_prod, time)
            }

            spike_ims, spike_axes = plot_spikes(spikes=_spikes,
                                                ims=spike_ims,
                                                axes=spike_axes)
            weights_im = plot_locally_connected_weights(
                network.connections[('X', 'Y')].w,
                n_filters,
                kernel_size,
                conv_size,
                locations,
                side_length,
                im=weights_im)
            weights2_im = plot_weights(logreg_model.coef_, im=weights2_im)

            plt.pause(1e-8)

        network.reset_()  # Reset state variables.

    print(f'Progress: {n_examples} / {n_examples} ({t() - start:.4f} seconds)')

    i += 1

    if i % len(labels) == 0:
        current_labels = labels[-update_interval:]
        current_record = full_spike_record[-update_interval:]
    else:
        current_labels = labels[i % len(labels) - update_interval:i %
                                len(labels)]
        current_record = full_spike_record[i % len(labels) -
                                           update_interval:i % len(labels)]

    # Update and print accuracy evaluations.
    curves, preds = update_curves(curves,
                                  current_labels,
                                  n_classes,
                                  full_spike_record=current_record,
                                  logreg=logreg_model)
    print_results(curves)

    for scheme in preds:
        predictions[scheme] = torch.cat([predictions[scheme], preds[scheme]],
                                        -1)

    if train:
        if any([x[-1] > best_accuracy for x in curves.values()]):
            print('New best accuracy! Saving network parameters to disk.')

            # Save network to disk.
            network.save(os.path.join(params_path, model_name + '.pt'))
            path = os.path.join(params_path,
                                '_'.join(['auxiliary', model_name]) + '.pt')
            torch.save((logreg_model.coef_, logreg_model.intercept_),
                       open(path, 'wb'))

    if train:
        print('\nTraining complete.\n')
    else:
        print('\nTest complete.\n')

    print('Average accuracies:\n')
    for scheme in curves.keys():
        print('\t%s: %.2f' % (scheme, float(np.mean(curves[scheme]))))

    # Save accuracy curves to disk.
    if train:
        to_write = ['train'] + params
        f = '_'.join([str(x) for x in to_write]) + '.pt'
        torch.save((curves, update_interval, n_examples),
                   open(os.path.join(curves_path, f), 'wb'))

    # Save results to disk.
    results = [np.mean(curves['logreg']), np.std(curves['logreg'])]

    to_write = params + results if train else test_params + results
    to_write = [str(x) for x in to_write]

    if train:
        name = 'train.csv'
    else:
        name = 'test.csv'

    if not os.path.isfile(os.path.join(results_path, name)):
        with open(os.path.join(results_path, name), 'w') as f:
            if train:
                f.write(
                    'random_seed,kernel_size,stride,n_filters,crop,lr,lr_decay,n_train,inhib,time,timestep,theta_plus,'
                    'theta_decay,intensity,norm,progress_interval,update_interval,mean_logreg,std_logreg\n'
                )
            else:
                f.write(
                    'random_seed,kernel_size,stride,n_filters,crop,lr,lr_decay,n_train,n_test,inhib,time,timestep,'
                    'theta_plus,theta_decay,intensity,norm,progress_interval,update_interval,mean_logreg,std_logreg\n'
                )

    with open(os.path.join(results_path, name), 'a') as f:
        f.write(','.join(to_write) + '\n')

    if labels.numel() > n_examples:
        labels = labels[:n_examples]
    else:
        while labels.numel() < n_examples:
            if 2 * labels.numel() > n_examples:
                labels = torch.cat(
                    [labels, labels[:n_examples - labels.numel()]])
            else:
                labels = torch.cat([labels, labels])

    # Compute confusion matrices and save them to disk.
    confusions = {}
    for scheme in predictions:
        confusions[scheme] = confusion_matrix(labels, predictions[scheme])

    to_write = ['train'] + params if train else ['test'] + test_params
    f = '_'.join([str(x) for x in to_write]) + '.pt'
    torch.save(confusions, os.path.join(confusion_path, f))
n_neurons = int(np.prod(network.layers['Y'].shape))
update_interval = 100
progress_interval = 10

path = os.path.join(ROOT_DIR, 'data', 'MNIST')

dataset = MNIST(path=path, download=True, shuffle=True)
images, labels = dataset.get_train()
images = images[:, 4:-4, 4:-4].contiguous()
images = images.view(-1, n_input) * 0.5
labels = labels.long()

model = LogisticRegression(solver='lbfgs', multi_class='multinomial')
model.coef_ = np.zeros([n_classes, n_neurons])
model.intercept_ = np.zeros(n_classes)
model.classes_ = np.arange(n_classes)

full_spike_record = torch.zeros(len(images), n_neurons)
locations = network.connections['X', 'Y'].locations

spike_ims = None
spike_axes = None
weights_im = None
weights2_im = None
weights3_im = None

start = t()
for i in range(len(images)):
    if i % progress_interval == 0:
        print(f'Progress: {i} / {len(images)} ({t() - start:.4f} seconds)')
        start = t()
Example #19
 def _fake_lr(self):
     lr = LogisticRegression()
     lr.coef_ = self.coef_
     lr.intercept_ = self.intercept_
     lr.classes_ = self.classes_
     return lr
Example #20
File: main.py Project: oleg131/cuisines
        'fit_intercept': True,
        'intercept_scaling': 1,
        'max_iter': 100,
        'multi_class': 'warn',
        'n_jobs': None,
        'penalty': 'l2',
        'random_state': 0,
        'solver': 'warn',
        'tol': 0.0001,
        'verbose': 0,
        'warm_start': False
    })

clf.coef_ = np.fromfile('coef.npy').reshape(-1, n_features)
clf.intercept_ = np.fromfile('intercept.npy')
clf.classes_ = np.loadtxt('classes.txt', delimiter=',', dtype=str)

estimator = make_pipeline(lemmatizer, vectorizer, clf)


def predict(query):
    pred = estimator.predict_proba([[query]])
    pred = {j: i for i, j in zip(pred[0], clf.classes_)}

    return pred


@app.route('/get')
def get():
    query = request.args.get('query', '')
    pred = predict(query)
Example #21
monitor_directory = os.environ.get('MONITOR_DIRECTORY', '/var/lib/motion')
label_directory = os.environ.get('LABEL_DIRECTORY', monitor_directory + '/label')
poll = os.environ.get('POLL') == 'true'

print('Monitor directory:', monitor_directory)
print('Poll:', poll)

with open('classifier.json') as data_file:    
    clf_dict = json.load(data_file)
clf = LogisticRegression()
clf.set_params(**clf_dict['params'])
clf.coef_ = np.asarray(clf_dict['coef_'])
clf.intercept_ = np.asarray(clf_dict['intercept_'])
clf.n_iter_ = np.asarray(clf_dict['n_iter_'])
clf.classes_ = np.asarray(clf_dict['classes_'])

labelled = set()
while True:
    candidates = set(glob(monitor_directory+'/*.jpg'))
    
    for candidate in candidates:
        name = candidate.split('/')[-1]
        print(name)

        if name in labelled:
            continue
        if os.path.isfile(label_directory+'/error/'+name) or os.path.isfile(label_directory+'/guess/open/'+name) or os.path.isfile(label_directory+'/guess/closed/'+name) or os.path.isfile(label_directory+'/known/open/'+name) or os.path.isfile(label_directory+'/known/closed/'+name):
            labelled.add(name)
            continue
Example #22
def train_confidence_model(argv=None):
    import pickle

    from sklearn.linear_model import LogisticRegression, LogisticRegressionCV
    from sklearn.preprocessing import MaxAbsScaler
    from ..utils import get_logger

    parser = argparse.ArgumentParser()
    parser.add_argument("output", type=str, help="the path to save the model to")
    parser.add_argument("-s", "--summary", type=str, help='the summarized sequence NN outputs')
    parser.add_argument('-O', '--onnx', metavar='PATH', nargs='?', const=True, default=False, type=str,
                        help='Save data in ONNX format. If an argument is passed, it is assumed to be the path to a pickled model to convert to ONNX format')
    parser.add_argument("-k", "--topk", metavar="TOPK", type=int, help="use the TOPK probabilities for building confidence model", default=None)
    parser.add_argument('-j', '--n_jobs', metavar='NJOBS', nargs='?', const=True, default=None, type=int,
                        help='the number of jobs to use for cross-validation. if NJOBS is not specified, use the number of folds i.e. 5')
    parser.add_argument('-c', '--cvs', metavar='NFOLDS', default=5, type=int,
                        help='the number of cross-validation folds to use. default is 5')

    args = parser.parse_args(argv)

    if args.n_jobs and isinstance(args.n_jobs, bool):
        args.n_jobs = args.cvs

    logger = get_logger()

    if isinstance(args.onnx, str):
        # just convert the given model to ONNX
        with open(args.output, 'rb') as f:
            lr = pickle.load(f)
        args.output = args.onnx
    else:
        # train a new model
        logger.info(f"reading outputs summary data from {args.summary}")
        f = h5py.File(args.summary, 'r')

        true = f['labels'][:]
        pred = f['preds'][:]

        # the top-k probabilities, in descending order
        maxprobs = f['maxprob'][:, :args.topk]
        lengths = f['lengths'][:]

        f.close()

        X = np.concatenate([lengths[:, np.newaxis], maxprobs], axis=1)
        y = (true == pred).astype(int)

        scaler = MaxAbsScaler()
        scaler.fit(X)

        lrcv = LogisticRegressionCV(penalty='elasticnet', solver='saga', l1_ratios=[.1, .5, .7, .9, .95, .99, 1], n_jobs=args.n_jobs, cv=args.cvs)
        logger.info(f"building confidence model with \n{lrcv}")

        lrcv.fit(scaler.transform(X), y)

        lr = LogisticRegression()
        lr.coef_ = scaler.transform(lrcv.coef_)
        lr.intercept_ = lrcv.intercept_
        lr.classes_ = lrcv.classes_

    if args.onnx:
        from skl2onnx import convert_sklearn
        from skl2onnx.common.data_types import FloatTensorType

        logger.info(f"saving {lr} to ONNX file {args.output}")
        initial_type = [('float_input', FloatTensorType([None, lr.coef_.shape[1]]))]
        onx = convert_sklearn(lr, target_opset=12, initial_types=initial_type, options={type(lr): {'zipmap': False}})
        with open(args.output, "wb") as f:
            f.write(onx.SerializeToString())
    else:
        logger.info(f"pickling {lr} to {args.output}")
        with open(args.output, 'wb') as f:
            pickle.dump(lr, f)
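The command-line flags are all defined by the argparse setup above; a possible invocation of the entry point, with placeholder file names:

# Hedged invocation sketch; both paths are placeholders.
train_confidence_model([
    'confidence_model.pkl',        # positional: where to save the trained model
    '--summary', 'nn_outputs.h5',  # summarized sequence NN outputs
    '--topk', '3',                 # use the top-3 probabilities as features
    '--cvs', '5',                  # five cross-validation folds
])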
Example #23
def main(seed=0,
         n_train=60000,
         n_test=10000,
         kernel_size=(16, ),
         stride=(4, ),
         n_filters=25,
         padding=0,
         inhib=100,
         time=25,
         lr=1e-3,
         lr_decay=0.99,
         dt=1,
         intensity=1,
         progress_interval=10,
         update_interval=250,
         plot=False,
         train=True,
         gpu=False):

    assert n_train % update_interval == 0 and n_test % update_interval == 0, \
        'No. examples must be divisible by update_interval'

    params = [
        seed, n_train, kernel_size, stride, n_filters, padding, inhib, time,
        lr, lr_decay, dt, intensity, update_interval
    ]

    model_name = '_'.join([str(x) for x in params])

    if not train:
        test_params = [
            seed, n_train, n_test, kernel_size, stride, n_filters, padding,
            inhib, time, lr, lr_decay, dt, intensity, update_interval
        ]

    np.random.seed(seed)

    if gpu:
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
        torch.cuda.manual_seed_all(seed)
    else:
        torch.manual_seed(seed)

    n_examples = n_train if train else n_test
    input_shape = [20, 20]

    if kernel_size == input_shape:
        conv_size = [1, 1]
    else:
        conv_size = (int((input_shape[0] - kernel_size[0]) / stride[0]) + 1,
                     int((input_shape[1] - kernel_size[1]) / stride[1]) + 1)

    n_classes = 10
    n_neurons = n_filters * np.prod(conv_size)
    total_kernel_size = int(np.prod(kernel_size))
    total_conv_size = int(np.prod(conv_size))

    # Build network.
    if train:
        network = Network()
        input_layer = Input(n=400, shape=(1, 1, 20, 20), traces=True)
        conv_layer = DiehlAndCookNodes(n=n_filters * total_conv_size,
                                       shape=(1, n_filters, *conv_size),
                                       thresh=-64.0,
                                       traces=True,
                                       theta_plus=0.05 * (kernel_size[0] / 20),
                                       refrac=0)
        conv_layer2 = LIFNodes(n=n_filters * total_conv_size,
                               shape=(1, n_filters, *conv_size),
                               refrac=0)
        conv_conn = Conv2dConnection(input_layer,
                                     conv_layer,
                                     kernel_size=kernel_size,
                                     stride=stride,
                                     update_rule=WeightDependentPostPre,
                                     norm=0.05 * total_kernel_size,
                                     nu=[0, lr],
                                     wmin=0,
                                     wmax=0.25)
        conv_conn2 = Conv2dConnection(input_layer,
                                      conv_layer2,
                                      w=conv_conn.w,
                                      kernel_size=kernel_size,
                                      stride=stride,
                                      update_rule=None,
                                      wmax=0.25)

        w = -inhib * torch.ones(n_filters, conv_size[0], conv_size[1],
                                n_filters, conv_size[0], conv_size[1])
        for f in range(n_filters):
            for f2 in range(n_filters):
                if f != f2:
                    w[f, :, :f2, :, :] = 0

        w = w.view(n_filters * conv_size[0] * conv_size[1],
                   n_filters * conv_size[0] * conv_size[1])
        recurrent_conn = Connection(conv_layer, conv_layer, w=w)

        network.add_layer(input_layer, name='X')
        network.add_layer(conv_layer, name='Y')
        network.add_layer(conv_layer2, name='Y_')
        network.add_connection(conv_conn, source='X', target='Y')
        network.add_connection(conv_conn2, source='X', target='Y_')
        network.add_connection(recurrent_conn, source='Y', target='Y')

        # Voltage recording for excitatory and inhibitory layers.
        voltage_monitor = Monitor(network.layers['Y'], ['v'], time=time)
        network.add_monitor(voltage_monitor, name='output_voltage')
    else:
        network = load_network(os.path.join(params_path, model_name + '.pt'))
        network.connections['X', 'Y'].update_rule = NoOp(
            connection=network.connections['X', 'Y'],
            nu=network.connections['X', 'Y'].nu)
        network.layers['Y'].theta_decay = 0
        network.layers['Y'].theta_plus = 0

    # Load MNIST data.
    dataset = MNIST(data_path, download=True)

    if train:
        images, labels = dataset.get_train()
    else:
        images, labels = dataset.get_test()

    images *= intensity
    images = images[:, 4:-4, 4:-4].contiguous()

    # Record spikes during the simulation.
    spike_record = torch.zeros(update_interval, time, n_neurons)
    full_spike_record = torch.zeros(n_examples, n_neurons)

    # Neuron assignments and spike proportions.
    if train:
        logreg_model = LogisticRegression(warm_start=True,
                                          n_jobs=-1,
                                          solver='lbfgs',
                                          max_iter=1000,
                                          multi_class='multinomial')
        logreg_model.coef_ = np.zeros([n_classes, n_neurons])
        logreg_model.intercept_ = np.zeros(n_classes)
        logreg_model.classes_ = np.arange(n_classes)
    else:
        path = os.path.join(params_path,
                            '_'.join(['auxiliary', model_name]) + '.pt')
        logreg_coef, logreg_intercept = torch.load(open(path, 'rb'))
        logreg_model = LogisticRegression(warm_start=True,
                                          n_jobs=-1,
                                          solver='lbfgs',
                                          max_iter=1000,
                                          multi_class='multinomial')
        logreg_model.coef_ = logreg_coef
        logreg_model.intercept_ = logreg_intercept
        logreg_model.classes_ = np.arange(n_classes)

    # Sequence of accuracy estimates.
    curves = {'logreg': []}
    predictions = {scheme: torch.Tensor().long() for scheme in curves.keys()}

    if train:
        best_accuracy = 0

    spikes = {}
    for layer in set(network.layers):
        spikes[layer] = Monitor(network.layers[layer],
                                state_vars=['s'],
                                time=time)
        network.add_monitor(spikes[layer], name='%s_spikes' % layer)

    # Train the network.
    if train:
        print('\nBegin training.\n')
    else:
        print('\nBegin test.\n')

    inpt_ims = None
    inpt_axes = None
    spike_ims = None
    spike_axes = None
    weights_im = None

    plot_update_interval = 100

    start = t()
    for i in range(n_examples):
        if i % progress_interval == 0:
            print('Progress: %d / %d (%.4f seconds)' %
                  (i, n_examples, t() - start))
            start = t()

        if i % update_interval == 0 and i > 0:
            if train:
                network.connections['X', 'Y'].update_rule.nu[1] *= lr_decay

            if i % len(labels) == 0:
                current_labels = labels[-update_interval:]
                current_record = full_spike_record[-update_interval:]
            else:
                current_labels = labels[i % len(labels) - update_interval:i %
                                        len(labels)]
                current_record = full_spike_record[i % len(labels) -
                                                   update_interval:i %
                                                   len(labels)]

            # Update and print accuracy evaluations.
            curves, preds = update_curves(curves,
                                          current_labels,
                                          n_classes,
                                          full_spike_record=current_record,
                                          logreg=logreg_model)
            print_results(curves)

            for scheme in preds:
                predictions[scheme] = torch.cat(
                    [predictions[scheme], preds[scheme]], -1)

            # Save accuracy curves to disk.
            to_write = ['train'] + params if train else ['test'] + params
            f = '_'.join([str(x) for x in to_write]) + '.pt'
            torch.save((curves, update_interval, n_examples),
                       open(os.path.join(curves_path, f), 'wb'))

            if train:
                if any([x[-1] > best_accuracy for x in curves.values()]):
                    print(
                        'New best accuracy! Saving network parameters to disk.'
                    )

                    # Save network to disk.
                    network.save(os.path.join(params_path, model_name + '.pt'))
                    path = os.path.join(
                        params_path,
                        '_'.join(['auxiliary', model_name]) + '.pt')
                    torch.save((logreg_model.coef_, logreg_model.intercept_),
                               open(path, 'wb'))
                    best_accuracy = max([x[-1] for x in curves.values()])

                # Refit logistic regression model.
                logreg_model = logreg_fit(full_spike_record[:i], labels[:i],
                                          logreg_model)

            print()

        # Get next input sample.
        image = images[i % len(images)]
        sample = bernoulli(datum=image, time=time, dt=dt,
                           max_prob=1).unsqueeze(1).unsqueeze(1)
        inpts = {'X': sample}

        # Run the network on the input.
        network.run(inpts=inpts, time=time)

        network.connections['X', 'Y_'].w = network.connections['X', 'Y'].w

        # Add to spikes recording.
        spike_record[i % update_interval] = spikes['Y_'].get('s').view(
            time, -1)
        full_spike_record[i] = spikes['Y_'].get('s').view(time, -1).sum(0)

        # Optionally plot various simulation information.
        if plot and i % plot_update_interval == 0:
            _input = inpts['X'].view(time, 400).sum(0).view(20, 20)
            w = network.connections['X', 'Y'].w

            _spikes = {
                'X': spikes['X'].get('s').view(400, time),
                'Y': spikes['Y'].get('s').view(n_filters * total_conv_size,
                                               time),
                'Y_': spikes['Y_'].get('s').view(n_filters * total_conv_size,
                                                 time)
            }

            inpt_axes, inpt_ims = plot_input(image.view(20, 20),
                                             _input,
                                             label=labels[i % len(labels)],
                                             ims=inpt_ims,
                                             axes=inpt_axes)
            spike_ims, spike_axes = plot_spikes(spikes=_spikes,
                                                ims=spike_ims,
                                                axes=spike_axes)
            weights_im = plot_conv2d_weights(
                w, im=weights_im, wmax=network.connections['X', 'Y'].wmax)

            plt.pause(1e-2)

        network.reset_()  # Reset state variables.

    print(f'Progress: {n_examples} / {n_examples} ({t() - start:.4f} seconds)')

    i += 1

    if i % len(labels) == 0:
        current_labels = labels[-update_interval:]
        current_record = full_spike_record[-update_interval:]
    else:
        current_labels = labels[i % len(labels) - update_interval:i %
                                len(labels)]
        current_record = full_spike_record[i % len(labels) -
                                           update_interval:i % len(labels)]

    # Update and print accuracy evaluations.
    curves, preds = update_curves(curves,
                                  current_labels,
                                  n_classes,
                                  full_spike_record=current_record,
                                  logreg=logreg_model)
    print_results(curves)

    for scheme in preds:
        predictions[scheme] = torch.cat([predictions[scheme], preds[scheme]],
                                        -1)

    if train:
        if any([x[-1] > best_accuracy for x in curves.values()]):
            print('New best accuracy! Saving network parameters to disk.')

            # Save network to disk.
            network.save(os.path.join(params_path, model_name + '.pt'))
            path = os.path.join(params_path,
                                '_'.join(['auxiliary', model_name]) + '.pt')
            torch.save((logreg_model.coef_, logreg_model.intercept_),
                       open(path, 'wb'))

    if train:
        print('\nTraining complete.\n')
    else:
        print('\nTest complete.\n')

    print('Average accuracies:\n')
    for scheme in curves.keys():
        print('\t%s: %.2f' % (scheme, float(np.mean(curves[scheme]))))

    # Save accuracy curves to disk.
    to_write = ['train'] + params if train else ['test'] + params
    to_write = [str(x) for x in to_write]
    f = '_'.join(to_write) + '.pt'
    torch.save((curves, update_interval, n_examples),
               open(os.path.join(curves_path, f), 'wb'))

    # Save results to disk.
    results = [np.mean(curves['logreg']), np.std(curves['logreg'])]

    to_write = params + results if train else test_params + results
    to_write = [str(x) for x in to_write]
    name = 'train.csv' if train else 'test.csv'

    if not os.path.isfile(os.path.join(results_path, name)):
        with open(os.path.join(results_path, name), 'w') as f:
            if train:
                columns = [
                    'seed', 'n_train', 'kernel_size', 'stride', 'n_filters',
                    'padding', 'inhib', 'time', 'lr', 'lr_decay', 'dt',
                    'intensity', 'update_interval', 'mean_logreg', 'std_logreg'
                ]

                header = ','.join(columns) + '\n'
                f.write(header)
            else:
                columns = [
                    'seed', 'n_train', 'n_test', 'kernel_size', 'stride',
                    'n_filters', 'padding', 'inhib', 'time', 'lr', 'lr_decay',
                    'dt', 'intensity', 'update_interval', 'mean_logreg',
                    'std_logreg'
                ]

                header = ','.join(columns) + '\n'
                f.write(header)

    with open(os.path.join(results_path, name), 'a') as f:
        f.write(','.join(to_write) + '\n')

    if labels.numel() > n_examples:
        labels = labels[:n_examples]
    else:
        while labels.numel() < n_examples:
            if 2 * labels.numel() > n_examples:
                labels = torch.cat(
                    [labels, labels[:n_examples - labels.numel()]])
            else:
                labels = torch.cat([labels, labels])

    # Compute confusion matrices and save them to disk.
    confusions = {}
    for scheme in predictions:
        confusions[scheme] = confusion_matrix(labels, predictions[scheme])

    to_write = ['train'] + params if train else ['test'] + test_params
    f = '_'.join([str(x) for x in to_write]) + '.pt'
    torch.save(confusions, os.path.join(confusion_path, f))
Example #24
labels = np.load("labels.npy")
print("labels")
print(labels.shape)
labels = np.sort(labels, axis=0)

X_train, X_test, y_train, y_test = train_test_split(X_in,
                                                    y_in,
                                                    test_size=0.2,
                                                    random_state=42)
print("done splitting")
LR = LogisticRegression(random_state=0,
                        verbose=2,
                        n_jobs=4,
                        multi_class='multinomial',
                        max_iter=2000)
LR.classes_ = labels
LR.fit(X_train, y_train.flatten())
print("done fitting")
print(LR.predict(X_test))
temp = LR.predict_proba(X_test)
print(temp)
print(list(temp[0]))
print(y_test)
score = LR.score(X_test, y_test)
print(score)
# yhat = LR.predict_proba(X_test)
# print("predicted")
# print(yhat)
# print("actual")
# print(y_test)
# print("done predicting")