# ## Perceptron
# An old acquaintance; this one also offers a regularization option. As shown below, I have tried it both with and without regularization (l2 norm).
#
# - It iterates over all the data points.
# - If the current weights classify a given point correctly, nothing changes.
# - If they do not, the weights are corrected.
# - If nothing changes over a full pass, or the maximum number of iterations is reached, it stops (see the sketch below).
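#
# A minimal NumPy sketch of the update rule just described (illustrative only,
# not scikit-learn's implementation; the data arrays are hypothetical):

import numpy as np

def perceptron_train(X, y, max_iter=1000):
    """Train a perceptron on labels y in {-1, +1}; return weights and bias."""
    w = np.zeros(X.shape[1], dtype=float)
    b = 0.0
    for _ in range(max_iter):
        changed = False
        for xi, yi in zip(X, y):
            if yi * (np.dot(w, xi) + b) <= 0:  # misclassified: correct it
                w += yi * xi
                b += yi
                changed = True
        if not changed:  # a full pass with no updates: converged, stop
            break
    return w, b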

# In[7]:

# Perceptron model

from sklearn.linear_model import Perceptron
models = [
    Perceptron(random_state=1),
    Perceptron(alpha=0.0001, penalty='l2', random_state=1),
    Perceptron(alpha=0.00025, penalty='l2', random_state=1),
    Perceptron(alpha=0.0004, penalty='l2', random_state=1),
]

model_strings = [
    'Perceptron sin regularización',
    'Perceptron alpha = 0.0001',
    'Perceptron alpha = 0.00025',
    'Perceptron alpha = 0.0004',
]

print(validate_models(model_strings, models, X_train, Y_train))
input('\nPress any key to continue\n')
Code example #2
# Assumed imports for this snippet (LCA is deslib's Local Class Accuracy method):
from sklearn.linear_model import Perceptron
from deslib.des.lca import LCA


def test_predict_proba(create_X_y):
    X, y = create_X_y

    clf1 = Perceptron()
    clf1.fit(X, y)
    LCA([clf1, clf1]).fit(X, y)
Code example #3
# Assumed imports for this snippet:
import numpy as np
from sklearn import svm
from sklearn.linear_model import (LogisticRegression, SGDClassifier,
                                  Perceptron, PassiveAggressiveClassifier)
from sklearn.neighbors import KNeighborsClassifier


def classify2(X, Y, classifier, X_test, Y_test):
    name = classifier[0]
    clf = classifier[1]
    print("training %s" % name)
    clf.fit(X, Y)
    y_pred = clf.predict(X_test)
    accuracy = np.mean(y_pred == Y_test) * 100
    print(accuracy)


# define different classifiers
classifiers = [("KNneighbors", KNeighborsClassifier(n_neighbors=3)),
               ("SVM", svm.SVC()),
               ("SAG", LogisticRegression(solver='sag', tol=1e-1)),
               ("SGD", SGDClassifier()), ("ASGD", SGDClassifier(average=True)),
               ("Perceptron", Perceptron()),
               ("Passive-Aggressive I",
                PassiveAggressiveClassifier(loss='hinge', C=1.0)),
               ("Passive-Aggressive II",
                PassiveAggressiveClassifier(loss='squared_hinge', C=1.0))]

##data_test=np.array(1)
##for i in range(10):
##    if np.array_equal(data_test,np.array(1)):
##        cur=np.load('data_test%d_10_7.npy' % i)
##        shape=[1]+list(cur.shape)
##        data_test=np.reshape(cur,shape)
##    else:
##        cur=np.load('data_test%d_10_7.npy' % i)
##        shape=[1]+list(cur.shape)
##        data_test=np.append(data_test,np.reshape(cur,shape),axis=0)
Code example #4
File: wdbc.py  Project: enerve/ml
def main():
    args = cmd_line.parse_args()
    
    util.prefix_init(args)
    util.pre_dataset = "wdbc"

    logger = logging.getLogger()
    logger.setLevel(logging.DEBUG)
    log.configure_logger(logger, util.pre_dataset)

    logger.info("--- WDBC dataset ---")

    data = np.genfromtxt(args.file, delimiter=",",
                         converters={1: lambda x: 1.0 if x=='M' else 0.0})
    Y = data[:, 1].astype(int)
    X = data[:, features_to_use()]

    X, Y, X_valid, Y_valid, X_test, Y_test = \
        data_util.split_into_train_test_sets(X, Y, args.validation_portion,
                                             args.test_portion)
    
    logger.debug("%s %s", X.shape, X_test.shape)

    if args.normalize:
        logger.info("Normalizing...")
        util.pre_norm = "n"
        X, X_valid, X_test = data_util.normalize_all(X, X_valid, X_test)
    
    if args.draw_classes_histogram:
        draw_classes_histogram(X, Y)
        
    if args.draw_classes_data:
        util.draw_classes_data(X, Y, 5, 6)

    if args.bayes:
        logger.info("Bayes classifier...")
        util.pre_alg = "bayes"
        from ml_lib.gaussian_plugin_classifier import GaussianPlugInClassifier 
        # Gaussian plug-in classifier
        gpi_classifier = GaussianPlugInClassifier(X, Y, 2)
        # util.report_accuracy(gpi_classifier.classify(X, Y, 0.5)[0])
        util.report_accuracy(
            gpi_classifier.classify(X_test, Y_test, [0.5, 0.5])[0])

        util.draw_ROC_curve(X_test, Y_test, gpi_classifier)
        # util.draw_classes_pdf(X, Y, gpi_classifier, [0.5, 0.5], 3)
    
    if args.naive:
        logger.info("Naive Bayes classifier...")
        util.pre_alg = "naive"
        from ml_lib.gaussian_naive_classifier import GaussianNaiveClassifier
        # Gaussian naive classifier
        gn_classifier = GaussianNaiveClassifier(X, Y, 2)
        # util.report_accuracy(gn_classifier.classify(X, Y, 0.5)[0])
        util.report_accuracy(
            gn_classifier.classify(X_test, Y_test, [0.5, 0.5])[0])
        
        util.draw_ROC_curve(X_test, Y_test, gn_classifier)

    if args.sklearn_perceptron:
        logger.info("Scikit-learn Perceptron...")
        util.pre_alg = "scikitperceptron"
        from sklearn.linear_model import Perceptron
        perceptron = Perceptron(tol=None, max_iter=300000)
        perceptron.fit(X, Y)
        logger.info("Mean accuracy: %s%%", 100 * perceptron.score(X, Y))

    if args.perceptron:
        logger.info("Perceptron...")
        util.pre_alg = "perceptron"
        from ml_lib.perceptron import Perceptron

        helper.classify_one_vs_one([],
            X, Y, X_test, Y_test, 2,
            lambda X, Y: Perceptron(X, Y, args.stochastic, 1, 30000, 0))
            
    if args.logistic:
        logger.info("Logistic Regression...")
        util.pre_alg = "logistic"
        from ml_lib.logistic import Logistic
        
        helper.classify_one_vs_one([],
            X, Y, X_test, Y_test, 2,
            lambda X, Y: Logistic(X, Y, step_size=0.001, max_steps=15000,
                                  reg_constant=1))
        

    if args.knn:
        logger.info("k-Nearest Neighbor...")
        util.pre_alg = "knn"
        from ml_lib.knn import KNN
        
        k_range = 10
        p_range = 6 # / 2.0
        a_matrix = np.zeros((k_range, p_range))
        for k in range(k_range):
            logger.info("%s-NN", k+1)
            for p in range(p_range):
                knn_classifier = KNN(X, Y, 1+k, dist_p=(p+1)/2.0)
                a_matrix[k, p] = util.get_accuracy(
                    knn_classifier.classify(X_test, Y_test))

        logger.info("%s", a_matrix)

    if args.svm:
        logger.info("Support Vector Machine...")
        util.pre_alg = "svm"
        from ml_lib.svm import SVM, RBFKernel
        
        single_svm_test = False
        if single_svm_test:
            cm = SVM(X, Y, lam=None).classify(X_test, Y_test)
            util.report_accuracy(cm)

        single_svm_rbf_test = False
        if single_svm_rbf_test:
            svm = SVM(X, Y, lam=100, kernel=RBFKernel(0.3))
            cm = svm.classify(X_test, Y_test)
            util.report_accuracy(cm)

        linear_svm_validation = False
        if linear_svm_validation:
            #lam_val = [math.pow(1.2, p) for p in range(-10,20)]
            lam_val = [p/2 for p in range(1,200)]
    
            acc = np.zeros(len(lam_val))
            for i, lam in enumerate(lam_val):
                svm_classifier = SVM(X, Y, lam)  # optionally: kernel=RBFKernel(1)
                #util.report_accuracy(svm_classifier.classify(X, Y))
                cm = svm_classifier.classify(X_valid, Y_valid)
                util.report_accuracy(cm)
                acc[i] = util.get_accuracy(cm)
    
            logger.info("\nAccuracies found for lambda:")
            for i, lam in enumerate(lam_val):
                logger.info("%f: \t%f", lam, acc[i])
            util.plot_accuracy(acc, lam_val)

        rbf_svm_validation = False
        if rbf_svm_validation:
            for reps in range(2):
                pre_svm_cv_x = "b" if reps == 0 else "l"
                
                if pre_svm_cv_x == "b":
                    lam_val = [math.pow(1.5, p+1)*10 for p in range(7)]
                    b_val = [(p+1)/20 for p in range(27)]
                elif pre_svm_cv_x == "l":
                    lam_val = [math.pow(1.2, p+1)*10 for p in range(27)]
                    b_val = [(p+1)/10 for p in range(7)]
                logger.debug(lam_val)
                logger.debug(b_val)
    
                # Use a single instance so K matrix can be shared better
                single_svm = SVM(X)
                lmbd_classifier = lambda X, Y, b, lam, svm=single_svm: \
                    svm.initialize(Y, lam, RBFKernel(b))

                cm, acc_2d_list = helper.classify([b_val, lam_val], X, Y,
                                                  X_valid,  Y_valid,
                                                  lmbd_classifier)

                acc_matrix = np.array(acc_2d_list)
                logger.info("%s", acc_matrix)
                
                suff = "val_%s"%(pre_svm_cv_x)
                np.savetxt(util.prefix() + suff + ".csv",
                           acc_matrix, delimiter=",", fmt='%.3f')
                if pre_svm_cv_x == 'b':
                    util.plot_accuracies(acc_matrix.T, b_val, "RBF width b",
                                         lam_val, "Lambda (C)", suff)
                elif pre_svm_cv_x == 'l':
                    util.plot_accuracies(acc_matrix, lam_val,  "Lambda (C)",
                                         b_val, "RBF width b", suff)
Code example #5
# Assumed context for this snippet: iris comes from sklearn.datasets
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target  # the labels are already encoded as 0, 1, 2
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

# Scale the features to get the best performance out of the learning algorithm
from sklearn.preprocessing import StandardScaler

sc = StandardScaler()
sc.fit(X_train)  # estimate the mean and standard deviation of each feature
sc.mean_  # inspect the feature means
sc.scale_  # inspect the feature standard deviations
X_train_std = sc.transform(X_train)
# Note: standardize the test set with the same parameters so that the test and
# training sets remain comparable
X_test_std = sc.transform(X_test)

# Train the perceptron model
from sklearn.linear_model import Perceptron
# max_iter: the number of passes (epochs) over the training data
#           (this parameter was called n_iter in older scikit-learn versions)
# eta0: the learning rate
# random_state: seed so that every run sees the training data in the same order
ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

# Classify the test set; this returns an array of predictions
y_pred = ppn.predict(X_test_std)
# Compute the model's accuracy on the test set (I got 0.9, not bad)
accuracy_score(y_test, y_pred)
Code example #6
File: p356_perceptron.py  Project: inJAJA/Study
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

iris = load_iris()
x = iris.data[:, (2, 3)]  # petal length and width
y = (iris.target == 0).astype(int)  # is it Iris setosa?

# Equivalent to an SGDClassifier with loss='perceptron',
# learning_rate='constant', eta0=1 (the learning rate), and
# penalty=None (no regularization)
per_clf = Perceptron()
per_clf.fit(x, y)

y_pred = per_clf.predict([[2, 0.5]])
print(y_pred)  # [0]
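
# The equivalence noted above can be checked directly: an SGDClassifier
# configured this way should learn the same kind of decision boundary
# (a hedged sketch reusing the x and y arrays defined above):
from sklearn.linear_model import SGDClassifier

sgd_clf = SGDClassifier(loss='perceptron', learning_rate='constant',
                        eta0=1, penalty=None)
sgd_clf.fit(x, y)
print(sgd_clf.predict([[2, 0.5]]))  # expected to match per_clf.predict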
Code example #7
File: main.py  Project: nikitosoleil/University
from sklearn.linear_model import Perceptron  # assumed import, used below
from sklearn.metrics import accuracy_score
import numpy as np


def generate(count):
    x = []
    y = []
    for ir in range(0, count):
        math = np.random.randint(1, 6)
        physics = np.random.randint(1, 6)
        russian = np.random.randint(1, 6)
        disabled = np.random.randint(0, 2)
        x.append([math, physics, russian, disabled])
        math_plus = math >= 4
        physics_plus = physics >= 4
        sum_plus = math + physics + russian >= 11
        y.append(1 if disabled == 1 or (math_plus and physics_plus and sum_plus) else 0)
    return np.array(x), np.array(y)


if __name__ == '__main__':
    X, y = generate(100)
    X_test, y_test = generate(20)
    perceptron = Perceptron(tol=0.0000001)
    perceptron.fit(X, y)
    predict = perceptron.predict(X_test)
    print(accuracy_score(predict, y_test))

    x_example = [[3, 3, 5, 1]]
    print("Passed" if perceptron.predict(x_example) == 1 else "Not passed")
Code example #8
TRAIN_DATA_NUM = 2
TEST_DATA_NUM = 1000

#%%

if __name__ == "__main__":
    dataset = Dataset()
    svm_erro = 0
    sv_vs_pla = 0
    sv_num = 0
    pla_erro = 0
    for _ in range(RUNS):
        dataset.create_target_function()
        dataset.generate_data(TRAIN_DATA_NUM)
        pla = Perceptron(max_iter=1000).fit(
            dataset.X, dataset.Y
        )  # if fit_intercept=False the learning algorithm will force y intercept at the origin 0
        svm = SVM()
        svm.fit(dataset.X, dataset.Y)

        plot_contour(svm, dataset)

        dataset.generate_data(TEST_DATA_NUM)
        svm_predict = svm.predict(dataset.X)
        pla_predict = pla.predict(dataset.X)

        sv_num += len(svm.alphas[svm.sv])
        svm_erro += sum(svm_predict != dataset.Y) / TEST_DATA_NUM
        pla_erro += sum(pla_predict != dataset.Y) / TEST_DATA_NUM
        if sum(svm_predict != dataset.Y) / TEST_DATA_NUM < sum(
                pla_predict != dataset.Y) / TEST_DATA_NUM:
Code example #9
                               non_negative=True)

# Iterator over parsed Reuters SGML files.
data_stream = stream_reuters_documents()

# We learn a binary classification between the "acq" class and all the others.
# "acq" was chosen as it is more or less evenly distributed in the Reuters
# files. For other datasets, one should take care of creating a test set with
# a realistic portion of positive instances.
all_classes = np.array([0, 1])
positive_class = 'acq'

# Here are some classifiers that support the `partial_fit` method
partial_fit_classifiers = {
    'SGD': SGDClassifier(),
    'Perceptron': Perceptron(),
    'NB Multinomial': MultinomialNB(alpha=0.01),
    'Passive-Aggressive': PassiveAggressiveClassifier(),
}


def get_minibatch(doc_iter, size, pos_class=positive_class):
    """Extract a minibatch of examples, return a tuple X_text, y.

    Note: size is before excluding invalid docs with no topics assigned.

    """
    data = [(u'{title}\n\n{body}'.format(**doc), pos_class in doc['topics'])
            for doc in itertools.islice(doc_iter, size) if doc['topics']]
    if not len(data):
        return np.asarray([], dtype=int), np.asarray([], dtype=int)
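
# Not part of the original excerpt: a hedged sketch of the partial_fit call
# these classifiers support. 'vect' is a stand-in for the hashing vectorizer
# whose construction is truncated above, and the minibatch is made up.
X_mini = vect.transform([u'sample title\n\nsample body text'])
y_mini = np.array([1])
for clf_name, clf in partial_fit_classifiers.items():
    # the full class list must be provided on the first partial_fit call
    clf.partial_fit(X_mini, y_mini, classes=all_classes)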
Code example #10
    "Perceptron",
    # "XGBreglinear",
    # "XGBreglogistic",
    "NearestNeighbors",
    # "LinearSVM",
    "DecisionTree",
    "RandomForest",
    "AdaBoost",
    #"NeuralNet",
    #"NaiveBayes",
    #"LDA",
    #"QDA"
]

classifiers = [
    Perceptron(),
    # xgb.XGBClassifier(objective='reg:linear'),
    # xgb.XGBClassifier(objective='reg:logistic'),
    KNeighborsClassifier(10),
    # SVC(kernel="linear"),
    DecisionTreeClassifier(max_depth=5),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
    AdaBoostClassifier(),
    #MLPClassifier(verbose=False),
    #GaussianNB(),
    #LinearDiscriminantAnalysis(),
    #QuadraticDiscriminantAnalysis()
]


def getBestClassifiers(X, y, testPerc=0.4):
Code example #11
# perceptron to classify documents

from sklearn.datasets import fetch_20newsgroups
from sklearn.metrics import f1_score, classification_report
from sklearn.feature_extraction.text import TfidfVectorizer, HashingVectorizer
from sklearn.linear_model import Perceptron

categories = ['rec.sport.hockey', 'rec.sport.baseball', 'rec.autos']
newsgroups_train = fetch_20newsgroups(subset='train',
                                      categories=categories,
                                      remove=('headers', 'footers', 'quotes'))
newsgroups_test = fetch_20newsgroups(subset='test',
                                     categories=categories,
                                     remove=('headers', 'footers', 'quotes'))
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(newsgroups_train['data'])
X_test = vectorizer.transform(newsgroups_test['data'])

classifier = Perceptron(max_iter=100, eta0=0.1)  # n_iter was renamed max_iter in newer scikit-learn
classifier.fit(X_train, newsgroups_train['target'])
predictions = classifier.predict(X_test)
print(classification_report(newsgroups_test['target'], predictions))
Code example #12

# Iterator over parsed Reuters SGML files.
data_stream = stream_reuters_documents()

# We learn a binary classification between the "acq" class and all the others.
# "acq" was chosen as it is more or less evenly distributed in the Reuters
# files. For other datasets, one should take care of creating a test set with
# a realistic portion of positive instances.
all_classes = np.array([0, 1])
positive_class = 'acq'

# Here are some classifiers that support the `partial_fit` method
partial_fit_classifiers = {
    'SGD': SGDClassifier(max_iter=5),
    'Perceptron': Perceptron(tol=1e-3),
    'NB Multinomial': MultinomialNB(alpha=0.01),
    'Passive-Aggressive': PassiveAggressiveClassifier(tol=1e-3),
}


def get_minibatch(doc_iter, size, pos_class=positive_class):
    """Extract a minibatch of examples, return a tuple X_text, y.

    Note: size is before excluding invalid docs with no topics assigned.

    """
    data = [(u'{title}\n\n{body}'.format(**doc), pos_class in doc['topics'])
            for doc in itertools.islice(doc_iter, size)
            if doc['topics']]
    if not len(data):
Code example #13
def get_model_from_name(model_name, training_params=None):
    global keras_imported

    # For Keras
    epochs = 250
    if os.environ.get('is_test_suite', 0) == 'True' and model_name[:12] == 'DeepLearning':
        print('Heard that this is the test suite. Limiting number of epochs, which will increase training speed dramatically at the expense of model accuracy')
        epochs = 30

    all_model_params = {
        'LogisticRegression': {'n_jobs': -2},
        'RandomForestClassifier': {'n_jobs': -2},
        'ExtraTreesClassifier': {'n_jobs': -1},
        'AdaBoostClassifier': {'n_estimators': 10},
        'SGDClassifier': {'n_jobs': -1},
        'Perceptron': {'n_jobs': -1},
        'LinearSVC': {'dual': False},
        'LinearRegression': {'n_jobs': -2},
        'RandomForestRegressor': {'n_jobs': -2},
        'LinearSVR': {'dual': False, 'loss': 'squared_epsilon_insensitive'},
        'ExtraTreesRegressor': {'n_jobs': -1},
        'MiniBatchKMeans': {'n_clusters': 8},
        'GradientBoostingRegressor': {'presort': False, 'learning_rate': 0.05, 'warm_start': True},
        'GradientBoostingClassifier': {'presort': False, 'learning_rate': 0.05, 'warm_start': True},
        'SGDRegressor': {'shuffle': False},
        'PassiveAggressiveRegressor': {'shuffle': False},
        'AdaBoostRegressor': {'n_estimators': 10},
        'XGBRegressor': {'nthread':-1, 'n_estimators': 200},
        'XGBClassifier': {'nthread':-1, 'n_estimators': 200},
        'LGBMRegressor': {'n_estimators': 2000, 'learning_rate': 0.05, 'num_leaves': 8, 'lambda_l2': 0.001},
        'LGBMClassifier': {'n_estimators': 2000, 'learning_rate': 0.05, 'num_leaves': 8, 'lambda_l2': 0.001},
        'DeepLearningRegressor': {'epochs': epochs, 'batch_size': 50, 'verbose': 2},
        'DeepLearningClassifier': {'epochs': epochs, 'batch_size': 50, 'verbose': 2},
        'CatBoostRegressor': {},
        'CatBoostClassifier': {}
    }

    model_params = all_model_params.get(model_name, None)
    if model_params is None:
        model_params = {}

    if training_params is not None:
        print('Now using the model training_params that you passed in:')
        print(training_params)
        # Overwrite our stock params with what the user passes in (i.e., if the user wants 10,000 trees, we will let them do it)
        model_params.update(training_params)
        print('After overwriting our defaults with your values, here are the final params that will be used to initialize the model:')
        print(model_params)


    model_map = {
        # Classifiers
        'LogisticRegression': LogisticRegression(),
        'RandomForestClassifier': RandomForestClassifier(),
        'RidgeClassifier': RidgeClassifier(),
        'GradientBoostingClassifier': GradientBoostingClassifier(),
        'ExtraTreesClassifier': ExtraTreesClassifier(),
        'AdaBoostClassifier': AdaBoostClassifier(),


        'SGDClassifier': SGDClassifier(),
        'Perceptron': Perceptron(),
        'PassiveAggressiveClassifier': PassiveAggressiveClassifier(),
        'LinearSVC': LinearSVC(),

        # Regressors
        'LinearRegression': LinearRegression(),
        'RandomForestRegressor': RandomForestRegressor(),
        'Ridge': Ridge(),
        'LinearSVR': LinearSVR(),
        'ExtraTreesRegressor': ExtraTreesRegressor(),
        'AdaBoostRegressor': AdaBoostRegressor(),
        'RANSACRegressor': RANSACRegressor(),
        'GradientBoostingRegressor': GradientBoostingRegressor(),

        'Lasso': Lasso(),
        'ElasticNet': ElasticNet(),
        'LassoLars': LassoLars(),
        'OrthogonalMatchingPursuit': OrthogonalMatchingPursuit(),
        'BayesianRidge': BayesianRidge(),
        'ARDRegression': ARDRegression(),
        'SGDRegressor': SGDRegressor(),
        'PassiveAggressiveRegressor': PassiveAggressiveRegressor(),

        # Clustering
        'MiniBatchKMeans': MiniBatchKMeans()
    }

    if xgb_installed:
        model_map['XGBClassifier'] = XGBClassifier()
        model_map['XGBRegressor'] = XGBRegressor()

    if lgb_installed:
        model_map['LGBMRegressor'] = LGBMRegressor()
        model_map['LGBMClassifier'] = LGBMClassifier()

    if catboost_installed:
        model_map['CatBoostRegressor'] = CatBoostRegressor(calc_feature_importance=True)
        model_map['CatBoostClassifier'] = CatBoostClassifier(calc_feature_importance=True)

    if model_name[:12] == 'DeepLearning':
        if not keras_imported:
            # Suppress some level of logs if TF is installed (but allow it to not be installed, and use Theano instead)
            try:
                os.environ['TF_CPP_MIN_VLOG_LEVEL'] = '3'
                os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
                from tensorflow import logging
                logging.set_verbosity(logging.INFO)
            except:
                pass

            global maxnorm
            global Dense, Dropout
            global LeakyReLU, PReLU
            global Sequential
            global keras_load_model
            global regularizers
            global KerasRegressor, KerasClassifier

            from keras.constraints import maxnorm
            from keras.layers import Dense, Dropout
            from keras.layers.advanced_activations import LeakyReLU, PReLU
            from keras.models import Sequential
            from keras.models import load_model as keras_load_model
            from keras import regularizers
            from keras.wrappers.scikit_learn import KerasRegressor, KerasClassifier
            maxnorm
            Dense
            Dropout
            LeakyReLU
            PReLU
            Sequential
            keras_load_model
            regularizers
            KerasRegressor
            KerasClassifier
            keras_imported = True

        model_map['DeepLearningClassifier'] = KerasClassifier(build_fn=make_deep_learning_classifier)
        model_map['DeepLearningRegressor'] = KerasRegressor(build_fn=make_deep_learning_model)

    try:
        model_without_params = model_map[model_name]
    except KeyError as e:
        print('It appears you are trying to use a library that is not available when we try to import it, or using a value for model_names that we do not recognize')
        raise(e)
    model_with_params = model_without_params.set_params(**model_params)

    return model_with_params
Code example #14
# Assumed imports for this snippet (DESP is deslib's DES-P method):
from sklearn.linear_model import Perceptron
from deslib.des.des_p import DESP


def test_predict_proba():
    X = X_dsel_ex1
    y = y_dsel_ex1
    clf1 = Perceptron()
    clf1.fit(X, y)
    DESP([clf1, clf1]).fit(X, y)
Code example #15
# Logistic regression CV takes too much time to run
# print("\nUsing Logistic regression CV")
# clf_LGCV = LogisticRegressionCV()
# scores = cross_val_score(clf_LGCV, feature_normal, labels, cv=10, n_jobs=4)
# print(scores)
# print("Accuracy", scores.mean())

print("\nUsing MLPClassifier with a single hidden layer")
mlp = MLPClassifier(alpha=1)
scores = cross_val_score(mlp, feature_normal, labels, cv=10, n_jobs=4)
print(scores)
print("Accuracy", scores.mean())

print("\nUsing the perceptron")
per = Perceptron(fit_intercept=False, max_iter=10, shuffle=False)
scores = cross_val_score(per, feature_normal, labels, cv=10, n_jobs=4)
print(scores)
print("Accuracy", scores.mean())

print("\nUsing MLPClassifier with 3 hidden layers")
mlp = MLPClassifier(hidden_layer_sizes=(30, 30, 30))
scores = cross_val_score(mlp, feature_normal, labels, cv=10, n_jobs=4)
print(scores)
print("Accuracy", scores.mean())

print("\nUsing Passive Aggressive Classifier")
pac = PassiveAggressiveClassifier()
scores = cross_val_score(pac, feature_normal, labels, cv=10, n_jobs=4)
print(scores)
print("Accuracy", scores.mean())
Code example #16
accuracy = total_correct_predictions / total_predictions_made * 100

######################  Passive Aggressive ###########################--Code from ASTD
classifier = PassiveAggressiveClassifier(max_iter=100)
classifier.fit(X_train, Y_train)
# Predicting the Test set results
y_pred = classifier.predict(X_test)
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, y_pred)
total_correct_predictions = cm[0, 0] + cm[1, 1] + cm[2, 2]
total_predictions_made = np.sum(cm)
accuracy = total_correct_predictions / total_predictions_made * 100

###################### Perceptron  ###################################--Code from ASTD
classifier = Perceptron(max_iter=100)
classifier.fit(X_train, Y_train)
# Predicting the Test set results
y_pred = classifier.predict(X_test)
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, y_pred)
total_correct_predictions = cm[0, 0] + cm[1, 1] + cm[2, 2]
total_predictions_made = np.sum(cm)
accuracy = total_correct_predictions / total_predictions_made * 100

######################  bnb ###########################################--Code from ASTD
classifier = BernoulliNB(binarize=0.5)
classifier.fit(X_train, Y_train)
# Predicting the Test set results
y_pred = classifier.predict(X_test)
Code example #17
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=0)

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

ppn = Perceptron(max_iter=40, eta0=0.1, random_state=0)
ppn.fit(X_train_std, y_train)

# y_pred = ppn.predict(X_test_std)
# print(f'Misclassified samples: {(y_test != y_pred).sum()}')

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std,
                      y=y_combined,
                      classifier=ppn,
                      test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
plt.ylabel('petal width [standardized]')
plt.legend(loc='upper left')
plt.show()
Code example #18
File: __main__.py  Project: Nithingale/epitech-ml-nlp
    # Extract data from our parser output and apply tokenization if enabled
    if USE_NTLK_TOKENIZER:
        stopWords: set = set(stopwords.words('english'))
        trainingX: list = [' '.join(word for word in word_tokenize(userData.data) if word not in stopWords) for userData in trainingData]
        testX: list = [' '.join(word for word in word_tokenize(userData.data) if word not in stopWords) for userData in testData]
    else:
        trainingX: list = [userData.data for userData in trainingData]
        testX: list = [userData.data for userData in testData]

    trainingYGender: list = [userData.gender for userData in trainingData]
    testYGender: list = [userData.gender for userData in testData]

    trainingYAge: list = [userData.age for userData in trainingData]
    testYAge: list = [userData.age for userData in testData]

    defaultClassifier = Perceptron()

    print('INFO: Benchmarking Count Vectorizer (1/2)...', flush = True)

    BenchmarkVectorizer('CountVectorizer.Ngram12.Word', CountVectorizer(max_features = FEATURES_COUNT, ngram_range = (1, 2), analyzer = 'word'), defaultClassifier).run(timeStr, trainingX, trainingYGender, testX, testYGender)
    BenchmarkVectorizer('CountVectorizer.Ngram23.Word', CountVectorizer(max_features = FEATURES_COUNT, ngram_range = (2, 3), analyzer = 'word'), defaultClassifier).run(timeStr, trainingX, trainingYGender, testX, testYGender)
    BenchmarkVectorizer('CountVectorizer.Ngram34.Word', CountVectorizer(max_features = FEATURES_COUNT, ngram_range = (3, 4), analyzer = 'word'), defaultClassifier).run(timeStr, trainingX, trainingYGender, testX, testYGender)

    print('INFO: Benchmarking Count Vectorizer (2/2)...', flush = True)

    BenchmarkVectorizer('CountVectorizer.Ngram34.Char', CountVectorizer(max_features = FEATURES_COUNT, ngram_range = (3, 4), analyzer = 'char'), defaultClassifier).run(timeStr, trainingX, trainingYGender, testX, testYGender)
    BenchmarkVectorizer('CountVectorizer.Ngram45.Char', CountVectorizer(max_features = FEATURES_COUNT, ngram_range = (4, 5), analyzer = 'char'), defaultClassifier).run(timeStr, trainingX, trainingYGender, testX, testYGender)
    BenchmarkVectorizer('CountVectorizer.Ngram56.Char', CountVectorizer(max_features = FEATURES_COUNT, ngram_range = (5, 6), analyzer = 'char'), defaultClassifier).run(timeStr, trainingX, trainingYGender, testX, testYGender)

    print('INFO: Benchmarking TFIDF Vectorizer (1/2)...', flush = True)
Code example #19
# 5.4  k-Nearest Neighbors algorithm (or k-NN for short)
knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(X_train, Y_train)
Y_pred = knn.predict(X_test)
acc_knn = round(knn.score(X_train, Y_train) * 100, 2)
acc_knn

# 5.5 Gaussian Naive Bayes
gaussian = GaussianNB()
gaussian.fit(X_train, Y_train)
Y_pred = gaussian.predict(X_test)
acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)
acc_gaussian

# 5.6 Perceptron
perceptron = Perceptron()
perceptron.fit(X_train, Y_train)
Y_pred = perceptron.predict(X_test)
acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)
acc_perceptron

# 5.7 Linear SVC
linear_svc = LinearSVC()
linear_svc.fit(X_train, Y_train)
Y_pred = linear_svc.predict(X_test)
acc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)
acc_linear_svc

# 5.8 Stochastic Gradient Descent
sgd = SGDClassifier()
sgd.fit(X_train, Y_train)
Code example #20
File: test_des_clustering.py  Project: postyear/DES
# Assumed imports for this snippet:
from sklearn.linear_model import Perceptron
from deslib.des.des_clustering import DESClustering


def test_predict_proba(example_estimate_competence):
    X, y = example_estimate_competence[0:2]

    clf1 = Perceptron()
    clf1.fit(X, y)
    DESClustering([clf1, clf1]).fit(X, y)
Code example #21
# Assumed imports for this snippet:
import time
import pickle
import numpy
import pandas as pd
from sklearn.linear_model import Perceptron

#path_test="./test/train_sample_100k.txt"
path_train="D:\\PhD\\Clone\\MlCC\\train_samples\\train_equal_cloneNonClone.txt"
path_train="D:\\PhD\\Clone\\MlCC\\train_samples\\train_sample_100k.txt"
path_test="D:\\PhD\\Clone\\MlCC\\train_samples\\train_sample_100k.txt"
colNames=["block1", "block2", "isClone", "COMP", "NOCL", "NOS", "HLTH", "HVOC", "HEFF", "HBUG", "CREF", "XMET", "LMET", "NLOC", "NOC", "NOA", "MOD", "HDIF", "VDEC", "EXCT", "EXCR", "CAST", "TDN", "HVOL", "NAND", "VREF", "NOPR", "MDN", "NEXP", "LOOP"]

clones_test = pd.read_csv(path_test, names=colNames)
array = clones_test.values
X_test = array[:,3:30]
Y_test = array[:,2]
print("test loaded")

chunkSize=1024
#clf=SGDClassifier()
#clf=PassiveAggressiveClassifier()
clf=Perceptron()
for chunk in pd.read_csv(path_train, names=colNames, chunksize=chunkSize):
    chunk = chunk.sample(frac=1).reset_index(drop=True)  # shuffle data
    array = chunk.values
    X_train = array[:, 3:30]
    Y_train = array[:, 2]
    start_time = time.time()
    model = clf.partial_fit(X_train, Y_train, classes=numpy.unique(Y_train.astype(bool)))
    end_time = time.time()
    print("one chunk complete")

filename = 'sgd_model.sav'
pickle.dump(clf, open(filename, 'wb'))
print("model saved")

# load the model from disk
Code example #22
File: test_des_clustering.py  Project: postyear/DES
# Assumed imports for this snippet:
import pytest
from sklearn.linear_model import Perceptron
from deslib.des.des_clustering import DESClustering


def test_not_clustering_algorithm(create_X_y):
    X, y = create_X_y

    des_clustering = DESClustering(clustering=Perceptron())
    with pytest.raises(ValueError):
        des_clustering.fit(X, y)
Code example #23
"""Read train data"""
train_data = pandas.read_csv('perceptron-train.csv',
                             names=['Class', 'Sign1', 'Sign2'])

train_class = train_data['Class']
train_signs = train_data.drop(['Class'], inplace=False, axis=1)
"""Read test data"""
test_data = pandas.read_csv('perceptron-test.csv',
                            names=['Class', 'Sign1', 'Sign2'])

test_class = test_data['Class']
test_signs = test_data.drop(['Class'], inplace=False, axis=1)

"Train the Perceptron"

clf = Perceptron(random_state=241)
clf.fit(train_signs, train_class)
"""Check an accuracy of prediction for non-normalized data"""

predictions = clf.predict(test_signs)
accuracy = sklearn.metrics.accuracy_score(test_class, predictions)

print('Non-normalized data ', accuracy, '\n')
"""Check an accuracy of prediction for normalized data"""

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
"""Normalize tain and test data"""
train_signs_scaled = scaler.fit_transform(train_signs)
test_signs_scaled = scaler.transform(test_signs)
Code example #24
#Data split for training and testing
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.35,
                                                    random_state=1,
                                                    stratify=y)

#Scaling training data
sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

#Creating perceptron with hyperparameters
ppn = Perceptron(max_iter=40, eta0=0.45, random_state=1)

#This is training the model
ppn.fit(X_train_std, y_train)

#Note: the test data must be scaled with the scaler fitted on the training
#data (already done above); refitting sc on X_test would leak test statistics

#Testing the model data
y_pred = ppn.predict(X_test_std)

# View the predict test data
y_pred

# View model accuracy
Code example #25
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.3,
                                                    random_state=1,
                                                    stratify=y)

print('Labels counts in y:', np.bincount(y))
print('Labels counts in y_train:', np.bincount(y_train))
print('Labels counts in y_test:', np.bincount(y_test))

sc = StandardScaler()
sc.fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)
ppn = Perceptron(max_iter=4, eta0=0.1, random_state=1)
ppn.fit(X_train_std, y_train)

y_pred = ppn.predict(X_test_std)
print('Misclassified samples: %d' % (y_test != y_pred).sum())

print('Accuracy: %.2f' % accuracy_score(y_test, y_pred))
print('Accuracy (ppn.score): %.2f' % ppn.score(X_test_std, y_test))

X_combined_std = np.vstack((X_train_std, X_test_std))
y_combined = np.hstack((y_train, y_test))
plot_decision_regions(X=X_combined_std,
                      y=y_combined,
                      classifier=ppn,
                      test_idx=range(105, 150))
plt.xlabel('petal length [standardized]')
Code example #26
"""
perceptron : 가장 간단한 인공 신경망 구조
TLU threshold logic unit : 퍼셉트론은 TLU 인공 뉴련을 기반으로 하며 입력의 가중치 합을 계산한 뒤,
계산된 합에 계단함수를 적용하여 결과를 출력
"""
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron

iris = load_iris()
X = iris.data[:, (2, 3)]
y = (iris.target == 0).astype(int)  # np.int was removed from NumPy; use int

per_Clf = Perceptron()
per_Clf.fit(X, y)

y_pred = per_Clf.predict(X)
print(y_pred)
'''
[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0]
'''
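
# A hedged sketch of the TLU described in the docstring above: a weighted sum
# of the inputs followed by a step function (the weights and bias here are
# made-up values, purely for illustration):
def tlu(x, w, b):
    z = np.dot(w, x) + b       # weighted sum of the inputs
    return 1 if z >= 0 else 0  # step function

print(tlu(np.array([1.4, 0.2]), w=np.array([0.5, -1.0]), b=-0.3))  # -> 1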
Code example #27
        print("density: %f" % density(clf.coef_))

    print("classification report:")
    print(metrics.classification_report(y_test, pred, target_names=categories))

    print("confusion matrix:")
    print(metrics.confusion_matrix(y_test, pred))

    print()
    clf_descr = str(clf).split('(')[0]
    return clf_descr, score, train_time, test_time


results = []
for clf, name in ((RidgeClassifier(tol=1e-2, solver="lsqr"),
                   "Ridge Classifier"), (Perceptron(n_iter=50), "Perceptron"),
                  (PassiveAggressiveClassifier(n_iter=50),
                   "Passive-Aggressive"),
                  (KNeighborsClassifier(n_neighbors=10), "kNN")):
    print('=' * 80)
    print(name)
    results.append(benchmark(clf))

for penalty in ["l2", "l1"]:
    print('=' * 80)
    print("%s penalty" % penalty.upper())
    # Train Liblinear model
    results.append(
        benchmark(LinearSVC(loss='squared_hinge', penalty=penalty, dual=False, tol=1e-3)))

    # Train SGD model
Code example #28
# import pandas as pd
# from sklearn.model_selection import train_test_split

# Initialize relevant models
# Note: PassiveAggressive and MLP are not supported by AdaBoost
# (a usage sketch follows the user inputs below)
models = [
    None,
    DecisionTreeClassifier(criterion='entropy',
                           splitter='best',
                           max_features=None),
    BernoulliNB(),
    LogisticRegression(solver='lbfgs',
                       multi_class='multinomial',
                       penalty='l2',
                       C=1.0),
    Perceptron(penalty='l1', alpha=0.0001),
    SVC(kernel='rbf', probability=True)
]

algorithms = ['SAMME.R', 'SAMME']

# USER INPUTS #############################################

modelIndex = 1  # Default is DecisionTree
n_estimators = 100  # Default is 50

algorithmIndex = 0  # Use 1 for Perceptron, 0 for all others

# Choose number of training images to use
NUM_TRAINING_IMAGES = 500
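
# A hedged sketch of how these settings could be combined. AdaBoost's SAMME.R
# variant needs predict_proba, which Perceptron lacks, hence algorithmIndex = 1
# (SAMME) for Perceptron. Note: AdaBoostClassifier's 'base_estimator' argument
# was renamed 'estimator' in scikit-learn 1.2.
from sklearn.ensemble import AdaBoostClassifier

ada = AdaBoostClassifier(estimator=models[modelIndex],
                         n_estimators=n_estimators,
                         algorithm=algorithms[algorithmIndex])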
Code example #29
for i in target[:10].values.ravel():
    foo.append(i)  


#splitting data - DO NOT RUN WITH FULL SET UNLESS YOU HAVE SEVERAL DAYS TO SPARE!
#x_train, x_test, y_train, y_test = train_test_split(augmented_input, target, test_size = test_size, random_state = random_state)

#splitting much smaller data
x_train, x_test, y_train, y_test = train_test_split(augmented_input[0:10], foo, test_size = test_size, random_state = random_state)


n_iter = 10  # iterations
eta0 = 0.1   # learning rate


perc = Perceptron(max_iter=n_iter, eta0=eta0, random_state=random_state)
perc.fit(x_train, y_train)

#predictions (change to desired test)
y_pred = perc.predict(x_test)


print("accuracy: {0:.2f}%".
         format(accuracy_score(y_test,
                               y_pred)*100))

#n = 100
#change target values into something sclearn likes 
foo = []
for i in target[:100].values.ravel():
    foo.append(i)
Code example #30
from sklearn.linear_model import LogisticRegression, Perceptron, SGDClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, ExtraTreesClassifier

from sklearn.model_selection import cross_val_score

# Set random state and number of estimators for tree based models
random_state = 4
n_estimators = 100

models = [
    LogisticRegression(random_state=random_state),
    Perceptron(random_state=random_state),
    SGDClassifier(random_state=random_state),
    SVC(random_state=random_state),
    KNeighborsClassifier(),
    GaussianNB(),
    DecisionTreeClassifier(random_state=random_state),
    RandomForestClassifier(random_state=random_state,
                           n_estimators=n_estimators),
    ExtraTreesClassifier(random_state=random_state, n_estimators=n_estimators),
    AdaBoostClassifier(random_state=random_state, n_estimators=n_estimators),
    GradientBoostingClassifier(random_state=random_state,
                               n_estimators=n_estimators)
]

# Lists to store the results
model_name = []