def trainModel():
    sss = []
    train_list = [["comp.speech/train/s1.wav", 0], ["comp.speech/train/s2.wav", 1],
                  ["comp.speech/train/s3.wav", 2], ["comp.speech/train/s4.wav", 3],
                  ["comp.speech/train/s5.wav", 4], ["comp.speech/train/s6.wav", 5],
                  ["comp.speech/train/s7.wav", 6], ["comp.speech/train/s8.wav", 7]]
    # Extract features from each training wav and collect them in sss
    for wav_name in train_list:
        add_wav_to_db(wav_name[0], wav_name[1], sss)
    # Flatten the per-file feature lists into one (feature vector, label) set
    data = []
    ans = []
    for index in xrange(len(sss)):
        for v in sss[index]:
            data.append(v[0])
            ans.append(v[1])
    # Train a neural network and a decision tree on the same features
    clfNeural = MLPClassifier()
    clfNeural.fit(data, ans)
    clfForest = DecisionTreeClassifier(max_depth=250)
    clfForest.fit(data, ans)
    # Persist the fitted models for later use
    joblib.dump(clfNeural, 'model.pkl')
    joblib.dump(clfForest, 'forest.pkl')
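# A minimal sketch (not from the original source) of the matching inference step,
# assuming the .pkl files written by trainModel() above and a hypothetical
# extract_features() helper that returns feature vectors in the same format
# that add_wav_to_db stores.
import joblib  # on older scikit-learn: from sklearn.externals import joblib

def predict_wav(wav_path):
    clf_neural = joblib.load('model.pkl')
    clf_forest = joblib.load('forest.pkl')
    features = extract_features(wav_path)  # hypothetical helper, must match training features
    return clf_neural.predict(features), clf_forest.predict(features)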
def train(self, labeledDoc):
    """
    Trains the final classification model.
    :param labeledDoc: labeledDoc object
    :return: True if everything went well, raises an exception on failure
    """
    if self.save_loc is None:
        raise UnboundLocalError("Should have set the save path <setSaveLocation>")
    if self.dependenceModel is None:
        raise UnboundLocalError("Should have set the TextProcessing.Doc2Vec model <setDependenceModel>")
    # First pass: assign an integer index to every tag seen in the corpus
    tags_id = {}
    Y = []
    X = []
    for doc in labeledDoc:
        for tag in doc.tags[1:]:
            if tag not in tags_id:
                tags_id[tag] = len(tags_id)
    labeledDoc.reloadDoc()
    # Second pass: build the multi-label target matrix and the document vectors
    for doc in labeledDoc:
        tags = doc.tags
        text = doc.words
        auxY = np.zeros(len(tags_id))
        for tag in tags[1:]:
            auxY[tags_id[tag]] = 1.
        Y.append(auxY)
        vecX = self.dependenceModel.predict(text)[0]
        X.append(vecX)
    Y = np.array(Y)
    X = np.array(X)
    # note: newer scikit-learn releases name this parameter solver='lbfgs'
    clf = MLPClassifier(algorithm='l-bfgs', alpha=1e-5, hidden_layer_sizes=(15,), random_state=1)
    clf.fit(X, Y)
    print clf.predict(X)
    # Persist the classifier and the tag index so predictions can be mapped back to tag names
    joblib.dump(clf, self.save_loc)
    with open(self.save_loc + "_tags_id", "w") as fout:
        fout.write(json.dumps(tags_id))
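# A minimal sketch (not from the original source) of the matching prediction step:
# it assumes the model and tag index persisted by train() above, and that
# self.dependenceModel is the same Doc2Vec wrapper used at training time.
def predict_tags(self, words):
    clf = joblib.load(self.save_loc)
    with open(self.save_loc + "_tags_id") as fin:
        tags_id = json.loads(fin.read())
    id_tags = {v: k for k, v in tags_id.items()}  # invert the tag index
    vec = self.dependenceModel.predict(words)[0]
    pred = clf.predict([vec])[0]                  # one 0/1 entry per tag
    return [id_tags[i] for i, flag in enumerate(pred) if flag == 1]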
mode = sys.argv[1]
library = 'mine'
if library != 'mine':
    if mode == 'train':
        print "training"
        obj = ExerciseDataProvider(".")
        X = obj.x[:, 0:125]
        y = obj.t
        Xt = obj.xt[:, 0:125]
        yt = obj.tt
        print "input vec shape: ", X.shape
        # print y.shape
        # print X.shape[-1]
        # spectral_mode is not a standard scikit-learn parameter; it presumably
        # comes from a locally modified MLPClassifier
        clf_t = MLPClassifier(algorithm='l-bfgs', alpha=1e-5,
                              hidden_layer_sizes=(X.shape[-1], 19),
                              random_state=1, spectral_mode='fft')
        clf_t.fit(X, y)
        with open('/afs/inf.ed.ac.uk/user/s12/s1235260/model_spec3.pkl', 'wb') as m:
            p.dump((clf_t, Xt, yt), m)
    else:
        with open('/afs/inf.ed.ac.uk/user/s12/s1235260/model_spec3.pkl', 'rb') as m:
            clf, Xt, yt = p.load(m)
        y2 = clf.predict(Xt)
        print clf.coefs_[0].shape  # .shape
        print y2, yt
        print len(y2), len(yt)
        acc = sum(y2 == yt) / float(len(y2))
    Normalizer()
]

# ================= Classifiers
classifier_test = [
    OneVsRestClassifier(SVC()),
    DecisionTreeClassifier(max_depth=5),
    SVC(),
    SVC(kernel="linear", C=0.025),
    LogisticRegressionCV(cv=5, random_state=0),
    GradientBoostingClassifier(random_state=0),
    BaggingClassifier(base_estimator=SVC(), n_estimators=10, random_state=0).fit(features, target),
    ExtraTreesClassifier(n_estimators=100, random_state=0),
    HistGradientBoostingClassifier(),
    MLPClassifier(random_state=1, max_iter=300),
    OneVsOneClassifier(LinearSVC(random_state=0)),
    OutputCodeClassifier(estimator=RandomForestClassifier(random_state=0), random_state=0)
]
print('Imports OK')

# %%
# ================= Looping here
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.pipeline import Pipeline

def super_test(model_scaler, model_classifier, name_scaler, name_classifier,
if __name__ == '__main__':
    np.random.seed(100)
    # Train the hand-rolled network on XOR
    nn = NeuralNetwork([2, 2, 1])
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([0, 1, 1, 0])
    nn.fit(X, y, learning_rate=0.1, epochs=1000)
    print("Final prediction")
    for s in X:
        print(s, nn.predict(s))
    # Train scikit-learn's MLP on the same data for comparison
    mlp = MLPClassifier(random_state=1)
    mlp.fit(X, y)
    # Build a grid over the input space to visualise the decision boundary
    data = X
    markers = ('s', '*', '^')
    colors = ('blue', 'green', 'red')
    cmap = ListedColormap(colors)
    x_min, x_max = data[:, 0].min() - 1, data[:, 0].max() + 1
    y_min, y_max = data[:, 1].min() - 1, data[:, 1].max() + 1
    resolution = 0.01
    # note: this assignment shadows the label vector y defined above
    x, y = np.meshgrid(np.arange(x_min, x_max, resolution),
                       np.arange(y_min, y_max, resolution))
    Z = mlp.predict(np.array([x.ravel(), y.ravel()]).T)
    Z = Z.reshape(x.shape)
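    # The snippet stops after computing Z; a minimal sketch of the plotting step that
    # usually follows, assuming matplotlib is available (not part of the original).
    # The class labels are restated because the meshgrid assignment shadowed y.
    import matplotlib.pyplot as plt

    labels = np.array([0, 1, 1, 0])
    plt.contourf(x, y, Z, alpha=0.4, cmap=cmap)  # filled decision regions
    for idx, cls in enumerate(np.unique(labels)):
        plt.scatter(data[labels == cls, 0], data[labels == cls, 1],
                    c=colors[idx], marker=markers[idx], label=cls)
    plt.legend()
    plt.show()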
class MLPClassifierImpl():
    def __init__(self, hidden_layer_sizes=(100, ), activation='relu', solver='adam',
                 alpha=0.0001, batch_size='auto', learning_rate='constant',
                 learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True,
                 random_state=None, tol=0.0001, verbose=False, warm_start=False,
                 momentum=0.9, nesterovs_momentum=True, early_stopping=False,
                 validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08,
                 n_iter_no_change=10):
        self._hyperparams = {
            'hidden_layer_sizes': hidden_layer_sizes,
            'activation': activation,
            'solver': solver,
            'alpha': alpha,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'learning_rate_init': learning_rate_init,
            'power_t': power_t,
            'max_iter': max_iter,
            'shuffle': shuffle,
            'random_state': random_state,
            'tol': tol,
            'verbose': verbose,
            'warm_start': warm_start,
            'momentum': momentum,
            'nesterovs_momentum': nesterovs_momentum,
            'early_stopping': early_stopping,
            'validation_fraction': validation_fraction,
            'beta_1': beta_1,
            'beta_2': beta_2,
            'epsilon': epsilon,
            'n_iter_no_change': n_iter_no_change
        }
        self._wrapped_model = Op(**self._hyperparams)

    def fit(self, X, y=None):
        if y is not None:
            self._wrapped_model.fit(X, y)
        else:
            self._wrapped_model.fit(X)
        return self

    def predict(self, X):
        return self._wrapped_model.predict(X)

    def predict_proba(self, X):
        return self._wrapped_model.predict_proba(X)
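# A minimal usage sketch for the wrapper above (not from the original source),
# assuming Op is bound to sklearn.neural_network.MLPClassifier; the original
# binding of Op is not shown in this snippet.
from sklearn.datasets import load_iris
from sklearn.neural_network import MLPClassifier as Op  # assumed binding

X, y = load_iris(return_X_y=True)
impl = MLPClassifierImpl(hidden_layer_sizes=(50,), max_iter=500, random_state=0)
impl.fit(X, y)
print(impl.predict(X[:5]))        # class labels for the first few samples
print(impl.predict_proba(X[:5]))  # per-class probabilities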
def classification(self):
    clf = Pipeline([('tfidf', TfidfVectorizer()),
                    ('clf', self.classifier)])
    clf = clf.fit(train_data, train_label)
    pred = clf.predict(test_data)
    print("the Accuracy of %s: %0.4f" % (self.name, np.mean(pred == test_label)))
    print(classification_report(test_label, pred))


clf_name = {
    'Naive Bayes': MultinomialNB(alpha=0.01),
    'SVM': SVC(),
    'Linear SVM': LinearSVC(),
    'Logistic Regression': LogisticRegression(),
    'MLP': MLPClassifier(),
    'KNN': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(n_estimators=8),
    'Adaboost': AdaBoostClassifier()
}

for key, value in clf_name.items():
    test = Classifier(key, value)
    test.classification()

# pipeline = Pipeline([
#     ('vect', CountVectorizer()),
#     ('tfidf', TfidfTransformer()),
#     ('clf', KNeighborsClassifier())
# ])
# from sklearn.cross_validation import train_test_split  (deprecated module path)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

iris = datasets.load_iris()
data = iris.data
labels = iris.target
data_train, data_test, labels_train, labels_test = train_test_split(
    data, labels, test_size=0.5, random_state=1)

# Standardise the features (note: the scaler is fit on the full data set here)
scaler = StandardScaler()
scaler.fit(data)
data_train_std = scaler.transform(data_train)
data_test_std = scaler.transform(data_test)
data_train = data_train_std
data_test = data_test_std

# We add max_iter=1000 because the default is max_iter=200 and
# it is not enough for full convergence
mlp = MLPClassifier(random_state=1, max_iter=1000)
mlp.fit(data_train, labels_train)
pred = mlp.predict(data_test)
print()
print('Misclassified samples: %d' % (labels_test != pred).sum())
print('Accuracy: %.2f' % accuracy_score(labels_test, pred))
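# An equivalent, more compact variant as a sketch (not from the original source):
# chaining the scaler and the MLP in a Pipeline ties the scaling parameters to the
# training split and removes the manual transform bookkeeping above. The raw split
# is recreated because data_train/data_test were overwritten with scaled values.
from sklearn.pipeline import make_pipeline

raw_train, raw_test, lab_train, lab_test = train_test_split(
    data, labels, test_size=0.5, random_state=1)
pipe = make_pipeline(StandardScaler(), MLPClassifier(random_state=1, max_iter=1000))
pipe.fit(raw_train, lab_train)
print('Pipeline accuracy: %.2f' % accuracy_score(lab_test, pipe.predict(raw_test)))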
# from warnings import warn
import numpy as np
from data_utils import *
# import from the public module path; sklearn.neural_network.multilayer_perceptron is deprecated
from sklearn.neural_network import MLPClassifier

data = gather_and_clean_data()
X = data[:, 0:-1]
y = data[:, -1]

MClass = MLPClassifier()
MClass.fit(X, y)
pred = MClass.predict(X)
score = MClass.score(X, y)

print(f"Pred: {pred}")
print(f"Score: {score}")
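# The score above is computed on the training data itself; as a sketch (assuming
# the same X and y produced by gather_and_clean_data()), a held-out split gives
# a fairer estimate of generalisation.
from sklearn.model_selection import train_test_split

X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=0)
clf = MLPClassifier().fit(X_tr, y_tr)
print(f"Held-out score: {clf.score(X_te, y_te)}")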
print(examplesMatrix.shape)
print("Y values results:")
print(Y_vector.shape)
print("Max/min of Y: ")
ymax = max(Y_vector)
ymin = min(Y_vector)
print(str(ymax) + "/" + str(ymin))

X_train, X_test, y_train, y_test = train_test_split(examplesMatrix, Y_vector, test_size=0.2)

print("Training...")
# Commented code for several models:
model = MLPClassifier(hidden_layer_sizes=(128, 64, 32, 16, 8), max_iter=2500)
# model = SVC(gamma='scale', probability=True)
# model = KNeighborsClassifier()
# model = LinearDiscriminantAnalysis()
# model = GaussianNB()
# model = DecisionTreeClassifier()
# model = LogisticRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)

# Test to see if the model(s) is seeing and producing reasonable values
print("Max/min of predictions: ")
ymax = max(predictions)
ymin = min(predictions)
print(str(ymax) + "/" + str(ymin))
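# The range check above only sanity-checks the outputs; a short sketch (not from
# the original source) of a quantitative check on the same held-out split:
from sklearn.metrics import accuracy_score, classification_report

print("Accuracy:", accuracy_score(y_test, predictions))
print(classification_report(y_test, predictions))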
def all_classifier_models():
    models = []
    metrix = []
    c_report = []
    train_accuracy = []
    test_accuracy = []
    models.append(('LogisticRegression', LogisticRegression(solver='liblinear', multi_class='ovr')))
    models.append(('LinearDiscriminantAnalysis', LinearDiscriminantAnalysis()))
    models.append(('KNeighborsClassifier', KNeighborsClassifier()))
    models.append(('DecisionTreeClassifier', DecisionTreeClassifier()))
    models.append(('GaussianNB', GaussianNB()))
    models.append(('RandomForestClassifier', RandomForestClassifier(n_estimators=100)))
    models.append(('SVM', SVC(gamma='auto')))
    models.append(('Linear_SVM', LinearSVC()))
    models.append(('XGB', XGBClassifier()))
    models.append(('SGD', SGDClassifier()))
    models.append(('Perceptron', Perceptron()))
    models.append(('ExtraTreeClassifier', ExtraTreeClassifier()))
    models.append(('OneClassSVM', OneClassSVM(gamma='auto')))
    models.append(('NuSVC', NuSVC()))
    models.append(('MLPClassifier', MLPClassifier(solver='lbfgs', alpha=1e-5, random_state=1)))
    models.append(('RadiusNeighborsClassifier', RadiusNeighborsClassifier(radius=2.0)))
    models.append(('OutputCodeClassifier', OutputCodeClassifier(estimator=RandomForestClassifier(random_state=0), random_state=0)))
    models.append(('OneVsOneClassifier', OneVsOneClassifier(estimator=RandomForestClassifier(random_state=1))))
    models.append(('OneVsRestClassifier', OneVsRestClassifier(estimator=RandomForestClassifier(random_state=1))))
    models.append(('LogisticRegressionCV', LogisticRegressionCV()))
    models.append(('RidgeClassifierCV', RidgeClassifierCV()))
    models.append(('RidgeClassifier', RidgeClassifier()))
    models.append(('PassiveAggressiveClassifier', PassiveAggressiveClassifier()))
    models.append(('GaussianProcessClassifier', GaussianProcessClassifier()))
    models.append(('HistGradientBoostingClassifier', HistGradientBoostingClassifier()))
    estimators = [('rf', RandomForestClassifier(n_estimators=10, random_state=42)),
                  ('svr', make_pipeline(StandardScaler(), LinearSVC(random_state=42)))]
    models.append(('StackingClassifier', StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())))
    clf1 = LogisticRegression(multi_class='multinomial', random_state=1)
    clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
    clf3 = GaussianNB()
    models.append(('VotingClassifier', VotingClassifier(estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')))
    models.append(('AdaBoostClassifier', AdaBoostClassifier()))
    models.append(('GradientBoostingClassifier', GradientBoostingClassifier()))
    models.append(('BaggingClassifier', BaggingClassifier()))
    models.append(('ExtraTreesClassifier', ExtraTreesClassifier()))
    models.append(('CategoricalNB', CategoricalNB()))
    models.append(('ComplementNB', ComplementNB()))
    models.append(('BernoulliNB', BernoulliNB()))
    models.append(('MultinomialNB', MultinomialNB()))
    models.append(('CalibratedClassifierCV', CalibratedClassifierCV()))
    models.append(('LabelPropagation', LabelPropagation()))
    models.append(('LabelSpreading', LabelSpreading()))
    models.append(('NearestCentroid', NearestCentroid()))
    models.append(('QuadraticDiscriminantAnalysis', QuadraticDiscriminantAnalysis()))
    models.append(('GaussianMixture', GaussianMixture()))
    models.append(('BayesianGaussianMixture', BayesianGaussianMixture()))
    test_accuracy = []
    names = []
    for name, model in models:
        try:
            m = model
            m.fit(X_train, y_train)
            y_pred = m.predict(X_test)
            train_acc = round(m.score(X_train, y_train) * 100, 2)
            test_acc = metrics.accuracy_score(y_test, y_pred) * 100
            c_report.append(classification_report(y_test, y_pred))
            test_accuracy.append(test_acc)
            names.append(name)
            metrix.append([name, train_acc, test_acc])
        except:
            print("Exception Occurred :", name)
    return metrix, test_accuracy, names
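# A short usage sketch (not from the original source): X_train/X_test/y_train/y_test
# are assumed to already exist at module level, since the function above relies on them.
metrix, test_accuracy, names = all_classifier_models()
for name, train_acc, test_acc in sorted(metrix, key=lambda r: r[2], reverse=True):
    print(f"{name}: train={train_acc:.2f}%  test={test_acc:.2f}%")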
]
class_label = "organism_resistence"
for csv_file_path in csv_file_paths:
    print("============================= {} ============================".format(csv_file_path))
    data, classes = helper.load_csv_file(csv_file_path, class_label)
    data = preprocess(data)
    classes = preprocess_classes(classes)
    data, search_data, classes, search_classes = train_test_split(
        data, classes, test_size=.20, stratify=classes)
    classifiers = [
        MLPClassifier(),
        DecisionTreeClassifier(),
        svm.SVC(),
        RandomForestClassifier()
    ]
    search_iterations = 140
    i = 0
    mean_std_pair = None
    while i < len(classifiers):
        print("======= Param search {} ======".format(type(classifiers[i])))
        random_search = RandomizedSearchCV(
            classifiers[i],
            param_distributions=helper.PARAM_DISTS[type(classifiers[i])],
            n_iter=search_iterations,
            cv=5)
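        # The snippet ends right after constructing the search; as a sketch (not from
        # the original source), the loop body would typically continue roughly like
        # this, fitting on the held-out search split and reporting the best setting.
        random_search.fit(search_data, search_classes)
        print("Best params:", random_search.best_params_)
        print("Best CV score:", random_search.best_score_)
        i += 1  # advance to the next classifier (the original loop body is truncated here)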