import matplotlib.pyplot as plt

import load_dataset
# SingleLayerPerceptron is defined elsewhere in the project; a hypothetical
# sketch of the interface main() relies on follows after this function.


def main():
    print('---Loading Dataset---')
    training_data, validation_data, test_data = load_dataset.load()

    validation_accuracy = []
    test_accuracy = []
    number_epochs = 20
    perceptron_obj = SingleLayerPerceptron()

    for i in range(number_epochs):
        print('---Epoch #', i, '---')

        print('---Training---')
        perceptron_obj.train(training_data, 1)

        print('---Validating---')
        acc = perceptron_obj.test(validation_data) * 100
        validation_accuracy.append(acc)
        print(acc)

        print('---Testing---')
        acc = perceptron_obj.test(test_data) * 100
        test_accuracy.append(acc)
        print(acc)

    # Plot validation and test accuracy per epoch.
    plt.plot(validation_accuracy, label='validation')
    plt.plot(test_accuracy, label='test')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.suptitle('Perceptron (binary feature representation)')
    plt.legend(loc='lower right')
    plt.show()
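# A minimal, hypothetical stand-in for SingleLayerPerceptron, showing the
# train/test contract main() assumes (a one-vs-rest perceptron over binary
# feature vectors). The feature size, class count, and the (example, label)
# data layout are illustrative assumptions, not the project's actual values.
import numpy as np


class SingleLayerPerceptron:
    def __init__(self, n_features=128, n_classes=26):
        self.W = np.zeros((n_classes, n_features))

    def train(self, data, epochs):
        for _ in range(epochs):
            for x, y in data:
                y_hat = int(np.argmax(self.W @ x))
                if y_hat != y:
                    # Classic perceptron update: promote the true class,
                    # demote the mistakenly predicted one.
                    self.W[y] += x
                    self.W[y_hat] -= x

    def test(self, data):
        # Return the fraction of correctly classified examples.
        correct = sum(int(np.argmax(self.W @ x)) == y for x, y in data)
        return correct / len(data)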
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
# load() is the project-local CSV loader.


def prepare_dataset(path="dataset/dataset_train.csv", sep=";"):
    dataset = load(path, sep)
    X = dataset.drop(['user_id', 'is_churned'], axis=1)
    y = dataset['is_churned']
    X_mm = MinMaxScaler().fit_transform(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X_mm, y, test_size=0.3, shuffle=True, stratify=y, random_state=100)
    # Reduce the class imbalance with SMOTE oversampling.
    # (sampling_strategy/fit_resample replace imblearn's deprecated
    # ratio/fit_sample API.)
    X_train_balanced, y_train_balanced = SMOTE(
        random_state=42, sampling_strategy=0.3).fit_resample(X_train, y_train)
    return X_train_balanced, y_train_balanced, X_test, y_test
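# A minimal usage sketch for prepare_dataset(), assuming the training CSV
# exists at the default path. The classifier here (scikit-learn's
# LogisticRegression) is only an illustrative baseline, not part of the
# original pipeline.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score

# Balanced training split, untouched (still imbalanced) test split.
X_train, y_train, X_test, y_test = prepare_dataset()

clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

# ROC AUC is a reasonable metric for an imbalanced churn target.
print('ROC AUC:', roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]))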
def preprocess(value="TRAIN"):
    positive, negative, stop_words = loader.load(value)
    positive = lower_case(positive)
    negative = lower_case(negative)
    # Create a token vector for each message. (The original seeded each
    # list with an empty [[]] element and used comprehensions for side
    # effects; plain comprehensions avoid the spurious first vector.)
    positive_vectors = [word_tokenize(msg, stop_words) for msg in positive]
    negative_vectors = [word_tokenize(msg, stop_words) for msg in negative]
    return positive_vectors, negative_vectors
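# The two-argument word_tokenize(msg, stop_words) is not NLTK's function of
# the same name, so it is presumably a project-local helper. A hypothetical
# sketch of what it likely does (tokenize the lower-cased message, then drop
# stop words); the regex token pattern is an assumption.
import re


def word_tokenize(msg, stop_words):
    tokens = re.findall(r"[a-z0-9']+", msg)
    return [t for t in tokens if t not in stop_words]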
import matplotlib.pyplot as plt
import numpy as np

import load_dataset
# LogisticRegression here is the project's own implementation (imported
# elsewhere), not scikit-learn's.


def main():
    print('---Loading Dataset---')
    training_data, validation_data, test_data = load_dataset.load()

    test_accuracy = []
    lr_obj = LogisticRegression()

    print('---Training---')
    validation_accuracy, n_trained_examples = lr_obj.train(
        training_data, 0.001, validation_data, 0.1)

    print('---Testing---')
    acc = lr_obj.test(test_data) * 100
    test_accuracy.append(acc)
    print(acc)

    # Plot validation accuracy against the number of training examples seen,
    # sampled every 10,000 examples.
    plt.plot(np.arange(10000, n_trained_examples + 1, 10000),
             validation_accuracy[1:])
    plt.xlabel('Number of training examples')
    plt.ylabel('Accuracy (%)')
    plt.suptitle('Logistic Regression (pairwise feature representation)')
    plt.show()
import matplotlib.pyplot as plt

import load_dataset
# MultiLayerPerceptron and draw_example are defined elsewhere in the project;
# a hypothetical sketch of draw_example follows after this function.


def main():
    print('---Loading Dataset---')
    training_data, validation_data, test_data = load_dataset.load()

    test_accuracy = []
    mlp = MultiLayerPerceptron(n_hidden_layers=1, n_neurons=[50])
    epochs = 20

    for k in range(epochs):
        print('---Training---')
        for i in range(len(training_data)):
            training_example, y = draw_example(training_data)
            mlp.backpropagation(training_example, y, eta=1e-2, reg_lambda=1e-3)

        print('---Testing---')
        mistakes = 0
        where_ok = [0] * 26  # per-class (letter) correct counts
        for j in range(len(test_data)):
            test_example, y = draw_example(test_data, j)
            y_hat = mlp.predict(test_example)
            if y_hat != y:
                mistakes += 1
            else:
                where_ok[y] += 1

        accuracy = 1 - mistakes / len(test_data)
        # Store as a percentage so the values match the axis label below.
        test_accuracy.append(accuracy * 100)
        print('Acc : ', accuracy)
        print(where_ok)

    plt.plot(test_accuracy, label='test set')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy (%)')
    plt.suptitle('MultiLayer Perceptron')
    plt.legend(loc='lower right')
    plt.show()
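# draw_example is called both without an index (during training) and with one
# (during the test pass), which suggests a random draw in the first case and a
# deterministic lookup in the second. A hypothetical sketch consistent with
# both call sites, assuming the dataset is a sequence of (features, label)
# pairs:
import random


def draw_example(data, index=None):
    # With no index, draw a random example (stochastic training order);
    # with an index, return that example (deterministic test pass).
    if index is None:
        index = random.randrange(len(data))
    features, label = data[index]
    return features, label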
if event_name == 'oscar pistorius':
    dataset = Datasets.oscar_pistorius
else:
    sys.exit(0)

logging.basicConfig(format='%(asctime)s | %(levelname)s : %(message)s',
                    level=logging.INFO, stream=sys.stderr)

Session = sessionmaker(bind=engine, autocommit=True)
session = Session()

add_event(event_name, dataset, session)
url_objs = add_urls(event_name, session)
df, urls_df = load(event_name, dataset, engine)
tweet_urls = add_tweets_url(event_name, df, url_objs, session)
uf = add_documents(event_name, dataset, tweet_urls, session)


def main_info():
    from sqlalchemy.orm import sessionmaker
    import sys
    from settings import engine
    import time

    logging.basicConfig(format='%(asctime)s | %(levelname)s : %(message)s',
                        level=logging.INFO, stream=sys.stderr)
    Session = sessionmaker(bind=engine, autocommit=True)
import load_dataset
import network

train_data, train_labels, test_data, test_labels = load_dataset.load()

answers = []
for i in range(0, 100):
    model = network.Model(input_units=len(train_data.keys()),
                          hidden_units=64, epochs=500)
    model.fit(train_data, train_labels)
    loss, mae, mse = model.evaluate(test_data, test_labels)
    answers.append(mae)  # collect the MAE from each of the 100 runs
    # model.plot_history()
    print("Mean absolute error on the test data: {:5.2f} MPG".format(mae))
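# network.Model is the project's own wrapper; a hypothetical sketch of what it
# might look like, consistent with the constructor arguments above and with
# evaluate() returning a (loss, mae, mse) triple. The layer sizes beyond
# hidden_units, the optimizer, and the loss are assumptions.
import tensorflow as tf


class Model:
    def __init__(self, input_units, hidden_units, epochs):
        self.epochs = epochs
        self.model = tf.keras.Sequential([
            tf.keras.layers.Dense(hidden_units, activation='relu',
                                  input_shape=(input_units,)),
            tf.keras.layers.Dense(hidden_units, activation='relu'),
            tf.keras.layers.Dense(1),  # single regression output (MPG)
        ])
        self.model.compile(optimizer='adam', loss='mse',
                           metrics=['mae', 'mse'])

    def fit(self, data, labels):
        self.model.fit(data, labels, epochs=self.epochs, verbose=0)

    def evaluate(self, data, labels):
        # Keras returns [loss, mae, mse] for the compiled metrics above.
        return self.model.evaluate(data, labels, verbose=0)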
def train_naive_bayes():
    # Load the corpus into module-level state shared with the other helpers,
    # then build and persist the word statistics.
    global positive_messages, negative_messages, stop_words
    positive_messages, negative_messages, stop_words = dataset.load()
    return save_words()
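# save_words() is defined elsewhere in the project. As an illustration of the
# training step it presumably performs, here is a hypothetical per-class
# word-count helper: naive Bayes text classification reduces at training time
# to exactly these frequency counts plus the class priors.
from collections import Counter


def count_words(messages, stop_words):
    counts = Counter()
    for msg in messages:
        counts.update(t for t in msg.lower().split() if t not in stop_words)
    return counts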