import copy
from itertools import product

import main  # provides split_data


def build_perceptron_classifier(class_dirs, class_values):
    # extract_instances, filter_data, train_perceptron and get_accuracy are
    # defined elsewhere in this module.
    # combine the data from each directory of example instances of a class
    data = []
    for class_name, dir_name in class_dirs.items():
        data.extend(extract_instances(dir_name, class_values[class_name]))

    # do a 70:30 train/validation split
    SPLIT_PROPS = {'train': .7, 'valid': .3}
    splits = main.split_data(data, SPLIT_PROPS)
    training_split, validation_split = splits['train'], splits['valid']

    def test_accuracy(n_iters, min_occurrences):
        # copy and filter the training split
        training_copy = copy.deepcopy(training_split)
        filter_data(training_copy, min_occurrences)
        # now train a perceptron and score it on the validation split
        perceptron = train_perceptron(training_copy, n_iters=n_iters)
        accr = get_accuracy(perceptron, validation_split)
        print('accuracy for {} iters (only using attributes that occur in '
              '>={} documents): {}'.format(n_iters, min_occurrences, accr))
        return accr

    # find the (n_iters, min_occurrences) pair that gives the best validation accuracy
    print('Tuning parameters for perceptron...')
    n_iters, min_occurrences = max(product(range(2, 10), range(1, 4)),
                                   key=lambda pair: test_accuracy(*pair))
    print('Selected {} iters, {} min occurrences'.format(n_iters, min_occurrences))

    # retrain on all of the data with the selected parameters
    filter_data(data, min_occurrences)
    return train_perceptron(data, n_iters=n_iters)
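# Hypothetical usage (not from the original project): the directory paths and
# label values below are illustrative only.
class_dirs = {'pos': 'data/pos_reviews', 'neg': 'data/neg_reviews'}
class_values = {'pos': 1, 'neg': -1}
perceptron = build_perceptron_classifier(class_dirs, class_values)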
from keras.utils.np_utils import to_categorical, categorical_probas_to_classes  # Keras 1.x API


def main():
    rev_by_star = get_data()
    # split_data returns (train features, test features, train labels, test labels)
    X_train, X_test, y_train, y_test = split_data(rev_by_star)
    X_train = X_train.toarray()
    X_test = X_test.toarray()

    input_num = 1000
    output_num = 5
    # one-hot encode the training labels for the softmax output
    y_train = to_categorical(y_train, output_num)

    model = build_model(input_num, output_num)
    model.fit(X_train, y_train, batch_size=128, nb_epoch=5, validation_split=0.25)

    y_pred = model.predict(X_test)
    y_pred = categorical_probas_to_classes(y_pred)
    print("Accuracy is : %.2f" % ((y_test == y_pred).sum() / float(X_test.shape[0])))
    plot_confusion_matrix(y_pred, y_test, "neural_network")
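# build_model is defined elsewhere in the original project. A minimal sketch of
# what it might look like, assuming a Keras Sequential MLP with a softmax over
# the five star ratings (the hidden-layer width of 512 is an arbitrary choice):
from keras.models import Sequential
from keras.layers import Dense

def build_model(input_num, output_num):
    # one hidden ReLU layer, softmax output over the classes
    model = Sequential()
    model.add(Dense(512, input_dim=input_num, activation='relu'))
    model.add(Dense(output_num, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model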
import csv
import os

import torch
from sklearn.model_selection import train_test_split

# Settings, get_gpu, prepare_data, split_data, predict, load_bert_tokenizer and
# record_score_information are project helpers imported elsewhere in the module.


def predict_model(experiment, output_dir):
    """Writes the predictions of a given dataset file."""
    saved_dir = "/home2/preetmhn/clms/ling_575_nlms/models/saved_{}".format(experiment)
    model = torch.load('{}/hate_speech_model_trained.pt'.format(saved_dir))
    settings = Settings(experiment, True)

    # get gpu
    device = get_gpu(settings)

    # get data, split with the same random seed as in training
    input_ids, labels, attention_masks = prepare_data(settings)
    _, validation_inputs, _, validation_labels = train_test_split(
        input_ids, labels, random_state=2018, test_size=0.1)
    _, validation_dataloader = split_data(settings, input_ids, labels, attention_masks)

    # make predictions
    settings.write_debug("Getting model predictions")
    preds = predict(device, model, validation_dataloader)

    # load tokenizer for decoding the inputs back to text
    tokenizer = load_bert_tokenizer(settings, True)

    # write predictions to a tab-separated file
    settings.write_debug("Writing model predictions")
    output_file = os.path.join(output_dir, experiment + '_pred.txt')
    with open(output_file, 'w+', encoding='utf-8', newline='') as f:
        out = csv.writer(f, delimiter='\t')
        out.writerow(['input', 'true', 'pred'])
        for i in range(len(preds)):
            tokens = tokenizer.decode(input_ids[i], skip_special_tokens=True)
            out.writerow([tokens, labels[i], preds[i]])

    # write scores
    settings.write_debug("Getting test evaluation")
    record_score_information(settings, validation_labels, preds)
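# predict is imported from elsewhere in the project. A minimal sketch of the
# usual shape of such a helper, assuming each batch from the dataloader is
# (input_ids, attention_mask, labels) and the model returns classification
# logits; the real implementation may differ.
import torch

def predict(device, model, dataloader):
    # run the model over a dataloader and collect argmax class predictions
    model.to(device)
    model.eval()
    preds = []
    with torch.no_grad():
        for input_ids, attention_mask, _ in dataloader:
            outputs = model(input_ids.to(device),
                            attention_mask=attention_mask.to(device))
            logits = outputs if torch.is_tensor(outputs) else outputs[0]
            preds.extend(logits.argmax(dim=-1).cpu().tolist())
    return preds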
def showModelGraph():
    # Function to run main.py, training the LSTM model; the accuracy score is
    # displayed in the console
    index = main.split_data(stockEntry.get())
    result = main.train_test(1, index[0], index[1], index[2], index[3],
                             index[4], index[5])
    plot_result = main.plot(result[0], result[1], result[2], result[3])

    fig = plt.figure(figsize=(10.75, 4.5))  # graph size and resolution
    graph = FigureCanvasTkAgg(fig, window)
    graph.draw()
    graph.get_tk_widget().place(x=500, y=410)  # where the graph is placed on the window

    plt.xlabel('Time Step', fontsize=18)
    plt.ylabel('Close Price', fontsize=18)  # x- and y-axis labels
    plt.plot(plot_result[0], "-b", label="Training Data")
    plt.plot(plot_result[1], "-r", label="Test Data")
    plt.legend(loc="upper right")  # legend colour-codes the training and test data
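# The window and stockEntry widgets above are created elsewhere in the GUI
# module. The matplotlib-in-Tkinter embedding pattern itself can be reproduced
# standalone like this (dummy data stands in for the model output):
import tkinter as tk
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

window = tk.Tk()
fig = plt.figure(figsize=(10.75, 4.5))
plt.plot([10, 20, 15, 25], "-b", label="Training Data")
plt.plot([12, 18, 17, 22], "-r", label="Test Data")
plt.legend(loc="upper right")
canvas = FigureCanvasTkAgg(fig, window)   # attach the figure to the Tk window
canvas.draw()
canvas.get_tk_widget().place(x=500, y=410)
window.mainloop()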
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_curve, auc

# helper for log-scaling columns that may contain non-positive values
# (unused in this excerpt)
log_transformer = lambda c: np.log(c + np.abs(np.min(c)) + 1)

transformers = [DateTransformer('observation_date')]

transformed_data = imputed_data.copy()
for transformer in transformers:
    transformed_data = transformer.fit_transform(transformed_data)

final_data = pd.get_dummies(transformed_data)

X_train, X_test, y_train, y_test = split_data(final_data, 'target', 'masterloanidtrepp',
                                              test_size=.2, random_state=123)
X_train = X_train.drop(['masterloanidtrepp'], axis=1)
X_test = X_test.drop(['masterloanidtrepp'], axis=1)

rf = RandomForestClassifier(max_depth=3)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict_proba(X_test)[:, 1]
fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_rf)
auc_rf = auc(fpr_rf, tpr_rf)

plt.figure(1)
plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
plt.plot(fpr_rf, tpr_rf, label='RF (AUC = %.3f)' % auc_rf)
plt.xlabel('False positive rate')
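# split_data here takes the target column name and an ID column in addition to
# the frame. One plausible implementation, assuming a group-aware split so all
# rows for a given loan stay on the same side (the project's real version is
# not shown and may simply delegate to train_test_split):
from sklearn.model_selection import GroupShuffleSplit

def split_data(df, target_col, id_col, test_size=.2, random_state=None):
    # split by group so a loan's history never leaks between train and test
    splitter = GroupShuffleSplit(n_splits=1, test_size=test_size,
                                 random_state=random_state)
    train_idx, test_idx = next(splitter.split(df, groups=df[id_col]))
    X, y = df.drop(columns=[target_col]), df[target_col]
    return (X.iloc[train_idx], X.iloc[test_idx],
            y.iloc[train_idx], y.iloc[test_idx])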
import pickle

import main

# GeneticAlgorithm is defined in the project; its import is not shown in the
# original snippet.

data = main.load_dataset("data/ripple_0.0_50_200")

# init ga
input_size = data.shape[1] - 1
hidden_layer_size = 5
output_size = 1
population_size = 10
selection_size = 4
learning_rate = 1e-3
epochs = 10
generations = 10

estimator = GeneticAlgorithm(
    True,
    input_size,
    hidden_layer_size,
    output_size,
    population_size,
    selection_size,
    learning_rate,
    epochs,
    generations,
)

X, y = main.split_data(data)
estimator.fit(X, y)
print(estimator)

with open("test", "wb") as f:
    pickle.dump(estimator, f)
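# The pickled estimator can be reloaded in a later session; "test" is the
# filename used above.
import pickle

with open("test", "rb") as f:
    estimator = pickle.load(f)
print(estimator)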
from main import (fetch_dataset, fetch_data_details, split_data,
                  dimensionality_reduction_ICA, train_text_transform_Model,
                  classification_svc, prediction, print_report, plot_images,
                  title)

# Load data
dataset = fetch_dataset()

# get dataset details and target names
n_samples, height, width, X, n_features, y, target_names, n_classes = \
    fetch_data_details(dataset)

# split into a training and testing set
X_train, X_test, y_train, y_test = split_data(X, y)

# compute ICA components and project the train/test features onto them
n_components = 150
ica, eigenfaces = dimensionality_reduction_ICA(n_components, X_train, height, width)
X_train_ica, X_test_ica = train_text_transform_Model(ica, X_train, X_test)

# train an SVM classification model
clf = classification_svc(X_train_ica, y_train)

# quantitative evaluation of the model quality on the test set
y_pred = prediction(clf, X_test_ica)

# print the classification report
print_report(y_test, y_pred, target_names, n_classes)

# plot images
prediction_titles = [