Code example #1
# extract_instances, filter_data, train_perceptron and get_accuracy are
# assumed to be defined elsewhere in this module
import copy
from itertools import product

import main


def build_perceptron_classifier(class_dirs, class_values):
    # combine the data from each class's directory of example instances
    data = []
    for class_name, dir_name in class_dirs.items():
        data.extend(extract_instances(dir_name, class_values[class_name]))

    # split the data 70:30 into training and validation sets
    SPLIT_PROPS = {'train': .7, 'valid': .3}
    splits = main.split_data(data, SPLIT_PROPS)
    training_split, validation_split = splits['train'], splits['valid']

    def test_accuracy(n_iters, min_occurrences):
        # copy the training split so filtering doesn't mutate the original
        training_copy = copy.deepcopy(training_split)
        filter_data(training_copy, min_occurrences)

        # train a perceptron on the filtered copy
        perceptron = train_perceptron(training_copy, n_iters=n_iters)

        # evaluate on the held-out validation split
        accr = get_accuracy(perceptron, validation_split)

        print('accuracy for {} iters (only using attributes that occur in '
              '>={} documents): {}'.format(n_iters, min_occurrences, accr))

        return accr

    # grid-search the iteration count and minimum occurrence threshold
    # that give the best validation accuracy
    print('Tuning parameters for perceptron...')
    n_iters, min_occurrences = max(product(range(2, 10), range(1, 4)),
                                   key=lambda pair: test_accuracy(*pair))

    print('Selected {} iters, {} min occurrences'.format(
        n_iters, min_occurrences))

    # retrain on the full dataset with the selected parameters
    filter_data(data, min_occurrences)
    return train_perceptron(data, n_iters=n_iters)
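
The `main.split_data` helper this example calls isn't shown on the page. A minimal sketch of what it might look like, inferred only from the call site: it shuffles the instances and partitions them into named splits matching the given proportions (the shuffling and remainder handling here are assumptions).

import random

def split_data(data, proportions, seed=None):
    # Hypothetical sketch of main.split_data: shuffle, then slice the
    # instances into named splits; the last split takes the remainder.
    instances = list(data)
    random.Random(seed).shuffle(instances)
    names = list(proportions)
    splits, start = {}, 0
    for i, name in enumerate(names):
        if i == len(names) - 1:
            splits[name] = instances[start:]
        else:
            end = start + int(proportions[name] * len(instances))
            splits[name] = instances[start:end]
            start = end
    return splits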
Code example #2
# get_data, split_data, build_model and plot_confusion_matrix are project
# helpers; to_categorical comes from keras.utils
def main():
    rev_by_star = get_data()
    # despite the names, these are: training inputs, test inputs,
    # training labels, test labels
    X_train, Y_test, X_target, Y_target = split_data(rev_by_star)
    X_train = X_train.toarray()
    Y_test = Y_test.toarray()
    input_num = 1000
    output_num = 5
    # one-hot encode the training labels over the 5 star ratings
    X_target = to_categorical(X_target, 5)
    print(type(X_train))
    model = build_model(input_num, output_num)
    model.fit(X_train,
              X_target,
              batch_size=128,
              epochs=5,  # `nb_epoch` in Keras 1; renamed `epochs` in Keras 2
              validation_split=0.25)
    Y_pred = model.predict(Y_test)
    Y_pred = categorical_probas_to_classes(Y_pred)
    accuracy = (Y_target == Y_pred).sum() * 1.0 / Y_test.shape[0]
    print("Accuracy is : %.2f" % accuracy)
    plot_confusion_matrix(Y_pred, Y_target, "neural_network")
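
`categorical_probas_to_classes` was a helper in the Keras 1.x `np_utils` module and is gone from Keras 2. If it's unavailable, an equivalent is a per-row argmax (a one-line sketch):

import numpy as np

def categorical_probas_to_classes(probas):
    # index of the highest predicted probability in each row
    return np.argmax(probas, axis=1)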
Code example #3
File: predict.py Project: blongwill/portfolio
def predict_model(experiment, output_dir):
	"""Writes the predictions of a given dataset file."""
	saved_dir = "/home2/preetmhn/clms/ling_575_nlms/models/saved_{}".format(experiment)
	model = torch.load('{}/hate_speech_model_trained.pt'.format(saved_dir))
	settings = Settings(experiment, True)
	
	# get gpu
	device = get_gpu(settings)
	
	# get data, split with the same random seed as in training
	input_ids, labels, attention_masks = prepare_data(settings)
	_, validation_inputs, _, validation_labels = train_test_split(input_ids, labels, random_state=2018, test_size=0.1)
	_, validation_dataloader = split_data(settings, input_ids, labels, attention_masks)
	
	# make predictions and write to file
	settings.write_debug("Getting model predictions")
	preds = predict(device, model, validation_dataloader)

	# load tokenizer for the decoding
	tokenizer = load_bert_tokenizer(settings, True)
	
	# write to file
	settings.write_debug("Writing model predictions")
	output_file = os.path.join(output_dir, experiment + '_pred.txt')
	with open(output_file, 'w', encoding='utf-8', newline='') as pred_file:
		out = csv.writer(pred_file, delimiter='\t')
		out.writerow(['input', 'true', 'pred'])
		# preds line up with the validation split, not the full dataset,
		# so decode the validation inputs rather than input_ids
		for i in range(len(preds)):
			tokens = tokenizer.decode(validation_inputs[i], skip_special_tokens=True)
			out.writerow([tokens, validation_labels[i], preds[i]])
	
	# write scores
	settings.write_debug("Getting test evaluation")
	record_score_information(settings, validation_labels, preds)
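
The `predict` helper called above isn't shown on this page. A minimal sketch of what such a loop typically looks like for a BERT-style classifier, assuming each dataloader batch is an (input_ids, attention_mask, label) tuple; the batch layout and the model-output indexing are assumptions:

import torch

def predict(device, model, dataloader):
    # run the model over every batch and collect argmax class predictions
    model.eval()
    preds = []
    with torch.no_grad():
        for input_ids, attention_mask, _ in dataloader:
            logits = model(input_ids.to(device),
                           attention_mask=attention_mask.to(device))[0]
            preds.extend(torch.argmax(logits, dim=1).tolist())
    return preds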
Code example #4
def showModelGraph():
    # Function that runs main.py to train the LSTM model; the accuracy
    # score is printed to the console.
    index = main.split_data(stockEntry.get())
    result = main.train_test(1, index[0], index[1], index[2], index[3],
                             index[4], index[5])
    plot_result = main.plot(result[0], result[1], result[2], result[3])

    fig = plt.figure(figsize=(10.75, 4.5))  # graph size and resolution
    graph = FigureCanvasTkAgg(fig, window)
    graph.draw()
    # where the graph is placed on the window
    graph.get_tk_widget().place(x=500, y=410)
    plt.xlabel('Time Step', fontsize=18)
    plt.ylabel('Close Price', fontsize=18)
    plt.plot(plot_result[0], "-b", label="Training Data")
    plt.plot(plot_result[1], "-r", label="Test Data")
    # legend colour-codes the training and test data
    plt.legend(loc="upper right")
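
This handler assumes a Tkinter `window` and a `stockEntry` widget created elsewhere in the script. A minimal sketch of that surrounding setup, with all names and geometry guessed from the calls above:

import tkinter as tk
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg

window = tk.Tk()
window.geometry('1600x900')    # assumed size; the graph is placed at x=500, y=410

stockEntry = tk.Entry(window)  # ticker symbol typed by the user
stockEntry.place(x=50, y=50)

tk.Button(window, text='Show Graph',
          command=showModelGraph).place(x=50, y=100)

window.mainloop()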
Code example #5
    # shift the column so its minimum maps to 1 before taking the log
    # (defined here but not applied below)
    log_transformer = lambda c: np.log(c + np.abs(np.min(c)) + 1)
    transformers = [
        DateTransformer('observation_date')
    ]

    transformed_data = imputed_data.copy()
    for transformer in transformers:
        transformed_data = transformer.fit_transform(transformed_data)
    final_data = pd.get_dummies(transformed_data)

    X_train, X_test, y_train, y_test = split_data(final_data, 'target', 'masterloanidtrepp', test_size=.2, random_state=123)

    X_train = X_train.drop(['masterloanidtrepp'], axis=1)
    X_test = X_test.drop(['masterloanidtrepp'], axis=1)

    rf = RandomForestClassifier(max_depth=3)
    rf.fit(X_train, y_train)

    y_pred_rf = rf.predict_proba(X_test)[:, 1]
    fpr_rf, tpr_rf, _ = roc_curve(y_test, y_pred_rf)
    auc_rf_lm = auc(fpr_rf, tpr_rf)

    plt.figure(1)
    plt.plot([0, 1], [0, 1], 'k--')
    plt.plot(fpr_rf, tpr_rf, label='RF (AUC = %.3f)' % auc_rf_lm)
    plt.xlabel('False positive rate')
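
`DateTransformer` isn't defined on this page. A plausible sketch, assuming it follows the sklearn fit/transform convention and expands the date column into numeric parts (the exact features are a guess):

import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin

class DateTransformer(BaseEstimator, TransformerMixin):
    # Hypothetical sketch: replace a date column with numeric date parts.
    def __init__(self, column):
        self.column = column

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        X = X.copy()
        dates = pd.to_datetime(X[self.column])
        X[self.column + '_year'] = dates.dt.year
        X[self.column + '_month'] = dates.dt.month
        return X.drop(columns=[self.column])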
Code example #6
    import main

    data = main.load_dataset("data/ripple_0.0_50_200")
    # init GA; the dataset holds one target column, hence the -1
    input_size = data.shape[1] - 1
    hidden_layer_size = 5
    output_size = 1
    population_size = 10
    selection_size = 4
    learning_rate = 1e-3
    epochs = 10
    generations = 10
    estimator = GeneticAlgorithm(
        True,
        input_size,
        hidden_layer_size,
        output_size,
        population_size,
        selection_size,
        learning_rate,
        epochs,
        generations,
    )
    X, y = main.split_data(data)
    estimator.fit(X, y)
    print(estimator)
    import pickle

    with open("test", "wb") as f:
        pickle.dump(estimator, f)
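
To reuse the pickled estimator later, it can be loaded back and applied the same way (a brief sketch; the sklearn-style `predict` method is assumed to mirror the `fit` call above):

import pickle

import main

data = main.load_dataset("data/ripple_0.0_50_200")
X, y = main.split_data(data)

with open("test", "rb") as f:
    estimator = pickle.load(f)

y_pred = estimator.predict(X)  # assumed sklearn-style predict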
Code example #7
File: ica.py Project: tulsyanp/tcd-ai-group-project
from main import (fetch_dataset, fetch_data_details, split_data,
                  dimensionality_reduction_ICA, train_text_transform_Model,
                  classification_svc, prediction, print_report, plot_images,
                  title)

# Load data
dataset = fetch_dataset()

# get dataset details and target names
n_samples, height, width, X, n_features, y, target_names, n_classes = fetch_data_details(
    dataset)

# split into a training and testing set
X_train, X_test, y_train, y_test = split_data(X, y)

# compute ICA
n_components = 150

ica, eigenfaces = dimensionality_reduction_ICA(n_components, X_train, height,
                                               width)

X_train_ica, X_test_ica = train_text_transform_Model(ica, X_train, X_test)

# Training an SVM classification model
clf = classification_svc(X_train_ica, y_train)

# Quantitative evaluation of the model quality on the test set
y_pred = prediction(clf, X_test_ica)

# printing classification report
print_report(y_test, y_pred, target_names, n_classes)

# printing images
prediction_titles = [