def get_parse():
    """Parse the command-line options and load the two input data files.

    Returns:
        A tuple ``(x, y, args)``: ``x`` is the result of ``data_reader.read``
        for the ``--data`` file, ``y`` is the result for the ``--reference``
        file (both read with the requested image height and width), and
        ``args`` is a dict of all parsed options keyed by long option name.
    """
    ap = argparse.ArgumentParser()
    list_of_modes = ['train', 'sample']

    # NOTE: the '--method', '--latent_size' and '--learning_rate' options are
    # currently disabled. The old '--method' option used the short flag '-m',
    # which now belongs to '--mode'.
    ap.add_argument('-d', '--data', required=True,
                    help='Training data file.')
    ap.add_argument('-r', '--reference', required=True,
                    help='References data file.')
    ap.add_argument('-lf', '--load_file',
                    help='Filename selected for loading trained model.')
    ap.add_argument('-m', '--mode', default='train', choices=list_of_modes,
                    help='Choose between training mode or sampling mode.')

    # Integer options: the defaults are real ints, so they no longer depend on
    # argparse converting a string default through ``type``.
    ap.add_argument('-ep', '--epoch', type=int, default=100000,
                    help='Number of epoch')
    ap.add_argument('-bs', '--batch_size', type=int, default=64,
                    help='Batch size')
    ap.add_argument('-ih', '--image_height', type=int, default=480,
                    help='Image height')
    ap.add_argument('-iw', '--image_width', type=int, default=720,
                    help='Image width')

    args = vars(ap.parse_args())

    height = args['image_height']
    width = args['image_width']
    x = data_reader.read(args['data'], height, width)
    y = data_reader.read(args['reference'], height, width)
    return x, y, args
#!/usr/bin/env python
# Read the file named by the last command-line argument and show the plot
# produced by its plot_normalized_ft() method, with labelled axes.
from data_reader import read
import matplotlib.pyplot as plt
import sys

# sys.argv[0] is the script itself: without this guard a missing argument
# would make sys.argv[-1] hand the script's own path to read().
if len(sys.argv) < 2:
    sys.exit('usage: %s DATA_FILE' % sys.argv[0])

gtr_w = read(sys.argv[-1])
gtr_w.plot_normalized_ft()

plt.xlabel('Frequency [Hz]')
plt.ylabel('Amplitude [V]')
plt.show()
# Custom libs
import prototypes
import distances
from precalcs import get_classes
from knn import knn
from data_reader import read
from args import args
from nn import nn
from random import shuffle
from numpy import mean

# Settings taken from the parsed command-line arguments.
dataset = read(args.d)
k = args.k
p = args.p
classes = get_classes(dataset)
repetitions = args.r
window = args.window
division = args.split
gen_repetitions = args.repetitions
# The distance function is looked up by name in the distances module.
distance = getattr(distances, args.distance)

# Optional in-place shuffle before the dataset is split.
if args.shuffle:
    shuffle(dataset)

# The first `division` fraction of the dataset is used for training and the
# remainder for evaluation.
training_size_index = int(len(dataset) * division)
training = dataset[:training_size_index]
evaluation = dataset[training_size_index:]

alpha = 0.01
e = 2
import nlp_model as nm
import data_reader

# Build a blank English model and set up its text categorizer.
nlp = nm.initialize_model('en', blank=True)
nm.initialize_textcat(nlp)

# NOTE(review): the return value of read() is discarded here; presumably the
# call is made for its side effects -- confirm against data_reader.
data_reader.read()

# begin_training() returns the optimizer used for the training that follows.
optimizer = nlp.begin_training()
print("Training model...")
from random import shuffle
# time() is called below to measure the pre-processing step.
from time import time
import numpy

# Custom libs
import distances
import args
from data_reader import read
from precalcs import precalcs
from precalcs import swap_array

# Getting the arguments
arguments = args.args
kfold = arguments.kfold
k = arguments.k
# The distance function is looked up by name in the distances module.
distance = getattr(distances, arguments.distance)
dataset = read(arguments.d)
w = arguments.w
swap = arguments.swap

# Time the pre-processing: the optional shuffle plus the pre-calculations,
# which are skipped for the "euclidean" distance.
precalcs_time_begin = time()
if arguments.shuffle:
    shuffle(dataset)
if arguments.distance != "euclidean":
    precalcs(dataset)
precalcs_time_endtime = time()
precalcs_time = precalcs_time_endtime - precalcs_time_begin

# The optional swap runs after the timer has stopped, so it is not included
# in the reported time.
if swap:
    swap_array(dataset)

# Single-argument print calls are valid on both Python 2 and Python 3.
print("Pre-processing time: " + str(precalcs_time) + ' seconds')
print("")
biases = tf.get_variable(name="biases", shape=[1], initializer=tf.initializers.constant) output = tf.matmul(input_data, weights) + biases return tf.nn.sigmoid(output) def train(input_x, input_y, ephocs=10, batch_size=100): x = tf.placeholder("float", shape=[None, input_x.shape[1]], name="x-input") y = tf.placeholder("float", shape=[None, input_y.shape[1]], name="y-input") output = inference(x) # 定义损失函数 cost = -tf.reduce_sum(y * tf.log(output) + (1 - y) * tf.log(1 - output)) # 逻辑回归的损失函数 entry_cost = tf.train.GradientDescentOptimizer(0.0003).minimize(cost) batches = input_x.shape[0] // batch_size if input_x.shape[0] % batch_size != 0: batches += 1 with tf.Session() as sess: sess.run(tf.initialize_all_variables()) for _ in range(ephocs): for batch in range(batches): start = batch * batch_size % input_x.shape[0] end = min(start + batch_size, input_x.shape[0]) sess.run([entry_cost], feed_dict={x: input_x[start:end], y: input_y[start:end]}) c = sess.run([cost], feed_dict={x: input_x, y: input_y}) print(c) data, label = data_reader.read() train(data, label)
from collections import defaultdict

from data_reader import read
from nlp_model import initialize_model
from similarity import Similarity
from similarity_trainer import SimilarityTrainer

# Alternative input: path = 'data.psv'
# Raw string: the previous literal relied on invalid escape sequences
# (\D, \J, \c) being left untouched. The resulting path is unchanged.
path = r'D:\Dev\Java\clinical-trials\trials_combined_text.psv'
maxRecords = 120

# Read the '|'-separated file into the expected labels and the text rows.
correctLabels, textRows = read(path, '|')
labels = sorted(set(correctLabels))
labelString = ' '.join(labels)

# Alternative model name noted by the author: en_core_web_lg
nlp = initialize_model('en')
similarity = Similarity(nlp, labelString, boost=5, threshold=0.7)

# Evaluate at most maxRecords records.
size = min(len(correctLabels), maxRecords)
trainer = SimilarityTrainer(correctLabels, textRows, similarity, size)
# Disabled step: trainer.detect_entities()
misinterpretations, correctGuesses = trainer.calculate_similarity()
print("Correct Guesses", correctGuesses, correctGuesses / size * 100)

# Group the misinterpretations by their key.
misDict = defaultdict(list)
for m in misinterpretations:
    key = m.get_key()
    misDict[key].append(m)
def get_cost(theta, x, y):
    """Return the summed cross-entropy cost of ``sigmoid(x . theta)`` against ``y``.

    The sigmoid output is clipped away from exactly 0 and 1 so that a
    saturated prediction cannot produce ``log(0)`` (inf/NaN cost).
    """
    eps = 1e-12
    h = np.clip(sigmoid(np.dot(x, theta)), eps, 1 - eps)
    return -np.sum(y * np.log(h) + (1 - y) * np.log(1 - h))


def batch_gradient_descending(theta, x, y, learning_rate, batch_size, ephocs):
    """Run mini-batch gradient descent and plot the recorded costs.

    Args:
        theta: Initial parameters.
        x: Feature array; batches are taken as row slices.
        y: Target array, sliced with the same row ranges as ``x``.
        learning_rate: Initial step size; it is decayed after every epoch.
        batch_size: Maximum number of rows per update step.
        ephocs: Number of passes over the data.

    Returns:
        The parameters after the last update. Previously the result was
        discarded and the function returned ``None``.
    """
    costs = []
    rows = x.shape[0]
    # Number of batches, counting a final partial batch.
    batches = rows // batch_size
    if rows % batch_size != 0:
        batches += 1

    for ephoc in range(ephocs):
        for batch in range(batches):
            # batch * batch_size is always below `rows` here, so the original
            # modulo on the start offset was a no-op.
            start = batch * batch_size
            end = min(start + batch_size, rows)
            theta = theta - learning_rate * get_grad(theta, x[start:end], y[start:end])

        # NOTE(review): the flattened source does not show whether the cost
        # was recorded per batch or per epoch; per epoch is assumed -- confirm.
        costs.append(get_cost(theta, x, y))

        # Learning-rate decay: update the rate used by the next iteration.
        # NOTE(review): the division is applied to the already-decayed rate,
        # so the decay compounds across epochs -- confirm this is intended.
        learning_rate = learning_rate / (1 + 0.99 * ephoc)

    show.show_cost(costs)
    return theta


x, y = data_reader.read()
theta = np.random.rand(x.shape[1], 1)
learning_rate = 0.1
batch_size = 100
batch_gradient_descending(theta, x, y, learning_rate, batch_size, 20)
PLOT = False if __name__ == '__main__': if '-u' in sys.argv: UNC = True if '-c' in sys.argv: CONNECT = True if '-e' in sys.argv: EXPECTED = True if '-p' in sys.argv: PLOT = True gtr_waves = data_reader.read(sys.argv[-1], strings=['E'], frets=['0']) peaks_all = [] fig = plt.figure() ax = fig.gca(projection='3d') if PLOT: i = 0 for gtr_wave in gtr_waves: x_s, y_s = gtr_wave.normalized_abs_dft() peaks = gtr_wave.ft_peaks(min_dist=0.001, thres=0.2) peaks_all.append(peaks) ax.plot(x_s, np.full(x_s.size, i) + D_MIN, y_s) for peak in peaks:
from operator import itemgetter

from joblib import Parallel, delayed
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import data_reader as dr
import grid_search as gs
from vectorizer import Vectorizer

# Load both polarity files; the first is labelled 1 and the second 0.
df = dr.read(file_names=['data/rt-polarity.pos', 'data/rt-polarity.neg'], labels=[1, 0])

# Task 1
# Column 0 is used as the input and column 1 (cast to int) as the target;
# 20% of the rows are held out with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    df.values[:, 0],
    df.values[:, 1].astype('int'),
    test_size=0.2,
    random_state=0,
)

# Task 2
v = Vectorizer(docs=X_train)

# Task 3-4
# One grid search per f_num in 1..5, spread over four workers.
result = Parallel(n_jobs=4)(
    delayed(gs.grid_search)(X=X_train, y=y_train, f_num=f_num, v=v)
    for f_num in range(1, 6)
)
# Order the results by their first element, largest first, and take the
# second and third fields of the top entry.
result = sorted(result, key=itemgetter(0), reverse=True)
best_c, best_penalty = result[0][1], result[0][2]