def main():
    """Generate the output data unless the input data already passes validation.

    The input data and the output file object come from ``utils.IO_files()``.
    ``utils.generate_data`` is called on the output file only when
    ``utils.check_data`` rejects the input.  The output file is always closed,
    even if validation or generation raises.
    """
    input_data, output_file = utils.IO_files()
    try:
        if not utils.check_data(input_data):
            utils.generate_data(output_file)
    finally:
        # Release the handle on every path, including exceptions.
        output_file.close()
def indexed_comparison(input_file, idf_file, optimal_cosine_score = 0.2):
    """Yield ``(article_id, best_match_id)`` for articles with a close earlier match.

    Articles are consumed one at a time from ``utils.generate_data(input_file)``
    as ``(article_id, article_text)`` pairs.  Each article's term frequencies
    are stored, then scored against every other stored article with
    ``Cosine_tfidf.calculate_cosine_tfidf``.  A pair is yielded only when the
    best score is strictly greater than ``optimal_cosine_score``.

    :param input_file: forwarded to ``utils.generate_data``
    :param idf_file: forwarded to the ``Cosine_tfidf`` constructor
    :param optimal_cosine_score: minimum (exclusive) score for a match
    """
    scorer = Cosine_tfidf(idf_file)
    seen_term_freqs = {}

    for article_a_id, article_a_text in utils.generate_data(input_file):
        # Store the current article's term frequencies before comparing.
        current_term_freq = calculate_term_frequencies(article_a_text)
        seen_term_freqs[article_a_id] = current_term_freq

        best_score = 0
        best_article_id = 0

        # Score against every stored article except the current one.
        for candidate_id, candidate_term_freq in seen_term_freqs.items():
            if candidate_id != article_a_id:
                score = scorer.calculate_cosine_tfidf(current_term_freq,
                                                      candidate_term_freq)
                if score > best_score:
                    best_score, best_article_id = score, candidate_id

        if best_score > optimal_cosine_score:
            yield article_a_id, best_article_id
def build_corpus_sample(K, T, W, D, M, a=1, b=1, tau=4):
    '''
    Builds a toy Corpus dataset. Generates toy data using the generative model of MixEHR.

    :param K: number of topics
    :param T: number of types
    :param W: number of words in the vocabulary
    :param D: number of documents
    :param M: number of words. We assume that all documents have same length.
    :param a: shape parameter of gamma distribution; used to sample the hyper-parameters
    :param b: scale parameter of gamma distribution; used to sample the hyper-parameters
    :param tau: mean used to sample w
    :return: tuple ``(corpus, theta)``. ``corpus`` is ``Corpus(dataset, T, W, C)``
        with one ``Corpus.Patient`` per document, ``C`` the total number of
        (type, word) occurrences, and the generated ``z`` and ``g`` attached
        as ``corpus.z`` / ``corpus.g``. ``theta`` is returned unchanged from
        ``generate_data``.
    '''
    # Bind the sampled type matrix to `types` so it does not shadow the
    # `b` hyper-parameter passed in by the caller.
    y, types, x, z, g, theta = generate_data(K, T, W, D, M, a, b, tau)
    dataset = []
    C = 0  # running total of record frequencies over all documents
    for i in range(D):
        # Each occurrence of a (type, word) pair in document i counts once.
        cnt = Counter(zip(types[:, i], x[:, i]))
        patient = Corpus.Patient(i, i, y[i])
        for (type_id, word), freq in cnt.items():
            patient.append_record(type_id, word, freq)
            C += freq
        dataset.append(patient)
    corpus = Corpus(dataset, T, W, C)
    corpus.z = z
    corpus.g = g
    return corpus, theta
def indexed_comparison(input_file, idf_file, optimal_cosine_score=0.2):
    """Stream articles and yield each one together with its best earlier match.

    ``utils.generate_data(input_file)`` supplies ``(article_id, article_text)``
    pairs.  For every article the term frequencies are computed and stored,
    and the article is scored against all other stored articles using
    ``Cosine_tfidf.calculate_cosine_tfidf``.  ``(article_id, best_match_id)``
    is yielded when the highest score strictly exceeds
    ``optimal_cosine_score``.

    :param input_file: forwarded to ``utils.generate_data``
    :param idf_file: forwarded to the ``Cosine_tfidf`` constructor
    :param optimal_cosine_score: minimum (exclusive) score for a match
    """
    scorer = Cosine_tfidf(idf_file)
    term_freqs_by_id = {}

    for article_a_id, article_a_text in utils.generate_data(input_file):
        # Store the current article's term frequencies before comparing.
        current_term_freq = calculate_term_frequencies(article_a_text)
        term_freqs_by_id[article_a_id] = current_term_freq

        best_score = 0
        best_article_id = 0

        # Score against every stored article except the current one.
        for candidate_id, candidate_term_freq in term_freqs_by_id.items():
            if candidate_id != article_a_id:
                score = scorer.calculate_cosine_tfidf(
                    current_term_freq, candidate_term_freq)
                if score > best_score:
                    best_score, best_article_id = score, candidate_id

        if best_score > optimal_cosine_score:
            yield article_a_id, best_article_id
def main():
    """Benchmark an ONNX model on synthetic data and save the metrics.

    Reads the model path, dataset size, batch size, backend name and print
    frequency from the parsed command-line arguments, rewrites the first
    dimension of the model's first input to the requested batch size, imports
    the model, evaluates it on generated data and writes the metric values
    under ``results/`` using ``args.output_file``.
    """
    args = parser.parse_args()
    model_path, dataset_size, batch_size, backend_name, print_freq = (
        args.model, args.size, args.batch_size, args.backend, args.print_freq)

    # Load the ONNX model and overwrite its declared batch size with the
    # value passed by the user.
    model_proto = onnx.load(model_path)
    first_input_shape = model_proto.graph.input[0].type.tensor_type.shape
    first_input_shape.dim[0].dim_value = batch_size
    ng_model = import_onnx_model(model_proto)

    # The first model parameter's shape is unpacked as
    # (batch, channels, height, width).
    n_batch, n_channels, height, width = ng_model.get_parameters()[0].shape

    # Synthetic dataset filled with random values; every image gets label 0.
    images = generate_data(count=dataset_size,
                           batch_size=n_batch,
                           image_channels=n_channels,
                           image_height=height,
                           image_width=width)
    labelled_images = [(img, 0) for img in images]

    perf_metrics = evaluate(backend_name, ng_model, labelled_images,
                            batch_size, print_freq)
    metric_values = {key: val.data for key, val in perf_metrics.items()}
    save_results('results/', args.output_file, metric_values)
def main():
    """Generate a data set and recover its thetas with the normal equation.

    Prints the thetas returned by ``generate_data`` followed by the thetas
    computed by ``normal_equation`` from the generated ``x`` and ``y``.
    """
    features = 5
    cases = 10000
    variance = 0

    x, y, thetas = generate_data(features, cases, variance)
    print(f'Generated thetas: {thetas}')

    thetas = normal_equation(x, y)
    print(f'Calculated thetas: {thetas}')
def create_application():
    """Create and configure the Flask application.

    Loads configuration from ``Config()``, removes Flask's default log
    handler, calls ``generate_data`` with the configured ``JSON_TMP_FILE``,
    registers the guids blueprint and installs a JSON error handler for the
    status codes 400, 401, 404 and 403.

    Returns:
        The configured ``Flask`` application object.
    """
    app = Flask(__name__)
    app.config.from_object(Config())
    app.logger.removeHandler(default_handler)

    generate_data(app.config['JSON_TMP_FILE'])

    # The blueprint is imported inside the factory, as in the original code.
    from controllers.guids import guids_blueprint
    app.register_blueprint(guids_blueprint)

    def simple_error(e):
        # Respond with the error's code and description as JSON.
        payload = jsonify(error=e.code, message=e.description)
        return payload, e.code

    for status_code in (400, 401, 404, 403):
        app.errorhandler(status_code)(simple_error)

    return app
def test_PointerSeq2Seq_TSP(): ''' The data has been generated from https://github.com/vyraun/Keras-Pointer-Network. A sample in X looks like [(8, 0), (2, 8), (6, 9), (9, 8), (7, 5), (0, 5), (4, 6), (8, 2), (5, 2), (4, 9), (5, 0)] A sample in Y looks like [0, 1, 3, 9, 7, 8, 5, 4, 2, 10, 6] ''' X = [] Y = [] for _ in xrange(0, tsp_samples): X.append(utils.generate_data(tsp_input_length)) for samples in X: solution = utils.Tsp(samples) Y.append(solution.solve_tsp_dynamic()) ''' One hot encoding for the output symbols. ''' one_hot_matrix = np.eye(tsp_input_length) Y = [[one_hot_matrix[sample[x]] for x in range(len(sample))] for sample in Y] # pprint(X[0]) # pprint(Y[0]) # raw_input() #Transmuting the data into Numpy arrays X = np.asarray(X) / 10.0 Y = np.asarray(Y) x_train, x_test = X[:int(X.shape[0] * .80)], X[int(X.shape[0] * .80):] y_train, y_test = Y[:int(Y.shape[0] * .80)], Y[int(Y.shape[0] * .80):] print "Done making dummy data" print "tsp_input_length", tsp_input_length, "sd", tsp_input_dim models = Pointer(output_dim=tsp_output_dim, hidden_dim=tsp_hidden_dim, output_length=tsp_output_length, input_shape=(tsp_input_length, tsp_input_dim), batch_size=10, bidirectional=False) print "Done creating model" # models.compile(loss='mse', optimizer='fast_compile') models.compile(loss='mse', optimizer='sgd') print models.summary() models.fit(X, Y, epochs=10, batch_size=10) print "Done fitting model" print "Done everything master" while True: cmd = raw_input( "Master, please give Dobby a sock now. (Just write sock)") if cmd.lower() == "sock": break print "Master, why must thy be so cruel." print "Let's try that again."
def build_fullyconnected(norm=np.inf, nhidden=5):
    """Build a two-layer fully connected network on freshly generated data.

    Args:
        norm: forwarded to ``generate_data`` as its ``norm`` keyword.
        nhidden: number of hidden units; the layers are created with shapes
            ``(nhidden, 2)`` and ``(1, nhidden)``.

    Returns:
        A ``Network`` built from the generated ``X``, ``y`` and both layers.
    """
    # Generate the data first, then create the layers in network order.
    X, y = generate_data(norm=norm)
    hidden_layer = Layer((nhidden, 2))
    output_layer = Layer((1, nhidden))
    return Network(X, y, [hidden_layer, output_layer])
def main():
    """Fit thetas by gradient descent on generated data and report the result.

    Generates ``x``, ``y`` and the reference thetas, runs ``gradient_descent``
    with the fixed hyper-parameters below, plots the returned costs and prints
    both the generated and the calculated thetas.
    """
    # Data set configuration.
    features = 5
    cases = 10000
    variance = 0

    # Optimiser configuration.
    alpha = 5e-9
    iterations = 100000
    epsilon = 1e-6

    x, y, generated_thetas = generate_data(features, cases, variance)
    calculated_thetas, costs = gradient_descent(
        x, y, alpha, features, cases, iterations, epsilon)

    plot_costs(costs)
    print(f'Generated thetas: {generated_thetas}')
    print(f'Calculated thetas: {calculated_thetas}')
def test_PointerSeq2Seq_TSP(): ''' The data has been generated from https://github.com/vyraun/Keras-Pointer-Network. A sample in X looks like [(8, 0), (2, 8), (6, 9), (9, 8), (7, 5), (0, 5), (4, 6), (8, 2), (5, 2), (4, 9), (5, 0)] A sample in Y looks like [0, 1, 3, 9, 7, 8, 5, 4, 2, 10, 6] ''' X = [] Y = [] for _ in xrange(0,tsp_samples): X.append(utils.generate_data(tsp_input_length)) for samples in X: solution = utils.Tsp(samples) Y.append(solution.solve_tsp_dynamic()) ''' One hot encoding for the output symbols. ''' one_hot_matrix = np.eye(tsp_input_length) Y = [[ one_hot_matrix[sample[x]] for x in range(len(sample)) ] for sample in Y ] # pprint(X[0]) # pprint(Y[0]) # raw_input() #Transmuting the data into Numpy arrays X = np.asarray(X)/10.0 Y = np.asarray(Y) x_train,x_test = X[:int(X.shape[0]*.80)],X[int(X.shape[0]*.80):] y_train,y_test = Y[:int(Y.shape[0]*.80)],Y[int(Y.shape[0]*.80):] print "Done making dummy data" print "tsp_input_length", tsp_input_length, "sd", tsp_input_dim models = Pointer(output_dim=tsp_output_dim, hidden_dim=tsp_hidden_dim, output_length=tsp_output_length, input_shape=(tsp_input_length, tsp_input_dim), batch_size=10,bidirectional=False) print "Done creating model" # models.compile(loss='mse', optimizer='fast_compile') models.compile(loss='mse', optimizer='sgd') print models.summary() models.fit(X, Y, epochs=10,batch_size=10) print "Done fitting model" print "Done everything master" while True: cmd = raw_input("Master, please give Dobby a sock now. (Just write sock)") if cmd.lower() == "sock": break print "Master, why must thy be so cruel." print "Let's try that again."
def run_k_means_gmm(): k = 3 num_samples = 1000 x_limits = [0,10] y_limits = [0,10] data, means = utils.generate_data(k=k, num_samples=num_samples, x_limits=x_limits, y_limits=y_limits) # plot generated data utils.plot_data_k(data, k, means) # run k-means a few times and take best init_means, assignments, best_dist = initialize(data, k, num_runs=10) # plot results print 'total euclidean distance: {}'.format(best_dist) utils.plot_data_assigments(data, init_means, assignments)
def train():
    """Run one training pass over the batches produced by ``generate_data``.

    Uses the module-level ``model``, ``optimizer``, ``criterion``, ``device``,
    ``text_as_int``, ``batch_size``, ``seq_len``, ``vocab_size`` and ``epoch``.
    Every tenth batch the running mean of the batch losses is printed.
    """
    model.train()
    batch_losses = []

    batches = generate_data(text_as_int, batch_size, seq_len)
    for batch_id, (src, trg) in enumerate(batches):
        # Swap the first two dimensions and move both tensors to the device.
        src = src.permute(1, 0).to(device)
        trg = trg.permute(1, 0).to(device)

        optimizer.zero_grad()
        flat_preds = model(src).contiguous().view(-1, vocab_size)
        loss = criterion(flat_preds, trg.contiguous().view(-1))

        batch_losses.append(loss.item())
        avg_loss = sum(batch_losses) / len(batch_losses)

        loss.backward()
        optimizer.step()

        if batch_id % 10 == 0:
            print(f'epoch: {epoch} | loss: {avg_loss:.4f}')
def cw_attack(file_name, norm, sess, num_image=10, cifar = False, tinyimagenet = False):
    """Attack a saved model and return the average distortion and time per input.

    Args:
        file_name: path of the saved Keras model, passed to ``load_model``.
        norm: ``'1'`` (EADL1), ``'2'`` (CarliniL2) or ``'i'`` (CarliniLi).
        sess: session object handed to the attack constructor.
        num_image: number of samples requested from ``generate_data``.
        cifar: use the CIFAR data set (32x32, 3 channels).
        tinyimagenet: use tinyImagenet (64x64, 3 channels, 200 labels);
            only consulted when ``cifar`` is false. MNIST (28x28, 1 channel)
            is used when both flags are false.

    Returns:
        ``(UB, seconds_per_input)``: the mean of ``norm_fn`` over the
        perturbations, and the attack time divided by the number of inputs.

    Raises:
        ValueError: if ``norm`` is not one of the supported values.
    """
    np.random.seed(1215)
    tf.set_random_seed(1215)
    random.seed(1215)

    if norm == '1':
        attack_cls = EADL1
        norm_fn = lambda x: np.sum(np.abs(x), axis=(1, 2, 3))
    elif norm == '2':
        attack_cls = CarliniL2
        norm_fn = lambda x: np.sum(x**2, axis=(1, 2, 3))
    elif norm == 'i':
        attack_cls = CarliniLi
        norm_fn = lambda x: np.max(np.abs(x), axis=(1, 2, 3))
    else:
        # Fail before loading data and model instead of hitting an unbound
        # local once the attack is constructed.
        raise ValueError(
            "norm must be '1', '2' or 'i', got {!r}".format(norm))

    if cifar:
        data = CIFAR()
    elif tinyimagenet:
        data = tinyImagenet()
    else:
        data = MNIST()

    model = load_model(file_name,
                       custom_objects={'fn': loss, 'tf': tf,
                                       'ResidualStart': ResidualStart,
                                       'ResidualStart2': ResidualStart2})
    inputs, targets, true_labels, true_ids, img_info = generate_data(
        data, samples=num_image, targeted=True,
        random_and_least_likely=True, target_type=0b0010,
        predictor=model.predict, start=0)

    # The attack classes read these attributes from the model object;
    # `predict` is rebound to the model itself only after generate_data has
    # used the original predict method.
    model.predict = model
    model.num_labels = 10
    if cifar:
        model.image_size = 32
        model.num_channels = 3
    elif tinyimagenet:
        model.image_size = 64
        model.num_channels = 3
        model.num_labels = 200
    else:
        model.image_size = 28
        model.num_channels = 1

    start_time = timer.time()
    attack = attack_cls(sess, model, max_iterations=1000)
    perturbed_input = attack.attack(inputs, targets)
    UB = np.average(norm_fn(perturbed_input - inputs))
    return UB, (timer.time() - start_time) / len(inputs)
def train(bob_or_eve, results, max_iters, print_every, es=0., es_limit=100): count = 0 for i in range(max_iters): msg_in_val, key_val = generate_data() if bob_or_eve == 'bob': loss = train_bob(msg_in_val, key_val) results = np.hstack((results, error_bob(msg_in_val, key_val).sum())) elif bob_or_eve == 'eve': loss = train_eve(msg_in_val, key_val) results = np.hstack((results, error_eve(msg_in_val, key_val).sum())) if i % print_every == 0: print 'training loss:', loss if es and loss < es: count += 1 if count > es_limit: break return np.hstack((results, np.repeat(results[-1], max_iters - i - 1)))
def create_feature_list():
    """Generate the list of feature names.

    Runs ``features.extract_all_eeg_features`` on a shortened slice of
    generated test data and returns the keys collected in the row.

    Returns:
        The keys of the ``OrderedDict`` passed to
        ``features.extract_all_eeg_features`` (presumably one key per
        extracted feature, in insertion order).
    """
    # Generate test data; only the EEG signal is needed here.
    eeg, _user, _classlabel, _name = utils.generate_data()

    # Only use a small fraction of the data.  Floor division keeps the slice
    # bound an integer on Python 3 as well.
    signal_length = features.SIGNAL_LENGTH // SIGNAL_LENGTH_REDUCE_FACTOR
    eeg = eeg[:signal_length, :]

    # Generate the features into an ordered row.
    row = OrderedDict()
    features.extract_all_eeg_features(eeg, row, signal_length, "")

    return row.keys()
# Train a two-parameter (a, b) TensorFlow model on data from generate_data().
# Visualises the raw training data, normalises train and test inputs (the
# test set reuses the mean/std computed on the training set), builds the
# placeholders, cost and optimizer ops, then runs 400 optimisation steps in a
# tf.Session while printing train/test cost and plotting, and finally prints
# the optimised a and b.
# NOTE(review): `viz_trainining` is the keyword as spelled at this call site;
# confirm it matches the visualize_data signature.
# NOTE(review): this source is whitespace-collapsed, so which statements sit
# inside the training loop cannot be confirmed from here.
def main(): # Step 1: Generate and visualize training data X_train, Y_train, X_test, Y_test = generate_data(3, 5, train_set_ratio=0.9) visualize_data(X_train, Y_train) n_samples = len(X_train) # Step 1b: Normalize Xs and re-visualize training data X_train, mean_train, std_train = normalize_feature(X_train, mode='train') X_test = normalize_feature(X_test, mode='test', mean=mean_train, std=std_train) visualize_data(X_train, Y_train, viz_trainining=True) # Step 2: Initialize Placeholders for input data X = tf.placeholder(shape=[None], dtype=tf.float32, name='X') Y = tf.placeholder(shape=[None], dtype=tf.float32, name='Y') # Step 3: Build up your model graph a_sym, b_sym, = define_parameters() cost = define_cost_func(X, Y, a_sym, b_sym, n_samples) # Step 4: Create optimizer op and initializer op learning_rate = 0.03 optimizer, initializer = define_optimizer(learning_rate, cost) # tf.summary.scalar('cost', cost_tensor) with tf.Session() as sess: sess.run(initializer) for i in range(400): _, cost_train = sess.run([optimizer, cost], feed_dict={X: X_train, Y: Y_train}) a, b = sess.run([a_sym, b_sym]) cost_test = sess.run(cost, feed_dict={X: X_test, Y: Y_test}) print('a=', a, 'b=', b) print('Training Cost =', cost_train, "\tTesting Cost =", cost_test) plt.plot(X_train, Y_train, 'bo') draw_model(a, b) plt.pause(0.1) print('Optimized variable: a = ', a) print('Optimized variable: b = ', b)
""" @Author:yanqiang @File: sentiment_classidication.py @Time: 2018/9/28 11:14 @Software: PyCharm @Description: """ import numpy as np import pandas as pd import lightgbm as lgb from utils import generate_data,word_seg from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score X,y_sub,y_sent,X_submit,labels_subject=generate_data(use_sina=True) # 类别标签转换一下 sent_labels={-1:0,0:1,1:2} labels_sent={0:-1,1:0,2:1} y_sent=[sent_labels[i] for i in y_sent.tolist()] X_train_sent,X_test_sent,y_train_sent,y_test_sent=train_test_split(X,y_sent,random_state=42) def train(): # 主题 lgb_train_sub=lgb.Dataset(X_train_sent,y_train_sent) lgb_eval_sub=lgb.Dataset(X_test_sent,y_test_sent) params_sub= { 'task': 'train',
# Entry point for training or sampling a TableGan model, configured via FLAGS.
# - Defaults input_width/output_width to the matching heights when unset and
#   creates the checkpoint parent and sample directories if they are missing.
# - Looks FLAGS.test_id up in the built-in test_cases table and copies that
#   case's alpha/beta/delta_m/delta_v into FLAGS; when the id is unknown it
#   falls back to "OI_11_00" (alpha = beta = 1.0, both deltas 0.0).
# - Sets the input/output height and width to 7.
# - Chooses the checkpoint folder (test id prefixed with 'atk_' when
#   FLAGS.shadow_gan is set), creates it and stores it in FLAGS.checkpoint_dir.
# - Builds TableGan inside a tf.Session with GPU memory growth enabled, then
#   either trains, or loads a checkpoint (raising if none can be loaded) and
#   calls generate_data with OPTION 5 for the shadow GAN or OPTION 1 otherwise.
# - Prints the elapsed wall-clock time.
# NOTE(review): this source is whitespace-collapsed, so exact nesting (for
# example whether the size-7 assignments belong to the `not found` branch)
# cannot be confirmed from here.
def main(_): a = datetime.datetime.now() if FLAGS.input_width is None: FLAGS.input_width = FLAGS.input_height if FLAGS.output_width is None: FLAGS.output_width = FLAGS.output_height if not os.path.exists(FLAGS.checkpoint_par_dir): os.makedirs(FLAGS.checkpoint_par_dir) if not os.path.exists(FLAGS.sample_dir): os.makedirs(FLAGS.sample_dir) test_cases = [{ 'id': 'OI_11_00', 'alpha': 1.0, 'beta': 1.0, 'delta_v': 0.0, 'delta_m': 0.0 }, { 'id': 'OI_11_11', 'alpha': 1.0, 'beta': 1.0, 'delta_v': 0.1, 'delta_m': 0.1 }, { 'id': 'OI_11_22', 'alpha': 1.0, 'beta': 1.0, 'delta_v': 0.2, 'delta_m': 0.2 }, { 'id': 'OI_101_00', 'alpha': 1.0, 'beta': 0.1, 'delta_v': 0.0, 'delta_m': 0.0 }, { 'id': 'OI_101_11', 'alpha': 1.0, 'beta': 0.1, 'delta_v': 0.1, 'delta_m': 0.1 }, { 'id': 'OI_101_22', 'alpha': 1.0, 'beta': 0.1, 'delta_v': 0.2, 'delta_m': 0.2 }, { 'id': 'OI_1001_00', 'alpha': 1.0, 'beta': 0.01, 'delta_v': 0.0, 'delta_m': 0.0 }, { 'id': 'OI_1001_11', 'alpha': 1.0, 'beta': 0.01, 'delta_v': 0.1, 'delta_m': 0.1 }, { 'id': 'OI_1001_22', 'alpha': 1.0, 'beta': 0.01, 'delta_v': 0.2, 'delta_m': 0.2 }] found = False for case in test_cases: if case['id'] == FLAGS.test_id: found = True FLAGS.alpha = case['alpha'] FLAGS.beta = case['beta'] FLAGS.delta_m = case['delta_m'] FLAGS.delta_v = case['delta_v'] print(case) if not found: print("Using OI_11_00") FLAGS.test_id = "OI_11_00" FLAGS.alpha = 1.0 FLAGS.beta = 1.0 FLAGS.delta_m = 0.0 FLAGS.delta_v = 0.0 FLAGS.input_height = 7 FLAGS.input_width = 7 FLAGS.output_height = 7 FLAGS.output_width = 7 if FLAGS.shadow_gan: checkpoint_folder = FLAGS.checkpoint_par_dir + '/' + FLAGS.dataset + "/" + 'atk_' + FLAGS.test_id else: checkpoint_folder = f'{FLAGS.checkpoint_par_dir}/{FLAGS.dataset}/{FLAGS.test_id}' if not os.path.exists(checkpoint_folder): os.makedirs(checkpoint_folder) FLAGS.checkpoint_dir = checkpoint_folder pp.pprint(flags.FLAGS.__flags) print(FLAGS.y_dim) # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333) run_config = 
tf.ConfigProto() run_config.gpu_options.allow_growth = True print("Chekcpoint : " + FLAGS.checkpoint_dir) with tf.Session(config=run_config) as sess: tablegan = TableGan(sess, input_width=FLAGS.input_width, input_height=FLAGS.input_height, output_width=FLAGS.output_width, output_height=FLAGS.output_height, batch_size=FLAGS.batch_size, sample_num=FLAGS.batch_size, y_dim=FLAGS.y_dim, dataset_name=FLAGS.dataset, crop=FLAGS.crop, checkpoint_dir=FLAGS.checkpoint_dir, sample_dir=FLAGS.sample_dir, alpha=FLAGS.alpha, beta=FLAGS.beta, delta_mean=FLAGS.delta_m, delta_var=FLAGS.delta_v, label_col=FLAGS.label_col, attrib_num=FLAGS.attrib_num, is_shadow_gan=FLAGS.shadow_gan, test_id=FLAGS.test_id) show_all_variables() if FLAGS.train: tablegan.train(FLAGS) else: if not tablegan.load(FLAGS.checkpoint_dir)[0]: raise Exception("[!] Train a model first, then run test mode") # Below is codes for visualization if FLAGS.shadow_gan: # using Disriminator sampler for Membership Attack OPTION = 5 else: print("Generating data...") OPTION = 1 generate_data(sess, tablegan, FLAGS, OPTION) print('Time Elapsed: ') b = datetime.datetime.now() print(b - a)
[np.log(pis[k]) + mvn.logpdf(X[n, :], mus[k, :], Sigmas[k, :, :]) for k in range(K) for n in range(N)]) log_probs = np.reshape(log_probs_flat, (K, N)).T L = np.sum(logsumexp(log_probs, axis=1)) return L from utils import generate_parameters, generate_data from plotting import double_panel_demo if __name__ == '__main__': K = 3 N = 100 num_its = 16 X = generate_data(N, generate_parameters(K))[0] plt = double_panel_demo(K) while True: X = generate_data(N, generate_parameters(K))[0] plt.set_new_lims(X, num_its) params = generate_parameters(K) # these initial parameters are an independent draw from the prior objective = [] plt.cla('ax1') plt.cla('ax2') plt.plot_points_black(X) plt.draw() plt.pause(2.)
print(f"Please enter god_separation with {d} dimensions:") god_separation = list(map(int, input().split())) assert len(god_separation ) == d, f"Length of god_separation is not equal to {d}." print( "Please enter [lower bound, upper bound) of points (e.g. \'-100 100\'):" ) lb, ub = list(map(int, input().split())) assert lb < ub and lb * ub < 0, f"The input lower bound {lb} and upper bound {ub} are not valid." else: raise ValueError(f"Input {use_given_config} is not valid.") output_path = generate_data(n, d, god_separation, lower_bound=lb, upper_bound=ub, data_path=output_path) plot_dataset(output_path) # plot points from data set data_iter = get_iter(output_path) w = main_loop(data_iter, w, n, d, ub, lb) # added two parameters ub lb print(f"Finish in {time.time()-st:.2f}s.") print(f"Tartget plane: {god_separation}") else: n, d, ub, lb = plot_dataset(input_path) print(f"Dataset size: {n} dimensions: {d}") with open(input_path) as data_iter: w = main_loop(data_iter, w, n, d, ub, lb) # added 4 parameters n, d, ub, lb print(f"Finish in {time.time()-st:.2f}s.") print(f"Found separation plane: {w}")
import matplotlib.pyplot as plt plt.ion() from utils import generate_data, get_context # DEBUGGING from theano import ProfileMode # mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker()) # mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False) mode = None # generate data print ">> Generating dataset..." data = generate_data(1000) # np.random.randint(2, size=(10000, n_visible)) data_context = get_context(data, N=1) # keep the number of dimensions low data_train = data[:-1000, :] data_eval = data[-1000:, :] data_context_train = data_context[:-1000, :] data_context_eval = data_context[-1000:, :] n_visible = data.shape[1] n_context = data_context.shape[1] n_hidden = 20 n_factors = 50 print ">> Constructing RBM..." numpy_rng = np.random.RandomState(123)
print("Evaluating", modelfile) sys.stdout.flush() random.seed(args.seed) np.random.seed(args.seed) tf.set_random_seed(args.seed) # the weights and bias are saved in lists: weights and bias # weights[i-1] gives the ith layer of weight and so on weights, biases = get_weights_list(model) inputs, targets, true_labels, true_ids, img_info = generate_data( data, samples=data.test_labels.shape[0], total_images=args.numimage, targeted=targeted, random_and_least_likely=True, force_label=force_label, target_type=target_type, predictor=model.model.predict, start=args.startimage) # get the logit layer predictions preds = model.model.predict(inputs) task_input = locals() task_modudle = __import__("task_" + args.task) task = task_modudle.task(**task_input) # warmup if args.warmup: print("warming up...") task.warmup()
# Network predictions pred_out = RNN(x, W, b, num_hidden_units, seq_max_len, seqLen) # pred_out = LSTM(x, W, b, num_hidden_units, seq_max_len, seqLen) # Define the loss function (i.e. mean-squared error loss) and optimizer cost = tf.reduce_mean(tf.square(pred_out - y)) train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Creating the op for initializing all variables init = tf.global_variables_initializer() # ========== # TOY DATA # ========== x_train, y_train, seq_len_train = generate_data(count=1000, max_length=seq_max_len, dim=input_dim) x_test, y_test, seq_len_test = generate_data(count=5, max_length=seq_max_len, dim=input_dim) # x_test = np.array([[[1], [2], [3], [4]], # [[1], [2], [0], [0]], # [[4], [5], [3], [9]]]) # seq_len_test = np.array([4, 2, 4]) # y_test = np.array([[10], [3], [21]]) # ========== # Launch the graph (session) with tf.Session() as sess: sess.run(init) print('----------Training---------') for i in range(training_steps): x_batch, y_batch, seq_len_batch = next_batch(x_train, y_train, seq_len_train, batch_size)
batch_first=True, independent_linears=False, copy_mode=copy_mode) rnn.load_state_dict(torch.load(current_model)) # Execute the evaluation sigm = T.nn.Sigmoid() sequence_length -= 1 for i in tqdm(range(0, args.iterations)): x, y, _ = generate_data(1, sequence_length, sequence_num_of_bits + 3, steps=steps, non_uniform=True, ordered=False) a = execute(rnn, x, y, sequence_length) x, y, _ = generate_data(1, sequence_length, sequence_num_of_bits + 3, steps=steps, non_uniform=True, ordered=True) b = execute(rnn, x, y, sequence_length) x, y, _ = generate_data(1, sequence_length, sequence_num_of_bits + 3,
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 6 17:21:14 2016

@author: hughsalimbeni
"""

from utils import generate_data
import pickle
import numpy as np
import matplotlib.pyplot as plt

# Mixture parameters: three 2-D rows in `mus`, three 2x2 matrices in `Sigmas`
# and three weights in `pis`, normalised to sum to one.
mus = np.reshape((3., 3., -3, 3, 0, -3), (3, 2))
Sigmas = np.reshape((1., 0., 0., 1., 1., 0., 0., 1., 2., 0., 0., 0.5), (3, 2, 2))
pis_ = np.array((2., 1., 1.,))
pis = pis_/np.sum(pis_)

params = (pis, mus, Sigmas)

# Draw a small and a large sample and scatter-plot their first two columns.
data_1 = generate_data(50, params)[0]
plt.scatter(data_1[:, 0], data_1[:, 1])

data_2 = generate_data(500, params)[0]
plt.scatter(data_2[:, 0], data_2[:, 1])

# Use a context manager so the pickle file is flushed and closed on every
# path instead of relying on garbage collection of the file object.
with open("data.p", "wb") as data_file:
    pickle.dump((data_1, data_2), data_file)
# Command-line entry point that generates a synthetic 2-D grid dataset.
# Parses the output file locations, grid size, signal-region settings and
# distribution settings; builds the signal distribution (a ProxyDistribution
# looked up by name in signal_distributions when --signal_dist_name is given,
# otherwise GaussianKnown); computes the signal weights with
# calculate_signal_weights; draws the data with generate_data; and writes the
# data, weights, true signals, oracle posteriors, grid edges and trails with
# np.savetxt.  When --plot is given the result is also plotted with plot_2d.
# NOTE(review): the --null_stdev and --signal_stdev help strings say
# "variance", but null_stdev is passed as `scale=` to norm.pdf (presumably
# scipy.stats.norm, where scale is the standard deviation) - confirm and fix
# the help text.
# NOTE(review): the comment before the weights_file write repeats
# "Save the dataset to file".
# NOTE(review): uses the Python 2 print statement.
def main(): parser = argparse.ArgumentParser(description='Generates a 2-dimensional grid dataset.') parser.add_argument('data_file', help='The location of the file where the data will be saved.') parser.add_argument('weights_file', help='The location of the file where the true prior weights will be saved.') parser.add_argument('signals_file', help='The location of the file where the underlying true signals will be saved.') parser.add_argument('oracle_file', help='The location of the file where the oracle posteriors will be saved.') parser.add_argument('edges_file', help='The location of the file where the grid graph edges will be saved.') parser.add_argument('trails_file', help='The location of the file where the trails will be saved.') parser.add_argument('--verbose', type=int, default=0, help='Print detailed progress information to the console. 0=none, 1=outer-loop only, 2=all details.') # Grid dimensions parser.add_argument('--width', type=int, default=128, help='The width of the 2d grid') parser.add_argument('--height', type=int, default=128, help='The height of the 2d grid') # Signal region settings parser.add_argument('--region_min_x', nargs='+', type=int, default=[10, 40], help='The min x locations at which the signal weight changes.') parser.add_argument('--region_max_x', nargs='+', type=int, default=[25, 50], help='The max x locations at which the signal weight changes.') parser.add_argument('--region_min_y', nargs='+', type=int, default=[10, 50], help='The min y locations at which the signal weight changes.') parser.add_argument('--region_max_y', nargs='+', type=int, default=[25, 60], help='The max y locations at which the signal weight changes.') parser.add_argument('--region_weights', nargs='+', type=float, default=[0.5, 0.8], help='The value of the signal weight for every region.') parser.add_argument('--default_weight', type=float, default=0.05, help='The default signal weight for any areas not in the specified regions.') # Distribution settings 
parser.add_argument('--null_mean', type=float, default=0., help='The mean of the null distribution.') parser.add_argument('--null_stdev', type=float, default=1., help='The variance of the null distribution.') parser.add_argument('--signal_mean', type=float, default=0., help='The mean of the signal distribution.') parser.add_argument('--signal_stdev', type=float, default=3., help='The variance of the signal distribution.') parser.add_argument('--signal_dist_name', help='The name of the signal distribution. This will dynamically call it by name. It must be in the signal_distributions.py file and have both the foo_pdf and foo_sample functions defined.') # Plot results parser.add_argument('--plot', help='Plot the resulting data and save to the specified file.') # Get the arguments from the command line args = parser.parse_args() if args.verbose: print 'Generating data and saving to {0}'.format(args.data_file) # Get the form of the signal distribution if args.signal_dist_name: signal_pdf = getattr(signal_distributions, '{0}_pdf'.format(args.signal_dist_name)) noisy_signal_pdf = getattr(signal_distributions, '{0}_noisy_pdf'.format(args.signal_dist_name)) signal_sample = getattr(signal_distributions, '{0}_sample'.format(args.signal_dist_name)) signal_dist = ProxyDistribution(args.signal_dist_name, signal_pdf, signal_sample) else: signal_dist = GaussianKnown(args.signal_mean, args.signal_stdev) noisy_signal_pdf = signal_dist.noisy_pdf signal_weights = calculate_signal_weights(args.width, args.height, args.default_weight, args.region_min_x, args.region_max_x, args.region_min_y, args.region_max_y, args.region_weights) # Create the synthetic dataset data, signals = generate_data(args.null_mean, args.null_stdev, signal_dist, signal_weights) # Save the dataset to file np.savetxt(args.data_file, data, delimiter=',', fmt='%f') # Save the dataset to file np.savetxt(args.weights_file, signal_weights, delimiter=',', fmt='%f') # Save the truth to file np.savetxt(args.signals_file, 
signals, delimiter=',', fmt='%d') # Save the oracle posteriors to file oracle_signal_weight = signal_weights * noisy_signal_pdf(data) oracle_null_weight = (1-signal_weights) * norm.pdf(data, loc=args.null_mean, scale=args.null_stdev) oracle_posteriors = oracle_signal_weight / (oracle_signal_weight + oracle_null_weight) np.savetxt(args.oracle_file, oracle_posteriors, delimiter=',', fmt='%f') # Save the edges to file indices = np.arange(args.width * args.height).reshape((args.width, args.height)) edges = np.array(list(zip(indices[:, :-1].flatten(), indices[:, 1:].flatten())) +\ list(zip(indices[:-1].flatten(), indices[1:].flatten()))) np.savetxt(args.edges_file, edges, delimiter=',', fmt='%d') # Save the trails to file trails = np.array(list(indices) + list(indices.T)) np.savetxt(args.trails_file, trails, delimiter=',', fmt='%d') # Plot the data if args.plot: plot_2d(args.plot, data, weights=None, true_weights=signal_weights)
adj = sp.hstack([adj, feats]).tolil() if dataset in ['protein', 'cora', 'citeseer', 'pubmed']: train = sp.hstack([train, feats]).tolil() print ae.summary() # Specify some hyperparameters epochs = 50 train_batch_size = 8 val_batch_size = 256 print('\nFitting autoencoder model...\n') dummy = np.empty(shape=(adj.shape[0], 1)) y_true = dummy.copy() mask = dummy.copy() train_data = generate_data(adj, train, feats, y_true, mask, shuffle=True) batch_data = batch_data(train_data, train_batch_size) num_iters_per_train_epoch = adj.shape[0] / train_batch_size for e in xrange(epochs): print('\nEpoch {:d}/{:d}'.format(e + 1, epochs)) print('Learning rate: {:6f}'.format(K.eval(ae.optimizer.lr))) curr_iter = 0 train_loss = [] for batch_adj, batch_train, batch_f, dummy_y, dummy_m in batch_data: # Each iteration/loop is a batch of train_batch_size samples if dataset in ['conflict', 'metabolic']: batch_adj = StandardScaler().fit_transform(batch_adj) res = ae.train_on_batch([batch_adj], [batch_train, batch_f]) else: res = ae.train_on_batch([batch_adj], [batch_train]) train_loss.append(res)
from keras.layers.core import Dense, Flatten
from utils import generate_data


def mlp(nhidden=5):
    """Build and compile a small two-layer network.

    The model is a ``Sequential`` with a ``tanh`` hidden layer of ``nhidden``
    units on 2-feature inputs, followed by a single ``tanh`` output unit,
    compiled with binary cross-entropy loss and the Adam optimizer.

    Args:
        nhidden: number of units in the hidden layer.

    Returns:
        The compiled model.
    """
    mdl = Sequential()
    mdl.add(Dense(nhidden, input_shape=(2,), activation='tanh'))
    mdl.add(Dense(1, activation='tanh'))
    mdl.compile(loss='binary_crossentropy', optimizer='adam')
    return mdl


if __name__ == '__main__':
    mdl = mlp(nhidden=25)
    X, y = generate_data()

    every = 100  # redraw the decision surface every `every` training steps
    V = [0., 0.25, 0.5, 0.75, 1.]  # contour levels

    # 200 x 200 evaluation grid over [-1, 1] x [-1, 1].
    xm, ym = np.meshgrid(np.linspace(-1,1,200), np.linspace(-1,1,200))
    Xm = np.stack([xm.ravel(), ym.ravel()])

    # np.zeros needs an integer size; the float literal 1e5 raises TypeError
    # on current NumPy releases.
    loss = np.zeros(int(1e5))
    for j in range(loss.size):
        loss[j] = mdl.train_on_batch(X.T, y)
        if j % every == 0:
            yhat = mdl.predict(Xm.T)
            plt.contourf(xm, ym, yhat.reshape(200,200), V, cmap='RdBu')
# Compute the average certified robustness (eps) of a saved Keras CNN over
# generated inputs using CNN-Cert bounds.
# Seeds the numpy, TensorFlow and random RNGs, loads the model from file_name
# and wraps it in CNNModel (input shape 64x64x3 for tinyimagenet, 32x32x3 for
# cifar, CNNModel's default otherwise), selects the module-global
# linear_bounds function from `activation`, calls recompile() on
# upper_bound_conv, lower_bound_conv and compute_bounds, and generates
# n_samples targeted inputs.  For every input it replaces the last layer by
# the difference between the predicted-label and target-label rows and runs a
# 15-step search on log(eps), raising eps while the lower bound from
# find_output_bounds is positive and lowering it otherwise.
# Returns (eps_avg, total_time), where total_time is the elapsed time divided
# by the number of inputs.
# NOTE(review): q_n is not used anywhere in the visible body.
# NOTE(review): p_n == 105 is reported as norm 'i' in the log lines.
# NOTE(review): str_p_n is only assigned inside the per-image loop, and both
# averages divide by len(inputs), so an empty `inputs` would fail - confirm
# callers never pass n_samples that yields no inputs.
def run(file_name, n_samples, p_n, q_n, activation = 'relu', cifar=False, tinyimagenet=False): np.random.seed(1215) tf.set_random_seed(1215) random.seed(1215) keras_model = load_model(file_name, custom_objects={'fn':fn, 'tf':tf}) if tinyimagenet: model = CNNModel(keras_model, inp_shape = (64,64,3)) elif cifar: model = CNNModel(keras_model, inp_shape = (32,32,3)) else: model = CNNModel(keras_model) #Set correct linear_bounds function global linear_bounds if activation == 'relu': linear_bounds = relu_linear_bounds elif activation == 'ada': linear_bounds = ada_linear_bounds elif activation == 'sigmoid': linear_bounds = sigmoid_linear_bounds elif activation == 'tanh': linear_bounds = tanh_linear_bounds elif activation == 'arctan': linear_bounds = atan_linear_bounds upper_bound_conv.recompile() lower_bound_conv.recompile() compute_bounds.recompile() if cifar: inputs, targets, true_labels, true_ids, img_info = generate_data(CIFAR(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0) elif tinyimagenet: inputs, targets, true_labels, true_ids, img_info = generate_data(tinyImagenet(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0) else: inputs, targets, true_labels, true_ids, img_info = generate_data(MNIST(), samples=n_samples, targeted=True, random_and_least_likely = True, target_type = 0b0010, predictor=model.model.predict, start=0) #0b01111 <- all #0b0010 <- random #0b0001 <- top2 #0b0100 <- least steps = 15 eps_0 = 0.05 summation = 0 warmup(model, inputs[0].astype(np.float32), eps_0, p_n, find_output_bounds) start_time = time.time() for i in range(len(inputs)): print('--- CNN-Cert: Computing eps for input image ' + str(i)+ '---') predict_label = np.argmax(true_labels[i]) target_label = np.argmax(targets[i]) weights = model.weights[:-1] biases = model.biases[:-1] shapes = model.shapes[:-1] W, b, s = 
model.weights[-1], model.biases[-1], model.shapes[-1] last_weight = (W[predict_label,:,:,:]-W[target_label,:,:,:]).reshape([1]+list(W.shape[1:])) weights.append(last_weight) biases.append(np.asarray([b[predict_label]-b[target_label]])) shapes.append((1,1,1)) #Perform binary search log_eps = np.log(eps_0) log_eps_min = -np.inf log_eps_max = np.inf for j in range(steps): LB, UB = find_output_bounds(weights, biases, shapes, model.pads, model.strides, inputs[i].astype(np.float32), np.exp(log_eps), p_n) print("Step {}, eps = {:.5f}, {:.6s} <= f_c - f_t <= {:.6s}".format(j,np.exp(log_eps),str(np.squeeze(LB)),str(np.squeeze(UB)))) if LB > 0: #Increase eps log_eps_min = log_eps log_eps = np.minimum(log_eps+1, (log_eps_max+log_eps_min)/2) else: #Decrease eps log_eps_max = log_eps log_eps = np.maximum(log_eps-1, (log_eps_max+log_eps_min)/2) if p_n == 105: str_p_n = 'i' else: str_p_n = str(p_n) print("[L1] method = CNN-Cert-{}, model = {}, image no = {}, true_id = {}, target_label = {}, true_label = {}, norm = {}, robustness = {:.5f}".format(activation,file_name, i, true_ids[i],target_label,predict_label,str_p_n,np.exp(log_eps_min))) summation += np.exp(log_eps_min) K.clear_session() eps_avg = summation/len(inputs) total_time = (time.time()-start_time)/len(inputs) print("[L0] method = CNN-Cert-{}, model = {}, total images = {}, norm = {}, avg robustness = {:.5f}, avg runtime = {:.2f}".format(activation,file_name,len(inputs),str_p_n,eps_avg,total_time)) return eps_avg, total_time
# Model parameters tf.flags.DEFINE_integer("embedding_dim", 4800, "The dimension of the embeddings") # Testing parameters tf.flags.DEFINE_string("checkpoint_dir", "./runs/1528468039/checkpoints", "Checkpoint directory from training run") tf.flags.DEFINE_string("output_file", "./output.csv", "Csv file containing the results") tf.flags.DEFINE_boolean("has_labels", False, "if has_labels => compute accuracy, if not dump output in file") FLAGS = tf.flags.FLAGS # load testing embeddings all_testing_embeddings = utils.load_embeddings(FLAGS.testing_embeddings_dir, FLAGS.embedding_dim) # generate data test_stories, test_true_endings, test_wrong_endings = utils.generate_data(all_testing_embeddings) test_stories = np.concatenate((test_stories, test_stories), axis=0) test_endings = np.concatenate((test_true_endings, test_wrong_endings), axis=0) # construct test input test_labels = [1] * len(test_true_endings) + [0] * len(test_wrong_endings) ## EVALUATION ## checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) graph = tf.Graph() with graph.as_default(): session_conf = tf.ConfigProto( allow_soft_placement=True, log_device_placement=False) sess = tf.Session(config=session_conf)
from boto3.dynamodb.types import TypeDeserializer
from pyperf import Runner

from utils import generate_data

# Benchmark input, built once at import time so that generating it is not
# part of the timed function. It is iterated as a sequence of mappings below.
data = generate_data()


def deserialize_aiodynamo():
    """Deserialize every attribute value of every item in ``data``.

    Each item is turned into a new dict with the same keys, whose values are
    passed through ``TypeDeserializer().deserialize``. A fresh
    ``TypeDeserializer`` is constructed for every single value. The resulting
    list is only bound to a local name and then discarded; the function
    returns ``None``.

    NOTE(review): the function name mentions aiodynamo, but the only
    deserializer used here is boto3's ``TypeDeserializer`` -- confirm the
    name is intended.
    """
    result = [
        {k: TypeDeserializer().deserialize(v) for k, v in item.items()}
        for item in data
    ]


# Hand the function to pyperf under the benchmark name "deserialize".
Runner().bench_func("deserialize", deserialize_aiodynamo)
import numpy as np
import utils as utils

# Build a 2D spiral data set that can be plotted.
X, y = utils.generate_data()

# Every input sample is reshaped to a (2, 1) column and every label is
# converted with utils.num_to_list and reshaped to a (3, 1) column.
# A proper train/test split is planned for a future version.
# NOTE(review): the training slice [:300] and the test slice [200:] share
# indices 200-299, so those samples are in both sets -- confirm this is intended.
X_train = [np.reshape(sample, (2, 1)) for sample in X[:300]]
X_test = [np.reshape(sample, (2, 1)) for sample in X[200:]]
Y_train = [np.reshape(utils.num_to_list(label), (3, 1)) for label in y[:300]]
Y_test = [np.reshape(utils.num_to_list(label), (3, 1)) for label in y[200:]]

# Pair each input column with its label column.
train_data = list(zip(X_train, Y_train))
test_data = list(zip(X_test, Y_test))

utils.visualise(X, y)

# Create the example network and train it on the training pairs.
example_net = utils.initialize_new()
utils.train_ex(example_net, train_data, 300, 1, 10)
import matplotlib.pyplot as plt
# Turn on matplotlib's interactive mode before anything is plotted.
plt.ion()
from utils import generate_data, get_context

# DEBUGGING
from theano import ProfileMode
# The two alternative Theano modes below are kept for debugging but disabled;
# `mode` is left as None.
# mode = theano.ProfileMode(optimizer='fast_run', linker=theano.gof.OpWiseCLinker())
# mode = theano.compile.DebugMode(check_py_code=False, require_matching_strides=False)
mode = None

# generate data
data = generate_data(200)

# use the predefined rbms.GaussianBinaryRBM, which has visible units (rbm.v), hidden units (rbm.h),
# a weight matrix W connecting them (rbm.W), and visible and hidden biases (rbm.bv and rbm.bh).
# The number of visible units is taken from the second axis of `data`.
n_visible = data.shape[1]
n_hidden = 100
rbm = rbms.GaussianBinaryRBM(n_visible, n_hidden)
# Maps the visible units to a symbolic matrix variable named 'v'.
initial_vmap = { rbm.v: T.matrix('v') }

# We use single-step contrastive divergence (CD-1) to train the RBM. For this, we can use
# the CDParamUpdater. This requires symbolic CD-1 statistics:
s = stats.cd_stats(rbm, initial_vmap, visible_units=[rbm.v], hidden_units=[rbm.h], k=1)

# We create an updater for each parameter variable
if __name__ == "__main__":

    # norm ball used to generate the toy data (also passed to the network builder)
    norm = np.inf

    # build the network
    net = build_fullyconnected(norm=norm)

    # train (should take ~10s)
    # `net()` is called `numiter` times and each return value becomes one
    # entry of `objective`; presumably every call performs one training step
    # and returns the current objective -- TODO confirm.
    numiter = 20000
    objective = np.array([net() for _ in trange(numiter)])

    # predicted class labels (on held out data)
    # NOTE(review): y_holdout is not used in the code visible here.
    X_holdout, y_holdout = generate_data(norm=norm, nsamples=5000)
    yhat = net.predict(X_holdout)[0]

    # plot the training curve
    plt.figure()
    plt.plot(np.arange(numiter), objective)
    plt.xlabel("Iteration ($k$)")
    plt.ylabel("Training error ($f(k)$)")

    # plot the held-out points, colored by their predicted label (yhat)
    plt.figure()
    plt.scatter(X_holdout[0], X_holdout[1], s=50, c=yhat, cmap="seismic")
    plt.gca().set_aspect("equal")
    plt.xlim(-1, 1)
    plt.ylim(-1, 1)
TEST_PATH = '../data/test.json' WEIGHT_SAVE_PATH = '../model_weights.hdf5' BATCH_SIZE = 32 EPOCHS = 100 # Increase this train_data = pd.read_json(TRAIN_PATH) train_data['inc_angle'] = train_data['inc_angle'].replace('na', 0) train_data['inc_angle'] = train_data['inc_angle'].astype(float).fillna(0.0) if TEST: SEED = np.random.randint(9999) else: SEED = 42 # Constant seed for comparability between runs X = generate_data(train_data) y = train_data['is_iceberg'] X_train, X_val, y_train, y_val = train_test_split(X, y, train_size=.8, random_state=SEED) callback_list = get_callbacks(WEIGHT_SAVE_PATH, 20) model = build_model() start_time = time.time() if USE_AUGMENTATION: image_augmentation = ImageDataGenerator(rotation_range=20, horizontal_flip=True, vertical_flip=True,
action='store_true', default=False, help='Don\'t train the neural network') args = parser.parse_args() if args.loss == 'MSE': ONE_HOT = True loss = ff.MSELoss() elif args.loss == 'CrossEntropy': ONE_HOT = False loss = ff.CrossEntropyLoss() else: raise ValueError('Unknown loss.') DATASET_SIZE = 1000 ## Generate dataset train_input, train_target, test_input, test_target, test_input_raw = generate_data(DATASET_SIZE, one_hot=ONE_HOT, normalize=True) ## Create model model = Net(nb_nodes = 25) print(model) if args.no_train: ## Load best model model.load('../model/best-model.pt') model.eval() # Set model to eval mode ## Ploting results of best model plot_prediction(test_input, test_input_raw, test_target, model) plt.suptitle('Prediction of the best model') plt.show() else: print('Using : {}Loss\n'.format(args.loss)) ## Training model
import pandas as pd import numpy as np from sklearn.preprocessing import OneHotEncoder import xgboost as xgb from sklearn.linear_model import LogisticRegression from config import logging import utils import config class LoadData: def __init__(self,aim = 'train'): self.aim = aim def load_data(self,): logging.info('开始加载订单数据!') df_order_data = utils.generate_data(column_names=config.order_data_names,\ aim='train',\ table_name='order_data' ) logging.info('订单数据加载完毕!') logging.info('开始加载poi数据!') df_order_data = utils.generate_data(column_names=config.poi_data_names,\ aim='train',\ table_name='poi_data' ) logging.info('poi数据加载完毕!') logging.info('开始加载道路拥堵数据!') df_traffic_data = utils.generate_data(column_names=config.traffic_data_names,\ aim='train',\ table_name='traffic_data') logging.info('道路拥堵数据加载完成!') logging.info('开始加载天气数据!') df_weather_data = utils.generate_data(column_names=config.weather_data_names,\ aim='train',\
from utils import generate_data

# Run the data generation step: called with no arguments, and the return
# value (if any) is discarded.
generate_data()
"Use training dataset for training") tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement") tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices") FLAGS = tf.flags.FLAGS # Prepare the data print("Load training and validation embeddings \n") # load training embeddings and generate training data if FLAGS.use_training_dataset: all_training_embeddings = utils.load_embeddings( FLAGS.training_embeddings_dir, FLAGS.embedding_dim) training_stories, training_true_endings, training_wrong_endings = utils.generate_data( all_training_embeddings, FLAGS.training_negative_sampling_file) print("len(training_true_endings), len(training_wrong_endings)", len(training_true_endings), len(training_wrong_endings)) training_stories = np.concatenate((training_stories, training_stories), axis=0) training_endings = np.concatenate( (training_true_endings, training_wrong_endings), axis=0) training_labels = [1] * len(training_true_endings) + [0] * len( training_wrong_endings) training_true_endings = [] training_wrong_endings = [] # load validation embeddings and generate validation data all_validation_embeddings = utils.load_embeddings(
if os.path.isfile(model_weights_name):
    # Loading old model, will continue training from saved point
    print ("Loading old model...")
    model.load_weights(model_weights_name)
else:
    print ("Could not find weights, starting from scratch")

model.compile(loss='mean_squared_error', optimizer='adam')

# Let's see how the output changes as the model trains
class training_monitor(Callback):
    """Callback that saves a predicted image and the weights after every epoch.

    The callback reads the enclosing scope's ``model``, ``X`` and ``args``
    rather than receiving them as arguments. Output file names are numbered
    with the callback's own counter ``self.epoch``, which starts at 0 and is
    incremented once per ``on_epoch_end`` call; the ``epoch`` argument passed
    in by the framework is not used.
    """

    def __init__(self):
        # Number of epochs this callback instance has seen so far.
        self.epoch = 0

    # NOTE(review): `logs={}` is a mutable default argument; it is never read
    # or mutated in this method.
    def on_epoch_end(self, epoch, logs={}):
        # Predict on the full input X and save the result as an image.
        cur_img = model.predict(X)
        save_ndarray(args.model_output_root + "_image_epoch_" + str(self.epoch) + ".jpg", cur_img, args.pixels, args.pixels)
        # Save a per-epoch snapshot of the weights, overwriting any existing file.
        model.save_weights(args.model_output_root + "_facepaint_model_epoch_" + str(self.epoch) + ".h5", overwrite=True)
        self.epoch = self.epoch + 1

image_progress_monitor = training_monitor()

# The earlier in-memory fit call is kept for reference; training now pulls
# batches from generate_data(...), with 1000 steps per epoch.
#model.fit(X, Y, nb_epoch = args.epochs, batch_size = args.batch_size, callbacks=[image_progress_monitor], shuffle=True)
model.fit_generator(generator=generate_data(X,Y,mask_matrix,args.batch_size,image_size=args.pixels), steps_per_epoch=1000, epochs=args.epochs, callbacks=[image_progress_monitor])

# Save final (best?) model
# These are the weights as they stand after the last epoch; the same path is
# the one checked for existing weights above.
model.save_weights(model_weights_name)
learnt_image = model.predict(X)
save_ndarray(args.model_output_root + "_final_image.jpg", learnt_image, args.pixels, args.pixels)
self.train_len=0 self.test_len = 0 self.valid_len = 0 self.mode="test" ## transformer的参数 self.dropout=0.5 self.max_len=5000 self.nhead=2 # data_path="E:/study_series/2020_3/re_write_classify/data/" # data_path="/mnt/data3/wuchunsheng/code/nlper/NLP_task/text_classification/my_classification_cnews/2020_3_30/text_classify/data/" config = Config() train_iter, valid_iter, test_iter, TEXT = generate_data(config) #model = RNNModel(config, TEXT).to(config.device) model=TransformerModel(config, TEXT).to(config.device) model =load_model(config, model) #sen="目"*50 sen="体育快讯" #sen="".join(['c', 'o', 'n', 't', 'e', 'x', 't', ',', 'l', 'a', 'b', 'e', 'l']) #res=test_sentence(config, model ,TEXT, sen) #print(sen) #print(res) #res=test(config,model,TEXT, test_iter) #print(res) print("=========================")
##################################################################### if __name__ == '__main__': # Create log path: Create at this level (or one above, in launch script) # so that we use same one for both steps log_path = FLAGS.log_path if FLAGS.log_path is not None else \ get_log_dir_path(FLAGS.log_root, FLAGS.run_name) # Note that the flags in this file control the dataset size, not the # normal flags in train_scripts.py! dims = [FLAGS.synthetic_dim] if FLAGS.subsample_seed > 0: np.random.seed(FLAGS.subsample_seed) X = generate_data(FLAGS.synthetic_n, d=FLAGS.synthetic_dim, r=FLAGS.synthetic_r) # For testing, also include a discriminator which is perfectly correct if FLAGS.oracle_disc: d_class = OracleDiscriminator else: d_class = SimpleDiscriminator ### ### STEP 1: TRAIN TAN ### if FLAGS.is_test: print("STEP 1: Training TAN") train_tan(X,