def main(): """Program uses LU-decomposition to solve Au=g""" # getting dimensions of matrix, and labeling data: global N N = int(eval(input("Give value for N: "))) name = "LUtest%i" % int(np.log10(N)) # generating data: x = np.linspace(0, 1, N) h = (x[-1] - x[0]) / N gen.generate_data(x, name) g = np.loadtxt("%s/data_files/%s.dat" % (dir, name)) * h**2 A = np.zeros((N, N)) # creating zero matrix. A = Afunc(A) # populating the three diagonals. start = time.default_timer() # timing solve. LU, piv = sp.lu_factor(A) # decomp and forward sub. u = sp.lu_solve((LU, piv), g) # backward sub. end = time.default_timer() print("Time spent on LU %g" % (end - start)) # printing elapsed time. # saving numerical solution: np.savetxt("%s/data_files/solution_%s.dat" % (dir, name), u, fmt="%g")
def init_data():
    """Initialising data for program as global variables."""
    global dir, N, name, x, h, anal_sol, u, d, d_prime, a, b, g, g_prime
    dir = os.path.dirname(os.path.realpath(__file__))  # current directory.
    # defining number of rows and columns in matrix:
    # NOTE(review): eval() on raw input is unsafe; int(input(...)) suffices.
    N = int(eval(input("Specify number of data points N: ")))
    # defining common label for data files:
    name = input("Label of data-sets without file extension: ")
    x = np.linspace(0, 1, N)  # array of normalized positions.
    # defining step size. NOTE(review): (x[0] - x[-1]) is negative; harmless
    # here since only h**2 is used, but the sibling main() uses the opposite
    # sign — confirm which orientation is intended.
    h = (x[0] - x[-1]) / N
    gen.generate_data(x, name)  # generating the data set.
    # analytic reference solution for later comparison:
    anal_sol = np.loadtxt("%s/data_files/anal_solution_for_%s.dat" %
                          (dir, name))
    u = np.empty(N)  # array for unknown values.
    d = np.full(N, 2)  # array for diagonal elements.
    d_prime = np.empty(N)  # array for diagonal after decomp. and sub.
    a = np.full(N - 1, -1)  # array for upper, off-center diagonal.
    b = np.full(N - 1, -1)  # array for lower, off-center diagonal.
    # array for g in matrix eq. Au=g.
    f = np.loadtxt("%s/data_files/%s.dat" % (dir, name))
    g = f * h**2
    g_prime = np.empty(N)  # array for g after decomp. and sub.
def get_model_generate(f):
    """Build an ``hmm.MultinomialHMM`` with parameters estimated by counting
    over a freshly generated (state, color) sequence.

    Args:
        f: forwarded to the module-level ``generate_data``.

    Returns:
        The parameterised ``hmm.MultinomialHMM`` instance.
    """
    data = generate_data(f)
    # Initial state distribution: empirical frequency of each state.
    probs = []
    for o in states:
        probs.append(sum(o == t[0] for t in data) / len(data))
    # Transition matrix: count state -> next-state pairs, then normalise rows.
    trans = [[0 for x in range(len(states))] for y in range(len(states))]
    states_i = np.zeros(len(states))
    for i in range(len(data) - 1):
        trans[states.index(data[i][0])][states.index(data[i + 1][0])] += 1
        states_i[states.index(data[i][0])] += 1
    # (The original popped and re-appended the last sample around this loop;
    # the loop never touches `data`, so that pair of calls was dead code.)
    for i in range(len(states)):
        for j in range(len(states)):
            trans[i][j] /= states_i[i]
    # Emission estimates: probs_est[i][j] = P(color i | state j).
    probs_est = [[0 for x in range(len(states))] for y in range(len(colors))]
    for i in range(len(colors)):
        for j in range(len(states)):
            probs_est[i][j] = data.count(
                (states[j], colors[i])) / sum(states[j] == t[0] for t in data)
    model = hmm.MultinomialHMM(len(states))
    model.n_features = len(colors)
    model.startprob_ = np.array(probs)
    model.transmat_ = np.array(trans)
    # probs_est is indexed [color][state]; hmmlearn expects emissionprob_
    # with shape (n_states, n_colors), so transpose before assignment.
    model.emissionprob_ = np.array(probs_est).T
    return model
def get_model_predictions(
    model_type: type(Model),
    num_data: int,
    num_models: int,
    model_kwargs: Optional[Dict] = None,
) -> np.ndarray:
    """Generate many model predictions.

    Args:
        model_type: The type of model to create
        num_data: The number of training data points to generate per model
        num_models: The number of models to generate
        model_kwargs: Keyword arguments for building the model.

    Returns:
        predictions: A 2D numpy array of shape (num_models, len(X_TEST))
            where each row is one model's predictions evaluated on the
            module-level X_TEST grid. (The original docstring said
            ``(num_models, num_data)``, which did not match the code.)
    """
    if not model_kwargs:
        model_kwargs = {}
    # One row per model; columns follow the shared X_TEST evaluation grid.
    predictions = np.zeros((num_models, len(X_TEST)))
    for model_num in range(num_models):
        # Fresh random training set for every model.
        data = data_generator.generate_data(num_data)
        model = model_type(data[:, 0], data[:, 1], **model_kwargs)
        model.fit()
        y_hat = model.predict(X_TEST)
        predictions[model_num] = y_hat
    return predictions
def main(args):
    """Train the selected linear classifier on synthetic 2-D data, animating
    the decision boundary as it fits, then show the loss curve.

    args: parsed CLI namespace; reads num_datapoints, model (Perceptron or
    LogisticRegression class), update_params, update_method, epochs,
    sleep_time.
    """
    blues, reds = generate_data(args.num_datapoints)
    data = np.vstack([blues, reds])
    # prepend a constant-1 bias feature to every point
    data = np.array([[1] + list(i) for i in data])
    labels = np.array([0] * len(blues) + [1] * len(reds))
    if args.model is Perceptron:
        # perceptron expects labels in {-1, +1}
        labels = labels * 2 - 1
    # shuffle points and labels together
    inds = np.arange(len(labels))
    np.random.shuffle(inds)
    labels, data = labels[inds], data[inds]
    plt.ion()  # interactive mode so the fit can be animated
    fig = plt.figure(figsize=(9, 9))
    ax = fig.add_subplot(111)
    if args.model is Perceptron:
        kwargs = {}
    else:
        kwargs = {'update_params': args.update_params}
        # NOTE(review): the nesting of the two ifs below was ambiguous in the
        # collapsed source; they are placed in the non-Perceptron branch
        # because the Perceptron branch deliberately passes no kwargs.
        if args.update_method is not None:
            kwargs['update_method'] = args.update_method
        if args.epochs is not None:
            kwargs['epochs'] = args.epochs
    model = args.model(data.shape[1], **kwargs)
    ax.scatter(*blues.T, c='blue')
    ax.scatter(*reds.T, c='red')
    ln, = plt.plot([], [], c='black')  # the animated boundary line
    lmaxx = np.max(data[:, 1])
    lmaxy = np.max(data[:, 2])
    lminx = np.min(data[:, 1])
    lminy = np.min(data[:, 2])
    plt.xlim([lminx - 1, lmaxx + 1])
    plt.ylim([lminy - 1, lmaxy + 1])
    losses = []
    # model.fit yields intermediate weight vectors; redraw per update
    for w in model.fit(data, labels):
        losses.append(model.loss(data, labels))
        plotw(ln, w, [lminx - 1, lmaxx + 1], [lminy - 1, lmaxy + 1])
        fig.canvas.draw()
        plt.pause(args.sleep_time)
        # time.sleep(args.sleep_time)
    if args.model is LogisticRegression:
        xs = np.linspace(lminx - 1, lmaxx + 1, 100)
        # NOTE(review): ys is built from the x-range (lminx/lmaxx) — looks
        # like a copy-paste slip for lminy/lmaxy; confirm intended grid.
        ys = np.linspace(lminx - 1, lmaxx + 1, 100)
        X, Y = np.meshgrid(xs, ys)
        # probability surface over the grid, row by row
        Z = np.array([
            model.calculate_probabilities(
                np.array([np.ones(100), X_, Y_]).T) for X_, Y_ in zip(X, Y)
        ])
        plt.pcolormesh(X, Y, Z, shading='gouraud', cmap=plt.cm.seismic,
                       vmin=0, vmax=1, zorder=0)
    plt.show(block=True)
    plt.plot(losses)  # final figure: loss per update step
    plt.show(block=True)
def prob_5_2_2(N, d, k, sigma, lam=300, seed=12231):
    """Single-lambda sparse-regression run.

    Generates data via dg.generate_data, runs 1000 round-robin coordinate
    updates at regularisation weight ``lam``, and returns the problem state
    together with the nonzero counts on the true support and overall.
    Typical call: N=50, d=75, k=5, sigma=10 (lambda* = 300).
    """
    y, X, wg, eps = dg.generate_data(N, d, k, sigma, seed)
    initial_w = randn(1 + d)
    prob = ls.initialize_computation(X, initial_w, y)
    prob[3] = lam  # regularisation strength slot of the problem state
    ls.round_robin(prob, 1000)
    weights = prob[0]
    on_true_support = sum(abs(weights[1:k + 1]) > 0)
    total_nonzero = sum(abs(weights[1:]) > 0)
    return (prob, on_true_support, total_nonzero)
def init_data():
    """Initialising data for program as global variables."""
    global dir, N, name, x, h, anal_sol, u, d, d_prime, a, b, g, g_prime
    dir = os.path.dirname(os.path.realpath(__file__))  # current directory.
    # defining number of rows and columns in matrix:
    # NOTE(review): eval() on raw input is unsafe; int(input(...)) suffices.
    N = int(eval(input("Specify number of data points N: ")))
    # defining common label for data files:
    name = input("Label of data-sets without file extension: ")
    x = np.linspace(0, 1, N)  # array of normalized positions.
    # defining step size (negative sign is harmless: only h**2 is used).
    h = (x[0] - x[-1]) / N
    gen.generate_data(x, name)  # generating the data set.
    # analytic reference solution for later comparison:
    anal_sol = np.loadtxt("%s/data_files/anal_solution_for_%s.dat" %
                          (dir, name))
    u = np.empty(N)  # array for unknown values.
    s = np.arange(1, N + 1)  # 1-based row indices.
    # pre-calculating the 1/d_prime factors.
    # NOTE(review): 2*s / (2*(s+1)) simplifies to s/(s+1); the written form
    # is kept verbatim.
    d_prime = 2 * (s) / (2 * (s + 1))
    f = np.loadtxt("%s/data_files/%s.dat" % (dir, name))
    g = f * h**2  # right-hand side of Au=g, scaled by h^2.
    g_prime = np.empty(N)  # array for g after decomp. and sub.
def average_calculator(model, k, kwargs, gen_data=True):
    """Train and evaluate a network ``k`` times and average the scores.

    Args:
        model: selector string, "lstm" or "cnn".
        k: number of train/evaluate runs to average over.
        kwargs: configuration dict (train_file, test_file, test_size, ...).
        gen_data: when True, regenerate the data files first.

    Returns:
        dict with averaged "precision", "recall" and "fscore".
    """
    if gen_data:
        generate_data(kwargs)
    p_1 = 0.0
    r_1 = 0.0
    f_1 = 0.0
    f_scores = []
    train_data = ATEDataProcessor(kwargs["train_file"], **kwargs)
    test_data = ATEDataProcessor(kwargs["test_file"],
                                 pos_id=get_count(
                                     train_data.annotated_sentences),
                                 **kwargs)
    for i in range(k):
        print("Run number: {}".format(i))
        test_set = test_data.annotated_sentences
        # fresh random train/dev split every run
        train_set, dev_set = split(train_data.annotated_sentences,
                                   test_size=kwargs["test_size"])
        train = DataIterator(train_set, **kwargs)
        dev = DataIterator(dev_set, **kwargs)
        test = DataIterator(test_set, **kwargs)
        # BUG FIX: the original rebound `model` (the selector string) to the
        # network object, so every run after the first skipped both branches
        # and reused the stale network. Use a separate local instead.
        if model == "lstm":
            net = LSTMNetwork(**kwargs)
        elif model == "cnn":
            net = CNNNetwork(max_sentence_length=train_data.max_sentence_len,
                             **kwargs)
        else:
            raise ValueError("unknown model type: {!r}".format(model))
        net.build()
        net.train(train, dev)
        net.restore_session(net.model_directory)
        results = net.evaluate(test)
        f_scores.append(results["f_1"])
        p_1 += float(results["p_1"])
        r_1 += float(results["r_1"])
        f_1 += float(results["f_1"])
        net.close_session()
    print("p_1: {}\nr_1: {}\nf_1: {}".format(p_1 / k, r_1 / k, f_1 / k))
    print(mean_confidence_interval(f_scores))
    return {"precision": p_1 / k, "recall": r_1 / k, "fscore": f_1 / k}
def test_student_exact():
    '''
    MCTS is now working.
    The number of rollouts required to be optimal grows very fast as a
    function of the horizon. Still, even if not fully optimal, MCTS is an
    extremely good approximation.

    Default student with horizon 10 needs about 50 rollouts is good
    learn prob 0.15 student with horizon 40 needs about 150 rollouts is
    good; gets about 0.94 which is 0.02 off from 0.96
    '''
    import concept_dependency_graph as cdg
    from simple_mdp import create_custom_dependency
    # NOTE(review): r_type and learn_prob are assigned but not used below;
    # the parallel chunks receive the module-level `sparse_r` instead.
    r_type = DENSE
    n_concepts = 4
    learn_prob = 0.5
    horizon = 6
    n_rollouts = 50
    n_trajectories = 100
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs
    #dgraph = create_custom_dependency()
    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)
    #student = st.Student(n=n_concepts,p_trans_satisfied=learn_prob, p_trans_not_satisfied=0.0, p_get_ex_correct_if_concepts_learned=1.0)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2
    # fan the MCTS rollouts out over n_jobs parallel workers
    accs = Parallel(n_jobs=n_jobs)(delayed(test_student_exact_chunk)(
        traj_per_job, dgraph, test_student, horizon, n_rollouts, sparse_r)
        for _ in range(n_jobs))
    avg = sum(accs) / (n_jobs * traj_per_job)
    # reference: expert-policy trajectories for the same student/graph
    test_data = dg.generate_data(dgraph,
                                 student=test_student,
                                 n_students=1000,
                                 seqlen=horizon,
                                 policy='expert',
                                 filename=None,
                                 verbose=False)
    print('Number of jobs {}'.format(n_jobs))
    print('Trajectory per job {}'.format(traj_per_job))
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest mcts: {}'.format(avg))
def raw_data_scatterplot():
    """Render one scatter plot per sample count, for 200 geometrically
    spaced counts between 8 and 100000, into images/raw/scatter/."""
    out_dir = os.path.join("images", "raw", "scatter")
    os.makedirs(out_dir, exist_ok=True)
    print("Raw Data")
    sample_counts = np.geomspace(8, 100000, 200)
    for frame, raw_count in tqdm(enumerate(sample_counts),
                                 total=len(sample_counts)):
        n = int(raw_count)
        pts = data_generator.generate_data(n, "fixed")
        plt.scatter(pts[:, 0], pts[:, 1], s=10, edgecolor='k',
                    facecolor="none")
        plt.xlabel("X")
        plt.ylabel("Y")
        plt.ylim(-105, 205)
        plt.xlim(-0.5, 10.5)
        plt.savefig(os.path.join(out_dir, f"scatter.{n:06d}.{frame}.png"))
        plt.close()
def prob_5_2_1(N, d, k, sigma, rat=9/10., seed=12231):
    """Trace a regularisation path: repeatedly solve, record support
    sizes, and geometrically shrink the regularisation weight by ``rat``.
    """
    # N = 50, d = 75, k = 5, sigma = 1
    (y, X, wg, eps) = dg.generate_data(N, d, k, sigma, seed)
    ##########
    w = randn(1 + d)
    prob = ls.initialize_computation(X, w, y)
    ret = []
    coeffs = []
    for i in range(100):
        ls.round_robin(prob, 1000)
        # [lambda, nonzeros on true support (first k), total nonzeros]
        ret.append([prob[3],
                    sum(abs(prob[0][1:k + 1]) > 0),
                    sum(abs(prob[0][1:]) > 0)])
        coeffs.append(prob[0].copy())
        prob[3] *= rat  # geometric decay of the regularisation weight
    ret = array(ret)
    coeffs = array(coeffs)
    #Precision
    ret[:, 2] = ret[:, 1]/ret[:, 2]
    #Recall
    ret[:, 1] = ret[:, 1]/float(k)
    return (prob, array(ret), coeffs)
if __name__ == '__main__':
    '''
    N: number of figures waiting for sort
    M: number of processes
    '''
    if (len(sys.argv) < 2):
        print("Deficiency of N[,M] !")
        sys.exit()
    N = int(sys.argv[1])
    # default M to the machine's CPU count when not given
    if (len(sys.argv) == 2):
        M = os.cpu_count()
    else:
        M = int(sys.argv[2])
    data = generate_data(N)
    #split data into M parts based on bucket-sort
    split_linspace = np.linspace(data.min(), data.max() + 1, M + 1)
    split_data = [0] * M
    for i in range(M):
        # select values falling into the i-th bucket [edge_i, edge_{i+1})
        f = filter(
            lambda x: x >= split_linspace[i] and x < split_linspace[i + 1],
            data)
        split_data[i] = np.fromiter(f, dtype=np.int32).tolist()
    '''
    fromiter + tolist is used here (rather than list(f)) because the
    elements are np.int32, which cannot be serialized to JSON. With
    split_data[i] = list(f) the numbers would stay np.int32 and the later
    call_slave_... call could not convert them to JSON.
    '''
    slaves_config = config.slaves()
def init(self, duration):
    # Populate this object's content with freshly generated data for the
    # given duration.
    # NOTE(review): units/semantics of `duration` are defined by
    # data_generator.generate_data — confirm at the call site.
    self.content = data_generator.generate_data(duration)
import data_generator
import learn

if __name__ == '__main__':
    # Generate sample data
    data_generator.generate_data()
    # Train the linear regression model
    learn.train_model()
def compare(File1, File2):
    """Compare two text files line by line.

    Returns True when the files are identical, False otherwise (printing
    the length mismatch or the first differing line). The original fell off
    the end on a full match and returned None; it now returns True
    explicitly, which keeps `== False` callers working.
    """
    # context managers guarantee the handles are closed
    with open(File1, 'r') as file1:
        ans1 = file1.readlines()
    with open(File2, 'r') as file2:
        ans2 = file2.readlines()
    if len(ans1) != len(ans2):
        print("length does not match")
        return False
    for i in range(len(ans1)):
        if ans1[i] != ans2[i]:
            print("In line {}".format(i + 1))
            print(ans1[i])
            print(ans2[i])
            print()
            return False
    return True


if __name__ == "__main__":
    # Repeatedly generate a test case, run both solutions, enforce the
    # Java CPU-time limit, and diff the two answer files.
    for i in range(100):
        gen.generate_data()
        python()
        [t1, t2] = java()
        if t2 - t1 > 2.2:
            print("CPU TIME EXCEED with CPU TIME is {0}".format(t2 - t1))
            exit(0)
        if compare('python_ans.txt', 'java_ans.txt') == False:
            exit(0)
        print("No.{} has done.".format(i + 1))
def raw_data_intro():
    """Build the intro figures: a raw scatter plot, polynomial fits of
    degrees 0..4 on one data set, and a bias/variance illustration from
    100 degree-2 models; all saved under images/raw/intro2/."""
    savedir = os.path.join("images", "raw", "intro2")
    os.makedirs(savedir, exist_ok=True)
    n = N_POINTS  # sample size for the intro data set
    m = 5  # number of polynomial degrees to fit (0..m-1)
    s = 50  # scatter marker size
    points = data_generator.generate_data(n, "fixed")
    ylim = (-105, 205)
    xlim = (-0.5, 10.5)
    cmap = get_cmap("gnuplot2")
    # just points
    plt.figure()
    plt.scatter(
        points[:, 0],
        points[:, 1],
        s=s,
        edgecolor='k',
        facecolor="none",
    )
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.ylim(*ylim)
    plt.xlim(*xlim)
    plt.tight_layout()
    plt.savefig(os.path.join(savedir, f"scatter.png"))
    plt.close()
    # second figure: the same points with one fitted curve per degree
    predictions = []
    plt.figure()
    plt.scatter(
        points[:, 0],
        points[:, 1],
        s=s,
        edgecolor='k',
        facecolor="none",
        zorder=60,
    )
    for deg in range(m):
        model = PolyfitModel(x_vals=points[:, 0], y_vals=points[:, 1],
                             deg=deg)
        model.fit()
        _predictions = model.predict(evaluate_model.X_TEST)
        predictions.append(_predictions)
        # training MSE of this fit (computed but not otherwise used here)
        mse = np.mean((model.predict(points[:, 0]) - points[:, 1])**2)
        plt.plot(
            evaluate_model.X_TEST,
            predictions[-1],
            label=f"Deg {deg}",
            c=cmap((1 + deg) / (1 + m)),
            zorder=50,
        )
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.ylim(*ylim)
    plt.xlim(*xlim)
    legend = plt.legend(loc='upper center', framealpha=1.0)
    legend.get_frame().set_alpha(0.8)
    legend.get_frame().set_facecolor((1, 1, 1, 0.8))
    legend.set_zorder(100)
    plt.tight_layout()
    plt.savefig(os.path.join(savedir, f"poly.png"))
    plt.close()
    os.makedirs(savedir, exist_ok=True)
    # third figure: 100 degree-2 models fit on small resamples
    model_type = PolyfitModel
    num_data = 8
    num_models = 100
    model_kwargs = {"deg": 2}
    predictions = evaluate_model.get_model_predictions(
        model_type=model_type,
        num_data=num_data,
        num_models=num_models,
        model_kwargs=model_kwargs,
    )
    ## Upper Plot: Many Models
    for i in range(predictions.shape[0]):
        label = "Models" if i == 0 else None  # label only the first line
        plt.plot(
            evaluate_model.X_TEST,
            predictions[i, :],
            c='blue',
            alpha=0.8,
            linewidth=0.1,
            zorder=50,
            label=label,
        )
    plt.plot(evaluate_model.X_TEST,
             np.mean(predictions, axis=0),
             c='red',
             alpha=1,
             zorder=55,
             label="Average Model")
    plt.plot(
        evaluate_model.X_TEST,
        evaluate_model.Y_TEST,
        c='k',
        alpha=1,
        zorder=60,
        label="Truth",
    )
    legend = plt.legend(loc='upper left', framealpha=1.0)
    legend.get_frame().set_alpha(0.8)
    legend.get_frame().set_facecolor((1, 1, 1, 0.8))
    legend.set_zorder(100)
    plt.ylim(-55, 205)
    plt.xlim(-0.5, 10.5)
    plt.suptitle(f'Polynomial (Deg={2})', fontsize=16)
    plt.tight_layout()
    plt.savefig(os.path.join(savedir, f"combined.png"))
    plt.close()
columns=alpha_list).to_csv(f'../results/{data}/regret_alpha.csv') if __name__ == "__main__": warnings.filterwarnings("ignore") tf.get_logger().setLevel("ERROR") args = parser.parse_args() # hyper-parameters iters = args.iters n_trials = args.n_trials alpha_list = args.alpha_list # run simulations # generate and preprocess semi-synthetic datasets generate_data(iters=iters) # prediction by meta-learners, parameter estimation by metrics. run_preds(alpha_list=alpha_list) estimate_metrics_on_val() # model evaluation by metrics. # calculate and save the evaluation performacen of metrics. save_results() if alpha_list: estimate_alpha_metrics_on_val(alpha_list=alpha_list) save_results_alpha(alpha_list=alpha_list) main_tuner(iters=iters, n_trials=n_trials) # optuna experiments visualizer = Visualizer(iters=iters, alpha_list=alpha_list) visualizer.plot_prediction_mse()
combinations = list(itertools.product( params_values["lstm_size"] )) i = 1 j = len(combinations) with open("cnn_hybrid_fasttext100d_rest_lstm_size_tuning_results.txt", "w") as fp: for lstm_size in combinations: print("{} out of {} combinations done".format(i-1, j)) i += 1 params_opt["lstm_size"] = lstm_size use_params = merge_dicts(params, params_opt) print("*******************************") print("Configuation:") print(json.dumps(params_opt, indent=2)) if i == 2: generate_data(use_params) results = average_calculator(model=sys.argv[1], k=10, gen_data=False, kwargs=use_params) f_score = results["fscore"] fp.write("********************\n") if f_score > best_fscore: best_fscore = f_score best_configuration = use_params best_results = results fp.write("Best configuration so far!\n") print("Best configuration so far!\n") fp.write(json.dumps(results, indent=2)) fp.write("\nConfiguation:\n") fp.write(json.dumps(use_params, indent=2)) fp.write("\n*********************\n") print("Results:")
import sys

from argument_parser import AnalysisSettings
from display import display_data
from data_generator import generate_data

# NOTE(review): the command line is overridden here with a hard-coded
# parameter sweep (a dev/debug convenience); real CLI input is ignored
# while this assignment is active.
sys.argv[1:] = [
    "-k=4.0", "-k_end=9.0", "-k_step=0.1", "-tests=1000000",
    "-pattern_trials=100", "-d=100", "-m=512"
]
#sys.argv[1:] = ["-tests=10", "-d=110", "-d_end=150", "-d_step=10", "-n=110", "-che", "-crs", "-source=random", "-source=random"]

if __name__ == '__main__':
    # parse settings, run the analysis, and display the results
    settings = AnalysisSettings(sys.argv)
    result = generate_data(settings)
    display_data(result, settings)
def query_property_history_by_name(property_name, n_prev_day):
    """Return a mocked historical time series for one car-status property.

    :param property_name: status property; currently accepts Score, Speed,
        Odometer or Location. Anything else yields a sparse Poisson event
        series.
    :param n_prev_day: values for the last <n_prev_day> days are returned
    :return: JSON of the form ``{"series": [[<timestamp>, <value>], ...]}``;
        when property_name is Location each value is
        ``{"latitude": <float>, "longitude": <float>}``.

        Example for a scalar property::

            {"series": [[1447647446000, 9],
                        [1447654646000, 13.405531792385005],
                        [1447661846000, 15.905465020438427]]}
    """
    if property_name == 'Score':
        # shorter lookback windows start from a lower baseline score
        if n_prev_day < 4:
            start_val = random.randrange(30, 36)
        elif n_prev_day < 7:
            start_val = random.randrange(36, 41)
        elif n_prev_day < 14:
            start_val = random.randrange(40, 61)
        else:
            start_val = 75
        # NOTE(review): each inner list looks like [anchor_time, drift,
        # noise]; confirm against generate_data's signature.
        series_data = generate_data(start_val, 0, 100, n_prev_day, 120, [
            [datetime.now() - timedelta(days=21), 0.4, 1.0],
            [datetime.now() - timedelta(days=10), -0.1, 0.5],
            [datetime.now() - timedelta(days=14), -0.4, 0.3],
            [datetime.now() - timedelta(days=7), -0.8, 0.3],
            [datetime.now() - timedelta(days=2), -1.5, 0.4]
        ])
    elif property_name == 'Speed':
        print('SPEED')
        if n_prev_day < 4:
            start_val = random.randrange(20, 30)
        elif n_prev_day < 7:
            start_val = random.randrange(30, 35)
        elif n_prev_day < 14:
            start_val = random.randrange(35, 40)
        else:
            start_val = 30
        series_data = generate_data(start_val, 0, 120, n_prev_day, 120, [
            [datetime.now() - timedelta(days=21), 0.4, 1.0],
            [datetime.now() - timedelta(days=20), 0.1, 0.3],
            [datetime.now() - timedelta(days=10), -1.0, 0.5],
            [datetime.now() - timedelta(days=7), 0.3, 1.2],
            [datetime.now() - timedelta(days=5), -0.8, 0.5],
            [datetime.now() - timedelta(days=4), 0.3, 0.2],
            [datetime.now() - timedelta(days=2), 3, 1.3]
        ])
    elif property_name == 'Odometer':
        if n_prev_day < 4:
            start_val = random.randrange(0, 20)
        elif n_prev_day < 7:
            start_val = random.randrange(20, 50)
        elif n_prev_day < 14:
            start_val = random.randrange(50, 120)
        else:
            start_val = 120
        series_data = generate_data(start_val, 0, 1000, n_prev_day, 120, [
            [datetime.now() - timedelta(days=21), 2, 1.0],
            [datetime.now() - timedelta(days=20), 0.5, 0.3],
            [datetime.now() - timedelta(days=10), 2, 0.5],
            [datetime.now() - timedelta(days=7), 2, 1],
            [datetime.now() - timedelta(days=5), 3.5, 1],
            [datetime.now() - timedelta(days=2), 3.5, 2.0]
        ])
    elif property_name == 'Location':
        series_data = generate_location(n_prev_day)
    else:
        # unknown properties: sparse random event series.
        # NOTE(review): min(random.random(), 0.005) is almost always 0.005;
        # possibly max() was intended — confirm.
        lam = min(random.random(), 0.005)
        series_data = create_poisson_series(lam=lam, n_prev_day=n_prev_day)
    # series_data_sorted = sorted(series_data, key=lambda x: x[0])
    # if property_name == 'Odometer':
    #     property_name = 'Distance (km)'
    # elif property_name == 'Speed':
    #     property_name = 'Max Speed (km/h)'
    # elif property_name == 'HarshBrake':
    #     property_name = 'Harsh Brake'
    # elif property_name == 'SuddenTurn':
    #     property_name = 'Sudden Turn'
    #
    # chart_options = {
    #     'chart': {'zoomType': 'x'},
    #     'title': {'text': property_name + ' Over Time'},
    #     'xAxis': {'type': 'datetime'},
    #     'yAxis': {'title': {'text': property_name}},
    #     'legend': {'enabled': False},
    #     'series': [{'name': property_name,
    #                 'data': series_data_sorted}]
    # }
    #
    # # For score color the chart differently for diff y-axis
    # if property_name == 'Score':
    #     chart_options["series"][0].update(
    #         {'zones': [
    #             {
    #                 'value': 50,
    #                 'color': '#FC8662'
    #             },
    #             {
    #                 'value': 75,
    #                 'color': '#FADE25'
    #             },
    #             {
    #                 'color': '#ADF005'
    #             }
    #         ]}
    #     )
    #
    # if property_name == 'Score':
    #     chart_options['yAxis'].update({'max': 100})
    return jsonify({"series": series_data})
def main(argv=None):
    # Entry point: build the dataset and run evaluation.
    # `argv` is accepted for CLI-runner compatibility but unused.
    ds = generate_data()
    evaluate(ds)
def main(argv=None):
    # Entry point: reuse the cached dataset (recreate=False) and train on it.
    print('argv: ', argv)
    ds = generate_data(recreate=False)
    train(ds)
def main():
    """Train an Autoencoder or RNN communications model over a noisy
    channel, periodically validating and finally saving the weights."""
    args = parser.parse_args()
    # pick GPU when available
    args.device = None
    if torch.cuda.is_available():
        args.device = torch.device('cuda')
    else:
        args.device = torch.device('cpu')
    pprint.pprint(vars(args))
    train_data, train_labels, test_data, test_labels = generate_data(
        args.block_size, args.use_complex)
    test_data = test_data.to(args.device)
    test_labels = test_labels.to(args.device)
    if not args.use_autoencoder:
        # RNN path expects an extra singleton dimension at axis 1
        train_data = train_data.unsqueeze(1)
        train_labels = train_labels.unsqueeze(1)
        test_data = test_data.unsqueeze(1)
        test_labels = test_labels.unsqueeze(1)
    # Data loading
    params = {
        'batch_size': args.batch_size,
        'shuffle': True,
        'num_workers': args.workers
    }
    training_set = Dataset(train_data, train_labels)
    training_loader = torch.utils.data.DataLoader(training_set, **params)
    loss_fn = nn.BCEWithLogitsLoss()
    if args.use_autoencoder:
        model = Autoencoder(channel_use=args.channel_use,
                            block_size=args.block_size,
                            snr=args.snr,
                            cuda=args.device,
                            use_lpf=args.use_lpf,
                            use_complex=args.use_complex,
                            channel_type=args.channel_type)
    else:
        model = RNN(channel_use=args.channel_use,
                    block_size=args.block_size,
                    n_layers=1,
                    snr=args.snr,
                    num_taps=args.lpf_num_taps,
                    cuda=args.device,
                    channel_type=args.channel_type)
    model = model.to(args.device)
    optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()),
                     lr=args.lr)
    for epoch in range(args.epochs):
        # The low-pass-filter cutoff anneals toward args.lpf_cutoff:
        # 'cont' updates every epoch, otherwise in ~10 discrete steps.
        # (epoch 0 always sets it, so `cutoff` is defined before use.)
        if args.lpf_shift_type == 'cont':
            cutoff = max(args.lpf_cutoff,
                         (args.epochs - epoch - 1) / args.epochs)
        else:
            if epoch % (args.epochs / 10) == 0:
                cutoff = max(args.lpf_cutoff,
                             (args.epochs - epoch - 1) / args.epochs)
        # Filter weights (if enabled) should not change
        for name, param in model.named_parameters():
            if 'conv' in name:
                param.requires_grad = False
        model.train()
        for batch_idx, (batch, labels) in tqdm(
                enumerate(training_loader),
                total=int(training_set.__len__() / args.batch_size)):
            batch = batch.to(args.device)
            labels = labels.to(args.device)
            if args.use_autoencoder:
                output = model(batch)
            else:
                output, hidden = model(batch)
            loss = loss_fn(output, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # periodic progress print
            # NOTE(review): the period is tied to batch_size, not to a
            # log-interval setting — confirm this is intended.
            if batch_idx % (args.batch_size - 1) == 0:
                pred = torch.round(torch.sigmoid(output))
                acc = model.accuracy(pred, labels)
                print(
                    'Epoch %2d for SNR %s, shift type %s: loss=%.4f, acc=%.2f'
                    % (epoch, args.snr, args.lpf_shift_type, loss.item(),
                       acc))
        # Validation
        model.eval()
        if epoch % 10 == 0:
            if args.use_autoencoder:
                val_output = model(test_data)
            else:
                val_output, _ = model(test_data)
            val_loss = loss_fn(val_output, test_labels)
            val_pred = torch.round(torch.sigmoid(val_output))
            val_acc = model.accuracy(val_pred, test_labels)
            print(
                'Validation: Epoch %2d for SNR %s and cutoff %s: loss=%.4f, acc=%.5f'
                % (epoch, args.snr, cutoff, val_loss.item(), val_acc))
            model.train()
    # persist final weights, filename keyed by (channel_use, block_size, snr)
    if args.use_complex:
        torch.save(
            model.state_dict(), './models/complex_(%s,%s)_%s' %
            (str(args.channel_use), str(args.block_size), str(args.snr)))
    else:
        torch.save(
            model.state_dict(), './models/real_(%s,%s)_%s' %
            (str(args.channel_use), str(args.block_size), str(args.snr)))
def main():
    """Entry point: parse the CLI arguments, generate the data set, and
    write it to the requested output file."""
    settings = argprocessor.process()
    rows = data_generator.generate_data(settings)
    file_writer.write_output_file(settings.output_file, rows)
# Experiment configuration for the delayed-XOR RNN demo.
sequence_length = 20
num_train, num_valid, num_test = 2000, 500, 500
#cell_type = 'simple'
#cell_type = 'gru'
cell_type = 'lstm'  # active RNN cell choice
num_hidden = 20
batch_size = 40
learning_rate = 0.001
max_epoch = 100
# ----------------------------------------------------------------------
# Generate delayed XOR samples
X_train, y_train = generate_data(num_train, sequence_length)
sl_train = sequence_length * np.ones(num_train)  # NEW
X_valid, y_valid = generate_data(num_valid, sequence_length)
sl_valid = sequence_length * np.ones(num_valid)  # NEW
X_test, y_test = generate_data(num_test, sequence_length)
sl_test = sequence_length * np.ones(num_test)  # NEW
# Crop data
# Artificially define variable sequence lengths
# for demo-purposes
for i in range(num_train):
    # random effective length in [10, sequence_length]
    ll = 10 + random.randint(0, sequence_length - 10)
    sl_train[i] = ll
# NOTE: Python 2 syntax (print statements) throughout this script.
# Load simulation parameters from the config file.
AGENT_COUNT = ConfigReader.readconfigfile('config', 'agent_count')
EVENT_COUNT = ConfigReader.readconfigfile('config', 'event_count')
VALID_DATA_RATIO = ConfigReader.readconfigfile('config', 'valid_data_ratio')
API_URL = ConfigReader.readconfigfile('config', 'api_url')
#Reset server
res = requests.get(
    ConfigReader.readconfigfile('config', 'server_stats_reset_url'))
if res.status_code == 200:
    print "Server Reset successfully"
else:
    print "Failed to Reset server"
    sys.exit(-1)
#data generation
agent_stats = generate_data(AGENT_COUNT, EVENT_COUNT, VALID_DATA_RATIO)
print "Total data generated: %d" % agent_stats['Total']
print "Valid data: %d" % agent_stats['Valid']
print "Invalid data: %d\n" % agent_stats['Invalid']
#Agents Simulation
print "*****Agents Simulation*****\nTotal Agents Simulated: %d" % AGENT_COUNT
# start = time.clock()
q = Queue(AGENT_COUNT)
# one daemon worker thread per agent, all pulling work from a shared queue
for i in range(AGENT_COUNT):
    t = Thread(target=do_work, args=(q,))
    t.daemon = True
    t.start()
# enqueue one job label per agent
for i in range(1, AGENT_COUNT + 1):
    q.put("Agent-"+str(i))
'rotating_XY', 'rotating_YZ', 'random' ] job_number = 1 greedy = False # Greedy sweep rule correlated = False # Correlated errors rates = [1] # How many times to apply the sweep rule per syndrome measurement for rate in rates: for l in l_list: timeouts = [32 * l ] # How many times to apply rule in perfect decoding phase # sweep_limit = (int) (round(pow(l, 0.5))) # How many sweeps before changing direction in error suppression phase sweep_limit = (int)( round(math.log(l)) ) # How many sweeps before changing direction in error suppression phase for timeout in timeouts: for N in cycles_list: for p in p_list: q = p # Measurement error probability start_time = time.time() data_generator.generate_data(lattice_type, l, p, q, sweep_limit, sweep_schedules[0], timeout, N, trials, job_number, greedy, correlated, rate) # Data will be saved as a json file finish_time = round(time.time() - start_time, 2) print('l={} p=q={} cycles={} trials={} job done in {} s'. format(l, p, N, trials, finish_time)) job_number += 1
def query_score(n_prev_day):
    """
    Get each driver score parameter
    :param n_prev_day: <last n_prev_day> day score will be returned
    :return: {"score": [<distance_score>, <harsh_break_score>,
                        <sudden_turn_score>, <harsh_accel_score>,
                        <speed_score>]}
    """
    # Get the speed series
    if n_prev_day < 4:
        start_val = random.randrange(20, 30)
    elif n_prev_day < 7:
        start_val = random.randrange(30, 35)
    elif n_prev_day < 14:
        start_val = random.randrange(35, 40)
    else:
        start_val = 30
    speed_series = generate_data(start_val, 0, 120, n_prev_day, 120, [
        [datetime.now() - timedelta(days=21), 0.4, 1.0],
        [datetime.now() - timedelta(days=20), 0.1, 0.3],
        [datetime.now() - timedelta(days=10), -1.0, 0.5],
        [datetime.now() - timedelta(days=7), 0.3, 1.2],
        [datetime.now() - timedelta(days=5), -0.8, 0.5],
        [datetime.now() - timedelta(days=4), 0.3, 0.2],
        [datetime.now() - timedelta(days=2), 3, 1.3]
    ])
    # score each sampled speed and average
    speed_scores = [score_speed(s[1]) for s in speed_series]
    speed_score = np.average(speed_scores)
    print('SPEED_SCORE: {}'.format(speed_score))
    # Get the odometer series
    if n_prev_day < 4:
        start_val = random.randrange(0, 20)
    elif n_prev_day < 7:
        start_val = random.randrange(20, 50)
    elif n_prev_day < 14:
        start_val = random.randrange(50, 120)
    else:
        start_val = 120
    odomoter_series = generate_data(start_val, 0, 1000, n_prev_day, 120, [
        [datetime.now() - timedelta(days=21), 2, 1.0],
        [datetime.now() - timedelta(days=20), 0.5, 0.3],
        [datetime.now() - timedelta(days=10), 2, 0.5],
        [datetime.now() - timedelta(days=7), 2, 1],
        [datetime.now() - timedelta(days=5), 3.5, 1],
        [datetime.now() - timedelta(days=2), 3.5, 2.0]
    ])
    # newest first, so consecutive pairs give per-day distance deltas
    odomoter_series_sorted = sorted(odomoter_series,
                                    key=lambda x: x[0],
                                    reverse=True)
    day_dist_series = []
    odo_iter = iter(odomoter_series_sorted)
    day = 1
    cur_odo = next(odo_iter)
    while day <= n_prev_day:
        try:
            next_odo = next(odo_iter)
        except StopIteration:
            break
        day_diff = (datetime.fromtimestamp(cur_odo[0] / 1000) -
                    datetime.fromtimestamp(next_odo[0] / 1000)).days
        if day_diff == 1:
            day += 1
            day_dist_series.append((cur_odo[1] - next_odo[1]))
        cur_odo = next_odo
    # NOTE(review): the first clause is redundant (len < 1 implies
    # len < n_prev_day for n_prev_day >= 1), and if the iterator was
    # exhausted on the very first next() above, `next_odo` is unbound here
    # and this line raises NameError — confirm/guard upstream.
    if len(day_dist_series) < 1 or len(day_dist_series) < n_prev_day:
        day_dist_series.append((cur_odo[1] - next_odo[1]) / 10.0)
    day_dist_scores = [score_day_dist(x) for x in day_dist_series]
    day_dist_score = np.average(day_dist_scores)
    # Array order
    # 'Distance', 'Harsh Break', 'Sudden Turn', 'Harsh Accel', 'Speed'
    def rand_score():
        # random filler score for the unmodelled parameters
        return random.randrange(50, 100)
    # penalise short histories: fewer observed days -> lower scores
    if n_prev_day < 2:
        speed_score = max(10, speed_score - 45)
        day_dist_score = max(12, day_dist_score - 45)
        a, b, c = rand_score() - 40, rand_score() - 30, rand_score() - 35
    elif n_prev_day < 4:
        speed_score = max(15, speed_score - 40)
        day_dist_score = max(15, day_dist_score - 40)
        a, b, c = rand_score() - 25, rand_score() - 20, rand_score() - 30
    elif n_prev_day < 8:
        speed_score = max(20, speed_score - 30)
        day_dist_score = max(25, day_dist_score - 30)
        a, b, c = rand_score() - 10, rand_score() - 10, rand_score() - 10
    else:
        speed_score = max(20, speed_score - 12)
        day_dist_score = max(25, day_dist_score - 12)
        a, b, c = rand_score(), rand_score(), rand_score()
    return jsonify({'score': [day_dist_score, a, b, c, speed_score]})
color_map, size_map = get_page_rank_and_colors(G) print("--[x] SUCCESS") success_log[ '--4. Trying to compute color and size maps...'] = 'SUCCESS' except Exception as e: pull_the_plug = True print("--4. Error at generating color and size maps...", e) success_log['--4. Trying to compute color and size maps...'] = 'FAIL' try: print( '5. Trying to architect network visualization json for sigma.js...' ) converted_data_for_sigmajs = generate_data(G, embeddings, color_map, size_map) print("--5. [x] SUCCESS") success_log[ '--5. Trying to architect network visualization json for sigma.js...'] = 'SUCCESS' except Exception as e: pull_the_plug = True print("--5. Error at converting network visualization json..", e) success_log[ '--5. Trying to architect network visualization json for sigma.js...'] = 'FAIL' if not pull_the_plug: try: print("6. Trying to open the file and dump the json data...") with open('./network/data.json', 'w', encoding='utf-8') as f: json.dump(converted_data_for_sigmajs, f)
def gen():
    # Generator facade: stream examples from the module-level data/vocab
    # configuration.
    # NOTE(review): "CONEXT_SIZE" looks like a typo for CONTEXT_SIZE, but the
    # name must match its definition elsewhere in the file.
    yield from data_generator.generate_data(DATA_DIR, VOCAB_DIR, VOCAB_SIZE,
                                            CONEXT_SIZE)
# Build a KD-tree over the word-vector embeddings and sanity-check
# nearest-neighbour lookups for the first grouped item.
gutils.construct_kd_tree(vectors, save_file=cnt.WV_KD_TREE_FILE)
group_indices = gutils.load_data_pkl(cnt.GROUP_INDICES_FILE)
print(items[group_indices[0]][0], items[group_indices[0]][5])
print(gutils.get_item_text(items[group_indices[0]]))
kdtree = gutils.load_data_pkl(cnt.WV_KD_TREE_FILE)
query_vector = gutils.get_wv_embeddings([group_indices[0]])[0]
u = gutils.get_nearest_neighbors_count(kdtree, query_vector, count=5)
for x in u:
    print(items[group_indices[x]][0], items[group_indices[x]][5])
    print(gutils.get_item_text(items[group_indices[x]]))
print("Generating data...")
# 60/20/20 train/test/validation split
num_train, num_test, num_validation = dg.generate_data(test_pct=0.2,
                                                       validation_pct=0.2)
print(num_train, num_test, num_validation)
del (items)  # free the raw item list before training
print("Training Siamese...")
sapi = SiameseAPI()
sapi.train_model()
print(sapi.get_distance_threshold(threshold=0.95))
print("Inserting embeddings...")
sapi.insert_embeddings_pytables()
print("Constructing KD-Tree...")
# refreshed vectors from the newly trained Siamese embeddings
vectors = sapi.fetch_embeddings_pytables()
def main():
    """Run the mixture-of-experts checker-board experiment suite.

    For each dataset size, generates the rotated checker-board dataset,
    trains ``num_runs`` models via ``run_experiment_1``, aggregates the
    per-run results, logs metrics to ``results.csv``, saves the aggregated
    results to ``../results/``, and writes accuracy plots.
    """
    col_names = [
        'dataset', 'number of classes', 'number of runs', 'epochs',
        'number of parameters-total', 'model', 'number of experts', 'loss',
        'training accuracy', 'validation accuracy'
    ]
    num_runs = 2
    # `with` guarantees the CSV handle is closed even if an experiment raises
    # (the original leaked it on any exception before fp.close()).
    with open('results.csv', 'w') as fp:
        fp.write(','.join(col_names) + '\n')
        for size in [3000]:  # extend to e.g. [3000, 5000, 8000] for a sweep
            dataset = ('expert_1_gate_1_single_deep_checker_board_rotated_'
                       + str(size))
            X, y, trainset, trainloader, testset, testloader, num_classes = \
                data_generator.generate_data(dataset, size)
            total_experts = 10
            epochs = 40
            runs = []
            for _ in range(num_runs):
                models = run_experiment_1(dataset, single_model_deep,
                                          trainset, trainloader, testset,
                                          testloader, num_classes,
                                          total_experts, epochs, True, True)
                runs.append(models)
            results = runs[0]
            if num_runs > 1:
                # Aggregate metrics across the independent runs.
                results = aggregate_results(runs, total_experts)
            # Close the results file explicitly (the original passed a bare
            # open(...) to torch.save, leaking the handle).
            with open('../results/' + dataset + '_results.pt', 'wb') as out:
                torch.save(results, out)
            log_results(results, total_experts, num_classes, num_runs,
                        epochs, dataset, fp)
            generated_data = data_generator.create_meshgrid(X)
            plot_results(X, y, generated_data, num_classes, trainset,
                         trainloader, testset, testloader, runs[0], dataset,
                         total_experts)
            plot_error_rate(
                results, total_experts,
                'figures/all/accuracy_' + dataset + '_' +
                str(num_classes) + '_experts.png')
#Takes in a model and ciphertext and asks the neural network what cipher it is def predict(m, ct): test = np.array([func(ct) for func in statistics.stats_funcs]) test = (np.expand_dims(test, 0)) prediction = m.predict(test)[0] for probability in sorted(zip(prediction, range(len(data_generator.config.values()))), key=lambda x: x[0], reverse=True): print(data_generator.reverse_config[probability[1]], probability[0]) print("Generating data...") data_generator.generate_data(1000, "data/train_data.dat", "data/train_labels.dat") data_generator.generate_data(100, "data/test_data.dat", "data/test_labels.dat") #Load in data train_data = np.load("data/train_data.dat") train_labels = np.load("data/train_labels.dat") test_data = np.load("data/test_data.dat") test_labels = np.load("data/test_labels.dat") model = create_model() print("Training model...") model.fit(train_data, train_labels, epochs=5, callbacks=[cp_callback]) model.load_weights(checkpoint_path) print("Testing model...")
def test_dkt(model_id, n_concepts, transition_after, horizon, n_rollouts,
             n_trajectories, r_type, use_real, use_mem, checkpoints=None):
    """Test DKT+MCTS on the default concept-dependency tree.

    Runs `n_trajectories` MCTS test trajectories (split across 8 parallel
    jobs) against a Student2 simulator, then compares the MCTS posttest
    accuracy with the expert-policy posttest on freshly generated data.

    Args:
        model_id: identifier of the trained DKT model to evaluate.
        n_concepts: number of concepts in the dependency tree.
        transition_after: Student2 transition parameter (semantics defined
            by st.Student2 — see that class).
        horizon: trajectory length / MCTS planning horizon.
        n_rollouts: MCTS rollouts per step.
        n_trajectories: total test trajectories (floor-divided by the job
            count, so up to 7 trajectories may be dropped).
        r_type: reward type, forwarded to test_dkt_chunk.
        use_real, use_mem: flags forwarded to test_dkt_chunk.
        checkpoints: optional list of checkpoint names; more than one means
            an ensemble. Defaults to an empty list (was a mutable default
            argument `=[]`, replaced with the None sentinel).

    Returns:
        (avg_acc, avg_best_q): average posttest accuracy and average best
        Q-value across all trajectories.
    """
    import concept_dependency_graph as cdg
    if checkpoints is None:
        checkpoints = []
    n_jobs = 8
    traj_per_job = n_trajectories // n_jobs
    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)
    student2 = st.Student2(n_concepts, transition_after)
    test_student = student2
    test_student.reset()
    test_student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(test_student.copy(), dgraph)
    # create a shared dktcache across all processes
    dktcache_manager = mp.Manager()
    dktcache = dktcache_manager.dict()
    print('Testing model: {}'.format(model_id))
    print('horizon: {}'.format(horizon))
    print('rollouts: {}'.format(n_rollouts))
    # Fan the chunks out over n_jobs workers; each returns per-chunk sums
    # that are averaged over the actual trajectory count below.
    accs = np.array(
        Parallel(n_jobs=n_jobs)(delayed(test_dkt_chunk)(traj_per_job,
                                                        dgraph,
                                                        sim,
                                                        model_id,
                                                        checkpoints,
                                                        horizon,
                                                        n_rollouts,
                                                        r_type,
                                                        dktcache=dktcache,
                                                        use_real=use_real,
                                                        use_mem=use_mem)
                                for _ in range(n_jobs)))
    results = np.sum(accs, axis=0) / (n_jobs * traj_per_job)
    avg_acc, avg_best_q = results[0], results[1]
    # Baseline: expert-policy trajectories on the same simulator/student.
    test_data = dg.generate_data(dgraph,
                                 student=test_student,
                                 n_students=1000,
                                 seqlen=horizon,
                                 policy='expert',
                                 filename=None,
                                 verbose=False)
    print('Average posttest true: {}'.format(expected_reward(test_data)))
    print('Average posttest mcts: {}'.format(avg_acc))
    print('Average best q: {}'.format(avg_best_q))
    return avg_acc, avg_best_q