def remove_features_sklearn(criterion, splitter, removal_order, train_file,
                            test_file, attr_file, max_features):
    """Measure sklearn decision-tree accuracy as features are removed one by one.

    Columns are taken from ``removal_order`` in sequence and accumulated in a
    removal list. After each addition the train/test data are re-read without
    the removed columns and a tree is fitted through
    ``other_trees.sklearn_decision_tree``. The loop stops as soon as
    ``max_features`` columns have been collected; no tree is fitted for that
    final set.

    Args:
        criterion: Passed through to ``other_trees.sklearn_decision_tree``.
        splitter: Passed through to ``other_trees.sklearn_decision_tree``.
        removal_order: Iterable of columns, in the order they are removed.
        train_file: Training data source handed to ``read_data``.
        test_file: Test data source handed to ``read_data``.
        attr_file: Attribute description handed to ``read_data``.
        max_features: Size of the removal list at which the loop stops.

    Returns:
        Tuple ``(train_accs, test_accs)`` with one entry per fitted tree.
    """
    train_accs = []
    test_accs = []
    remove_columns = []
    for col in removal_order:
        print(col)
        remove_columns.append(col)
        if len(remove_columns) == max_features:
            break
        print(remove_columns)
        # Fix: read from the files passed in. The original referenced the
        # undefined names ``train``/``test``/``attr`` and ignored its arguments.
        train_data, train_attr = read_data(
            train_file, attr_file, remove_columns=remove_columns)
        test_data, _ = read_data(
            test_file, attr_file, remove_columns=remove_columns)
        # NOTE(review): ``class_column`` is not defined in this function;
        # presumably a module-level constant - confirm.
        train_acc, test_acc = other_trees.sklearn_decision_tree(
            criterion, splitter, train_data, test_data, train_attr,
            class_column, ds_name="custom")
        train_accs.append(train_acc)
        test_accs.append(test_acc)
    return train_accs, test_accs
def load(histmods, kmers, tissue, short, dist):
    """Load enhancer samples for one tissue, or for heart and brain together.

    When ``tissue`` contains ``'both'`` the heart and brain sets (optionally
    the ``randoms_`` variants) are loaded, filtered, and merged through
    ``join_and_balance``. Otherwise the single tissue is loaded and filtered.
    ``dist`` adds the FANTOM removal list that matches the tissue.
    """
    if 'both' not in tissue:
        # Single-tissue path.
        source = get_filename(tissue, short)
        enhancers = load_vista.load_enhancers_with_seq(source)
        removal_lists = [source + ".remove"]
        if dist:
            if tissue == "heart":
                removal_lists.append("FANTOM_heart_brain.remove")
            elif tissue == "brain":
                removal_lists.append("FANTOM_brain_heart.remove")
        print(enhancers)
        samples = read_data(histmods, kmers, enhancers, source)
        return remove_samples(samples, enhancers, removal_lists)

    # Combined heart + brain path.
    prefix = 'randoms_' if 'randoms' in tissue else ''
    heart_source = get_filename(prefix + 'heart', short)
    brain_source = get_filename(prefix + 'brain', short)
    heart_enhancers = load_vista.load_enhancers_with_seq(heart_source)
    brain_enhancers = load_vista.load_enhancers_with_seq(brain_source)
    heart_removals = [heart_source + ".remove"]
    brain_removals = [brain_source + ".remove"]
    if dist:
        heart_removals.append("FANTOM_heart_brain.remove")
        brain_removals.append("FANTOM_brain_heart.remove")
    heart = read_data(histmods, kmers, heart_enhancers, heart_source)
    heart = remove_samples(heart, heart_enhancers, heart_removals)
    brain = read_data(histmods, kmers, brain_enhancers, brain_source)
    brain = remove_samples(brain, brain_enhancers, brain_removals)
    return join_and_balance(heart, brain, False)[0]
def drawEpipolarLine(inputPath, F, outputPath):
    """Draw epipolar lines on ``pic_a.jpg`` / ``pic_b.jpg`` and save the results.

    The 2D point files are read from ``inputPath``, converted to homogeneous
    coordinates, and mapped through ``F`` to one line per point in the other
    image. Each line is intersected with two boundary lines and drawn in green
    between the two intersection points.

    Args:
        inputPath: Directory containing the two images and the two point files.
        F: Fundamental matrix (used as ``pts_b . F`` and ``pts_a . F.T``).
        outputPath: Directory that receives ``ps3-2-c-1.png`` / ``ps3-2-c-2.png``.

    Returns:
        Tuple ``(img_a, img_b)`` with the lines drawn in place.
    """
    img_a = cv2.imread(os.path.join(inputPath, "pic_a.jpg"))
    img_b = cv2.imread(os.path.join(inputPath, "pic_b.jpg"))
    pts_a = read_data(inputPath, "pts2d-pic_a.txt")
    pts_b = read_data(inputPath, "pts2d-pic_b.txt")

    # Append a column of ones to get homogeneous coordinates.
    # Fix: size the column for pts_b from pts_b itself (was pts_a.shape[0]).
    pts_a = np.column_stack((pts_a, np.ones(pts_a.shape[0])))
    pts_b = np.column_stack((pts_b, np.ones(pts_b.shape[0])))

    eplines_a = np.dot(pts_b, F)
    eplines_b = np.dot(pts_a, F.T)

    # Both boundary lines are derived from img_a's size and reused for img_b.
    # NOTE(review): the corner points are written as (height, width) ordered
    # pairs - confirm this matches the coordinate order of the point files.
    height, width, _ = img_a.shape
    boundary_l = np.cross([0, 0, 1], [height - 1, 0, 1])
    boundary_r = np.cross([0, width - 1, 1], [height - 1, width - 1, 1])

    for line_a, line_b in zip(eplines_a, eplines_b):
        for img, line in ((img_a, line_a), (img_b, line_b)):
            # Intersect the epipolar line with each boundary and normalise
            # the homogeneous result before drawing.
            pts1 = np.cross(line, boundary_l)
            pts2 = np.cross(line, boundary_r)
            pts1 /= pts1[2]
            pts2 /= pts2[2]
            cv2.line(img, tuple(pts1[:2].astype(int)),
                     tuple(pts2[:2].astype(int)), (0, 255, 0), thickness=2)

    cv2.imwrite(os.path.join(outputPath, "ps3-2-c-1.png"), img_a)
    cv2.imwrite(os.path.join(outputPath, "ps3-2-c-2.png"), img_b)
    return img_a, img_b
def remove_features(removal_order, train_file, test_file, attr_file, max_features):
    """Measure decision-tree accuracy as features are removed one by one.

    Columns are taken from ``removal_order`` in sequence. After each addition
    the data are re-read without the removed columns, a tree is learned with
    ``decision_tree.DecisionTreeLearning``, and its accuracy on the training
    and test data is recorded. The loop stops as soon as ``max_features``
    columns have been collected; no tree is built for that final set.

    Args:
        removal_order: Iterable of columns, in the order they are removed.
        train_file: Training data source handed to ``read_data``.
        test_file: Test data source handed to ``read_data``.
        attr_file: Attribute description handed to ``read_data``.
        max_features: Size of the removal list at which the loop stops.

    Returns:
        Tuple ``(train_accs, test_accs)`` with one entry per learned tree.
    """
    train_accs = []
    test_accs = []
    remove_columns = []
    for col in removal_order:
        print(col)
        remove_columns.append(col)
        if len(remove_columns) == max_features:
            break
        print(remove_columns)
        # Fix: read from the files passed in. The original referenced the
        # undefined names ``train``/``test``/``attr`` and ignored its arguments.
        train_data, train_attr = read_data(
            train_file, attr_file, remove_columns=remove_columns)
        test_data, _ = read_data(
            test_file, attr_file, remove_columns=remove_columns)

        tree = decision_tree.DecisionTreeLearning(
            train_data, train_attr, "normal", "class")
        decision_tree.print_tree(tree)

        y_pred, y_true = decision_tree.predict(train_data, tree)
        train_acc = decision_tree.accuracy_score(y_pred, y_true)
        print('Accuracy on Training Data: {0}'.format(train_acc * 100))

        y_pred, y_true = decision_tree.predict(test_data, tree)
        test_acc = decision_tree.accuracy_score(y_pred, y_true)
        # Fix: this line reports the test accuracy, so label it as such.
        print('Accuracy on Test Data: {0}'.format(test_acc * 100))

        train_accs.append(train_acc)
        test_accs.append(test_acc)
    return train_accs, test_accs
def load(histmods, kmers, tissue, short, dist):
    """Return filtered enhancer samples for ``tissue``.

    A ``tissue`` containing ``'both'`` yields the balanced union of the heart
    and brain sets (``randoms_`` variants when ``'randoms'`` is also present).
    Any other value loads just that tissue. With ``dist`` set, the matching
    FANTOM removal list is applied in addition to ``<filename>.remove``.
    """
    combined = 'both' in tissue
    if combined:
        randoms = 'randoms_' if 'randoms' in tissue else ''
        name_heart = get_filename(randoms + 'heart', short)
        name_brain = get_filename(randoms + 'brain', short)
        targets_heart = load_vista.load_enhancers_with_seq(name_heart)
        targets_brain = load_vista.load_enhancers_with_seq(name_brain)
        remove_heart = [name_heart + ".remove"]
        remove_brain = [name_brain + ".remove"]
        if dist:
            remove_heart.append("FANTOM_heart_brain.remove")
            remove_brain.append("FANTOM_brain_heart.remove")
        data_heart = remove_samples(
            read_data(histmods, kmers, targets_heart, name_heart),
            targets_heart, remove_heart)
        data_brain = remove_samples(
            read_data(histmods, kmers, targets_brain, name_brain),
            targets_brain, remove_brain)
        return join_and_balance(data_heart, data_brain, False)[0]

    name = get_filename(tissue, short)
    targets = load_vista.load_enhancers_with_seq(name)
    to_remove = [name + ".remove"]
    if dist and tissue == "heart":
        to_remove.append("FANTOM_heart_brain.remove")
    elif dist and tissue == "brain":
        to_remove.append("FANTOM_brain_heart.remove")
    print(targets)
    return remove_samples(
        read_data(histmods, kmers, targets, name), targets, to_remove)
def main():
    """Set up console logging, load the data sets, and build the basic vectors.

    Reads training, testing, root-tweet, and social-network data from the
    module-level paths, extracts labels from the training data, and computes
    the basic feature vectors.
    """
    # Logger that writes DEBUG and above to the console.
    logger = logging.getLogger('main')
    logger.setLevel(logging.DEBUG)
    console = logging.StreamHandler()
    console.setLevel(logging.DEBUG)
    console.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    logger.addHandler(console)

    logger.debug('Loading training data ' + TR_PATH)
    training_data = read_data.read_data(TR_PATH, 100)

    logger.debug('Loading testing data ' + TS_PATH)
    testing_data = read_data.read_data(TS_PATH)

    logger.debug('Loading root tweets ' + RT_PATH)
    root_data = read_data.read_data(RT_PATH)

    logger.debug('Loading social network ' + SN_PATH)
    social_network = read_data.read_data(SN_PATH)

    # Labels from the training data.
    labels = read_data.extract_labels(training_data, K)

    # Basic features.
    basic_vectors = basic.get_vectors(root_data, training_data, social_network)
def main(_):
    """Train, validate, and test the PTB-style model on the hard-coded data files.

    Three files are stacked into the training set, one file is used for
    validation, and the first 8000 rows of another are used for the final
    test. Perplexities are printed per epoch.
    """
    # Training set: three files stacked row-wise.
    train_parts = [
        read_data.read_data(name)
        for name in ("Data11-17.txt", "valid18-20.txt", "Data21-25.txt")
    ]
    vectors_data = np.vstack(tuple(part[0] for part in train_parts))
    print(vectors_data.shape)
    # Stack the labels as columns, then flatten back to one dimension.
    labels_data = np.vstack(tuple(
        np.reshape(part[1], (len(part[1]), 1)) for part in train_parts))
    labels_data = np.reshape(labels_data, -1)
    print(labels_data.shape)

    validation_data, vlabels_data = read_data.read_data("Data4-10.txt")
    test_data, tlabels_data = read_data.read_data("Data26-29.txt")
    test_data = test_data[0:8000,]
    tlabels_data = tlabels_data[0:8000,]

    config = get_config()
    # The test model runs one step at a time.
    eval_config = get_config()
    eval_config.batch_size = 1
    eval_config.num_steps = 1

    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-config.init_scale,
                                                    config.init_scale)
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            m = PTBModel(is_training=True, config=config)
        # Validation and test models reuse the training variables.
        with tf.variable_scope("model", reuse=True, initializer=initializer):
            mvalid = PTBModel(is_training=False, config=config)
            mtest = PTBModel(is_training=False, config=eval_config)

        tf.initialize_all_variables().run()
        summary_writer = tf.train.SummaryWriter("train/lstm3s", session.graph)

        for i in range(config.max_max_epoch):
            epoch = i + 1
            # The rate stays constant for max_epoch epochs, then decays.
            lr_decay = config.lr_decay ** max(i - config.max_epoch, 0.0)
            m.assign_lr(session, config.learning_rate * lr_decay)
            print("Epoch: %d Learning rate: %.3f" % (epoch, session.run(m.lr)))

            train_perplexity = run_epoch(session, m, vectors_data, labels_data,
                                         m.train_op, summary_writer,
                                         verbose=True)
            print("Epoch: %d Train Perplexity: %.3f" % (epoch, train_perplexity))

            valid_perplexity = run_epoch(session, mvalid, validation_data,
                                         vlabels_data, tf.no_op(),
                                         summary_writer)
            print("Epoch: %d Valid Perplexity: %.3f" % (epoch, valid_perplexity))

        test_perplexity = run_epoch(session, mtest, test_data, tlabels_data,
                                    tf.no_op(), summary_writer)
        print("Test Perplexity: %.3f" % test_perplexity)
def main(_):
    """Build the configuration, load the data sets, and train or evaluate.

    The configuration comes from the command-line flags, optionally combined
    with a file under ``configs/``. With ``config.train`` set, the train and
    dev sets are loaded and the runner trains; otherwise the test set is
    loaded and the runner evaluates a loaded model.
    """
    if FLAGS.config == "None":
        config = get_config(FLAGS.__flags, {})
    else:
        # TODO : create configs file (.json)
        config_path = os.path.join(
            "configs", "%s%s" % (FLAGS.model_name, FLAGS.config_ext))
        config = get_config_from_file(FLAGS.__flags, config_path, FLAGS.config)

    load_meta_data(config)
    mkdirs(config)

    # Load the initial embedding matrix fully into memory.
    # Fix: close the HDF5 file once the data has been copied out.
    init_emb_mat_path = os.path.join(config.data_dir, 'init_emb_mat.h5')
    with h5py.File(init_emb_mat_path, 'r') as emb_file:
        config.init_emb_mat = emb_file['data'][:]

    if config.train:
        train_ds = read_data(config, 'train')
        dev_ds = read_data(config, 'dev')
    else:
        test_ds = read_data(config, 'test')

    # Draft mode shrinks every loop to a single step for quick debugging.
    if config.draft:
        config.train_num_batches = 1
        config.val_num_batches = 1
        config.test_num_batches = 1
        config.num_epochs = 1
        config.val_period = 1
        config.save_period = 1
        # TODO : Add any other parameter that induces a lot of computations

    pprint(config.__dict__)

    # TODO : specify eval tensor names to save in evals folder
    eval_tensor_names = []

    graph = tf.Graph()
    # TODO : initialize BaseTower-subclassed objects
    towers = [BaseTower(config) for _ in range(config.num_devices)]
    sess = tf.Session(graph=graph,
                      config=tf.ConfigProto(allow_soft_placement=True))
    # TODO : initialize BaseRunner-subclassed object
    runner = BaseRunner(config, sess, towers)
    with graph.as_default(), tf.device("/cpu:0"):
        runner.initialize()
        if config.train:
            if config.load:
                runner.load()
            runner.train(train_ds, dev_ds, eval_tensor_names=eval_tensor_names)
        else:
            runner.load()
            runner.eval(test_ds, eval_tensor_names=eval_tensor_names)
def fundamentalMatrix(inputPath, rank=3, rank3Matrix=None):
    """Estimate a fundamental matrix, or reduce an existing one to rank 2.

    Args:
        inputPath: Directory holding ``pts2d-pic_a.txt`` and
            ``pts2d-pic_b.txt``. Only used when ``rank == 3``.
        rank: ``3`` to solve for the matrix from the point files, ``2`` to
            zero the smallest singular value of ``rank3Matrix``.
        rank3Matrix: Matrix to reduce. Required when ``rank == 2``.

    Returns:
        ``(F_lstsq, F_SVD)`` for ``rank == 3``, the rank-reduced matrix for
        ``rank == 2``, and ``None`` for any other ``rank``.

    Raises:
        ValueError: If ``rank == 2`` and ``rank3Matrix`` is ``None``.
    """
    if rank == 3:
        pts_a = read_data(inputPath, "pts2d-pic_a.txt")
        pts_b = read_data(inputPath, "pts2d-pic_b.txt")
        F_lstsq = solveEquations(pts_a, pts_b, method="least_square")
        F_SVD = solveEquations(pts_a, pts_b, method="SVD")
        return F_lstsq, F_SVD

    if rank == 2:
        if rank3Matrix is None:
            # Fail with a clear message instead of an obscure NumPy error.
            raise ValueError("rank3Matrix is required when rank == 2")
        # np.linalg.svd returns the third factor already transposed.
        U, S, Vt = np.linalg.svd(rank3Matrix)
        S[-1] = 0  # drop the smallest singular value
        return np.dot(np.dot(U, np.diag(S)), Vt)

    return None
def predict_classes_one():
    """Run the level-1 pipeline: read data, score, predict, and write output.

    The classifier and vectorizer are passed to ``read_data.read_data`` with
    the level-1 classes, scored through cross-validation, and then used to
    predict labels for the ids from the test path. The predictions are
    written to ``io_files.categorie1_path``.
    """
    clf = get_classifier()
    vec = get_vectorizer()
    read_data.read_data(clf, vec, get_classes(1))
    cross_validation.score(clf, vec, 1)
    test_ids = create_id_set(0, io_files.test_path)
    label, y = predict.predict(clf, vec, test_ids)
    write_output(io_files.categorie1_path, label, y)
def train(config):
    """Load train/dev data, build the embedding matrix, and start training.

    Rows of the embedding matrix come from the lower-cased word2vec vectors
    where the word has an index; every other index gets a sample from a
    standard multivariate normal of size ``config.word_emb_size``. The matrix
    is stored on ``config.emb_mat``.
    """
    train_data = read_data('train')
    dev_data = read_data('dev')
    update_config(config, [train_data, dev_data])
    _config_debug(config)

    word2vec_dict = train_data.shared['lower_word2vec']
    word2idx_dict = train_data.shared['word2idx']

    # Map vocabulary index -> pretrained vector for every known word.
    idx2vec_dict = {}
    for word, vec in word2vec_dict.items():
        if word in word2idx_dict:
            idx2vec_dict[word2idx_dict[word]] = vec

    rows = []
    for idx in range(config.word_vocab_size):
        if idx in idx2vec_dict:
            rows.append(idx2vec_dict[idx])
        else:
            # No pretrained vector: draw a random one.
            rows.append(np.random.multivariate_normal(
                np.zeros(config.word_emb_size), np.eye(config.word_emb_size)))
    emb_mat = np.array(rows)
    config.emb_mat = emb_mat

    bidaf_model = train_bidaf()
def load_other(database, histmods, kmers, tissue, dist):
    """Load samples from ``database`` (or a randoms set) for the given tissue.

    A ``'_notss'`` marker in ``tissue`` turns on promoter filtering and the
    last six characters of ``tissue`` are dropped. A tissue containing
    ``'randoms'`` is read from ``<tissue>1500`` with tissue selection
    ``'all'``; anything else is read from ``database``.
    """
    filter_promoters = '_notss' in tissue
    if filter_promoters:
        tissue = tissue[:-6]

    if 'randoms' in tissue:
        source, selection = tissue + '1500', 'all'
    else:
        source, selection = database, tissue

    target_data = load_vista.load_enhancers_with_seq(source)
    samples = read_data(histmods, kmers, target_data, source)
    return choose_tissue(samples, target_data, selection, dist,
                         filter_promoters)
def load_other(database, histmods, kmers, tissue, dist):
    """Return tissue-selected samples read from ``database`` or a randoms file.

    If ``tissue`` contains ``'_notss'``, promoters are filtered and the final
    six characters are stripped from ``tissue``. Randoms tissues are loaded
    from the file named ``tissue + '1500'`` and selected with ``'all'``.
    """
    if '_notss' in tissue:
        tissue = tissue[:-6]
        filter_promoters = True
    else:
        filter_promoters = False

    use_randoms = 'randoms' in tissue
    data_file = tissue + '1500' if use_randoms else database
    wanted_tissue = 'all' if use_randoms else tissue

    enhancers = load_vista.load_enhancers_with_seq(data_file)
    loaded = read_data(histmods, kmers, enhancers, data_file)
    return choose_tissue(loaded, enhancers, wanted_tissue, dist,
                         filter_promoters)
def loop_trac(band='alpha', con=1):
    """Run the trajectory pipeline for persons 1..20 and pickle the accuracies.

    For each person the TensorFlow default graph is reset, the data are read,
    and the trajectory / behaviour-sequence / clustering steps are run in
    order. Afterwards ``accs`` is dumped to
    ``./band_data/deep_svm_con<con>_<band>``.

    Args:
        band: Band name passed to the per-person steps and used in the
            output file name.
        con: Condition number passed to the per-person steps and used in the
            output file name.
    """
    for i in range(20):
        person = i + 1
        print('Person {}:'.format(person))
        tf.reset_default_graph()
        read_data.read_data(band, con, person)
        completeTrajectories()
        computeFeas()
        generate_behavior_sequences()
        generate_normal_behavior_sequence()
        trajectory2Vec(band, con, person)
        vecClusterAnalysis(band, con, person)

    # Save the accuracies.
    # NOTE(review): ``accs`` is not assigned in this function; presumably a
    # module-level list filled by the steps above - confirm.
    # Fix: the original opened the file with ``file(...)`` and never closed it.
    with open('./band_data/deep_svm_con{}_{}'.format(con, band), 'w') as fout:
        cPickle.dump(accs, fout)
def reprocess_data2(n_samples=-1):
    """Load the flower images, optionally subsample, and return a flat split.

    Args:
        n_samples: Number of samples to draw at random without replacement,
            or ``-1`` to use everything returned by ``read_data``.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as NumPy arrays. The image
        arrays are scaled by ``1/255`` and reshaped to
        ``(n, 128 * 128 * 3)``; the labels are integer class ids.
    """
    X, labels = read_data()

    if n_samples != -1:
        # Fix: sample from every index (the original range started at 1, so
        # sample 0 could never be chosen). A set keeps the filter linear.
        chosen = set(random.sample(range(len(X)), n_samples))
        X = [x for idx, x in enumerate(X) if idx in chosen]
        labels = [label for idx, label in enumerate(labels) if idx in chosen]

    # Fix: always convert to an array so the division below works whether or
    # not subsampling happened (the original converted only when sampling).
    X = np.array(X)

    label_dict = {
        'daisy': 0,
        'dandelion': 1,
        'rose': 2,
        'sunflower': 3,
        'tulip': 4
    }
    y = [label_dict[label] for label in labels]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.15, random_state=42)

    X_train = np.array(X_train / 255.)
    X_test = np.array(X_test / 255.)
    y_train = np.array(y_train)
    y_test = np.array(y_test)

    # Flatten each 128x128x3 image into a single feature vector.
    X_train = X_train.reshape(X_train.shape[0], 128 * 128 * 3)
    X_test = X_test.reshape(X_test.shape[0], 128 * 128 * 3)
    return X_train, X_test, y_train, y_test
def brabo_starter():
    """Build the wijk1 grid from file and run the ``node`` solver on it.

    The grid is sized one cell beyond the largest house or battery
    coordinate. Interrupting the solve with Ctrl-C prints the elapsed time.
    """
    house_path = '../../Data/wijk1_huizen.csv'
    # Other battery configurations that have been used here:
    #   '../../Data/wijk1_batterijen.txt'
    #   '../../Results/Battery_configurations/SCORE:4486_SIGMA:10.csv'
    #   '../../Results/Battery_configurations/leuknaampjes.csv'
    battery_path = '../../Results/Battery_configurations/1137_nice_sigma10.csv'
    houses, batteries = read_data(house_path, battery_path, True)

    # Grid extent: largest coordinate over houses and batteries, plus one.
    positions = [entry['position'] for entry in houses]
    positions += [entry['position'] for entry in batteries]
    max_x = max(pos[0] for pos in positions) + 1
    max_y = max(pos[1] for pos in positions) + 1

    wijk1 = SmartGrid(max_x, max_y)
    wijk1.add_house_dictionaries(houses)
    wijk1.add_battery_dictionaries(batteries)
    houses = wijk1.house_dict_with_manhattan_distances()
    print(houses)

    root = node(batteries, houses, 5000000)
    start = time.time()
    try:
        root.solve()
    except KeyboardInterrupt:
        print(time.time() - start)
    print("klaar")
def prepare(self, filename):
    """Load ``filename`` and extend the time axis by the forecast horizon.

    The spacing of the last two time stamps is used as the step for
    ``self.forecast_size`` extra time points. The operator view is then
    shown with a short status message.
    """
    self.time, self.data = read_data(filename)
    step = self.time[-1] - self.time[-2]
    # Future time stamps: last stamp + 1*step, + 2*step, ...
    future = np.arange(1, 1 + self.forecast_size) * step + self.time[-1]
    self.time = np.append(self.time, future)
    self.N_all_iter = len(self.time)
    view = self.operator_view
    view.show()
    view.status_bar.showMessage('Loaded successfully.', 1000)
def main():
    """
    Plot a distribution using random_solve.

    The distribution gives a good indication of the state space and its
    density. Increasing the number of batteries will significantly increase
    the runtime.
    """
    house_path = '../../Data/wijk1_huizen.csv'
    # Battery files that were assigned here before being overridden:
    #   '../../Data/wijk1_batterijen.txt'
    #   '../../Results/Battery_configurations/lucas_1137_nice_sigma10.csv'
    battery_path = '../../Results/Battery_configurations/BESTSCORE_SIGMA_relative.csv'
    houses, batteries = read_data(house_path, battery_path)

    wijk1 = SmartGrid(51, 51)
    wijk1.add_house_dictionaries(houses)
    wijk1.add_battery_dictionaries(batteries)

    for house in houses:
        wijk1.create_house(house['position'], house['output'])
    for battery in batteries:
        wijk1.create_battery(battery['position'], battery['capacity'])

    random_solve(wijk1)
    print("klaar")
def test_train_split(fraction):
    """Hold out a random fraction of users and write the split to CSV.

    All rows of the held-out users are dropped from the user-item frame. The
    reduced frame, the held-out user list, and the remaining user list are
    written under ``data/model_input/``.
    """
    # User-item data for the model.
    ratings = pd.read_csv("data/model_input/df.csv", sep='\t')
    # One entry per distinct user from the user-detail frame.
    user_detail = read_data("user_detail_medium")
    user_ids = list(user_detail.drop_duplicates(subset="user_id")["user_id"])

    n_test_users = int(len(user_ids) * fraction)

    # Shuffle in place, then split into held-out and kept users.
    random.shuffle(user_ids)
    held_out = pd.DataFrame(user_ids[:n_test_users])
    kept = pd.DataFrame(user_ids[n_test_users:])

    # Remove every row that belongs to a held-out user.
    for _, row in held_out.iterrows():
        doomed = ratings[ratings["user_id"] == row[0]].index
        ratings.drop(doomed, inplace=True)

    ratings.to_csv("data/model_input/df_train.csv", sep="\t", index=False)
    held_out.to_csv("data/model_input/df_test.csv", index=False)
    kept.to_csv("data/model_input/df_train_users.csv", index=False)
def get_new_edges(data_type, construction):
    """Return, per sentence, the edges that ``transform`` adds to the parse.

    For every sentence the predicted dependencies, supertags, and POS tags
    are passed through ``transform``. Edges present in the result but not in
    the predicted dependencies are kept, except self-loops.

    Returns:
        A list with one list of new edges per sentence.
    """
    tree_prop_file = 'd6.treeproperties'
    t2props_dict = get_t2props_dict(tree_prop_file)
    t2topsub_dict = get_t2topsub_dict(tree_prop_file)

    # Per-sentence inputs for the transformation.
    predicted_dependencies = read_data(construction, data_type)
    unbounded_dependencies = read_unbounded(construction, data_type)
    sents = read_stags(construction, data_type, 'sents')
    predicted_stags = read_stags(construction, data_type)
    predicted_pos = read_stags(construction, data_type, 'predicted_pos')

    new_edges = []
    # The unbounded-dependency list determines how many sentences are handled.
    for sent_idx in range(len(unbounded_dependencies)):
        deps = predicted_dependencies[sent_idx]
        transformed = transform(
            t2props_dict, t2topsub_dict, sents[sent_idx], deps,
            predicted_stags[sent_idx], predicted_pos[sent_idx])
        added = list(set(transformed) - set(deps))
        # Discard edges that point from a node to itself.
        new_edges.append([edge for edge in added if edge[0] != edge[1]])
    return new_edges
def load_paths(paths, fresh, frames_per_gesture, separate_frames, feature_set_type):
    """Load gesture data from the given paths, via a per-participant cache.

    Parameters
    ----------
    paths : list
        The paths to the data: every path leads to the Leap subfolder of a
        participant folder
    fresh : boolean
        Recalculate the aggregate of frames and rewrite the cache file
    frames_per_gesture : int
        The number of frames considered to define a gesture
    separate_frames : boolean
        Treat every frame as a separate data point [not recommended]
    feature_set_type : string
        'hands_only', 'fingers_only', 'all'

    Returns
    -------
    (all_data, all_target) : tuple of lists
        Data and targets of every path that was processed without an IOError
    """
    all_data, all_target = [], []
    for path in paths:
        if fresh:
            data, target = read_data(path, frames_per_gesture,
                                     separate_frames, feature_set_type)
        # The cache file sits next to the path with its last 4 characters cut.
        cache_file = path[:-4] + "Participant_fpg_{}.data".format(
            frames_per_gesture)
        try:
            if fresh:
                with open(cache_file, 'wb') as fp:
                    pickle.dump((data, target), fp)
            else:
                # NOTE(review): pickle.load is only safe on trusted files.
                with open(cache_file, 'rb') as fp:
                    data, target = pickle.load(fp)
        except IOError:
            # NOTE(review): in the fresh case this also discards the data
            # that was just read - confirm that is intended.
            continue
        all_data.extend(data)
        all_target.extend(target)
    return all_data, all_target
def _test(config):
    """Evaluate the model on the test set and optionally dump the results.

    Evaluations of all multi-batches are accumulated into one result whose
    accuracy and loss are printed. With ``config.vis`` each batch evaluation
    is also dumped to its own file. ``config.dump_answer`` and
    ``config.dump_eval`` control the final dumps.
    """
    test_data = read_data(config, 'test', True)
    update_config(config, [test_data])
    _config_debug(config)

    if config.use_glove_for_unk:
        # Build the embedding matrix for the words in 'new_word2idx'.
        if config.lower_word:
            word2vec_dict = test_data.shared['lower_word2vec']
        else:
            word2vec_dict = test_data.shared['word2vec']
        new_word2idx_dict = test_data.shared['new_word2idx']
        idx2vec_dict = {}
        for word, idx in new_word2idx_dict.items():
            idx2vec_dict[idx] = word2vec_dict[word]
        config.new_emb_mat = np.array(
            [idx2vec_dict[idx] for idx in range(len(idx2vec_dict))],
            dtype='float32')

    pprint(config.__flags, indent=2)
    models = get_multi_gpu_models(config)
    model = models[0]
    vis_tensors = models[0].tensor_dict if config.vis else None
    evaluator = MultiGPUEvaluator(config, models, tensor_dict=vis_tensors)
    graph_handler = GraphHandler(config, model)

    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    graph_handler.initialize(sess)

    # One step consumes batch_size examples on each GPU.
    num_steps = math.ceil(test_data.num_examples /
                          (config.batch_size * config.num_gpus))
    if 0 < config.test_num_batches < num_steps:
        num_steps = config.test_num_batches

    batches = test_data.get_multi_batches(
        config.batch_size, config.num_gpus, num_steps=num_steps,
        cluster=config.cluster)
    e = None
    for multi_batch in tqdm(batches, total=num_steps):
        ei = evaluator.get_evaluation(sess, multi_batch)
        e = ei if e is None else e + ei
        if config.vis:
            eval_subdir = os.path.join(
                config.eval_dir,
                "{}-{}".format(ei.data_type, str(ei.global_step).zfill(6)))
            if not os.path.exists(eval_subdir):
                os.mkdir(eval_subdir)
            path = os.path.join(eval_subdir, str(ei.idxs[0]).zfill(8))
            graph_handler.dump_eval(ei, path=path)

    print("test acc: %f, loss: %f" % (e.acc, e.loss))
    if config.dump_answer:
        print("dumping answer ...")
        graph_handler.dump_answer(e)
    if config.dump_eval:
        print("dumping eval ...")
        graph_handler.dump_eval(e)
def plot_beach(columns, df=None, beaches=None, separate_beaches=False, **kwds):
    """Plot ``columns`` for one or more beaches, selected by ``Client.ID``.

    Args:
        columns: Column name(s) passed as ``y`` to ``DataFrame.plot``.
        df: Data frame to plot. Read through ``read_data.read_data()`` when
            ``None``.
        beaches: A beach name or a list of names. Defaults to every distinct
            non-null ``Client.ID`` in ``df``.
        separate_beaches: If true, draw one subplot per beach (shared axes);
            otherwise draw all beaches on a single axes.
        **kwds: Extra keyword arguments forwarded to ``DataFrame.plot``.

    Returns:
        ``(fig, ax)`` exactly as returned by ``plt.subplots``.
    """
    if df is None:
        df = read_data.read_data()
    if beaches is None:
        beaches = df['Client.ID'].dropna().unique().tolist()
    if isinstance(beaches, str):
        # Be flexible with scalar vs. vector input.
        beaches = [beaches]

    if separate_beaches:
        fig, ax = plt.subplots(len(beaches), 1, sharex=True, sharey=True)
        # Fix: for a single row plt.subplots returns a bare Axes, not an
        # array, so indexing it failed when only one beach was requested.
        axes = [ax] if len(beaches) == 1 else ax
        for i, beach in enumerate(beaches):
            filt = df['Client.ID'] == beach
            df[filt].plot(y=columns, ax=axes[i], **kwds)
            axes[i].set_title(beach)
    else:
        fig, ax = plt.subplots(1, 1)
        l = len(ax.legend().get_texts())
        for beach in beaches:
            filt = df['Client.ID'] == beach
            df[filt].plot(y=columns, ax=ax, **kwds)
            # Prefix the legend entries with the beach name.
            # NOTE(review): ``l`` is never advanced, so entries of earlier
            # beaches are visited again on later iterations - confirm.
            for txt in ax.legend().get_texts()[l:]:
                txt.set_text(beach + ': ' + txt.get_text())
    return fig, ax
def main():
    """Run the NIPALS PCA workflow: read, pre-process, decompose, and plot."""
    # Basic run parameters.
    infile, number_of_components, graphtitle = get_information()

    # Read the data from the file.
    data, rawdata, variables, observations = rd.read_data(infile)

    # Inspect and pre-process the data.
    look_at_and_pre_process_data(data, rawdata, variables)

    # Colour-coding (if applicable).
    colours = get_colours(observations)

    # The actual NIPALS algorithm.
    print("\nBe patient. The NIPALS-algorithm may need some time ...\n")
    Z_merged, P_merged, r_merged, R_2, R_k_2, SPE, T_2 = na.nipals_pca(
        data, rawdata, number_of_components)

    # Basic plots (drawn without colour-coding).
    pl.plotting_the_basics(
        number_of_components, R_2, graphtitle, observations, variables,
        Z_merged, P_merged, colours=None)

    # The "versus" graphs.
    pl.plotting_more_complicated_graphs(
        number_of_components, R_2, T_2, graphtitle, SPE, observations,
        variables, Z_merged, P_merged, r_merged, colours)

    print("\nThank you for using this program\n")
def create_decision_tree():
    """Fill in customer statuses with decision trees and print the report.

    A tree fitted on checking balances predicts the status of customers whose
    checking status is empty. The same model is then refitted on savings
    balances, rendered to ``decision_tree_output``, and used for customers
    whose status is ``'close an account'``. Finally each customer's preferred
    contact medium is set to the highest-valued entry of ``contact_mediums``.
    """
    # Customer list with pre-processed data -> {1: c, 2: c, ...}
    customer_list = read_data()
    train_data = get_decision_tree_data(customer_list)
    check_bal, sav_bal, decision = train_data[0], train_data[1], train_data[2]

    model = tree.DecisionTreeClassifier()

    # Pass 1: checking balances -> status for customers without one.
    model.fit(check_bal, decision)
    for masked_id in customer_list:
        customer = customer_list[masked_id]
        if customer.get_checking_status() == '':
            predict = model.predict([customer.get_check_balances()])
            customer.checking_status = predict[0]

    # Pass 2: refit on savings balances.
    model.fit(sav_bal, decision)

    # Print the decision tree to file.
    dot_data = tree.export_graphviz(model, out_file=None)
    graph = gp.Source(dot_data)
    graph.render("decision_tree_output")

    for masked_id in customer_list:
        customer = customer_list[masked_id]
        if customer.get_checking_status() == 'close an account':
            predict = model.predict([customer.get_sav_balances()])
            customer.checking_status = predict[0]

    for masked_id in customer_list:
        customer = customer_list[masked_id]
        customer.pref_contact_medium = max(customer.contact_mediums,
                                           key=customer.contact_mediums.get)

    print_data_report(c_list=customer_list, file_name=output_file,
                      train_data=training_data_for_print)
def load(histmods, kmers, tissue, dist):
    """Load VISTA (or random) enhancer samples selected for ``tissue``.

    A ``'_notss'`` marker in ``tissue`` enables promoter filtering and the
    last six characters of ``tissue`` are removed. A tissue containing
    ``'random'`` is loaded from the file named by ``tissue`` with selection
    ``'all'``; otherwise ``VISTA_FILE`` is used.
    """
    filter_promoters = '_notss' in tissue
    if filter_promoters:
        tissue = tissue[:-6]

    if 'random' in tissue:
        source, selection = tissue, 'all'
    else:
        source, selection = VISTA_FILE, tissue

    target_data = load_vista.load_enhancers_with_seq(source)
    samples = read_data(histmods, kmers, target_data, source)
    return choose_tissue(samples, target_data, selection, dist,
                         filter_promoters)
def main():
    """Run PCA on the image matrix and save eigen-image reconstructions.

    The columns of ``X`` are centred on the row means, decomposed with an SVD
    of ``X / sqrt(n)``, and the first four columns are projected onto the
    left singular vectors. The first column is then rebuilt from its leading
    components for every count in ``m``, and the results are saved to
    ``reconstruction.pdf``.
    """
    X = read_data()
    p, n = X.shape

    # Centre every row on its mean over the n columns.
    mu_hat = np.sum(X, axis=1) / n
    X = (X.T - mu_hat).T

    U, S, Vt = np.linalg.svd(X / np.sqrt(n), full_matrices=False)
    Lam = S**2
    display_eigen_images(U, Lam)

    # Projection of the first four images.
    Y = np.dot(U.T, X[:, :4])
    plot_projections(Y)

    # Reconstruction of the first image from its leading components.
    recons = np.zeros((p, len(m)))
    for col, n_components in enumerate(m):
        recons[:, col] = np.dot(U[:, :n_components], Y[:n_components, 0])
    recons = (recons.T + mu_hat).T

    fig, axs = plt.subplots(3, 2)
    for k in range(6):
        panel = axs[k // 2, k % 2]
        img = np.reshape(recons[:, k], (Wd, Ht))
        panel.imshow(img, cmap=plt.cm.gray, interpolation='none')
        panel.set_title('m=' + str(m[k]))
        panel.axis('off')
    plt.savefig('reconstruction.pdf')
    plt.close()
def train(model, data, settings):
    """Fit ``model`` on a cached training partition and return it.

    The partition is cached under ``/app/mnist_train`` so that the same
    training subset is used on every iteration for a client. If the cache
    cannot be read, the partition is rebuilt with ``read_data`` and the
    cache is rewritten on a best-effort basis.

    Args:
        model: Object with a Keras-style ``fit`` method.
        data: Data source handed to ``read_data`` on a cache miss.
        settings: Mapping with ``training_samples``, ``batch_size`` and
            ``epochs``.

    Returns:
        The fitted ``model``.
    """
    print("-- RUNNING TRAINING --", flush=True)

    # NOTE: pickle is only safe here because the cache is written by this
    # same function; never point it at untrusted files.
    try:
        with open('/app/mnist_train/x.pyb', 'rb') as fh:
            x_train = pickle.loads(fh.read())
        with open('/app/mnist_train/y.pyb', 'rb') as fh:
            y_train = pickle.loads(fh.read())
        with open('/app/mnist_train/classes.pyb', 'rb') as fh:
            classes = pickle.loads(fh.read())
    except Exception:
        # Fix: a bare ``except:`` also swallowed KeyboardInterrupt/SystemExit.
        # Any ordinary failure is still treated as a cache miss.
        (x_train, y_train, classes) = read_data(
            data, nr_examples=settings['training_samples'])
        try:
            # Fix: os.mkdir failed when the directory already existed, so an
            # incomplete cache was never repaired.
            os.makedirs('/app/mnist_train', exist_ok=True)
            with open('/app/mnist_train/x.pyb', 'wb') as fh:
                fh.write(pickle.dumps(x_train))
            with open('/app/mnist_train/y.pyb', 'wb') as fh:
                fh.write(pickle.dumps(y_train))
            with open('/app/mnist_train/classes.pyb', 'wb') as fh:
                fh.write(pickle.dumps(classes))
        except Exception:
            # Caching is best-effort: training proceeds without it.
            pass

    model.fit(x_train, y_train, batch_size=settings['batch_size'],
              epochs=settings['epochs'], verbose=1)
    print("-- TRAINING COMPLETED --", flush=True)
    return model
def get_user_information():
    """Fetch user information for one 600-user chunk and save it to CSV.

    The response frame is collapsed to one row per user (post ids joined
    with ``', '``), split into chunks of 600 rows, and the chunk selected by
    ``chunk_index_userinfo`` is enriched through ``get_userinfo`` and written
    to ``data/user_detail/users_<chunk>.csv``.
    """
    # Raw user information, merged to one row per user.
    df = read_data("response")
    df = df.groupby(["user_id"]).agg({"post_id": lambda x: ', '.join(x)})
    df.reset_index(level=0, inplace=True)

    # Split the frame into chunks of ``n`` rows.
    n = 600
    list_df = [df[i:i + n] for i in range(0, df.shape[0], n)]

    # NOTE(review): ``chunk_index_userinfo`` is not defined in this function;
    # presumably a module-level setting - confirm.
    print("Getting user information from dataframe %s / %s"
          % (chunk_index_userinfo + 1, len(list_df)))

    # Fix: work on a copy so the new column is not assigned on a slice of df.
    df_sub = list_df[chunk_index_userinfo].copy()
    df_sub["userinfo"] = df_sub["user_id"].apply(get_userinfo)

    df_sub.to_csv("data/user_detail/users_%s.csv" % chunk_index_userinfo,
                  encoding='utf-8', index=False)

    # Fix: report the same total as the first message (was len(list_df) + 1).
    print("Successfully retrieved user information from dataframe %s / %s"
          % (chunk_index_userinfo + 1, len(list_df)))
def main():
    """Train a VADER model on the standardised data and print its clusters.

    The checkpoint is written to ``vader.ckpt`` inside the output directory,
    which is created if needed. Training runs in two phases: pre-fitting
    without the latent loss, then fitting with it.
    """
    output_path = Path('../output/try2_exactly_7_times')
    output_path.mkdir(exist_ok=True)
    save_path = output_path / 'vader.ckpt'

    # read_premade(DAYS_ORDERED) was used here as an alternative source.
    w_train, x_train, names = read_data()
    # Standardise with the global mean and standard deviation.
    x_train = (x_train - np.mean(x_train)) / np.std(x_train)

    vader = VADER(
        x_train=x_train,
        w_train=w_train,
        save_path=save_path,
        n_hidden=[128, 32],
        k=5,
        learning_rate=1e-3,
        output_activation=None,
        recurrent=True,
        batch_size=8,
        alpha=0.1,
    )

    vader.pre_fit(n_epoch=20, verbose=True)  # without latent loss
    vader.fit(n_epoch=100, verbose=True)     # with latent loss

    c = vader.cluster(x_train, w_train)  # cluster assignments
    p = vader.predict(x_train)           # reconstructions
    print(vader.get_clusters_on_x())
def validate(model, data, sample_fraction=1):
    """Evaluate ``model`` on data from ``read_data`` and return a report.

    The accuracy is appended to ``../result/result.txt`` and the loss and
    accuracy are printed. Any failure is printed and re-raised.

    Args:
        model: Object with Keras-style ``evaluate`` and ``predict_classes``.
        data: Data source handed to ``read_data``.
        sample_fraction: Passed through to ``read_data``.

    Returns:
        Dict with keys ``classification_report``, ``loss`` and ``accuracy``.
    """
    try:
        x_test, y_test, classes = read_data(data,
                                            sample_fraction=sample_fraction)
        model_score = model.evaluate(x_test, y_test, verbose=0)

        # Fix: use a context manager so the file is closed even if a write
        # raises.
        with open("../result/result.txt", "a") as result:
            result.write("===========================\n")
            result.write("Validation accuracy: %s\n" % model_score[1])

        print("======================================================")
        # NOTE(review): these labels say "Training" although the scores come
        # from the evaluation above - confirm before renaming.
        print('Training loss:', model_score[0])
        print('Training accuracy:', model_score[1])

        y_pred = model.predict_classes(x_test)
        clf_report = metrics.classification_report(y_test.argmax(axis=-1),
                                                   y_pred)
    except Exception as e:
        print("failed to validate the model {}".format(e), flush=True)
        raise

    report = {
        "classification_report": clf_report,
        "loss": model_score[0],
        "accuracy": model_score[1]
    }
    return report
def main():
    """Iterate granule building and SVM fitting, then score the final model.

    Every iteration builds granules from the first two entries of the
    previous SVM result and fits a new SVM. The last entry of the final
    result is then used as the classifier on the rows of the full data set
    whose last column equals 1, and the fraction of those rows it predicts
    as 1 is printed.
    """
    separator = "***********************************************************************"

    data = []
    num_iter = int(input("Enter the number of iterations"))
    for i in range(0, num_iter):
        inp = make_granules.make_granules(i, data[:2])
        data = svm.svm(inp)
        print("clf is ", data[-1])

    print(separator)
    print("ON THE FINAL DATA !!!!")

    df = read_data.read_data()
    values = df.values
    # Last column holds the class; the rest are features.
    Y = values[:, -1]
    X = values[:, :-1]

    # Keep only the rows labelled 1.
    indices = np.argwhere(Y == 1).ravel()
    clf = data[-1]
    predictions = clf.predict(X[indices])
    correctly_done = np.sum(predictions)
    print("Correctly classified minority points ",
          correctly_done / len(indices))
    print(separator)
def main(stud_ans): op = rd.read_data("train.tsv") X_train_raw = op[0] y_train = op[1] X_test_raw = [] X_test_raw.extend(stud_ans) cv = CountVectorizer(max_df=1.0, min_df=2, ngram_range=(1, 2), max_features=10000, stop_words='english') X_vec = cv.fit_transform(X_train_raw) selector = SelectKBest(mutual_info_classif, k=300) X_vec = selector.fit_transform(X_vec, y_train) Y_vec = cv.transform(X_test_raw) Y_vec = selector.transform(Y_vec) svd = TruncatedSVD(100) lsa = make_pipeline(svd, Normalizer(copy=False)) print "shape", X_vec.shape, Y_vec.shape X_train_lsa = lsa.fit_transform(X_vec) X_test_lsa = lsa.transform(Y_vec) p = [] knn_lsa = KNeighborsClassifier(n_neighbors=1, algorithm='brute', metric='cosine') knn_lsa.fit(X_train_lsa, y_train) p.extend(knn_lsa.predict(X_test_lsa)) float(p[0]) print "answers modified", p[0] return p
def main():
    """Parse the command line, assemble hyper-parameters and start training.

    Reads the data set information through ``read_data``, builds the model
    description with ``model_setup`` and hands everything to
    ``train_run_for_each_model``.
    """
    import argparse

    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument('--use_gpu', action='store_true', default=False,
                     help='Use GPU to train NN')
    cli.add_argument('--gpu_device', type=int, default=0,
                     help='GPU device ID')
    cli.add_argument('--model_type', type=str, default='STL',
                     help='Architecture of Model(STL/SNN/HPS/TF/PROG/Deconv/DeconvTM/DeconvTM2)')
    cli.add_argument('--test_type', type=int, default=0,
                     help='Type of test (including regularization scale or etc)')
    cli.add_argument('--all_output', action='store_true', default=False,
                     help='Train on all outputs, not final stability score')
    cli.add_argument('--save_mat_name', type=str, default='delete_this.mat',
                     help='Name of file to save training results')
    cli.add_argument('--lifelong', action='store_true', default=False,
                     help='Train in lifelong learning setting')
    args = cli.parse_args()

    train_hyperpara = {
        'improvement_threshold': 1.002,  # for accuracy (maximizing it)
        'patience_multiplier': 1.5,
        'lr': 0.0001,
        'lr_decay': 1.0 / 100.0,
        'num_run_per_model': 5,
        'learning_step_max': 5000,
        'patience': 100,
    }

    data_hyperpara = {
        'folder_name': 'Data',
        'train_file_name': _train_file_name,
        'test_file_name': _test_file_name,
        'train_valid_ratio': [0.8, 0.2],
        'all_output': args.all_output,
    }

    # read_data receives these settings positionally, in exactly this order.
    reader_keys = ('folder_name', 'train_file_name', 'test_file_name',
                   'train_valid_ratio', 'all_output')
    _, datainfo = read_data(*[data_hyperpara[key] for key in reader_keys])

    model_architecture, model_hyperpara = model_setup(args.model_type,
                                                      datainfo[2],
                                                      args.test_type)

    train_run_for_each_model(model_architecture, model_hyperpara,
                             train_hyperpara, data_hyperpara,
                             args.save_mat_name,
                             useGPU=args.use_gpu,
                             GPU_device=args.gpu_device,
                             doLifelong=args.lifelong)
def cal():
    """Run AdaRank over five MQ2007 folds and print nDCG@1..5.

    For each fold a model is fitted on the training split (with the
    validation split passed to ``fit``), the test split is predicted, and
    the rounded mean nDCG at cut-offs 1 to 5 is printed.  The per-cut-off
    averages across folds are printed at the end.
    """
    n_folds = 5
    cutoffs = range(1, 6)

    parser = read_data()
    # parser.read_mq2008('./MQ2008')
    parser.read_mq2007('./MQ2007')

    # scores[c - 1] collects the nDCG@c value of every fold
    scores = [[] for _ in cutoffs]

    for fold in range(n_folds):
        print("===========fold{}=================".format(fold + 1))
        (X, y, qid), (X_vali, y_vali, qid_vali), (X_test, y_test, qid_test) = \
            parser.get_fold(fold)

        ranker = AdaRank(scorer=NDCGScorer_qid(K=5))
        ranker.fit(X, y, qid, X_vali, y_vali, qid_vali)
        pred = ranker.predict(X_test)

        for cutoff in cutoffs:
            per_query = NDCGScorer_qid(K=cutoff)(y_test, pred, qid_test)
            score = round(per_query.mean(), 4)
            scores[cutoff - 1].append(score)
            print('nDCG@{}\t{}\n'.format(cutoff, score))

    print("==============Mean NDCG==================")
    for cutoff, fold_scores in enumerate(scores, 1):
        print("mean NDCG@{}\t{}\n".format(cutoff, round(np.mean(fold_scores), 4)))
def get_data(data_file, table=None, udgs_only=True, environment=None,
             sort_param='Re', verbose=True):
    """Load the results table and return the filtered, sorted subset.

    Parameters
    ----------
    data_file : forwarded to ``read_data``.
    table : list of values kept from the ``TABLE`` column.  ``None``
        (the default) means ``[2, 3, 4]``.
    udgs_only : forwarded to ``read_data`` as ``udg_only``.
    environment : list of keywords applied one after another.
        ``'sparse'``/``'dense'`` filter ``LocalEnv``,
        ``'cluster'``/``'non-cluster'`` filter ``GlobalEnv`` and
        ``'high'``/``'low'`` filter ``Density``; any other keyword (such as
        ``'all'``) leaves the data untouched.  ``None`` (the default) means
        ``['all']``.
    sort_param : column the result is sorted by.
    verbose : when true, print a summary of the selection first.

    Returns
    -------
    The selected rows, sorted by ``sort_param`` with a fresh 0..n-1 index.
    """
    # ``None`` replaces the former list defaults so that no mutable object is
    # shared between calls; the effective defaults are unchanged.
    if table is None:
        table = [2, 3, 4]
    if environment is None:
        environment = ['all']

    if verbose:
        print('\n{0}\n'.format('-' * 150))
        print("File: ", data_file)
        print("Table: ", table)
        print("Objects: ", "UDGs" if udgs_only else "Candidates")
        print("Environment:", environment)
        print("Sort By: ", sort_param, '\n')

    # Load Data from Appropriate Tables
    df_results = read_data(data_file, udg_only=udgs_only)
    df_subset = df_results.loc[df_results["TABLE"].isin(table)]

    # Filter for Environment: each recognised keyword maps to the column it
    # constrains, and the column is compared against the title-cased keyword.
    env_columns = {
        'sparse': "LocalEnv", 'dense': "LocalEnv",
        'cluster': "GlobalEnv", 'non-cluster': "GlobalEnv",
        'high': "Density", 'low': "Density",
    }
    for env in environment:
        column = env_columns.get(env.lower())
        if column is not None:
            df_subset = df_subset.loc[df_subset[column] == env.title()]

    # Sort Data
    df_subset = df_subset.sort_values(by=sort_param)
    df_subset = df_subset.reset_index(drop=True)
    return df_subset
def train(model,data,sample_fraction):
    """Fit ``model`` for one epoch on the data returned by ``read_data``.

    ``data`` and ``sample_fraction`` are forwarded to ``read_data``; the
    fitted ``model`` is returned.
    """
    print("-- RUNNING TRAINING --")

    # The data, split between train and test sets
    features, targets, _classes = read_data(data,sample_fraction=sample_fraction)

    # Disabled experiment kept for reference (it was an inert string
    # literal in the function body):
    #
    # num = 3 # Number of Clients
    # ran_order = sample(range(0, x_train.shape[0]), x_train.shape[0])
    # local_size=int(x_train.shape[0]/num)
    # partitionedX=[]
    # partitionedY=[]
    # for i in range(0,num):
    #     partitionedX.append(x_train[ran_order[i*local_size:(i+1)*local_size]])
    #     partitionedY.append(y_train[ran_order[i*local_size:(i+1)*local_size]])
    # X = numpy.array(partitionedX)
    # Y = numpy.array(partitionedY)

    model.fit(features, targets, batch_size=32, epochs=1, verbose=1)

    print("-- TRAINING COMPLETED --")
    return model
def main():
    """Load the training data from ``train_data/`` and train the dense net.

    All paths are resolved against the current working directory.  The LSTM
    variant is currently disabled.
    """
    cwd = os.getcwd()

    def under_cwd(relative):
        # Resolve a path relative to the working directory.
        return os.path.join(cwd, relative)

    data_path = under_cwd('train_data/')
    lstm_path = under_cwd('torcs-server/torcs-client/lstm.h5')
    dense_path = under_cwd('torcs-server/torcs-client/dense.h5')

    # The first two returned values (category indices) are not used here.
    _index_in, _index_out, input_data, output_data = rd.read_data(data_path)

    # LSTM_network(lstm_path, input_data, output_data)
    Dense_network(dense_path, input_data, output_data)
def setUp(self):
    """ Initialise the variables used by the tests. """
    loaded = read_data.read_data(filename = '../test/alkuarvot.txt_5x5x0')

    # The loader must return exactly 16 values.  Only the electron density
    # (12th), the Hartree potential (13th) and the last value are kept; the
    # 14th value shared its name with the last one and was overwritten by it.
    (_, _, _, _, _, _, _, _, _, _, _,
     electron_density, hartree, _, _, nuclear_density) = loaded

    self.elektroni_tiheys = electron_density
    # All three attributes refer to the same object.
    self.V_hartree1 = self.V_hartree2 = self.V_hartree3 = hartree
    self.ydin_tiheys = nuclear_density
def plot_data(extension):
    """Plot every data file in the current directory ending in ``extension``.

    Each matching file is read with ``rd.read_data`` (which yields the x and
    y series) and drawn as a line with circle markers labelled by its file
    name.  The figure is then shown and the call blocks until it is closed.
    """
    rc('text', usetex=False)
    fig = plt.figure()
    ax = fig.add_subplot(111)

    # ``name`` instead of ``file`` avoids shadowing the Python 2 builtin; the
    # previously unused ``label`` list has been dropped.
    for name in os.listdir('.'):
        if not name.endswith(extension):
            continue
        t, h = rd.read_data(name)
        ax.plot(t, h, 'o-', label=name)

    ax.legend()
    plt.show(block=True)
def setUp(self):
    """ Initialise the variables used by the tests. """
    loaded = read_data.read_data(filename = '../test/alkuarvot.txt_5x5x0')

    # The loader must return exactly 16 values; only the step size (7th) and
    # the grid dimensions (8th-10th, in the order ny, nx, nz) are used.
    (_, _, _, _, _, _, step, ny, nx, nz, _, _, _, _, _, _) = loaded

    self.nx, self.ny, self.nz = nx, ny, nz
    self.h = 0.1
    self.askel = step

    # Everything below is reset to None -- including ``askel``, which was
    # assigned just above.
    for attribute in ('mylambda', 'n_iter', 'tol', 'temperature',
                      'tiheydenmuutos', 'd_rho', 'askel', 'summa'):
        setattr(self, attribute, None)

    # create the grid under test
    self.testgrid = gridi.Gridi(nx=5,ny=5,nz=5,h=0.1,init_value=0.0)
# -*- coding: utf-8 -*- # anomalias import matplotlib as mpl # Descomente para não mostrar a janela em cada plot mpl.use('Agg') # Descomente para não mostrar a janela em cada plot import numpy as np import os from PyFuncemeClimateTools import ClimateStats as cs from PyFuncemeClimateTools import PlotMaps as pm from read_data import read_data pcp, pcpe, obs = read_data() nla = np.linspace(-90., 90., 181) # lat nlo = np.linspace(-180., 179., 360) # lon x, y = np.meshgrid(nlo, nla) y1, y2, x1, x2 = -60., 15., -90., -30. # região # # diff das climatologias # clim_diff = np.nanmean(pcpe[0:30, :, :], axis=0) - \ # np.nanmean(pcp[0:30, :, :], axis=0) # # figtitle = u'CLIMATOLOGIA (MM) (EXP - CONTROL)' # # directory = 'figs_expsolar/clim' # # if not os.path.exists(directory):
def plot_beach(columns, df=None, beaches=None, separate_beaches=False, **kwds):
    '''
    Plots the specified column of data for the specified beaches.

    Inputs
    ------
    columns          : One or more column names/indexes of data to plot.
    df               : The dataframe of data. If None, then the dataframe
                       will be read in using read_data.
    beaches          : Name or list of names of beaches to plot. If None,
                       all beaches will be used.
    separate_beaches : If False, each beach will be plotted on the same
                       axis. Otherwise, each beach will be plotted on its
                       own axis.

    keyword arguments
    -----------------
    Other keyword arguments will be passed to the plot routine.

    Returns
    -------
    fig : The figure object.
    ax  : If separate_beaches is false, then this is the axis object.
          Otherwise, it is the 1-D array of axis objects (also when only a
          single beach is plotted).

    Example
    -------
    >>> import read_data as rd
    >>> import visualizations as viz
    >>> df = rd.read_data()
    >>> beaches = ['Juneway', 'Rogers', 'Howard']
    >>> col = 'Escherichia.coli'
    >>> viz.plot_beach(col, df=df, beaches=beaches, separate_beaches=True)
    '''
    if df is None:
        df = read_data.read_data()
    if beaches is None:
        beaches = df['Client.ID'].dropna().unique().tolist()
    if isinstance(beaches, str):
        # be flexible with scalar vs. vector input
        beaches = [beaches]

    if separate_beaches:
        # squeeze=False always yields a 2-D array, so taking its only column
        # gives a 1-D array even for a single beach (the squeezed default
        # returned a bare Axes there, which broke the ax[i] indexing below).
        fig, ax = plt.subplots(len(beaches), 1, sharex=True, sharey=True,
                               squeeze=False)
        ax = ax[:, 0]
        for i, beach in enumerate(beaches):
            filt = df['Client.ID'] == beach
            df[filt].plot(y=columns, ax=ax[i], **kwds)
            ax[i].set_title(beach)
    else:
        fig, ax = plt.subplots(1,1)
        # number of legend entries each beach contributes
        per_beach = 1 if isinstance(columns, str) else len(columns)
        for i, beach in enumerate(beaches):
            first_new = i * per_beach
            filt = df['Client.ID'] == beach
            df[filt].plot(y=columns, ax=ax, **kwds)
            # TODO: cannot get this legend stuff to work...
            for txt in ax.legend().get_texts()[first_new:]:
                txt.set_text(beach + ': ' + txt.get_text())

    plt.show(block=TO_BLOCK)

    return fig, ax
def movie(compare_column=None, df=None):
    '''
    Creates an animation of the beaches E. coli levels represented as circles.
    The circle's radius is proportional to the log of the E. coli levels.
    Additionally, when the E. coli level is above the threshold of 235 PPM,
    the circle color changes from blue to purple.

    You can optionally choose to vary the background color of the animation
    with another column of data, however, this does not seem like a great way
    to visualize the relationship between E. coli levels and another
    data-stream.

    Inputs
    ------
    compare_column : The name or index of the column that will be used to
                     vary the background color. If compare_column is None,
                     then the background color will remain static.
    df             : The dataframe to use. If None, then the dataframe will
                     be read in using read_data.

    Returns
    -------
    anim : The animation object.

    Example
    -------
    >>> import read_data as rd
    >>> import visualizations as viz
    >>> df = rd.read_data()
    >>> viz.movie(df=df)
    '''
    if df is None:
        df = read_data.read_data()

    to_compare = compare_column is not None

    if to_compare:
        compare_min = df[compare_column].dropna().min()
        compare_max = df[compare_column].dropna().max()
        compare_rng = compare_max - compare_min
        bg_min_color = np.array([.75, .5, .2])
        bg_max_color = np.array([.999, .999, 0.9])

    # RGBA face colors for readings at/above and below the 235 threshold
    elevated_color = (0.301, 0, 1, 0.75)
    normal_color = (0, 0.682, 1, 0.75)

    file_name = '../data/ExternalData/Beach_Locations.csv'
    beach_locs = read_data.read_locations(file_name)

    # scale lat/longs to approximate planar coordinates
    phi = 0.730191653
    beach_locs['Latitude'] = beach_locs['Latitude'] * 110574.0
    beach_locs['Longitude'] = beach_locs['Longitude'] * 111320.0 * np.cos(phi)
    lat_min = beach_locs['Latitude'].min()
    lat_max = beach_locs['Latitude'].max()
    lat_rng = lat_max - lat_min
    lon_min = beach_locs['Longitude'].min()
    lon_max = beach_locs['Longitude'].max()
    lon_rng = lon_max - lon_min

    def generate_index():
        # Yields (timestamp, compare value) for every timestamp that has at
        # least one E. coli reading and, when comparing, a non-NaN value.
        for timestamp in df.index.unique():
            readings = df.ix[timestamp, 'Escherichia.coli']
            if to_compare:
                compare = df.ix[timestamp, compare_column]
                if type(compare) is pd.Series:
                    compare = compare.dropna().mean()
                if np.isnan(compare):
                    continue
            else:
                compare = None
            # a lookup gives either a scalar or a Series of readings
            if type(readings) is np.float64:
                has_reading = not np.isnan(readings)
            else:
                has_reading = readings.count()
            if has_reading:
                yield timestamp, compare

    def animate(timestamp_and_compare):
        timestamp, compare = timestamp_and_compare[0], timestamp_and_compare[1]

        if to_compare:
            # Fix: normalise by the range (max - min).  Dividing by the
            # maximum alone left the weight outside [0, 1] whenever the
            # minimum was not zero.
            weight = (compare - compare_min) / compare_rng if compare_rng else 0.
            bg_color = bg_min_color * weight + bg_max_color * (1. - weight)
            ax.set_axis_bgcolor(bg_color)

        for i, b in enumerate(beach_locs['Beach']):
            beach_filt = df.ix[timestamp, 'Client.ID'] == b
            try:
                beach_skipped = not beach_filt.sum() == 1
            except AttributeError:  # is a boolean
                beach_skipped = not beach_filt

            if beach_skipped:
                ecoli = 0
            else:
                ecoli = float(df.ix[timestamp, 'Escherichia.coli'][beach_filt])

            # Fix: log(0) is -inf, so skipped beaches (and missing readings)
            # get a zero radius instead of a non-finite one.
            r = 200 * np.log(ecoli) if ecoli > 0 else 0.

            if b in circle_indexes:
                ax.artists[circle_indexes[b]].set_radius(r)
            else:
                circ = plt.Circle((beach_locs.ix[i,'Longitude'],
                                   beach_locs.ix[i,'Latitude']),
                                  radius=r, edgecolor='none')
                ax.add_artist(circ)
                circle_indexes[b] = len(ax.artists) - 1

            ax.artists[circle_indexes[b]].set_facecolor(
                elevated_color if ecoli >= 235 else normal_color)

        ax.title.set_text(timestamp.strftime('%d %B %Y'))
        return ax

    fig = plt.figure(figsize=(18,10))
    ax = plt.gcf().gca()
    ax.set_xlim([lon_min - lon_rng * 0.4, lon_max + lon_rng * 0.15])
    ax.set_ylim([lat_min - lat_rng * 0.2, lat_max + lat_rng * 0.2])
    ax.set_aspect('equal')

    # beach name -> position of its circle in ax.artists
    circle_indexes = {}

    anim = animation.FuncAnimation(fig, animate, generate_index, repeat=False)

    plt.show(block=TO_BLOCK)

    return anim
def beach_hist(col='Escherichia.coli', beaches=None,
               subplots=False, transform=lambda x: x, df=None):
    '''
    Plots histograms of a specified column for the specified beaches.

    Inputs
    ------
    col       : Column name or index of the column to be histogrammed
    beaches   : List of beach names to generate histograms for, None
                indicates that all beaches should be used.
    subplots  : False to have each beach's histogram be plotted on the same
                axis. Otherwise, subplots is a list with two elements
                specifying the dimensions of the subplot array. For example,
                [8, 4] will create an 8x4 grid of subplots. There must be at
                least as many subplot axes as beaches.
    transform : A function to transform the data, can be useful to log scale
                the E. coli readings to make the histogram more spread out.
    df        : The dataframe containing the data. If None, the data will be
                read in using read_data.

    Raises
    ------
    ValueError : subplots does not have exactly two elements, or describes
                 fewer axes than there are beaches.
    TypeError  : subplots is truthy but has no length.

    Example
    -------
    >>> import read_data as rd
    >>> import visualizations as viz
    >>> import numpy as np
    >>> df = rd.read_data()
    >>> # Will be very messy, you should only plot on the same axis when there
    >>> # are only a few beaches to plot
    >>> viz.beach_hist(transform=lambda x: np.log(x+1), df=df)
    >>> viz.beach_hist(transform=lambda x: np.log(x+1), df=df, subplots=[7, 4])
    '''
    if df is None:
        df = read_data.read_data()
    if beaches is None:
        beaches = df['Client.ID'].dropna().unique().tolist()

    def beach_values(beach):
        # transformed values of ``col`` for a single beach
        return df[df['Client.ID'] == beach][col].map(transform)

    if subplots:
        try:
            if len(subplots) != 2:
                raise ValueError('subplots must have exactly 2 elements')
        except TypeError:
            raise TypeError('subplots must be an iterable with 2 elements')
        if subplots[0] * subplots[1] < len(beaches):
            raise ValueError('not enough subplots for each beach')

        # common finite value range, so every subplot shares the same bins
        min_x = np.inf
        max_x = -np.inf
        for b in beaches:
            data = beach_values(b)
            lo, hi = data.min(), data.max()
            if lo < min_x and not np.isinf(lo):
                min_x = lo
            # Fix: the guard used to test data.min() here, so an infinite
            # maximum could slip into the bin range.
            if hi > max_x and not np.isinf(hi):
                max_x = hi

        # squeeze=False keeps ``ax`` an array even for a 1x1 grid, which
        # otherwise returned a bare Axes without ``flatten``.
        fig, ax = plt.subplots(subplots[0], subplots[1],
                               sharex=True, sharey=True, squeeze=False)
        ax = ax.flatten()
        bins = np.linspace(min_x, max_x, 15)
        for i, b in enumerate(beaches):
            beach_values(b).hist(normed=1, ax=ax[i], bins=bins)
            ax[i].set_ylabel(b)
            ax[i].set_yticklabels([])
        # Fix: start at len(beaches); starting one later skipped the first
        # unused axis.
        for i in range(len(beaches), len(ax)):
            ax[i].set_yticklabels([])
    else:
        fig, ax = plt.subplots(1)
        for b in beaches:
            beach_values(b).hist(normed=True, alpha=.5, ax=ax)
        ax.legend(beaches)

    plt.show(block=TO_BLOCK)
do_sd = False filenames = ['alkuarvot.txt_5x5x0', 'alkuarvot.txt_10x10x0', 'alkuarvot.txt_16x16x0'] #filenames = ['alkuarvot.txt_10x10x0', # 'alkuarvot.txt_16x16x0'] #filenames = ['alkuarvot.txt_5x5x0'] #filenames = ['alkuarvot.txt_10x10x0'] #filenames = ['alkuarvot.txt_16x16x0'] if do_mc: for filename in filenames: #Elektronien määrä on vakio [mylambda, n_iter, tol, temperature,tiheydenmuutos, d_rho, askel, ny, nx, nz, elektroni_lkm, elektroni_tiheys, V_hartree, ydin_tiheys, ytimien_lkm, ydin_tiheys] = \ read_data.read_data(filename = filename) elektroni_tiheys.set_summa() elektroni_tiheys.set_mylambda(mylambda) elektroni_tiheys.set_n_iter(n_iter) elektroni_tiheys.set_tol(tol) elektroni_tiheys.set_temperature(temperature) elektroni_tiheys.set_tiheydenmuutos(tiheydenmuutos) elektroni_tiheys.set_d_rho(d_rho) elektroni_tiheys.set_askel(askel) outfile = open('mc_'+filename+'energiat.txt', 'w') start = time.time() konvergoinut = \ laskentaa.minimoi_monte_carlolla( outfile,
def run_training():
    """Train the network for ``FLAGS.max_steps`` steps.

    Three training files are stacked into one data set and two more files
    supply the validation and test sets.  The loss is printed and summarised
    every 100 steps; every 1000 steps, and after the final step, a checkpoint
    is written and all three data sets are evaluated.
    """
    # Training set: rows of the three files stacked in this order.
    training_files = ("Data4-10.txt", "Data21-25.txt", "Data26-29.txt")
    loaded = [read_data.read_data(name) for name in training_files]

    vectors_data = np.vstack(tuple(vectors for vectors, _ in loaded))
    print(vectors_data.shape)

    # Labels are stacked as column vectors and then flattened to 1-D.
    label_columns = tuple(np.reshape(labels, (len(labels), 1))
                          for _, labels in loaded)
    labels_data = np.reshape(np.vstack(label_columns), -1)
    print(labels_data.shape)

    # NOTE(review): the "test" file feeds the validation set and the "valid"
    # file feeds the test set -- confirm this pairing is intended.
    validation_data, vlabels_data = read_data.read_data("test30-31.txt")
    test_data, tlabels_data = read_data.read_data("valid18-20.txt")

    # Everything below is built into a fresh default graph.
    with tf.Graph().as_default():
        vectors_placeholder, labels_placeholder = placeholder_inputs(
            FLAGS.batch_size)

        # inference -> loss -> training op -> evaluation op
        logits = mnist.inference(vectors_placeholder,
                                 FLAGS.hidden1,
                                 FLAGS.hidden2)
        loss = mnist.loss(logits, labels_placeholder)
        train_op = mnist.training(loss, FLAGS.learning_rate)
        eval_correct = mnist.evaluation(logits, labels_placeholder)

        summary_op = tf.merge_all_summaries()
        saver = tf.train.Saver()
        sess = tf.Session()

        sess.run(tf.initialize_all_variables())

        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                graph_def=sess.graph_def)

        def evaluate(title, vectors, labels):
            # Print the heading, then run one evaluation pass.
            print(title)
            do_eval(sess, eval_correct, vectors_placeholder,
                    labels_placeholder, vectors, labels)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()

            # Feed dictionary for this particular training step.
            feed_dict = fill_feed_dict(step, vectors_data, labels_data,
                                       vectors_placeholder,
                                       labels_placeholder)

            # The value returned for train_op is discarded; only the loss
            # is kept.
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)

            duration = time.time() - start_time

            if step % 100 == 0:
                print('Step %d: loss = %.2f (%.3f sec)' % (step, loss_value, duration))
                summary_writer.add_summary(
                    sess.run(summary_op, feed_dict=feed_dict), step)

            completed = step + 1
            if completed % 1000 == 0 or completed == FLAGS.max_steps:
                saver.save(sess, FLAGS.train_dir, global_step=step)
                evaluate('Training Data Eval:', vectors_data, labels_data)
                evaluate('Validation Data Eval:', validation_data, vlabels_data)
                evaluate('Test Data Eval:', test_data, tlabels_data)
from sklearn.ensemble import AdaBoostClassifier from read_data import read_data def results_from_examples(ps,ls): return [1 if p == l else 0 for p,l in zip(ps,ls)] def error_rate(rs): return 1.0-((1.0*sum(rs))/len(rs)) print "Sklearn" examples,labels = read_data('Data/clean1_clean.data') clf = AdaBoostClassifier(n_estimators=50) a = AdaBoostClassifier.fit(clf,examples,labels) score = a.score(examples, labels) i = 0 print "Estimator, Ensemble error, Classifier error" for value in AdaBoostClassifier.staged_predict(clf, examples): rs = results_from_examples(value, labels) #print "Estimator: " + str(i) + " Ensemble error: " + str(error_rate(rs)) + " Classifier error: " + str(clf.estimator_errors_[i]) print str(i) + "," + str(error_rate(rs)) + "," + str(clf.estimator_errors_[i]) i = i + 1 print score
ylabel('magnitude') legend(loc=1) #plot(ct,cx,'k.', ms=3) ax=subplot(313) ct, cx, ce = merge(t, lc1, e1, lc2, e2, mc_delay[0], mc_delta_mag[0]) errorbar(ct,cx, ce,fmt='b.', ms=3, label = 'merged light curve') xlabel('time, day') ylabel('magnitude') legend(loc=1) fig.savefig("mc_light_curves_%s.png"%(output_tag)) print 'Process Started on', t0 print 'It is currently ', datetime.datetime.now() #show() ################################# #END def emcee_delay_estimator(...) t, mag1, e1, mag2, e2, mag3, e3, mag4, e4 = read_data('../data/cosmograil/RXJ1131_Tewes2013.rdb') e1=array([max(0.1,x) for x in e1]) e2=array([max(0.1,x) for x in e2]) e3=array([max(0.1,x) for x in e3]) e4=array([max(0.1,x) for x in e4]) t =t[0:70] lc1 =mag1[0:70] err1 =e1[0:70] lc2 =mag4[0:70] err2 =e4[0:70] emcee_delay_estimator(t, lc1, err1, lc2, err2, 'RXJ1131_curves_AD') #emcee_delay_estimator(t, mag2, e2, mag3, e3, 'RXJ1131_curves_BC')
import read_data as rd
import basic_analyze as az
import regsvd_sgd as mf
import datetime as dt

# Entry point: load the full data set and run the matrix factorisation.
if __name__ == '__main__':
    data = rd.read_data('/home/bliuab/tencent/data/all')
    #az.analyze_count(data)

    # the data before split_time is for training, after that for testing
    mf.mf(data, dt.datetime(2014,9,24,1), 100)
def check_sample_times(df=None, to_plot=False):
    '''
    Investigates whether there is a relationship between the time a sample
    was taken and the E. coli reading. A possible hypothesis being that
    samples taken later in the day might tend to read higher.

    The conclusions from this function seem to indicate that there is not a
    substantial relationship between sample time and E. coli reading.

    Inputs
    ------
    df      : Dataframe object, should contain at least the columns
              'Client.ID', 'Escherichia.coli', 'Sample.Collection.Time',
              if df is None, then it will be read in from read_data.
    to_plot : Boolean, if true, the results will be printed and plotted.
              Otherwise, just the cleansed dataframe will be returned.

    Returns
    -------
    ct : Dataframe of collection times and E. coli readings. The column
         'Sample.Collection.Time' is the fraction of the day, for example,
         a value of 0.50 indicates the collection happened at noon, a value
         of 0.25 would indicate 6:00 AM, etc.
    '''
    if df is None:
        df = rd.read_data()

    time_col = 'Sample.Collection.Time'
    ecoli_col = 'Escherichia.coli'

    ct = df[['Client.ID', ecoli_col, time_col]].dropna()

    def clean_times(s):
        '''
        Takes in a string from the sample collection column and makes it
        machine readable if possible, and a NaN otherwise
        '''
        if type(s) is not str:
            if type(s) is dt.datetime or type(s) is dt.time:
                return dt.datetime(2016, 1, 1, hour=s.hour, minute=s.minute)
        try:
            if ':' not in s:
                return float('nan')
            i = s.index(':')
            # up to two characters before the colon, two after it
            hr = int(s[max(i - 2, 0):i])
            mn = int(s[i+1:i+3])
            return dt.datetime(2016, 1, 1, hour=hr, minute=mn)
        except Exception:
            # best effort: anything unparseable becomes NaN (was a bare
            # ``except``, which also swallowed KeyboardInterrupt)
            return float('nan')

    def as_clock(day_fraction):
        # Format a fraction of a day as 'H:MM'.
        hours = int(day_fraction * 24)
        minutes = str(int((day_fraction * 24 - hours) * 60))
        return str(hours) + ':' + minutes.zfill(2)

    ct[time_col] = ct[time_col].map(clean_times)
    ct = ct.dropna()
    ct[time_col] = ct[time_col].map(
        lambda x: x.hour / 24. + x.minute / (24. * 60.)
    )

    # Filter out very early and very late samples.  The original note reads
    # "before 4:00 AM or after 8:00 PM"; the bounds actually applied are
    # 0.125 and 0.83 of a day.  It seems like most of the early ones might
    # actually be occurring in the afternoon. I've tried taking these and
    # manually changing them to the afternoon and there was no significant
    # change in results.
    ct = ct[(ct[time_col] > .125) & (ct[time_col] < .83)]

    if to_plot:
        # t-test
        ct_low = ct[ct[ecoli_col] < 235]
        ct_high = ct[ct[ecoli_col] >= 235]

        ttest = scipy.stats.ttest_ind(ct_low[time_col], ct_high[time_col])
        print('tests comparing below threshold to above threshold:')
        print('\tOVERALL:')
        print('\tt-statistic: {0}\n\tp-value : {1}'.format(ttest[0], ttest[1]))

        low_mean = ct_low[time_col].mean()
        print('\tbelow thresh mean: {0} ({1})'.format(
            low_mean, as_clock(low_mean)
        ))
        high_mean = ct_high[time_col].mean()
        # Fix: this line used to be labelled "below thresh mean" as well.
        print('\tabove thresh mean: {0} ({1})'.format(
            high_mean, as_clock(high_mean)
        ))

        ttests = []
        for b in ct['Client.ID'].dropna().unique().tolist():
            xl = ct_low[ct_low['Client.ID'] == b]
            xh = ct_high[ct_high['Client.ID'] == b]
            ttest = scipy.stats.ttest_ind(xl[time_col], xh[time_col])
            ttests.append(ttest)
            print('\t' + b)
            print('\t\tt-statistic: {0}\n\t\tp-value : {1}'.format(ttest[0], ttest[1]))

        # histogram of the per-beach p-values; a real list (not ``map``) so
        # this also works where ``map`` returns an iterator
        plt.hist([result[1] for result in ttests])

        # qq-plot
        probabilities = np.linspace(0,1,1000)
        x = [ct_low[time_col].quantile(p) for p in probabilities]
        y = [ct_high[time_col].quantile(p) for p in probabilities]
        ax = plt.subplots(1)[1]
        ax.plot([0, 1], [0, 1], 'r--')
        ax.hold(True)
        ax.plot(x, y)
        ax.set_xlabel('Below Threshold Quantiles')
        ax.set_ylabel('Above Threshold Quantiles')
        ax.set_aspect('equal')

        # set e coli to log scale
        ct[ecoli_col] = ct[ecoli_col].map(lambda x: np.log(x + 1.))

        # correlations
        print('Correlations between log(E. coli) and Sample collection time:')
        print('\tPearson correlation : ' + str(ct.corr(method='pearson').ix[0,1]))
        print('\tSpearman correlation: ' + str(ct.corr(method='spearman').ix[0,1]))

        # scatter plot
        ct.plot(y=ecoli_col, x=time_col, style='.')
        ax = plt.gca()
        ax.set_xlim([ct[time_col].min(), ct[time_col].max()])

        # histograms -- suppress blocking inside plot_beach and always
        # restore the module flag, even if plotting fails
        tb = viz.TO_BLOCK
        viz.TO_BLOCK = False
        try:
            fig, ax = viz.plot_beach(columns=time_col, df=ct)
        finally:
            viz.TO_BLOCK = tb
        ax.legend_.remove()

        plt.show(tb)

        # undo the log scaling before returning
        ct[ecoli_col] = ct[ecoli_col].map(lambda x: np.exp(x) - 1.)

    return ct
from evaluation import average_relative_error from save_result import save_to_file import sys # Poulis set k=25, m=2 as default! if __name__ == '__main__': if len(sys.argv) <= 1: flag = True elif sys.argv[1] == 'DA': flag = False else: flag = True #read gentree tax att_tree = read_tree() #read record trans = read_data() # remove duplicate items for i in range(len(trans)): trans[i] = list(set(trans[i])) if flag: print "Begin AA" cut = AA(att_tree, trans) else: print "Begin DA" cut = DA(att_tree, trans) # cut = AA(att_tree[-1], trans) print "Final Cut" print cut result = trans_gen(trans, cut) save_to_file(result) print "Finish T-Anonymization!!"
# Set-up for adversarial training: index the knowledge base, load the
# generator and discriminator, read the three splits and prepare the
# corrupter.  The module-level names are kept as they were.
task_dir = config().task.dir


def _task_path(name):
    """Return the path of ``name`` inside the task directory."""
    return os.path.join(task_dir, name)


kb_index = index_ent_rel(_task_path('train.txt'),
                         _task_path('valid.txt'),
                         _task_path('test.txt'))
n_ent, n_rel = graph_size(kb_index)

models = {'TransE': TransE, 'TransD': TransD, 'DistMult': DistMult, 'ComplEx': ComplEx}
gen_config = config()[config().g_config]
dis_config = config()[config().d_config]
gen = models[config().g_config](n_ent, n_rel, gen_config)
dis = models[config().d_config](n_ent, n_rel, dis_config)
gen.load(_task_path(gen_config.model_file))
dis.load(_task_path(dis_config.model_file))

train_data = read_data(_task_path('train.txt'), kb_index)
inplace_shuffle(*train_data)
valid_data = read_data(_task_path('valid.txt'), kb_index)
test_data = read_data(_task_path('test.txt'), kb_index)

# The filter tables are built from the raw data before it is converted to
# tensors.
filt_heads, filt_tails = heads_tails(n_ent, train_data, valid_data, test_data)
valid_data = [torch.LongTensor(vec) for vec in valid_data]
test_data = [torch.LongTensor(vec) for vec in test_data]


def tester():
    """Run link prediction on the validation split."""
    return dis.test_link(valid_data, n_ent, filt_heads, filt_tails)


train_data = [torch.LongTensor(vec) for vec in train_data]

# Test-split link prediction, run once before the remaining set-up.
dis.test_link(test_data, n_ent, filt_heads, filt_tails)

corrupter = BernCorrupterMulti(train_data, n_ent, n_rel, config().adv.n_sample)
src, rel, dst = train_data
n_train = len(src)
n_epoch = config().adv.n_epoch
""" import numpy as np from scipy.optimize import fmin_powell, fmin, anneal from gridi import * from energiat import E_tot import laskentaa import piirtoa import string import read_data #try: [ny, nx, nz, elektroni_lkm, elektroni_tiheys, V_hartree, ydin_tiheys, ytimien_lkm, ydin_tiheys] = read_data.read_data(filename = 'alkuarvot.txt') print elektroni_tiheys.get_volume() print elektroni_tiheys.gridi print elektroni_tiheys.to_1d_list() print "ydintiheys",ydin_tiheys.gridi #Elektronien määrä on vakio elektroni_tiheys.set_summa() ntot = np.sum(elektroni_tiheys.gridi)*V_hartree.get_volume_of_a_box() print "elektronien kokonaisvaraus", ntot, elektroni_tiheys.get_summa_mennyt() # otetaan tavoitteeksi että siirto hyväksytään joka viides kerta # silloin on sopivasti riskiä yrityksessä tiheydenmuutos = 0.1 outfile = open('energiat.txt', 'w')
def main():
    """Train a one-hidden-layer network 100 times and report test accuracy.

    Every run re-reads ``mnist_100.csv`` with a 70% training share, trains
    for 1000 full-batch Adam steps and records the accuracy on the test
    split.  The mean accuracy and the list of all accuracies are printed at
    the end.
    """
    filename = 'mnist_100.csv'
    train_perc = 0.7
    label_index = 0
    n_runs = 100
    max_iterations = 1000
    print_every = 100

    acc = []
    for run in range(n_runs):
        print(run)
        (train_x, train_y), (test_x, test_y), possibleLabels = read_data.read_data(
            filename, train_perc, label_index, NUM_LABELS)

        n_attributes = len(train_x[0])
        n_labels = NUM_LABELS

        # NOTE(review): the network is rebuilt in the default graph on every
        # run and the graph is never reset -- looks like it keeps growing.
        inputs = tf.placeholder(tf.float32, shape=[None, n_attributes])
        targets = tf.placeholder(tf.float32, shape=[None, n_labels])

        # hidden layer (sigmoid)
        W_hidden = tf.Variable(tf.truncated_normal([n_attributes, NUM_NEURONS], stddev=0.1))
        b_hidden = tf.Variable(tf.constant(0.1, shape=[NUM_NEURONS]))
        hidden_out = tf.sigmoid(tf.matmul(inputs, W_hidden) + b_hidden)

        # output layer
        W_outlayer = tf.Variable(tf.truncated_normal([NUM_NEURONS, n_labels], stddev=0.1))
        b_outlayer = tf.Variable(tf.constant(0.1, shape=[n_labels]))
        output_net = tf.matmul(hidden_out, W_outlayer) + b_outlayer

        if n_labels == 1:
            # single output: sigmoid with a squared-error cost
            predict = tf.sigmoid(output_net)
            cost = tf.reduce_sum(0.5 * (targets - predict) * (targets - predict))
        else:
            # several outputs: softmax with cross-entropy on the logits
            predict = tf.nn.softmax(output_net)
            cost = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=targets,
                                                           logits=output_net))

        train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(cost)

        with tf.Session() as sess:
            total_time = 0
            sess.run(tf.global_variables_initializer())

            for step in range(1, max_iterations + 1):
                # train the network; only the training step itself is timed
                started = time.process_time()
                sess.run(train_step, feed_dict={inputs: train_x, targets: train_y})
                total_time += time.process_time() - started

                if step % print_every == 0:
                    #p = sess.run(predict, feed_dict={x: train_x})
                    print("\nStep:", step, "\tTime:", total_time / step)
                    #cm = confusion_matrix.buildConfusionMatrix(p, train_y, numLabels)
                    #confusion_matrix.printConfusionMatrix(cm, possibleLabels)
                    #print("Training:")
                    #confusion_matrix.printAccuracy(cm)
                    #print("Testing:")
                    #p = sess.run(predict, feed_dict={x: test_x})
                    #cm = confusion_matrix.buildConfusionMatrix(p, test_y, numLabels)
                    #confusion_matrix.printAccuracy(cm)

            test_predictions = sess.run(predict, feed_dict={inputs: test_x})
            #print("Confusion Matrix on Test Set:")
            cm = confusion_matrix.buildConfusionMatrix(test_predictions, test_y, n_labels)
            #confusion_matrix.printConfusionMatrix(cm, possibleLabels)
            #print("Average time:", totalTime / step)
            accuracy = confusion_matrix.printAccuracy(cm)
            acc.append(float(accuracy))

    print(sum(acc) / float(len(acc)))
    print (acc)
def prepare_data(df=None):
    '''
    Preps the data to be used in the model. Right now, the code itself must
    be modified to tweak which columns are included in what way.

    Parameters
    ----------
    df : Dataframe to use. If not specified, the dataframe is loaded
         automatically with rd.read_data(). The dataframe passed in is not
         modified; all work happens on a filtered copy.

    Returns
    -------
    predictors : NxM DataFrame of the predictors for the classification
                 problem. Column order is deterministic: it follows the
                 order in which the columns are listed in this function.
    meta_info  : Nx2 DataFrame containing the columns 'Escherichia.coli' and
                 'Full_date', to be used, e.g., for leave-one-year-out cross
                 validation and creating the true class labels (elevated vs.
                 not elevated E. coli levels).
    '''
    if df is None:
        df = rd.read_data()

    # Leaving 2015 as the final validation set. The explicit copy makes sure
    # the derived column below is written to our own frame rather than to a
    # slice of the caller's frame.
    df = df[df['Full_date'] < '1-1-2015'].copy()

    ######################################################
    #### Add derived columns here
    ######################################################

    df['DayOfYear'] = df['Full_date'].map(lambda x: x.dayofyear)

    ######################################################
    #### List all columns you will use
    ######################################################

    # Meta columns are not used as predictors
    meta_columns = ['Full_date', 'Escherichia.coli']

    # Deterministic columns are known ahead of time, their actual values are
    # used with no previous days being used.
    deterministic_columns = [
        'Client.ID', 'Weekday', 'sunriseTime', 'DayOfYear'
    ]
    deterministic_hourly_columns = [
        'precipIntensity', 'temperature', 'windSpeed',
        'windBearing', 'pressure', 'cloudCover'
    ]
    for var in deterministic_hourly_columns:
        for hr in [-12, -8, -4, 0, 4]:
            deterministic_columns.append(var + '_hour_' + str(hr))

    # Historical columns have their previous days' values added to the
    # predictors, but not the current day's value(s). The value
    # NUM_LOOKBACK_DAYS set below controls the number of previous days added.
    # Nothing is currently done to fill NA values here, so if you wish to use
    # columns with a high rate of data loss, then you should add logic to
    # fill the NA values.
    historical_columns = [
        'precipIntensity', 'precipIntensityMax',
        'temperatureMin', 'temperatureMax',
        'humidity', 'windSpeed', 'cloudCover'
    ]

    # Each historical column will have the data from 1 day back, 2 days back,
    # ..., NUM_LOOKBACK_DAYS days back added to the predictors.
    NUM_LOOKBACK_DAYS = 3

    ######################################################
    #### Get relevant columns, add historical data
    ######################################################

    # De-duplicate while keeping the listing order. Going through a set here
    # would make the column order (and therefore the predictor layout)
    # change from one interpreter run to the next.
    all_columns = []
    seen = set()
    for col in meta_columns + deterministic_columns + historical_columns:
        if col not in seen:
            seen.add(col)
            all_columns.append(col)
    df = df[all_columns]

    df = rd.add_column_prior_data(
        df, historical_columns, range(1, NUM_LOOKBACK_DAYS + 1),
        beach_col_name='Client.ID', timestamp_col_name='Full_date'
    )

    # The same-day value of a historical column is only kept when that
    # column is also listed as deterministic.
    same_day_only_historical = [
        col for col in historical_columns if col not in deterministic_columns
    ]
    df = df.drop(same_day_only_historical, axis=1)

    ######################################################
    #### Process non-numeric columns
    ######################################################

    # process all of the nonnumeric columns
    # This method just assigns a numeric value to each possible value
    # of the non-numeric column. Note that this will not work well
    # for regression-style models, where instead dummy columns should
    # be created.
    def nonnumericCols(data, verbose=True):
        for f in data.columns:
            if data[f].dtype == 'object':
                if verbose:
                    print('Column ' + str(f) + ' being treated as non-numeric')
                lbl = sklearn.preprocessing.LabelEncoder()
                lbl.fit(list(data[f].values))
                data[f] = lbl.transform(list(data[f].values))
        return data

    df = nonnumericCols(df)

    ######################################################
    #### Drop any rows that still have NA, set up outputs
    ######################################################

    total_rows_predictors = df.dropna(subset=['Escherichia.coli'], axis=0).shape[0]
    nonnan_rows_predictors = df.dropna(axis=0).shape[0]
    if total_rows_predictors:
        pct_dropped = (
            100.0 - 100.0 * nonnan_rows_predictors / total_rows_predictors
        )
    else:
        # No row has an E. coli reading, so nothing is dropped on account of
        # the predictors; avoid dividing by zero.
        pct_dropped = 0.0
    print('Dropping {0:.4f}% of rows because predictors contain NANs'.format(
        pct_dropped
    ))
    df = df.dropna(axis=0)

    predictors = df.drop(['Escherichia.coli', 'Full_date'], axis=1)
    meta_info = df[['Escherichia.coli', 'Full_date']]

    return predictors, meta_info
total_rows_predictors = df.dropna(subset=['Escherichia.coli'], axis=0).shape[0] nonnan_rows_predictors = df.dropna(axis=0).shape[0] print('Dropping {0:.4f}% of rows because predictors contain NANs'.format( 100.0 - 100.0 * nonnan_rows_predictors / total_rows_predictors )) df.dropna(axis=0, inplace=True) predictors = df.drop(['Escherichia.coli', 'Full_date'], axis=1) meta_info = df[['Escherichia.coli', 'Full_date']] return predictors, meta_info if __name__ == '__main__': df = rd.read_data(read_weather_station=False, read_water_sensor=False) epa_model_df = df[['Drek_Prediction', 'Escherichia.coli']].dropna() predictors, meta_info = prepare_data(df) timestamps = meta_info['Full_date'] classes = meta_info['Escherichia.coli'] > 235 print('Using the following columns as predictors:') for c in predictors.columns: print('\t' + str(c)) hyperparams = { # Parameters that effect computation 'n_estimators':250, 'max_depth':5, # Misc parameters 'n_jobs':-1, 'verbose':False } clfs, roc_ax, pr_ax = model(timestamps, predictors, classes,
# Adapted from sklearn.metrics._binary_clf_curve: # scores typically has many tied values. Here we extract # the indices associated with the distinct values. We also # concatenate a value for the end of the curve. # We need to use isclose to avoid spurious repeated thresholds # stemming from floating point roundoff errors. distinct_value_indices = np.where(np.logical_not(np.abs( np.diff(scores)) < 0.00001))[0] threshold_idxs = np.r_[distinct_value_indices, labels.size - 1] return scores, labels, threshold_idxs if __name__ == '__main__': TO_BLOCK = False df = read_data.read_data() scores = df[['Reading.1', 'Escherichia.coli']].dropna()['Reading.1'] labels = df[['Reading.1', 'Escherichia.coli']].dropna()['Escherichia.coli'] labels = labels >= 235.0 roc(scores, labels) precision_recall(scores, labels) beach_hist(transform=lambda x: np.log(x + 1), df=df, subplots=[7, 4]) movie(df=df) plt.show()
model_suffix = time.strftime("%d_%m_%Y") directory = 'model_'+model_suffix if not os.path.exists(directory): os.makedirs(directory) ########################## ### Load the data ########################## if args.input_data: print('Loading data from {0}'.format(args.input_data)) df = pd.read_csv(args.input_data, parse_dates='Full_date', low_memory=False) df['Full_date'] = rd.date_lookup(df['Full_date']) else: print('Reading and loading data. Saving to {}'.format(directory+'/all_data.csv')) df = rd.read_data(read_weather_station=False, read_water_sensor=False, add_each_beach_data=True) df.to_csv(directory+'/all_data.csv', index=False) ############################### ### Prepare Predictors ############################### if args.input_processed: print('Using Preprocessed data from {0} and {1}'.format(args.input_processed, args.input_meta )) datafilename = args.input_processed metadatafilename = args.input_meta data_processed = pd.read_csv(datafilename) meta_info = pd.read_csv(metadatafilename, parse_dates='Full_date') meta_info['Full_date'] = rd.date_lookup(meta_info['Full_date']) else: print('Preparing data for modeling. Saving to {0} and {1}'.format(directory+'/processed.csv', directory+'/meta_processed.csv')) data_processed, meta_info = prepare_data(df)
# Locate the observation files
DataFolder = 'data/'
DataFilesNames = [
    'Basket_Ball_1.dat',
    'Basket_Ball_2.dat',
    'Bowling_Ball_1.dat',
    'Bowling_Ball_2.dat',
]
#DataFilesNames = ['Basket_Ball_1.dat']
DataFiles = ['{0}{1}'.format(DataFolder, name) for name in DataFilesNames]

# Load every file, discarding the final sample of each series
gridpts = 1000
Dx = 10.0
allt = []
allh = []
allYP = []
for datafile in DataFiles:
    YP = []
    t, h = read_data(datafile)
    # Drop the last data point; the remaining t values are divided by 600.
    h = h[:-1]
    t = np.array(t[:-1]) / 600.0
    # One grid of gridpts values spanning [hh - Dx, hh + Dx] per observation
    for hh in h:
        grid = np.linspace(hh - Dx, hh + Dx, num=gridpts)
        YP.append(grid)
    allYP.append(YP)
    allh.append(h)
    allt.append(t)

###############################################################################
# Compute posterior of observations at all points
allINTEGRAL = []