def test(sess, m_val):
    m_val.restore(sess)
    fetches = [m_val.accuracy, m_val.label]
    accuracy, predictions = sess.run(fetches)
    print('accuracy: %.4f' % accuracy)
    utils.write_results(predictions, FLAGS.relation_file, FLAGS.results_file)
def main():
    # Run model.
    model, results = experiments.tspec()

    # Plot training curves.
    visualize.training(results)

    # Save model.
    torch.save(
        model,
        os.path.join(PKG_PATH, 'models/best_tspec_model_{}.pt'.format(TSTAMP)))

    # Save results.
    utils.write_results(
        results,
        os.path.join(PKG_PATH, 'models/best_tspec_results_{}.pkl'.format(TSTAMP)))

    # Visualizations using non-shuffled data.
    train_data = utils.Data(train=True, augmentation=True)
    valid_data = utils.Data(train=False, augmentation=False)
    visualize.spectra(train_data, log=False, name='spectra_train')
    visualize.spectra(valid_data, log=False, name='spectra_valid')
    visualize.timeseries(train_data, name='timeseries_train')
    visualize.timeseries(valid_data, name='timeseries_valid')
    visualize.pca(train_data)
    visualize.tsne(train_data)
def main():
    print(alpha)
    xtrain = np.loadtxt(FLAGS.path_to_xtrain)
    xtest = np.loadtxt(FLAGS.path_to_xtest)
    xrand = np.loadtxt(FLAGS.path_to_xrand)
    print(xtrain.shape, xtest.shape, xrand.shape)

    FLAGS.depict_input_dim = 162
    FLAGS.rbfnn_num_center = 120
    for i in range(7, 16 + 1):
        k = 1 << i  # cluster counts 128, 256, ..., 65536
        FLAGS.depict_output_dim = k
        FLAGS.rbfnn_input_dim = k
        pprint.pprint(FLAGS)

        depict_input_shape = (162, )
        base_model, train_model = build(depict_input_shape)
        # Use a distinct inner loop variable: the original reused `i`,
        # shadowing the cluster-size index above.
        for iteration in range(20):
            train_model.compile(optimizer=Adam(lr=1e-4), loss=depict_loss)
            train_model.fit(xtrain, xtrain, epochs=1, validation_split=0.2)

            ys_train = base_model.predict(xtrain)
            ys_test = base_model.predict(xtest)
            metrics = classifier.run_with_soft_assignment(
                ys_train, ys_test, FLAGS)
            # metrics = classifier.run(ys_train, ys_test, FLAGS)
            print('num_cluster: %d, iteration: %d, alpha: %f'
                  % (k, iteration, alpha))
            pprint.pprint(metrics)
            utils.write_results(FLAGS, metrics, iteration,
                                postfix='alpha_%.12f' % alpha)
def main(test_mode=False): log_fname = "logs/train.log" if os.path.isfile(log_fname): os.remove(log_fname) log_hdl = logging.FileHandler(log_fname) log_hdl.setFormatter(logging.Formatter('%(message)s')) LOGGER.addHandler(log_hdl) data = utils.load_data_2d(test_mode=test_mode, valid_pct=0.1, cropping=False) # way to map between string labels and int labels y_map = utils.get_y_map(data) data['y']['train'] = utils.convert_y(data['y']['train'], y_map) data['y']['valid'] = utils.convert_y(data['y']['valid'], y_map) # run experiments y_test, model, performance, optimizer = exp.resnet(data) y_test = utils.convert_y(y_test, y_map) utils.write_results('results/resnet.csv', y_test) import IPython IPython.embed()
def main(args):
    expectations, input_files, columns = pre_process_args(args)
    successful_dest_folder = args.successful_dest_folder
    failed_dest_folder = args.failed_dest_folder

    files_handler = FilesHandler(input_files=input_files)
    files_path = files_handler.get_files_path()
    validator = FileValidator(columns=columns, expectations=expectations)

    logging.info("Starting the file validation process...")
    p = Pool(processes=cpu_count())
    r = list(
        tqdm(p.imap(validator.validate, files_path), total=len(files_path)))
    p.close()
    p.join()

    # Fixed spelling: the original used `sucessful_expectations`.
    successful_expectations, failed_expectations = parse_results(r)
    write_results(successful_expectations, success=True)
    write_results(failed_expectations, success=False)

    if successful_dest_folder is not None:
        for expectation in successful_expectations:
            file_name = expectation["file_path"].split("/")[-1]
            files_handler.move_file(file_name, successful_dest_folder)
    if failed_dest_folder is not None:
        for expectation in failed_expectations:
            file_name = expectation["file_path"].split("/")[-1]
            files_handler.move_file(file_name, failed_dest_folder)
    if files_handler.is_from_storage:
        files_handler.clean_temp_folder()
def modify_results(results_filename, exam_config_filename, output_filename,
                   invalidate, set_correct):
    config = utils.read_config()
    results = utils.read_results(results_filename)
    exam_data = utils.ExamConfig(exam_config_filename)
    for result in results:
        modify(result, exam_data, invalidate, set_correct)
    utils.write_results(results, output_filename, config['csv-dialect'])
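# utils.write_results is not defined in this snippet; the call above passes
# (results, output_filename, csv_dialect). A minimal sketch under that assumed
# signature and a hypothetical row layout -- not the project's actual helper:
import csv

def write_results(results, filename, csv_dialect):
    """Write one CSV row per result using the configured dialect."""
    with open(filename, 'w', newline='') as f:
        writer = csv.writer(f, dialect=csv_dialect)
        for result in results:
            writer.writerow(result)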
def detect_contours(binary: np.ndarray,
                    color: np.ndarray,
                    options: VesselDetectorOptions,
                    stem: str,
                    ext: str) -> np.ndarray:
    print(f"Finding contours in {stem}")
    contours, results = find_contours(binary.copy(), color.copy(), options)
    cv2.imwrite(f"{stem}.contours.{ext}", contours)
    write_results(results, options, f"{stem}.contours")
    return contours
def train_test(target_shape, dataset):
    model = DFGAN(batch_size=64, target_shape=target_shape, tag=tag, n_param=1.)
    trainer = SRGANTrainer(model=model, dataset=dataset,
                           num_train_steps=100000, lr=0.0001)
    trainer.train_model(None)
    model.batch_size = 100
    fid, i_s = trainer.test_gan_all()
    write_results(fid, i_s, model, dataset, tag)
def run_experiments_with_cross_validation(
    data,
    dataset_name,
    experiments_configs: List,
    n_sample: int,
    n_iter: int = 20,
    kf_splits: int = 5,
    initialization_method: Callable = random_sample_init,
):
    kf = KFold(n_splits=kf_splits, shuffle=True)
    # Note: the n_iter argument is overridden here by the fold size.
    n_iter = (((len(data) // kf_splits) * (kf_splits - 1)) // n_sample) - 1
    results = []
    samples_data = []
    random_samples_dic = dict()
    for (k, (train_index, test_index)), config in product(
            enumerate(kf.split(data)), experiments_configs):
        representations = np.array(
            [np.array(sent.tolist())
             for sent in data[config['representation']].tolist()])
        labels = np.array([np.array([label]) for label in data.Label.values])
        sentences = np.array([sent for sent in data.sentence.tolist()])
        if config['model_type'] in ORIGINAL_REPRESENTATION_MODELS:
            sentences = representations
        train_representations, test_representations = \
            representations[train_index], representations[test_index]
        train_labels, test_labels = labels[train_index], labels[test_index]
        train_sentences, test_sentences = \
            sentences[train_index], sentences[test_index]
        # Generate n_sample random indexes from train_index (one draw per fold).
        random_samples_dic[k] = random_samples_dic.get(
            k, np.random.randint(len(train_index), size=n_sample))
        random_init_sample = random_samples_dic.get(k)
        learner = ActiveLearner(
            initialization_method=initialization_method,
            n_samples=n_sample
        )
        res, chosen_samples = run_experiment(
            deepcopy(learner), config['model_type'],
            np.copy(train_representations), np.copy(train_sentences),
            np.copy(test_sentences), np.copy(train_labels),
            np.copy(test_labels), n_iter, config['sample_method'],
            random_init_sample, dataset_name)
        res['k_fold'] = [k] * n_iter
        chosen_samples['sample_method'] = res['sample_method'] = \
            [config['sample_method'].__name__] * n_iter
        chosen_samples['representation'] = res['representation'] = \
            [config['representation']] * n_iter
        chosen_samples['model_type'] = res['model_type'] = \
            [config['model_type']] * n_iter
        results.append(res)
        samples_data.append(chosen_samples)
    write_results(results, dataset_name, samples_data)
async def main(endpoints):
    results = {}
    coroutines = [
        asyncio.create_task(check_open_port(endpoint))
        for endpoint in endpoints
    ]
    completed, pending = await asyncio.wait(coroutines)
    for c in completed:
        endpoint, is_open = c.result()
        results[endpoint] = is_open
    write_results(results, 'asyncio')
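# check_open_port is not defined in this snippet; the results dict above
# implies it returns an (endpoint, status) pair. A minimal sketch under that
# assumption, treating an endpoint as a (host, port) tuple (hypothetical):
import asyncio

async def check_open_port(endpoint, timeout=3.0):
    """Attempt a TCP connection; return (endpoint, True) if the port accepts."""
    host, port = endpoint
    try:
        reader, writer = await asyncio.wait_for(
            asyncio.open_connection(host, port), timeout=timeout)
        writer.close()
        await writer.wait_closed()
        return endpoint, True
    except (OSError, asyncio.TimeoutError):
        return endpoint, False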
def main(_):
    transforms = FLAGS.transforms.split(',')
    method_name = 'UCF_C3D_{}'.format(''.join(transforms))
    tag = '{}_{}'.format(method_name, FLAGS.tag)
    net_scope = 'features'
    preprocessor_ssl = PreprocessorTransform(seq_length=16,
                                             n_speeds=FLAGS.n_speed,
                                             crop_size=(112, 112),
                                             resize_shape=(128, 171),
                                             transforms=transforms)
    preprocessor = Preprocessor(seq_length=16,
                                skip=FLAGS.frame_skip,
                                crop_size=(112, 112),
                                resize_shape=(128, 171),
                                num_test_seq=32)

    # Initialize the data generator
    dataset_train = UCF101('train_0')

    # Define the network and training
    model = SLC3D(scope=net_scope, tag=tag,
                  net_args={'version': FLAGS.net_version})
    trainer = VideoSSLTrainer(model=model,
                              data_generator=dataset_train,
                              pre_processor=preprocessor_ssl,
                              num_epochs=FLAGS.n_eps_pre,
                              batch_size=FLAGS.batch_size,
                              tag='pre',
                              init_lr=FLAGS.pre_lr,
                              momentum=FLAGS.momentum,
                              wd=FLAGS.wd,
                              skip_pred=FLAGS.skip_pred,
                              num_gpus=FLAGS.num_gpus,
                              train_scopes=net_scope)
    trainer.train_model()
    ckpt = wait_for_new_checkpoint(trainer.get_save_dir(), last_checkpoint=None)

    for i in range(0, 3):
        # Transfer UCF
        transfer_dataset = UCF101('train_{}'.format(i))
        ftuner = VideoBaseTrainer(model=model,
                                  data_generator=transfer_dataset,
                                  pre_processor=preprocessor,
                                  num_epochs=FLAGS.n_eps_ftune,
                                  batch_size=FLAGS.batch_size_ftune,
                                  init_lr=FLAGS.ftune_lr,
                                  momentum=FLAGS.momentum,
                                  wd=FLAGS.wd,
                                  num_gpus=FLAGS.num_gpus,
                                  train_scopes=net_scope,
                                  tag='ftune_split{}'.format(i),
                                  exclude_scopes=['global_step',
                                                  '{}/fc_3'.format(net_scope)])
        ftuner.train_model(ckpt)

        # Evaluate
        dataset_test = UCF101('test_{}'.format(i))
        tester = VideoBaseTester(model, dataset_test, FLAGS.batch_size,
                                 preprocessor)
        results = tester.test_classifier_multi_crop(ftuner.get_save_dir())
        write_results(results[0],
                      '{}_ftune_split{}_{}'.format(tag, i, transfer_dataset.name),
                      FLAGS)

        # Finetuning HMDB
        transfer_dataset = HMDB51('train_{}'.format(i))
        ftuner = VideoBaseTrainer(model=model,
                                  data_generator=transfer_dataset,
                                  pre_processor=preprocessor,
                                  num_epochs=FLAGS.n_eps_ftune,
                                  batch_size=FLAGS.batch_size_ftune,
                                  init_lr=FLAGS.ftune_lr,
                                  momentum=FLAGS.momentum,
                                  wd=FLAGS.wd,
                                  num_gpus=FLAGS.num_gpus,
                                  train_scopes=net_scope,
                                  tag='ftune_split{}'.format(i),
                                  exclude_scopes=['global_step',
                                                  '{}/fc_3'.format(net_scope)])
        ftuner.train_model(ckpt)

        # Evaluate
        dataset_test = HMDB51('test_{}'.format(i))
        tester = VideoBaseTester(model, dataset_test, FLAGS.batch_size,
                                 preprocessor)
        results = tester.test_classifier_multi_crop(ftuner.get_save_dir())
        write_results(results[0],
                      '{}_ftune_split{}_{}'.format(tag, i, transfer_dataset.name),
                      FLAGS)
def predict_fine(self, testing_data, results_file):
    x_test, y_test = testing_data
    p = self.prediction_params
    yh_s = self.full_classifier.predict(x_test, batch_size=p['batch_size'])
    single_classifier_error = utils.get_error(y_test, yh_s)
    logger.info('Single Classifier Error: ' + str(single_classifier_error))
    results_dict = {'Single Classifier Error': single_classifier_error}
    utils.write_results(results_file, results_dict=results_dict)
    return yh_s
def execute_bfs(grid: np.ndarray, goal: str, max_l: int, puzzle_number: int,
                heuristic_algorithm: str):
    """
    Wrapper function to run best-first search.
    :param grid: numpy 2D array representation of the input board
    :param goal: goal grid string
    :param max_l: maximum search path length
    :param puzzle_number: line number of the puzzle
    :param heuristic_algorithm: heuristic algorithm to be used for this run
    :return: void
    """
    print("Executing BFS Algorithm with heuristic {} and max search length "
          "of {} on the grid\n{}".format(heuristic_algorithm, max_l, grid))

    # Initialize necessary data structures.
    # Each open-list entry is (h(n): float, pegs: int, board state: Node).
    open_list: List[Tuple[float, int, Node]] = []
    open_set = set()    # states on the frontier (path still needed)
    closed_set = set()  # states already visited
    search_path: List[str] = []

    # Initialize root node information.
    s_grid = grid_to_string(grid)
    path = ['{} {}'.format(0, s_grid)]  # add the initial board state to the path
    num_black_tokens = s_grid.count('1')
    hn = get_heuristic(heuristic_algorithm, num_black_tokens, 0, set(), '')
    root_node = Node(grid, s_grid, 1, path, hn, num_black_tokens, set())
    heappush(open_list,
             (root_node.get_hn(), get_white_token_score(s_grid), root_node))
    open_set.add(s_grid)

    start_time = time.time()
    solution_path = bfs(open_list, open_set, closed_set, search_path, goal,
                        max_l, heuristic_algorithm,
                        start_time + TIME_TO_SOLVE_PUZZLE_SECONDS)
    end_time = time.time()

    write_results(puzzle_number, BEST_FIRST_ALGORITHM, heuristic_algorithm,
                  solution_path, search_path)
    gather_performance(puzzle_number, np.size(grid, 0), solution_path,
                       len(search_path), start_time, end_time,
                       BEST_FIRST_ALGORITHM, heuristic_algorithm)
    print('Found no solution' if solution_path == constant.NO_SOLUTION
          else 'Found solution in {} moves'.format(len(solution_path) - 1))
def generate_csv(timer, mode, write_hits, write_groups):
    for i in range(write_groups):
        response_list = []
        hits = 0
        current = datetime.datetime.now()
        while hits <= write_hits:
            client = init_client()
            in_current = datetime.datetime.now()
            hits += 1
            response_list.append(generate_stats(client, in_current))
            print('Sleeping from ' +
                  get_current_hour_minutes_seconds(in_current) +
                  ' for ' + str(timer * 60) + ' seconds')
            if mode == 'dev':
                print('deving...')
            else:
                time.sleep(timer * 60)
        print('Writing results ' + str(i + 1))
        write_results(PATH, response_list, current)
def write_results(run_data):
    """Write all of the recorded results from the experiments."""
    if run_data["options"].verbosity > 0:
        print()
        print(f"Writing confusion matrices to {run_data['options'].result_file}...")
    utils.write_results(
        run_data["options"].result_file,
        torch.round(
            run_data["sim_confusion_matrices"] / run_data["options"].num_sims
        ).long()
    )
    if (scmb := run_data.get('sim_confusion_matrices_bd')) is not None:
        utils.write_results(
            f"bd_{run_data['options'].result_file}",
            torch.round(scmb / run_data["options"].num_sims).long()
        )
def predict_coarse(self, testing_data, results_file, fine2coarse):
    x_test, y_test = testing_data
    p = self.prediction_params
    yh_s = self.full_classifier.predict(x_test, batch_size=p['batch_size'])
    single_classifier_error = utils.get_error(y_test, yh_s)
    logger.info('Single Classifier Error: ' + str(single_classifier_error))
    yh_c = np.dot(yh_s, fine2coarse)
    y_test_c = np.dot(y_test, fine2coarse)
    coarse_classifier_error = utils.get_error(y_test_c, yh_c)
    # Fixed log label: the original repeated 'Single Classifier Error' here.
    logger.info('Coarse Classifier Error: ' + str(coarse_classifier_error))
    results_dict = {'Single Classifier Error': single_classifier_error,
                    'Coarse Classifier Error': coarse_classifier_error}
    utils.write_results(results_file, results_dict=results_dict)
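# The fine-to-coarse projection above is just a matrix product: fine2coarse is
# a (num_fine, num_coarse) 0/1 indicator matrix, so np.dot collapses fine-class
# probability mass into coarse classes. A toy illustration (shapes hypothetical):
import numpy as np

fine2coarse = np.array([[1, 0],   # fine class 0 -> coarse class 0
                        [1, 0],   # fine class 1 -> coarse class 0
                        [0, 1]])  # fine class 2 -> coarse class 1
yh_fine = np.array([[0.2, 0.5, 0.3]])     # predicted fine-class probabilities
yh_coarse = np.dot(yh_fine, fine2coarse)  # -> [[0.7, 0.3]]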
def main(test_mode=False):
    log_fname = "logs/train13.log"
    if os.path.isfile(log_fname):
        os.remove(log_fname)
    log_hdl = logging.FileHandler(log_fname)
    log_hdl.setFormatter(logging.Formatter('%(message)s'))
    LOGGER.addHandler(log_hdl)

    data = utils.load_data(test_mode=test_mode, cropping=True)

    # Map between string labels and int labels.
    y_map = utils.get_y_map(data)
    data['y']['train'] = utils.convert_y(data['y']['train'], y_map)
    data['y']['valid'] = utils.convert_y(data['y']['valid'], y_map)

    # Run experiments.
    lr_pred, lr_model = exp.lr_baseline(data)
    lr_y_test = utils.convert_y(lr_pred['test'], y_map)
    utils.write_results('results/lr_baseline.csv', lr_y_test)
def original(options: VesselDetectorOptions):
    output_prefix = join(options.output_directory, f"{options.input_stem}")
    output_ext = 'png'

    # Read the image.
    if options.input_file.endswith('.czi'):
        image = czifile.imread(options.input_file)
        # Drop the first two axes, keeping (height, width, channels).
        image.shape = (image.shape[2], image.shape[3], image.shape[4])
        output_ext = 'jpg'
    else:
        image = cv2.imread(options.input_file)

    # Make a backup copy of the image.
    image_copy = image.copy()
    cv2.imwrite(f"{output_prefix}.original.{output_ext}", image_copy)

    # Threshold and clustering.
    colorspace = 'lab'
    channels = 'all'
    clusters = 2
    thresh = clustering_grayscale(image_copy, colorspace, channels, clusters) \
        if image.shape[2] == 1 \
        else clustering_color(image_copy, colorspace, channels, clusters)
    cv2.imwrite(f"{output_prefix}.segmented.{output_ext}", thresh)

    # Watershed segmentation.
    min_distance_value = 5
    labels = apply_watershed(image_copy, thresh, min_distance_value)

    # Find vessel contours (both branches wrote results identically, so the
    # write_results call is hoisted out of the conditional).
    if options.min_radius is not None:
        (avg_curv, label_trait, results) = find_vessels(
            image_copy, labels, image.shape[2] == 1, options.min_radius)
    else:
        (avg_curv, label_trait, results) = find_vessels(
            image_copy, labels, image.shape[2] == 1)
    write_results(results, options, f"{output_prefix}")

    if label_trait is not None:
        cv2.imwrite(f"{output_prefix}.contours.{output_ext}", label_trait)
def evaluate(tokens, instances, labels, write_result=False):
    '''Evaluate and print results'''
    prediction, target = sess.run(
        [model.pred, model.output_y],
        feed_dict={
            model.input_x: np.asarray(instances),
            model.output_y: np.asarray(labels),
            model.dropout: 1.0
        })
    prec, recall, f1sc = f1score(2, prediction, target)
    if write_result:
        print("Found MAX")
        print("--Tokenwise P:{:.5f}".format(prec), "R:{:.5f}".format(recall),
              "F1:{:.5f}".format(f1sc))
        prec, recall, f1sc = phrasalf1score(args, tokens, prediction, target)
        print("--Phrasal P:{:.5f}".format(prec), "R:{:.5f}".format(recall),
              "F1:{:.5f}".format(f1sc))
        write_results(tokens, prediction, target,
                      "runs/res_{:.5f}.txt".format(f1sc))
    return f1sc
def predict(self, testing_data, results_file, fine2coarse):
    x_test, y_test = testing_data
    self.load_best_full_model()
    p = self.prediction_params
    yh_s = self.full_model.predict(x_test, batch_size=p['batch_size'])
    single_classifier_error = utils.get_error(y_test, yh_s)
    logger.info('Single Classifier Error: ' + str(single_classifier_error))
    results_dict = {'Single Classifier Error': single_classifier_error}
    utils.write_results(results_file, results_dict=results_dict)
    np.save(self.model_directory + "/fine_predictions.npy", yh_s)
    # np.save(self.model_directory + "/coarse_predictions.npy", ych_s)
    np.save(self.model_directory + "/fine_labels.npy", y_test)
    # np.save(self.model_directory + "/coarse_labels.npy", yc_test)
    return yh_s
def run(args):
    start = time.time()
    if args.local_file is not None:
        file_path = download_file_from_storage(args.input_file)
    else:
        file_path = args.input_file
    index = args.index_column
    date_column = args.date_column
    y_column = args.y_column

    data = load_parse_file(file_path=file_path)
    dataframes = get_frames_by_id(dataframe=data, index_col=index)

    # Save dataframes on Google Cloud Storage.
    with open("dataframes.dill", "wb") as dill_file:
        dill.dump(dataframes, dill_file)
    if args.local_file is None:
        save_in_gcs("dataframes.dill", args.output_path)

    prophet_config_file_name = args.prophet_options
    if prophet_config_file_name is not None:
        # Note: the original opened a hard-coded 'config_example.json' here;
        # reading the file named by --prophet_options appears to be the intent.
        with open(prophet_config_file_name, 'r') as f:
            config = json.load(f)
    else:
        config = {'prophet_obj_kwds': {}, 'predict_kwds': {}, 'fit_kwds': {}}

    p = Pool(cpu_count())
    partial_func = partial(run_prophet,
                           date_column=date_column,
                           y_column=y_column,
                           index_column=index,
                           type_=args.type,
                           start_date=args.start_date,
                           end_date=args.end_date,
                           prophet_obj_kwds=config['prophet_obj_kwds'],
                           predict_kwds=config['predict_kwds'],
                           fit_kwds=config['fit_kwds'])
    predictions = p.map(partial_func, dataframes)
    results_path = write_results(predictions, file_name=args.output_name)
    if args.local_file is None:
        save_in_gcs(results_path, args.output_path)
    print("Done in {0} minutes".format((time.time() - start) / 60))
def main(n=None):
    if n:
        n = int(n)

    # Load word2vec models.
    model_source = "vectors/source_vectors.bin"
    model_target = "vectors/target_vectors.bin"
    model_source = KeyedVectors.load_word2vec_format(model_source, binary=True)
    model_target = KeyedVectors.load_word2vec_format(model_target, binary=True)

    # Load translation matrix from file.
    tm = np.loadtxt("data/transformation_matrix.csv", delimiter=",")

    # Pairs: a CSV file containing source and target sentence pairs.
    pairsfile = "data/sample_pairs.csv"
    pairs = pairs_from_df(pairsfile)
    if n:
        # Shorten data to n pairs to make testing faster.
        pairs = shuffle_and_slice(pairs, n)

    # Define pair objects.
    pair_objects = list()
    for p in pairs:
        pair = SentencePair(p[0], p[1])
        pair_objects.append(pair)

    pairs = calculate_sentence_vectors(model_source, model_target, tm,
                                       pair_objects)
    scores = compare_sentence_vectors(pairs)
    pairs = add_scores_to_pairs(pairs, scores)
    average = np.mean(scores)
    print("Scores average: ", average)
    write_results(pairs)
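# compare_sentence_vectors is not shown here. Given that each pair carries a
# translation-mapped source vector and a target vector, a plausible sketch is
# one cosine similarity per pair; both the function body and the attribute
# names below are assumptions, not the project's actual code:
import numpy as np

def compare_sentence_vectors(pairs):
    """Return a cosine-similarity score per SentencePair (assumed fields)."""
    scores = []
    for pair in pairs:
        a, b = pair.source_vector, pair.target_vector  # hypothetical attributes
        scores.append(float(np.dot(a, b) /
                            (np.linalg.norm(a) * np.linalg.norm(b))))
    return scores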
"Designing Poly-Time Algorithms" lecture """ def better_enum_max_subarray(ls): maxSum = newSum = 0 low = high = 0 i = 0 for i in range (len(ls)): newSum = 0 for j in range(i, len(ls)): newSum = newSum + ls[j] if newSum > maxSum: maxSum = newSum low = i high = j return low, high, maxSum if __name__ == '__main__': problems = load_problems() for problem in problems results = better_enum_max_subarray(ls = problem) write_results( filename='MSS_Results.txt', original_array=problem, max_subarray=problem[results[0]:results[1] + 1], max_sum=results1[2] )
    left_low, left_high, left_sum = \
        divide_and_conquer_find_max_subarray(array, low, mid)
    right_low, right_high, right_sum = \
        divide_and_conquer_find_max_subarray(array, mid + 1, high)
    # crossing sub-problem
    cross_low, cross_high, cross_sum = \
        find_max_crossing_subarray(array, low, mid, high)

    # case 1: max sub-array is in left array
    if left_sum >= right_sum and left_sum >= cross_sum:
        return left_low, left_high, left_sum
    # case 2: max sub-array is in right array
    elif right_sum >= left_sum and right_sum >= cross_sum:
        return right_low, right_high, right_sum
    # case 3: max sub-array is in array crossing midpoint
    else:
        return cross_low, cross_high, cross_sum


if __name__ == "__main__":
    problems = load_problems()
    for problem in problems:
        results = divide_and_conquer_find_max_subarray(
            array=problem, low=0, high=len(problem) - 1)
        write_results(
            filename="MSS_Results.txt",
            original_array=problem,
            max_subarray=problem[results[0]:results[1] + 1],
            max_sum=results[2],
        )
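# For comparison with the O(n^2) enumeration and O(n log n) divide-and-conquer
# versions above, the classic O(n) dynamic-programming solution (Kadane's
# algorithm) tracks the best subarray ending at each index. Not part of the
# original files; added as an illustrative sketch with the same return shape.
def kadane_max_subarray(ls):
    best_sum = cur_sum = ls[0]
    best_low = best_high = cur_low = 0
    for j in range(1, len(ls)):
        if cur_sum < 0:
            # A negative running sum can never help; restart at j.
            cur_sum, cur_low = ls[j], j
        else:
            cur_sum += ls[j]
        if cur_sum > best_sum:
            best_sum, best_low, best_high = cur_sum, cur_low, j
    return best_low, best_high, best_sum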
    y0, y1 = get_y0_y1(sess, y_post, f0, f1, shape=yalltr.shape, L=100,
                       verbose=False, task=task)
    if task == 'ihdp':
        y0, y1 = y0 * ys + ym, y1 * ys + ym
    score = evaluator_train.calc_stats(y1, y0)
    scores[i, :] = score

    # out-test score
    y0t, y1t = get_y0_y1(sess, y_post, f0t, f1t, shape=yte.shape, L=100,
                         verbose=False, task=task)
    if task == 'ihdp':
        y0t, y1t = y0t * ys + ym, y1t * ys + ym
    score_test = evaluator_test.calc_stats(y1t[test_filter], y0t[test_filter])
    scores_test[i, :] = score_test

    print('Replication: {}/{}, tr_ite: {:0.3f}, tr_ate: {:0.3f}, '
          'tr_score: {:0.3f}, te_ite: {:0.3f}, te_ate: {:0.3f}, '
          'te_score: {:0.3f}'.format(i + 1, args.reps, score[0], score[1],
                                     score[2], score_test[0], score_test[1],
                                     score_test[2]))
    sess.close()

print('CEVAE model total scores ' + str(arg_info))
train_means, train_stds = np.mean(scores, axis=0), sem(scores, axis=0)
print('train ITE: {:.3f}+-{:.3f}, train ATE: {:.3f}+-{:.3f}, '
      'train SCORE: {:.3f}+-{:.3f}'.format(train_means[0], train_stds[0],
                                           train_means[1], train_stds[1],
                                           train_means[2], train_stds[2]))
test_means, test_stds = np.mean(scores_test, axis=0), sem(scores_test, axis=0)
print('test ITE: {:.3f}+-{:.3f}, test ATE: {:.3f}+-{:.3f}, '
      'test SCORE: {:.3f}+-{:.3f}'.format(test_means[0], test_stds[0],
                                          test_means[1], test_stds[1],
                                          test_means[2], test_stds[2]))

# save scores to csv file
results = (train_means[2], train_stds[2], test_means[2], test_stds[2])
try:
    write_results(args, results)
except Exception:
    print(results)
# 11.9472119646
def k(tweets):
    num_words = float(tweets.map(
        lambda t: len(t[COLUMNS.index('tweet_text')].split())).sum())
    avg_length = num_words / tweets.count()
    return "Tweet avg num words\t" + str(avg_length)


if __name__ == "__main__":
    task = '1'
    tweets = get_tweets(task, False)
    # Use a distinct name: the original rebound result_file (and each of the
    # metric functions a..k) to its own return value.
    out_file = open(result_file(task), "w")
    results = [fn(tweets) for fn in (a, b, c, d, e, f, g, h, i, j, k)]
    write_results(out_file, results)
from utils import get_tweets, result_file, write_results
from constants import *


def tweets_place(tweets):
    tuples = \
        tweets.filter(lambda t: t[COLUMNS.index('country_code')] == 'US'
                      and t[COLUMNS.index('place_type')] == 'city') \
        .map(lambda t: (t[COLUMNS.index('place_name')], 1)) \
        .aggregateByKey(0, (lambda x, y: x + y), (lambda acc1, acc2: acc1 + acc2)) \
        .sortByKey() \
        .sortBy(lambda t: t[1], False) \
        .map(lambda t: '%s\t%s' % (t[0], t[1])) \
        .collect()
    return tuples


if __name__ == "__main__":
    task = '5'
    tweets = get_tweets(task, False)
    out_file = open(result_file(task), "w")  # avoid shadowing result_file()
    results = tweets_place(tweets)
    write_results(out_file, results, cols=['place_name', 'num_tweets'])
def main(arguments):
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Logistics
    parser.add_argument("--cuda", help="CUDA id to use", type=int, default=0)
    parser.add_argument("--seed", help="Random seed", type=int, default=19)
    parser.add_argument("--use_pytorch", help="1 to use PyTorch", type=int, default=1)
    parser.add_argument("--out_dir", help="Dir to write preds to", type=str, default='')
    parser.add_argument("--log_file", help="File to log to", type=str)
    parser.add_argument("--load_data", help="0 to read data from scratch", type=int, default=1)

    # Task options
    parser.add_argument("--tasks", help="Tasks to evaluate on, as a comma separated list", type=str)
    parser.add_argument("--max_seq_len", help="Max sequence length", type=int, default=40)

    # Model options (some help strings below fixed from copy-pasted "Word emb dim")
    parser.add_argument("--ckpt_path", help="Path to ckpt to load", type=str,
                        default=PATH_PREFIX + 'ckpts/svae/glue_svae/best.mdl')
    parser.add_argument("--vocab_path", help="Path to vocab to use", type=str,
                        default=PATH_PREFIX + 'processed_data/svae/glue_v2/vocab.json')
    parser.add_argument("--model", help="Model type", type=str, default='vae')
    parser.add_argument("--embedding_size", help="Word emb dim", type=int, default=300)
    parser.add_argument("--word_dropout", help="Word dropout rate", type=float, default=0.5)
    parser.add_argument("--hidden_size", help="RNN size", type=int, default=512)
    parser.add_argument("--latent_size", help="Latent vector dim", type=int, default=16)
    parser.add_argument("--num_layers", help="Number of encoder layers", type=int, default=1)
    parser.add_argument("--bidirectional", help="1 for bidirectional", type=bool, default=False)
    parser.add_argument("--rnn_type", help="Type of rnn", type=str,
                        choices=['rnn', 'gru'], default='gru')
    parser.add_argument("--batch_size", help="Batch size to use", type=int, default=64)

    # Classifier options
    parser.add_argument("--cls_batch_size", help="Batch size to use", type=int, default=64)

    args = parser.parse_args(arguments)
    logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.DEBUG)
    if args.log_file:
        fileHandler = logging.FileHandler(args.log_file)
        logging.getLogger().addHandler(fileHandler)
    logging.info(args)

    # define senteval params
    params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': args.use_pytorch,
                       'kfold': 10, 'max_seq_len': args.max_seq_len,
                       'batch_size': args.batch_size, 'load_data': args.load_data,
                       'seed': args.seed}
    params_senteval['classifier'] = {'nhid': 0, 'optim': 'adam',
                                     'batch_size': args.cls_batch_size,
                                     'tenacity': 5, 'epoch_size': 4,
                                     'cudaEfficient': True}

    # Load the sentence (V)AE model (the original comment said "InferSent",
    # but the code below builds a SentenceVAE/SentenceAE).
    vocab = json.load(open(args.vocab_path, 'r'))
    args.denoise = False
    args.prob_swap, args.prob_drop = 0.0, 0.0
    if args.model == 'vae':
        model = SentenceVAE(args, vocab['w2i'],
                            #sos_idx=w2i['<sos>'], eos_idx=w2i['<eos>'], pad_idx=w2i['<pad>'],
                            #max_sequence_length=args.max_seq_len,
                            embedding_size=args.embedding_size,
                            rnn_type=args.rnn_type,
                            hidden_size=args.hidden_size,
                            word_dropout=args.word_dropout,
                            latent_size=args.latent_size,
                            num_layers=args.num_layers,
                            bidirectional=args.bidirectional)
    elif args.model == 'ae':
        model = SentenceAE(args, vocab['w2i'],
                           embedding_size=args.embedding_size,
                           rnn_type=args.rnn_type,
                           hidden_size=args.hidden_size,
                           word_dropout=args.word_dropout,
                           latent_size=args.latent_size,
                           num_layers=args.num_layers,
                           bidirectional=args.bidirectional)
    model.load_state_dict(torch.load(args.ckpt_path))
    model = model.cuda()
    model.eval()
    params_senteval['model'] = model

    # Do SentEval stuff
    se = senteval.engine.SE(params_senteval, batcher, prepare)
    tasks = get_tasks(args.tasks)
    results = se.eval(tasks)
    if args.out_dir:
        write_results(results, args.out_dir)
    if not args.log_file:
        print(results)
    else:
        logging.info(results)
def train(args, model, tokenizer):
    args.train_batch_size = args.per_gpu_train_batch_size * max(1, args.n_gpu)
    exp_dir = 'H%d_L%d_E%d_B%d_LR%s_WD%s_%s' % (
        args.max_hyp_num, args.max_seq_len, args.num_train_epochs,
        args.train_batch_size, args.learning_rate, args.weight_decay,
        datetime.now().strftime('%m%d%H%M'))
    args.output_dir = os.path.join(args.output_dir, exp_dir)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    with open(os.path.join(args.output_dir, 'args.json'), 'w') as f:
        arg_dict = copy.deepcopy(args.__dict__)
        arg_dict['device'] = str(args.device)
        json.dump(arg_dict, f, indent=2)
    os.mkdir(os.path.join(args.output_dir, 'src'))
    for src_file in ['model.py', 'losses.py', 'run.py']:
        dst_file = os.path.join(args.output_dir, 'src', os.path.basename(src_file))
        shutil.copyfile(src_file, dst_file)

    file_handler = logging.FileHandler(os.path.join(args.output_dir, 'log.txt'))
    file_handler.setFormatter(formatter)
    file_handler.setLevel(logging.DEBUG)
    logger.addHandler(file_handler)

    train_dataset = load_dataset(args, tokenizer, mode='train')
    train_sampler = RandomSampler(train_dataset)
    data_loader = DataLoader(train_dataset, sampler=train_sampler,
                             batch_size=args.train_batch_size, num_workers=16)
    if args.max_steps > 0:
        t_total = args.max_steps
        args.num_train_epochs = args.max_steps // (
            len(data_loader) // args.gradient_accumulation_steps) + 1
    else:
        t_total = len(data_loader) // args.gradient_accumulation_steps * args.num_train_epochs

    # Prepare optimizer and schedule (linear warmup and decay)
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {
            'params': [p for n, p in model.named_parameters()
                       if not any(nd in n for nd in no_decay)],
            'weight_decay': args.weight_decay
        },
        {
            'params': [p for n, p in model.named_parameters()
                       if any(nd in n for nd in no_decay)],
            'weight_decay': 0.0
        }
    ]
    optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate,
                      eps=args.adam_epsilon)
    # optimizer = optim.SGD(optimizer_grouped_parameters, lr=args.learning_rate)
    # scheduler = WarmupLinearSchedule(optimizer, warmup_steps=args.warmup_steps, t_total=t_total)

    if args.fp16:
        try:
            from apex import amp
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")
        model, optimizer = amp.initialize(model, optimizer, opt_level=args.fp16_opt_level)

    # multi-gpu training (should be after apex fp16 initialization)
    if not args.no_cuda and args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    logger.info("***** Running training *****")
    logger.info("  Num stories = %d", len(train_dataset))
    logger.info("  Num epochs = %d", args.num_train_epochs)
    logger.info("  Instantaneous batch size per GPU = %d", args.per_gpu_train_batch_size)
    logger.info("  Total batch size = %d", args.train_batch_size * args.gradient_accumulation_steps)
    logger.info("  Gradient accumulation steps = %d", args.gradient_accumulation_steps)
    logger.info("  Total optimization steps = %d", t_total)
    logger.info("  Criterion = %s", args.criterion)
    logger.info("  Learning rate = %s", args.learning_rate)

    tb_writer = SummaryWriter(os.path.join('runs/', exp_dir))
    global_step = 0
    best_acc, best_step = 0, 0
    keys = ['list_mle', 'list_net', 'approx_ndcg', 'rank_net', 'hinge', 'lambda']
    losses = dict.fromkeys(keys, 0.0)
    last_losses = losses.copy()
    model.zero_grad()
    epoch_iterator = trange(int(args.num_train_epochs), desc="Epoch")
    set_seed(args)  # Added here for reproducibility (even between python 2 and 3)
    for epoch in epoch_iterator:
        batch_iterator = tqdm(data_loader, desc="Iteration")
        for step, batch in enumerate(batch_iterator):
            model.train()
            batch = tuple(t.to(args.device) if torch.is_tensor(t) else t for t in batch)
            x = {'input_ids': batch[0],
                 'token_type_ids': batch[1],
                 'attention_mask': batch[2]}
            logits = model(**x)  # (batch_size, list_len)
            labels = batch[3]

            _losses = dict()
            _losses['list_mle'] = list_mle(logits, labels)
            _losses['list_net'] = list_net(logits, labels)
            _losses['approx_ndcg'] = approx_ndcg_loss(logits, labels)
            _losses['rank_net'] = rank_net(logits, labels)
            _losses['hinge'] = pairwise_hinge(logits, labels)
            _losses['lambda'] = lambda_loss(logits, labels)

            if args.n_gpu > 1:
                # mean() to average on multi-gpu parallel (not distributed) training
                for k, v in _losses.items():
                    _losses[k] = v.mean()
            if args.gradient_accumulation_steps > 1:
                for k in _losses.keys():
                    _losses[k] /= args.gradient_accumulation_steps

            if args.fp16:
                with amp.scale_loss(_losses[args.criterion], optimizer) as scaled_loss:
                    scaled_loss.backward()
                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
            else:
                _losses[args.criterion].backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)

            for k in losses.keys():
                losses[k] += _losses[k].item()

            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                # scheduler.step()  # Update learning rate schedule
                model.zero_grad()
                global_step += 1

                # Log losses
                if args.log_period > 0 and global_step % args.log_period == 0:
                    # tb_writer.add_scalar('lr', scheduler.get_lr()[0], global_step)
                    for k in losses:
                        tb_writer.add_scalar('loss/' + k,
                                             (losses[k] - last_losses[k]) / args.log_period,
                                             global_step)
                    last_losses = losses.copy()

                # Log metrics
                if args.eval_period > 0 and global_step % args.eval_period == 0:
                    metrics, dev_losses = evaluate(args, model, tokenizer,
                                                   prefix='%d-%d' % (epoch, global_step),
                                                   partition=1)
                    for k, v in metrics.items():
                        tb_writer.add_scalar('metrics_dev/' + k, v, global_step)
                    for k, v in dev_losses.items():
                        tb_writer.add_scalar('loss_dev/' + k, v, global_step)
                    if metrics['accuracy'] > best_acc:
                        best_acc = metrics['accuracy']
                        best_step = global_step
                        logger.info("  Achieve best accuracy: %.2f", best_acc * 100)
                        output_dir = os.path.join(args.output_dir, 'checkpoint-best_acc')
                        if not os.path.exists(output_dir):
                            os.makedirs(output_dir)
                        model_to_save = model.module if hasattr(model, 'module') else model
                        model_to_save.save_pretrained(output_dir)
                        tokenizer.save_pretrained(output_dir)
                        write_results('step: %d' % best_step, metrics, dev_losses,
                                      os.path.join(output_dir, "dev-eval.txt"))
                        shutil.copyfile(os.path.join(args.output_dir, 'raw_dev.pkl'),
                                        os.path.join(output_dir, 'raw_dev.pkl'))
                        shutil.copyfile(os.path.join(args.output_dir, 'dev-pred.lst'),
                                        os.path.join(output_dir, 'dev-pred.lst'))

                # Save model checkpoint
                if args.save_period > 0 and global_step % args.save_period == 0:
                    output_dir = os.path.join(args.output_dir, 'checkpoint-{}'.format(global_step))
                    if not os.path.exists(output_dir):
                        os.makedirs(output_dir)
                    model_to_save = model.module if hasattr(model, 'module') else model
                    model_to_save.save_pretrained(output_dir)
                    tokenizer.save_pretrained(output_dir)
                    if global_step % args.eval_period == 0:
                        write_results('step: %d' % global_step, metrics, dev_losses,
                                      os.path.join(output_dir, "dev-eval.txt"))

            batch_iterator.set_description('Iteration(loss=%.4f)' % _losses[args.criterion].item())
            if 0 < args.max_steps < global_step:
                batch_iterator.close()
                break
        if 0 < args.max_steps < global_step:
            epoch_iterator.close()
            break

    tb_writer.close()
    logger.info("  global_step = %s, average loss = %s",
                global_step, losses[args.criterion] / global_step)
    logger.info("achieve best accuracy: %.2f at step %s", best_acc * 100, best_step)
    if args.save_period > 0:
        model_to_save = model.module if hasattr(model, 'module') else model  # Take care of parallel training
        model_to_save.save_pretrained(os.path.join(args.output_dir, 'checkpoint-final'))
        tokenizer.save_pretrained(os.path.join(args.output_dir, 'checkpoint-final'))
    # logger.removeHandler(file_handler)
    return global_step, losses[args.criterion] / global_step
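# For reference: training above optimizes one of several listwise ranking
# losses (args.criterion). The project's losses.py is not shown; this is a
# minimal ListMLE sketch (negative Plackett-Luce log-likelihood of the
# label-sorted permutation), added for illustration and possibly differing
# from the actual implementation.
import torch

def list_mle(logits: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
    """ListMLE over (batch, list_len) score and relevance tensors."""
    # Permutation that sorts each list by relevance, best first.
    order = labels.sort(dim=-1, descending=True).indices
    sorted_logits = logits.gather(-1, order)
    # log-sum-exp over the suffix j >= i, computed right-to-left.
    suffix_lse = torch.logcumsumexp(sorted_logits.flip(-1), dim=-1).flip(-1)
    return (suffix_lse - sorted_logits).sum(dim=-1).mean()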
def main():
    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument("--data_dir", default='dataset/alphanli/', type=str, required=True,
                        help="The input data dir.")
    parser.add_argument("--output_dir", default=None, type=str, required=True,
                        help="The output directory where the model checkpoints and predictions will be written.")
    parser.add_argument("--model_type", default=None, type=str, required=True,
                        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--model_name_or_path", default=None, type=str, required=True,
                        help="Path to pretrained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS))

    # Other parameters
    parser.add_argument("--config_name", default="", type=str,
                        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument("--tokenizer_name", default="", type=str,
                        help="Pretrained tokenizer name or path if not the same as model_name")
    parser.add_argument("--do_lower_case", action='store_true',
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--cache_dir", default="", type=str,
                        help="Where do you want to store the pre-trained models downloaded from s3")
    parser.add_argument('--linear_dropout_prob', type=float, default=0.6)
    parser.add_argument("--max_hyp_num", default=22, type=int,
                        help="The maximum number of hypotheses for a story.")
    parser.add_argument("--tt_max_hyp_num", default=22, type=int,
                        help="The maximum number of hypotheses for a story at training time.")
    parser.add_argument("--max_seq_len", default=128, type=int,
                        help="The maximum total input sequence length after tokenization. Sequences longer "
                             "than this will be truncated, sequences shorter will be padded.")
    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--criterion", default="list_mle", type=str,
                        help="Criterion for optimization selected in "
                             "[list_mle, list_net, approx_ndcg, rank_net, hinge, lambda]")
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--learning_rate", default=5e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--weight_decay", default=0.0, type=float,  # 0.01
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=3, type=int,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps to perform. "
                             "Overrides num_train_epochs.")
    parser.add_argument("--warmup_steps", default=0, type=int,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of update steps to accumulate before performing a backward/update pass.")
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")
    parser.add_argument('--log_period', type=int, default=50,
                        help="Log every X update steps.")
    parser.add_argument('--eval_period', type=int, default=1000,
                        help="Evaluate every X update steps.")
    parser.add_argument('--save_period', type=int, default=-1,
                        help="Save checkpoint every X update steps.")
    parser.add_argument("--eval_all_checkpoints", action='store_true',
                        help="Evaluate all checkpoints starting with the same prefix as model_name "
                             "and ending with step number")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--overwrite_output_dir', action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument('--overwrite_cache', action='store_true',
                        help="Overwrite the cached training and evaluation sets")
    parser.add_argument('--fp16', action='store_true',
                        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")
    parser.add_argument('--fp16_opt_level', type=str, default='O1',
                        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
                             "See details at https://nvidia.github.io/apex/amp.html")
    parser.add_argument('--comment', default=None, type=str,
                        help='The comment to the experiment')
    args = parser.parse_args()

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError("Output directory ({}) already exists and is not empty. "
                         "Use --overwrite_output_dir to overcome.".format(args.output_dir))

    # Setup CUDA, GPU
    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
    args.n_gpu = torch.cuda.device_count() if not args.no_cuda else 0
    args.device = device
    logger.info("Device: %s, n_gpu: %s, 16-bits training: %s", device, args.n_gpu, args.fp16)

    # Set seed
    set_seed(args)

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    logger.info("Training/evaluation parameters: %s", args)

    # Before doing anything with models, ensure fp16 execution of torch.einsum
    # when args.fp16 is set. Otherwise it defaults to "promote" mode and we get
    # fp32 operations. Running with `--fp16_opt_level="O2"` would remove the
    # need for this code, but it is still valid.
    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, 'einsum')
        except ImportError:
            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")

    # Training
    if args.do_train:
        config = config_class.from_pretrained(
            args.config_name if args.config_name else args.model_name_or_path,
            cache_dir=args.cache_dir if args.cache_dir else None,
        )
        if not hasattr(config, 'linear_dropout_prob'):
            config.linear_dropout_prob = args.linear_dropout_prob
        tokenizer = tokenizer_class.from_pretrained(
            args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
            do_lower_case=args.do_lower_case,
            cache_dir=args.cache_dir if args.cache_dir else None,
        )
        model = model_class.from_pretrained(
            args.model_name_or_path,
            config=config,
            cache_dir=args.cache_dir if args.cache_dir else None,
        )
        logger.info(str(model))
        model.to(args.device)
        train(args, model, tokenizer)

        # Load a trained model and vocabulary that you have fine-tuned
        model = model_class.from_pretrained(os.path.join(args.output_dir, 'checkpoint-best_acc'))
        model.to(args.device)

    # Evaluation - we can ask to evaluate all the checkpoints (sub-directories) in a directory
    if args.do_eval:
        results = {}
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in
                sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
        else:
            checkpoints = [os.path.join(args.output_dir, 'checkpoint-best_acc')]
        logging.getLogger("utils").setLevel(logging.INFO)
        logger.info("Evaluate the following checkpoints: %s", checkpoints)
        for checkpoint in checkpoints:
            # Reload the model
            global_step = checkpoint.split('-')[-1] if 'checkpoint' in checkpoint else ""
            tokenizer = tokenizer_class.from_pretrained(
                checkpoint, do_lower_case=args.do_lower_case,
                cache_dir=args.cache_dir if args.cache_dir else None)
            model = model_class.from_pretrained(
                checkpoint, cache_dir=args.cache_dir if args.cache_dir else None)
            model.to(args.device)
            if not args.no_cuda and args.n_gpu > 1:
                model = torch.nn.DataParallel(model)

            # Evaluate
            args.output_dir = checkpoint
            metrics, losses = evaluate(args, model, tokenizer, prefix=global_step, partition=None)
            write_results(args.comment, metrics, losses,
                          os.path.join(args.output_dir, "dev-eval.txt"))
            metrics = dict((k + ('_{}'.format(global_step) if global_step else ''), v)
                           for k, v in metrics.items())
            results.update(metrics)
        logger.info("Results: {}".format(results))
def main(arguments):
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Logistics
    parser.add_argument("--gpu_id", help="gpu id to use", type=int, default=0)
    parser.add_argument("--seed", help="Random seed", type=int, default=19)
    parser.add_argument("--use_pytorch", help="1 to use PyTorch", type=int, default=0)
    parser.add_argument("--out_dir", help="Dir to write preds to", type=str, default='')
    parser.add_argument("--log_file", help="File to log to", type=str, default='')
    parser.add_argument("--load_data", help="0 to read data from scratch", type=int, default=1)

    # Model options
    parser.add_argument("--batch_size", help="Batch size to use", type=int, default=16)
    parser.add_argument("--model_dir", help="path to model folder")
    parser.add_argument("--prefix1", help="prefix to model 1", default='nli_large_bothskip_parse')
    parser.add_argument("--prefix2", help="prefix to model 2", default='nli_large_bothskip')
    parser.add_argument("--word_vec_file", help="path to pretrained vectors")
    parser.add_argument("--strategy", help="Approach to create sentence embedding: last/max/best",
                        choices=["best", "max", "last"], default="best")

    # Task options
    parser.add_argument("--tasks", help="Tasks to evaluate on, as a comma separated list", type=str)
    parser.add_argument("--max_seq_len", help="Max sequence length", type=int, default=40)

    # Classifier options
    parser.add_argument("--cls_batch_size", help="Batch size to use for the classifier",
                        type=int, default=16)

    args = parser.parse_args(arguments)
    logging.basicConfig(format='%(asctime)s : %(message)s', level=logging.DEBUG)
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    log_file = os.path.join(args.out_dir, "results.log")
    fileHandler = logging.FileHandler(log_file)
    logging.getLogger().addHandler(fileHandler)
    logging.info(args)
    torch.cuda.set_device(args.gpu_id)

    # Set up SentEval
    params_senteval = {'task_path': PATH_TO_DATA, 'usepytorch': args.use_pytorch,
                       'kfold': 10, 'max_seq_len': args.max_seq_len,
                       'batch_size': args.batch_size, 'load_data': args.load_data,
                       'seed': args.seed}
    params_senteval['classifier'] = {'nhid': 0, 'optim': 'adam',
                                     'batch_size': args.cls_batch_size,
                                     'tenacity': 5, 'epoch_size': 4,
                                     'cudaEfficient': True}

    # Load model: import the GenSen package from the model dir.
    sys.path.insert(0, args.model_dir)
    from gensen import GenSen, GenSenSingle
    ckpt_dir = os.path.join(args.model_dir, "data", "models")
    gensen_1 = GenSenSingle(model_folder=ckpt_dir,
                            filename_prefix=args.prefix1,
                            pretrained_emb=args.word_vec_file,
                            cuda=bool(args.gpu_id >= 0))
    gensen_2 = GenSenSingle(model_folder=ckpt_dir,
                            filename_prefix=args.prefix2,
                            pretrained_emb=args.word_vec_file,
                            cuda=bool(args.gpu_id >= 0))
    gensen = GenSen(gensen_1, gensen_2)

    global STRATEGY
    STRATEGY = args.strategy
    params_senteval['gensen'] = gensen

    # Do SentEval stuff
    se = senteval.engine.SE(params_senteval, batcher, prepare)
    tasks = get_tasks(args.tasks)
    results = se.eval(tasks)
    write_results(results, args.out_dir)
    logging.info(results)
print ("unknown blocks - {}".format(block['type'])) features_per_layer.append(x) return detections """ Testing with a sample image """ if __name__ == '__main__': inp = prep_image('images/dog.jpg', 416) dnn = Darknet('cfg/yolov3.cfg') # print ("Module list = {}".format(dnn.module_list)) dnn.load_weights('yolov3.weights') CUDA = torch.cuda.is_available() if CUDA: dnn.cuda() inp = inp[0].cuda() dnn.eval() print ("inp = {}\nshape = {}".format(inp, inp.shape)) with torch.no_grad(): pred = dnn(inp, CUDA) print ("prediction = {}\nshape = {}".format(pred, pred.shape)) with open("/home/cbarobotics/dev/pred.t", 'wb') as f: torch.save(pred, f) res = write_results(pred, 0.5, 80) print ("res = {}\nshape = {}".format(res, res.shape))
def main():
    tf.logging.set_verbosity(tf.logging.INFO)
    prepare_file_system()
    # FLAGS.eval_step_interval = 1
    # FLAGS.infer_step_interal = 10

    # TODO: OOP
    train_graph = tf.Graph()
    with train_graph.as_default():
        train_filenames, train_iterator, train_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.train_batch_size)
        train_inputs, train_cost, optimizer = build_train_graph(
            train_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim,
            func=FLAGS.loss_function)
        train_saver = tf.train.Saver()
        train_merger = tf.summary.merge_all()
        train_initializer = tf.global_variables_initializer()
        # train_parameters = tf.trainable_variables()

    eval_graph = tf.Graph()
    with eval_graph.as_default():
        eval_filenames, eval_iterator, eval_elements = \
            build_text_line_reader(shuffle=True, batch_size=FLAGS.eval_batch_size)
        eval_inputs, eval_outputs = build_eval_graph(
            eval_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim)
        eval_saver = tf.train.Saver()
        eval_merger = tf.summary.merge_all()
        eval_initializer = tf.global_variables_initializer()
        # eval_parameters = tf.trainable_variables()

    infer_graph = tf.Graph()
    with infer_graph.as_default():
        infer_filenames, infer_iterator, infer_elements = \
            build_text_line_reader(shuffle=False, batch_size=FLAGS.infer_batch_size)
        infer_inputs, infer_outputs = build_infer_graph(
            infer_elements, FLAGS.depict_input_dim, FLAGS.depict_output_dim)
        rbfnn_metrics = build_metrics_graph('rbfnn')
        # kmeans_metrics = build_metrics_graph('kmeans')
        infer_saver = tf.train.Saver()
        infer_merger = tf.summary.merge_all()
        infer_initializer = tf.global_variables_initializer()

    config = tf.ConfigProto(device_count={"GPU": 1})
    train_sess = tf.Session(graph=train_graph, config=config)
    eval_sess = tf.Session(graph=eval_graph, config=config)
    infer_sess = tf.Session(graph=infer_graph, config=config)
    # train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train', train_graph)
    # validation_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/validation', eval_graph)
    # infer_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/inference', infer_graph)

    train_sess.run(train_initializer)
    # eval_sess.run(eval_initializer)
    # infer_sess.run(infer_initializer)

    import utils
    results = dict()
    for epoch in itertools.count():
        if epoch > FLAGS.how_many_training_epoches:
            break
        train_generator = utils.build_data_generator(
            xtrain, shuffle=True, batch_size=FLAGS.train_batch_size)
        for batch, xs_train in enumerate(train_generator):
            _, training_cost = train_sess.run(
                [optimizer, train_cost], feed_dict={train_inputs: xs_train})
        if epoch % 1 == 0:
            checkpoint_path = train_saver.save(
                train_sess, FLAGS.checkpoints_dir + '/checkpoints',
                global_step=epoch)
            # train_saver.save(train_sess, FLAGS.saved_model_dir + '/checkpoints_' + str(FLAGS.depict_output_dim), global_step=epoch)
            infer_saver.restore(infer_sess, checkpoint_path)

            infers_train = []
            infer_generator = utils.build_data_generator(
                xtrain, shuffle=False, batch_size=FLAGS.infer_batch_size)
            for batch, xs_infer in enumerate(infer_generator):
                ys_infer = infer_sess.run(infer_outputs,
                                          feed_dict={infer_inputs: xs_infer})
                infers_train.extend(ys_infer)

            infers_test = []
            infer_generator = utils.build_data_generator(
                xtest, shuffle=False, batch_size=FLAGS.infer_batch_size)
            for batch, xs_infer in enumerate(infer_generator):
                ys_infer = infer_sess.run(infer_outputs,
                                          feed_dict={infer_inputs: xs_infer})
                infers_test.extend(ys_infer)

            print(len(infers_train), len(infers_test))
            metrics = classifier.run(infers_train, infers_test, FLAGS)
            pprint.pprint(metrics)
            # Fixed: the original indexed results with an undefined `i`;
            # epoch is the intended key.
            results[epoch] = metrics
            utils.write_results(FLAGS, metrics, epoch)

    train_sess.close()
    eval_sess.close()
    infer_sess.close()
    return results