def test_add_unit_transformation_one_element_tuple_tensor_slices():
    """A unit transformation must reach the sole component of a one-element tuple.

    The input dataset is created with ``from_tensor_slices`` from a one-element
    tuple, while the expected dataset is created from the bare array holding
    the same rows with 3 added to every entry.
    """
    source_rows = np.array([
        [1, 2, 3, 4],
        [1, 2, 3, 0],
        [1, 2, 0, 0],
        [1, 0, 0, 0],
    ])
    shifted_rows = np.array([
        [4, 5, 6, 7],
        [4, 5, 6, 3],
        [4, 5, 3, 3],
        [4, 3, 3, 3],
    ])

    input_dataset = tf.data.Dataset.from_tensor_slices((source_rows, ))
    expected_output_dataset = tf.data.Dataset.from_tensor_slices(shifted_rows)

    def add3(x):
        return x + 3

    pipeline = DataPipeline()
    pipeline.add_unit_transformation(add3)
    output_dataset = pipeline.transform_dataset(input_dataset)

    actual_op = output_dataset.make_one_shot_iterator().get_next()
    wanted_op = expected_output_dataset.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        # One comparison per row of the 4-row input.
        for _ in range(4):
            actual, wanted = sess.run((actual_op, wanted_op))
            assert actual == approx(wanted)
def __init__(self, configuration):
    """Build the data pipeline, LSTM cell, output layer and graph placeholders.

    Args:
        configuration: object exposing ``batch_size``, ``max_memory`` and
            ``state_size``; it is stored as ``self.conf``.
    """
    self.conf = configuration
    self.data_pipeline = DataPipeline(self.conf.batch_size,
                                      self.conf.max_memory)

    # Both sizes are queried from the pipeline created above. The input size
    # must be the *result* of the call (not the bound method), because it is
    # used as a placeholder dimension below.
    self.input_size = self.data_pipeline.get_input_size()
    self.output_size = self.data_pipeline.get_output_size()

    self.lstm = tf.contrib.rnn.BasicLSTMCell(self.conf.state_size)

    # Output projection: state_size -> output_size.
    self.weights = hp.weight_variables(
        [self.conf.state_size, self.output_size])
    self.biases = hp.bias_variables([self.output_size])

    self.input = tf.placeholder(tf.float32,
                                shape=[
                                    self.conf.batch_size,
                                    self.conf.max_memory, self.input_size
                                ])

    # The three state placeholders share the [batch_size, state_size] shape.
    self.initial_state = tf.placeholder(
        tf.float32, shape=[self.conf.batch_size, self.conf.state_size])
    self.current_state = tf.placeholder(
        tf.float32, shape=[self.conf.batch_size, self.conf.state_size])
    self.hidden_state = tf.placeholder(
        tf.float32, shape=[self.conf.batch_size, self.conf.state_size])

    self.label = tf.placeholder(
        tf.int32, shape=[self.conf.batch_size, self.conf.max_memory])

    self.zero_state = self.lstm.zero_state(batch_size=self.conf.batch_size,
                                           dtype=tf.float32)
def run_once(scanner: DataPipeline, auto_push: bool):
    """Refresh the sources, process them once, and optionally push to git.

    Args:
        scanner: pipeline whose ``update_sources`` and ``process`` are run.
        auto_push: when true, push ``scanner.config.base_dir`` with a message
            made of the change list's start date and the host name.
    """
    scanner.update_sources()
    scanner.process()

    if not auto_push:
        return

    host = get_host()
    base_dir = scanner.config.base_dir
    message = f"{udatetime.to_logformat(scanner.change_list.start_date)} on {host}"
    util_git.push(base_dir, message)
def run_once(scanner: DataPipeline, capture: SpecializedCapture,
             auto_push: bool):
    """Run the scanner a single time.

    Args:
        scanner: pipeline whose ``update_sources`` and ``process`` are run.
        capture: optional specialized capture; when truthy it is handed to
            ``do_specialized_capture`` after processing.
        auto_push: when true, push ``scanner.config.base_dir`` with a message
            made of the change list's start date and the host name.
    """
    scanner.update_sources()
    scanner.process()

    if capture:
        do_specialized_capture(capture)

    if not auto_push:
        return

    host = get_host()
    base_dir = scanner.config.base_dir
    message = f"{udatetime.to_logformat(scanner.change_list.start_date)} on {host}"
    util_git.push(base_dir, message)
def test_add_unit_transformation_nested():
    """A unit transformation addressed by path must only touch that component.

    Elements are ``({"input_sequence": ..., "length": ...}, 9)`` tuples. The
    transformation is registered for position 0, key ``"input_sequence"``, so
    only the sequence values are expected to be shifted by 3.
    """
    sequences = (
        [1, 2, 3, 4],
        [1, 2, 3],
        [1, 2],
        [1],
        [1, 2],
        [1, 2, 3],
        [1, 2, 3, 4],
    )

    def input_data_generator():
        for seq in sequences:
            yield {"input_sequence": np.array(seq), "length": len(seq)}, 9

    def expected_output_data_generator():
        for seq in sequences:
            shifted = [value + 3 for value in seq]
            yield {"input_sequence": np.array(shifted), "length": len(seq)}, 9

    input_dataset = tf.data.Dataset.from_generator(
        input_data_generator,
        output_types=({
            "input_sequence": tf.int32,
            "length": tf.int32
        }, tf.int32))
    expected_output_dataset = tf.data.Dataset.from_generator(
        expected_output_data_generator,
        output_types=({
            "input_sequence": tf.int32,
            "length": tf.int32
        }, tf.int32))

    def add3(x):
        return x + 3

    pipeline = DataPipeline()
    pipeline.add_unit_transformation(add3, 0, "input_sequence")
    output_dataset = pipeline.transform_dataset(input_dataset)

    actual_op = output_dataset.make_one_shot_iterator().get_next()
    wanted_op = expected_output_dataset.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        # One comparison per generated element (7 in total).
        for _ in range(7):
            actual, wanted = sess.run((actual_op, wanted_op))
            actual_features, actual_label = actual[0], actual[1]
            wanted_features, wanted_label = wanted[0], wanted[1]
            assert actual_features["input_sequence"] == approx(
                wanted_features["input_sequence"])
            assert actual_features["length"] == approx(
                wanted_features["length"])
            assert actual_label == approx(wanted_label)
def test_add_structural_transformation():
    """A structural transformation may change the shape of each element.

    Plain sequences go in; dictionaries holding the sequence together with
    its length are expected to come out.
    """
    sequences = (
        [1, 2, 3, 4],
        [1, 2, 3],
        [1, 2],
        [1],
        [1, 2],
        [1, 2, 3],
        [1, 2, 3, 4],
    )

    def input_data_generator():
        for seq in sequences:
            yield np.array(seq)

    def expected_output_data_generator():
        for seq in sequences:
            yield {"input_sequnce": np.array(seq), "length": len(seq)}

    input_dataset = tf.data.Dataset.from_generator(input_data_generator,
                                                   output_types=tf.int32)
    expected_output_dataset = tf.data.Dataset.from_generator(
        expected_output_data_generator,
        output_types={
            "input_sequnce": tf.int32,
            "length": tf.int32
        })

    def add_length(input_sequnce):
        # Length is taken from the first dimension of the incoming tensor.
        return {
            "input_sequnce": input_sequnce,
            "length": tf.shape(input_sequnce)[0]
        }

    pipeline = DataPipeline()
    pipeline.add_structural_transformation(add_length)
    output_dataset = pipeline.transform_dataset(input_dataset)

    actual_op = output_dataset.make_one_shot_iterator().get_next()
    wanted_op = expected_output_dataset.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        # One comparison per generated element (7 in total).
        for _ in range(7):
            actual, wanted = sess.run((actual_op, wanted_op))
            assert actual["input_sequnce"] == approx(wanted["input_sequnce"])
            assert actual["length"] == approx(wanted["length"])
def __init__(self, data):
    """Store the train/validation split and fit a logistic regression.

    Args:
        data: 4-item iterable unpacked as
            ``(X_train, X_val, y_train, y_val)``.
    """
    super().__init__()
    self.data_prepocessor = DataPipeline()

    (self.X_train, self.X_val,
     self.y_train, self.y_val) = data

    model_settings = {
        "random_state": 1,
        "multi_class": 'multinomial',
        "solver": 'newton-cg',
        "verbose": 0,
    }
    self.lr_model = LogisticRegression(**model_settings)
    # Only the training part of the split is used for fitting.
    self.lr_model.fit(self.X_train, self.y_train)
def test_add_unit_transformation_one_element_tuple():
    """A unit transformation must reach the sole component of a one-element tuple.

    The input generator yields one-element tuples wrapping each sequence; the
    expected generator yields the bare sequences shifted by 3.
    """
    sequences = (
        [1, 2, 3, 4],
        [1, 2, 3],
        [1, 2],
        [1],
        [1, 2],
        [1, 2, 3],
        [1, 2, 3, 4],
    )

    def input_data_generator():
        for seq in sequences:
            # Wrapped in a one-element tuple on purpose.
            yield (np.array(seq), )

    def expected_output_data_generator():
        for seq in sequences:
            yield np.array([value + 3 for value in seq])

    input_dataset = tf.data.Dataset.from_generator(input_data_generator,
                                                   output_types=(tf.int32, ))
    expected_output_dataset = tf.data.Dataset.from_generator(
        expected_output_data_generator, output_types=tf.int32)

    def add3(x):
        return x + 3

    pipeline = DataPipeline()
    pipeline.add_unit_transformation(add3)
    output_dataset = pipeline.transform_dataset(input_dataset)

    actual_op = output_dataset.make_one_shot_iterator().get_next()
    wanted_op = expected_output_dataset.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        # One comparison per generated element (7 in total).
        for _ in range(7):
            actual, wanted = sess.run((actual_op, wanted_op))
            assert actual == approx(wanted)
def main():
    """Run the CoLaus, H3Africa and CHILD conversion pipelines in sequence.

    Each pipeline reads a delimited file, applies its mapping-driven
    processors in the order listed, and writes a JSON file.
    """
    # CoLaus
    colaus = DataPipeline()
    colaus = colaus.with_consumer(
        CsvDataConsumer(data_dir + "CoLaus_sample_100linesShuffled.csv", "\t"))
    colaus = colaus.with_processor(
        DataTransformer.from_mapping_file(
            mapping_dir + "colaus_cineca_mapping_questionnaire.csv"))
    colaus = colaus.with_processor(
        FieldValueTransformer.from_mapping_file(
            mapping_dir + "colaus_data_label_mapping.xlsx"))
    colaus = colaus.with_producer(
        JsonProducer(data_dir + "colaus_cineca.json"))
    colaus.run()

    # H3Africa
    h3africa = DataPipeline()
    h3africa = h3africa.with_consumer(
        CsvDataConsumer(
            data_dir + "h3africa_dummy_datasets_for_cineca_demo.csv", ";"))
    h3africa = h3africa.with_processor(
        DataTransformer.from_mapping_file(
            mapping_dir + "h3africa_cineca_mapping_questionnaire.csv"))
    h3africa = h3africa.with_producer(
        JsonProducer(data_dir + "h3africa_cineca.json"))
    h3africa.run()

    # CHILD
    child = DataPipeline()
    child = child.with_consumer(
        CsvDataConsumer(data_dir + "child_demo_data.csv", ","))
    # NOTE(review): this mapping path is hard-coded instead of being built
    # from mapping_dir like the others - confirm that is intended.
    child = child.with_processor(
        FieldValueTransformerPre.from_mapping_file(
            "../resources/mapping/child_initial_data_label_mapping.xlsx"))
    child = child.with_processor(
        DataTransformer.from_mapping_file(
            mapping_dir + "child_cineca_mapping_questionnaire.csv"))
    child = child.with_producer(
        JsonProducer(data_dir + "child_cineca.json"))
    child.run()
def __init__(self, data):
    """Store the train/validation split and grid-search an SVM classifier.

    Args:
        data: 4-item iterable unpacked as
            ``(X_train, X_val, y_train, y_val)``.
    """
    self.data_prepocessor = DataPipeline()

    (self.X_train, self.X_val,
     self.y_train, self.y_val) = data

    # NOTE(review): SVC is created without a `kernel` argument while the grid
    # varies `degree`; presumably `degree` only matters for a polynomial
    # kernel - confirm the grid is intended.
    search_space = {
        'C': [1, 2, 3],
        'degree': [3, 4, 5, 6, 7, 8],
    }
    base_estimator = SVC(gamma='auto')
    tuned_search = GridSearchCV(estimator=base_estimator,
                                param_grid=search_space,
                                cv=StratifiedKFold(3))
    tuned_search.fit(self.X_train, self.y_train)

    # The fitted search object itself is exposed as the model.
    self.SVM_model = tuned_search
def get_pipeline(self, train_dataset):
    """Assemble the training pipeline for ``train_dataset``.

    Args:
        train_dataset: dataset exposing ``max_feature_lens`` and
            ``num_classes``.

    Returns:
        A ``TrainPipeline`` wired with the data pipeline, label pipeline,
        BiLSTM network, Adam optimiser and cross-entropy loss built here.
    """
    trainer = Train(patience_epochs=self.patience_epochs,
                    early_stopping=self.early_stopping,
                    epochs=self.epochs)

    feature_lens = train_dataset.max_feature_lens
    class_count = train_dataset.num_classes
    text_to_index = TransformTextToIndex(feature_lens=feature_lens)

    # data pipeline
    data_pipeline = DataPipeline(
        text_to_index=text_to_index,
        postprocess_steps=[("merge_tensor", TransformMergeTensors())])

    # Label pipeline
    label_pipeline = LabelPipeline(
        label_encoder=TransformLabelEncoder(),
        label_reshaper=TransformLabelReshaper(num_classes=class_count))

    # Network
    network = BiLstmNetwork(input_size=text_to_index.max_index,
                            hidden_dim=self.hidden_dim,
                            output_size=train_dataset.num_classes)
    self.logger.info("Using model {}".format(type(network)))

    # optimiser = SGD(lr=self.learning_rate, params=model.parameters())
    optimiser = Adam(params=network.parameters())
    self.logger.info("Using optimiser {}".format(type(optimiser)))

    # Loss function
    criterion = nn.CrossEntropyLoss()
    self.logger.info("Using loss function {}".format(type(criterion)))

    # Train pipeline
    return TrainPipeline(batch_size=self.batch_size,
                         optimiser=optimiser,
                         trainer=trainer,
                         data_pipeline=data_pipeline,
                         label_pipeline=label_pipeline,
                         num_workers=self.num_workers,
                         loss_func=criterion,
                         model=network)
def __init__(self, data):
    """Store the train/validation split and grid-search a random forest.

    Args:
        data: 4-item iterable unpacked as
            ``(X_train, X_val, y_train, y_val)``.
    """
    super().__init__()
    self.data_prepocessor = DataPipeline()
    self.X_train, self.X_val, self.y_train, self.y_val = data

    # The earlier throw-away LogisticRegression assigned to self.RF_model was
    # dead code (always overwritten below) and has been removed.
    param_grid = {
        'n_estimators': [50, 100, 150, 200],
        'max_depth': [5, 6, 7, 8, 9, 10, 12, 15],
    }
    forest = RandomForestClassifier(random_state=1, verbose=0)
    tuned_forest = GridSearchCV(estimator=forest,
                                param_grid=param_grid,
                                cv=StratifiedKFold(3))
    tuned_forest.fit(self.X_train, self.y_train)

    # The fitted search object itself is exposed as the model.
    self.RF_model = tuned_forest
def __init__(self,
             bind_addresses,
             send_addresses,
             pipeline_count=8,
             packet_size=2048,
             sock_timeout=5):
    """Create one ``DataPipeline`` per (send address, bind address) pair.

    Args:
        bind_addresses: sequence of bind addresses, one per pipeline.
        send_addresses: sequence of send addresses, one per pipeline.
        pipeline_count: number of pipelines; must equal the length of both
            address sequences.
        packet_size: forwarded to every ``DataPipeline``.
        sock_timeout: forwarded to every ``DataPipeline``.

    Raises:
        Exception: if the two address sequences differ in length, or their
            length differs from ``pipeline_count``.
    """
    lengths_match = len(bind_addresses) == len(send_addresses)
    if not lengths_match:
        raise Exception(
            f'Bind address array length is not equal to the send address array length. {len(bind_addresses)} vs {len(send_addresses)}'
        )

    count_matches = len(bind_addresses) == pipeline_count
    if not count_matches:
        raise Exception(
            f'Pipeline count is not equal to the length of the supplied arrays. {len(bind_addresses)} vs {pipeline_count}'
        )

    self.pipeline_count = pipeline_count
    # Note the argument order: send address first, then bind address.
    self.data_pipelines = [
        DataPipeline(send_address, bind_address, packet_size, sock_timeout)
        for send_address, bind_address in zip(send_addresses, bind_addresses)
    ]
def run_test():
    """Run inference for every checkpoint in ``FLAGS.log_dir`` and dump results.

    For each checkpoint the graph is rebuilt, the weights are restored, and
    the input queue is drained once. Per step, the ``fc2`` activations are
    saved to ``feature_<step>.npy``, the sample ids are appended to
    ``ids.txt`` and the predictions to ``predictions.txt`` (all inside
    ``FLAGS.log_dir``). The two text files are re-created for every
    checkpoint, so they end up holding the last checkpoint's output.
    """
    # Get all ckpt names in log dir (without meta ext)
    meta_list = get_checkpoints(FLAGS.log_dir)

    # GPU/CPU Flag
    if FLAGS.gpu is not None:
        compute_string = '/gpu:' + str(FLAGS.gpu)
    else:
        compute_string = '/cpu:0'

    # Iterate through the checkpoints
    for ckpt_path in meta_list:
        tf.reset_default_graph()

        ####################
        # Setup Data Queue #
        ####################
        with tf.device("/cpu:0"):
            with tf.variable_scope('test'):
                data_pipeline = DataPipeline(augment=False,
                                             num_epochs=1,
                                             shuffle=False)
                validate_x, validate_y, ids = data_pipeline.batch_ops()

        with tf.device(compute_string):
            ##########################
            # Declare Validate Graph #
            ##########################
            # Sets train/test mode; currently only used for BatchNormalization
            # True: Train  False: Test
            phase = tf.placeholder(tf.bool, name='phase')
            validate_model = model(validate_x, validate_y, phase)
            validate_pred = validate_model.inference()
            pool5 = validate_model.fc2

        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())

        ids_path = os.path.join(FLAGS.log_dir, 'ids.txt')
        predictions_path = os.path.join(FLAGS.log_dir, 'predictions.txt')
        session_config = tf.ConfigProto(allow_soft_placement=True)

        # Context managers guarantee both output files are flushed and closed
        # even when the evaluation loop fails.
        with open(ids_path, 'w') as ids_file, \
                open(predictions_path, 'w') as predictions_file, \
                tf.Session(config=session_config) as sess:
            sess.run(init)

            # Coordinator hands data fetching threads
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            optimistic_restore(sess, ckpt_path)

            try:
                step = 0
                while True:
                    if coord.should_stop():
                        break
                    step += 1

                    prediction_value, pool5_value, ids_value = sess.run(
                        [validate_pred, pool5, ids],
                        feed_dict={phase: False})

                    feature_file = os.path.join(FLAGS.log_dir,
                                                "feature_%d" % step)
                    # Flatten every sample's features to one row.
                    pool5_value = pool5_value.reshape(FLAGS.batch_size, -1)
                    np.save(feature_file, pool5_value)

                    for sample_id in ids_value:
                        ids_file.write("%s\n" % sample_id)

                    # Save prediction and ground truth info
                    predictions_file.write(
                        np.array_str(prediction_value,
                                     max_line_width=1e3,
                                     precision=10,
                                     suppress_small=True))
                    predictions_file.write('\n')
                    predictions_file.flush()
            except tf.errors.OutOfRangeError:
                # The single-epoch input queue is exhausted: normal end.
                pass
            except Exception as e:
                # Keep going with the next checkpoint, but do not hide the
                # reason this one stopped early.
                print("Exception encountered: ", e)

            # Stop Queueing data, we're done!
            coord.request_stop()
            coord.join(threads)
def __init__(self, data):
    """Store the train/validation split and fit a linear SVM.

    Args:
        data: 4-item iterable unpacked as
            ``(X_train, X_val, y_train, y_val)``.
    """
    self.data_prepocessor = DataPipeline()

    (self.X_train, self.X_val,
     self.y_train, self.y_val) = data

    self.LinearModel = LinearSVC()
    # Only the training part of the split is used for fitting.
    self.LinearModel.fit(self.X_train, self.y_train)
def main(args_list=None):
    """Parse the command line and dispatch to the requested run mode.

    Args:
        args_list: argument strings to parse; defaults to ``sys.argv[1:]``.

    Returns:
        The result of ``util_git.monitor_start`` when ``--auto_update`` is
        set, otherwise ``None``.
    """
    if args_list is None:
        args_list = sys.argv[1:]
    args = parser.parse_args(args_list)

    if args.auto_update:
        return util_git.monitor_start("--auto_update")

    if not args.auto_push:
        logger.warning("github push is DISABLED")

    flags = {
        "trace": args.trace,
        "capture_image": args.capture_image,
        "rerun_now": args.rerun_now,
        "firefox": args.use_firefox,
        "chrome": args.use_chrome,
        "headless": not args.show_browser,
    }
    pipeline_config = DataPipelineConfig(args.base_dir,
                                         args.temp_dir,
                                         flags=flags)
    scanner = DataPipeline(pipeline_config)
    capture = init_specialized_capture(args)

    html_only = args.clean_html or args.extract_html or args.format_html
    if html_only:
        # Only the explicitly requested HTML stages are re-run.
        if args.format_html:
            scanner.format_html(rerun=True)
        if args.clean_html:
            scanner.clean_html(rerun=True)
        if args.extract_html:
            scanner.extract_html(rerun=True)
        return None

    # Both remaining modes start with the full HTML stage sequence.
    scanner.format_html()
    scanner.clean_html()
    scanner.extract_html()

    if args.continuous:
        run_continuous(scanner, capture, auto_push=args.auto_push)
    else:
        run_once(scanner, args.auto_push)
    return None
def run_continuous(scanner: DataPipeline, capture: SpecializedCapture,
                   auto_push: bool):
    """Run the scanner in continuous mode until new source code is detected.

    The first run happens outside the retry logic so that a fundamental
    problem fails loudly. Afterwards the loop wakes every 15 seconds and
    runs again once the time returned by ``next_time()`` has passed. A
    failed run is retried after 5 minutes, at most twice, before the loop
    goes back to sleep until the next regular cycle.

    Args:
        scanner: pipeline whose ``update_sources`` and ``process`` are run.
        capture: optional specialized capture; used after each run and closed
            on exit when truthy.
        auto_push: when true, push ``scanner.config.base_dir`` after each run.
    """
    # check for new source code (return if found so watchdog can reload the main loop)
    if util_git.monitor_check():
        return

    host = get_host()
    try:
        print("starting continuous run")

        # run the first time outside of the 'retry' logic
        # so it fails if something is really wrong

        # get new external source data
        scanner.update_sources()
        # main scan/clean/extract loop
        scanner.process()
        # run a one-off capture if requested
        if capture:
            do_specialized_capture(capture)
        # push to the git repo
        if auto_push:
            util_git.push(
                scanner.config.base_dir,
                f"{udatetime.to_logformat(scanner.change_list.start_date)} on {host}"
            )

        # check for new source again
        if util_git.monitor_check():
            return

        cnt = 1
        t = next_time()
        # Counts consecutive failed attempts of the current cycle. It must
        # live outside the loop body, otherwise it is reset before every
        # attempt and the retry limit below never triggers.
        retry_cnt = 0

        # run forever; on error, retry twice before going back to sleep
        # until the next cycle
        print(f"sleep until {t}")
        while True:
            time.sleep(15)
            if datetime.now() < t:
                continue

            if util_git.monitor_check():
                break

            print("==================================")
            print(f"=== run {cnt} at {t}")
            print("==================================")

            try:
                scanner.update_sources()
                scanner.process()
                if capture:
                    do_specialized_capture(capture)
                if auto_push:
                    util_git.push(
                        scanner.config.base_dir,
                        f"{udatetime.to_displayformat(scanner.change_list.start_date)} on {host}"
                    )
            except Exception as ex:
                logger.exception(ex)
                if retry_cnt < 2:
                    print("run failed, wait 5 minutes and try again")
                    t = t + timedelta(minutes=5)
                    retry_cnt += 1
                    continue
                # Retries exhausted: fall through and wait for the next cycle.

            # The cycle is finished (success or retries exhausted).
            retry_cnt = 0

            print("==================================")
            print("")
            t = next_time()
            print(f"sleep until {t}")
            cnt += 1
    finally:
        if capture:
            capture.close()
def run_continuous(scanner: DataPipeline, capture: SpecializedCapture,
                   auto_push: bool):
    """Run the scanner in continuous mode until new source code is detected.

    After an initial run, the loop wakes every 15 seconds and runs again once
    the time returned by ``next_time()`` has passed. A failed run is retried
    after 5 minutes, at most twice, before the loop waits for the next
    regular cycle.

    Args:
        scanner: pipeline whose ``update_sources`` and ``process`` are run.
        capture: optional specialized capture; used after each run and closed
            on exit when truthy.
        auto_push: when true, push ``scanner.config.base_dir`` after each run.
    """
    if util_git.monitor_check():
        return

    host = get_host()
    try:
        print("starting continuous run")

        scanner.update_sources()
        scanner.process()

        if capture:
            # A failing capture must not abort the very first run.
            try:
                special_cases(capture)
            except Exception as ex:
                logger.error(ex)
                logger.error(
                    "*** continue after exception in specialized capture")

        if auto_push:
            util_git.push(
                scanner.config.base_dir,
                f"{udatetime.to_logformat(scanner.change_list.start_date)} on {host}"
            )

        if util_git.monitor_check():
            return

        cnt = 1
        t = next_time()
        # Consecutive failed attempts within the current cycle.
        retry_cnt = 0
        max_retries = 2

        print(f"sleep until {t}")
        while True:
            time.sleep(15)
            if datetime.now() < t:
                continue

            if util_git.monitor_check():
                break

            print("==================================")
            print(f"=== run {cnt} at {t}")
            print("==================================")

            try:
                scanner.update_sources()
                scanner.process()
                if capture:
                    special_cases(capture)
                if auto_push:
                    util_git.push(
                        scanner.config.base_dir,
                        f"{udatetime.to_displayformat(scanner.change_list.start_date)} on {host}"
                    )
            except Exception as ex:
                logger.exception(ex)
                if retry_cnt < max_retries:
                    print("run failed, wait 5 minutes and try again")
                    t = t + timedelta(minutes=5)
                    retry_cnt += 1
                    # Skip the rescheduling below; otherwise the 5-minute
                    # delay is immediately overwritten by next_time().
                    continue
                # Retries exhausted: fall through and wait for the next cycle.

            retry_cnt = 0

            print("==================================")
            print("")
            t = next_time()
            print(f"sleep until {t}")
            cnt += 1
    finally:
        if capture:
            capture.close()
def main(args):
    """Evaluate saved checkpoints on noise or event windows.

    Waits for a checkpoint in ``args.checkpoint_dir``, then repeatedly builds
    the evaluation graph, runs one epoch over ``args.dataset``, prints the
    per-batch and average validation metrics, optionally writes summaries,
    and prints a confusion matrix. With ``args.save_false`` the time windows
    of wrong predictions are written to
    ``output/false_predictions/false_preds.csv``.

    Args:
        args: parsed command-line namespace; the attributes read here are
            ``n_clusters``, ``noise``, ``events``, ``save_false``,
            ``checkpoint_dir``, ``eval_interval``, ``dataset``, ``model``,
            ``step`` and ``save_summary``.

    Raises:
        ValueError: if ``n_clusters`` is missing or neither ``noise`` nor
            ``events`` is selected.
    """
    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')
    if not args.noise and not args.events:
        raise ValueError("Define if evaluating accuracy on noise or events")

    # Directory in which the evaluation summaries are written
    if args.noise:
        summary_dir = os.path.join(args.checkpoint_dir, "noise")
    if args.events:
        summary_dir = os.path.join(args.checkpoint_dir, "events")

    if args.save_false:
        false_start = []
        false_end = []
        false_origintime = []
        false_dir = os.path.join("output", "false_predictions")
        if not os.path.exists(false_dir):
            os.makedirs(false_dir)

    # Block until a checkpoint exists (a negative interval means: do not wait).
    while True:
        ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
        if args.eval_interval < 0 or ckpt:
            print('Evaluating model')
            break
        print('Waiting for training job to save a checkpoint')
        time.sleep(args.eval_interval)

    cfg = config.Config()
    if args.noise:
        cfg.batch_size = 128
    if args.events:
        cfg.batch_size = 128
    if args.save_false:
        # One window per batch so a wrong prediction maps to one time window.
        cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 1
    cfg.n_clusters = args.n_clusters
    cfg.n_clusters += 1

    while True:
        # Pre-bind everything the ``finally`` clause touches, so a failure
        # during graph or session setup surfaces as the original exception
        # instead of a NameError raised during cleanup.
        coord = None
        threads = None
        pred_labels = np.empty(1)
        true_labels = np.empty(1)
        try:
            # data pipeline
            data_pipeline = DataPipeline(args.dataset,
                                         config=cfg,
                                         is_training=False)
            samples = {
                'data': data_pipeline.samples,
                'cluster_id': data_pipeline.labels,
                "start_time": data_pipeline.start_time,
                "end_time": data_pipeline.end_time
            }

            # set up model and validation metrics
            model = models.get(args.model,
                               samples,
                               cfg,
                               args.checkpoint_dir,
                               is_training=False)
            metrics = model.validation_metrics()

            # Validation summary writer
            summary_writer = tf.summary.FileWriter(summary_dir, None)

            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                tf.initialize_local_variables().run()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)

                model.load(sess, args.step)
                print('Evaluating at step {}'.format(
                    sess.run(model.global_step)))
                step = tf.train.global_step(sess, model.global_step)

                mean_metrics = {}
                for key in metrics:
                    mean_metrics[key] = 0

                n = 0
                # The arrays start with one placeholder element, which is
                # stripped again in the ``finally`` clause.
                pred_labels = np.empty(1)
                true_labels = np.empty(1)
                while True:
                    try:
                        to_fetch = [
                            metrics, model.layers["class_prediction"],
                            samples["cluster_id"], samples["start_time"],
                            samples["end_time"]
                        ]
                        (metrics_, batch_pred_label, batch_true_label,
                         starttime, endtime) = sess.run(to_fetch)

                        pred_labels = np.append(pred_labels, batch_pred_label)
                        true_labels = np.append(true_labels, batch_true_label)

                        # Save times of false preds
                        if args.save_false and \
                                batch_pred_label != batch_true_label:
                            print("---False prediction---")
                            print(starttime, endtime)
                            false_origintime.append(
                                (starttime[0] + endtime[0]) / 2)
                            false_end.append(endtime)
                            false_start.append(starttime)

                        for key in metrics:
                            mean_metrics[key] += cfg.batch_size * metrics_[key]
                        n += cfg.batch_size

                        mess = model.validation_metrics_message(metrics_)
                        print('{:03d} | '.format(n) + mess)

                    except KeyboardInterrupt:
                        print('stopping evaluation')
                        break

                    except tf.errors.OutOfRangeError:
                        print('Evaluation completed ({} epochs).'.format(
                            cfg.n_epochs))
                        print("{} windows seen".format(n))
                        break

                if n > 0:
                    for key in metrics:
                        mean_metrics[key] /= n
                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag='{}/val'.format(key),
                                             simple_value=mean_metrics[key])
                        ])
                        if args.save_summary:
                            summary_writer.add_summary(summary,
                                                       global_step=step)

                summary_writer.flush()

                mess = model.validation_metrics_message(mean_metrics)
                print('Average | ' + mess)

                if args.eval_interval < 0:
                    print('End of evaluation')
                    break

            tf.reset_default_graph()
            print('Sleeping for {}s'.format(args.eval_interval))
            time.sleep(args.eval_interval)

        finally:
            print('joining data threads')
            if coord is not None:
                coord.request_stop()

            if args.save_false:
                false_preds = {}
                false_preds["start_time"] = false_start
                false_preds["end_time"] = false_end
                false_preds["origintime"] = false_origintime
                df = pd.DataFrame(false_preds)
                df.to_csv(os.path.join(false_dir, "false_preds.csv"))

            # Drop the placeholder first element.
            pred_labels = pred_labels[1::]
            true_labels = true_labels[1::]
            # np.save("output/pred_labels_noise.npy",pred_labels)
            # np.save("output/true_labels_noise.npy",true_labels)
            print("---Confusion Matrix----")
            print(confusion_matrix(true_labels, pred_labels))

            if coord is not None and threads is not None:
                coord.join(threads)
def run_validate():
    """Evaluate every checkpoint in ``FLAGS.log_dir`` and report the best one.

    For each checkpoint the graph is rebuilt, the weights are restored, and
    the input queue is drained once while loss and accuracy are accumulated.
    The per-checkpoint averages are printed and finally stored, together with
    the checkpoints' global steps, in ``validation_results.npz`` inside
    ``FLAGS.log_dir``. Checkpoints that yield no successful step are skipped.
    """
    # Get all ckpt names in log dir (without meta ext)
    meta_list = get_checkpoints(FLAGS.log_dir)

    # GPU/CPU Flag
    if FLAGS.gpu is not None:
        compute_string = '/gpu:' + str(FLAGS.gpu)
    else:
        compute_string = '/cpu:0'

    # Iterate through the checkpoints
    val_loss = []
    val_acc = []
    val_itr = []
    for ckpt_path in meta_list:
        tf.reset_default_graph()

        ####################
        # Setup Data Queue #
        ####################
        with tf.device("/cpu:0"):
            with tf.variable_scope('validate'):
                data_pipeline = DataPipeline(augment=False,
                                             num_epochs=1,
                                             shuffle=False)
                validate_x, validate_y, ids = data_pipeline.batch_ops()

        with tf.device(compute_string):
            ##########################
            # Declare Validate Graph #
            ##########################
            # Sets train/test mode; currently only used for BatchNormalization
            # True: Train  False: Test
            phase = tf.placeholder(tf.bool, name='phase')
            validate_model = model(validate_x, validate_y, phase)
            validate_pred = validate_model.inference()
            validate_acc = validate_model.evaluate()
            validate_loss, gt_y = validate_model.loss()
            global_step = validate_model.get_global_step()

        summary = tf.summary.merge_all()
        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())

        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            sess.run(init)
            summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

            # Coordinator hands data fetching threads
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            optimistic_restore(sess, ckpt_path)
            global_step_value = global_step.eval()

            step = 0
            cum_loss = 0
            cum_acc = 0
            cum_time = 0
            try:
                while True:
                    if coord.should_stop():
                        break
                    step += 1

                    start_time = time()
                    (loss_value, acc_value, prediction_value, gt_value,
                     ids_value) = sess.run(
                         [validate_loss, validate_acc, validate_pred, gt_y,
                          ids],
                         feed_dict={phase: False})
                    duration_time = time() - start_time

                    cum_loss += loss_value
                    cum_acc += acc_value
                    cum_time += duration_time

                    # Print progress to stdout
                    if FLAGS.print_pred:
                        print('Step %d: loss = %.4f acc = %.4f (%.3f sec)' %
                              (step, loss_value, acc_value, duration_time))
                        print('Prediction:{}'.format(prediction_value))
                        print('GT:{}'.format(gt_value))
                    sys.stdout.flush()

                    # Write the summaries
                    if step % 25 == 0:
                        # Update the summary file
                        summary_str = sess.run(summary,
                                               feed_dict={phase: False})
                        summary_writer.add_summary(summary_str,
                                                   global_step_value)
                        summary_writer.flush()

            except tf.errors.OutOfRangeError:
                # Queue exhausted: the step that just started did not count.
                step -= 1
            except Exception as e:
                # Do not hide why this checkpoint stopped early.
                print("Exception encountered: ", e)
                step -= 1

            # Stop Queueing data, we're done!
            coord.request_stop()
            coord.join(threads)

        if step <= 0:
            # Nothing was evaluated; averaging would divide by zero.
            print('No batches evaluated for load file: %s' % ckpt_path)
            sys.stdout.flush()
            continue

        avg_loss = cum_loss / step
        avg_acc = cum_acc / step

        val_loss.append(float(avg_loss))
        val_acc.append(float(avg_acc))
        val_itr.append(int(global_step_value))

        print('Results For Load File: %s' % ckpt_path)
        print('Average_Loss = %.4f' % avg_loss)
        print('Average_Acc = %.4f' % avg_acc)
        print('Run Time: %.2f' % cum_time)
        sys.stdout.flush()

    if not val_loss:
        # np.amin / np.amax raise on empty input.
        print('No validation results to report')
        sys.stdout.flush()
        return

    val_loss = np.asarray(val_loss)
    val_acc = np.asarray(val_acc)
    val_itr = np.asarray(val_itr)

    best_loss = np.amin(val_loss)
    best_acc = np.amax(val_acc)
    # The best checkpoint is chosen by accuracy, not by loss.
    best_itr = val_itr[np.argmax(val_acc)]

    print('Overall Results')
    print('Minimum Loss: %.4f' % best_loss)
    print('Maximum Acc: %.4f' % best_acc)
    print('Best Checkpoint: %d' % best_itr)

    save_path = os.path.join(FLAGS.log_dir, 'validation_results.npz')
    np.savez(save_path, val_loss=val_loss, val_acc=val_acc, val_itr=val_itr)
y = y_val[t] action = self.policy.choose(X) reward = self.calculateReward(action, y) predictions.append(action) #self.policy.updateParameters(X, action, reward) rewards.append(reward) return (rewards, predictions) if __name__ == '__main__': #seeds = [1,12,123,1234, 12345, 1234545, 0, 2, 234, 2345, 23454, 345, 3456, 345656, 456, 45656, 7483, 7590 , 789, 7890 ] #seeds = np.random.randint(2 ** 30, size=20) seeds = np.random.randint(2**30, size=20) data_prepocessor = DataPipeline() #(bert_on=False) X_train, X_val, y_train, y_val = data_prepocessor.loadAndPrepData() linUCB_regrets = [] ts_regrets = [] linUCB_cum_errors = [] ts_cum_errors = [] softmax_cum_errors = [] RF_cum_errors = [] baseline_cum_errors = [] fixed_cum_errors = [] fixed_policy = FixedBaseline() fixed_warfarin = WarfarinDosageRecommendation(fixed_policy, data=(X_train, X_val, y_train, y_val))
def run_training():
    """Run the training loop.

    Builds the input queue and the training graph, optionally resumes from
    the newest checkpoint in ``FLAGS.log_dir`` (falling back to pretrained
    weights when nothing was resumed), and then trains for up to
    ``FLAGS.max_steps`` steps while printing progress, writing summaries and
    saving checkpoints.
    """
    # GPU/CPU Flag
    if FLAGS.gpu is not None:
        compute_string = '/gpu:' + str(FLAGS.gpu)
    else:
        compute_string = '/cpu:0'

    #####################
    # Setup Data Queues #
    #####################
    with tf.device("/cpu:0"):
        with tf.variable_scope('train'):
            data_pipeline = DataPipeline(augment=True)
            train_x, train_y = data_pipeline.batch_ops()

    #######################
    # Declare train graph #
    #######################
    with tf.device(compute_string):
        phase = tf.placeholder(tf.bool, name='phase')
        train_model = model(train_x, train_y, phase)
        train_predictions = train_model.inference()
        train_acc = train_model.evaluate()
        train_loss, gt_y = train_model.loss()
        train_op = train_model.optimize()
        global_step = train_model.get_global_step()
        tf.summary.scalar('train_loss', train_loss)
        tf.summary.scalar('train_acc', train_acc)

    #############################
    # Setup Summaries and Saver #
    #############################
    # Collect summaries for TensorBoard
    summary = tf.summary.merge_all()

    # Create variable initializer op
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())

    # Create checkpoint saver
    saver = tf.train.Saver(max_to_keep=100)

    # Begin TensorFlow Session
    session_config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        # Resume training or
        # Run the Variable Initializer Op
        sess.run(init)

        # Must be bound even when resuming is disabled; it is read below.
        resume_status = False
        if FLAGS.resume == True:
            try:
                meta_list = get_checkpoints(FLAGS.log_dir)
                optimistic_restore(sess, meta_list[-1])
                resume_status = True
            except Exception:
                print('Checkpoint Load Failed')
                print('Training from scratch')

        if not resume_status:
            try:
                train_model.load_pretrained_weights(sess)
            except Exception:
                print('Failed to load pretrained weights.')
                print('Training from scratch')
                sys.stdout.flush()

        # Coordinator hands data fetching threads
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # Instantiate a summary writer to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # Actually begin the training process
        try:
            for step in xrange(FLAGS.max_steps):
                if coord.should_stop():
                    break

                start_time = time()
                # Run one step of the model.
                _, loss_value, acc = sess.run(
                    [train_op, train_loss, train_acc],
                    feed_dict={phase: True})
                global_step_value = global_step.eval()
                duration_time = time() - start_time

                # debug profiler on step 3
                # open timeline.json in chrome://tracing/
                if FLAGS.profile and step == 3:
                    run_metadata = tf.RunMetadata()
                    # The ``phase`` placeholder has to be fed here as well.
                    _, _, acc = sess.run(
                        [train_op, train_loss, train_acc],
                        feed_dict={phase: True},
                        options=tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE),
                        run_metadata=run_metadata)
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    with open('timeline.json', 'w') as f:
                        f.write(ctf)

                # Display progress
                if global_step_value % 1 == 0:
                    # Print progress to stdout
                    print('Step %d: loss = %.2f, acc = %.2f (%.3f sec)' %
                          (global_step_value, loss_value, acc, duration_time))
                    sys.stdout.flush()

                # Write the summaries
                if global_step_value % 20 == 0:
                    # Update the summary file
                    summary_str = sess.run(summary, feed_dict={phase: False})
                    summary_writer.add_summary(summary_str, global_step_value)
                    summary_writer.flush()

                # Save Model Checkpoint
                if (global_step_value) % FLAGS.checkpoint_freq == 0 or \
                        (global_step_value + 1) == FLAGS.max_steps:
                    checkpoint_path = os.path.join(FLAGS.log_dir, 'model')
                    saver.save(sess, checkpoint_path, global_step=global_step)

        except tf.errors.OutOfRangeError:
            print('Done Training -- Epoch limit reached.')
            sys.stdout.flush()
        except Exception as e:
            print("Exception encountered: ", e)
            sys.stdout.flush()

        # Stop Queueing data, we're done!
        coord.request_stop()
        coord.join(threads)