def test_add_unit_transformation_one_element_tuple_tensor_slices():
    input_data = np.array([
        [1, 2, 3, 4],
        [1, 2, 3, 0],
        [1, 2, 0, 0],
        [1, 0, 0, 0],
    ])
    expected_output_data = np.array([
        [4, 5, 6, 7],
        [4, 5, 6, 3],
        [4, 5, 3, 3],
        [4, 3, 3, 3],
    ])

    input_dataset = tf.data.Dataset.from_tensor_slices((input_data, ))
    expected_output_dataset = tf.data.Dataset.from_tensor_slices(
        expected_output_data)

    def add3(x):
        return x + 3

    pipeline = DataPipeline()
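    # add3 is applied element-wise to the tensor inside each one-element tuple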
    pipeline.add_unit_transformation(add3)
    output_dataset = pipeline.transform_dataset(input_dataset)

    output_next = output_dataset.make_one_shot_iterator().get_next()
    expected_next = expected_output_dataset.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        for _ in range(4):
            r_output, r_expected = sess.run((output_next, expected_next))
            assert r_output == approx(r_expected)
Example #2
    def __init__(self, configuration):

        self.conf = configuration
        self.data_pipeline = DataPipeline(self.conf.batch_size,
                                          self.conf.max_memory)
        self.input_size = self.data_pipeline.get_input_size()
        self.output_size = self.data_pipeline.get_output_size()
        self.lstm = tf.contrib.rnn.BasicLSTMCell(self.conf.state_size)
        self.weights = hp.weight_variables(
            [self.conf.state_size, self.output_size])
        self.biases = hp.bias_variables([self.output_size])

        self.input = tf.placeholder(tf.float32,
                                    shape=[
                                        self.conf.batch_size,
                                        self.conf.max_memory, self.input_size
                                    ])
        self.initial_state = tf.placeholder(
            tf.float32, shape=[self.conf.batch_size, self.conf.state_size])
        self.current_state = tf.placeholder(
            tf.float32, shape=[self.conf.batch_size, self.conf.state_size])
        self.hidden_state = tf.placeholder(
            tf.float32, shape=[self.conf.batch_size, self.conf.state_size])
        self.label = tf.placeholder(
            tf.int32, shape=[self.conf.batch_size, self.conf.max_memory])

        self.zero_state = self.lstm.zero_state(batch_size=self.conf.batch_size,
                                               dtype=tf.float32)
Example #3
def run_once(scanner: DataPipeline, auto_push: bool):
    scanner.update_sources()
    scanner.process()
    if auto_push:
        host = get_host()
        util_git.push(
            scanner.config.base_dir,
            f"{udatetime.to_logformat(scanner.change_list.start_date)} on {host}"
        )
Example #4
def run_once(scanner: DataPipeline, capture: SpecializedCapture, auto_push: bool):
    " run the scanner once "
    scanner.update_sources()
    scanner.process()

    if capture: do_specialized_capture(capture)

    if auto_push:
        host = get_host()
        util_git.push(scanner.config.base_dir, f"{udatetime.to_logformat(scanner.change_list.start_date)} on {host}")
Example #5
def test_add_unit_transformation_nested():
    def input_data_generator():
        yield {"input_sequence": np.array([1, 2, 3, 4]), "length": 4}, 9
        yield {"input_sequence": np.array([1, 2, 3]), "length": 3}, 9
        yield {"input_sequence": np.array([1, 2]), "length": 2}, 9
        yield {"input_sequence": np.array([1]), "length": 1}, 9
        yield {"input_sequence": np.array([1, 2]), "length": 2}, 9
        yield {"input_sequence": np.array([1, 2, 3]), "length": 3}, 9
        yield {"input_sequence": np.array([1, 2, 3, 4]), "length": 4}, 9

    def expected_output_data_generator():
        yield {"input_sequence": np.array([4, 5, 6, 7]), "length": 4}, 9
        yield {"input_sequence": np.array([4, 5, 6]), "length": 3}, 9
        yield {"input_sequence": np.array([4, 5]), "length": 2}, 9
        yield {"input_sequence": np.array([4]), "length": 1}, 9
        yield {"input_sequence": np.array([4, 5]), "length": 2}, 9
        yield {"input_sequence": np.array([4, 5, 6]), "length": 3}, 9
        yield {"input_sequence": np.array([4, 5, 6, 7]), "length": 4}, 9

    input_dataset = tf.data.Dataset.from_generator(input_data_generator,
                                                   output_types=({
                                                       "input_sequence":
                                                       tf.int32,
                                                       "length":
                                                       tf.int32
                                                   }, tf.int32))
    expected_output_dataset = tf.data.Dataset.from_generator(
        expected_output_data_generator,
        output_types=({
            "input_sequence": tf.int32,
            "length": tf.int32
        }, tf.int32))

    def add3(x):
        return x + 3

    pipeline = DataPipeline()
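    # the extra arguments (0, "input_sequence") restrict add3 to the
    # "input_sequence" field of the first tuple element; "length" and the
    # label are left unchanged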
    pipeline.add_unit_transformation(add3, 0, "input_sequence")
    output_dataset = pipeline.transform_dataset(input_dataset)

    output_next = output_dataset.make_one_shot_iterator().get_next()
    expected_next = expected_output_dataset.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        for _ in range(7):
            r_output, r_expected = sess.run((output_next, expected_next))
            assert r_output[0]["input_sequence"] == approx(
                r_expected[0]["input_sequence"])
            assert r_output[0]["length"] == approx(r_expected[0]["length"])
            assert r_output[1] == approx(r_expected[1])
Example #6
def test_add_structural_transformation():
    def input_data_generator():
        yield np.array([1, 2, 3, 4])
        yield np.array([1, 2, 3])
        yield np.array([1, 2])
        yield np.array([1])
        yield np.array([1, 2])
        yield np.array([1, 2, 3])
        yield np.array([1, 2, 3, 4])

    def expected_output_data_generator():
        yield {"input_sequence": np.array([1, 2, 3, 4]), "length": 4}
        yield {"input_sequence": np.array([1, 2, 3]), "length": 3}
        yield {"input_sequence": np.array([1, 2]), "length": 2}
        yield {"input_sequence": np.array([1]), "length": 1}
        yield {"input_sequence": np.array([1, 2]), "length": 2}
        yield {"input_sequence": np.array([1, 2, 3]), "length": 3}
        yield {"input_sequence": np.array([1, 2, 3, 4]), "length": 4}

    input_dataset = tf.data.Dataset.from_generator(input_data_generator,
                                                   output_types=tf.int32)
    expected_output_dataset = tf.data.Dataset.from_generator(
        expected_output_data_generator,
        output_types={
            "input_sequnce": tf.int32,
            "length": tf.int32
        })

    def add_length(input_sequence):
        return {
            "input_sequence": input_sequence,
            "length": tf.shape(input_sequence)[0]
        }

    pipeline = DataPipeline()
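    # add_length replaces each bare sequence with a dict carrying the
    # sequence and its length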
    pipeline.add_structural_transformation(add_length)
    output_dataset = pipeline.transform_dataset(input_dataset)

    output_next = output_dataset.make_one_shot_iterator().get_next()
    expected_next = expected_output_dataset.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        for _ in range(7):
            r_output, r_expected = sess.run((output_next, expected_next))
            assert r_output["input_sequnce"] == approx(
                r_expected["input_sequnce"])
            assert r_output["length"] == approx(r_expected["length"])
Example #7
    def __init__(self, data):
        super().__init__()
        self.data_prepocessor = DataPipeline()
        self.X_train, self.X_val, self.y_train, self.y_val = data
        self.lr_model = LogisticRegression(random_state=1, multi_class='multinomial',
                                           solver='newton-cg', verbose=0)

        self.lr_model.fit(self.X_train, self.y_train)
Example #8
def test_add_unit_transformation_one_element_tuple():
    def input_data_generator():
        yield np.array([1, 2, 3, 4]),
        yield np.array([1, 2, 3]),
        yield np.array([1, 2]),
        yield np.array([1]),
        yield np.array([1, 2]),
        yield np.array([1, 2, 3]),
        yield np.array([1, 2, 3, 4]),

    def expected_output_data_generator():
        yield np.array([4, 5, 6, 7])
        yield np.array([4, 5, 6])
        yield np.array([4, 5])
        yield np.array([4])
        yield np.array([4, 5])
        yield np.array([4, 5, 6])
        yield np.array([4, 5, 6, 7])

    input_dataset = tf.data.Dataset.from_generator(input_data_generator,
                                                   output_types=(tf.int32, ))
    expected_output_dataset = tf.data.Dataset.from_generator(
        expected_output_data_generator, output_types=tf.int32)

    def add3(x):
        return x + 3

    pipeline = DataPipeline()
    pipeline.add_unit_transformation(add3)
    output_dataset = pipeline.transform_dataset(input_dataset)

    output_next = output_dataset.make_one_shot_iterator().get_next()
    expected_next = expected_output_dataset.make_one_shot_iterator().get_next()

    with tf.Session() as sess:
        for _ in range(7):
            r_output, r_expected = sess.run((output_next, expected_next))
            assert r_output == approx(r_expected)
Example #9
def main():
    # CoLaus
    DataPipeline() \
        .with_consumer(CsvDataConsumer(data_dir + "CoLaus_sample_100linesShuffled.csv", "\t")) \
        .with_processor(DataTransformer.from_mapping_file(mapping_dir + "colaus_cineca_mapping_questionnaire.csv")) \
        .with_processor(FieldValueTransformer.from_mapping_file(mapping_dir + "colaus_data_label_mapping.xlsx")) \
        .with_producer(JsonProducer(data_dir + "colaus_cineca.json")) \
        .run()

    # H3Africa
    DataPipeline() \
        .with_consumer(CsvDataConsumer(data_dir + "h3africa_dummy_datasets_for_cineca_demo.csv", ";")) \
        .with_processor(DataTransformer.from_mapping_file(mapping_dir + "h3africa_cineca_mapping_questionnaire.csv")) \
        .with_producer(JsonProducer(data_dir + "h3africa_cineca.json")) \
        .run()

    # CHILD
    DataPipeline() \
        .with_consumer(CsvDataConsumer(data_dir + "child_demo_data.csv", ",")) \
        .with_processor(FieldValueTransformerPre.from_mapping_file("../resources/mapping/child_initial_data_label_mapping.xlsx")) \
        .with_processor(DataTransformer.from_mapping_file(mapping_dir + "child_cineca_mapping_questionnaire.csv")) \
        .with_producer(JsonProducer(data_dir + "child_cineca.json")) \
        .run()
Example #10
    def __init__(self, data):
        self.data_prepocessor = DataPipeline()
        self.X_train, self.X_val, self.y_train, self.y_val = data

        param_grid = {
            'C': [1, 2, 3],
            'degree': [3, 4, 5, 6, 7, 8],
        }

        SVM_model = SVC(gamma='auto')

        SVM_Tuned = GridSearchCV(estimator=SVM_model, param_grid=param_grid, cv=StratifiedKFold(3))
        SVM_Tuned.fit(self.X_train, self.y_train)

        self.SVM_model = SVM_Tuned
Example #11
    def get_pipeline(self, train_dataset):
        trainer = Train(patience_epochs=self.patience_epochs,
                        early_stopping=self.early_stopping,
                        epochs=self.epochs)

        max_feature_lens = train_dataset.max_feature_lens
        num_classes = train_dataset.num_classes

        text_to_index = TransformTextToIndex(feature_lens=max_feature_lens)

        # data pipeline
        merge_tensor = TransformMergeTensors()
        post_process_steps = [("merge_tensor", merge_tensor)]
        data_pipeline = DataPipeline(text_to_index=text_to_index,
                                     postprocess_steps=post_process_steps)

        # Label pipeline
        label_encoder = TransformLabelEncoder()
        label_reshaper = TransformLabelReshaper(num_classes=num_classes)

        label_pipeline = LabelPipeline(label_encoder=label_encoder,
                                       label_reshaper=label_reshaper)

        # Network
        model = BiLstmNetwork(input_size=text_to_index.max_index,
                              hidden_dim=self.hidden_dim,
                              output_size=train_dataset.num_classes)
        self.logger.info("Using model {}".format(type(model)))

        # optimiser = SGD(lr=self.learning_rate, params=model.parameters())
        optimiser = Adam(params=model.parameters())
        self.logger.info("Using optimiser {}".format(type(optimiser)))

        # Loss function
        loss_func = nn.CrossEntropyLoss()
        self.logger.info("Using loss function {}".format(type(loss_func)))

        # Train pipeline
        train_pipeline = TrainPipeline(batch_size=self.batch_size,
                                       optimiser=optimiser,
                                       trainer=trainer,
                                       data_pipeline=data_pipeline,
                                       label_pipeline=label_pipeline,
                                       num_workers=self.num_workers,
                                       loss_func=loss_func,
                                       model=model)

        return train_pipeline
Example #12
    def __init__(self, data):
        super().__init__()
        self.data_prepocessor = DataPipeline()
        self.X_train, self.X_val, self.y_train, self.y_val = data

        param_grid = {
            'n_estimators': [50, 100, 150, 200],
            'max_depth': [5, 6, 7, 8, 9, 10, 12, 15],
        }

        RF_model = RandomForestClassifier(random_state=1,
                                          verbose=0)

        RF_Tuned = GridSearchCV(estimator=RF_model, param_grid=param_grid, cv=StratifiedKFold(3))
        RF_Tuned.fit(self.X_train, self.y_train)

        self.RF_model = RF_Tuned
Example #13
    def __init__(self,
                 bind_addresses,
                 send_addresses,
                 pipeline_count=8,
                 packet_size=2048,
                 sock_timeout=5):
        if len(bind_addresses) != len(send_addresses):
            raise Exception(
                f'Bind address array length is not equal to the send address array length. {len(bind_addresses)} vs {len(send_addresses)}'
            )

        if len(bind_addresses) != pipeline_count:
            raise Exception(
                f'Pipeline count is not equal to the length of the supplied arrays. {len(bind_addresses)} vs {pipeline_count}'
            )

        self.pipeline_count = pipeline_count
        self.data_pipelines = [None] * self.pipeline_count

        for i in range(self.pipeline_count):
            self.data_pipelines[i] = DataPipeline(send_addresses[i],
                                                  bind_addresses[i],
                                                  packet_size, sock_timeout)
Example #14
def run_test():
    # Get all ckpt names in log dir (without meta ext)
    meta_list = get_checkpoints(FLAGS.log_dir)

    # GPU/CPU Flag
    if FLAGS.gpu is not None:
        compute_string = '/gpu:' + str(FLAGS.gpu)
    else:
        compute_string = '/cpu:0'

    # Iterate through the checkpoints
    for ckpt_path in meta_list:
        tf.reset_default_graph()

        ####################
        # Setup Data Queue #
        ####################
        with tf.device("/cpu:0"):
            with tf.variable_scope('test') as scope:
                data_pipeline = DataPipeline(augment=False,
                                             num_epochs=1,
                                             shuffle=False)
                validate_x, validate_y, ids = data_pipeline.batch_ops()

        with tf.device(compute_string):
            ##########################
            # Declare Validate Graph #
            ##########################
            # Sets train/test mode; currently only used for BatchNormalization
            # True: Train   False: Test
            phase = tf.placeholder(tf.bool, name='phase')
            validate_model = model(validate_x, validate_y, phase)

            # Delete extraneous info when done debugging
            validate_pred = validate_model.inference()
            pool5 = validate_model.fc2

        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())

        ids_file = open(os.path.join(FLAGS.log_dir, 'ids.txt'), 'w')
        predictions_file = open(os.path.join(FLAGS.log_dir, 'predictions.txt'),
                                'w')

        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            sess.run(init)

            # Coordinator handles the data-fetching threads
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            optimistic_restore(sess, ckpt_path)
            try:
                step = 0
                cum_time = 0
                while True:
                    if coord.should_stop():
                        break
                    step += 1
                    start_time = time()
                    prediction_value, pool5_value, ids_value = sess.run(
                        [validate_pred, pool5, ids], feed_dict={phase: False})
                    duration_time = time() - start_time

                    cum_time += duration_time

                    feature_file = os.path.join(FLAGS.log_dir,
                                                "feature_%d" % step)
                    #pool5_value = np.sum(pool5_value, (1,2)) #spatial average
                    pool5_value = pool5_value.reshape(FLAGS.batch_size, -1)
                    np.save(feature_file, pool5_value)

                    for id in ids_value:
                        ids_file.write("%s\n" % id)

                    # Save prediction and ground truth info
                    predictions_file.write(np.array_str( \
                            prediction_value, \
                            max_line_width=1e3, \
                            precision=10, \
                            suppress_small=True))
                    predictions_file.write('\n')
                    predictions_file.flush()

            except tf.errors.OutOfRangeError:
                step -= 1
            except Exception as e:
                step -= 1

            # Stop Queueing data, we're done!
            coord.request_stop()
            coord.join(threads)
Example #15
    def __init__(self, data):
        self.data_prepocessor = DataPipeline()
        self.X_train, self.X_val, self.y_train, self.y_val = data
        self.LinearModel = LinearSVC()

        self.LinearModel.fit(self.X_train, self.y_train)
Example #16
def main(args_list=None):

    if args_list is None:
        args_list = sys.argv[1:]
    args = parser.parse_args(args_list)

    if args.auto_update:
        return util_git.monitor_start("--auto_update")
    if not args.auto_push:
        logger.warning("github push is DISABLED")

    config = DataPipelineConfig(args.base_dir,
                                args.temp_dir,
                                flags={
                                    "trace": args.trace,
                                    "capture_image": args.capture_image,
                                    "rerun_now": args.rerun_now,
                                    "firefox": args.use_firefox,
                                    "chrome": args.use_chrome,
                                    "headless": not args.show_browser,
                                })
    scanner = DataPipeline(config)

    capture = init_specialized_capture(args)

    if args.clean_html or args.extract_html or args.format_html:
        if args.format_html: scanner.format_html(rerun=True)
        if args.clean_html: scanner.clean_html(rerun=True)
        if args.extract_html: scanner.extract_html(rerun=True)
    elif args.continuous:
        scanner.format_html()
        scanner.clean_html()
        scanner.extract_html()
        run_continuous(scanner, capture, auto_push=args.auto_push)
    else:
        scanner.format_html()
        scanner.clean_html()
        scanner.extract_html()
        run_once(scanner, args.auto_push)
Example #17
def run_continuous(scanner: DataPipeline, capture: SpecializedCapture, auto_push: bool):
    " run in continuous mode twice an hour "

    # check for new source code (return if found so watchdog can reload the main loop)
    if util_git.monitor_check(): return

    host = get_host()
    try:
        print("starting continuous run")

        # run the first time outside of the 'retry' logic
        # so it fails if something is really wrong

        # get new external source data
        scanner.update_sources()

        # main scan/clean/extract loop 
        scanner.process()

        # run a one-off capture if requested
        if capture: do_specialized_capture(capture)

        # push to the git repo
        if auto_push: util_git.push(scanner.config.base_dir, f"{udatetime.to_logformat(scanner.change_list.start_date)} on {host}")

        # check for new source again
        if util_git.monitor_check(): return

        cnt = 1
        retry_cnt = 0
        t = next_time()

        # run twice per hour forever
        #    on error, retry twice before going back to sleep until the next cycle
        print(f"sleep until {t}")
        while True:
            time.sleep(15)
            if datetime.now() < t: continue

            if util_git.monitor_check(): break

            print("==================================")
            print(f"=== run {cnt} at {t}")
            print("==================================")

            try:
                scanner.update_sources()
                scanner.process()
                if capture: do_specialized_capture(capture)
                if auto_push: util_git.push(scanner.config.base_dir, f"{udatetime.to_displayformat(scanner.change_list.start_date)} on {host}")
                retry_cnt = 0
            except Exception as ex:
                logger.exception(ex)

                if retry_cnt < 2:
                    print("run failed, wait 5 minutes and try again")
                    t = t + timedelta(minutes=5)
                    retry_cnt += 1
                    continue
                # after two failed retries, give up and sleep until the next cycle
                retry_cnt = 0

            print("==================================")
            print("")
            t = next_time()
            print(f"sleep until {t}")                        
            cnt += 1
    finally:
        if capture: capture.close()
Example #18
def run_continuous(scanner: DataPipeline, capture: SpecializedCapture,
                   auto_push: bool):

    if util_git.monitor_check(): return

    host = get_host()
    try:
        print("starting continuous run")

        scanner.update_sources()
        scanner.process()

        if capture:
            try:
                special_cases(capture)
            except Exception as ex:
                logger.error(ex)
                logger.error(
                    "*** continue after exception in specialized capture")

        if auto_push:
            util_git.push(
                scanner.config.base_dir,
                f"{udatetime.to_logformat(scanner.change_list.start_date)} on {host}"
            )
        if util_git.monitor_check(): return
        cnt = 1
        t = next_time()

        print(f"sleep until {t}")
        while True:
            time.sleep(15)
            if datetime.now() < t: continue

            if util_git.monitor_check(): break

            print("==================================")
            print(f"=== run {cnt} at {t}")
            print("==================================")

            try:
                scanner.update_sources()
                scanner.process()
                if capture: special_cases(capture)
                if auto_push:
                    util_git.push(
                        scanner.config.base_dir,
                        f"{udatetime.to_displayformat(scanner.change_list.start_date)} on {host}"
                    )
            except Exception as ex:
                logger.exception(ex)
                print(f"run failed, wait 5 minutes and try again")
                t = t + timedelta(minutes=5)

            print("==================================")
            print("")
            t = next_time()
            print(f"sleep until {t}")
            cnt += 1
    finally:
        if capture: capture.close()
Example #19
def main(args):

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')
    if not args.noise and not args.events:
        raise ValueError("Define if evaluating accuracy on noise or events")

    # Directory in which the evaluation summaries are written
    if args.noise:
        summary_dir = os.path.join(args.checkpoint_dir, "noise")
    if args.events:
        summary_dir = os.path.join(args.checkpoint_dir, "events")
    if args.save_false:
        false_start = []
        false_end = []
        false_origintime = []
        false_dir = os.path.join("output", "false_predictions")
        if not os.path.exists(false_dir):
            os.makedirs(false_dir)

    while True:
        ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
        if args.eval_interval < 0 or ckpt:
            print('Evaluating model')
            break
        print('Waiting for training job to save a checkpoint')
        time.sleep(args.eval_interval)

    cfg = config.Config()
    if args.noise:
        cfg.batch_size = 128
    if args.events:
        cfg.batch_size = 128
    if args.save_false:
        cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 1
    cfg.n_clusters = args.n_clusters
    cfg.n_clusters += 1

    while True:
        try:
            # data pipeline
            data_pipeline = DataPipeline(args.dataset,
                                         config=cfg,
                                         is_training=False)
            samples = {
                'data': data_pipeline.samples,
                'cluster_id': data_pipeline.labels,
                "start_time": data_pipeline.start_time,
                "end_time": data_pipeline.end_time
            }

            # set up model and validation metrics
            model = models.get(args.model,
                               samples,
                               cfg,
                               args.checkpoint_dir,
                               is_training=False)
            metrics = model.validation_metrics()
            # Validation summary writer
            summary_writer = tf.summary.FileWriter(summary_dir, None)

            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                tf.initialize_local_variables().run()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)

                model.load(sess, args.step)
                print('Evaluating at step {}'.format(
                    sess.run(model.global_step)))

                step = tf.train.global_step(sess, model.global_step)
                mean_metrics = {}
                for key in metrics:
                    mean_metrics[key] = 0

                n = 0
                pred_labels = np.empty(1)
                true_labels = np.empty(1)
                while True:
                    try:
                        to_fetch = [
                            metrics, model.layers["class_prediction"],
                            samples["cluster_id"], samples["start_time"],
                            samples["end_time"]
                        ]
                        metrics_, batch_pred_label, batch_true_label, starttime, endtime = sess.run(
                            to_fetch)

                        #batch_pred_label -=1
                        pred_labels = np.append(pred_labels, batch_pred_label)
                        true_labels = np.append(true_labels, batch_true_label)

                        # Save times of false preds
                        if args.save_false and \
                                batch_pred_label != batch_true_label:
                            print("---False prediction---")
                            print(starttime, endtime)
                            false_origintime.append(
                                (starttime[0] + endtime[0]) / 2)
                            false_end.append(endtime)
                            false_start.append(starttime)

                        # print  true_labels
                        for key in metrics:
                            mean_metrics[key] += cfg.batch_size * metrics_[key]
                        n += cfg.batch_size

                        mess = model.validation_metrics_message(metrics_)
                        print('{:03d} | '.format(n) + mess)

                    except KeyboardInterrupt:
                        print('stopping evaluation')
                        break

                    except tf.errors.OutOfRangeError:
                        print('Evaluation completed ({} epochs).'.format(
                            cfg.n_epochs))
                        print("{} windows seen".format(n))
                        break

                if n > 0:
                    for key in metrics:
                        mean_metrics[key] /= n
                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag='{}/val'.format(key),
                                             simple_value=mean_metrics[key])
                        ])
                        if args.save_summary:
                            summary_writer.add_summary(summary,
                                                       global_step=step)

                summary_writer.flush()

                mess = model.validation_metrics_message(mean_metrics)
                print('Average | ' + mess)

                if args.eval_interval < 0:
                    print('End of evaluation')
                    break

            tf.reset_default_graph()
            print('Sleeping for {}s'.format(args.eval_interval))
            time.sleep(args.eval_interval)

        finally:
            print('joining data threads')
            coord.request_stop()

    if args.save_false:
        false_preds = {}
        false_preds["start_time"] = false_start
        false_preds["end_time"] = false_end
        false_preds["origintime"] = false_origintime
        # false_preds = np.array((false_start, false_end)).transpose()[0]
        # print 'shape', false_preds.shape
        df = pd.DataFrame(false_preds)
        df.to_csv(os.path.join(false_dir, "false_preds.csv"))
    pred_labels = pred_labels[1::]
    true_labels = true_labels[1::]
    # np.save("output/pred_labels_noise.npy",pred_labels)
    # np.save("output/true_labels_noise.npy",true_labels)
    print("---Confusion Matrix----")
    print(confusion_matrix(true_labels, pred_labels))

    coord.join(threads)
Example #20
def run_validate():
    # Get all ckpt names in log dir (without meta ext)
    meta_list = get_checkpoints(FLAGS.log_dir)

    # GPU/CPU Flag
    if FLAGS.gpu is not None:
        compute_string = '/gpu:' + str(FLAGS.gpu)
    else:
        compute_string = '/cpu:0'

    # Iterate through the checkpoints
    val_loss = []
    val_acc = []
    val_itr = []
    for ckpt_path in meta_list:
        tf.reset_default_graph()

        ####################
        # Setup Data Queue #
        ####################
        with tf.device("/cpu:0"):
            with tf.variable_scope('validate') as scope:
                data_pipeline = DataPipeline(augment=False,
                                             num_epochs=1,
                                             shuffle=False)
                validate_x, validate_y, ids = data_pipeline.batch_ops()

        with tf.device(compute_string):
            ##########################
            # Declare Validate Graph #
            ##########################
            # Sets train/test mode; currently only used for BatchNormalization
            # True: Train   False: Test
            phase = tf.placeholder(tf.bool, name='phase')
            validate_model = model(validate_x, validate_y, phase)

            # Delete extraneous info when done debugging
            validate_pred = validate_model.inference()
            validate_acc = validate_model.evaluate()
            validate_loss, gt_y = validate_model.loss()
            global_step = validate_model.get_global_step()
        summary = tf.summary.merge_all()

        init = tf.group(tf.global_variables_initializer(),
                        tf.local_variables_initializer())

        session_config = tf.ConfigProto(allow_soft_placement=True)
        with tf.Session(config=session_config) as sess:
            sess.run(init)
            summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

            # Coordinator handles the data-fetching threads
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            optimistic_restore(sess, ckpt_path)
            global_step_value = global_step.eval()
            try:
                step = 0
                cum_loss = 0
                cum_acc = 0
                cum_time = 0
                while True:
                    if coord.should_stop():
                        break
                    step += 1
                    start_time = time()
                    loss_value, acc_value, prediction_value, gt_value, ids_value = sess.run(
                        [
                            validate_loss, validate_acc, validate_pred, gt_y,
                            ids
                        ],
                        feed_dict={phase: False})
                    duration_time = time() - start_time

                    cum_loss += loss_value
                    cum_acc += acc_value
                    cum_time += duration_time

                    if step % 1 == 0:
                        # Print progress to stdout
                        if FLAGS.print_pred:
                            print(
                                'Step %d: loss = %.4f acc = %.4f (%.3f sec)' %
                                (step, loss_value, acc_value, duration_time))
                            print('Prediction:{}'.format(prediction_value))
                            print('GT:{}'.format(gt_value))
                        sys.stdout.flush()

                    # Write the summaries
                    if step % 25 == 0:
                        # Update the summary file
                        summary_str = sess.run(summary,
                                               feed_dict={phase: False})
                        summary_writer.add_summary(summary_str,
                                                   global_step_value)
                        summary_writer.flush()

            except tf.errors.OutOfRangeError:
                step -= 1
            except Exception as e:
                step -= 1

            # Stop Queueing data, we're done!
            coord.request_stop()
            coord.join(threads)

        avg_loss = cum_loss / step
        avg_acc = cum_acc / step
        avg_time = cum_time / step

        val_loss.append(float(avg_loss))
        val_acc.append(float(avg_acc))
        val_itr.append(int(global_step_value))

        print('Results For Load File: %s' % ckpt_path)
        print('Average_Loss = %.4f' % avg_loss)
        print('Average_Acc = %.4f' % avg_acc)
        print('Run Time: %.2f' % cum_time)
        sys.stdout.flush()

    val_loss = np.asarray(val_loss)
    val_acc = np.asarray(val_acc)
    val_itr = np.asarray(val_itr)

    best_loss = np.amin(val_loss)
    best_acc = np.amax(val_acc)
    best_itr = val_itr[np.argmax(val_acc)]

    print('Overall Results')
    print('Minimum Loss: %.4f' % best_loss)
    print('Maximum Acc: %.4f' % best_acc)
    print('Best Checkpoint: %d' % best_itr)

    save_path = os.path.join(FLAGS.log_dir, 'validation_results.npz')
    np.savez(save_path, val_loss=val_loss, val_acc=val_acc, val_itr=val_itr)
Example #21
            y = y_val[t]
            action = self.policy.choose(X)
            reward = self.calculateReward(action, y)

            predictions.append(action)
            #self.policy.updateParameters(X, action, reward)
            rewards.append(reward)

        return (rewards, predictions)


if __name__ == '__main__':
    #seeds = [1,12,123,1234, 12345, 1234545, 0, 2, 234, 2345, 23454, 345, 3456, 345656, 456, 45656, 7483, 7590 , 789, 7890 ]
    #seeds = np.random.randint(2 ** 30, size=20)
    seeds = np.random.randint(2**30, size=20)
    data_prepocessor = DataPipeline()  #(bert_on=False)
    X_train, X_val, y_train, y_val = data_prepocessor.loadAndPrepData()
    linUCB_regrets = []
    ts_regrets = []
    linUCB_cum_errors = []
    ts_cum_errors = []
    softmax_cum_errors = []
    RF_cum_errors = []
    baseline_cum_errors = []
    fixed_cum_errors = []

    fixed_policy = FixedBaseline()
    fixed_warfarin = WarfarinDosageRecommendation(fixed_policy,
                                                  data=(X_train, X_val,
                                                        y_train, y_val))
Example #22
def run_training():
    '''
    Run Training Loop
    '''
    # GPU/CPU Flag
    if FLAGS.gpu is not None:
        compute_string = '/gpu:' + str(FLAGS.gpu)
    else:
        compute_string = '/cpu:0'

    #####################
    # Setup Data Queues #
    #####################
    with tf.device("/cpu:0"):
        with tf.variable_scope('train'):
            data_pipeline = DataPipeline(augment=True)
            train_x, train_y = data_pipeline.batch_ops()

    #######################
    # Declare train graph #
    #######################
    with tf.device(compute_string):
        phase = tf.placeholder(tf.bool, name='phase')
        train_model = model(train_x, train_y, phase)
        train_predictions = train_model.inference()
        train_acc = train_model.evaluate()
        train_loss, gt_y = train_model.loss()
        train_op = train_model.optimize()
        global_step = train_model.get_global_step()
        tf.summary.scalar('train_loss', train_loss)
        tf.summary.scalar('train_acc', train_acc)

    #############################
    # Setup Summaries and Saver #
    #############################

    # Collect summaries for TensorBoard
    summary = tf.summary.merge_all()
    # Create variable initializer op
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    # Create checkpoint saver
    saver = tf.train.Saver(max_to_keep=100)

    # Begin TensorFlow Session
    session_config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        # Resume training or
        # Run the Variable Initializer Op
        sess.run(init)
        resume_status = False
        if FLAGS.resume:
            try:
                meta_list = get_checkpoints(FLAGS.log_dir)
                optimistic_restore(sess, meta_list[-1])
                resume_status = True
            except:
                print('Checkpoint Load Failed')
                print('Training from scratch')
                resume_status = False
        if not resume_status:
            try:
                train_model.load_pretrained_weights(sess)
            except:
                print('Failed to load pretrained weights.')
                print('Training from scratch')
                sys.stdout.flush()

        # Coordinator handles the data-fetching threads
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)

        # Instantiate a summary writer to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)

        # Actually begin the training process
        try:
            for step in range(FLAGS.max_steps):
                if coord.should_stop():
                    break
                start_time = time()

                # Run one step of the model.
                _, loss_value, acc = sess.run(
                    [train_op, train_loss, train_acc], feed_dict={phase: True})
                global_step_value = global_step.eval()
                duration_time = time() - start_time

                # debug profiler on step 3
                # open timeline.json in chrome://tracing/
                if FLAGS.profile and step == 3:
                    run_metadata = tf.RunMetadata()
                    _, loss, acc = sess.run(
                        [train_op, train_loss, train_acc],
                        options=tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE),
                        run_metadata=run_metadata)
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    with open('timeline.json', 'w') as f:
                        f.write(ctf)

                # Display progress
                if global_step_value % 1 == 0:
                    # Print progress to stdout
                    print('Step %d: loss = %.2f, acc = %.2f (%.3f sec)' %
                          (global_step_value, loss_value, acc, duration_time))
                    sys.stdout.flush()

                # Write the summaries
                if global_step_value % 20 == 0:
                    # Update the summary file
                    summary_str = sess.run(summary, feed_dict={phase: False})
                    summary_writer.add_summary(summary_str, global_step_value)
                    summary_writer.flush()

                # Save Model Checkpoint
                if (global_step_value)%FLAGS.checkpoint_freq==0 or \
                        (global_step_value+1)==FLAGS.max_steps:
                    checkpoint_path = os.path.join(FLAGS.log_dir, 'model')
                    saver.save(sess, checkpoint_path, global_step=global_step)
                #loop_time = time() - start_time
                #print('Total Loop Time: %.3f' % loop_time)
        except tf.errors.OutOfRangeError:
            print('Done Training -- Epoch limit reached.')
            sys.stdout.flush()
        except Exception as e:
            print("Exception encountered: ", e)
            sys.stdout.flush()

        # Stop Queueing data, we're done!
        coord.request_stop()
        coord.join(threads)