Ejemplos de preprocess en Python, ejemplos de src.utils.preprocess en Python

Ejemplo n.º 1

0

Mostrar archivo

def extract_responses(filepath, writer):
    """Write the preprocessed English responses of a CSV file to ``writer``.

    For every row, the second column is run through ``preprocess`` and, if
    ``detect_language`` reports ``'en'`` for the result, written out as a
    single-column row. Rows whose first or second column contains the
    substring ``"deleted"`` are skipped, as are rows with fewer than two
    columns.

    Args:
        filepath: Path of the CSV file to read.
        writer: Object exposing ``writerow`` (for example a ``csv.writer``).
    """
    deleted = "deleted"
    # newline='' lets the csv module do its own newline handling, which is
    # required for quoted fields that contain embedded line breaks.
    with open(filepath, newline='') as input_file:
        reader = csv.reader(input_file, quoting=csv.QUOTE_MINIMAL)
        for line in reader:
            # Blank or truncated rows have no response column to extract.
            if len(line) < 2:
                continue
            if deleted in line[0] or deleted in line[1]:
                continue

            preprocessed_line = preprocess(line[1])
            try:
                is_english = detect_language(preprocessed_line) == 'en'
            except ValueError:
                # Presumably raised when the language cannot be determined
                # (confirm against detect_language); such rows are dropped.
                continue

            # Kept outside the try so a failing writer is not silently
            # mistaken for an undetectable language.
            if is_english:
                writer.writerow([preprocessed_line])

Ejemplo n.º 2

0

Mostrar archivo

Archivo: train.py Proyecto: Neoares/cvae_molgen

def train(x, c, config, callbacks=()):
    """Preprocess ``x`` and fit the module-level autoencoder on it.

    When ``config.multi_processing`` is truthy, ``preprocess_multiprocess``
    is mapped over ``x`` in a process pool sized to the CPU count and the
    result is converted to an ``int8`` NumPy array; both phases are timed
    and reported on stdout. Otherwise ``x`` is passed to ``preprocess`` in
    a single call.

    Args:
        x: Samples to preprocess; also used as the fit target.
        c: Second model input, passed to ``fit`` next to ``x``.
        config: Object providing ``multi_processing``, ``batch_size``,
            ``epochs`` and ``verbose``.
        callbacks: Callbacks forwarded to ``fit``.
    """
    if not config.multi_processing:
        x = preprocess(x)
    else:
        started = time.time()
        worker_count = multiprocessing.cpu_count()
        with multiprocessing.Pool(processes=worker_count) as pool:
            x = pool.map(preprocess_multiprocess, x)
        print("time loading with multiprocess:", time.time() - started)

        started = time.time()
        print("converting no numpy array...")
        x = np.array(x, dtype='int8')
        print("time to convert to numpy array:", time.time() - started)

    # The autoencoder reconstructs x, so x is both an input and the target.
    fit = model.autoencoder.fit
    fit_options = dict(
        batch_size=config.batch_size,
        epochs=config.epochs,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=config.verbose,
    )
    fit([x, c], x, **fit_options)

Ejemplo n.º 3

0

Mostrar archivo

def preprocess_first_stage(args, logger):
    """Run the first preprocessing stage, then hand over to the second.

    Does nothing unless ``args.do_pre`` is truthy. Otherwise the combined
    corpus text file is built if it is missing, ``args.raw_file`` is set to
    its path, and ``preprocess`` is invoked once for each derived file
    (JSON, CSV, XLSX) that does not exist yet under ``args.processed_path``.

    Args:
        args: Object providing ``do_pre``, ``processed_path`` and
            ``raw_path``; ``raw_file`` is assigned on it.
        logger: Logger passed through to the helpers and used for the
            completion message.
    """
    if not args.do_pre:
        return

    raw_file, save_json, save_csv, save_xlsx = (
        os.path.join(args.processed_path, file_name)
        for file_name in ('NLP_Corpus.txt', "NLP_Corpus.json",
                          "NLP_Corpus.csv", "NLP_Corpus.xlsx")
    )

    if not os.path.exists(raw_file):
        combine(logger, args.raw_path, raw_file)
    args.raw_file = raw_file

    # (output file to check, mode passed to preprocess when it is missing).
    # NOTE(review): the CSV output is requested with mode "json" -
    # presumably "convert from JSON"; confirm against preprocess().
    pending_outputs = (
        (save_json, "all"),
        (save_csv, "json"),
        (save_xlsx, "xlsx"),
    )
    for output_path, mode in pending_outputs:
        if not os.path.exists(output_path):
            preprocess(logger, args, mode)

    logger.info("Preprocessing has done!")
    preprocess_second_stage(logger, args)

Ejemplo n.º 4

0

Mostrar archivo

Archivo: data_generator.py Proyecto: yanan11/CRNN_CTC_English_Handwriting_Recognition

    def __data_generation(self, ids):
        """Assemble one batch of inputs and placeholder outputs for ``ids``.

        Each id indexes ``self.samples``; element 0 of a sample is read as
        an image path and element 1 as its text. Images are loaded in
        grayscale, passed through ``preprocess`` and given a channel axis
        that matches the backend image data format.

        Returns:
            Tuple ``(inputs, outputs)``: ``inputs`` is a dict with the keys
            ``the_input``, ``the_labels``, ``input_length`` and
            ``label_length``; ``outputs`` is a dict whose ``ctc`` entry is
            an all-zero array with one element per sample.
        """
        batch = len(ids)

        if K.image_data_format() == 'channels_first':
            image_shape = [batch, 1, self.img_w, self.img_h]
        else:
            image_shape = [batch, self.img_w, self.img_h, 1]
        X = np.ones(image_shape)
        Y = np.zeros([batch, self.max_text_len])

        # Same value for every sample, derived from the image width.
        time_steps = self.img_w // self.downsample_factor - 2
        input_length = np.ones((batch, 1), dtype=np.float32) * time_steps
        label_length = np.zeros((batch, 1), dtype=np.float32)

        # Fill the batch one sample at a time
        for row, sample_id in enumerate(ids):
            gray = cv2.imread(self.samples[sample_id][0],
                              cv2.IMREAD_GRAYSCALE)
            img = preprocess(gray, self.img_size, self.data_aug)

            # Channel axis goes first or last depending on the backend.
            if K.image_data_format() == 'channels_first':
                channel_axis = 0
            else:
                channel_axis = -1
            X[row] = np.expand_dims(img, channel_axis)

            text = self.samples[sample_id][1]
            text_len = len(text)
            # Only the first text_len label slots are written; the rest
            # of the row keeps its zero fill.
            Y[row, :text_len] = text_to_labels(self.chars, text)
            label_length[row] = text_len

        inputs = {
            'the_input': X,
            'the_labels': Y,
            'input_length': input_length,
            'label_length': label_length,
        }
        outputs = {'ctc': np.zeros([batch])}

        return inputs, outputs

Ejemplo n.º 5

0

Mostrar archivo

Archivo: Service.py Proyecto: muaz-urwa/fare-evasion-detection-using-openvino

def run_demo(args):
    """Run detection, pose classification and IOU tracking on one video.

    Creates a new ``runs/exp_<n>`` directory with a ``violations``
    sub-directory, writes the annotated frames to ``<video name>.avi``
    inside it and prints a per-stage FPS report when the video ends.

    Only every ``skip_frames``-th frame is processed and written. The very
    first frame of the video is consumed to obtain the output frame size
    and is never processed.

    Args:
        args: Object providing ``skip_frames``, ``out_fps``, ``sigma_iou``,
            ``log``, ``in_video_path`` and ``device``.
    """
    skip_frames = args.skip_frames
    out_fps = args.out_fps
    sigma_iou = args.sigma_iou
    log = args.log
    in_video_path = args.in_video_path
    device = args.device
    max_miss_frames = 3
    min_frame_th = 3

    video_name = in_video_path.split('/')[-1].split('.')[0]

    # setup experiment directory
    if not os.path.exists('runs'):
        os.makedirs('runs')
    exp_id = len(os.listdir('runs'))
    exp_dir = os.path.join('runs', 'exp_' + str(exp_id))
    os.mkdir(exp_dir)
    violation_dir = os.path.join(exp_dir, 'violations')
    os.mkdir(violation_dir)

    print("Experiment Directory: ", exp_dir)
    print('==== Configuration ====')
    print(args)

    # load models
    model_od = 'models/mobilenet_ssd/FP16/mobilenet-ssd.xml'
    mode_pose = 'models/pose_estimation/FP16/single-human-pose-estimation-0001.xml'
    cls_file = 'models/pose_classifier/classifier.sav'

    ie = IECore()
    detector_person = Detector(ie,
                               path_to_model_xml=model_od,
                               device=device,
                               label_class=15)

    single_human_pose_estimator = HumanPoseEstimator(
        ie, path_to_model_xml=mode_pose, device=device)

    # NOTE: unpickling can execute arbitrary code; cls_file must come from
    # a trusted source. The handle is closed as soon as loading is done.
    with open(cls_file, 'rb') as cls_handle:
        classifier = pickle.load(cls_handle)

    # time benchmarks
    total_time = 0
    detector_time = 0
    pose_time = 0
    classification_time = 0
    tracking_time = 0
    operation_count = 0

    tracks_active = []

    t_id = 1
    frame_i = 0

    # read video file
    cap = cv2.VideoCapture(in_video_path)
    out = None
    try:
        # The first frame is only used to size the output video.
        ret, frame = cap.read()

        # output video
        out = cv2.VideoWriter(os.path.join(exp_dir, video_name + '.avi'),
                              cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                              out_fps, (frame.shape[1], frame.shape[0]))

        while cap.isOpened():
            # read a frame from video
            ret, frame = cap.read()
            frame_i += 1

            # stop at the first frame that could not be read
            if not ret:
                break

            # skip frames
            if frame_i % skip_frames != 0:
                continue

            operation_count += 1
            start_time = time.time()

            if log:
                print("====== Frame id : ", str(frame_i))

            # detect person
            s = time.time()
            boxes = detector_person.detect(frame)
            detector_time += time.time() - s

            # extract pose
            s = time.time()
            key_points = [
                single_human_pose_estimator.estimate(frame, bbox)
                for bbox in boxes
            ]
            pose_time += time.time() - s

            if log:
                print("Detections : ", str(len(key_points)))

            # predict state and get detections
            s = time.time()
            dets = []
            for box, k_p in zip(boxes, key_points):
                features = preprocess(k_p)
                state = classifier.predict(features)
                dets.append(Detection(box=box, state=state, frame=frame_i))
            classification_time += time.time() - s

            # person tracking
            s = time.time()

            updated_tracks = []
            for track in tracks_active:

                if dets:
                    # detection overlapping most with this track
                    best_match = max(
                        dets, key=lambda x: iou(track.position, x.box))
                    if iou(track.position, best_match.box) >= sigma_iou:
                        track.update(best_match.box, best_match.state,
                                     frame_i, frame)
                        updated_tracks.append(track)

                        # a detection can extend only one track
                        dets.remove(best_match)

                # if track was not updated
                if not updated_tracks or track is not updated_tracks[-1]:
                    # keep the track alive until it has missed too often
                    track.miss_track(frame_i)
                    if track.miss_count < max_miss_frames:
                        updated_tracks.append(track)

            # create new tracks from the detections left unmatched
            new_tracks = []
            for det in dets:
                new_tracks.append(
                    Track(det.box, det.state, det.frame, frame_i, t_id,
                          violation_dir))
                t_id += 1

            tracks_active = updated_tracks + new_tracks

            tracking_time += time.time() - s

            if log:
                print("Active Tracks : ", str(len(tracks_active)))

            valid_tracks = [
                t for t in tracks_active if t.frame_count() > min_frame_th
            ]
            frame = draw_tracks(valid_tracks, frame)

            # save results
            out.write(frame)
            total_time += time.time() - start_time
    finally:
        # Release both handles even on error so the capture is freed and
        # the output file is finalized.
        cap.release()
        if out is not None:
            out.release()

    def fps(elapsed):
        # A stage that accumulated no time (e.g. no frame was processed)
        # reports 0.0 instead of raising ZeroDivisionError.
        return float(operation_count) / elapsed if elapsed > 0 else 0.0

    print("======= FPS Report =======")
    print("Total fps: " + str(fps(total_time)))
    print("Detector fps: " + str(fps(detector_time)))
    print("Pose estimation fps: " + str(fps(pose_time)))
    print("Pose classification fps: " + str(fps(classification_time)))
    print("Person Tracker fps: " + str(fps(tracking_time)))

Ejemplo n.º 6

0

Mostrar archivo

def main():
    """Build, train and evaluate a Ladder model on MNIST from CLI options.

    Steps, in order: parse the command-line parameters, select the GPU and
    seed NumPy/TensorFlow, load the data, build the graph (placeholders,
    ``Ladder``, loss, accuracy, Adam training step grouped with the batch
    normalization updates), restore the latest checkpoint under
    ``checkpoints/<id>/`` if there is one, then train. Parameter settings
    and initial/final metrics are appended to ``<logdir><id>/description``;
    periodic metrics are appended to ``<logdir><id>/train_log``.
    """

    params = process_cli_params(get_cli_params())

    # -----------------------------
    # Set GPU device to use
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(params.which_gpu)

    # Set seeds
    np.random.seed(params.seed)
    tf.set_random_seed(params.seed)

    print("===  Loading Data ===")
    # The result is bound to ``mnist_data`` instead of ``mnist``: assigning
    # to ``mnist`` here would make that name local to this function, and
    # the lookup of ``mnist.read_data_sets`` would then raise
    # UnboundLocalError.
    mnist_data = mnist.read_data_sets("MNIST_data",
                                      n_labeled=params.num_labeled,
                                      one_hot=True,
                                      disjoint=False)
    num_examples = mnist_data.train.num_examples
    # -----------------------------
    # Parameter setup
    params.iter_per_epoch = (num_examples // params.batch_size)
    params.num_iter = params.iter_per_epoch * params.end_epoch
    params.encoder_layers = params.cnn_fan if params.cnn else \
        params.encoder_layers

    # -----------------------------
    # Placeholder setup
    inputs_placeholder = tf.placeholder(
        tf.float32, shape=(None, params.encoder_layers[0]))
    inputs = preprocess(inputs_placeholder, params)
    outputs = tf.placeholder(tf.float32)
    train_flag = tf.placeholder(tf.bool)

    # -----------------------------
    # Ladder
    ladder = Ladder(inputs, outputs, train_flag, params)

    # -----------------------------
    # Loss, accuracy and training steps
    loss = ladder.cost + ladder.u_cost

    # Percentage of predictions equal to the argmax of the one-hot labels
    accuracy = tf.reduce_mean(
        tf.cast(
            tf.equal(ladder.predict, tf.argmax(outputs, 1)),
            "float")) * tf.constant(100.0)

    learning_rate = tf.Variable(params.initial_learning_rate, trainable=False)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # add the updates of batch normalization statistics to train_step
    bn_updates = tf.group(*ladder.bn.bn_assigns)
    with tf.control_dependencies([train_step]):
        train_step = tf.group(bn_updates)

    saver = tf.train.Saver(keep_checkpoint_every_n_hours=0.5, max_to_keep=5)

    # -----------------------------
    # Create logs after full graph created to count trainable parameters
    # Write logs to appropriate directory
    log_dir = params.logdir + params.id
    if not os.path.exists(log_dir):
        os.makedirs(log_dir)

    desc_file = log_dir + "/" + "description"
    with open(desc_file, 'a') as f:
        print(*order_param_settings(params), sep='\n', file=f, flush=True)
        print("Trainable parameters:", count_trainable_params(), file=f,
              flush=True)

    log_file = log_dir + "/" + "train_log"

    # -----------------------------
    print("===  Starting Session ===")
    sess = tf.Session()
    i_iter = 0
    # -----------------------------
    # Resume from checkpoint
    ckpt_dir = "checkpoints/" + params.id + "/"
    ckpt = tf.train.get_checkpoint_state(
        ckpt_dir)  # get latest checkpoint (if any)
    if ckpt and ckpt.model_checkpoint_path:
        # if checkpoint exists, restore the parameters and set epoch_n and
        # i_iter; the epoch number is parsed from the checkpoint file name
        saver.restore(sess, ckpt.model_checkpoint_path)
        epoch_n = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[1])
        i_iter = (epoch_n + 1) * (num_examples // params.batch_size)
        print("Restored Epoch ", epoch_n)
    else:
        # no checkpoint exists. create checkpoints directory if it does not
        # exist.
        if not os.path.exists(ckpt_dir):
            os.makedirs(ckpt_dir)
        init = tf.global_variables_initializer()
        sess.run(init)

    # -----------------------------
    print("=== Training ===")

    def evaluate_metric(dataset, sess, op):
        """Return the mean of ``op`` over full batches of ``dataset``."""
        metric = 0
        num_eval_iters = dataset.num_examples // params.batch_size
        for _ in range(num_eval_iters):
            images, labels = dataset.next_batch(params.batch_size)
            init_feed = {inputs_placeholder: images,
                         outputs: labels,
                         train_flag: False}
            metric += sess.run(op, init_feed)
        metric /= num_eval_iters
        return metric

    def evaluate_metric_list(dataset, sess, ops):
        """Return the per-op means of ``ops`` over full batches."""
        metrics = [0.0 for _ in ops]
        num_eval_iters = dataset.num_examples // params.batch_size
        for _ in range(num_eval_iters):
            images, labels = dataset.next_batch(params.batch_size)
            init_feed = {inputs_placeholder: images,
                         outputs: labels,
                         train_flag: False}
            op_eval = sess.run(ops, init_feed)

            for i, op in enumerate(op_eval):
                metrics[i] += op

        metrics = [metric/num_eval_iters for metric in metrics]
        return metrics

    # -----------------------------
    # Evaluate initial training accuracy and losses
    with open(desc_file, 'a') as f:
        print('================================', file=f, flush=True)
        print("Initial Train Accuracy: ",
              sess.run(accuracy, feed_dict={
                  inputs_placeholder: mnist_data.train.labeled_ds.images,
                  outputs: mnist_data.train.labeled_ds.labels,
                  train_flag: False}),
              "%", file=f, flush=True)
        print("Initial Train Losses: ", *evaluate_metric_list(
            mnist_data.train, sess, [loss, ladder.cost, ladder.u_cost]),
              file=f, flush=True)

        # -----------------------------
        # Evaluate initial testing accuracy and cross-entropy loss
        print("Initial Test Accuracy: ",
              sess.run(accuracy, feed_dict={
                  inputs_placeholder: mnist_data.test.images,
                  outputs: mnist_data.test.labels,
                  train_flag: False}),
              "%", file=f, flush=True)
        print("Initial Test Cross Entropy: ",
              evaluate_metric(mnist_data.test, sess, ladder.cost), file=f,
              flush=True)

    start = time.time()
    for i in tqdm(range(i_iter, params.num_iter)):

        images, labels = mnist_data.train.next_batch(params.batch_size)

        _ = sess.run(
            [train_step],
            feed_dict={inputs_placeholder: images,
                       outputs: labels,
                       train_flag: True})

        # ---------------------------------------------
        # Epoch completed?
        if (i > 1) and ((i+1) % params.iter_per_epoch == 0):
            epoch_n = i // (num_examples // params.batch_size)
            # NOTE(review): ``g`` and ``p`` are not defined in this
            # function; unless they are module-level globals this call
            # raises NameError - confirm the intended arguments.
            update_decays(sess, epoch_n, iter=i, graph=g, params=p)

            # ---------------------------------------------
            # Evaluate every test_frequency_in_epochs
            if ((i + 1) % (params.test_frequency_in_epochs *
                               params.iter_per_epoch) == 0):
                now = time.time() - start

                if not params.do_not_save:
                    saver.save(sess, ckpt_dir + 'model.ckpt', epoch_n)

                # ---------------------------------------------
                # Compute error on testing set
                test_cost = evaluate_metric(mnist_data.test, sess,
                                            ladder.cost)

                # Create log of:
                # time, epoch number, test accuracy, test cross entropy,
                # train accuracy, train loss, train cross entropy,
                # train reconstruction loss

                log_i = [now, epoch_n] + sess.run(
                    [accuracy],
                    feed_dict={inputs_placeholder: mnist_data.test.images,
                               outputs: mnist_data.test.labels,
                               train_flag: False}
                ) + [test_cost] + sess.run(
                    [accuracy],
                    feed_dict={inputs_placeholder:
                                   mnist_data.train.labeled_ds.images,
                               outputs: mnist_data.train.labeled_ds.labels,
                               train_flag: False}
                ) + sess.run(
                    [loss, ladder.cost, ladder.u_cost],
                    feed_dict={inputs_placeholder: images,
                               outputs: labels,
                               train_flag: False})

                with open(log_file, 'a') as train_log:
                    print(*log_i, sep=',', flush=True, file=train_log)

    with open(desc_file, 'a') as f:
        print("Final Accuracy: ", sess.run(accuracy, feed_dict={
            inputs_placeholder: mnist_data.test.images,
            outputs: mnist_data.test.labels,
            train_flag: False}),
              "%", file=f, flush=True)

    sess.close()