def extract_responses(filepath, writer):
    """Write the English, non-deleted responses of a CSV file to ``writer``.

    For every row, the second column is passed through ``preprocess`` and the
    result is written as a single-column row when ``detect_language`` reports
    ``'en'``.  Rows are skipped when:

    * they have fewer than two columns (e.g. blank lines),
    * the first or second column contains the text ``"deleted"``,
    * ``detect_language`` (or the write) raises ``ValueError``.

    Args:
        filepath: Path of the CSV file to read.
        writer: Object exposing ``writerow`` (e.g. a ``csv.writer``).
    """
    deleted = "deleted"
    # newline="" is what the csv module expects; it lets the reader handle
    # newlines embedded inside quoted fields itself.
    with open(filepath, newline="") as input_file:
        reader = csv.reader(input_file, quoting=csv.QUOTE_MINIMAL)
        for line in reader:
            # Blank or truncated rows have no response column to extract.
            if len(line) < 2:
                continue
            if deleted in line[0] or deleted in line[1]:
                continue
            preprocessed_line = preprocess(line[1])
            try:
                if detect_language(preprocessed_line) == 'en':
                    writer.writerow([preprocessed_line])
            except ValueError:
                # Best effort: drop this response and carry on with the rest.
                continue
def train(x, c, config, callbacks=()):
    """Preprocess ``x`` and fit ``model.autoencoder`` to reconstruct it.

    When ``config.multi_processing`` is truthy, ``preprocess_multiprocess`` is
    mapped over ``x`` in a process pool sized to the CPU count and the result
    is converted to an ``int8`` numpy array; both steps print their elapsed
    time.  Otherwise ``x`` is passed through ``preprocess`` in one call.

    Args:
        x: Raw training samples.
        c: Second model input, fed alongside the preprocessed ``x``.
        config: Object providing ``multi_processing``, ``batch_size``,
            ``epochs`` and ``verbose``.
        callbacks: Callbacks forwarded to ``fit``.
    """
    if not config.multi_processing:
        x = preprocess(x)
    else:
        started = time.time()
        worker_count = multiprocessing.cpu_count()
        with multiprocessing.Pool(processes=worker_count) as pool:
            x = pool.map(preprocess_multiprocess, x)
        print("time loading with multiprocess:", time.time() - started)

        started = time.time()
        print("converting no numpy array...")
        x = np.array(x, dtype='int8')
        print("time to convert to numpy array:", time.time() - started)

    # The preprocessed input doubles as the reconstruction target.
    model.autoencoder.fit(
        [x, c],
        x,
        batch_size=config.batch_size,
        epochs=config.epochs,
        validation_split=0.2,
        callbacks=callbacks,
        verbose=config.verbose,
    )
def preprocess_first_stage(args, logger):
    """Run the first preprocessing stage, then hand over to the second stage.

    When ``args.do_pre`` is truthy, each artefact under
    ``args.processed_path`` is (re)built only if its file is missing:

    * ``NLP_Corpus.txt``  -> ``combine(logger, args.raw_path, raw_file)``
    * ``NLP_Corpus.json`` -> ``preprocess(logger, args, "all")``
    * ``NLP_Corpus.csv``  -> ``preprocess(logger, args, "json")``
    * ``NLP_Corpus.xlsx`` -> ``preprocess(logger, args, "xlsx")``

    ``args.raw_file`` is set to the combined text file path as a side effect.

    Args:
        args: Namespace providing ``do_pre``, ``processed_path`` and
            ``raw_path``.
        logger: Logger passed to the helpers and used for the completion
            message.
    """
    # NOTE(review): the source was whitespace-collapsed; the nesting below
    # (raw_file assignment inside the do_pre branch, second stage outside it)
    # is the most plausible reading -- confirm against the original file.
    if args.do_pre:
        # Locations of the first-stage artefacts.
        raw_file = os.path.join(args.processed_path, 'NLP_Corpus.txt')
        save_json = os.path.join(args.processed_path, "NLP_Corpus.json")
        save_csv = os.path.join(args.processed_path, "NLP_Corpus.csv")
        save_xlsx = os.path.join(args.processed_path, "NLP_Corpus.xlsx")
        # Build the combined raw text file only when it is not there yet.
        if not os.path.exists(raw_file):
            combine(logger, args.raw_path, raw_file)
        # Expose the raw file path to the preprocess() calls below via args.
        args.raw_file = raw_file
        # Each output is generated only if its file is missing.
        if not os.path.exists(save_json):
            preprocess(logger, args, "all")
        if not os.path.exists(save_csv):
            preprocess(logger, args, "json")
        if not os.path.exists(save_xlsx):
            preprocess(logger, args, "xlsx")
        logger.info("Preprocessing has done!")
    # Note the argument order: the second stage takes (logger, args).
    preprocess_second_stage(logger, args)
def __data_generation(self, ids):
    """Assemble one batch of CTC inputs and outputs for the given sample ids.

    Every sample image is read as grayscale, passed through ``preprocess``
    and stored with a singleton channel axis whose position follows
    ``K.image_data_format()``.  Labels come from ``text_to_labels`` and are
    left-aligned in a zero-filled row of width ``self.max_text_len``.

    Args:
        ids: Keys into ``self.samples``; each sample holds the image path at
            index 0 and its text at index 1.

    Returns:
        ``(inputs, outputs)`` dictionaries keyed by ``'the_input'``,
        ``'the_labels'``, ``'input_length'``, ``'label_length'`` and ``'ctc'``.
    """
    batch = len(ids)

    if K.image_data_format() == 'channels_first':
        image_shape = [batch, 1, self.img_w, self.img_h]
    else:
        image_shape = [batch, self.img_w, self.img_h, 1]
    X = np.ones(image_shape)
    Y = np.zeros([batch, self.max_text_len])

    # Same value for every sample: downsampled width minus two steps.
    time_steps = self.img_w // self.downsample_factor - 2
    input_length = np.ones((batch, 1), dtype=np.float32) * time_steps
    label_length = np.zeros((batch, 1), dtype=np.float32)

    # Generate data
    for row, sample_id in enumerate(ids):
        sample = self.samples[sample_id]

        gray = cv2.imread(sample[0], cv2.IMREAD_GRAYSCALE)
        img = preprocess(gray, self.img_size, self.data_aug)
        channel_axis = 0 if K.image_data_format() == 'channels_first' else -1
        X[row] = np.expand_dims(img, channel_axis)

        text = sample[1]
        len_text = len(text)
        Y[row, :len_text] = text_to_labels(self.chars, text)
        label_length[row] = len_text

    inputs = {
        'the_input': X,                # (bs, img_w, img_h, 1) or (bs, 1, img_w, img_h)
        'the_labels': Y,               # (bs, max_text_len)
        'input_length': input_length,  # (bs, 1)
        'label_length': label_length,  # (bs, 1)
    }
    outputs = {'ctc': np.zeros([batch])}  # (bs,)
    return inputs, outputs
def _fps(count, elapsed):
    """Return ``count / elapsed`` as a float, or 0.0 when no time was recorded."""
    return float(count) / elapsed if elapsed > 0 else 0.0


def run_demo(args):
    """Detect, classify and track people in a video and write an annotated copy.

    Every ``args.skip_frames``-th frame is run through person detection, pose
    estimation, pose classification and IoU-based tracking; the annotated
    frame is appended to ``runs/exp_<n>/<video name>.avi``.  A per-stage FPS
    report is printed at the end.

    The first frame of the video is read only to obtain the output size;
    processing starts with the second frame.

    Args:
        args: Namespace providing ``skip_frames``, ``out_fps``, ``sigma_iou``,
            ``log``, ``in_video_path`` and ``device``.

    Raises:
        IOError: If no frame can be read from ``args.in_video_path``.
    """
    skip_frames = args.skip_frames
    out_fps = args.out_fps
    sigma_iou = args.sigma_iou
    log = args.log
    in_video_path = args.in_video_path
    device = args.device

    max_miss_frames = 3
    min_frame_th = 3

    video_name = in_video_path.split('/')[-1].split('.')[0]

    # setup experiment directory
    if not os.path.exists('runs'):
        os.makedirs('runs')
    exp_id = len(os.listdir('runs'))
    exp_dir = os.path.join('runs', 'exp_' + str(exp_id))
    os.mkdir(exp_dir)
    violation_dir = os.path.join(exp_dir, 'violations')
    os.mkdir(violation_dir)
    print("Experiment Directory: ", exp_dir)

    print('==== Configuration ====')
    print(args)

    # load models
    model_od = 'models/mobilenet_ssd/FP16/mobilenet-ssd.xml'
    mode_pose = 'models/pose_estimation/FP16/single-human-pose-estimation-0001.xml'
    cls_file = 'models/pose_classifier/classifier.sav'

    ie = IECore()
    detector_person = Detector(ie,
                               path_to_model_xml=model_od,
                               device=device,
                               label_class=15)
    single_human_pose_estimator = HumanPoseEstimator(
        ie, path_to_model_xml=mode_pose, device=device)
    # NOTE: pickle executes arbitrary code on load; only ship trusted
    # classifier files at this path.
    with open(cls_file, 'rb') as cls_handle:
        classifier = pickle.load(cls_handle)

    # read video file
    cap = cv2.VideoCapture(in_video_path)
    ret, frame = cap.read()
    if not ret:
        cap.release()
        raise IOError(
            "Could not read a frame from video: " + str(in_video_path))

    # output video
    out = cv2.VideoWriter(os.path.join(exp_dir, video_name + '.avi'),
                          cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                          out_fps, (frame.shape[1], frame.shape[0]))

    # time benchmarks
    total_time = 0
    detector_time = 0
    pose_time = 0
    classification_time = 0
    tracking_time = 0

    operation_count = 0
    tracks_active = []
    t_id = 1
    frame_i = 0

    try:
        while cap.isOpened():
            # read a frame from video
            ret, frame = cap.read()
            frame_i += 1

            # stop at the end of the stream / on a failed read
            if not ret:
                break

            # skip frames
            if frame_i % skip_frames != 0:
                continue

            operation_count += 1
            start_time = time.time()
            if log:
                print("====== Frame id : ", str(frame_i))

            # detect person
            s = time.time()
            boxes = detector_person.detect(frame)
            detector_time += time.time() - s

            # extract pose
            s = time.time()
            key_points = [
                single_human_pose_estimator.estimate(frame, bbox)
                for bbox in boxes
            ]
            pose_time += time.time() - s
            if log:
                print("Detections : ", str(len(key_points)))

            # predict state and get detections
            s = time.time()
            dets = []
            for box, k_p in zip(boxes, key_points):
                features = preprocess(k_p)
                state = classifier.predict(features)
                dets.append(Detection(box=box, state=state, frame=frame_i))
            classification_time += time.time() - s

            # person tracking
            s = time.time()
            updated_tracks = []
            for track in tracks_active:
                if len(dets) > 0:
                    best_match = max(
                        dets, key=lambda d: iou(track.position, d.box))
                    if iou(track.position, best_match.box) >= sigma_iou:
                        track.update(best_match.box, best_match.state,
                                     frame_i, frame)
                        updated_tracks.append(track)
                        # a detection can extend at most one track
                        dets.remove(best_match)

                # if track was not updated
                if not updated_tracks or track is not updated_tracks[-1]:
                    # keep the track alive until it has missed too many frames
                    track.miss_track(frame_i)
                    if track.miss_count < max_miss_frames:
                        updated_tracks.append(track)

            # create new tracks from the detections no track claimed
            new_tracks = []
            for det in dets:
                new_tracks.append(
                    Track(det.box, det.state, det.frame, frame_i, t_id,
                          violation_dir))
                t_id += 1

            tracks_active = updated_tracks + new_tracks
            tracking_time += time.time() - s

            if log:
                print("Active Tracks : ", str(len(tracks_active)))

            # only draw tracks that have been seen for long enough
            valid_tracks = [
                t for t in tracks_active if t.frame_count() > min_frame_th
            ]
            frame = draw_tracks(valid_tracks, frame)

            # save results
            out.write(frame)
            total_time += time.time() - start_time
    finally:
        # Release both handles even on error; the writer must be released
        # for the output file to be finalised.
        cap.release()
        out.release()

    print("======= FPS Report =======")
    print("Total fps: " + str(_fps(operation_count, total_time)))
    print("Detector fps: " + str(_fps(operation_count, detector_time)))
    print("Pose estimation fps: " + str(_fps(operation_count, pose_time)))
    print("Pose classification fps: " +
          str(_fps(operation_count, classification_time)))
    print("Person Tracker fps: " + str(_fps(operation_count, tracking_time)))
def main():
    """Train and evaluate the ladder network on MNIST as configured by the CLI.

    Steps, in order:

    1. Parse CLI parameters, select the GPU and seed numpy / TensorFlow.
    2. Load MNIST and derive ``iter_per_epoch``, ``num_iter`` and
       ``encoder_layers`` on ``params``.
    3. Build the graph (placeholders, ``Ladder``, loss, accuracy, Adam train
       step chained with the batch-norm statistic updates, saver).
    4. Append the parameter settings and trainable-parameter count to
       ``<logdir><id>/description``.
    5. Restore the latest checkpoint from ``checkpoints/<id>/`` if there is
       one, otherwise initialise all variables.
    6. Log the initial metrics, train, and after every
       ``test_frequency_in_epochs`` epochs checkpoint (unless
       ``do_not_save``) and append a CSV row to ``<logdir><id>/train_log``.
    7. Log the final test accuracy and close the session.
    """
    params = process_cli_params(get_cli_params())

    # -----------------------------
    # Set GPU device to use
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = str(params.which_gpu)

    # Set seeds
    np.random.seed(params.seed)
    tf.set_random_seed(params.seed)

    print("=== Loading Data ===")
    # The data sets get their own name: assigning the result to ``mnist``
    # would make ``mnist`` a local of main(), so looking up the ``mnist``
    # module on the right-hand side would raise UnboundLocalError.
    mnist_data = mnist.read_data_sets("MNIST_data",
                                      n_labeled=params.num_labeled,
                                      one_hot=True,
                                      disjoint=False)
    num_examples = mnist_data.train.num_examples

    # -----------------------------
    # Parameter setup
    params.iter_per_epoch = (num_examples // params.batch_size)
    params.num_iter = params.iter_per_epoch * params.end_epoch
    params.encoder_layers = params.cnn_fan if params.cnn else \
        params.encoder_layers

    # -----------------------------
    # Placeholder setup
    inputs_placeholder = tf.placeholder(
        tf.float32, shape=(None, params.encoder_layers[0]))
    inputs = preprocess(inputs_placeholder, params)
    outputs = tf.placeholder(tf.float32)
    train_flag = tf.placeholder(tf.bool)

    # -----------------------------
    # Ladder
    ladder = Ladder(inputs, outputs, train_flag, params)

    # -----------------------------
    # Loss, accuracy and training steps
    loss = ladder.cost + ladder.u_cost

    accuracy = tf.reduce_mean(
        tf.cast(
            tf.equal(ladder.predict, tf.argmax(outputs, 1)),
            "float")) * tf.constant(100.0)

    learning_rate = tf.Variable(params.initial_learning_rate, trainable=False)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    # add the updates of batch normalization statistics to train_step
    bn_updates = tf.group(*ladder.bn.bn_assigns)
    with tf.control_dependencies([train_step]):
        train_step = tf.group(bn_updates)

    saver = tf.train.Saver(keep_checkpoint_every_n_hours=0.5, max_to_keep=5)

    # -----------------------------
    # Create logs after full graph created to count trainable parameters
    # Write logs to appropriate directory
    log_dir = params.logdir + params.id
    os.makedirs(log_dir, exist_ok=True)

    desc_file = log_dir + "/" + "description"
    with open(desc_file, 'a') as f:
        print(*order_param_settings(params), sep='\n', file=f, flush=True)
        print("Trainable parameters:", count_trainable_params(),
              file=f, flush=True)

    log_file = log_dir + "/" + "train_log"

    # -----------------------------
    print("=== Starting Session ===")
    sess = tf.Session()
    i_iter = 0

    # -----------------------------
    # Resume from checkpoint
    ckpt_dir = "checkpoints/" + params.id + "/"
    ckpt = tf.train.get_checkpoint_state(
        ckpt_dir)  # get latest checkpoint (if any)
    if ckpt and ckpt.model_checkpoint_path:
        # if checkpoint exists, restore the parameters and set epoch_n and
        # i_iter; the epoch number is the suffix after '-' in the file name
        saver.restore(sess, ckpt.model_checkpoint_path)
        epoch_n = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[1])
        i_iter = (epoch_n + 1) * (num_examples // params.batch_size)
        print("Restored Epoch ", epoch_n)
    else:
        # no checkpoint exists. create checkpoints directory if it does not
        # exist.
        os.makedirs(ckpt_dir, exist_ok=True)
        init = tf.global_variables_initializer()
        sess.run(init)

    # -----------------------------
    print("=== Training ===")

    def full_feed(dataset):
        """Feed dict evaluating the whole of ``dataset`` in inference mode."""
        return {inputs_placeholder: dataset.images,
                outputs: dataset.labels,
                train_flag: False}

    def evaluate_metric(dataset, sess, op):
        """Average ``op`` over full batches drawn from ``dataset``."""
        metric = 0
        num_eval_iters = dataset.num_examples // params.batch_size
        for _ in range(num_eval_iters):
            images, labels = dataset.next_batch(params.batch_size)
            init_feed = {inputs_placeholder: images,
                         outputs: labels,
                         train_flag: False}
            metric += sess.run(op, init_feed)
        metric /= num_eval_iters
        return metric

    def evaluate_metric_list(dataset, sess, ops):
        """Average every op in ``ops`` over full batches from ``dataset``."""
        metrics = [0.0 for _ in ops]
        num_eval_iters = dataset.num_examples // params.batch_size
        for _ in range(num_eval_iters):
            images, labels = dataset.next_batch(params.batch_size)
            init_feed = {inputs_placeholder: images,
                         outputs: labels,
                         train_flag: False}
            op_eval = sess.run(ops, init_feed)
            for i, op in enumerate(op_eval):
                metrics[i] += op
        metrics = [metric / num_eval_iters for metric in metrics]
        return metrics

    # -----------------------------
    # Evaluate initial training accuracy and losses
    with open(desc_file, 'a') as f:
        print('================================', file=f, flush=True)
        print("Initial Train Accuracy: ",
              sess.run(accuracy,
                       feed_dict=full_feed(mnist_data.train.labeled_ds)),
              "%", file=f, flush=True)
        print("Initial Train Losses: ",
              *evaluate_metric_list(
                  mnist_data.train, sess,
                  [loss, ladder.cost, ladder.u_cost]),
              file=f, flush=True)

        # -----------------------------
        # Evaluate initial testing accuracy and cross-entropy loss
        print("Initial Test Accuracy: ",
              sess.run(accuracy, feed_dict=full_feed(mnist_data.test)),
              "%", file=f, flush=True)
        print("Initial Test Cross Entropy: ",
              evaluate_metric(mnist_data.test, sess, ladder.cost),
              file=f, flush=True)

    start = time.time()
    for i in tqdm(range(i_iter, params.num_iter)):
        images, labels = mnist_data.train.next_batch(params.batch_size)

        sess.run(
            [train_step],
            feed_dict={inputs_placeholder: images,
                       outputs: labels,
                       train_flag: True})

        # ---------------------------------------------
        # Epoch completed?
        if (i > 1) and ((i + 1) % params.iter_per_epoch == 0):
            epoch_n = i // (num_examples // params.batch_size)
            # NOTE(review): ``g`` and ``p`` are not defined in this function;
            # confirm they exist at module level (``p`` may be meant to be
            # ``params``).
            update_decays(sess, epoch_n, iter=i, graph=g, params=p)

            # ---------------------------------------------
            # Evaluate every test_frequency_in_epochs
            if ((i + 1) % (params.test_frequency_in_epochs *
                           params.iter_per_epoch) == 0):
                now = time.time() - start

                if not params.do_not_save:
                    saver.save(sess, ckpt_dir + 'model.ckpt', epoch_n)

                # ---------------------------------------------
                # Compute error on testing set (10k examples)
                test_cost = evaluate_metric(
                    mnist_data.test, sess, ladder.cost)

                # Create log of:
                # time, epoch number, test accuracy, test cross entropy,
                # train accuracy, train loss, train cross entropy,
                # train reconstruction loss
                test_accuracy = sess.run(
                    [accuracy], feed_dict=full_feed(mnist_data.test))
                train_accuracy = sess.run(
                    [accuracy],
                    feed_dict=full_feed(mnist_data.train.labeled_ds))
                # Losses are measured on the batch that was just trained on.
                train_losses = sess.run(
                    [loss, ladder.cost, ladder.u_cost],
                    feed_dict={inputs_placeholder: images,
                               outputs: labels,
                               train_flag: False})
                log_i = ([now, epoch_n] + test_accuracy + [test_cost] +
                         train_accuracy + train_losses)

                with open(log_file, 'a') as train_log:
                    print(*log_i, sep=',', flush=True, file=train_log)

    with open(desc_file, 'a') as f:
        print("Final Accuracy: ",
              sess.run(accuracy, feed_dict=full_feed(mnist_data.test)),
              "%", file=f, flush=True)

    sess.close()