def decode_time(base_dir, ext='jpg', repeat=3):
    """Profile image decode latency for every image under base_dir.

    For each matching file, the raw bytes are read once, decoded `repeat`
    times with OpenCV, and the mean decode time (ms) plus image metadata is
    upserted into the DecodeProfile table keyed by (path, hostname).

    Arguments:
        base_dir {str} -- directory to recursively glob images from
        ext {str} -- file extension to match (default: {'jpg'})
        repeat {int} -- decode iterations to average over (default: {3})
    """
    sess = dbutils.get_session()
    for path in recursive_glob(base_dir, '*.{}'.format(ext)):
        with open(path, 'rb') as f:
            buf = f.read()
        tic = time.time()
        for _ in range(repeat):
            # Raw image bytes are unsigned; cv2.imdecode expects a uint8
            # buffer (np.int8 reinterprets the bytes with the wrong dtype).
            arr = cv2.imdecode(np.frombuffer(buf, np.uint8), cv2.IMREAD_COLOR)
        elapsed = time.time() - tic
        if arr is None:
            # Corrupt / unsupported file: imdecode returns None; skip rather
            # than crash on arr.shape below.
            logger.warning("Failed to decode {}, skipping".format(path))
            continue
        h, w = arr.shape[:2]
        decode_ms = elapsed * 1000 / repeat
        size = len(buf)
        keys_dict = {'path': path, 'hostname': this_hostname}
        vals_dict = {
            'basename': os.path.basename(path),
            'size': size,
            'height': h,
            'width': w,
            'decode_ms': decode_ms
        }
        logger.debug(str(vals_dict))
        dbutils.insert_or_update_one(sess,
                                     models.DecodeProfile,
                                     keys_dict=keys_dict,
                                     vals_dict=vals_dict)
    sess.commit()
    sess.close()
def disk_read(base_dir, disk, ext='jpg', sort_inode=False, store_result=True):
    """Measure raw disk read latency per file, optionally storing to DB.

    Reads each matching file with low-level os.open/os.read (bypassing
    Python's buffered I/O) and records per-file read time. Depending on
    `sort_inode`, the time is stored as sequential-read ('seq_read_ms',
    inode order) or random-read ('rand_read_ms', shuffled order).

    Arguments:
        base_dir {str} -- directory to recursively glob files from
        disk {str} -- disk label used as part of the DB key
        ext {str} -- file extension to match (default: {'jpg'})
        sort_inode {bool} -- read in inode order instead of shuffled
        store_result {bool} -- upsert measurements into DiskReadProfile
    """
    # logging.warn is a deprecated alias; use warning().
    logger.warning("Make sure you cleaned the OS page buffer!")
    base_dir = os.path.realpath(base_dir)
    paths = list(recursive_glob(base_dir, '*.{}'.format(ext)))
    if sort_inode:
        paths = sorted(paths, key=lambda p: os.stat(p).st_ino)
        logger.info("Sort by inode num.")
    else:
        # deterministic pseudo-random
        random.seed(42)
        random.shuffle(paths)
    results = []
    for p in paths:
        tic = time.time()
        fd = os.open(p, os.O_RDONLY)
        size = os.path.getsize(p)
        buf = os.read(fd, size)
        os.close(fd)
        elapsed = time.time() - tic
        logger.debug("{}: {} bytes {} ms".format(p, len(buf), elapsed * 1000))
        vals_dict = {'size': size}
        if sort_inode:
            vals_dict['seq_read_ms'] = elapsed * 1000
        else:
            vals_dict['rand_read_ms'] = elapsed * 1000
        results.append({
            'keys_dict': {
                'path': p,
                'disk': disk
            },
            'vals_dict': vals_dict
        })
    if store_result:
        logger.info("Going to write {} results to DB".format(len(results)))
        sess = dbutils.get_session()
        for r in results:
            dbutils.insert_or_update_one(sess,
                                         models.DiskReadProfile,
                                         keys_dict=r['keys_dict'],
                                         vals_dict=r['vals_dict'])
        sess.commit()
        sess.close()
def image_meta(base_dir, ext='jpg', num_workers=16):
    """Extract image metadata in parallel and upsert it into ImageMeta.

    Fans file paths out to a multiprocessing pool running `_get_meta`
    (chunksize 64) and writes one row per image keyed by path.

    Arguments:
        base_dir {str} -- directory to recursively glob images from
        ext {str} -- file extension to match (default: {'jpg'})
        num_workers {int} -- pool size (default: {16})
    """
    sess = dbutils.get_session()
    pool = mp.Pool(num_workers)
    try:
        # 'fmt' instead of 'format' to avoid shadowing the builtin.
        for path, fmt, size, width, height in pool.imap(
                _get_meta, recursive_glob(base_dir, '*.{}'.format(ext)), 64):
            dbutils.insert_or_update_one(sess, models.ImageMeta, {'path': path}, {
                'format': fmt,
                'size': size,
                'width': width,
                'height': height
            })
            logger.info("Read {}".format(path))
        sess.commit()
    finally:
        # The original leaked the pool and session; always release them.
        pool.close()
        pool.join()
        sess.close()
def run_face(base_dir, ext='jpg', store_results=False):
    """Run OpenCV-DNN face detection over a directory, timing each stage.

    For every image: read bytes from disk, decode, run the TF face detector
    at 300x300, and collect boxes above a 0.8 confidence threshold. Timing
    is cumulative from a single `tic`: read_ms <= decode_ms <= total_ms.
    Results are optionally upserted into the FaceExp table.

    Arguments:
        base_dir {str} -- directory to recursively glob images from
        ext {str} -- file extension to match (default: {'jpg'})
        store_results {bool} -- write measurements to DB (default: {False})
    """
    modelFile = os.path.join(os.getcwd(),
                             's3dexp/models/opencv_face_detector_uint8.pb')
    configFile = os.path.join(os.getcwd(),
                              's3dexp/models/opencv_face_detector.pbtxt')
    model = cv2.dnn.readNetFromTensorflow(modelFile, configFile)
    threshold = 0.8
    results = []
    for path in recursive_glob(base_dir, "*.{}".format(ext)):
        tic = time.time()
        # Read
        with open(path, 'rb') as f:
            buf = f.read()
        read_time = time.time() - tic
        # 1. image decode (uint8: raw image bytes are unsigned; that is the
        # dtype cv2.imdecode expects)
        image = cv2.imdecode(np.frombuffer(buf, np.uint8), cv2.IMREAD_COLOR)
        # NOTE: cumulative since tic, i.e. includes read time.
        decode_time = time.time() - tic
        h, w = image.shape[:2]
        # Run detection; mean values are the detector's training means (BGR).
        blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
                                     (300, 300), (104.0, 177.0, 123.0))
        model.setInput(blob)
        detections = model.forward()
        box = []
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > threshold:
                # Detector outputs normalized [0,1] coords; scale to pixels.
                bb = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                box.append(bb.astype('int').tolist())
        all_time = time.time() - tic
        logger.debug("Read {:.1f} ms, Decode {:.1f}, Total {:.1f}. {}".format(
            read_time * 1000, decode_time * 1000, all_time * 1000, path))
        results.append({
            'path': path,
            'read_ms': read_time * 1000,
            'decode_ms': decode_time * 1000,
            'total_ms': all_time * 1000,
            'size': len(buf),
            'height': h,
            'width': w,
            'num_faces': len(box),
            'box': json.dumps(box)
        })
    if store_results:
        logger.info("Writing {} results to DB".format(len(results)))
        sess = dbutils.get_session()
        logger.debug(sess)
        for r in results:
            keys_dict = {
                'path': r['path'],
                'basename': os.path.basename(r['path']),
                'expname': 'face_detection',
                'device': 'cpu',
                'disk': 'hdd'
            }
            dbutils.insert_or_update_one(sess,
                                         dbmodels.FaceExp,
                                         keys_dict=keys_dict,
                                         vals_dict={
                                             'read_ms': r['read_ms'],
                                             'decode_ms': r['decode_ms'],
                                             'total_ms': r['total_ms'],
                                             'size': r['size'],
                                             'height': r['height'],
                                             'width': r['width'],
                                             'num_faces': r['num_faces'],
                                             'box': r['box']
                                         })
        sess.commit()
        sess.close()
def run(search_file,
        base_dir,
        ext='.jpg',
        num_cores=8,
        workers_per_core=1,
        store_result=False,
        expname=None,
        sort=None,
        verbose=False):
    """Run a search consisting of a filter chain defined in an input 'search file'

    Arguments:
        search_file {str} -- path to a yml file
        base_dir {str} -- diretory to glob files to process

    Keyword Arguments:
        ext {str} -- file extension filter (default: {'.jpg'})
        num_cores {int} -- number of logical cores (default: {8})
        workers_per_core {int} -- number of workers per logical core (default: {1})
        store_result {bool} -- whether store measurements to DB (default: {False})
        expname {[type]} -- expname in DB. If not provided, will try to use from search_file (default: {None})
        sort {str or None} -- sort the paths by 'fie' (Linux FIE), 'name' (file name), or None (random)
        verbose {bool} -- [description] (default: {False})
    """
    if verbose:
        logzero.loglevel(logging.DEBUG)
    with open(search_file, 'r') as f:
        search_conf = yaml.load(f, Loader=yaml.FullLoader)

    # prepare CPU affinity
    assert num_cores == 1 or num_cores % 2 == 0, \
        "Must give an even number for num_cores or 1: {}".format(num_cores)
    if num_cores > 1:
        # Python 3: range objects don't support '+'; materialize lists first.
        # Half the workers pinned to each NUMA node starting at CPU_START[i].
        cpuset = list(range(CPU_START[0], int(CPU_START[0] + num_cores / 2))) + \
            list(range(CPU_START[1], int(CPU_START[1] + num_cores / 2)))
    else:
        cpuset = [
            CPU_START[0],
        ]
    logger.info("cpuset: {}".format(cpuset))
    psutil.Process().cpu_affinity(cpuset)

    # prepare expname
    if not expname:
        expname = search_conf['expname']
        logger.warning("No expname given on cmd. Use from {}: {}".format(
            search_file, expname))
    logger.info("Using expname: {}".format(expname))

    # prepare filter configs
    filter_configs = []
    for el in search_conf['filters']:
        filter_cls = globals()[el['filter']]
        fc = FilterConfig(filter_cls,
                          args=el.get('args', []),
                          kwargs=el.get('kwargs', {}))
        filter_configs.append(fc)

    # prepare and sort paths
    assert sort in (None, 'fie', 'name')
    base_dir = str(pathlib.Path(base_dir).resolve())
    paths = list(
        filter(lambda p: p.suffix == ext, pathlib.Path(base_dir).rglob('*')))
    paths = list(map(str, paths))
    if sort == 'fie':
        logger.info("Sort paths by FIE")
        paths = sorted(paths, key=get_fie_physical_start)
    elif sort == 'name':
        logger.info("Sort paths by name")
        paths = sorted(paths, key=lambda p: pathlib.Path(p).name)
    else:
        # deterministic pseudo-random
        logger.info("Shuffle paths")
        random.seed(42)
        random.shuffle(paths)
    logger.info("Find {} files under {}".format(len(paths), base_dir))

    # create shared data structure by workers
    manager = mp.Manager()
    context = Context(manager)

    # run the search with parallel workers
    tic = time.time()
    run_search(filter_configs, num_cores * workers_per_core, paths, context)
    elapsed = time.time() - tic
    logger.info("End-to-end elapsed time {:.3f} s".format(elapsed))
    logger.info(str(context.stats))

    keys_dict = {
        'expname': expname,
        'basedir': base_dir,
        'ext': ext,
        'num_workers': num_cores,
        'hostname': this_hostname
    }
    vals_dict = {
        'num_items': context.stats['num_items'],
        'avg_wall_ms': 1e3 * elapsed / context.stats['num_items'],
        'avg_cpu_ms':
        1e3 * context.stats['cpu_time'] / context.stats['num_items'],
        'avg_mbyteps': context.stats['bytes_from_disk'] * 1e-6 / elapsed,
    }
    logger.info(json.dumps(keys_dict))
    logger.info(json.dumps(vals_dict))
    logger.info("obj tput: {}".format(1000 // vals_dict['avg_wall_ms']))

    if store_result:
        logger.warning("Writing result to DB expname={}".format(expname))
        sess = dbutils.get_session()
        # Fixed typo: was 'dbmodles' (module is imported as 'dbmodels').
        dbutils.insert_or_update_one(sess,
                                     dbmodels.EurekaExp,
                                     keys_dict=keys_dict,
                                     vals_dict=vals_dict)
        sess.commit()
        sess.close()
def run(video_path='/mnt/hdd/fast20/video/VIRAT/mp4/VIRAT_S_000200_02_000479_000635.mp4',
        diff_threshold=100.,
        delta_frames=30,
        every_frame=10,
        detect=False,
        confidence=0.95,
        num_workers=8,
        smart=False,
        expname=None,
        verbose=False):
    """Run NoScope's frame skipping + image difference detection on videos. Optionally, pass passing frames to a DNN object detector.

    Keyword Arguments:
        video_path {str} -- Path of a video or directory of videos
        diff_threshold {float} -- For the diff detector to fire (default: {1000.})
        delta_frames {int} -- For diff detector: compare with the frame delta_frames ago (default: {30})
        detect {bool} -- If true, run DNN on passing frames (default: {False})
        every_frame {int} -- For frame skipping, run diff detector every `every_frame` (default: {1})
        num_workers {int} -- Parallel workers (default: {4})
        smart {bool} -- Use smart disk or not (default: {False})
        expname {[type]} -- If not None, will store to DB with expname (default: {None})

    Raises:
        ValueError: [description]
    """
    if verbose:
        logzero.loglevel(logging.DEBUG)

    # expand paths
    if os.path.isfile(video_path):
        paths = [video_path]
    elif os.path.isdir(video_path):
        paths = list(recursive_glob(video_path, '*.mp4'))
    else:
        raise ValueError("Invalid: {}".format(video_path))
    logger.info("Found {} files".format(len(paths)))

    # set CPU affinity
    assert num_workers == 1 or num_workers % 2 == 0, \
        "Must give an even number for num_workers or 1: {}".format(num_workers)
    if num_workers > 1:
        # Python 3: range needs int bounds and range objects don't support
        # '+'; use int() and list concatenation. Half the workers per node.
        cpuset = list(range(CPU_START[0], int(CPU_START[0] + num_workers / 2))) + \
            list(range(CPU_START[1], int(CPU_START[1] + num_workers / 2)))
    else:
        cpuset = [
            CPU_START[0],
        ]
    logger.info("cpuset: {}".format(cpuset))
    psutil.Process().cpu_affinity(cpuset)

    # Setup and start workers
    context = Context()
    workers = []
    for _ in range(num_workers):
        w = threading.Thread(target=worker,
                             args=(context, diff_threshold, detect),
                             kwargs={
                                 'targets': [
                                     'person',
                                 ],
                                 'confidence': confidence
                             })  # search for persons
        w.daemon = True
        w.start()
        workers.append(w)

    # Exclude preload time from measurement
    if smart:
        ss_client = SmartStorageClient(
            map_from_dir='/mnt/hdd/fast20/video/VIRAT/mp4', preload=True)

    tic = time.time()
    # time.clock() was removed in Python 3.8; process_time() gives CPU time.
    tic_cpu = time.process_time()
    total_frames = 0
    for path in paths:
        num_frames = get_num_video_frames(path)
        logger.info("Processing {} with {} frames".format(path, num_frames))
        window = []
        if smart:
            gen = smart_decoder(ss_client, path, context, every_frame)
        else:
            gen = cv2_decoder(path, context, every_frame)
        for i, (frame, frame_id) in enumerate(gen):
            window.append(frame)
            # Only compare against a reference once we are past the warm-up
            # window of delta_frames frames.
            reference = window.pop(0) if frame_id > delta_frames else None
            item = Item('{}-{}'.format(path, frame_id))
            item.array = frame
            context.q.put((item, reference))
            logger.debug("Pushed {}".format(item.src))
            if frame_id % 100 == 0:
                logger.info("Procssed {} frames, frame id {}, {}".format(
                    i, frame_id, path))
        total_frames += num_frames

    # push sentinels
    for _ in workers:
        context.q.put(None)
    logger.info("All frames pushed, waiting for queue join")
    context.q.join()
    for w in workers:
        w.join()

    elapsed = time.time() - tic
    elapsed_cpu = time.process_time() - tic_cpu
    logger.info("Elapsed {:.2f} s, Elapsed CPU {:.2f} s".format(
        elapsed, elapsed_cpu))
    logger.info(str(context.stats))
    logger.info("Total frames: {}".format(total_frames))

    keys_dict = {
        'expname': expname,
        'basedir': str(video_path),
        'ext': 'video',
        'num_workers': num_workers,
        'hostname': this_hostname
    }
    vals_dict = {
        'num_items': total_frames,
        # different from image because we have frame skipping
        'avg_wall_ms': 1e3 * elapsed / total_frames,
        'avg_cpu_ms': 1e3 * elapsed_cpu / total_frames,
        'avg_mbyteps': context.stats['bytes_from_disk'] * 1e-6 / elapsed,
    }
    logger.info(str(keys_dict))
    logger.info(str(vals_dict))

    if expname is not None:
        sess = dbutils.get_session()
        # Fixed typo: was 'dbmodles'.
        dbutils.insert_or_update_one(sess,
                                     dbmodels.EurekaExp,
                                     keys_dict=keys_dict,
                                     vals_dict=vals_dict)
        sess.commit()
        sess.close()
def main(base_dir='/mnt/hdd/fast20/jpeg/flickr2500',
         ext='jpg',
         num_workers=8,
         sort_fie=False,
         smart=False,
         batch_size=64,
         verbose=False,
         use_accimage=True,
         expname=None,
         loader_workers=None):
    """Benchmark a small ResNet over a directory of images via a DataLoader.

    Loads images (optionally through the smart-storage path), feeds fixed-size
    batches to a GPU ResNet, and reports wall/CPU/GPU time per image,
    optionally upserting the summary into the EurekaExp table.

    Keyword Arguments:
        base_dir {str} -- directory of input images
        ext {str} -- file extension to match (default: {'jpg'})
        num_workers {int} -- logical cores to pin (even number or 1)
        sort_fie {bool} -- sort paths by physical disk layout (FIE)
        smart {bool} -- use smart storage / OpenCV resize path
        batch_size {int} -- DataLoader batch size (default: {64})
        verbose {bool} -- enable debug logging
        use_accimage {bool} -- use the accimage torchvision backend (jpg only)
        expname {str or None} -- if set, store results to DB under this name
        loader_workers {int or None} -- DataLoader workers (default: num_workers)
    """
    assert ext == 'jpg' or not use_accimage, "accimage only works for jpg"
    if loader_workers is None:
        loader_workers = num_workers
    if verbose:
        logzero.loglevel(logging.DEBUG)

    # prepare CPU affinity
    assert num_workers == 1 or num_workers % 2 == 0, \
        "Must give an even number for num_workers or 1: {}".format(num_workers)
    if num_workers > 1:
        # Python 3: range needs int bounds and range objects don't support
        # '+'; use int() and list concatenation.
        cpuset = list(range(CPU_START[0], int(CPU_START[0] + num_workers / 2))) + \
            list(range(CPU_START[1], int(CPU_START[1] + num_workers / 2)))
    else:
        cpuset = [
            CPU_START[0],
        ]
    logger.info("cpuset: {}".format(cpuset))
    psutil.Process().cpu_affinity(cpuset)

    # prepare paths
    paths = list(recursive_glob(base_dir, '*.{}'.format(ext)))
    if sort_fie:
        logger.info("Sorting paths")
        paths = sorted(paths, key=get_fie_physical_start)
    else:
        # deterministic pseudo-random
        random.seed(42)
        random.shuffle(paths)
    logger.info("Total {} paths".format(len(paths)))

    if use_accimage:
        torchvision.set_image_backend('accimage')

    trn_name = 'trn10'  # taipei-scrubbing.py in Blazeit
    # trn_name = 'trn18'  # end2end.py in Blazeit
    trn_name_to_layers = \
        [('trn10', [1, 1, 1, 1]),
         ('trn18', [2, 2, 2, 2]),
         ('trn34', [3, 4, 6, 3])]
    trn_name_to_layers = dict(trn_name_to_layers)
    model = PytorchResNet(trn_name_to_layers[trn_name],
                          num_classes=2,
                          conv1_size=3,
                          conv1_pad=1,
                          nbf=16,
                          downsample_start=False)
    model.cuda()

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # prepare preprocessing pipeline
    if smart:
        # do resizing using OpenCV in ImageDataSet
        # because ndarray -> PIL conversion is an overhead
        preprocess = transforms.Compose([transforms.ToTensor(), normalize])
    else:
        preprocess = transforms.Compose(
            [transforms.Resize(RESOL),
             transforms.ToTensor(), normalize])

    manager = mp.Manager()
    context = Context(manager, qsize=len(paths) + 1)

    # hack for smart batch and baseline-sorted: enqueue all paths at the
    # beginning to force sequential access. NOTE: a bare map() is lazy in
    # Python 3 and would never execute q.put — use an explicit loop.
    for p in paths:
        context.q.put(p)

    image_dataset = ImageDataset(paths,
                                 context,
                                 transform=preprocess,
                                 smart=smart,
                                 sort_fie=sort_fie)
    loader = torch.utils.data.DataLoader(image_dataset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=loader_workers,
                                         pin_memory=False)

    logger.info("warm up with a fake batch")
    fake_batch = torch.zeros([batch_size, 3] + list(RESOL),
                             dtype=torch.float32)
    fake_batch = fake_batch.cuda()
    # Python 3 print function (was a Py2 print statement).
    print(fake_batch.shape, fake_batch.dtype)
    _ = model(fake_batch)

    loaderit = iter(loader)
    logger.info("Type of iter(loader): {}".format(type(loaderit).__name__))

    tic = time.time()
    # time.clock() was removed in Python 3.8; process_time() gives CPU time.
    tic_cpu = time.process_time()
    num_batches = 0
    last_batch_time = tic
    elapsed_gpu = 0.

    for _ in range(int(len(paths) / batch_size)):
        # NOTE(review): pokes DataLoader iterator internals (_get_data,
        # tasks_outstanding, _try_put_index) to bypass in-order batching;
        # tied to a specific torch version — verify on upgrade.
        idx, data = loaderit._get_data()
        loaderit.tasks_outstanding -= 1
        loaderit._try_put_index()
        logger.info("Get internal batch {}".format(idx))
        image_tensor = data
        image_tensor = image_tensor.cuda()
        tic_gpu = time.time()
        output = model(image_tensor)
        now = time.time()
        logger.info("Run batch {} in {:.3f} ms".format(
            num_batches, 1000 * (now - last_batch_time)))
        logger.info("Batch GPU time: {:.3f} ms".format(1000 *
                                                       (now - tic_gpu)))
        last_batch_time = now
        elapsed_gpu += (now - tic_gpu)
        num_batches += 1

    elapsed = time.time() - tic
    elapsed_cpu = time.process_time() - tic_cpu
    elapsed_cpu += context.stats['cpu_time']

    logger.info("# batches: {}".format(num_batches))
    logger.info(
        "GPU time per batch {:.3f} ms, GPU time per image {:.3f} ms".format(
            1000 * elapsed_gpu / num_batches,
            1000 * elapsed_gpu / num_batches / batch_size))

    num_items = len(paths)
    bytes_from_disk = context.stats['bytes_from_disk']
    logger.info(
        "Elapsed {:.3f} ms / image, CPU elapsed {:.3f} ms / image".format(
            1000 * elapsed / num_items, 1000 * elapsed_cpu / num_items))
    logger.info(str(context.stats))

    keys_dict = {
        'expname': expname,
        'basedir': base_dir,
        'ext': ext,
        'num_workers': num_workers,
        'hostname': this_hostname
    }
    vals_dict = {
        'num_items': num_items,
        'avg_wall_ms': 1e3 * elapsed / num_items,
        'avg_cpu_ms': 1e3 * elapsed_cpu / num_items,
        'avg_mbyteps': bytes_from_disk * 1e-6 / elapsed,
    }
    logger.info(str(keys_dict))
    logger.info(str(vals_dict))

    if expname:
        sess = dbutils.get_session()
        # Fixed typo: was 'dbmodles'.
        dbutils.insert_or_update_one(sess,
                                     dbmodels.EurekaExp,
                                     keys_dict=keys_dict,
                                     vals_dict=vals_dict)
        sess.commit()
        sess.close()
def run(base_dir, ext="jpg", store_results='', smart=False):
    """Run MobileNetV1 inference over a directory, timing read/decode/infer.

    Downloads the checkpoint if missing, builds the TF graph once, then for
    each image: read bytes, decode with OpenCV, resize to the network's
    input size, and run softmax inference. Timing is cumulative from a
    single `tic`: read_ms <= decode_ms <= total_ms. Results are optionally
    upserted into the AppExp table.

    Arguments:
        base_dir {str} -- directory to recursively glob images from
        ext {str} -- file extension to match (default: {"jpg"})
        store_results -- truthy to write measurements to DB
        smart {bool} -- smart-storage path (not implemented)
    """
    if smart:
        raise NotImplementedError

    using_gpu = tf.test.is_gpu_available()
    if using_gpu:
        logger.info("Running on GPU")
    else:
        from tensorflow.python.framework import test_util as tftest_util
        assert tftest_util.IsMklEnabled(), \
            "This tensorflow is not compiled with MKL. Abort."
        logger.warning("Running on CPU")

    results = []

    # Download and uncompress model
    checkpoint_url = "http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz"
    checkpoints_dir = s3dexp.config.CKPT_DIR
    checkpoint_path = os.path.join(checkpoints_dir,
                                   'mobilenet_v1_1.0_224.ckpt')
    if not tf.gfile.Exists(checkpoints_dir):
        tf.gfile.MakeDirs(checkpoints_dir)
        dataset_utils.download_and_uncompress_tarball(checkpoint_url,
                                                      checkpoints_dir)

    with tf.Graph().as_default():
        logger.info("Creating compute graph ...")
        ########################################
        # Select the model
        ########################################
        network_fn = nets_factory.get_network_fn('mobilenet_v1',
                                                 num_classes=1001,
                                                 is_training=False)
        image_size = mobilenet_v1.mobilenet_v1.default_image_size

        ########################################
        # Define input and preprocessing tensors
        ########################################
        # crucial to specify dtype=tf.uint8. Otherwise will get wrong predictions.
        inputs = tf.placeholder(dtype=tf.uint8,
                                shape=(None, image_size, image_size, 3))
        preprocessing_fn = get_preprocessing('mobilenet_v1')
        processed_images = tf.map_fn(
            lambda x: preprocessing_fn(x, image_size, image_size),
            inputs,
            dtype=tf.float32)

        ########################################
        # Create the compute graph
        ########################################
        logits, _ = network_fn(processed_images)
        probabilities = tf.nn.softmax(logits)

        # https://github.com/tensorflow/tensorflow/issues/4196
        # https://www.tensorflow.org/programmers_guide/using_gpu
        config = tf.ConfigProto()
        # config.gpu_options.allow_growth = True
        # config.gpu_options.per_process_gpu_memory_fraction = 0.4

        with tf.Session(config=config) as sess:
            logger.info("Loading checkpoint from %s" % checkpoint_path)
            saver = tf.train.Saver()
            saver.restore(sess, checkpoint_path)

            logger.info("Warm up with a fake image")
            fakeimages = np.random.randint(0,
                                           256,
                                           size=(1, image_size, image_size, 3),
                                           dtype=np.uint8)
            _ = sess.run(probabilities, feed_dict={inputs: fakeimages})

            ########################################
            # walk through directory and inference
            ########################################
            for path in recursive_glob(base_dir, "*.{}".format(ext)):
                tic = time.time()
                if not smart:
                    # 0. read from disk
                    with open(path, 'rb') as f:
                        buf = f.read()
                    read_time = time.time() - tic
                    # 1. image decode (uint8: raw image bytes are unsigned;
                    # that is the dtype cv2.imdecode expects)
                    arr = cv2.imdecode(np.frombuffer(buf, np.uint8),
                                       cv2.IMREAD_COLOR)
                    # NOTE: cumulative since tic, i.e. includes read time.
                    decode_time = time.time() - tic
                else:
                    raise NotImplementedError
                h, w = arr.shape[:2]

                # 2. Run inference
                # resize
                arr_resized = cv2.resize(arr, (image_size, image_size),
                                         interpolation=cv2.INTER_AREA)
                images = np.expand_dims(arr_resized, 0)
                _ = sess.run(probabilities, feed_dict={inputs: images})
                all_time = time.time() - tic

                logger.debug(
                    "Read {:.1f} ms, Decode {:.1f}, Total {:.1f}. {}".format(
                        read_time * 1000, decode_time * 1000, all_time * 1000,
                        path))
                results.append({
                    'path': path,
                    'read_ms': read_time * 1000,
                    'decode_ms': decode_time * 1000,
                    'total_ms': all_time * 1000,
                    'size': len(buf),
                    'height': h,
                    'width': w
                })

    if store_results:
        logger.info("Writing {} results to DB".format(len(results)))
        dbsess = dbutils.get_session()
        for r in results:
            keys_dict = {
                'path': r['path'],
                'basename': os.path.basename(r['path']),
                'expname': 'mobilenet_inference',
                'device': 'gpu' if using_gpu else 'cpu',
                'disk': 'smart' if smart else 'hdd'
            }
            # Fixed typo: was 'dbmodles'.
            dbutils.insert_or_update_one(dbsess,
                                         dbmodels.AppExp,
                                         keys_dict=keys_dict,
                                         vals_dict={
                                             'read_ms': r['read_ms'],
                                             'decode_ms': r['decode_ms'],
                                             'total_ms': r['total_ms'],
                                             'size': r['size'],
                                             'height': r['height'],
                                             'width': r['width']
                                         })
        dbsess.commit()
        dbsess.close()