def mapper(videohash, metadata):
    print('videohash[%s]' % videohash)
    print('hdfs_path[%s]' % metadata['hdfs_path'])
    filename = 'hardcodedvideo.' + metadata['extension']
    try:
        picarus.io._record_to_file(metadata, filename)
    except IOError:
        hadoopy.counter('INPUT_ERROR', 'REMOTE READ FAILED')
        return
    min_interval = float(os.environ['MIN_INTERVAL'])
    resolution = float(os.environ['RESOLUTION'])
    try:
        iter1 = lambda: viderator.frame_iter(filename, frame_skip=5, frozen=True)
        iter2 = lambda: viderator.convert_video_ffmpeg(filename, frame_skip=5, frozen=True)
        kf = keyframe.Histogram(min_interval)
        # Guard the yield loop instead of returning early so the 'finally'
        # below always removes the temp file after all keyframes are yielded
        try:
            for k, v in keyframes(iter1, iter2, metadata, kf, resolution):
                yield k, v
        except:
            hadoopy.counter('INPUT_ERROR', 'VIDEO_READ_ERROR')
            return
    finally:
        os.remove(filename)

def map(self, key, value):
    """

    Args:
        key: Image name
        value: Image as jpeg byte data

    Yields:
        A tuple in the form of (key, value)
        key: Imagename-face-x0<x_tl_val>-y0<y_tl_val>-x1<x_br_val>-y1<y_br_val>
        value: Cropped face binary data
    """
    try:
        image_pil, image_cv = self._load_cv_image(value)
    except:
        hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
        return
    try:
        faces = self._detect_faces(image_cv)
    except:
        hadoopy.counter('DATA_ERRORS2', 'ImageLoadError')
        return
    if not faces:
        return
    if self._output_boxes:
        yield key, (value, faces)
    else:
        for x0, y0, x1, y1 in faces:
            image_pil_crop = image_pil.crop((x0, y0, x1, y1))
            out_fp = StringIO.StringIO()
            image_pil_crop.save(out_fp, 'JPEG')
            out_fp.seek(0)
            yield '%s-face-x0%d-y0%d-x1%d-y1%d' % (key, x0, y0, x1, y1), out_fp.read()

def collect(self, key, value):
    if len(self.data) == 0:
        self.first_key = key
    if self.ncols is None:
        self.ncols = len(value)
        print >>sys.stderr, "Matrix size: %i columns" % (self.ncols)
    else:
        # TODO should we warn and truncate here?
        # No. That seems like something that will introduce bugs.
        # Maybe we could add a "liberal" flag for that.
        assert len(value) == self.ncols
    self.data.append(value)
    self.nrows += 1
    if len(self.data) > self.blocksize * self.ncols:
        hadoopy.counter('Program', 'QR Compressions', 1)
        # Compress the buffered rows via a QR factorization
        self.compress()
    # Write status updates so Hadoop doesn't complain
    if self.nrows % 50000 == 0:
        hadoopy.counter('Program', 'rows processed', 50000)

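# A minimal sketch of the compress() step assumed above (hypothetical; the
# real method may differ): stack the buffered rows into a tall-and-skinny
# matrix, factor it, and keep only the R factor as the new row buffer. Since
# A^T A = R^T R, the ncols x ncols upper-triangular R summarizes the block
# for downstream QR/SVD work while keeping memory bounded.
def compress(self):
    import numpy
    if not self.data:
        return
    A = numpy.array(self.data)  # tall-and-skinny block, len(data) x ncols
    R = numpy.linalg.qr(A)[1]   # R is ncols x ncols
    self.data = [list(row) for row in R]
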
def map(self, event_filename, video_data):
    hadoopy.counter('CombinedFeatures', 'DontHave')
    sys.stderr.write('%s\n' % str(event_filename))
    for event_filename, features in self.r.map(event_filename, video_data):
        sys.stderr.write('%s\n' % str(event_filename))
        for x in self.b.map(event_filename, features):
            yield x

def _map(self, row, image_binary):
    try:
        image = Image.open(StringIO.StringIO(image_binary))
        if not hasattr(image, '_getexif'):
            yield row, json.dumps({})
        else:
            image_tags = image._getexif()
            if image_tags is None:
                yield row, json.dumps({})
            else:
                # Binary tag values are base64 encoded so they survive JSON
                yield row, json.dumps(dict((name,
                                            base64.b64encode(image_tags[id])
                                            if isinstance(image_tags[id], str)
                                            else image_tags[id])
                                           for id, name in TAGS.items()
                                           if id in image_tags))
    except:
        sys.stdout.flush()
        hadoopy.counter('STATUS', 'badRows')
    else:
        sys.stdout.flush()
        hadoopy.counter('STATUS', 'goodRows')

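# Example of the kind of row the map above emits (hypothetical values). The
# numeric EXIF ids are mapped to names via PIL.ExifTags.TAGS, and raw byte
# values are base64 encoded so json.dumps can serialize the dict:
#   ('row1', '{"Make": "Canon", "DateTime": "2011:01:01 00:00:00", ...}')
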
def map(self, image_hash, image_data):
    """

    Args:
        image_hash: Unique image string
        image_data: Binary image data

    Yields:
        A tuple in the form of ((image_hash, sim), (pred, image_data))
        pred: Dict of classifier_name -> prediction
        image_data: Cropped block as JPEG bytes
    """
    try:
        image = Image.open(StringIO.StringIO(image_data))
    except:
        hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
        return
    bgen = imfeat.BlockGenerator(image, imfeat.CoordGeneratorRectRotate,
                                 output_size=(self._image_height, self._image_width),
                                 step_delta=(self._image_height / 2, self._image_width / 2),
                                 angle_steps=1)
    for num, (image_out, sim) in enumerate(bgen):
        feature = np.asfarray(imfeat.compute(self._feat, image_out)[0])
        pred = dict((classifier_name, classifier.predict(feature))
                    for classifier_name, classifier in self._classifiers)
        if any(x for x in pred.values() if x[0][0] * x[0][1] > 0):
            # At least 1 class needs to be > 0
            image_out_fp = StringIO.StringIO()
            imfeat.convert_image(image_out, ['RGB']).save(image_out_fp, 'JPEG')
            image_out_fp.seek(0)
            yield (image_hash, sim), (pred, image_out_fp.read())

def map(self, key, value):
    """

    Args:
        key: Image name
        value: Image as jpeg byte data

    Yields:
        A tuple in the form of (key, value)
        key: Constant dummy value
        value: (l2sqr_dist, value)
    """
    try:
        image = imfeat.resize_image(imfeat.image_fromstring(value, {'type': 'numpy', 'mode': 'bgr', 'dtype': 'uint8'}), 100, 100)
    except:
        hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
        return
    # Distance metric
    diff = image - self.target_image
    dist = np.sum(diff * diff)
    # Accumulate the nearest image seen so far (nothing is yielded here;
    # the single best match is emitted at the end of the map task)
    if dist < self.min_dist:
        self.min_dist = dist
        self.min_key = key
        self.min_value = value

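# A hypothetical close() to pair with the map above (hadoopy calls close()
# after the last input of a task); since map() only accumulates, the single
# nearest match is emitted here under the constant dummy key described in
# the docstring. A sketch, not the original implementation.
def close(self):
    if self.min_key is not None:
        yield 0, (self.min_dist, self.min_value)
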
def reduce(self, image_hash, values):
    """

    Args:
        image_hash: (see mapper)
        values: Iterator of values (see mapper)

    Yields:
        A tuple in the form of (image_hash, value)
        image_hash: Image hash
        value: The provided value (not the prediction)
    """
    predictions = None
    out_val = None
    for value in values:
        if isinstance(value, dict):
            predictions = value
        else:
            out_val = value
    if predictions is None or out_val is None:
        hadoopy.counter('DATA_ERR', 'MISSING_PREDICTIONS_OR_DATA')
        return
    label, conf = predictions[self._class_name][0]
    if (self._class_thresh <= label * conf) == (self._output_class == 1):
        # Both true or both false
        yield image_hash, out_val

def convert_matrix(self, matrix):
    # Fraction of nonzero entries (density); lower means sparser
    sparsity = len(matrix.nonzero()[0]) / float(matrix.size)
    print('Sparsity[%f]' % sparsity)
    if sparsity < self.min_sparsity:
        hadoopy.counter('SPARSITY', 'SPARSE')
        return sp.sparse.csr_matrix(matrix)
    hadoopy.counter('SPARSITY', 'DENSE')
    return matrix

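# A minimal, self-contained illustration (assumed, not from the original
# source) of why callers guard the mixed return types above: in-place '+='
# between a scipy CSR matrix and a dense array can raise NotImplementedError
# (or TypeError, depending on the scipy version), so accumulation code (see
# the decision-tree trainer later in this collection) falls back to binary
# '+', letting scipy choose the result type.
import numpy as np
import scipy.sparse
acc = scipy.sparse.csr_matrix(np.eye(3))
delta = np.ones((3, 3))
try:
    acc += delta           # fast path when the types cooperate
except (NotImplementedError, TypeError):
    acc = acc + delta      # fallback for sparse/dense mixes
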
def _map(self, row, input_binary):
    try:
        yield row, self.job.process_binary(input_binary)
    except:
        sys.stdout.flush()
        hadoopy.counter('ERROR', 'BadRow')
    else:
        hadoopy.counter('STATUS', 'GoodRow')

def _map(self, row, image_binary):
    try:
        image = imfeat.image_fromstring(image_binary)
    except:
        hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
        return  # 'image' is undefined past this point
    out = self.sp(image, 25)
    hulls = self.sp.label_image_to_contours(out, 1.)
    yield row, json.dumps(hulls, separators=(',', ':'))

def map(self, image_id, image_binary):
    image = resize(imfeat.image_fromstring(image_binary))
    print(image.shape)
    st = time.time()
    box_num = -1
    for box_num, (box, f) in enumerate(feature.image_patch_features_dense(image, normalize_box=True)):
        yield (image_id, box.tolist()), np.dot(self.coefs, f.reshape((f.size, 1))).ravel() + self.intercepts
    hadoopy.counter('stats', 'num_boxes', box_num + 1)
    print('ImageTime[%f]' % (time.time() - st))

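# Shape sketch for the map above (assumed from how the arrays are used):
# self.coefs is (n_classifiers, d) and self.intercepts is (n_classifiers,),
# so each dense patch feature f of length d is scored against every linear
# classifier in a single matrix product:
#   np.dot(coefs, f.reshape((d, 1))).ravel() + intercepts  ->  (n_classifiers,)
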
def amap(self, key, feat):
    feat = np.array([np.fromstring(feat, dtype=np.float32)])
    if self.canopies.size:
        nearest_dist = self.nn(feat, self.canopies)[1]
        if nearest_dist > self.hard_dist:
            hadoopy.counter('canopy_cluster', 'canopy_count')
            self.canopies = np.concatenate((self.canopies, feat))
    else:
        hadoopy.counter('canopy_cluster', 'canopy_count')
        self.canopies = feat

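# A sketch of the nn() helper assumed by the canopy maps in this collection
# (hypothetical; the original may use a library such as distpy): brute-force
# nearest neighbor returning (index, distance), so that [1] picks the
# distance from feat to the closest existing canopy.
import numpy as np

def nn(feat, canopies):
    # feat is 1 x d, canopies is n x d; squared L2 distance to each canopy
    dists = ((canopies - feat) ** 2).sum(axis=1)
    idx = int(np.argmin(dists))
    return idx, dists[idx]
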
def map(self, name, image_or_data):
    if isinstance(image_or_data, str):
        try:
            image = imfeat.image_fromstring(image_or_data)
        except:
            hadoopy.counter("DATA_ERRORS", "ImageLoadError")
            return
    else:
        image = image_or_data
    yield name, self._feat(image)

def _map(self, row, image_binary):
    try:
        if not image_binary:
            raise ValueError
        image = imfeat.image_fromstring(image_binary)
    except:
        hadoopy.counter('ERROR', 'FEATURE')
        print('Error on row[%r]' % row)
    else:
        yield row, picarus.api.np_tostring(self._feat.compute_feature(image))

def reducer(k, vs):
    for v in vs:
        print(type(v))
        if isinstance(v, dict):
            metadata = v
        else:
            feature_hash = v
        try:
            # Evaluating the tuple raises NameError until both a metadata
            # dict and a feature hash have been seen for this key
            yield k, (feature_hash, metadata)
        except NameError:
            hadoopy.counter("ERRORS", "JoinsFailed")

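# Hypothetical input/output sketch for the join above. The reducer pairs
# every feature hash with the most recent metadata dict seen under the same
# key; any iteration before both names are bound trips the NameError branch
# and is counted as a failed join.
#   vs = [{'path': 'a'}, 'hash1', 'hash2']  (metadata arrives first)
#   yields: (k, ('hash1', {'path': 'a'}))  and  (k, ('hash2', {'path': 'a'}))
#   (the first iteration, the metadata dict itself, increments
#    ERRORS/JoinsFailed because no feature_hash is bound yet)
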
def map(self, url, value):
    try:
        data = download_file(url)
    except Exception:
        hadoopy.counter('FILE_DOWNLOADER', 'Exception')
    else:
        if self.output_type == 'meta':
            yield url, (data, value)
        elif self.output_type == 'image':
            yield url, data
        else:
            raise ValueError('OutputType[%s]' % self.output_type)

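# A sketch of the download_file() helper assumed above (hypothetical): fetch
# the URL's bytes with a timeout; any failure surfaces as an exception that
# the map counts under FILE_DOWNLOADER/Exception.
import urllib2

def download_file(url):
    return urllib2.urlopen(url, timeout=30).read()
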
def map(self, name, image_data):
    try:
        image = Image.open(StringIO.StringIO(image_data))
    except:
        hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
        return
    image = image.resize((self._image_length, self._image_length))
    try:
        yield name, np.asfarray(imfeat.compute(self._feat, image)[0])
    except ValueError, e:
        print(e)
        hadoopy.counter('DATA_ERRORS', 'UnkImageType')
        return

def train(classifier_name, classifier_extra, label_values):
    import classipy
    label_values = list(label_values)
    hadoopy.counter('FeatureShape', str(len(label_values[0][1])))
    if classifier_name == 'svmlinear':
        return classipy.SVMLinear(options={'B': '1'}).train(label_values)
    elif classifier_name == 'svm':
        return classipy.SVM(options={'t': '2'}).train(label_values)
    elif classifier_name == 'svm_hik':
        return classipy.SVMScikit(kernel=classipy.kernels.histogram_intersection).train(label_values)
    elif classifier_name == 'svmlinear_autotune':

        def wrapped_optimizer(*args, **kw):
            for x in pyram.exponential_grid(*args, **kw):
                hadoopy.counter('X-Val', 'Rounds')
                yield x

        b = classipy.select_parameters(classipy.SVMLinear, label_values,
                                       {'c': (10**-2, 10**1, 10)},
                                       wrapped_optimizer, options={'B': '1'})[1]
        print(b)
        return classipy.SVMLinear(b).train(label_values)
    elif classifier_name == 'plslinearsvmxval':
        import random
        import sys
        num_dims = label_values[0][1].size
        # Set the parameters by cross-validation
        # Alternative: add a PLS dimensionality-reduction stage, e.g.
        # ('pls', sklearn.pls.PLSRegression(n_components=...)) tuned over
        # 'pls__n_components': [x for x in [1, 8, 16, 32, 64, 128, 256] if x <= num_dims]
        tuned_parameters = [{'svm__C': [.001, .01, .1, 1, 10, 100]}]
        p = sklearn.pipeline.Pipeline([('svm', sklearn.svm.SVC(kernel=classipy.kernels.histogram_intersection,
                                                               scale_C=True))])
        #p = sklearn.grid_search.GridSearchCV(p, tuned_parameters, score_func=sklearn.metrics.f1_score)
        # Subsample to at most 100 examples per class
        num_neg = 0
        num_pos = 0
        random.shuffle(label_values)
        new_label_values = []
        for l, v in label_values:
            if l == 1:
                if num_pos < 100:
                    new_label_values.append((l, v))
                    num_pos += 1
            else:
                if num_neg < 100:
                    new_label_values.append((l, v))
                    num_neg += 1
        sys.stderr.write('Num Neg[%d] Pos[%d]\n' % (num_neg, num_pos))
        p.fit(*zip(*new_label_values)[::-1])  # fit(features, labels)
        return p  # or p.best_estimator_ when grid search is enabled
    else:
        raise ValueError('Unknown classifier [%s]' % classifier_name)

def map(self, image_id, image_binary):
    if self.num_inputs <= 0:
        return
    self.num_inputs -= 1
    pyramid = np.zeros((len(self.ids), np.sum(self.num_bins_sqr)), dtype=np.int32)
    num_boxes = 0
    for (image_id, box), confs in super(Mapper, self).map(image_id, image_binary):
        num_boxes += 1
        cy = (box[2] + box[0]) / 2
        cx = (box[1] + box[3]) / 2
        offset = 0
        cur_bins = []
        for l in range(self.levels):
            cur_bins.append(offset + int(cy * self.num_bins[l]) * self.num_bins[l] + int(cx * self.num_bins[l]))
            offset += self.num_bins_sqr[l]
        #if num_boxes < 1000 and num_boxes % 100:
        #    print((box, cy, cx, cur_bins))
        inds = (confs >= 0).nonzero()[0]
        hadoopy.counter('STATS', 'num_pos', inds.size)
        hadoopy.counter('STATS', 'num_neg', confs.size - inds.size)
        hadoopy.counter('STATS', 'total', confs.size)
        if inds.size:
            for cur_bin in cur_bins:
                pyramid[inds, cur_bin] += 1
    hadoopy.counter('STATS', 'sz-%s' % str(pyramid.shape))
    if np.any(pyramid):
        pyramid = pyramid * (self.bin_weight / float(num_boxes))
        for exemplar_num, row in enumerate(pyramid):
            yield exemplar_num, (image_id, row)

def map(self, key, feat):
    stime = time.time()
    feat = self._strto2d(feat)
    self.ftime += time.time() - stime
    stime = time.time()
    if self.canopies.size:
        nearest_dist = self.nn(feat, self.canopies)[1]
        if nearest_dist > self.hard_dist:
            hadoopy.counter('canopy_cluster', 'canopy_count')
            self.canopies = np.concatenate((self.canopies, feat))
    else:
        hadoopy.counter('canopy_cluster', 'canopy_count')
        self.canopies = feat
    self.gtime += time.time() - stime

def map(self, image_name, image_label_points):
    """

    Args:
        image_name: A string (if not then we skip the input)
        image_label_points: (image, [(label, points), ...]) where points is Nx2 (y, x)
    """
    if not isinstance(image_name, str):
        hadoopy.counter('SKIPPED_INPUTS', 'KeyNotString')
        return
    with self.timer('Build root_labels_image_points'):
        image, label_points = image_label_points
        root_labels_image_points = {}
        if self.dp:
            root_label_points = self.dp.group_lowest_nodes(image, label_points)
            for root, label_points in root_label_points:
                labels = np.array([x[0] for x in label_points], dtype=np.int32)
                image_points = [(image, x[1]) for x in label_points]
                root_labels_image_points[root] = labels, image_points
        else:
            labels = np.array([x[0] for x in label_points], dtype=np.int32)
            image_points = [(image, x[1]) for x in label_points]
            root_labels_image_points[0] = labels, image_points
    with self.timer('Run train_map_hists and sum qlss/qrss'):
        for root, (labels, image_points) in root_labels_image_points.items():
            if self.level != int(np.floor(np.log2(root + 1))):
                # We are done processing this root
                continue
            qls, qrs = train_map_hists(labels, image_points, self.feats, self.num_classes)
            qls, qrs = self.convert_matrix(qls), self.convert_matrix(qrs)
            try:
                self.root_count[root] += 1
            except KeyError:
                self.root_count[root] = 1
            try:
                try:
                    self.qlss[root] += qls
                except NotImplementedError:  # scipy sparse may reject in-place add
                    self.qlss[root] = self.qlss[root] + qls
                try:
                    self.qrss[root] += qrs
                except NotImplementedError:
                    self.qrss[root] = self.qrss[root] + qrs
                self.num_images[root] += 1
            except KeyError:
                if self.max_root_buffer <= len(self.qlss):
                    for x in self.flush_node(root):
                        yield x
                self.qlss[root] = qls
                self.qrss[root] = qrs
                self.num_images[root] = 1

def stop_time(self, name):
    try:
        dur = time.time() - self._pending_times[name]
    except KeyError:
        hadoopy.counter('stop_time', 'timer_failed')
    else:
        try:
            time_stats = self._times[name]
            # Min/Max/Sum/Count
            self._times[name] = [min(time_stats[0], dur),
                                 max(time_stats[1], dur),
                                 time_stats[2] + dur,
                                 time_stats[3] + 1]
        except KeyError:
            self._times[name] = [dur, dur, dur, 1]

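# A minimal sketch of the start_time() counterpart assumed above
# (hypothetical): record the wall-clock start under the timer's name so
# stop_time() can compute the duration and fold it into Min/Max/Sum/Count.
def start_time(self, name):
    self._pending_times[name] = time.time()
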
def map(self, image_id, image_binary):
    image = imfeat.image_fromstring(image_binary)
    print(image.shape)
    st = time.time()
    box_num = -1
    for box_num, (box, f) in enumerate(feature.image_patch_features_dense(image, normalize_box=True)):
        scores = np.dot(self.coefs, f.reshape((f.size, 1))) + self.intercepts
        pred_common = [image_id, box.tolist(), f.tolist()]
        for score, preds in zip(scores, self.preds):
            pred = self.output_formatter([float(score[0])] + pred_common)
            if len(preds) >= self.max_hard:
                heapq.heappushpop(preds, pred)
            else:
                heapq.heappush(preds, pred)
    hadoopy.counter('stats', 'num_boxes', box_num + 1)
    print('ImageTime[%f]' % (time.time() - st))

def mapper(key, value):
    """

    Args:
        key, value: either of
            - the output of clustering (/partition or /samples),
            - the output of video keyframing (/allframes)
            - (hash, image_data) input
            - (hash, image_metadata) input

    Env vars:
        IMAGE_TYPE: record, cluster, kv, frame
        THUMB_SIZE: longest dimension for the thumbnail images

    Yields:
        image_hash, image_data: serialized jpeg data for thumbnail
    """
    if os.environ['IMAGE_TYPE'] == 'record':
        # Image input format
        image_hash, image_metadata = key, value
        image_file = picarus.io._record_to_file(image_metadata)
    elif os.environ['IMAGE_TYPE'] == 'cluster':
        # Cluster output /partition or /sample
        cluster_index, (image_hash, image_data) = key, value
        image_file = StringIO.StringIO(image_data)
    elif os.environ['IMAGE_TYPE'] == 'kv':
        # hash, image bytes
        image_hash, image_data = key, value
        image_file = StringIO.StringIO(image_data)
    elif os.environ['IMAGE_TYPE'] == 'frame':
        image_hash, image_metadata = key, value
        image_file = StringIO.StringIO(image_metadata['image_data'])
    thumb_size = int(os.environ['THUMB_SIZE'])
    try:
        image = Image.open(image_file)
        image.thumbnail((thumb_size, thumb_size))
    except:
        hadoopy.counter('INPUT_ERROR', 'IMAGE_DATA_ERROR')
        return  # 'image' is unusable past this point
    image = image.convert('RGB')
    s = StringIO.StringIO()
    image.save(s, 'JPEG')
    s.seek(0)
    # Output type: kv
    yield image_hash, s.buf

def map(self, event_filename, video_data):
    """

    Args:
        event_filename: Tuple of (event, filename)
        video_data: Binary video data

    Yields:
        A tuple in the form of ((event, filename, frame_num, frame_time), frame_data)
    """
    ext = '.' + event_filename[1].rsplit('.', 1)[1]
    event, filename = event_filename
    heap = [(float('-inf'), None)] * self.max_outputs
    with tempfile.NamedTemporaryFile(suffix=ext) as fp:
        fp.write(video_data)
        fp.flush()
        sys.stderr.write('Prevideo\n')
        try:
            for frame_num, frame_time, frame in viderator.frame_iter(fp.name, frozen=True,
                                                                     frame_skip=self.frame_skip):
                sys.stderr.write('FrameNum[%d]\n' % frame_num)
                if frame_num >= self.max_frames_per_video:
                    break
                frame_orig = frame
                if self.remove_bars:
                    sz = self.remove_bars.find_bars(frame)
                    frame = frame[sz[0]:sz[1], sz[2]:sz[3], :]
                    if not frame.size:  # Empty
                        continue
                st = time.time()
                c = self._feat(frame)[0]
                sys.stderr.write('FrameTime[%f]\n' % (time.time() - st))
                if c > heap[0][0]:
                    if self.output_frame:
                        heapq.heappushpop(heap, (c, ((event, filename, frame_num, frame_time),
                                                     imfeat.image_tostring(frame_orig, 'JPEG'))))
                    else:
                        heapq.heappushpop(heap, (c, ((event, filename, frame_num, frame_time), '')))
        except IOError:
            hadoopy.counter('PICARUS', 'CantProcessVideo')
    for x in heap[-self.max_outputs_per_video:]:
        heapq.heappushpop(self.heap, x)

def map(self, event_filename, video_data):
    """

    Args:
        event_filename: Tuple of (event, filename)
        video_data: Binary video data

    Yields:
        A tuple in the form of ((event, filename), value) where value is a dict with contents
            prev_frame_time:
            prev_frame_num:
            prev_frame:
            frame_time:
            frame_num:
            frame:
    """
    ext = '.' + event_filename[1].rsplit('.', 1)[1]
    with tempfile.NamedTemporaryFile(suffix=ext) as fp:
        fp.write(video_data)
        fp.flush()
        prev_frame_time = None
        prev_frame_num = None
        prev_frame = None
        try:
            for (frame_num, frame_time, frame), iskeyframe in self.kf(viderator.frame_iter(fp.name,
                                                                                           frame_skip=self.frame_skip,
                                                                                           frozen=True)):
                if self.max_time < frame_time:
                    break
                if iskeyframe and prev_frame is not None:
                    yield event_filename, {'prev_frame_time': prev_frame_time,
                                           'prev_frame_num': prev_frame_num,
                                           'prev_frame': imfeat.image_tostring(prev_frame, 'JPEG'),
                                           'frame_time': frame_time,
                                           'frame_num': frame_num,
                                           'frame': imfeat.image_tostring(frame, 'JPEG')}
                prev_frame_time = frame_time
                prev_frame_num = frame_num
                prev_frame = frame
        except Exception, e:
            print(e)
            hadoopy.counter('VIDEO_ERROR', 'FFMPEGCantParse')

def map(self, key, value):
    """

    Args:
        key: Image name
        value: Image as jpeg byte data

    Yields:
        A tuple in the form of (dist, (key, value))
        dist: Face distance for the image
        key/value: The input key and jpeg data, passed through
    """
    try:
        image = self._load_cv_image(value)
    except:
        hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
        return
    dist = self._compute_face_distance(image)
    yield dist, (key, value)

def map(self, image_hash, feature):
    """

    Args:
        image_hash: Unique image string
        feature: Numpy image feature

    Yields:
        A tuple in the form of (classifier_name, label_value)
        classifier_name: String representing the classifier
        label_value: (label, feature) where label is an int
    """
    try:
        class_labels = self._hash_class_labels[image_hash]
    except KeyError:
        hadoopy.counter('DATA_ERRORS', 'UNKNOWN_IMAGE_HASH')
        return
    for classifier_name, label in class_labels:
        yield classifier_name, (label, feature)

def _image_from_str(s):
    """Load from string, crop to a square, resize to _initial_image_size

    Args:
        s: String of bytes representing a JPEG image

    Returns:
        RGB Image with height/width as _initial_image_size

    Raises:
        ValueError: Image is height/width too small (< _initial_image_size)
            or mode isn't RGB
        IOError: Image is unreadable
    """
    try:
        img = Image.open(StringIO.StringIO(s))
    except IOError, e:
        hadoopy.counter('Stats', 'IMG_BAD')
        raise e
    # The original snippet is truncated here; the remainder below is a sketch
    # following the docstring and the parallel imfeat-based
    # _crop_image_from_str elsewhere in this collection.
    if min(img.size) < _initial_image_size:
        hadoopy.counter('Stats', 'IMG_TOO_SMALL')
        raise ValueError
    if img.mode != 'RGB':
        hadoopy.counter('Stats', 'IMG_WRONG_MODE')
        raise ValueError
    return img.resize((_initial_image_size, _initial_image_size))

def wrapped_optimizer(*args, **kw):
    for x in pyram.exponential_grid(*args, **kw):
        hadoopy.counter('X-Val', 'Rounds')
        yield x

def _map(self, row, image_binary):
    try:
        image = imfeat.image_fromstring(image_binary)
        yield row, imfeat.image_tostring(imfeat.resize_image_max_side(image, self.max_side), 'jpg')
    except:
        hadoopy.counter('DATA_ERRORS', 'ImageLoadError')

def reduce(self, exemplar_num, id_rows):
    out = np.hstack([x[1].ravel() for x in sorted(id_rows, key=lambda x: x[0])])
    hadoopy.counter('STATS', 'sz-%s' % str(out.shape))
    yield exemplar_num, out

def test_counter(self):

    def err(x):
        self.assertEqual('reporter:counter:a,b,5\n', x)

    hadoopy.counter('a', 'b', 5, err=err)

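# For reference: the exact string the test checks is the Hadoop Streaming
# reporter protocol, 'reporter:counter:<group>,<counter>,<amount>\n', which
# hadoopy.counter writes to stderr so the framework can aggregate it. The
# amount defaults to 1 in the calls throughout this collection, e.g.:
#   hadoopy.counter('DATA_ERRORS', 'ImageLoadError')     # increment by 1
#   hadoopy.counter('Program', 'rows processed', 50000)  # increment by 50000
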
def _map(self, row, image_binary):
    try:
        image = imfeat.image_fromstring(image_binary)
    except:
        hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
        return  # 'image' is undefined past this point
    yield row, picarus.api.np_tostring(self._feat(image))

def reducer(event, filename_predicates):
    yield event, dict(filename_predicates)
    hadoopy.counter('SkippingTaskCounters', 'ReduceProcessedGroups')

class Mapper(object):

    def __init__(self):
        _target_image = cv2.imread('target.jpg')
        _target_image = cv2.resize(_target_image,
                                   (_target_image.shape[1] // _tile_length * _tile_length,
                                    _target_image.shape[0] // _tile_length * _tile_length))
        self.target_tiles = {}
        ytiles = _target_image.shape[0] / _tile_length
        xtiles = _target_image.shape[1] / _tile_length
        print('Xtiles[%d] Ytiles[%d]' % (xtiles, ytiles))
        assert xtiles > 0 and ytiles > 0
        xsubtiles = xtiles * _subtiles_per_tile_length
        ysubtiles = ytiles * _subtiles_per_tile_length
        self.min_dists = {}
        self.dist = lambda x, y: np.sum(np.abs(x - y))  # distpy.L2Sqr().dist
        for y in xrange(ysubtiles):
            for x in xrange(xsubtiles):
                # Defines which tile the subtile is in
                tile_id = (x / _subtiles_per_tile_length, y / _subtiles_per_tile_length)
                # Defines which position it is in within the tile
                subtile_id = (x % _subtiles_per_tile_length, y % _subtiles_per_tile_length)
                key = (tile_id[0], tile_id[1], subtile_id[0], subtile_id[1])
                yp = ysubtiles - y - 1  # NOTE(brandyn): Flip coordinates for y axis
                tile = _target_image[(yp * _subtile_length):((yp + 1) * _subtile_length),
                                     (x * _subtile_length):((x + 1) * _subtile_length), :]
                self.target_tiles[key] = np.asfarray(tile)

    @staticmethod
    def _crop_image_from_str(s):
        """Load from string, crop to a square, resize to _initial_image_size

        Args:
            s: String of bytes representing a JPEG image

        Returns:
            RGB Image with height/width as _initial_image_size

        Raises:
            ValueError: Image is height/width too small (< _initial_image_size)
                or mode isn't RGB
            IOError: Image is unreadable
        """
        if isinstance(s, tuple):
            s = s[0]
        try:
            img = imfeat.image_fromstring(s)
        except IOError, e:
            hadoopy.counter('Stats', 'IMG_BAD')
            raise e
        min_side = min(img.shape[:2])
        if min_side < _initial_image_size:
            hadoopy.counter('Stats', 'IMG_TOO_SMALL')
            raise ValueError
        if img.ndim != 3:
            hadoopy.counter('Stats', 'IMG_WRONG_MODE')
            raise ValueError
        return imfeat.resize_image(img, _initial_image_size, _initial_image_size)

def map(self, event_filename, video_data):
    """

    Args:
        event_filename: Tuple of (event, filename)
        video_data: Binary video data

    Yields:
        A tuple in the form of ((event, filename), features) where features is a dict
            frame_features: List of frame features
            file_size: Size in bytes
        where each frame feature is a dictionary of
            frame_time: Time in seconds
            frame_num: Frame number
            prev_frame_num: Previous frame number (useful if there is a frame skip)
            keyframe: Boolean True/False
            surf: List of surf points (see impoint)
            face_widths:
            face_heights:
            predictions: Dictionary of predictions
    """
    sys.stderr.write('In Raw:%s\n' % str(event_filename))
    ext = '.' + event_filename[1].rsplit('.', 1)[1]
    with tempfile.NamedTemporaryFile(suffix=ext) as fp:
        with self.timer('Writing video data'):
            fp.write(video_data)
            fp.flush()
        kf = keyframe.DecisionTree(min_interval=0)
        kf.load()
        prev_frame = None
        prev_frame_num = 0
        all_out = []
        sz = len(video_data)
        self.timer.start('KF')
        try:
            for (frame_num, frame_time, frame), iskeyframe in kf(viderator.frame_iter(fp.name, frozen=True)):
                hadoopy.counter('RawFeatures', 'NumFrames')
                self.timer.stop('KF')
                if frame_num > self._max_frames:
                    break
                # Face detection is expensive, so only run it every 100 frames
                if frame_num % 100 == 0:
                    with self.timer('Computing face features'):
                        faces = _detect_faces(imfeat.convert_image(frame, [('opencv', 'gray', 8)]),
                                              self.cascade)
                else:
                    faces = {}
                out = {'frame_time': frame_time,
                       'frame_num': frame_num,
                       'prev_frame_num': prev_frame_num,
                       'keyframe': iskeyframe,
                       'surf': kf.prev_vec['surf']}
                if faces:  # If any faces
                    face_heights = np.array([x[0][3] for x in faces]) / float(frame.height)
                    face_widths = np.array([x[0][2] for x in faces]) / float(frame.width)
                    out['face_widths'] = face_widths
                    out['face_heights'] = face_heights
                # Output the cur and previous frames if this is a keyframe
                if iskeyframe and np.random.random() < self._frame_output_prob:
                    out['prev_frame'] = cv_to_jpg(prev_frame)
                    out['frame'] = cv_to_jpg(frame)
                # Compute scene features
                with self.timer('Computing scene classifier features'):
                    frame_res = cv.fromarray(cv2.resize(np.asarray(cv.GetMat(frame)),
                                                        (self._image_width, self._image_height)))
                    feature = self._feat(frame_res)
                    out['predictions'] = dict((classifier_name, classifier.predict(feature))
                                              for classifier_name, classifier in self._classifiers)
                # Output JPEG with match lines from the SURF feature
                if np.random.random() < self._match_line_prob and prev_frame:
                    out['surf_image'] = cv_to_jpg(plot_matches(prev_frame,
                                                               kf.surf_debug['matches'],
                                                               kf.surf_debug['points0'],
                                                               kf.surf_debug['points1'],
                                                               max_feat_width=kf.max_feat_width))
                # Buffer the output and emit a block when it is full
                all_out.append(out)
                if len(all_out) >= self._block_size:
                    with self.timer('Yield'):
                        yield event_filename, {'frame_features': all_out, 'file_size': sz}
                    all_out = []
                prev_frame = frame
                prev_frame_num = frame_num
                self.timer.start('KF')
        except viderator.FPSParseException:
            # NOTE(brandyn): This will disregard videos with this error
            hadoopy.counter('SkippedVideos', 'FPSParseException')
            return
    if all_out:
        with self.timer('Yield'):
            yield event_filename, {'frame_features': all_out, 'file_size': sz}