def mapper(videohash, metadata):
    print('mapper videohash[%s]' % videohash)
    print('hdfs_path[%s]' % metadata['hdfs_path'])

    filename = 'hardcodedvideo.' + metadata['extension']
    #print filename, metadata.keys()
    try:
        picarus.io._record_to_file(metadata, filename)
    except IOError:
        hadoopy.counter('INPUT_ERROR', 'REMOTE READ FAILED')
        return

    min_interval = float(os.environ['MIN_INTERVAL'])
    resolution = float(os.environ['RESOLUTION'])
    try:
        iter1 = lambda: viderator.frame_iter(filename, frame_skip=5, frozen=True)
        iter2 = lambda: viderator.convert_video_ffmpeg(filename, frame_skip=5, frozen=True)

        kf = keyframe.Histogram(min_interval)

        # Do this instead of 'return' in order to keep the tempfile around
        try:
            for k, v in keyframes(iter1, iter2, metadata, kf, resolution):
                #print 'yield', k
                yield k, v
        except:
            hadoopy.counter('INPUT_ERROR', 'VIDEO_READ_ERROR')
            return
    finally:
        os.remove(filename)
    def map(self, key, value):
        """
        Args:
            key: Image name
            value: Image as jpeg byte data

        Yields:
            A tuple in the form of (key, value)
            key: Imagename-face-x0<x_tl_val>-y0<y_tl_val>-x1<x_br_val>-y1<y_br_val>
            value: Cropped face binary data
        """
        try:
            image_pil, image_cv = self._load_cv_image(value)
        except:
            hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
            return
        try:
            faces = self._detect_faces(image_cv)
        except:
            hadoopy.counter('DATA_ERRORS2', 'ImageLoadError')
            return
        if not faces:
            return
        if self._output_boxes:
            yield key, (value, faces)
        else:
            for x0, y0, x1, y1 in faces:
                image_pil_crop = image_pil.crop((x0, y0, x1, y1))
                out_fp = StringIO.StringIO()
                image_pil_crop.save(out_fp, 'JPEG')
                out_fp.seek(0)
                yield '%s-face-x0%d-y0%d-x1%d-y1%d' % (key, x0, y0, x1, y1), out_fp.read()
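The mapper above emits one record per detected face, with the crop geometry packed into the key. A minimal consumer sketch, assuming hadoopy.readtb and a hypothetical output path:

# Hedged sketch: read the (key, jpeg_bytes) records emitted above and write
# each face crop to a local file. The HDFS output path is hypothetical.
import hadoopy

for face_key, jpeg_bytes in hadoopy.readtb('/output/faces'):
    with open(face_key.replace('/', '_') + '.jpg', 'wb') as fp:
        fp.write(jpeg_bytes)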
Example #3
 def collect(self, key, value):
     if len(self.data) == 0:
         self.first_key = key

     if self.ncols is None:
         self.ncols = len(value)
         print >>sys.stderr, "Matrix size: %i columns" % self.ncols
     else:
         # TODO should we warn and truncate here?
         # No. that seems like something that will introduce
         # bugs.  Maybe we could add a "liberal" flag
         # for that.
         assert len(value) == self.ncols

     self.data.append(value)
     self.nrows += 1

     if len(self.data) > self.blocksize * self.ncols:
         hadoopy.counter('Program', 'QR Compressions', 1)
         # compress the data
         self.compress()

     # write status updates so Hadoop doesn't complain
     if self.nrows % 50000 == 0:
         hadoopy.counter('Program', 'rows processed', 50000)
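The collect method buffers rows and periodically calls compress(), which is not shown in this snippet. A sketch of that step under the usual tall-and-skinny QR assumption (replace the buffered rows with the rows of the R factor, which preserves A^T A while shrinking the buffer to ncols rows):

 # Hypothetical sketch of the compress() referenced above.
 import numpy as np

 def compress(self):
     if not self.data:
         return
     R = np.linalg.qr(np.array(self.data), mode='r')
     self.data = [row for row in R]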
 def map(self, event_filename, video_data):
     hadoopy.counter('CombinedFeatures', 'DontHave')
     sys.stderr.write('%s\n' % str(event_filename))
     for event_filename, features in self.r.map(event_filename, video_data):
         sys.stderr.write('%s\n' % str(event_filename))
         for x in self.b.map(event_filename, features):
             yield x
Example #5
 def _map(self, row, image_binary):
     try:
         image = Image.open(StringIO.StringIO(image_binary))
         if not hasattr(image, "_getexif"):
             yield row, json.dumps({})
         else:
             image_tags = image._getexif()
             if image_tags is None:
                 yield row, json.dumps({})
             else:
                 # base64-encode raw byte strings so the dict is JSON-safe
                 yield row, json.dumps(
                     dict(
                         (name, base64.b64encode(image_tags[id])
                          if isinstance(image_tags[id], str)
                          else image_tags[id])
                         for id, name in TAGS.items()
                         if id in image_tags
                     )
                 )
     except:
         sys.stdout.flush()
         hadoopy.counter("STATUS", "badRows")
     else:
         sys.stdout.flush()
         hadoopy.counter("STATUS", "goodRows")
    def map(self, image_hash, image_data):
        """

        Args:
            image_hash: Unique image string
            image_data: Binary image data

        Yields:
            A tuple in the form of (classifier_name, label_value)
            classifier_name: String representing the classifier
            label_value: (label, feature) where label is an int
        """
        try:
            image = Image.open(StringIO.StringIO(image_data))
        except:
            hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
            return
        bgen = imfeat.BlockGenerator(image, imfeat.CoordGeneratorRectRotate,
                                     output_size=(self._image_height, self._image_width),
                                     step_delta=(self._image_height / 2, self._image_width / 2), angle_steps=1)
        for num, (image_out, sim) in enumerate(bgen):
            feature = np.asfarray(imfeat.compute(self._feat, image_out)[0])
            pred = dict((classifier_name, classifier.predict(feature))
                        for classifier_name, classifier in self._classifiers)
            if any(x for x in pred.values() if x[0][0] * x[0][1] > 0):  # At least 1 class needs to be > 0
                image_out_fp = StringIO.StringIO()
                imfeat.convert_image(image_out, ['RGB']).save(image_out_fp, 'JPEG')
                image_out_fp.seek(0)
                yield (image_hash, sim), (pred, image_out_fp.read())
Example #7
    def map(self, key, value):
        """
        Args:
            key: Image name
            value: Image as jpeg byte data

        Yields:
            A tuple in the form of (key, value)
            key: Constant dummy value
            value: (l2sqr_dist, value)
        """
        try:
            image = imfeat.resize_image(imfeat.image_fromstring(value, {'type': 'numpy', 'mode': 'bgr', 'dtype': 'uint8'}),
                                        100, 100)
        except:
            hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
            return

        # Distance metric
        diff = image - self.target_image
        dist = np.sum(diff * diff)

        # Output
        if dist < self.min_dist:
            self.min_dist = dist
            self.min_key = key
            self.min_value = value
    def reduce(self, image_hash, values):
        """

        Args:
            image_hash: (see mapper)
            values: Iterator of values (see mapper)

        Yields:
            A tuple in the form of (image_hash, value)
            image_hash: Image hash
            value: The provided value (not the prediction)
        """
        predictions = None
        out_val = None
        for value in values:
            if isinstance(value, dict):
                predictions = value
            else:
                out_val = value
        if predictions is None or out_val is None:
            hadoopy.counter('DATA_ERR', 'MISSING_PREDICTIONS_OR_DATA')
            return
        label, conf = predictions[self._class_name][0]
        if (self._class_thresh <= label * conf) == (self._output_class == 1):  # Both true or both false
            yield image_hash, out_val
Example #9
 def convert_matrix(self, matrix):
     sparsity = len(matrix.nonzero()[0]) / float(matrix.size)
     print('Sparsity[%f]' % sparsity)
     if sparsity < self.min_sparsity:
         hadoopy.counter('SPARSITY', 'SPARSE')
         return sp.sparse.csr_matrix(matrix)
     hadoopy.counter('SPARSITY', 'DENSE')
     return matrix
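convert_matrix feeds the accumulation in Example #22 below, which wraps each += in a try/except NotImplementedError and falls back to out-of-place addition, since in-place addition between mixed sparse/dense operands is not reliably supported. A hedged sketch of the same defensive pattern (whether the exception fires depends on the scipy version and operand types):

 import numpy as np
 import scipy.sparse

 acc = scipy.sparse.csr_matrix((3, 3))
 update = np.ones((3, 3))
 try:
     # In-place add of a dense update into a sparse accumulator may raise
     # NotImplementedError (version- and type-dependent), as in Example #22.
     acc += update
 except NotImplementedError:
     acc = acc + update  # out-of-place fallback densifies the accumulator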
Example #10
 def _map(self, row, input_binary):
     try:
         yield row, self.job.process_binary(input_binary)
     except:
         sys.stdout.flush()
         hadoopy.counter('ERROR', 'BadRow')
     else:
         hadoopy.counter('STATUS', 'GoodRow')
 def _map(self, row, image_binary):
     try:
         image = imfeat.image_fromstring(image_binary)
     except:
         hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
         return
     out = self.sp(image, 25)
     hulls = self.sp.label_image_to_contours(out, 1.)
     yield row, json.dumps(hulls, separators=(',', ':'))
 def map(self, image_id, image_binary):
     image = resize(imfeat.image_fromstring(image_binary))
     print(image.shape)
     st = time.time()
     box_num = -1
     for box_num, (box, f) in enumerate(feature.image_patch_features_dense(image, normalize_box=True)):
         yield (image_id, box.tolist()), np.dot(self.coefs, f.reshape((f.size, 1))).ravel() + self.intercepts
     hadoopy.counter('stats', 'num_boxes', box_num + 1)
     print('ImageTime[%f]' % (time.time() - st))
 def amap(self, key, feat):
     feat = np.array([np.fromstring(feat, dtype=np.float32)])
     if self.canopies.size:
         nearest_dist = self.nn(feat, self.canopies)[1]
         if nearest_dist > self.hard_dist:
             hadoopy.counter('canopy_cluster', 'canopy_count')
             self.canopies = np.concatenate((self.canopies, feat))
     else:
         hadoopy.counter('canopy_cluster', 'canopy_count')
         self.canopies = feat
 def map(self, name, image_or_data):
     if isinstance(image_or_data, str):
         try:
             image = imfeat.image_fromstring(image_or_data)
         except:
             hadoopy.counter("DATA_ERRORS", "ImageLoadError")
             return
     else:
         image = image_or_data
     yield name, self._feat(image)
 def _map(self, row, image_binary):
     try:
         if not image_binary:
             raise ValueError
         image = imfeat.image_fromstring(image_binary)
     except:
         hadoopy.counter('ERROR', 'FEATURE')
         print('Error on row[%r]' % row)
     else:
         yield row, picarus.api.np_tostring(self._feat.compute_feature(image))
Example #16
def reducer(k, vs):
    for v in vs:
        print(type(v))
        if isinstance(v, dict):
            metadata = v
        else:
            feature_hash = v
    try:
        yield k, (feature_hash, metadata)
    except NameError:
        hadoopy.counter("ERRORS", "JoinsFailed")
Example #17
 def map(self, url, value):
     try:
         data = download_file(url)
     except Exception:
         hadoopy.counter('FILE_DOWNLOADER', 'Exception')
     else:
         if self.output_type == 'meta':
             yield url, (data, value)
         elif self.output_type == 'image':
             yield url, data
         else:
             raise ValueError('OutputType[%s]' % self.output_type)
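download_file is not shown in this snippet; a hedged stand-in using urllib2 (the helper name comes from the snippet, the implementation is assumed):

 import urllib2

 def download_file(url, timeout=30):
     # Minimal stand-in for the download_file helper used above.
     return urllib2.urlopen(url, timeout=timeout).read()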
Example #18
 def map(self, name, image_data):
     try:
         image = Image.open(StringIO.StringIO(image_data))
     except:
         hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
         return
     image = image.resize((self._image_length, self._image_length))
     try:
         yield name, np.asfarray(imfeat.compute(self._feat, image)[0])
     except ValueError, e:
         print(e)
         hadoopy.counter('DATA_ERRORS', 'UnkImageType')
         return
def train(classifier_name, classifier_extra, label_values):
    import classipy
    label_values = list(label_values)
    hadoopy.counter('FeatureShape', str(len(label_values[0][1])))
    if classifier_name == 'svmlinear':
        return classipy.SVMLinear(options={'B': '1'}).train(label_values)
    elif classifier_name == 'svm':
        return classipy.SVM(options={'t': '2'}).train(label_values)
    elif classifier_name == 'svm_hik':
        return classipy.SVMScikit(kernel=classipy.kernels.histogram_intersection).train(label_values)
    elif classifier_name == 'svmlinear_autotune':

        def wrapped_optimizer(*args, **kw):
            for x in pyram.exponential_grid(*args, **kw):
                hadoopy.counter('X-Val', 'Rounds')
                yield x
        b = classipy.select_parameters(classipy.SVMLinear, label_values,
                                       {'c': (10**-2, 10**1, 10)},
                                       wrapped_optimizer,
                                       options={'B': '1'})[1]
        print(b)
        return classipy.SVMLinear(b).train(label_values)
    elif classifier_name == 'plslinearsvmxval':
        num_dims = label_values[0][1].size
        # Set the parameters by cross-validation
        #,'pls__n_components': [x for x in [1, 8, 16, 32, 64, 128, 256] if x <= num_dims]
        #('pls', sklearn.pls.PLSRegression(n_components=0)),
        tuned_parameters = [{'svm__C': [.001, .01, .1, 1, 10, 100]}]
        p = sklearn.pipeline.Pipeline([('svm', sklearn.svm.SVC(kernel=classipy.kernels.histogram_intersection, scale_C=True))])  # was cls
        #p = sklearn.grid_search.GridSearchCV(cls, tuned_parameters, score_func=sklearn.metrics.f1_score)
        num_neg = 0
        num_pos = 0
        import random
        random.shuffle(label_values)
        new_label_values = []
        for l, v in label_values:
            if l == 1:
                if num_pos < 100:
                    new_label_values.append((l, v))
                num_pos += 1
            else:
                if num_neg < 100:
                    new_label_values.append((l, v))
                num_neg += 1
        import sys
        sys.stderr.write('Num Neg[%d] Pos[%d]\n' % (num_neg, num_pos))
        p.fit(*zip(*new_label_values)[::-1])
        return p  # p.best_estimator_
    else:
        raise ValueError('Unknown classifier [%s]' % classifier_name)
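A hedged usage sketch for train (the feature vectors are synthetic; note the counter logs the feature dimensionality as its group name):

import numpy as np

label_values = [(1, np.random.rand(64)), (-1, np.random.rand(64))] * 50
classifier = train('svmlinear', None, label_values)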
 def map(self, image_id, image_binary):
     if self.num_inputs <= 0:
         return
     self.num_inputs -= 1
     pyramid = np.zeros((len(self.ids), np.sum(self.num_bins_sqr)), dtype=np.int32)
     num_boxes = 0
     for (image_id, box), confs in super(Mapper, self).map(image_id, image_binary):
         num_boxes += 1
         cy = (box[2] + box[0]) / 2
         cx = (box[1] + box[3]) / 2
         offset = 0
         cur_bins = []
         for l in range(self.levels):
             cur_bins.append(offset + int(cy * self.num_bins[l]) * self.num_bins[l] + int(cx * self.num_bins[l]))
             offset += self.num_bins_sqr[l]
         #if num_boxes < 1000 and num_boxes % 100:
         #    print((box, cy, cx, cur_bins))
         inds = (confs >= 0).nonzero()[0]
         hadoopy.counter('STATS', 'num_pos', inds.size)
         hadoopy.counter('STATS', 'num_neg', confs.size - inds.size)
         hadoopy.counter('STATS', 'total', confs.size)
         if inds.size:
             for cur_bin in cur_bins:
                 pyramid[inds, cur_bin] += 1
     hadoopy.counter('STATS', 'sz-%s' % str(pyramid.shape))
     if np.any(pyramid):
         pyramid = pyramid * (self.bin_weight / float(num_boxes))
         for exemplar_num, row in enumerate(pyramid):
             yield exemplar_num, (image_id, row)
 def map(self, key, feat):
     stime = time.time()
     feat = self._strto2d(feat)
     self.ftime += time.time() - stime
     stime = time.time()
     if self.canopies.size:
         nearest_dist = self.nn(feat, self.canopies)[1]
         if nearest_dist > self.hard_dist:
             hadoopy.counter('canopy_cluster', 'canopy_count')
             self.canopies = np.concatenate((self.canopies, feat))
     else:
         hadoopy.counter('canopy_cluster', 'canopy_count')
         self.canopies = feat
     self.gtime += time.time() - stime
Example #22
 def map(self, image_name, image_label_points):
     """
     Args:
         image_name: A string (if not then we skip the input)
         image_label_points: (image, [(label, points), ...]) where points is Nx2 (y, x)
     """
     if not isinstance(image_name, str):
         hadoopy.counter('SKIPPED_INPUTS', 'KeyNotString')
         return
     with self.timer('Build root_labels_image_points'):
         image, label_points = image_label_points
         root_labels_image_points = {}
         if self.dp:
             root_label_points = self.dp.group_lowest_nodes(image, label_points)
             for root, label_points in root_label_points:
                 labels = np.array([x[0] for x in label_points], dtype=np.int32)
                 image_points = [(image, x[1]) for x in label_points]
                 root_labels_image_points[root] = labels, image_points
          else:
              labels = np.array([x[0] for x in label_points], dtype=np.int32)
              image_points = [(image, x[1]) for x in label_points]
              root_labels_image_points[0] = labels, image_points
     with self.timer('Run train_map_hists and sum qlss/qrss'):
         for root, (labels, image_points) in root_labels_image_points.items():
             if self.level != int(np.floor(np.log2(root + 1))):  # We are done processing this root
                 continue
             qls, qrs = train_map_hists(labels, image_points, self.feats, self.num_classes)
             qls, qrs = self.convert_matrix(qls), self.convert_matrix(qrs)
             try:
                 self.root_count[root] += 1
             except KeyError:
                 self.root_count[root] = 1
             try:
                 try:
                     self.qlss[root] += qls
                 except NotImplementedError:
                     self.qlss[root] = self.qlss[root] + qls
                 try:
                     self.qrss[root] += qrs
                 except NotImplementedError:
                     self.qrss[root] = self.qrss[root] + qrs
                 self.num_images[root] += 1
             except KeyError:
                 if self.max_root_buffer <= len(self.qlss):
                     for x in self.flush_node(root):
                         yield x
                 self.qlss[root] = qls
                 self.qrss[root] = qrs
                 self.num_images[root] = 1
Example #23
 def stop_time(self, name):
     try:
         dur = time.time() - self._pending_times[name]
     except KeyError:
         hadoopy.counter('stop_time', 'timer_failed')
     else:
         try:
             time_stats = self._times[name]
             # Min/Max/Sum/Count
             self._times[name] = [min(time_stats[0], dur),
                                  max(time_stats[1], dur),
                                  time_stats[2] + dur,
                                  time_stats[3] + 1]
         except KeyError:
             self._times[name] = [dur, dur, dur, 1]
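stop_time reads a start timestamp recorded under the same name; a sketch of the matching start-side helper it assumes (name hypothetical):

 def start_time(self, name):
     # Record the wall-clock start so stop_time can compute dur.
     self._pending_times[name] = time.time()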
 def map(self, image_id, image_binary):
     image = imfeat.image_fromstring(image_binary)
     print(image.shape)
     st = time.time()
     box_num = -1
     for box_num, (box, f) in enumerate(feature.image_patch_features_dense(image, normalize_box=True)):
         scores = np.dot(self.coefs, f.reshape((f.size, 1))) + self.intercepts
         pred_common = [image_id, box.tolist(), f.tolist()]
         for score, preds in zip(scores, self.preds):
             pred = self.output_formatter([float(score[0])] + pred_common)
             if len(preds) >= self.max_hard:
                 heapq.heappushpop(preds, pred)
             else:
                 heapq.heappush(preds, pred)
     hadoopy.counter("stats", "num_boxes", box_num + 1)
     print("ImageTime[%f]" % (time.time() - st))
def mapper(key, value):
    """
    Args:
        key, value:
           either of - the output of clustering (/partition or /samples),
                     - the output of video keyframing (/allframes)
                     - (hash,image_data) input
                     - (hash,image_metadata) input
    Env vars:
        IMAGE_TYPE: record, cluster, kv, frame
        THUMB_SIZE: longest dimension for the thumbnail images

    Yields:
        image_hash, image_data: serialized jpeg data for thumbnail
    """
    if os.environ['IMAGE_TYPE'] == 'record':
        # Image input format
        image_hash, image_metadata = key, value
        image_file = picarus.io._record_to_file(image_metadata)

    elif os.environ['IMAGE_TYPE'] == 'cluster':
        # Cluster output /partition or /sample
        cluster_index, (image_hash, image_data) = key, value
        image_file = StringIO.StringIO(image_data)

    elif os.environ['IMAGE_TYPE'] == 'kv':
        # hash, image bytes
        image_hash, image_data = key, value
        image_file = StringIO.StringIO(image_data)

    elif os.environ['IMAGE_TYPE'] == 'frame':
        image_hash, image_metadata = key, value
        image_file = StringIO.StringIO(image_metadata['image_data'])
    else:
        raise ValueError('IMAGE_TYPE[%s]' % os.environ['IMAGE_TYPE'])

    thumb_size = int(os.environ['THUMB_SIZE'])
    try:
        image = Image.open(image_file)
        image.thumbnail((thumb_size, thumb_size))
    except:
        hadoopy.counter('INPUT_ERROR', 'IMAGE_DATA_ERROR')
        return
    image = image.convert('RGB')
    s = StringIO.StringIO()
    image.save(s, 'JPEG')
    s.seek(0)

    # Output type: kv
    yield image_hash, s.getvalue()
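A hedged launch sketch for the thumbnail mapper above (paths hypothetical): IMAGE_TYPE and THUMB_SIZE are read from the environment, so they are passed through as cmdenvs.

import hadoopy

hadoopy.launch_frozen('/input/images', '/output/thumbs', 'thumbnail_job.py',
                      cmdenvs=['IMAGE_TYPE=kv', 'THUMB_SIZE=150'])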
    def map(self, event_filename, video_data):
        """
        Args:
            event_filename: Tuple of (event, filename)
            video_data: Binary video data

        Yields:
            A tuple in the form of ((event, filename, frame_num, frame_time), frame_data)
        """
        ext = '.' + event_filename[1].rsplit('.', 1)[1]
        event, filename = event_filename
        heap = [(float('-inf'), None)] * self.max_outputs
        with tempfile.NamedTemporaryFile(suffix=ext) as fp:
            fp.write(video_data)
            fp.flush()
            sys.stderr.write('Prevideo\n')
            try:
                for frame_num, frame_time, frame in viderator.frame_iter(fp.name,
                                                                         frozen=True,
                                                                         frame_skip=self.frame_skip):
                    sys.stderr.write('FrameNum[%d]\n' % frame_num)
                    if frame_num >= self.max_frames_per_video:
                        break
                    frame_orig = frame
                    if self.remove_bars:
                        sz = self.remove_bars.find_bars(frame)
                        frame = frame[sz[0]:sz[1], sz[2]:sz[3], :]
                        if not frame.size:  # Empty
                            continue
                    st = time.time()
                    c = self._feat(frame)[0]
                    sys.stderr.write('FrameTime[%f]\n' % (time.time() - st))
                    print('FrameTime[%f]' % (time.time() - st))
                    if c > heap[0][0]:
                        if self.output_frame:
                            heapq.heappushpop(heap,
                                              (c, ((event, filename, frame_num, frame_time), imfeat.image_tostring(frame_orig, 'JPEG'))))
                        else:
                            heapq.heappushpop(heap,
                                              (c, ((event, filename, frame_num, frame_time), '')))
            except IOError:
                hadoopy.counter('PICARUS', 'CantProcessVideo')
        for x in heap[-self.max_outputs_per_video:]:
            heapq.heappushpop(self.heap, x)
    def map(self, event_filename, video_data):
        """

        Args:
            event_filename: Tuple of (event, filename)
            video_data: Binary video data

        Yields:
            A tuple in the form of ((event, filename), value) where value is a dict
            with contents

            prev_frame_time:
            prev_frame_num:
            prev_frame:
            frame_time:
            frame_num:
            frame:
        """
        ext = '.' + event_filename[1].rsplit('.', 1)[1]
        with tempfile.NamedTemporaryFile(suffix=ext) as fp:
            fp.write(video_data)
            fp.flush()
            prev_frame_time = None
            prev_frame_num = None
            prev_frame = None
            try:
                for (frame_num, frame_time, frame), iskeyframe in self.kf(viderator.frame_iter(fp.name,
                                                                                               frame_skip=self.frame_skip,
                                                                                               frozen=True)):
                    if self.max_time < frame_time:
                        break
                    if iskeyframe and prev_frame is not None:
                        yield event_filename, {'prev_frame_time': prev_frame_time,
                                               'prev_frame_num': prev_frame_num,
                                               'prev_frame': imfeat.image_tostring(prev_frame, 'JPEG'),
                                               'frame_time': frame_time,
                                               'frame_num': frame_num,
                                               'frame': imfeat.image_tostring(frame, 'JPEG')}
                    prev_frame_time = frame_time
                    prev_frame_num = frame_num
                    prev_frame = frame
            except Exception, e:
                print(e)
                hadoopy.counter('VIDEO_ERROR', 'FFMPEGCantParse')
Example #28
    def map(self, key, value):
        """
        Args:
            key: Image name
            value: Image as jpeg byte data

        Yields:
            A tuple in the form of (dist, (key, value))
            dist: Face distance computed for the image
            key: Image name
            value: Image as jpeg byte data
        """
        try:
            image = self._load_cv_image(value)
        except:
            hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
            return
        dist = self._compute_face_distance(image)
        yield dist, (key, value)
Example #29
    def map(self, image_hash, feature):
        """

        Args:
            image_hash: Unique image string
            feature: Numpy image feature

        Yields:
            A tuple in the form of (classifier_name, label_value)
            classifier_name: String representing the classifier
            label_value: (label, feature) where label is an int
        """
        try:
            class_labels = self._hash_class_labels[image_hash]
        except KeyError:
            hadoopy.counter('DATA_ERRORS', 'UNKNOWN_IMAGE_HASH')
            return
        for classifier_name, label in class_labels:
            yield classifier_name, (label, feature)
Example #30
    def _image_from_str(s):
        """Load from string, crop to a square, resize to _initial_image_size

        Args:
            s: String of bytes representing a JPEG image

        Returns:
            RGB Image with height/width as _initial_image_size

        Raises:
            ValueError: Image is height/width too small (< _initial_image_size)
                or mode isn't RGB
            IOError: Image is unreadable
        """
        try:
            img = Image.open(StringIO.StringIO(s))
        except IOError, e:
            hadoopy.counter('Stats', 'IMG_BAD')
            raise e
Example #31
 def wrapped_optimizer(*args, **kw):
     for x in pyram.exponential_grid(*args, **kw):
         hadoopy.counter('X-Val', 'Rounds')
         yield x
 def _map(self, row, image_binary):
     try:
         image = imfeat.image_fromstring(image_binary)
         yield row, imfeat.image_tostring(imfeat.resize_image_max_side(image, self.max_side), 'jpg')
     except:
         hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
Example #33
 def reduce(self, exemplar_num, id_rows):
     out = np.hstack([x[1].ravel()
                      for x in sorted(id_rows, key=lambda x: x[0])])
     hadoopy.counter('STATS', 'sz-%s' % str(out.shape))
     yield exemplar_num, out
Example #34
    def test_counter(self):
        def err(x):
            self.assertEqual('reporter:counter:a,b,5\n', x)

        hadoopy.counter('a', 'b', 5, err=err)
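test_counter pins down the Hadoop Streaming reporter protocol: a counter update is a single line written to stderr. A minimal sketch of an equivalent emitter, based only on the format asserted in the test above:

import sys

def counter(group, counter, amount=1, err=None):
    # Emits 'reporter:counter:<group>,<counter>,<amount>\n', matching the
    # 'reporter:counter:a,b,5\n' string asserted in the test above.
    if err is None:
        err = sys.stderr.write
    err('reporter:counter:%s,%s,%d\n' % (group, counter, amount))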
Example #35
 def _map(self, row, image_binary):
     try:
         image = imfeat.image_fromstring(image_binary)
     except:
         hadoopy.counter('DATA_ERRORS', 'ImageLoadError')
         return
     yield row, picarus.api.np_tostring(self._feat(image))
def reducer(event, filename_predicates):
    yield event, dict(filename_predicates)
    hadoopy.counter('SkippingTaskCounters', 'ReduceProcessedGroups')
class Mapper(object):
    def __init__(self):
        _target_image = cv2.imread('target.jpg')
        _target_image = cv2.resize(
            _target_image,
            (_target_image.shape[1] // _tile_length * _tile_length,
             _target_image.shape[0] // _tile_length * _tile_length))
        self.target_tiles = {}
        ytiles = _target_image.shape[0] / _tile_length
        xtiles = _target_image.shape[1] / _tile_length
        print('Xtiles[%d] Ytiles[%d]' % (xtiles, ytiles))
        assert xtiles > 0 and ytiles > 0
        xsubtiles = xtiles * _subtiles_per_tile_length
        ysubtiles = ytiles * _subtiles_per_tile_length
        self.min_dists = {}
        self.dist = lambda x, y: np.sum(np.abs(x - y))  # distpy.L2Sqr().dist
        for y in xrange(ysubtiles):
            for x in xrange(xsubtiles):
                # Defines which tile the subtile is in
                #'%.6d_%.6d' %
                tile_id = (x / _subtiles_per_tile_length,
                           y / _subtiles_per_tile_length)
                # Defines which position it is in within the tile
                #'%.6d_%.6d' %
                subtile_id = (x % _subtiles_per_tile_length,
                              y % _subtiles_per_tile_length)
                #'\t'.join(
                key = (tile_id[0], tile_id[1], subtile_id[0], subtile_id[1])
                yp = ysubtiles - y - 1  # NOTE(brandyn): Flip coordinates for y axis
                tile = _target_image[(yp * _subtile_length):((yp + 1) *
                                                             _subtile_length),
                                     (x * _subtile_length):(x + 1) *
                                     _subtile_length, :]
                self.target_tiles[key] = np.asfarray(tile)

    @staticmethod
    def _crop_image_from_str(s):
        """Load from string, crop to a square, resize to _initial_image_size

        Args:
            s: String of bytes representing a JPEG image

        Returns:
            RGB Image with height/width as _initial_image_size

        Raises:
            ValueError: Image is height/width too small (< _initial_image_size)
                or mode isn't RGB
            IOError: Image is unreadable
        """
        if isinstance(s, tuple):
            s = s[0]
        try:
            img = imfeat.image_fromstring(s)
        except IOError, e:
            hadoopy.counter('Stats', 'IMG_BAD')
            raise e
        min_side = min(img.shape[:2])
        if min_side < _initial_image_size:
            hadoopy.counter('Stats', 'IMG_TOO_SMALL')
            raise ValueError
        if img.ndim != 3:
            hadoopy.counter('Stats', 'IMG_WRONG_MODE')
            raise ValueError
        return imfeat.resize_image(img, _initial_image_size,
                                   _initial_image_size)
Example #38
    def map(self, event_filename, video_data):
        """

        Args:
            event_filename: Tuple of (event, filename)
            video_data: Binary video data

        Yields:
            A tuple in the form of ((event, filename), features) where features is a dict

            frame_features: List of frame features
            file_size: Size in bytes

            where each frame feature is a dictionary of

            frame_time: Time in seconds
            frame_num: Frame number
            prev_frame_num: Previous frame number (useful if there is a frame skip)
            keyframe: Boolean True/False
            surf: List of surf points (see impoint)
            face_widths:
            face_heights:
            predictions: Dictionary of predictions
        """
        sys.stderr.write('In Raw:%s\n' % str(event_filename))
        print(event_filename)
        ext = '.' + event_filename[1].rsplit('.', 1)[1]
        with tempfile.NamedTemporaryFile(suffix=ext) as fp:
            with self.timer('Writing video data'):
                fp.write(video_data)
                fp.flush()
            kf = keyframe.DecisionTree(min_interval=0)
            kf.load()
            prev_frame = None
            prev_frame_num = 0
            all_out = []
            sz = len(video_data)

            self.timer.start('KF')
            try:
                for (frame_num, frame_time, frame), iskeyframe in kf(
                        viderator.frame_iter(fp.name, frozen=True)):
                    hadoopy.counter('RawFeatures', 'NumFrames')
                    self.timer.stop('KF')
                    print(frame_time)
                    if frame_num > self._max_frames:
                        break
                    if frame_num % 100 == 0:
                        with self.timer('Computing face features'):
                            faces = _detect_faces(
                                imfeat.convert_image(frame,
                                                     [('opencv', 'gray', 8)]),
                                self.cascade)
                    else:
                        faces = {}
                    out = {
                        'frame_time': frame_time,
                        'frame_num': frame_num,
                        'prev_frame_num': prev_frame_num,
                        'keyframe': iskeyframe,
                        'surf': kf.prev_vec['surf']
                    }
                    if faces:  # If any faces
                        face_heights = np.array([x[0][3] for x in faces]) / float(frame.height)
                        face_widths = np.array([x[0][2] for x in faces]) / float(frame.width)
                        out['face_widths'] = face_widths
                        out['face_heights'] = face_heights
                    # Output the cur and previous frames if this is a keyframe
                    if iskeyframe and np.random.random() < self._frame_output_prob:
                        out['prev_frame'] = cv_to_jpg(prev_frame)
                        out['frame'] = cv_to_jpg(frame)
                    # Compute scene features
                    with self.timer('Computing scene classifier features'):
                        frame_res = cv.fromarray(
                            cv2.resize(
                                np.asarray(cv.GetMat(frame)),
                                (self._image_width, self._image_height)))
                        feature = self._feat(frame_res)
                        out['predictions'] = dict(
                            (classifier_name, classifier.predict(feature)) for
                            classifier_name, classifier in self._classifiers)
                    # Output JPEG with match lines from the SURF feature
                    if np.random.random() < self._match_line_prob and prev_frame is not None:
                        out['surf_image'] = cv_to_jpg(
                            plot_matches(prev_frame,
                                         kf.surf_debug['matches'],
                                         kf.surf_debug['points0'],
                                         kf.surf_debug['points1'],
                                         max_feat_width=kf.max_feat_width))
                    # Output data buffer
                    all_out.append(out)
                    if len(all_out) >= self._block_size:
                        with self.timer('Yield'):
                            yield event_filename, {
                                'frame_features': all_out,
                                'file_size': sz
                            }
                            all_out = []
                    prev_frame = frame
                    prev_frame_num = frame_num
                self.timer.start('KF')
            except viderator.FPSParseException:  # NOTE(brandyn): This will disregard videos with this error
                hadoopy.counter('SkippedVideos', 'FPSParseException')
                return
            if all_out:
                with self.timer('Yield'):
                    yield event_filename, {
                        'frame_features': all_out,
                        'file_size': sz
                    }