예제 #1
0
    def device(self, outfile=None, n=24, fontsize=7):
        """Count videos per device type and optionally plot the top-n as a pie chart.

        Videos without a 'device_type' attribute, or whose device type is
        'unrecognized', are excluded from the counts.

        Args:
            outfile: If not None, forwarded to vipy.metrics.pie as the output
                file of the chart.
            n: Number of most frequent device types that keep their own
                entry; all remaining types are summed into 'Other'.
                n <= 0 keeps none, so everything is counted as 'Other'.
            fontsize: Font size forwarded to vipy.metrics.pie.

        Returns:
            dict of device type -> count for the top-n device types plus an
            'Other' entry (a plain int), ordered by ascending count.
        """
        assert self._isvipy()
        d_all = vipy.util.countby([
            v for v in self.list() if v.hasattribute('device_type')
            and v.attributes['device_type'] != 'unrecognized'
        ], lambda v: v.attributes['device_type'])

        # Ascending by count.  Guard n <= 0 explicitly: a [-0:] slice would
        # select every element rather than none.
        ranked = sorted(d_all.items(), key=lambda x: x[1])
        topk = [k for (k, _) in ranked[-n:]] if n > 0 else []
        topk_set = set(topk)  # built once, not once per item

        # Builtin sum keeps 'Other' an int, including 0 when nothing is left over
        other = sum(v for (k, v) in d_all.items() if k not in topk_set)

        d = {k: v for (k, v) in d_all.items() if k in topk_set}
        d.update({'Other': other})
        d = dict(sorted(d.items(), key=lambda x: x[1]))

        print('[vipy.dataset.device]: Device types = %d ' % len(d_all))
        print('[vipy.dataset.device]: Top-%d Device types = %s ' %
              (n, str(topk)))

        if outfile is not None:
            from vipy.metrics import pie
            pie(d.values(),
                d.keys(),
                explode=None,
                outfile=outfile,
                shadow=False,
                legend=False,
                fontsize=fontsize,
                rotatelabels=False)
        return d
예제 #2
0
    def collectors(self, outfile=None):
        """Count submissions per collector and optionally plot them as a histogram.

        Videos are grouped by their 'collector_id' attribute.  Summary counts
        of collectors meeting several submission thresholds are printed.

        Args:
            outfile: If not None, forwarded to vipy.metrics.histogram as the
                output file; collectors are plotted in order of decreasing
                submission count.  The plot is skipped when there are no
                collectors.

        Returns:
            dict of collector_id -> number of videos.
        """
        assert self._isvipy()
        d = vipy.util.countby(self.list(),
                              lambda v: v.attributes['collector_id'])

        def at_least(counts, n):
            # Number of collectors with n or more submissions
            return sum(1 for v in counts.values() if int(v) >= n)

        print('[vipy.dataset]: Collectors = %d ' % at_least(d, 0))
        for threshold in (10, 100, 1000, 10000):
            # The comparison is inclusive, so report it as ">=" not ">"
            print('[vipy.dataset]: Collectors with >=%d submissions = %d' %
                  (threshold, at_least(d, threshold)))

        if outfile is not None:
            if len(d) == 0:
                # Nothing to plot; unpacking zip(*[]) would raise ValueError
                print('[vipy.dataset]: No collectors found - skipping histogram')
            else:
                from vipy.metrics import histogram
                ranked = sorted(d.items(), key=lambda x: x[1], reverse=True)
                counts = tuple(c for (_, c) in ranked)
                histogram(counts,
                          list(range(len(counts))),
                          outfile=outfile,
                          ylabel='Submissions',
                          xlabel='Collector',
                          xrot='vertical',
                          fontsize=3,
                          xshow=False)
        return d
예제 #3
0
파일: video.py 프로젝트: visym/collector
 def fetch(self, ignoreErrors=False):
     """Fetch and load the JSON if needed, then return the result of fetchvideo().

     The JSON step is skipped when hasjson() is true and _is_json_loaded is
     already set.  Any exception other than KeyboardInterrupt raised while
     fetching or loading the JSON is printed and ignored, and the video
     fetch proceeds regardless.

     Note: ignoreErrors is accepted but not referenced by this method.
     """
     json_ready = self.hasjson() and self._is_json_loaded is not None
     if json_ready:
         return self.fetchvideo()

     try:
         self.fetchjson()  # Do we need this?
         self._load_json()
     except KeyboardInterrupt:
         # Never swallow a user interrupt
         raise
     except Exception as err:
         message = '[pycollector.video]: fetch error "%s" - SKIPPING' % str(err)
         print(message)
     return self.fetchvideo()
예제 #4
0
 def os(self, outfile=None):
     """Count videos per 'device_identifier' attribute and optionally plot a pie chart.

     Videos without a 'device_identifier' attribute are ignored.  When
     outfile is not None the counts are passed to vipy.metrics.pie with that
     output file.  Returns the dict of device_identifier -> count.
     """
     assert self._isvipy()

     with_identifier = []
     for video in self.list():
         if video.hasattribute('device_identifier'):
             with_identifier.append(video)

     d = vipy.util.countby(with_identifier,
                           lambda v: v.attributes['device_identifier'])
     print('[vipy.dataset]: Device OS = %d ' % len(d))

     if outfile is None:
         return d

     from vipy.metrics import pie
     pie(d.values(), d.keys(), explode=None, outfile=outfile, shadow=False)
     return d
예제 #5
0
파일: video.py 프로젝트: visym/collector
    def quicklooks(self,
                   n=9,
                   dilate=1.5,
                   mindim=256,
                   fontsize=10,
                   context=True):
        """Return a list with one montage quicklook per activity clip of this video.

        The video is fetched first; every clip returned by activityclip() is
        then asked for its quicklook() with the given n, dilate, mindim,
        fontsize and context, and the results are returned in clip order.

        Usage:

        >>> filenames = [im.saveas('/path/to/quicklook.jpg') for im in self.quicklooks()]

        """
        assert vipy.version.is_at_least("0.8.2")
        print('[pycollector.video]: Generating quicklooks for video "%s"' %
              self.videoid())

        options = dict(n=n,
                       dilate=dilate,
                       mindim=mindim,
                       fontsize=fontsize,
                       context=context)
        montages = []
        for clip in self.fetch().activityclip():
            montages.append(clip.quicklook(**options))
        return montages
예제 #6
0
파일: video.py 프로젝트: visym/collector
    def fetchjson(self):
        """Download the annotation JSON if it has not been downloaded already.

        Does nothing when self._jsonfile is already set.  Otherwise the local
        path is the filename of self._jsonurl inside the directory named by
        the VIPY_CACHE environment variable (or tempdir() when unset), and
        the file is downloaded from S3 if it does not exist yet.

        On a download error (anything except KeyboardInterrupt) the error is
        printed and self._jsonfile is reset to None so that the instance does
        not keep pointing at a file that was never written.

        Returns:
            self
        """

        if self._jsonfile is None:
            self._jsonfile = os.path.join(
                remkdir(os.environ["VIPY_CACHE"] if "VIPY_CACHE" in
                        os.environ else tempdir()),
                filetail(self._jsonurl),
            )
            if not os.path.exists(self._jsonfile):
                print('[pycollector.video]:  Fetching "%s"' % self._jsonurl)
                try:
                    vipy.downloader.s3(
                        self._jsonurl, self._jsonfile
                    )  # TODO - this is a problem to assume vipy user also has access to S3. We should decouple this dependency of using vipy

                except KeyboardInterrupt:
                    raise
                except Exception as e:
                    print(
                        '[pycollector.video]: S3 download error "%s" - SKIPPING'
                        % str(e))
                    # Previously this assigned an unused local ("jsonfile"),
                    # leaving the stale path on the instance.  None is also
                    # the initial "not fetched" state, so a later call retries.
                    self._jsonfile = None
        return self
예제 #7
0
파일: video.py 프로젝트: visym/collector
 def quickshow(self, framerate=10, nocaption=False):
     """Show a clone of this video at the given framerate with mindim(256) applied.

     The video is fetched, cloned, and the clone is passed through
     framerate(framerate) and mindim(256) before show(nocaption=nocaption)
     is called.  Returns whatever show() returns.
     """
     message = "[pycollector.video]: setting quickshow input framerate=%d" % framerate
     print(message)

     preview = self.fetch().clone()
     preview = preview.framerate(framerate)
     preview = preview.mindim(256)
     return preview.show(nocaption=nocaption)
예제 #8
0
파일: video.py 프로젝트: visym/collector
    def _load_json(self):
        """Lazy JSON download, parse, and import"""

        # Already loaded?  Call once
        if self._is_json_loaded is not None:
            return self

        # Not downloaded?
        if not self.hasjson():
            self.fetchjson()

        # Parse JSON (with version error handling)
        jsonfile = self._jsonfile
        if jsonfile is not None and os.path.getsize(jsonfile) != 0:
            if self._verbose:
                print('[pycollector.video]:  Parsing "%s"' % jsonfile)

            d = readjson(jsonfile)
            if "collection_id" not in d["metadata"]:
                d["metadata"]["collection_id"] = d["metadata"][
                    "video_id"]  # android 1.1.1(3) bug

            for obj in d["object"]:
                if "label" not in obj:
                    obj["label"] = "person"  # android 1.1.1(3) bug
                if "label" in obj and obj["label"] == "vehicle":
                    # obj['label'] = 'person'  # all bug
                    pass
                for bb in obj["bounding_box"]:
                    if "frame_index" not in bb and "frameIndex" in bb:
                        bb["frame_index"] = bb[
                            "frameIndex"]  # android 1.1.1(3) bug

            d["metadata"]["rotate"] = None
            if d["metadata"]["orientation"] == "landscape":
                # d['metadata']['rotate'] = 'rot90cw'
                pass
            elif d["metadata"]["orientation"] == "portrait":
                # d['metadata']['rotate'] = 'rot90ccw'
                pass
            else:
                pass

            if "device_type" in d["metadata"] and "device_identifier" == "ios":
                d["metadata"][
                    "rotate"] = "rot90ccw"  # iOS (7) bug, different than iOS (6)

            # FIXME: "collected_date":"2020-06-19T18:34:33+0000" on both now
            try:
                uploaded = datetime.strptime(
                    d["metadata"]["collected_date"],
                    "%Y-%m-%d %H:%M:%S %z")  # iOS 1.0 (6)
            except:
                try:
                    uploaded = datetime.strptime(
                        d["metadata"]["collected_date"],
                        "%Y-%m-%d %I:%M:%S %p %z")  # bug number 55
                except:
                    uploaded = datetime.strptime(
                        d["metadata"]["collected_date"],
                        "%Y-%m-%dT%H:%M:%S%z")  # android 1.1.1 (3)

            # if isapi('v1'):
            #    d["metadata"]["collected_date"] = uploaded.strftime(
            #        "%Y-%m-%d %H:%M:%S"
            #    )
            # else:
            #    et = pytz.timezone("US/Eastern")
            #    d["metadata"]["collected_date"] = uploaded.astimezone(et).strftime("%Y-%m-%d %H:%M:%S")

            et = pytz.timezone("US/Eastern")
            d["metadata"]["app_collected_date"] = d["metadata"][
                "collected_date"]
            d["metadata"]["collected_date"] = uploaded.astimezone(et).strftime(
                "%Y-%m-%d %H:%M:%S")

        else:
            print('[pycollector.video]: empty JSON "%s"' % jsonfile)
            self._has_invalid_json = True  # for later filtering
            d = None

        # Backwards compatible video import: should not be necessary with new app release
        if d is not None and not "category" in d["metadata"]:
            vipy.util.try_import(
                "pycollector.admin.globals",
                message="Not authorized - Old style JSON requires admin access"
            )
            from pycollector.admin.globals import backend, isapi
            from pycollector.admin.legacy import applabel_to_longlabel, shortname_synonyms, applabel_to_piplabel

            # V1 - old collection name pattern
            if any([
                    d["metadata"]["collection_id"] in k
                    for k in applabel_to_piplabel().keys()
            ]):
                try:
                    d["metadata"]["collection_name"] = d["metadata"][
                        "collection_id"]
                    applabel = [
                        "%s_%s_%s" %
                        (d["metadata"]["project_id"],
                         d["metadata"]["collection_id"], a["label"])
                        for a in d["activity"]
                    ]
                    synonyms = shortname_synonyms()
                    applabel = [
                        a if
                        (a in applabel_to_piplabel()
                         or a in applabel_to_longlabel()) else "%s_%s_%s" %
                        (d["metadata"]["project_id"],
                         d["metadata"]["collection_id"],
                         synonyms[a.split("_")[2]]) for a in applabel
                    ]
                    d["metadata"]["category"] = ",".join([
                        applabel_to_piplabel()[a]
                        if a in applabel_to_piplabel() else
                        applabel_to_longlabel()[a] for a in applabel
                    ])
                    d["metadata"]["shortname"] = ",".join(
                        [a.split("_")[2] for a in applabel])
                except Exception as e:
                    print(
                        '[pycollector.video]: legacy json import failed for v1 JSON "%s" with metadata "%s" and error "%s"'
                        % (jsonfile, str(d["metadata"]), str(e)))
                    self._has_invalid_json = True  # for later filtering
                    d = None

            # V2 - new collection names, but activity names not in JSON
            elif isapi("v1") or isapi("v2"):
                version = "v1" if isapi("v1") else "v2"
                if version == "v1":
                    backend(org="str", env="prod",
                            version="v2")  # temporary switch

                if not backend().collections().iscollectionid(
                        d["metadata"]["collection_id"]):
                    print('[pycollector.video]: invalid collection ID "%s"' %
                          d["metadata"]["collection_id"])
                    self._has_invalid_json = True  # for later filtering
                    d = None
                elif len(d["activity"]) == 1 and len(
                        d["activity"][0]["label"]) == 0:
                    d["activity"] = []
                    d["metadata"]["category"] = ""
                    d["metadata"]["shortname"] = ""
                else:
                    try:
                        # Fetch labels from backend (with legacy shortname translation)
                        C = backend().collections()[d["metadata"]
                                                    ["collection_id"]]
                        d["metadata"]["collection_name"] = backend(
                        ).collections().id_to_name(
                            d["metadata"]["collection_id"])
                        shortnames = []
                        for a in d["activity"]:
                            if not (a["label"] in C.shortnames()
                                    or a["label"] in shortname_synonyms()):
                                raise ValueError(
                                    "Invalid shortname '%s' for collection shortnames '%s' and not in legacy synonyms '%s'"
                                    % (a["label"], str(C.shortnames()),
                                       str(shortname_synonyms())))
                            shortnames.append(
                                a["label"] if a["label"] in C.shortnames(
                                ) else shortname_synonyms()[a["label"]])
                        d["metadata"]["category"] = ",".join([
                            C.shortname_to_activity(s, strict=False)
                            for s in shortnames
                        ])
                        d["metadata"]["shortname"] = ",".join(
                            [s for s in shortnames])
                    except Exception as e:
                        print(
                            "[pycollector.video]: label fetch failed for %s in JSON '%s' with exception %s"
                            % (str(d["activity"]), jsonfile, str(e)))
                        self._has_invalid_json = True  # for later filtering
                        d = None

                if version == "v1":
                    backend(org="str", env="prod", version="v1")  # switch back
            else:
                print(
                    '[pycollector.video]: Legacy JSON import failed for JSON "%s" with metadata - "%s"'
                    % (jsonfile, str(d["metadata"])))
                self._has_invalid_json = True  # for later filtering
                d = None

        else:
            # New style JSON: use labels stored directly in JSON
            pass

        # Import JSON into scene
        if d is not None:

            # TODO - Replace with video_data
            collection_name = d["metadata"]["collection_name"]

            self.category(collection_name)
            self.attributes = {} if self.attributes is None else self.attributes
            self.attributes.update(d["metadata"])
            self.framerate(float(d["metadata"]["frame_rate"]))

            # FIXME: this videoID '20200421_1500081666724286' has low framerate.  Parsing is correct, but load() and show() is too fast
            # This requires explicitly setting output framerate in vipy.video

            # Import tracks
            d_trackid_to_track = {}
            for obj in d["object"]:

                keyboxes = [
                    BoundingBox(
                        xmin=bb["frame"]["x"],
                        ymin=bb["frame"]["y"],
                        width=bb["frame"]["width"],
                        height=bb["frame"]["height"],
                    ) for bb in sorted(obj["bounding_box"],
                                       key=lambda x: x["frame_index"])
                ]
                keyframes = [
                    bb["frame_index"]
                    for bb in sorted(obj["bounding_box"],
                                     key=lambda x: x["frame_index"])
                ]

                badboxes = [bb for bb in keyboxes if not bb.isvalid()]
                if len(badboxes) > 0:
                    print(
                        '[pycollector.video]: Removing %d bad keyboxes "%s" from "%s" for videoid=%s'
                        % (len(badboxes), str(badboxes), jsonfile,
                           d["metadata"]["video_id"]))
                if len(badboxes) == len(keyboxes):
                    raise ValueError("all keyboxes in track are invalid")

                t = Track(
                    category=obj["label"],
                    framerate=float(d["metadata"]["frame_rate"]),
                    keyframes=[
                        int(f) for (f, bb) in zip(keyframes, keyboxes)
                        if bb.isvalid()
                    ],
                    boxes=[
                        bb for (f, bb) in zip(keyframes, keyboxes)
                        if bb.isvalid()
                    ],
                    boundary="strict",
                )
                if vipy.version.is_at_least("0.8.3"):
                    self.add(
                        t, rangecheck=False
                    )  # no rangecheck since all tracks are guaranteed to be within image rectangle
                else:
                    self.add(t)
                d_trackid_to_track[t.id()] = t

            # Category variants:  a_category_name#Variant1=A&Joint=a_joint_label:Short Label&Variant2=B
            variant = {}
            d_shortname_to_category = {
                s: c
                for (s, c) in zip(d["metadata"]["shortname"].split(","),
                                  d["metadata"]["category"].split(","))
            }
            if "#" in d["metadata"]["category"]:
                d_shortname_to_category = {
                    s: c.split("#")[0]
                    for (s, c) in d_shortname_to_category.items()
                }  # shortname and category may be empty
                variantlist = list(
                    set([
                        c.split("#")[1] if "#" in c else None
                        for c in d["metadata"]["category"].split(",")
                    ]))
                if len(variantlist) != 1:
                    print(
                        '[pycollector.video]: WARNING - Ignoring mixed variant "%s"'
                        % str(variantlist))
                elif all([len(v) == 0 for v in variantlist]):
                    pass  # empty variant
                elif any([
                        "=" not in v or v.count("&") != (v.count("=") - 1)
                        for v in variantlist
                ]):
                    print(
                        '[pycollector.video]: WARNING - Ignoring invalid variant "%s"'
                        % str(variantlist))
                else:
                    variant = {
                        k.split("=")[0]: k.split("=")[1]
                        for k in variantlist[0].split("&")
                    }
            self.attributes["variant"] = variant

            # Import activities
            for a in d["activity"]:
                try:
                    # Legacy shortname display
                    if a["label"] not in d_shortname_to_category:
                        from pycollector.admin.legacy import shortname_synonyms  # legacy import
                        if a["label"] not in shortname_synonyms():
                            raise ValueError(
                                "Invalid shortname '%s' for collection shortnames '%s' and not in legacy synonyms '%s'"
                                % (a["label"], d_shortname_to_category,
                                   str(shortname_synonyms())))
                        a["label"] = a["label"] if a[
                            "label"] in d_shortname_to_category else shortname_synonyms(
                            )[a["label"]]  # legacy translation
                    if d["metadata"]["collection_id"] == "P004C009" and d[
                            "metadata"]["device_identifier"] == "android":
                        shortlabel = "Buying (Machine)"
                    elif d["metadata"]["collection_id"] == "P004C008" and d[
                            "metadata"][
                                "device_identifier"] == "ios" and "Purchasing" in a[
                                    "label"]:
                        # BUG: iOS (11) reports wrong collection id for "purchase something from a machine" as P004C008 instead of P004C009
                        shortlabel = "Buying (Machine)"
                    elif d["metadata"]["collection_id"] == "P004C009" and d[
                            "metadata"]["device_identifier"] == "ios":
                        # BUG: iOS (11) reports wrong collection id for "pickup and dropoff with bike messenger" as P004C009 instead of P004C010
                        shortlabel = a["label"]  # unchanged
                    elif d["metadata"]["collection_id"] == "P005C003":
                        shortlabel = "Buying (Cashier)"
                    else:
                        shortlabel = a["label"]

                    # category = backend().collection()[d["metadata"]["collection_id"]].shortname_to_activity(a["label"])
                    category = d_shortname_to_category[a["label"]]
                    self.add(
                        vipy.activity.Activity(
                            category=category,
                            shortlabel=shortlabel,
                            startframe=int(a["start_frame"]),
                            endframe=int(a["end_frame"]),
                            tracks=d_trackid_to_track,
                            actorid=None if len(d_trackid_to_track) == 0 else
                            list(d_trackid_to_track.keys())
                            [0],  # by insertion order
                            framerate=d["metadata"]["frame_rate"],
                        ))

                except Exception as e:
                    print(
                        '[pycollector.video]: Filtering invalid activity "%s" from JSON "%s" with error "%s" for videoid=%s'
                        %
                        (str(a), jsonfile, str(e), d["metadata"]["video_id"]))
                    self._has_invalid_json = True  # for later filtering

            # Joint activity?  Occurs simultaneously with any JSON defined activities
            if "Joint" in variant:
                self.add(
                    vipy.activity.Activity(
                        category=variant["Joint"].split(":")[0],
                        shortlabel=variant["Joint"].split(":")[1]
                        if ":" in variant["Joint"] else None,
                        startframe=min([
                            int(a["start_frame"]) for a in d["activity"]
                        ]) if len(d["activity"]) > 0 else 0,
                        endframe=max([
                            int(a["end_frame"]) for a in d["activity"]
                        ]) if len(d["activity"]) > 0 else int(
                            np.round(
                                float(d["metadata"]["duration"]) *
                                float(d["metadata"]["frame_rate"]))),
                        tracks=d_trackid_to_track,
                        actorid=None if len(d_trackid_to_track) == 0 else list(
                            d_trackid_to_track.keys())
                        [0],  # by insertion order                        
                        framerate=d["metadata"]["frame_rate"],
                    ))

            if d["metadata"]["rotate"] == "rot90ccw":
                self.rot90ccw()
            elif d["metadata"]["rotate"] == "rot90cw":
                self.rot90cw()

            self._is_json_loaded = True

            # Minimum dimension of video for reasonably fast interactions (must happen after JSON load to get frame size from JSON)
            if self._mindim is not None:
                if "frame_width" in self.metadata(
                ) and "frame_height" in self.metadata():  # older JSON bug
                    (W, H) = (int(self.metadata()["frame_width"]),
                              int(self.metadata()["frame_height"])
                              )  # from device
                    s = float(min(W, H))
                    if s > 256:
                        newrows = int(np.round(H) * (self._mindim / float(s)))
                        newcols = int(np.round(W) * (self._mindim / float(s)))
                        self.shape(shape=(H, W)).resize(
                            rows=newrows, cols=newcols
                        )  # manually set shape to avoid preview(), does not require load
                    else:
                        print(
                            "[pycollector.video]: Filtering Invalid JSON (height, width)"
                        )
                        self._is_json_loaded = False
                        self._has_invalid_json = True  # for later filtering
                else:
                    assert vipy.version.is_at_least("0.8.0")
                    self.clear()  # remove this old video from consideration
                    self._is_json_loaded = False
                    self._has_invalid_json = True  # for later filtering
        else:
            print("[pycollector.video]: JSON '%s' load failed - SKIPPING" %
                  jsonfile)
            self._is_json_loaded = False
            self._has_invalid_json = True  # for later filtering

        # Resample tracks
        if self._dt > 1 and self._is_json_loaded:
            self.trackmap(lambda t: t.resample(self._dt).significant_digits(2))

        assert vipy.version.is_at_least("1.8.34")
        self.trackmap(lambda t: t.significant_digits(2))
        return self