def device(self, outfile=None, n=24, fontsize=7):
    """Count videos per device type, keeping the n most frequent types and pooling the rest.

    Videos that have no 'device_type' attribute, or whose device type is
    'unrecognized', are not counted.

    Args:
        outfile: Optional output path. When not None, the counts are drawn with vipy.metrics.pie.
        n: Number of most frequent device types reported individually. n <= 0 keeps none of them.
        fontsize: Font size forwarded to the pie chart.

    Returns:
        A dict of device type -> count, in ascending order of count. It always
        contains an 'Other' key holding the summed count of every device type
        outside the top n.
    """
    assert self._isvipy()
    d_all = vipy.util.countby(
        [
            v for v in self.list()
            if v.hasattribute('device_type') and v.attributes['device_type'] != 'unrecognized'
        ],
        lambda v: v.attributes['device_type'],
    )

    ranked = sorted(d_all.items(), key=lambda x: x[1])
    # ranked[-0:] is the whole list, so a non-positive n must be handled explicitly.
    topk = [k for (k, _) in ranked[-n:]] if n > 0 else []
    topk_set = set(topk)  # built once; membership is tested for every device type below

    other = np.sum([v for (k, v) in d_all.items() if k not in topk_set])
    d = {k: v for (k, v) in d_all.items() if k in topk_set}
    d.update({'Other': other})
    d = dict(sorted(d.items(), key=lambda x: x[1]))

    print('[vipy.dataset.device]: Device types = %d ' % len(d_all))
    print('[vipy.dataset.device]: Top-%d Device types = %s ' % (n, str(topk)))
    if outfile is not None:
        from vipy.metrics import pie
        pie(d.values(), d.keys(), explode=None, outfile=outfile, shadow=False,
            legend=False, fontsize=fontsize, rotatelabels=False)
    return d
def collectors(self, outfile=None):
    """Count submissions per collector and print how many collectors reach each volume threshold.

    Args:
        outfile: Optional output path. When not None (and at least one collector
            exists), the per-collector counts are drawn, largest first, with
            vipy.metrics.histogram.

    Returns:
        The dict of collector_id -> number of videos produced by vipy.util.countby.
    """
    assert self._isvipy()
    d = vipy.util.countby(self.list(), lambda v: v.attributes['collector_id'])

    def at_least(threshold):
        # Number of collectors whose submission count is >= threshold.
        return sum(1 for count in d.values() if int(count) >= threshold)

    # The comparison is inclusive, so the labels say ">=" to match the numbers printed.
    print('[vipy.dataset]: Collectors = %d ' % at_least(0))
    print('[vipy.dataset]: Collectors with >=10 submissions = %d' % at_least(10))
    print('[vipy.dataset]: Collectors with >=100 submissions = %d' % at_least(100))
    print('[vipy.dataset]: Collectors with >=1000 submissions = %d' % at_least(1000))
    print('[vipy.dataset]: Collectors with >=10000 submissions = %d' % at_least(10000))

    # zip(*[]) cannot be unpacked into (k, v), so skip the plot when there is nothing to draw.
    if outfile is not None and len(d) > 0:
        from vipy.metrics import histogram
        (k, v) = zip(*sorted(d.items(), key=lambda x: x[1], reverse=True))
        histogram(v, list(range(len(k))), outfile=outfile, ylabel='Submissions',
                  xlabel='Collector', xrot='vertical', fontsize=3, xshow=False)
    return d
def fetch(self, ignoreErrors=False):
    """Fetch the annotation JSON (when needed) and then the video.

    The JSON is fetched and loaded only if hasjson() is false or the JSON has
    not been loaded yet (self._is_json_loaded is None). Any exception raised
    while doing so, other than KeyboardInterrupt, is printed and skipped.

    Args:
        ignoreErrors: Accepted for interface compatibility; not referenced here.

    Returns:
        Whatever self.fetchvideo() returns.
    """
    json_missing_or_unloaded = (not self.hasjson()) or (self._is_json_loaded is None)
    if json_missing_or_unloaded:
        try:
            self.fetchjson()  # Do we need this?
            self._load_json()
        except KeyboardInterrupt:
            raise
        except Exception as err:
            print('[pycollector.video]: fetch error "%s" - SKIPPING' % str(err))
    return self.fetchvideo()
def os(self, outfile=None):
    """Count videos per 'device_identifier' attribute value.

    Videos without a 'device_identifier' attribute are skipped.

    Args:
        outfile: Optional output path. When not None, the counts are drawn with vipy.metrics.pie.

    Returns:
        The dict of device_identifier -> number of videos produced by vipy.util.countby.
    """
    assert self._isvipy()
    tagged = [v for v in self.list() if v.hasattribute('device_identifier')]
    counts = vipy.util.countby(tagged, lambda v: v.attributes['device_identifier'])
    print('[vipy.dataset]: Device OS = %d ' % len(counts))
    if outfile is None:
        return counts

    from vipy.metrics import pie
    pie(counts.values(), counts.keys(), explode=None, outfile=outfile, shadow=False)
    return counts
def quicklooks(self, n=9, dilate=1.5, mindim=256, fontsize=10, context=True):
    """Return a list with one montage quicklook per activity clip of this video.

    The video is fetched first; every clip returned by activityclip() is then
    asked for its quicklook(), with all keyword arguments forwarded unchanged.
    Each element is presumably a vipy.image.Image, as the usage below relies
    on saveas().

    Usage:

    >>> filenames = [im.saveas('/path/to/quicklook.jpg') for im in self.quicklooks()]

    """
    assert vipy.version.is_at_least("0.8.2")
    print('[pycollector.video]: Generating quicklooks for video "%s"' % self.videoid())
    montages = []
    for clip in self.fetch().activityclip():
        montages.append(
            clip.quicklook(n=n, dilate=dilate, mindim=mindim, fontsize=fontsize, context=context)
        )
    return montages
def fetchjson(self):
    """Download the annotation JSON into the local cache if no JSON path is set yet.

    When self._jsonfile is None, it is pointed at <cache>/<filetail of self._jsonurl>,
    where <cache> is $VIPY_CACHE if that environment variable is set and
    tempdir() otherwise. The file is then downloaded from self._jsonurl unless
    it already exists at that path. When self._jsonfile is already set, nothing
    is done.

    Download errors other than KeyboardInterrupt are printed and skipped.

    Returns:
        self
    """
    if self._jsonfile is None:
        cachedir = os.environ["VIPY_CACHE"] if "VIPY_CACHE" in os.environ else tempdir()
        self._jsonfile = os.path.join(remkdir(cachedir), filetail(self._jsonurl))
        if not os.path.exists(self._jsonfile):
            print('[pycollector.video]: Fetching "%s"' % self._jsonurl)
            try:
                # TODO - this is a problem to assume vipy user also has access to S3.
                # We should decouple this dependency of using vipy
                vipy.downloader.s3(self._jsonurl, self._jsonfile)
            except KeyboardInterrupt:
                raise
            except Exception as e:
                # NOTE(review): the failure is only logged. self._jsonfile keeps pointing at
                # the (missing) cache path; the previous code assigned an unused local here,
                # so nothing was ever recorded on the instance. Confirm whether callers
                # should see self._jsonfile reset instead.
                print('[pycollector.video]: S3 download error "%s" - SKIPPING' % str(e))
    return self
def quickshow(self, framerate=10, nocaption=False):
    """Show a clone of the fetched video at the given framerate and a minimum dimension of 256.

    Args:
        framerate: Framerate applied to the clone before showing.
        nocaption: Forwarded to show().

    Returns:
        Whatever show() returns.
    """
    print("[pycollector.video]: setting quickshow input framerate=%d" % framerate)
    preview = self.fetch().clone()
    preview = preview.framerate(framerate)
    preview = preview.mindim(256)
    return preview.show(nocaption=nocaption)
def _load_json(self): """Lazy JSON download, parse, and import""" # Already loaded? Call once if self._is_json_loaded is not None: return self # Not downloaded? if not self.hasjson(): self.fetchjson() # Parse JSON (with version error handling) jsonfile = self._jsonfile if jsonfile is not None and os.path.getsize(jsonfile) != 0: if self._verbose: print('[pycollector.video]: Parsing "%s"' % jsonfile) d = readjson(jsonfile) if "collection_id" not in d["metadata"]: d["metadata"]["collection_id"] = d["metadata"][ "video_id"] # android 1.1.1(3) bug for obj in d["object"]: if "label" not in obj: obj["label"] = "person" # android 1.1.1(3) bug if "label" in obj and obj["label"] == "vehicle": # obj['label'] = 'person' # all bug pass for bb in obj["bounding_box"]: if "frame_index" not in bb and "frameIndex" in bb: bb["frame_index"] = bb[ "frameIndex"] # android 1.1.1(3) bug d["metadata"]["rotate"] = None if d["metadata"]["orientation"] == "landscape": # d['metadata']['rotate'] = 'rot90cw' pass elif d["metadata"]["orientation"] == "portrait": # d['metadata']['rotate'] = 'rot90ccw' pass else: pass if "device_type" in d["metadata"] and "device_identifier" == "ios": d["metadata"][ "rotate"] = "rot90ccw" # iOS (7) bug, different than iOS (6) # FIXME: "collected_date":"2020-06-19T18:34:33+0000" on both now try: uploaded = datetime.strptime( d["metadata"]["collected_date"], "%Y-%m-%d %H:%M:%S %z") # iOS 1.0 (6) except: try: uploaded = datetime.strptime( d["metadata"]["collected_date"], "%Y-%m-%d %I:%M:%S %p %z") # bug number 55 except: uploaded = datetime.strptime( d["metadata"]["collected_date"], "%Y-%m-%dT%H:%M:%S%z") # android 1.1.1 (3) # if isapi('v1'): # d["metadata"]["collected_date"] = uploaded.strftime( # "%Y-%m-%d %H:%M:%S" # ) # else: # et = pytz.timezone("US/Eastern") # d["metadata"]["collected_date"] = uploaded.astimezone(et).strftime("%Y-%m-%d %H:%M:%S") et = pytz.timezone("US/Eastern") d["metadata"]["app_collected_date"] = d["metadata"][ "collected_date"] 
d["metadata"]["collected_date"] = uploaded.astimezone(et).strftime( "%Y-%m-%d %H:%M:%S") else: print('[pycollector.video]: empty JSON "%s"' % jsonfile) self._has_invalid_json = True # for later filtering d = None # Backwards compatible video import: should not be necessary with new app release if d is not None and not "category" in d["metadata"]: vipy.util.try_import( "pycollector.admin.globals", message="Not authorized - Old style JSON requires admin access" ) from pycollector.admin.globals import backend, isapi from pycollector.admin.legacy import applabel_to_longlabel, shortname_synonyms, applabel_to_piplabel # V1 - old collection name pattern if any([ d["metadata"]["collection_id"] in k for k in applabel_to_piplabel().keys() ]): try: d["metadata"]["collection_name"] = d["metadata"][ "collection_id"] applabel = [ "%s_%s_%s" % (d["metadata"]["project_id"], d["metadata"]["collection_id"], a["label"]) for a in d["activity"] ] synonyms = shortname_synonyms() applabel = [ a if (a in applabel_to_piplabel() or a in applabel_to_longlabel()) else "%s_%s_%s" % (d["metadata"]["project_id"], d["metadata"]["collection_id"], synonyms[a.split("_")[2]]) for a in applabel ] d["metadata"]["category"] = ",".join([ applabel_to_piplabel()[a] if a in applabel_to_piplabel() else applabel_to_longlabel()[a] for a in applabel ]) d["metadata"]["shortname"] = ",".join( [a.split("_")[2] for a in applabel]) except Exception as e: print( '[pycollector.video]: legacy json import failed for v1 JSON "%s" with metadata "%s" and error "%s"' % (jsonfile, str(d["metadata"]), str(e))) self._has_invalid_json = True # for later filtering d = None # V2 - new collection names, but activity names not in JSON elif isapi("v1") or isapi("v2"): version = "v1" if isapi("v1") else "v2" if version == "v1": backend(org="str", env="prod", version="v2") # temporary switch if not backend().collections().iscollectionid( d["metadata"]["collection_id"]): print('[pycollector.video]: invalid collection ID "%s"' % 
d["metadata"]["collection_id"]) self._has_invalid_json = True # for later filtering d = None elif len(d["activity"]) == 1 and len( d["activity"][0]["label"]) == 0: d["activity"] = [] d["metadata"]["category"] = "" d["metadata"]["shortname"] = "" else: try: # Fetch labels from backend (with legacy shortname translation) C = backend().collections()[d["metadata"] ["collection_id"]] d["metadata"]["collection_name"] = backend( ).collections().id_to_name( d["metadata"]["collection_id"]) shortnames = [] for a in d["activity"]: if not (a["label"] in C.shortnames() or a["label"] in shortname_synonyms()): raise ValueError( "Invalid shortname '%s' for collection shortnames '%s' and not in legacy synonyms '%s'" % (a["label"], str(C.shortnames()), str(shortname_synonyms()))) shortnames.append( a["label"] if a["label"] in C.shortnames( ) else shortname_synonyms()[a["label"]]) d["metadata"]["category"] = ",".join([ C.shortname_to_activity(s, strict=False) for s in shortnames ]) d["metadata"]["shortname"] = ",".join( [s for s in shortnames]) except Exception as e: print( "[pycollector.video]: label fetch failed for %s in JSON '%s' with exception %s" % (str(d["activity"]), jsonfile, str(e))) self._has_invalid_json = True # for later filtering d = None if version == "v1": backend(org="str", env="prod", version="v1") # switch back else: print( '[pycollector.video]: Legacy JSON import failed for JSON "%s" with metadata - "%s"' % (jsonfile, str(d["metadata"]))) self._has_invalid_json = True # for later filtering d = None else: # New style JSON: use labels stored directly in JSON pass # Import JSON into scene if d is not None: # TODO - Replace with video_data collection_name = d["metadata"]["collection_name"] self.category(collection_name) self.attributes = {} if self.attributes is None else self.attributes self.attributes.update(d["metadata"]) self.framerate(float(d["metadata"]["frame_rate"])) # FIXME: this videoID '20200421_1500081666724286' has low framerate. 
Parsing is correct, but load() and show() is too fast # This requires explicitly setting output framerate in vipy.video # Import tracks d_trackid_to_track = {} for obj in d["object"]: keyboxes = [ BoundingBox( xmin=bb["frame"]["x"], ymin=bb["frame"]["y"], width=bb["frame"]["width"], height=bb["frame"]["height"], ) for bb in sorted(obj["bounding_box"], key=lambda x: x["frame_index"]) ] keyframes = [ bb["frame_index"] for bb in sorted(obj["bounding_box"], key=lambda x: x["frame_index"]) ] badboxes = [bb for bb in keyboxes if not bb.isvalid()] if len(badboxes) > 0: print( '[pycollector.video]: Removing %d bad keyboxes "%s" from "%s" for videoid=%s' % (len(badboxes), str(badboxes), jsonfile, d["metadata"]["video_id"])) if len(badboxes) == len(keyboxes): raise ValueError("all keyboxes in track are invalid") t = Track( category=obj["label"], framerate=float(d["metadata"]["frame_rate"]), keyframes=[ int(f) for (f, bb) in zip(keyframes, keyboxes) if bb.isvalid() ], boxes=[ bb for (f, bb) in zip(keyframes, keyboxes) if bb.isvalid() ], boundary="strict", ) if vipy.version.is_at_least("0.8.3"): self.add( t, rangecheck=False ) # no rangecheck since all tracks are guaranteed to be within image rectangle else: self.add(t) d_trackid_to_track[t.id()] = t # Category variants: a_category_name#Variant1=A&Joint=a_joint_label:Short Label&Variant2=B variant = {} d_shortname_to_category = { s: c for (s, c) in zip(d["metadata"]["shortname"].split(","), d["metadata"]["category"].split(",")) } if "#" in d["metadata"]["category"]: d_shortname_to_category = { s: c.split("#")[0] for (s, c) in d_shortname_to_category.items() } # shortname and category may be empty variantlist = list( set([ c.split("#")[1] if "#" in c else None for c in d["metadata"]["category"].split(",") ])) if len(variantlist) != 1: print( '[pycollector.video]: WARNING - Ignoring mixed variant "%s"' % str(variantlist)) elif all([len(v) == 0 for v in variantlist]): pass # empty variant elif any([ "=" not in v or v.count("&") 
!= (v.count("=") - 1) for v in variantlist ]): print( '[pycollector.video]: WARNING - Ignoring invalid variant "%s"' % str(variantlist)) else: variant = { k.split("=")[0]: k.split("=")[1] for k in variantlist[0].split("&") } self.attributes["variant"] = variant # Import activities for a in d["activity"]: try: # Legacy shortname display if a["label"] not in d_shortname_to_category: from pycollector.admin.legacy import shortname_synonyms # legacy import if a["label"] not in shortname_synonyms(): raise ValueError( "Invalid shortname '%s' for collection shortnames '%s' and not in legacy synonyms '%s'" % (a["label"], d_shortname_to_category, str(shortname_synonyms()))) a["label"] = a["label"] if a[ "label"] in d_shortname_to_category else shortname_synonyms( )[a["label"]] # legacy translation if d["metadata"]["collection_id"] == "P004C009" and d[ "metadata"]["device_identifier"] == "android": shortlabel = "Buying (Machine)" elif d["metadata"]["collection_id"] == "P004C008" and d[ "metadata"][ "device_identifier"] == "ios" and "Purchasing" in a[ "label"]: # BUG: iOS (11) reports wrong collection id for "purchase something from a machine" as P004C008 instead of P004C009 shortlabel = "Buying (Machine)" elif d["metadata"]["collection_id"] == "P004C009" and d[ "metadata"]["device_identifier"] == "ios": # BUG: iOS (11) reports wrong collection id for "pickup and dropoff with bike messenger" as P004C009 instead of P004C010 shortlabel = a["label"] # unchanged elif d["metadata"]["collection_id"] == "P005C003": shortlabel = "Buying (Cashier)" else: shortlabel = a["label"] # category = backend().collection()[d["metadata"]["collection_id"]].shortname_to_activity(a["label"]) category = d_shortname_to_category[a["label"]] self.add( vipy.activity.Activity( category=category, shortlabel=shortlabel, startframe=int(a["start_frame"]), endframe=int(a["end_frame"]), tracks=d_trackid_to_track, actorid=None if len(d_trackid_to_track) == 0 else list(d_trackid_to_track.keys()) [0], # by 
insertion order framerate=d["metadata"]["frame_rate"], )) except Exception as e: print( '[pycollector.video]: Filtering invalid activity "%s" from JSON "%s" with error "%s" for videoid=%s' % (str(a), jsonfile, str(e), d["metadata"]["video_id"])) self._has_invalid_json = True # for later filtering # Joint activity? Occurs simultaneously with any JSON defined activities if "Joint" in variant: self.add( vipy.activity.Activity( category=variant["Joint"].split(":")[0], shortlabel=variant["Joint"].split(":")[1] if ":" in variant["Joint"] else None, startframe=min([ int(a["start_frame"]) for a in d["activity"] ]) if len(d["activity"]) > 0 else 0, endframe=max([ int(a["end_frame"]) for a in d["activity"] ]) if len(d["activity"]) > 0 else int( np.round( float(d["metadata"]["duration"]) * float(d["metadata"]["frame_rate"]))), tracks=d_trackid_to_track, actorid=None if len(d_trackid_to_track) == 0 else list( d_trackid_to_track.keys()) [0], # by insertion order framerate=d["metadata"]["frame_rate"], )) if d["metadata"]["rotate"] == "rot90ccw": self.rot90ccw() elif d["metadata"]["rotate"] == "rot90cw": self.rot90cw() self._is_json_loaded = True # Minimum dimension of video for reasonably fast interactions (must happen after JSON load to get frame size from JSON) if self._mindim is not None: if "frame_width" in self.metadata( ) and "frame_height" in self.metadata(): # older JSON bug (W, H) = (int(self.metadata()["frame_width"]), int(self.metadata()["frame_height"]) ) # from device s = float(min(W, H)) if s > 256: newrows = int(np.round(H) * (self._mindim / float(s))) newcols = int(np.round(W) * (self._mindim / float(s))) self.shape(shape=(H, W)).resize( rows=newrows, cols=newcols ) # manually set shape to avoid preview(), does not require load else: print( "[pycollector.video]: Filtering Invalid JSON (height, width)" ) self._is_json_loaded = False self._has_invalid_json = True # for later filtering else: assert vipy.version.is_at_least("0.8.0") self.clear() # remove this old 
video from consideration self._is_json_loaded = False self._has_invalid_json = True # for later filtering else: print("[pycollector.video]: JSON '%s' load failed - SKIPPING" % jsonfile) self._is_json_loaded = False self._has_invalid_json = True # for later filtering # Resample tracks if self._dt > 1 and self._is_json_loaded: self.trackmap(lambda t: t.resample(self._dt).significant_digits(2)) assert vipy.version.is_at_least("1.8.34") self.trackmap(lambda t: t.significant_digits(2)) return self