def __init__(self):
    """Build a MediaProcess object and open its Mongo handles.

    Runs the parent initializer, then opens a Mongo connection and keeps
    references to the `BetterMedia` database and its `crowdservice`
    collection on the instance.
    """
    cherrylog("Buiding Media Process object")
    super(MediaProcess, self).__init__()  # Parent init must run first.
    self.source_name = ""
    # Per the original author's note, the connection is torn down in the
    # parent's destructor, so it is not closed here.
    self.mongo_connection = connection = Connection()
    self.bettermedia = database = connection['BetterMedia']  # `BetterMedia` database
    self.my_collection = database.crowdservice  # `crowdservice` collection
def splice_video_into_images(videoInputPath, videoName, processedVideoPath, image_dir, video_stats):
    """Extract the frames of a video into png and jpg images with ffmpeg.

    Images are written to
    ``<processedVideoPath>/<hashstring>/<image_dir>/image%06d.<ext>``.
    The png set is produced first, then a jpg set for web hosting. The
    source video is removed only after both ffmpeg runs succeeded.

    Arguments:
    videoInputPath -- directory containing the source video.
    videoName -- file name of the source video inside videoInputPath.
    processedVideoPath -- root directory for processed output.
    image_dir -- name of the image sub-directory below the hash directory.
    video_stats -- dict with 'hashstring', 'width' and 'height'; width and
                   height must be strings (they are joined with "x").

    Returns True on success, False if any ffmpeg run exits non-zero (the
    source video is kept in that case so the extraction can be retried).
    """
    full_path = videoInputPath + "/" + videoName
    frame_size = video_stats['width'] + "x" + video_stats['height']
    output_dir = "%s/%s/%s" % (processedVideoPath, video_stats['hashstring'], image_dir)

    # FPS is intentionally not set: all frames are wanted.
    for extension in ("png", "jpg"):
        output_pattern = "%s/image%%06d.%s" % (output_dir, extension)
        # Build the argument list directly instead of quoting a command
        # string and re-splitting it, so paths containing quotes or
        # backslashes cannot alter the arguments handed to ffmpeg.
        args = ["ffmpeg", "-i", full_path, "-s", frame_size, output_pattern]
        cherrylog("running ffmpeg -> " + " ".join(args))
        # Arguments are utf-8 encoded, as the original command string was.
        return_code = subprocess.call([arg.encode('utf-8') for arg in args])
        if return_code != 0:
            cherrylog("ffmpeg failed with exit code " + str(return_code) +
                      " while building " + extension + " images; keeping " + full_path)
            return False

    os.remove(full_path)
    return True
def get_md5(path, running_os):
    """Return the hexadecimal MD5 digest of the file at `path`.

    The digest is computed in-process with hashlib rather than by parsing
    the output of the platform's `md5sum` / `md5` command, so the result
    does not depend on the command's output format or on how the path is
    quoted.

    Arguments:
    path -- path of the file to hash.
    running_os -- no longer used; kept so existing callers keep working.

    Returns the 32-character hex digest string.
    """
    import hashlib

    digest = hashlib.md5()
    with open(path, "rb") as hashed_file:
        # Read in chunks so large videos are never loaded into memory at once.
        for chunk in iter(lambda: hashed_file.read(1024 * 1024), b""):
            digest.update(chunk)
    video_md5sum = digest.hexdigest()
    cherrylog("Got video md5sum of " + video_md5sum)
    return video_md5sum
def validate_rebuild_report_params(video_json):
    """Check the parameters of a video rebuildReport command.

    Every key of `video_json` must be listed in MVideo.REBUILD_PARAMS and
    the number of keys must equal the number of expected parameters.

    Returns True when both checks pass, False otherwise.
    """
    if not all(k in MVideo.REBUILD_PARAMS for k in video_json):
        cherrylog("Video rebuildReport command is missing params")
        return False  # was `FALSE`, which raised NameError
    if len(video_json.keys()) != len(MVideo.REBUILD_PARAMS):
        cherrylog("Video rebuildReport command: wrong number of params")
        return False
    return True
def validate_reprocess_metadata_params(video_json):
    """Check the parameters of a video reprocessMetadata command.

    Every key of `video_json` must be listed in MVideo.REPROCESS_PARAMS and
    the number of keys must equal the number of expected parameters.

    Returns True when both checks pass, False otherwise.
    """
    if not all(k in MVideo.REPROCESS_PARAMS for k in video_json):
        cherrylog("Video reprocessMetadata command is missing params")
        return False  # was `FALSE`, which raised NameError
    if len(video_json.keys()) != len(MVideo.REPROCESS_PARAMS):
        cherrylog("Video reprocessMetadata command: wrong number of params")
        return False
    return True
def validate_analyze_params(video_json):
    """Check that `video_json` holds exactly the analyze parameters.

    Returns False (after logging) when a name from MVideo.ANALYZE_PARAMS
    is absent or when the number of keys differs from the number of
    expected parameters; True otherwise.
    """
    expected = MVideo.ANALYZE_PARAMS

    if any(k not in video_json for k in expected):
        cherrylog("Video lookup is missing a parameter")
        return False

    key_count = len(video_json.keys())
    if key_count != len(expected):
        cherrylog("wrong number of parameters for video ")
        return False

    return True
def validate_submit_params(video_json):
    """Check that `video_json` holds exactly the submit parameters.

    Returns False (after logging) when a name from MVideo.SUBMIT_PARAMS is
    absent or when the number of keys differs from the number of expected
    parameters; True otherwise.
    """
    expected = MVideo.SUBMIT_PARAMS

    if any(k not in video_json for k in expected):
        cherrylog("Video submit is missing a parameter")
        return False

    key_count = len(video_json.keys())
    if key_count != len(expected):
        cherrylog("wrong number of parameters for video ")
        return False

    return True
def __init__(self, collection=""):
    """Set up an empty media object.

    Arguments:
    collection -- optional Mongo collection object to use; defaults to an
                  empty string placeholder.

    The Mongo connection and database attributes start as empty-string
    placeholders; per the original author's note they are built only when
    needed, for safety / performance.
    """
    class_name = str(self.__class__.__name__)
    cherrylog("Created Media Object of type: " + class_name)

    # Mongo handles (placeholders until they are actually needed).
    self.mongo_connection = ""        # Mongo connection object
    self.bettermedia = ""             # Mongo BetterMedia database
    self.my_collection = collection   # Collection object, if one was given

    # Object state.
    self.attributes = {}  # Important data for this object; persisted to Mongo
    self.doc_id = ""      # Reference to the mongo document ID
def set_env_configs(config_dir):
    """Load the environment name and per-environment configs into `configs`.

    Reads `<config_dir>/environment.yml` to determine the environment,
    falling back to DEFAULT_ENV if that fails for any reason. Then, for
    every name in `config_files`, loads `<config_dir>/<name>.yml` and
    stores the section for the current environment under `configs[name]`.

    Arguments:
    config_dir -- directory containing the yaml configuration files.

    Errors while loading the per-name config files are not caught.
    """
    # NOTE(review): yaml.load can construct arbitrary Python objects; if
    # these files could ever come from an untrusted source, switch to
    # yaml.safe_load.
    try:
        with open(config_dir + '/environment.yml') as env_file:
            configs['environment'] = str(yaml.load(env_file))
        cherrylog("Current Environment = " + configs['environment'])
    except Exception:
        # Deliberate best-effort: any failure selects the default environment.
        cherrylog("Couldn't load environment file.  Using default of " + DEFAULT_ENV)
        configs['environment'] = DEFAULT_ENV

    for i in config_files:
        with open(config_dir + "/" + i + '.yml') as config_file:
            configs[i] = yaml.load(config_file)[configs['environment']]
def validate_update_params(video_json):
    """Log when `video_json` has a key outside MVideo.UPDATE_PARAMS.

    Always returns True: an unknown key is only logged, never rejected.
    """
    # NOTE(review): stricter checks (missing parameter, wrong parameter
    # count) were commented out by the original author, and this remaining
    # check never returns False. Confirm whether unknown keys should
    # really be accepted.
    has_unknown_key = any(k not in MVideo.UPDATE_PARAMS for k in video_json)
    if has_unknown_key:
        cherrylog("Video update parameter is not valid")
    return True
def start_timed_process(self, **kwargs):
    """Repeatedly invoke a callable while `self.running` is true.

    Keyword arguments:
    called_function -- the callable to invoke; its __name__ is used to
                       look up the repeat interval via get_repeat_time.

    The loop sleeps `self.timed_processes_shutdown_time` per iteration and
    advances a counter by the same amount. Once the counter reaches the
    repeat interval the callable is invoked and the counter is reset.
    """
    target = kwargs['called_function']
    function_name = str(target.__name__)
    repeat_time = self.get_repeat_time(function_name)
    elapsed = 0

    time.sleep(self.timed_processes_shutdown_time)
    while self.running:
        # Periodic progress message.
        if elapsed % self.print_timed_process_update == 0:
            cherrylog("Checking timed process: " + str(function_name) + " Current time: " + str(elapsed) + " and interval timer: " + str(repeat_time))

        if elapsed < repeat_time:
            elapsed += self.timed_processes_shutdown_time
        else:
            elapsed = 0
            target()

        time.sleep(self.timed_processes_shutdown_time)
def __init__(self):
    """Create an MVideo object and open its Mongo handles.

    Opens a Mongo connection and keeps references to the `BetterMedia`
    database and its `image`, `scene` and `video` collections.
    """
    cherrylog("Created Media Object of type: " + str(self.__class__.__name__))

    # Per the original author's note, the connection is closed by the
    # parent destructor, so it is not closed here.
    self.mongo_connection = connection = Connection()
    self.bettermedia = database = connection["BetterMedia"]  # `BetterMedia` database

    self.image_collection = database.image  # `image` collection
    self.scene_collection = database.scene  # `scene` collection
    self.my_collection = database.video     # `video` collection

    self.attributes = {}  # Important data for this object; persisted to Mongo
    self.doc_id = ""      # Reference to the mongo document ID
def verify_key(*args, **kwargs):
    """Verify that the incoming request carries the correct `remote_key`.

    Reads the request parameters from cherrypy.request.body.request_params
    and compares their `remote_key` with the configured
    c.configs['imagetagger']['remote_key'].

    Returns True when the key matches; the `remote_key` parameter is
    removed from the request parameters in that case.

    Raises cherrypy.HTTPError("401 Unauthorized") when the key is missing
    or does not match.
    """
    request = cherrypy.request
    params = request.body.request_params

    # Never write the shared secret to the log: log everything but the key.
    loggable_params = dict((k, v) for k, v in params.items() if k != "remote_key")
    cherrylog("Verifying key of http request params: " + str(loggable_params))

    if "remote_key" in params:
        param_key = params['remote_key']
        valid_remote_key = c.configs['imagetagger']['remote_key']
        if param_key == valid_remote_key:
            # Strip the remote_key parameter once it has been verified.
            del cherrypy.request.body.request_params['remote_key']
            return True

    raise cherrypy.HTTPError("401 Unauthorized")
def destroy(self):
    """Mark the video as deleted and remove its files.

    The database document is kept and only its status is set to
    "deleted" (committed via update()); the on-disk data is then removed
    through delete_data().
    """
    cherrylog("Deleting video from database")

    # The video document is deliberately kept because it holds useful
    # metadata; scene and image metadata are likewise left untouched.
    deleted_status = {"status": {"imagetagger_status": "deleted"}}
    self.update(deleted_status, commit=True)

    # TODO (from original): check for active crowd services and cancel them.
    self.delete_data()

    cherrylog("Done destroying video")
def get_thumbnail(videoInputPath, videoName, processedVideoPath, thumb_dir, thumb_name, video_stats):
    """Create a single-frame thumbnail for a video with ffmpeg.

    The thumbnail is written to
    ``<processedVideoPath>/<hashstring>/<thumb_dir>/<thumb_name>``.

    Arguments:
    videoInputPath -- directory containing the source video.
    videoName -- file name of the source video inside videoInputPath.
    processedVideoPath -- root directory for processed output.
    thumb_dir -- name of the thumbnail sub-directory below the hash directory.
    thumb_name -- file name of the thumbnail to create.
    video_stats -- dict with 'hashstring', 'width', 'height' (strings,
                   joined with "x") and 'length' (presumably milliseconds,
                   since it is divided by 1000 -- TODO confirm).

    Returns True when ffmpeg exits with status 0, False otherwise.
    """
    cherrylog("Getting thumbnail for video " + videoName)
    full_path = videoInputPath + "/" + videoName
    frame_size = video_stats['width'] + "x" + video_stats['height']
    length_seconds = video_stats['length'] / 1000
    output_path = "%s/%s/%s/%s" % (processedVideoPath, video_stats['hashstring'], thumb_dir, thumb_name)

    # Build the argument list directly instead of quoting a command string
    # and re-splitting it, so paths containing quotes cannot alter the
    # arguments handed to ffmpeg. The offset is half the video length.
    args = ["ffmpeg", "-itsoffset", "-%d" % (length_seconds / 2), "-i", full_path,
            "-vcodec", "mjpeg", "-vframes", "1", "-an", "-f", "rawvideo",
            "-s", frame_size, output_path]
    cherrylog("running ffmpeg -> " + " ".join(args))
    # Arguments are utf-8 encoded, as the original command string was.
    return_code = subprocess.call([arg.encode('utf-8') for arg in args])
    if return_code != 0:
        cherrylog("ffmpeg failed with exit code " + str(return_code) +
                  " while building thumbnail for " + videoName)
        return False
    return True
def create(self):
    """Insert the object into the mongodb.

    It should not exist in mongo before this call. Sets 'created_at' when
    it is not already present, sets the status to 'created' and refreshes
    'modified_at' before inserting self.attributes into self.my_collection.

    Returns True on success (self.doc_id then holds the new document id as
    a string) and False if the insert raised; self.doc_id is "" then.
    """
    self.doc_id = ""
    if 'created_at' not in self.attributes:
        self.attributes['created_at'] = datetime.datetime.now()
    self.attributes['status'] = {'imagetagger_status': 'created'}
    self.attributes['modified_at'] = datetime.datetime.now()
    cherrylog("Creating item in database with values " + str(self.attributes))

    # Insert the document.
    try:
        insert = self.my_collection.insert(self.attributes, safe=True)
    except Exception:
        Tools.print_errors("Error inserting object into database: " + str(self.__class__.__name__))
        return False

    cherrylog("Insert of item successul: " + (str(insert)))
    self.doc_id = str(insert)
    return True
def doProcess(video, full_path, extension):
    """Run scene detection for a video and store the scenes in Mongo.

    Arguments:
    video -- object whose `attributes` provide 'friendly_id' and 'fps'.
    full_path -- folder handed to scene_detection.detect.
    extension -- file extension handed to scene_detection.detect.

    The 'motionIndexVector' of the detection result is turned into scene
    dicts by get_scene_stats, and each is inserted through a new MScene
    bound to the `scene` collection of the `BetterMedia` database.
    """
    # Run the scene detection algorithm
    cherrylog("Running the scene detection algorithm with video: " + str(video.attributes['friendly_id']) + " and at folder: " + str(full_path) + " with file extension: " + str(extension))
    detection_results = scene_detection.detect(full_path, extension)
    all_scenes = get_scene_stats(video.attributes['friendly_id'], detection_results['motionIndexVector'], video.attributes['fps'])
    cherrylog("Done running get_scene_stats, updating video obj in database")

    # This adds the scenes to Mongo
    mongo_connection = Connection()
    try:
        bettermedia = mongo_connection['BetterMedia']
        scene_collection = bettermedia.scene
        for scene in all_scenes:
            new_scene = ms.MScene(collection=scene_collection)
            new_scene.attributes = scene
            new_scene.create()
    finally:
        # Release the connection even if creating a scene raised.
        mongo_connection.disconnect()
def find_nearby_nontrivial_image(nearby_files):
    """Return the index of a non-trivial image, searching from the middle.

    Starting at the middle index, candidates are tested with
    is_nontrivial_image in the order mid, mid-1, mid+1, mid-2, mid+2, ...
    until one passes or the probe index leaves the list.

    Arguments:
    nearby_files -- sequence of image file names.

    Returns the index of the first non-trivial image found, or None.
    """
    total = len(nearby_files)
    cherrylog("Running 'find_nearby_nontrivial_image' against " + str(total) + " nearby files")

    probe = int(total / 2)
    step = 1
    while 0 <= probe < total:
        if is_nontrivial_image(nearby_files[probe]):
            cherrylog("Found a nontrivial nearby image at subindex: " + str(probe))
            return probe
        # Alternate sides with a growing stride: odd steps move left,
        # even steps move right.
        probe += step if step % 2 == 0 else -step
        step += 1

    # Couldn't find a nearby nontrivial image.
    return None
def get_scene_stats(video_id, motionIndexVector, fps):
    """Build one scene description per pair of consecutive scene boundaries.

    Arguments:
    video_id -- id of the video; stored as int under 'video'.
    motionIndexVector -- indexable sequence of frame numbers; entry i and
                         entry i+1 delimit scene i.
    fps -- frame rate handed to time_of_frame_raw.

    Returns a list of dicts with the keys 'video', 'scene_id',
    'start_time', 'stop_time', 'num_images', 'length' and 'status'. If an
    error occurs it is reported through Tools.print_errors and the scenes
    built so far are returned.
    """
    cherrylog("Running get_scene_stats")
    all_scenes = []
    try:
        for i in range(len(motionIndexVector) - 1):
            first_frame = motionIndexVector[i]
            next_first_frame = motionIndexVector[i + 1]
            num_images = next_first_frame - first_frame
            start_time = time_of_frame_raw(first_frame, fps)
            stop_time = time_of_frame_raw(next_first_frame, fps)
            # Duration is stop - start; the operands were previously
            # swapped, which produced a negative length.
            length = time_of_frame(stop_time - start_time)
            scene = {'video': int(video_id), 'scene_id': int(i),
                     'start_time': float(start_time), 'stop_time': float(stop_time),
                     'num_images': int(num_images), 'length': str(length),
                     'status': {'processed': True}}
            all_scenes.append(scene)
    except Exception:
        Tools.print_errors("Error running get_scene_stats on video_id: " + str(video_id))
    return all_scenes
def accept_video(self, video_file, *args, **kwargs):
    """Store an uploaded video on disk and report its size.

    Arguments:
    video_file -- uploaded file object exposing `.file`, `.filename` and
                  `.content_type`.
    file_id (keyword) -- name under which the upload is stored inside the
                         configured `video_file_directory`.

    Returns a text summary with the number of bytes written, the uploaded
    file name and its mime type.

    Raises KeyError if `file_id` is not supplied.
    """
    out = "myFile length: %s\nmyFile filename: %s\nmyFile mime-type: %s"

    cherrylog("Got args of " + str(args) + " and " + str(kwargs))
    new_file_name = kwargs["file_id"]
    size = 0
    video_file_directory = c.configs["imagetagger"]["video_file_directory"]

    # NOTE(review): file_id comes from the request and is joined to the
    # target directory unchecked; a value containing "../" could write
    # outside video_file_directory. Confirm callers validate it.
    # The upload is copied in chunks instead of being read all at once;
    # `with` closes the output file even if a read or write fails.
    with open(video_file_directory + new_file_name, "wb") as output_file:
        while True:
            data = video_file.file.read(8192)
            if not data:
                break
            output_file.write(data)
            size += len(data)

    return out % (size, video_file.filename, video_file.content_type)
def flag_images_for_crowd(self, quality):
    """Flag images of this video to be sent to the crowd.

    Arguments:
    quality -- "premium" selects an interval of 2; "basic" and any other
               value select an interval of 4.

    Every image document of this video whose 'time_in_video' is a multiple
    of the interval is loaded as an MImage and updated with
    {"send_to_crowd": True}.
    """
    # Number of seconds between images to process.
    interval = 2 if quality == "premium" else 4

    video_filter = {"video": self.attributes["friendly_id"]}
    image_docs = self.image_collection.find(video_filter, safe=True)

    # Expects a round integer for time_in_video.
    for image in image_docs:
        if image["time_in_video"] % interval != 0:
            continue
        cherrylog("Flagging this image to be sent to crowd: " + str(image))
        flag_image = mi.MImage(collection=self.image_collection)
        flag_image.load(str(image["_id"]))
        flag_image.update({"send_to_crowd": True})
def images(self, *args, **kw):
    """Return the to-process images of a video as JSON.

    Keyword arguments:
    id -- JSON-encoded friendly id of the video.

    Returns the JSON dump of the video's get_image_json() result, or a
    plain-text message when the video cannot be loaded, when the result is
    an empty list, or when the result is otherwise falsy.
    """
    friendly_id = json.loads(kw["id"])
    cherrylog("Received command to get to_process images for this video: " + str(friendly_id))

    image_video = mv.MVideo()
    found = image_video.load(alt_matching={"friendly_id": friendly_id})
    if not found:
        return "Could not find video"

    result = image_video.get_image_json()

    if result == []:
        no_images_message = "No images have been flagged to be analyzed yet."
        cherrylog(no_images_message)
        return no_images_message

    if not result:
        cherrylog("Something went wrong when submitting process command")
        return "Unable to retrieve images for this video at this time"

    cherrylog("Returned to_process images for this video as json")
    return json.dumps(result)
def delete_data(self):
    """Remove the processed-data directory of this video from disk.

    The directory is `<processed_videos_dir>/<hashstring>`, built from the
    `imagetagger` config and this object's attributes. It is removed
    recursively with shutil.rmtree; an OSError is reported through
    Tools.print_errors and not re-raised.
    """
    cherrylog("Removing Video files, including hidden files (.*)")

    processed_root = c.configs["imagetagger"]["processed_videos_dir"]
    video_root_dir = processed_root + "/" + self.attributes["hashstring"]

    try:
        # rmtree removes the whole tree in one call.
        shutil.rmtree(video_root_dir)
    except OSError:
        Tools.print_errors("Unable to delete directory or files in " + video_root_dir)
def send_thumbnail_to_web(self, processedVideoPath, video_stats, thumb_dir, thumb_name):
    """Upload the video's thumbnail to the imagetagger web app via HTTP PUT.

    Arguments:
    processedVideoPath -- root directory for processed output.
    video_stats -- dict providing 'hashstring'.
    thumb_dir -- thumbnail sub-directory below the hash directory.
    thumb_name -- file name of the thumbnail.

    The file `<processedVideoPath>/<hashstring>/<thumb_dir>/<thumb_name>`
    is sent as the multipart field "thumbnail" to
    `<video_thumbnail_url>/videos/<friendly_id>?remote_key=...`. The
    response body is printed.

    Returns True. Errors from opening the file or performing the request
    propagate to the caller.
    """
    cherrylog("Building Thumbnail PUT request")
    md5 = video_stats["hashstring"]
    thumb_file_path = processedVideoPath + "/" + md5 + "/" + thumb_dir + "/" + thumb_name
    cherrylog("thumbnail file path: " + thumb_file_path)
    dest_url = c.configs["imagetagger"]["video_thumbnail_url"] + "/videos/" + str(self.attributes["friendly_id"])
    # Logged before the query string is added, so the remote key stays
    # out of the log.
    cherrylog("dest url is: " + dest_url)

    # Register the streaming http handlers with urllib2
    register_openers()

    params = {"remote_key": c.configs["imagetagger"]["remote_key"]}
    dest_url += "?" + urllib.urlencode(params)

    # The file must stay open until the request has been sent, because the
    # multipart body is generated from the open file object.
    with open(thumb_file_path, "rb") as thumb_file:
        datagen, headers = multipart_encode([("thumbnail", thumb_file)])

        # Create the Request object
        request = urllib2.Request(dest_url, datagen, headers)
        request.get_method = lambda: "PUT"

        # Actually do the request, and get the response
        cherrylog("Sending Thumbnail update to imagetagger web")
        response = urllib2.urlopen(request)
        try:
            print(response.read())
        finally:
            response.close()

    cherrylog("Done!")
    return True
def load(self, lookup_doc_id=None, alt_matching=None):
    """Load data from database into object in memory

    Arguments:
    lookup_doc_id -- the doc_id of the object that will be used for matching.
    alt_matching -- An alternate dictionary for doing the lookup; takes
                    precedence over lookup_doc_id when non-empty.

    One of these two parameters is required.

    Returns True when exactly one document matched; self.doc_id then holds
    its id as a string and self.attributes the document without '_id'.
    Returns False when nothing matched, when several documents matched, or
    when the lookup raised (the error is reported via Tools.print_errors).
    """
    if alt_matching is None:
        alt_matching = {}

    class_name = str(self.__class__.__name__)
    if lookup_doc_id is not None:
        lookup_description = "id=" + str(lookup_doc_id)
    else:
        lookup_description = str(alt_matching)
    cherrylog("Loading " + class_name + " with params: " + lookup_description)

    try:
        if alt_matching != {}:
            match_param = alt_matching
        else:
            match_param = {'_id': oid(lookup_doc_id)}

        docs = self.my_collection.find(match_param, safe=True)
        num_docs = docs.count()  # Query the count once instead of per check.
        cherrylog("got number of results as: " + str(num_docs))

        if num_docs == 0:
            cherrylog("Couldn't load object, 0 matching results in database!!")
            return False
        if num_docs != 1:
            raise Exception("Retrieved multiple videos when only 1 was expected")

        # Fetch the document exactly once: indexing the cursor returns a
        # fresh dict each time, so deleting '_id' from `docs[0]` and then
        # reading `docs[0]` again left '_id' in the stored attributes.
        document = docs[0]
        self.doc_id = str(document.pop('_id'))
        self.attributes = document
    except Exception:
        Tools.print_errors("Unable to load " + class_name + " from database with params: " + lookup_description)
        return False

    cherrylog("Successfully loaded object: " + str(self.attributes))
    return True
def is_nontrivial_image(image_file_name):
    """Tell whether an image is neither almost black nor almost white.

    The image is read with pylab.imread and the sum of all its values is
    compared with `3 * height * width`, which the original author labels
    the maximum sum for an image.
    NOTE(review): that maximum presumably assumes 3 channels with values
    in [0, 1] -- confirm for the image formats actually passed in.

    Returns False when the sum is below 5% or above 95% of that maximum,
    True otherwise.
    """
    cherrylog("checking if this image is non_trivial: " + image_file_name)

    image = pylab.imread(image_file_name)
    height, width = image.shape[0], image.shape[1]
    white_image_sum = 3 * height * width  # The max sum for an image
    pixel_sum = image.sum()

    too_dark = pixel_sum < (0.05 * white_image_sum)
    too_bright = pixel_sum > (0.95 * white_image_sum)
    if too_dark or too_bright:
        cherrylog("No, it is trivial because the sum of the pixel values is: " + str(pixel_sum))
        return False

    cherrylog("Yes, it is valid and non-trivial.  Pixel sum is: " + str(pixel_sum))
    return True
def index(self, *args, **kw):
    """Return a small HTML page embedding the requested image.

    Keyword arguments:
    get_images -- JSON object with a 'file_id' entry naming the image
                  below `/images/`.

    Returns the HTML page, "Invalid Image Command" when `get_images` is
    not supplied, or "Invalid Command" when the arguments cannot be
    processed (the error is logged).
    """
    # html.escape on Python 3, cgi.escape on Python 2.
    try:
        from html import escape
    except ImportError:
        from cgi import escape

    cherrylog("Args: " + str(kw))
    try:
        if 'get_images' in kw:
            # The payload used to be read from kw['get'] although only
            # 'get_images' was checked, so a request carrying just
            # 'get_images' always failed with KeyError. 'get' is still
            # honoured when present so existing requests behave as before.
            raw_args = kw['get'] if 'get' in kw else kw['get_images']
            get_args = json.loads(raw_args)
            file_id = get_args['file_id']
            # file_id comes from the request: escape it before embedding
            # it in an HTML attribute.
            return ('<html><body><img src="/images/' + escape(file_id, quote=True) + '" /></body></html>')
        else:
            return "Invalid Image Command"
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        cherrylog(str(sys.exc_info()))
        cherrylog("received invalid Video command")
        return "Invalid Command"
def update(self, merge_data=None, commit=True):
    """Update object with current data into mongo.

    Arguments:
    merge_data -- dictionary merged into self.attributes through
                  self.merge_data(); nothing is merged when it is omitted,
                  None or empty.
    commit -- Should this update of the data in memory be commited to
              mongo? Default: True

    Returns True on success (also when commit is False) and False if the
    database update raised; the error is reported via Tools.print_errors.
    """
    # `None` replaces the former shared mutable default `{}`.
    if merge_data is not None and merge_data != {}:
        self.merge_data(merge_data)  # Merge the argument into self.attributes
        cherrylog("Data merged into object: " + str(merge_data))

    if commit:
        self.attributes['modified_at'] = datetime.datetime.now()
        cherrylog("updating item in database")
        try:
            update = self.my_collection.update({'_id': oid(self.doc_id)}, self.attributes, safe=True)
        except Exception:
            Tools.print_errors("Error updating object in database: " + str(self.__class__.__name__))
            return False
        cherrylog("Update of item successul: " + (str(update)))

    return True
def print_errors(message):
    """Log `message` together with details of the exception being handled.

    Arguments:
    message -- text describing what failed.

    When an exception is being handled, its type, the file name and the
    line number of the traceback's frame are printed, and the full
    sys.exc_info() tuple is logged. When called with no active exception,
    only `message` is logged.
    """
    cherrylog(message)
    exc_type, exc_obj, exc_tb = sys.exc_info()
    if exc_tb is None:
        # No exception is being handled; there is no traceback to inspect
        # (this previously raised AttributeError).
        return
    fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
    print(exc_type, fname, exc_tb.tb_lineno)
    cherrylog(str(sys.exc_info()))
def poll_video_analysis_complete(self):
    """Log the poll and delegate to MVideo.video_analysis_complete()."""
    poll_name = "poll_video_analysis_complete"
    cherrylog("Running: " + poll_name)
    mv.MVideo.video_analysis_complete()