def extract_image_thumbnail(auth_params, func_params):
    """
    Create a thumbnail for an image item.

    A cropped and resized portion of the original image is used as the
    thumbnail for the digital item. The thumbnail image size is 256x256
    pixels. If ffmpeg fails to produce a file, a standard fallback
    thumbnail is used instead.

    @param auth_params: Input parameters of the function that generated this call
    @param func_params: Output parameters of the function that generated this call
    @raise Exception: If the core rejects the generated thumbnail.
    """
    # construct source and destination paths for the digital item
    file_path = os.path.join(get_media_root(), func_params['file'])
    thumb_path = os.path.join('/tmp', str(func_params['id']) + '_thumb.jpeg')

    # crop the image to a centered square, then scale it to 256x256.
    # NOTE: the command is built as a shell string from the item path;
    # paths containing quotes would break it (pre-existing behavior).
    cmd = 'ffmpeg -loglevel fatal -y -i "' + file_path + '" -vf "crop=min(iw\,ih):min(ih\,iw), scale=256:256" -vframes 1 ' + thumb_path
    subprocess.check_output(cmd, shell=True)
    token = auth_params['token']

    # check if the thumbnail has been created, otherwise fall back to the
    # standard image thumbnail copied from a known directory
    if not os.path.exists(thumb_path):
        default_thumb_path = os.path.join(get_media_root(), 'thumbnails', 'image.jpeg')
        shutil.copyfile(default_thumb_path, thumb_path)

    # send the thumbnail to the core
    res = set_thumbnail(func_params['id'], thumb_path, 'image/jpeg', token)

    # delete the temporary thumbnail from the local filesystem
    os.remove(thumb_path)
    if not res:
        # BUGFIX: the original message wrongly said "video"; this function
        # handles image items
        raise Exception("Error on image thumbnail generation " + str(func_params['id']))
def speaker_diarization_video(auth_params, func_params): try: cmd = '/usr/bin/ffmpeg -y -i "' + os.path.join(get_media_root(), func_params["file"]) + '"' cmd += ' -strict -2 -acodec pcm_s16le -ac 1 -ar 16000 ' cmd += os.path.join(get_media_root(), 'items', str(func_params['id']), 'audio.wav') print cmd subprocess.call(cmd, shell=True) func_params['file'] = os.path.join('items', str(func_params['id']), 'audio.wav') print func_params speaker_diarization(auth_params, func_params) except: print "Error on video speaker extraction"
def speaker_diarization(auth_params, func_params):
    """
    Run speaker diarization on the audio file of a digital item.

    The item audio is copied under the item directory (named after the
    item id), a LIUM .properties file is generated by merging the global
    settings with the local file paths, diarization is executed, and the
    resulting speaker segments are posted to the core via post_di_esempio.

    @param auth_params: Authentication parameters (must contain 'token')
    @param func_params: Item parameters ('id', 'file', 'filename')
    @raise Exception: On any diarization failure (original error printed).
    """
    try:
        token = auth_params['token']
        # remove existing tags (and dynamic tags) for the item
        tags = get_tags_by_item(func_params['id'], token)
        for tag in tags:
            if tag['type'] == 'speaker':
                remove_tag(tag['id'], token)
        print "***** PLUGIN SPEAKER RECOGNITION: DIARIZATION ---> START"
        # copy the item audio next to the item, renamed to the item id
        file_path=os.path.join(get_media_root(), func_params["file"])
        new_file_path=os.path.join(get_media_root(),'items',str(func_params["id"]),str(func_params["id"]))
        print "new file path ",new_file_path
        shutil.copy2(file_path,new_file_path)
        file_root=os.path.join(get_media_root(), 'items', str(func_params["id"]))
        file_path=new_file_path
        #convert_with_ffmpeg(file_path)
        #_mkdir_out(file_root+"/out")
        # delete the path if exists and create a new one
        if os.path.exists(file_root + "/out"):
            shutil.rmtree(file_root + "/out")
        os.mkdir(file_root + "/out")
        # build the local .properties file: the input file name, the
        # global settings, then the output root for diarization results
        with open(file_path.split(".")[0]+'.properties', "w") as f:
            #f.write("fileName="+file_path.split(".")[0]+".wav")
            f.write("fileName="+file_path.split(".")[0])
            with open(os.path.join(get_media_root(), 'models/audio/globals/settings.properties')) as fp:
                for line in fp:
                    f.write(line)
                    print line
            #f.writelines("fileName="+file_path.split(".")[0]+".wav")
            f.writelines("outputRoot="+file_root+"/out/")
            #f.writelines("outputRoot="+file_root)
            # redundant: the "with" statement already closes the file
            f.close()
        # the existing models are taken into account
        diarization(file_path.split(".")[0]+'.properties')
        # run diarization on the original file
        # split the cluster property file
        # file_list = ['/path/property/file', '...']
        # seq = Seq(diarization)            # wraps the function for distributed execution
        # farm = Farm(seq)                  # skeleton required for parallel execution
        # Executor().eval(farm, file_list)  # construct that evaluates the skeleton tree
        print "***** PLUGIN SPEAKER RECOGNITION: DIARIZATION ---> STOP"
        # NOTE(review): other plugins in this file read func_params['file'];
        # confirm that 'filename' is actually supplied by the caller here
        print "fp=",file_root+"/out/"+func_params["filename"].split(".")[0]
        post_di_esempio(str(func_params["id"]) , file_root+"/out/"+str(func_params["id"]), token)
    except Exception as e:
        print e
        raise Exception('Error on diarization' + str(e))
def __build_model(auth_params, func_params):
    """
    Build an audio recognition model from a set of instances provided by
    the user through the dedicated GUI. In particular the function:
        - obtains the list of instances specified by the user
        - retrieves the full path associated with each instance
        - merges the instances into a single audio file
        - builds the voice recognition model
        - creates the person and the model on the database
        - associates the created files with the person and the model
        - removes all created temporary files

    @param auth_params: Authentication parameters (token, defaults to '1234')
    @param func_params: Parameters: first_name, last_name, inst_list
    """
    try:
        token = auth_params.get('token', '1234')
        f_name = func_params.get('first_name', 'Pinco')
        s_name = func_params.get('last_name', 'Pallino')
        inst_ids = func_params.get('inst_list', [])
        ubm = os.path.join(get_media_root(), 'models', 'audio', 'globals', 'ubm.gmm')
        classpath= os.path.join(get_base_dir(), 'plugins_script', 'speaker_extractor' , 'lium_spkdiarization-8.4.1.jar')
        # create a model and a person with the provided data
        person = create_person(f_name, s_name, token=token)
        model = create_model(person['id'], 'audio', f_name + ' ' + s_name, last_update=None, token=token)
        #print person, model
        # retrieve the objects corresponding to the instances
        #print inst_ids
        inst_paths = []
        for inst_id in inst_ids:
            inst = get_instance(inst_id, token=token)
            inst_paths.append(os.path.join(get_media_root(), inst['features']))
        #print inst_paths
        # concat all provided feature files
        temp = '/tmp/model_' + str(model['id']) + '.wav'
        concatena_multi(inst_paths, temp)
        #print temp
        # compute and set the generated model in the database
        model_path = create_new_model(classpath, ubm, temp, 0, None, None)
        set_model_file(model['id'], model_path, token=token)
        #print model_path
        # remove all created temporary files
        # NOTE(review): cleanup is commented out, so /tmp/model_<id>.wav and
        # the generated model file are left behind despite the docstring
        #os.remove(model_path)
    except Exception as e:
        print e
        return 'Error during entity model building'
def __build_model(auth_params, func_params):
    """
    Build (or update) a face recognition model for a specific person
    based on the set of instances previously extracted and saved. The
    object representation of the model must already exist.

    @param auth_params: Authentication parameters (token, defaults to '1234')
    @param func_params: Parameters; 'id' is the existing model id
    @raise Exception: If the provided model id does not exist.
    """
    token = auth_params.get('token', '1234')
    model_id = func_params.get('id', 0)

    # retrieve the entity model object if it exists
    model = get_model(model_id, token)
    if model is None:
        raise Exception('The provided model id is not valid!')

    # collect the feature-file paths of all instances associated to the model
    instances = get_instances_by_model(model_id, token)['results']
    inst_paths = []
    for inst in instances:
        inst_paths.append(os.path.join(get_media_root(), inst['features']))

    # BUGFIX: the original passed the undefined name "aligned_faces_list"
    # (NameError at runtime); the collected instance paths are the images
    # the model is built from
    fm = FaceModels()
    model_file_path = fm.create_model_from_image_list(inst_paths)
    tsm.set_model_file(model_id, model_file_path, token=token)
def _extract_image_preview(params):
    """
    Generate a low-quality JPEG preview for an image item (scaled to a
    height of 600 pixels) and register it on the ACTIVE core.

    @param params: Two-element array: [auth_params, func_params].
    @return: True on success.
    @raise Exception: If the preview file is missing or rejected by the core.
    """
    auth_params, func_params = params[0], params[1]
    item_id = func_params['id']
    token = auth_params['token']

    # absolute paths of the source image and of the temporary preview
    src_path = os.path.join(get_media_root(), func_params['file'])
    dest_path = os.path.join('/tmp', str(item_id) + 'preview.jpeg')

    # scale the image to a height of 600 px, preserving the aspect ratio
    command = ('/usr/bin/ffmpeg -loglevel fatal -y -i "' + src_path +
               '" -vf scale=-1:600 ' + dest_path)
    subprocess.check_output(command, shell=True)

    # make sure ffmpeg actually produced the preview
    if not os.path.exists(dest_path):
        raise Exception("Preview image not generated")

    # register the preview on the core, then drop the local temp file
    res = set_preview(item_id, dest_path, 'image/jpeg', token)
    os.remove(dest_path)
    if not res:
        raise Exception("Preview not generated for image item" + str(item_id))
    return True
def _extract_video_preview(params):
    """
    Generate a low-quality MP4 (H.264) preview for a video item and
    register it on the ACTIVE core.

    @param params: Two-element array: [auth_params, func_params].
    @return: True on success.
    @raise Exception: If the preview file is missing or rejected by the core.
    """
    auth_params, func_params = params[0], params[1]
    item_id = func_params['id']
    token = auth_params['token']

    # absolute paths of the source video and of the temporary preview
    src_path = os.path.join(get_media_root(), func_params['file'])
    dest_path = os.path.join('/tmp', str(item_id) + '_preview.mp4')

    # transcode to H.264 with +faststart so playback can begin while streaming
    command = ('/usr/bin/ffmpeg -loglevel fatal -y -i "' + src_path +
               '" -codec:v libx264 -preset fast -movflags +faststart -strict -2 ' +
               dest_path)
    subprocess.check_output(command, shell=True)

    # make sure ffmpeg actually produced the preview
    if not os.path.exists(dest_path):
        raise Exception("Preview not generated for video item" + str(item_id))

    # register the preview on the core, then drop the local temp file
    res = set_preview(item_id, dest_path, 'video/mp4', token)
    os.remove(dest_path)
    if not res:
        raise Exception("Preview not generated for video item" + str(item_id))
    return True
def _build_model(audio_file, train_wav_file, name_surname, duration, token):
    """
    Build a GMM speaker model from the given audio file using the LIUM
    toolkit and the global UBM.

    @param audio_file: Path of the wav file used for training
    @param train_wav_file: Path of the training wav file (forwarded to build_model)
    @param name_surname: Name identifying the speaker (forwarded to build_model)
    @param duration: Duration of the audio portion to use
    @param token: Authentication token
    @return: Path of the generated GMM model
    """
    classpath = os.path.join(get_base_dir(), 'plugins_script',
                             "speaker_extractor", "lium_spkdiarization-8.4.1.jar")
    ubm = os.path.join(get_media_root(), "models/audio/globals/ubm.gmm")
    start = 0
    # BUGFIX: removed a debug leftover that overwrote audio_file with the
    # hard-coded path "/tmp/prova.wav", silently ignoring the caller's argument
    gmm_path = build_model(classpath, ubm, audio_file, start, duration,
                           train_wav_file, name_surname, token)
    return gmm_path
def __recognize_instance(auth_params, func_params): """ This script has been defined in order to recognize face instances that are saved into the database according to existent models """ try: token = auth_params.get('token', '1234') instance_id = func_params["id"] instance_path = os.path.join( get_media_root(), func_params["features"]) # Get available models model_type = 'video' models = tsm.get_models(model_type, auth_params['token']) # Create dictionary with models models_list = [] for model in models: model_id = model['id'] model_file = os.path.join( get_media_root(), model['model_file']) model_dict = {c.MODEL_ID_KEY: model_id, c.MODEL_FILE_KEY: model_file, } models_list.append(model_dict) fm = FaceModels(models_list) # Recognize given instance face = cv2.imread(instance_path, cv2.IMREAD_GRAYSCALE) (model_id, conf) = fm.recognize_face(face) # update the instance reference if recognized if model_id != c.UNDEFINED_LABEL: edit_instance(instance_id, model_id=label, token=token) return 'Instance ' + instance_id + ' associated to model ' + model_id return 'Instance ' + instance_id + ' not recognized by any model' # TODO modificare i dynamic tag per associare automaticamente la persona ? except Exception as e: print e return 'Error on instance recognition'
def remove_video_recognitions(auth_dict, param_dict):
    """
    Remove the face recognition results previously computed for a
    video item.

    :param auth_dict: Input parameters provided by the trigger Action
    :param param_dict: Output parameters returned by the trigger Action
    :return: True on completion.
    """
    item_id = param_dict['id']
    video_path = os.path.join(get_media_root(), param_dict['file'])

    # delegate the cleanup to the face extractor for this item
    extractor = VideoFaceExtractor(video_path, str(item_id))
    extractor.delete_recognition_results()
    return True
def __recognize_instance(auth_params, func_params):
    """
    Apply the existing models to new audio instances saved in the
    database. The model with the highest recognition score is assigned
    to the considered instance (the id of the person associated with
    the model).

    NOTE(review): this function appears incomplete -- `token` is used
    before being defined and `model` is never assigned, so execution
    falls into the generic except handler below. Verify before relying
    on it.

    @param auth_params: Authentication parameters
    @param func_params: Parameters; 'id' and 'features' of the instance
    @return: A human-readable outcome string (also on error).
    """
    try:
        # extract all needed parameters
        instance_id = func_params["id"]
        instance_path = os.path.join(get_media_root(), func_params["features"])
        # NOTE(review): `token` is not defined in this scope (sibling
        # functions use auth_params.get('token', '1234')) -- NameError here
        models = get_models('audio', token=token)
        # copy the models into a folder under /tmp
        # start the identification of the instance
        model_id = identification() # needs the local settings file (rebuild it?)
        # extract model parameters
        # NOTE(review): `model` is never assigned -- NameError here as well
        model_id = model['id']
        model_path = os.path.join(get_media_root(), model['model_file'])
        entity_id = model['entity']
        print 'Comparing model ' + model_id + ' with instance ' + instance_id
        # update the instance reference if recognized
        if model_id is not None:
            edit_instance(instance_id, model_id=model_id, token=token)
            return 'Instance ' + instance_id + ' associated to model ' + model_id
        return 'Instance ' + instance_id + ' not recognized by any model'
        #TODO update the dynamic tags to automatically associate the person
    except Exception as e:
        print e
        return 'Error on instance recognition'
def remove_video_data(auth_params, func_params):
    """
    Remove all analysis data (models, indexes, etc.) for a video item
    when it is deleted. Intended to be triggered as a callback script.

    :param auth_params: Input parameters provided by the trigger Action
    :param func_params: Output parameters returned by the trigger Action
    :return: True on completion.
    """
    item_id = func_params['id']
    video_path = os.path.join(get_media_root(), func_params['file'])

    # delegate the cleanup to the face extractor for this item
    extractor = VideoFaceExtractor(video_path, str(item_id))
    extractor.delete_analysis_results()
    return True
def extract_metadata(auth_params, func_params): """ This function is used to extract relevant metadata from a digital item and then save this data on the ACTIVE Core. :param auth_params: Authentication parameters :param func_params: Function parameters """ file_path = os.path.join(get_media_root(), func_params['file']) item_info = get_exif_metadata(file_path) token = auth_params['token'] if not set_metadata(func_params['id'], item_info, token): raise Exception('Error on metadata update') if not set_status(func_params['id'], 'ANALYZED', token): raise Exception('Error on processing status update') print 'Extracted and saved metadata for digital item', func_params['id']
def extract_audio_thumbnail(auth_params, func_params):
    """
    Create a thumbnail for an audio item. A standard image is used as
    the thumbnail for all audio files.

    @param auth_params: Input parameters of the function that generated this call
    @param func_params: Output parameters of the function that generated this call
    @raise Exception: If the core rejects the thumbnail.
    """
    item_id = func_params['id']
    token = auth_params['token']

    # copy the standard audio thumbnail into a temporary location
    thumb_path = os.path.join('/tmp', str(item_id) + '_thumb.jpeg')
    source_thumb = os.path.join(get_media_root(), 'thumbnails', 'audio.jpeg')
    shutil.copyfile(source_thumb, thumb_path)

    # send the thumbnail to the core, then drop the temporary file
    res = set_thumbnail(item_id, thumb_path, 'image/jpeg', token)
    os.remove(thumb_path)
    if not res:
        raise Exception("Error on thumbnail generation for audio item " + str(item_id))
def post_di_esempio(id_item, fp,token):
    """
    Post the diarization results of an item to the ACTIVE core.

    For each speaker segment found in the compacted segmentation file,
    this creates an audio instance with the extracted audio portion, an
    "Unknown" person, a 'speaker' tag with a dynamic tag for the segment,
    and a 'face+speaker' tag used for audio/video tag unification.

    @param id_item: Id of the digital item being processed
    @param fp: Path of the diarization segmentation output
    @param token: Authentication token
    """
    print "***** PLUGIN SPEAKER RECOGNITION: POST DI ESEMPIO ---> Start"
    classpath= os.path.join(get_base_dir(), 'plugins_script',"speaker_extractor","lium_spkdiarization-8.4.1.jar" )
    ubm= os.path.join(get_media_root(),"models/audio/globals/ubm.gmm")
    id_persona=None
    #id_item=3601
    #name_p=open(name_file, "r")
    #name_p_list=name_p.readlines()
    #result=make_name_compact(fp)
    #result looks like [[name,start,stop][name,start,stop]]
    result=segfile_compact_name2(fp)
    print "result=",result
    uniform_tag_ids_arr =[]
    # one iteration per detected speaker segment; errors in a single
    # segment are printed and do not stop the remaining segments
    for res in result:
        try:
            name=res[0]
            feature_path = None
            model = None
            p=re.compile('[A-Z]')
            print "find name ", name
            st=int( float(res[1]))#*1000 )
            print "start ", st
            dur=int (float(res[2]))#*1000)
            print "dur ",dur
            # cut the segment audio out of the item wav into /tmp/model.wav
            #feature_path = split4diarization(fp,st,dur,fp+"_"+str(st)+"_"+str(dur))
            feature_path = split4diarization(os.path.join(get_media_root(), 'items', str(id_item), 'audio.wav'),st,dur,"/tmp/model.wav")
            print "feature_path ", feature_path
            inst = create_instance('audio', False, token=token)
            # TODO: set the model id
            set_instance_feature(inst['id'], feature_path, token)
            #if name.find("GiacomoMameli")>-1:
            #    print "trovato giacomino"
            #    id_persona=create_person("Giacomo","Mameli", token)["id"]
            #    #createTagKeyword(id_item, 'Giacomo', 'Mameli', token)
            #    print "id persona ",id_persona
            #else:
            # NOTE(review): "if True" guards are leftovers of the disabled
            # name-matching logic above; every segment is treated as Unknown
            if True:
                mai=p.findall(name)
                print "mai ",mai
                #if len(mai)==2:
                if True:
                    #f_name=name.split(mai[1])[0]
                    #s_name=mai[1]+name.split(mai[1])[1]
                    f_name="Unknown"
                    s_name=name + '_' + str(id_item)
                    print "f_name, s_name ", f_name, s_name
                    # fixes the duplicated-identities problem
                    persona = None
                    #feature_path = split4diarization(fp,st,dur,"/tmp/model_wav")
                    if f_name == "Unknown":
                        print "name Unknown"
                        persona=create_person(f_name,s_name, token)
                        #feature_path = split4diarization(fp,0,None,"/tmp/model_wav")
                        #model_path=_build_model("/tmp/model.wav", None, [f_name, s_name],dur, token)
                        # create the model associated to the unknown person
                        # NOTE(review): the assignment below was commented
                        # out, so the return value of create_new_model is
                        # discarded -- confirm this is intentional
                        #model_path=
                        create_new_model (classpath, ubm,feature_path, 0, dur,None) #f_name+"_"+ s_name)
                        #print "model_path ",model_path
                        #model = create_model(persona['id'], 'audio', f_name + ' ' + s_name, last_update=None, token=token)
                        ##set_model_file(model['id'], model_path, token=token)
                    #else:
                    #    print " persona nota ", f_name, s_name
                    #    persona=create_person(f_name, s_name, token)
                    #    print "persona ", persona
                    #    # list out of bound
                    #    model = get_models_by_entity(persona['id'], token=token)[0]
                    #    print "model ", model
                    # create a tag for person name
                    #createTagKeyword(id_item, persona['first_name'], persona['last_name'], token)
                    print "calcolo id persona"
                    id_persona=persona["id"]
                    print "id_persona ",id_persona
            #else:
            #    persona=create_person("Il","Manutentore", token)
            #    id_persona=persona["id"]
            print "create_tag id_item,id_persona ", id_item, " ",id_persona
            tag=create_tag(id_item,id_persona, "speaker", token)
            print "tag ",tag
            # segment times are scaled (x10) to the dtag time unit
            dtag=create_dtag(tag["id"], st*10, dur*10, token=token)
            print "dtag ",dtag
            uniform_tag = create_tag(id_item, id_persona, "face+speaker", token)
            print 'uniform tag', uniform_tag
            uniform_tag_ids_arr.append(uniform_tag['id'])
            # update the instance with the model id
            print 'instance, model', inst, model
            #edit_instance(inst['id'], model_id=model['id'], token=token)
            print 'ascallo'
        except Exception, e:
            print e
def __generate_instances(auth_params, func_params):
    """
    Run face analysis on a video item and persist the results.

    Removes the item's existing 'face' tags, runs VideoFaceExtractor
    with the available face models, then for each detected person
    creates (or reuses) a person/model, an instance with its thumbnail
    and aligned-face feature, a 'face' tag, and one dynamic tag per
    appearance segment. Finally triggers uniform dtag creation.

    @param auth_params: Input parameters of the function that generated this call
    @param func_params: Output parameters of the function that generated this call
    """
    file_path = os.path.join(get_media_root(), func_params['file'])
    item_id = func_params['id']
    # remove existing tags (and dynamic tags) for the item
    tags = get_tags_by_item(func_params['id'], auth_params['token'])
    for tag in tags:
        if tag['type'] == 'face':
            remove_tag(tag['id'], auth_params['token'])
    # extract faces from video and save metadata on filesystem
    # Get available models
    model_type = 'video'
    models = tsm.get_models(model_type, auth_params['token'])
    # Create dictionary with models: id, file path and "Surname<sep>Name" tag
    models_list = []
    for model in models:
        model_id = model['id']
        model_file = os.path.join(
            get_media_root(), model['model_file'])
        entity_id = model['entity']
        person = get_person(entity_id, auth_params['token'])
        name = person['first_name']
        surname = person['last_name']
        tag = surname + c.TAG_SEP + name
        model_dict = {c.MODEL_ID_KEY: model_id,
                      c.MODEL_FILE_KEY: model_file,
                      c.TAG_KEY: tag
                      }
        models_list.append(model_dict)
    fe = VideoFaceExtractor(file_path, str(item_id), models_list)
    fe.analyze_video()
    set_status(item_id, "FACE_RECOG", auth_params['token'])
    people = fe.get_people()
    uniform_tag_ids_arr = []
    # retrieve dynamic tags and save them on ACTIVE core
    for person_dict in people:
        #print "Tag assegnato al cluster", person_dict['assigned_tag']
        #~ # update the image for the person
        #~ image_path = os.path.join(get_media_root(),'items',
        #~                           str(item_id), 'Face extraction',
        #~                           'Face recognition', 'Key frames',
        #~                           person_dict[c.KEYFRAME_NAME_KEY])
        #~ set_image(person_id, image_path, 'image/png')
        # check if the person has been recognized
        model_id = person_dict[c.ASSIGNED_LABEL_KEY]
        trusted = False
        # NOTE(review): instance_id is assigned but never read below
        instance_id = None
        if model_id == c.UNDEFINED_LABEL:
            # unrecognized cluster: create a new "Unknown" person plus a
            # fresh model and instance for it
            print "Creata una nuova persona"
            person = create_person(
                "Unknown",
                str(func_params['id']) + '_' +
                str(person_dict['person_counter']),
                auth_params['token'])
            person_id = person['id']
            # Create a model for the unknown instance
            model = tsm.create_model(
                person_id, 'video',
                person['first_name'] + ' ' + person['last_name'],
                token=auth_params['token'])
            instance = tsm.create_instance(
                model_type, False, model_id=model['id'],
                token=auth_params['token'])
        else:
            # Create model instance for the recognized model
            instance = tsm.create_instance(
                model_type, trusted, model_id=model_id,
                token=auth_params['token'])
            # NOTE(review): unlike the other tsm calls, no token is
            # passed here -- confirm the get_model signature
            model = tsm.get_model(model_id)
            person_id = model['entity']
        # update the image for the person with the recognition key frame
        image_path = os.path.join(fe.rec_path,
                                  c.FACE_RECOGNITION_KEY_FRAMES_DIR,
                                  person_dict[c.KEYFRAME_NAME_KEY])
        set_image(person_id, image_path, 'image/png', auth_params['token'])
        tsm.set_instance_thumbnail(
            instance['id'], image_path, token=auth_params['token'])
        # Get aligned face and set it as instance feature
        print person_dict.keys()
        aligned_face_path = os.path.join(fe.align_path,
                                         person_dict[c.MEDOID_ALIGNED_FACE_KEY])
        tsm.set_instance_feature(instance['id'], aligned_face_path,
                                 token=auth_params['token'])
        # TODO DELETE?
        # else:
        #     # Find id person by name and surname
        #     tag_parts = person_id.split(c.TAG_SEP)
        #     surname = tag_parts[0]
        #     name = tag_parts[1]
        #     person = create_person(name, surname, auth_params['token'])
        #person_id = person['id']
        #~ if person['image'] == "unknown_user.png":
        #~     set_image(person_id, image_path, 'image/png')
        # create a tag for user name
        #createTagKeyword(item_id, person['first_name'], person['last_name'])
        # create a tag (occurrence of a person in a digital item)
        tag = create_tag(item_id, person_id, "face", auth_params['token'])
        #create audio+video tag
        #uniform_tag = create_tag(item_id, person_id, "face+speaker", auth_params['token'])
        #uniform_tag_ids_arr.append[uniform_tag['id']]
        # one dynamic tag per appearance segment, with the first frame's
        # detection bounding box
        for segment in person_dict[c.SEGMENTS_KEY]:
            start = segment[c.SEGMENT_START_KEY]
            duration = segment[c.SEGMENT_DURATION_KEY]
            bbox_x, bbox_y, width, height = segment[c.FRAMES_KEY][0][c.DETECTION_BBOX_KEY]
            create_dtag(tag['id'], int(start), int(duration), bbox_x, bbox_y,
                        width, height, auth_params['token'])
    # disabled (string-literal "comment"): conditional per-tag uniform
    # dtag creation, superseded by the unconditional call below
    """
    item_status = get_status(item_id, token)
    if "SPEAKER_RECOG" in item_status['status']:
        #create dtags for audio+video tag
        for u_tag_id in uniform_tag_ids_arr:
            create_uniform_dtags(item_id, u_tag_id, auth_params['token'])
    """
    create_uniform_dtags(item_id, auth_params['token'])
def __generate_instances(auth_params, func_params):
    """
    Extract instances from an audio file and save them in the database
    without associating them to any entity/model.

    The instances are obtained by first converting the original audio
    track (to 16 kHz mono wav) and then splitting it on the detected
    speech of the people. For each detected portion an 'Unknown' person,
    a 'speaker' tag, a dynamic tag and an audio instance are created.

    @param auth_params: Authentication parameters (token, defaults to '1234')
    @param func_params: Parameters of the new item ('id', 'file')
    """
    try:
        # extract all needed parameters
        item_id = func_params['id']
        item_path = os.path.join(get_media_root(), func_params['file'])
        temp_root = os.path.join('/tmp', 'speak_recog_' + str(item_id))
        dest_path = os.path.join(temp_root, 'audio.wav')
        properties_path = dest_path.split(".")[-2]+'.properties'
        settings_path = os.path.join(get_media_root(), 'models/audio/globals/settings.properties')
        token = auth_params.get('token', '1234')
        #print 'item_id', item_id
        #print 'item_path', item_path
        #print 'temp_root', temp_root
        #print 'dest_path', dest_path
        #print 'properties_path', properties_path
        #print 'settings_path', settings_path
        # create a directory for temporary files for the diarization phase
        if os.path.exists(temp_root):
            shutil.rmtree(temp_root)
        os.mkdir(temp_root)
        os.chmod(temp_root, 0o777)
        os.mkdir(temp_root + '/out')
        os.chmod(temp_root + '/out', 0o777)
        # extract the item audio and convert it in the wav format
        command = '/usr/bin/ffmpeg -y -i "' + item_path + '"'
        command += ' -strict -2 -acodec pcm_s16le -ac 1 -ar 16000 '
        command += '"' + dest_path + '"'
        subprocess.call(command, shell=True)
        #print command
        # generate the local settings file: input file name, global
        # settings, then the output root for diarization results
        with open(properties_path, "w") as f:
            f.write("fileName=" + dest_path)
            with open(settings_path) as fp:
                for line in fp:
                    f.write(line)
                    #print line
            f.writelines("outputRoot=" + temp_root + "/out/")
        # apply the diarization phase and compute the audio chunks
        diarization(properties_path)
        # save the audio chunks in the settings file?
        # extract the audio portions and save them in a temp directory
        occurrences = segfile_compact_name2(temp_root + '/out/audio')
        for o in occurrences:
            # segment times are scaled (x10) to the dtag time unit
            start = int(o[1])*10
            duration = int(o[2])*10
            # generate the portion starting from the audio file
            feature_path = temp_root + '/out/segment_' + str(item_id) + '_' + str(o[1]) + '.wav'
            split4diarization(dest_path, start, duration, feature_path)
            # generate the tag and the dtag to associate to the instance
            persona = create_person('Unknown', o[0] + '_' + str(item_id), token=token)
            tag = create_tag(item_id, persona['id'], 'speaker', token=token)
            dtag = create_dtag(tag['id'], start, duration, token=token)
            # create the instance and upload it to the database
            inst = create_instance('audio', False, token=token)
            set_instance_feature(inst['id'], feature_path, token=token)
        # remove all temporary directories and files
        # NOTE(review): cleanup is commented out, so temp_root is left behind
        #os.remove(temp_path)
    except Exception as e:
        print e