def create_voice_activity_clip(self, filename, outfile_name):
    """
    1. Runs the voice activity detector on the clip
    2. Calls create_cutpoints_file to create a file listing the times at which the clip should be cut
    3. Concatenates the regions listed in the cutpoints file into a single combined file
    """
    log.info("Running VAD on " + str(filename))
    try:
        v = VoiceActivityDetector(filename)
    except Exception as e:
        log.info("ERROR at VAD " + str(filename) + " " + str(e))
        raise

    # Convert the detected speech windows into second-based regions
    log.info("Converting windows to readable labels for " + str(filename))
    try:
        voice_activity_regions = v.convert_windows_to_readible_labels(v.detect_speech())
    except Exception as e:
        log.debug("ERROR at convert windows to readable labels for file " + str(filename))
        log.debug(e)
        raise

    # Flatten the list of region dicts into a single array of values
    voice_activity_regions_array = []
    for region in voice_activity_regions:
        voice_activity_regions_array = voice_activity_regions_array + list(region.values())

    if not voice_activity_regions_array:
        log.info("No voice activity detected for this clip.")
        return None

    log.info("Finished creating array of voice activity regions")

    try:
        # Generate a vector of seconds marking where the clip should be cut
        cut_areas = self.get_most_dense_range(voice_activity_regions_array)
        # Create a cut-areas text file in the wav directory
        self.create_cutpoints_file(cut_areas, filename)
    except Exception as e:
        log.debug("could not create a text file with cut areas")
        log.debug(e)
        raise

    if outfile_name is None:  # an outfile name can be specified; otherwise one is generated
        file = "cut_stream" + filename[-11] + "_" + self.inputfile[:-4] + ".wav"
    else:
        file = outfile_name

    # Create a concatenated file covering the regions with high phrase density
    try:
        ffmpeg_calls.create_shortened_file(self.wav_directory, self.targetdirectory, file)
    except subprocess.CalledProcessError as e:
        log.debug(e)

    log.info("Created: " + self.targetdirectory + "/" + file)

    return file
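# The helpers self.create_cutpoints_file and ffmpeg_calls.create_shortened_file are defined
# elsewhere in this project and are not shown here. The sketch below is only an assumption
# about the concat-demuxer style list they appear to exchange (see the file/inpoint/outpoint
# lines written in retrieve_confidence below) and about the kind of ffmpeg command such a
# helper would wrap; the function names, the "cutpoints.txt" name, and the exact flags are
# illustrative, not the project's actual implementation.
import subprocess

def _example_write_cutpoints(cut_areas, source_name, cutpoints_path):
    # Each (start, end) pair in seconds becomes a file/inpoint/outpoint block,
    # the format understood by ffmpeg's concat demuxer.
    with open(cutpoints_path, "w") as f:
        for start, end in cut_areas:
            f.write("file " + source_name + "\n")
            f.write("inpoint " + str(start) + "\n")
            f.write("outpoint " + str(end) + "\n")

def _example_concatenate(cutpoints_path, out_path):
    # Concatenate the listed regions into a single output without re-encoding.
    subprocess.check_call([
        "ffmpeg", "-f", "concat", "-safe", "0",
        "-i", cutpoints_path, "-c", "copy", out_path,
    ])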
def retrieve_confidence(id, filename):
    # ----- Retrieves the language confidence for an uploaded file
    log.info("Getting confidence for file: " + filename + " id: " + id)
    
    language, confidence, response_json = VI_API.new_get_language(str(id))

    log.info("File: " + filename + " language: " +  str(language) + " confidence: " + str(confidence))
    
    ## -------- Reuploads video using a 5 min segment comprising random 20 second samples if the confidence is too low ----------------------
    cnt = 2
    try:
        # If confidence is below the 0.50 threshold, reprocess the file
        if confidence < 0.50:
            log.info("Low confidence score, redoing processing -- this will take ~10 mins")
            # Get the full audio file name corresponding to the clip.
            # The stream number is recovered from the clip name: for a name like
            # 'cut_stream2_output.wav', split("_")[1][-1] gives '2'.
            stream_number = str(filename.split("_")[1][-1])
            # The encoded file name is the stream number + "output.wav", e.g. stream 3 becomes 3output.wav
            encoded_file_name = stream_number + "output.wav"
            # Full path to the corresponding wav file
            encoded_file_path = wav_files_directory + "/" + encoded_file_name
            

            file_length = ffmpeg_calls.retrieve_len(encoded_file_path)
            # Initialize language, confidence, and the response JSON to empty defaults
            new_language, new_confidence, new_json = 0, 0, {}
            
            # If the file is shorter than the minimum length threshold, skip reprocessing
            if file_length > MINIMUM_FILE_LENGTH_THRESHOLD:
                # temp_cut_size is the smaller of 2400 seconds and the length of the entire file,
                # so files shorter than 2400 seconds are sampled over their full length.
                # Cast to int so it can be used as a range() bound below.
                temp_cut_size = int(min(file_length, 2400))
                log.info("Going to cut " + str(temp_cut_size) + " from the file")

                # Generate candidate cut points every 20 seconds between the start of the file and temp_cut_size
                x = range(10, temp_cut_size, 20)
                # Randomly sample 15 of those cut points; the segments ending at them add up to roughly a 5-minute file
                arr = random.sample(x, 15)
                
                # Create a file listing where to cut the original in order to build a shorter file
                cutpoints_file = open(wav_files_directory + "/" + "cutpoints_random_sample.txt", "w")
                # Duration of each segment: the minimum file length threshold divided by the number of sampled points
                num_cuts = int(MINIMUM_FILE_LENGTH_THRESHOLD / len(arr))

                log.info("Number of cuts " + str(num_cuts))

                # Write one file/inpoint/outpoint block per sampled point; ffmpeg uses this
                # helper file to figure out where to cut the original file
                for point in arr:
                    cutpoints_file.write("file " + str(encoded_file_name) + "\n")
                    cutpoints_file.write("inpoint " + str(point - num_cuts) + "\n")
                    cutpoints_file.write("outpoint " + str(point) + "\n")

                cutpoints_file.close()
                
                log.info("Trying to create the shortened file from the cutpoints")

                ffmpeg_calls.create_shortened_file(wav_files_directory, reprocessed_files_directory, filename, "cutpoints_random_sample.txt")

                temp_id = VI_API.upload_video_file(filename, reprocessed_files_directory + "/" + filename)
                new_language, new_confidence, new_json = VI_API.new_get_language(temp_id)
                # If the new confidence is higher than the original, keep the new results and clean up the old index entry
                if new_confidence > confidence:
                    confidence = new_confidence
                    language = new_language
                    response_json = new_json
                    VI_API.clean_index([id])
                else:
                    if temp_id != "None":
                        VI_API.clean_index([temp_id])

        
        log.info("Writing the results to json ... ")
        # Write the audio detection results to a JSON file
        write_json_to_file(response_json, detection_results_json_directory + "/" + filename.split(".")[0] + ".json")
        log.info("Wrote result to: " + detection_results_json_directory + "/" + filename.split(".")[0] + ".json")
        
        # Return a summary dict for this stream
        return {
            "streamName": filename, 
            "language": str(language), 
            "confidence": str(confidence), 
            "resultJsonFile": filename.split(".")[0] + ".json"
        }

    except Exception as e:
        print("ERROR: Failed to get new confidence for file " + filename)
        print(e)
        return {}
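# Minimal usage sketch for retrieve_confidence. The id-to-filename mapping below is
# hypothetical (real ids come from VI_API.upload_video_file elsewhere in this project);
# the loop only illustrates how the returned summary dict, or {} on failure, might be consumed.
if __name__ == "__main__":
    uploads = {"abc123": "cut_stream1_output.wav"}  # hypothetical Video Indexer id -> stream file
    for video_id, stream_file in uploads.items():
        result = retrieve_confidence(video_id, stream_file)
        if result:  # an empty dict means confidence retrieval failed
            log.info("Detected " + result["language"] + " in " + result["streamName"]
                     + " with confidence " + result["confidence"])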