def resample(self, file):
    """Convert a wav file to 16 kHz, mono, 16-bit using SoX.

    This is the ideal format for downstream speech processing.

    :param file: source file name (resolved via common.file_path)
    :return: name of the resampled file
    """
    transformer = sox.Transformer()
    transformer.convert(samplerate=16000, n_channels=1, bitdepth=16)
    target_name = '{0}_sampled'.format(file)
    target_path = common.file_path(target_name)
    transformer.build(common.file_path(file), target_path)
    # Sanity check that SoX actually produced the output file.
    common.file_exists(target_path)
    return target_name
def __rename_extensions(list_files, list_extensions, list_renamed,
                        list_skipped, conflict_mode, extension_target):
    """
    Core method to rename the file extensions.

    Builds the target path by stripping exactly the trailing extension.
    (Bug fix: the previous code used str.replace(), which substitutes
    *every* occurrence of the extension string in the path, mangling
    names such as 'a.txt.txt' or '/dir.txt/file.txt'.)

    :param list_files:       absolute paths of the files to process
    :param list_extensions:  kept for interface compatibility (unused here)
    :param list_renamed:     accumulator of [src, temp, dst] triples
    :param list_skipped:     accumulator of paths left untouched
    :param conflict_mode:    'rename' (append _N) or 'skip' on collisions
    :param extension_target: new extension, without the leading dot
    :return: tuple (list_renamed, list_skipped)
    """
    fs_case = common.get_fs_case_sensitivity(os.path.dirname(list_files[0]))
    for file_path in list_files:
        num = 1
        file_name = file_path.split(os.path.sep)[-1]
        file_ext = os.path.splitext(file_name)[1]
        if file_ext == "":
            # No extension at all; nothing to rename.
            list_skipped.append(file_path)
            continue
        # file_ext includes the leading dot, so slicing off its length
        # removes only the trailing extension.
        path_root = file_path[:-len(file_ext)]
        file_newpath = path_root + os.path.extsep + extension_target
        if file_path == file_newpath:
            list_skipped.append(file_path)
            continue
        if conflict_mode == "rename":
            while True:
                if common.file_exists(file_newpath, list_renamed, fs_case):
                    if not fs_case:
                        # Same file under a case-insensitive FS; keep name.
                        if file_path.lower() == file_newpath.lower():
                            break
                    file_newpath = (path_root + "_" + str(num) +
                                    os.path.extsep + extension_target)
                    num += 1
                else:
                    break
        elif conflict_mode == "skip":
            if common.file_exists(file_newpath, list_renamed, fs_case):
                if not fs_case:
                    if not file_path.lower() == file_newpath.lower():
                        list_skipped.append(file_path)
                        continue
                else:
                    list_skipped.append(file_path)
                    continue
        if os.path.exists(file_path):
            # Two-phase rename via a temp name avoids mid-batch collisions.
            list_renamed.append(
                [file_path, file_newpath + ".__temp__", file_newpath])
    return list_renamed, list_skipped
def export_audio_chunks(self, file, chunks):
    """ For each chunk of audio it gets exported to wav

    Bug fix: the path must be resolved from the *chunk* name; previously
    every chunk was written over common.file_path(file), so all exports
    clobbered one file and the returned names pointed at nothing.

    :param file: file name with extension
    :param chunks: small chunks of wav
    :return: new exported file names
    """
    chunk_names = []
    for i, chunk in enumerate(chunks):
        chunk_name = "{0}_{1}.wav".format(file, i)
        chunk_path = common.file_path(chunk_name)
        chunk.export(chunk_path, format="wav")
        chunk_names.append(chunk_name)
        # Verify each chunk actually landed on disk.
        common.file_exists(chunk_path)
    return chunk_names
def spk_train_init(file):
    """ TODO: Generate train data
    This improves the Speech Recognition so that the system can recognize
    speakers recorded from past data

    Train the speaker model using the Gaussian mixture model (GMM) model."""
    name, _ = common.split_file_ext(file)
    args = [
        common.JAVA_EXE, '-Xmx256m', '-cp', common.LIUM_PATH,
        'fr.lium.spkDiarization.programs.MTrainInit',
        '--sInputMask={}.seg'.format(common.seg_path(name)),
        '--fInputMask={}'.format(common.file_path(file)),
        # Bug fix: the UBM model is the *model* input. Passing
        # --sInputMask twice made the second value clobber the
        # segmentation mask; LIUM MTrainInit reads the GMM to copy
        # (with --emInitMethod=copy) via --tInputMask.
        '--tInputMask={}.ubm.gmm'.format(common.seg_path(name)),
        '--emInitMethod=copy',
        '--tOutputMask={}.init.gmm'.format(common.seg_path(name)),
        name
    ]
    common.call_subproc(args)
    # Bug fix: check the path --tOutputMask actually writes to
    # (under seg_path), not a bare file in the working directory.
    common.file_exists('{}.init.gmm'.format(common.seg_path(name)))
def spk_train_map(file):
    """ TODO: Generate train data
    This improves the Speech Recognition so that the system can recognize
    speakers recorded from past data

    Train the speaker model using the Maximum a posteriori (MAP)
    adaptation of GMM """
    name, _ = common.split_file_ext(file)
    args = [
        common.JAVA_EXE, '-Xmx256m', '-cp', common.LIUM_PATH,
        'fr.lium.spkDiarization.programs.MTrainMAP',
        '--sInputMask={}.ident.seg'.format(common.seg_path(name)),
        '--fInputMask={}'.format(common.file_path(file)),
        # Bug fix: the init GMM produced by spk_train_init is the *model*
        # input (--tInputMask). Passing --sInputMask twice made the second
        # value clobber the segmentation mask.
        '--tInputMask={}.init.gmm'.format(common.seg_path(name)),
        '--emCtrl=1,5,0.01', '--varCtrl=0.01,10.0',
        '--tOutputMask={}.gmm'.format(common.seg_path(name)),
        name
    ]
    common.call_subproc(args)
    # Bug fix: check the path --tOutputMask actually writes to
    # (under seg_path), not a bare file in the working directory.
    common.file_exists('{}.gmm'.format(common.seg_path(name)))
def audio_segmentation(self, file, start_list, end_list, concat=False,
                       file_name=None):
    """ Breaks the file into small parts based on time slices and puts
    it back together if the concat option is True

    :param file: filename with extension
    :param start_list: list of ints representing start time ms
    :param end_list: list of ints representing en time ms
    :param concat: option to merge the file
    :param file_name: new file name for export
    :return: new file name/s
    """
    file_names = []
    baseName, ext = common.split_file_ext(file)
    seg_name = '{0}_{1}.{2}'.format(baseName,
                                    file_name if file_name else 'seg', ext)
    audio = AudioSegment.from_file(common.file_path(file), "wav")
    # Bug fix: len(AudioSegment) is already expressed in milliseconds
    # (pydub convention); multiplying by 1000 inflated the duration and
    # let out-of-range slices pass the bounds check below.
    duration_in_ms = len(audio)
    # Keep only slices that lie fully inside the audio.
    audio_segs = [
        audio[start:end]
        for start, end in izip(start_list, end_list)
        if (duration_in_ms >= start >= 0) and (duration_in_ms >= end > 0)
    ]
    if not audio_segs:
        return file_names
    if concat:
        seg_path = common.seg_path(seg_name)
        # Stitch all slices back into one continuous segment.
        audio_concat = reduce(lambda x, y: x + y, audio_segs)
        audio_concat.export(seg_path, format="wav")
        file_names.append(seg_name)
        common.file_exists(seg_path)
    else:
        file_names = self.export_audio_chunks(seg_name, audio_segs)
    return file_names
def diarization(self, file):
    """Take a wav file in the right format and build a segmentation file.
    The seg file stores the speaker, start time, duration, gender and
    also additional info for speech recognition"""
    name, _ = common.split_file_ext(file)
    seg_file = '{}.seg'.format(name)
    seg_path = common.seg_path(seg_file)
    args = [
        common.JAVA_EXE, '-Xmx{}m'.format(common.JAVA_MEM), '-jar',
        common.LIUM_PATH,
        '--fInputMask={}'.format(common.file_path(file)),  # Input file
        '--sOutputMask={}'.format(seg_path),  # Output file
        '--doCEClustering',  # Add cluster for each speaker
        name
    ]
    log.info('Processing diarization for {}'.format(file))
    common.call_subproc(args)
    common.file_exists(seg_path)
    log.info('File {} successfully diarized!'.format(file))
    data = self.build_speakers_segments(seg_file, name)
    # Put together audio files for each speaker's part
    sp_file_names = {}
    for speaker in data:
        speaker_id_file = speaker['speaker_id']
        file_names = self.speechClassifier.audioProcessor.audio_segmentation(
            file, speaker['start'], speaker['end'], concat=True,
            file_name=speaker_id_file)
        if not file_names:
            log.warn('Warning! Failed to perform audio segmentation for {}'.
                     format(speaker_id_file))
            # Bug fix: skip this speaker instead of falling through --
            # file_names[0] on an empty list raised IndexError here.
            continue
        sp_file_names[speaker_id_file] = file_names[0]
    return self.build_speakers_transcript(sp_file_names)
def __fill_num_gaps(list_files, separator, padding, list_renamed,
                    list_skipped, fs_case, step):
    """ Core method to fill numeration gaps.

    Assigns gap numbers (missing values in the numeration sequence) to
    files from `list_skipped`, moving them into `list_renamed`.
    Both accumulator lists are mutated in place and also returned.
    """
    # NOTE(review): 'list_temp' (skipped paths plus final rename targets)
    # is built and sorted but never used afterwards; it may have been
    # intended as input for __get_num_gaps() below -- confirm.
    list_temp = []
    list_temp.extend(list_skipped)
    for i in list_renamed:
        # i is a [src, temp, dst] triple; i[2] is the final target path.
        list_temp.append(i[2])
    list_temp.sort()
    list_gaps = __get_num_gaps(list_files, separator, padding, step)
    if len(list_gaps) > 0:
        list_gaps.sort(reverse=True)
        list_skipped.sort(reverse=True)
        # Pair each gap number with a skipped file until one list runs out.
        # NOTE(review): both lists are sorted descending but consumed via
        # pop(0), so the largest gap number is paired with the
        # lexicographically largest skipped path first -- confirm intended.
        while len(list_gaps) > 0:
            if len(list_skipped) < 1:
                break
            file_path = list_skipped.pop(0)
            list_path = file_path.split(os.path.sep)
            file_dir = list_path[-2]
            file_name = list_path[-1]
            if os.path.extsep in file_name:
                file_ext = os.path.splitext(file_name)[1]
            else:
                file_ext = ""
            num = list_gaps.pop(0)
            # Zero-pad the gap number to the requested width.
            # NOTE(review): rjust(..., "0") already pads with zeros, so the
            # subsequent replace(" ", "0") appears redundant.
            file_num = str(num).rjust(int(padding), "0")
            file_newname = file_dir + separator + \
                file_num.replace(" ", "0") + file_ext
            file_newpath = file_path.replace(file_name, file_newname)
            if common.file_exists(file_newpath, list_renamed, fs_case):
                # Target already taken; keep the file in the skipped list.
                list_skipped.append(file_path)
            else:
                # No temp path needed here (second element is None).
                list_renamed.append([file_path, None, file_newpath])
    return list_renamed, list_skipped
def compare(file1, file2, output=sys.stdout):
    """Compare two result files line by line using per-rule comparators.

    Each line has the form ``<rule>## <content>``; lines are matched by
    rule name and then compared via the module-level ``rules`` table.

    :param file1: path of the first (reference) file
    :param file2: path of the second file
    :param output: stream that receives diff records (default stdout)
    :return: (common.result_pass|common.result_fail, message)
    """
    def dump_lines(special_word):
        # Write one diff record for the current line pair.
        output.write(sep_line)
        output.write("line: " + str(i) + " " + special_word + "\n")
        output.write("case: " + saved_line + "\n")
        output.write("cp: " + l1 + "\n")
        output.write("ip: " + l2 + "\n")
    if not common.file_exists(file1):
        return common.result_fail, "%s not found" % file1
    if not common.file_exists(file2):
        return common.result_fail, "%s not found" % file2
    # Bug fix: context managers close the input files on every path
    # (the original leaked both handles on the early length-mismatch
    # return and never closed them on exceptions).
    with open(file1) as f1:
        ls1 = f1.readlines()
    with open(file2) as f2:
        ls2 = f2.readlines()
    len1 = len(ls1)
    len2 = len(ls2)
    if len1 != len2:
        return common.result_fail, \
            "different file length: %d %d" % (len1, len2)
    fail_cnt = 0
    sep_char = "##"
    sep_line = "-" * 80 + "\n"
    lsep = len(sep_char)
    # Bug fix: 'saved_line' was never assigned, so dump_lines() raised
    # NameError on the first difference.  Track the most recent "case"
    # marker line so each diff record shows the test case it belongs to.
    saved_line = ""
    for i in range(len1):
        l1 = remove_newline(ls1[i])
        l2 = remove_newline(ls2[i])
        if l1.startswith("case"):
            saved_line = l1
        # Identical non-"case" lines need no comparison.
        if l1 == l2 and not l1.startswith("case"):
            continue
        pos1 = l1.find(sep_char)
        pos2 = l2.find(sep_char)
        r1 = l1[:pos1]
        r2 = l2[:pos2]
        if r1 != r2:
            dump_lines("different rule")
            fail_cnt += 1
            continue
        # Content starts after the separator and one following space.
        c1 = l1[pos1 + lsep + 1:]
        c2 = l2[pos2 + lsep + 1:]
        if rules[r1](c1, c2) == False:
            dump_lines("different output")
            fail_cnt += 1
    # Bug fix: never close sys.stdout (the default output), which would
    # break all later printing in the process; explicitly supplied
    # streams are still closed as before.
    if output is not sys.stdout:
        output.close()
    if fail_cnt == 0:
        return common.result_pass, "pass"
    else:
        return common.result_fail, "diff count: %d" % fail_cnt
def test_file_exists(self):
    # Smoke test: ensure common.file_exists() accepts the module-level
    # 'out' path without raising.
    # NOTE(review): no assertion on the return value -- confirm whether
    # common.file_exists raises on a missing file, otherwise this test
    # cannot fail.
    common.file_exists(out)
def __rename_files_keep_order(list_files, list_renamed, list_skipped,
                              separator, padding, ignore_file_ext=False,
                              custom_name=None, step=1, order_by=None):
    """
    Core method to rename the base name of files based on the name of the
    directory where they are stored in using "keep-order" rename mode.

    Files already matching the '<dir><separator>...' scheme are processed
    first so they keep their numbers; new files are appended afterwards.
    Mutates and returns (list_renamed, list_skipped).

    NOTE(review): the 'order_by' parameter is accepted but never used in
    this function -- confirm whether it belongs to the caller's interface.
    """
    file_newpath = ""
    file_temppath = ""
    temp_file_ext = ""
    list_new = []
    list_ren = []
    num = 0
    fs_case = common.get_fs_case_sensitivity(os.path.dirname(list_files[0]))
    if padding == 0:
        # Auto-size the padding to the total file count (e.g. 3 for 100+).
        padding = len(str(len(list_files)))
    # First pass: partition into already-conforming vs. new files.
    for file_path in list_files:
        list_path = file_path.split(os.path.sep)
        file_dir = list_path[-2]
        file_name = list_path[-1]
        if file_name.startswith(file_dir + separator):
            list_ren.append(file_path)
        else:
            list_new.append(file_path)
    # Conforming files first so their existing numbering is preserved.
    list_files = []
    list_files.extend(list_ren)
    list_files.extend(list_new)
    for file_path in list_files:
        list_path = file_path.split(os.path.sep)
        file_name = list_path[-1]
        if custom_name == None:
            # Base name defaults to the parent directory name.
            file_dir = list_path[-2]
        else:
            file_dir = custom_name
        if os.path.extsep in file_name:
            file_ext = os.path.splitext(file_name)[1]
        else:
            file_ext = ""
        if not ignore_file_ext:
            # Restart numbering whenever the extension changes.
            if not file_ext == temp_file_ext:
                num = 0
        file_temppath = file_path
        temp_file_ext = file_ext
        # Advance the counter until a free target name is found,
        # considering pending renames via list_renamed.
        while common.file_exists(file_temppath, list_renamed, fs_case):
            num += step
            # NOTE(review): rjust(..., "0") already zero-pads, so the
            # replace(" ", "0") below appears redundant.
            file_num = str(num).rjust(int(padding), "0")
            file_newname = \
                file_dir + separator + file_num.replace(" ", "0") + file_ext
            file_newpath = file_path.replace(file_name, file_newname)
            if not file_newpath in list_skipped:
                file_temppath = file_newpath + ".__temp__"
        if os.path.exists(file_path):
            if file_path == file_newpath:
                list_skipped.append(file_path)
            else:
                # Two-phase rename: src -> temp -> final target.
                list_renamed.append([file_path, file_temppath, file_newpath])
    return list_renamed, list_skipped
def __rename_files_fill(list_files, list_renamed, list_skipped, separator,
                        padding, fill_gaps=False, ignore_file_ext=False,
                        custom_name=None, step=1):
    """
    Core method to rename the base name of files based on the name of the
    directory where they are stored in using one of the "fill" rename
    modes (such as "fill-gaps" and "rename-new").

    Mutates and returns (list_renamed, list_skipped).  In "fill-gaps"
    mode the work is done on temporary lists which are post-processed by
    __fill_num_gaps() and then merged into the caller's accumulators.
    """
    file_newpath = ""
    num = 0
    fs_case = common.get_fs_case_sensitivity(os.path.dirname(list_files[0]))
    if fill_gaps:
        # Work on local lists so __fill_num_gaps can rework them before
        # they are merged into the caller's accumulators.
        list_temp_renamed = []
        list_temp_skipped = []
        obj_ren = list_temp_renamed
        obj_skip = list_temp_skipped
    else:
        obj_ren = list_renamed
        obj_skip = list_skipped
    if padding == 0:
        # Auto-size the padding to the total file count.
        padding = len(str(len(list_files)))
    for file_path in list_files:
        list_path = file_path.split(os.path.sep)
        file_name = list_path[-1]
        if custom_name == None:
            # Base name defaults to the parent directory name.
            file_dir = list_path[-2]
        else:
            file_dir = custom_name
        if os.path.extsep in file_name:
            file_ext = os.path.splitext(file_name)[1]
        else:
            file_ext = ""
        if file_name.startswith(file_dir + separator):
            # File already follows the naming scheme; skip it when its
            # number fits the step/padding rules.
            try:
                temp = file_name.replace(file_dir + separator, "")
                list_pad = temp.split(".")
                file_padding = len(list_pad[0])
                if step > 1:
                    if int(list_pad[0]) % step == 0:
                        obj_skip.append(file_path)
                        continue
                else:
                    if int(padding) == file_padding:
                        obj_skip.append(file_path)
                        continue
            except:
                # Numeric part not parseable; fall through and rename.
                pass
        if not ignore_file_ext:
            # Restart numbering per file when extensions are respected.
            num = 0
        file_newpath = file_path
        # Advance the counter until a name free of collisions with both
        # pending renames and skipped files is found.
        while common.file_exists(file_newpath, obj_ren, fs_case) or \
                common.file_exists(file_newpath, obj_skip, fs_case):
            num += step
            # NOTE(review): rjust(..., "0") already zero-pads, so the
            # replace(" ", "0") below appears redundant.
            file_num = str(num).rjust(int(padding), "0")
            file_newname = \
                file_dir + separator + file_num.replace(" ", "0") + file_ext
            file_newpath = file_path.replace(file_name, file_newname)
        if os.path.exists(file_path):
            if file_path == file_newpath:
                obj_skip.append(file_path)
            else:
                # No temp path needed here (second element is None).
                obj_ren.append([file_path, None, file_newpath])
    if fill_gaps:
        # Rework the temporary lists to close numeration gaps, then merge.
        list_temp_renamed, list_temp_skipped = \
            __fill_num_gaps(list_files, separator, padding,
                            list_temp_renamed, list_temp_skipped, fs_case,
                            step)
        list_renamed.extend(list_temp_renamed)
        list_skipped.extend(list_temp_skipped)
    return list_renamed, list_skipped
if static_case: base_name_target = __static_case(base_name_target, case, list_lower, list_mixed, list_title, list_upper).rstrip() file_newpath = file_path.replace(base_name + file_ext, base_name_target + file_ext) if file_path == file_newpath: list_skipped.append(file_path) continue if conflict_mode == "rename": while True: if common.file_exists(file_newpath, list_renamed, fs_case): if not fs_case: if file_path.lower() == file_newpath.lower(): break file_newpath = \ file_path.replace(base_name, base_name_target + "_" + str(num)) num += 1 else: break elif conflict_mode == "skip": if common.file_exists(file_newpath, list_renamed, fs_case): if not fs_case: if not file_path.lower() == file_newpath.lower(): list_skipped.append(file_path) continue
def get_status(task_id, delay=0):
    """
    Get the status of the Erfr process with the given task ID.

    Reads the task file, validates the process type, then monitors the
    input/key/output files involved in the process.  With delay > 0 the
    status is refreshed repeatedly; with delay == 0 a single snapshot is
    printed.  (Python 2 module: uses print statements.)
    """
    task_file = common.get_task_file(task_id)
    # Validate arguments before converting them.
    pv.intrange(task_id, "task ID", 1, common.get_max_tasks(), False)
    pv.intvalue(delay, "delay", True, True, False)
    delay = int(delay)
    task_id = int(task_id)
    progress_key = True
    process_type = ""
    process_type_list = ["encryption", "decryption", "key generation"]
    file_input_path = ""
    file_input_size = 0
    file_key_path = ""
    file_key_size = 0
    file_output_path = ""
    file_output_size = 0
    valid_type = False
    if not common.file_exists(task_file):
        common.exception("No process is running with the given task ID.")
    dict_contents = __read_content(task_file)
    process_type = dict_contents["process_type"]
    if process_type == "":
        common.exception("The process type cannot be empty.")
    # Accept only the known process types.
    for item in process_type_list:
        if process_type == item:
            valid_type = True
    if not valid_type:
        common.exception("The process type '%s' is not supported." \
            % process_type)
    file_input_path = dict_contents["file_input_path"]
    file_input_size = dict_contents["file_input_size"]
    if "crypt" in process_type:
        # Key and output files only exist for en-/decryption tasks.
        file_key_path = dict_contents["file_key_path"]
        file_key_size = dict_contents["file_key_size"]
        file_output_path = dict_contents["file_output_path"]
        file_output_size = dict_contents["file_output_size"]
        if process_type == "decryption":
            # No key progress bar when decrypting.
            progress_key = False
    print
    print "Monitoring Erfr %s process with task ID %s." % \
          (process_type, task_id)
    if delay > 0:
        if delay == 1:
            print "Refreshing the process status every second."
        else:
            print "Refreshing the process status every %s seconds." % \
                  str(delay)
    print
    print "-" * 78
    if file_key_path == "" and file_output_path == "":
        # Key-generation task: only a single file to monitor.
        __monitor_file(task_file, file_input_path, file_input_size,
                       "File name", delay, True)
    else:
        # En-/decryption task: monitor input, key and output files.
        __monitor_file(task_file, file_input_path, file_input_size,
                       "Input file", delay, False)
        print
        __monitor_file(task_file, file_key_path, file_key_size,
                       "Key file", delay, progress_key)
        print
        __monitor_file(task_file, file_output_path, file_output_size,
                       "Output file", delay, True)
    print "-" * 78
    print
    if delay > 0:
        print "Process finished."
def __monitor_file(task_file, file_path, file_size, description, delay,
                   progress):
    """
    Monitor the file size of the given file.

    Prints file info, then (when `progress` is set) polls the file until
    it reaches the expected `file_size`, rendering a spinner/progress
    display.  (Python 2 module: uses print statements.)

    :param task_file:   path of the task description file
    :param file_path:   file whose growth is monitored
    :param file_size:   expected final size in bytes
    :param description: label shown next to the file info
    :param delay:       refresh interval; 0 prints a single snapshot
    :param progress:    when falsy, only file info is printed
    """
    file_name = os.path.basename(file_path)
    # NOTE(review): rstrip(file_name) strips *characters*, not a suffix
    # string -- may over-strip when the directory path ends with
    # characters also present in the file name; confirm.
    file_dir = __remove_duplicate_chars( \
        file_path.rstrip(file_name).rstrip(os.path.sep), os.path.sep)
    file_size = int(file_size)
    file_size_init = 0
    file_size_current = 0
    file_size_perc = 0
    # Spinner character sets for the three monitor states.
    chars_running = ["-", "\\", "|", "/"]
    chars_stalled = ["?", " "]
    chars_missing = ["X", " "]
    delay_running = 0.1
    delay_stalled = 0.6
    progress_chars = chars_running
    progress_count = 0
    stalled = False
    wait = delay_running
    display_file_info = \
        bool(int(common.global_config(["KeyGenerator", "Monitor"],
                                      ["display_file_info"], "1")))
    if display_file_info:
        print("%s:" % description).ljust(16, " ") + file_name
        print("File path:").ljust(16, " ") + file_dir
    else:
        print "%s" % description
    if file_size < 1000:
        print("File size:").ljust(16, " ") + ("%s bytes total" % file_size)
    else:
        size_round = __format_size(file_size)
        print ("File size:").ljust(16, " ") + \
              ("%s (%s bytes total)" % (size_round, file_size))
    if not progress:
        # Info-only mode: no polling.
        return
    try:
        file_size_init = file_size
        file_size_current = common.get_file_size(file_path)
        file_size_perc = int((file_size_current * 100) / file_size)
    except:
        # File may not exist yet; start from the defaults.
        pass
    count = 0
    # Poll until the file reaches its expected size.
    while file_size_current < file_size:
        try:
            file_size_current = common.get_file_size(file_path)
        except:
            pass
        if file_size_current == file_size:
            break
        file_exists_task = common.file_exists(task_file)
        file_exists_input = common.file_exists(file_path)
        if not file_exists_task or not file_exists_input:
            # Task file or monitored file vanished: stalled/missing state.
            if not file_exists_input:
                progress_chars = chars_missing
            else:
                progress_chars = chars_stalled
            stalled = True
            wait = delay_stalled
        else:
            progress_chars = chars_running
            wait = delay_running
        if stalled:
            # Re-read the task file; abort if it now describes a
            # different input file (another task reused the ID).
            dict_contents = __read_content(task_file)
            if not int(dict_contents["file_input_size"]) == \
                    file_size_init:
                print "-" * 78
                common.exception("Task mismatch. Process cancelled.")
            stalled = False
        progress_count += 1
        if progress_count >= len(progress_chars):
            progress_count = 0
        if delay == 0:
            # Snapshot mode: print once and leave.
            __progress(file_size_perc, None, True)
            return
        if delay > 0:
            if file_size_perc < 100:
                __progress( \
                    file_size_perc, progress_chars[progress_count], False)
        time.sleep(wait)
        # Only refresh the measured size every `delay` seconds while the
        # spinner updates every `wait` seconds.
        if count < delay:
            count += 0.1
            continue
        else:
            count = 0
        try:
            file_size_current = common.get_file_size(file_path)
            if not stalled:
                file_size_perc = int((file_size_current * 100) / file_size)
        except:
            pass
    __progress(100, " ", True)