def lrs2pretrain_max_inplen_checker():
    """Print the largest input length needed by any audio-only pretrain sample.

    Walks ``args["DATA_DIRECTORY"] + "/pretrain"``. For every ``.mp4`` file the
    sibling ``.wav`` and ``.txt`` files are read. The target text is the first
    line of the ``.txt`` file with its first 7 characters dropped (presumably a
    fixed label prefix -- confirm against the dataset format).

    * If the target has at most ``args["PRETRAIN_NUM_WORDS"]`` words, the whole
      clip is a single sample.
    * Otherwise every window of ``PRETRAIN_NUM_WORDS`` consecutive words whose
      length (plus one) does not exceed 256 is a sample; its duration is taken
      from the per-word timing rows that start at line index 4 of the ``.txt``
      file.

    Each sample's length is rounded up to a multiple of 4 and raised to
    ``4 * (req_input_length(target) + 1)`` when that is larger. The maximum over
    all samples is printed. The process exits if a target cannot fit in 256
    characters.

    Returns:
        None. The result is only printed.
    """
    maxInpLen = 0
    numWords = args["PRETRAIN_NUM_WORDS"]

    for root, _dirs, files in os.walk(args["DATA_DIRECTORY"] + "/pretrain"):
        for file in files:
            if not file.endswith(".mp4"):
                continue

            stem = os.path.join(root, file[:-4])
            audioFile = stem + ".wav"
            targetFile = stem + ".txt"

            with open(targetFile, "r") as f:
                lines = [line.strip() for line in f]

            trgt = lines[0][7:]
            words = trgt.split(" ")

            if len(words) <= numWords:
                if len(trgt) + 1 > 256:
                    print("Max target length reached. Exiting")
                    exit()

                sampFreq, audio = wavfile.read(audioFile)
                # Number of 640-sample windows at a 160-sample stride.
                inpLen = (len(audio) - 640) // 160 + 1
                inpLen = int(np.ceil(inpLen / 4)) * 4
                reqLen = req_input_length(trgt) + 1
                inpLen = max(inpLen, reqLen * 4)
                maxInpLen = max(maxInpLen, inpLen)

            else:
                nWords = [
                    " ".join(words[i:i + numWords])
                    for i in range(len(words) - numWords + 1)
                ]
                # Keep the ORIGINAL start offsets of the usable windows. The
                # timing rows below are addressed by word offset, so indexing
                # into a filtered copy would pair windows with wrong timings.
                validStarts = [
                    i for i, nWord in enumerate(nWords) if len(nWord) + 1 <= 256
                ]
                if not validStarts:
                    print("Max target length reached. Exiting")
                    exit()

                # The sample rate must come from this file; previously it was
                # only ever assigned in the short-sample branch.
                sampFreq, _audio = wavfile.read(audioFile)

                for ix in validStarts:
                    trgt = nWords[ix]
                    audioStartTime = float(lines[4 + ix].split(" ")[1])
                    audioEndTime = float(
                        lines[4 + ix + numWords - 1].split(" ")[2])
                    numSamples = (int(sampFreq * audioEndTime)
                                  - int(sampFreq * audioStartTime))
                    inpLen = (numSamples - 640) // 160 + 1
                    # Very short segments are padded up to one group of 4.
                    if inpLen < 4:
                        inpLen = 4
                    inpLen = int(np.ceil(inpLen / 4)) * 4
                    reqLen = req_input_length(trgt) + 1
                    inpLen = max(inpLen, reqLen * 4)
                    maxInpLen = max(maxInpLen, inpLen)

    print(maxInpLen)
    return
def lrs2pretrain_max_inplen_checker():
    """Print the largest input length needed by any video-only pretrain sample.

    Walks ``args["DATA_DIRECTORY"] + "/pretrain"``. For every ``.mp4`` file the
    sibling ``.npy`` (visual features) and ``.txt`` files are read. The target
    text is the first line of the ``.txt`` file with its first 7 characters
    dropped (presumably a fixed label prefix -- confirm against the dataset
    format).

    * If the target has at most ``args["PRETRAIN_NUM_WORDS"]`` words, the whole
      clip is a single sample whose length is ``len(visualFeatures)``.
    * Otherwise every window of ``PRETRAIN_NUM_WORDS`` consecutive words whose
      length (plus one) does not exceed 256 is a sample; its frame count is
      derived from ``args["VIDEO_FPS"]`` and the per-word timing rows that
      start at line index 4 of the ``.txt`` file.

    Each sample's length is raised to ``req_input_length(target) + 1`` when
    that is larger. The maximum over all samples is printed. The process exits
    if a target cannot fit in 256 characters.

    Returns:
        None. The result is only printed.
    """
    maxInpLen = 0
    numWords = args["PRETRAIN_NUM_WORDS"]

    for root, _dirs, files in os.walk(args["DATA_DIRECTORY"] + "/pretrain"):
        for file in files:
            if not file.endswith(".mp4"):
                continue

            stem = os.path.join(root, file[:-4])
            visualFeaturesFile = stem + ".npy"
            targetFile = stem + ".txt"

            with open(targetFile, "r") as f:
                lines = [line.strip() for line in f]

            trgt = lines[0][7:]
            words = trgt.split(" ")

            if len(words) <= numWords:
                if len(trgt) + 1 > 256:
                    print("Max target length reached. Exiting")
                    exit()

                visualFeatures = np.load(visualFeaturesFile)
                inpLen = len(visualFeatures)
                reqLen = req_input_length(trgt) + 1
                inpLen = max(inpLen, reqLen)
                maxInpLen = max(maxInpLen, inpLen)

            else:
                nWords = [
                    " ".join(words[i:i + numWords])
                    for i in range(len(words) - numWords + 1)
                ]
                # Keep the ORIGINAL start offsets of the usable windows. The
                # timing rows below are addressed by word offset, so indexing
                # into a filtered copy would pair windows with wrong timings.
                validStarts = [
                    i for i, nWord in enumerate(nWords) if len(nWord) + 1 <= 256
                ]
                if not validStarts:
                    print("Max target length reached. Exiting")
                    exit()

                for ix in validStarts:
                    trgt = nWords[ix]
                    videoStartTime = float(lines[4 + ix].split(" ")[1])
                    videoEndTime = float(
                        lines[4 + ix + numWords - 1].split(" ")[2])
                    # Frames spanned: last frame rounded up, first rounded down.
                    inpLen = int(
                        np.ceil(args["VIDEO_FPS"] * videoEndTime)
                        - np.floor(args["VIDEO_FPS"] * videoStartTime))
                    reqLen = req_input_length(trgt) + 1
                    inpLen = max(inpLen, reqLen)
                    maxInpLen = max(maxInpLen, inpLen)

    print(maxInpLen)
    return
def lrs2main_max_inplen_checker():
    """Print the largest input length needed by any audio-visual main sample.

    Walks ``args["DATA_DIRECTORY"] + "/main"``. For every ``.mp4`` file the
    sibling ``.wav``, ``.npy`` (visual features) and ``.txt`` files are read.
    The target text is the first line of the ``.txt`` file with its first 7
    characters dropped (presumably a fixed label prefix -- confirm against the
    dataset format).

    The sample length is the larger of the visual feature count and the audio
    frame count divided by 4 (rounded up), raised further to
    ``req_input_length(target) + 1`` when that is larger. The maximum over all
    samples is printed as an integer.

    Returns:
        None. The result is only printed.
    """
    maxInpLen = 0

    for root, _dirs, files in os.walk(args["DATA_DIRECTORY"] + "/main"):
        for file in files:
            if not file.endswith(".mp4"):
                continue

            stem = os.path.join(root, file[:-4])
            audioFile = stem + ".wav"
            visualFeaturesFile = stem + ".npy"
            targetFile = stem + ".txt"

            with open(targetFile, "r") as f:
                trgt = f.readline().strip()[7:]

            _sampFreq, audio = wavfile.read(audioFile)
            # Number of 640-sample windows at a 160-sample stride.
            audInpLen = (len(audio) - 640) // 160 + 1

            visualFeatures = np.load(visualFeaturesFile)
            vidInpLen = len(visualFeatures)

            if vidInpLen >= audInpLen / 4:
                inpLen = vidInpLen
            else:
                # Cast to int so the running maximum never becomes a NumPy
                # float (which would print as e.g. "155.0").
                inpLen = int(np.ceil(audInpLen / 4))

            reqLen = req_input_length(trgt) + 1
            inpLen = max(inpLen, reqLen)
            maxInpLen = max(maxInpLen, inpLen)

    print(maxInpLen)
    return
def req_input_length_checker():
    """Print ``req_input_length`` for a few hand-written sample targets.

    Each sample string is converted character by character to indices using
    ``args["CHAR_TO_INDEX"]``; the ``"~"`` character stands in for the
    ``"<EOS>"`` entry. The resulting index list is passed to
    ``req_input_length`` and the returned value is printed, one per sample.

    Returns:
        None. The results are only printed.
    """
    samples = [
        "WORKS FOR DOODEE OOTY ASSAM~",
        " ~",
        "NOOOOOOOOOO~",
        "IT'S THAT SIMPLE~",
    ]

    for text in samples:
        # "~" is a placeholder for the end-of-sequence token.
        encoded = [
            args["CHAR_TO_INDEX"]["<EOS>"] if symbol == "~"
            else args["CHAR_TO_INDEX"][symbol]
            for symbol in text
        ]
        print(req_input_length(encoded))

    return
def lrs2main_max_inplen_checker():
    """Print the largest input length needed by any video-only main sample.

    Walks ``args["DATA_DIRECTORY"] + "/main"``. For every ``.mp4`` file the
    sibling ``.npy`` (visual features) and ``.txt`` files are read. The target
    text is the first line of the ``.txt`` file with its first 7 characters
    dropped. A sample's length is ``len(visualFeatures)``, raised to
    ``req_input_length(target) + 1`` when that is larger. The maximum over all
    samples is printed.

    Returns:
        None. The result is only printed.
    """
    longest = 0

    for folder, _subdirs, names in os.walk(args["DATA_DIRECTORY"] + "/main"):
        for name in names:
            if not name.endswith(".mp4"):
                continue

            base = os.path.join(folder, name[:-4])

            with open(base + ".txt", "r") as handle:
                target = handle.readline().strip()[7:]

            featureCount = len(np.load(base + ".npy"))
            required = req_input_length(target) + 1

            # The sample needs whichever is larger: frames available or
            # frames the target demands.
            needed = max(featureCount, required)
            longest = max(longest, needed)

    print(longest)
    return
def lrs2main_max_inplen_checker():
    """Print the largest input length needed by any audio-only main sample.

    Walks ``args["DATA_DIRECTORY"] + "/main"``. For every ``.mp4`` file the
    sibling ``.wav`` and ``.txt`` files are read. The target text is the first
    line of the ``.txt`` file with its first 7 characters dropped. A sample's
    length is the audio frame count rounded up to a multiple of 4, raised to
    ``4 * (req_input_length(target) + 1)`` when that is larger. The maximum
    over all samples is printed.

    Returns:
        None. The result is only printed.
    """
    longest = 0

    for folder, _subdirs, names in os.walk(args["DATA_DIRECTORY"] + "/main"):
        for name in names:
            if not name.endswith(".mp4"):
                continue

            base = os.path.join(folder, name[:-4])

            with open(base + ".txt", "r") as handle:
                target = handle.readline().strip()[7:]

            _rate, samples = wavfile.read(base + ".wav")

            # Number of 640-sample windows at a 160-sample stride, then
            # rounded up to the next multiple of 4.
            frameCount = (len(samples) - 640) // 160 + 1
            padded = int(np.ceil(frameCount / 4)) * 4

            required = (req_input_length(target) + 1) * 4
            needed = max(padded, required)
            longest = max(longest, needed)

    print(longest)
    return