コード例 #1
0
def lrs2pretrain_max_inplen_checker():
    """Print the largest input length required over the pretrain set (audio).

    Walks ``args["DATA_DIRECTORY"] + "/pretrain"`` and, for every ``.mp4``
    file, reads the sibling ``.txt`` transcript and ``.wav`` audio. For
    transcripts of at most ``args["PRETRAIN_NUM_WORDS"]`` words the whole
    utterance is measured; for longer transcripts every window of
    ``PRETRAIN_NUM_WORDS`` consecutive words is measured using the per-word
    start/end times stored in the transcript file. The running maximum is
    printed at the end; nothing is returned.

    The process exits (via ``exit()``) if a sample has no target that fits
    within 256 characters (including one extra position).
    """
    maxInpLen = 0
    numWords = args["PRETRAIN_NUM_WORDS"]
    for root, dirs, files in os.walk(args["DATA_DIRECTORY"] + "/pretrain"):
        for file in files:
            if not file.endswith(".mp4"):
                continue

            sampleBase = os.path.join(root, file[:-4])
            audioFile = sampleBase + ".wav"
            targetFile = sampleBase + ".txt"
            with open(targetFile, "r") as f:
                lines = [line.strip() for line in f]
            # The first 7 characters of the first line are skipped
            # (presumably a fixed label prefix - confirm against the data).
            trgt = lines[0][7:]
            words = trgt.split(" ")

            if len(words) <= numWords:
                if len(trgt) + 1 > 256:
                    print("Max target length reached. Exiting")
                    exit()
                sampFreq, audio = wavfile.read(audioFile)
                # Frame count for a 640-sample window with a 160-sample hop
                # (presumably the STFT settings used elsewhere - confirm).
                inpLen = (len(audio) - 640) // 160 + 1
                # Round up to a multiple of 4.
                inpLen = int(np.ceil(inpLen / 4)) * 4
                reqLen = req_input_length(trgt) + 1
                if reqLen * 4 > inpLen:
                    inpLen = reqLen * 4
                if inpLen > maxInpLen:
                    maxInpLen = inpLen

            else:
                windows = [
                    " ".join(words[i:i + numWords])
                    for i in range(len(words) - numWords + 1)
                ]
                # Keep the ORIGINAL start index of every window that fits the
                # 256-character limit; the timestamp rows below are addressed
                # by that index, so it must not be renumbered after filtering.
                validStarts = [
                    i for i, window in enumerate(windows)
                    if len(window) + 1 <= 256
                ]
                if not validStarts:
                    print("Max target length reached. Exiting")
                    exit()

                # The sampling rate must come from this sample's own audio
                # file; it is not otherwise defined in this branch.
                sampFreq, _ = wavfile.read(audioFile)
                for ix in validStarts:
                    trgt = windows[ix]
                    # Word rows start at line index 4; fields 1 and 2 of a
                    # row are read as the word's start and end time.
                    audioStartTime = float(lines[4 + ix].split(" ")[1])
                    audioEndTime = float(
                        lines[4 + ix + numWords - 1].split(" ")[2])
                    numSamples = (int(sampFreq * audioEndTime) -
                                  int(sampFreq * audioStartTime))
                    inpLen = (numSamples - 640) // 160 + 1
                    if inpLen < 4:
                        inpLen = 4
                    inpLen = int(np.ceil(inpLen / 4)) * 4
                    reqLen = req_input_length(trgt) + 1
                    if reqLen * 4 > inpLen:
                        inpLen = reqLen * 4
                    if inpLen > maxInpLen:
                        maxInpLen = inpLen
    print(maxInpLen)
    return
コード例 #2
0
ファイル: checker.py プロジェクト: kasri-mids/deep_avsr
def lrs2pretrain_max_inplen_checker():
    """Print the largest input length required over the pretrain set (video).

    Walks ``args["DATA_DIRECTORY"] + "/pretrain"`` and, for every ``.mp4``
    file, reads the sibling ``.txt`` transcript. For transcripts of at most
    ``args["PRETRAIN_NUM_WORDS"]`` words the length of the saved ``.npy``
    visual-feature array is used; for longer transcripts every window of
    ``PRETRAIN_NUM_WORDS`` consecutive words is measured in video frames from
    the per-word start/end times and ``args["VIDEO_FPS"]``. The running
    maximum is printed at the end; nothing is returned.

    The process exits (via ``exit()``) if a sample has no target that fits
    within 256 characters (including one extra position).
    """
    maxInpLen = 0
    numWords = args["PRETRAIN_NUM_WORDS"]
    for root, dirs, files in os.walk(args["DATA_DIRECTORY"] + "/pretrain"):
        for file in files:
            if not file.endswith(".mp4"):
                continue

            sampleBase = os.path.join(root, file[:-4])
            visualFeaturesFile = sampleBase + ".npy"
            targetFile = sampleBase + ".txt"
            with open(targetFile, "r") as f:
                lines = [line.strip() for line in f]
            # The first 7 characters of the first line are skipped
            # (presumably a fixed label prefix - confirm against the data).
            trgt = lines[0][7:]
            words = trgt.split(" ")

            if len(words) <= numWords:
                if len(trgt) + 1 > 256:
                    print("Max target length reached. Exiting")
                    exit()
                visualFeatures = np.load(visualFeaturesFile)
                inpLen = len(visualFeatures)
                reqLen = req_input_length(trgt) + 1
                if reqLen > inpLen:
                    inpLen = reqLen
                if inpLen > maxInpLen:
                    maxInpLen = inpLen

            else:
                windows = [
                    " ".join(words[i:i + numWords])
                    for i in range(len(words) - numWords + 1)
                ]
                # Keep the ORIGINAL start index of every window that fits the
                # 256-character limit; the timestamp rows below are addressed
                # by that index, so it must not be renumbered after filtering.
                validStarts = [
                    i for i, window in enumerate(windows)
                    if len(window) + 1 <= 256
                ]
                if not validStarts:
                    print("Max target length reached. Exiting")
                    exit()

                for ix in validStarts:
                    trgt = windows[ix]
                    # Word rows start at line index 4; fields 1 and 2 of a
                    # row are read as the word's start and end time.
                    videoStartTime = float(lines[4 + ix].split(" ")[1])
                    videoEndTime = float(
                        lines[4 + ix + numWords - 1].split(" ")[2])
                    # Frames spanned by the window: last frame rounded up,
                    # first frame rounded down.
                    inpLen = int(
                        np.ceil(args["VIDEO_FPS"] * videoEndTime) -
                        np.floor(args["VIDEO_FPS"] * videoStartTime))
                    reqLen = req_input_length(trgt) + 1
                    if reqLen > inpLen:
                        inpLen = reqLen
                    if inpLen > maxInpLen:
                        maxInpLen = inpLen
    print(maxInpLen)
    return
コード例 #3
0
def lrs2main_max_inplen_checker():
    """Print the largest input length required over the main set (audio+video).

    Walks ``args["DATA_DIRECTORY"] + "/main"`` and, for every ``.mp4`` file,
    loads the sibling ``.wav`` audio, ``.npy`` visual features and ``.txt``
    transcript. The per-sample length is the larger of the visual-feature
    count and one quarter of the audio frame count (rounded up), raised if
    necessary to ``req_input_length(target) + 1``. The running maximum is
    printed at the end; nothing is returned.
    """
    maxInpLen = 0
    for root, dirs, files in os.walk(args["DATA_DIRECTORY"] + "/main"):
        for file in files:
            if not file.endswith(".mp4"):
                continue

            sampleBase = os.path.join(root, file[:-4])
            audioFile = sampleBase + ".wav"
            visualFeaturesFile = sampleBase + ".npy"
            targetFile = sampleBase + ".txt"
            with open(targetFile, "r") as f:
                # The first 7 characters of the first line are skipped
                # (presumably a fixed label prefix - confirm against the data).
                trgt = f.readline().strip()[7:]
            sampFreq, audio = wavfile.read(audioFile)
            # Frame count for a 640-sample window with a 160-sample hop
            # (presumably the STFT settings used elsewhere - confirm).
            audInpLen = (len(audio) - 640) // 160 + 1
            visualFeatures = np.load(visualFeaturesFile)
            vidInpLen = len(visualFeatures)
            if vidInpLen >= audInpLen / 4:
                inpLen = vidInpLen
            else:
                # np.ceil returns a float; cast so the reported maximum is an
                # integer no matter which branch produced it.
                inpLen = int(np.ceil(audInpLen / 4))
            reqLen = req_input_length(trgt) + 1
            if reqLen > inpLen:
                inpLen = reqLen
            if inpLen > maxInpLen:
                maxInpLen = inpLen
    print(maxInpLen)
    return
コード例 #4
0
ファイル: checker.py プロジェクト: mlomnitz/deep_avsr
def req_input_length_checker():
    """Print ``req_input_length`` for a few hand-written sample targets.

    Each sample string is converted character by character into indices via
    ``args["CHAR_TO_INDEX"]``; the ``~`` character is mapped to the index of
    ``"<EOS>"``. The result of ``req_input_length`` on each index list is
    printed. Nothing is returned.
    """
    samples = ["WORKS FOR DOODEE OOTY ASSAM~", "       ~", "NOOOOOOOOOO~", "IT'S THAT SIMPLE~"]
    for text in samples:
        encoded = [
            args["CHAR_TO_INDEX"]["<EOS>"] if ch == "~"
            else args["CHAR_TO_INDEX"][ch]
            for ch in text
        ]
        print(req_input_length(encoded))
コード例 #5
0
ファイル: checker.py プロジェクト: kasri-mids/deep_avsr
def lrs2main_max_inplen_checker():
    """Print the largest input length required over the main set (video).

    For every ``.mp4`` file under ``args["DATA_DIRECTORY"] + "/main"`` the
    sibling ``.txt`` transcript and ``.npy`` visual features are read. The
    per-sample length is the number of visual features, raised if necessary
    to ``req_input_length(target) + 1``. The maximum over all samples is
    printed; nothing is returned.
    """
    longest = 0
    mainDir = args["DATA_DIRECTORY"] + "/main"
    for root, dirs, files in os.walk(mainDir):
        for file in files:
            if not file.endswith(".mp4"):
                continue
            stem = os.path.join(root, file[:-4])
            with open(stem + ".txt", "r") as f:
                # Skip the first 7 characters of the first line.
                trgt = f.readline().strip()[7:]
            numFrames = len(np.load(stem + ".npy"))
            needed = max(numFrames, req_input_length(trgt) + 1)
            longest = max(longest, needed)
    print(longest)
コード例 #6
0
def lrs2main_max_inplen_checker():
    """Print the largest input length required over the main set (audio).

    For every ``.mp4`` file under ``args["DATA_DIRECTORY"] + "/main"`` the
    sibling ``.txt`` transcript and ``.wav`` audio are read. The audio frame
    count is rounded up to a multiple of 4 and raised if necessary to
    ``4 * (req_input_length(target) + 1)``. The maximum over all samples is
    printed; nothing is returned.
    """
    longest = 0
    mainDir = args["DATA_DIRECTORY"] + "/main"
    for root, dirs, files in os.walk(mainDir):
        for file in files:
            if not file.endswith(".mp4"):
                continue
            stem = os.path.join(root, file[:-4])
            with open(stem + ".txt", "r") as f:
                # Skip the first 7 characters of the first line.
                trgt = f.readline().strip()[7:]
            sampFreq, audio = wavfile.read(stem + ".wav")
            numFrames = (len(audio) - 640) // 160 + 1
            padded = int(np.ceil(numFrames / 4)) * 4
            needed = max(padded, (req_input_length(trgt) + 1) * 4)
            longest = max(longest, needed)
    print(longest)