Code example #1
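All eight excerpts below come from the same Drift server module, so their imports are shared. A minimal sketch of what they assume (the stdlib and third-party names follow directly from usage; the project-local helpers are inferred, since their definitions are not shown in this section):

import csv
import json
import os
import subprocess
import sys
import tempfile
import time

import librosa
import numpy as np
import pyworld
import requests
import scipy.io as sio

# Project-local helpers assumed by the excerpts (definitions not shown here):
# guts, rec_set, nmt, prosodic_measures, gentle_punctuate,
# parse_speakers_in_transcript, get_attachpath, get_calc_sbpca,
# calc_intense, GENTLE_PORT
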
def harvest(cmd):
    docid = cmd["id"]

    meta = rec_set.get_meta(docid)

    x, fs = librosa.load(os.path.join(get_attachpath(), meta["path"]), sr=None)
    print("SYSTEM: harvesting...")
    hv_start = time.time()
    f0, timeaxis = pyworld.harvest(x.astype(np.float64), fs)
    print(f"SYSTEM: finished harvesting! (took {time.time() - hv_start:.2f}s)")

    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False,
                                     mode="w") as harvest_fp:
        for i in range(len(timeaxis)):
            harvest_fp.write(f'{timeaxis[i]} {f0[i]}\n')

    # Guard against an empty (failed) analysis before attaching it.
    with open(harvest_fp.name) as check_fp:
        if not check_fp.read().strip():
            return {"error": "Harvest computation failed"}

    # XXX: frozen attachdir
    harvesthash = guts.attach(harvest_fp.name, get_attachpath())

    guts.bschange(
        rec_set.dbs[docid],
        {
            "type": "set",
            "id": "meta",
            "key": "harvest",
            "val": harvesthash
        },
    )

    return {"harvest": harvesthash}
Code example #2
def pitch(cmd):
    docid = cmd["id"]

    meta = rec_set.get_meta(docid)

    # Create an 8 kHz mono wav file...
    with tempfile.NamedTemporaryFile(suffix=".wav") as wav_fp:
        ff_start = time.time()
        subprocess.call([
            "ffmpeg",
            "-y",
            "-loglevel",
            "panic",
            "-i",
            os.path.join(get_attachpath(), meta["path"]),
            "-ar",
            "8000",
            "-ac",
            "1",
            wav_fp.name,
        ])

        print(f'SYSTEM: FFMPEG took {time.time() - ff_start:.2f}s')

        # ...and use it to compute pitch
        with tempfile.NamedTemporaryFile(suffix=".txt",
                                         delete=False) as pitch_fp:
            subprocess.call([get_calc_sbpca(), wav_fp.name, pitch_fp.name])

    # Guard against an empty (failed) analysis before attaching it.
    with open(pitch_fp.name) as check_fp:
        if not check_fp.read().strip():
            return {"error": "Pitch computation failed"}

    # XXX: frozen attachdir
    pitchhash = guts.attach(pitch_fp.name, get_attachpath())

    guts.bschange(
        rec_set.dbs[docid],
        {
            "type": "set",
            "id": "meta",
            "key": "pitch",
            "val": pitchhash
        },
    )

    return {"pitch": pitchhash}
Code example #3
def gen_mat(cmd):
    id = cmd["id"]
    # Bundle measures and the raw rms/pitch/align tracks for MATLAB export.
    meta = rec_set.get_meta(id)

    out = {}

    measure = _measure(id, raw=True)

    out.update(measure["measure"])
    # out.update(measure["raw"])

    if meta.get("rms"):
        out["rms"] = np.array(
            json.load(open(os.path.join(get_attachpath(), meta["rms"]))))
    if meta.get("pitch"):
        p_path = os.path.join(get_attachpath(), meta["pitch"])
        out["pitch"] = np.array(
            [float(X.split()[1]) for X in open(p_path) if len(X.split()) > 2])
    if meta.get("align"):
        a_path = os.path.join(get_attachpath(), meta["align"])
        out["align"] = json.load(open(a_path))
        # Remove 'None' values
        for seg in out['align']['segments']:
            for k, v in list(seg.items()):
                if v is None:
                    del seg[k]

    with tempfile.NamedTemporaryFile(suffix=".mat", delete=False) as mf:
        sio.savemat(mf.name, out)

        mathash = guts.attach(mf.name, get_attachpath())

    guts.bschange(rec_set.dbs[id], {
        "type": "set",
        "id": "meta",
        "key": "mat",
        "val": mathash
    })

    return {"mat": mathash}
Code example #4
def rms(cmd):
    docid = cmd["id"]
    info = rec_set.get_meta(docid)

    vpath = os.path.join(get_attachpath(), info["path"])

    R = 44100

    snd = nmt.sound2np(vpath,
                       R=R,
                       nchannels=1,
                       ffopts=["-filter:a", "dynaudnorm"])

    WIN_LEN = int(R / 100)  # 10 ms analysis windows

    rms = []
    for idx in range(int(len(snd) / WIN_LEN)):
        chunk = snd[idx * WIN_LEN:(idx + 1) * WIN_LEN]
        # Mean square per window (no square root, despite the name).
        rms.append((chunk.astype(float)**2).sum() / len(chunk))
    rms = np.array(rms)

    # Normalize to the [0, 1] range.
    rms -= rms.min()
    rms /= rms.max()

    with tempfile.NamedTemporaryFile(suffix=".json", delete=False,
                                     mode="w") as fh:
        json.dump(rms.tolist(), fh)

    rmshash = guts.attach(fh.name, get_attachpath())

    guts.bschange(rec_set.dbs[docid], {
        "type": "set",
        "id": "meta",
        "key": "rms",
        "val": rmshash
    })

    return {"rms": rmshash}
Code example #5
def save_audio_info(cmd):
    docid = cmd["id"]

    meta = rec_set.get_meta(docid)

    # Files larger than ~10 MB are not decoded; store sys.maxsize as a
    # "too large" sentinel instead of a real duration.
    if os.path.getsize(os.path.join(get_attachpath(), meta["path"])) > 10e6:
        duration = sys.maxsize
    else:
        x, fs = librosa.load(os.path.join(get_attachpath(), meta["path"]),
                             sr=None)
        duration = librosa.get_duration(y=x, sr=fs)

    guts.bschange(
        rec_set.dbs[docid],
        {
            "type": "set",
            "id": "meta",
            "key": "info",
            "val": duration
        },
    )

    return {"info": duration}
Code example #6
def _measure(id=None,
             start_time=None,
             end_time=None,
             full_ts=False,
             force_gen=False,
             raw=False):

    if start_time is not None:
        start_time = float(start_time)
    if end_time is not None:
        end_time = float(end_time)

    meta = rec_set.get_meta(id)

    # The full-transcript span is the same for any given document, so its
    # prosodic measures are cached and can be bulk-downloaded.
    if full_ts and not force_gen and meta.get("full_ts"):
        cached = json.load(
            open(os.path.join(get_attachpath(), meta["full_ts"])))

        # If 'Dynamism' is in the cached data (or intensive measures are
        # disabled), the cache is current; otherwise it predates the current
        # measure set and must be regenerated.
        if 'Dynamism' in cached or not calc_intense:
            return cached

    pitch = [[float(Y) for Y in X.split(" ")]
             for X in open(os.path.join(get_attachpath(), meta["pitch"]))]

    # Redundancy: the CSV sometimes failed to load on older versions of Drift,
    # so generate it if it doesn't exist.
    if not meta.get("csv"):
        gen_csv({"id": id})

    # if not meta.get("info"):
    #     save_audio_info({ "id": id })

    # Drift may now be running in calc_intense mode even though it wasn't when
    # this audio file was uploaded, so generate the Harvest track if missing.
    if calc_intense and not meta.get("harvest"):
        harvest({"id": id})

    # TODO: could these spin forever? They only guard against concurrent calls
    # to harvest/csv erroring while those attachments are being initialized.
    while not rec_set.get_meta(id).get("csv"):
        pass

    # while not rec_set.get_meta(id).get("info"):
    #     pass

    while calc_intense and not rec_set.get_meta(id).get("harvest"):
        pass

    meta = rec_set.get_meta(id)
    driftcsv = open(os.path.join(get_attachpath(), meta["csv"]))
    gentlecsv = open(os.path.join(get_attachpath(), meta["aligncsv"]))

    st = start_time if start_time is not None else 0
    # Pitch frames are 10 ms apart, so len(pitch) / 100 is the full duration.
    et = end_time if end_time is not None else len(pitch) / 100.0
    full_data = {"measure": {"start_time": st, "end_time": et}}

    gentle_drift_data = prosodic_measures.measure_gentle_drift(
        gentlecsv, driftcsv, start_time, end_time)

    full_data["measure"].update(gentle_drift_data)

    if calc_intense:
        voxit_data = prosodic_measures.measure_voxit(
            os.path.join(get_attachpath(), meta["path"]),
            open(os.path.join(get_attachpath(), meta["pitch"])),
            open(os.path.join(get_attachpath(), meta["harvest"])), start_time,
            end_time)
        full_data["measure"].update(voxit_data)

    if full_ts:
        with tempfile.NamedTemporaryFile(suffix=".json",
                                         delete=False,
                                         mode="w") as dfh:
            json.dump(full_data, dfh, indent=2)

        fulltshash = guts.attach(dfh.name, get_attachpath())

        guts.bschange(
            rec_set.dbs[id],
            {
                "type": "set",
                "id": "meta",
                "key": "full_ts",
                "val": fulltshash
            },
        )

    return full_data
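A hedged usage sketch of _measure (it runs only inside this module's context; the document id is hypothetical):

# Per-excerpt measures for seconds 1.0-3.5 of a document.
# _measure returns {"measure": {...}} as built above; full_ts=True would
# additionally cache the result as a "full_ts" attachment.
result = _measure(id="some-doc-id", start_time=1.0, end_time=3.5)
print(result["measure"]["start_time"], result["measure"]["end_time"])
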
Code example #7
def gen_csv(cmd):
    docid = cmd["id"]
    meta = rec_set.get_meta(docid)

    p_path = os.path.join(get_attachpath(), meta["pitch"])
    pitch = [float(X.split()[1]) for X in open(p_path) if len(X.split()) > 2]

    a_path = os.path.join(get_attachpath(), meta["align"])
    align = json.load(open(a_path))

    words = []
    for seg in align["segments"]:
        for wd in seg["wdlist"]:
            wd_p = dict(wd)
            wd_p["speaker"] = seg["speaker"]
            words.append(wd_p)

    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False,
                                     mode="w") as fp:
        w = csv.writer(fp)

        w.writerow(["time (s)", "pitch (hz)", "word", "phoneme", "speaker"])

        for idx, pitch_val in enumerate(pitch):
            t = idx / 100.0

            wd_txt = None
            ph_txt = None
            speaker = None

            for wd_idx, wd in enumerate(words):
                if wd.get("start") is None or wd.get("end") is None:
                    continue

                if wd["start"] <= t and wd["end"] >= t:
                    wd_txt = wd["word"].encode("utf-8")

                    speaker = wd["speaker"]

                    # find phone
                    cur_t = wd["start"]
                    for phone in wd.get("phones", []):
                        if cur_t + phone["duration"] >= t:
                            ph_txt = phone["phone"]
                            break
                        cur_t += phone["duration"]

                    break

            # Normalize a missing word to the empty string.
            if wd_txt is None:
                wd_txt = ""

            row = [t, pitch_val, wd_txt, ph_txt, speaker]
            w.writerow(row)

        fp.flush()

    csvhash = guts.attach(fp.name, get_attachpath())
    guts.bschange(
        rec_set.dbs[cmd["id"]],
        {
            "type": "set",
            "id": "meta",
            "key": "csv",
            "val": csvhash
        },
    )

    return {"csv": csvhash}
Code example #8
def align(cmd):
    meta = rec_set.get_meta(cmd["id"])

    media = os.path.join(get_attachpath(), meta["path"])
    segs = parse_speakers_in_transcript(
        open(os.path.join(get_attachpath(), meta["transcript"])).read())

    tscript_txt = "\n".join([X["line"] for X in segs])
    url = f"http://localhost:{GENTLE_PORT}/transcriptions"

    res = requests.post(url,
                        data={"transcript": tscript_txt},
                        files={'audio': ('audio', open(media, 'rb'))})

    # Find the ID
    uid = res.history[0].headers['Location'].split('/')[-1]

    # Poll for status
    status_url = url + '/' + uid + '/status.json'

    cur_status = -1

    while True:
        status = requests.get(status_url).json()
        if status.get('status') != 'OK':
            s = status.get('percent', 0)
            if s > cur_status:
                cur_status = s

                guts.bschange(
                    rec_set.dbs[cmd["id"]], {
                        "type": "set",
                        "id": "meta",
                        "key": "align_px",
                        "val": cur_status
                    })

            time.sleep(1)

        else:
            # transcription done
            break

    align_url = url + '/' + uid + '/align.json'
    trans = requests.get(align_url).json()

    # Re-diarize Gentle output into a sane diarization format
    diary = {"segments": [{}]}
    seg = diary["segments"][0]
    seg["speaker"] = segs[0]["speaker"]

    wdlist = []
    end_offset = 0
    seg_idx = 0

    cur_end = 0

    for wd in trans["words"]:
        gap = trans["transcript"][end_offset:wd["startOffset"]]
        seg_idx += len(gap.split("\n")) - 1

        if "\n" in gap and len(wdlist) > 0:
            # Linebreak - new segment!
            wdlist[-1]["word"] += gap.split("\n")[0]

            seg["wdlist"] = gentle_punctuate(wdlist, trans["transcript"])

            # Compute start & end
            seg["start"] = seg["wdlist"][0].get("start", cur_end)
            has_end = [X for X in seg["wdlist"] if X.get("end")]
            if len(has_end) > 0:
                seg["end"] = has_end[-1]["end"]
            else:
                seg["end"] = cur_end
            cur_end = seg["end"]

            wdlist = []
            seg = {}
            diary["segments"].append(seg)
            if len(segs) > seg_idx:
                seg["speaker"] = segs[seg_idx]["speaker"]

        wdlist.append(wd)
        end_offset = wd["endOffset"]

    seg["wdlist"] = gentle_punctuate(wdlist, trans["transcript"])

    # Compute start & end
    seg["start"] = seg["wdlist"][0].get("start", cur_end)
    has_end = [X for X in seg["wdlist"] if X.get("end")]
    if len(has_end) > 0:
        seg["end"] = has_end[-1]["end"]
    else:
        seg["end"] = cur_end

    # For now, hit disk. Later we can explore the transcription DB.
    with tempfile.NamedTemporaryFile(suffix=".json", delete=False,
                                     mode="w") as dfh:
        json.dump(diary, dfh, indent=2)

    alignhash = guts.attach(dfh.name, get_attachpath())

    guts.bschange(
        rec_set.dbs[cmd["id"]],
        {
            "type": "set",
            "id": "meta",
            "key": "align",
            "val": alignhash
        },
    )

    # https://stackoverflow.com/questions/45978295/saving-a-downloaded-csv-file-using-python
    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False,
                                     mode="w") as fp:
        w = csv.writer(fp)
        aligncsv_url = url + '/' + uid + '/align.csv'
        aligncsv = requests.get(aligncsv_url)
        for line in aligncsv.iter_lines():
            w.writerow(line.decode('utf-8').split(','))
    aligncsvhash = guts.attach(fp.name, get_attachpath())

    guts.bschange(
        rec_set.dbs[cmd["id"]],
        {
            "type": "set",
            "id": "meta",
            "key": "aligncsv",
            "val": aligncsvhash
        },
    )

    return {"align": alignhash}