def harvest(cmd):
    docid = cmd["id"]
    meta = rec_set.get_meta(docid)

    # Load the attached audio at its native sample rate.
    x, fs = librosa.load(os.path.join(get_attachpath(), meta["path"]), sr=None)

    print("SYSTEM: harvesting...")
    hv_start = time.time()
    # Estimate f0 with WORLD's Harvest algorithm.
    f0, timeaxis = pyworld.harvest(x.astype(np.float64), fs)
    print(f"SYSTEM: finished harvesting! (took {time.time() - hv_start:.2f}s)")

    # Write one "time f0" pair per line.
    with tempfile.NamedTemporaryFile(suffix=".txt", delete=False, mode="w") as harvest_fp:
        for i in range(len(timeaxis)):
            harvest_fp.write(f"{timeaxis[i]} {f0[i]}\n")

    if len(open(harvest_fp.name).read().strip()) == 0:
        return {"error": "Harvest computation failed"}

    # XXX: frozen attachdir
    harvesthash = guts.attach(harvest_fp.name, get_attachpath())

    guts.bschange(
        rec_set.dbs[docid],
        {"type": "set", "id": "meta", "key": "harvest", "val": harvesthash},
    )

    return {"harvest": harvesthash}
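
# A minimal sketch (not part of the pipeline) of how the Harvest output written
# above could be read back. `load_harvest` is a hypothetical helper; the
# two-column "time f0" format comes from the writer loop in harvest(), and
# frames pyworld marks unvoiced carry f0 == 0.
def load_harvest(path):
    times, f0 = [], []
    for line in open(path):
        cols = line.split()
        if len(cols) == 2:
            times.append(float(cols[0]))
            f0.append(float(cols[1]))
    return np.array(times), np.array(f0)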

def pitch(cmd):
    docid = cmd["id"]
    meta = rec_set.get_meta(docid)

    # Create an 8khz wav file...
    with tempfile.NamedTemporaryFile(suffix=".wav") as wav_fp:
        ff_start = time.time()
        subprocess.call([
            "ffmpeg", "-y", "-loglevel", "panic",
            "-i", os.path.join(get_attachpath(), meta["path"]),
            "-ar", "8000",
            "-ac", "1",
            wav_fp.name,
        ])
        print(f"SYSTEM: FFMPEG took {time.time() - ff_start:.2f}s")

        # ...and use it to compute pitch
        with tempfile.NamedTemporaryFile(suffix=".txt", delete=False) as pitch_fp:
            subprocess.call([get_calc_sbpca(), wav_fp.name, pitch_fp.name])

    if len(open(pitch_fp.name).read().strip()) == 0:
        return {"error": "Pitch computation failed"}

    # XXX: frozen attachdir
    pitchhash = guts.attach(pitch_fp.name, get_attachpath())

    guts.bschange(
        rec_set.dbs[docid],
        {"type": "set", "id": "meta", "key": "pitch", "val": pitchhash},
    )

    return {"pitch": pitchhash}
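
# A minimal sketch of how the SBPCA output attached above is consumed elsewhere
# in this module (see gen_csv and gen_mat): whitespace-separated rows with the
# pitch value in the second column, where only rows with more than two columns
# are treated as data. `load_sbpca_pitch` is a hypothetical helper, not part of
# the pipeline.
def load_sbpca_pitch(path):
    return [float(row.split()[1]) for row in open(path) if len(row.split()) > 2]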

def gen_mat(cmd):
    docid = cmd["id"]
    meta = rec_set.get_meta(docid)

    out = {}

    measure = _measure(docid, raw=True)
    out.update(measure["measure"])
    # out.update(measure["raw"])

    if meta.get("rms"):
        out["rms"] = np.array(
            json.load(open(os.path.join(get_attachpath(), meta["rms"]))))
    if meta.get("pitch"):
        p_path = os.path.join(get_attachpath(), meta["pitch"])
        out["pitch"] = np.array(
            [float(X.split()[1]) for X in open(p_path) if len(X.split()) > 2])
    if meta.get("align"):
        a_path = os.path.join(get_attachpath(), meta["align"])
        out["align"] = json.load(open(a_path))

        # Remove None values, which sio.savemat cannot serialize.
        for seg in out["align"]["segments"]:
            for k, v in list(seg.items()):
                if v is None:
                    del seg[k]

    with tempfile.NamedTemporaryFile(suffix=".mat", delete=False) as mf:
        sio.savemat(mf.name, out)

    mathash = guts.attach(mf.name, get_attachpath())
    guts.bschange(
        rec_set.dbs[docid],
        {"type": "set", "id": "meta", "key": "mat", "val": mathash},
    )

    return {"mat": mathash}
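
# A minimal sketch of reading the attached .mat back, e.g. for debugging.
# `inspect_mat` is a hypothetical helper, not part of the pipeline; the
# variable names are the keys set in gen_mat() above, and scipy.io.loadmat
# adds "__header__"-style metadata entries that get skipped here.
def inspect_mat(path):
    mat = sio.loadmat(path)
    for key, val in mat.items():
        if not key.startswith("__"):
            print(key, getattr(val, "shape", None))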

def rms(cmd):
    docid = cmd["id"]
    info = rec_set.get_meta(docid)

    vpath = os.path.join(get_attachpath(), info["path"])

    R = 44100

    # Decode to mono 44.1khz with loudness normalization.
    snd = nmt.sound2np(vpath,
                       R=R,
                       nchannels=1,
                       ffopts=["-filter:a", "dynaudnorm"])

    WIN_LEN = int(R / 100)

    # Mean-square energy over non-overlapping 10ms windows (no sqrt, so this
    # is energy rather than literal RMS).
    rms = []
    for idx in range(int(len(snd) / WIN_LEN)):
        chunk = snd[idx * WIN_LEN:(idx + 1) * WIN_LEN]
        rms.append((chunk.astype(float) ** 2).sum() / len(chunk))

    # Min-max normalize to [0, 1], guarding against division by zero on
    # constant (e.g. all-silent) input.
    rms = np.array(rms)
    rms -= rms.min()
    if rms.max() > 0:
        rms /= rms.max()

    with tempfile.NamedTemporaryFile(suffix=".json", delete=False, mode="w") as fh:
        json.dump(rms.tolist(), fh)
        fh.close()

    rmshash = guts.attach(fh.name, get_attachpath())
    guts.bschange(
        rec_set.dbs[docid],
        {"type": "set", "id": "meta", "key": "rms", "val": rmshash},
    )

    return {"rms": rmshash}
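
# A minimal sketch of mapping the normalized energy curve back onto a time
# axis: one value per WIN_LEN samples at R hz, i.e. one per 10ms window.
# `rms_times` is a hypothetical helper, not part of the pipeline.
def rms_times(rms_curve, R=44100, win_len=441):
    return np.arange(len(rms_curve)) * (win_len / R)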

def _measure(id=None, start_time=None, end_time=None, full_ts=False,
             force_gen=False, raw=False):
    if start_time is not None:
        start_time = float(start_time)
    if end_time is not None:
        end_time = float(end_time)

    meta = rec_set.get_meta(id)

    # Full-transcription duration should be the same for any given document,
    # and prosodic measures for these are cached so we can bulk-download them.
    if full_ts and not force_gen and meta.get("full_ts"):
        cached = json.load(
            open(os.path.join(get_attachpath(), meta["full_ts"])))
        # If Dynamism is part of the cached data, return it; otherwise the
        # cache is outdated and must be regenerated.
        if "Dynamism" in cached or not calc_intense:
            return cached

    pitch = [[float(Y) for Y in X.split(" ")]
             for X in open(os.path.join(get_attachpath(), meta["pitch"]))]

    # Redundancy: the CSV sometimes did not load on older versions of Drift.
    # Generate it if nonexistent.
    if not meta.get("csv"):
        gen_csv({"id": id})
    # if not meta.get("info"):
    #     save_audio_info({"id": id})

    # Drift may now be running in calc_intense mode even though it wasn't when
    # the audio file was originally uploaded: generate Harvest if nonexistent.
    if calc_intense and not meta.get("harvest"):
        harvest({"id": id})

    # TODO: will these hang? This just prevents concurrent calls to harvest/csv
    # during their initialization from throwing errors.
    while not rec_set.get_meta(id).get("csv"):
        time.sleep(0.1)  # poll without busy-spinning
    # while not rec_set.get_meta(id).get("info"):
    #     time.sleep(0.1)
    while calc_intense and not rec_set.get_meta(id).get("harvest"):
        time.sleep(0.1)

    meta = rec_set.get_meta(id)
    driftcsv = open(os.path.join(get_attachpath(), meta["csv"]))
    gentlecsv = open(os.path.join(get_attachpath(), meta["aligncsv"]))

    # Pitch frames arrive at 100hz, so the frame count doubles as a duration.
    st = start_time if start_time is not None else 0
    full_data = {
        "measure": {
            "start_time": st,
            "end_time": end_time if end_time is not None else (len(pitch) / 100.0),
        }
    }

    gentle_drift_data = prosodic_measures.measure_gentle_drift(
        gentlecsv, driftcsv, start_time, end_time)
    full_data["measure"].update(gentle_drift_data)

    if calc_intense:
        voxit_data = prosodic_measures.measure_voxit(
            os.path.join(get_attachpath(), meta["path"]),
            open(os.path.join(get_attachpath(), meta["pitch"])),
            open(os.path.join(get_attachpath(), meta["harvest"])),
            start_time, end_time)
        full_data["measure"].update(voxit_data)

    # Cache full-transcription measures for later bulk download.
    if full_ts:
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False, mode="w") as dfh:
            json.dump(full_data, dfh, indent=2)
            dfh.close()

        fulltshash = guts.attach(dfh.name, get_attachpath())
        guts.bschange(
            rec_set.dbs[id],
            {"type": "set", "id": "meta", "key": "full_ts", "val": fulltshash},
        )

    return full_data
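
# A hypothetical usage sketch, not part of the pipeline: _measure() can be
# called for a time slice, or with full_ts=True for the whole recording, in
# which case the result is cached via the full_ts attachment above. The
# `measure_slice` name and its parameters are placeholders.
def measure_slice(docid, start_time, end_time):
    return _measure(id=docid, start_time=start_time, end_time=end_time)["measure"]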

def gen_csv(cmd):
    docid = cmd["id"]
    meta = rec_set.get_meta(docid)

    p_path = os.path.join(get_attachpath(), meta["pitch"])
    pitch = [float(X.split()[1]) for X in open(p_path) if len(X.split()) > 2]

    a_path = os.path.join(get_attachpath(), meta["align"])
    align = json.load(open(a_path))

    # Flatten the segments into a single word list, tagging each word with its
    # segment's speaker.
    words = []
    for seg in align["segments"]:
        for wd in seg["wdlist"]:
            wd_p = dict(wd)
            wd_p["speaker"] = seg["speaker"]
            words.append(wd_p)

    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False, mode="w") as fp:
        w = csv.writer(fp)
        w.writerow(["time (s)", "pitch (hz)", "word", "phoneme", "speaker"])

        # One row per 10ms pitch frame.
        for idx, pitch_val in enumerate(pitch):
            t = idx / 100.0

            wd_txt = None
            ph_txt = None
            speaker = None

            for wd in words:
                if wd.get("start") is None or wd.get("end") is None:
                    continue
                if wd["start"] <= t and wd["end"] >= t:
                    wd_txt = wd["word"]
                    speaker = wd["speaker"]

                    # Find the phone under this frame by accumulating durations.
                    cur_t = wd["start"]
                    for phone in wd.get("phones", []):
                        if cur_t + phone["duration"] >= t:
                            ph_txt = phone["phone"]
                            break
                        cur_t += phone["duration"]
                    break

            w.writerow([t, pitch_val, wd_txt or "", ph_txt, speaker])

        fp.flush()

    csvhash = guts.attach(fp.name, get_attachpath())
    guts.bschange(
        rec_set.dbs[docid],
        {"type": "set", "id": "meta", "key": "csv", "val": csvhash},
    )

    return {"csv": csvhash}
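
# A minimal sketch of reading the generated CSV back; the field names match
# the header row written in gen_csv() above. `read_drift_csv` is a
# hypothetical helper, not part of the pipeline.
def read_drift_csv(path):
    with open(path) as fh:
        return list(csv.DictReader(fh))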

def align(cmd):
    meta = rec_set.get_meta(cmd["id"])

    media = os.path.join(get_attachpath(), meta["path"])
    segs = parse_speakers_in_transcript(
        open(os.path.join(get_attachpath(), meta["transcript"])).read())

    tscript_txt = "\n".join([X["line"] for X in segs])

    # Kick off a transcription job on the local Gentle server.
    url = f"http://localhost:{GENTLE_PORT}/transcriptions"
    res = requests.post(url,
                        data={"transcript": tscript_txt},
                        files={"audio": ("audio", open(media, "rb"))})

    # Find the ID
    uid = res.history[0].headers["Location"].split("/")[-1]

    # Poll for status
    status_url = url + "/" + uid + "/status.json"

    cur_status = -1
    while True:
        status = requests.get(status_url).json()
        if status.get("status") != "OK":
            s = status.get("percent", 0)
            if s > cur_status:
                cur_status = s
                guts.bschange(
                    rec_set.dbs[cmd["id"]],
                    {"type": "set", "id": "meta", "key": "align_px", "val": cur_status})
            time.sleep(1)
        else:
            # transcription done
            break

    align_url = url + "/" + uid + "/align.json"
    trans = requests.get(align_url).json()

    # Re-diarize Gentle output into a sane diarization format
    diary = {"segments": [{}]}
    seg = diary["segments"][0]
    seg["speaker"] = segs[0]["speaker"]

    wdlist = []
    end_offset = 0
    seg_idx = 0
    cur_end = 0

    for wd in trans["words"]:
        gap = trans["transcript"][end_offset:wd["startOffset"]]
        seg_idx += len(gap.split("\n")) - 1

        if "\n" in gap and len(wdlist) > 0:
            # Linebreak - new segment!
            wdlist[-1]["word"] += gap.split("\n")[0]
            seg["wdlist"] = gentle_punctuate(wdlist, trans["transcript"])

            # Compute start & end
            seg["start"] = seg["wdlist"][0].get("start", cur_end)
            has_end = [X for X in seg["wdlist"] if X.get("end")]
            if len(has_end) > 0:
                seg["end"] = has_end[-1]["end"]
            else:
                seg["end"] = cur_end
            cur_end = seg["end"]

            wdlist = []
            seg = {}
            diary["segments"].append(seg)
            if len(segs) > seg_idx:
                seg["speaker"] = segs[seg_idx]["speaker"]

        wdlist.append(wd)
        end_offset = wd["endOffset"]

    # Close out the final segment.
    seg["wdlist"] = gentle_punctuate(wdlist, trans["transcript"])

    # Compute start & end
    seg["start"] = seg["wdlist"][0].get("start", cur_end)
    has_end = [X for X in seg["wdlist"] if X.get("end")]
    if len(has_end) > 0:
        seg["end"] = has_end[-1]["end"]
    else:
        seg["end"] = cur_end

    # For now, hit disk. Later we can explore the transcription DB.
    with tempfile.NamedTemporaryFile(suffix=".json", delete=False, mode="w") as dfh:
        json.dump(diary, dfh, indent=2)
        dfh.close()

    alignhash = guts.attach(dfh.name, get_attachpath())

    guts.bschange(
        rec_set.dbs[cmd["id"]],
        {"type": "set", "id": "meta", "key": "align", "val": alignhash},
    )

    # https://stackoverflow.com/questions/45978295/saving-a-downloaded-csv-file-using-python
    with tempfile.NamedTemporaryFile(suffix=".csv", delete=False, mode="w") as fp:
        w = csv.writer(fp)
        aligncsv_url = url + "/" + uid + "/align.csv"
        aligncsv = requests.get(aligncsv_url)
        for line in aligncsv.iter_lines():
            w.writerow(line.decode("utf-8").split(","))
        fp.close()

    aligncsvhash = guts.attach(fp.name, get_attachpath())
    guts.bschange(
        rec_set.dbs[cmd["id"]],
        {"type": "set", "id": "meta", "key": "aligncsv", "val": aligncsvhash},
    )

    return {"align": alignhash}
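
# A minimal sketch of walking the diarization structure attached above: each
# segment carries a speaker label, start/end times, and the wdlist produced by
# gentle_punctuate(). `print_diary` is a hypothetical helper, not part of the
# pipeline.
def print_diary(diary):
    for seg in diary["segments"]:
        text = " ".join(wd.get("word", "") for wd in seg.get("wdlist", []))
        print(seg.get("speaker"), seg.get("start"), seg.get("end"), text)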