def significance_c1_vs_c2(config_path1: str, config_path2: str):
    config1 = loadDBReader(config_path1).config
    config2 = loadDBReader(config_path2).config
    eval_conf1 = get_best_eval_config(config_path1)
    eval_conf2 = get_best_eval_config(config_path2)
    conversations = read_conversations(config1)
    eval_conversations = sorted(conversations['eval'])
    r1 = []
    r2 = []
    for conv in tqdm([*eval_conversations]):  # , *sorted(conversations['validate'])
        for channel in ["A", "B"]:
            convid = f"{conv}-{channel}"
            _, res1 = evaluate_conv(config_path1, convid,
                                    {**eval_conf1, 'min_talk_len': 5})
            _, res2 = evaluate_conv(config_path2, convid,
                                    {**eval_conf2, 'min_talk_len': 5})
            r1.append(precision_recall(res1)['f1_score'])
            r2.append(precision_recall(res2)['f1_score'])
    print(f"r1: f1 = {meanpm(r1)}")
    print(f"r2: f1 = {meanpm(r2)}")
    print(f"differ: p = {stats.ttest_ind(r1, r2, equal_var=False).pvalue}")
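# Hedged usage sketch (not part of the original module): compares two trained
# models on the shared eval set. The config paths below are hypothetical
# placeholders; any two configs whose 'eval' conversation split matches should
# work, since the comparison is a per-track F1 sweep followed by Welch's t-test.
def _example_compare_two_configs():
    significance_c1_vs_c2("trainNN/out/versionA-placeholder/config.json",
                          "trainNN/out/versionB-placeholder/config.json")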
def get_word2vec(adc_path: str, sample_window_ms: int, convid: str,
                 feat_dim: int, T="") -> Feature:
    from extract import readDB
    cp = "trainNN/out/v050-finunified-16-g1be124b-dirty:lstm-best-features-power,pitch,ffv,word2vec_dim30-slowbatch/config.json"
    model = readDB.word_to_vec(cp, dimension=feat_dim, T=T)  # for dimensions
    pow = pure_get_power(adc_path, sample_window_ms, convid)
    frames, _ = pow.shape
    w2v = np.zeros((frames, feat_dim), dtype=np.float32)
    reader = readDB.loadDBReader(
        "configs/finunified/vary-features/lstm-best-features-power,ffv.json"
    )  # exact config file is unimportant
    words = [(float(word['to']), reader.noise_filter(word['text']))
             for word in readDB.get_all_nonsilent_words(reader, convid)
             if reader.noise_filter(word['text']) in model]
    inx = 0

    def inxtotime(sample_index):
        return (sample_window_ms / 2 + sample_index * pow.frame_shift_ms) / 1000

    for frame in range(frames):
        time = inxtotime(frame)
        if inx < len(words) - 1 and words[inx + 1][0] <= time:
            inx += 1
        w2v[frame] = model[words[inx][1]]
    return Feature(w2v, infofrom=pow)
def stat(config_path: str):
    reader = loadDBReader(config_path)
    convs = read_conversations(reader.config)
    convids = [
        "{}-{}".format(conv, channel) for conv in convs['eval']
        for channel in ["A", "B"]
    ]
    total_s = 0
    mono_s = 0
    bc_count = 0
    utt_count = 0
    mono_bc_count = 0
    for convid in convids:
        utts = list(reader.get_utterances(convid))
        utt_count += len(utts)
        start = float(utts[0][1]['from'])
        end = float(utts[-1][1]['to'])
        total_s += end - start
        monosegs = list(
            get_monologuing_segments(reader, convid, min_talk_len=5))
        mono_s += sum(end - start for (start, end) in monosegs)
        bcs = list(reader.get_backchannels(utts))
        bcs = [reader.getBcRealStartTime(utt) for utt, uttInfo in bcs]
        bc_count += len(bcs)
        bcs = list(filter_ranges(bcs, monosegs))
        mono_bc_count += len(bcs)
    print(
        dict(total_s=total_s,
             mono_s=mono_s,
             bc_count=bc_count,
             utt_count=utt_count,
             mono_bc_count=mono_bc_count))
def get_predictions(config_path: str, convid: str, eval_config: dict):
    reader = loadDBReader(config_path)
    smoothed = cached_smoothed_netout(config_path, convid,
                                      eval_config['epoch'],
                                      Hashabledict(eval_config['smoother']))
    return list(
        predict_bcs(reader, 1 - smoothed[:, [0]], eval_config['threshold'],
                    eval_config['at_start']))
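# Hedged sketch of the eval_config dict expected by get_predictions and
# evaluate_conv, with keys taken from their usage in this module; the concrete
# values are illustrative placeholders, not tuned settings.
_example_eval_config = {
    'epoch': "20",  # epoch identifier passed to cached_smoothed_netout
    'smoother': {'type': 'gauss', 'sigma_ms': 300, 'cutoff_sigma': 2},
    'threshold': 0.6,  # trigger level on the smoothed (1 - no-BC) probability
    'at_start': False,  # passed through to predict_bcs
    'min_talk_len': 5,  # restrict scoring to monologuing segments, or None
    'margin_of_error': (-0.2, 0.2),  # allowed (left, right) offset in seconds around the true BC
}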
def get_boring_bcs(config_path: str, convid: str):
    reader = loadDBReader(config_path)
    bcs = reader.get_backchannels(list(reader.get_utterances(convid)))
    l = []
    for (bc, bcInfo) in bcs:
        text = bcInfo['text']  # type: str
        if "[laughter" in text or "[noise" in text:
            continue
        filtered = reader.noise_filter(text).lower()
        if reader.bc_to_category[filtered] != 'neutral':
            continue
        l.append((bc, bcInfo))
    return l
def significance_better_than_random():
    config_path = sys.argv[1]
    config = loadDBReader(config_path).config
    eval_conf = get_best_eval_config(config_path, margin=(0, 1))
    conversations = read_conversations(config)
    eval_conversations = sorted(conversations['eval'])
    nn = []
    rand = []
    for conv in tqdm(eval_conversations):
        for channel in ["A", "B"]:
            convid = f"{conv}-{channel}"
            _, res = evaluate_conv(config_path, convid, eval_conf)
            _, randres = evaluate_conv(config_path, convid,
                                       {**eval_conf, 'random_baseline': {}})
            nn.append(precision_recall(res)['f1_score'])
            rand.append(precision_recall(randres)['f1_score'])
    print(f"nn: f1 = {meanpm(nn)}")
    print(f"rand: f1 = {meanpm(rand)}")
    print(f"differ: p = {stats.ttest_ind(nn, rand, equal_var=False).pvalue}")
def significance_better_than_mmueller():
    config_path = sys.argv[1]
    config = loadDBReader(config_path).config
    conversations = read_conversations(config)
    eval_conversations = sorted(conversations['eval'])

    def filt(res):
        [l, r] = res['config']['margin_of_error']
        return res['config']['min_talk_len'] is None and r - l < 0.41

    eval_conf = get_best_eval_config(config_path, filter=filt)
    nn = []
    for conv in tqdm(eval_conversations):
        for channel in ["A", "B"]:
            convid = f"{conv}-{channel}"
            _, res = evaluate_conv(config_path, convid, eval_conf)
            nn.append(precision_recall(res)['f1_score'])
    mmueller = 0.109
    print(
        f"ours ({np.mean(nn)}) is better than mmueller ({mmueller}) "
        f"with p={stats.ttest_1samp(nn, mmueller).pvalue}"
    )
async def sendFeature(ws, id: str, conv: str, featFull: str, micro):
    if featFull[0] != '/':
        raise Exception("featname must start with /")
    channel, category, *path = featFull.split("/")[1:]
    convid = conv + "-" + channel
    if category == "transcript":
        (featname,) = path
        reader = origReader
        if featname == "bc":
            await sendOtherFeature(ws, id, {
                "typ": "highlights",
                "data": getHighlights(reader, conv, channel)
            })
        elif featname == "is_talking":
            await sendOtherFeature(
                ws, id,
                dict(typ="highlights",
                     data=list(get_talking_feature(reader, convid))))
        elif featname == "is_silent":
            await sendOtherFeature(
                ws, id,
                dict(typ="highlights",
                     data=list(get_silent_feature(reader, convid))))
        elif featname == "is_monologuing":
            await sendOtherFeature(
                ws, id,
                dict(typ="highlights",
                     data=list(get_monologuing_feature(reader, convid))))
        elif featname == "text":
            await sendOtherFeature(ws, id, segsToJSON(reader, convid, featFull))
        elif featname == "words":
            await sendOtherFeature(
                ws, id, segsToJSON(origReader, convid, featFull, words=True))
        else:
            raise Exception("unknown trans feature: " + featname)
    elif category == "NN outputs":
        if path[-1].endswith(".thres"):
            path[-1] = path[-1][:-len(".thres")]
            config_path, eval_config, feature = get_net_output(convid, path)
            feature = maybe_onedim(feature)
            onedim = feature if feature.shape[1] == 1 else 1 - feature[:, [0]]
            await sendOtherFeature(
                ws, id,
                get_larger_threshold_feature(
                    onedim,
                    origReader,
                    featFull,
                    threshold=eval_config['threshold']))
        elif path[-1].endswith(".bc"):
            path[-1] = path[-1][:-len(".bc")]
            config_path, eval_config, feature = get_net_output(convid, path)
            feature = maybe_onedim(feature)
            onedim = feature if feature.shape[1] == 1 else 1 - feature[:, [0]]
            predictions = evaluate.get_predictions(config_path, convid,
                                                   eval_config)
            _orig_audio = origReader.features.get_adc(convid)
            import random
            st = random.choice(write_wavs.good_bc_sample_tracks)
            print(f"bcs from {st}")
            bc_audio = write_wavs.get_bc_audio(
                origReader, _orig_audio.size,
                list(
                    write_wavs.bcs_to_samples(
                        readDB.loadDBReader(config_path),
                        write_wavs.get_boring_bcs(config_path, st))),
                predictions)
            await sendNumFeature(ws, id, conv, featFull, bc_audio)
        else:
            _, _, feature = get_net_output(convid, path)
            feature = maybe_onedim(feature)
            await sendNumFeature(ws, id, conv, featFull, feature)
    elif category == "extracted":
        featname, = path
        if channel == "microphone":
            feats = micro.features
            if featname in feats:
                return await sendNumFeature(ws, id, conv, featFull,
                                            feats[featname])
        else:
            feats = get_extracted_features(origReader)
            if featname in feats:
                featout = feats[featname](convid)
                # if featname == "pitch":
                #     featout = -featout
                return await sendNumFeature(ws, id, conv, featFull, featout)
        raise Exception("feature not found: {}".format(featFull))
    else:
        raise Exception("unknown category " + category)
def train():
    from . import network_model, evaluate
    global reader
    global backchannels
    global config_path
    config_path = sys.argv[1]
    config = load_config(config_path)
    version = subprocess.check_output(
        "git describe --dirty", shell=True).decode('ascii').strip()

    if config_path.startswith("trainNN/out"):
        out_dir = os.path.dirname(config_path)
        print("Continuing training from folder " + out_dir)
        load_stats = config['train_output']['stats']
        load_epoch = max([int(epoch) for epoch in load_stats.keys()])
        load_params = os.path.join(
            out_dir, config['train_output']['stats'][str(load_epoch)]['weights'])
        print(
            f"Continuing training from folder {out_dir}, epoch={load_epoch}, params={load_params}"
        )
        config.setdefault('train_output_old', {})[load_epoch] = config['train_output']
    else:
        load_epoch = -1
        load_stats = {}
        load_params = None
        out_dir = os.path.join("trainNN", "out", version + ":" + config['name'])
        if os.path.isdir(out_dir):
            print("Output directory {} already exists, aborting".format(out_dir))
            sys.exit(1)
    os.makedirs(out_dir, exist_ok=True)
    LOGFILE = os.path.join(out_dir, "train.log")
    logging.root.handlers.clear()
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(levelname)-8s %(message)s',
        handlers=[logging.FileHandler(LOGFILE), logging.StreamHandler()])
    logging.debug("version={}:{}".format(version, config['name']))

    reader = readDB.loadDBReader(config_path)
    train_config = config['train_config']
    context_stride = train_config['context_stride']
    context_frames = int(train_config['context_ms'] / 10 / context_stride)
    train_config['context_frames'] = context_frames
    gaussian = train_config['gaussian']
    out_all = {'all': True, 'single': False}[train_config['output_type']]
    if gaussian:
        raise Exception("not implemented")
        # train_data = load_numpy_file(os.path.join(dir, train_config['files']['train']))
        # validate_data = load_numpy_file(os.path.join(dir, train_config['files']['validate']))
        # train_inputs, train_outputs = train_data[:, :input_dim], train_data[:, input_dim]
        # validate_inputs, validate_outputs = validate_data[:, :input_dim], validate_data[:, input_dim]
    else:
        batchers = {}
        for t in 'train', 'validate':
            # with open(os.path.join(dir, train_config['files'][t]['ids'])) as f:
            #     meta = json.load(f)
            # groups = [slice(begin, end) for begin, end in meta['ranges']]
            # inputs = load_numpy_file(os.path.join(dir, train_config['files'][t]['input']))
            # outputs = load_numpy_file(os.path.join(dir, train_config['files'][t]['output']))
            convos = readDB.read_conversations(config)
            balance_method = train_config.get('balance_method', None)
            uttids = [
                bc for bc in readDB.all_uttids(config_path, convos[t])
                if extract(bc) is not None
            ]
            if balance_method is None:
                backchannels = list(readDB.balance_data(config_path, uttids))
            elif balance_method == "weighted":
                backchannels = list(
                    readDB.get_balanced_weights(config_path, uttids))
            else:
                raise Exception(f"unknown balance method {balance_method}")
            input_dim = extract(backchannels[0])[0].shape[1]
            logging.debug(f"set input dim to {input_dim}")
            inxtoname = {
                **{v: k for k, v in reader.category_to_index.items()},
                0: None
            }
            train_config['input_dim'] = input_dim
            if config['extract_config'].get('categories', None) is not None:
                category_names = [
                    inxtoname[inx] for inx in range(len(reader.categories) + 1)
                ]
                train_config['category_names'] = category_names
                train_config['num_labels'] = len(category_names)
            else:
                train_config['num_labels'] = 2
            logging.debug(f"input dim = {input_dim}")
            context_stride = train_config['context_stride']
            context_length = int(train_config['context_ms'] / 10 / context_stride)
            sequence_length = int(
                (reader.method['nbc'][1] - reader.method['nbc'][0]) * 1000 / 10)
            inner_indices = windowed_indices(sequence_length, context_length,
                                             context_stride)
            all_elements = list(itertools.product(backchannels, inner_indices))
            batchers[t] = partial(iterate_minibatches, train_config,
                                  all_elements, out_all)
            # batchers[t] = iterate_faster_minibatches(train_config, all_elements, out_all)
    before = time.perf_counter()
    before_cpu = time.process_time()
    logging.debug("loading data into ram")
    i = 0
    for backchannel in tqdm(backchannels):
        extract(backchannel)
    logging.debug(
        f"loading data took {time.perf_counter() - before:.3f}s "
        f"(cpu: {time.process_time() - before_cpu:.3f}s)")
    create_network = getattr(network_model, train_config['model_function'])
    model = create_network(train_config)
    out_layer = model['output_layer']

    resume_parameters = train_config.get('resume_parameters', None)
    finetune_config = train_config.get("finetune", None)
    if finetune_config is not None:
        import lasagne
        if load_params is not None or resume_parameters is not None:
            raise Exception("cant finetune and load")
        ft_config_path = finetune_config['config']
        epoch = finetune_config['epoch']
        which_layers = finetune_config['layers']
        ft_layers, _ = evaluate.get_network_outputter(ft_config_path, epoch,
                                                      batch_size=250)
        layers = lasagne.layers.get_all_layers(out_layer)
        for inx, (layer_config, layer, ft_layer) in enumerate(
                zip(which_layers, layers, ft_layers)):
            do_load = layer_config['load']
            do_freeze = layer_config['freeze']
            if do_load:
                for param, ft_param in zip(layer.get_params(),
                                           ft_layer.get_params()):
                    param.set_value(ft_param.get_value())
                logging.info(
                    f"loaded layer {inx} "
                    f"({ {repr(p): p.get_value().shape for p in layer.get_params()} })"
                )
            if do_freeze:
                logging.info(f"freezing layer {inx}")
                train_func.freeze(layer)

    stats_generator = train_func.train_network(
        network=out_layer,
        twodimensional_output=False,
        scheduling_method=None,
        start_epoch=load_epoch + 1,
        resume=load_params if load_params is not None else resume_parameters,
        l2_regularization=train_config.get("l2_regularization", None),
        # scheduling_params=(0.8, 0.000001),
        update_method=train_config['update_method'],
        num_epochs=train_config['epochs'],
        learning_rate_num=train_config['learning_rate'],
        iterate_minibatches_train=batchers['train'],
        iterate_minibatches_validate=batchers['validate'],
        categorical_output=not gaussian,
        output_prefix=os.path.join(out_dir, "epoch"))
    config_out = os.path.join(out_dir, "config.json")
    for stats in stats_generator:
        for k, v in stats.items():
            v['weights'] = os.path.basename(v['weights'])
        with open(config_out, "w") as f:
            json.dump(
                {
                    **config,
                    'train_output': {
                        'stats': {**load_stats, **stats},
                        'source': config_path,
                        'environment': dict(os.environ)
                    }
                },
                f,
                indent='\t')
        logging.info("Wrote output to " + config_out)
    latest_path = os.path.join("trainNN", "out", "latest")
    with contextlib.suppress(FileNotFoundError):
        os.remove(latest_path)
    os.symlink(version, latest_path)
"sw3715", "sw2027", "sw2849", "sw2787", "sw3357", "sw2389" ] # assume problems are symmetric bad_eval_convos = [track.split("-")[0] for track in bad_eval_tracks] good_eval_tracks = [] if __name__ == '__main__': config_path = sys.argv[1] args = config_path.split("/") version = "None" if len(args) == 4: _, _, version, _ = args config = load_config(config_path) reader = loadDBReader(config_path) conversations = read_conversations(config) eval_conversations = sorted(conversations['eval']) eval_conversations = [ convo for convo in eval_conversations if convo not in bad_eval_convos ] # valid_conversations = sorted(conversations['validate']) write_wavs(reader, eval_conversations, 1e10, version, good_bc_sample_tracks, write_mono=True, write_nn=True, write_orig=False, write_truthrandom=True,
def evaluate_conv_multiclass(config_path: str, convid: str, config: dict):
    # ooh boy this code be stupid
    reader = loadDBReader(config_path)
    bc_convid = swap_speaker(convid)
    _correct_bcs = [(reader.getBcRealStartTime(utt),
                     bc_to_category(reader, uttInfo))
                    for utt, uttInfo in reader.get_backchannels(
                        list(reader.get_utterances(bc_convid)))]
    correct_bcs = [time for time, _ in _correct_bcs]
    correct_categories = [cat for _, cat in _correct_bcs]
    if 'random_baseline' in config:
        raise Exception("cant baseline multiclass")
    if 'sigma_ms' in config:
        if 'smoother' in config:
            raise Exception('conflicting options: smoother and sigma')
        smoother = {'type': 'gauss', 'sigma_ms': config['sigma_ms']}
    else:
        smoother = config['smoother']
    net_output = cached_smoothed_netout(config_path, convid, config["epoch"],
                                        Hashabledict(smoother))
    any_predictor = 1 - net_output[:, [0]]
    predicted_bcs = list(
        predict_bcs(reader,
                    any_predictor,
                    threshold=config['threshold'],
                    at_start=config['at_start']))
    predicted_count = len(predicted_bcs)
    predicted_inx = 0
    predicted_categories = [
        np.argmax(net_output[net_output.time_to_sample_index(time)][1:]) + 1
        for time in predicted_bcs
    ]
    if predicted_count > 0:
        for correct_bc in correct_bcs:
            while predicted_inx < predicted_count - 1 and nearer(
                    predicted_bcs[predicted_inx + 1],
                    predicted_bcs[predicted_inx], correct_bc):
                predicted_inx += 1
            if bc_is_within_margin_of_error(predicted_bcs[predicted_inx],
                                            correct_bc,
                                            config['margin_of_error']):
                predicted_bcs[predicted_inx] = correct_bc
    correct = {
        time: category
        for time, category in zip(correct_bcs, correct_categories)
    }
    predicted = {
        time: category
        for time, category in zip(predicted_bcs, predicted_categories)
    }
    if config['min_talk_len'] is not None:
        segs = list(
            get_monologuing_segments(reader,
                                     convid,
                                     min_talk_len=config['min_talk_len']))
        predicted_bcs = list(filter_ranges(predicted_bcs, segs))
        correct_bcs = list(filter_ranges(correct_bcs, segs))
    category_count = len(reader.categories.keys()) + 1
    # confusion[i][j]: ground-truth category i was predicted as category j,
    # where index 0 means "no backchannel"
    confusion = np.zeros((category_count, category_count), dtype=np.int32)
    for time in correct_bcs:
        confusion[correct[time]][predicted.get(time, 0)] += 1
    for time in predicted_bcs:
        if time not in correct:
            confusion[0][predicted[time]] += 1
    # https://www.wikiwand.com/en/Precision_and_recall
    selected = set(predicted_bcs)
    relevant = set(correct_bcs)
    true_positives = selected & relevant
    false_positives = selected - relevant
    false_negatives = relevant - selected
    return convid, dict(selected=len(selected),
                        relevant=len(relevant),
                        true_positives=len(true_positives),
                        false_positives=len(false_positives),
                        false_negatives=len(false_negatives),
                        confusion_matrix=confusion)
def evaluate_conv(config_path: str, convid: str, config: dict):
    reader = loadDBReader(config_path)
    if reader.config['extract_config'].get('categories', None) is not None:
        return evaluate_conv_multiclass(config_path, convid, config)
    bc_convid = swap_speaker(convid)
    correct_bcs = [
        reader.getBcRealStartTime(utt)
        for utt, uttInfo in reader.get_backchannels(
            list(reader.get_utterances(bc_convid)))
    ]
    if 'random_baseline' in config:
        predicted_bcs = random_predictor(reader, convid, config)
    else:
        if 'sigma_ms' in config:
            if 'smoother' in config:
                raise Exception('conflicting options: smoother and sigma')
            smoother = {
                'type': 'gauss',
                'sigma_ms': config['sigma_ms'],
                'cutoff_sigma': 2
            }
        else:
            smoother = config['smoother']
        net_output = cached_smoothed_netout(config_path, convid,
                                            config["epoch"],
                                            Hashabledict(smoother))
        net_output = 1 - net_output[:, [0]]
        predicted_bcs = list(
            predict_bcs(reader,
                        net_output,
                        threshold=config['threshold'],
                        at_start=config['at_start']))
    predicted_count = len(predicted_bcs)
    predicted_inx = 0
    if predicted_count > 0:
        for correct_bc in correct_bcs:
            while predicted_inx < predicted_count - 1 and nearer(
                    predicted_bcs[predicted_inx + 1],
                    predicted_bcs[predicted_inx], correct_bc):
                predicted_inx += 1
            if bc_is_within_margin_of_error(predicted_bcs[predicted_inx],
                                            correct_bc,
                                            config['margin_of_error']):
                # snap the nearest prediction onto the true BC time so it
                # counts as a true positive in the set comparison below
                predicted_bcs[predicted_inx] = correct_bc
    if config['min_talk_len'] is not None:
        segs = list(
            get_monologuing_segments(reader,
                                     convid,
                                     min_talk_len=config['min_talk_len']))
        predicted_bcs = filter_ranges(predicted_bcs, segs)
        correct_bcs = filter_ranges(correct_bcs, segs)
    # https://www.wikiwand.com/en/Precision_and_recall
    selected = set(predicted_bcs)
    relevant = set(correct_bcs)
    true_positives = selected & relevant
    false_positives = selected - relevant
    false_negatives = relevant - selected
    return convid, dict(selected=len(selected),
                        relevant=len(relevant),
                        true_positives=len(true_positives),
                        false_positives=len(false_positives),
                        false_negatives=len(false_negatives))
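# Hedged sketch (not the project's own precision_recall helper, whose exact
# implementation is not shown here): given the count dict returned by
# evaluate_conv, precision, recall, and F1 follow from the standard definitions.
def _example_precision_recall(counts: dict) -> dict:
    tp = counts['true_positives']
    fp = counts['false_positives']
    fn = counts['false_negatives']
    precision = tp / (tp + fp) if tp + fp > 0 else 0.0
    recall = tp / (tp + fn) if tp + fn > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall > 0 else 0.0)
    return dict(precision=precision, recall=recall, f1_score=f1)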
def cached_smoothed_netout(config_path, convid, epoch, smoother):
    return loadDBReader(config_path).features.smooth(convid, epoch, smoother)
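# Hedged usage sketch: the smoother argument is the dict built in evaluate_conv
# (a Gaussian smoother wrapped in Hashabledict). The conversation id, epoch, and
# sigma below are illustrative placeholders; the config path is the one also
# used in get_word2vec above, where the exact file is noted as unimportant.
def _example_cached_netout():
    smoother = Hashabledict({'type': 'gauss', 'sigma_ms': 300, 'cutoff_sigma': 2})
    return cached_smoothed_netout(
        "configs/finunified/vary-features/lstm-best-features-power,ffv.json",
        "sw0000-A", "20", smoother)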