def recognize(args): workspace = cfg.workspace events = cfg.events n_events = args.n_events snr = args.snr md_na = args.model_name lb_to_ix = cfg.lb_to_ix n_out = len(cfg.events) te_fold = cfg.te_fold md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr, md_na) md = serializations.load(md_path) # Load data. feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events) yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events) (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold, snr=snr, is_scale=is_scale) x = te_x at_gts = te_at_y sed_gts = te_sed_y na_list = te_na_list # Recognize. [at_pds] = md.predict(x) # (N, 16) observe_nodes = [md.find_layer('detect').output_] f_forward = md.get_observe_forward_func(observe_nodes) [seg_masks] = md.run_function(f_forward, x, batch_size=500, tr_phase=0.) # (n_clips, n_time, n_out) seg_masks = np.transpose(seg_masks, (0, 2, 1))[:, :, :, np.newaxis] # Dump to pickle. out_dir = os.path.join(workspace, "preds", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr, os.path.splitext(md_na)[0]) pp_data.create_folder(out_dir) out_at_path = os.path.join(out_dir, "at_probs.p") out_seg_masks_path = os.path.join(out_dir, "seg_masks.p") cPickle.dump(at_pds, open(out_at_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(seg_masks, open(out_seg_masks_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL) # Print stats. sed_pds = np.mean(seg_masks, axis=-1) # (N, n_out, n_time) sed_pds = np.transpose(sed_pds, (0, 2, 1)) # (N, n_time, n_out) print_stats(at_pds, at_gts, sed_pds, sed_gts)
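# NOTE: print_stats() used above is defined elsewhere in the repository. The
# sketch below is only a guess at what it reports, assuming clip-level (AT)
# predictions/targets of shape (N, n_out) and frame-level (SED) ones of shape
# (N, n_time, n_out); the metric choice (macro AUC and average precision via
# sklearn) is an assumption, not the original implementation.
import numpy as np
from sklearn import metrics

def print_stats_sketch(at_pds, at_gts, sed_pds, sed_gts):
    """Log clip-level and frame-level AUC / AP, macro-averaged over classes."""
    at_auc = metrics.roc_auc_score(at_gts, at_pds, average='macro')
    at_ap = metrics.average_precision_score(at_gts, at_pds, average='macro')
    # Score every frame as an independent example.
    sed_pds_2d = sed_pds.reshape((-1, sed_pds.shape[-1]))
    sed_gts_2d = sed_gts.reshape((-1, sed_gts.shape[-1]))
    sed_auc = metrics.roc_auc_score(sed_gts_2d, sed_pds_2d, average='macro')
    sed_ap = metrics.average_precision_score(sed_gts_2d, sed_pds_2d, average='macro')
    print("AT  AUC: %.3f, AP: %.3f" % (at_auc, at_ap))
    print("SED AUC: %.3f, AP: %.3f" % (sed_auc, sed_ap))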
def get_sep_stats(args):
    workspace = cfg.workspace
    te_fold = cfg.te_fold
    events = cfg.events
    n_events = args.n_events
    snr = args.snr

    sep_stat_path = os.path.join(workspace, "sep_stats",
                                 pp_data.get_filename(__file__),
                                 "n_events=%d" % n_events,
                                 "fold=%d" % te_fold,
                                 "snr=%d" % snr,
                                 "sep_stat.p")
    sep_stats = cPickle.load(open(sep_stat_path, 'rb'))
    print(sep_stats)

    sdrs, sirs, sars = [], [], []
    for e in events:
        sdr = np.mean(sep_stats[e]['sdr'][0])
        sir = np.mean(sep_stats[e]['sir'][0])
        sar = np.mean(sep_stats[e]['sar'][0])
        sdrs.append(sdr)
        sirs.append(sir)
        sars.append(sar)

    logging.info("%sSDR\tSIR\tSAR" % ("".ljust(16)))
    logging.info("*%s*%.3f\t*%.3f\t*%.3f" % ("Avg. of each".ljust(16),
                 np.mean(sdrs), np.mean(sirs), np.mean(sars)))
    for i1 in xrange(len(events)):
        logging.info("%s%.3f\t%.3f\t%.3f" % (events[i1].ljust(16),
                     sdrs[i1], sirs[i1], sars[i1]))
def get_avg_stats(args, file_name, bgn_iter, fin_iter, interval_iter): eval_hdf5_path = os.path.join(args.cpickle_dir, "eval.h5") workspace = args.workspace # Load ground truth (te_x, te_y, te_id_list) = pp_data.load_data(eval_hdf5_path) y = te_y # Average prediction probabilities of several iterations prob_dir = os.path.join(workspace, "probs", file_name, "test") names = os.listdir(prob_dir) probs = [] iters = range(bgn_iter, fin_iter, interval_iter) for iter in iters: pickle_path = os.path.join(prob_dir, "prob_%d_iters.p" % iter) prob = cPickle.load(open(pickle_path, 'rb')) probs.append(prob) #print(len(probs)) avg_prob = np.mean(np.array(probs), axis=0) # Compute stats t1 = time.time() n_out = y.shape[1] stats = [] for k in range(n_out): (precisions, recalls, thresholds) = metrics.precision_recall_curve(y[:, k], avg_prob[:, k]) avg_precision = metrics.average_precision_score(y[:, k], avg_prob[:, k], average=None) (fpr, tpr, thresholds) = metrics.roc_curve(y[:, k], avg_prob[:, k]) auc = metrics.roc_auc_score(y[:, k], avg_prob[:, k], average=None) #eer = pp_data.eer(avg_prob[:, k], y[:, k]) skip = 1000 dict = {'precisions': precisions[0::skip], 'recalls': recalls[0::skip], 'AP': avg_precision, 'fpr': fpr[0::skip], 'fnr': 1. - tpr[0::skip], 'auc': auc} stats.append(dict) logging.info("Callback time: %s" % (time.time() - t1,)) # Dump stats dump_path = os.path.join(workspace, "stats", pp_data.get_filename(__file__), "test", "avg_%d_%d_%d.p" % (bgn_iter, fin_iter, interval_iter)) pp_data.create_folder(os.path.dirname(dump_path)) cPickle.dump(stats, open(dump_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL) #print(stats.shape) #for i, e in enumerate(stats): # logging.info("%d. mAP: %f, auc: %f, d_prime: %f" % (i, e['AP'], e['auc'], pp_data.d_prime(e['auc']))) # Write out to log logging.info("bgn_iter, fin_iter, interval_iter: %d, %d, %d" % (bgn_iter, fin_iter, interval_iter)) logging.info("mAP: %f" % np.mean([e['AP'] for e in stats])) auc = np.mean([e['auc'] for e in stats]) logging.info("auc: %f" % auc) logging.info("d_prime: %f" % pp_data.d_prime(auc))
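# NOTE: pp_data.d_prime() referenced above is defined elsewhere. The standard
# definition used with AudioSet-style metrics is d' = sqrt(2) * Phi^{-1}(AUC),
# with Phi^{-1} the inverse standard normal CDF; a self-contained version
# (an assumption about pp_data's implementation) is:
import numpy as np
from scipy import stats

def d_prime_sketch(auc):
    return np.sqrt(2.) * stats.norm.ppf(auc)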
def plot_seg_masks(args):
    workspace = args.workspace

    # Load data.
    te_pack_path = os.path.join(workspace, "packed_features", "logmel", "testing.h5")
    scaler_path = os.path.join(workspace, "scalers", "logmel", "training.scaler")

    with h5py.File(te_pack_path, 'r') as hf:
        te_na_list = list(hf.get('na_list'))
        te_x = np.array(hf.get('x'))
        te_y = np.array(hf.get('y'))

    te_x_unscaled = te_x  # Unscaled x for plot.
    scaler = pickle.load(open(scaler_path, 'rb'))
    te_x = pp_data.do_scaler_on_x3d(te_x, scaler)

    # Load model.
    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__), args.model_name)
    md = serializations.load(md_path)

    # Observe function.
    observe_nodes = [md.find_layer('seg_masks').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [seg_masks] = md.run_function(f_forward, te_x, batch_size=50, tr_phase=0.)
    print("Segmentation masks: %s" % (seg_masks.shape, ))

    # Plot segmentation masks.
    for i1 in xrange(len(seg_masks)):
        na = te_na_list[i1]
        if ".mix_0db.wav" in na:
            print(na)
            gt_y = te_y[i1].astype(np.float32)
            print(gt_y)
            print("Ground truth: %s" % cfg.events[np.argmax(gt_y)])

            events_ex = cfg.events + ['bg']
            fig, axs = plt.subplots(3, 2, sharex=True)
            axs[0, 0].matshow(te_x_unscaled[i1].T, origin='lower', aspect='auto')
            axs[0, 0].set_title("log Mel spectrogram")
            for i2 in xrange(0, 4):
                axs[i2 / 2 + 1, i2 % 2].matshow(seg_masks[i1, i2].T, origin='lower',
                                                aspect='auto', vmin=0, vmax=1)
                axs[i2 / 2 + 1, i2 % 2].set_title(events_ex[i2])
            plt.show()
def write_out_at_sed(md, gen, f_forward, x, at_y, sed_y, n_events, snr, te_fold): workspace = cfg.workspace pred_at_all = [] seg_masks_all = [] gt_at_all = [] gt_sed_all = [] for [batch_x, batch_at_y, batch_sed_y] in gen.generate(zs=[x, at_y, sed_y]): # AT. [at_pred] = md.predict(batch_x, batch_size=None) pred_at_all.append(at_pred) # SED. [seg_masks] = md.run_function(func=f_forward, z=[batch_x], batch_size=500, tr_phase=0.) seg_masks_all.append(seg_masks) gt_at_all.append(batch_at_y) gt_sed_all.append(batch_sed_y) # DO NOT SHUFFLE DATA! pred_at_all = np.concatenate(pred_at_all, axis=0) seg_masks_all = np.concatenate(seg_masks_all, axis=0) gt_at_all = np.concatenate(gt_at_all, axis=0) gt_sed_all = np.concatenate(gt_sed_all, axis=0) # Compress to float16 to reduce space. pred_at_all = pred_at_all.astype(np.float16) seg_masks_all = seg_masks_all.astype(np.float16) print(pred_at_all.shape) print(seg_masks_all.shape) print(pred_at_all.dtype) out_dir = os.path.join(workspace, "callbacks", "preds", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr, "md%d_iters" % md.iter_) pp_data.create_folder(out_dir) out_at_path = os.path.join(out_dir, "at_probs.p") out_seg_masks_path = os.path.join(out_dir, "seg_masks.p") cPickle.dump(pred_at_all, open(out_at_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL) cPickle.dump(seg_masks_all, open(out_seg_masks_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL) thres = 0.5 (tp, fn, fp, tn) = tp_fn_fp_tn(pred_at_all, gt_at_all, thres, average='macro') (prec, recall, fvalue) = prec_recall_fvalue(pred_at_all, gt_at_all, thres, average='macro') logging.info("tp, fn, fp, tn: %d %d %d %d" % (tp, fn, fp, tn)) logging.info("prec, recall, fvalue: %f %f %f" % (prec, recall, fvalue))
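# NOTE: tp_fn_fp_tn() and prec_recall_fvalue() are imported from elsewhere in
# the repository. Minimal sketches consistent with how they are called above
# are given below (assumptions about the originals): the counts are pooled
# over all classes, while precision/recall/F1 are macro-averaged.
import numpy as np

def tp_fn_fp_tn_sketch(pd, gt, thres, average='macro'):
    """pd, gt: (N, n_classes). 'average' kept only for signature compatibility."""
    b = (pd > thres).astype(np.int32)
    tp = np.sum((b == 1) & (gt == 1))
    fn = np.sum((b == 0) & (gt == 1))
    fp = np.sum((b == 1) & (gt == 0))
    tn = np.sum((b == 0) & (gt == 0))
    return tp, fn, fp, tn

def prec_recall_fvalue_sketch(pd, gt, thres, average='macro', eps=1e-10):
    """Macro-averaged precision, recall and F1 over classes."""
    b = (pd > thres).astype(np.int32)
    precs, recalls, fvalues = [], [], []
    for k in range(gt.shape[1]):
        tp = np.sum((b[:, k] == 1) & (gt[:, k] == 1))
        fp = np.sum((b[:, k] == 1) & (gt[:, k] == 0))
        fn = np.sum((b[:, k] == 0) & (gt[:, k] == 1))
        prec = tp / (tp + fp + eps)
        recall = tp / (tp + fn + eps)
        precs.append(prec)
        recalls.append(recall)
        fvalues.append(2. * prec * recall / (prec + recall + eps))
    return np.mean(precs), np.mean(recalls), np.mean(fvalues)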
def get_stats(args, bgn_iter, fin_iter, interval): workspace = cfg.workspace events = cfg.events te_fold = cfg.te_fold n_events = args.n_events snr = args.snr # Load ground truth data. feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events) yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events) (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold, snr=snr, is_scale=is_scale) at_gts = te_at_y sed_gts = te_sed_y # Load and sum preds_dir = os.path.join(workspace, "preds", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr) at_probs_list, seg_masks_list = [], [] for iter in xrange(bgn_iter, fin_iter, interval): at_probs_path = os.path.join(preds_dir, "md%d_iters" % iter, "at_probs.p") at_probs = cPickle.load(open(at_probs_path, 'rb')) at_probs_list.append(at_probs) seg_masks_path = os.path.join(preds_dir, "md%d_iters" % iter, "seg_masks.p") seg_masks = cPickle.load(open(seg_masks_path, 'rb')) seg_masks_list.append(seg_masks) at_probs = np.mean(at_probs_list, axis=0) # (n_clips, n_classes) seg_masks = np.mean(seg_masks_list, axis=0) # (n_clips, n_classes, n_time, n_freq) sed_probs = np.mean(seg_masks, axis=-1).transpose(0, 2, 1) # (n_clips, n_time, n_classes) print_stats(at_probs, at_gts, sed_probs, sed_gts)
def plot_hotmap(args):
    workspace = cfg.workspace
    events = cfg.events
    md_na = args.model_name
    n_events = args.n_events
    te_fold = cfg.te_fold

    feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    (tr_x, tr_at_y, tr_sed_y, tr_na_list,
     te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(feature_dir=feature_dir,
                                                              yaml_dir=yaml_dir,
                                                              te_fold=te_fold,
                                                              is_scale=is_scale)

    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, md_na)
    md = serializations.load(md_path)

    x = te_x
    y = te_at_y

    observe_nodes = [md.find_layer('hotmap').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [a4] = md.run_function(f_forward, x, batch_size=500, tr_phase=0.)
    print(a4.shape)

    for i1 in xrange(len(a4)):
        # if te_na_list[i1] == 'CR_lounge_220110_0731.s2700_chunk48':
        print(y[i1])
        # print np.mean(a4[i1], axis=(1, 2))
        fig, axs = plt.subplots(5, 4, sharex=True)
        axs[0, 0].matshow(x[i1].T, origin='lower', aspect='auto')
        for i2 in xrange(16):
            axs[i2 / 4 + 1, i2 % 4].matshow(a4[i1, i2].T, origin='lower',
                                            aspect='auto', vmin=0, vmax=1)
            axs[i2 / 4 + 1, i2 % 4].set_title(events[i2])
        plt.show()
    cnt += 1

    avg = {}
    for e in ['sdr', 'sir', 'sar']:
        avg[e] = []

    for event_type in dict.keys():
        logging.info(event_type)
        for evaluate_type in dict[event_type]:
            tmp = np.mean(dict[event_type][evaluate_type])
            logging.info((evaluate_type, tmp))
            avg[evaluate_type[0:3]].append(tmp)

    logging.info("Average stats:")
    for e in ['sdr', 'sir', 'sar']:
        logging.info("%s, %f" % (e, np.mean(avg[e])))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--workspace", type=str)
    parser.add_argument("--sep_type", type=str, help="The sub folder of separation.")
    args = parser.parse_args()

    logs_dir = os.path.join(args.workspace, "logs", pp_data.get_filename(__file__))
    pp_data.create_folder(logs_dir)
    logging = pp_data.create_logging(logs_dir, filemode='w')
    logging.info(os.path.abspath(__file__))
    logging.info(sys.argv)

    evaluate_separation(args)
def evaluate_separation(args): workspace = cfg.workspace events = cfg.events te_fold = cfg.te_fold n_window = cfg.n_window n_overlap = cfg.n_overlap fs = cfg.sample_rate clip_duration = cfg.clip_duration n_events = args.n_events snr = args.snr # Load ground truth data. feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events) yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events) (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold, snr=snr, is_scale=is_scale) at_y = te_at_y sed_y = te_sed_y na_list = te_na_list audio_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events) sep_dir = os.path.join(workspace, "sep_audio", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr) sep_stats = {} for e in events: sep_stats[e] = {'sdr': [], 'sir': [], 'sar': []} cnt = 0 for (i1, na) in enumerate(na_list): bare_na = os.path.splitext(na)[0] gt_audio_path = os.path.join(audio_dir, "%s.wav" % bare_na) (stereo_audio, _) = pp_data.read_stereo_audio(gt_audio_path, target_fs=fs) gt_event_audio = stereo_audio[:, 0] gt_noise_audio = stereo_audio[:, 1] print(na) for j1 in xrange(len(events)): if at_y[i1][j1] == 1: sep_event_audio_path = os.path.join( sep_dir, "%s.%s.wav" % (bare_na, events[j1])) (sep_event_audio, _) = pp_data.read_audio(sep_event_audio_path, target_fs=fs) sep_noise_audio_path = os.path.join(sep_dir, "%s.noise.wav" % bare_na) (sep_noise_audio, _) = pp_data.read_audio(sep_noise_audio_path, target_fs=fs) ref_array = np.array((gt_event_audio, gt_noise_audio)) est_array = np.array((sep_event_audio, sep_noise_audio)) (sdr, sir, sar) = sdr_sir_sar(ref_array, est_array, sed_y[i1, :, j1], inside_only=True) print(sdr, sir, sar) sep_stats[events[j1]]['sdr'].append(sdr) sep_stats[events[j1]]['sir'].append(sir) sep_stats[events[j1]]['sar'].append(sar) cnt += 1 # if cnt == 5: break print(sep_stats) sep_stat_path = os.path.join(workspace, "sep_stats", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr, "sep_stat.p") pp_data.create_folder(os.path.dirname(sep_stat_path)) cPickle.dump(sep_stats, open(sep_stat_path, 'wb'))
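# NOTE: sdr_sir_sar() used above comes from elsewhere in the repository. A
# plausible sketch built on mir_eval is shown below. The handling of
# inside_only=True (scoring only the samples inside the annotated event,
# converted from frames with hop = n_window - n_overlap) is an assumption
# about the original helper, not a verified reimplementation.
import numpy as np
import mir_eval

def sdr_sir_sar_sketch(ref_array, est_array, sed_frames, hop, inside_only=True):
    """ref_array, est_array: (n_sources, n_samples); sed_frames: (n_time,) 0/1."""
    if inside_only:
        frames = np.where(sed_frames > 0.5)[0]
        bgn_sample = frames[0] * hop
        fin_sample = (frames[-1] + 1) * hop
        ref_array = ref_array[:, bgn_sample:fin_sample]
        est_array = est_array[:, bgn_sample:fin_sample]
    (sdr, sir, sar, _perm) = mir_eval.separation.bss_eval_sources(
        ref_array, est_array, compute_permutation=False)
    return sdr[0], sir[0], sar[0]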
def recognize(args): workspace = args.workspace md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__), args.model_name) t1 = time.time() # Load scaler. scaler_path = os.path.join(workspace, "scalers", "logmel", "training.scaler") scaler = pickle.load(open(scaler_path, 'rb')) # Load model. md = serializations.load(md_path) # Observe function. observe_nodes = [md.find_layer('seg_masks').output_] f_forward = md.get_observe_forward_func(observe_nodes) audio_dir = os.path.join(workspace, "mixed_audio", "testing") names = os.listdir(audio_dir) at_pd_ary = [] at_gt_ary = [] sed_pd_ary = [] sed_gt_ary = [] # For all audio clips. for na in names: if '.mix_0db.wav' in na: logging.info(na) # Load audio. bare_na = os.path.splitext(os.path.splitext(na)[0])[0] audio_path = os.path.join(audio_dir, na) (bg_audio, event_audio, fs) = pp_data.read_audio_stereo(audio_path) mixed_audio = bg_audio + event_audio # Load yaml. yaml_path = os.path.join(audio_dir, "%s.yaml" % bare_na) with open(yaml_path, 'r') as f: data = yaml.load(f) event_type = data['event_type'] # Calculate feature. x = pp_data.calc_feat(mixed_audio) x3d = pp_data.do_scaler_on_x3d(x[np.newaxis, ...], scaler) # Ground truth. gt_y = [0, 0, 0, 0] gt_y[cfg.lb_to_ix[event_type]] = 1 at_gt_ary.append(gt_y) # Audio tagging (AT) prediction. [pred_y] = md.predict(x3d) # (1, n_events+1) pred_y = pred_y[0] # (n_events+1,) at_pd_ary.append(pred_y) # Sound event detection (SED) prediction. [masks] = md.run_function( f_forward, x3d, batch_size=10, tr_phase=0.) # (1, n_events+1, n_time, n_freq) masks = masks[0] # (n_events+1, n_time, n_freq) sed_pd = np.mean(masks, axis=-1).T # (n_time, n_events+1) sed_pd_ary.append(sed_pd) sed_gt = np.zeros_like(sed_pd) [bgn_sec, fin_sec] = data['event_segment'] bgn_fr = int(bgn_sec * cfg.sample_rate / float(cfg.n_window - cfg.n_overlap)) fin_fr = int(fin_sec * cfg.sample_rate / float(cfg.n_window - cfg.n_overlap)) sed_gt[bgn_fr:fin_fr, cfg.lb_to_ix[event_type]] = 1 sed_gt_ary.append(sed_gt) at_pd_ary = np.array(at_pd_ary) at_gt_ary = np.array(at_gt_ary) sed_pd_ary = np.array(sed_pd_ary) sed_gt_ary = np.array(sed_gt_ary) # Write out AT and SED presence probabilites. logging.info("at_pd_ary.shape: %s" % (at_pd_ary.shape, )) logging.info("at_gt_ary.shape: %s" % (at_gt_ary.shape, )) logging.info("sed_pd_ary.shape: %s" % (sed_pd_ary.shape, )) logging.info("sed_gt_ary.shape: %s" % (sed_gt_ary.shape, )) dict = {} dict['at_pd_ary'] = at_pd_ary dict['at_gt_ary'] = at_gt_ary dict['sed_pd_ary'] = sed_pd_ary dict['sed_gt_ary'] = sed_gt_ary out_path = os.path.join(workspace, "_tmp", "_at_sed_dict.p") pp_data.create_folder(os.path.dirname(out_path)) cPickle.dump(dict, open(out_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL) logging.info("Recognize time: %s" % (time.time() - t1, ))
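# NOTE: pp_data.calc_feat() above returns a log mel spectrogram. A minimal
# librosa-based sketch is given here; the parameters (64 mel bands, Hamming
# window, hop = n_window - n_overlap) mirror the mel settings used elsewhere
# in this section, but the repository's exact feature code is an assumption.
import numpy as np
import librosa

def calc_logmel_sketch(audio, fs, n_window, n_overlap, n_mels=64):
    hop = n_window - n_overlap
    spec = np.abs(librosa.stft(audio.astype(np.float32), n_fft=n_window,
                               hop_length=hop, window='hamming',
                               center=False)) ** 2
    melW = librosa.filters.mel(sr=fs, n_fft=n_window, n_mels=n_mels,
                               fmin=0., fmax=fs / 2.)
    return np.log(np.dot(spec.T, melW.T) + 1e-8)   # (n_time, n_mels)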
def separate(args, bgn_iter, fin_iter, interval): workspace = cfg.workspace events = cfg.events te_fold = cfg.te_fold n_events = args.n_events n_window = cfg.n_window n_overlap = cfg.n_overlap fs = cfg.sample_rate clip_duration = cfg.clip_duration snr = args.snr # Load ground truth data. feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events) yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events) (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold, snr=snr, is_scale=is_scale) at_y = te_at_y sed_y = te_sed_y na_list = te_na_list # Load and sum preds_dir = os.path.join(workspace, "preds", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr) at_probs_list, seg_masks_list = [], [] for iter in xrange(bgn_iter, fin_iter, interval): seg_masks_path = os.path.join(preds_dir, "md%d_iters" % iter, "seg_masks.p") seg_masks = cPickle.load(open(seg_masks_path, 'rb')) seg_masks_list.append(seg_masks) seg_masks = np.mean(seg_masks_list, axis=0) # (n_clips, n_classes, n_time, n_freq) print(seg_masks.shape) # audio_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events) sep_dir = os.path.join(workspace, "sep_audio", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr) pp_data.create_folder(sep_dir) ham_win = np.hamming(n_window) recover_scaler = np.sqrt((ham_win**2).sum()) melW = librosa.filters.mel(sr=fs, n_fft=n_window, n_mels=64, fmin=0., fmax=fs / 2) inverse_melW = get_inverse_W(melW) # (64, 513) seg_stats = {} for e in events: seg_stats[e] = { 'fvalue': [], 'auc': [], 'iou': [], 'hit': [], 'fa': [], 'tp': [], 'fn': [], 'fp': [] } cnt = 0 for (i1, na) in enumerate(na_list): bare_na = os.path.splitext(na)[0] audio_path = os.path.join(audio_dir, "%s.wav" % bare_na) (stereo_audio, _) = pp_data.read_stereo_audio(audio_path, target_fs=fs) event_audio = stereo_audio[:, 0] noise_audio = stereo_audio[:, 1] mixed_audio = event_audio + noise_audio mixed_cmplx_sp = pp_data.calc_sp(mixed_audio, fs, ham_win, n_window, n_overlap) mixed_sp = np.abs(mixed_cmplx_sp) event_sp = np.abs( pp_data.calc_sp(event_audio, fs, ham_win, n_window, n_overlap)) noise_sp = np.abs( pp_data.calc_sp(noise_audio, fs, ham_win, n_window, n_overlap)) sm = seg_masks[i1] # (n_classes, n_time, n_freq) sm_upsampled = np.dot(sm, inverse_melW) # (n_classes, n_time, 513) print(na) # Write out separated events. for j1 in xrange(len(events)): if at_y[i1][j1] == 1: (fvalue, auc, iou, tp, fn, fp) = fvalue_iou(sm_upsampled[j1], event_sp, noise_sp, sed_y[i1, :, j1], seg_thres, inside_only=True) (hit, fa) = hit_fa(sm_upsampled[j1], event_sp, noise_sp, sed_y[i1, :, j1], seg_thres, inside_only=True) seg_stats[events[j1]]['fvalue'].append(fvalue) seg_stats[events[j1]]['auc'].append(auc) seg_stats[events[j1]]['iou'].append(iou) seg_stats[events[j1]]['hit'].append(hit) seg_stats[events[j1]]['fa'].append(fa) seg_stats[events[j1]]['tp'].append(tp) seg_stats[events[j1]]['fn'].append(fn) seg_stats[events[j1]]['fp'].append(fp) sep_event_sp = sm_upsampled[j1] * mixed_sp sep_event_s = spectrogram_to_wave.recover_wav( sep_event_sp, mixed_cmplx_sp, n_overlap=n_overlap, winfunc=np.hamming, wav_len=int(fs * clip_duration)) sep_event_s *= recover_scaler out_event_audio_path = os.path.join( sep_dir, "%s.%s.wav" % (bare_na, events[j1])) pp_data.write_audio(out_event_audio_path, sep_event_s, fs) # Write out separated noise. 
sm_noise_upsampled = np.clip(1. - np.sum(sm_upsampled, axis=0), 0., 1.) sep_noise_sp = sm_noise_upsampled * mixed_sp sep_noise_s = spectrogram_to_wave.recover_wav(sep_noise_sp, mixed_cmplx_sp, n_overlap=n_overlap, winfunc=np.hamming, wav_len=int( fs * clip_duration)) sep_noise_s *= recover_scaler out_noise_audio_path = os.path.join(sep_dir, "%s.noise.wav" % bare_na) pp_data.write_audio(out_noise_audio_path, sep_noise_s, fs) cnt += 1 # if cnt == 2: break fvalues, aucs, ious, hits, fas, tps, fns, fps = [], [], [], [], [], [], [], [] for e in events: fvalues.append(np.mean(seg_stats[e]['fvalue'])) ious.append(np.mean(seg_stats[e]['iou'])) aucs.append(np.mean(seg_stats[e]['auc'])) hits.append(np.mean(seg_stats[e]['hit'])) fas.append(np.mean(seg_stats[e]['fa'])) tps.append(np.mean(seg_stats[e]['tp'])) fns.append(np.mean(seg_stats[e]['fn'])) fps.append(np.mean(seg_stats[e]['fp'])) logging.info("%sfvalue\tauc\tiou\tHit\tFa\tHit-Fa\tTP\tFN\tFP" % ("".ljust(16))) logging.info( "%s*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f" % ("*Avg. of each".ljust(16), np.mean(fvalues), np.mean(aucs), np.mean(ious), np.mean(hits), np.mean(fas), np.mean(hits) - np.mean(fas), np.mean(tps), np.mean(fns), np.mean(fps))) for i1 in xrange(len(events)): logging.info( "%s%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f" % (events[i1].ljust(16), fvalues[i1], aucs[i1], ious[i1], hits[i1], fas[i1], hits[i1] - fas[i1], tps[i1], fns[i1], fps[i1]))
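# NOTE: get_inverse_W() used in separate() maps a mask defined on the 64 mel
# bands back onto the 513 linear STFT bins. A common choice, and only an
# assumption about the original helper, is the mel matrix with each STFT bin's
# weights re-normalised to sum to one, so that np.dot(mel_mask, W) spreads the
# mel-domain mask across the linear bins:
import numpy as np

def get_inverse_W_sketch(melW, eps=1e-10):
    """melW: (n_mels, n_freq) -> (n_mels, n_freq), columns normalised."""
    return melW / np.maximum(np.sum(melW, axis=0, keepdims=True), eps)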
def train(args): workspace = cfg.workspace te_fold = cfg.te_fold n_events = args.n_events snr = args.snr feature_dir = os.path.join(workspace, "features", "logmel", "n_events=%d" % n_events) yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events) (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y, te_na_list) = pp_data.load_data(feature_dir=feature_dir, yaml_dir=yaml_dir, te_fold=te_fold, snr=snr, is_scale=is_scale) print(tr_x.shape, tr_at_y.shape) print(te_x.shape, te_at_y.shape) (_, n_time, n_freq) = tr_x.shape n_out = len(cfg.events) if False: for e in tr_x: plt.matshow(e.T, origin='lower', aspect='auto') plt.show() # Build model. lay_in = InputLayer(in_shape=(n_time, n_freq)) a = Reshape((1, n_time, n_freq))(lay_in) a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear', strides=(1, 1), border_mode=(1, 2))(a) a = BN(axis=(0, 2, 3))(a) a = Activation('relu')(a) a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear', strides=(1, 1), border_mode=(1, 2))(a) a = BN(axis=(0, 2, 3))(a) a = Activation('relu')(a) a = Dropout(p_drop=0.2)(a) a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear', strides=(1, 1), border_mode=(1, 2))(a) a = BN(axis=(0, 2, 3))(a) a = Activation('relu')(a) a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear', strides=(1, 1), border_mode=(1, 2))(a) a = BN(axis=(0, 2, 3))(a) a = Activation('relu')(a) a = Dropout(p_drop=0.2)(a) a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear', strides=(1, 1), border_mode=(1, 2))(a) a = BN(axis=(0, 2, 3))(a) a = Activation('relu')(a) a = Conv2D(n_outfmaps=64, n_row=3, n_col=5, act='linear', strides=(1, 1), border_mode=(1, 2))(a) a = BN(axis=(0, 2, 3))(a) a = Activation('relu')(a) a = Dropout(p_drop=0.2)(a) a = Conv2D(n_outfmaps=n_out, n_row=1, n_col=1, act='sigmoid', border_mode=(0, 0), name='seg_masks')(a) a8 = Lambda(_global_avg_pooling, name='a8')(a) md = Model([lay_in], [a8]) md.compile() md.summary(is_logging=True) # Callbacks. md_dir = os.path.join(workspace, "models", pp_data.get_filename(__file__), "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr) pp_data.create_folder(md_dir) save_model = SaveModel(md_dir, call_freq=50, type='iter', is_logging=True) validation = Validation(te_x=te_x, te_y=te_at_y, batch_size=50, call_freq=50, metrics=['binary_crossentropy'], dump_path=None, is_logging=True) callbacks = [save_model, validation] observe_nodes = [md.find_layer('seg_masks').output_] f_forward = md.get_observe_forward_func(observe_nodes) # Generator. tr_gen = DataGenerator(batch_size=32, type='train') eva_gen = DataGenerator2(batch_size=32, type='test') # Train. loss_ary = [] t1 = time.time() optimizer = Adam(1e-3) for (batch_x, batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_at_y]): if md.iter_ % 50 == 0: logging.info("iter: %d tr_loss: %f time: %s" % ( md.iter_, np.mean(loss_ary), time.time() - t1, )) t1 = time.time() loss_ary = [] # if md.iter_ % 200 == 0: # write_out_at_sed(md, eva_gen, f_forward, te_x, te_at_y, te_sed_y, n_events, snr, te_fold) if md.iter_ == 5001: break loss = md.train_on_batch(batch_x, batch_y, loss_func='binary_crossentropy', optimizer=optimizer, callbacks=callbacks) loss_ary.append(loss)
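# NOTE: _global_avg_pooling() passed to the Lambda layer above is defined
# elsewhere; it reduces the segmentation masks (n_clips, n_classes, n_time,
# n_freq) to clip-level probabilities (n_clips, n_classes) by averaging over
# time and frequency. The original works on symbolic tensors inside the
# toolkit; the numpy version below only illustrates the same mapping.
import numpy as np

def global_avg_pooling_sketch(seg_masks):
    return np.mean(seg_masks, axis=(2, 3))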
parser_get_sep_stats = subparsers.add_parser('get_sep_stats') parser_get_sep_stats.add_argument('--n_events', type=int) parser_get_sep_stats.add_argument('--snr', type=int) parser_b2 = subparsers.add_parser('avg_recognize') parser_b2.add_argument('--n_events', type=int) parser_b2.add_argument('--snr', type=int) parser_c = subparsers.add_parser('plot_hotmap') parser_c.add_argument('--model_name', type=str) parser_c.add_argument('--n_events', type=int) args = parser.parse_args() logs_dir = os.path.join(cfg.workspace, "logs", pp_data.get_filename(__file__)) pp_data.create_folder(logs_dir) logging = pp_data.create_logging(logs_dir, filemode='w') logging.info(os.path.abspath(__file__)) logging.info(sys.argv) if args.mode == "train": train(args) elif args.mode == "recognize": recognize(args) elif args.mode == "get_stats": bgn_iter, fin_iter, interval = 2000, 3001, 200 get_stats(args, bgn_iter, fin_iter, interval) elif args.mode == "separate": bgn_iter, fin_iter, interval = 2000, 3001, 200 separate(args, bgn_iter, fin_iter, interval)
def train(args): cpickle_dir = args.cpickle_dir workspace = args.workspace # Path of hdf5 data bal_train_hdf5_path = os.path.join(cpickle_dir, "bal_train.h5") unbal_train_hdf5_path = os.path.join(cpickle_dir, "unbal_train.h5") eval_hdf5_path = os.path.join(cpickle_dir, "eval.h5") # Load data t1 = time.time() (tr_x1, tr_y1, tr_id_list1) = pp_data.load_data(bal_train_hdf5_path) (tr_x2, tr_y2, tr_id_list2) = pp_data.load_data(unbal_train_hdf5_path) print(tr_x1.shape) print(tr_x2.shape) tr_x = np.concatenate((tr_x1, tr_x2)) tr_y = np.concatenate((tr_y1, tr_y2)) tr_id_list = tr_id_list1 + tr_id_list2 (te_x, te_y, te_id_list) = pp_data.load_data(eval_hdf5_path) logging.info("Loading data time: %s s" % (time.time() - t1)) logging.info(tr_x1.shape, tr_x2.shape) logging.info("tr_x.shape: %s" % (tr_x.shape, )) (_, n_time, n_freq) = tr_x.shape # Build model n_hid = 600 n_out = tr_y.shape[1] lay_in = Input(shape=(n_time, n_freq)) a_0 = BatchNormalization()(lay_in) a_1 = Dense(n_hid, kernel_regularizer=regularizers.l2(0.001))(a_0) a_1 = BatchNormalization()(a_1) a_1 = Activation('relu')(a_1) a_1 = Dropout(rate=0.4)(a_1) a_2 = Dense(n_hid, kernel_regularizer=regularizers.l2(0.001))(a_1) a_2 = BatchNormalization()(a_2) a_2 = Activation('relu')(a_2) a_2 = Dropout(rate=0.4)(a_2) a_3 = Dense(n_hid, kernel_regularizer=regularizers.l2(0.001))(a_2) a_3 = BatchNormalization()(a_3) a_3 = Activation('relu')(a_3) a_3 = Dropout(rate=0.4)(a_3) cla_1 = Dense(n_out, name='cla_1')(a_3) cla_1 = BatchNormalization()(cla_1) cla_1 = Activation('sigmoid')(cla_1) att_1 = Dense(n_out, name='att_1')(a_3) att_1 = BatchNormalization()(att_1) att_1 = Activation('softmax')(att_1) # Attention lay_out_a = Lambda(_attention, output_shape=_att_output_shape)([cla_1, att_1]) cla_2 = Dense(n_out, name='cla_2')(a_2) cla_2 = BatchNormalization()(cla_2) cla_2 = Activation('sigmoid')(cla_2) att_2 = Dense(n_out, name='att2')(a_2) att_2 = BatchNormalization()(att_2) att_2 = Activation('softmax')(att_2) lay_out_b = Lambda(_attention, output_shape=_att_output_shape)([cla_2, att_2]) lay_out_c = Concatenate(axis=1)([lay_out_a, lay_out_b]) #lay_out = Dense(n_out, activation='sigmoid', name='output')(lay_out_c) lay_out = Dense(n_out, name='output')(lay_out_c) lay_out = BatchNormalization()(lay_out) lay_out = Activation('sigmoid')(lay_out) # Compile model md = Model(inputs=lay_in, outputs=lay_out) md.summary() # Save model every several iterations call_freq = 1000 dump_fd = os.path.join(workspace, "models", pp_data.get_filename(__file__)) pp_data.create_folder(dump_fd) # save_model = SaveModel(dump_fd=dump_fd, call_freq=call_freq, type='iter', is_logging=True) # Callbacks function #callbacks = []#save_model] batch_size = 500 tr_gen = RatioDataGenerator(batch_size=batch_size, type='train') # Optimization method optimizer = Adam(lr=args.lr) md.compile(loss='binary_crossentropy', optimizer=optimizer) #callbacks=callbacks) # Train stat_dir = os.path.join(workspace, "stats", pp_data.get_filename(__file__)) pp_data.create_folder(stat_dir) prob_dir = os.path.join(workspace, "probs", pp_data.get_filename(__file__)) pp_data.create_folder(prob_dir) tr_time = time.time() iter_ = 1 for (tr_batch_x, tr_batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]): # Compute stats every several interations if iter_ % call_freq == 0: # Stats of evaluation dataset t1 = time.time() te_err = eval(md=md, x=te_x, y=te_y, out_dir=os.path.join(stat_dir, "test"), out_probs_dir=os.path.join(prob_dir, "test"), iter_=iter_) logging.info("Evaluate test time: %s" % (time.time() - t1, )) # 
            # Stats of training dataset
            t1 = time.time()
            tr_bal_err = eval(md=md, x=tr_x1, y=tr_y1,
                              out_dir=os.path.join(stat_dir, "train_bal"),
                              out_probs_dir=None,
                              iter_=iter_)
            logging.info("Evaluate tr_bal time: %s" % (time.time() - t1, ))

        iter_ += 1

        # Update params
        (tr_batch_x, tr_batch_y) = pp_data.transform_data(tr_batch_x, tr_batch_y)
        md.train_on_batch(x=tr_batch_x, y=tr_batch_y)

        # Stop training when maximum iteration achieves
        if iter_ == call_freq * 151:
            break
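# NOTE: _attention and _att_output_shape used by the Lambda layers above are
# defined elsewhere in the repository. A common formulation (assumed here, not
# guaranteed to match the original) pools the classifier output over time,
# normalised by the attention weights:
import keras.backend as K

def attention_sketch(inputs):
    (cla, att) = inputs                   # Both (batch, n_time, n_out).
    att = K.clip(att, 1e-7, 1.)
    return K.sum(cla * att, axis=1) / K.sum(att, axis=1)   # (batch, n_out)

def att_output_shape_sketch(input_shapes):
    (batch, n_time, n_out) = input_shapes[0]
    return (batch, n_out)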
def inference(args): cuda = args.use_cuda and torch.cuda.is_available() workspace = args.workspace model_name = args.model_name feat_type = args.feat_type script_na = args.script_na # Load data. te_packed_feat_path = os.path.join(workspace, "packed_features", feat_type, "test.p") [te_x_list, te_y_list, te_na_list] = cPickle.load(open(te_packed_feat_path, 'rb')) # Scale. if True: scale_path = os.path.join(workspace, "scalers", feat_type, "scaler.p") scaler = pickle.load(open(scale_path, 'rb')) te_x_list = pp_data.scale_on_x_list(te_x_list, scaler) # Construct model topology. n_concat = 3 te_n_hop = 1 n_freq = te_x_list[0].shape[-1] n_out = te_y_list[0].shape[-1] model = Net(n_concat, n_freq, n_out) # Init the weights of model using trained weights. model_path = os.path.join(workspace, "models", script_na, feat_type, model_name) if os.path.isfile(model_path): print("Loading checkpoint '%s'" % model_path) checkpoint = torch.load(model_path) model.load_state_dict(checkpoint['state_dict']) else: raise Exception("Model path %s does not exist!" % model_path) # Move model to GPU. if cuda: model.cuda() # Directory to write out transcript midi files. out_midi_dir = os.path.join(workspace, "out_midis", pp_data.get_filename(__file__), feat_type) pp_data.create_folder(out_midi_dir) # Data to 3d. n_half = (n_concat - 1) / 2 for i1 in xrange(len(te_x_list)): x = te_x_list[i1] # (n_time, n_freq) y = te_y_list[i1] # (n_time, n_out) bare_na = os.path.splitext(te_na_list[i1])[0] (n_time, n_freq) = x.shape zero_pad = np.zeros((n_half, n_freq)) x = np.concatenate((zero_pad, x, zero_pad), axis=0) x3d = pp_data.mat_2d_to_3d(x, n_concat, te_n_hop) # (n_time, n_concat, n_freq) # Move data to GPU. x3d = torch.Tensor(x3d) x3d = Variable(x3d) if cuda: x3d = x3d.cuda() # Inference. model.eval() pred = model(x3d) # (n_time, n_out) # Convert data type to numpy. pred = pred.data.cpu().numpy() # Threshold and write out predicted piano roll to midi file. mid_roll = pp_data.prob_to_midi_roll(pred, 0.5) out_path = os.path.join(out_midi_dir, "%s.mid" % bare_na) print("Write out to: %s" % out_path) pp_data.write_midi_roll_to_midi(mid_roll, out_path) # Debug plot. if True: fig, axs = plt.subplots(3, 1, sharex=True) axs[0].matshow(y.T, origin='lower', aspect='auto') axs[1].matshow(pred.T, origin='lower', aspect='auto') binary_pred = (np.sign(pred - 0.5) + 1) / 2 axs[2].matshow(binary_pred.T, origin='lower', aspect='auto') axs[0].set_title("Ground truth") axs[1].set_title("DNN output probability") axs[2].set_title("DNN output probability after thresholding") for j1 in xrange(3): axs[j1].set_ylabel('note index') axs[j1].set_xlabel('frames') axs[j1].xaxis.set_label_coords(1.06, -0.01) axs[j1].xaxis.tick_bottom() plt.tight_layout() plt.show()
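# NOTE: pp_data.mat_2d_to_3d() used above cuts the padded spectrogram into
# overlapping context windows. A self-contained sketch, kept consistent with
# the shapes commented above but still an assumption about the helper:
import numpy as np

def mat_2d_to_3d_sketch(x, agg_num, hop):
    """(n_time, n_freq) -> (n_segments, agg_num, n_freq)"""
    (n_time, n_freq) = x.shape
    segs = []
    i1 = 0
    while i1 + agg_num <= n_time:
        segs.append(x[i1:i1 + agg_num])
        i1 += hop
    return np.array(segs)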
parser_inference_wiener = subparsers.add_parser('inference_wiener') parser_inference_wiener.add_argument('--use_cuda', action='store_true', default=True) parser_inference_wiener.add_argument('--workspace', type=str, required=True) parser_inference_wiener.add_argument('--iteration', type=int, required=True) parser_inference_wiener.add_argument('--stack_num', type=int, required=True) parser_inference_wiener.add_argument('--mini_num', type=int, default=-1) parser_inference_wiener.add_argument('--visualize', action='store_true', default=False) args = parser.parse_args() args.filename = pp_data.get_filename(__file__) if args.mode == "calculate_scalar": calculate_scalar(args) elif args.mode == "train": train(args) elif args.mode == "inference": inference(args) elif args.mode == "inference_wiener": inference_wiener(args) else: raise Exception("Error!")
if __name__ == '__main__': parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(dest='mode') parser_train = subparsers.add_parser('train') parser_train.add_argument('--use_cuda', action='store_true', default=True) parser_train.add_argument('--workspace', type=str) parser_train.add_argument('--feat_type', type=str, choices=['logmel']) parser_train.add_argument('--lr', type=float, default=1e-3) parser_train.add_argument('--resume_model_path', type=str, default="") parser_inference = subparsers.add_parser('inference') parser_inference.add_argument('--use_cuda', action='store_true', default=True) parser_inference.add_argument('--workspace', type=str) parser_inference.add_argument('--model_name', type=str) parser_inference.add_argument('--feat_type', type=str, choices=['logmel']) args = parser.parse_args() if args.mode == "train": args.script_na = pp_data.get_filename(__file__) train(args) elif args.mode == "inference": args.script_na = pp_data.get_filename(__file__) inference(args) else: raise Exception("Incorrect argument!")
def train(args): workspace = args.workspace cla_mapping = args.cla_mapping # Load data. t1 = time.time() tr_pack_path = os.path.join(workspace, "packed_features", "logmel", "training.h5") te_pack_path = os.path.join(workspace, "packed_features", "logmel", "testing.h5") with h5py.File(tr_pack_path, 'r') as hf: tr_na_list = list(hf.get('na_list')) tr_x = np.array(hf.get('x')) tr_y = np.array(hf.get('y')) with h5py.File(te_pack_path, 'r') as hf: te_na_list = list(hf.get('na_list')) te_x = np.array(hf.get('x')) te_y = np.array(hf.get('y')) logging.info("Loading data time: %s" % (time.time() - t1, )) # Scale. t1 = time.time() scaler_path = os.path.join(workspace, "scalers", "logmel", "training.scaler") scaler = pickle.load(open(scaler_path, 'rb')) tr_x = pp_data.do_scaler_on_x3d(tr_x, scaler) te_x = pp_data.do_scaler_on_x3d(te_x, scaler) logging.info("Scale time: %s" % (time.time() - t1, )) logging.info("tr_x: %s %s" % (tr_x.shape, tr_x.dtype)) logging.info("tr_y: %s %s" % (tr_y.shape, tr_y.dtype)) logging.info("y: 1-of-4 representation: %s" % (cfg.events + ['bg'], )) # Build model. (_, n_time, n_freq) = tr_x.shape n_out = len(cfg.events) + 1 in0 = InputLayer(in_shape=(n_time, n_freq)) a1 = Reshape((1, n_time, n_freq))(in0) a1 = Conv2D(n_outfmaps=64, n_row=3, n_col=3, act='linear', border_mode=(1, 1))(a1) a1 = BN(axis=(0, 2, 3))(a1) a1 = Activation('relu')(a1) a1 = Conv2D(n_outfmaps=64, n_row=3, n_col=3, act='linear', border_mode=(1, 1))(a1) a1 = BN(axis=(0, 2, 3))(a1) a1 = Activation('relu')(a1) a1 = Dropout(0.3)(a1) a1 = Conv2D(n_outfmaps=64, n_row=3, n_col=3, act='linear', border_mode=(1, 1))(a1) a1 = BN(axis=(0, 2, 3))(a1) a1 = Activation('relu')(a1) a1 = Conv2D(n_outfmaps=64, n_row=3, n_col=3, act='linear', border_mode=(1, 1))(a1) a1 = BN(axis=(0, 2, 3))(a1) a1 = Activation('relu')(a1) a1 = Dropout(0.3)(a1) a1 = Conv2D(n_outfmaps=64, n_row=3, n_col=3, act='linear', border_mode=(1, 1))(a1) a1 = BN(axis=(0, 2, 3))(a1) a1 = Activation('relu')(a1) a1 = Conv2D(n_outfmaps=64, n_row=3, n_col=3, act='linear', border_mode=(1, 1))(a1) a1 = BN(axis=(0, 2, 3))(a1) a1 = Activation('relu')(a1) a1 = Dropout(0.3)(a1) a1 = Conv2D(n_outfmaps=64, n_row=3, n_col=3, act='linear', border_mode=(1, 1))(a1) a1 = BN(axis=(0, 2, 3))(a1) a1 = Activation('relu')(a1) a1 = Conv2D(n_outfmaps=64, n_row=3, n_col=3, act='linear', border_mode=(1, 1))(a1) a1 = BN(axis=(0, 2, 3))(a1) a1 = Activation('relu')(a1) a1 = Dropout(0.3)(a1) # Segmentation mask for 'babycry', 'glassbreak' and 'gunshot'. a1 = Conv2D(n_outfmaps=len(cfg.events), n_row=1, n_col=1, act='sigmoid', border_mode=(0, 0))(a1) # Extend segmentation mask to 'babycry', 'glassbreak', 'gunshot' and 'background'. a1 = Lambda(_seg_mask_ext_bg, name='seg_masks')(a1) # Classification mapping. cla_mapping = args.cla_mapping if cla_mapping == 'global_rank_pooling': weight1d = np.power(r * np.ones(120 * 64), np.arange(120 * 64)) a8 = Lambda(_global_rank_pooling, weight1d=weight1d, name='a5')(a1) elif cla_mapping == 'global_max_pooling': a8 = Lambda(_global_max_pooling)(a1) elif cla_mapping == 'global_avg_pooling': a8 = Lambda(_global_avg_pooling)(a1) else: raise Exception("Incorrect cla_mapping!") md = Model([in0], [a8]) md.compile() md.summary(is_logging=True) # Callbacks. 
    md_dir = os.path.join(workspace, "models", pp_data.get_filename(__file__))
    pp_data.create_folder(md_dir)
    save_model = SaveModel(md_dir, call_freq=100, type='iter')
    validation = Validation(te_x=te_x, te_y=te_y,
                            batch_size=100,
                            call_freq=50,
                            metrics=['binary_crossentropy'],
                            dump_path=None,
                            is_logging=True)
    callbacks = [save_model, validation]

    # Train.
    generator = DataGenerator(batch_size=20, type='train')
    loss_ary = []
    t1 = time.time()
    optimizer = Adam(1e-4)

    for (batch_x, batch_y) in generator.generate(xs=[tr_x], ys=[tr_y]):
        np.set_printoptions(threshold=np.nan, linewidth=1000, precision=2, suppress=True)
        loss = md.train_on_batch(batch_x, batch_y,
                                 loss_func='binary_crossentropy',
                                 optimizer=optimizer,
                                 callbacks=callbacks)
        loss_ary.append(loss)

        if md.iter_ % 50 == 0:
            # Evaluate training loss every several iterations.
            logging.info("iter: %d, tr loss: %f" % (md.iter_, np.mean(loss_ary)))
            logging.info("time: %s" % (time.time() - t1, ))
            t1 = time.time()
            loss_ary = []

        if md.iter_ == 10001:
            # Stop after several iterations.
            break
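# NOTE: the pooling functions wrapped by the Lambda layers in train() above
# (_global_max_pooling, _global_avg_pooling, _global_rank_pooling) come from
# elsewhere. Global rank pooling weights the sorted mask values with a
# geometric series r**0, r**1, ..., matching the weight1d construction above;
# the numpy sketch below only illustrates the mapping (the original operates
# on symbolic tensors), and the value of r is a hyperparameter assumption.
import numpy as np

def global_rank_pooling_sketch(seg_masks, r=0.999):
    """(n_clips, n_classes, n_time, n_freq) -> (n_clips, n_classes)"""
    (n_clips, n_classes, n_time, n_freq) = seg_masks.shape
    flat = seg_masks.reshape((n_clips, n_classes, n_time * n_freq))
    flat_sorted = np.sort(flat, axis=-1)[..., ::-1]     # Descending order.
    weights = np.power(r, np.arange(n_time * n_freq))
    return np.sum(flat_sorted * weights, axis=-1) / np.sum(weights)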
def train(args):
    EVAL_MAP = -1000.
    PATIENCE = 0
    data_dir = args.data_dir
    workspace = args.workspace
    tag = args.tag
    levels = args.levels

    # Path for the hdf5 data
    bal_train_path = os.path.join(data_dir, "bal_train.h5")
    unbal_train_path = os.path.join(data_dir, "unbal_train.h5")
    eval_path = os.path.join(data_dir, "eval.h5")

    # Load data
    t1 = time.time()
    (tr_x1, tr_y1, tr_id_list1) = pp_data.load_data(bal_train_path)
    (tr_x2, tr_y2, tr_id_list2) = pp_data.load_data(unbal_train_path)
    (eval_x, eval_y, eval_id_list) = pp_data.load_data(eval_path)

    # tr_x = tr_x1
    # tr_y = tr_y1
    # tr_id_list = tr_id_list1
    tr_x = np.concatenate((tr_x1, tr_x2))
    tr_y = np.concatenate((tr_y1, tr_y2))
    tr_id_list = tr_id_list1 + tr_id_list2

    logging.info("Loading data time: %s s" % (time.time() - t1))
    logging.info("%s %s" % (tr_x1.shape, tr_x2.shape))
    logging.info("tr_x.shape: %s" % (tr_x.shape, ))
    (_, n_time, n_freq) = tr_x.shape

    # Build model
    model = get_ml_attention(levels)
    logging.info(model.to_json())

    # Optimization method
    optimizer = Adam(lr=args.lr)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['binary_accuracy'])
    # logging.info(model.summary())

    batch_size = 500
    tr_gen = RatioDataGenerator(batch_size=batch_size, type='train')

    # Save model every call_freq iterations
    model_iter = 0
    call_freq = 1000
    dump_fd = os.path.join(workspace, "models", pp_data.get_filename(__file__))
    pp_data.create_folder(dump_fd)

    # Train
    stat_dir = os.path.join(workspace, "stats", pp_data.get_filename(__file__))
    pp_data.create_folder(stat_dir)
    prob_dir = os.path.join(workspace, "probs", pp_data.get_filename(__file__))
    pp_data.create_folder(prob_dir)

    tr_time = time.time()
    for (tr_batch_x, tr_batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]):
        # Compute stats every several iterations
        print(model_iter)
        if model_iter % call_freq == 0:  # Every 1000 iterations
            # Stats of evaluation dataset
            t1 = time.time()
            eval_MAP = eval(model=model, x=eval_x, y=eval_y,
                            out_dir=os.path.join(stat_dir, "eval"),
                            out_probs_dir=os.path.join(prob_dir, "eval"),
                            md_iter=model_iter)
            logging.info("Evaluate evaluation-set time: %s" % (time.time() - t1, ))

            # Early stopping on evaluation mAP.
            if eval_MAP >= EVAL_MAP:
                # md_name = "/scratch/work/xuz2/model_" + tag + "_.h5"
                md_name = tag + "_.h5"
                model.save(md_name)
                EVAL_MAP = eval_MAP
                PATIENCE = 0
            else:
                PATIENCE += 1
                logging.info("Patience now: %d" % (PATIENCE, ))
                if PATIENCE >= 10:
                    break
                    # print("Training stop at %s iterations" % (model_iter,))

            # Stats of training dataset
            # t1 = time.time()
            # tr_bal_err = eval(model=model, x=tr_x1, y=tr_y1,
            #                   out_dir=os.path.join(stat_dir, "train_bal"),
            #                   out_probs_dir=None,
            #                   md_iter=model_iter)
            # logging.info("Evaluate tr_bal time: %s" % (time.time() - t1,))

            # Save model
            # if eval_MAP > 0.342:
            #     md_name = "/scratch/work/xuz2/model_" + str(model_iter) + "_.h5"
            #     model.save(md_name)

        # Update params
        (tr_batch_x, tr_batch_y) = pp_data.transform_data(tr_batch_x, tr_batch_y)
        model.train_on_batch(tr_batch_x, tr_batch_y)
        model_iter += 1

        # Stop training when maximum iteration achieves
        if model_iter == call_freq * 151:
            break
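# NOTE: eval() called in train() above returns an evaluation-set mean average
# precision (used for early stopping) and writes out predicted probabilities
# in the "prob_%d_iters.p" format that get_avg_stats() reads. The sketch below
# is an assumption about that helper (statistics written to out_dir are
# omitted here):
import os
try:
    import cPickle
except ImportError:
    import pickle as cPickle
import numpy as np
from sklearn import metrics

def eval_sketch(model, x, y, out_dir, out_probs_dir, md_iter, batch_size=500):
    probs = model.predict(x, batch_size=batch_size)     # (n_clips, n_out)
    if out_probs_dir is not None:
        if not os.path.exists(out_probs_dir):
            os.makedirs(out_probs_dir)
        out_path = os.path.join(out_probs_dir, "prob_%d_iters.p" % md_iter)
        cPickle.dump(probs, open(out_path, 'wb'),
                     protocol=cPickle.HIGHEST_PROTOCOL)
    return metrics.average_precision_score(y, probs, average='macro')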
def train(args): cpickle_dir = args.cpickle_dir workspace = args.workspace # Path of hdf5 data bal_train_hdf5_path = os.path.join(cpickle_dir, "bal_train.h5") unbal_train_hdf5_path = os.path.join(cpickle_dir, "unbal_train.h5") eval_hdf5_path = os.path.join(cpickle_dir, "eval.h5") # Load data t1 = time.time() (tr_x1, tr_y1, tr_id_list1) = pp_data.load_data(bal_train_hdf5_path) (tr_x2, tr_y2, tr_id_list2) = pp_data.load_data(unbal_train_hdf5_path) tr_x = np.concatenate((tr_x1, tr_x2)) tr_y = np.concatenate((tr_y1, tr_y2)) tr_id_list = tr_id_list1 + tr_id_list2 (te_x, te_y, te_id_list) = pp_data.load_data(eval_hdf5_path) logging.info("Loading data time: %s s" % (time.time() - t1)) logging.info(tr_x1.shape, tr_x2.shape) logging.info("tr_x.shape: %s" % (tr_x.shape, )) (_, n_time, n_freq) = tr_x.shape # Build model n_hid = 500 n_out = tr_y.shape[1] lay_in = InputLayer(in_shape=(n_time, n_freq)) a = Dense(n_out=n_hid, act='relu')(lay_in) a = Dropout(p_drop=0.2)(a) a = Dense(n_out=n_hid, act='relu')(a) a = Dropout(p_drop=0.2)(a) a = Dense(n_out=n_hid, act='relu')(a) a = Dropout(p_drop=0.2)(a) cla = Dense(n_out=n_out, act='sigmoid', name='cla')(a) att = Dense(n_out=n_out, act='softmax', name='att')(a) # Attention lay_out = Lambda(_attention)([cla, att]) # Compile model md = Model(in_layers=[lay_in], out_layers=[lay_out]) md.compile() md.summary(is_logging=True) # Save model every several iterations call_freq = 1000 dump_fd = os.path.join(workspace, "models", pp_data.get_filename(__file__)) pp_data.create_folder(dump_fd) save_model = SaveModel(dump_fd=dump_fd, call_freq=call_freq, type='iter', is_logging=True) # Callbacks function callbacks = [save_model] batch_size = 500 tr_gen = RatioDataGenerator(batch_size=batch_size, type='train') # Optimization method optimizer = Adam(lr=args.lr) # Train stat_dir = os.path.join(workspace, "stats", pp_data.get_filename(__file__)) pp_data.create_folder(stat_dir) prob_dir = os.path.join(workspace, "probs", pp_data.get_filename(__file__)) pp_data.create_folder(prob_dir) tr_time = time.time() for (tr_batch_x, tr_batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]): # Compute stats every several interations if md.iter_ % call_freq == 0: # Stats of evaluation dataset t1 = time.time() te_err = eval(md=md, x=te_x, y=te_y, out_dir=os.path.join(stat_dir, "test"), out_probs_dir=os.path.join(prob_dir, "test")) logging.info("Evaluate test time: %s" % (time.time() - t1, )) # Stats of training dataset t1 = time.time() tr_bal_err = eval(md=md, x=tr_x1, y=tr_y1, out_dir=os.path.join(stat_dir, "train_bal"), out_probs_dir=None) logging.info("Evaluate tr_bal time: %s" % (time.time() - t1, )) # Update params (tr_batch_x, tr_batch_y) = pp_data.transform_data(tr_batch_x, tr_batch_y) md.train_on_batch(batch_x=tr_batch_x, batch_y=tr_batch_y, loss_func='binary_crossentropy', optimizer=optimizer, callbacks=callbacks) # Stop training when maximum iteration achieves if md.iter_ == call_freq * 31: break
# Arguments parser = argparse.ArgumentParser(description="") subparsers = parser.add_subparsers(dest='mode') parser_train = subparsers.add_parser('train') parser_train.add_argument('--cpickle_dir', type=str) parser_train.add_argument('--workspace', type=str) parser_train.add_argument('--lr', type=float, default=1e-3) parser_get_avg_stats = subparsers.add_parser('get_avg_stats') parser_get_avg_stats.add_argument('--cpickle_dir', type=str) parser_get_avg_stats.add_argument('--workspace') args = parser.parse_args() # Logs logs_dir = os.path.join(args.workspace, "logs", pp_data.get_filename(__file__)) pp_data.create_folder(logs_dir) logging = pp_data.create_logging(logs_dir, filemode='w') logging.info(os.path.abspath(__file__)) logging.info(sys.argv) if args.mode == "train": train(args) elif args.mode == 'get_avg_stats': file_name = pp_data.get_filename(__file__) bgn_iter, fin_iter, interval_iter = 20000, 30001, 1000 get_avg_stats(args, file_name, bgn_iter, fin_iter, interval_iter) else: raise Exception("Error!")