def recognize(args):
    workspace = cfg.workspace
    events = cfg.events
    n_events = args.n_events
    snr = args.snr
    md_na = args.model_name
    lb_to_ix = cfg.lb_to_ix
    n_out = len(cfg.events)
    te_fold = cfg.te_fold

    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, "fold=%d" % te_fold,
                           "snr=%d" % snr, md_na)
    md = serializations.load(md_path)

    # Load data.
    feature_dir = os.path.join(workspace, "features", "logmel",
                               "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y,
     te_na_list) = pp_data.load_data(feature_dir=feature_dir,
                                     yaml_dir=yaml_dir,
                                     te_fold=te_fold,
                                     snr=snr,
                                     is_scale=is_scale)

    x = te_x
    at_gts = te_at_y
    sed_gts = te_sed_y
    na_list = te_na_list

    # Recognize.
    [at_pds] = md.predict(x)  # (N, 16)

    observe_nodes = [md.find_layer('detect').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [seg_masks] = md.run_function(f_forward, x, batch_size=500,
                                  tr_phase=0.)  # (n_clips, n_time, n_out)
    seg_masks = np.transpose(seg_masks, (0, 2, 1))[:, :, :, np.newaxis]
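    # After the transpose and added trailing axis, seg_masks has shape (n_clips, n_out, n_time, 1).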

    # Dump to pickle.
    out_dir = os.path.join(workspace, "preds", pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, "fold=%d" % te_fold,
                           "snr=%d" % snr,
                           os.path.splitext(md_na)[0])
    pp_data.create_folder(out_dir)
    out_at_path = os.path.join(out_dir, "at_probs.p")
    out_seg_masks_path = os.path.join(out_dir, "seg_masks.p")

    cPickle.dump(at_pds,
                 open(out_at_path, 'wb'),
                 protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(seg_masks,
                 open(out_seg_masks_path, 'wb'),
                 protocol=cPickle.HIGHEST_PROTOCOL)

    # Print stats.
    sed_pds = np.mean(seg_masks, axis=-1)  # (N, n_out, n_time)
    sed_pds = np.transpose(sed_pds, (0, 2, 1))  # (N, n_time, n_out)
    print_stats(at_pds, at_gts, sed_pds, sed_gts)
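# ---------------------------------------------------------------------------
# NOTE: print_stats() used above is imported from elsewhere and is not part of
# these snippets. The function below is only a minimal, hypothetical sketch of
# a compatible helper (an assumption, not the original implementation): it
# reports macro-averaged clip-level AUC for audio tagging (AT) and
# frame-level AUC for sound event detection (SED).
import numpy as np
from sklearn import metrics

def print_stats_sketch(at_pds, at_gts, sed_pds, sed_gts):
    """at_*: (n_clips, n_classes). sed_*: (n_clips, n_time, n_classes)."""
    at_auc = metrics.roc_auc_score(at_gts, at_pds, average='macro')
    n_classes = sed_gts.shape[-1]
    sed_auc = metrics.roc_auc_score(sed_gts.reshape((-1, n_classes)),
                                    sed_pds.reshape((-1, n_classes)),
                                    average='macro')
    print("AT macro AUC: %.3f" % at_auc)
    print("SED macro AUC: %.3f" % sed_auc)
# ---------------------------------------------------------------------------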
def get_sep_stats(args):
    workspace = cfg.workspace
    te_fold = cfg.te_fold
    events = cfg.events
    n_events = args.n_events
    snr = args.snr
    sep_stat_path = os.path.join(workspace, "sep_stats",
                                 pp_data.get_filename(__file__),
                                 "n_events=%d" % n_events, "fold=%d" % te_fold,
                                 "snr=%d" % snr, "sep_stat.p")
    sep_stats = cPickle.load(open(sep_stat_path, 'rb'))
    print(sep_stats)

    sdrs, sirs, sars = [], [], []
    for e in events:
        sdr = np.mean(sep_stats[e]['sdr'][0])
        sir = np.mean(sep_stats[e]['sir'][0])
        sar = np.mean(sep_stats[e]['sar'][0])
        sdrs.append(sdr)
        sirs.append(sir)
        sars.append(sar)

    logging.info("%sSDR\tSIR\tSAR" % ("".ljust(16)))
    logging.info("*%s*%.3f\t*%.3f\t*%.3f" %
                 ("Avg. of each".ljust(16), np.mean(sdrs), np.mean(sirs),
                  np.mean(sars)))
    for i1 in xrange(len(events)):
        logging.info("%s%.3f\t%.3f\t%.3f" %
                     (events[i1].ljust(16), sdrs[i1], sirs[i1], sars[i1]))
Example #3
def get_avg_stats(args, file_name, bgn_iter, fin_iter, interval_iter):
    eval_hdf5_path = os.path.join(args.cpickle_dir, "eval.h5")
    workspace = args.workspace
    
    # Load ground truth
    (te_x, te_y, te_id_list) = pp_data.load_data(eval_hdf5_path)
    y = te_y
    
    # Average prediction probabilities over several iterations
    prob_dir = os.path.join(workspace, "probs", file_name, "test")
    names = os.listdir(prob_dir)
    
    probs = []
    iters = range(bgn_iter, fin_iter, interval_iter)
    for iter in iters:
        pickle_path = os.path.join(prob_dir, "prob_%d_iters.p" % iter)
        prob = cPickle.load(open(pickle_path, 'rb'))
        probs.append(prob)
    #print(len(probs))
    avg_prob = np.mean(np.array(probs), axis=0)

    # Compute stats
    t1 = time.time()
    n_out = y.shape[1]
    stats = []
    for k in range(n_out):
        (precisions, recalls, thresholds) = metrics.precision_recall_curve(y[:, k], avg_prob[:, k])
        avg_precision = metrics.average_precision_score(y[:, k], avg_prob[:, k], average=None)
        (fpr, tpr, thresholds) = metrics.roc_curve(y[:, k], avg_prob[:, k])
        auc = metrics.roc_auc_score(y[:, k], avg_prob[:, k], average=None)
        #eer = pp_data.eer(avg_prob[:, k], y[:, k])
        
        skip = 1000
        stat_dict = {'precisions': precisions[0::skip], 'recalls': recalls[0::skip], 'AP': avg_precision, 
                     'fpr': fpr[0::skip], 'fnr': 1. - tpr[0::skip], 'auc': auc}
        
        stats.append(stat_dict)
    logging.info("Callback time: %s" % (time.time() - t1,))
    
    # Dump stats
    dump_path = os.path.join(workspace, "stats", pp_data.get_filename(__file__), "test", "avg_%d_%d_%d.p" % (bgn_iter, fin_iter, interval_iter))
    pp_data.create_folder(os.path.dirname(dump_path))
    cPickle.dump(stats, open(dump_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)
    #print(stats.shape)
    #for i, e in enumerate(stats):
    #  logging.info("%d. mAP: %f, auc: %f, d_prime: %f" % (i, e['AP'], e['auc'], pp_data.d_prime(e['auc'])))

    # Write out to log
    logging.info("bgn_iter, fin_iter, interval_iter: %d, %d, %d" % (bgn_iter, fin_iter, interval_iter))
    logging.info("mAP: %f" % np.mean([e['AP'] for e in stats]))
    auc = np.mean([e['auc'] for e in stats])
    logging.info("auc: %f" % auc)
    logging.info("d_prime: %f" % pp_data.d_prime(auc))
Example #4
def plot_seg_masks(args):
    # Load data.
    te_pack_path = os.path.join(workspace, "packed_features", "logmel",
                                "testing.h5")
    scaler_path = os.path.join(workspace, "scalers", "logmel",
                               "training.scaler")

    with h5py.File(te_pack_path, 'r') as hf:
        te_na_list = list(hf.get('na_list'))
        te_x = np.array(hf.get('x'))
        te_y = np.array(hf.get('y'))

    te_x_unscaled = te_x  # unscaled x for plot.
    scaler = pickle.load(open(scaler_path, 'rb'))
    te_x = pp_data.do_scaler_on_x3d(te_x, scaler)

    # Load model.
    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                           args.model_name)
    md = serializations.load(md_path)

    # Observe function.
    observe_nodes = [md.find_layer('seg_masks').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [seg_masks] = md.run_function(f_forward, te_x, batch_size=50, tr_phase=0.)
    print("Segmentation masks: %s" % (seg_masks.shape, ))

    # Plot segmentation masks.
    for i1 in xrange(len(seg_masks)):
        na = te_na_list[i1]
        if ".mix_0db.wav" in na:
            print(na)
            gt_y = te_y[i1].astype(np.float32)
            print(gt_y)
            print("Ground truth: %s" % cfg.events[np.argmax(gt_y)])

            events_ex = cfg.events + ['bg']
            fig, axs = plt.subplots(3, 2, sharex=True)
            axs[0, 0].matshow(te_x_unscaled[i1].T,
                              origin='lower',
                              aspect='auto')
            axs[0, 0].set_title("log Mel spectrogram")
            for i2 in xrange(0, 4):
                axs[i2 / 2 + 1, i2 % 2].matshow(seg_masks[i1, i2].T,
                                                origin='lower',
                                                aspect='auto',
                                                vmin=0,
                                                vmax=1)
                axs[i2 / 2 + 1, i2 % 2].set_title(events_ex[i2])
            plt.show()
Example #5
def write_out_at_sed(md, gen, f_forward, x, at_y, sed_y, n_events, snr, te_fold):
    workspace = cfg.workspace
    pred_at_all = []
    seg_masks_all = []
    gt_at_all = []
    gt_sed_all = []
    for [batch_x, batch_at_y, batch_sed_y] in gen.generate(zs=[x, at_y, sed_y]):
        # AT. 
        [at_pred] = md.predict(batch_x, batch_size=None)
        pred_at_all.append(at_pred)
        
        # SED. 
        [seg_masks] = md.run_function(func=f_forward, z=[batch_x], batch_size=500, tr_phase=0.)
        seg_masks_all.append(seg_masks)
        
        gt_at_all.append(batch_at_y)
        gt_sed_all.append(batch_sed_y)
        
    # DO NOT SHUFFLE DATA!
    pred_at_all = np.concatenate(pred_at_all, axis=0)
    seg_masks_all = np.concatenate(seg_masks_all, axis=0)
    
    gt_at_all = np.concatenate(gt_at_all, axis=0)
    gt_sed_all = np.concatenate(gt_sed_all, axis=0)

    # Compress to float16 to reduce space. 
    pred_at_all = pred_at_all.astype(np.float16)
    seg_masks_all = seg_masks_all.astype(np.float16)
    
    print(pred_at_all.shape)
    print(seg_masks_all.shape)
    print(pred_at_all.dtype)
    
    out_dir = os.path.join(workspace, "callbacks", "preds", pp_data.get_filename(__file__), 
                          "n_events=%d" % n_events, "fold=%d" % te_fold, "snr=%d" % snr, 
                          "md%d_iters" % md.iter_)
    pp_data.create_folder(out_dir)
    out_at_path = os.path.join(out_dir, "at_probs.p")
    out_seg_masks_path = os.path.join(out_dir, "seg_masks.p")
    
    cPickle.dump(pred_at_all, open(out_at_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)
    cPickle.dump(seg_masks_all, open(out_seg_masks_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)
    
    thres = 0.5
    (tp, fn, fp, tn) = tp_fn_fp_tn(pred_at_all, gt_at_all, thres, average='macro')
    (prec, recall, fvalue) = prec_recall_fvalue(pred_at_all, gt_at_all, thres, average='macro')
    logging.info("tp, fn, fp, tn: %d %d %d %d" % (tp, fn, fp, tn))
    logging.info("prec, recall, fvalue: %f %f %f" % (prec, recall, fvalue))
def get_stats(args, bgn_iter, fin_iter, interval):
    workspace = cfg.workspace
    events = cfg.events
    te_fold = cfg.te_fold
    n_events = args.n_events
    snr = args.snr

    # Load ground truth data.
    feature_dir = os.path.join(workspace, "features", "logmel",
                               "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y,
     te_na_list) = pp_data.load_data(feature_dir=feature_dir,
                                     yaml_dir=yaml_dir,
                                     te_fold=te_fold,
                                     snr=snr,
                                     is_scale=is_scale)

    at_gts = te_at_y
    sed_gts = te_sed_y

    # Load and sum
    preds_dir = os.path.join(workspace, "preds",
                             pp_data.get_filename(__file__),
                             "n_events=%d" % n_events, "fold=%d" % te_fold,
                             "snr=%d" % snr)

    at_probs_list, seg_masks_list = [], []
    for iter in xrange(bgn_iter, fin_iter, interval):
        at_probs_path = os.path.join(preds_dir, "md%d_iters" % iter,
                                     "at_probs.p")
        at_probs = cPickle.load(open(at_probs_path, 'rb'))
        at_probs_list.append(at_probs)
        seg_masks_path = os.path.join(preds_dir, "md%d_iters" % iter,
                                      "seg_masks.p")
        seg_masks = cPickle.load(open(seg_masks_path, 'rb'))
        seg_masks_list.append(seg_masks)
    at_probs = np.mean(at_probs_list, axis=0)  # (n_clips, n_classes)
    seg_masks = np.mean(seg_masks_list,
                        axis=0)  # (n_clips, n_classes, n_time, n_freq)
    sed_probs = np.mean(seg_masks,
                        axis=-1).transpose(0, 2,
                                           1)  # (n_clips, n_time, n_classes)

    print_stats(at_probs, at_gts, sed_probs, sed_gts)
def plot_hotmap(args):
    workspace = cfg.workspace
    events = cfg.events
    md_na = args.model_name
    n_events = args.n_events
    te_fold = cfg.te_fold

    feature_dir = os.path.join(workspace, "features", "logmel",
                               "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y,
     te_na_list) = pp_data.load_data(feature_dir=feature_dir,
                                     yaml_dir=yaml_dir,
                                     te_fold=te_fold,
                                     is_scale=is_scale)

    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, md_na)
    md = serializations.load(md_path)

    x = te_x
    y = te_at_y

    observe_nodes = [md.find_layer('hotmap').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)
    [a4] = md.run_function(f_forward, x, batch_size=500, tr_phase=0.)
    print(a4.shape)

    for i1 in xrange(len(a4)):
        # if te_na_list[i1] == 'CR_lounge_220110_0731.s2700_chunk48':
        print(y[i1])

        # print np.mean(a4[i1], axis=(1,2))

        fig, axs = plt.subplots(5, 4, sharex=True)
        axs[0, 0].matshow(x[i1].T, origin='lower', aspect='auto')
        for i2 in xrange(16):
            axs[i2 / 4 + 1, i2 % 4].matshow(a4[i1, i2].T,
                                            origin='lower',
                                            aspect='auto',
                                            vmin=0,
                                            vmax=1)
            axs[i2 / 4 + 1, i2 % 4].set_title(events[i2])
        plt.show()
    avg = {}
    for e in ['sdr', 'sir', 'sar']:
        avg[e] = []
        
    for event_type in dict.keys():
        logging.info(event_type)
        for evaluate_type in dict[event_type]:
            tmp = np.mean(dict[event_type][evaluate_type])
            logging.info((evaluate_type, tmp))
            avg[evaluate_type[0:3]].append(tmp)

    logging.info("Average stats:")
    for e in ['sdr', 'sir', 'sar']:
        logging.info("%s, %f" % (e, np.mean(avg[e])))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description="")
    parser.add_argument("--workspace", type=str)
    parser.add_argument("--sep_type", type=str, help="The sub folder of separation. ")
    args = parser.parse_args()
    
    logs_dir = os.path.join(args.workspace, "logs", pp_data.get_filename(__file__))
    pp_data.create_folder(logs_dir)
    logging = pp_data.create_logging(logs_dir, filemode='w')
    logging.info(os.path.abspath(__file__))
    logging.info(sys.argv)
    
    evaluate_separation(args)
def evaluate_separation(args):
    workspace = cfg.workspace
    events = cfg.events
    te_fold = cfg.te_fold
    n_window = cfg.n_window
    n_overlap = cfg.n_overlap
    fs = cfg.sample_rate
    clip_duration = cfg.clip_duration
    n_events = args.n_events
    snr = args.snr

    # Load ground truth data.
    feature_dir = os.path.join(workspace, "features", "logmel",
                               "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y,
     te_na_list) = pp_data.load_data(feature_dir=feature_dir,
                                     yaml_dir=yaml_dir,
                                     te_fold=te_fold,
                                     snr=snr,
                                     is_scale=is_scale)

    at_y = te_at_y
    sed_y = te_sed_y
    na_list = te_na_list

    audio_dir = os.path.join(workspace, "mixed_audio",
                             "n_events=%d" % n_events)

    sep_dir = os.path.join(workspace, "sep_audio",
                           pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, "fold=%d" % te_fold,
                           "snr=%d" % snr)

    sep_stats = {}
    for e in events:
        sep_stats[e] = {'sdr': [], 'sir': [], 'sar': []}

    cnt = 0
    for (i1, na) in enumerate(na_list):
        bare_na = os.path.splitext(na)[0]
        gt_audio_path = os.path.join(audio_dir, "%s.wav" % bare_na)
        (stereo_audio, _) = pp_data.read_stereo_audio(gt_audio_path,
                                                      target_fs=fs)
        gt_event_audio = stereo_audio[:, 0]
        gt_noise_audio = stereo_audio[:, 1]

        print(na)
        for j1 in xrange(len(events)):
            if at_y[i1][j1] == 1:
                sep_event_audio_path = os.path.join(
                    sep_dir, "%s.%s.wav" % (bare_na, events[j1]))
                (sep_event_audio, _) = pp_data.read_audio(sep_event_audio_path,
                                                          target_fs=fs)
                sep_noise_audio_path = os.path.join(sep_dir,
                                                    "%s.noise.wav" % bare_na)
                (sep_noise_audio, _) = pp_data.read_audio(sep_noise_audio_path,
                                                          target_fs=fs)
                ref_array = np.array((gt_event_audio, gt_noise_audio))
                est_array = np.array((sep_event_audio, sep_noise_audio))
                (sdr, sir, sar) = sdr_sir_sar(ref_array,
                                              est_array,
                                              sed_y[i1, :, j1],
                                              inside_only=True)
                print(sdr, sir, sar)
                sep_stats[events[j1]]['sdr'].append(sdr)
                sep_stats[events[j1]]['sir'].append(sir)
                sep_stats[events[j1]]['sar'].append(sar)

        cnt += 1
        # if cnt == 5: break

    print(sep_stats)
    sep_stat_path = os.path.join(workspace, "sep_stats",
                                 pp_data.get_filename(__file__),
                                 "n_events=%d" % n_events, "fold=%d" % te_fold,
                                 "snr=%d" % snr, "sep_stat.p")
    pp_data.create_folder(os.path.dirname(sep_stat_path))
    cPickle.dump(sep_stats, open(sep_stat_path, 'wb'))
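# ---------------------------------------------------------------------------
# NOTE: sdr_sir_sar() used above is not included in these snippets. A
# hypothetical sketch of a compatible helper built on mir_eval is shown below;
# it assumes the standard BSS-eval metrics are computed on the full signals
# (the `inside_only` trimming to frames where the event is active is omitted
# here, so this is an illustration rather than the original).
import mir_eval

def sdr_sir_sar_sketch(ref_array, est_array, sed_frames=None, inside_only=False):
    """ref_array, est_array: (n_sources, n_samples). Returns one value per source."""
    (sdr, sir, sar, _) = mir_eval.separation.bss_eval_sources(
        ref_array, est_array, compute_permutation=False)
    return sdr, sir, sar
# ---------------------------------------------------------------------------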
Example #10
def recognize(args):
    workspace = args.workspace
    md_path = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                           args.model_name)
    t1 = time.time()

    # Load scaler.
    scaler_path = os.path.join(workspace, "scalers", "logmel",
                               "training.scaler")
    scaler = pickle.load(open(scaler_path, 'rb'))

    # Load model.
    md = serializations.load(md_path)

    # Observe function.
    observe_nodes = [md.find_layer('seg_masks').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)

    audio_dir = os.path.join(workspace, "mixed_audio", "testing")
    names = os.listdir(audio_dir)

    at_pd_ary = []
    at_gt_ary = []
    sed_pd_ary = []
    sed_gt_ary = []

    # For all audio clips.
    for na in names:
        if '.mix_0db.wav' in na:
            logging.info(na)

            # Load audio.
            bare_na = os.path.splitext(os.path.splitext(na)[0])[0]
            audio_path = os.path.join(audio_dir, na)
            (bg_audio, event_audio, fs) = pp_data.read_audio_stereo(audio_path)
            mixed_audio = bg_audio + event_audio

            # Load yaml.
            yaml_path = os.path.join(audio_dir, "%s.yaml" % bare_na)
            with open(yaml_path, 'r') as f:
                data = yaml.load(f)
            event_type = data['event_type']

            # Calculate feature.
            x = pp_data.calc_feat(mixed_audio)
            x3d = pp_data.do_scaler_on_x3d(x[np.newaxis, ...], scaler)

            # Ground truth.
            gt_y = [0, 0, 0, 0]
            gt_y[cfg.lb_to_ix[event_type]] = 1
            at_gt_ary.append(gt_y)

            # Audio tagging (AT) prediction.
            [pred_y] = md.predict(x3d)  # (1, n_events+1)
            pred_y = pred_y[0]  # (n_events+1,)
            at_pd_ary.append(pred_y)

            # Sound event detection (SED) prediction.
            [masks] = md.run_function(
                f_forward, x3d, batch_size=10,
                tr_phase=0.)  # (1, n_events+1, n_time, n_freq)
            masks = masks[0]  # (n_events+1, n_time, n_freq)
            sed_pd = np.mean(masks, axis=-1).T  # (n_time, n_events+1)
            sed_pd_ary.append(sed_pd)
            sed_gt = np.zeros_like(sed_pd)
            [bgn_sec, fin_sec] = data['event_segment']
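            # Convert the annotated onset / offset times (seconds) to frame indices
            # using the STFT hop size of (n_window - n_overlap) samples.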
            bgn_fr = int(bgn_sec * cfg.sample_rate /
                         float(cfg.n_window - cfg.n_overlap))
            fin_fr = int(fin_sec * cfg.sample_rate /
                         float(cfg.n_window - cfg.n_overlap))
            sed_gt[bgn_fr:fin_fr, cfg.lb_to_ix[event_type]] = 1
            sed_gt_ary.append(sed_gt)

    at_pd_ary = np.array(at_pd_ary)
    at_gt_ary = np.array(at_gt_ary)
    sed_pd_ary = np.array(sed_pd_ary)
    sed_gt_ary = np.array(sed_gt_ary)

    # Write out AT and SED presence probabilities.
    logging.info("at_pd_ary.shape: %s" % (at_pd_ary.shape, ))
    logging.info("at_gt_ary.shape: %s" % (at_gt_ary.shape, ))
    logging.info("sed_pd_ary.shape: %s" % (sed_pd_ary.shape, ))
    logging.info("sed_gt_ary.shape: %s" % (sed_gt_ary.shape, ))
    out_dict = {}
    out_dict['at_pd_ary'] = at_pd_ary
    out_dict['at_gt_ary'] = at_gt_ary
    out_dict['sed_pd_ary'] = sed_pd_ary
    out_dict['sed_gt_ary'] = sed_gt_ary
    out_path = os.path.join(workspace, "_tmp", "_at_sed_dict.p")
    pp_data.create_folder(os.path.dirname(out_path))
    cPickle.dump(out_dict, open(out_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)
    logging.info("Recognize time: %s" % (time.time() - t1, ))
def separate(args, bgn_iter, fin_iter, interval):
    workspace = cfg.workspace
    events = cfg.events
    te_fold = cfg.te_fold
    n_events = args.n_events
    n_window = cfg.n_window
    n_overlap = cfg.n_overlap
    fs = cfg.sample_rate
    clip_duration = cfg.clip_duration
    snr = args.snr

    # Load ground truth data.
    feature_dir = os.path.join(workspace, "features", "logmel",
                               "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y,
     te_na_list) = pp_data.load_data(feature_dir=feature_dir,
                                     yaml_dir=yaml_dir,
                                     te_fold=te_fold,
                                     snr=snr,
                                     is_scale=is_scale)

    at_y = te_at_y
    sed_y = te_sed_y
    na_list = te_na_list

    # Load and sum
    preds_dir = os.path.join(workspace, "preds",
                             pp_data.get_filename(__file__),
                             "n_events=%d" % n_events, "fold=%d" % te_fold,
                             "snr=%d" % snr)

    at_probs_list, seg_masks_list = [], []
    for iter in xrange(bgn_iter, fin_iter, interval):
        seg_masks_path = os.path.join(preds_dir, "md%d_iters" % iter,
                                      "seg_masks.p")
        seg_masks = cPickle.load(open(seg_masks_path, 'rb'))
        seg_masks_list.append(seg_masks)
    seg_masks = np.mean(seg_masks_list,
                        axis=0)  # (n_clips, n_classes, n_time, n_freq)

    print(seg_masks.shape)

    #
    audio_dir = os.path.join(workspace, "mixed_audio",
                             "n_events=%d" % n_events)

    sep_dir = os.path.join(workspace, "sep_audio",
                           pp_data.get_filename(__file__),
                           "n_events=%d" % n_events, "fold=%d" % te_fold,
                           "snr=%d" % snr)
    pp_data.create_folder(sep_dir)

    ham_win = np.hamming(n_window)
    recover_scaler = np.sqrt((ham_win**2).sum())
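    # recover_scaler presumably compensates for the Hamming analysis-window scaling
    # when waveforms are resynthesised below (an assumption; calc_sp / recover_wav
    # are not shown in these snippets).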
    melW = librosa.filters.mel(sr=fs,
                               n_fft=n_window,
                               n_mels=64,
                               fmin=0.,
                               fmax=fs / 2)
    inverse_melW = get_inverse_W(melW)  # (64, 513)
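    # inverse_melW maps 64-bin mel-domain masks back onto the 513-bin linear STFT
    # frequency axis (used via np.dot(sm, inverse_melW) below).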

    seg_stats = {}
    for e in events:
        seg_stats[e] = {
            'fvalue': [],
            'auc': [],
            'iou': [],
            'hit': [],
            'fa': [],
            'tp': [],
            'fn': [],
            'fp': []
        }

    cnt = 0
    for (i1, na) in enumerate(na_list):
        bare_na = os.path.splitext(na)[0]
        audio_path = os.path.join(audio_dir, "%s.wav" % bare_na)
        (stereo_audio, _) = pp_data.read_stereo_audio(audio_path, target_fs=fs)
        event_audio = stereo_audio[:, 0]
        noise_audio = stereo_audio[:, 1]
        mixed_audio = event_audio + noise_audio

        mixed_cmplx_sp = pp_data.calc_sp(mixed_audio, fs, ham_win, n_window,
                                         n_overlap)
        mixed_sp = np.abs(mixed_cmplx_sp)
        event_sp = np.abs(
            pp_data.calc_sp(event_audio, fs, ham_win, n_window, n_overlap))
        noise_sp = np.abs(
            pp_data.calc_sp(noise_audio, fs, ham_win, n_window, n_overlap))

        sm = seg_masks[i1]  # (n_classes, n_time, n_freq)
        sm_upsampled = np.dot(sm, inverse_melW)  # (n_classes, n_time, 513)

        print(na)

        # Write out separated events.
        for j1 in xrange(len(events)):
            if at_y[i1][j1] == 1:
                (fvalue, auc, iou, tp, fn, fp) = fvalue_iou(sm_upsampled[j1],
                                                            event_sp,
                                                            noise_sp,
                                                            sed_y[i1, :, j1],
                                                            seg_thres,
                                                            inside_only=True)
                (hit, fa) = hit_fa(sm_upsampled[j1],
                                   event_sp,
                                   noise_sp,
                                   sed_y[i1, :, j1],
                                   seg_thres,
                                   inside_only=True)
                seg_stats[events[j1]]['fvalue'].append(fvalue)
                seg_stats[events[j1]]['auc'].append(auc)
                seg_stats[events[j1]]['iou'].append(iou)
                seg_stats[events[j1]]['hit'].append(hit)
                seg_stats[events[j1]]['fa'].append(fa)
                seg_stats[events[j1]]['tp'].append(tp)
                seg_stats[events[j1]]['fn'].append(fn)
                seg_stats[events[j1]]['fp'].append(fp)

                sep_event_sp = sm_upsampled[j1] * mixed_sp
                sep_event_s = spectrogram_to_wave.recover_wav(
                    sep_event_sp,
                    mixed_cmplx_sp,
                    n_overlap=n_overlap,
                    winfunc=np.hamming,
                    wav_len=int(fs * clip_duration))
                sep_event_s *= recover_scaler

                out_event_audio_path = os.path.join(
                    sep_dir, "%s.%s.wav" % (bare_na, events[j1]))
                pp_data.write_audio(out_event_audio_path, sep_event_s, fs)

        # Write out separated noise.
        sm_noise_upsampled = np.clip(1. - np.sum(sm_upsampled, axis=0), 0., 1.)
        sep_noise_sp = sm_noise_upsampled * mixed_sp
        sep_noise_s = spectrogram_to_wave.recover_wav(sep_noise_sp,
                                                      mixed_cmplx_sp,
                                                      n_overlap=n_overlap,
                                                      winfunc=np.hamming,
                                                      wav_len=int(
                                                          fs * clip_duration))
        sep_noise_s *= recover_scaler
        out_noise_audio_path = os.path.join(sep_dir, "%s.noise.wav" % bare_na)
        pp_data.write_audio(out_noise_audio_path, sep_noise_s, fs)

        cnt += 1
        # if cnt == 2: break


    fvalues, aucs, ious, hits, fas, tps, fns, fps = [], [], [], [], [], [], [], []
    for e in events:
        fvalues.append(np.mean(seg_stats[e]['fvalue']))
        ious.append(np.mean(seg_stats[e]['iou']))
        aucs.append(np.mean(seg_stats[e]['auc']))
        hits.append(np.mean(seg_stats[e]['hit']))
        fas.append(np.mean(seg_stats[e]['fa']))
        tps.append(np.mean(seg_stats[e]['tp']))
        fns.append(np.mean(seg_stats[e]['fn']))
        fps.append(np.mean(seg_stats[e]['fp']))

    logging.info("%sfvalue\tauc\tiou\tHit\tFa\tHit-Fa\tTP\tFN\tFP" %
                 ("".ljust(16)))
    logging.info(
        "%s*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f\t*%.3f" %
        ("*Avg. of each".ljust(16), np.mean(fvalues), np.mean(aucs),
         np.mean(ious), np.mean(hits), np.mean(fas), np.mean(hits) -
         np.mean(fas), np.mean(tps), np.mean(fns), np.mean(fps)))
    for i1 in xrange(len(events)):
        logging.info(
            "%s%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f\t%.3f" %
            (events[i1].ljust(16), fvalues[i1], aucs[i1], ious[i1], hits[i1],
             fas[i1], hits[i1] - fas[i1], tps[i1], fns[i1], fps[i1]))
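# ---------------------------------------------------------------------------
# NOTE: get_inverse_W() used above is not defined in these snippets. One common
# choice for an approximate inverse of a mel filterbank is the filterbank
# re-normalised so that each linear-frequency column sums to one, which spreads
# a 64-bin mel-domain mask back over the 513 linear STFT bins. This is an
# assumption about the helper, not necessarily the original implementation.
import numpy as np

def get_inverse_W_sketch(W, eps=1e-10):
    """W: (n_mels, n_fft // 2 + 1) mel filterbank, e.g. (64, 513)."""
    return W / (np.sum(W, axis=0, keepdims=True) + eps)  # (n_mels, n_fft // 2 + 1)
# ---------------------------------------------------------------------------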
def train(args):
    workspace = cfg.workspace
    te_fold = cfg.te_fold
    n_events = args.n_events
    snr = args.snr

    feature_dir = os.path.join(workspace, "features", "logmel",
                               "n_events=%d" % n_events)
    yaml_dir = os.path.join(workspace, "mixed_audio", "n_events=%d" % n_events)
    (tr_x, tr_at_y, tr_sed_y, tr_na_list, te_x, te_at_y, te_sed_y,
     te_na_list) = pp_data.load_data(feature_dir=feature_dir,
                                     yaml_dir=yaml_dir,
                                     te_fold=te_fold,
                                     snr=snr,
                                     is_scale=is_scale)

    print(tr_x.shape, tr_at_y.shape)
    print(te_x.shape, te_at_y.shape)
    (_, n_time, n_freq) = tr_x.shape
    n_out = len(cfg.events)

    if False:
        for e in tr_x:
            plt.matshow(e.T, origin='lower', aspect='auto')
            plt.show()

    # Build model.
    lay_in = InputLayer(in_shape=(n_time, n_freq))

    a = Reshape((1, n_time, n_freq))(lay_in)
    a = Conv2D(n_outfmaps=64,
               n_row=3,
               n_col=5,
               act='linear',
               strides=(1, 1),
               border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Conv2D(n_outfmaps=64,
               n_row=3,
               n_col=5,
               act='linear',
               strides=(1, 1),
               border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Dropout(p_drop=0.2)(a)

    a = Conv2D(n_outfmaps=64,
               n_row=3,
               n_col=5,
               act='linear',
               strides=(1, 1),
               border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Conv2D(n_outfmaps=64,
               n_row=3,
               n_col=5,
               act='linear',
               strides=(1, 1),
               border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Dropout(p_drop=0.2)(a)

    a = Conv2D(n_outfmaps=64,
               n_row=3,
               n_col=5,
               act='linear',
               strides=(1, 1),
               border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Conv2D(n_outfmaps=64,
               n_row=3,
               n_col=5,
               act='linear',
               strides=(1, 1),
               border_mode=(1, 2))(a)
    a = BN(axis=(0, 2, 3))(a)
    a = Activation('relu')(a)
    a = Dropout(p_drop=0.2)(a)

    a = Conv2D(n_outfmaps=n_out,
               n_row=1,
               n_col=1,
               act='sigmoid',
               border_mode=(0, 0),
               name='seg_masks')(a)

    a8 = Lambda(_global_avg_pooling, name='a8')(a)
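    # _global_avg_pooling is not defined in this snippet; presumably it averages the
    # (n_out, n_time, n_freq) segmentation masks over time and frequency to give
    # clip-level tag probabilities of shape (batch, n_out) (an assumption about the helper).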

    md = Model([lay_in], [a8])
    md.compile()
    md.summary(is_logging=True)

    # Callbacks.
    md_dir = os.path.join(workspace, "models", pp_data.get_filename(__file__),
                          "n_events=%d" % n_events, "fold=%d" % te_fold,
                          "snr=%d" % snr)
    pp_data.create_folder(md_dir)
    save_model = SaveModel(md_dir, call_freq=50, type='iter', is_logging=True)
    validation = Validation(te_x=te_x,
                            te_y=te_at_y,
                            batch_size=50,
                            call_freq=50,
                            metrics=['binary_crossentropy'],
                            dump_path=None,
                            is_logging=True)

    callbacks = [save_model, validation]

    observe_nodes = [md.find_layer('seg_masks').output_]
    f_forward = md.get_observe_forward_func(observe_nodes)

    # Generator.
    tr_gen = DataGenerator(batch_size=32, type='train')
    eva_gen = DataGenerator2(batch_size=32, type='test')

    # Train.
    loss_ary = []
    t1 = time.time()
    optimizer = Adam(1e-3)
    for (batch_x, batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_at_y]):
        if md.iter_ % 50 == 0:
            logging.info("iter: %d tr_loss: %f time: %s" % (
                md.iter_,
                np.mean(loss_ary),
                time.time() - t1,
            ))
            t1 = time.time()
            loss_ary = []
        # if md.iter_ % 200 == 0:
        # write_out_at_sed(md, eva_gen, f_forward, te_x, te_at_y, te_sed_y, n_events, snr, te_fold)
        if md.iter_ == 5001:
            break
        loss = md.train_on_batch(batch_x,
                                 batch_y,
                                 loss_func='binary_crossentropy',
                                 optimizer=optimizer,
                                 callbacks=callbacks)
        loss_ary.append(loss)
    parser_get_sep_stats = subparsers.add_parser('get_sep_stats')
    parser_get_sep_stats.add_argument('--n_events', type=int)
    parser_get_sep_stats.add_argument('--snr', type=int)

    parser_b2 = subparsers.add_parser('avg_recognize')
    parser_b2.add_argument('--n_events', type=int)
    parser_b2.add_argument('--snr', type=int)

    parser_c = subparsers.add_parser('plot_hotmap')
    parser_c.add_argument('--model_name', type=str)
    parser_c.add_argument('--n_events', type=int)

    args = parser.parse_args()

    logs_dir = os.path.join(cfg.workspace, "logs",
                            pp_data.get_filename(__file__))
    pp_data.create_folder(logs_dir)
    logging = pp_data.create_logging(logs_dir, filemode='w')
    logging.info(os.path.abspath(__file__))
    logging.info(sys.argv)

    if args.mode == "train":
        train(args)
    elif args.mode == "recognize":
        recognize(args)
    elif args.mode == "get_stats":
        bgn_iter, fin_iter, interval = 2000, 3001, 200
        get_stats(args, bgn_iter, fin_iter, interval)
    elif args.mode == "separate":
        bgn_iter, fin_iter, interval = 2000, 3001, 200
        separate(args, bgn_iter, fin_iter, interval)
Example #14
def train(args):
    cpickle_dir = args.cpickle_dir
    workspace = args.workspace

    # Path of hdf5 data
    bal_train_hdf5_path = os.path.join(cpickle_dir, "bal_train.h5")
    unbal_train_hdf5_path = os.path.join(cpickle_dir, "unbal_train.h5")
    eval_hdf5_path = os.path.join(cpickle_dir, "eval.h5")

    # Load data
    t1 = time.time()
    (tr_x1, tr_y1, tr_id_list1) = pp_data.load_data(bal_train_hdf5_path)
    (tr_x2, tr_y2, tr_id_list2) = pp_data.load_data(unbal_train_hdf5_path)
    print(tr_x1.shape)
    print(tr_x2.shape)
    tr_x = np.concatenate((tr_x1, tr_x2))
    tr_y = np.concatenate((tr_y1, tr_y2))
    tr_id_list = tr_id_list1 + tr_id_list2

    (te_x, te_y, te_id_list) = pp_data.load_data(eval_hdf5_path)
    logging.info("Loading data time: %s s" % (time.time() - t1))

    logging.info("tr_x1.shape: %s, tr_x2.shape: %s" % (tr_x1.shape, tr_x2.shape))
    logging.info("tr_x.shape: %s" % (tr_x.shape, ))

    (_, n_time, n_freq) = tr_x.shape

    # Build model
    n_hid = 600
    n_out = tr_y.shape[1]

    lay_in = Input(shape=(n_time, n_freq))
    a_0 = BatchNormalization()(lay_in)
    a_1 = Dense(n_hid, kernel_regularizer=regularizers.l2(0.001))(a_0)
    a_1 = BatchNormalization()(a_1)
    a_1 = Activation('relu')(a_1)
    a_1 = Dropout(rate=0.4)(a_1)
    a_2 = Dense(n_hid, kernel_regularizer=regularizers.l2(0.001))(a_1)
    a_2 = BatchNormalization()(a_2)
    a_2 = Activation('relu')(a_2)
    a_2 = Dropout(rate=0.4)(a_2)
    a_3 = Dense(n_hid, kernel_regularizer=regularizers.l2(0.001))(a_2)
    a_3 = BatchNormalization()(a_3)
    a_3 = Activation('relu')(a_3)
    a_3 = Dropout(rate=0.4)(a_3)
    cla_1 = Dense(n_out, name='cla_1')(a_3)
    cla_1 = BatchNormalization()(cla_1)
    cla_1 = Activation('sigmoid')(cla_1)
    att_1 = Dense(n_out, name='att_1')(a_3)
    att_1 = BatchNormalization()(att_1)
    att_1 = Activation('softmax')(att_1)

    # Attention
    lay_out_a = Lambda(_attention,
                       output_shape=_att_output_shape)([cla_1, att_1])
    cla_2 = Dense(n_out, name='cla_2')(a_2)
    cla_2 = BatchNormalization()(cla_2)
    cla_2 = Activation('sigmoid')(cla_2)
    att_2 = Dense(n_out, name='att2')(a_2)
    att_2 = BatchNormalization()(att_2)
    att_2 = Activation('softmax')(att_2)

    lay_out_b = Lambda(_attention,
                       output_shape=_att_output_shape)([cla_2, att_2])
    lay_out_c = Concatenate(axis=1)([lay_out_a, lay_out_b])

    #lay_out = Dense(n_out, activation='sigmoid', name='output')(lay_out_c)
    lay_out = Dense(n_out, name='output')(lay_out_c)
    lay_out = BatchNormalization()(lay_out)
    lay_out = Activation('sigmoid')(lay_out)
    # Compile model
    md = Model(inputs=lay_in, outputs=lay_out)
    md.summary()

    # Save model every several iterations
    call_freq = 1000
    dump_fd = os.path.join(workspace, "models", pp_data.get_filename(__file__))
    pp_data.create_folder(dump_fd)
    # save_model = SaveModel(dump_fd=dump_fd, call_freq=call_freq, type='iter', is_logging=True)

    # Callbacks function
    #callbacks = []#save_model]

    batch_size = 500
    tr_gen = RatioDataGenerator(batch_size=batch_size, type='train')

    # Optimization method
    optimizer = Adam(lr=args.lr)
    md.compile(loss='binary_crossentropy', optimizer=optimizer)
    #callbacks=callbacks)

    # Train
    stat_dir = os.path.join(workspace, "stats", pp_data.get_filename(__file__))
    pp_data.create_folder(stat_dir)
    prob_dir = os.path.join(workspace, "probs", pp_data.get_filename(__file__))
    pp_data.create_folder(prob_dir)

    tr_time = time.time()
    iter_ = 1
    for (tr_batch_x, tr_batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]):
        # Compute stats every several iterations
        if iter_ % call_freq == 0:
            # Stats of evaluation dataset
            t1 = time.time()
            te_err = eval(md=md,
                          x=te_x,
                          y=te_y,
                          out_dir=os.path.join(stat_dir, "test"),
                          out_probs_dir=os.path.join(prob_dir, "test"),
                          iter_=iter_)
            logging.info("Evaluate test time: %s" % (time.time() - t1, ))

            # Stats of training dataset
            t1 = time.time()
            tr_bal_err = eval(md=md,
                              x=tr_x1,
                              y=tr_y1,
                              out_dir=os.path.join(stat_dir, "train_bal"),
                              out_probs_dir=None,
                              iter_=iter_)
            logging.info("Evaluate tr_bal time: %s" % (time.time() - t1, ))
        iter_ += 1
        # Update params
        (tr_batch_x,
         tr_batch_y) = pp_data.transform_data(tr_batch_x, tr_batch_y)
        md.train_on_batch(x=tr_batch_x, y=tr_batch_y)
        # Stop training when the maximum number of iterations is reached
        if iter_ == call_freq * 151:
            break
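# ---------------------------------------------------------------------------
# NOTE: _attention() and _att_output_shape() used by the Lambda layers above
# are not included in these snippets. The sketch below assumes the usual
# attention-weighted pooling over the time axis,
# out = sum_t(cla_t * att_t) / sum_t(att_t), for inputs of shape
# (batch, n_time, n_out). This is an assumption about the helpers, not a copy.
import keras.backend as K

def _attention_sketch(inputs):
    cla, att = inputs                     # each: (batch, n_time, n_out)
    att = K.clip(att, 1e-7, 1.)
    return K.sum(cla * att, axis=1) / K.sum(att, axis=1)   # (batch, n_out)

def _att_output_shape_sketch(input_shapes):
    (shape_cla, shape_att) = input_shapes
    return (shape_cla[0], shape_cla[2])
# ---------------------------------------------------------------------------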
Example #15
def inference(args):
    cuda = args.use_cuda and torch.cuda.is_available()
    workspace = args.workspace
    model_name = args.model_name
    feat_type = args.feat_type
    script_na = args.script_na

    # Load data.
    te_packed_feat_path = os.path.join(workspace, "packed_features", feat_type,
                                       "test.p")
    [te_x_list, te_y_list,
     te_na_list] = cPickle.load(open(te_packed_feat_path, 'rb'))

    # Scale.
    if True:
        scale_path = os.path.join(workspace, "scalers", feat_type, "scaler.p")
        scaler = pickle.load(open(scale_path, 'rb'))
        te_x_list = pp_data.scale_on_x_list(te_x_list, scaler)

    # Construct model topology.
    n_concat = 3
    te_n_hop = 1
    n_freq = te_x_list[0].shape[-1]
    n_out = te_y_list[0].shape[-1]
    model = Net(n_concat, n_freq, n_out)

    # Init the weights of model using trained weights.
    model_path = os.path.join(workspace, "models", script_na, feat_type,
                              model_name)
    if os.path.isfile(model_path):
        print("Loading checkpoint '%s'" % model_path)
        checkpoint = torch.load(model_path)
        model.load_state_dict(checkpoint['state_dict'])
    else:
        raise Exception("Model path %s does not exist!" % model_path)

    # Move model to GPU.
    if cuda:
        model.cuda()

    # Directory to write out transcript midi files.
    out_midi_dir = os.path.join(workspace, "out_midis",
                                pp_data.get_filename(__file__), feat_type)
    pp_data.create_folder(out_midi_dir)

    # Data to 3d.
    n_half = (n_concat - 1) / 2
    for i1 in xrange(len(te_x_list)):
        x = te_x_list[i1]  # (n_time, n_freq)
        y = te_y_list[i1]  # (n_time, n_out)
        bare_na = os.path.splitext(te_na_list[i1])[0]
        (n_time, n_freq) = x.shape

        zero_pad = np.zeros((n_half, n_freq))
        x = np.concatenate((zero_pad, x, zero_pad), axis=0)
        x3d = pp_data.mat_2d_to_3d(x, n_concat,
                                   te_n_hop)  # (n_time, n_concat, n_freq)

        # Move data to GPU.
        x3d = torch.Tensor(x3d)
        x3d = Variable(x3d)
        if cuda:
            x3d = x3d.cuda()

        # Inference.
        model.eval()
        pred = model(x3d)  # (n_time, n_out)

        # Convert data type to numpy.
        pred = pred.data.cpu().numpy()

        # Threshold and write out predicted piano roll to midi file.
        mid_roll = pp_data.prob_to_midi_roll(pred, 0.5)
        out_path = os.path.join(out_midi_dir, "%s.mid" % bare_na)
        print("Write out to: %s" % out_path)
        pp_data.write_midi_roll_to_midi(mid_roll, out_path)

        # Debug plot.
        if True:
            fig, axs = plt.subplots(3, 1, sharex=True)
            axs[0].matshow(y.T, origin='lower', aspect='auto')
            axs[1].matshow(pred.T, origin='lower', aspect='auto')
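            # Binarize the output probabilities at a 0.5 threshold (values above 0.5 map to 1, below to 0).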
            binary_pred = (np.sign(pred - 0.5) + 1) / 2
            axs[2].matshow(binary_pred.T, origin='lower', aspect='auto')
            axs[0].set_title("Ground truth")
            axs[1].set_title("DNN output probability")
            axs[2].set_title("DNN output probability after thresholding")
            for j1 in xrange(3):
                axs[j1].set_ylabel('note index')
                axs[j1].set_xlabel('frames')
                axs[j1].xaxis.set_label_coords(1.06, -0.01)
                axs[j1].xaxis.tick_bottom()
            plt.tight_layout()
            plt.show()
Example #16
    parser_inference_wiener = subparsers.add_parser('inference_wiener')
    parser_inference_wiener.add_argument('--use_cuda',
                                         action='store_true',
                                         default=True)
    parser_inference_wiener.add_argument('--workspace',
                                         type=str,
                                         required=True)
    parser_inference_wiener.add_argument('--iteration',
                                         type=int,
                                         required=True)
    parser_inference_wiener.add_argument('--stack_num',
                                         type=int,
                                         required=True)
    parser_inference_wiener.add_argument('--mini_num', type=int, default=-1)
    parser_inference_wiener.add_argument('--visualize',
                                         action='store_true',
                                         default=False)

    args = parser.parse_args()
    args.filename = pp_data.get_filename(__file__)

    if args.mode == "calculate_scalar":
        calculate_scalar(args)
    elif args.mode == "train":
        train(args)
    elif args.mode == "inference":
        inference(args)
    elif args.mode == "inference_wiener":
        inference_wiener(args)
    else:
        raise Exception("Error!")
Example #17

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest='mode')

    parser_train = subparsers.add_parser('train')
    parser_train.add_argument('--use_cuda', action='store_true', default=True)
    parser_train.add_argument('--workspace', type=str)
    parser_train.add_argument('--feat_type', type=str, choices=['logmel'])
    parser_train.add_argument('--lr', type=float, default=1e-3)
    parser_train.add_argument('--resume_model_path', type=str, default="")

    parser_inference = subparsers.add_parser('inference')
    parser_inference.add_argument('--use_cuda',
                                  action='store_true',
                                  default=True)
    parser_inference.add_argument('--workspace', type=str)
    parser_inference.add_argument('--model_name', type=str)
    parser_inference.add_argument('--feat_type', type=str, choices=['logmel'])

    args = parser.parse_args()

    if args.mode == "train":
        args.script_na = pp_data.get_filename(__file__)
        train(args)
    elif args.mode == "inference":
        args.script_na = pp_data.get_filename(__file__)
        inference(args)
    else:
        raise Exception("Incorrect argument!")
Example #18
def train(args):
    workspace = args.workspace
    cla_mapping = args.cla_mapping

    # Load data.
    t1 = time.time()
    tr_pack_path = os.path.join(workspace, "packed_features", "logmel",
                                "training.h5")
    te_pack_path = os.path.join(workspace, "packed_features", "logmel",
                                "testing.h5")

    with h5py.File(tr_pack_path, 'r') as hf:
        tr_na_list = list(hf.get('na_list'))
        tr_x = np.array(hf.get('x'))
        tr_y = np.array(hf.get('y'))

    with h5py.File(te_pack_path, 'r') as hf:
        te_na_list = list(hf.get('na_list'))
        te_x = np.array(hf.get('x'))
        te_y = np.array(hf.get('y'))
    logging.info("Loading data time: %s" % (time.time() - t1, ))

    # Scale.
    t1 = time.time()
    scaler_path = os.path.join(workspace, "scalers", "logmel",
                               "training.scaler")
    scaler = pickle.load(open(scaler_path, 'rb'))
    tr_x = pp_data.do_scaler_on_x3d(tr_x, scaler)
    te_x = pp_data.do_scaler_on_x3d(te_x, scaler)
    logging.info("Scale time: %s" % (time.time() - t1, ))

    logging.info("tr_x: %s %s" % (tr_x.shape, tr_x.dtype))
    logging.info("tr_y: %s %s" % (tr_y.shape, tr_y.dtype))
    logging.info("y: 1-of-4 representation: %s" % (cfg.events + ['bg'], ))

    # Build model.
    (_, n_time, n_freq) = tr_x.shape
    n_out = len(cfg.events) + 1

    in0 = InputLayer(in_shape=(n_time, n_freq))
    a1 = Reshape((1, n_time, n_freq))(in0)

    a1 = Conv2D(n_outfmaps=64,
                n_row=3,
                n_col=3,
                act='linear',
                border_mode=(1, 1))(a1)
    a1 = BN(axis=(0, 2, 3))(a1)
    a1 = Activation('relu')(a1)
    a1 = Conv2D(n_outfmaps=64,
                n_row=3,
                n_col=3,
                act='linear',
                border_mode=(1, 1))(a1)
    a1 = BN(axis=(0, 2, 3))(a1)
    a1 = Activation('relu')(a1)
    a1 = Dropout(0.3)(a1)

    a1 = Conv2D(n_outfmaps=64,
                n_row=3,
                n_col=3,
                act='linear',
                border_mode=(1, 1))(a1)
    a1 = BN(axis=(0, 2, 3))(a1)
    a1 = Activation('relu')(a1)
    a1 = Conv2D(n_outfmaps=64,
                n_row=3,
                n_col=3,
                act='linear',
                border_mode=(1, 1))(a1)
    a1 = BN(axis=(0, 2, 3))(a1)
    a1 = Activation('relu')(a1)
    a1 = Dropout(0.3)(a1)

    a1 = Conv2D(n_outfmaps=64,
                n_row=3,
                n_col=3,
                act='linear',
                border_mode=(1, 1))(a1)
    a1 = BN(axis=(0, 2, 3))(a1)
    a1 = Activation('relu')(a1)
    a1 = Conv2D(n_outfmaps=64,
                n_row=3,
                n_col=3,
                act='linear',
                border_mode=(1, 1))(a1)
    a1 = BN(axis=(0, 2, 3))(a1)
    a1 = Activation('relu')(a1)
    a1 = Dropout(0.3)(a1)

    a1 = Conv2D(n_outfmaps=64,
                n_row=3,
                n_col=3,
                act='linear',
                border_mode=(1, 1))(a1)
    a1 = BN(axis=(0, 2, 3))(a1)
    a1 = Activation('relu')(a1)
    a1 = Conv2D(n_outfmaps=64,
                n_row=3,
                n_col=3,
                act='linear',
                border_mode=(1, 1))(a1)
    a1 = BN(axis=(0, 2, 3))(a1)
    a1 = Activation('relu')(a1)
    a1 = Dropout(0.3)(a1)

    # Segmentation mask for 'babycry', 'glassbreak' and 'gunshot'.
    a1 = Conv2D(n_outfmaps=len(cfg.events),
                n_row=1,
                n_col=1,
                act='sigmoid',
                border_mode=(0, 0))(a1)

    # Extend segmentation mask to 'babycry', 'glassbreak', 'gunshot' and 'background'.
    a1 = Lambda(_seg_mask_ext_bg, name='seg_masks')(a1)
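    # _seg_mask_ext_bg is not shown here; presumably it appends a 'background' mask
    # derived from the event masks so the output has len(cfg.events) + 1 channels,
    # matching n_out above (an assumption about the helper).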

    # Classification mapping.
    cla_mapping = args.cla_mapping

    if cla_mapping == 'global_rank_pooling':
        weight1d = np.power(r * np.ones(120 * 64), np.arange(120 * 64))
        a8 = Lambda(_global_rank_pooling, weight1d=weight1d, name='a5')(a1)
    elif cla_mapping == 'global_max_pooling':
        a8 = Lambda(_global_max_pooling)(a1)
    elif cla_mapping == 'global_avg_pooling':
        a8 = Lambda(_global_avg_pooling)(a1)
    else:
        raise Exception("Incorrect cla_mapping!")

    md = Model([in0], [a8])
    md.compile()
    md.summary(is_logging=True)

    # Callbacks.
    md_dir = os.path.join(workspace, "models", pp_data.get_filename(__file__))
    pp_data.create_folder(md_dir)
    save_model = SaveModel(md_dir, call_freq=100, type='iter')
    validation = Validation(te_x=te_x,
                            te_y=te_y,
                            batch_size=100,
                            call_freq=50,
                            metrics=['binary_crossentropy'],
                            dump_path=None,
                            is_logging=True)
    callbacks = [save_model, validation]

    # Train.
    generator = DataGenerator(batch_size=20, type='train')
    loss_ary = []
    t1 = time.time()
    optimizer = Adam(1e-4)
    for (batch_x, batch_y) in generator.generate(xs=[tr_x], ys=[tr_y]):
        np.set_printoptions(threshold=np.nan,
                            linewidth=1000,
                            precision=2,
                            suppress=True)
        loss = md.train_on_batch(batch_x,
                                 batch_y,
                                 loss_func='binary_crossentropy',
                                 optimizer=optimizer,
                                 callbacks=callbacks)
        loss_ary.append(loss)
        if md.iter_ % 50 == 0:  # Evaluate training loss every several iterations.
            logging.info("iter: %d, tr loss: %f" %
                         (md.iter_, np.mean(loss_ary)))
            logging.info("time: %s" % (time.time() - t1, ))
            t1 = time.time()
            loss_ary = []
        if md.iter_ == 10001:  # Stop after several iterations.
            break
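# ---------------------------------------------------------------------------
# NOTE: the cla_mapping helpers (_global_avg_pooling, _global_max_pooling,
# _global_rank_pooling) are not defined in these snippets. The numpy sketch
# below illustrates the computations they presumably perform on segmentation
# masks of shape (batch, n_out, n_time, n_freq); the real helpers would use
# backend tensor ops instead (assumptions, not the original code).
import numpy as np

def _global_avg_pooling_sketch(masks):
    return np.mean(masks, axis=(2, 3))       # (batch, n_out)

def _global_max_pooling_sketch(masks):
    return np.max(masks, axis=(2, 3))        # (batch, n_out)

def _global_rank_pooling_sketch(masks, weight1d):
    # Global weighted rank pooling: sort each mask's values in descending
    # order and average them with geometrically decaying weights
    # (weight1d = r ** arange(n_time * n_freq), as built in train() above).
    (n_batch, n_out, n_time, n_freq) = masks.shape
    flat = masks.reshape((n_batch, n_out, n_time * n_freq))
    flat = -np.sort(-flat, axis=-1)          # descending order
    return np.sum(flat * weight1d, axis=-1) / np.sum(weight1d)
# ---------------------------------------------------------------------------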
def train(args):
    EVAL_MAP = -1000.
    PATIENCE = 0
    data_dir = args.data_dir
    workspace = args.workspace
    tag = args.tag
    levels = args.levels
    # Path for the hdf5 dara
    bal_train_path = os.path.join(data_dir, "bal_train.h5")
    unbal_train_path = os.path.join(data_dir, "unbal_train.h5")
    eval_path = os.path.join(data_dir, "eval.h5")

    # Load data
    t1 = time.time()
    (tr_x1, tr_y1, tr_id_list1) = pp_data.load_data(bal_train_path)
    (tr_x2, tr_y2, tr_id_list2) = pp_data.load_data(unbal_train_path)
    (eval_x, eval_y, eval_id_list) = pp_data.load_data(eval_path)
    #tr_x = tr_x1
    #tr_y = tr_y1
    #tr_id_list = tr_id_list1
    tr_x = np.concatenate((tr_x1, tr_x2))
    tr_y = np.concatenate((tr_y1, tr_y2))
    tr_id_list = tr_id_list1 + tr_id_list2

    logging.info("Loading dat time: %s s" % (time.time() - t1))
    logging.info(tr_x1.shape, tr_x2.shape)
    logging.info("tr_x.shape: %s" % (tr_x.shape, ))

    (_, n_time, n_freq) = tr_x.shape

    # Build Model

    model = get_ml_attention(levels)
    logging.info(model.to_json())
    # Optimization method
    optimizer = Adam(lr=args.lr)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,  # use the Adam instance so args.lr takes effect
                  metrics=['binary_accuracy'])
    #logging.info(model.summary())
    #
    batch_size = 500
    tr_gen = RatioDataGenerator(batch_size=batch_size, type='train')
    # Save Model every call_freq iterations
    model_iter = 0
    call_freq = 1000
    dump_fd = os.path.join(workspace, "models", pp_data.get_filename(__file__))
    pp_data.create_folder(dump_fd)

    # Train
    stat_dir = os.path.join(workspace, "stats", pp_data.get_filename(__file__))
    pp_data.create_folder(stat_dir)
    prob_dir = os.path.join(workspace, "probs", pp_data.get_filename(__file__))
    pp_data.create_folder(prob_dir)

    tr_time = time.time()

    for (tr_batch_x, tr_batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]):
        # Computes stats every several iterations
        print(model_iter)
        if model_iter % call_freq == 0:  # every 1000 iterations
            # Stats of evaluation dataset
            t1 = time.time()
            eval_MAP = eval(model=model,
                            x=eval_x,
                            y=eval_y,
                            out_dir=os.path.join(stat_dir, "eval"),
                            out_probs_dir=os.path.join(prob_dir, "eval"),
                            md_iter=model_iter)

            logging.info("Evaluate evaluation-set time: %s" %
                         (time.time() - t1, ))
            if eval_MAP >= EVAL_MAP:
                #md_name = "/scratch/work/xuz2/model_" + tag + "_.h5"
                md_name = tag + "_.h5"
                model.save(md_name)
                EVAL_MAP = eval_MAP
                PATIENCE = 0
            else:
                PATIENCE += 1
                logging.info("Patience now: %d" % (PATIENCE, ))
                if PATIENCE >= 10:
                    break
            #	print("Training stop at %s iterations" % (model_iter,))
            #	break
            # Stats of training dataset
            #t1 =time.time()
            #tr_bal_err = eval(model=model, x=tr_x1, y=tr_y1,

            #				  out_dir=os.path.join(stat_dir, "train_bal"),
            ##				  out_probs_dir=None,
            #				  md_iter=model_iter)
            #logging.info("Evaluate tr_bal time: %s" % (time.time() - t1,))

            # Save Model
            #if eval_MAP > 0.342:
            #	md_name = "/scratch/work/xuz2/model_" + str(model_iter) + "_.h5"
            #	model.save(md_name)

        # Update params
        (tr_batch_x,
         tr_batch_y) = pp_data.transform_data(tr_batch_x, tr_batch_y)
        model.train_on_batch(tr_batch_x, tr_batch_y)

        model_iter += 1

        # Stop training when the maximum number of iterations is reached
        if model_iter == call_freq * 151:
            break
Example #20
def train(args):
    cpickle_dir = args.cpickle_dir
    workspace = args.workspace

    # Path of hdf5 data
    bal_train_hdf5_path = os.path.join(cpickle_dir, "bal_train.h5")
    unbal_train_hdf5_path = os.path.join(cpickle_dir, "unbal_train.h5")
    eval_hdf5_path = os.path.join(cpickle_dir, "eval.h5")

    # Load data
    t1 = time.time()
    (tr_x1, tr_y1, tr_id_list1) = pp_data.load_data(bal_train_hdf5_path)
    (tr_x2, tr_y2, tr_id_list2) = pp_data.load_data(unbal_train_hdf5_path)
    tr_x = np.concatenate((tr_x1, tr_x2))
    tr_y = np.concatenate((tr_y1, tr_y2))
    tr_id_list = tr_id_list1 + tr_id_list2

    (te_x, te_y, te_id_list) = pp_data.load_data(eval_hdf5_path)
    logging.info("Loading data time: %s s" % (time.time() - t1))

    logging.info("tr_x1.shape: %s, tr_x2.shape: %s" % (tr_x1.shape, tr_x2.shape))
    logging.info("tr_x.shape: %s" % (tr_x.shape, ))

    (_, n_time, n_freq) = tr_x.shape

    # Build model
    n_hid = 500
    n_out = tr_y.shape[1]

    lay_in = InputLayer(in_shape=(n_time, n_freq))
    a = Dense(n_out=n_hid, act='relu')(lay_in)
    a = Dropout(p_drop=0.2)(a)
    a = Dense(n_out=n_hid, act='relu')(a)
    a = Dropout(p_drop=0.2)(a)
    a = Dense(n_out=n_hid, act='relu')(a)
    a = Dropout(p_drop=0.2)(a)
    cla = Dense(n_out=n_out, act='sigmoid', name='cla')(a)
    att = Dense(n_out=n_out, act='softmax', name='att')(a)

    # Attention
    lay_out = Lambda(_attention)([cla, att])

    # Compile model
    md = Model(in_layers=[lay_in], out_layers=[lay_out])
    md.compile()
    md.summary(is_logging=True)

    # Save model every several iterations
    call_freq = 1000
    dump_fd = os.path.join(workspace, "models", pp_data.get_filename(__file__))
    pp_data.create_folder(dump_fd)
    save_model = SaveModel(dump_fd=dump_fd,
                           call_freq=call_freq,
                           type='iter',
                           is_logging=True)

    # Callbacks function
    callbacks = [save_model]

    batch_size = 500
    tr_gen = RatioDataGenerator(batch_size=batch_size, type='train')

    # Optimization method
    optimizer = Adam(lr=args.lr)

    # Train
    stat_dir = os.path.join(workspace, "stats", pp_data.get_filename(__file__))
    pp_data.create_folder(stat_dir)
    prob_dir = os.path.join(workspace, "probs", pp_data.get_filename(__file__))
    pp_data.create_folder(prob_dir)

    tr_time = time.time()
    for (tr_batch_x, tr_batch_y) in tr_gen.generate(xs=[tr_x], ys=[tr_y]):
        # Compute stats every several iterations
        if md.iter_ % call_freq == 0:
            # Stats of evaluation dataset
            t1 = time.time()
            te_err = eval(md=md,
                          x=te_x,
                          y=te_y,
                          out_dir=os.path.join(stat_dir, "test"),
                          out_probs_dir=os.path.join(prob_dir, "test"))
            logging.info("Evaluate test time: %s" % (time.time() - t1, ))

            # Stats of training dataset
            t1 = time.time()
            tr_bal_err = eval(md=md,
                              x=tr_x1,
                              y=tr_y1,
                              out_dir=os.path.join(stat_dir, "train_bal"),
                              out_probs_dir=None)
            logging.info("Evaluate tr_bal time: %s" % (time.time() - t1, ))

        # Update params
        (tr_batch_x,
         tr_batch_y) = pp_data.transform_data(tr_batch_x, tr_batch_y)
        md.train_on_batch(batch_x=tr_batch_x,
                          batch_y=tr_batch_y,
                          loss_func='binary_crossentropy',
                          optimizer=optimizer,
                          callbacks=callbacks)

        # Stop training when the maximum number of iterations is reached
        if md.iter_ == call_freq * 31:
            break
Example #21
    # Arguments
    parser = argparse.ArgumentParser(description="")
    subparsers = parser.add_subparsers(dest='mode')

    parser_train = subparsers.add_parser('train')
    parser_train.add_argument('--cpickle_dir', type=str)
    parser_train.add_argument('--workspace', type=str)
    parser_train.add_argument('--lr', type=float, default=1e-3)

    parser_get_avg_stats = subparsers.add_parser('get_avg_stats')
    parser_get_avg_stats.add_argument('--cpickle_dir', type=str)
    parser_get_avg_stats.add_argument('--workspace')

    args = parser.parse_args()

    # Logs
    logs_dir = os.path.join(args.workspace, "logs",
                            pp_data.get_filename(__file__))
    pp_data.create_folder(logs_dir)
    logging = pp_data.create_logging(logs_dir, filemode='w')
    logging.info(os.path.abspath(__file__))
    logging.info(sys.argv)

    if args.mode == "train":
        train(args)
    elif args.mode == 'get_avg_stats':
        file_name = pp_data.get_filename(__file__)
        bgn_iter, fin_iter, interval_iter = 20000, 30001, 1000
        get_avg_stats(args, file_name, bgn_iter, fin_iter, interval_iter)
    else:
        raise Exception("Error!")