예제 #1
0
def compute_edge_features(x, config_d):
    """Turn a raw sample array into a uint8 edge-feature array.

    Parameters
    ----------
    x : array-like with an ``astype`` method
        Raw signal samples. They are cast to float and divided by
        ``2**15 - 1`` before the spectrogram is computed (presumably to
        rescale 16-bit integer audio -- confirm against the callers).
    config_d : dict
        Configuration with a ``'SPECTROGRAM'`` section (sample_rate,
        num_window_samples, num_window_step_samples, fft_length,
        freq_cutoff, kernel_length, preemphasis, no_use_dpss,
        do_freq_smoothing) and an ``'EDGES'`` section (block_length,
        spread_length, threshold, abst_threshold).

    Returns
    -------
    The output of ``reorg_part_for_fast_filtering`` applied to the
    thresholded edge map, cast to ``np.uint8``.
    """
    scaled_signal = x.astype(float) / (2 ** 15 - 1)

    spec_cfg = config_d['SPECTROGRAM']
    # return_sample_mapping=True makes the call hand back three values;
    # only the spectrogram itself is used below.
    spectrogram, _sample_mapping, _sample_to_frames = esp.get_spectrogram_features(
        scaled_signal,
        spec_cfg['sample_rate'],
        spec_cfg['num_window_samples'],
        spec_cfg['num_window_step_samples'],
        spec_cfg['fft_length'],
        spec_cfg['freq_cutoff'],
        spec_cfg['kernel_length'],
        preemph=spec_cfg['preemphasis'],
        no_use_dpss=spec_cfg['no_use_dpss'],
        do_freq_smoothing=spec_cfg['do_freq_smoothing'],
        return_sample_mapping=True)

    # The edge detector is fed the transposed spectrogram.
    edge_result = esp._edge_map_no_threshold(spectrogram.T)
    edge_map, row_breaks, orientations = edge_result

    edge_cfg = config_d['EDGES']
    # The return value is ignored: edge_map is presumably thresholded in
    # place by this call -- TODO confirm in esp.
    esp._edge_map_threshold_segments(
        edge_map,
        edge_cfg['block_length'],
        edge_cfg['spread_length'],
        threshold=edge_cfg['threshold'],
        edge_orientations=orientations,
        edge_feature_row_breaks=row_breaks,
        abst_threshold=edge_cfg['abst_threshold'],
        verbose=False)

    reorganized = reorg_part_for_fast_filtering(edge_map)
    return reorganized.astype(np.uint8)
def get_waliji_feature_map(s,
                           log_part_blocks,
                           log_invpart_blocks,
                           abst_threshold=np.array([.025,.025,.015,.015,
                                                    .02,.02,.02,.02]),
                           spread_length=3,
                           fft_length=512,
                           num_window_step_samples=80,
                           freq_cutoff=3000,
                           sample_rate=16000,
                           num_window_samples=320,
                           kernel_length=7):
    """
    Compute the part-based ("waliji") feature map of a signal.

    Usually only the signal ``s`` and the two part arrays are supplied;
    the remaining parameters rarely change.

    Pipeline: spectrogram -> edge map -> thresholded edge map ->
    part coding (``cp.code_parts_fast``) -> argmax over axis 2 ->
    patch spreading -> ``collapse_to_grid``.

    Parameters:
    ===========
    s: np.ndarray[ndim=1]
        Raw signal data that the features are extracted from.
    log_part_blocks: np.ndarray[ndim=4,dtype=np.float32]
        Log part templates; the first dimension runs over the
        different features.  ``shape[1]`` and ``shape[2]`` are passed
        on as the spreading / grid sizes.
    log_invpart_blocks: np.ndarray[ndim=4,dtype=np.float32]
        Companion array of ``log_part_blocks``, equal to
        ``np.log(1 - np.exp(log_part_blocks))``.
    abst_threshold, spread_length:
        Accepted for interface compatibility but not read by this
        function; the thresholding call below uses fixed values.
    fft_length, num_window_step_samples, freq_cutoff, sample_rate,
    num_window_samples, kernel_length:
        Forwarded to ``esp.get_spectrogram_features``.

    Returns:
    ========
    The result of ``collapse_to_grid`` on the spread part map.
    """
    spectrogram = esp.get_spectrogram_features(
        s,
        sample_rate,
        num_window_samples,
        num_window_step_samples,
        fft_length,
        freq_cutoff,
        kernel_length)

    edge_result = esp._edge_map_no_threshold(spectrogram)
    edge_map, row_breaks, orientations = edge_result

    # Fixed settings (40, 1, threshold=.7).  The return value is ignored:
    # edge_map is presumably thresholded in place -- TODO confirm in esp.
    esp._edge_map_threshold_segments(
        edge_map,
        40,
        1,
        threshold=.7,
        edge_orientations=orientations,
        edge_feature_row_breaks=row_breaks)

    binary_edges = reorg_part_for_fast_filtering(edge_map).astype(np.uint8)
    part_scores = cp.code_parts_fast(binary_edges,
                                     log_part_blocks,
                                     log_invpart_blocks,
                                     10)
    best_part = np.argmax(part_scores, axis=2)

    # The amount of spreading is governed by the size of the part features.
    part_shape = log_part_blocks.shape
    spread_map = swp.spread_waliji_patches(best_part,
                                           part_shape[1],
                                           part_shape[2],
                                           part_shape[0])
    return collapse_to_grid(spread_map, part_shape[1], part_shape[2])
 s_idx_list = []
 for s_idx, s_fname in enumerate(s_fnames[:400]):
     print "s_idx = %d" %s_idx
     f.write(str(s_idx)+'\t'+s_fname+'\n')
     s = np.load(s_fname)
     S = esp.get_spectrogram_features(s,
                                      sample_rate,
                                      num_window_samples,
                                      num_window_step_samples,
                                      fft_length,
                                      freq_cutoff,
                                  kernel_length)
     if lower_cutoff == 10:
         np.save(tmp_data_path+str(s_idx)+'S.npy',S)
     E, edge_feature_row_breaks,\
       edge_orientations = esp._edge_map_no_threshold(S)
     esp._edge_map_threshold_segments(E,
                                      20,
                                      1,
                                      threshold=.7,
                                      edge_orientations = edge_orientations,
                                      edge_feature_row_breaks = edge_feature_row_breaks)
     if lower_cutoff == 10:
         np.save(tmp_data_path+str(s_idx)+'E.npy',E)
     patch_width = 5
     patch_height = 5
     upper_cutoff = 200
     bp,all_patch_rows,all_patch_cols = elf.extract_local_features_tied(E,patch_height,
                                                                        patch_width, lower_cutoff,
                                                                        upper_cutoff,
                                                                        edge_feature_row_breaks,
def main(args):
    """Run the analysis selected by the parsed command-line ``args``.

    Setup, performed for every mode:

    * parameters are fetched with ``get_params(args)`` (only ``sp`` and
      ``ep`` are used here);
    * the utterance list comes from ``get_file_indices`` and is truncated
      to ``args.limitFileIndices`` entries when that value is > -1;
    * if ``args.partsPath`` is non-empty the part array is loaded, clipped
      to [0.01, 0.99] and converted to ``log(p)`` / ``log(1 - p)``.

    Exactly one mode then runs, tested in this order:

    * ``args.printEdgeDistribution`` -- call ``getEdgeDistribution``.
    * ``args.edgeQuantileComparison`` (non-empty) -- for each utterance,
      compute the spectrogram and un-thresholded edge map and hand them to
      ``visualize_spec_and_quantiles``.
    * ``args.createBackgroundMixture > 0`` -- placeholder, does nothing.
    * ``args.getUnclippedBackground`` (non-empty) -- code every utterance
      with the loaded parts, accumulate the spread part maps in a
      ``gtrd.AverageBackground`` and save its ``E`` attribute to that path.

    Raises
    ------
    ValueError
        If ``args.getUnclippedBackground`` is requested while
        ``args.partsPath`` is empty (the parts are needed for coding).
    """
    verbose = args.v
    if verbose:
        print(args)
        print("Checking value of args.useDefaultParams= %s"
              % (args.useDefaultParams,))
        if args.useDefaultParams:
            print("Using default parameters")

    # NOTE: a separate code path for non-default parameters was never
    # written; both settings of args.useDefaultParams use get_params().
    (
        sp,
        ep,
        root_path,
        utterances_path,
        file_indices,
        num_mix_params,
        test_path,
        train_path,
        train_example_lengths,
        train_file_indices,
        test_example_lengths,
        test_file_indices,
    ) = get_params(args)

    # The file list returned by get_params is superseded by the paths given
    # on the command line.
    file_indices = get_file_indices(args.fileIndicesPath, args.dataPath)
    if args.limitFileIndices > -1:
        if verbose:
            print("Limiting file indices to length %d" % args.limitFileIndices)
        file_indices = file_indices[: args.limitFileIndices]
    elif verbose:
        print("No limit on file indices")

    # Stay None unless a parts file was supplied, so that modes needing the
    # parts can detect their absence instead of hitting a NameError.
    logParts = None
    logInvParts = None
    if args.partsPath != "":
        if verbose:
            print("Loading in parts from %s" % args.partsPath)
        # Clip away from 0 and 1 so both logarithms below stay finite.
        EParts = np.clip(np.load(args.partsPath), 0.01, 0.99)
        logParts = np.log(EParts).astype(np.float64)
        logInvParts = np.log(1 - EParts).astype(np.float64)

    if args.printEdgeDistribution:
        getEdgeDistribution(file_indices, args.hw, file_indices_chunks=args.file_indices_chunks)
    elif args.edgeQuantileComparison != "":
        for fl_id, fl in enumerate(file_indices):
            if verbose:
                print(fl_id)
            utterance = gtrd.makeUtterance(args.dataPath, fl)
            print("%s %s" % (sp, args.mel_smoothing_kernel))
            S = gtrd.get_spectrogram(utterance.s, sp, mel_smoothing_kernel=args.mel_smoothing_kernel)
            E, _row_breaks, _orientations = esp._edge_map_no_threshold(S.T)
            # Re-stack the rows of E as 8 equal consecutive chunks along a
            # new last axis.  Floor division keeps the size an integer on
            # both Python 2 and Python 3.
            chunk_rows = E.shape[0] // 8
            E2 = np.empty((chunk_rows, E.shape[1], 8))
            for i in range(8):
                E2[:, :, i] = E[chunk_rows * i : chunk_rows * (i + 1), :]
            print("%s %s" % (E2.shape, S.shape))
            visualize_spec_and_quantiles(
                "%s_%d" % (args.edgeQuantileComparison, fl_id), E2, S, args.quantileSet, args.blockLengthSet
            )
    elif args.createBackgroundMixture > 0:
        # Not implemented; kept so the mode precedence is unchanged.
        pass
    elif args.getUnclippedBackground != "":
        if logParts is None:
            raise ValueError(
                "args.partsPath must be set when args.getUnclippedBackground is used"
            )
        # initialize the background
        if verbose:
            print("Initializing average background to be computed over parts")
        avg_bgd = gtrd.AverageBackground()
        for fl_id, fl in enumerate(file_indices):
            if verbose:
                print(fl_id)
            utterance = gtrd.makeUtterance(args.dataPath, fl)
            print("%s %s" % (sp, args.mel_smoothing_kernel))
            S = gtrd.get_spectrogram(utterance.s, sp, mel_smoothing_kernel=args.mel_smoothing_kernel)
            E = gtrd.get_edge_features(S.T, ep, verbose=False)
            if args.seeBackgroundEstimatePlots != "":
                visualize_edge_on_specs("%s_%d.png" % (args.seeBackgroundEstimatePlots, fl_id), E, S)
            out = code_parts.code_parts(E.astype(np.uint8), logParts, logInvParts, args.bernsteinEdgeThreshold)
            # Index of the largest response along the last axis.
            max_responses = np.argmax(out, -1)
            if args.bernsteinPreSpreadVisualizeOnSpec != "":
                # cycle over all parts
                for part_id in range(logParts.shape[0]):
                    visualize_bern_on_specs(
                        "%s_%d_%d.png" % (args.bernsteinPreSpreadVisualizeOnSpec, fl_id, part_id),
                        max_responses,
                        S,
                        part_id,
                    )
            bin_out_map = code_parts.spread_patches(max_responses, 2, 2, out.shape[-1] - 1)
            avg_bgd.add_frames(bin_out_map, time_axis=0)

        np.save(args.getUnclippedBackground, avg_bgd.E)