Example #1
    def _test_size(self):
        '''
        Compare several competing methods while changing the ratio of the
        positive class in the dataset. We use a binary-class dataset for ease
        of interpretation.
        '''
        for set_size in numpy.arange(100, 1000, 100):
            X_train_full, y_train_full, X_test, y_test = nc_rna_reader.toNumpy()
            X_train, y_train = self.get_sub_set_with_size([X_train_full, y_train_full], set_size)
            assert len(y_train) == set_size

            train_set = (X_train, y_train)
            test_set_original = (X_test, y_test)

            ms = MS2(LogisticRegression)
            ms.fit(train_set)

            r = 0.05
            X_test_new, y_test_new = SetGen.with_pos_ratio(test_set_original, r, pos_label=1)
            test_set = [X_test_new, y_test_new]

            dist_true = DE.arrayToDist(y_test_new)
            dist_est = ms.predict(X_test_new)

            err = rms(dist_est, dist_true)

            print(dist_est)
            print("size: %d, err: %f" % (set_size, err))
Example #2
    def test_class_ratio(self):
        '''
        Compare several competing methods while changing the ratio of the
        positive class in the dataset. We use a binary-class dataset for ease
        of interpretation.
        '''
        #X_train_full, y_train_full, X_test, y_test = nc_rna_reader.toNumpy()
        X_train_full, y_train_full, X_test, y_test = news_20_reader.toNumpy()
        set_size = 1000
        X_train, y_train = self.get_sub_set_with_size([X_train_full, y_train_full], set_size)

        train_set = (X_train, y_train)
        test_set_original = (X_test, y_test)

        ms = MSHI(LinearSVC)
        ms.fit(train_set)

        print('Done training')

        for r in numpy.arange(0.05, 1.0, 0.05):
        #for r in [0.05]:
            X_test_new, y_test_new = SetGen.with_pos_ratio(test_set_original, r, pos_label=1)
            test_set = [X_test_new, y_test_new]

            dist_true = DE.arrayToDist(y_test_new)
            dist_est = ms.predict(X_test_new)

            err = rms(dist_est, dist_true)

            print "r: %f, pos: %f" % (r, dist_est[1])
Example #3
File: cc2.py Project: pyongjoo/ende
    def test_ratio(self):
        #X_train_full, y_train_full, X_test, y_test = nc_rna_reader.toNumpy()
        #X_train_full, y_train_full, X_test, y_test = rcv1_binary_reader.toNumpy()
        X_train_full, y_train_full, X_test, y_test = synthetic_reader.toNumpy(0.3, n_class=2)
        set_size = 500     # an arbitrary number
        X_train, y_train = self.get_sub_set_with_size([X_train_full, y_train_full], set_size)
        assert len(y_train) == set_size

        train_set = (X_train, y_train)
        test_set_original = (X_test, y_test)

        cc = CC2(KNeighborsClassifier)
        cc.fit(train_set)


        for r in numpy.arange(0.05, 1.0, 0.05):
            X_test_new, y_test_new = SetGen.with_pos_ratio(test_set_original, r, pos_label=1)
            test_set = [X_test_new, y_test_new]

            dist_true = DE.arrayToDist(y_test_new)
            dist_est = cc.predict(X_test_new)

            err = rms(dist_est, dist_true)

            #print dist_est
            print "%f\t%f" % (dist_true[1], dist_est[1])
Example #4
    def load_directory(self, directory_path, condition):

        filenames = [
            filename for filename in os.listdir(directory_path)
            if filename.endswith('.wav')
        ]

        speakers = []
        file_paths = []
        speech_onset_offset_indices = []
        regain_factors = []
        sequences = []
        for filename in filenames:

            speaker_name = filename[0:4]
            speakers.append(speaker_name)

            filepath = os.path.join(directory_path, filename)

            if condition == 'clean':

                sequence = util.load_wav(filepath, self.sample_rate)
                sequences.append(sequence)
                self.num_sequences_in_memory += 1
                regain_factors.append(self.regain / util.rms(sequence))
                # If extract_voice is True, strip the leading and trailing silence.
                if self.extract_voice:
                    # speech_onset_offset_indices holds the start/end indices of the non-silent segment.
                    speech_onset_offset_indices.append(
                        util.get_subsequence_with_speech_indices(sequence))
            else:
                if self.in_memory_percentage == 1 or np.random.uniform(
                        0, 1) <= (self.in_memory_percentage - 0.5) * 2:
                    sequence = util.load_wav(filepath, self.sample_rate)
                    sequences.append(sequence)
                    self.num_sequences_in_memory += 1
                else:
                    sequences.append([-1])

            if speaker_name not in self.speaker_mapping:
                self.speaker_mapping[speaker_name] = len(
                    self.speaker_mapping) + 1

            file_paths.append(filepath)

        return sequences, file_paths, speakers, speech_onset_offset_indices, regain_factors
Example #5
def get_actual_noise(path, numberOfSegments, durOfSegment, sampleFreq=100, filtfreqs=(0.1, 30)):
    ''' Loads all EEG (.vhdr) data sets, does some light preprocessing (filtering, resampling) and then extracts random segments from them.
        Segments have the following properties:
        * re-referenced to common average
        * baseline corrected to first 10 datapoints
    '''
    segmentSize = int(durOfSegment*sampleFreq)
    
    fileList = np.array(os.listdir(path))
    vhdr_indices = np.where([i.endswith('.vhdr') for i in fileList])[0]
    fileList = fileList[vhdr_indices]

    dataSets = []
    for i, fn in enumerate(fileList):
        raw = mne.io.read_raw_brainvision(path + '/' + fn, preload=True, verbose=0)
        raw.filter(*filtfreqs, verbose=0)
        raw.resample(sampleFreq)
        dataSets.append(raw._data)

    numberOfChannels = dataSets[0].shape[0]
    segments = np.zeros((numberOfSegments, numberOfChannels, segmentSize))
    
    from util import rms

    for i in range(numberOfSegments):
        dataSet = dataSets[np.random.choice(np.arange(len(dataSets)))]
        segmentStartIndex = np.random.choice(np.arange(dataSet.shape[1] - segmentSize))
        segment = dataSet[:, segmentStartIndex:segmentStartIndex+segmentSize]
        # Common Average reference
        segment = np.array([seg - np.mean(segment, axis=0) for seg in segment])
        # Baseline Correction
        segment = np.array([seg - np.mean(seg[0:10]) for seg in segment])
        # RMS scaling so each trial is about equally 'loud'
        trial_rms = np.mean([rms(chan) for chan in segment])
        segments[i, :, :] = segment / trial_rms
    return segments
Example #6
def explain(model, input, output_filename_prefix, sample_rate, output_path):

    batch_size = 1
    if len(input['noisy']) < model.receptive_field_length:
        raise ValueError(
            'Input is not long enough to be used with this model.')

    num_output_samples = input['noisy'].shape[0] - (
        model.receptive_field_length - 1)
    num_fragments = int(np.ceil(num_output_samples /
                                model.target_field_length))
    num_batches = int(np.ceil(num_fragments / batch_size))

    denoised_output = []
    noise_output = []
    num_pad_values = 0
    fragment_i = 0
    for batch_i in tqdm.tqdm(range(0, num_batches)):

        if batch_i == num_batches - 1:  # If it's the last batch
            batch_size = num_fragments - batch_i * batch_size

        input_batch = np.zeros((batch_size, model.input_length))

        # Assemble batch
        for batch_fragment_i in range(0, batch_size):

            if fragment_i + model.target_field_length > num_output_samples:
                remainder = input['noisy'][fragment_i:]
                current_fragment = np.zeros((model.input_length, ))
                current_fragment[:remainder.shape[0]] = remainder
                num_pad_values = model.input_length - remainder.shape[0]
            else:
                current_fragment = input['noisy'][fragment_i:fragment_i +
                                                  model.input_length]

            input_batch[batch_fragment_i, :] = current_fragment
            fragment_i += model.target_field_length

        denoised_output_fragments = model.denoise_batch(
            {'data_input': input_batch})
        layer_outputs = model.get_layer_outputs(input_batch)
        plot_layer_outputs(layer_outputs, 2, output_path)
        if type(denoised_output_fragments) is list:
            noise_output_fragment = denoised_output_fragments[1]
            denoised_output_fragment = denoised_output_fragments[0]

        denoised_output_fragment = denoised_output_fragment[
            :, model.target_padding:model.target_padding + model.target_field_length]
        denoised_output_fragment = denoised_output_fragment.flatten().tolist()

        if noise_output_fragment is not None:
            noise_output_fragment = noise_output_fragment[
                :, model.target_padding:model.target_padding + model.target_field_length]
            noise_output_fragment = noise_output_fragment.flatten().tolist()

        if type(denoised_output_fragments) is float:
            denoised_output_fragment = [denoised_output_fragment]
        if type(noise_output_fragment) is float:
            noise_output_fragment = [noise_output_fragment]

        denoised_output = denoised_output + denoised_output_fragment
        noise_output = noise_output + noise_output_fragment

    denoised_output = np.array(denoised_output)
    noise_output = np.array(noise_output)

    if num_pad_values != 0:
        denoised_output = denoised_output[:-num_pad_values]
        noise_output = noise_output[:-num_pad_values]

    valid_noisy_signal = input['noisy'][
        model.half_receptive_field_length:
        model.half_receptive_field_length + len(denoised_output)]

    if input['clean'] is not None:
        input['noise'] = input['noisy'] - input['clean']

        valid_clean_signal = input['clean'][
            model.half_receptive_field_length:
            model.half_receptive_field_length + len(denoised_output)]

        noise_in_denoised_output = denoised_output - valid_clean_signal

        rms_clean = util.rms(valid_clean_signal)
        rms_noise_out = util.rms(noise_in_denoised_output)
        rms_noise_in = util.rms(input['noise'])

        new_snr_db = int(np.round(util.snr_db(rms_clean, rms_noise_out)))
        initial_snr_db = int(np.round(util.snr_db(rms_clean, rms_noise_in)))

        output_clean_filename = output_filename_prefix + 'clean.wav'
        output_clean_filepath = os.path.join(output_path,
                                             output_clean_filename)
        util.write_wav(valid_clean_signal, output_clean_filepath, sample_rate)

        output_denoised_filename = output_filename_prefix + 'denoised_%ddB.wav' % new_snr_db
        output_noisy_filename = output_filename_prefix + 'noisy_%ddB.wav' % initial_snr_db
    else:
        output_denoised_filename = output_filename_prefix + 'denoised.wav'
        output_noisy_filename = output_filename_prefix + 'noisy.wav'

    output_noise_filename = output_filename_prefix + 'noise.wav'

    output_denoised_filepath = os.path.join(output_path,
                                            output_denoised_filename)
    output_noisy_filepath = os.path.join(output_path, output_noisy_filename)
    output_noise_filepath = os.path.join(output_path, output_noise_filename)

    util.write_wav(denoised_output, output_denoised_filepath, sample_rate)
    util.write_wav(valid_noisy_signal, output_noisy_filepath, sample_rate)
    util.write_wav(noise_output, output_noise_filepath, sample_rate)
Example #7
    def get_rms(self, mcmc=False):
        if mcmc:
            return rms(self.fcor - self.get_model(mcmc=True))
        else:
            return rms(self.fcor - self.model)