Beispiel #1
0
 def read_data(self, DataListName, FeatureName,shape):
     '''
     Load original data and the feature that you choose to use is needed
     You can choose different output shape.
     :DataListName: The log.txt path
     :FeatureName: The name of the feature that you choose to use.
         Only the first letter (case-insensitive) is inspected:
         'E' selects the energy feature, 'Z' selects ZCR. Anything else
         (including an empty string) prints a notice and falls back to ZCR.
     :shape: The output shape that you prefer (used as the column count
         of the returned 2-D array)
     :return: (Data, Label) - the per-item reshaped effective features
         stacked along axis 0, and the label list as returned by
         PreProcessing.process
     '''
     # Slicing instead of indexing so an empty feature name selects the
     # default branch rather than raising IndexError.
     kind = FeatureName.capitalize()[:1]
     processer = PreProcessing(512, 128)
     _, _, energy_list, zcr_list, endpoint_list, Label = processer.process(DataListName)

     if kind == 'E':
         feature_list = energy_list
     else:
         if kind != 'Z':
             print("please choose correct feature, and we will return ZCR by default")
         feature_list = zcr_list

     # The zero-row seed fixes the column count and float dtype, so the
     # result for an empty input is still an array of shape (0, shape).
     chunks = [np.zeros((0, shape))]
     for feature, endpoints in zip(feature_list, endpoint_list):
         effective = processer.effective_feature(feature, endpoints)
         chunks.append(processer.reshape(effective, shape))
     # Concatenate once instead of re-allocating the array on every item.
     Data = np.concatenate(chunks, axis=0)
     return Data, Label
 def setUp(self):
     '''
     Prepare the fixture: run VoiceDataSetBuilder.build() targeting
     ./TEST_DIR with its log at ./TEST_DIR/data_log.txt, create the
     PreProcessing instance used by the tests and switch matplotlib to
     interactive mode with figure 1 selected.
     '''
     self.DATA_DIR = './TEST_DIR'
     self.LOG = './TEST_DIR/data_log.txt'
     VoiceDataSetBuilder(
         rate=44100,
         log_file=self.LOG,
         dst_path=self.DATA_DIR,
     ).build()
     self.pre_process = PreProcessing(overlap=128, frame_size=512)
     # Interactive mode keeps plotting calls from blocking the test run.
     plt.ion()
     plt.figure(1)
Beispiel #3
0
 def load_target(self, ModelListName):
     '''
     Load model data
     this is the model data for classification
     :ModelListName: The Model_log.txt path
     :return: (mfcc entries, labels) restricted to the items whose first
         two endpoint values differ
     '''
     processer = PreProcessing(512, 128)
     (wav_list, frame_list, mfcc_list, energy_list, zcr_list,
      endpoint_list, label_list) = processer.process(ModelListName)

     kept_mfcc, kept_labels = [], []
     for idx, mfcc in enumerate(mfcc_list):
         endpoints = endpoint_list[idx]
         # NOTE(review): the result is never used; the call is retained
         # in case effective_feature validates its input or has side
         # effects - confirm and drop if it is pure.
         processer.effective_feature(mfcc, endpoints)
         if endpoints[1] - endpoints[0] == 0:
             # Zero-length endpoint span: skip this item.
             continue
         kept_labels.append(label_list[idx])
         kept_mfcc.append(mfcc)
     return kept_mfcc, kept_labels
Beispiel #4
0
 def read_data(self, DataListName):
     '''
     Load original data
     :DataListName: The log.txt path
     :return: (mfcc entries, labels) restricted to the items whose first
         two endpoint values differ
     '''
     processer = PreProcessing(512, 128)
     (wav_list, frame_list, mfcc_list, energy_list, zcr_list,
      endpoint_list, label_list) = processer.process(DataListName)

     kept_mfcc, kept_labels = [], []
     for idx, mfcc in enumerate(mfcc_list):
         endpoints = endpoint_list[idx]
         # NOTE(review): the result is never used; the call is retained
         # in case effective_feature validates its input or has side
         # effects - confirm and drop if it is pure.
         processer.effective_feature(mfcc, endpoints)
         if endpoints[1] - endpoints[0] == 0:
             # Zero-length endpoint span: skip this item.
             continue
         kept_labels.append(label_list[idx])
         kept_mfcc.append(mfcc)
     return kept_mfcc, kept_labels
class PreProcessingTests(unittest.TestCase):
    """Interactive, plot-based check of the PreProcessing pipeline.

    The test draws one figure per processed item and waits for a button
    press on each, so it needs a display and a person at the keyboard.
    """

    def setUp(self):
        """Build the data set under ./TEST_DIR and prepare the plotting state."""
        self.DATA_DIR = './TEST_DIR'
        self.LOG = './TEST_DIR/data_log.txt'
        builder = VoiceDataSetBuilder(
            dst_path=self.DATA_DIR,
            log_file=self.LOG,
            rate=44100)
        builder.build()
        self.pre_process = PreProcessing(frame_size=512, overlap=128)
        plt.ion()
        plt.figure(1)

    def test_all(self):
        """Plot waveform, energy, longest effective ZCR segment and ZCR per item."""
        wav_list, frame_list, energy_list, zcr_list, endpoint_list = \
            self.pre_process.process(self.LOG)
        # Samples advanced per frame; converts a frame index into a sample
        # position on the waveform plot.
        hop = self.pre_process.frame_size - self.pre_process.overlap
        for i in range(len(frame_list)):
            wav_data = wav_list[i]
            print(np.max(np.abs(wav_data)))
            energys = energy_list[i]
            zcrs = zcr_list[i]
            endpoints = endpoint_list[i]
            effective_es = PreProcessing.effective_feature(zcrs, endpoints)
            plt.figure(i + 1)

            # Top-left: raw waveform with endpoints in sample units.
            plt.subplot(221)
            plt.plot(wav_data)
            print(endpoints)
            for ep in endpoints:
                plt.axvline(ep * hop, color='r')

            # Top-right: per-frame energy with endpoints in frame units.
            plt.subplot(222)
            plt.plot(energys)
            for ep in endpoints:
                plt.axvline(ep, color='r')

            # Bottom-left: the longest effective segment (first one wins ties).
            plt.subplot(223)
            longest_e = []
            for e in effective_es:
                if len(e) > len(longest_e):
                    longest_e = e
            plt.plot(longest_e)

            # Bottom-right: per-frame zero-crossing rate with endpoints.
            plt.subplot(224)
            plt.plot(zcrs)
            for ep in endpoints:
                plt.axvline(ep, color='r')
            plt.show()
            plt.waitforbuttonpress()

    def tearDown(self):
        """Delete the generated files, the data directory and the log file."""
        # Check for the directory before listing it: os.listdir raises
        # FileNotFoundError when the directory is missing.
        if os.path.isdir(self.DATA_DIR):
            for entry in os.listdir(self.DATA_DIR):
                os.remove(os.path.join(self.DATA_DIR, entry))
            # os.rmdir removes only this directory; os.removedirs would also
            # try to prune every empty parent named in the path.
            os.rmdir(self.DATA_DIR)
        if os.path.exists(self.LOG):
            os.remove(self.LOG)
class FeatureExtractorsTests(unittest.TestCase):
    """Check that FeatureExtractors reproduces the PreProcessing results."""

    def setUp(self):
        """Create random waves (one per column) and their reference frames."""
        self.WAVE_NUM = 10
        self.WAVE_DURANCE = 1000
        # Shape is (WAVE_DURANCE, WAVE_NUM): each COLUMN is one wave.
        self.wave_list = np.random.randn(self.WAVE_DURANCE, self.WAVE_NUM)
        self.base = PreProcessing(100, 10)
        # Iterate the transpose so each framed signal is a whole wave
        # (a column), matching the wave_list[:, i] indexing used in
        # test_enhance_frame. Iterating wave_list directly would frame
        # WAVE_DURANCE rows of only WAVE_NUM samples each.
        self.frames = np.array(
            [self.base.Enframe(wave) for wave in self.wave_list.T])

    def test_enhance_frame(self):
        """enhance_frame must equal PreProcessing.Enframe for every wave."""
        for i in range(self.WAVE_NUM):
            wave = self.wave_list[:, i]
            self.assertTrue((
                FeatureExtractors.enhance_frame(
                    wav_data=wave,
                    frame_size=100,
                    overlap=10,
                    windowing_method='Hamming'
                ) ==
                self.base.Enframe(wave)
            ).all())

    def test_zero_crossing_rate(self):
        """zero_crossing_rate must equal PreProcessing.ZCR element-wise."""
        # .all() rather than .any(): a single coinciding element must not
        # be enough to pass. The assertion does not depend on a wave
        # index, so it is evaluated once.
        self.assertTrue((
            FeatureExtractors.zero_crossing_rate(
                frames=self.frames
            ) ==
            self.base.ZCR(self.frames)
        ).all())

    def test_energy(self):
        """energy must equal PreProcessing.energy element-wise."""
        self.assertTrue((
            FeatureExtractors.energy(self.frames) ==
            self.base.energy(self.frames)
        ).all())
Beispiel #7
0
def call_extraction(args):
    """Extract template data from an XML file and return it as a string.

    :param args: exactly three items: input file name, template name and
        output file name. File names are appended to
        ``constants.base_filepath``. Passing the literal string ``'None'``
        as the output file name skips writing the result to disk.
    :return: the extracted XML serialised by ``XMLUtil.xml_to_string``.
    :raises IncorrectArgumentNumberException: if ``args`` does not hold
        exactly three items.
    """
    if len(args) != 3:
        raise IncorrectArgumentNumberException(3, args)
    input_file, template_name, output_file = args

    with open(constants.base_filepath + input_file) as file:
        data = file.read()
    try:
        xml_tree = ET.fromstring(data)
    except (ET.XMLSyntaxError, ValueError):
        # Retry once with every <?...?> construct (XML declaration /
        # processing instructions) stripped.
        # NOTE(review): presumably this works around the parser rejecting
        # str input that carries an encoding declaration - confirm.
        data = re.sub(r'<\?.*?\?>', '', data)
        xml_tree = ET.fromstring(data)

    template = XMLUtil.Template(template_name)

    pre_processed_xml_tree = PreProcessing.apply_all(
        xml_tree, template.pre_process_queue)

    extracted_xml = XMLExtractor.extract_template_data_from_xml(
        template.get_template(), pre_processed_xml_tree)

    extracted_xml = PostProcessing.apply_all(extracted_xml,
                                             template.post_process_queue)

    out = XMLUtil.xml_to_string(extracted_xml)

    # The sentinel is the string 'None', not the None object.
    if output_file != 'None':
        with open(constants.base_filepath + output_file, 'w') as file:
            file.write(out)

    return out
 def read_data(self, DataListName, FeatureName, shape):
     '''
     Load original data and the feature that you choose to use is needed
     You can choose different output shape.
     :DataListName: The log.txt path
     :FeatureName: The name of the feature that you choose to use.
         Only the first letter (case-insensitive) is inspected:
         'E' selects energy, 'Z' selects ZCR, 'W' returns the
         element-wise product of the energy and ZCR data. Anything else
         (including an empty string) prints a notice and falls back to ZCR.
     :shape: The output shape that you prefer (used as the column count
         of the returned 2-D array)
     :return: (data, labels) - the stacked reshaped effective features
         and the labels of the items that produced a non-empty result
     '''
     # Slicing instead of indexing so an empty feature name selects the
     # default branch rather than raising IndexError.
     kind = FeatureName.capitalize()[:1]
     processer = PreProcessing(512, 128)
     (_, _, _, energy_list, zcr_list,
      endpoint_list, label_list) = processer.process(DataListName)

     def collect(feature_list):
         # Stack the non-empty reshaped effective features of every item
         # and gather the labels of the items that were kept.
         # The zero-row seed fixes the column count and float dtype, so
         # an empty input still yields an array of shape (0, shape).
         chunks = [np.zeros((0, shape))]
         labels = []
         for feature, endpoints, label in zip(
                 feature_list, endpoint_list, label_list):
             effective = processer.effective_feature(feature, endpoints)
             effective = processer.reshape(effective, shape)
             if len(effective) == 0:
                 continue
             chunks.append(effective)
             labels.append(label)
         # Concatenate once instead of re-allocating on every item.
         return np.concatenate(chunks, axis=0), labels

     if kind == 'E':
         return collect(energy_list)
     if kind == 'W':
         # Labels come from the ZCR pass; the energy pass only
         # contributes data.
         zcrdata, eff_label_list = collect(zcr_list)
         energydata, _ = collect(energy_list)
         return energydata * zcrdata, eff_label_list
     if kind != 'Z':
         print(
             "please choose correct feature, and we will return ZCR by default"
         )
     return collect(zcr_list)
 def setUp(self):
     '''
     Create WAVE_NUM random waves of WAVE_DURANCE samples each (stored
     one per column) together with their PreProcessing.Enframe frames.
     '''
     self.WAVE_NUM = 10
     self.WAVE_DURANCE = 1000
     # Shape is (WAVE_DURANCE, WAVE_NUM): each COLUMN is one wave.
     self.wave_list = np.random.randn(self.WAVE_DURANCE, self.WAVE_NUM)
     self.base = PreProcessing(100, 10)
     # Iterate the transpose so every framed signal is a whole wave;
     # iterating wave_list directly would frame WAVE_DURANCE rows of
     # only WAVE_NUM samples each.
     self.frames = np.array([self.base.Enframe(wave) for wave in self.wave_list.T])