Example #1
0
def frame_combine(frame, file_path, start, end, fbank=None,
                  left_context=30, right_context=10):
    """Stack a fixed-size context window of feature frames around *frame*.

    The window holds ``left_context + 1 + right_context`` frames (41 with the
    defaults) taken from the segment ``[start, end]`` (both inclusive):

    * If the segment is shorter than the window, the window is centred on
      ``frame`` (``left_context`` frames before, ``right_context`` after) and
      every index that falls outside the segment is replaced by the nearest
      segment edge, i.e. the first/last segment frame is repeated.
    * Otherwise the window is shifted, without padding, so that it lies
      entirely inside the segment.

    Args:
        frame: Index of the frame the window is built for.
        file_path: Feature file to load when ``fbank`` is not supplied.
        start: First frame index of the segment.
        end: Last frame index of the segment (inclusive).
        fbank: Optional, already-loaded feature matrix indexed by frame on
            its first axis.  Passing it avoids re-reading ``file_path`` on
            every call, which matters because callers invoke this function
            once per frame.
        left_context: Number of frames taken before ``frame``.
        right_context: Number of frames taken after ``frame``.

    Returns:
        The selected frames flattened into one plain Python list.
    """
    if fbank is None:
        fbank = fbank_reader.HTKFeat_read(file_path).getall()

    window = left_context + 1 + right_context

    if end - start + 1 < window:
        # Segment too short: clamp the centred window's indices to the
        # segment so that the edge frames are replicated as padding.
        wanted = np.arange(frame - left_context, frame + right_context + 1)
        combined = fbank[np.clip(wanted, start, end)].reshape(-1)
    else:
        # Segment long enough: slide the window so it stays within
        # [start, end] instead of padding.
        first = min(max(frame - left_context, start), end - (window - 1))
        combined = fbank[first:first + window].reshape(-1)

    return combined.tolist()
Example #2
0
    def read_train_set(self):
        """Load the next chunk of training utterances as per-frame examples.

        Takes up to ``step_length`` (10) entries from ``self.examples_list``
        starting at ``self.fbank_position``.  When the chunk would run past
        the end of the list, only the remaining tail is used, the position is
        reset to 0 and the list is reshuffled for the next pass.

        Results are stored on the instance (nothing is returned):

        * ``self.example``: one context-window feature vector per frame, as
          produced by ``frame_combine``.
        * ``self.labels``: ``'0'`` for frames of the first keyword, ``'1'``
          for frames of the second keyword, ``'2'`` for every frame of a
          non-positive utterance.
        * ``self.num_examples``: number of entries appended.
        """
        self.example = []
        self.labels = []
        self.num_examples = 0
        step_length = 10
        total = len(self.examples_list)
        start = self.fbank_position % total
        end = (self.fbank_position + step_length) % total
        if start < end:
            fbank_list = self.examples_list[start:end]
            self.fbank_position += step_length
        else:
            # The chunk wraps around: consume the tail, then reshuffle so the
            # next pass sees the utterances in a different order.
            fbank_list = self.examples_list[start:total]
            self.fbank_position = 0
            index = np.arange(total)
            np.random.shuffle(index)
            self.examples_list = np.array(self.examples_list)[index]

        for example in fbank_list:
            if example == '':
                continue
            file_path = example.split(" ")[1]
            # Read the feature file once per utterance; only its frame count
            # is needed here, frame_combine loads the data itself.
            frame_number = fbank_reader.HTKFeat_read(file_path).getall().shape[0]

            if example.split('/').count("positive") > 0:
                # Locate this utterance's record in the position file; it
                # runs up to the next occurrence of "positive".
                # NOTE(review): the +15 offset presumably skips the utterance
                # name and its separator - confirm against the file format.
                name_at = self.position_data.find(getname(example))
                next_at = self.position_data.find("positive", name_at + 1)
                if next_at != -1:
                    position_str = self.position_data[name_at + 15:next_at - 1]
                else:
                    position_str = self.position_data[name_at + 15:next_at]

                # start and end position of "hello" & start and end position of "xiao gua"
                keyword_position = position_str.split(" ")

                keyword_frame_position = []
                for i in range(4):
                    # Convert a sample position into a frame index.
                    # NOTE(review): 240/160 look like a sample offset and the
                    # frame hop in samples - confirm.
                    frame_position = (int(keyword_position[i]) - 240) // 160
                    # Clamp to valid frame indices.  Positions below 240
                    # samples would otherwise yield a negative index that
                    # wraps around to the end of the feature matrix.
                    frame_position = max(0, min(frame_position,
                                                frame_number - 1))
                    keyword_frame_position.append(frame_position)

                keyword_spans = (
                    ('0', keyword_frame_position[0], keyword_frame_position[1]),
                    ('1', keyword_frame_position[2], keyword_frame_position[3]),
                )
                for label, first, last in keyword_spans:
                    for frame in range(first, last + 1):
                        self.example.append(
                            frame_combine(frame, file_path, first, last))
                        self.labels.append(label)
                        self.num_examples += 1

            else:
                # Non-positive utterance: every frame is a '2' example.
                for frame in range(frame_number):
                    self.example.append(
                        frame_combine(frame, file_path, 0, frame_number - 1))
                    self.labels.append('2')
                    self.num_examples += 1
Example #3
0
 def get_next_batch(self, batch_size):
     """Return per-frame features and labels for the next batch of files.

     Up to ``batch_size`` entries are taken from ``self.file_list`` starting
     at ``self.position``.  The list is reshuffled whenever a pass starts at
     index 0; when the batch would run past the end of the list only the
     remaining tail is returned and the position is reset to 0.

     For every file, each frame is turned into a context-window vector with
     ``frame_combine`` and labelled ``'0'`` (first keyword), ``'1'`` (second
     keyword) or ``'2'`` (everything else, including all frames of files
     whose path contains a ``negative`` component).

     Returns:
         ``(features, labels)``: two arrays with one entry per file.  When
         all files in the batch have the same number of frames these are
         regular stacked arrays; otherwise they are 1-D object arrays whose
         elements are the per-file arrays.
     """

     def pack(arrays):
         # np.array() on arrays of differing shapes raises ValueError on
         # NumPy >= 1.24, so ragged batches are packed into an object array
         # explicitly; uniform batches are stacked exactly as before.
         if len({item.shape for item in arrays}) <= 1:
             return np.array(arrays)
         packed = np.empty(len(arrays), dtype=object)
         for slot, item in enumerate(arrays):
             packed[slot] = item
         return packed

     start = self.position % self.length
     if start == 0:
         index = np.array(range(self.length))
         np.random.shuffle(index)
         self.file_list = np.array(self.file_list)[index]
     end = (self.position + batch_size) % self.length
     if start < end:
         fbank_list = self.file_list[start:end]
         self.position += batch_size
     else:
         fbank_list = self.file_list[start:self.length]
         self.position = 0

     example = []
     label = []
     for fileinfo in fbank_list:
         file_path = fileinfo.split(" ")[1]
         frame_number = fbank_reader.HTKFeat_read(file_path).getall().shape[0]

         if fileinfo.split("/").count("negative") > 0:
             # The whole file is one '2' segment.
             bounds = [0, frame_number]
             tags = ['2']
         else:
             # Locate this file's record in the position list; it runs up
             # to the next occurrence of "positive".
             # NOTE(review): the +15 offset presumably skips the utterance
             # name and its separator - confirm against the file format.
             first = self.position_list.find(getname(fileinfo))
             second = self.position_list.find("positive", first + 1)
             if second != -1:
                 second -= 1
             keyword_position = self.position_list[first + 15:second].split(" ")

             keyword_frame_position = []
             for i in range(4):
                 frame_position = (int(keyword_position[i]) - 240) // 160
                 # Clamp to valid frame indices.  Positions below 240
                 # samples would otherwise give a negative index that wraps
                 # around to the end of the feature matrix.
                 frame_position = max(0, min(frame_position,
                                             frame_number - 1))
                 keyword_frame_position.append(frame_position)

             # Half-open segment boundaries: filler, keyword 1, filler,
             # keyword 2, filler.  Keyword end frames are inclusive in the
             # position data, hence the +1.
             bounds = [0,
                       keyword_frame_position[0],
                       keyword_frame_position[1] + 1,
                       keyword_frame_position[2],
                       keyword_frame_position[3] + 1,
                       frame_number]
             tags = ['2', '0', '2', '1', '2']

         frames = []
         frame_labels = []
         for st, en, tag in zip(bounds, bounds[1:], tags):
             for frame in range(st, en):
                 frames.append(frame_combine(frame, file_path, st, en - 1))
                 frame_labels.append(tag)
         example.append(np.array(frames))
         label.append(np.array(frame_labels))

     res1 = pack(example)
     res2 = pack(label)
     print(res1.shape, res2.shape)
     # Debug output; guarded because a batch may contain a single file.
     if len(res1) > 1:
         print(res1[1].shape)
     return res1, res2
Example #4
0
def read_data_sets(
        data_dir="/home/disk2/internship_anytime/aslp_hotword_data/aslp_wake_up_word_data"):
    """Build the training and test sets from the data directory.

    Reads ``positiveKeywordPosition.txt``, ``train.scp`` and ``test.scp``
    from ``data_dir``.  The test set is fully materialised here: first the
    keyword frames of every positive test utterance (label ``'0'`` for the
    first keyword, ``'1'`` for the second), then every frame of every
    negative test utterance (label ``'2'``).  The training list is shuffled
    and handed to ``TrainSet`` together with the keyword position text.

    Args:
        data_dir: Directory containing the three input files.  Defaults to
            the original hard-coded location.

    Returns:
        ``(train, test)``: a ``TrainSet`` and a ``TestSet`` instance.
    """
    import os

    def read_text(file_name):
        # 'with' guarantees the handle is closed even if reading fails.
        with open(os.path.join(data_dir, file_name), "r") as handle:
            return handle.read()

    position_data = read_text("positiveKeywordPosition.txt")
    train_list = read_text("train.scp").split('\n')
    test_list = read_text("test.scp").split('\n')

    test_examples = []
    test_labels = []
    test_length = []  # number of test frames contributed by each utterance
    test_num = 0

    # Pass 1: positive utterances (keyword frames only).
    for example in test_list:
        if example == '':
            continue
        if example.split('/').count("positive") < 1:
            continue

        # Locate this utterance's record in the position file; it runs up to
        # the next occurrence of "positive".
        # NOTE(review): the +15 offset presumably skips the utterance name
        # and its separator - confirm against the file format.
        name_at = position_data.find(getname(example))
        next_at = position_data.find("positive", name_at + 1)
        if next_at != -1:
            position_str = position_data[name_at + 15:next_at - 1]
        else:
            position_str = position_data[name_at + 15:next_at]

        # start and end position of "hello" & start and end position of "xiao gua"
        keyword_position = position_str.split(" ")

        file_path = example.split(" ")[1]
        # Read the feature file once; only the frame count is needed here.
        length = fbank_reader.HTKFeat_read(file_path).getall().shape[0]

        keyword_frame_position = []
        for i in range(4):
            # NOTE(review): read_train_set in this file subtracts 240 samples
            # before dividing by 160; confirm which conversion is intended.
            frame_position = int(keyword_position[i]) // 160
            # Clamp to valid frame indices.
            frame_position = max(0, min(frame_position, length - 1))
            keyword_frame_position.append(frame_position)

        test_length.append(keyword_frame_position[1] -
                           keyword_frame_position[0] + 1 +
                           keyword_frame_position[3] -
                           keyword_frame_position[2] + 1)

        keyword_spans = (
            ('0', keyword_frame_position[0], keyword_frame_position[1]),
            ('1', keyword_frame_position[2], keyword_frame_position[3]),
        )
        for label, first, last in keyword_spans:
            for frame in range(first, last + 1):
                test_examples.append(
                    frame_combine(frame, file_path, first, last))
                test_labels.append(label)
                test_num += 1

    # Pass 2: negative utterances (all frames).
    for example in test_list:
        if example == '':
            continue
        if example.split('/').count("negative") < 1:
            continue
        file_path = example.split(" ")[1]

        frame_number = fbank_reader.HTKFeat_read(file_path).getall().shape[0]
        test_length.append(frame_number)
        for frame in range(frame_number):
            test_examples.append(
                frame_combine(frame, file_path, 0, frame_number - 1))
            test_labels.append('2')
            test_num += 1

    # Cumulative frame counts: entry i is the total number of test frames up
    # to and including utterance i.
    fbank_end_frame = []
    running_total = 0
    for count in test_length:
        running_total += count
        fbank_end_frame.append(running_total)

    index = np.arange(len(train_list))
    np.random.shuffle(index)
    train_list = np.array(train_list)[index]

    train = TrainSet(train_list, position_data)
    test = TestSet(test_examples, test_labels, test_num, fbank_end_frame)

    return train, test