def frame_combine(frame, file_path, start, end, fbank=None):
    """Return the flattened 41-frame context window around ``frame``.

    The window nominally spans 30 frames before ``frame`` and 10 frames
    after it, and is restricted to the inclusive segment ``[start, end]``:

    * If the segment holds fewer than 41 frames, every window index that
      falls outside the segment is replaced by the nearest segment boundary
      (``start`` or ``end``), i.e. the boundary frame is repeated.
    * Otherwise the window is shifted so that it lies completely inside the
      segment (``[start, start + 40]`` near the beginning,
      ``[end - 40, end]`` near the end).

    Args:
        frame: Index of the frame the window is built for. Callers in this
            file always pass ``start <= frame <= end``.
        file_path: Path handed to ``fbank_reader.HTKFeat_read``. Only used
            when ``fbank`` is not supplied.
        start: First frame index of the segment (inclusive).
        end: Last frame index of the segment (inclusive).
        fbank: Optional pre-loaded feature matrix, indexed by frame along its
            first axis. Passing it avoids re-reading ``file_path`` from disk
            on every call, which matters because callers invoke this function
            once per frame.

    Returns:
        A flat Python list containing the 41 selected frames concatenated
        in order.
    """
    if fbank is None:
        fbank = fbank_reader.HTKFeat_read(file_path).getall()
    else:
        fbank = np.asarray(fbank)

    left_context = 30
    right_context = 10
    window = left_context + right_context + 1  # 41 frames

    if end - start + 1 < window:
        # Segment too short: clamp each window index to the segment so the
        # boundary frames are repeated as padding.
        indices = np.clip(
            np.arange(frame - left_context, frame + right_context + 1),
            start,
            end,
        )
        combined = fbank[indices].reshape(-1)
    else:
        # Segment long enough: slide the window so it stays inside it.
        first = min(max(frame - left_context, start), end - window + 1)
        combined = fbank[first:first + window].reshape(-1)
    return combined.tolist()
def read_train_set(self):
    """Load the next chunk of training utterances as per-frame examples.

    Takes up to ``step_length`` (10) entries from ``self.examples_list``
    starting at ``self.fbank_position`` and rebuilds ``self.example``,
    ``self.labels`` and ``self.num_examples`` from them:

    * entries whose path contains a ``positive`` component contribute the
      frames of the first keyword span with label ``'0'`` and the frames of
      the second keyword span with label ``'1'``;
    * every other entry contributes all of its frames with label ``'2'``.

    When the chunk would run past the end of the list, only the remaining
    tail is used, ``self.fbank_position`` is reset to 0 and
    ``self.examples_list`` is reshuffled.

    Each entry is expected to look like ``"<name> <feature file path>"``;
    empty entries are skipped.
    """
    self.example = []
    self.labels = []
    self.num_examples = 0
    step_length = 10

    total = len(self.examples_list)
    start = self.fbank_position % total
    end = (self.fbank_position + step_length) % total
    if start < end:
        fbank_list = self.examples_list[start:end]
        self.fbank_position += step_length
    else:
        # The chunk wraps around: consume the tail, then start a new pass
        # over a freshly shuffled list.
        fbank_list = self.examples_list[start:total]
        self.fbank_position = 0
        index = np.arange(total)
        np.random.shuffle(index)
        self.examples_list = np.array(self.examples_list)[index]

    def add_span(path, first, last, label):
        # Append one context window per frame of the inclusive span.
        for frame in range(first, last + 1):
            self.example.append(frame_combine(frame, path, first, last))
            self.labels.append(label)
            self.num_examples += 1

    for example in fbank_list:
        if example == '':
            continue
        file_path = example.split(" ")[1]
        if example.split('/').count("positive") > 0:
            # Locate this utterance's record in the position data; it runs
            # up to the character before the next "positive" occurrence.
            # NOTE(review): the fixed offset 15 presumably skips the name
            # returned by getname() plus a separator - confirm.
            record_start = self.position_data.find(getname(example))
            record_end = self.position_data.find("positive", record_start + 1)
            if record_end != -1:
                position_str = self.position_data[record_start + 15:record_end - 1]
            else:
                position_str = self.position_data[record_start + 15:record_end]
            # start and end position of "hello" & start and end position of "xiao gua"
            keyword_position = position_str.split(" ")

            # Read the feature file once; its length does not change
            # between the four positions.
            length = fbank_reader.HTKFeat_read(file_path).getall().shape[0]
            keyword_frame_position = []
            for i in range(4):
                # presumably converts a sample offset to a frame index
                # (offset 240, step 160) - TODO confirm
                frame_position = (int(keyword_position[i]) - 240) // 160
                # Clamp positions that lie past the last frame.
                keyword_frame_position.append(min(frame_position, length - 1))

            add_span(file_path, keyword_frame_position[0],
                     keyword_frame_position[1], '0')
            add_span(file_path, keyword_frame_position[2],
                     keyword_frame_position[3], '1')
        else:
            frame_number = fbank_reader.HTKFeat_read(file_path).getall().shape[0]
            add_span(file_path, 0, frame_number - 1, '2')
def get_next_batch(self, batch_size):
    """Return the features and labels of the next ``batch_size`` utterances.

    Entries are taken from ``self.file_list`` starting at ``self.position``.
    The list is reshuffled whenever a pass starts at index 0; when the batch
    would run past the end of the list only the remaining tail is returned
    and ``self.position`` is reset to 0.

    For every utterance one context window (see ``frame_combine``) is built
    per frame. Frames of an utterance whose path contains a ``negative``
    component are all labelled ``'2'``. For other utterances the frames of
    the first keyword span are labelled ``'0'``, those of the second keyword
    span ``'1'``, and the frames before, between and after the spans ``'2'``.

    Args:
        batch_size: Number of list entries to consume.

    Returns:
        ``(features, labels)``: two NumPy arrays with one entry per
        utterance. When all utterances have the same number of frames the
        entries are stacked into a regular array; otherwise an object array
        holding one array per utterance is returned.
    """
    def to_batch(arrays):
        # np.array() on arrays of different shapes raises on recent NumPy
        # versions, so the ragged case is built explicitly as an object array.
        if len({item.shape for item in arrays}) <= 1:
            return np.array(arrays)
        batch = np.empty(len(arrays), dtype=object)
        for slot, item in enumerate(arrays):
            batch[slot] = item
        return batch

    start = self.position % self.length
    if start == 0:
        # Beginning of a pass: visit the files in a new random order.
        index = np.arange(self.length)
        np.random.shuffle(index)
        self.file_list = np.array(self.file_list)[index]
    end = (self.position + batch_size) % self.length
    if start < end:
        fbank_list = self.file_list[start:end]
        self.position += batch_size
    else:
        fbank_list = self.file_list[start:self.length]
        self.position = 0

    example = []
    label = []
    for fileinfo in fbank_list:
        if fileinfo == '':
            # Blank list entries (e.g. from a trailing newline) carry no
            # file path; skip them like the other readers in this file do.
            continue
        file_path = fileinfo.split(" ")[1]
        fbank = fbank_reader.HTKFeat_read(file_path).getall()
        frame_number = fbank.shape[0]

        if fileinfo.split("/").count("negative") > 0:
            segments = [(0, frame_number, '2')]
        else:
            # The record for this utterance runs up to the character before
            # the next "positive" occurrence (or the last character when
            # there is none).
            # NOTE(review): the fixed offset 15 presumably skips the name
            # returned by getname() plus a separator - confirm.
            first = self.position_list.find(getname(fileinfo))
            second = self.position_list.find("positive", first + 1)
            if second != -1:
                second -= 1
            keyword_position = self.position_list[first + 15:second].split(" ")

            keyword_frame_position = []
            for i in range(4):
                # presumably converts a sample offset to a frame index
                # (offset 240, step 160) - TODO confirm
                frame_position = (int(keyword_position[i]) - 240) // 160
                # Clamp positions that lie past the last frame.
                keyword_frame_position.append(
                    min(frame_position, frame_number - 1))

            # Half-open (start, stop, label) segments covering the utterance.
            segments = [
                (0, keyword_frame_position[0], '2'),
                (keyword_frame_position[0], keyword_frame_position[1] + 1, '0'),
                (keyword_frame_position[1] + 1, keyword_frame_position[2], '2'),
                (keyword_frame_position[2], keyword_frame_position[3] + 1, '1'),
                (keyword_frame_position[3] + 1, frame_number, '2'),
            ]

        frames = []
        frame_labels = []
        for seg_start, seg_stop, tag in segments:
            for frame in range(seg_start, seg_stop):
                frames.append(
                    frame_combine(frame, file_path, seg_start, seg_stop - 1))
                frame_labels.append(tag)
        example.append(np.array(frames))
        label.append(np.array(frame_labels))

    res1 = to_batch(example)
    res2 = to_batch(label)
    print(res1.shape, res2.shape)
    if len(res1) > 1:
        # Only inspect the second utterance when the batch actually has one.
        print(res1[1].shape)
    return res1, res2
def read_data_sets(data_dir=None):
    """Build the training and test data set objects.

    Reads ``positiveKeywordPosition.txt``, ``train.scp`` and ``test.scp``
    from ``data_dir``. The test set is materialised immediately as one
    context window per frame (see ``frame_combine``): first the two keyword
    spans of every entry whose path contains a ``positive`` component
    (labels ``'0'`` and ``'1'``), then all frames of every entry whose path
    contains a ``negative`` component (label ``'2'``). The training list is
    only shuffled here and handed to ``TrainSet``.

    Args:
        data_dir: Directory containing the three input files. Defaults to
            the original hard-coded data location.

    Returns:
        ``(train, test)``: ``TrainSet(train_list, position_data)`` and
        ``TestSet(test_examples, test_labels, test_num, fbank_end_frame)``,
        where ``fbank_end_frame`` is the running total of frames contributed
        by each test utterance.
    """
    import os

    if data_dir is None:
        data_dir = ("/home/disk2/internship_anytime/aslp_hotword_data/"
                    "aslp_wake_up_word_data")

    def read_text(name):
        # Context manager guarantees the file is closed even on error.
        with open(os.path.join(data_dir, name), "r") as handle:
            return handle.read()

    position_data = read_text("positiveKeywordPosition.txt")
    train_list = read_text("train.scp").split('\n')
    test_list = read_text("test.scp").split('\n')

    test_examples = []
    test_labels = []
    test_length = []  # number of test frames contributed by each utterance
    test_num = 0

    # Pass 1: positive utterances (keyword spans only).
    for example in test_list:
        if example == '':
            continue
        if example.split('/').count("positive") < 1:
            continue
        # The record for this utterance runs up to the character before the
        # next "positive" occurrence.
        # NOTE(review): the fixed offset 15 presumably skips the name
        # returned by getname() plus a separator - confirm.
        start = position_data.find(getname(example))
        end = position_data.find("positive", start + 1)
        if end != -1:
            position_str = position_data[start + 15:end - 1]
        else:
            position_str = position_data[start + 15:end]
        # start and end position of "hello" & start and end position of "xiao gua"
        keyword_position = position_str.split(" ")
        file_path = example.split(" ")[1]

        # Read the feature file once; its length is the same for all four
        # positions.
        length = fbank_reader.HTKFeat_read(file_path).getall().shape[0]
        keyword_frame_position = []
        for i in range(4):
            # NOTE(review): the training-side readers in this file compute
            # (position - 240) // 160, while this test-side conversion omits
            # the 240 offset. Left unchanged here - confirm whether the
            # mismatch is intended.
            frame_position = int(keyword_position[i]) // 160
            # Clamp positions that lie past the last frame.
            keyword_frame_position.append(min(frame_position, length - 1))

        test_length.append(
            keyword_frame_position[1] - keyword_frame_position[0] + 1
            + keyword_frame_position[3] - keyword_frame_position[2] + 1)

        spans = (
            (keyword_frame_position[0], keyword_frame_position[1], '0'),
            (keyword_frame_position[2], keyword_frame_position[3], '1'),
        )
        for first, last, tag in spans:
            for frame in range(first, last + 1):
                test_examples.append(
                    frame_combine(frame, file_path, first, last))
                test_labels.append(tag)
                test_num += 1

    # Pass 2: negative utterances (every frame).
    for example in test_list:
        if example == '':
            continue
        if example.split('/').count("negative") < 1:
            continue
        file_path = example.split(" ")[1]
        frame_number = fbank_reader.HTKFeat_read(file_path).getall().shape[0]
        test_length.append(frame_number)
        for frame in range(frame_number):
            test_examples.append(
                frame_combine(frame, file_path, 0, frame_number - 1))
            test_labels.append('2')
            test_num += 1

    # Running total of frames: entry i is the number of test frames
    # contributed by utterances 0..i.
    fbank_end_frame = []
    running_total = 0
    for count in test_length:
        running_total += count
        fbank_end_frame.append(running_total)

    index = np.arange(len(train_list))
    np.random.shuffle(index)
    train_list = np.array(train_list)[index]

    train = TrainSet(train_list, position_data)
    test = TestSet(test_examples, test_labels, test_num, fbank_end_frame)
    return train, test