# First version of main(): run two previously trained models (fbank CNN and
# MFCC) over the training set and save their frame-level predictions together
# with the one-hot labels, apparently for later ensembling.
def main(argv):
    timestamp1 = time.time()
    first_map = []
    second_map = []

    # Read 48_39.map: maps each of the 48 training phones to its 39-phone class.
    with open(argv[0] + 'phones/48_39.map') as myfile:
        for line in myfile:
            first_map.append(line.strip().split('\t'))
    first_map = np.array(first_map)
    print('first_map', first_map.shape)

    # Read 48phone_char.map: maps each of the 48 phones to an index and a
    # single output character.
    with open(argv[0] + '48phone_char.map') as myfile:
        for line in myfile:
            second_map.append(line.strip().split('\t'))
    second_map = np.array(second_map)
    print('second_map', second_map.shape)

    # Read MFCC features. Each ark line is '<speaker>_<sentence>_<frame> f1 ... f39';
    # the three id fields are appended as extra columns for the sorting below.
    frame_amount = []
    mfcc_train_data = []
    with open(argv[0] + 'mfcc/train.ark') as myfile:
        for line in myfile:
            input_buffer = line.strip().split()
            id_value = input_buffer[0].split('_')
            input_buffer = input_buffer + id_value
            mfcc_train_data.append(input_buffer)
    print('success -- mfcc')
    mfcc_train_data = np.array(mfcc_train_data)

    # Read fbank features the same way.
    fbank_train_data = []
    with open(argv[0] + 'fbank/train.ark') as myfile:
        for line in myfile:
            input_buffer = line.strip().split()
            id_value = input_buffer[0].split('_')
            input_buffer = input_buffer + id_value
            fbank_train_data.append(input_buffer)
    print('success -- fbank')
    fbank_train_data = np.array(fbank_train_data)
    print(fbank_train_data.shape)
    print(fbank_train_data[0])

    # Stable two-pass sort groups the frames of each utterance together while
    # preserving frame order. The same indices are applied to the fbank rows,
    # which assumes both ark files list frames in the same original order.
    index_1 = mfcc_train_data[:, -3].argsort(kind='mergesort')
    mfcc_train_data = mfcc_train_data[index_1]
    fbank_train_data = fbank_train_data[index_1]
    index_2 = mfcc_train_data[:, -2].argsort(kind='mergesort')
    mfcc_train_data = mfcc_train_data[index_2]
    fbank_train_data = fbank_train_data[index_2]
    print('after sort')

    # Count the number of frames in each (speaker, sentence) utterance.
    count = 1
    for i in range(mfcc_train_data.shape[0] - 1):
        if (mfcc_train_data[i, -3] == mfcc_train_data[i + 1, -3]
                and mfcc_train_data[i, -2] == mfcc_train_data[i + 1, -2]):
            count += 1
        else:
            frame_amount.append(count)
            count = 1
    frame_amount.append(count)
    frame_amount = np.array(frame_amount)
    np.save('frame_amount.npy', frame_amount)

    print(mfcc_train_data[0, 0], mfcc_train_data[1, 0])
    print('frame_amount.shape', frame_amount.shape)
    print(frame_amount[0], frame_amount[-1])
    print('frame_amount sum : ', np.sum(frame_amount))

    # Keep only the feature columns (drop the id string and the three appended
    # id fields).
    mfcc_train_data = mfcc_train_data[:, 1:-3].astype(float)
    fbank_train_data = fbank_train_data[:, 1:-3].astype(float)

    # Read labels: each line is '<speaker>_<sentence>_<frame>,<phone>'. The
    # phone name is replaced by its row index in the 48-phone map, and the rows
    # are sorted with the same two-pass scheme so labels line up with features.
    label = []
    with open(argv[0] + 'label/train.lab') as myfile:
        for line in myfile:
            input_buffer = line.strip().split(',')
            id_value = input_buffer[0].split('_')
            input_buffer[1] = np.where(first_map[:, 0] == input_buffer[1])[0][0]
            input_buffer = input_buffer + id_value
            label.append(input_buffer)
    print('success -- label')
    label = np.array(label, dtype=object)
    label = label[label[:, -3].argsort(kind='mergesort')]
    label = label[label[:, -2].argsort(kind='mergesort')]
    print(label.shape)

    lb = preprocessing.LabelBinarizer()
    new_label = label[:, 1].astype(int)
    lb.fit(new_label)
    one_hot_label = lb.transform(new_label)
    print(one_hot_label[0])
    print('one_hot_label.shape: ', one_hot_label.shape)
    timestamp2 = time.time()

    # Pad every utterance to MAX_FRAME_LENGTH frames (69 fbank / 39 MFCC dims).
    x_f, y_f, y_one_hot_f = batch_padding(fbank_train_data, label[:, 1],
                                          one_hot_label, frame_amount,
                                          MAX_FRAME_LENGTH, 69)
    x_m, y_m, y_one_hot_m = batch_padding(mfcc_train_data, label[:, 1],
                                          one_hot_label, frame_amount,
                                          MAX_FRAME_LENGTH, 39)

    filepath1 = 'my_model-fbank-cnn.hdf5'
    model1 = load_model(filepath1)
    prediction1 = model1.predict(x_f, batch_size=128, verbose=1)
    np.save('prediction1.npy', prediction1)
    np.save('one_hot_f.npy', y_one_hot_f)

    filepath2 = 'my_model-2119.hdf5'
    model2 = load_model(filepath2)
    prediction2 = model2.predict(x_m, batch_size=128, verbose=1)
    print('prediction.shape : ', prediction2.shape)
    np.save('prediction2.npy', prediction2)
'''
# The triple quote above appears to close a block comment that disables this
# first version of main(); its opening counterpart sits earlier in the file.

# Dependencies and constants used by the code below.
import time

import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from keras.models import Sequential, load_model
from keras.layers import (GRU, BatchNormalization, Bidirectional, Conv1D,
                          Dense, Dropout, Masking, TimeDistributed)
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

# Assumed padded sequence length; 1000 matches the Conv1D input_shape used in
# the training version of main() below.
MAX_FRAME_LENGTH = 1000
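# batch_padding is called throughout this file but not defined in this section.
# If it is not defined elsewhere in the file, a minimal sketch of the
# training-time variant could look like the following (all argument names are
# assumptions). It regroups the flat frame matrix into per-utterance sequences
# and zero-pads each one to max_len. Note the test-time call in the second
# version of main() uses a shorter signature (features, frame_amount, max_len,
# dim) and returns only the padded features, so it would need its own variant.
def batch_padding(data, labels, one_hot, frame_amount, max_len, feature_dim=39):
    num_seq = len(frame_amount)
    x = np.zeros((num_seq, max_len, feature_dim))
    y = np.zeros((num_seq, max_len), dtype=int)
    y_one_hot = np.zeros((num_seq, max_len, one_hot.shape[1]))
    start = 0
    for i, n in enumerate(frame_amount):
        n = min(int(n), max_len)  # truncate utterances longer than max_len
        x[i, :n, :] = data[start:start + n]
        y[i, :n] = labels[start:start + n].astype(int)
        y_one_hot[i, :n, :] = one_hot[start:start + n]
        start += int(frame_amount[i])
    return x, y, y_one_hot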
# Second version of main(): run the trained fbank model over the test set and
# write the predicted phone sequence for each utterance to a CSV file. (The
# third version below re-binds the name main, so this one is only reachable if
# renamed or called before that point.)
def main(argv):
    timestamp1 = time.time()
    first_map = []
    second_map = []

    # Read 48_39.map #
    with open(argv[0] + 'phones/48_39.map') as myfile:
        for line in myfile:
            first_map.append(line.strip().split('\t'))
    first_map = np.array(first_map)
    print('first_map', first_map.shape)

    # Read 48phone_char.map #
    with open(argv[0] + '48phone_char.map') as myfile:
        for line in myfile:
            second_map.append(line.strip().split('\t'))
    second_map = np.array(second_map)
    print('second_map', second_map.shape)

    # Read fbank features for the test set; the three id fields are appended
    # as extra columns, as in the training code.
    fbank_test_data = []
    frame_id = []
    frame_amount = []
    name_0 = []
    with open(argv[0] + 'fbank/test.ark') as myfile:
        for line in myfile:
            input_buffer = line.strip().split()
            id_value = input_buffer[0].split('_')
            input_buffer = input_buffer + id_value
            fbank_test_data.append(input_buffer)
            frame_id.append(id_value[2])
    print('success -- fbank')
    fbank_test_data = np.array(fbank_test_data)
    frame_id = np.array(frame_id)

    # Read MFCC features (parsed the same way, but unused in this version).
    mfcc_test_data = []
    with open(argv[0] + 'mfcc/test.ark') as myfile:
        for line in myfile:
            input_buffer = line.strip().split()
            id_value = input_buffer[0].split('_')
            input_buffer = input_buffer + id_value
            mfcc_test_data.append(input_buffer)
    print('success -- mfcc')

    # Count frames per (speaker, sentence) utterance and record the utterance
    # name '<speaker>_<sentence>' for the output CSV.
    count = 1
    for i in range(fbank_test_data.shape[0] - 1):
        if (fbank_test_data[i, -3] == fbank_test_data[i + 1, -3]
                and fbank_test_data[i, -2] == fbank_test_data[i + 1, -2]):
            count += 1
        else:
            frame_amount.append(count)
            count = 1
            name_0.append(fbank_test_data[i, -3] + '_' + fbank_test_data[i, -2])
    frame_amount.append(count)
    name_0.append(fbank_test_data[-1, -3] + '_' + fbank_test_data[-1, -2])
    frame_amount = np.array(frame_amount)
    print('fbank_test_data.shape : ', fbank_test_data.shape)
    # Sanity check: per-utterance counts should sum to the total frame count,
    # so this should print 0.
    print('frame_amount : ', np.sum(frame_amount) - fbank_test_data.shape[0])
    print('name_0 : ', len(name_0), name_0[0])

    test_data1 = batch_padding(fbank_test_data[:, 1:-3].astype(float),
                               frame_amount, MAX_FRAME_LENGTH, 69)
    timestamp2 = time.time()
    print("Data preprocessing took %.2f seconds" % (timestamp2 - timestamp1))

    filepath1 = 'my_model-fbank-cnn.hdf5'
    model1 = load_model(filepath1)
    prediction1 = model1.predict(test_data1, batch_size=64, verbose=1)
    print('prediction.shape : ', prediction1.shape)

    # For each frame, take the argmax over the 48 classes, map that phone to
    # its 39-phone class via 48_39.map, then to its single-character code via
    # 48phone_char.map.
    answer = []
    for i in range(frame_amount.shape[0]):
        for j in range(frame_amount[i]):
            three_nine_phone = first_map[np.argmax(prediction1[i, j, :]), 1]
            index = np.where(second_map[:, 0] == three_nine_phone)
            ans = second_map[index[0][0], 2]
            answer.append(ans)
    print('answer_length : ', len(answer), answer[0])

    # Regroup the flat frame-level answers into one sequence per utterance.
    reshape_answer = []
    seq_len = 0
    for i in range(frame_amount.shape[0]):
        reshape_answer.append(answer[seq_len:seq_len + frame_amount[i]])
        seq_len += frame_amount[i]
    print(len(reshape_answer[1]))

    # Collapse consecutive duplicate phones and trim leading/trailing silence.
    reshape_answer = delete_double(reshape_answer)
    reshape_answer = remove_sil(reshape_answer)

    with open(argv[1], 'w') as outfile:
        outfile.write('id,phone_sequence\n')
        for i in range(len(frame_amount)):
            outfile.write(name_0[i] + ',' + ''.join(reshape_answer[i]) + '\n')
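# delete_double and remove_sil are used above but not defined in this section.
# Minimal sketches, assuming the usual post-processing for this task: collapse
# runs of repeated phone characters, then strip leading and trailing silence.
# 'L' as the character that 48phone_char.map assigns to sil is an assumption.
def delete_double(sequences):
    # Keep a character only when it differs from its predecessor.
    return [[c for i, c in enumerate(seq) if i == 0 or c != seq[i - 1]]
            for seq in sequences]

def remove_sil(sequences, sil_char='L'):
    trimmed = []
    for seq in sequences:
        if seq and seq[0] == sil_char:
            seq = seq[1:]
        if seq and seq[-1] == sil_char:
            seq = seq[:-1]
        trimmed.append(seq)
    return trimmed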
# Third version of main(): build and train the Conv1D + bidirectional-GRU
# frame classifier on MFCC features. Being the last definition, this is the
# one actually bound to the name main.
def main(argv):
    timestamp1 = time.time()
    first_map = []
    second_map = []

    # Read 48_39.map #
    with open(argv[0] + 'phones/48_39.map') as myfile:
        for line in myfile:
            first_map.append(line.strip().split('\t'))
    first_map = np.array(first_map)
    print('first_map', first_map.shape)

    # Read 48phone_char.map #
    with open(argv[0] + '48phone_char.map') as myfile:
        for line in myfile:
            second_map.append(line.strip().split('\t'))
    second_map = np.array(second_map)
    print('second_map', second_map.shape)

    # Read MFCC features; the three id fields (speaker, sentence, frame) are
    # appended as extra columns for sorting.
    mfcc_train_data = []
    frame_amount = []
    with open(argv[0] + 'mfcc/train.ark') as myfile:
        for line in myfile:
            input_buffer = line.strip().split()
            id_value = input_buffer[0].split('_')
            input_buffer = input_buffer + id_value
            mfcc_train_data.append(input_buffer)
    print('success -- mfcc')
    mfcc_train_data = np.array(mfcc_train_data)

    # Stable two-pass sort groups the frames of each utterance together while
    # preserving frame order.
    mfcc_train_data = mfcc_train_data[mfcc_train_data[:, -3].argsort(kind='mergesort')]
    mfcc_train_data = mfcc_train_data[mfcc_train_data[:, -2].argsort(kind='mergesort')]

    # Count frames per (speaker, sentence) utterance.
    count = 1
    for i in range(mfcc_train_data.shape[0] - 1):
        if (mfcc_train_data[i, -3] == mfcc_train_data[i + 1, -3]
                and mfcc_train_data[i, -2] == mfcc_train_data[i + 1, -2]):
            count += 1
        else:
            frame_amount.append(count)
            count = 1
    frame_amount.append(count)
    frame_amount = np.array(frame_amount)

    print(mfcc_train_data[0, 0], mfcc_train_data[1, 0])
    print(mfcc_train_data.shape)
    print('frame_amount.shape', frame_amount.shape)
    print(frame_amount[0], frame_amount[-1])
    print('frame_amount sum : ', np.sum(frame_amount))

    # Keep only the 39 MFCC columns.
    mfcc_train_data = mfcc_train_data[:, 1:-3].astype(float)

    # Read labels and sort them the same way so they line up with the features.
    label = []
    with open(argv[0] + 'label/train.lab') as myfile:
        for line in myfile:
            input_buffer = line.strip().split(',')
            id_value = input_buffer[0].split('_')
            # Replace the phone name with its row index in the 48-phone map.
            input_buffer[1] = np.where(first_map[:, 0] == input_buffer[1])[0][0]
            input_buffer = input_buffer + id_value
            label.append(input_buffer)
    print('success -- label')
    label = np.array(label, dtype=object)
    label = label[label[:, -3].argsort(kind='mergesort')]
    label = label[label[:, -2].argsort(kind='mergesort')]
    print(label.shape)

    lb = preprocessing.LabelBinarizer()
    new_label = label[:, 1].astype(int)
    lb.fit(new_label)
    one_hot_label = lb.transform(new_label)
    print(one_hot_label[0])

    timestamp2 = time.time()
    x, y, y_one_hot = batch_padding(mfcc_train_data, label[:, 1], one_hot_label,
                                    frame_amount, MAX_FRAME_LENGTH)
    (X_train, X_test, y_train, y_test,
     frame_amount_train, frame_amount_test,
     y_one_hot_train, y_one_hot_test) = train_test_split(
        x, y, frame_amount, y_one_hot, test_size=0.15, random_state=42)
    print("Data preprocessing took %.2f seconds" % (timestamp2 - timestamp1))

    filepath = argv[1] + '.hdf5'
    model = Sequential()
    model.add(Conv1D(input_shape=(1000, 39), filters=64, kernel_size=3,
                     strides=1, padding='same'))
    model.add(Dropout(0.2))
    model.add(Conv1D(filters=32, kernel_size=3, strides=1, padding='same'))
    model.add(Dropout(0.2))
    # NOTE: Conv1D does not propagate masks and its output is generally nonzero
    # at padded frames, so this Masking layer is unlikely to have any effect.
    model.add(Masking(mask_value=0.))
    model.add(Bidirectional(GRU(units=128, return_sequences=True,
                                dropout=0.2, recurrent_dropout=0.2)))
    model.add(Bidirectional(GRU(units=128, return_sequences=True,
                                dropout=0.2, recurrent_dropout=0.2)))
    model.add(TimeDistributed(Dense(256, activation='relu')))
    model.add(TimeDistributed(BatchNormalization()))
    model.add(Dropout(0.25))
    model.add(TimeDistributed(Dense(256, activation='relu')))
    model.add(TimeDistributed(BatchNormalization()))
    model.add(Dropout(0.25))
    # One softmax over the 48 phone classes per time step.
    model.add(TimeDistributed(Dense(48, activation='softmax')))
    model.compile(optimizer='adam', loss='categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()

    # Save a checkpoint every 10 epochs.
    model_check = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                                  save_best_only=False, save_weights_only=False,
                                  mode='max', period=10)
    # NOTE: min_lr here equals Adam's default learning rate (0.001), so this
    # callback can never actually lower the rate; a smaller min_lr was likely
    # intended.
    reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.2, patience=5,
                                  min_lr=0.001, mode='max', verbose=1)
    # Train, validating on the held-out 15% split.
    model.fit(X_train, y_one_hot_train, epochs=120, batch_size=128,
              callbacks=[model_check, reduce_lr],
              validation_data=(X_test, y_one_hot_test))
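# A plausible entry point, assuming the script is run as
#   python hw1.py <data_dir>/ <output_or_model_name>
# (the script name is a guess, and the paths above are built by plain string
# concatenation, so the data directory needs its trailing slash).
if __name__ == '__main__':
    import sys
    main(sys.argv[1:])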