def create_test_set(test_data):
    try:
        print >> sys.stderr, "*** DOING TEST SET ***"
        X_test = test_data.values()
        Y_test = test_data.keys()
        return X_test, Y_test
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        # traceback.format_exc() returns the traceback as a string;
        # print_exc() prints it and returns None
        l.log_error("Exception in create-test-set",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in create-test-set process"}
        return HttpResponse(json.dumps(response))
def do_GET(self):
    log = Logger()
    # log the received command and the client ip address and port number
    log.log_info("{} received from {}".format(self.command, self.client_address))
    # parse the url
    url = urlparse.urlparse(self.path)
    # check if the client call is correct
    if url.path == '/geocode':
        query = urlparse.parse_qs(url.query)
        # check if the address query string is passed
        if query.get('address'):
            # send 200 : OK status (only once the request is known to be valid)
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            # get the geocodes of the passed address
            address = query['address'][0].replace(" ", "+")
            self._get_geocode(address)
        else:
            # send 400 : Bad Request status and log the error
            self.send_response(400)
            self.end_headers()
            log.log_error("address parameter not passed")
    else:
        # send 404 : Not Found status and log the error
        self.send_response(404)
        self.end_headers()
        log.log_error("Unknown service requested.")
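# A minimal client sketch for exercising the /geocode endpoint above. The
# host and port are assumptions (they are set wherever the HTTPServer is
# created, which is not shown in this section); adjust them to match the
# actual server configuration.
import urllib
import urllib2

def request_geocode(address, host='localhost', port=8080):
    # the handler expects an 'address' query parameter
    query = urllib.urlencode({'address': address})
    url = 'http://%s:%d/geocode?%s' % (host, port, query)
    return urllib2.urlopen(url).read()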
def train_gmm(self):
    all_data = self.create_structure()
    path = os.path.dirname(os.path.abspath(__file__))
    try:
        keys = all_data.keys()
        n_classes = len(np.unique(keys))
        gmm_classifier = mixture.GMM(n_components=n_classes, covariance_type='full',
                                     init_params='wmc', min_covar=0.001, n_init=1,
                                     n_iter=100, params='wmc', random_state=None,
                                     thresh=None, tol=0.001)
        for data in all_data.values():
            for val in data.values():
                f1 = val.get_object(2)
                f2 = val.get_object(3)
                # (F1, F2) pairs for this vowel; renamed from 'data' so the
                # outer loop variable is not shadowed
                points = zip(f1, f2)
                if len(points) >= n_classes:
                    gmm_classifier.fit(points)
        # save the training data
        path_trainset = os.path.join(path, self.trainset_name)
        with open(path_trainset, 'wb') as fid:
            cPickle.dump(all_data, fid)
        # save the classifier
        model_directory = os.path.join(path, self.model_name)
        with open(model_directory, 'wb') as fid:
            cPickle.dump(gmm_classifier, fid)
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in GMM-train model",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in GMM-train-model process"}
        return HttpResponse(json.dumps(response))
def models_if_exist(self):
    try:
        path = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(path, self.model_name)
        return os.path.exists(model_path)
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in GMM-check-if-models-exist",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in GMM-check-if-models-exist process"}
        return HttpResponse(json.dumps(response))
def make_ellipses(self, ax, native_f1, native_f2, predicted_f1, predicted_f2):
    try:
        print >> sys.stderr, "*** MAKE ELLIPSES ***"
        x1 = min(native_f1)
        x2 = max(native_f1)
        y1 = min(native_f2)
        y2 = max(native_f2)
        # centre of the native formant bounding box
        centroid_x = (x2 + x1) / 2
        centroid_y = (y2 + y1) / 2
        # Euclidean distance from the predicted point to the centroid
        x_2 = math.pow(centroid_x - predicted_f1, 2)
        y_2 = math.pow(centroid_y - predicted_f2, 2)
        distance_from_centroid = math.sqrt(x_2 + y_2)
        ellipse = mpl.patches.Ellipse(xy=(centroid_x, centroid_y),
                                      width=(x2 - x1) * 1.4,
                                      height=(y2 - y1) * 1.2)
        ellipse.set_edgecolor('r')
        ellipse.set_facecolor('none')
        ellipse.set_clip_box(ax.bbox)
        ellipse.set_alpha(0.5)
        ax.add_artist(ellipse)
        print >> sys.stderr, "*** ELLIPSES DONE ***"
        return distance_from_centroid
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in GMM-make ellipse",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in GMM-make-ellipse process"}
        return HttpResponse(json.dumps(response))
def extract_data(audio_file):
    print >> sys.stderr, "*** DOING EXTRACT DATA ***"
    # need to change speakerfile for the female gender
    path = os.path.dirname(os.path.abspath(__file__))
    path_fave = path + "/libraries/FAVE_extract/"
    config_file = ("--outputFormat txt --candidates --speechSoftware praat "
                   "--formantPredictionMethod default --measurementPointMethod faav "
                   "--nFormants 3 --minVowelDuration 0.001 --nSmoothing 12 "
                   "--remeasure --vowelSystem phila --speaker " +
                   path_fave + "/speakerinfo.speakerfile")
    textgrid_file_directory = path + "/data/"
    output_file_directory = path + "/data/"
    wav_file = audio_file
    wav_file_cleaned = wav_file.replace('.wav', '.TextGrid')
    (dir_name, file_name) = os.path.split(wav_file_cleaned)
    textgrid_file = os.path.join(textgrid_file_directory, file_name)
    output_file = os.path.join(output_file_directory,
                               file_name.replace('.TextGrid', '.txt'))
    command = ("python " + path_fave + "bin/extractFormants.py " + config_file +
               " " + audio_file + " " + textgrid_file + " " + output_file)
    try:
        # run the FAVE formant-extraction script
        proc = Popen(command, shell=True)
        proc.wait()
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in extract-formants",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in extract-formants process"}
        return HttpResponse(json.dumps(response))
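# Hedged alternative for the Popen call inside extract_data above: passing an
# argv list instead of one concatenated string keeps the interpreter, the
# script path and each file argument as separate tokens, which reduces shell
# quoting problems. path_fave, config_file, audio_file, textgrid_file and
# output_file are the same locals assembled in extract_data.
args = (["python", path_fave + "bin/extractFormants.py"]
        + config_file.split()
        + [audio_file, textgrid_file, output_file])
proc = Popen(args)  # no shell=True needed
proc.wait()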
def get_native_vowels(self, sentence):
    try:
        path = os.path.dirname(os.path.abspath(__file__))
        label_path = path + self.native_vowels
        sentences_path = path + self.native_sentences
        s = sentence.lower()
        vowels = []
        with open(label_path, 'rb') as vowels_file:
            reader = csv.reader(vowels_file, delimiter='\n')
            all_lines = list(reader)
            for line in all_lines:
                l = line[0].split(' ')
                vowels.append(l)
        sentences = []
        with open(sentences_path, 'rb') as sentences_file:
            reader = csv.reader(sentences_file, delimiter='\n')
            all_lines = list(reader)
            for line in all_lines:
                sen = line[0]
                sentences.append(sen)
        # map each native sentence to its list of vowels
        result = dict(zip(sentences, vowels))
        return result[s]
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in GMM-get-native-vowels-struct",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in GMM-get-native-vowels process"}
        return HttpResponse(json.dumps(response))
def create_test_data(filename):
    try:
        print >> sys.stderr, "*** DOING TEST DATA ***"
        path = os.path.dirname(os.path.abspath(__file__))
        path_data = path + "/data/"
        txt_file = path_data + filename.replace('.wav', '_norm.txt')
        csv_file = path_data + filename.replace('.wav', '.csv')
        # 'with' keeps files from staying open; the 'b' mode is necessary on
        # Windows: it prevents \x1a (Ctrl-Z) from ending the stream prematurely
        # and stops Python converting line terminators (no effect elsewhere)
        with open(txt_file, "rb") as opened_txt:
            in_txt = csv.reader(opened_txt, delimiter='\t')
            with open(csv_file, 'wb') as opened_csv:
                out_csv = csv.writer(opened_csv)
                out_csv.writerows(in_txt)
        all_data = dict()
        with open(csv_file, 'r') as tabbed_file:
            reader = csv.reader(tabbed_file, delimiter="\t")
            all_lines = list(reader)
            not_included = 0
            for line in all_lines:
                # skip the three header lines
                if not_included <= 2:
                    not_included += 1
                    continue
                l = line[0].split(',')
                data = GmmStructure()
                data.set_object(0, l[1])
                data.set_object(1, l[2])
                try:
                    f1_val = float(l[3]) if l[3] != '' else 0.0
                    f2_val = float(l[4]) if l[4] != '' else 0.0
                    data.set_object(2, f1_val)
                    data.set_object(3, f2_val)
                except (ValueError, IndexError):
                    print "Error: ", sys.exc_info()
                if l[0] in all_data:
                    # concat the new formants to the existing structure
                    # (we use it only for phoneme prediction)
                    obj = all_data[l[0]]
                    obj.concat_object(0, data.norm_F1)
                    obj.concat_object(1, data.norm_F2)
                    all_data[l[0]] = obj
                else:
                    # create a new structure in this slot
                    all_data[l[0]] = data
        return all_data
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in create-test-data",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in create-test-data process"}
        return HttpResponse(json.dumps(response))
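# GmmStructure is not defined in this section. The following is a hypothetical
# minimal stand-in, inferred purely from how it is used here: set_object /
# get_object address slots 0-3 (word, stress, normalized F1, normalized F2),
# concat_object addresses only the formant lists with slots 0-1, and the
# norm_F1 / norm_F2 attributes expose the formant lists. The real class may
# differ in its internals.
class GmmStructure(object):
    def __init__(self):
        self.words = []     # set_object slot 0
        self.stress = []    # set_object slot 1
        self.norm_F1 = []   # set_object slot 2 / concat_object slot 0
        self.norm_F2 = []   # set_object slot 3 / concat_object slot 1

    def set_object(self, i, value):
        (self.words, self.stress, self.norm_F1, self.norm_F2)[i].append(value)

    def get_object(self, i):
        return (self.words, self.stress, self.norm_F1, self.norm_F2)[i]

    def concat_object(self, i, values):
        # callers pass 0 for norm_F1 and 1 for norm_F2 here
        (self.norm_F1, self.norm_F2)[i].extend(values)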
def test_gmm(self, X_test, Y_test, plot_filename, sentence):
    # region LOAD SETS
    path = os.path.dirname(os.path.abspath(__file__))
    path += '/'
    model_name = path + self.model_name
    trainset_name = path + self.trainset_name

    print >> sys.stderr, "*** Model name: " + model_name + " ***"
    print >> sys.stderr, "*** Trainset name: " + trainset_name + " ***"

    # load the classifier and the training data again
    with open(model_name, 'rb') as model:
        gmm_classifier = cPickle.load(model)
    with open(trainset_name, 'rb') as traindata:
        all_data = cPickle.load(traindata)

    print >> sys.stderr, "*** LOADED Model name: " + model_name + " ***"
    print >> sys.stderr, "*** LOADED Trainset name: " + trainset_name + " ***"
    print >> sys.stderr, "*** ITEMS N: " + str(len(all_data.items())) + " ***"

    all_vowels = []
    for key, val in all_data.items():
        for v in val.keys():
            all_vowels.append(v)

    labels = np.unique(all_vowels)
    print >> sys.stderr, "*** LABELS ***"
    int_labels = np.arange(len(labels))
    print >> sys.stderr, "*** INT LABELS ***"
    # map integer class indices to vowel labels and vice versa
    map_int_label = dict(zip(int_labels, labels))
    print >> sys.stderr, "*** MAP INT LABELS ***"
    map_label_int = dict(zip(labels, int_labels))
    print >> sys.stderr, "*** MAP LABELS INT ***"

    # results
    key_sentence = sentence.lower()
    key_sentence = key_sentence.replace(' ', '_')
    train_dict = all_data.get(key_sentence)

    X_train = train_dict.values()
    print >> sys.stderr, "*** X_TRAIN ***"
    Y_train = train_dict.keys()
    print >> sys.stderr, "*** Y_TRAIN ***"
    # endregion

    try:
        # region PLOT PARAMETERS
        print >> sys.stderr, "*** PREPARING FOR PLOTTING GMM ***"
        plt.figure()
        plt.subplots_adjust(wspace=0.4, hspace=0.5)
        colors = ['b', 'g', 'c', 'm', 'y', 'k']
        predicted_formants = []
        current_trend_formants_data = dict()
        # endregion

        # 3 rows when we have 5 vowels
        if len(X_test) > 4:
            rows = 3
        else:
            rows = 2

        # region PRINT PREDICTED VOWELS
        print >> sys.stderr, "*** PRINT PREDICTED VOWELS ***"
        columns = 2
        index = 1
        for val in X_test:
            f1 = val.norm_F1
            f2 = val.norm_F2
            data = zip(f1, f2)
            gmm_predict = gmm_classifier.predict(data)
            # save data for the trend graph + index of the subplot
            current_trend_formants_data[index] = data
            gmm_l = gmm_predict.tolist()
            predicted_formants.append(gmm_l[0])
            # print the predicted vowels based on the formants
            l = gmm_l[0]  # TODO: investigate how to take only the highest probability
            plt.subplot(rows, columns, index)
            plt.scatter(f1, f2, s=80, c='r', marker='+',
                        label=r"$ {} $".format(map_int_label[l]))
            index += 1
            score = gmm_classifier.score(data)
            print >> sys.stderr, "*** LOG-PROBABILITY: " + str(score) + " ***"
        # endregion

        # region STRUCT FOR RETRIEVING THE ACTUAL LABEL
        print >> sys.stderr, "*** STRUCT FOR RETRIEVING THE ACTUAL LABEL ***"
        predicted_labels = []
        for pf in predicted_formants:
            predicted_labels.append(map_int_label[pf])
        native_vowels = self.get_native_vowels(sentence)
        uniq_predicted_labels = np.unique(predicted_labels)
        # endregion

        # TODO: saving data for creating the trend chart
        current_trend_data = zip(predicted_labels, current_trend_formants_data)

        # region ACCURACY
        # try:
        #     pred_uniq = []
        #     for i in uniq_predicted_labels.ravel():
        #         pred_uniq.append(map_label_int[i])
        #     print >> sys.stderr, "*** PRED UNIQ ***"
        #
        #     native_lab = []
        #     for i in np.array(native_vowels):
        #         native_lab.append(map_label_int[i])
        #     print >> sys.stderr, "*** NATIVE LAB ***"
        #
        #     print >> sys.stderr, "*** PREDICTED LABELS: " + np.array_str(np.asarray(pred_uniq)) + " ***"
        #     print >> sys.stderr, "*** NATIVE LABELS: " + np.array_str(np.asarray(native_lab)) + " ***"
        #
        #     # test_accuracy = np.mean(pred_uniq == native_lab) * 100
        #     print >> sys.stderr, "*** ACCURACY: " + str(0) + " ***"
        # except:
        #     print >> sys.stderr, "*** EXCEPTION ***"
        #     pass
        # endregion

        new_trend_data = []

        # region PRINT NATIVE VOWELS FORMANTS
        print >> sys.stderr, "*** PRINT NATIVE VOWELS FORMANTS ***"
        i = 0
        duplicate = []
        native_data = dict(zip(Y_train, X_train))
        index = 1
        for n in native_vowels:
            if n in duplicate:
                continue

            found = False
            for pred in current_trend_data:
                if n in pred[0]:
                    plot_index = pred[1]
                    predicted_data = current_trend_formants_data[plot_index]
                    found = True
            if found is False:
                plot_index = index
                predicted_data = current_trend_formants_data[index]

            print >> sys.stderr, "*** READY TO CREATE THE PLOT ***"
            struct = native_data[n]
            native_f1 = struct.get_object(2)
            native_f2 = struct.get_object(3)

            ax = plt.subplot(rows, columns, plot_index)
            plt.tight_layout()
            ax.scatter(native_f1, native_f2, s=40, c=colors[i], marker='.',
                       label=r"$ {} $".format(n))
            axes = plt.gca()
            axes.set_xlim([min(native_f1) - 500, max(native_f1) + 500])
            axes.set_ylim([min(native_f2) - 500, max(native_f2) + 500])
            ax.set_xlabel('F1')
            ax.set_ylabel('F2')
            ax.set_title("Vowel: " + n)

            # ellipse inside the graph
            distance_from_centroid = self.make_ellipses(ax, native_f1, native_f2,
                                                        predicted_data[0][0],
                                                        predicted_data[0][1])
            # American date format
            date_obj = datetime.datetime.utcnow()
            date_str = date_obj.strftime('%m-%d-%Y')
            new_trend_data.append((current_trend_data[index - 1][0], n,
                                   distance_from_centroid, date_str))
            duplicate.append(n)
            i += 1
            index += 1
        # endregion

        print >> sys.stderr, "*** SAVE THE PLOT ***"
        plt.savefig(plot_filename, bbox_inches='tight', transparent=True)
        with open(plot_filename, "rb") as imageFile:
            return base64.b64encode(imageFile.read()), new_trend_data
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in GMM-test-model",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in GMM-test-model process"}
        return HttpResponse(json.dumps(response))
def get_pitch_contour(audio_file, sentence):
    try:
        print >> sys.stderr, "*** DOING PITCH CONTOUR ***"
        path = os.path.dirname(os.path.abspath(__file__))
        path_script = path + "/libraries/pitch_contour/pitch_intensity_formants.praat"
        (dir_name, file_name) = os.path.split(audio_file)
        output_name = file_name.replace(".wav", ".csv")
        output_folder = path + "/data/" + output_name
        sentence = sentence.lower()
        sentence = sentence.replace(' ', '_')
        min_pitch = '65'
        native_csv = path + "/data/native/male/" + sentence + ".csv"
        # see the script file for the usage
        command = ('/usr/bin/praat ' + path_script + " " + audio_file + " " +
                   output_folder + " wav 10 " + min_pitch + " 500 11025")
        print >> sys.stderr, command
        proc = Popen(command, shell=True)
        proc.wait()

        # native pitch values ('?' marks unvoiced frames -> 0)
        print >> sys.stderr, "*** READING NATIVE CSV ***"
        native_pitch = []
        with open(native_csv, 'r') as native_file:
            reader = csv.reader(native_file, delimiter=',')
            all_lines = list(reader)
            for line in all_lines:
                if line[1] == 'pitch':
                    continue
                if line[1] == '?':
                    native_pitch.append('0')
                else:
                    native_pitch.append(line[1])

        # user pitch values
        print >> sys.stderr, "*** READING USER CSV ***"
        user_pitch = []
        with open(output_folder, 'r') as user_file:
            reader = csv.reader(user_file, delimiter=',')
            all_lines = list(reader)
            for line in all_lines:
                if line[1] == 'pitch':
                    continue
                if line[1] == '?':
                    user_pitch.append('0')
                else:
                    user_pitch.append(line[1])

        print >> sys.stderr, "*** PADDING ***"
        # align the two contours: strip leading zeros, then pad the shorter
        # one with trailing zeros
        if len(native_pitch) != len(user_pitch):
            # iterate over real copies ([:]) and always delete the head, so
            # the deletions do not disturb the iteration
            copy_native_pitch = native_pitch[:]
            for val in copy_native_pitch:
                if val == 0 or val == '0':
                    del native_pitch[0]
                else:
                    break
            copy_user_pitch = user_pitch[:]
            for val in copy_user_pitch:
                if val == 0 or val == '0':
                    del user_pitch[0]
                else:
                    break

            length_native = len(native_pitch)
            length_user = len(user_pitch)
            if length_native > length_user:
                user_pitch += ['0'] * (length_native - length_user)
            elif length_user > length_native:
                native_pitch += ['0'] * (length_user - length_native)

        # time axis in 100 ms steps (kept for the scatter figure)
        print >> sys.stderr, "*** CREATING FIGURE ***"
        time = []
        val = 0
        for i in range(len(native_pitch)):
            val += 0.1
            time.append(val)

        # min-max normalization of both contours
        normalized_native = []
        normalized_native_floats = [float(x) for x in native_pitch]
        for val in normalized_native_floats:
            dd = (val - min(normalized_native_floats)) / \
                 (max(normalized_native_floats) - min(normalized_native_floats))
            normalized_native.append(dd)

        normalized_user = []
        normalized_user_floats = [float(x) for x in user_pitch]
        for val in normalized_user_floats:
            dd = (val - min(normalized_user_floats)) / \
                 (max(normalized_user_floats) - min(normalized_user_floats))
            normalized_user.append(dd)

        return normalized_native, normalized_user
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in get-pitch-contour",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in get-pitch-contour process"}
        return HttpResponse(json.dumps(response))
def create_structure(self):
    try:
        all_data = dict()
        path = os.path.dirname(os.path.abspath(__file__))
        formants_files = os.path.join(path, self.formants_files_directory)
        os.chdir(formants_files)

        for filename in os.listdir("."):
            if ".DS_Store" in filename or "_norm" not in filename:
                continue

            # sentence key: strip the extension, the '_norm' suffix and the
            # trailing speaker index
            cleaned_filename = filename.replace(".txt", "")
            cleaned_filename = cleaned_filename.replace('_norm', '')
            last_index = cleaned_filename.rfind("_")
            cleaned_filename = cleaned_filename[:last_index]

            training_data = dict()
            with open(filename, 'r') as tabbed_file:
                reader = csv.reader(tabbed_file, delimiter="\n")
                all_lines = list(reader)
                not_included = 0
                for line in all_lines:
                    # skip the three header lines
                    if not_included <= 2:
                        not_included += 1
                        continue
                    l = line[0].split('\t')
                    data = GmmStructure()
                    data.set_object(0, l[1])
                    data.set_object(1, l[2])
                    try:
                        f1_val = float(l[3]) if l[3] != '' else 0.0
                        f2_val = float(l[4]) if l[4] != '' else 0.0
                        data.set_object(2, f1_val)
                        data.set_object(3, f2_val)
                    except (ValueError, IndexError):
                        print "Error: ", sys.exc_info()
                    if l[0] in training_data:
                        # concat the new formants to the existing structure
                        # (we use it only for phoneme prediction)
                        obj = training_data.get(l[0])
                        obj.concat_object(0, data.norm_F1)
                        obj.concat_object(1, data.norm_F2)
                        training_data[l[0]] = obj
                    else:
                        # create a new structure in this slot
                        training_data[l[0]] = data

            if cleaned_filename in all_data:
                curr = all_data.get(cleaned_filename)
                vowels = curr.keys()
                for key, value in training_data.items():
                    if key in vowels:
                        # the vowel is already present - merge the formants
                        old_gmm_struct = curr.get(key)
                        old_gmm_struct.concat_object(0, value.norm_F1)
                        old_gmm_struct.concat_object(1, value.norm_F2)
                        curr[key] = old_gmm_struct
                    else:
                        curr[key] = value
            else:
                all_data[cleaned_filename] = training_data
        return all_data
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in GMM-create-struct",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in GMM-creation-structure process"}
        return HttpResponse(json.dumps(response))
def extract_phonemes(audio_file, sentence, predicted_phonemes):
    try:
        print >> sys.stderr, "*** DOING EXTRACT PHONEMES ***"
        path = os.path.dirname(os.path.abspath(__file__))
        textgrid_directory = path + "/data"
        (dir_name, file_name) = os.path.split(audio_file)
        output_filename = os.path.join(textgrid_directory,
                                       file_name.replace('.wav', '.txt'))

        vowel_stress = []
        phonemes = []
        with open(output_filename, 'r') as textgrid_file:
            reader = csv.reader(textgrid_file, delimiter='\t')
            all_lines = list(reader)
            print >> sys.stderr, "*** OPENED: " + output_filename + " ***"
            i = 0
            for line in all_lines:
                # skip the header line
                if i == 0:
                    i += 1
                    continue
                # vowel, stress
                vowel = line[12]
                stress = line[13]
                vowel_stress.append((vowel, stress))
                # phonemes
                pre_word_trans = line[39]
                word_trans = line[40]
                fol_word_trans = line[41]
                pre_word_trans = pre_word_trans.replace(' ', '')
                if pre_word_trans != "SP" and pre_word_trans not in phonemes:
                    phonemes.append(pre_word_trans)
                word_trans = word_trans.replace(' ', '')
                if word_trans != "SP" and word_trans not in phonemes:
                    phonemes.append(word_trans)
                fol_word_trans = fol_word_trans.replace(' ', '')
                if fol_word_trans != "SP" and fol_word_trans not in phonemes:
                    phonemes.append(fol_word_trans)

        index = native_sentences.index(sentence)
        current_native_phonemes = native_phonemes[index]

        # do WER with the CMU Sphinx phonemes but keep the old ones for stress
        print >> sys.stderr, "*** WER ***"
        test_phonemes = ""
        cmu_phonemes_list = str(predicted_phonemes).split(' ')
        sentence_list = current_native_phonemes.split(' ')
        for s in sentence_list:
            # iterate over a copy so items can be removed while looping
            for cmu in cmu_phonemes_list[:]:
                if cmu in s:
                    test_phonemes += cmu
                    cmu_phonemes_list.remove(cmu)
                    test_phonemes += " "

        wer_result, numCor, numSub, numIns, numDel = wer(current_native_phonemes,
                                                         test_phonemes)
        result_wer = "Word Error Rate: {}%".format(wer_result * 100)
        return test_phonemes.split(' '), vowel_stress, result_wer
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in extract-phonemes",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED',
                    'Reason': "Exception in extract-phonemes process"}
        return HttpResponse(json.dumps(response))
def wer(ref, hyp, debug=False):
    try:
        # weighted edit penalties (deletions and insertions cost more than
        # substitutions)
        DEL_PENALTY = 2
        SUB_PENALTY = 1
        INS_PENALTY = 3

        r = ref  # .split()
        h = hyp  # .split()
        # costs holds the edit costs, as in the Levenshtein distance algorithm
        costs = [[0 for inner in range(len(h) + 1)] for outer in range(len(r) + 1)]
        # backtrace holds the operations taken, so the best path can be traced
        # back afterwards, as the WER algorithm requires
        backtrace = [[0 for inner in range(len(h) + 1)] for outer in range(len(r) + 1)]

        OP_OK = 0
        OP_SUB = 1
        OP_INS = 2
        OP_DEL = 3

        # first column: reach an empty hypothesis by deleting all reference words
        for i in range(1, len(r) + 1):
            costs[i][0] = DEL_PENALTY * i
            backtrace[i][0] = OP_DEL
        # first row: build the hypothesis by inserting every word into an
        # empty reference
        for j in range(1, len(h) + 1):
            costs[0][j] = INS_PENALTY * j
            backtrace[0][j] = OP_INS

        # dynamic-programming fill
        for i in range(1, len(r) + 1):
            for j in range(1, len(h) + 1):
                if r[i - 1] == h[j - 1]:
                    costs[i][j] = costs[i - 1][j - 1]
                    backtrace[i][j] = OP_OK
                else:
                    substitutionCost = costs[i - 1][j - 1] + SUB_PENALTY
                    insertionCost = costs[i][j - 1] + INS_PENALTY
                    deletionCost = costs[i - 1][j] + DEL_PENALTY
                    costs[i][j] = min(substitutionCost, insertionCost, deletionCost)
                    if costs[i][j] == substitutionCost:
                        backtrace[i][j] = OP_SUB
                    elif costs[i][j] == insertionCost:
                        backtrace[i][j] = OP_INS
                    else:
                        backtrace[i][j] = OP_DEL

        # trace back through the best route
        i = len(r)
        j = len(h)
        numSub = 0
        numDel = 0
        numIns = 0
        numCor = 0
        if debug:
            print("OP\tREF\tHYP")
            lines = []
        while i > 0 or j > 0:
            if backtrace[i][j] == OP_OK:
                numCor += 1
                i -= 1
                j -= 1
                if debug:
                    lines.append("OK\t" + r[i] + "\t" + h[j])
            elif backtrace[i][j] == OP_SUB:
                numSub += 1
                i -= 1
                j -= 1
                if debug:
                    lines.append("SUB\t" + r[i] + "\t" + h[j])
            elif backtrace[i][j] == OP_INS:
                numIns += 1
                j -= 1
                if debug:
                    lines.append("INS\t" + "****" + "\t" + h[j])
            elif backtrace[i][j] == OP_DEL:
                numDel += 1
                i -= 1
                if debug:
                    lines.append("DEL\t" + r[i] + "\t" + "****")
        if debug:
            for line in reversed(lines):
                print(line)
            print("#cor " + str(numCor))
            print("#sub " + str(numSub))
            print("#del " + str(numDel))
            print("#ins " + str(numIns))
            return (numSub + numDel + numIns) / float(len(r))
        wer_result = round((numSub + numDel + numIns) / float(len(r)), 3)
        return wer_result, numCor, numSub, numIns, numDel
    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        l = Logger()
        l.log_error("Exception in WER",
                    traceback.format_exc() + "\n\n" + fname + " " + str(exc_tb.tb_lineno))
        response = {'Response': 'FAILED', 'Reason': "Exception in WER process"}
        return HttpResponse(json.dumps(response))
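# Worked example of the weighted WER above, using the wer() defined in this
# section; the toy sequences are illustrative only. With ref = ['AA', 'B', 'C']
# and hyp = ['AA', 'X', 'C'], the cheapest path is a single substitution, so
# the score is (1 sub + 0 del + 0 ins) / 3 reference tokens = 0.333. Note the
# penalties only steer the backtrace; the final score counts raw operations.
rate, cor, sub, ins, dele = wer(['AA', 'B', 'C'], ['AA', 'X', 'C'])
assert (rate, cor, sub, ins, dele) == (0.333, 2, 1, 0, 0)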