def match_cosine_similarity(self, image_path, topn=5):
    # Compare image_path against the stored image features
    features = fe.extract_features(image_path)
    img_distances = self.cosine_similarity(np.array(features))
    # Keep the topn indices after sorting the similarities in descending order
    nearest_img_index = np.argsort(img_distances)[::-1][:topn].tolist()
    nearest_img_paths = self.names[nearest_img_index].tolist()
    return nearest_img_paths, img_distances[nearest_img_index].tolist()
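For context, a minimal sketch of the matcher class this method appears to belong to; the class name, the stored feature matrix, and the cosine computation are assumptions for illustration (the module-level fe feature extractor is likewise assumed to exist elsewhere):

import numpy as np

# Hypothetical owning class; only the pieces match_cosine_similarity relies on are sketched.
class ImageMatcher:
    def __init__(self, feature_matrix, names):
        self.matrix = np.asarray(feature_matrix)  # one feature row per indexed image
        self.names = np.asarray(names)            # image paths aligned with the rows

    def cosine_similarity(self, vector):
        # Similarity of one query vector against every stored row (higher = more similar)
        norms = np.linalg.norm(self.matrix, axis=1) * np.linalg.norm(vector)
        return self.matrix.dot(vector) / norms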
def getDataset(self, df, datasetType):
    # Build a feature DataFrame (and, for train/dev, a label DataFrame) from a corpus DataFrame
    features = []
    tag = []
    for index, row in df.iterrows():
        if datasetType == 'test':
            # data.append(features.extract_features(row['Sentence1'], row['Sentence2'], row['GoldTag']))
            features.append(featureExtraction.extract_features(row['Sentence1'], row['Sentence2']))
        else:
            features.append(featureExtraction.extract_features(row['Sentence1'], row['Sentence2']))
            tag.append(row['GoldTag'])

    dataset = pd.DataFrame(features, columns=['Jaccard', 'bow', 'cosine', 'hyper_ratio',
                                              'hypo_ratio', 'holo_ratio', 'mero_ratio'])
    label = None
    if datasetType in ('train', 'dev'):
        label = pd.DataFrame(tag, columns=['gold_tag'])
    return dataset, label
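getDataset() expects a DataFrame with Sentence1, Sentence2 and GoldTag columns; a minimal illustration of that input shape (the sentence pair and tag are taken from the example script further below; the reader instance name is an assumption):

import pandas as pd

# Assumed input format for getDataset(); values are illustrative only.
df = pd.DataFrame({
    'Sentence1': ["The young lady enjoys listening to the guitar."],
    'Sentence2': ["The young lady enjoys playing the guitar."],
    'GoldTag':   [4],
})
# dataset, label = reader.getDataset(df, 'train')   # `reader` is the owning instance (assumption)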
def train():
    display = tk.Text(master=window, height=8, width=40)
    display.config(font=("Helvetica"))
    display.grid(columnspan=2, row=5, sticky='e')
    warnings.filterwarnings("ignore")

    # path to training data
    source = "data/"
    # path where trained speaker models will be saved
    dest = "models/"
    train_file = "data.txt"
    file_paths = open(train_file, 'r')

    count = 1
    # Extract features for each speaker (5 files per speaker)
    features = np.asarray(())
    for path in file_paths:
        path = path.strip()
        # print path
        # display.insert(tk.END, path)
        # display.insert(tk.END, "\n")

        # read the audio
        sr, audio = read(source + path)

        # extract 40-dimensional MFCC & delta-MFCC features
        vector = extract_features(audio, sr)

        if features.size == 0:
            features = vector
        else:
            features = np.vstack((features, vector))

        # once the features of a speaker's 5 files are concatenated, train the model
        if count == 5:
            gmm = GMM(n_components=16, n_iter=200, covariance_type='diag', n_init=3)
            gmm.fit(features)

            # dump the trained Gaussian mixture model (binary mode for pickle)
            picklefile = path.split("/")[0] + ".gmm"
            # print picklefile
            cPickle.dump(gmm, open(dest + picklefile, 'wb'))

            # print '+ modeling completed for word:', picklefile, " with data point = ", features.shape
            phrase = "Modeling completed: " + picklefile
            display.insert(tk.END, phrase)
            display.insert(tk.END, "\n")

            features = np.asarray(())
            count = 0
        count = count + 1
    file_paths.close()
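train() reads data.txt, which is assumed to list one relative audio path per line with five recordings per speaker; since the model file name comes from path.split("/")[0], each path presumably starts with a per-speaker directory. An assumed example layout of data.txt:

alice/sample1.wav
alice/sample2.wav
alice/sample3.wav
alice/sample4.wav
alice/sample5.wav
bob/sample1.wav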
import sys

import pandas as pd
from sklearn import svm

import featureExtraction
from readData import CorpusReader

# class model:
if __name__ == "__main__":
    # trainSet = sys.argv[1]
    # datasetType = sys.argv[2]
    s1 = "The young lady enjoys listening to the guitar."
    s2 = "The young lady enjoys playing the guitar."
    tag = 4

    # extract features for the sentence pair
    features = featureExtraction.extract_features(s1, s2)
    dataset = pd.DataFrame([features], columns=[
        'Jaccard', 'bow', 'cosine', 'hyper_ratio',
        'hypo_ratio', 'holo_ratio', 'mero_ratio'
    ])
    label = pd.DataFrame([tag], columns=['gold_tag'])

    # call model and save model
    # evaluate
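The trailing "call model and save model / evaluate" steps are left as comments in the script above; a minimal sketch of what they might look like with scikit-learn's SVC and pickle (the kernel choice, file name, and helper name are assumptions, not part of the original):

import pickle
from sklearn import svm

def train_and_save(dataset, label, model_path="svm_model.pkl"):
    # Fit an SVM classifier on the extracted sentence-pair features (hypothetical helper).
    clf = svm.SVC(kernel='rbf', gamma='scale')
    clf.fit(dataset.values, label['gold_tag'].values)
    # Persist the trained model so it can be reloaded for evaluation.
    with open(model_path, 'wb') as f:
        pickle.dump(clf, f)
    return clf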
def test():
    display = tk.Text(master=window, height=8, width=40)
    display.config(font=("Helvetica"))
    display.grid(columnspan=2, row=5, sticky='e')

    WAVE_OUTPUT_FILENAME = "test/test"

    # path to training data
    source = "data/"
    modelpath = "models/"
    # test_file = "development_set_test.txt"
    # file_paths = open(test_file, 'r')

    gmm_files = [os.path.join(modelpath, fname) for fname in os.listdir(modelpath)
                 if fname.endswith('.gmm')]

    # Load the Gaussian speaker models (binary mode for pickle)
    models = [cPickle.load(open(fname, 'rb')) for fname in gmm_files]
    speakers = [fname.split("/")[-1].split(".gmm")[0] for fname in gmm_files]

    # Record a test utterance from the microphone
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                    input=True, frames_per_buffer=CHUNK)
    # print("* recording")
    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)
    # print("* done recording")
    stream.stop_stream()
    stream.close()
    p.terminate()

    # Write the recording to a wav file for feature extraction
    wf = wave.open(WAVE_OUTPUT_FILENAME + ".wav", 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()

    sr, audio = read("test/test.wav")
    vector = extract_features(audio, sr)

    log_likelihood = np.zeros(len(models))
    for i in range(len(models)):
        gmm = models[i]  # score the utterance against each speaker model
        scores = np.array(gmm.score(vector))
        log_likelihood[i] = scores.sum()

    winner = np.argmax(log_likelihood)
    # print "\tDETECTED AS: ", speakers[winner]
    display.insert(tk.END, "DETECTED AS: " + speakers[winner])
    execute(speakers[winner])
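Both train() and test() rely on module-level imports, recording constants, and helpers that are not shown; a plausible setup is sketched below. The constant values, the extract_features module, and the execute callback are assumptions. Note that the GMM(..., n_iter=...) call in train() and the per-frame gmm.score(vector) in test() match the old sklearn.mixture.GMM API (removed in scikit-learn 0.20), not the newer GaussianMixture.

# Assumed module-level setup; names and values are guesses, not from the original code.
import os
import pickle as cPickle   # Python 3 stand-in for the original cPickle
import warnings
import wave

import numpy as np
import pyaudio
import tkinter as tk
from scipy.io.wavfile import read
from sklearn.mixture import GMM          # old API assumed by n_iter / per-frame score()

from feature_extraction import extract_features   # hypothetical MFCC + delta-MFCC extractor

CHUNK = 1024                  # assumed recording parameters
FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
RECORD_SECONDS = 3

window = tk.Tk()              # Tk root that train()/test() attach their Text widget to

def execute(speaker):
    # Placeholder for whatever action test() triggers for the detected speaker.
    print("Detected:", speaker)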