def hmm_train(features):
    """Fit a 30-state, 8-mixture GMM-HMM with a fixed left-to-right
    topology on ``features`` and print the decoded state sequence.

    NOTE(review): `features` is passed straight to hmmlearn's ``fit`` —
    presumably a (n_samples, n_features) array; confirm against callers.
    """
    gmmhmm = GMMHMM(n_components=30, n_mix=8)
    # The chain always starts in state 0.
    gmmhmm.startprob_ = np.array([1] + [0] * 29)
    # Left-to-right transitions: 0.95 self-loop, 0.05 to the next state,
    # and the final state is absorbing.
    transitions = 0.95 * np.identity(30) + 0.05 * np.eye(30, k=1)
    transitions[-1, -1] = 1
    gmmhmm.transmat_ = transitions
    gmmhmm.fit(features)
    print(gmmhmm.predict(features))
# Draw random example sequences from each activity dataset (Python 2 prints).
seq_d = dataset_dining.randomSequence('dining.chineseRestaurant', 10)
print 'dining l:'
print seq_d
seq_f = dataset_fitness.randomSequence('fitness.running', 5)
print 'fitness'
print seq_f
seq_w = dataset_work.randomSequence('work.office', 5)
print 'work'
print seq_w
seq_s = dataset_shop.randomSequence('shopping.mall', 5)
print 'shopping'
print seq_s
# Fit one model per activity class.
# NOTE(review): D, F, W, S are defined outside this chunk — presumably the
# numerical training matrices for each dataset; confirm upstream.
model_dining.fit(D)
model_fitness.fit(F)
model_work.fit(W)
model_shop.fit(S)
print model_dining.startprob_.tolist()
print model_dining.transmat_.tolist()
print 'After training'
print ' - Classification for seq dining s-'
print 'dining result:'
# Log-likelihood of the short dining sequence under the dining model.
# NOTE(review): seq_d_s is defined outside this chunk; verify it is the
# short dining sequence generated earlier in the original script.
print model_dining.score(np.array(dataset_dining._convetNumericalSequence(seq_d_s)))
def newtrain(speakers, name):
    """Enroll a new speaker: record samples, extract MFCC features and train
    a left-to-right GMM-HMM, persisted to <folder>/models/<name>.pkl.

    Parameters
    ----------
    speakers : list
        Current list of enrolled speaker names (mutated: `name` is appended).
    name : str
        The new speaker's name; also used as dataset folder and model file name.

    Returns
    -------
    list
        The updated speaker list, or the ORIGINAL list (unchanged copy) when a
        dataset folder for `name` already exists — in that case no training runs.
    """
    folder = "C:/Anaconda codes/Hackverse/servermodel/clientfiles/"
    previous_speakers = list(speakers)
    speakers.append(name)
    try:
        os.makedirs("clientfiles/dataset/" + name)
    except FileExistsError:
        # BUG FIX: was a bare `except:`, which silently swallowed every error
        # (permissions, bad path, ...). Only an existing folder means "already
        # enrolled"; other OS errors now propagate.
        print("already exists")
        return previous_speakers

    sample_dir = "clientfiles/dataset/" + name + "/"
    samples(sample_dir)  # record audio clips for the new speaker

    file_names = os.listdir(sample_dir)
    lengths = np.empty(len(file_names))
    # 20 MFCC coefficients; frames of every clip are appended column-wise.
    feature_vectors = np.empty([20, 0])
    for i, fname in enumerate(file_names):
        audio, rate = librosa.load(sample_dir + fname)
        # Use only the first 80% of the signal (drops the trailing portion).
        mfcc_feat = librosa.feature.mfcc(y=audio[0:int(len(audio) / 1.25)],
                                         sr=rate)
        lengths[i] = int(len(mfcc_feat.transpose()))  # frame count per clip
        feature_vectors = np.concatenate((feature_vectors, mfcc_feat), axis=1)

    # --- Left-to-right GMM-HMM initialisation ---
    N = 3         # number of hidden states
    Mixtures = 64 # Gaussian mixtures per state
    startprob = np.ones(N) * (10**(-30))
    startprob[0] = 1.0 - (N - 1) * (10**(-30))  # start (almost surely) in state 0
    transmat = np.zeros([N, N])
    for i in range(N):
        for j in range(N):
            transmat[i, j] = 1 / (N - i)
    transmat = np.triu(transmat, k=0)           # forbid backward transitions
    transmat[transmat == 0] = (10**(-30))       # tiny mass instead of exact zeros
    # BUG FIX: the original built a throwaway GMMHMM first and immediately
    # replaced it; only this configured instance is needed.
    model = GMMHMM(n_components=N, n_mix=Mixtures, covariance_type='diag',
                   init_params="mcw", n_iter=100)
    model.startprob_ = startprob
    model.transmat_ = transmat

    feature = feature_vectors.transpose()       # (n_frames, n_ceps) for hmmlearn
    lengths = [int(x) for x in lengths]         # hmmlearn expects int lengths
    model.fit(feature, lengths)
    joblib.dump(model, folder + "/models/" + name + ".pkl")
    return speakers
class GMMHMMTrainer(BaseTrainer):
    '''A wrapper to GMMHMM

    Attributes
    ----------
    _model: init params
    gmmhmm: hmmlearn GMMHMM instance
    params_: params after fit
    train_data_: current train datas
    '''

    def __init__(self, _model):
        # Unpack a serialized model description (nested dict) into the
        # constructor arguments of hmmlearn's GMMHMM.
        super(GMMHMMTrainer, self).__init__(_model)
        hmm_params = _model['hmmParams']
        gmm_params = _model['gmmParams']
        n_iter = _model.get('nIter', 50)  # default EM iteration count

        transmat = np.array(hmm_params['transMat'])
        transmat_prior = np.array(hmm_params['transMatPrior'])
        n_component = hmm_params['nComponent']
        startprob = np.array(hmm_params['startProb'])
        startprob_prior = np.array(hmm_params['startProbPrior'])

        n_mix = gmm_params['nMix']
        covariance_type = gmm_params['covarianceType']
        gmms = gmm_params.get('gmms', None)

        # Rebuild the per-state GMM emission models when they were serialized;
        # otherwise pass None and let hmmlearn initialise them itself.
        gmm_obj_list = []
        if not gmms:
            gmm_obj_list = None
        else:
            for gmm in gmms:
                gmm_obj = GMM(n_components=gmm['nComponent'],
                              covariance_type=gmm['covarianceType'])
                gmm_obj.covars_ = np.array(gmm['covars'])
                gmm_obj.means_ = np.array(gmm['means'])
                gmm_obj.weights_ = np.array(gmm['weights'])
                gmm_obj_list.append(gmm_obj)

        # NOTE(review): keyword names (gmms=, transmat=, startprob=) follow the
        # old pre-0.2 hmmlearn API — confirm the pinned hmmlearn version.
        self.gmmhmm = GMMHMM(n_components=n_component, n_mix=n_mix,
                             gmms=gmm_obj_list, n_iter=n_iter,
                             covariance_type=covariance_type,
                             transmat=transmat, transmat_prior=transmat_prior,
                             startprob=startprob,
                             startprob_prior=startprob_prior)

    def __repr__(self):
        return '<GMMHMMTrainer instance>\n\tinit_models:%s\n\tparams:%s\n\ttrain_data:%s' % (
            self._model, self.params_, self.train_data_)

    def fit(self, train_data):
        # Fit the wrapped GMMHMM, then snapshot every learned parameter into
        # a JSON-serialisable dict mirroring the __init__ input format.
        train_data = np.array(train_data)
        self.gmmhmm.fit(train_data)
        gmms_ = []
        for gmm in self.gmmhmm.gmms_:
            gmms_.append({
                'nComponent': gmm.n_components,
                'nIter': gmm.n_iter,
                'means': gmm.means_.tolist(),
                'covars': gmm.covars_.tolist(),
                'weights': gmm.weights_.tolist(),
                'covarianceType': gmm.covariance_type,
            })
        # Accumulate all data this trainer has seen across fit() calls.
        self.train_data_ += train_data.tolist()
        self.params_ = {
            'nIter': self.gmmhmm.n_iter,
            'hmmParams': {
                'nComponent': self.gmmhmm.n_components,
                'transMat': self.gmmhmm.transmat_.tolist(),
                'transMatPrior': self.gmmhmm.transmat_prior.tolist(),
                'startProb': self.gmmhmm.startprob_.tolist(),
                'startProbPrior': self.gmmhmm.startprob_prior.tolist(),
            },
            'gmmParams': {
                'nMix': self.gmmhmm.n_mix,
                'covarianceType': self.gmmhmm.covariance_type,
                'gmms': gmms_,
            }
        }
# Train a healthy/unhealthy GMM-HMM pair and measure window-level accuracy.
samples_raw_3, labels_3, _ = FileReader.read(FILE_PATH_3)
samples_raw_3 = samples_raw_3[:, 0:6]  # keep the first 6 sensor channels
window_size = 100
X_train, X_test, y_train, y_test = train_test_split(
    np.vstack([samples_raw_1, samples_raw_2, samples_raw_3]),
    np.vstack([labels_1, labels_2, labels_3]),
    train_size=0.6)

samples_healthy = X_train[y_train.ravel() == 0, :]
samples_unhealthy = X_train[y_train.ravel() == 1, :]
model_healthy = GMMHMM()
model_unhealthy = GMMHMM()

seqs, lengths = PreProcessor.split2sequences(samples_healthy, window_size)
model_healthy.fit(seqs, lengths)
seqs, lengths = PreProcessor.split2sequences(samples_unhealthy, window_size)
model_unhealthy.fit(seqs, lengths)

# Score each test window under both models; pick the more likely class.
seqs, lengths = PreProcessor.split2sequences(X_test, window_size)
accuracy = 0
for i in range(0, len(lengths)):
    window = seqs[i * window_size:(i + 1) * window_size, :]
    ll_healthy, post_healthy = model_healthy.score_samples(window)
    ll_unhealthy, post_unhealthy = model_unhealthy.score_samples(window)
    # BUG FIX: both sides of the separator printed ll_unhealthy.
    print("[" + str(ll_healthy) + "|" + str(ll_unhealthy) + "]")
    # BUG FIX: was `else 0`, which predicted class 0 unconditionally.
    prediction_sample = 0 if ll_healthy > ll_unhealthy else 1
    if prediction_sample == y_test[i * window_size]:
        accuracy += 1 / len(lengths)
class StockPredictor(object):
    """Trains a GMM-HMM on a ticker's historical quotes (from Elasticsearch)
    and scores a grid of possible next-day outcomes, exporting the results
    and a trade summary back to ES."""

    def __init__(self, ticker, chunks=9, delta=0, n_hidden_states=5,
                 n_latency_days=10, n_steps_frac_change=10,
                 n_steps_frac_high=30, n_steps_frac_low=10, n_iter=100,
                 verbose=False, prediction_date=None):
        self.total_score = 0
        self.verbose = verbose
        self.ticker = ticker
        self.n_latency_days = n_latency_days
        self.hmm = GMMHMM(n_components=n_hidden_states, n_iter=n_iter)
        self.chunks = chunks
        self.delta = delta
        self.prediction_date = prediction_date
        self.fetch_training_data()
        self._compute_all_possible_outcomes(n_steps_frac_change,
                                            n_steps_frac_high,
                                            n_steps_frac_low)

    def fetch_training_data(self):
        """Pull up to 10000 quotes for the ticker from ES; when no
        prediction_date was supplied, derive it from the newest record
        plus (delta + 1) days."""
        print("Fetching training data ...")
        res = es.search(index="market", doc_type="quote", size=10000,
                        body={"query": {"match": {"ticker": self.ticker}}})
        self.training_data = json_normalize(res['hits']['hits'])
        # Chunked training (see git history) is disabled: train on everything.
        self.chunked_training_data = self.training_data
        if self.verbose:
            print("Latest record for training:\n%s" %
                  self.chunked_training_data.tail(1))
        latest_date = self.chunked_training_data.tail(1)['_source.timestamp']
        datetime_object = datetime.datetime.strptime(
            latest_date.values[0], '%Y-%m-%dT%H:%M:%S')
        # BUG FIX: identity comparison for None (was `== None`).
        if self.prediction_date is None:
            prediction_date = datetime_object + timedelta(days=self.delta + 1)
            self.prediction_date = datetime.datetime.strftime(
                prediction_date, '%Y-%m-%dT%H:%M:%S')

    @staticmethod
    def _extract_features(data):
        """Stack the three fractional-move columns into an (n, 3) array."""
        frac_change = np.array(
            data['_source.change'])  #(close_price - open_price) / open_price
        frac_high = np.array(
            data['_source.change_high'])  #(high_price - open_price) / open_price
        frac_low = np.array(
            data['_source.change_low'])  #(open_price - low_price) / open_price
        return np.column_stack((frac_change, frac_high, frac_low))

    def fit(self):
        """Fit the GMM-HMM on the features of the (chunked) training data."""
        print('Extracting Features')
        feature_vector = StockPredictor._extract_features(
            self.chunked_training_data)
        if self.verbose:
            print("feature vector %s" % feature_vector)
        print('Training Model with %s features' % feature_vector.size)
        print("Latest date to be used in training is %s" %
              self.chunked_training_data.tail(1)['_source.timestamp'].values[0])
        self.hmm.fit(feature_vector)
        print('Model trained')

    def _compute_all_possible_outcomes(self, n_steps_frac_change,
                                       n_steps_frac_high, n_steps_frac_low):
        """Precompute the Cartesian grid of candidate (change, high, low)."""
        frac_change_range = np.linspace(-0.1, 0.1, n_steps_frac_change)
        frac_high_range = np.linspace(0, 0.05, n_steps_frac_high)
        frac_low_range = np.linspace(0, 0.05, n_steps_frac_low)
        self.all_possible_outcomes = np.array(
            list(
                itertools.product(frac_change_range, frac_high_range,
                                  frac_low_range)))

    def json_data_for_trade(self):
        """Build the [meta, data] JSON line pair summarising the trade signal
        (total_score accumulated over all scored outcomes)."""
        rows = list()
        ticker = self.ticker
        date = self.prediction_date
        total_score = self.total_score
        id = "%s-%s-%s" % (ticker, date, total_score)
        meta = {
            "index": {
                "_index": TRADE_INDEX_NAME,
                "_type": TRADE_TYPE_NAME,
                "_id": id
            }
        }
        rows.append(json.dumps(meta))
        row = ObjDict()
        row.total_score = total_score
        row.timestamp = self.prediction_date
        row.ticker = self.ticker
        rows.append(json.dumps(row))
        return rows

    def json_data_for_outcome(self, outcome, score):
        """Build the [meta, data] JSON line pair for one scored outcome."""
        rows = list()
        ticker = self.ticker
        date = self.prediction_date
        vector = outcome
        id = "%s-%s-%s" % (ticker, date, vector)
        meta = {"index": {"_index": INDEX_NAME, "_type": TYPE_NAME, "_id": id}}
        rows.append(json.dumps(meta))
        row = ObjDict()
        row.frac_change = outcome[0]
        row.frac_high_range = outcome[1]
        row.frac_low_range = outcome[2]
        open_price = self.training_data.tail(1)['_source.open'].values[0]
        row.predicted_close = open_price * (1 + outcome[0])
        row.expected_value = outcome[0] * score
        row.timestamp = self.prediction_date
        # BUG FIX: row.score was assigned twice; assign once.
        row.score = score
        row.chunks = self.chunks
        row.delta = self.delta
        row.ticker = self.ticker
        rows.append(json.dumps(row))
        return rows

    def delete_prediction_data(self, ticker):
        """Remove all previous predictions for this ticker from ES."""
        print("Deleting prediction data for ... %s" % self.ticker)
        es.delete_by_query(index=INDEX_NAME, doc_type=TYPE_NAME,
                           body={'query': {
                               'match': {
                                   'ticker': self.ticker
                               }
                           }})

    def predict_outcomes(self):
        """Score every candidate outcome against the last n_latency_days of
        data, accumulate total_score, and export outcome + trade rows to ES."""
        print("predicting outcomes for: %s" % self.prediction_date)
        previous_testing_data = self.training_data.tail(
            self.n_latency_days).index
        if self.verbose:
            print("previous_testing_data %s" % previous_testing_data)
        test_data = self.training_data.iloc[previous_testing_data]
        if self.verbose:
            print("Using the following slice of data:")
            print("[%s]" % previous_testing_data)
            print(test_data)
        test_data_features = StockPredictor._extract_features(test_data)
        # to blow everything away - may need to recreate/refresh indexes in ES!
        #self.delete_and_create_index()
        bulk_data = list()
        trade_data = list()
        for possible_outcome in self.all_possible_outcomes:
            test_feature_vectors = np.row_stack(
                (test_data_features, possible_outcome))
            score = self.hmm.score(test_feature_vectors)
            # ignoring scores <= 0
            if score > 0:
                bulk_data.append(
                    self.json_data_for_outcome(possible_outcome, score))
                # Positive-change outcomes add to the trade signal,
                # negative-change outcomes subtract from it.
                if possible_outcome[0] > 0:
                    self.total_score = self.total_score + score
                if possible_outcome[0] < 0:
                    self.total_score = self.total_score - score
        # NOTE(review): one trade summary is emitted after scoring all
        # outcomes; the original (collapsed) source is ambiguous about
        # whether this sat inside the loop — confirm against git history.
        trade_data.append(self.json_data_for_trade())
        print("Exporting predictions to ES")
        es_array = self.format_data_for_es(bulk_data)
        res = es.bulk(index=INDEX_NAME, body=es_array, refresh=True)
        es_array = self.format_data_for_es(trade_data)
        res = es.bulk(index=TRADE_INDEX_NAME, body=es_array, refresh=True)

    def format_data_for_es(self, data):
        """Join [meta, data] pairs into ES newline-delimited bulk format."""
        es_array = ""
        for row in data:
            es_array += row[0]
            es_array += "\n"
            es_array += row[1]
            es_array += "\n"
        return es_array
def trainingGMMHMM(
        dataset,  # training dataset.
        n_c,  # number of hmm's components (ie. hidden states)
        n_m,  # number of gmm's mixtures (ie. Gaussian model)
        start_prob_prior=None,  # prior of start hidden states probabilities.
        trans_mat_prior=None,  # prior of transition matrix.
        start_prob=None,  # the start hidden states probabilities.
        trans_mat=None,  # the transition matrix.
        gmms=None,  # models' params of gmm
        covar_type='full',
        n_i=50):
    """Train a GMMHMM on `dataset` and return its learned parameters as a
    JSON-serialisable dict (hmmParams + per-state gmmParams)."""
    X = dataset.getDataset()

    # Rebuild per-state GMM emission models when serialized params are given;
    # otherwise pass None and let hmmlearn initialise them.
    _GMMs = []
    if gmms is None:
        _GMMs = None
    else:
        for gmm in gmms:
            _GMM = GMM(n_components=n_m, covariance_type=covar_type)
            _GMM.covars_ = np.array(gmm["covars"])
            _GMM.means_ = np.array(gmm["means"])
            _GMM.weights_ = np.array(gmm["weights"])
            _GMMs.append(_GMM)

    model = GMMHMM(startprob_prior=np.array(start_prob_prior),
                   transmat_prior=np.array(trans_mat_prior),
                   startprob=np.array(start_prob),
                   transmat=np.array(trans_mat),
                   gmms=_GMMs,
                   n_components=n_c,
                   n_mix=n_m,
                   covariance_type=covar_type,
                   n_iter=n_i)
    model.fit(X)

    new_gmmhmm = {
        "nComponent": n_c,
        "nMix": n_m,
        "covarianceType": covar_type,
        "hmmParams": {
            "startProb": model.startprob_.tolist(),
            "transMat": model.transmat_.tolist()
        },
        "gmmParams": {
            "nMix": n_m,
            "covarianceType": covar_type,
            "params": []
        }
    }
    # BUG FIX: iterate the fitted per-state GMMs directly. The old code used
    # range(0, n_m), but gmms_ holds one GMM per hidden state (n_c entries in
    # the old hmmlearn API), so it truncated or overran whenever n_m != n_c.
    for state_gmm in model.gmms_:
        new_gmmhmm["gmmParams"]["params"].append({
            "covars": state_gmm.covars_.tolist(),
            "means": state_gmm.means_.tolist(),
            "weights": state_gmm.weights_.tolist()
        })
    return new_gmmhmm
def main(): outdir = r'./training_files/multi' outdir2 = r'./training_files/arnab' outdir3 = r'./training_files/kejriwal' outdir4 = r'./training_files/ravish' outdir5 = r'./training_files/not-shouting' outdir6 = r'./training_files/shouting' outdir7 = r'./training_files/single' outdir8 = r'./training_files/modi' outdir9 = r'./training_files/ond_more' #create 3 hmm one for each case multi = GMMHMM(5, 2) discuss = GMMHMM(5, 2) arnab = GMMHMM(5, 2) kejriwal = GMMHMM(5, 2) ravish = GMMHMM(5, 2) notshouting = GMMHMM(5, 2) shouting = GMMHMM(5, 2) single = GMMHMM(5, 2) #training for multi l = get_files_list(outdir) for i in l: f = open(i, "r") obs = [] i_sequence = [] count = 0 for line in f: individual_obs = line.strip().split(",") individual_obs = [float(i) for i in individual_obs] i_sequence.append(individual_obs) count += 1 if count == 10: obs.append(numpy.array(i_sequence)) count = 0 i_sequence = [] multi.fit(obs) #training for arnab l = get_files_list(outdir2) for i in l: f = open(i, "r") obs = [] i_sequence = [] count = 0 for line in f: individual_obs = line.strip().split(",") individual_obs = [float(i) for i in individual_obs] i_sequence.append(individual_obs) count += 1 if count == 10: obs.append(numpy.array(i_sequence)) count = 0 i_sequence = [] arnab.fit(obs) #training for kejriwal l = get_files_list(outdir3) for i in l: f = open(i, "r") obs = [] i_sequence = [] count = 0 for line in f: individual_obs = line.strip().split(",") individual_obs = [float(i) for i in individual_obs] i_sequence.append(individual_obs) count += 1 if count == 10: obs.append(numpy.array(i_sequence)) count = 0 i_sequence = [] kejriwal.fit(obs) #training for ravish l = get_files_list(outdir4) for i in l: f = open(i, "r") obs = [] i_sequence = [] count = 0 for line in f: individual_obs = line.strip().split(",") individual_obs = [float(i) for i in individual_obs] i_sequence.append(individual_obs) count += 1 if count == 10: obs.append(numpy.array(i_sequence)) count = 0 i_sequence = [] 
ravish.fit(obs) #training for notshouting l = get_files_list(outdir5) for i in l: f = open(i, "r") obs = [] i_sequence = [] count = 0 for line in f: individual_obs = line.strip().split(",") individual_obs = [float(i) for i in individual_obs] i_sequence.append(individual_obs) count += 1 if count == 10: obs.append(numpy.array(i_sequence)) count = 0 i_sequence = [] notshouting.fit(obs) #training for shouting l = get_files_list(outdir6) for i in l: f = open(i, "r") obs = [] i_sequence = [] count = 0 for line in f: individual_obs = line.strip().split(",") individual_obs = [float(i) for i in individual_obs] i_sequence.append(individual_obs) count += 1 if count == 10: obs.append(numpy.array(i_sequence)) count = 0 i_sequence = [] shouting.fit(obs) #training for single l = get_files_list(outdir7) for i in l: f = open(i, "r") obs = [] i_sequence = [] count = 0 for line in f: individual_obs = line.strip().split(",") individual_obs = [float(i) for i in individual_obs] i_sequence.append(individual_obs) count += 1 if count == 10: obs.append(numpy.array(i_sequence)) count = 0 i_sequence = [] single.fit(obs) #Its time for some testing q = [] t = "testcase_output.txt" out = open(t, "w") #Read test file and make list of list of sequence 10 for --->1 #te=["test1.txt","test2.txt","test3.txt","test4.txt","test5.txt","test6.txt","test7.txt","test8.txt","test9.txt","test10.txt"] #f=open("expected.txt") #d_expected={} ''' for line in f: x=line.strip().split() d_expected[x[0]]={'arnab':float(x[1]),'kejriwal':float(x[2]),'ravish':float(x[3])} ''' te = get_files_list(r'./testing_files') #te=["test1.txt","test2.txt","test3.txt"] for ad in te: d = {"arnab": 0, "kejriwal": 0, "ravish": 0} f = open(ad, "r") obs = [] i_sequence = [] count = 0 for line in f: individual_obs = line.strip().split(",") #print individual_obs individual_obs = [float(i) for i in individual_obs] i_sequence.append(individual_obs) count += 1 if count == 10: obs.append(numpy.array(i_sequence)) count = 0 i_sequence = [] p = 
[] p_choosen = [] p1_choosen = [] p1 = [] p2 = [] p2_choosen = [] #print obs for i in obs: p.append((shouting.score(i), "shouting")) p.append((notshouting.score(i), "notshouting")) p_choosen.append(max(p, key=lambda x: x[0])) p = [] for i in obs: p1.append((arnab.score(i), "arnab")) p1.append((kejriwal.score(i), "kejriwal")) p1.append((ravish.score(i), "ravish")) p1_choosen.append(max(p1, key=lambda x: x[0])) p1 = [] for i in obs: p2.append((multi.score(i), "multi")) p2.append((single.score(i), "single")) p2_choosen.append(max(p2, key=lambda x: x[0])) p2 = [] #print p p = [] p1 = [] p_choosen = [b for a, b in p_choosen] p1_choosen = [b for a, b in p1_choosen] p2_choosen = [b for a, b in p2_choosen] ''' #print p_choosen #print the state sequence with the timestamp in the output file t="testcase_output_9.txt" out=open(t,"a+") out.write(str(ad)+"--->") out.write(p_choosen[0]) out.write("\n") ''' #calculate the amount per second and append to the same file #print p_choosen #print p1_choosen shouting1 = [] notshouting1 = [] totaltime = len(p_choosen) * 0.05 single_count = 0 for i in range(len(p_choosen)): if p2_choosen[i] == "single": single_count += 1 if p_choosen[i] == "shouting": shouting1.append(p1_choosen[i]) elif p_choosen[i] == "notshouting": notshouting1.append(p1_choosen[i]) #print d d_shouting = {"arnab": 0, "kejriwal": 0, "ravish": 0} d_notshouting = {"arnab": 0, "kejriwal": 0, "ravish": 0} for i in shouting1: d_shouting[i] += 1 for i in notshouting1: d_notshouting[i] += 1 #print p_choosen out.write("\n*******--> " + str(ad) + " <--*******\n") #write arnab,ravish and kejri fn = ad.strip().split("/") fn = fn[len(fn) - 1] #out.write("Time predicted for questioning: "+str((d5['question'])*0.05)+" seconds.\n") #out.write("Time predicted for discussion: "+str((d5['discuss'])*0.05)+" seconds.\n") out.write("\nChecking single HMM and multi HMM:\n") out.write("Number of instance of Single: " + str(single_count) + "\n") out.write( "\nChecking shouting and non-shouting 
HMM for all Single instances:\n" ) out.write("Number of instance of Shouting: " + str(len(shouting1)) + "\n") out.write("Number of instance of Not-shouting: " + str(len(notshouting1)) + "\n") out.write( "\nChecking the frequency of each speaker in both both shouting and not shouting instance...\n" ) out.write("Shouting instance: \n" + str(d_shouting) + "\n") out.write("Not-Shouting instance: \n" + str(d_notshouting) + "\n") out.write("\nResult:\n") for c, d in d_shouting.items(): out.write( str(c) + " was shouting for " + str(d * 0.05) + " sec.\n") out.write("\n") for c, d in d_notshouting.items(): out.write( str(c) + " was not shouting for " + str(d * 0.05) + " sec.\n") out.write("\n") for c, d in d_shouting.items(): out.write( str(c) + " was shouting for " + str(((d * 0.05) / totaltime) * 100) + " % of time.\n") out.write("\n") for c, d in d_notshouting.items(): out.write( str(c) + " was not shouting for " + str(((d * 0.05) / totaltime) * 100) + " sec.\n") out.write("\n") print d_shouting print d_notshouting
# --- Left-to-right GMM-HMM setup ---
# NOTE(review): fragment — startprob, N, Mixtures, feature_vectors, lengths
# and training_speaker_name are defined before this chunk; confirm upstream.
startprob[0] = 1.0 - (N - 1) * (10**(-30))  # start (almost surely) in state 0
transmat = np.zeros([N, N])  # Initial Transmat for Left to Right Model
print(startprob, '\n', transmat)
# Row i spreads its mass uniformly over states i..N-1 (no backward moves).
for i in range(N):
    for j in range(N):
        transmat[i, j] = 1 / (N - i)
transmat = np.triu(transmat, k=0)       # zero out backward transitions
transmat[transmat == 0] = (10**(-30))   # tiny mass instead of exact zeros
model = GMMHMM(n_components=N, n_mix=Mixtures, covariance_type='diag',
               init_params="mcw", n_iter=100)
model.startprob_ = startprob
model.transmat_ = transmat
print(startprob, '\n', transmat)
feature = feature_vectors.transpose()   # (n_frames, n_ceps) for hmmlearn
print(np.shape(feature))
lengths = [int(x) for x in lengths]     # hmmlearn expects int sequence lengths
print(type(lengths[0]))
model.fit(feature, lengths)
# Persist the trained speaker model.
joblib.dump(
    model,
    "C:/Anaconda codes/speaker reco/something new/for hack/models/" +
    training_speaker_name + ".pkl")
# Python 2 script fragment: print the sampled sequences, fit one model per
# activity, then score the short dining sequence under the dining model.
# NOTE(review): seq_d_s and D/F/W/S are defined before this chunk.
print 'dining s:'
print seq_d_s
seq_d = dataset_dining.randomSequence('dining.chineseRestaurant', 10)
print 'dining l:'
print seq_d
seq_f = dataset_fitness.randomSequence('fitness.running', 5)
print 'fitness'
print seq_f
seq_w = dataset_work.randomSequence('work.office', 5)
print 'work'
print seq_w
seq_s = dataset_shop.randomSequence('shopping.mall', 5)
print 'shopping'
print seq_s
# Fit one model per activity class.
model_dining.fit(D)
model_fitness.fit(F)
model_work.fit(W)
model_shop.fit(S)
print model_dining.startprob_.tolist()
print model_dining.transmat_.tolist()
print 'After training'
print ' - Classification for seq dining s-'
print 'dining result:'
print model_dining.score(
    np.array(dataset_dining._convetNumericalSequence(seq_d_s)))
print 'fitness result:'
import os
from hmmlearn.hmm import GMMHMM
from python_speech_features import mfcc
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
import numpy as np
import sys

# Folder holding the CMU Arctic (awb voice) wav recordings.
input_folder = '/home/sachin/Downloads/cmu_us_awb_arctic-0.95-release/cmu_us_awb_arctic/wav'

hmm_models = []
X = np.array([])
# Stack the MFCC frames of every wav file into one (n_frames, n_ceps) matrix.
for wav_name in os.listdir(input_folder):
    wav_path = os.path.join(input_folder, wav_name)
    sampling_freq, audio = wavfile.read(wav_path)
    frames = mfcc(audio, sampling_freq)
    X = frames if len(X) == 0 else np.append(X, frames, axis=0)

# One 3-state HMM with 45-component mixtures per state, trained on 70% of
# the frames and scored on the held-out 30%.
model = GMMHMM(n_components=3, n_mix=45, n_iter=100)
X_train, X_test = train_test_split(X, train_size=0.7)
hmm_models.append(model.fit(X_train))  # fit() returns the fitted model itself
print(model.score(X_test))
# startprob = np.ones(N) * (10**(-30)) # Left to Right Model # startprob[0] = 1.0 - (N-1)*(10**(-30)) # transmat = np.zeros([N, N]) # Initial Transmat for Left to Right Model # for i in range(N): # for j in range(N): # transmat[i, j] = 1/(N-i) # transmat = np.triu(transmat, k=0) # transmat[transmat == 0] = (10**(-30)) # model = GMMHMM(n_components=N, n_mix=Mixtures, covariance_type='diag', init_params="mcw") # model.startprob_ = startprob # model.transmat_ = transmat """ MODEL WITHOUT INITIAL PARAMETERS """ model = GMMHMM(n_components=N, n_mix=Mixtures, covariance_type='diag') """ MODEL FITTING """ model.fit(feature_vectors) """ STORING THE MODEL """ sample = GMMModel(model, "FAML") # TODO: Change Name as well. pickle.dump(sample, f) """" FUTURE EXTENSIONS """ # TODO: Use score method to evaluate the model and run multiple iterations until best fit. # TODO: Create a loop so that multiple speakers can be trained in one run.
def trainingGMMHMM(
        dataset,  # training dataset.
        n_c,  # number of hmm's components (ie. hidden states)
        n_m,  # number of gmm's mixtures (ie. Gaussian model)
        start_prob_prior=None,  # prior of start hidden states probabilities.
        trans_mat_prior=None,  # prior of transition matrix.
        start_prob=None,  # the start hidden states probabilities.
        trans_mat=None,  # the transition matrix.
        gmms=None,  # models' params of gmm
        covar_type='full',
        n_i=50):
    """Train a GMMHMM on `dataset` and return its learned parameters as a
    JSON-serialisable dict (hmmParams + per-state gmmParams)."""
    X = dataset.getDataset()

    # Rebuild per-state GMM emission models when serialized params are given.
    _GMMs = []
    if gmms is None:
        _GMMs = None
    else:
        for gmm in gmms:
            _GMM = GMM(n_components=n_m, covariance_type=covar_type)
            _GMM.covars_ = np.array(gmm["covars"])
            _GMM.means_ = np.array(gmm["means"])
            _GMM.weights_ = np.array(gmm["weights"])
            _GMMs.append(_GMM)

    model = GMMHMM(startprob_prior=np.array(start_prob_prior),
                   transmat_prior=np.array(trans_mat_prior),
                   startprob=np.array(start_prob),
                   transmat=np.array(trans_mat),
                   gmms=_GMMs,
                   n_components=n_c,
                   n_mix=n_m,
                   covariance_type=covar_type,
                   n_iter=n_i)
    model.fit(X)

    new_gmmhmm = {
        "nComponent": n_c,
        "nMix": n_m,
        "covarianceType": covar_type,
        "hmmParams": {
            "startProb": model.startprob_.tolist(),
            "transMat": model.transmat_.tolist()
        },
        "gmmParams": {
            "nMix": n_m,
            "covarianceType": covar_type,
            "params": []
        }
    }
    # BUG FIX: iterate the fitted per-state GMMs directly. The old code used
    # range(0, n_m), but gmms_ holds one GMM per hidden state (n_c entries in
    # the old hmmlearn API), so it truncated or overran whenever n_m != n_c.
    for state_gmm in model.gmms_:
        new_gmmhmm["gmmParams"]["params"].append({
            "covars": state_gmm.covars_.tolist(),
            "means": state_gmm.means_.tolist(),
            "weights": state_gmm.weights_.tolist()
        })
    return new_gmmhmm
class GMMHMMTrainer(BaseTrainer):
    '''A wrapper to GMMHMM

    Attributes
    ----------
    _model: init params
    gmmhmm: hmmlearn GMMHMM instance
    params_: params after fit
    train_data_: current train datas
    '''

    def __init__(self, _model):
        # Unpack a serialized model description (nested dict) into the
        # constructor arguments of hmmlearn's GMMHMM.
        super(GMMHMMTrainer, self).__init__(_model)
        hmm_params = _model['hmmParams']
        gmm_params = _model['gmmParams']
        n_iter = _model.get('nIter', 50)  # default EM iteration count

        transmat = np.array(hmm_params['transMat'])
        transmat_prior = np.array(hmm_params['transMatPrior'])
        n_component = hmm_params['nComponent']
        startprob = np.array(hmm_params['startProb'])
        startprob_prior = np.array(hmm_params['startProbPrior'])

        n_mix = gmm_params['nMix']
        covariance_type = gmm_params['covarianceType']
        gmms = gmm_params.get('gmms', None)

        # Rebuild the per-state GMM emission models when they were serialized;
        # otherwise pass None and let hmmlearn initialise them itself.
        gmm_obj_list = []
        if not gmms:
            gmm_obj_list = None
        else:
            for gmm in gmms:
                gmm_obj = GMM(n_components=gmm['nComponent'],
                              covariance_type=gmm['covarianceType'])
                gmm_obj.covars_ = np.array(gmm['covars'])
                gmm_obj.means_ = np.array(gmm['means'])
                gmm_obj.weights_ = np.array(gmm['weights'])
                gmm_obj_list.append(gmm_obj)

        # NOTE(review): keyword names (gmms=, transmat=, startprob=) follow the
        # old pre-0.2 hmmlearn API — confirm the pinned hmmlearn version.
        self.gmmhmm = GMMHMM(n_components=n_component, n_mix=n_mix,
                             gmms=gmm_obj_list, n_iter=n_iter,
                             covariance_type=covariance_type,
                             transmat=transmat, transmat_prior=transmat_prior,
                             startprob=startprob,
                             startprob_prior=startprob_prior)

    def __repr__(self):
        return '<GMMHMMTrainer instance>\n\tinit_models:%s\n\tparams:%s\n\ttrain_data:%s' % (self._model, self.params_, self.train_data_)

    def fit(self, train_data):
        # Fit the wrapped GMMHMM, then snapshot every learned parameter into
        # a JSON-serialisable dict mirroring the __init__ input format.
        train_data = np.array(train_data)
        self.gmmhmm.fit(train_data)
        gmms_ = []
        for gmm in self.gmmhmm.gmms_:
            gmms_.append({
                'nComponent': gmm.n_components,
                'nIter': gmm.n_iter,
                'means': gmm.means_.tolist(),
                'covars': gmm.covars_.tolist(),
                'weights': gmm.weights_.tolist(),
                'covarianceType': gmm.covariance_type,
            })
        # Accumulate all data this trainer has seen across fit() calls.
        self.train_data_ += train_data.tolist()
        self.params_ = {
            'nIter': self.gmmhmm.n_iter,
            'hmmParams': {
                'nComponent': self.gmmhmm.n_components,
                'transMat': self.gmmhmm.transmat_.tolist(),
                'transMatPrior': self.gmmhmm.transmat_prior.tolist(),
                'startProb': self.gmmhmm.startprob_.tolist(),
                'startProbPrior': self.gmmhmm.startprob_prior.tolist(),
            },
            'gmmParams': {
                'nMix': self.gmmhmm.n_mix,
                'covarianceType': self.gmmhmm.covariance_type,
                'gmms': gmms_,
            }
        }
class StockPredictor(object):
    """Trains a GMM-HMM on a ticker's historical quotes held in Elasticsearch
    (all but the newest record) and scores a grid of possible outcomes for
    the newest record, exporting the predictions back to ES."""

    def __init__(self, ticker, n_hidden_states=5, n_latency_days=10,
                 n_steps_frac_change=50, n_steps_frac_high=30,
                 n_steps_frac_low=10, n_iter=1000, verbose=False):
        self.verbose = verbose
        self.ticker = ticker
        self.n_latency_days = n_latency_days
        self.hmm = GMMHMM(n_components=n_hidden_states, n_iter=n_iter)
        self.fetch_training_data()
        self.fetch_latest_data()  # to predict
        self._compute_all_possible_outcomes(n_steps_frac_change,
                                            n_steps_frac_high,
                                            n_steps_frac_low)

    def fetch_latest_data(self):
        """Keep only the newest quote as the record to predict against."""
        print("Fetching latest data ...")
        res = es.search(index="market", doc_type="quote", size=10000,
                        body={"query": {"match": {"ticker": self.ticker}}})
        latest_data = json_normalize(res['hits']['hits'])
        self.latest_data = latest_data.tail(1)
        if self.verbose:
            print("Latest data:\n%s" % self.latest_data)

    def fetch_training_data(self):
        """Pull all quotes and drop the newest one (held out for prediction)."""
        print("Fetching training data ...")
        res = es.search(index="market", doc_type="quote", size=10000,
                        body={"query": {"match": {"ticker": self.ticker}}})
        self.training_data = json_normalize(res['hits']['hits'])
        self.training_data.drop(self.training_data.tail(1).index, inplace=True)
        print("%s records to train %s" %
              (len(self.training_data.index), self.ticker))
        if self.verbose:
            print("Latest record for training:\n%s" %
                  self.training_data.tail(1))
        # tbd - to use es instead
        #q = query % (self.ticker, "lt", datetime.date.today().strftime("%Y-%m-%d"))
        #res = es.search(index=INDEX_NAME, doc_type=TYPE_NAME, size=10000, body=query)

    @staticmethod
    def _extract_features(data):
        """Stack the three fractional-move columns into an (n, 3) array."""
        frac_change = np.array(
            data['_source.change'])  #(close_price - open_price) / open_price
        frac_high = np.array(
            data['_source.change_high'])  #(high_price - open_price) / open_price
        frac_low = np.array(
            data['_source.change_low'])  #(open_price - low_price) / open_price
        return np.column_stack((frac_change, frac_high, frac_low))

    def fit(self):
        """Fit the GMM-HMM on the training data's features."""
        print('Extracting Features')
        feature_vector = StockPredictor._extract_features(self.training_data)
        if self.verbose:
            print("feature vector %s" % feature_vector)
        print('Training Model with %s features' % feature_vector.size)
        print("Latest date to be used in training is %s" %
              self.training_data.tail(1)['_source.timestamp'].values[0])
        self.hmm.fit(feature_vector)
        print('Model trained')

    def _compute_all_possible_outcomes(self, n_steps_frac_change,
                                       n_steps_frac_high, n_steps_frac_low):
        """Precompute the Cartesian grid of candidate (change, high, low).

        RENAME: was `_compute_allall_possible_outcomes` (typo); private, and
        the only call site (__init__) is updated in this class.
        """
        frac_change_range = np.linspace(-0.1, 0.1, n_steps_frac_change)
        frac_high_range = np.linspace(0, 0.1, n_steps_frac_high)
        frac_low_range = np.linspace(0, 0.1, n_steps_frac_low)
        self.all_possible_outcomes = np.array(
            list(
                itertools.product(frac_change_range, frac_high_range,
                                  frac_low_range)))

    def json_data_for_outcome(self, day, outcome, score):
        """Build the [meta, data] JSON line pair for one scored outcome of
        the given `day` (a one-row DataFrame)."""
        rows = list()
        ticker = day['_source.ticker']
        date = day['_source.timestamp']
        vector = outcome
        id = "%s-%s-%s" % (ticker, date, vector)
        meta = {"index": {"_index": INDEX_NAME, "_type": TYPE_NAME, "_id": id}}
        rows.append(json.dumps(meta))
        row = ObjDict()
        row.frac_change = outcome[0]
        row.frac_high_range = outcome[1]
        row.frac_low_range = outcome[2]
        open_price = day['_source.open'].values[0]
        row.predicted_close = open_price * (1 + outcome[0])
        row.expected_value = outcome[0] * score
        row.timestamp = day['_source.timestamp'].values[0]
        row.score = score
        row.ticker = day['_source.ticker'].values[0]
        rows.append(json.dumps(row))
        return rows

    def predict_outcomes(self):
        """Score every candidate outcome appended to the last n_latency_days
        of features and bulk-export the positive-score rows to ES."""
        print("predicting outcomes for: %s" %
              self.latest_data['_source.timestamp'].values[0])
        previous_testing_data = self.training_data.tail(
            self.n_latency_days).index
        if self.verbose:
            print("previous_testing_data %s" % previous_testing_data)
        test_data = self.training_data.iloc[previous_testing_data]
        if self.verbose:
            print("Using the following slice of data:")
            print("[%s]" % previous_testing_data)
            print(test_data)
        test_data_features = StockPredictor._extract_features(test_data)
        # to blow everything away - may need to recreate/refresh indexes in ES!
        #self.delete_and_create_index()
        bulk_data = list()
        # (removed: unused `outcome_score` accumulator)
        for possible_outcome in self.all_possible_outcomes:
            test_feature_vectors = np.row_stack(
                (test_data_features, possible_outcome))
            if self.verbose:
                print("Final test feature set:")
                print("[%s]" % test_feature_vectors)
            score = self.hmm.score(test_feature_vectors)
            # ignoring scores <= 0
            if score > 0:
                bulk_data.append(
                    self.json_data_for_outcome(self.latest_data,
                                               possible_outcome, score))
        # Newline-delimited ES bulk payload: meta line, then data line.
        es_array = ""
        for row in bulk_data:
            es_array += row[0]
            es_array += "\n"
            es_array += row[1]
            es_array += "\n"
        print("Exporting predictions to ES")
        if self.verbose:
            print(es_array)
        res = es.bulk(index=INDEX_NAME, body=es_array, refresh=True)
class HiddenMarkovModel(BaseModel):
    """Research study that fits a 2-state GMM-HMM per asset, labels each
    observation with its hidden state, and can save/load/plot the results."""

    def __init__(self):
        """Build the asset portfolio and initialize the base research study."""
        # Create some assets:
        assetsList = [
            Asset('WS30', 'traditional', 'historical'),    # Index US
            Asset('XAUUSD', 'traditional', 'historical'),  # Commodity
            Asset('GDAXIm', 'traditional', 'historical'),  # Index EUR
            Asset('EURUSD', 'traditional', 'historical'),  # Major
            Asset('GBPJPY', 'traditional', 'historical')   # Minor
        ]

        # Initialize the ResearchStudy class:
        super().__init__('HiddenMarkovModel', assetsList)

        # Fixed seed so model initialization (and thus results) is reproducible:
        np.random.seed(33)

        # Print to see if working:
        #logger.warning(self.PORTFOLIO._portfolioDict['WS30'])

    def _defineModelParameters(self):
        """Create a fresh 2-state GMM-HMM (called once per asset before fitting)."""
        #self.model = GaussianHMM(n_components=2,
        #                         covariance_type="full",
        #                         n_iter=200,
        #                         verbose=True)
        self.model = GMMHMM(n_components=2,
                            covariance_type="full",
                            n_iter=20,
                            verbose=True)

    def _monitorConvergence(self):
        """Log whether EM converged on the last fit."""
        logger.warning(f"Model Converged: {self.model.monitor_.converged}")

    def _monitorHistory(self):
        """Log the per-iteration log-likelihood history of the last fit."""
        logger.warning(f"Model History: {self.model.monitor_.history}")

    def _fitTheModel(self, saveDirectory):
        """Fit one model per asset, attach a 'HiddenStates' column, optionally save.

        saveDirectory: directory for pickled models; falsy (e.g. '') skips saving.
        """
        for eachAssetName, eachAssetDataFrame in self.PORTFOLIO._portfolioDict.items():
            # Re-initialize the parameters so each asset gets a fresh model:
            self._defineModelParameters()

            # hmmlearn expects a 2-D (n_samples, 1) array, not a (n,) vector:
            RETURNS_RESHAPED = np.column_stack([eachAssetDataFrame["Returns"]])
            self.model.fit(RETURNS_RESHAPED)
            logger.warning(
                f"Model Score for asset <{eachAssetName}>: {self.model.score(RETURNS_RESHAPED)}"
            )

            # Check convergence and history:
            self._monitorConvergence()
            self._monitorHistory()

            # Predict the hidden states based on the returns:
            HIDDEN_STATES = self.model.predict(RETURNS_RESHAPED)
            #logger.warning(HIDDEN_STATES)

            # Save the model:
            if saveDirectory:
                self._saveModel(assetModelName=eachAssetName,
                                saveDirectory=saveDirectory)

            # Create the new column in the dataframe:
            eachAssetDataFrame['HiddenStates'] = HIDDEN_STATES

    def _saveDataFrames(self, saveDirectory):
        """Dump each asset's dataframe (incl. HiddenStates) to <dir>/<asset>_DF.csv."""
        for eachAssetName, eachAssetDataFrame in self.PORTFOLIO._portfolioDict.items():
            logger.warning(
                f'[{self._saveDataFrames.__name__}] - Looping for asset <{eachAssetName}>...'
            )
            eachAssetDataFrame.to_csv(saveDirectory + f'/{eachAssetName}_DF.csv')

    def _saveModel(self, assetModelName, saveDirectory):
        """Pickle the fitted model to <saveDirectory>/HMM_<asset>.pickle."""
        with open(saveDirectory + f'/HMM_{assetModelName}.pickle',
                  'wb') as pickle_file:
            pickle.dump(self.model, pickle_file)

    def _loadModel(self, assetModelName, loadDirectory):
        """Load a previously pickled model into self.model.

        NOTE: pickle.load executes arbitrary code — only load trusted files.
        """
        with open(loadDirectory + f'/HMM_{assetModelName}.pickle',
                  'rb') as pickle_file:
            self.model = pickle.load(pickle_file)

    def _plotModelOutput(self, saveDirectory='', showIt=False):
        """Plot hidden states, close price and returns (first 200 rows) per asset.

        saveDirectory: target dir for PNGs; falsy skips saving (FIX: the
        original called savefig unconditionally, so the default '' wrote
        '/HMM_<asset>.png' to the filesystem root).
        showIt: when True, display each figure interactively.
        """
        for eachAssetName, eachAssetDataFrame in self.PORTFOLIO._portfolioDict.items():
            logger.warning(
                f'[{self._plotModelOutput.__name__}] - Looping for asset <{eachAssetName}>...'
            )

            # We will just get part of the dataframe for the plot:
            eachAssetDataFrame_Little = eachAssetDataFrame[:200].copy()
            eachAssetDataFrame_Little['date'] = range(
                1, len(eachAssetDataFrame_Little) + 1)

            # Create the figure:
            f1, ax = plt.subplots(3, figsize=(10, 5))

            # Create the colormap.
            # NOTE(review): cm.get_cmap is removed in matplotlib >= 3.9;
            # switch to matplotlib.colormaps['rainbow'] when upgrading.
            colormap = cm.get_cmap('rainbow')

            # Close price scattered and colored by predicted hidden state:
            ax[0].scatter(eachAssetDataFrame_Little.date,
                          eachAssetDataFrame_Little.close,
                          c=eachAssetDataFrame_Little.HiddenStates,
                          cmap=colormap,
                          label='Hidden States',
                          s=80)
            ax[0].set_xlabel('Hidden States',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[0].set_ylabel('Observations',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[0].legend(loc='best')

            # Plain close-price line:
            ax[1].plot(eachAssetDataFrame_Little.date,
                       eachAssetDataFrame_Little.close,
                       label='Close Price')
            ax[1].set_xlabel('Observations',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[1].set_ylabel('Close Price',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[1].legend(loc='best')

            # Returns line:
            ax[2].plot(eachAssetDataFrame_Little.date,
                       eachAssetDataFrame_Little.Returns,
                       label='Returns')
            ax[2].set_xlabel('Observations',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[2].set_ylabel('Returns',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[2].legend(loc='best')

            plt.grid(linestyle='dotted')
            plt.subplots_adjust(left=0.09,
                                bottom=0.20,
                                right=0.94,
                                top=0.90,
                                wspace=0.2,
                                hspace=0)
            # FIX: canvas.set_window_title was deprecated in matplotlib 3.4;
            # the figure manager owns the window title.
            f1.canvas.manager.set_window_title(
                f'Hidden Markov Model + more data plot for asset <{eachAssetName}>'
            )
            #f1.tight_layout()

            # In PNG (only when a directory was actually supplied):
            if saveDirectory:
                plt.savefig(saveDirectory + f'/HMM_{eachAssetName}.png')

            # Show it:
            if showIt:
                plt.show()

            # FIX: close the figure so figures don't accumulate across the
            # per-asset loop (matplotlib warns after ~20 open figures).
            plt.close(f1)