def train(self, metergroup, num_states_dict=None, **load_kwargs):
    """Train using 1d FHMM.

    Places the learnt model in the `model` attribute and the per-meter
    models in `individual`.
    The current version performs training ONLY on the first chunk.
    Online HMMs are welcome if someone can contribute :)
    Assumes all pre-processing has been done.

    Parameters
    ----------
    metergroup : object exposing `meters` and `submeters()`
        Only the meters returned by `metergroup.submeters().meters` are
        trained on.
    num_states_dict : dict, optional
        Number of hidden states keyed by meter, by appliance, or by
        `appliance.identifier.type`.  A meter entry wins; otherwise the
        states of the meter's appliances are summed; otherwise the number
        of states is found by clustering the meter data.
    **load_kwargs
        Forwarded to `meter.power_series`.
    """
    # A fresh dict per call instead of a shared mutable default.
    if num_states_dict is None:
        num_states_dict = {}

    learnt_model = OrderedDict()
    max_num_clusters = 2 if len(metergroup.meters) > 12 else 3

    submeters = metergroup.submeters().meters
    _check_memory(len(submeters))

    for meter in submeters:
        power_series = meter.power_series(**load_kwargs)
        meter_data = next(power_series).dropna()
        X = meter_data.values.reshape((-1, 1))

        if not len(X):
            print("Submeter '{}' has no samples, skipping...".format(meter))
            continue

        self.X = X

        # 1) Did the user specify the number of states for this meter?
        num_total_states = num_states_dict.get(meter)

        # 2) If not, sum the states specified for the meter's appliances
        #    (looked up by appliance first, then by appliance type).
        if num_total_states is None:
            num_apps_states = []
            for appliance in meter.appliances:
                num_app_state = num_states_dict.get(appliance)
                if num_app_state is None:
                    num_app_state = num_states_dict.get(
                        appliance.identifier.type)
                if num_app_state is not None:
                    num_apps_states.append(num_app_state)
            if num_apps_states:
                num_total_states = sum(num_apps_states)

        # 3) Otherwise, find the optimum number of states via clustering.
        if num_total_states is None:
            states = cluster(meter_data, max_num_clusters)
            num_total_states = len(states)

        print("Training model for submeter '{}' with {} states".format(
            meter, num_total_states))
        learnt_model[meter] = hmm.GaussianHMM(num_total_states, "full")

        # Fit
        learnt_model[meter].fit(X)

        # Check to see if there are any more chunks.
        # TODO handle multiple chunks per appliance.
        try:
            next(power_series)
        except StopIteration:
            pass
        else:
            warn("The current implementation of FHMM"
                 " can only handle a single chunk. But there are multiple"
                 " chunks available. So have only trained on the"
                 " first chunk!")

    # Combining to make a AFHMM: rebuild each per-meter HMM from its sorted
    # parameters, then merge them into one combined model.
    self.meters = []
    new_learnt_models = OrderedDict()
    for meter, fitted in learnt_model.items():
        startprob, means, covars, transmat = sort_learnt_parameters(
            fitted.startprob_, fitted.means_, fitted.covars_,
            fitted.transmat_)
        sorted_model = hmm.GaussianHMM(startprob.size, "full")
        sorted_model.startprob_ = startprob
        sorted_model.transmat_ = transmat
        sorted_model.means_ = means
        sorted_model.covars_ = covars
        new_learnt_models[meter] = sorted_model
        # UGLY! But works.
        self.meters.append(meter)

    learnt_model_combined = create_combined_hmm(new_learnt_models)

    self.individual = new_learnt_models
    self.model = learnt_model_combined
pd.set_option('display.width', 100) np.set_printoptions(edgeitems=100) mpl.rcParams['font.sans-serif'] = [u'SimHei'] mpl.rcParams['axes.unicode_minus'] = False n_components = 3 data = pd.read_excel(io='Current.xls', sheetname='Sheet1', header=0) # data['Current'] = MinMaxScaler().fit_transform(data['Current']) data['Current'] *= 1e6 # 去除明显的异常值 data_clean(False) x = data['Time'].reshape(-1, 1) y = data['Current'].reshape(-1, 1) model = hmm.GaussianHMM(n_components=n_components, covariance_type='full', n_iter=10) model.fit(y) components = model.predict_proba(y) components_state = model.predict(y) components_pd = pd.DataFrame(components, columns=np.arange(n_components), index=data.index) data = pd.concat((data, components_pd), axis=1) print 'data = \n', data plt.figure(num=1, facecolor='w', figsize=(8, 9)) plt.subplot(n_components+1, 1, 1) plt.plot(x, y, 'r.-', lw=0.2) plt.ylim(extend(y.min(), y.max())) plt.grid(b=True, ls=':') plt.xlabel(u'时间', fontsize=14) plt.ylabel(u'电流强度', fontsize=14) plt.title(u'原始电流的变化情况', fontsize=16)
np.sum(np.divide(np.absolute(predicted_data - true_data), true_data), 0), true_data.shape[0]) for stock in STOCKS: dataset = np.genfromtxt(stock, delimiter=',') predicted_stock_data = np.empty([0, dataset.shape[1]]) likelihood_vect = np.empty([0, 1]) aic_vect = np.empty([0, 1]) bic_vect = np.empty([0, 1]) for states in STATE_SPACE: num_params = states**2 + states dirichlet_params_states = np.random.randint(1, 50, states) #model = hmm.GaussianHMM(n_components=states, covariance_type='full', startprob_prior=dirichlet_params_states, transmat_prior=dirichlet_params_states, tol=0.0001, n_iter=NUM_ITERS, init_params='mc') model = hmm.GaussianHMM(n_components=states, covariance_type='full', tol=0.0001, n_iter=NUM_ITERS) model.fit(dataset[NUM_TEST:, :]) if model.monitor_.iter == NUM_ITERS: print('Increase number of iterations') sys.exit(1) likelihood_vect = np.vstack((likelihood_vect, model.score(dataset))) aic_vect = np.vstack( (aic_vect, -2 * model.score(dataset) + 2 * num_params)) bic_vect = np.vstack((bic_vect, -2 * model.score(dataset) + num_params * np.log(dataset.shape[0]))) opt_states = np.argmin(bic_vect) + 2 print('Optimum number of states are {}'.format(opt_states)) for idx in reversed(range(NUM_TEST)):
def predict(self):
    """Fit a 2-state Gaussian HMM per trace and on the concatenated data.

    For each of the `self.n_sample` traces in `self.X` the model is fitted,
    the states are relabelled so that state 0 has the lower mean, and the
    transition probabilities are converted to bound/unbound times.  The
    traces that pass the quality filter are summarised with `MLE_G`, and a
    second fit is made on `self.X_conc`.  Results are stored on `self` and
    a summary is printed; nothing is returned.

    `outliers` and `MLE_G` are looked up in the enclosing module.
    """
    # Use the instance's sizes throughout; the original mixed them with
    # module-level globals of the same name.
    n_sample = self.n_sample
    n_frame = self.n_frame

    # Set a new model for training
    self.remodel = hmm.GaussianHMM(n_components=2, covariance_type="full",
                                   n_iter=100)

    # Set initial parameters for training.
    # NOTE(review): hmmlearn re-initialises every parameter listed in
    # `init_params` (default 'stmc') inside fit(), so these values are
    # probably placeholders only -- confirm against the installed version.
    self.remodel.startprob_ = np.array([0.5, 0.5])
    self.remodel.transmat_ = np.array([[0.5, 0.5], [0.5, 0.5]])
    self.remodel.means_ = np.array([[0.0], [1.0]])  # (n_components, n_features)
    # Reads self.SNR as it was before the call; it is replaced by the
    # per-trace array just below.
    self.remodel.covars_ = np.tile(np.identity(1), (2, 1, 1)) / self.SNR**2

    self.Z_predict = [None] * n_sample
    self.converged = [None] * n_sample
    self.X_mean = [None] * n_sample
    self.X_var = [None] * n_sample
    self.SNR = np.zeros(n_sample)
    self.tp = [None] * n_sample
    self.tp_ub = np.zeros(n_sample)
    self.tp_bu = np.zeros(n_sample)
    self.tb_HMM = np.zeros(n_sample)
    self.tu_HMM = np.zeros(n_sample)

    for i in range(n_sample):
        # Estimate model parameters (training)
        self.remodel.fit(self.X[i])

        # Find most likely state sequence corresponding to X
        Z_predict = self.remodel.predict(self.X[i]).reshape(n_frame, 1)
        X_mean = self.remodel.means_        # Mean
        X_var = self.remodel.covars_        # Covariance
        tp = self.remodel.transmat_         # Transition probability
        self.converged[i] = self.remodel.monitor_.converged
        self.SNR[i] = abs(X_mean[1][0] - X_mean[0][0]) / np.mean(X_var)**0.5

        # Relabel so that state 0 is the low-mean state and state 1 the
        # high-mean state; `lo`/`hi` are the fitted indices of each.
        if X_mean[0] <= X_mean[1]:
            lo, hi = 0, 1
            self.Z_predict[i] = Z_predict
        else:
            lo, hi = 1, 0
            self.Z_predict[i] = 1 - Z_predict
        self.X_mean[i] = [X_mean[lo][0], X_mean[hi][0]]
        self.X_var[i] = [X_var[lo][0][0], X_var[hi][0][0]]
        self.tp[i] = [[tp[lo][lo], tp[lo][hi]],
                      [tp[hi][lo], tp[hi][hi]]]

        # HMM estimate of bound (tb) and unbound time (tu).  Adding
        # 1/n_frame keeps the probabilities away from zero before inverting.
        self.tp_ub[i] = self.tp[i][0][1] + 1/n_frame  # unbound -> bound
        self.tp_bu[i] = self.tp[i][1][0] + 1/n_frame  # bound -> unbound
        self.tb_HMM[i] = 1/self.tp_bu[i]  # Bound time
        self.tu_HMM[i] = 1/self.tp_ub[i]  # Unbound time

    # Check the convergence
    print("%.1f %% converged." %(sum([int(i) for i in self.converged])/self.n_sample*100))

    # Label only good data: both times at most half a trace, SNR not an outlier
    cond1 = np.array(self.tb_HMM) <= n_frame*0.5
    cond2 = np.array(self.tu_HMM) <= n_frame*0.5
    cond3 = ~outliers(self.SNR)
    self.good_data = cond1 & cond2 & cond3

    # Log transition probability
    self.log_tp_ub = np.log10(np.array(self.tp_ub[self.good_data]))
    self.log_tp_bu = np.log10(np.array(self.tp_bu[self.good_data]))

    # MLE fitting with a Gaussian function
    result_bu = MLE_G(self.log_tp_bu)
    result_ub = MLE_G(self.log_tp_ub)
    self.m_b, self.s_b = result_bu["x"]
    self.m_u, self.s_u = result_ub["x"]
    self.tb_MLE = 1/10**(self.m_b)
    self.tu_MLE = 1/10**(self.m_u)
    error_tb = 100*(self.tb_MLE/self.time_bound-1)
    error_tu = 100*(self.tu_MLE/self.time_unbound-1)
    print("Time bound (MLE) = %.1f (%.1f %%)" %(self.tb_MLE, error_tb))
    print("Time unbound (MLE) = %.1f (%.1f %%) \n" %(self.tu_MLE, error_tu))

    # ----------------------------------------------------------------------
    # HMM prediction with concatenated data
    self.remodel.fit(self.X_conc)  # Fit (train) to find the parameters
    Z_predict_conc = self.remodel.predict(self.X_conc)  # Most likely trajectory
    self.Z_predict_conc = Z_predict_conc.reshape(n_frame*n_sample, 1)
    self.converged_conc = self.remodel.monitor_.converged
    self.tp_conc = self.remodel.transmat_

    # Reorder state number such that X[Z=0] < X[Z=1]
    if self.X_conc[Z_predict_conc == 0].mean() > self.X_conc[Z_predict_conc == 1].mean():
        self.Z_predict_conc = 1 - self.Z_predict_conc
        self.tp_conc = np.array([[self.tp_conc[1][1], self.tp_conc[1][0]],
                                 [self.tp_conc[0][1], self.tp_conc[0][0]]])

    self.tp_bu_conc = self.tp_conc[1][0] + 1/n_frame  # bound -> unbound
    self.tp_ub_conc = self.tp_conc[0][1] + 1/n_frame  # unbound -> bound
    self.tb_HMM_conc = 1/self.tp_bu_conc  # Bound time
    self.tu_HMM_conc = 1/self.tp_ub_conc  # Unbound time
    error_tb = 100*(self.tb_HMM_conc/self.time_bound-1)
    error_tu = 100*(self.tu_HMM_conc/self.time_unbound-1)

    print("HMM_concatenated is %s" %(["not converged.", "converged."][int(self.converged_conc)]))
    print("Time bound (HMM, conc) = %.1f (%.1f %%)" %(self.tb_HMM_conc, error_tb))
    print("Time unbound (HMM, conc) = %.1f (%.1f %%)\n" %(self.tu_HMM_conc, error_tu))
# user_score for the video superframes. user_score = np.array(ground_truth['user_score'], dtype=np.double) lengths.append(len(X)) i = i + 1 # rest of the videos. else: filename = os.path.join(root, file) vid_str = io.loadmat(filename) X1 = np.array(vid_str['vid_str']['c3d_fc6'][0][0]) lengths.append(len(X1)) X = np.concatenate((X1, X), axis=0) # number of HMM states or the states present in the video. num_of_states = 30 model = hmm.GaussianHMM(n_components=num_of_states) model.fit(X, lengths) # value of the states after training. states = model.means_ # transition probability of the states. state_trans_prob = model.transmat_ # initial probability of the states. state_init_prob = model.startprob_ # video data for which subset is to be found. target_video = Y M = len(states)
def makeModel(components, converted, scalar, hist, histT, vol, i, dataPoints, vers):
    """Fit a full-covariance Gaussian HMM, persist it, and run the tests.

    The model has `components` states and is fitted on `converted`, using
    the length of every element of `hist` as the per-sequence lengths.  It
    is dumped to ``models/<vers><i>-<components>-1.pkl`` and then passed to
    `runTests`, whose result is returned.
    """
    # One tag identifies both the pickle on disk and the test run.
    run_tag = vers + str(i) + "-" + str(components) + "-" + str(1)

    HMM = hmm.GaussianHMM(
        n_components=components,
        covariance_type="full",
        n_iter=750,
        verbose=False,
    )
    sequence_lengths = [len(x) for x in hist]
    HMM.fit(converted, lengths=sequence_lengths)

    joblib.dump(HMM, "models/" + run_tag + ".pkl")
    return runTests(HMM, histT, 250, 1, vol, i, dataPoints, run_tag, scalar)
import numpy as np
import pandas as pd
from hmmlearn import hmm

# path = "/pb_winning_numbers.csv"
pb_data = pd.read_csv('data.csv')

model = hmm.GaussianHMM(n_components=69, covariance_type="full")

# Uniform start distribution over the 69 states.  It has to be a 1-D vector
# of length n_components that sums to 1; the previous (1, 69) array filled
# with 0.01492475362 (a digit transposition of 1/69) summed to ~1.03.
start = np.full(69, 1.0 / 69)
# Uniform transitions: every row sums to 1.
transition = np.full((69, 69), 1.0 / 69)
# One-hot emission means, one state per dimension.
emmision = np.identity(69)

model.startprob_ = start
model.transmat_ = transition
model.means_ = emmision
def _nearest_boundary_distance(labels, div):
    """Return the distance from `div` to the closest label change, or None.

    A change between positions k-1 and k is located at k (the first index
    of the new segment).  Both scans stay inside `labels`, so a sequence
    without any change yields None instead of wrapping around or raising.
    """
    n_labels = len(labels)
    candidates = []

    # Scan backwards from div for the closest change at or before it.
    for idx in range(min(div, n_labels - 1), 0, -1):
        if labels[idx] != labels[idx - 1]:
            candidates.append(abs(div - idx))
            break

    # Scan forwards from div for the closest change after it.
    for idx in range(max(div, 0), n_labels - 1):
        if labels[idx] != labels[idx + 1]:
            candidates.append(abs(div - (idx + 1)))
            break

    return min(candidates) if candidates else None


def experiment_1(n_components, dataLength, div):
    """Repeatedly segment a two-part signal with an HMM and score the split.

    Each of the 1000 steps builds `div` samples from `testfunc_circle`
    followed by `dataLength - div` samples from `testfunc_sigmoid`, fits a
    diagonal-covariance Gaussian HMM, and measures how far the nearest
    predicted label change lies from the true boundary `div`.  That
    distance is also scored with the density of a Gaussian fitted to the
    output of `generate_random_hist`.  Both collections are printed and
    shown as histograms at the end.  Returns None.
    """
    print("HMM_test : experiment_1 : start")
    result = []
    score = []
    result.append(0)
    for step in range(1000):
        print("HMM_test : experiment_1 : STEP " + str(step))
        # Generate the data
        print("HMM_test : experiment_1 : making data ")
        datas = []
        datas.extend(makeData(testfunc_circle, div, 0, 2 * np.pi, 10))
        datas.extend(makeData(testfunc_sigmoid, dataLength - div, 0, 600, 10))
        observations = datas[0:dataLength]

        # Build the model
        model = hmm.GaussianHMM(n_components=n_components,
                                covariance_type="diag")
        print("HMM_test : experiment_1 : fitting model")
        model.fit(observations)

        # Encode the observations as state labels
        print("HMM_test : experiment_1 : predict label")
        res = model.predict(observations)

        # Look for the boundary closest to the true division point
        print("HMM_test : experiment_1 : counting div")
        tempMin = _nearest_boundary_distance(res, div)
        if tempMin is None:
            # Every sample got the same label: there is no boundary to
            # measure (the unbounded scans used to fail on this case).
            print("HMM_test : experiment_1 : no label change found, skipping step")
            continue

        # Add to the histogram
        result.append(tempMin)
        print("HMM_test : experiment_1 : get pbobability")
        # Check whether tempMin is significant against a randomly drawn
        # histogram:
        # - generate a random histogram from res
        randomHist = generate_random_hist(res, 10000)
        # - fit a Gaussian to that histogram
        myu = sum(randomHist) / len(randomHist)
        var = np.var(np.array(randomHist))
        # - evaluate the Gaussian density at tempMin; the exponent is
        #   -(x - mu)^2 / (2 * var)
        tempScore = (math.exp(-(tempMin - myu) ** 2 / (2 * var))
                     / math.sqrt(2 * math.pi * var))
        # Record
        score.append(tempScore)

    # Draw the histograms
    print(
        "HMM_test : experiment_1 : result (the set of distance between truth and estmated div)"
    )
    print(result)
    plt.hist(result, bins=dataLength)
    plt.title("result")
    plt.show()
    print(
        "HMM_test : experiment_1 : score (the set of probability that the estimated div was picked from random-Hist)"
    )
    print(score)
    plt.hist(score, bins=dataLength)
    plt.title("score")
    plt.show()
    print("experiment_1 : Successfully terminated.")
time_bins = np.load("models/time_bins.npy") lengths = np.load("models/lengths.npy") for condition_label in condition_labels: # get training set trials training_idx = np.load("models/training_idx_" + condition_label + ".npy") training_dataset, training_lengths = get_conditioned_dataset(dataset, lengths, training_idx) training_dataset_reduced, _ = get_conditioned_dataset(dataset_reduced, lengths, training_idx) testing_idx = np.load("models/testing_idx_" + condition_label + ".npy") testing_dataset, testing_lengths = get_conditioned_dataset(dataset, lengths, testing_idx) testing_dataset_reduced, _ = get_conditioned_dataset(dataset_reduced, lengths, testing_idx) # Fit model model = hmm.GaussianHMM(n_components=n_compoments) model.fit(training_dataset_reduced, training_lengths) print("model converged: " + str(model.monitor_.converged)) # Save model with open("models/hmm_" + condition_label + "_" + str(n_compoments) + ".pkl", "wb") as file: pickle.dump(model, file) # Try Decoding [logprob, states] = model.decode(testing_dataset_reduced, lengths=testing_lengths, algorithm="viterbi") print(logprob) # Plot visual_times = np.load("models/visual_times.npy")[testing_idx] cue_times = np.load("models/cue_times.npy")[testing_idx] feedback_times = np.load("models/feedback_times.npy")[testing_idx]
def hmm_algo(base_object, batched_setting, logger, algorithm, kmeans, n_states, quickrun=''):
    """Train (or reload) a Gaussian HMM and classify with its posteriors.

    Parameters
    ----------
    base_object : object
        Supplies `saved_model_dir`, `test_user`, `test_activity`,
        `training_dataset_lengths` and `training_testing_dataset_object`
        (read with the keys 'training data', 'testing data',
        'training labels' and 'testing labels').
    batched_setting : bool
        When truthy the model is fitted batch by batch (batches come from
        `batch(lengths, 30)`); otherwise in a single call.
    logger : object exposing `getLogger(name)`
    algorithm
        Not used by this function.
    kmeans
        Forwarded to `fit` as `kmeans_opt`.
    n_states : int
        Number of hidden states.
    quickrun : optional
        When truthy, a previously saved model for the same user and
        activity is loaded from `saved_model_dir` instead of training.

    Returns
    -------
    None.  Results are reported through `ResultClass.show_results`.
    """
    # initialize the loaded model flag
    loaded_model = False
    dataset = base_object.training_testing_dataset_object

    if quickrun:
        # get all the files of the data folder, if it exists
        files_in_data_folder = []
        if os.path.exists(base_object.saved_model_dir):
            files_in_data_folder = os.listdir(base_object.saved_model_dir)

        # abbreviate the intensity suffix: '<name>_low...' -> '<name>_l'
        test_activity = base_object.test_activity
        if 'low' in test_activity:
            activity = test_activity.split('_')[0] + '_l'
        elif 'high' in test_activity:
            activity = test_activity.split('_')[0] + '_h'
        else:
            activity = test_activity

        # check all the files in the folder and look for the model file
        for sfile in files_in_data_folder:
            # user, activity and hmm keyword must be part of the file name
            is_model_file = ((base_object.test_user in sfile)
                             and (activity in sfile)
                             and ('hmm' in sfile)
                             and ('.npy' not in sfile))
            if not is_model_file:
                continue
            logger.getLogger('line.tab.regular').info('hmm model found')
            logger.getLogger('tab.regular.line').info(
                'using hmm model {0}'.format(sfile))
            data_path = os.path.join(base_object.saved_model_dir, sfile)
            # NOTE: joblib.load unpickles the file; only point
            # saved_model_dir at trusted content.
            hmm_model = joblib.load(data_path)
            # turn on flag so the code does not re-train the model
            loaded_model = True
            logger.getLogger('tab.regular.time').info('hmm model loaded')
            break

    if not loaded_model:
        nc = n_states
        cov_type = 'full'
        iterations = 10
        logger.getLogger('tab.regular.time').info(
            'defining Gaussian Hidden Markov Model.')
        logger.getLogger('tab.regular').info('\tmodel parameters')
        msg = '\t\tnumber of states:{0}'.format(nc)
        logger.getLogger('tab.regular').info(msg)
        msg = '\t\tnumber of iterations:{0}'.format(iterations)
        logger.getLogger('tab.regular').info(msg)
        msg = '\t\tcovariance type:{0}'.format(cov_type)
        logger.getLogger('tab.regular').info(msg)

        # defining models
        hmm_model = hmm.GaussianHMM(n_components=nc,
                                    covariance_type=cov_type,
                                    n_iter=iterations,
                                    verbose=True)
        training_data = dataset['training data']

        # NOTE(review): `logger` and `kmeans_opt` are not arguments of the
        # stock hmmlearn `fit`; presumably `hmm` is a customised build.
        if batched_setting:
            total_batches, batched_lengths = batch(
                base_object.training_dataset_lengths, 30)
            last_batch_index = 0
            end = 0
            for index, sliced_length in enumerate(batched_lengths):
                # The placeholders were {1}/{2} with only two arguments,
                # which raised IndexError on the first batch.
                msg = 'starting training Gaussian Hidden Markov Model on batch {0} out of {1}'. \
                    format(index, total_batches)
                logger.getLogger('tab.regular.time').info(msg)
                end += np.sum(sliced_length).astype(np.int32)
                batch_data = training_data[last_batch_index:end]
                msg = 'size of dataset: {0}'.format(batch_data.shape)
                logger.getLogger('tab.regular').debug(msg)
                if index > 0:
                    # by setting init_params='', the training results of the
                    # previous batches are carried over instead of reset
                    hmm_model.init_params = ''
                hmm_model.fit(X=batch_data,
                              lengths=sliced_length,
                              logger=logger,
                              kmeans_opt=kmeans)
                last_batch_index = end
        else:
            hmm_model.fit(X=training_data,
                          logger=logger,
                          kmeans_opt=kmeans,
                          lengths=base_object.training_dataset_lengths)

        logger.getLogger('tab.regular.time').info(
            'finished training Hidden Markov Model.')
        # create a name for a file based on the user, activity and the time
        filename = 'hmm_' + base_object.test_user + '_' + base_object.test_activity + '_' + \
            str(datetime.now().strftime('%Y%m%d%H%M%S'))
        hmm_path_filename = os.path.join(base_object.saved_model_dir, filename)
        logger.getLogger('tab.regular').debug(
            'hmm model stored as {0}'.format(filename))
        logger.getLogger('tab.regular').debug('location {0}'.format(
            base_object.saved_model_dir))
        # if the data folder does not exist, make it (including parents)
        if not os.path.exists(base_object.saved_model_dir):
            os.makedirs(base_object.saved_model_dir)
        # store the model so it does not need to be re-trained
        joblib.dump(hmm_model, hmm_path_filename)

    logger.getLogger('tab.regular.time').info('calculating predictions')
    train_predictions = hmm_model.predict_proba(
        dataset['training data'],
        lengths=base_object.training_dataset_lengths)
    # The testing data is passed without `lengths`, as in the original.
    test_predictions = hmm_model.predict_proba(dataset['testing data'])

    hmm_object = ResultClass()
    # using the model, run algorithms
    hmm_object.classification(
        train_predictions=train_predictions,
        traininglabels=dataset['training labels'],
        test_predictions=test_predictions,
        testinglabels=dataset['testing labels'],
        logger=logger)
    hmm_object.show_results(logger=logger)
fig = plt.subplot(1, n_trials, i + 1) # Load time pointers for the given trial trial = conditioned_trials[i] visual_time = trials['visStim_times'][trial] cue_time = trials['cue_times'][trial] feedback_time = trials['feedback_times'][trial] # generate the spike count histograms t0 = visual_time - pre_stim_dt tf = feedback_time + post_resp_dt [dataset, time_bins] = generate_spike_counts(recording_name, brain_region, neuron_min_score, bin_dt, t0, tf) (n_neurons, n_bins) = dataset.shape # Create a hmm model model = hmm.GaussianHMM(n_components=n_compoments, n_iter=1000) model.fit(dataset.T) [logprob, states] = model.decode(dataset.T) # Find the best mapping of the state sequences if i == 0: states_trial0 = states else: states = map_states(n_compoments, states_trial0, states) # Plot title = brain_region + ' trial#' + str(trial) plot_psths(dataset, time_bins, title, visual_time, cue_time, feedback_time) add_states_2_psth(fig, states, colors, n_neurons) plt.show()
seq = seq.split(',') sequence = [] for i in seq: sequence.append(int(i)) final_testing = [] for t in sequence: a = [] a.append(t) final_testing.append(a) num_forward = int(raw_input("Input number of future plays to predict: ")) print('Analyzing ' + str(num_forward) + ' play(s) into the future') print(final_testing) startprob, transmat, means, covars = estimate_parameters(X, y) model = hmm.GaussianHMM(receiver_number, "full") model.startprob_ = startprob model.transmat_ = transmat model.means_ = means new_covars = [] for c in covars: outermost = [] for i in c: outer = [] for j in i: if j == 0: outer.append(0.00001) # HMM hates zeros. Replace them with a minimal value else: outer.append(j) outermost.append(outer)
#datas = get_mfc_data('C:/Users/18341/Desktop/book/听觉/实验3-语音识别/语料/features/') datas = get_mfc_data('F:/HIT/大三上/视听觉/lab3/组/gzx_sound_mfcc/') #model = hmm.GaussianHMM(n_components = 5, n_iter = 20, tol = 0.01, covariance_type="diag") #hmms = dict() #datas = get_mfc_data('C:/Users/18341/Desktop/book/听觉/实验3-语音识别/语料/features/') hmms = dict() for category in datas: Qs = datas[category] n_hidden = 6 model = hmm.GaussianHMM(n_components=5, n_iter=20, tol=0.01, covariance_type="diag") vstack_Qs = np.vstack(tuple(Qs[:-3])) model.fit(vstack_Qs, [Q.shape[0] for Q in Qs[:-3]]) print('success fit') hmms[category] = model #test correct_num = 0 for category in datas: for test_sample in datas[category][-3:]: print('real_category:', category) max_score = -1 * np.inf predict = -1 for predict_category in hmms: model = hmms[predict_category]
def detect_spectral_rhythm(time, lfps, sampling_frequency,
                           multitaper_params=_DEFAULT_MULTITAPER_PARAMS,
                           hmm_params=_DEFAULT_HMM_PARAMS,
                           frequency_band=(10, 16)):
    '''Find spectral rhythm times using spectral power and an HMM.

    A Gaussian HMM is fitted to the log band power; the state with the
    higher mean power (of states 0 and 1) is taken as the rhythm state.

    Parameters
    ----------
    time : ndarray, shape (n_time,)
    lfps : ndarray, shape (n_time, n_signals)
    sampling_frequency : float
    multitaper_params : dict, optional
    hmm_params : dict, optional
        Keyword arguments for `hmm.GaussianHMM`.
    frequency_band : tuple, optional

    Returns
    -------
    results : pandas.DataFrame, indexed by `time`
        Column `probability` holds the posterior of the rhythm state,
        interpolated onto `time`; column `is_spectral_rhythm` holds the
        predicted state membership, forward-filled onto `time`.
    model : hmmlearn.GaussianHMM instance

    '''
    power_time, spectral_rhythm_band_power = estimate_spectral_rhythm_power(
        atleast_2d(lfps), sampling_frequency, start_time=time[0],
        multitaper_params=multitaper_params,
        frequency_band=frequency_band)
    spectral_rhythm_band_power = spectral_rhythm_band_power.reshape(
        (power_time.shape[0], -1))

    # The HMM works on log power; compute it once for fit and both predicts.
    log_power = np.log(spectral_rhythm_band_power)
    model = hmm.GaussianHMM(**hmm_params).fit(log_power)
    state_ind = model.predict(log_power)

    if (spectral_rhythm_band_power[state_ind == 0].mean() >
            spectral_rhythm_band_power[state_ind == 1].mean()):
        spectral_rhythm_ind = 0
    else:
        spectral_rhythm_ind = 1

    power_time = pd.Index(power_time, name='time')
    time = pd.Index(time, name='time')

    # Plain boolean mask; the `np.bool` alias no longer exists in NumPy.
    is_spectral_rhythm = state_ind == spectral_rhythm_ind
    is_spectral_rhythm = (
        pd.DataFrame(dict(is_spectral_rhythm=is_spectral_rhythm),
                     index=power_time)
        .reindex(index=time, method='pad')
        .reset_index(drop=True))

    spectral_rhythm_probability = model.predict_proba(log_power)
    spectral_rhythm_df = (
        pd.DataFrame(
            dict(probability=spectral_rhythm_probability[
                :, spectral_rhythm_ind]),
            index=power_time)
        .reindex(index=time)
        .reset_index(drop=True)
        .interpolate())
    spectral_rhythm_df = pd.concat(
        (spectral_rhythm_df, is_spectral_rhythm), axis=1).set_index(time)

    return spectral_rhythm_df, model
# -------------------- if cross_validation: # xval_scores gives: % in cluster w/ high confidence; 10th percentile of confidence; score; bayesian info content (currently only for GMM) xval_scores = cross_validate_model(data, model_type, K_range, seed_range, num_clusters_range, tol) ''' ------------------------------------------------------------------------------------------------------------------------------------- # ------------------------ Generate Model -------------------------------------- # --------------------------------------------------------------------------------------------------------------------------------------''' # ------------------------- # Initialize mixture model # ------------------------- if model_type == 'hmm': model = hmm.GaussianHMM(n_components=num_clusters, covariance_type="full", algorithm='viterbi', tol=.00001, random_state=seed) elif model_type == 'gmm': model = mixture.GaussianMixture(n_components=num_clusters, tol=.00001, covariance_type='full', random_state=seed) # --------------------------- # Fit and save mixture model # --------------------------- print('fitting model...') model.fit(data) # fit model if os.path.isfile(file_location_data_library + '_' + model_type + '_' + model_type_and_name_tag) and do_not_overwrite:
def learn(self, data):
    """Fit a verbose, full-covariance Gaussian HMM on `data`.

    The model has `self._nb_states` states and is stored in `self._model`
    before fitting.  Prints a confirmation once the fit returns.
    """
    # A GMM-HMM (hmm.GMMHMM with n_mix=5) was tried here as an alternative.
    gaussian_hmm = hmm.GaussianHMM(
        n_components=self._nb_states,
        covariance_type="full",
        verbose=True,
    )
    self._model = gaussian_hmm
    gaussian_hmm.fit(data)
    print("Model learnt")
def run(period):
    """Fit an 11-state HMM and score it on random windows of the 2018 data.

    The test files are restricted to 1/1/2018 - 1/1/2019 and converted with
    `getHistorical`.  The HMM is fitted on `readModel()`.  Then 15 window
    sizes are drawn at random and 75 random start positions are evaluated
    for each with `predict`; results are grouped by window size and the
    per-size averages are printed.  Returns None.
    """
    test_frame = readTestModelFiles()
    test_frame['time'] = pd.to_datetime(test_frame['time'],
                                        infer_datetime_format=True)
    test_frame = test_frame.set_index('time').loc['1/1/2018':'1/1/2019']
    print(test_frame)

    vol = int(test_frame['volume'].sum())
    print(vol)

    histT = getHistorical(period, test_frame)[0]

    # ---------------------------------------------------------------------
    print('make hmm')
    HMM = hmm.GaussianHMM(n_components=11, covariance_type="full",
                          random_state=7, n_iter=1000)
    HMM.fit(readModel())
    print(HMM.sample(10))
    print(HMM.transmat_)
    print('complete')

    # ---------------------------------------------------------------------
    scores = defaultdict(list)
    # Initial draws; both values are redrawn inside the loops below.
    pSize = random.randint(10, 75)
    strt = random.randint(8, len(histT) - pSize)
    for _ in range(15):
        pSize = random.randint(10, 75)
        for _ in range(75):
            strt = random.randint(6, len(histT) - pSize)
            outcome = predict(HMM, histT, strt, strt + pSize, 5, 25000, False)
            pred, sc, ret = outcome
            scores[pSize].append((pred, sc, ret))

    # ---------------------------------------------------------------------
    # One more prediction on the last window drawn above.
    predictedCloseForTest, _, _ = predict(HMM, histT, strt, strt + pSize,
                                          3, 25000, True)
    last_window = histT.iloc[strt:strt + pSize]
    trueOpenForTest = last_window['open'].values
    trueCloseForTest = last_window['close'].values

    print("50 random periods w/50 different random tests resuts::")
    for size, outcomes in scores.items():
        n_outcomes = len(outcomes)
        # Averages are truncated to the first five characters of their text.
        s = str(sum(hit for _, hit, _ in outcomes) / n_outcomes)[0:5]
        ret = str(sum(gain for _, _, gain in outcomes) / n_outcomes)[0:5]
        print("For the 75 random tests over " + str(size) + " periods, the HMM determined the direction correctly: " + s + "% of the time. Ret: " + ret)
def train(self, data, real_labels, list_features, dim_features):
    """
    Train a supervised HMM classifier based on the data and labels in input.

    input:
    data: a list (n_seq) of arrays containing the data used to train the
    model.  The arrays are concatenated along their first axis and
    `len(array)` is used as the sequence length, so the first axis is
    treated as time.
    real_labels: a list (n_seq) of arrays containing the annotated label of
    the state at each time step
    list_features: a list containing the name of the features used to train
    the model
    dim_features: a list containing the dimension of each feature

    The parameters of the HMM trained are:
    startprob_: an array (n_state) with the initial state probabilities,
    i.e. the relative frequency of each state at the start of a sequence
    transmat_: an array (n_state x n_state); entry [i, j] is the relative
    frequency of going from state i to state j inside a sequence.  A state
    that is never left in the training data gets a uniform row.
    means_ / covars_: the Gaussian emission parameters returned by
    `tools.mean_and_cov`
    """
    self.n_seq = len(data)
    self.list_features = list_features
    self.dim_features = dim_features
    self.n_feature = int(sum(dim_features))

    if self.n_seq == 0:
        # Same exception type that indexing data[0] used to raise.
        raise IndexError("train() needs at least one sequence")

    # Concatenate all the sequences in one go and keep the length of each.
    sequences = [data[i] for i in range(self.n_seq)]
    obs = np.concatenate(sequences)
    lengths = np.array([len(seq) for seq in sequences])
    labels = np.concatenate([real_labels[i] for i in range(self.n_seq)])

    # Get the list and number of states; `labels` becomes state indices.
    self.list_states, labels = np.unique(labels, return_inverse=True)
    labels = labels.ravel()
    self.n_states = len(self.list_states)

    self.model = hmm.GaussianHMM(n_components=self.n_states,
                                 covariance_type="full")

    end = np.cumsum(lengths)
    start = end - lengths

    # Initial probabilities: share of sequences starting in each state.
    init_prob = (np.bincount(labels[start], minlength=self.n_states)
                 / float(len(start)))

    # Transition counts, counts[i, j] = number of i -> j steps.  Steps that
    # cross from the end of one sequence into the start of the next are not
    # real transitions and are left out.
    is_seq_start = np.zeros(len(labels), dtype=bool)
    is_seq_start[start] = True
    within_seq = ~is_seq_start[1:]
    counts = np.zeros((self.n_states, self.n_states))
    np.add.at(counts, (labels[:-1][within_seq], labels[1:][within_seq]), 1)

    # Normalise per source state so every row is P(next | current).  (The
    # previous column normalisation followed by a transpose produced
    # P(previous | next) instead.)
    row_totals = counts.sum(axis=1, keepdims=True)
    has_outgoing = row_totals[:, 0] > 0
    trans_prob = np.full_like(counts, 1.0 / self.n_states)
    trans_prob[has_outgoing] = counts[has_outgoing] / row_totals[has_outgoing]

    # Compute the emission distribution
    Mu, covars = tools.mean_and_cov(obs, labels, self.n_states,
                                    self.list_features)

    # Update the parameters of the model
    self.model.startprob_ = init_prob
    self.model.transmat_ = trans_prob
    self.model.means_ = Mu
    self.model.covars_ = covars

    return
# The transition probability matrix tp_ub = 1 / time_unbound tp_uu = 1 - tp_ub tp_bu = 1 / time_bound tp_bb = 1 - tp_bu transmat = np.array([[tp_uu, tp_ub], [tp_bu, tp_bb]]) # The means of each state means = np.array([[0.0], [1.0]]) # The covariance of each component covars = np.tile(np.identity(1), (2, 1, 1)) / SNR # Build an HMM instance and set parameters model = hmm.GaussianHMM(n_components=2, covariance_type="full") # Set the parameters to generate samples model.startprob_ = startprob model.transmat_ = transmat model.means_ = means model.covars_ = covars # Generate samples X, Z_true = model.sample(n_frame) X_true = np.zeros(n_frame) for i in range(2): X_true[Z_true == i] = X[Z_true == i].mean() # Set a new model for traidning
Ob1 = O1 #第一个HMM模型的观测数据 Ob2 = O2 #第二个HMM模型的观测数据 Return = (data['close'] / data['open'] - 1).values #求出每一天的日对数收益率 from hmmlearn import hmm len1 = len(data[:'2010']) #2010年以前的数据作为训练数据 len2 = len(data['2011':]) #2011年以后的数据作为回测数据 Signal = np.zeros(len2) np.random.seed(1) N_state1 = 3 #第一个HMM隐藏状态的个数设置为3 N_state2 = 4 #第二个HMM隐藏状态的个数设置为4 for i in range(len2): #滚动训练,每个月更新一次 if data.index[len1 + i - 1].month != data.index[len1 + i].month: remodel1 = hmm.GaussianHMM(n_components=N_state1) remodel1.fit(Ob1[:len1 + i]) remodel2 = hmm.GaussianHMM(n_components=N_state2) remodel2.fit(Ob2[:len1 + i]) s_pre1 = remodel1.predict(Ob1[:len1 + i]) #对历史数据做状态序列的预测 s_pre2 = remodel2.predict(Ob2[:len1 + i]) Re = Return[:len1 + i] #取出历史数据的收益率序列 #各个状态在历史数据中的平均收益率 Expect=np.array([np.mean(Re[(s_pre1==j)*(s_pre2==k)]) \ for j in range(N_state1) for k in range(N_state2)]) #各个状态在第二天的发生概率 Pro=np.array([remodel1.transmat_[s_pre1[-1],j]*remodel2.transmat_[s_pre2[-1],k]\ for j in range(N_state1) for k in range(N_state2)]) preReturn = Pro.dot(Expect) #根据转移概率矩阵预测下一天的期望收益 if preReturn > 0.: Signal[i] = 1
def evaluate(self):  # , SNR_min, dwell_min, dwell_max):
    """Detect binding events in ``self.I_frame`` and fit a two-state HMM.

    The trace is smoothed with a 3-point running average, thresholded with
    ``noise_cutoff`` and split into bound/unbound intervals. Intervals
    separated by a gap of at most ``blinking`` frames are merged, and
    intervals shorter than ``dwell_min`` or longer than ``dwell_max`` are
    dropped. A two-state Gaussian HMM is then fitted to the raw trace.

    ``SNR_min`` and ``noise_cutoff`` are read as free names; they are not
    defined inside this method.

    Sets ``I_s``, ``dwell``, ``waiting``, ``SNR``, ``I_fit``, ``tp_ub``,
    ``tp_bu`` and ``I_predict`` on the instance.

    Returns:
        bool: False when the trace is too noisy or no usable binding event
        remains after filtering, True otherwise.
    """
    blinking = 1
    dwell_min = 1
    dwell_max = 100

    if self.noise > 1/SNR_min:
        return False

    # Smooth, then pad one value on each side so the smoothed trace is built
    # from the running average plus two edge samples.
    x = running_avg(self.I_frame, 3)
    self.I_s = np.array([x[0]] + x.tolist() + [x[-1]])
    signal = self.I_s > noise_cutoff

    # t_b[k]: index of the last frame below the cutoff before a rise.
    # t_ub[k]: index of the last frame above the cutoff before a fall.
    t_b = []
    t_ub = []
    for i in range(len(signal) - 1):
        if not signal[i] and signal[i+1]:
            t_b.append(i)
        if signal[i] and not signal[i+1]:
            t_ub.append(i)

    if not t_b or not t_ub:
        return False
    if t_ub[0] < t_b[0]:  # remove pre-existing binding
        del t_ub[0]
    if not t_b or not t_ub:
        return False
    if t_ub[-1] < t_b[-1]:  # remove unfinished binding
        del t_b[-1]
    if not t_b or not t_ub:
        return False

    # Combine blinking: a short gap between the end of one event and the
    # start of the next merges the two events by dropping both inner edges.
    blink_ub = set()
    blink_b = set()
    for i in range(len(t_b) - 1):
        if abs(t_ub[i] - t_b[i+1]) <= blinking:
            blink_ub.add(t_ub[i])
            blink_b.add(t_b[i+1])
    t_ub = [t for t in t_ub if t not in blink_ub]
    t_b = [t for t in t_b if t not in blink_b]

    # Delete too short or too long binding events.
    kept = [(b, ub) for b, ub in zip(t_b, t_ub)
            if dwell_min <= ub - b <= dwell_max]
    if not kept:
        return False
    t_b = [b for b, _ in kept]
    t_ub = [ub for _, ub in kept]

    self.dwell = []
    self.waiting = []
    self.SNR = []
    self.I_fit = np.zeros(len(signal))
    for i in range(len(t_b)):
        self.dwell.append(t_ub[i] - t_b[i])
        if i < len(t_b) - 1:
            self.waiting.append(t_b[i+1] - t_ub[i])
        # The frames above the cutoff are t_b[i]+1 .. t_ub[i] inclusive, so
        # the mean is taken over the same slice that I_fit fills (the mean
        # was previously taken over a window shifted one frame early).
        I_mean = np.mean(self.I_frame[t_b[i]+1:t_ub[i]+1])
        self.SNR.append(I_mean/self.noise)
        self.I_fit[t_b[i]+1:t_ub[i]+1] = I_mean

    # HMM
    X = self.I_frame.reshape(len(signal), 1)

    # Set a new model for training
    remodel = hmm.GaussianHMM(n_components=2, covariance_type="full",
                              n_iter=100)

    # Set initial parameters for training. The start probabilities must sum
    # to one and the means must have shape (n_components, n_features).
    # NOTE(review): with hmmlearn's default ``init_params`` these values are
    # presumably re-initialised by fit(); pass ``init_params=""`` if they
    # are meant to seed the training -- confirm the intent.
    remodel.startprob_ = np.array([0.5, 0.5])
    remodel.transmat_ = np.array([[0.9, 0.1], [0.1, 0.9]])
    remodel.means_ = np.array([[0.0], [1.0]])
    remodel.covars_ = np.tile(np.identity(1), (2, 1, 1)) * self.noise**2

    # Estimate model parameters (training)
    remodel.fit(X)

    # Find most likely state sequence corresponding to X
    Z_predict = remodel.predict(X)

    # Reorder state number such that X[Z=0] < X[Z=1]; the transition matrix
    # is permuted the same way (both axes reversed).
    if X[Z_predict == 0].mean() > X[Z_predict == 1].mean():
        Z_predict = 1 - Z_predict
        remodel.transmat_ = np.array(
            [[remodel.transmat_[1][1], remodel.transmat_[1][0]],
             [remodel.transmat_[0][1], remodel.transmat_[0][0]]])
    self.tp_ub = remodel.transmat_[0][1]
    self.tp_bu = remodel.transmat_[1][0]

    # Sequence of predicted states, expressed as the mean level of each state
    self.I_predict = np.zeros(len(X))
    for i in range(2):
        self.I_predict[Z_predict == i] = X[Z_predict == i].mean()

    return True
def partial_fit(self, train_main, train_appliances, **load_kwargs):
    """Learn per-appliance HMM statistics from sub-metered power data.

    For every appliance a Gaussian HMM with ``self.default_num_states``
    states is fitted to its power readings. The state means, the empirical
    state occupancy and the transposed transition matrix are collected.

    Args:
        train_main: Aggregate readings. Accepted for interface
            compatibility; this routine does not use them.
        train_appliances: Iterable of ``(appliance_name, list_of_frames)``
            pairs; the frames of one appliance are concatenated row-wise.
        **load_kwargs: Unused.

    Sets ``models``, ``num_appliances``, ``appliances``, ``means_vector``,
    ``pi_s_vector`` and ``transmat_vector`` on the instance and adds one
    entry per appliance to ``self.signal_aggregates``.
    """
    self.models = []
    self.num_appliances = 0
    self.appliances = []

    train_appliances = [
        (app_name, pd.concat(df_list, axis=0))
        for app_name, df_list in train_appliances
    ]

    means_vector = []
    pi_s_vector = []
    transmat_vector = []

    for appliance_name, power in train_appliances:
        # Learn the state occupancy and transition probabilities of each
        # appliance using a simple HMM.
        self.appliances.append(appliance_name)
        X = power.values.reshape((-1, 1))

        model = hmm.GaussianHMM(self.default_num_states, "full")
        model.fit(X)

        means = model.means_.flatten().reshape((-1, 1))
        states = model.predict(X)

        # Fraction of samples decoded into each state. bincount with
        # minlength keeps one entry per model state, so pi stays aligned
        # with means and the transition matrix even when a state is never
        # decoded; float() keeps this a true division on Python 2 as well.
        pi = (np.bincount(states, minlength=self.default_num_states)
              / float(len(states)))

        means_vector.append(means)
        pi_s_vector.append(pi)
        transmat_vector.append(model.transmat_.T)

        self.num_appliances += 1
        self.signal_aggregates[appliance_name] = (
            np.mean(X) * self.time_period).reshape((-1, ))

    self.means_vector = means_vector
    self.pi_s_vector = pi_s_vector
    self.transmat_vector = transmat_vector
    print("Finished Training")
def test_bad_covariance_type(self):
    """Check that an unknown covariance type is rejected with ValueError."""
    # The covariance type configured on the test case must be accepted.
    hmm.GaussianHMM(20, self.covariance_type)

    with self.assertRaises(ValueError):
        hmm.GaussianHMM(20, 'badcovariance_type')
# star 特征 star = cv2.xfeatures2d.StarDetector_create() keypoints = star.detect(gray) sift = cv2.xfeatures2d.SIFT_create() _, desc = sift.compute(gray, keypoints) # 获得特征矩阵 if len(descs) == 0: descs = desc else: descs = np.append(descs, desc, axis=0) train_x.append(descs) train_y.append(label) # 训练模型 models = dict() for descs, label in zip(train_x, train_y): model = hl.GaussianHMM(n_componets=4, covariance_type="diag", n_iter=1000) models[label] = model.fit(descs) # 开始测试 test_objects = search_objects("E:\\达内学习文件\\17 机器学习\\data\\objects\\testing") test_x, test_y, test_z = list(), list(), list() for label, filenames in train_objects.items(): test_z.append([]) descs = np.array([]) for filename in filenames: image = cv2.imread(filename) gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) # 转为灰度图
def _load_mfcc_dataset(speeches):
    """Return (features, labels) with one stacked MFCC matrix per label."""
    features, labels = [], []
    for label, filenames in speeches.items():
        per_file = []
        for filename in filenames:
            sample_rate, sigs = wf.read(filename)
            per_file.append(sf.mfcc(sigs, sample_rate))
        # Stack the MFCC frames of all files row-wise; a label without any
        # file keeps the empty placeholder array.
        if per_file:
            stacked = np.concatenate(per_file, axis=0)
        else:
            stacked = np.array([])
        features.append(stacked)
        labels.append(label)
    return features, labels


# Training set: one MFCC matrix per speech label.
search_speeches('../data/speeches/training', train_speeches)
train_x, train_y = _load_mfcc_dataset(train_speeches)

# One 4-state diagonal-covariance Gaussian HMM per label. The spelling
# "modles" is kept because code outside this fragment may refer to it.
modles = {}
for mfccs, label in zip(train_x, train_y):
    model = hl.GaussianHMM(n_components=4, covariance_type='diag',
                           n_iter=1000)
    modles[label] = model.fit(mfccs)

# Test set, loaded the same way as the training set.
test_speeches = {}
search_speeches('../data/speeches/testing', test_speeches)
test_x, test_y = _load_mfcc_dataset(test_speeches)
volume = np.array([q[6] for q in quotes])[1:]

# Differencing the close values yields one element fewer than the input, so
# the dates and close values drop their first entry to stay aligned with it.
diff = np.diff(close_v)
dates = dates[1:]
close_v = close_v[1:]

# Training matrix: one row per day, columns are (close difference, volume).
X = np.column_stack([diff, volume])

print("fitting to HMM and decoding ...", end="")

# Build the two-state HMM and fit it to the observations.
model = hmm.GaussianHMM(n_components=2, covariance_type="diag", n_iter=1000)
model = model.fit(X)

# Most likely hidden state for every observation.
hidden_states = model.predict(X)

print("done")

print("Transition matrix")
print(model.transmat_)
print()

print("Means and vars of each hidden state")
for i, (state_mean, state_covar) in enumerate(
        zip(model.means_, model.covars_)):
    print("{0}th hidden state".format(i))
    print("mean = ", state_mean)
    print("var = ", np.diag(state_covar))
def main():
    """Back-test an HMM-based sector prediction and print hit-rate counters.

    Reads a CSV of quotes, builds a per-day table over 28 hard-coded groups,
    records each day's five largest values, reduces the table to 10 PCA
    components, and slides a window of T days over it. For every window a
    3-state spherical Gaussian HMM is fitted; earlier days in the window that
    share the last day's hidden state and pass ``isIn`` are ranked by how
    close their score is to the last day's score, and the w_n closest ones
    vote (weighted) for next-day candidates. The candidates are compared
    with the actual next-day top five and several win counters are updated.

    NOTE(review): the nesting of the win0..win6 counters below was
    reconstructed from a whitespace-collapsed source -- confirm it against
    the original file.
    """
    file="/home/shaoguang/anaconda3/shaoguang123/bishe_csg/hq2.csv"
    hq=pd.read_csv(file,encoding="utf-8")
    hq1=hq.iloc[:,[0,1,2,4,5]]
    # Process the data to obtain the daily gain of every industry sector
    col=int(hq1.iloc[:,0:1].size)
    dat=int(col/28)
    val=np.zeros((dat,28))
    row_list=np.zeros(28)
    Column_list=np.arange(0,dat,1)
    for i in range(28):
        row_list[i]=hq1.iloc[i*dat,0]
    for i in range(dat):
        for j in range(28):
            # Rows are written in reverse order relative to the CSV
            val[dat-i-1][j]=hq1.iloc[i+dat*j,4]
    data=pd.DataFrame(val,index=Column_list,columns=row_list)
    # Find the five sectors with the largest daily gain
    order1=[]
    order2=[]
    order3=[]
    order4=[]
    order5=[]
    for i in range(dat):
        Sector_list=[]
        for j in range(28):
            Sector_list.append((row_list[j],val[i][j]))
        top = heapq.nlargest(5,Sector_list,key=lambda s: s[1])
        order1.append(top[0][0]),order2.append(top[1][0]),order3.append(top[2][0]),order4.append(top[3][0]),order5.append(top[4][0])
    order_list=pd.DataFrame()
    order_list.insert(0,"order1",order1),order_list.insert(1,"order2",order2),order_list.insert(2,"order3",order3),order_list.insert(3,"order4",order4),order_list.insert(4,"order5",order5)
    # PCA dimensionality reduction
    pca=PCA(n_components=10,copy=False)
    A=pca.fit_transform(data)
    # Parameter settings
    n=3 # number of hidden states
    T=350 # sample window size
    t=1 # number of days to predict
    w_n=5 # number of days that share the current trading day's market hidden state and sector-rotation feature and have the closest likelihood
    index = 0
    step = t
    win=0
    lose=0
    win0=0
    win1=0
    win2=0
    win3=0
    win4=0
    win5=0
    win6=0
    while index+T < len(A)-step:
        model = hmm.GaussianHMM(n_components= n, covariance_type="spherical", n_iter=1000).fit(A[index:index+T])
        hist_info = []
        hiddenStatus = model.predict(A[index:index+T])
        #print (hiddenStatus)
        # One (day index, hidden state, single-day score) tuple per window day
        for i in range(index, index+T):
            #hiddenStatu = model.predict(A[index+i : index+i+1])
            score = model.score(A[i: i+1])
            day_tuple = (i, hiddenStatus[i-index], score)
            hist_info.append(day_tuple)
        #print (hist_info)
        last_hiddenStatus = hist_info[-1][1]
        last_score = hist_info[-1][2]
        last_index = hist_info[-1][0]
        print(last_index)
        sameStatus = []
        cnt=0
        for (x,y,z) in hist_info[:-1]:
            if y == last_hiddenStatus:# market hidden state matches
                if isIn(last_index,x,order_list) and isIn(last_index-1,x-1,order_list):# sector-rotation feature matches
                    diff = abs(z - last_score)
                    sameStatus.append((x, diff))
                    cnt+=1
        # Too few comparable days: skip this window
        if(cnt<w_n):
            index += step
            continue
        pos_diffs = heapq.nsmallest(w_n, sameStatus, key=lambda s: s[1])
        # Weighted prediction
        weights = [5,4,3,2,1]
        d={}
        # The top entry of the day after each matched day votes with half
        # of that match's weight
        for i in range(w_n):
            if order_list.iloc[pos_diffs[i][0]+1,0] in d:
                d[order_list.iloc[pos_diffs[i][0]+1,0]]+=weights[i]/2
            else:
                d[order_list.iloc[pos_diffs[i][0]+1,0]]=weights[i]/2
        # The 2nd and 3rd entries only add weight to keys already present
        # NOTE(review): there is no else branch here, so they never introduce
        # a new candidate -- confirm this is intended.
        for i in range(w_n):
            for j in range(1,3):
                if order_list.iloc[pos_diffs[i][0]+1,j] in d:
                    d[order_list.iloc[pos_diffs[i][0]+1,j]]+=weights[i]*(3-j)/6
        # d becomes a list of (key, weight) pairs, heaviest first
        d=sorted(d.items(),key = lambda asd:asd[1],reverse=True)
        print("&&&&&&&&&&&&&&&&&&&&&&&&&&")
        print(d)
        # Actual top five of the day after the window's last day
        top_5=[]
        for i in range(5):
            top_5.append(order_list.iloc[last_index+1,i])
        print(last_index)
        print(top_5)
        # NOTE(review): d[1][0] raises IndexError when d has a single entry
        # and d[0][0] is not in top_5; also the parenthesised term is True
        # whenever len(d)<3, so such windows count as a win regardless of
        # the second candidate -- confirm the intended condition.
        if d[0][0] in top_5 or d[1][0] in top_5 or (len(d)<3 or d[2][0] in top_5):
            win+=1
            print("win")
            # win0: first candidate hit
            if d[0][0] in top_5:
                win0+=1
            # win1: second candidate hit (first one if it is the only one)
            if len(d)<2:
                if d[0][0] in top_5:
                    win1+=1
            elif d[1][0] in top_5:
                win1+=1
            # win2: third candidate hit, falling back when fewer exist
            if len(d)<2:
                if d[0][0] in top_5:
                    win2+=1
            elif len(d)<3:
                if d[0][0] in top_5 or d[1][0] in top_5:
                    win2+=1
            elif d[2][0] in top_5:
                win2+=1
            # win3: first and second candidates both hit
            if len(d)<2:
                if d[0][0] in top_5:
                    win3+=1
            elif d[0][0] in top_5 and d[1][0] in top_5:
                win3+=1
            # win4: first or second candidate hit
            if len(d)<2:
                if d[0][0] in top_5:
                    win4+=1
            elif d[0][0] in top_5 or d[1][0] in top_5:
                win4+=1
            # win5: all of the (up to three) leading candidates hit
            if len(d)<2:
                if d[0][0] in top_5:
                    win5+=1
            elif len(d)<3:
                if d[0][0] in top_5 and d[1][0] in top_5:
                    win5+=1
            elif d[0][0] in top_5 and d[1][0] in top_5 and d[2][0] in top_5:
                win5+=1
            # win6: any of the (up to three) leading candidates hit
            if len(d)<2:
                if d[0][0] in top_5:
                    win6+=1
            elif len(d)<3:
                if d[0][0] in top_5 or d[1][0] in top_5:
                    win6+=1
            elif d[0][0] in top_5 or d[1][0] in top_5 or d[2][0] in top_5:
                win6+=1
        else:
            lose+=1
        index += step
    # NOTE(review): the ratios below divide by win+lose, which is zero when
    # no window produced a prediction.
    print(win)
    print(lose)
    print(win/(win+lose))
    print(win0/(win+lose))
    print(win1/(win+lose))
    print(win2/(win+lose))
    print(win3/(win+lose))
    print(win4/(win+lose))
    print(win5/(win+lose))
    print(win6/(win+lose))
    print("Done")
def _train_and_store_hmm(label, data_path, n_full_files, model_path):
    """Fit an 8-state Gaussian HMM on one training dataset and persist it."""
    # Number of samples contributed by each complete source file; the last
    # file supplies whatever remains.
    samples_per_file = 239766

    print('loading {0} dataset'.format(label))
    with h5py.File(name=data_path, mode='r') as dataset:
        training_data = dataset['training data']
        total_samples = training_data.shape[0]

        # Length of each source file making up the dataset.
        list_of_lengths = [samples_per_file] * n_full_files
        rest_of_array = int(total_samples - np.sum(list_of_lengths))
        if rest_of_array < 0:
            # The dataset is shorter than the assumed number of full files.
            raise ValueError(
                '{0} length variable does not match {0} dataset length'
                .format(label))
        list_of_lengths.append(rest_of_array)
        sequence_lengths = np.array(list_of_lengths)

        print('creating a {0} Gaussian HMM object'.format(label))
        model = hmm.GaussianHMM(n_components=8, verbose=True)
        print('\ttraining the model')
        model.fit(training_data, sequence_lengths)

    print('\tstoring model')
    joblib.dump(model, model_path)
    return model


def hmm_build_train(program_path):
    """Load or train the preictal/interictal HMMs and score the test set.

    Models are looked up in ``<program_path>/models``; any model that is not
    found is trained from the matching HDF5 file in
    ``<program_path>/dataset`` and stored for later runs. Every test segment
    is decoded with both models and labelled 0 (interictal) when the
    interictal model yields the larger log probability, otherwise 1
    (preictal). Predictions are written to ``results.csv`` in the current
    directory and the accuracy against ``obtain_true_results()`` is printed.

    :param program_path: directory containing ``dataset`` and ``models``
    :raises ValueError: if a training dataset is shorter than the assumed
        number of full-length source files
    """
    dataset_path = os.path.join(program_path, 'dataset')
    print('creating the datasets path')
    preictal_data_path = os.path.join(
        dataset_path, 'final_preictal_training_dataset.hdf5')
    interictal_data_path = os.path.join(
        dataset_path, 'final_interictal_training_dataset.hdf5')
    testing_data_path = os.path.join(
        dataset_path, 'processed_testing_training_dataset.hdf5')

    preictal_hmm = None
    interictal_hmm = None
    models_path = os.path.join(program_path, 'models')

    if os.path.exists(models_path):
        # Reuse stored models so the code does not re-train them.
        # NOTE: joblib.load unpickles the file; only load trusted models.
        for m_file in next(os.walk(models_path))[2]:
            if '.npy' in m_file:
                continue
            if 'hmm_preictal' in m_file:
                preictal_hmm = joblib.load(os.path.join(models_path, m_file))
            elif 'hmm_interictal' in m_file:
                interictal_hmm = joblib.load(
                    os.path.join(models_path, m_file))
    else:
        # Create the location for storing models for later use.
        os.mkdir(models_path)

    if preictal_hmm is None:
        preictal_hmm = _train_and_store_hmm(
            'preictal', preictal_data_path, 29,
            os.path.join(models_path, 'hmm_preictal'))

    if interictal_hmm is None:
        # The interictal model itself is stored under the interictal file
        # name (the preictal model used to be written there by mistake).
        interictal_hmm = _train_and_store_hmm(
            'interictal', interictal_data_path, 449,
            os.path.join(models_path, 'hmm_interictal'))

    print('loading testing dataset')
    true_results = obtain_true_results()
    true_count = 0.0

    with h5py.File(name=testing_data_path, mode='r') as testing_dataset:
        with open('results.csv', 'w') as output_file:
            testing_keys = list(testing_dataset.keys())
            for testing_key in testing_keys:
                print('calculating likelihoodi for {0}'.format(testing_key))
                # Read the segment once and decode it with both models.
                segment = testing_dataset[testing_key].value
                interictal_log_prob, _ = interictal_hmm.decode(segment)
                preictal_log_prob, _ = preictal_hmm.decode(segment)

                # 0 = interictal, 1 = preictal
                if interictal_log_prob > preictal_log_prob:
                    predicted = 0
                else:
                    predicted = 1
                if true_results[testing_key] == predicted:
                    true_count += 1
                output_file.write('{0},{1}'.format(testing_key, predicted))
                output_file.write('\n')

    accuracy = true_count / len(testing_keys)
    print('accuracy={0}'.format(accuracy))
def partial_fit(self, train_main, train_appliances, **load_kwargs):
    """Train using 1d FHMM.

    One Gaussian HMM is fitted per appliance. The learnt parameters are
    sorted with ``sort_learnt_parameters`` and the per-appliance models are
    combined with ``create_combined_hmm``.

    Args:
        train_main: List of aggregate frames; concatenated row-wise and only
            used to print the resulting shape.
        train_appliances: Iterable of ``(appliance_name, list_of_frames)``
            pairs; the frames of one appliance are concatenated row-wise.
        **load_kwargs: Unused.

    Appends every appliance name to ``self.app_names`` and sets
    ``self.meters``, ``self.individual`` (per-appliance models) and
    ``self.model`` (combined model). ``self.X`` holds the samples of the
    last appliance processed.
    """
    print(".........................FHMM partial_fit.................")
    train_main = pd.concat(train_main, axis=0)
    train_app_tmp = []
    for app_name, df_list in train_appliances:
        train_app_tmp.append((app_name, pd.concat(df_list, axis=0)))
        self.app_names.append(app_name)
    print(train_main.shape)
    train_appliances = train_app_tmp

    learnt_model = OrderedDict()
    # With many appliances fewer states are allowed per appliance.
    num_meters = len(train_appliances)
    if num_meters > 12:
        max_num_clusters = 2
    else:
        max_num_clusters = 3

    for appliance, meter in train_appliances:
        meter_data = meter.dropna()
        X = meter_data.values.reshape((-1, 1))

        if not len(X):
            # Report the appliance name rather than dumping the (empty)
            # data object into the message.
            print(
                "Submeter '{}' has no samples, skipping...".format(appliance))
            continue

        assert X.ndim == 2
        self.X = X

        if self.num_of_states > 0:
            # User has specified the number of states for this appliance
            num_total_states = self.num_of_states
        else:
            # Find the optimum number of states
            states = cluster(meter_data, max_num_clusters)
            num_total_states = len(states)

        print("Training model for submeter '{}'".format(appliance))
        learnt_model[appliance] = hmm.GaussianHMM(num_total_states, "full")

        # Fit
        learnt_model[appliance].fit(X)
        # format() instead of "+" so non-string appliance keys do not raise.
        print("Learnt model for : {}".format(appliance))

    # Combining to make a AFHMM
    self.meters = []
    new_learnt_models = OrderedDict()
    for meter in learnt_model:
        print(meter)
        startprob, means, covars, transmat = sort_learnt_parameters(
            learnt_model[meter].startprob_, learnt_model[meter].means_,
            learnt_model[meter].covars_, learnt_model[meter].transmat_)

        # Rebuild a model of the same size and copy the sorted parameters in.
        new_learnt_models[meter] = hmm.GaussianHMM(startprob.size, "full")
        new_learnt_models[meter].startprob_ = startprob
        new_learnt_models[meter].transmat_ = transmat
        new_learnt_models[meter].means_ = means
        new_learnt_models[meter].covars_ = covars
        # UGLY! But works.
        self.meters.append(meter)

    learnt_model_combined = create_combined_hmm(new_learnt_models)
    self.individual = new_learnt_models
    self.model = learnt_model_combined
    print("print ...........", self.model)
    print("FHMM partial_fit end.................")
def train_across_buildings(self, ds, list_of_buildings, list_of_appliances,
                           min_activation=0.05, **load_kwargs):
    """
    Train one 2-state Gaussian HMM per appliance on data pooled from
    several buildings, then combine the per-appliance models.

    A building contributes to an appliance only if the fraction of its
    readings above 10 exceeds ``min_activation``. An appliance is trained
    only when more than one building contributes. Buildings whose data
    cannot be loaded are skipped with a printed notice.

    Sets ``self.list_of_appliances``, ``self.individual``, ``self.model``
    and ``self.meters``.

    :param ds: nilmtk.Dataset
    :param list_of_buildings: List of buildings to use for training
    :param list_of_appliances: List of appliances (nilm-metadata names)
    :param min_activation: Minimum activation (in fraction) to use a home
        in training
    :param load_kwargs: forwarded to the meter's ``load``
    :return: None
    """
    _check_memory(len(list_of_appliances))

    self.list_of_appliances = list_of_appliances
    models = {}

    for appliance in list_of_appliances:
        print("Training for", appliance)
        o = []
        for building_num in list_of_buildings:
            building = ds.buildings[building_num]
            elec = building.elec
            try:
                df = next(elec[appliance].load(**load_kwargs)).squeeze()
                appl_power = df.dropna().values.reshape(-1, 1)
                activation = (df > 10).sum() * 1.0 / len(df)
            except Exception as err:
                # Best effort: a building without this appliance (or with
                # unusable data) is skipped. Only Exception is caught so
                # KeyboardInterrupt/SystemExit still propagate.
                print("Skipping building %s for %s: %s"
                      % (building_num, appliance, err))
                continue
            if activation > min_activation and len(appl_power):
                o.append(appl_power)

        if len(o) > 1:
            # The per-building sequences have different lengths, so they are
            # stacked into one (n_samples, 1) array and their boundaries are
            # passed via ``lengths``.
            X = np.concatenate(o)
            lengths = [len(sequence) for sequence in o]
            mod = hmm.GaussianHMM(2, "full")
            mod.fit(X, lengths)
            models[appliance] = mod
            print("Means for %s are" % appliance)
            print(mod.means_)
        else:
            print("Not enough samples for %s" % appliance)

    new_learnt_models = OrderedDict()
    for appliance, appliance_model in iteritems(models):
        startprob, means, covars, transmat = sort_learnt_parameters(
            appliance_model.startprob_, appliance_model.means_,
            appliance_model.covars_, appliance_model.transmat_)

        # Set the sorted parameters as fitted attributes, as the other
        # training routines do; the third and fourth positional constructor
        # arguments are not portable across hmmlearn versions.
        sorted_model = hmm.GaussianHMM(startprob.size, "full")
        sorted_model.startprob_ = startprob
        sorted_model.transmat_ = transmat
        sorted_model.means_ = means
        sorted_model.covars_ = covars
        new_learnt_models[appliance] = sorted_model

    learnt_model_combined = create_combined_hmm(new_learnt_models)
    self.individual = new_learnt_models
    self.model = learnt_model_combined
    self.meters = [
        nilmtk.global_meter_group.select_using_appliances(
            type=appliance).meters[0]
        for appliance in iterkeys(self.individual)
    ]