Example #1
    def train(self, metergroup, num_states_dict={}, **load_kwargs):
        """Train using 1d FHMM.

        Places the learnt model in `model` attribute
        The current version performs training ONLY on the first chunk.
        Online HMMs are welcome if someone can contribute :)
        Assumes all pre-processing has been done.
        """
        learnt_model = OrderedDict()
        num_meters = len(metergroup.meters)
        if num_meters > 12:
            max_num_clusters = 2
        else:
            max_num_clusters = 3

        _check_memory(len(metergroup.submeters().meters))

        for i, meter in enumerate(metergroup.submeters().meters):
            power_series = meter.power_series(**load_kwargs)
            meter_data = next(power_series).dropna()
            X = meter_data.values.reshape((-1, 1))

            if not len(X):
                print(
                    "Submeter '{}' has no samples, skipping...".format(meter))
                continue

            assert X.ndim == 2
            self.X = X
            num_total_states = None

            # Check if the user has specified the number of states for this meter
            num_total_states = num_states_dict.get(meter)

            # If not, check if the number of states for the appliances was specified
            if num_total_states is None:
                num_apps_states = []
                for appliance in meter.appliances:
                    num_app_state = num_states_dict.get(appliance)
                    if num_app_state is None:
                        num_app_state = num_states_dict.get(
                            appliance.identifier.type)

                    if num_app_state is not None:
                        num_apps_states.append(num_app_state)

                if num_apps_states:
                    num_total_states = sum(num_apps_states)

            # The user may also have specified the number of states for this
            # meter directly, which takes precedence
            if num_states_dict.get(meter) is not None:
                num_total_states = num_states_dict.get(meter)

            # Otherwise, find the optimum number of states via clustering
            if num_total_states is None:
                states = cluster(meter_data, max_num_clusters)
                num_total_states = len(states)

            print("Training model for submeter '{}' with {} states".format(
                meter, num_total_states))
            learnt_model[meter] = hmm.GaussianHMM(num_total_states, "full")

            # Fit
            learnt_model[meter].fit(X)

            # Check to see if there are any more chunks.
            # TODO handle multiple chunks per appliance.
            try:
                next(power_series)
            except StopIteration:
                pass
            else:
                warn("The current implementation of FHMM"
                     " can only handle a single chunk.  But there are multiple"
                     " chunks available.  So have only trained on the"
                     " first chunk!")

        # Combining to make an AFHMM
        self.meters = []
        new_learnt_models = OrderedDict()
        for meter in learnt_model:
            startprob, means, covars, transmat = sort_learnt_parameters(
                learnt_model[meter].startprob_, learnt_model[meter].means_,
                learnt_model[meter].covars_, learnt_model[meter].transmat_)

            new_learnt_models[meter] = hmm.GaussianHMM(startprob.size, "full")
            new_learnt_models[meter].startprob_ = startprob
            new_learnt_models[meter].transmat_ = transmat
            new_learnt_models[meter].means_ = means
            new_learnt_models[meter].covars_ = covars
            # UGLY! But works.
            self.meters.append(meter)

        learnt_model_combined = create_combined_hmm(new_learnt_models)
        self.individual = new_learnt_models
        self.model = learnt_model_combined
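The `create_combined_hmm` call above is what turns the per-appliance chains into a single factorial model. As a hedged illustration (the standard exact construction for two independent chains, not necessarily NILMTK's implementation), the joint transition matrix is the Kronecker product of the individual ones and the joint means are sums over state pairs:

import numpy as np
from hmmlearn import hmm

def combine_two_hmms(m1, m2):
    # Joint state (i, j) is indexed as i * n2 + j, so Kronecker products
    # give the joint start and transition probabilities, and the aggregate
    # observation is the sum of the two component emissions.
    n1, n2 = m1.n_components, m2.n_components
    combined = hmm.GaussianHMM(n_components=n1 * n2, covariance_type="full")
    combined.startprob_ = np.kron(m1.startprob_, m2.startprob_)
    combined.transmat_ = np.kron(m1.transmat_, m2.transmat_)
    combined.means_ = (m1.means_[:, None, :] + m2.means_[None, :, :]).reshape(n1 * n2, -1)
    combined.covars_ = np.array([c1 + c2 for c1 in m1.covars_ for c2 in m2.covars_])
    return combined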
Example #2
    pd.set_option('display.width', 100)
    np.set_printoptions(edgeitems=100)
    mpl.rcParams['font.sans-serif'] = [u'SimHei']
    mpl.rcParams['axes.unicode_minus'] = False

    n_components = 3
    data = pd.read_excel(io='Current.xls', sheet_name='Sheet1', header=0)
    # data['Current'] = MinMaxScaler().fit_transform(data['Current'])
    data['Current'] *= 1e6

    # Remove obvious outliers
    data_clean(False)

    x = data['Time'].values.reshape(-1, 1)
    y = data['Current'].values.reshape(-1, 1)
    model = hmm.GaussianHMM(n_components=n_components, covariance_type='full', n_iter=10)
    model.fit(y)
    components = model.predict_proba(y)
    components_state = model.predict(y)
    components_pd = pd.DataFrame(components, columns=np.arange(n_components), index=data.index)
    data = pd.concat((data, components_pd), axis=1)
    print('data = \n', data)

    plt.figure(num=1, facecolor='w', figsize=(8, 9))
    plt.subplot(n_components+1, 1, 1)
    plt.plot(x, y, 'r.-', lw=0.2)
    plt.ylim(extend(y.min(), y.max()))
    plt.grid(b=True, ls=':')
    plt.xlabel('Time', fontsize=14)
    plt.ylabel('Current intensity', fontsize=14)
    plt.title('Raw current over time', fontsize=16)
Example #3
def calc_mape(predicted_data, true_data):
    # Mean absolute percentage error, averaged over rows for each column
    # (helper name assumed from context; the snippet began mid-expression).
    return np.divide(
        np.sum(np.divide(np.absolute(predicted_data - true_data), true_data),
               0), true_data.shape[0])


for stock in STOCKS:
    dataset = np.genfromtxt(stock, delimiter=',')
    predicted_stock_data = np.empty([0, dataset.shape[1]])
    likelihood_vect = np.empty([0, 1])
    aic_vect = np.empty([0, 1])
    bic_vect = np.empty([0, 1])
    for states in STATE_SPACE:
        num_params = states**2 + states
        dirichlet_params_states = np.random.randint(1, 50, states)
        #model = hmm.GaussianHMM(n_components=states, covariance_type='full', startprob_prior=dirichlet_params_states, transmat_prior=dirichlet_params_states, tol=0.0001, n_iter=NUM_ITERS, init_params='mc')
        model = hmm.GaussianHMM(n_components=states,
                                covariance_type='full',
                                tol=0.0001,
                                n_iter=NUM_ITERS)
        model.fit(dataset[NUM_TEST:, :])
        if model.monitor_.iter == NUM_ITERS:
            print('Increase number of iterations')
            sys.exit(1)
        likelihood_vect = np.vstack((likelihood_vect, model.score(dataset)))
        aic_vect = np.vstack(
            (aic_vect, -2 * model.score(dataset) + 2 * num_params))
        bic_vect = np.vstack((bic_vect, -2 * model.score(dataset) +
                              num_params * np.log(dataset.shape[0])))

    opt_states = np.argmin(bic_vect) + 2  # offset assumes STATE_SPACE starts at 2 states
    print('Optimal number of states is {}'.format(opt_states))

    for idx in reversed(range(NUM_TEST)):
Example #4
    def predict(self):
        # Set up a new model for training
        self.remodel = hmm.GaussianHMM(n_components=2, covariance_type="full", n_iter=100)
        
        # Set initial parameters for training
        self.remodel.startprob_ = np.array([0.5, 0.5])
        self.remodel.transmat_ = np.array([[0.5, 0.5], 
                                      [0.5, 0.5]])
        self.remodel.means_ = np.array([[0], [1]])  # shape (n_states, n_features)
        self.remodel.covars_ = np.tile(np.identity(1), (2, 1, 1)) / self.SNR**2    

        self.Z_predict = [None] * self.n_sample
        self.converged = [None] * self.n_sample
        self.X_mean = [None] * self.n_sample
        self.X_var = [None] * self.n_sample    
        self.SNR = np.zeros(self.n_sample)
        self.tp = [None] * self.n_sample 
        self.tp_ub = np.zeros(self.n_sample) 
        self.tp_bu = np.zeros(self.n_sample) 
        self.tb_HMM = np.zeros(self.n_sample)
        self.tu_HMM = np.zeros(self.n_sample)  
        
        for i in range(self.n_sample):
            # Estimate model parameters (training)
            self.remodel.fit(self.X[i])  
        
            # Find most likely state sequence corresponding to X
            Z_predict = self.remodel.predict(self.X[i])
            Z_predict = Z_predict.reshape(self.n_frame, 1)  
            X_mean = self.remodel.means_ # Mean  
            X_var = self.remodel.covars_ # Covariance   
                      
### Simplify the following 
            tp = self.remodel.transmat_ # Transition probability                 
            self.converged[i] = self.remodel.monitor_.converged # Check convergence
            self.SNR[i] = (abs(X_mean[1][0]-X_mean[0][0])/(np.mean(X_var))**0.5)            

            # Assign them such that X[state==0]=0 and X[state==1]=1
            if X_mean[0] <= X_mean[1]:  
                self.Z_predict[i] = Z_predict
                self.X_mean[i] = [X_mean[0][0], X_mean[1][0]]
                self.X_var[i] = [X_var[0][0][0], X_var[1][0][0]]             
                self.tp[i] = [[tp[0][0], tp[0][1]],
                              [tp[1][0], tp[1][1]]]
            else:     
                self.Z_predict[i] = 1 - Z_predict 
                self.X_mean[i] = [X_mean[1][0], X_mean[0][0]]
                self.X_var[i] = [X_var[1][0][0], X_var[0][0][0]]           
                self.tp[i] = [[tp[1][1], tp[1][0]],
                              [tp[0][1], tp[0][0]]]
  
            # HMM estimates of bound (tb) and unbound (tu) times
            self.tp_ub[i] = self.tp[i][0][1] + 1/self.n_frame # Transition prob from unbound to bound
            self.tp_bu[i] = self.tp[i][1][0] + 1/self.n_frame # Transition prob from bound to unbound
            self.tb_HMM[i] = 1/self.tp_bu[i] # Bound time
            self.tu_HMM[i] = 1/self.tp_ub[i] # Unbound time
      
        # Check the convergence
        print("%.1f %% converged." %(sum([int(i) for i in self.converged])/self.n_sample*100))

        # Label only good data
        cond1 = np.array(self.tb_HMM) <= self.n_frame*0.5
#        cond1 = ~outliers(self.tb_HMM)
        cond2 = np.array(self.tu_HMM) <= self.n_frame*0.5
#        cond2 = ~outliers(self.tu_HMM)
        cond3 = ~outliers(self.SNR)
        self.good_data = cond1 & cond2 & cond3

        # Log transition probability
        self.log_tp_ub = np.log10(np.array(self.tp_ub[self.good_data]))
        self.log_tp_bu = np.log10(np.array(self.tp_bu[self.good_data]))              
        
        # MLE fitting with a Gaussian function
        result_bu = MLE_G(self.log_tp_bu)
        result_ub = MLE_G(self.log_tp_ub)               
        self.m_b, self.s_b = result_bu["x"]
        self.m_u, self.s_u = result_ub["x"]               
        self.tb_MLE = 1/10**(self.m_b) 
        self.tu_MLE = 1/10**(self.m_u)           
        error_tb = 100*(self.tb_MLE/self.time_bound-1)
        error_tu = 100*(self.tu_MLE/self.time_unbound-1)  
        print("Time bound (MLE) = %.1f (%.1f %%)" %(self.tb_MLE, error_tb)) 
        print("Time unbound (MLE) = %.1f (%.1f %%) \n" %(self.tu_MLE, error_tu)) 

        # ----------------------------------------------------------------------
        # HMM prediction with concatenated data   
        self.remodel.fit(self.X_conc) # Fit (train) to find the parameters         
        Z_predict_conc = self.remodel.predict(self.X_conc) # Predict the most likely trajectory
        self.Z_predict_conc = Z_predict_conc.reshape(self.n_frame*self.n_sample, 1)          
        self.converged_conc = self.remodel.monitor_.converged # Check the convergence
        self.tp_conc = self.remodel.transmat_ # Transition probability

        # Reorder state number such that X[Z=0] < X[Z=1] 
        if self.X_conc[Z_predict_conc == 0].mean() > self.X_conc[Z_predict_conc == 1].mean():
            self.Z_predict_conc = 1 - self.Z_predict_conc
            self.tp_conc = np.array([[self.tp_conc[1][1], self.tp_conc[1][0]],
                                     [self.tp_conc[0][1], self.tp_conc[0][0]]])

        self.tp_bu_conc = self.tp_conc[1][0] + 1/self.n_frame # Transition prob from bound to unbound
        self.tp_ub_conc = self.tp_conc[0][1] + 1/self.n_frame # Transition prob from unbound to bound
        self.tb_HMM_conc = 1/self.tp_bu_conc # Bound time
        self.tu_HMM_conc = 1/self.tp_ub_conc # Unbound time
        error_tb = 100*(self.tb_HMM_conc/self.time_bound-1)
        error_tu = 100*(self.tu_HMM_conc/self.time_unbound-1)      
        print("HMM_concatenated is %s" %(["not converged.", "converged."][int(self.converged_conc)]))
        print("Time bound (HMM, conc) = %.1f (%.1f %%)" %(self.tb_HMM_conc, error_tb))
        print("Time unbound (HMM, conc) = %.1f (%.1f %%)\n" %(self.tu_HMM_conc, error_tu))
Example #5
            # user_score for the video superframes.
            user_score = np.array(ground_truth['user_score'], dtype=np.double)
            lengths.append(len(X))
            i = i + 1

        # rest of the videos.
        else:
            filename = os.path.join(root, file)
            vid_str = io.loadmat(filename)
            X1 = np.array(vid_str['vid_str']['c3d_fc6'][0][0])
            lengths.append(len(X1))
            X = np.concatenate((X1, X), axis=0)

# number of HMM states or the states present in the video.
num_of_states = 30
model = hmm.GaussianHMM(n_components=num_of_states)
model.fit(X, lengths)

# value of the states after training.
states = model.means_

# transition probability of the states.
state_trans_prob = model.transmat_

# initial probability of the states.
state_init_prob = model.startprob_

# video data for which subset is to be found.
target_video = Y

M = len(states)
Example #6
def makeModel(components, converted, scalar, hist, histT, vol, i, dataPoints, vers):
    HMM = hmm.GaussianHMM(n_components=components, covariance_type="full", n_iter=750, verbose=False)
    HMM.fit(converted, lengths=[len(x) for x in hist])
    joblib.dump(HMM, "models/"+vers+str(i) + "-" + str(components) + "-" + str(1)+".pkl")
    return runTests(HMM, histT, 250, 1, vol, i, dataPoints, vers + str(i) + "-" + str(components) + "-" + str(1), scalar)
Example #7
import numpy as np
import pandas as pd
from hmmlearn import hmm

#path="/pb_winning_numbers.csv"
pb_data=pd.read_csv('data.csv')
model = hmm.GaussianHMM(n_components=69, covariance_type="full")
start = np.full(69, 1/69)             # uniform initial distribution (sums to 1)
transition = np.full((69, 69), 1/69)  # uniform transition matrix (rows sum to 1)
emission = np.identity(69)
model.startprob_ = start
model.transmat_ = transition
model.means_ = emission
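Note that the snippet above never sets covars_, which hmmlearn also requires before the model can sample or score. A minimal completion, assuming unit variances per state:

model.covars_ = np.tile(np.identity(69), (69, 1, 1))  # one unit covariance per state
X, Z = model.sample(10)  # ten synthetic draws from the hand-built HMM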
Example #8
def experiment_1(n_components, dataLength, div):
    print("HMM_test : experiment_1 : start")
    result = []
    score = []
    result.append(0)
    for step in range(1000):
        print("HMM_test : experiment_1 : STEP " + str(step))
        # Generate the data
        print("HMM_test : experiment_1 : making data ")
        datas = []
        datas.extend(makeData(testfunc_circle, div, 0, 2 * np.pi, 10))
        datas.extend(makeData(testfunc_sigmoid, dataLength - div, 0, 600, 10))
        # datas.extend(makeData(testfunc_cubic, 100, 0, 2*np.pi, 0.1))
        # Build the model
        model = hmm.GaussianHMM(n_components=n_components,
                                covariance_type="diag")
        print("HMM_test : experiment_1 : fitting model")
        model.fit(datas[0:dataLength])
        # Encode: predict the state labels
        print("HMM_test : experiment_1 : predict label")
        res = model.predict(datas[0:dataLength])
        # Find the boundary location
        print("HMM_test : experiment_1 : counting div")
        idx = div
        tempMin = -1
        while True:
            if res[idx] != res[idx - 1]:
                tempMin = abs(div - idx)
                break
            else:
                idx = idx - 1
            #
        #
        idx = div
        while True:
            if res[idx] != res[idx + 1]:
                tempMin = min(tempMin, abs(div - idx + 1))
                break
            else:
                idx = idx + 1
            #
        #
        # Add to the histogram
        result.append(tempMin)
        print("HMM_test : experiment_1 : get probability")
        # Check whether tempMin is significant relative to a random histogram:
        #   - generate a random histogram from res
        randomHist = generate_random_hist(res, 10000)
        #   - fit a Gaussian to that histogram
        mu = sum(randomHist) / len(randomHist)
        var = np.var(np.array(randomHist))
        #   - probability density of tempMin under that Gaussian
        tempScore = 1 / math.sqrt(2 * math.pi * var) * math.exp(
            -1 * (tempMin - mu) * (tempMin - mu) / (2 * var))
        # Record it
        score.append(tempScore)
    #
    # Draw the histograms
    print(
        "HMM_test : experiment_1 : result (the set of distances between the true and estimated div)"
    )
    print(result)
    plt.hist(result, bins=dataLength)
    plt.title("result")
    plt.show()
    print(
        "HMM_test : experiment_1 : score (the set of probabilities that the estimated div was drawn from the random histogram)"
    )
    print(score)
    plt.hist(score, bins=dataLength)
    plt.title("score")
    plt.show()
    print("experiment_1 : Successfully terminated.")
Example #9
time_bins = np.load("models/time_bins.npy")
lengths = np.load("models/lengths.npy")

for condition_label in condition_labels:

    # get training set trials
    training_idx = np.load("models/training_idx_" + condition_label + ".npy")
    training_dataset, training_lengths = get_conditioned_dataset(dataset, lengths, training_idx)
    training_dataset_reduced, _ = get_conditioned_dataset(dataset_reduced, lengths, training_idx)

    testing_idx = np.load("models/testing_idx_" + condition_label + ".npy")
    testing_dataset, testing_lengths = get_conditioned_dataset(dataset, lengths, testing_idx)
    testing_dataset_reduced, _ = get_conditioned_dataset(dataset_reduced, lengths, testing_idx)

    # Fit model
    model = hmm.GaussianHMM(n_components=n_components)
    model.fit(training_dataset_reduced, training_lengths)
    print("model converged: " + str(model.monitor_.converged))

    # Save model
    with open("models/hmm_" + condition_label + "_" + str(n_compoments) + ".pkl", "wb") as file:
        pickle.dump(model, file)

    # Try Decoding
    [logprob, states] = model.decode(testing_dataset_reduced, lengths=testing_lengths, algorithm="viterbi")
    print(logprob)

    # Plot
    visual_times = np.load("models/visual_times.npy")[testing_idx]
    cue_times = np.load("models/cue_times.npy")[testing_idx]
    feedback_times = np.load("models/feedback_times.npy")[testing_idx]
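For completeness, reloading one of the models pickled above follows the same filename pattern (reusing condition_label and n_components from the loop):

import pickle

with open("models/hmm_" + condition_label + "_" + str(n_components) + ".pkl", "rb") as file:
    model = pickle.load(file)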
Example #10
def hmm_algo(base_object,
             batched_setting,
             logger,
             algorithm,
             kmeans,
             n_states,
             quickrun=''):

    # initialize the loaded model flag
    loaded_model = False

    if quickrun:

        files_in_data_folder = ''
        # check if the data folder exists and if it does, get all the files
        if os.path.exists(base_object.saved_model_dir):
            files_in_data_folder = os.listdir(base_object.saved_model_dir)

        if 'low' in base_object.test_activity:
            tmp = base_object.test_activity.split('_')
            activity = tmp[0] + '_l'
        elif 'high' in base_object.test_activity:
            tmp = base_object.test_activity.split('_')
            activity = tmp[0] + '_h'
        else:
            activity = base_object.test_activity

        # check all the files in the folder and look for the model file
        for sfile in files_in_data_folder:
            # check if user, activity and hmm keyword are part of the file
            if (base_object.test_user in sfile) and (activity in sfile) and \
                    ('hmm' in sfile) and ('.npy' not in sfile):
                logger.getLogger('line.tab.regular').info('hmm model found')
                logger.getLogger('tab.regular.line').info(
                    'using hmm model {0}'.format(sfile))
                # calculate the whole path
                data_path = os.path.join(base_object.saved_model_dir, sfile)
                # load the model
                hmm_model = joblib.load(data_path)
                # turn on flag so the code does not re-train the model
                loaded_model = True
                logger.getLogger('tab.regular.time').info('hmm model loaded')
                break

    # check if flag is on
    if not loaded_model:

        nc = n_states
        cov_type = 'full'
        iterations = 10
        logger.getLogger('tab.regular.time').info(
            'defining Gaussian Hidden Markov Model.')
        logger.getLogger('tab.regular').info('\tmodel parameters')
        msg = '\t\tnumber of states:{0}'.format(nc)
        logger.getLogger('tab.regular').info(msg)
        msg = '\t\tnumber of iterations:{0}'.format(iterations)
        logger.getLogger('tab.regular').info(msg)
        msg = '\t\tcovariance type:{0}'.format(cov_type)
        logger.getLogger('tab.regular').info(msg)
        # defining the model (the fit() calls below pass logger/kmeans_opt,
        # which implies a customized hmmlearn fork rather than the stock API)
        hmm_model = hmm.GaussianHMM(n_components=nc,
                                    covariance_type=cov_type,
                                    n_iter=iterations,
                                    verbose=True)

        if batched_setting:
            first_run = True
            total_batches, batched_lengths = batch(
                base_object.training_dataset_lengths, 30)

            last_batch_index = 0
            end = 0
            for index, sliced_length in enumerate(batched_lengths):

                msg = 'starting training Gaussian Hidden Markov Model on batch {0} out of {1}'. \
                    format(index + 1, total_batches)
                logger.getLogger('tab.regular.time').info(msg)

                end += np.sum(sliced_length).astype(np.int32)
                msg = 'size of dataset: {0}'.format(
                    base_object.training_testing_dataset_object[
                        'training data'][last_batch_index:end].shape)
                logger.getLogger('tab.regular').debug(msg)

                if first_run:
                    hmm_model.fit(
                        X=base_object.training_testing_dataset_object[
                            'training data'][last_batch_index:end],
                        lengths=sliced_length,
                        logger=logger,
                        kmeans_opt=kmeans)
                    first_run = False
                else:
                    # by setting init_params='', we can cascade the training
                    # results from the previous fitting runs
                    hmm_model.init_params = ''
                    hmm_model.fit(
                        X=base_object.training_testing_dataset_object[
                            'training data'][last_batch_index:end],
                        lengths=sliced_length,
                        logger=logger,
                        kmeans_opt=kmeans)

                last_batch_index = end

        else:
            hmm_model.fit(
                X=base_object.training_testing_dataset_object['training data'],
                logger=logger,
                kmeans_opt=kmeans,
                lengths=base_object.training_dataset_lengths)

        logger.getLogger('tab.regular.time').info(
            'finished training Hidden Markov Model.')

        # create a name for a file based on the user, activity and the time
        filename = 'hmm_' + base_object.test_user + '_' + base_object.test_activity + '_' + \
                   str(datetime.now().strftime('%Y%m%d%H%M%S'))
        # calculate the whole path
        hmm_path_filename = os.path.join(base_object.saved_model_dir, filename)
        logger.getLogger('tab.regular').debug(
            'hmm model stored as {0}'.format(filename))
        logger.getLogger('tab.regular').debug('location {0}'.format(
            base_object.saved_model_dir))

        # if the data folder does not exist, make it
        if not os.path.exists(base_object.saved_model_dir):
            os.mkdir(base_object.saved_model_dir)

        # store the model so it's not needed to re-train it
        joblib.dump(hmm_model, hmm_path_filename)

    logger.getLogger('tab.regular.time').info('calculating predictions')
    train_predictions = hmm_model.predict_proba(
        base_object.training_testing_dataset_object['training data'],
        lengths=base_object.training_dataset_lengths)
    test_predictions = hmm_model.predict_proba(
        base_object.training_testing_dataset_object['testing data'])

    hmm_object = ResultClass()
    # using the model, run algorithms
    hmm_object.classification(
        train_predictions=train_predictions,
        traininglabels=base_object.
        training_testing_dataset_object['training labels'],
        test_predictions=test_predictions,
        testinglabels=base_object.
        training_testing_dataset_object['testing labels'],
        logger=logger)

    hmm_object.show_results(logger=logger)
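The interesting detail in the batched branch is clearing init_params between fits so that later batches refine rather than re-initialize the parameters. A self-contained sketch of that pattern with stock hmmlearn (no custom logger/kmeans_opt arguments):

import numpy as np
from hmmlearn import hmm

rng = np.random.default_rng(0)
chunks = [rng.normal(size=(500, 2)), rng.normal(loc=3, size=(500, 2))]

model = hmm.GaussianHMM(n_components=2, covariance_type="full", n_iter=10)
model.fit(chunks[0])
model.init_params = ""   # keep the learnt startprob/transmat/means/covars
model.fit(chunks[1])     # continues EM from the previous parameters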
Example #11
    fig = plt.subplot(1, n_trials, i + 1)

    # Load time pointers for the given trial
    trial = conditioned_trials[i]
    visual_time = trials['visStim_times'][trial]
    cue_time = trials['cue_times'][trial]
    feedback_time = trials['feedback_times'][trial]

    # generate the spike count histograms
    t0 = visual_time - pre_stim_dt
    tf = feedback_time + post_resp_dt
    [dataset, time_bins] = generate_spike_counts(recording_name, brain_region, neuron_min_score, bin_dt, t0, tf)
    (n_neurons, n_bins) = dataset.shape

    # Create a hmm model
    model = hmm.GaussianHMM(n_components=n_components, n_iter=1000)
    model.fit(dataset.T)
    [logprob, states] = model.decode(dataset.T)

    # Find the best mapping of the state sequences
    if i == 0:
        states_trial0 = states
    else:
        states = map_states(n_components, states_trial0, states)

    # Plot
    title = brain_region + ' trial#' + str(trial)
    plot_psths(dataset, time_bins, title, visual_time, cue_time, feedback_time)
    add_states_2_psth(fig, states, colors, n_neurons)

plt.show()
Example #12
seq = seq.split(',')
sequence = []
for i in seq:
    sequence.append(int(i))

final_testing = []
for t in sequence:
    a = []
    a.append(t)
    final_testing.append(a)
num_forward = int(input("Input number of future plays to predict: "))
print('Analyzing ' + str(num_forward) + ' play(s) into the future')
print(final_testing)

startprob, transmat, means, covars = estimate_parameters(X, y)
model = hmm.GaussianHMM(receiver_number, "full")
model.startprob_ = startprob
model.transmat_ = transmat
model.means_  = means

new_covars = []
for c in covars:
    outermost = []
    for i in c:
        outer = []
        for j in i:
            if j == 0:
                outer.append(0.00001) # HMM hates zeros. Replace them with a minimal value
            else:
                outer.append(j)
        outermost.append(outer)
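The triple loop above just replaces exact zeros in the covariance arrays; a NumPy equivalent (assuming covars is array-like):

import numpy as np

new_covars = np.asarray(covars, dtype=float)
new_covars[new_covars == 0] = 1e-5   # hmmlearn rejects zero variances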
Example #13
    #datas = get_mfc_data('C:/Users/18341/Desktop/book/听觉/实验3-语音识别/语料/features/')

    datas = get_mfc_data('F:/HIT/大三上/视听觉/lab3/组/gzx_sound_mfcc/')

    hmms = dict()
    for category in datas:
        Qs = datas[category]
        n_hidden = 6  # (unused)
        model = hmm.GaussianHMM(n_components=5,
                                n_iter=20,
                                tol=0.01,
                                covariance_type="diag")
        vstack_Qs = np.vstack(tuple(Qs[:-3]))
        model.fit(vstack_Qs, [Q.shape[0] for Q in Qs[:-3]])
        print('success fit')
        hmms[category] = model

    #test
    correct_num = 0
    for category in datas:
        for test_sample in datas[category][-3:]:
            print('real_category:', category)
            max_score = -1 * np.inf
            predict = -1
            for predict_category in hmms:
                model = hmms[predict_category]
Example #14
def detect_spectral_rhythm(time,
                           lfps,
                           sampling_frequency,
                           multitaper_params=_DEFAULT_MULTITAPER_PARAMS,
                           hmm_params=_DEFAULT_HMM_PARAMS,
                           frequency_band=(10, 16)):
    '''Find spectral rhythm times using spectral power and an HMM.

    Parameters
    ----------
    time : ndarray, shape (n_time,)
    lfps : ndarray, shape (n_time, n_signals)
    sampling_frequency : float
    multitaper_params : dict, optional
    hmm_params : dict, optional
    frequency_band : tuple, optional

    Returns
    -------
    results : pandas.DataFrame, shape (n_time, 3)
    model : hmmlearn.GaussianHMM instance

    '''
    power_time, spectral_rhythm_band_power = estimate_spectral_rhythm_power(
        atleast_2d(lfps),
        sampling_frequency,
        start_time=time[0],
        multitaper_params=multitaper_params,
        frequency_band=frequency_band)
    spectral_rhythm_band_power = spectral_rhythm_band_power.reshape(
        (power_time.shape[0], -1))

    model = hmm.GaussianHMM(**hmm_params).fit(
        np.log(spectral_rhythm_band_power))

    state_ind = model.predict(np.log(spectral_rhythm_band_power))
    if (spectral_rhythm_band_power[state_ind == 0].mean() >
            spectral_rhythm_band_power[state_ind == 1].mean()):
        spectral_rhythm_ind = 0
    else:
        spectral_rhythm_ind = 1

    power_time = pd.Index(power_time, name='time')
    time = pd.Index(time, name='time')

    is_spectral_rhythm = np.zeros_like(state_ind, dtype=bool)
    is_spectral_rhythm[state_ind == spectral_rhythm_ind] = True
    is_spectral_rhythm = (pd.DataFrame(
        dict(is_spectral_rhythm=is_spectral_rhythm),
        index=power_time).reindex(index=time,
                                  method='pad').reset_index(drop=True))

    spectral_rhythm_probability = model.predict_proba(
        np.log(spectral_rhythm_band_power))
    spectral_rhythm_df = (pd.DataFrame(
        dict(probability=spectral_rhythm_probability[:, spectral_rhythm_ind]),
        index=power_time).reindex(index=time).reset_index(
            drop=True).interpolate())

    spectral_rhythm_df = pd.concat((spectral_rhythm_df, is_spectral_rhythm),
                                   axis=1).set_index(time)

    return spectral_rhythm_df, model
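A self-contained sketch of the core idea in detect_spectral_rhythm: fit a two-state GaussianHMM to log band power and call the higher-power state the rhythm (synthetic data stands in for the multitaper estimate):

import numpy as np
from hmmlearn import hmm

rng = np.random.default_rng(0)
log_power = np.concatenate([rng.normal(0, 1, 500),
                            rng.normal(3, 1, 500)]).reshape(-1, 1)
model = hmm.GaussianHMM(n_components=2).fit(log_power)
states = model.predict(log_power)
rhythm_state = int(log_power[states == 1].mean() > log_power[states == 0].mean())
is_rhythm = states == rhythm_state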
Example #15
    # --------------------
    if cross_validation:
        # xval_scores gives: % in cluster w/ high confidence; 10th percentile
        # of confidence; score; Bayesian information content (currently GMM only)
        xval_scores = cross_validate_model(data, model_type, K_range,
                                           seed_range, num_clusters_range, tol)
    ''' -------------------------------------------------------------------------------------------------------------------------------------
    # ------------------------              Generate Model                            --------------------------------------
    # --------------------------------------------------------------------------------------------------------------------------------------'''

    # -------------------------
    # Initialize mixture model
    # -------------------------
    if model_type == 'hmm':
        model = hmm.GaussianHMM(n_components=num_clusters,
                                covariance_type="full",
                                algorithm='viterbi',
                                tol=.00001,
                                random_state=seed)
    elif model_type == 'gmm':
        model = mixture.GaussianMixture(n_components=num_clusters,
                                        tol=.00001,
                                        covariance_type='full',
                                        random_state=seed)

    # ---------------------------
    # Fit and save mixture model
    # ---------------------------
    print('fitting model...')
    model.fit(data)  # fit model
    if os.path.isfile(file_location_data_library + '_' + model_type + '_' +
                      model_type_and_name_tag) and do_not_overwrite:
Example #16
    def learn(self, data):
        self._model = hmm.GaussianHMM(self._nb_states, "full", verbose=True)
        # self._model = hmm.GMMHMM(self._nb_states, n_mix=5, covariance_type="full", verbose=True)
        self._model.fit(data)
        print("Model learnt")
Example #17
def run(period):
    #print('getting historical')
    #hist = getHistorical(period, readFiles())[0]
    #print('getting historical test')
    

    testFiles = readTestModelFiles()
    testFiles['time'] =  pd.to_datetime(testFiles['time'], infer_datetime_format=True)
    testFiles = testFiles.set_index('time').loc['1/1/2018':'1/1/2019']
    print(testFiles)

    vol = int(testFiles['volume'].sum())

    print(vol)

    histT = getHistorical(period, testFiles)[0]

    #conv = convert(hist)

    #hist.to_csv('models/Hist-V1.csv')
    #histT.to_csv('models/Test-V1.csv')
    #pd.DataFrame(conv).to_csv('models/Model-V1.csv')
    

    #for i in conv:
    #    print(i)
    
    #-------------------------------------------------------------------------------------------------------------------

    print('make hmm')
    
    HMM = hmm.GaussianHMM(n_components = 11 , covariance_type="full", random_state=7, n_iter = 1000)

    HMM.fit(readModel())
    print(HMM.sample(10))
    print(HMM.transmat_)
    print('complete')
    
    #-------------------------------------------------------------------------------------------------------------------
    scores  = defaultdict(list)
    pSize = random.randint(10, 75)
    strt = random.randint(8, len(histT)-pSize)
    for j in range(15):
        pSize = random.randint(10, 75)
        
        
        for i in range(75):
            #if(i == 0 and not scores[pSize] == None):
            #    break
            strt = random.randint(6, len(histT)-pSize)
            pred, sc, ret = predict(HMM, histT, strt, strt+pSize, 5, 25000, False)
            scores[pSize].append((pred, sc, ret))
        

    #-------------------------------------------------------------------------------------------------------------------

    predictedCloseForTest, _, _ = predict(HMM, histT, strt, strt+pSize, 3, 25000, True)
    trueOpenForTest       = histT.iloc[strt:strt+pSize]['open'].values
    trueCloseForTest      = histT.iloc[strt:strt+pSize]['close'].values

    print("50 random periods w/50 different random tests resuts::")

    for i in scores.keys():
        s = str(sum(n for _, n, _ in scores[i])/len(scores[i]))[0:5]
        ret = str(sum(n for _, _, n in scores[i])/len(scores[i]))[0:5]
        print("For the 75 random tests over " + str(i) + " periods, the HMM determined the direction correctly: " + s + "% of the time. Ret: " + ret)
Example #18
	def train(self, data, real_labels, list_features, dim_features):
		""" Train a supervised HMM classifier based on the data and labels in input

		input:
		data: a list (n_seq) of array (n_feature x length of sequence) containing the data used to train the model
		real_labels: a list (n_seq) of arrays (1 x length of sequence) containing the annotated labels of the states
		list_features: a list containing the names of the features used to train the model
		dim_features: a list containing the dimension of each feature

		The parameters of the HMM trained are:
		startprob_: an array (1 x n_state) containing the initial state probabilities
		transmat_: an array (n_state x n_state) containing the transition matrix probability
		And the Gaussian distribution representing the emission probabilities represented by:
		means_: an array (n_state x n_feature) containing for each state the means of the multivariate Gaussian function
		covars_: an array (n_state x n_feature x n_feature) containing for each state the covariance matrix 
			of the multivariate Gaussian function
		"""
		self.n_seq = len(data)
		self.list_features = list_features
		self.dim_features = dim_features
		self.n_feature = int(sum(dim_features))

		# Concatenate all the sequence in one and create a vector with the length of each sequence
		obs = []
		obs = data[0]
		lengths = []
		lengths.append(len(data[0]))
		labels = real_labels[0]

		for i in range(1, self.n_seq):
			obs = np.concatenate([obs, data[i]])
			lengths.append(len(data[i]))
			labels = np.concatenate([labels, real_labels[i]])

		# Get the list and number of states
		self.list_states, labels = np.unique(labels, return_inverse=True)
		self.n_states = len(self.list_states)

		self.model = hmm.GaussianHMM(n_components=self.n_states, covariance_type="full")

		Y = labels.reshape(-1, 1) == np.arange(len(self.list_states))
		end = np.cumsum(lengths)
		start = end - lengths

		# Compute the initial probabilities
		init_prob = Y[start].sum(axis=0)/Y[start].sum()
		# init_prob = np.ones(self.n_states)/self.n_states

		# Compute the transition matrix probabilities
		trans_prob = np.zeros((self.n_states, self.n_states)).astype(int)
		for i in range(1, len(labels)):
			trans_prob[labels[i-1], labels[i]] += 1

		# Row-normalize the counts so that trans_prob[i, j] = P(next = j | current = i)
		trans_prob = trans_prob/np.sum(trans_prob, axis=1, keepdims=True)

		# Compute the emission distribution
		Mu, covars = tools.mean_and_cov(obs, labels, self.n_states, self.list_features)

		# Update the parameters of the model
		self.model.startprob_ = init_prob
		self.model.transmat_ = trans_prob
		self.model.means_ = Mu
		self.model.covars_ = covars

		return
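The same supervised recipe, as a self-contained sketch with synthetic 1-D data and two annotated states (occupancy-based start probabilities stand in for the sequence-start frequencies used above):

import numpy as np
from hmmlearn import hmm

rng = np.random.default_rng(1)
labels = (rng.random(1000) < 0.5).astype(int)
obs = rng.normal(labels, 0.3).reshape(-1, 1)

n = 2
startprob = np.bincount(labels, minlength=n) / len(labels)
trans = np.zeros((n, n))
for a, b in zip(labels[:-1], labels[1:]):
    trans[a, b] += 1
trans /= trans.sum(axis=1, keepdims=True)   # rows are P(next | current)

model = hmm.GaussianHMM(n_components=n, covariance_type="full")
model.startprob_ = startprob
model.transmat_ = trans
model.means_ = np.array([[obs[labels == k].mean()] for k in range(n)])
model.covars_ = np.array([[[obs[labels == k].var()]] for k in range(n)])
print((model.predict(obs) == labels).mean())   # decoding accuracy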
Example #19
# The transition probability matrix
tp_ub = 1 / time_unbound
tp_uu = 1 - tp_ub
tp_bu = 1 / time_bound
tp_bb = 1 - tp_bu
transmat = np.array([[tp_uu, tp_ub], [tp_bu, tp_bb]])

# The means of each state
means = np.array([[0.0], [1.0]])

# The covariance of each component
covars = np.tile(np.identity(1), (2, 1, 1)) / SNR

# Build an HMM instance and set parameters
model = hmm.GaussianHMM(n_components=2, covariance_type="full")

# Set the parameters to generate samples
model.startprob_ = startprob
model.transmat_ = transmat
model.means_ = means
model.covars_ = covars

# Generate samples
X, Z_true = model.sample(n_frame)

X_true = np.zeros(n_frame)
for i in range(2):
    X_true[Z_true == i] = X[Z_true == i].mean()

# Set up a new model for training
Example #20
Ob1 = O1  # observations for the first HMM
Ob2 = O2  # observations for the second HMM

Return = (data['close'] / data['open'] - 1).values  # daily return (close/open - 1)
from hmmlearn import hmm
len1 = len(data[:'2010'])  # data up to 2010 is the training set
len2 = len(data['2011':])  # data from 2011 on is the backtest set
Signal = np.zeros(len2)
np.random.seed(1)
N_state1 = 3  # number of hidden states for the first HMM
N_state2 = 4  # number of hidden states for the second HMM

for i in range(len2):
    # Rolling training: refit once a month (assumes the backtest starts on a
    # month boundary so remodel1/remodel2 exist before their first use)
    if data.index[len1 + i - 1].month != data.index[len1 + i].month:
        remodel1 = hmm.GaussianHMM(n_components=N_state1)
        remodel1.fit(Ob1[:len1 + i])
        remodel2 = hmm.GaussianHMM(n_components=N_state2)
        remodel2.fit(Ob2[:len1 + i])
    s_pre1 = remodel1.predict(Ob1[:len1 + i])  # predict the state sequence over the history
    s_pre2 = remodel2.predict(Ob2[:len1 + i])
    Re = Return[:len1 + i]  # historical return series
    # average historical return of each joint state
    Expect=np.array([np.mean(Re[(s_pre1==j)*(s_pre2==k)]) \
                     for j in range(N_state1) for k in range(N_state2)])
    # probability of each joint state occurring on the next day
    Pro=np.array([remodel1.transmat_[s_pre1[-1],j]*remodel2.transmat_[s_pre2[-1],k]\
                  for j in range(N_state1) for k in range(N_state2)])
    preReturn = Pro.dot(Expect)  # expected next-day return from the transition matrices
    if preReturn > 0.:
        Signal[i] = 1
Example #21
    def evaluate(self):#, SNR_min, dwell_min, dwell_max):
        blinking = 1
        dwell_min = 1
        dwell_max = 100

        if self.noise > 1/SNR_min: 
            return False        
        
        x = running_avg(self.I_frame, 3)
        self.I_s = np.array([x[0]]+x.tolist()+[x[-1]])       
        signal = self.I_s > noise_cutoff

        t_b = []
        t_ub = []
        for i in range(len(signal)-1):
            if (signal[i] == False) & (signal[i+1] == True):
                t_b.append(i)
            if (signal[i] == True) & (signal[i+1] == False):
                t_ub.append(i)
        
        if len(t_b)*len(t_ub) == 0: 
            return False 
        if t_ub[0] < t_b[0]: # remove pre-existing binding
            del t_ub[0]
        if len(t_b)*len(t_ub) == 0: 
            return False                
        if t_ub[-1] < t_b[-1]: # remove unfinished binding
            del t_b[-1]
        if len(t_b)*len(t_ub) == 0: 
            return False      

        # combine blinking
        blink_ub = []
        blink_b = []             
        if len(t_b) > 1:  
            for i in range(len(t_b)-1):   
                if abs(t_ub[i] - t_b[i+1]) <= blinking: 
                    blink_ub.append(t_ub[i])
                    blink_b.append(t_b[i+1])
                  
            if len(blink_ub) > 0:
                for i in range(len(blink_ub)):
                    t_ub.remove(blink_ub[i])
                    t_b.remove(blink_b[i])

        # delete too short or too long binding
        transient_ub = []
        transient_b = []
        for i in range(len(t_b)):                                      
            if (t_ub[i] - t_b[i] < dwell_min): 
                transient_ub.append(t_ub[i])
                transient_b.append(t_b[i])

            if (t_ub[i] - t_b[i] > dwell_max): 
                transient_ub.append(t_ub[i])
                transient_b.append(t_b[i])
                
        if len(transient_b) > 0:
            for i in range(len(transient_b)):
                t_ub.remove(transient_ub[i])
                t_b.remove(transient_b[i])

        if len(t_b)*len(t_ub) == 0: 
            return False    
              
        self.dwell = []  
        self.waiting = []   
        self.SNR = []
        self.I_fit = np.zeros(len(signal))          
        for i in range(len(t_b)): 
            self.dwell.append(t_ub[i] - t_b[i])
            if i < len(t_b)-1:
                self.waiting.append(t_b[i+1] - t_ub[i])
            I_mean = np.mean(self.I_frame[t_b[i]:t_ub[i]])
            self.SNR.append(I_mean/self.noise)            
            self.I_fit[t_b[i]+1:t_ub[i]+1] = I_mean
                    
        # HMM      
        X = self.I_frame.reshape(len(signal), 1)
            
        # Set up a new model for training
        remodel = hmm.GaussianHMM(n_components=2, covariance_type="full", n_iter=100)        
        
        # Set initial parameters for training
        remodel.startprob_ = np.array([0.5, 0.5])  # start probabilities must sum to 1
        remodel.transmat_ = np.array([[0.9, 0.1], 
                                      [0.1, 0.9]])
        remodel.means_ = np.array([[0], [1]])  # shape (n_states, n_features)
        remodel.covars_ = np.tile(np.identity(1), (2, 1, 1)) * self.noise**2        
        
    
        # Estimate model parameters (training)
        remodel.fit(X)  
        
        # Find most likely state sequence corresponding to X
        Z_predict = remodel.predict(X)
        
        # Reorder state number such that X[Z=0] < X[Z=1] 
        if X[Z_predict == 0].mean() > X[Z_predict == 1].mean():
            Z_predict = 1 - Z_predict
            remodel.transmat_ = np.array([[remodel.transmat_[1][1], remodel.transmat_[1][0]],
                                          [remodel.transmat_[0][1], remodel.transmat_[0][0]]])
   
        self.tp_ub = remodel.transmat_[0][1] 
        self.tp_bu = remodel.transmat_[1][0]
        
        # Sequence of predicted states        
        self.I_predict = np.zeros(len(X))
        for i in range(2):
            self.I_predict[Z_predict == i] = X[Z_predict == i].mean()  
                          
        return True
Example #22
    def partial_fit(self, train_main, train_appliances, **load_kwargs):

        self.models = []
        self.num_appliances = 0
        self.appliances = []
        train_main = pd.concat(train_main, axis=0)
        train_app_tmp = []
        for app_name, df_list in train_appliances:
            df_list = pd.concat(df_list, axis=0)
            train_app_tmp.append((app_name, df_list))

        # All the initializations required by the model
        train_appliances = train_app_tmp
        learnt_model = OrderedDict()
        means_vector = []
        one_hot_states_vector = []
        pi_s_vector = []
        transmat_vector = []
        states_vector = []
        train_main = train_main.values.flatten().reshape((-1, 1))

        for appliance_name, power in train_appliances:
            # print (appliance_name)
            # Learning the pi's and transition probabilities for each appliance using a simple HMM
            self.appliances.append(appliance_name)
            X = power.values.reshape((-1, 1))
            learnt_model[appliance_name] = hmm.GaussianHMM(
                self.default_num_states, "full")
            # Fit
            learnt_model[appliance_name].fit(X)
            means = learnt_model[appliance_name].means_.flatten().reshape(
                (-1, 1))
            states = learnt_model[appliance_name].predict(X)
            transmat = learnt_model[appliance_name].transmat_
            counter = Counter(states.flatten())
            total = 0
            keys = list(counter.keys())
            keys.sort()

            for i in keys:
                total += counter[i]
            pi = []

            for i in keys:
                pi.append(counter[i] / total)
            pi = np.array(pi)
            nb_classes = self.default_num_states  # (unused)
            targets = states.reshape(-1)          # (unused)
            means_vector.append(means)
            pi_s_vector.append(pi)
            transmat_vector.append(transmat.T)
            states_vector.append(states)
            self.num_appliances += 1
            self.signal_aggregates[appliance_name] = (
                np.mean(X) * self.time_period).reshape((-1, ))

        self.means_vector = means_vector
        self.pi_s_vector = pi_s_vector
        self.transmat_vector = transmat_vector
        print("Finished Training")
Example #23
    def test_bad_covariance_type(self):
        hmm.GaussianHMM(20, self.covariance_type)
        self.assertRaises(ValueError, hmm.GaussianHMM, 20,
                          'badcovariance_type')
Example #24
    # STAR features
        star = cv2.xfeatures2d.StarDetector_create()
        keypoints = star.detect(gray)
        sift = cv2.xfeatures2d.SIFT_create()
        _, desc = sift.compute(gray, keypoints)   # compute the SIFT descriptor matrix
        if len(descs) == 0:
            descs = desc
        else:
            descs = np.append(descs, desc, axis=0)
    train_x.append(descs)
    train_y.append(label)

# Train the models
models = dict()
for descs, label in zip(train_x, train_y):
    model = hl.GaussianHMM(n_components=4, covariance_type="diag", n_iter=1000)
    models[label] = model.fit(descs)


# Testing

test_objects = search_objects("E:\\达内学习文件\\17 机器学习\\data\\objects\\testing")

test_x, test_y, test_z = list(), list(), list()

for label, filenames in test_objects.items():
    test_z.append([])
    descs = np.array([])
    for filename in filenames:
        image = cv2.imread(filename)
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)   # convert to grayscale
Example #25
search_speeches('../data/speeches/training', train_speeches)
train_x, train_y = [], []
for label, filenames in train_speeches.items():
    mfccs = np.array([])
    for filename in filenames:
        sample_rate, sigs = wf.read(filename)
        mfcc = sf.mfcc(sigs, sample_rate)
        if len(mfccs) == 0:
            mfccs = mfcc
        else:
            mfccs = np.append(mfccs, mfcc, axis=0)
    train_x.append(mfccs)
    train_y.append(label)
models = {}
for mfccs, label in zip(train_x, train_y):
    model = hl.GaussianHMM(n_components=4, covariance_type='diag', n_iter=1000)
    models[label] = model.fit(mfccs)
test_speeches = {}
search_speeches('../data/speeches/testing', test_speeches)
test_x, test_y = [], []
for label, filenames in test_speeches.items():
    mfccs = np.array([])
    for filename in filenames:
        sample_rate, sigs = wf.read(filename)
        mfcc = sf.mfcc(sigs, sample_rate)
        if len(mfccs) == 0:
            mfccs = mfcc
        else:
            mfccs = np.append(mfccs, mfcc, axis=0)
    test_x.append(mfccs)
    test_y.append(label)
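The testing loop above stops before scoring; a hedged completion scores each test utterance against every trained model and predicts the best-scoring label:

pred_y = []
for mfccs in test_x:
    best_label, best_score = None, float('-inf')
    for label, model in models.items():
        score = model.score(mfccs)
        if score > best_score:
            best_label, best_score = label, score
    pred_y.append(best_label)
print(list(zip(test_y, pred_y)))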
Example #26
volume = np.array([q[6] for q in quotes])[1:]

# Take diff of close value. Note that this makes
# ``len(diff) = len(close_t) - 1``, therefore, other quantities also
# need to be shifted by 1.
diff = np.diff(close_v)
dates = dates[1:]
close_v = close_v[1:]

# Pack diff and volume for training.
X = np.column_stack([diff, volume])

print("fitting to HMM and decoding ...", end="")

# Make an HMM instance and execute fit
model = hmm.GaussianHMM(n_components=2, covariance_type="diag",
                        n_iter=1000).fit(X)

# Predict the optimal sequence of internal hidden state
hidden_states = model.predict(X)

print("done")

print("Transition matrix")
print(model.transmat_)
print()

print("Means and vars of each hidden state")
for i in range(model.n_components):
    print("{0}th hidden state".format(i))
    print("mean = ", model.means_[i])
    print("var = ", np.diag(model.covars_[i]))
Example #27
def main():
    file="/home/shaoguang/anaconda3/shaoguang123/bishe_csg/hq2.csv"
    hq=pd.read_csv(file,encoding="utf-8")
    hq1=hq.iloc[:,[0,1,2,4,5]]
    
    # Process the data to get each sector's daily percentage gain
    col=int(hq1.iloc[:,0:1].size)
    dat=int(col/28)
    val=np.zeros((dat,28))
    row_list=np.zeros(28)
    Column_list=np.arange(0,dat,1)
    for i in range(28):
        row_list[i]=hq1.iloc[i*dat,0]
    for i in range(dat):
        for j in range(28):
            val[dat-i-1][j]=hq1.iloc[i+dat*j,4]
    
    data=pd.DataFrame(val,index=Column_list,columns=row_list)
    
    # Top-5 sectors by daily gain
    order1=[]
    order2=[]
    order3=[]
    order4=[]
    order5=[]
    for i in range(dat):
        Sector_list=[]
        for j in range(28):
            Sector_list.append((row_list[j],val[i][j]))
        top = heapq.nlargest(5,Sector_list,key=lambda s: s[1])
        order1.append(top[0][0]),order2.append(top[1][0]),order3.append(top[2][0]),order4.append(top[3][0]),order5.append(top[4][0])
    order_list=pd.DataFrame()
    order_list.insert(0,"order1",order1),order_list.insert(1,"order2",order2),order_list.insert(2,"order3",order3),order_list.insert(3,"order4",order4),order_list.insert(4,"order5",order5)
    
    # PCA dimensionality reduction
    pca=PCA(n_components=10,copy=False)
    A=pca.fit_transform(data)
    
    # Parameter settings
    n=3 # number of hidden states
    T=350 # sample window size
    t=1 # number of days to predict
    w_n=5 # number of past days matching today's hidden state and rotation pattern, with the closest likelihood
    index = 0 
    step = t
    win=0
    lose=0
    win0=0
    win1=0
    win2=0
    win3=0
    win4=0
    win5=0
    win6=0
    
    
    while index+T < len(A)-step:
        model = hmm.GaussianHMM(n_components= n, covariance_type="spherical", n_iter=1000).fit(A[index:index+T])
        hist_info = [] 
        hiddenStatus = model.predict(A[index:index+T])
        
        #print (hiddenStatus)
        for i in range(index, index+T):
            #hiddenStatu = model.predict(A[index+i : index+i+1])
            score = model.score(A[i: i+1])
            day_tuple = (i, hiddenStatus[i-index], score)
            hist_info.append(day_tuple) 
            
        #print (hist_info)
        last_hiddenStatus = hist_info[-1][1]
        last_score = hist_info[-1][2]
        last_index = hist_info[-1][0]
        print(last_index)
        
        sameStatus = []
        cnt=0
        for (x,y,z) in hist_info[:-1]:
            if y == last_hiddenStatus: # hidden market state matches
                if  isIn(last_index,x,order_list) and isIn(last_index-1,x-1,order_list): # sector-rotation pattern matches
                    diff = abs(z - last_score)
                    sameStatus.append((x, diff))
                    cnt+=1
        if(cnt<w_n): 
            index += step
            continue
        pos_diffs = heapq.nsmallest(w_n, sameStatus, key=lambda s: s[1])
            
        # Weighted prediction
        weights = [5,4,3,2,1]
        d={}
        for i in range(w_n):
            if order_list.iloc[pos_diffs[i][0]+1,0] in d:
                d[order_list.iloc[pos_diffs[i][0]+1,0]]+=weights[i]/2
            else:
                d[order_list.iloc[pos_diffs[i][0]+1,0]]=weights[i]/2
                    
        for i in range(w_n):
            for j in range(1,3):
                if order_list.iloc[pos_diffs[i][0]+1,j] in d:
                    d[order_list.iloc[pos_diffs[i][0]+1,j]]+=weights[i]*(3-j)/6
        d=sorted(d.items(),key = lambda asd:asd[1],reverse=True)
        print("&&&&&&&&&&&&&&&&&&&&&&&&&&")
        print(d)
                
        
        top_5=[]
        for i in range(5):
            top_5.append(order_list.iloc[last_index+1,i])
    
        print(last_index)
        print(top_5)
    
        if d[0][0] in top_5 or d[1][0] in top_5 or (len(d)<3 or d[2][0] in top_5):
            win+=1
            print("win")
            if d[0][0] in top_5:
                win0+=1
            
            if len(d)<2:
                if d[0][0] in top_5:
                    win1+=1
            elif d[1][0] in top_5:
                win1+=1
                
            if len(d)<2:
                if d[0][0] in top_5:
                    win2+=1
            elif len(d)<3:
                if d[0][0] in top_5 or d[1][0] in top_5:
                    win2+=1
            elif d[2][0] in top_5:
                win2+=1
            
            if len(d)<2:
                if d[0][0] in top_5:
                    win3+=1
            elif d[0][0] in top_5 and d[1][0] in top_5:
                win3+=1
                
            if len(d)<2:
                if d[0][0] in top_5:
                    win4+=1
            elif d[0][0] in top_5 or d[1][0] in top_5:
                win4+=1
             
            if len(d)<2:
                if d[0][0] in top_5:
                    win5+=1
            elif len(d)<3:
                if d[0][0] in top_5 and d[1][0] in top_5:
                    win5+=1
            elif d[0][0] in top_5 and d[1][0] in top_5 and d[2][0] in top_5:
                win5+=1
                
            if len(d)<2:
                if d[0][0] in top_5:
                    win6+=1
            elif len(d)<3:
                if d[0][0] in top_5 or d[1][0] in top_5:
                    win6+=1
            elif d[0][0] in top_5 or d[1][0] in top_5 or d[2][0] in top_5:
                win6+=1
        else:
            lose+=1
        index += step
    
    print(win)
    print(lose)
    print(win/(win+lose))
    print(win0/(win+lose))
    print(win1/(win+lose))
    print(win2/(win+lose))
    print(win3/(win+lose))
    print(win4/(win+lose))
    print(win5/(win+lose))
    print(win6/(win+lose))
    print("Done")
Example #28
def hmm_build_train(program_path):

    dataset_path = os.path.join(program_path, 'dataset')

    print('creating the datasets path')
    preictal_data_path = os.path.join(dataset_path, 'final_preictal_training_dataset.hdf5')
    interictal_data_path = os.path.join(dataset_path, 'final_interictal_training_dataset.hdf5')
    testing_data_path = os.path.join(dataset_path, 'processed_testing_training_dataset.hdf5')

    preictal_model_loaded = False
    interictal_model_loaded = False

    models_path = os.path.join(program_path, 'models')
    # check if model are saved
    if os.path.exists(models_path):
        # hmm inside the models' folder
        hmm_files = next(os.walk(models_path))[2]

        for m_file in hmm_files:
            if ('hmm_preictal' in m_file) and ('.npy' not in m_file):
                # calculate the whole path
                data_path = os.path.join(models_path, m_file)
                # load the model
                preictal_hmm = joblib.load(data_path)
                # turn on flag so the code does not re-train the model
                preictal_model_loaded = True
            elif ('hmm_interictal' in m_file) and ('.npy' not in m_file):
                # calculate the whole path
                data_path = os.path.join(models_path, m_file)
                # load the model
                interictal_hmm = joblib.load(data_path)
                # turn on flag so the code does not re-train the model
                interictal_model_loaded = True

    # create location for storing models for later use
    if not os.path.exists(models_path):
        os.mkdir(models_path)

    # check if model loaded
    if not preictal_model_loaded:
        print('loading preictal dataset')
        preictal_dataset = h5py.File(name=preictal_data_path, mode='r')
        # calculate the length of each of the unique matlab files conforming the preictal dataset
        list_of_lengths = [239766] * 29
        rest_of_array = int(preictal_dataset['training data'].shape[0] - np.sum(list_of_lengths))
        list_of_lengths.append(rest_of_array)
        preictal_length = np.array(list_of_lengths)
        
        if np.sum(preictal_length) != preictal_dataset['training data'].shape[0]:
            raise ValueError('preictal length variable does not match preictal dataset length')
        print('creating a preictal Gaussian HMM object')
        preictal_hmm = hmm.GaussianHMM(n_components=8, verbose=True)
        print('\ttraining the model')
        preictal_hmm.fit(preictal_dataset['training data'], preictal_length)

        print('\tstoring model')
        hmm_preictal_path_filename = os.path.join(models_path, 'hmm_preictal')
        joblib.dump(preictal_hmm, hmm_preictal_path_filename)

    # check if model loaded
    if not interictal_model_loaded:
        print('loading interictal dataset')
        interictal_dataset = h5py.File(name=interictal_data_path, mode='r')
        # lengths of each of the unique MATLAB files making up the interictal dataset
        list_of_lengths = [239766] * 449
        rest_of_array = int(interictal_dataset['training data'].shape[0] - np.sum(list_of_lengths))
        list_of_lengths.append(rest_of_array)
        interictal_length = np.array(list_of_lengths)

        if np.sum(interictal_length) != interictal_dataset['training data'].shape[0]:
            raise ValueError('interictal length variable does not match interictal dataset length')
        # interictal_length = np.array([239766] * 300)
        # interictal_length = np.array([239766] * 200)
        # interictal_length = np.array([239766] * 100)
        print('creating an interictal Gaussian HMM object')
        interictal_hmm = hmm.GaussianHMM(n_components=8, verbose=True)
        print('\ttraining the model')
        # 450
        interictal_hmm.fit(interictal_dataset['training data'], interictal_length)
        # 200
        # interictal_hmm.fit(interictal_dataset['training data'][:47953200], interictal_length)
        # 100
        # interictal_hmm.fit(interictal_dataset['training data'][:23976600], interictal_length)

        print('\tstoring model')
        hmm_interictal_path_filename = os.path.join(models_path, 'hmm_interictal')
        joblib.dump(interictal_hmm, hmm_interictal_path_filename)

    print('loading testing dataset')
    testing_dataset = h5py.File(name=testing_data_path, mode='r')

    true_results = obtain_true_results()
    true_count = 0.0

    output_file = open('results.csv', 'w')
    for testing_key in testing_dataset.keys():
        print('calculating likelihood for {0}'.format(testing_key))
        interictal_log_prob, _ = interictal_hmm.decode(testing_dataset[testing_key][:])
        preictal_log_prob, _ = preictal_hmm.decode(testing_dataset[testing_key][:])

        if interictal_log_prob > preictal_log_prob:
            # 0 = interictal
            if true_results[testing_key] == 0:
                true_count += 1
            row_w = '{0},{1}'.format(testing_key,0)
            output_file.write(row_w)
            output_file.write('\n')
        else:
            # 1 = preictal
            if true_results[testing_key] == 1:
                true_count += 1
            row_w = '{0},{1}'.format(testing_key,1)
            output_file.write(row_w)
            output_file.write('\n')

    output_file.close()

    accuracy = true_count / len(testing_dataset.keys())
    print('accuracy={0}'.format(accuracy))
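
# Editor's sketch of the hmmlearn pattern the function above relies on:
# fitting on several sequences concatenated row-wise via the `lengths`
# argument, then classifying a sequence by comparing the Viterbi
# log-likelihood under two class-conditional models. The data and names
# below are illustrative assumptions, not the original EEG features.
import numpy as np
from hmmlearn import hmm

rng = np.random.RandomState(0)
X = rng.randn(300, 4)                 # 300 frames of 4 features (stand-in data)
lengths = [100, 100, 100]             # three sequences of 100 frames each

model_a = hmm.GaussianHMM(n_components=8).fit(X, lengths)
model_b = hmm.GaussianHMM(n_components=8).fit(X + 1.0, lengths)

log_a, _ = model_a.decode(X[:100])    # log-likelihood of the best state path
log_b, _ = model_b.decode(X[:100])
label = 0 if log_a > log_b else 1     # pick the higher-likelihood class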
Example #29
0
    def partial_fit(self, train_main, train_appliances, **load_kwargs):
        """
		Train using 1d FHMM.
		"""
        print(".........................FHMM partial_fit.................")

        train_main = pd.concat(train_main, axis=0)
        train_app_tmp = []

        for app_name, df_list in train_appliances:
            df_list = pd.concat(df_list, axis=0)
            train_app_tmp.append((app_name, df_list))
            self.app_names.append(app_name)

        print(train_main.shape)

        train_appliances = train_app_tmp

        learnt_model = OrderedDict()
        num_meters = len(train_appliances)
        if num_meters > 12:
            max_num_clusters = 2
        else:
            max_num_clusters = 3

        for appliance, meter in train_appliances:

            meter_data = meter.dropna()
            X = meter_data.values.reshape((-1, 1))

            if not len(X):
                print(
                    "Submeter '{}' has no samples, skipping...".format(meter))
                continue

            assert X.ndim == 2
            self.X = X

            if self.num_of_states > 0:
                # User has specified the number of states for this appliance
                num_total_states = self.num_of_states

            else:
                # Find the optimum number of states
                states = cluster(meter_data, max_num_clusters)
                num_total_states = len(states)

            print("Training model for submeter '{}'".format(appliance))
            learnt_model[appliance] = hmm.GaussianHMM(num_total_states, "full")

            # Fit
            learnt_model[appliance].fit(X)
            print("Learnt model for : " + appliance)

            # Check to see if there are any more chunks.
            # TODO handle multiple chunks per appliance.

        # Combining to make an AFHMM
        self.meters = []
        new_learnt_models = OrderedDict()
        for meter in learnt_model:
            print(meter)
            startprob, means, covars, transmat = sort_learnt_parameters(
                learnt_model[meter].startprob_, learnt_model[meter].means_,
                learnt_model[meter].covars_, learnt_model[meter].transmat_)

            new_learnt_models[meter] = hmm.GaussianHMM(startprob.size, "full")
            new_learnt_models[meter].startprob_ = startprob
            new_learnt_models[meter].transmat_ = transmat
            new_learnt_models[meter].means_ = means
            new_learnt_models[meter].covars_ = covars
            # UGLY! But works.
            self.meters.append(meter)

        learnt_model_combined = create_combined_hmm(new_learnt_models)
        self.individual = new_learnt_models
        self.model = learnt_model_combined

        print("print ...........", self.model)

        print("FHMM partial_fit end.................")
Example #30
0
    def train_across_buildings(self,
                               ds,
                               list_of_buildings,
                               list_of_appliances,
                               min_activation=0.05,
                               **load_kwargs):
        """

        :param ds: nilmtk.Dataset
        :param list_of_buildings: List of buildings to use for training
        :param list_of_appliances: List of appliances (nilm-metadata names)
        :param min_activation: Minimum activation (in fraction) to use a home in training
        :param load_kwargs:
        :return:
        """

        _check_memory(len(list_of_appliances))

        self.list_of_appliances = list_of_appliances
        models = {}

        for appliance in list_of_appliances:
            print("Training for", appliance)
            o = []
            for building_num in list_of_buildings:

                building = ds.buildings[building_num]
                elec = building.elec
                try:
                    df = next(elec[appliance].load(**load_kwargs)).squeeze()
                    appl_power = df.dropna().values.reshape(-1, 1)
                    # fraction of samples where the appliance draws > 10 W
                    activation = (df > 10).sum() * 1.0 / len(df)
                    if activation > min_activation:
                        o.append(appl_power)
                except Exception:
                    # skip buildings that lack this appliance or fail to load
                    pass

            if len(o) > 1:
                # train only when more than one home contributed data
                o = np.array(o)
                mod = hmm.GaussianHMM(2, "full")
                # note: passing a collection of sequences matches the older
                # hmmlearn fit() API; current releases expect a single
                # concatenated array plus a `lengths` argument
                mod.fit(o)
                models[appliance] = mod
                print("Means for %s are" % appliance)
                print(mod.means_)
            else:
                print("Not enough samples for %s" % appliance)

        new_learnt_models = OrderedDict()
        for appliance, appliance_model in iteritems(models):
            startprob, means, covars, transmat = sort_learnt_parameters(
                appliance_model.startprob_, appliance_model.means_,
                appliance_model.covars_, appliance_model.transmat_)
            new_learnt_models[appliance] = hmm.GaussianHMM(
                startprob.size, "full", startprob, transmat)
            new_learnt_models[appliance].means_ = means
            new_learnt_models[appliance].covars_ = covars

        learnt_model_combined = create_combined_hmm(new_learnt_models)
        self.individual = new_learnt_models
        self.model = learnt_model_combined
        self.meters = [
            nilmtk.global_meter_group.select_using_appliances(
                type=appliance).meters[0]
            for appliance in iterkeys(self.individual)
        ]
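
# The min_activation gate above keeps a building only if the appliance is
# "on" (drawing more than 10 W) for a sufficient fraction of samples. A
# minimal, self-contained sketch of that filter with made-up readings:
import pandas as pd

readings = pd.Series([0, 0, 120, 150, 0, 0, 0, 0, 90, 0], dtype=float)
activation = (readings > 10).sum() * 1.0 / len(readings)   # 0.3 here
use_building = activation > 0.05                           # min_activation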