Code Example #1
File: rul_pred.py Project: VenkateshMohan3434/hmm
import numpy as np
from hmmlearn.hmm import GMMHMM


def hmm_train(features):

    gmmhmm = GMMHMM(n_components=30, n_mix=8)

    # Always start in the first state.
    startprob = np.zeros(30)
    startprob[0] = 1.0
    gmmhmm.startprob_ = startprob

    # Left-to-right chain: stay with p=0.95, advance with p=0.05;
    # the last state is absorbing.
    transmat = np.identity(30) * 0.95
    for i in range(transmat.shape[0] - 1):
        transmat[i, i + 1] = 0.05
    transmat[-1, -1] = 1.0
    gmmhmm.transmat_ = transmat
    gmmhmm.fit(features)
    preds = gmmhmm.predict(features)
    print(preds)
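Note that hmmlearn's fit() treats the whole features array as a single observation sequence unless a lengths vector says otherwise. A minimal multi-sequence sketch, assuming run1 and run2 are hypothetical feature arrays with the same column count:

import numpy as np
from hmmlearn.hmm import GMMHMM

run1 = np.random.randn(120, 6)   # hypothetical sequence 1
run2 = np.random.randn(90, 6)    # hypothetical sequence 2

X = np.concatenate([run1, run2])   # stack sequences row-wise
lengths = [len(run1), len(run2)]   # tell fit() where each sequence ends

model = GMMHMM(n_components=5, n_mix=2, n_iter=20)
model.fit(X, lengths)              # trains on two sequences, not one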
Code Example #2
seq_d = dataset_dining.randomSequence('dining.chineseRestaurant', 10)
print('dining l:')
print(seq_d)
seq_f = dataset_fitness.randomSequence('fitness.running', 5)
print('fitness')
print(seq_f)
seq_w = dataset_work.randomSequence('work.office', 5)
print('work')
print(seq_w)
seq_s = dataset_shop.randomSequence('shopping.mall', 5)
print('shopping')
print(seq_s)

model_dining.fit(D)
model_fitness.fit(F)
model_work.fit(W)
model_shop.fit(S)

print(model_dining.startprob_.tolist())
print(model_dining.transmat_.tolist())

print('After training')

print(' - Classification for seq dining s-')

print('dining result:')
print(model_dining.score(np.array(dataset_dining._convetNumericalSequence(seq_d_s))))
Code Example #3
def newtrain(speakers, name):
    #folder="C:/Anaconda codes/speaker reco/something new/for hack/add new people/"
    folder = "C:/Anaconda codes/Hackverse/servermodel/clientfiles/"
    s = list(speakers)
    l = len(speakers)
    #name= input("enter your name")

    speakers.append(name)

    new_person = speakers[l]

    #print(new_person)

    try:
        os.makedirs("clientfiles/dataset/" + name)
    except OSError:  # the speaker's directory already exists
        print("already exists")
        return s
    #os.mkdir(folder+"dataset/"+ name)

    x = "clientfiles/dataset/" + name + "/"
    samples(x)

    training_speaker_name = name

    file_path = x
    file_names = os.listdir(file_path)
    #print((len(file_names)))

    lengths = np.empty(len(file_names))
    #print(np.shape(lengths))

    feature_vectors = np.empty([20, 0])

    for i in range(len(file_names)):
        x, rate = librosa.load(file_path + file_names[i])  #loads the file
        #rate, x = wavfile.read(file_names[i])
        x = librosa.feature.mfcc(y=x[0:int(len(x) / 1.25)],
                                 sr=rate)  #extracts mfcc

        #x = mfcc(x[0:len(x)/1.25], samplerate=rate)
        lengths[i] = int(len(x.transpose()))

        #print(np.shape(x))

        feature_vectors = np.concatenate((feature_vectors, x), axis=1)
        #feature_vectors = np.vstack((feature_vectors, x.transpose()))

    #print(((lengths)))
    #print(np.shape(feature_vectors))

    #TRAINING A MODEL

    N = 3  # Number of States of HMM
    Mixtures = 64  # Number of Gaussian Mixtures.

    startprob = np.ones(N) * (10**(-30))  # Left to Right Model
    startprob[0] = 1.0 - (N - 1) * (10**(-30))
    transmat = np.zeros([N, N])  # Initial Transmat for Left to Right Model
    #print(startprob,'\n',transmat)
    for i in range(N):
        for j in range(N):
            transmat[i, j] = 1 / (N - i)
    transmat = np.triu(transmat, k=0)
    transmat[transmat == 0] = (10**(-30))

    model = GMMHMM(n_components=N,
                   n_mix=Mixtures,
                   covariance_type='diag',
                   init_params="mcw",
                   n_iter=100)

    model.startprob_ = startprob
    model.transmat_ = transmat
    #print(startprob,'\n',transmat)

    feature = feature_vectors.transpose()
    #print(np.shape(feature))

    lengths = [int(x) for x in lengths]
    #print(type(lengths[0]))

    model.fit(feature, lengths)

    joblib.dump(model, folder + "/models/" + name + ".pkl")
    return speakers
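For intuition, the left-to-right initialisation above gives each state a uniform chance of staying put or jumping to any later state. A self-contained check of the same construction for N = 3:

import numpy as np

N = 3
transmat = np.zeros([N, N])
for i in range(N):
    for j in range(N):
        transmat[i, j] = 1 / (N - i)
transmat = np.triu(transmat, k=0)    # zero out backward transitions
transmat[transmat == 0] = 10**(-30)  # tiny floor instead of exact zeros
print(transmat)
# rows: [1/3 1/3 1/3], [~0 1/2 1/2], [~0 ~0 1] -- each row sums to ~1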
Code Example #4
class GMMHMMTrainer(BaseTrainer):
    '''A wrapper around hmmlearn's GMMHMM.

    Attributes
    ----------
    _model: init params
    gmmhmm: hmmlearn GMMHMM instance
    params_: params after fit
    train_data_: training data accumulated so far
    '''
    def __init__(self, _model):
        super(GMMHMMTrainer, self).__init__(_model)

        hmm_params = _model['hmmParams']
        gmm_params = _model['gmmParams']
        n_iter = _model.get('nIter', 50)

        transmat = np.array(hmm_params['transMat'])
        transmat_prior = np.array(hmm_params['transMatPrior'])
        n_component = hmm_params['nComponent']
        startprob = np.array(hmm_params['startProb'])
        startprob_prior = np.array(hmm_params['startProbPrior'])

        n_mix = gmm_params['nMix']
        covariance_type = gmm_params['covarianceType']
        gmms = gmm_params.get('gmms', None)

        gmm_obj_list = []
        if not gmms:
            gmm_obj_list = None
        else:
            for gmm in gmms:
                gmm_obj = GMM(n_components=gmm['nComponent'],
                              covariance_type=gmm['covarianceType'])
                gmm_obj.covars_ = np.array(gmm['covars'])
                gmm_obj.means_ = np.array(gmm['means'])
                gmm_obj.weights_ = np.array(gmm['weights'])
                gmm_obj_list.append(gmm_obj)

        self.gmmhmm = GMMHMM(n_components=n_component,
                             n_mix=n_mix,
                             gmms=gmm_obj_list,
                             n_iter=n_iter,
                             covariance_type=covariance_type,
                             transmat=transmat,
                             transmat_prior=transmat_prior,
                             startprob=startprob,
                             startprob_prior=startprob_prior)

    def __repr__(self):
        return '<GMMHMMTrainer instance>\n\tinit_models:%s\n\tparams:%s\n\ttrain_data:%s' % (
            self._model, self.params_, self.train_data_)

    def fit(self, train_data):
        train_data = np.array(train_data)
        self.gmmhmm.fit(train_data)

        gmms_ = []
        for gmm in self.gmmhmm.gmms_:
            gmms_.append({
                'nComponent': gmm.n_components,
                'nIter': gmm.n_iter,
                'means': gmm.means_.tolist(),
                'covars': gmm.covars_.tolist(),
                'weights': gmm.weights_.tolist(),
                'covarianceType': gmm.covariance_type,
            })
        self.train_data_ += train_data.tolist()
        self.params_ = {
            'nIter': self.gmmhmm.n_iter,
            'hmmParams': {
                'nComponent': self.gmmhmm.n_components,
                'transMat': self.gmmhmm.transmat_.tolist(),
                'transMatPrior': self.gmmhmm.transmat_prior.tolist(),
                'startProb': self.gmmhmm.startprob_.tolist(),
                'startProbPrior': self.gmmhmm.startprob_prior.tolist(),
            },
            'gmmParams': {
                'nMix': self.gmmhmm.n_mix,
                'covarianceType': self.gmmhmm.covariance_type,
                'gmms': gmms_,
            }
        }
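This wrapper targets the pre-0.2 hmmlearn API, where sklearn GMM objects and the start/transition probabilities were passed straight to the GMMHMM constructor. In hmmlearn 0.2+ those constructor arguments are gone (mixtures are described by means_/covars_/weights_ arrays rather than a gmms list); a rough sketch of the equivalent modern construction, reusing the variable names from __init__ above:

from hmmlearn.hmm import GMMHMM

# init_params="mcw" lets hmmlearn initialise means/covars/weights
# while keeping the start and transition probabilities we set below.
model = GMMHMM(n_components=n_component, n_mix=n_mix,
               covariance_type=covariance_type, n_iter=n_iter,
               startprob_prior=startprob_prior,
               transmat_prior=transmat_prior,
               init_params="mcw")
model.startprob_ = startprob
model.transmat_ = transmat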
Code Example #5
samples_raw_3, labels_3, _ = FileReader.read(FILE_PATH_3)
samples_raw_3 = samples_raw_3[:, 0:6]

window_size = 100

X_train, X_test, y_train, y_test = train_test_split(np.vstack([samples_raw_1, samples_raw_2, samples_raw_3]),
                                                    np.vstack([labels_1, labels_2, labels_3]), train_size=0.6)

samples_healthy = X_train[y_train.ravel() == 0, :]
samples_unhealthy = X_train[y_train.ravel() == 1, :]

model_healthy = GMMHMM()
model_unhealthy = GMMHMM()

seqs, lengths = PreProcessor.split2sequences(samples_healthy, window_size)
model_healthy.fit(seqs, lengths)

seqs, lengths = PreProcessor.split2sequences(samples_unhealthy, window_size)
model_unhealthy.fit(seqs, lengths)

seqs, lengths = PreProcessor.split2sequences(X_test, window_size)
accuracy = 0
for i in range(0, len(lengths)):
    ll_healthy, post_healthy = model_healthy.score_samples(seqs[i*window_size:(i+1)*window_size,:])
    ll_unhealthy, post_unhealthy = model_unhealthy.score_samples(seqs[i*window_size:(i+1)*window_size,:])

    print("[" + str(ll_unhealthy) + "|" + str(ll_unhealthy) + "]")
    prediction_sample = 0 if ll_healthy > ll_unhealthy else 0
    if prediction_sample == y_test[i*window_size]:
       accuracy += 1/len(lengths)
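Since only the two log-likelihoods are compared, score() (which returns just the log-likelihood, without the posteriors) is enough here. A compact version of the evaluation loop, under the same variable names:

correct = 0
for i in range(len(lengths)):
    window = seqs[i * window_size:(i + 1) * window_size, :]
    pred = 0 if model_healthy.score(window) > model_unhealthy.score(window) else 1
    if pred == y_test[i * window_size]:
        correct += 1
accuracy = correct / len(lengths)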
Code Example #6
File: train15.py Project: snaggled/bombogenesis
class StockPredictor(object):
    def __init__(self,
                 ticker,
                 chunks=9,
                 delta=0,
                 n_hidden_states=5,
                 n_latency_days=10,
                 n_steps_frac_change=10,
                 n_steps_frac_high=30,
                 n_steps_frac_low=10,
                 n_iter=100,
                 verbose=False,
                 prediction_date=None):

        self.total_score = 0
        self.verbose = verbose
        self.ticker = ticker
        self.n_latency_days = n_latency_days
        self.hmm = GMMHMM(n_components=n_hidden_states, n_iter=n_iter)
        self.chunks = chunks
        self.delta = delta
        self.prediction_date = prediction_date
        self.fetch_training_data()
        self._compute_all_possible_outcomes(n_steps_frac_change,
                                            n_steps_frac_high,
                                            n_steps_frac_low)

    def fetch_training_data(self):

        print("Fetching training data ...")
        res = es.search(index="market",
                        doc_type="quote",
                        size=10000,
                        body={"query": {
                            "match": {
                                "ticker": self.ticker
                            }
                        }})
        self.training_data = json_normalize(res['hits']['hits'])
        self.chunked_training_data = self.training_data

        #vectors = []
        #chunked_training_data_lengths = []
        #start_index = 0
        #end_index = start_index + self.chunks
        #delta_date_index = end_index + self.delta

        #while delta_date_index <= len(self.training_data):
        #training_chunk = self.training_data[start_index:end_index]
        #    delta_chunk = self.training_data.iloc[delta_date_index]
        #    total_chunk = training_chunk.append(delta_chunk)
        #    #print("%s training_chunk to train %s" % (total_chunk, self.ticker))
        #    start_index = end_index + 1
        #    end_index = start_index + self.chunks
        #    delta_date_index = end_index + self.delta
        #    vectors.append(total_chunk)
        #    chunked_training_data_lengths.append(len(total_chunk))
        #    if self.verbose: print(total_chunk)

        #self.chunked_training_data = pd.DataFrame(np.concatenate(vectors), columns = self.training_data.columns)
        #self.chunked_training_data_lengths = chunked_training_data_lengths

        if self.verbose:
            print("Latest record for training:\n%s" %
                  self.chunked_training_data.tail(1))
        latest_date = self.chunked_training_data.tail(1)['_source.timestamp']
        datetime_object = datetime.datetime.strptime(latest_date.values[0],
                                                     '%Y-%m-%dT%H:%M:%S')

        if self.prediction_date is None:
            prediction_date = datetime_object + timedelta(days=self.delta + 1)
            self.prediction_date = datetime.datetime.strftime(
                prediction_date, '%Y-%m-%dT%H:%M:%S')

    @staticmethod
    def _extract_features(data):

        # frac_change = (close_price - open_price) / open_price
        frac_change = np.array(data['_source.change'])
        # frac_high = (high_price - open_price) / open_price
        frac_high = np.array(data['_source.change_high'])
        # frac_low = (open_price - low_price) / open_price
        frac_low = np.array(data['_source.change_low'])

        return np.column_stack((frac_change, frac_high, frac_low))

    def fit(self):
        print('Extracting Features')
        feature_vector = StockPredictor._extract_features(
            self.chunked_training_data)
        if self.verbose: print("feature vector %s" % feature_vector)
        print('Training Model with %s features' % feature_vector.size)
        print(
            "Latest date to be used in training is %s" %
            self.chunked_training_data.tail(1)['_source.timestamp'].values[0])
        #self.hmm.fit(feature_vector, self.chunked_training_data_lengths)
        self.hmm.fit(feature_vector)
        print('Model trained')

    def _compute_all_possible_outcomes(self, n_steps_frac_change,
                                       n_steps_frac_high, n_steps_frac_low):
        frac_change_range = np.linspace(-0.1, 0.1, n_steps_frac_change)
        frac_high_range = np.linspace(0, 0.05, n_steps_frac_high)
        frac_low_range = np.linspace(0, 0.05, n_steps_frac_low)

        self.all_possible_outcomes = np.array(
            list(
                itertools.product(frac_change_range, frac_high_range,
                                  frac_low_range)))

    def json_data_for_trade(self):

        rows = list()

        # meta
        ticker = self.ticker
        date = self.prediction_date
        total_score = self.total_score
        id = "%s-%s-%s" % (ticker, date, total_score)

        meta = {
            "index": {
                "_index": TRADE_INDEX_NAME,
                "_type": TRADE_TYPE_NAME,
                "_id": id
            }
        }
        rows.append(json.dumps(meta))

        # data
        row = ObjDict()
        row.total_score = total_score
        row.timestamp = self.prediction_date
        row.ticker = self.ticker
        rows.append(json.dumps(row))

        return rows

    def json_data_for_outcome(self, outcome, score):

        rows = list()

        # meta
        ticker = self.ticker
        date = self.prediction_date
        vector = outcome
        id = "%s-%s-%s" % (ticker, date, vector)

        meta = {"index": {"_index": INDEX_NAME, "_type": TYPE_NAME, "_id": id}}
        rows.append(json.dumps(meta))

        # data
        row = ObjDict()
        row.frac_change = outcome[0]
        row.frac_high_range = outcome[1]
        row.frac_low_range = outcome[2]
        open_price = self.training_data.tail(1)['_source.open'].values[0]
        predicted_close = open_price * (1 + outcome[0])
        expected_value = outcome[0] * score
        row.predicted_close = predicted_close
        row.expected_value = expected_value
        row.timestamp = self.prediction_date
        row.score = score
        row.chunks = self.chunks
        row.delta = self.delta
        row.ticker = self.ticker
        rows.append(json.dumps(row))

        return rows

    def delete_prediction_data(self, ticker):
        print("Deleting prediction data for ... %s" % self.ticker)
        es.delete_by_query(index=INDEX_NAME,
                           doc_type=TYPE_NAME,
                           body={'query': {
                               'match': {
                                   'ticker': self.ticker
                               }
                           }})

    def predict_outcomes(self):

        print("predicting outcomes for: %s" % self.prediction_date)
        previous_testing_data = self.training_data.tail(
            self.n_latency_days).index

        if self.verbose:
            print("previous_testing_data %s" % previous_testing_data)

        test_data = self.training_data.iloc[previous_testing_data]

        if self.verbose:
            print("Using the following slice of data:")
            print("[%s]" % previous_testing_data)
            print(test_data)

        test_data_features = StockPredictor._extract_features(test_data)

        # to blow everything away - may need to recreate/refresh indexes in ES!
        #self.delete_and_create_index()

        bulk_data = list()
        trade_data = list()
        outcome_score = []

        for possible_outcome in self.all_possible_outcomes:

            test_feature_vectors = np.row_stack(
                (test_data_features, possible_outcome))
            score = self.hmm.score(test_feature_vectors)

            # ignoring scores <= 0
            if score > 0:
                rows = self.json_data_for_outcome(possible_outcome, score)
                bulk_data.append(rows)

                if possible_outcome[0] > 0:
                    self.total_score = self.total_score + score
                if possible_outcome[0] < 0:
                    self.total_score = self.total_score - score
                trade_rows = self.json_data_for_trade()
                trade_data.append(trade_rows)

        print("Exporting predictions to ES")

        es_array = self.format_data_for_es(bulk_data)
        res = es.bulk(index=INDEX_NAME, body=es_array, refresh=True)

        es_array = self.format_data_for_es(trade_data)
        res = es.bulk(index=TRADE_INDEX_NAME, body=es_array, refresh=True)

    def format_data_for_es(self, data):
        es_array = ""
        for row in data:
            es_array += row[0]
            es_array += "\n"
            es_array += row[1]
            es_array += "\n"
        return es_array
Code Example #7
def trainingGMMHMM(
        dataset,  # training dataset.
        n_c,  # number of HMM components (i.e. hidden states)
        n_m,  # number of GMM mixtures (i.e. Gaussians per state)
        start_prob_prior=None,  # prior of start hidden states probabilities.
        trans_mat_prior=None,  # prior of transition matrix.
        start_prob=None,  # the start hidden states probabilities.
        trans_mat=None,  # the transition matrix.
        gmms=None,  # models' params of gmm
        covar_type='full',
        n_i=50):
    # Initialise the dataset.
    # d = Dataset(dataset)
    X = dataset.getDataset()
    # Initialise the GMMs.
    _GMMs = []
    if gmms is None:
        _GMMs = None
    else:
        for gmm in gmms:
            _GMM = GMM(n_components=n_m, covariance_type=covar_type)
            _GMM.covars_ = np.array(gmm["covars"])
            _GMM.means_ = np.array(gmm["means"])
            _GMM.weights_ = np.array(gmm["weights"])
            _GMMs.append(_GMM)
    # Initialise the GMMHMM.
    model = GMMHMM(startprob_prior=np.array(start_prob_prior),
                   transmat_prior=np.array(trans_mat_prior),
                   startprob=np.array(start_prob),
                   transmat=np.array(trans_mat),
                   gmms=_GMMs,
                   n_components=n_c,
                   n_mix=n_m,
                   covariance_type=covar_type,
                   n_iter=n_i)
    # Training.
    model.fit(X)
    # The result.
    new_gmmhmm = {
        "nComponent": n_c,
        "nMix": n_m,
        "covarianceType": covar_type,
        "hmmParams": {
            "startProb": model.startprob_.tolist(),
            "transMat": model.transmat_.tolist()
        },
        "gmmParams": {
            "nMix": n_m,
            "covarianceType": covar_type,
            "params": []
        }
    }

    # model.gmms_ holds one GMM per hidden state.
    for i in range(n_c):
        gaussian_model = {
            "covars": model.gmms_[i].covars_.tolist(),
            "means": model.gmms_[i].means_.tolist(),
            "weights": model.gmms_[i].weights_.tolist()
        }
        new_gmmhmm["gmmParams"]["params"].append(gaussian_model)

    return new_gmmhmm
Code Example #8
def main():
    outdir = r'./training_files/multi'
    outdir2 = r'./training_files/arnab'
    outdir3 = r'./training_files/kejriwal'
    outdir4 = r'./training_files/ravish'
    outdir5 = r'./training_files/not-shouting'
    outdir6 = r'./training_files/shouting'
    outdir7 = r'./training_files/single'
    outdir8 = r'./training_files/modi'
    outdir9 = r'./training_files/ond_more'

    #create one HMM per class

    multi = GMMHMM(5, 2)
    discuss = GMMHMM(5, 2)
    arnab = GMMHMM(5, 2)
    kejriwal = GMMHMM(5, 2)
    ravish = GMMHMM(5, 2)

    notshouting = GMMHMM(5, 2)
    shouting = GMMHMM(5, 2)
    single = GMMHMM(5, 2)

    #training for multi

    l = get_files_list(outdir)

    obs = []  # accumulate 10-frame windows across all files
    for i in l:
        f = open(i, "r")
        i_sequence = []
        count = 0
        for line in f:
            individual_obs = line.strip().split(",")
            individual_obs = [float(i) for i in individual_obs]
            i_sequence.append(individual_obs)
            count += 1
            if count == 10:
                obs.append(numpy.array(i_sequence))
                count = 0
                i_sequence = []

    multi.fit(obs)

    #training for arnab

    l = get_files_list(outdir2)

    obs = []  # accumulate 10-frame windows across all files
    for i in l:
        f = open(i, "r")
        i_sequence = []
        count = 0
        for line in f:
            individual_obs = line.strip().split(",")
            individual_obs = [float(i) for i in individual_obs]
            i_sequence.append(individual_obs)
            count += 1
            if count == 10:
                obs.append(numpy.array(i_sequence))
                count = 0
                i_sequence = []

    arnab.fit(obs)

    #training for kejriwal

    l = get_files_list(outdir3)

    obs = []  # accumulate 10-frame windows across all files
    for i in l:
        f = open(i, "r")
        i_sequence = []
        count = 0
        for line in f:
            individual_obs = line.strip().split(",")
            individual_obs = [float(i) for i in individual_obs]
            i_sequence.append(individual_obs)
            count += 1
            if count == 10:
                obs.append(numpy.array(i_sequence))
                count = 0
                i_sequence = []

    kejriwal.fit(obs)

    #training for ravish

    l = get_files_list(outdir4)

    obs = []  # accumulate 10-frame windows across all files
    for i in l:
        f = open(i, "r")
        i_sequence = []
        count = 0
        for line in f:
            individual_obs = line.strip().split(",")
            individual_obs = [float(i) for i in individual_obs]
            i_sequence.append(individual_obs)
            count += 1
            if count == 10:
                obs.append(numpy.array(i_sequence))
                count = 0
                i_sequence = []

    ravish.fit(obs)

    #training for notshouting

    l = get_files_list(outdir5)

    obs = []  # accumulate 10-frame windows across all files
    for i in l:
        f = open(i, "r")
        i_sequence = []
        count = 0
        for line in f:
            individual_obs = line.strip().split(",")
            individual_obs = [float(i) for i in individual_obs]
            i_sequence.append(individual_obs)
            count += 1
            if count == 10:
                obs.append(numpy.array(i_sequence))
                count = 0
                i_sequence = []

    notshouting.fit(obs)

    #training for shouting

    l = get_files_list(outdir6)

    obs = []  # accumulate 10-frame windows across all files
    for i in l:
        f = open(i, "r")
        i_sequence = []
        count = 0
        for line in f:
            individual_obs = line.strip().split(",")
            individual_obs = [float(i) for i in individual_obs]
            i_sequence.append(individual_obs)
            count += 1
            if count == 10:
                obs.append(numpy.array(i_sequence))
                count = 0
                i_sequence = []

    shouting.fit(obs)

    #training for single

    l = get_files_list(outdir7)

    obs = []  # accumulate 10-frame windows across all files
    for i in l:
        f = open(i, "r")
        i_sequence = []
        count = 0
        for line in f:
            individual_obs = line.strip().split(",")
            individual_obs = [float(i) for i in individual_obs]
            i_sequence.append(individual_obs)
            count += 1
            if count == 10:
                obs.append(numpy.array(i_sequence))
                count = 0
                i_sequence = []

    single.fit(obs)

    #It's time for some testing
    q = []
    t = "testcase_output.txt"
    out = open(t, "w")

    #Read each test file and build a list of 10-frame observation sequences
    #te=["test1.txt","test2.txt","test3.txt","test4.txt","test5.txt","test6.txt","test7.txt","test8.txt","test9.txt","test10.txt"]

    #f=open("expected.txt")
    #d_expected={}
    '''
	
	for line in f:
		x=line.strip().split()
		d_expected[x[0]]={'arnab':float(x[1]),'kejriwal':float(x[2]),'ravish':float(x[3])}
	'''

    te = get_files_list(r'./testing_files')
    #te=["test1.txt","test2.txt","test3.txt"]
    for ad in te:
        d = {"arnab": 0, "kejriwal": 0, "ravish": 0}
        f = open(ad, "r")
        obs = []
        i_sequence = []
        count = 0
        for line in f:
            individual_obs = line.strip().split(",")
            #print individual_obs
            individual_obs = [float(i) for i in individual_obs]
            i_sequence.append(individual_obs)
            count += 1
            if count == 10:
                obs.append(numpy.array(i_sequence))
                count = 0
                i_sequence = []

        p = []
        p_choosen = []
        p1_choosen = []
        p1 = []
        p2 = []
        p2_choosen = []

        #print obs
        for i in obs:
            p.append((shouting.score(i), "shouting"))
            p.append((notshouting.score(i), "notshouting"))
            p_choosen.append(max(p, key=lambda x: x[0]))
            p = []
        for i in obs:
            p1.append((arnab.score(i), "arnab"))
            p1.append((kejriwal.score(i), "kejriwal"))
            p1.append((ravish.score(i), "ravish"))
            p1_choosen.append(max(p1, key=lambda x: x[0]))
            p1 = []

        for i in obs:
            p2.append((multi.score(i), "multi"))
            p2.append((single.score(i), "single"))
            p2_choosen.append(max(p2, key=lambda x: x[0]))
            p2 = []
        #print p

        p = []
        p1 = []
        p_choosen = [b for a, b in p_choosen]
        p1_choosen = [b for a, b in p1_choosen]
        p2_choosen = [b for a, b in p2_choosen]
        '''
		#print p_choosen
		#print the state sequence with the timestamp in the output file
	
		t="testcase_output_9.txt"
		out=open(t,"a+")

		out.write(str(ad)+"--->")
		out.write(p_choosen[0])
		out.write("\n")
		'''

        #calculate the amount per second and append to the same file

        #print p_choosen
        #print p1_choosen
        shouting1 = []
        notshouting1 = []

        totaltime = len(p_choosen) * 0.05

        single_count = 0

        for i in range(len(p_choosen)):
            if p2_choosen[i] == "single":
                single_count += 1
                if p_choosen[i] == "shouting":
                    shouting1.append(p1_choosen[i])
                elif p_choosen[i] == "notshouting":
                    notshouting1.append(p1_choosen[i])
        #print d
        d_shouting = {"arnab": 0, "kejriwal": 0, "ravish": 0}
        d_notshouting = {"arnab": 0, "kejriwal": 0, "ravish": 0}

        for i in shouting1:
            d_shouting[i] += 1

        for i in notshouting1:
            d_notshouting[i] += 1

        #print p_choosen

        out.write("\n*******--> " + str(ad) + "  <--*******\n")
        #write arnab,ravish and kejri
        fn = ad.strip().split("/")
        fn = fn[len(fn) - 1]

        #out.write("Time predicted for questioning: "+str((d5['question'])*0.05)+" seconds.\n")
        #out.write("Time predicted for discussion: "+str((d5['discuss'])*0.05)+" seconds.\n")
        out.write("\nChecking single HMM and multi HMM:\n")
        out.write("Number of instance of Single: " + str(single_count) + "\n")
        out.write(
            "\nChecking shouting and non-shouting HMM for all Single instances:\n"
        )
        out.write("Number of instance of Shouting: " + str(len(shouting1)) +
                  "\n")
        out.write("Number of instance of Not-shouting: " +
                  str(len(notshouting1)) + "\n")
        out.write(
            "\nChecking the frequency of each speaker in both shouting and not-shouting instances...\n"
        )
        out.write("Shouting instance: \n" + str(d_shouting) + "\n")
        out.write("Not-Shouting instance: \n" + str(d_notshouting) + "\n")

        out.write("\nResult:\n")

        for c, d in d_shouting.items():
            out.write(
                str(c) + " was shouting for " + str(d * 0.05) + " sec.\n")

        out.write("\n")

        for c, d in d_notshouting.items():
            out.write(
                str(c) + " was not shouting for " + str(d * 0.05) + " sec.\n")

        out.write("\n")
        for c, d in d_shouting.items():
            out.write(
                str(c) + " was shouting for " +
                str(((d * 0.05) / totaltime) * 100) + " % of time.\n")

        out.write("\n")

        for c, d in d_notshouting.items():
            out.write(
                str(c) + " was not shouting for " +
                str(((d * 0.05) / totaltime) * 100) + " sec.\n")

        out.write("\n")

        print(d_shouting)
        print(d_notshouting)
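The same ten-frame windowing loop is repeated verbatim for every training directory above; a small helper (hypothetical, not in the original file) would factor it out:

def load_windows(folder, window=10):
    # Collect `window`-frame observation arrays from every file in folder,
    # accumulating across files and dropping any incomplete trailing window.
    obs = []
    for path in get_files_list(folder):
        i_sequence = []
        with open(path, "r") as f:
            for line in f:
                i_sequence.append([float(v) for v in line.strip().split(",")])
                if len(i_sequence) == window:
                    obs.append(numpy.array(i_sequence))
                    i_sequence = []
    return obs

# e.g. multi.fit(load_windows(outdir)); arnab.fit(load_windows(outdir2)); ...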
Code Example #9
N = 3  # Number of States of HMM
Mixtures = 64  # Number of Gaussian Mixtures.
startprob = np.ones(N) * (10**(-30))  # Left to Right Model
startprob[0] = 1.0 - (N - 1) * (10**(-30))
transmat = np.zeros([N, N])  # Initial Transmat for Left to Right Model
print(startprob, '\n', transmat)
for i in range(N):
    for j in range(N):
        transmat[i, j] = 1 / (N - i)
transmat = np.triu(transmat, k=0)
transmat[transmat == 0] = (10**(-30))

model = GMMHMM(n_components=N,
               n_mix=Mixtures,
               covariance_type='diag',
               init_params="mcw",
               n_iter=100)

model.startprob_ = startprob
model.transmat_ = transmat
print(startprob, '\n', transmat)

feature = feature_vectors.transpose()
print(np.shape(feature))

lengths = [int(x) for x in lengths]
print(type(lengths[0]))

model.fit(feature, lengths)

joblib.dump(
    model, "C:/Anaconda codes/speaker reco/something new/for hack/models/" +
    training_speaker_name + ".pkl")
Code Example #10
print('dining s:')
print(seq_d_s)
seq_d = dataset_dining.randomSequence('dining.chineseRestaurant', 10)
print('dining l:')
print(seq_d)
seq_f = dataset_fitness.randomSequence('fitness.running', 5)
print('fitness')
print(seq_f)
seq_w = dataset_work.randomSequence('work.office', 5)
print('work')
print(seq_w)
seq_s = dataset_shop.randomSequence('shopping.mall', 5)
print('shopping')
print(seq_s)

model_dining.fit(D)
model_fitness.fit(F)
model_work.fit(W)
model_shop.fit(S)

print(model_dining.startprob_.tolist())
print(model_dining.transmat_.tolist())

print('After training')

print(' - Classification for seq dining s-')

print('dining result:')
print(model_dining.score(
    np.array(dataset_dining._convetNumericalSequence(seq_d_s))))
print('fitness result:')
Code Example #11
import os
from hmmlearn.hmm import GMMHMM
from python_speech_features import mfcc
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
import numpy as np
import sys

input_folder = '/home/sachin/Downloads/cmu_us_awb_arctic-0.95-release/cmu_us_awb_arctic/wav'
hmm_models = []

X = np.array([])
for filename in os.listdir(input_folder):
    filepath = os.path.join(input_folder, filename)
    sampling_freq, audio = wavfile.read(filepath)
    mfcc_features = mfcc(audio, sampling_freq)
    if len(X) == 0:
        X = mfcc_features
    else:
        X = np.append(X, mfcc_features, axis=0)


model = GMMHMM(n_components=3, n_mix=45, n_iter=100)
X_train, X_test = train_test_split(X, train_size=0.7)
hmm_models.append(model.fit(X_train))

print(model.score(X_test))
# startprob = np.ones(N) * (10**(-30))  # Left to Right Model
# startprob[0] = 1.0 - (N-1)*(10**(-30))
# transmat = np.zeros([N, N])  # Initial Transmat for Left to Right Model
# for i in range(N):
#     for j in range(N):
#         transmat[i, j] = 1/(N-i)
# transmat = np.triu(transmat, k=0)
# transmat[transmat == 0] = (10**(-30))
# model = GMMHMM(n_components=N, n_mix=Mixtures, covariance_type='diag', init_params="mcw")
# model.startprob_ = startprob
# model.transmat_ = transmat

""" MODEL WITHOUT INITIAL PARAMETERS """

# N and Mixtures are assumed to be defined as in the commented block above.
model = GMMHMM(n_components=N, n_mix=Mixtures, covariance_type='diag')

""" MODEL FITTING """

model.fit(feature_vectors)

""" STORING THE MODEL """

sample = GMMModel(model, "FAML")  # TODO: Change Name as well.
pickle.dump(sample, f)

"""" FUTURE EXTENSIONS """

# TODO: Use score method to evaluate the model and run multiple iterations until best fit.
# TODO: Create a loop so that multiple speakers can be trained in one run.
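To turn several per-speaker models into a classifier, score a test segment against each model and take the argmax. A minimal sketch, assuming (hypothetically) that hmm_models stores (name, model) pairs rather than the bare fitted model appended above:

scores = {name: m.score(X_test) for name, m in hmm_models}
best = max(scores, key=scores.get)
print(best, scores[best])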
Code Example #15
File: train.py Project: snaggled/bombogenesis
class StockPredictor(object):
    def __init__(self,
                 ticker,
                 n_hidden_states=5,
                 n_latency_days=10,
                 n_steps_frac_change=50,
                 n_steps_frac_high=30,
                 n_steps_frac_low=10,
                 n_iter=1000,
                 verbose=False):

        self.verbose = verbose
        self.ticker = ticker
        self.n_latency_days = n_latency_days

        self.hmm = GMMHMM(n_components=n_hidden_states, n_iter=n_iter)

        self.fetch_training_data()
        self.fetch_latest_data()  # to predict

        self._compute_all_possible_outcomes(n_steps_frac_change,
                                            n_steps_frac_high,
                                            n_steps_frac_low)

    def fetch_latest_data(self):

        print("Fetching latest data ...")
        res = es.search(index="market",
                        doc_type="quote",
                        size=10000,
                        body={"query": {
                            "match": {
                                "ticker": self.ticker
                            }
                        }})
        latest_data = json_normalize(res['hits']['hits'])
        self.latest_data = latest_data.tail(1)
        if self.verbose: print("Latest data:\n%s" % self.latest_data)

    def fetch_training_data(self):

        print("Fetching training data ...")
        res = es.search(index="market",
                        doc_type="quote",
                        size=10000,
                        body={"query": {
                            "match": {
                                "ticker": self.ticker
                            }
                        }})
        self.training_data = json_normalize(res['hits']['hits'])
        self.training_data.drop(self.training_data.tail(1).index, inplace=True)
        print("%s records to train %s" %
              (len(self.training_data.index), self.ticker))
        if self.verbose:
            print("Latest record for training:\n%s" %
                  self.training_data.tail(1))

        # tbd - to use es instead
        #q = query % (self.ticker, "lt", datetime.date.today().strftime("%Y-%m-%d"))
        #print(q)
        #res = es.search(index=INDEX_NAME, doc_type=TYPE_NAME, size=10000, body=query)

    @staticmethod
    def _extract_features(data):

        # frac_change = (close_price - open_price) / open_price
        frac_change = np.array(data['_source.change'])
        # frac_high = (high_price - open_price) / open_price
        frac_high = np.array(data['_source.change_high'])
        # frac_low = (open_price - low_price) / open_price
        frac_low = np.array(data['_source.change_low'])

        return np.column_stack((frac_change, frac_high, frac_low))

    def fit(self):
        print('Extracting Features')
        feature_vector = StockPredictor._extract_features(self.training_data)
        if self.verbose: print("feature vector %s" % feature_vector)
        print('Training Model with %s features' % feature_vector.size)
        print("Latest date to be used in training is %s" %
              self.training_data.tail(1)['_source.timestamp'].values[0])
        self.hmm.fit(feature_vector)
        print('Model trained')

    def _compute_all_possible_outcomes(self, n_steps_frac_change,
                                       n_steps_frac_high, n_steps_frac_low):
        frac_change_range = np.linspace(-0.1, 0.1, n_steps_frac_change)
        frac_high_range = np.linspace(0, 0.1, n_steps_frac_high)
        frac_low_range = np.linspace(0, 0.1, n_steps_frac_low)

        self.all_possible_outcomes = np.array(
            list(
                itertools.product(frac_change_range, frac_high_range,
                                  frac_low_range)))

    def json_data_for_outcome(self, day, outcome, score):

        rows = list()

        # meta
        ticker = day['_source.ticker']
        date = day['_source.timestamp']
        vector = outcome
        id = "%s-%s-%s" % (ticker, date, vector)

        meta = {"index": {"_index": INDEX_NAME, "_type": TYPE_NAME, "_id": id}}
        rows.append(json.dumps(meta))

        # data
        row = ObjDict()
        row.frac_change = outcome[0]
        row.frac_high_range = outcome[1]
        row.frac_low_range = outcome[2]
        open_price = day['_source.open'].values[0]
        predicted_close = open_price * (1 + outcome[0])
        expected_value = outcome[0] * score
        row.predicted_close = predicted_close
        row.expected_value = expected_value
        row.timestamp = day['_source.timestamp'].values[0]
        row.score = score
        row.ticker = day['_source.ticker'].values[0]
        rows.append(json.dumps(row))

        return rows

    def predict_outcomes(self):

        print("predicting outcomes for: %s" %
              self.latest_data['_source.timestamp'].values[0])
        previous_testing_data = self.training_data.tail(
            self.n_latency_days).index

        if self.verbose:
            print("previous_testing_data %s" % previous_testing_data)

        test_data = self.training_data.iloc[previous_testing_data]

        if self.verbose:
            print("Using the following slice of data:")
            print("[%s]" % previous_testing_data)
            print(test_data)

        test_data_features = StockPredictor._extract_features(test_data)

        # to blow everything away - may need to recreate/refresh indexes in ES!
        #self.delete_and_create_index()

        bulk_data = list()
        outcome_score = []

        for possible_outcome in self.all_possible_outcomes:

            test_feature_vectors = np.row_stack(
                (test_data_features, possible_outcome))

            if self.verbose:
                print("Final test feature set:")
                print("[%s]" % test_feature_vectors)

            score = self.hmm.score(test_feature_vectors)

            # ignoring scores <= 0
            if score > 0:
                rows = self.json_data_for_outcome(self.latest_data,
                                                  possible_outcome, score)
                bulk_data.append(rows)

        # format for ES, ugly
        es_array = ""
        for row in bulk_data:
            es_array += row[0]
            es_array += "\n"
            es_array += row[1]
            es_array += "\n"

        #print("Deleting prediction data for ... %s" % day['_source.ticker'])
        #es.delete_by_query(index=INDEX_NAME,doc_type=TYPE_NAME, body={'query': {'match': {'ticker': day['_source.ticker']}}})

        print("Exporting predictions to ES")
        if self.verbose: print(es_array)
        res = es.bulk(index=INDEX_NAME, body=es_array, refresh=True)
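The manual concatenation above builds Elasticsearch's newline-delimited bulk format: one action line followed by one document line per record, each terminated by "\n". An equivalent join-based version over the same bulk_data structure (a list of [meta_json, doc_json] pairs):

es_array = "\n".join(line for pair in bulk_data for line in pair) + "\n"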
Code Example #16
class HiddenMarkovModel(BaseModel):
    def __init__(self):

        # Create some assets:
        assetsList = [
            Asset('WS30', 'traditional', 'historical'),    # Index US
            Asset('XAUUSD', 'traditional', 'historical'),  # Commodity
            Asset('GDAXIm', 'traditional', 'historical'),  # Index EUR
            Asset('EURUSD', 'traditional', 'historical'),  # Major
            Asset('GBPJPY', 'traditional', 'historical'),  # Minor
        ]

        # Initialize the ResearchStudy class:
        super().__init__('HiddenMarkovModel', assetsList)

        # Fix the random seed to reproduce results:
        np.random.seed(33)

        # Print to see if working:
        #logger.warning(self.PORTFOLIO._portfolioDict['WS30'])

    def _defineModelParameters(self):

        # Define the model:
        #self.model = GaussianHMM(n_components=2,
        #                         covariance_type="full",
        #                         n_iter=200,
        #                         verbose=True)
        self.model = GMMHMM(n_components=2,
                            covariance_type="full",
                            n_iter=20,
                            verbose=True)

    def _monitorConvergence(self):

        # Print:
        logger.warning(f"Model Converged: {self.model.monitor_.converged}")

    def _monitorHistory(self):

        # Print:
        logger.warning(f"Model History: {self.model.monitor_.history}")

    def _fitTheModel(self, saveDirectory):

        # Loop the portfolio dict:
        for eachAssetName, eachAssetDataFrame in self.PORTFOLIO._portfolioDict.items():

            # Re-initialize the parameters:
            self._defineModelParameters()

            # Fit the model:
            # Reshape the (n,) returns series into the (n, 1) array hmmlearn expects.
            RETURNS_RESHAPED = np.column_stack([eachAssetDataFrame["Returns"]])
            self.model.fit(RETURNS_RESHAPED)
            logger.warning(
                f"Model Score for asset <{eachAssetName}>: {self.model.score(RETURNS_RESHAPED)}"
            )

            # Check convergence and history:
            self._monitorConvergence()
            self._monitorHistory()

            # Predict the hidden states based on the returns:
            HIDDEN_STATES = self.model.predict(RETURNS_RESHAPED)
            #logger.warning(HIDDEN_STATES)

            # Save the model:
            if saveDirectory:
                self._saveModel(assetModelName=eachAssetName,
                                saveDirectory=saveDirectory)

            # Create the new column in the dataframe:
            eachAssetDataFrame['HiddenStates'] = HIDDEN_STATES

    def _saveDataFrames(self, saveDirectory):

        # Save each dataframe:
        for eachAssetName, eachAssetDataFrame in self.PORTFOLIO._portfolioDict.items():

            logger.warning(
                f'[{self._saveDataFrames.__name__}] - Looping for asset <{eachAssetName}>...'
            )
            eachAssetDataFrame.to_csv(saveDirectory +
                                      f'/{eachAssetName}_DF.csv')

    def _saveModel(self, assetModelName, saveDirectory):

        # Save the model:
        with open(saveDirectory + f'/HMM_{assetModelName}.pickle',
                  'wb') as pickle_file:
            pickle.dump(self.model, pickle_file)

    def _loadModel(self, assetModelName, loadDirectory):

        # Load the model:
        with open(loadDirectory + f'/HMM_{assetModelName}.pickle',
                  'rb') as pickle_file:
            self.model = pickle.load(pickle_file)

    def _plotModelOutput(self, saveDirectory='', showIt=False):

        # Plot:
        for eachAssetName, eachAssetDataFrame in self.PORTFOLIO._portfolioDict.items():

            logger.warning(
                f'[{self._plotModelOutput.__name__}] - Looping for asset <{eachAssetName}>...'
            )

            # We will just get part of the dataframe for the plot:
            eachAssetDataFrame_Little = eachAssetDataFrame[:200].copy()
            eachAssetDataFrame_Little['date'] = range(
                1,
                len(eachAssetDataFrame_Little) + 1)

            # Create the figure:
            f1, ax = plt.subplots(3, figsize=(10, 5))

            # Create the colormap:
            colormap = cm.get_cmap('rainbow')

            # Create the plots:
            ax[0].scatter(eachAssetDataFrame_Little.date,
                          eachAssetDataFrame_Little.close,
                          c=eachAssetDataFrame_Little.HiddenStates,
                          cmap=colormap,
                          label='Hidden States',
                          s=80)
            ax[0].set_xlabel('Hidden States',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[0].set_ylabel('Observations',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[0].legend(loc='best')

            ax[1].plot(eachAssetDataFrame_Little.date,
                       eachAssetDataFrame_Little.close,
                       label='Close Price')
            ax[1].set_xlabel('Observations',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[1].set_ylabel('Close Price',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[1].legend(loc='best')

            ax[2].plot(eachAssetDataFrame_Little.date,
                       eachAssetDataFrame_Little.Returns,
                       label='Returns')
            ax[2].set_xlabel('Observations',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[2].set_ylabel('Returns',
                             horizontalalignment='center',
                             verticalalignment='center',
                             fontsize=12,
                             labelpad=20)
            ax[2].legend(loc='best')

            plt.grid(linestyle='dotted')
            plt.subplots_adjust(left=0.09,
                                bottom=0.20,
                                right=0.94,
                                top=0.90,
                                wspace=0.2,
                                hspace=0)
            f1.canvas.set_window_title(
                f'Hidden Markov Model + more data plot for asset <{eachAssetName}>'
            )
            #f1.tight_layout()

            # In PNG:
            plt.savefig(saveDirectory + f'/HMM_{eachAssetName}.png')

            # Show it:
            if showIt:
                plt.show()
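A possible driver for this class, assuming the BaseModel wiring shown above; the ./models directory name is hypothetical:

model = HiddenMarkovModel()
model._fitTheModel(saveDirectory='./models')      # fit and pickle one HMM per asset
model._saveDataFrames(saveDirectory='./models')   # per-asset CSVs with HiddenStates
model._plotModelOutput(saveDirectory='./models')  # one PNG per asset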