Code example #1
def trainModelGMM(X, lengths, states, num_gaus):
    # Fit a GMM-emission HMM on the concatenated sequences in X,
    # where lengths gives the frame count of each individual sequence.
    model = GMMHMM(n_components=states, n_mix=num_gaus, n_iter=1000,
                   verbose=True).fit(X, lengths)

    print('Mixture Models + HMM')
    print(model.predict(X))
    print(model.monitor_.converged)
    print(model.monitor_)
    print(model.score(X, lengths))
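The helper assumes hmmlearn's convention that all training sequences are concatenated row-wise into X, with lengths giving each sequence's frame count. A minimal usage sketch with made-up data (the shapes and hyperparameters here are illustrative assumptions, not from the original):

import numpy as np

seq_a = np.random.randn(20, 3)  # hypothetical 20-frame, 3-feature sequence
seq_b = np.random.randn(35, 3)
X = np.vstack([seq_a, seq_b])       # concatenate sequences row-wise
lengths = [len(seq_a), len(seq_b)]  # per-sequence frame counts
trainModelGMM(X, lengths, states=4, num_gaus=2)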
Code example #2
model_dining.fit(D)
model_fitness.fit(F)
model_work.fit(W)
model_shop.fit(S)


print(model_dining.startprob_.tolist())
print(model_dining.transmat_.tolist())


print('After training')

print(' - Classification for seq dining s-')

print('dining result:')
print(model_dining.score(np.array(dataset_dining._convetNumericalSequence(seq_d_s))))
print('fitness result:')
print(model_fitness.score(np.array(dataset_dining._convetNumericalSequence(seq_d_s))))
print('shop result:')
print(model_shop.score(np.array(dataset_dining._convetNumericalSequence(seq_d_s))))
print('work result:')
print(model_work.score(np.array(dataset_dining._convetNumericalSequence(seq_d_s))))

print(' - Classification for seq dining l-')

print('dining result:')
print(model_dining.score(np.array(dataset_dining._convetNumericalSequence(seq_d))))
print('fitness result:')
print(model_fitness.score(np.array(dataset_dining._convetNumericalSequence(seq_d))))
print('work result:')
print(model_work.score(np.array(dataset_dining._convetNumericalSequence(seq_d))))
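The pattern above is classification by maximum log-likelihood: the same observation sequence is scored under every trained model and the best-scoring model's label wins. A hedged sketch of that selection step (the models dict and classify helper are illustrative, not part of the original):

models = {'dining': model_dining, 'fitness': model_fitness,
          'shop': model_shop, 'work': model_work}

def classify(seq, models):
    # score() returns the log-likelihood of seq under each model
    scores = {name: m.score(seq) for name, m in models.items()}
    return max(scores, key=scores.get)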
Code example #3
File: train15.py Project: snaggled/bombogenesis
class StockPredictor(object):
    def __init__(self,
                 ticker,
                 chunks=9,
                 delta=0,
                 n_hidden_states=5,
                 n_latency_days=10,
                 n_steps_frac_change=10,
                 n_steps_frac_high=30,
                 n_steps_frac_low=10,
                 n_iter=100,
                 verbose=False,
                 prediction_date=None):

        self.total_score = 0
        self.verbose = verbose
        self.ticker = ticker
        self.n_latency_days = n_latency_days
        self.hmm = GMMHMM(n_components=n_hidden_states, n_iter=n_iter)
        self.chunks = chunks
        self.delta = delta
        self.prediction_date = prediction_date
        self.fetch_training_data()
        self._compute_all_possible_outcomes(n_steps_frac_change,
                                            n_steps_frac_high,
                                            n_steps_frac_low)

    def fetch_training_data(self):

        print("Fetching training data ...")
        res = es.search(index="market",
                        doc_type="quote",
                        size=10000,
                        body={"query": {
                            "match": {
                                "ticker": self.ticker
                            }
                        }})
        self.training_data = json_normalize(res['hits']['hits'])
        self.chunked_training_data = self.training_data

        #vectors = []
        #chunked_training_data_lengths = []
        #start_index = 0
        #end_index = start_index + self.chunks
        #delta_date_index = end_index + self.delta

        #while delta_date_index <= len(self.training_data):
        #training_chunk = self.training_data[start_index:end_index]
        #    delta_chunk = self.training_data.iloc[delta_date_index]
        #    total_chunk = training_chunk.append(delta_chunk)
        #    #print("%s training_chunk to train %s" % (total_chunk, self.ticker))
        #    start_index = end_index + 1
        #    end_index = start_index + self.chunks
        #    delta_date_index = end_index + self.delta
        #    vectors.append(total_chunk)
        #    chunked_training_data_lengths.append(len(total_chunk))
        #    if self.verbose: print(total_chunk)

        #self.chunked_training_data = pd.DataFrame(np.concatenate(vectors), columns = self.training_data.columns)
        #self.chunked_training_data_lengths = chunked_training_data_lengths

        if self.verbose:
            print("Latest record for training:\n%s" %
                  self.chunked_training_data.tail(1))
        latest_date = self.chunked_training_data.tail(1)['_source.timestamp']
        datetime_object = datetime.datetime.strptime(latest_date.values[0],
                                                     '%Y-%m-%dT%H:%M:%S')

        if self.prediction_date is None:
            prediction_date = datetime_object + timedelta(days=self.delta + 1)
            self.prediction_date = datetime.datetime.strftime(
                prediction_date, '%Y-%m-%dT%H:%M:%S')

    @staticmethod
    def _extract_features(data):

        # frac_change = (close_price - open_price) / open_price
        frac_change = np.array(data['_source.change'])
        # frac_high = (high_price - open_price) / open_price
        frac_high = np.array(data['_source.change_high'])
        # frac_low = (open_price - low_price) / open_price
        frac_low = np.array(data['_source.change_low'])

        return np.column_stack((frac_change, frac_high, frac_low))

    def fit(self):
        print('Extracting Features')
        feature_vector = StockPredictor._extract_features(
            self.chunked_training_data)
        if self.verbose: print("feature vector %s" % feature_vector)
        print('Training Model with %s features' % feature_vector.size)
        print(
            "Latest date to be used in training is %s" %
            self.chunked_training_data.tail(1)['_source.timestamp'].values[0])
        #self.hmm.fit(feature_vector, self.chunked_training_data_lengths)
        self.hmm.fit(feature_vector)
        print('Model trained')

    def _compute_all_possible_outcomes(self, n_steps_frac_change,
                                       n_steps_frac_high, n_steps_frac_low):
        frac_change_range = np.linspace(-0.1, 0.1, n_steps_frac_change)
        frac_high_range = np.linspace(0, 0.05, n_steps_frac_high)
        frac_low_range = np.linspace(0, 0.05, n_steps_frac_low)

        self.all_possible_outcomes = np.array(
            list(
                itertools.product(frac_change_range, frac_high_range,
                                  frac_low_range)))

    def json_data_for_trade(self):

        rows = list()

        # meta
        ticker = self.ticker
        date = self.prediction_date
        total_score = self.total_score
        id = "%s-%s-%s" % (ticker, date, total_score)

        meta = {
            "index": {
                "_index": TRADE_INDEX_NAME,
                "_type": TRADE_TYPE_NAME,
                "_id": id
            }
        }
        rows.append(json.dumps(meta))

        # data
        row = ObjDict()
        row.total_score = total_score
        row.timestamp = self.prediction_date
        row.ticker = self.ticker
        rows.append(json.dumps(row))

        return rows

    def json_data_for_outcome(self, outcome, score):

        rows = list()

        # meta
        ticker = self.ticker
        date = self.prediction_date
        vector = outcome
        id = "%s-%s-%s" % (ticker, date, vector)

        meta = {"index": {"_index": INDEX_NAME, "_type": TYPE_NAME, "_id": id}}
        rows.append(json.dumps(meta))

        # data
        row = ObjDict()
        row.frac_change = outcome[0]
        row.frac_high_range = outcome[1]
        row.frac_low_range = outcome[2]
        open_price = self.training_data.tail(1)['_source.open'].values[0]
        predicted_close = open_price * (1 + outcome[0])
        expected_value = outcome[0] * score
        row.predicted_close = predicted_close
        row.expected_value = expected_value
        row.timestamp = self.prediction_date
        row.score = score
        row.chunks = self.chunks
        row.delta = self.delta
        row.ticker = self.ticker
        rows.append(json.dumps(row))

        return rows

    def delete_prediction_data(self, ticker):
        print("Deleting prediction data for ... %s" % self.ticker)
        es.delete_by_query(index=INDEX_NAME,
                           doc_type=TYPE_NAME,
                           body={'query': {
                               'match': {
                                   'ticker': self.ticker
                               }
                           }})

    def predict_outcomes(self):

        print("predicting outcomes for: %s" % self.prediction_date)
        previous_testing_data = self.training_data.tail(
            self.n_latency_days).index

        if self.verbose:
            print("previous_testing_data %s" % previous_testing_data)

        test_data = self.training_data.iloc[previous_testing_data]

        if self.verbose:
            print("Using the following slice of data:")
            print("[%s]" % previous_testing_data)
            print(test_data)

        test_data_features = StockPredictor._extract_features(test_data)

        # to blow everything away - may need to recreate/refresh indexes in ES!
        #self.delete_and_create_index()

        bulk_data = list()
        trade_data = list()
        outcome_score = []

        for possible_outcome in self.all_possible_outcomes:

            test_feature_vectors = np.row_stack(
                (test_data_features, possible_outcome))
            score = self.hmm.score(test_feature_vectors)

            # ignoring scores <= 0
            if score > 0:
                rows = self.json_data_for_outcome(possible_outcome, score)
                bulk_data.append(rows)

                if possible_outcome[0] > 0:
                    self.total_score = self.total_score + score
                if possible_outcome[0] < 0:
                    self.total_score = self.total_score - score
                trade_rows = self.json_data_for_trade()
                trade_data.append(trade_rows)

        print("Exporting predictions to ES")

        es_array = self.format_data_for_es(bulk_data)
        res = es.bulk(index=INDEX_NAME, body=es_array, refresh=True)

        es_array = self.format_data_for_es(trade_data)
        res = es.bulk(index=TRADE_INDEX_NAME, body=es_array, refresh=True)

    def format_data_for_es(self, data):
        es_array = ""
        for row in data:
            es_array += row[0]
            es_array += "\n"
            es_array += row[1]
            es_array += "\n"
        return es_array
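format_data_for_es assembles the newline-delimited action/document pairs that the Elasticsearch bulk API consumes. A minimal sketch of one entry's shape (the index, id, and field values below are illustrative, not from the original):

import json

meta = json.dumps({"index": {"_index": "predictions", "_id": "AAPL-2020-01-02"}})
doc = json.dumps({"frac_change": 0.01, "score": 12.3, "ticker": "AAPL"})
payload = meta + "\n" + doc + "\n"  # one action line, then one document line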
Code example #4
def main():
    outdir = r'./training_files/multi'
    outdir2 = r'./training_files/arnab'
    outdir3 = r'./training_files/kejriwal'
    outdir4 = r'./training_files/ravish'
    outdir5 = r'./training_files/not-shouting'
    outdir6 = r'./training_files/shouting'
    outdir7 = r'./training_files/single'
    outdir8 = r'./training_files/modi'
    outdir9 = r'./training_files/ond_more'

    # create one GMM-HMM per class

    multi = GMMHMM(5, 2)
    discuss = GMMHMM(5, 2)
    arnab = GMMHMM(5, 2)
    kejriwal = GMMHMM(5, 2)
    ravish = GMMHMM(5, 2)

    notshouting = GMMHMM(5, 2)
    shouting = GMMHMM(5, 2)
    single = GMMHMM(5, 2)

    # Train one model per class, reading 10-frame sequences from each
    # directory of feature files.

    def read_sequences(path):
        # Read one comma-separated feature file and group its rows into
        # 10-frame observation sequences; a trailing partial group is dropped.
        obs, i_sequence = [], []
        with open(path, "r") as f:
            for line in f:
                i_sequence.append([float(v) for v in line.strip().split(",")])
                if len(i_sequence) == 10:
                    obs.append(numpy.array(i_sequence))
                    i_sequence = []
        return obs

    def load_observations(directory):
        # Accumulate 10-frame sequences across every file in the directory.
        obs = []
        for path in get_files_list(directory):
            obs.extend(read_sequences(path))
        return obs

    multi.fit(load_observations(outdir))
    arnab.fit(load_observations(outdir2))
    kejriwal.fit(load_observations(outdir3))
    ravish.fit(load_observations(outdir4))
    notshouting.fit(load_observations(outdir5))
    shouting.fit(load_observations(outdir6))
    single.fit(load_observations(outdir7))

    # It's time for some testing
    q = []
    t = "testcase_output.txt"
    out = open(t, "w")

    # Read each test file and build a list of 10-frame sequences
    #te=["test1.txt","test2.txt","test3.txt","test4.txt","test5.txt","test6.txt","test7.txt","test8.txt","test9.txt","test10.txt"]

    #f=open("expected.txt")
    #d_expected={}
    '''
	
	for line in f:
		x=line.strip().split()
		d_expected[x[0]]={'arnab':float(x[1]),'kejriwal':float(x[2]),'ravish':float(x[3])}
	'''

    te = get_files_list(r'./testing_files')
    #te=["test1.txt","test2.txt","test3.txt"]
    for ad in te:
        d = {"arnab": 0, "kejriwal": 0, "ravish": 0}
        obs = read_sequences(ad)

        p = []
        p_choosen = []
        p1_choosen = []
        p1 = []
        p2 = []
        p2_choosen = []

        #print obs
        for i in obs:
            p.append((shouting.score(i), "shouting"))
            p.append((notshouting.score(i), "notshouting"))
            p_choosen.append(max(p, key=lambda x: x[0]))
            p = []
        for i in obs:
            p1.append((arnab.score(i), "arnab"))
            p1.append((kejriwal.score(i), "kejriwal"))
            p1.append((ravish.score(i), "ravish"))
            p1_choosen.append(max(p1, key=lambda x: x[0]))
            p1 = []

        for i in obs:
            p2.append((multi.score(i), "multi"))
            p2.append((single.score(i), "single"))
            p2_choosen.append(max(p2, key=lambda x: x[0]))
            p2 = []
        #print p

        p = []
        p1 = []
        p_choosen = [b for a, b in p_choosen]
        p1_choosen = [b for a, b in p1_choosen]
        p2_choosen = [b for a, b in p2_choosen]
        '''
		#print p_choosen
		#print the state sequence with the timestamp in the output file
	
		t="testcase_output_9.txt"
		out=open(t,"a+")

		out.write(str(ad)+"--->")
		out.write(p_choosen[0])
		out.write("\n")
		'''

        #calculate the amount per second and append to the same file

        #print p_choosen
        #print p1_choosen
        shouting1 = []
        notshouting1 = []

        totaltime = len(p_choosen) * 0.05

        single_count = 0

        for i in range(len(p_choosen)):
            if p2_choosen[i] == "single":
                single_count += 1
                if p_choosen[i] == "shouting":
                    shouting1.append(p1_choosen[i])
                elif p_choosen[i] == "notshouting":
                    notshouting1.append(p1_choosen[i])
        #print d
        d_shouting = {"arnab": 0, "kejriwal": 0, "ravish": 0}
        d_notshouting = {"arnab": 0, "kejriwal": 0, "ravish": 0}

        for i in shouting1:
            d_shouting[i] += 1

        for i in notshouting1:
            d_notshouting[i] += 1

        #print p_choosen

        out.write("\n*******--> " + str(ad) + "  <--*******\n")
        #write arnab,ravish and kejri
        fn = ad.strip().split("/")
        fn = fn[len(fn) - 1]

        #out.write("Time predicted for questioning: "+str((d5['question'])*0.05)+" seconds.\n")
        #out.write("Time predicted for discussion: "+str((d5['discuss'])*0.05)+" seconds.\n")
        out.write("\nChecking single HMM and multi HMM:\n")
        out.write("Number of instance of Single: " + str(single_count) + "\n")
        out.write(
            "\nChecking shouting and non-shouting HMM for all Single instances:\n"
        )
        out.write("Number of instance of Shouting: " + str(len(shouting1)) +
                  "\n")
        out.write("Number of instance of Not-shouting: " +
                  str(len(notshouting1)) + "\n")
        out.write(
            "\nChecking the frequency of each speaker in both both shouting and not shouting instance...\n"
        )
        out.write("Shouting instance: \n" + str(d_shouting) + "\n")
        out.write("Not-Shouting instance: \n" + str(d_notshouting) + "\n")

        out.write("\nResult:\n")

        for c, d in d_shouting.items():
            out.write(
                str(c) + " was shouting for " + str(d * 0.05) + " sec.\n")

        out.write("\n")

        for c, d in d_notshouting.items():
            out.write(
                str(c) + " was not shouting for " + str(d * 0.05) + " sec.\n")

        out.write("\n")
        for c, d in d_shouting.items():
            out.write(
                str(c) + " was shouting for " +
                str(((d * 0.05) / totaltime) * 100) + " % of time.\n")

        out.write("\n")

        for c, d in d_notshouting.items():
            out.write(
                str(c) + " was not shouting for " +
                str(((d * 0.05) / totaltime) * 100) + " % of time.\n")

        out.write("\n")

        print(d_shouting)
        print(d_notshouting)
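Each 10-frame window is pushed through a cascade of likelihood comparisons: single vs. multi speaker, shouting vs. not shouting, then which speaker. A hedged sketch of that per-window decision, assuming the models trained in main() (the label_window helper is illustrative):

def label_window(w):
    # at each level, keep the label whose model gives the highest log-likelihood
    mode = max([(multi.score(w), 'multi'), (single.score(w), 'single')])[1]
    tone = max([(shouting.score(w), 'shouting'),
                (notshouting.score(w), 'notshouting')])[1]
    speaker = max([(arnab.score(w), 'arnab'), (kejriwal.score(w), 'kejriwal'),
                   (ravish.score(w), 'ravish')])[1]
    return mode, tone, speaker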
Code example #5
import os
from hmmlearn.hmm import GMMHMM
from python_speech_features import mfcc
from scipy.io import wavfile
from sklearn.model_selection import train_test_split
import numpy as np
import sys

input_folder = '/home/sachin/Downloads/cmu_us_awb_arctic-0.95-release/cmu_us_awb_arctic/wav'
hmm_models = []

# Stack MFCC frames from every wav file into one feature matrix.
X = np.array([])
for filename in os.listdir(input_folder):
    filepath = os.path.join(input_folder, filename)
    sampling_freq, audio = wavfile.read(filepath)
    mfcc_features = mfcc(audio, sampling_freq)
    if len(X) == 0:
        X = mfcc_features
    else:
        X = np.append(X, mfcc_features, axis=0)


model = GMMHMM(n_components=3, n_mix=45, n_iter=100)
X_train, X_test = train_test_split(X, train_size=0.7)
hmm_models.append(model.fit(X_train))

print(model.score(X_test))
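Note that train_test_split shuffles individual MFCC frames, discarding the temporal order an HMM is meant to model. A hedged variant that keeps each wav file as its own sequence, using hmmlearn's lengths argument (same imports and input_folder as above; whether this matches the author's intent is an assumption):

feature_list = []
lengths = []
for filename in os.listdir(input_folder):
    sampling_freq, audio = wavfile.read(os.path.join(input_folder, filename))
    feats = mfcc(audio, sampling_freq)
    feature_list.append(feats)
    lengths.append(len(feats))

X = np.concatenate(feature_list)
model = GMMHMM(n_components=3, n_mix=45, n_iter=100)
model.fit(X, lengths)  # lengths marks each file as a separate sequence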
Code example #6
    print "checking num", picChecked
    print '-----------------'

    try:
        resultGau0 = model0Gau.score(newTestPictures[picChecked])
        resultGau1 = model1Gau.score(newTestPictures[picChecked])
        resultGau2 = model2Gau.score(newTestPictures[picChecked])
        resultGau3 = model3Gau.score(newTestPictures[picChecked])
        resultGau4 = model4Gau.score(newTestPictures[picChecked])
        resultGau5 = model5Gau.score(newTestPictures[picChecked])
        resultGau6 = model6Gau.score(newTestPictures[picChecked])
    except ValueError:
        print('err gau')

    try:
        resultMix0 = model0GauMix.score(newTestPictures[picChecked])
        resultMix1 = model1GauMix.score(newTestPictures[picChecked])
        resultMix2 = model2GauMix.score(newTestPictures[picChecked])
        resultMix3 = model3GauMix.score(newTestPictures[picChecked])
        resultMix4 = model4GauMix.score(newTestPictures[picChecked])
        resultMix5 = model5GauMix.score(newTestPictures[picChecked])
        resultMix6 = model6GauMix.score(newTestPictures[picChecked])
    except ValueError:
        print('err gaumix')

    try:
        resultMulti0 = model0Multi.score(newTestPictures[picChecked])
        resultMulti1 = model1Multi.score(newTestPictures[picChecked])
        resultMulti2 = model2Multi.score(newTestPictures[picChecked])
        resultMulti3 = model3Multi.score(newTestPictures[picChecked])
        resultMulti4 = model4Multi.score(newTestPictures[picChecked])
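The seven near-identical score() calls per model family could be collapsed into a loop; a hedged sketch for the Gaussian family (the model list is assembled from the variables shown, and since the snippet above is truncated, the surrounding context is assumed):

gau_models = [model0Gau, model1Gau, model2Gau, model3Gau,
              model4Gau, model5Gau, model6Gau]
try:
    # log-likelihood of the checked picture under each candidate model
    gau_scores = [m.score(newTestPictures[picChecked]) for m in gau_models]
except ValueError:
    print('err gau')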
Code example #7
print(seq_s)

model_dining.fit(D)
model_fitness.fit(F)
model_work.fit(W)
model_shop.fit(S)

print(model_dining.startprob_.tolist())
print(model_dining.transmat_.tolist())

print('After training')

print(' - Classification for seq dining s-')

print('dining result:')
print(model_dining.score(
    np.array(dataset_dining._convetNumericalSequence(seq_d_s))))
print('fitness result:')
print(model_fitness.score(
    np.array(dataset_dining._convetNumericalSequence(seq_d_s))))
print('shop result:')
print(model_shop.score(
    np.array(dataset_dining._convetNumericalSequence(seq_d_s))))
print('work result:')
print(model_work.score(
    np.array(dataset_dining._convetNumericalSequence(seq_d_s))))

print(' - Classification for seq dining l-')

print('dining result:')
print(model_dining.score(
    np.array(dataset_dining._convetNumericalSequence(seq_d))))
Code example #8
File: train.py Project: snaggled/bombogenesis
class StockPredictor(object):
    def __init__(self,
                 ticker,
                 n_hidden_states=5,
                 n_latency_days=10,
                 n_steps_frac_change=50,
                 n_steps_frac_high=30,
                 n_steps_frac_low=10,
                 n_iter=1000,
                 verbose=False):

        self.verbose = verbose
        self.ticker = ticker
        self.n_latency_days = n_latency_days

        self.hmm = GMMHMM(n_components=n_hidden_states, n_iter=n_iter)

        self.fetch_training_data()
        self.fetch_latest_data()  # to predict

        self._compute_all_possible_outcomes(n_steps_frac_change,
                                            n_steps_frac_high,
                                            n_steps_frac_low)

    def fetch_latest_data(self):

        print("Fetching latest data ...")
        res = es.search(index="market",
                        doc_type="quote",
                        size=10000,
                        body={"query": {
                            "match": {
                                "ticker": self.ticker
                            }
                        }})
        latest_data = json_normalize(res['hits']['hits'])
        self.latest_data = latest_data.tail(1)
        if self.verbose: print("Latest data:\n%s" % self.latest_data)

    def fetch_training_data(self):

        print("Fetching training data ...")
        res = es.search(index="market",
                        doc_type="quote",
                        size=10000,
                        body={"query": {
                            "match": {
                                "ticker": self.ticker
                            }
                        }})
        self.training_data = json_normalize(res['hits']['hits'])
        self.training_data.drop(self.training_data.tail(1).index, inplace=True)
        print("%s records to train %s" %
              (len(self.training_data.index), self.ticker))
        if self.verbose:
            print("Latest record for training:\n%s" %
                  self.training_data.tail(1))

        # tbd - to use es instead
        #q = query % (self.ticker, "lt", datetime.date.today().strftime("%Y-%m-%d"))
        #print(q)
        #res = es.search(index=INDEX_NAME, doc_type=TYPE_NAME, size=10000, body=query)

    @staticmethod
    def _extract_features(data):

        # frac_change = (close_price - open_price) / open_price
        frac_change = np.array(data['_source.change'])
        # frac_high = (high_price - open_price) / open_price
        frac_high = np.array(data['_source.change_high'])
        # frac_low = (open_price - low_price) / open_price
        frac_low = np.array(data['_source.change_low'])

        return np.column_stack((frac_change, frac_high, frac_low))

    def fit(self):
        print('Extracting Features')
        feature_vector = StockPredictor._extract_features(self.training_data)
        if self.verbose: print("feature vector %s" % feature_vector)
        print('Training Model with %s features' % feature_vector.size)
        print("Latest date to be used in training is %s" %
              self.training_data.tail(1)['_source.timestamp'].values[0])
        self.hmm.fit(feature_vector)
        print('Model trained')

    def _compute_all_possible_outcomes(self, n_steps_frac_change,
                                       n_steps_frac_high, n_steps_frac_low):
        frac_change_range = np.linspace(-0.1, 0.1, n_steps_frac_change)
        frac_high_range = np.linspace(0, 0.1, n_steps_frac_high)
        frac_low_range = np.linspace(0, 0.1, n_steps_frac_low)

        self.all_possible_outcomes = np.array(
            list(
                itertools.product(frac_change_range, frac_high_range,
                                  frac_low_range)))

    def json_data_for_outcome(self, day, outcome, score):

        rows = list()

        # meta
        ticker = day['_source.ticker']
        date = day['_source.timestamp']
        vector = outcome
        id = "%s-%s-%s" % (ticker, date, vector)

        meta = {"index": {"_index": INDEX_NAME, "_type": TYPE_NAME, "_id": id}}
        rows.append(json.dumps(meta))

        # data
        row = ObjDict()
        row.frac_change = outcome[0]
        row.frac_high_range = outcome[1]
        row.frac_low_range = outcome[2]
        open_price = day['_source.open'].values[0]
        predicted_close = open_price * (1 + outcome[0])
        expected_value = outcome[0] * score
        row.predicted_close = predicted_close
        row.expected_value = expected_value
        row.timestamp = day['_source.timestamp'].values[0]
        row.score = score
        row.ticker = day['_source.ticker'].values[0]
        rows.append(json.dumps(row))

        return rows

    def predict_outcomes(self):

        print("predicting outcomes for: %s" %
              self.latest_data['_source.timestamp'].values[0])
        previous_testing_data = self.training_data.tail(
            self.n_latency_days).index

        if self.verbose:
            print("previous_testing_data %s" % previous_testing_data)

        test_data = self.training_data.iloc[previous_testing_data]

        if self.verbose:
            print("Using the following slice of data:")
            print("[%s]" % previous_testing_data)
            print(test_data)

        test_data_features = StockPredictor._extract_features(test_data)

        # to blow everything away - may need to recreate/refresh indexes in ES!
        #self.delete_and_create_index()

        bulk_data = list()
        outcome_score = []

        for possible_outcome in self.all_possible_outcomes:

            test_feature_vectors = np.row_stack(
                (test_data_features, possible_outcome))

            if self.verbose:
                print("Final test feature set:")
                print("[%s]" % test_feature_vectors)

            score = self.hmm.score(test_feature_vectors)

            # ignoring scores <= 0
            if score > 0:
                rows = self.json_data_for_outcome(self.latest_data,
                                                  possible_outcome, score)
                bulk_data.append(rows)

        # format for ES, ugly
        es_array = ""
        for row in bulk_data:
            es_array += row[0]
            es_array += "\n"
            es_array += row[1]
            es_array += "\n"

        #print("Deleting prediction data for ... %s" % day['_source.ticker'])
        #es.delete_by_query(index=INDEX_NAME,doc_type=TYPE_NAME, body={'query': {'match': {'ticker': day['_source.ticker']}}})

        print("Exporting predictions to ES")
        if self.verbose: print(es_array)
        res = es.bulk(index=INDEX_NAME, body=es_array, refresh=True)
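predict_outcomes appends each candidate (frac_change, frac_high, frac_low) triple to the recent feature window, scores the extended window under the trained HMM, and exports every positive-scoring outcome to Elasticsearch. A hedged sketch of the related pick-the-single-best-outcome step (predictor, window, and open_price are assumed to exist; this condenses rather than reproduces the loop above):

scores = [predictor.hmm.score(np.row_stack((window, o)))
          for o in predictor.all_possible_outcomes]
best = predictor.all_possible_outcomes[int(np.argmax(scores))]
predicted_close = open_price * (1 + best[0])  # best[0] is frac_change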