def test_backward(): initMatrix = np.matrix([[1.0], [0]]) transitionMatrix = np.matrix([[0.9, 0.1, 0], [0, 0.9, 0.1]]) x = np.matrix([-0.2, 2.6, 1.3]) c = np.matrix([1.0, 0.1625, 0.8266, 0.0581]) mc = MarkovChain(initMatrix, transitionMatrix) g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1])) g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2])) pX, logS = g1.prob(np.matrix([[g1], [g2]]), x) betaHat = mc.backward(mc, pX, c) print 'betaHat:', betaHat print 'expected: [1.0003 1.0393 0; 8.4182 9.3536 2.0822]' initMatrix = np.matrix([[1.0], [0]]) transitionMatrix = np.matrix([[0.9, 0.1], [0.1, 0.9]]) x = np.matrix([-0.2, 2.6, 1.3]) mc = MarkovChain(initMatrix, transitionMatrix) g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1])) g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2])) pX, logS = g1.prob(np.matrix([g1, g2]), x) alphaHat, c = mc.forward(mc, pX) betaHat = mc.backward(mc, pX, c) print 'betaHat:', betaHat print 'expected: [1.0 6.798238264 1.125986646; 5.223087455 5.75095566 1.125986646]'
def jsonToMarkovChain(path, encoding=None):
    """Load a JSON file and build a MarkovChain from its contents.

    Args:
        path: Path of the JSON file to read.
        encoding: Optional text encoding. When given, the file is opened
            with ``io.open`` using that encoding; otherwise the builtin
            ``open`` is used.

    Returns:
        ``MarkovChain(data[SS], data[ID], data[TM])`` when the decoded
        JSON contains every key listed in ``ARRAY_KEYS``; otherwise
        ``MarkovChain(data)``.
    """
    if encoding:
        handle = io.open(path, 'r', encoding=encoding)
    else:
        handle = open(path, 'r')

    # The context manager guarantees the handle is closed even when the
    # JSON is malformed (previously the file was never closed).
    with handle:
        data = json.load(handle)

    if all(k in data for k in ARRAY_KEYS):
        return MarkovChain(data[SS], data[ID], data[TM])
    return MarkovChain(data)
def initLeftRightMC(self, nStates, stateDuration=None):
    """Build a MarkovChain whose states can only stay put or move one step on.

    The transition matrix has ``nStates`` rows and ``nStates + 1``
    columns: the diagonal holds the stay probabilities, the first
    super-diagonal holds the move-on probabilities, and the extra last
    column receives the move-on probability of the last state. The
    initial probability vector puts all mass on the first state.

    Args:
        nStates: Number of states. A warning is printed when it is <= 1,
            but construction still proceeds.
        stateDuration: A scalar (applied to every state) or a sequence /
            array with exactly ``nStates`` values. Defaults to 10.0.
            Values below 1 are treated as 1. The stay probability of a
            state with duration ``D`` is ``(D - 1) / D``, floored at 0.1.

    Returns:
        ``MarkovChain(p0, A)`` built from the vectors described above.

    Raises:
        ValueError: If ``stateDuration`` has more than one value and the
            count differs from ``nStates``.
    """
    defaultDuration = 10.0
    minDiagProb = 0.1

    if nStates <= 1:
        # Warning only: the construction below still works for nStates == 1.
        print('Number of states must be > 1')

    if stateDuration is None:
        stateDuration = defaultDuration

    # asanyarray accepts Python ints/floats, lists and arrays alike and
    # keeps np.matrix inputs as matrices.
    durations = np.asanyarray(stateDuration)
    if durations.size == 1:
        durations = np.tile(durations, (nStates, 1))
    elif durations.size != nStates:
        # Fail early with a clear message instead of an obscure numpy
        # shape error further down.
        raise ValueError('Incompatible length of state durations')

    D = np.maximum(np.ones(durations.shape), durations)
    aii = np.maximum(np.ones(D.shape) * minDiagProb, np.divide((D - 1), D))

    # diagflat(..., 1) yields an (nStates + 1)-square matrix; keep only the
    # first nStates rows so it lines up with the widened diagonal matrix.
    aij = np.diagflat(1 - aii, 1)[0:nStates, :]
    A = np.concatenate(
        (np.diagflat(aii), np.zeros((nStates, 1))), axis=1) + aij

    p0 = np.concatenate(
        (np.matrix([1]), np.zeros((nStates - 1, 1))), axis=0)
    return MarkovChain(p0, A)
def multi_dim_observation():
    """Draw from a two-state HMM whose Gaussian sources have 2x1 means.

    Both sources use the covariance ``[[2, 1], [1, 4]]`` and differ only
    in their mean. Returns the ``(X, S)`` pair produced by
    ``h.rand(h, 100)``.
    """
    chain = MarkovChain(
        np.matrix([[0.75], [0.25]]),
        np.matrix([[0.99, 0.01], [0.03, 0.97]]),
    )

    # Each source gets its own covariance matrix object, as before.
    source_a = GaussD(
        mean=np.matrix([[0], [0]]), cov=np.matrix([[2, 1], [1, 4]]))
    source_b = GaussD(
        mean=np.matrix([[3], [3]]), cov=np.matrix([[2, 1], [1, 4]]))

    h = HMM(chain, np.matrix([[source_a], [source_b]]))
    X, S = h.rand(h, 100)
    return X, S
def parse_porn_csv(csv_file, title_index):
    """Return a MarkovChain built from the ``title`` column of a '|' CSV.

    If ``./<csv_file>_markov_db`` already exists, a MarkovChain pointing
    at that database is returned without reading the CSV. Otherwise every
    title is collected (each followed by '.'), fed to
    ``generateDatabase`` and the database is dumped.

    Args:
        csv_file: Path of the '|'-delimited CSV file.
        title_index: Unused by this function; kept for interface
            compatibility.

    Returns:
        The MarkovChain bound to ``./<csv_file>_markov_db``.
    """
    markov_db_filename = './{csv}_markov_db'.format(csv=csv_file)
    if os.path.isfile(markov_db_filename):
        return MarkovChain(markov_db_filename)

    fieldnames = [
        'iframe', 'thumbnail', 'samples', 'title', 'tags', 'more_tags',
        'unknown', 'length', 'views', 'likes', 'dislikes',
    ]
    with open(csv_file) as porn_csv:
        reader = csv.DictReader(
            porn_csv, delimiter='|', fieldnames=fieldnames)
        # DictReader fills missing trailing fields with None, so a short
        # row has no title; skip it instead of failing on None + '.'.
        titles = [
            row['title'] + '.' for row in reader
            if row['title'] is not None
        ]

    mc = MarkovChain(markov_db_filename)
    mc.generateDatabase(''.join(titles))
    mc.dumpdb()
    return mc
def finite_duration():
    """Draw from a two-state HMM whose transition matrix is 2x3.

    The sources are scalar Gaussians (mean 0 / stdev 1 and mean 3 /
    stdev 2). Returns the ``(X, S)`` pair produced by ``h.rand(h, 100)``.
    """
    chain = MarkovChain(
        np.matrix([[0.75], [0.25]]),
        np.matrix([[0.4, 0.4, 0.2], [0.1, 0.6, 0.3]]),
    )
    source_a = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    source_b = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))

    h = HMM(chain, np.matrix([[source_a], [source_b]]))
    X, S = h.rand(h, 100)
    return X, S
def __init__(self, user, filename, repliesFilename):
    """Store the user/file settings and create the MarkovChain.

    user: id of the user whose data is collected and used in the app.
    filename: name of the file holding the user's tweets, excluding
        replies (e.g. "data_tweets.json"). Per the original author's
        note, the file must already contain ``{}`` or it will crash.
    repliesFilename: name of the file holding the user's replies to
        other users (e.g. "data_replies.json"). The same ``{}``
        requirement applies.
    """
    # Plain settings first.
    self._user = user
    self._filename = filename
    self._repliesFilename = repliesFilename

    # Runtime state that starts out empty.
    self._stream = None
    self._repliedUsers = {}

    # The chain is backed by the tweets file.
    self._MarkovChain = MarkovChain(self._filename)
def generate_dict(api, user_id):
    """Build a MarkovChain from a user's timeline, without RTs or @-tweets.

    Every item of the ``api.user_timeline`` cursor is reduced to its
    UTF-8 encoded text; texts starting with 'RT' or '@' are dropped and
    the remainder is handed to ``MarkovChain``.
    """
    timeline = tweepy.Cursor(api.user_timeline, id=user_id).items()
    encoded_texts = (
        status._json['text'].encode('utf-8') for status in timeline)
    tweets = [
        text for text in encoded_texts
        if not text.startswith(('RT', '@'))
    ]
    return MarkovChain(tweets)
def test_forward(): initMatrix = np.matrix([[1.0], [0]]) transitionMatrix = np.matrix([[0.9, 0.1, 0], [0, 0.9, 0.1]]) mc = MarkovChain(initMatrix, transitionMatrix) g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1])) g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2])) # output sequence x = np.matrix([-0.2, 2.6, 1.3]) pX, logS = g1.prob(np.matrix([g1, g2]), x) alphaHat, c = mc.forward(mc, pX) print 'alphaHat:', alphaHat, 'expected: [1 0.3847 0.4189; 0 0.6153 0.5811]' print 'c:', c, 'expected: [1 0.1625 0.8266 0.0581]' h = HMM(mc, np.matrix([[g1], [g2]])) # logP = P(X|h) logP = h.logprob(h, x) print 'logP: ', logP, 'expected: -9.1877' initMatrix = np.matrix([[1.0], [0]]) transitionMatrix = np.matrix([[0.0, 1.0, 0.0], [0.0, 0.7, 0.3]]) x = np.matrix([-0.2, 2.6, 1.3]) mc = MarkovChain(initMatrix, transitionMatrix) g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1])) g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([1])) h1 = HMM(mc, np.matrix([[g1], [g2]])) transitionMatrix = np.matrix([[0.5, 0.5, 0.0], [0.0, 0.5, 0.5]]) mc2 = MarkovChain(initMatrix, transitionMatrix) g3 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1])) g4 = GaussD(mean=np.matrix([3]), stdev=np.matrix([1])) h2 = HMM(mc2, np.matrix([[g3], [g4]])) logP = h1.logprob(np.matrix([h1, h2]), x) print 'logP:', logP, 'expected: [-5.562463348 -6.345037882]'
def __init__(self, soundfont_name=None):
    """Initialise fluidsynth and the chord-progression Markov chain.

    soundfont_name: path of the soundfont to load; when None the bundled
        '../soundfonts/soundfont.sf2' is used. The 'alsa' driver is
        always requested.
    """
    soundfont = ('../soundfonts/soundfont.sf2'
                 if soundfont_name is None else soundfont_name)
    fluidsynth.init(soundfont, 'alsa')

    self.m_chain = MarkovChain(get_all_progressions())
    self.sim = self.m_chain.infinite_progression()

    # Three-value tuples; the eight-entry pattern is the four-entry
    # sequence down1/off/down2/off repeated twice.
    down1 = (0.7, 0.05, 0.2)
    down2 = (0.2, 0.05, 0.7)
    off = (0.0, 0.4, 0.1)
    self.bassproba = [down1, off, down2, off] * 2

    self.current = self.m_chain.START
def generate_words():
    """Build a MarkovChain from ./frost.txt and return ``walk(10)``.

    The file is split on whitespace, line by line, and the resulting
    word list is passed to ``MarkovChain``.

    Returns:
        Whatever ``MarkovChain.walk(10)`` returns for that chain.
    """
    # The context manager closes the file; it was previously left open.
    with open("./frost.txt", "r") as corpus:
        words_list = [word for line in corpus for word in line.split()]

    markovchain = MarkovChain(words_list)
    return markovchain.walk(10)
def main(arg=None):
    """Main Function.

    Reads the training lines from the file named by ``sys.argv[1]``, or
    falls back to ``get_default_lines()`` when no file is given. It then
    prints the lines, the chain's dictionary and three generated lines.

    Args:
        arg: Currently ignored; see the note below.
    """
    # NOTE(review): ``arg`` is overwritten with sys.argv, so the parameter
    # has no effect. Kept as-is because honouring it could change behaviour
    # for existing callers - confirm the intent before changing.
    arg = sys.argv

    if len(arg) < 2:
        lines = get_default_lines()
    else:
        # Close the input file once read (it was previously left open).
        with open(arg[1], 'r') as input_file:
            raw_lines = input_file.read()
        lines = raw_lines.strip().split('\n')

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(lines)
    markov_chain = MarkovChain(lines)
    markov_dict = markov_chain.get_dictionary()
    print(markov_dict)
    print(markov_chain.generate_line())
    print(markov_chain.generate_line())
    print(markov_chain.generate_line())
def get_markov_chain():
    """Return the chain pickled at ``FILE``, or a new empty MarkovChain.

    A fresh ``MarkovChain()`` is returned when ``FILE`` is not an
    existing regular file.
    """
    if not os.path.isfile(FILE):
        return MarkovChain()

    # NOTE: unpickling runs arbitrary code; FILE must be a trusted file.
    with open(FILE, 'rb') as pickle_file:
        return pickle.load(pickle_file)
def __init__(self, k, n):
    """Create an empty MarkovChain and remember ``k`` and ``n``."""
    self.mc = MarkovChain()
    self.k, self.n = k, n
def main():
    """Load "player_name.txt" and run the Markov chain process on it."""
    # Load dataset from the given txt file.
    dataset = load_dataset(txt_name="player_name.txt")
    MarkovChain(dataset=dataset).markov_chain_process()
def setUp(self):
    """Build the chain under test from "empty.txt" and keep it on ``self.mc``."""
    source_name = "empty.txt"
    self.mc = MarkovChain(source_name)
"GreatEspectations.txt", "TaleOfTwoCities.txt" ] for i in range(len(bookArray)): bookArray[i] = "/home/team2/Project/" + bookArray[i] #enter number of sentences here numSentences = 12 #enter word you would like to start with startWord = 'a' markovModel = DFMaker(bookArray) edgeDF = markovModel.createEdgeDF(sc) verticiesDF = markovModel.getVerticiesDF(sc) graphFrame = GraphFrame(verticiesDF, edgeDF) sentences = "" if verticiesDF[verticiesDF["id"] == startWord].collect() == []: print("The word you entered does not appear in any of the texts!") else: markovChain = MarkovChain(graphFrame, startWord) while numSentences > 0: currentState = markovChain.getState() if currentState in string.punctuation: numSentences -= 1 sentences += markovChain.getState() + " " markovChain.nextState() print(sentences) print(markovModel.sortedLikeliness("the"))
nb_train = len(train_instances) # print(nb_train) test_data_path = data_dir + 'test_lines.txt' test_instances = MC_utils.read_instances_lines_from_file(test_data_path) nb_test = len(test_instances) # print(nb_test) print("---------------------@Build knowledge-------------------------------") MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs, item_freq_dict, user_dict = MC_utils.build_knowledge(train_instances+test_instances) if not os.path.exists(o_dir): os.makedirs(o_dir) saved_file = os.path.join(o_dir, 'transition_matrix_MC.npz') # print("Save model in ", saved_file) transition_matrix = sp.load_npz(saved_file) mc_model = MarkovChain(item_dict, reversed_item_dict, item_freq_dict, transition_matrix, mc_order) if ex_file is not None: ex_instances = MC_utils.read_instances_lines_from_file(ex_file) else : ex_instances = test_instances for i in random.sample(ex_instances, nb_predict): elements = i.split('|') b_seq = elements[1:] # prev_basket = [item for item in re.split('[\\s]+',b_seq[-2].strip())] prev_item = [] for prev_basket in b_seq[:-1]: prev_item += re.split('[\\s]+', prev_basket.strip()) target_basket = [item for item in re.split('[\\s]+',b_seq[-1].strip())] topk_item = mc_model.top_predicted_item(prev_item, topk) correct_set = set(topk_item).intersection(set(target_basket))
from MarkovChain import MarkovChain

if __name__ == "__main__":
    # NOTE(review): the transition matrix is 3x3 while 64 state labels are
    # supplied below - confirm which of the two is intended.
    transition_matrix = [[0.8, 0.19, 0.01], [0.2, 0.7, 0.1], [0.1, 0.2, 0.7]]

    # 8 prefixes x 8 suffixes. The third row previously ended with
    # 'BGG_BBG', 'BGG_BGB', 'BGG_BBB' (duplicates of entries in the fourth
    # row), leaving the corresponding 'GBG_*' labels missing.
    states = [
        'GGG_GGG', 'GGG_BGG', 'GGG_GBG', 'GGG_GGB',
        'GGG_GBB', 'GGG_BBG', 'GGG_BGB', 'GGG_BBB',
        'GGB_GGG', 'GGB_BGG', 'GGB_GBG', 'GGB_GGB',
        'GGB_GBB', 'GGB_BGB', 'GGB_BBG', 'GGB_BBB',
        'GBG_GGG', 'GBG_BGG', 'GBG_GBG', 'GBG_GGB',
        'GBG_GBB', 'GBG_BBG', 'GBG_BGB', 'GBG_BBB',
        'BGG_GGG', 'BGG_BGG', 'BGG_GBG', 'BGG_GGB',
        'BGG_GBB', 'BGG_BBG', 'BGG_BGB', 'BGG_BBB',
        'GBB_GGG', 'GBB_BGG', 'GBB_GBG', 'GBB_GGB',
        'GBB_GBB', 'GBB_BBG', 'GBB_BGB', 'GBB_BBB',
        'BBG_GGG', 'BBG_BGG', 'BBG_GBG', 'BBG_GGB',
        'BBG_GBB', 'BBG_BBG', 'BBG_BGB', 'BBG_BBB',
        'BGB_GGG', 'BGB_BGG', 'BGB_GBG', 'BGB_GGB',
        'BGB_GBB', 'BGB_BBG', 'BGB_BGB', 'BGB_BBB',
        'BBB_GGG', 'BBB_BGG', 'BBB_GBG', 'BBB_GGB',
        'BBB_GBB', 'BBB_BBG', 'BBB_BGB', 'BBB_BBB',
    ]

    weather_chain = MarkovChain(
        transition_matrix=transition_matrix, states=states)

    print(weather_chain.next_state(current_state='GGG_GGG'))
    # NOTE(review): 'Snowy' is not one of the states listed above - these
    # two calls look like leftovers from another example; confirm.
    print(weather_chain.next_state(current_state='Snowy'))
    weather_chain.generate_states(current_state='Snowy', no=10)

    #env = MarkovChain()
    #RL = QLearningTable(actions=list(range(env.n_actions)))
    #update()
#!/usr/bin/env python3
from sys import argv
import sqlite3
from datetime import datetime
from MarkovChain import MarkovChain
from config import COMMENT_FILE, MARKOV_DB, MIN_LEN, LOG_FILE

# Shared chain bound to the database configured in MARKOV_DB.
mc = MarkovChain(MARKOV_DB)


def makeDatabase():
    """Feed the whole of COMMENT_FILE to the chain and dump its database."""
    with open(COMMENT_FILE, 'r') as source:
        corpus = source.read()
    mc.generateDatabase(corpus)
    mc.dumpdb()


def printComments(qty):
    """Print ``qty`` generated comments, numbered from 0, cut to 140 chars."""
    for index in range(qty):
        print(str(index) + " - " + singleComment()[:140])


def singleComment():
    """Return a generated string of at least MIN_LEN characters.

    Strings shorter than MIN_LEN are discarded and generation is retried.
    """
    while True:
        comment = mc.generateString()
        if len(comment) >= MIN_LEN:
            return comment