Exemple #1
0
def test_backward():
    """Print the output of ``MarkovChain.backward`` next to reference values.

    Two scenarios are run back to back. Nothing is asserted: the computed
    ``betaHat`` and a hard-coded "expected" string are printed so the result
    can be compared by eye.
    """
    # Scenario 1: 2x3 transition matrix, scale factors ``c`` supplied by hand.
    # NOTE(review): the extra third column presumably models an exit state
    # (finite-duration chain) -- confirm against MarkovChain.
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.9, 0.1, 0], [0, 0.9, 0.1]])
    x = np.matrix([-0.2, 2.6, 1.3])
    c = np.matrix([1.0, 0.1625, 0.8266, 0.0581])

    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))

    # prob() is invoked on g1 but receives both distributions (as a column
    # here, as a row in scenario 2).
    # NOTE(review): confirm GaussD.prob accepts both layouts.
    pX, logS = g1.prob(np.matrix([[g1], [g2]]), x)
    # The chain is passed explicitly as the first argument in addition to
    # being the receiver -- presumably the signature expects it; verify.
    betaHat = mc.backward(mc, pX, c)
    print 'betaHat:', betaHat
    print 'expected: [1.0003 1.0393 0; 8.4182 9.3536 2.0822]'

    # Scenario 2: square 2x2 transition matrix; ``c`` now comes from
    # mc.forward instead of being hard-coded.
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.9, 0.1], [0.1, 0.9]])
    x = np.matrix([-0.2, 2.6, 1.3])

    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))

    pX, logS = g1.prob(np.matrix([g1, g2]), x)
    alphaHat, c = mc.forward(mc, pX)
    betaHat = mc.backward(mc, pX, c)
    print 'betaHat:', betaHat
    print 'expected: [1.0 6.798238264 1.125986646; 5.223087455 5.75095566 1.125986646]'
Exemple #2
0
def jsonToMarkovChain(path, encoding=None):
    """Build a MarkovChain from the JSON document stored at ``path``.

    Args:
        path: Location of the JSON file to read.
        encoding: Optional text encoding. When given (and truthy) the file
            is opened with ``io.open`` using that encoding; otherwise the
            builtin ``open`` is used.

    Returns:
        ``MarkovChain(data[SS], data[ID], data[TM])`` when every key listed
        in ``ARRAY_KEYS`` is present in the decoded document, otherwise
        ``MarkovChain(data)`` built from the whole document.
    """
    # Keep the original opener selection so the stream type handed to
    # json.load is unchanged.
    if encoding:
        source = io.open(path, 'r', encoding=encoding)
    else:
        source = open(path, 'r')

    # The context manager closes the handle even when json.load raises;
    # previously the file was never closed.
    with source:
        data = json.load(source)

    if all(k in data for k in ARRAY_KEYS):
        return MarkovChain(data[SS], data[ID], data[TM])
    return MarkovChain(data)
Exemple #3
0
    def initLeftRightMC(self, nStates, stateDuration=None):
        """Build a left-right MarkovChain with ``nStates`` states.

        nStates: number of states; a message is printed when it is <= 1.
        stateDuration: a single duration (float, np.float64 or length-1
            sequence) applied to every state, or one duration per state.
            Defaults to 10.0 for every state.

        Returns the MarkovChain built from the initial vector ``p0``
        (1 for the first state, 0 elsewhere) and the transition matrix ``A``.
        """
        defaultDuration = 10.0
        # NOTE(review): invalid input is only reported; execution continues.
        if nStates <= 1:
            print 'Number of states must be > 1'
        if stateDuration is None:
            stateDuration = defaultDuration
        # NOTE(review): a plain int is neither float nor np.float64, so it
        # falls through to len() and raises TypeError.
        if type(stateDuration) == float or type(
                stateDuration) == np.float64 or len(stateDuration) == 1:
            # Replicate the single duration into an (nStates, 1) column.
            stateDuration = np.tile(stateDuration, (nStates, 1))
        elif len(stateDuration) != nStates:
            # NOTE(review): only reported; the mismatched value is still used.
            print 'Incompatible length of state durations'

        # Lower bound for the self-transition probability of each state.
        minDiagProb = 0.1
        # Durations are clamped to at least 1.
        D = np.maximum(np.ones((stateDuration.shape)), stateDuration)
        # Self-transition probability (D - 1) / D, floored at minDiagProb.
        aii = np.maximum(
            np.ones((D.shape)) * minDiagProb, np.divide((D - 1), D))
        # Remaining probability mass goes to the next state: placed on the
        # first superdiagonal, then trimmed to the first nStates rows.
        aij = (1 - aii)
        aij = np.diagflat(aij, 1)
        aij = aij[0:nStates, :]
        # diag(aii) padded with one zero column, plus the superdiagonal part:
        # the result has one more column than rows.
        A = np.concatenate((np.diagflat(aii), np.zeros(
            (nStates, 1))), axis=1) + aij
        # Initial distribution: all mass on the first state.
        p0 = np.concatenate((np.matrix([1]), np.zeros((nStates - 1, 1))),
                            axis=0)

        mc = MarkovChain(p0, A)

        return mc
Exemple #4
0
def multi_dim_observation():
    """Draw from a two-state HMM whose outputs are 2-D Gaussians.

    A MarkovChain with a 2x1 initial vector and a 2x2 transition matrix is
    paired with two GaussD distributions (means [0, 0] and [3, 3], each with
    covariance [[2, 1], [1, 4]]). The HMM's ``rand`` is called with 100 and
    its two results are returned as the tuple ``(X, S)``.
    """
    chain = MarkovChain(
        np.matrix([[0.75], [0.25]]),
        np.matrix([[0.99, 0.01], [0.03, 0.97]]),
    )
    emitters = [
        GaussD(mean=np.matrix([[0], [0]]), cov=np.matrix([[2, 1], [1, 4]])),
        GaussD(mean=np.matrix([[3], [3]]), cov=np.matrix([[2, 1], [1, 4]])),
    ]
    model = HMM(chain, np.matrix([[emitters[0]], [emitters[1]]]))

    X, S = model.rand(model, 100)
    return X, S
def parse_porn_csv(csv_file, title_index):
    """Return a MarkovChain backed by ``./<csv_file>_markov_db``.

    If that database file already exists, the chain is created from it and
    returned immediately. Otherwise the pipe-delimited CSV is read, every
    row's ``title`` field is concatenated with a trailing '.', and the
    resulting text is fed to ``generateDatabase`` followed by ``dumpdb``.

    csv_file: path of the '|'-delimited CSV to read.
    title_index: accepted for compatibility; not used by this function.
    """
    db_path = './{csv}_markov_db'.format(csv=csv_file)
    if os.path.isfile('./{csv}_markov_db'.format(csv=csv_file)):
        return MarkovChain(db_path)

    columns = [
        'iframe', 'thumbnail', 'samples', 'title',
        'tags', 'more_tags', 'unknown', 'length',
        'views', 'likes', 'dislikes'
    ]
    collected = StringIO.StringIO()
    with open(csv_file) as handle:
        for record in csv.DictReader(handle, delimiter='|',
                                     fieldnames=columns):
            collected.write(record['title'] + '.')

    chain = MarkovChain(db_path)
    chain.generateDatabase(collected.getvalue())
    chain.dumpdb()
    return chain
Exemple #6
0
def finite_duration():
    """Draw from a two-state HMM whose transition matrix is 2x3.

    The MarkovChain gets a 2x1 initial vector and a transition matrix with
    one more column than rows; the two scalar GaussD outputs have
    mean 0 / stdev 1 and mean 3 / stdev 2. The HMM's ``rand`` is called with
    100 and its two results are returned as the tuple ``(X, S)``.
    """
    chain = MarkovChain(
        np.matrix([[0.75], [0.25]]),
        np.matrix([[0.4, 0.4, 0.2], [0.1, 0.6, 0.3]]),
    )
    emitters = [
        GaussD(mean=np.matrix([0]), stdev=np.matrix([1])),
        GaussD(mean=np.matrix([3]), stdev=np.matrix([2])),
    ]
    model = HMM(chain, np.matrix([[emitters[0]], [emitters[1]]]))

    X, S = model.rand(model, 100)
    return X, S
 def __init__(self, user, filename, repliesFilename):
     """
     Store the collection settings and create the MarkovChain for ``filename``.

     user: id of the user whose data is collected and used by the app.
     filename: file that stores the user's tweets (excluding replies),
         e.g. "data_tweets.json". The file must already contain {} or
         loading will crash.
     repliesFilename: file that stores the user's replies to other users,
         e.g. "data_replies.json". The file must already contain {} or
         loading will crash.
     """
     self._user, self._stream = user, None
     self._filename = filename
     self._repliesFilename = repliesFilename
     self._repliedUsers = {}
     # Built from the tweets file name stored above.
     self._MarkovChain = MarkovChain(self._filename)
def generate_dict(api, user_id):
    """Build a MarkovChain from a user's tweets, skipping 'RT' and '@' ones.

    Args:
        api: tweepy API object whose ``user_timeline`` is paged through.
        user_id: id passed to ``user_timeline`` to select the account.

    Returns:
        A MarkovChain constructed from the list of kept tweet texts, each
        encoded as UTF-8.
    """
    tweets = []
    for tweet in tweepy.Cursor(api.user_timeline, id=user_id).items():
        text = tweet._json['text']
        # Filter on the decoded text: calling str.startswith on the encoded
        # bytes raises TypeError on Python 3. Both prefixes are ASCII, so
        # the outcome is the same as testing the UTF-8 bytes.
        if not text.startswith(('RT', '@')):
            tweets.append(text.encode('utf-8'))

    return MarkovChain(tweets)
Exemple #9
0
def test_forward():
    """Print ``forward`` and ``logprob`` results next to reference values.

    Nothing is asserted: each computed quantity is printed together with a
    hard-coded "expected" string for manual comparison.
    """

    # Part 1: forward pass on a chain with a 2x3 transition matrix.
    # NOTE(review): the extra third column presumably models an exit state
    # (finite-duration chain) -- confirm against MarkovChain.
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.9, 0.1, 0], [0, 0.9, 0.1]])
    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([2]))

    # output sequence
    x = np.matrix([-0.2, 2.6, 1.3])
    # prob() is invoked on g1 but receives both distributions.
    pX, logS = g1.prob(np.matrix([g1, g2]), x)
    # The chain is passed explicitly as the first argument in addition to
    # being the receiver -- presumably the signature expects it; verify.
    alphaHat, c = mc.forward(mc, pX)
    print 'alphaHat:', alphaHat, 'expected: [1 0.3847 0.4189; 0 0.6153 0.5811]'
    print 'c:', c, 'expected: [1 0.1625 0.8266 0.0581]'

    # Part 2: log-probability of x under a single HMM built on the same chain.
    h = HMM(mc, np.matrix([[g1], [g2]]))
    # logP = P(X|h)
    logP = h.logprob(h, x)
    print 'logP: ', logP, 'expected: -9.1877'

    # Part 3: log-probability of the same x under two different HMMs at
    # once; h1 and h2 differ only in their transition matrices.
    initMatrix = np.matrix([[1.0], [0]])
    transitionMatrix = np.matrix([[0.0, 1.0, 0.0], [0.0, 0.7, 0.3]])
    x = np.matrix([-0.2, 2.6, 1.3])
    mc = MarkovChain(initMatrix, transitionMatrix)
    g1 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g2 = GaussD(mean=np.matrix([3]), stdev=np.matrix([1]))
    h1 = HMM(mc, np.matrix([[g1], [g2]]))

    transitionMatrix = np.matrix([[0.5, 0.5, 0.0], [0.0, 0.5, 0.5]])
    mc2 = MarkovChain(initMatrix, transitionMatrix)
    g3 = GaussD(mean=np.matrix([0]), stdev=np.matrix([1]))
    g4 = GaussD(mean=np.matrix([3]), stdev=np.matrix([1]))
    h2 = HMM(mc2, np.matrix([[g3], [g4]]))

    # logprob is invoked on h1 but receives a matrix holding both models.
    logP = h1.logprob(np.matrix([h1, h2]), x)
    print 'logP:', logP, 'expected: [-5.562463348 -6.345037882]'
Exemple #10
0
    def __init__(self, soundfont_name=None):
        """Initialise fluidsynth and the chord-progression chain.

        soundfont_name: soundfont path handed to ``fluidsynth.init`` with the
            'alsa' driver; when None, '../soundfonts/soundfont.sf2' is used.
        """
        font_path = ('../soundfonts/soundfont.sf2'
                     if soundfont_name is None else soundfont_name)
        fluidsynth.init(font_path, 'alsa')

        self.m_chain = MarkovChain(get_all_progressions())
        self.sim = self.m_chain.infinite_progression()

        # Three-number tuples, repeated in a fixed eight-slot pattern.
        # NOTE(review): presumably per-slot bass probabilities -- confirm
        # with the code that reads self.bassproba.
        first_accent = (0.7, 0.05, 0.2)
        second_accent = (0.2, 0.05, 0.7)
        offbeat = (0.0, 0.4, 0.1)
        self.bassproba = [first_accent, offbeat, second_accent, offbeat] * 2

        self.current = self.m_chain.START
Exemple #11
0
def generate_words():
    """Return ``walk(10)`` of a MarkovChain built from ``./frost.txt``.

    The file is split into whitespace-separated words (line by line, in
    file order) and the resulting list is handed to MarkovChain.

    Returns:
        Whatever ``MarkovChain.walk(10)`` returns for that word list.
    """
    # The context manager closes the file; it used to be left open.
    with open("./frost.txt", "r") as corpus:
        words_list = [word for line in corpus for word in line.split()]

    return MarkovChain(words_list).walk(10)
Exemple #12
0
def main(arg=None):
    """Main Function

    Builds a MarkovChain from input lines, prints the lines, the chain's
    dictionary, and three generated lines.

    Args:
        arg: Accepted for compatibility. NOTE: it is always overwritten by
            ``sys.argv``, so a value passed by the caller has no effect.

    The lines come from ``get_default_lines()`` when no command-line
    argument is given, otherwise from the file named by ``sys.argv[1]``
    (stripped, then split on newlines).
    """
    arg = sys.argv

    if len(arg) < 2:
        lines = get_default_lines()
    else:
        # Close the input file deterministically; it used to be leaked.
        with open(arg[1], 'r') as input_file:
            lines = input_file.read().strip().split('\n')

    # Single-argument print(...) produces the same output on Python 2
    # (print statement with a parenthesised expression) and Python 3.
    print(lines)

    markov_chain = MarkovChain(lines)
    markov_dict = markov_chain.get_dictionary()
    print(markov_dict)
    for _ in range(3):
        print(markov_chain.generate_line())
def get_markov_chain():
    """Return the object pickled in ``FILE``, or a new MarkovChain.

    When ``FILE`` is not an existing regular file, an empty ``MarkovChain()``
    is returned instead.
    """
    if not os.path.isfile(FILE):
        return MarkovChain()

    # NOTE(review): pickle.load can execute arbitrary code from the file;
    # this is only safe while FILE is written exclusively by this program.
    with open(FILE, 'rb') as pickle_file:
        restored = pickle.load(pickle_file)
    return restored
Exemple #14
0
 def __init__(self, k, n):
     """Create an empty MarkovChain and remember ``k`` and ``n``."""
     self.mc = MarkovChain()
     self.k, self.n = k, n
def main():
    """Load ``player_name.txt`` and run ``markov_chain_process`` on it."""
    names = load_dataset(txt_name="player_name.txt")
    chain = MarkovChain(dataset=names)
    chain.markov_chain_process()
Exemple #16
0
 def setUp(self):
     """Create the MarkovChain under test from "empty.txt"."""
     # NOTE(review): presumably run before each test (unittest-style
     # fixture) -- confirm against the enclosing class.
     self.mc = MarkovChain("empty.txt")
Exemple #17
0
    "GreatEspectations.txt", "TaleOfTwoCities.txt"
]
# Prefix every book file name with the project directory, in place.
for i, bookName in enumerate(bookArray):
    bookArray[i] = "/home/team2/Project/" + bookName

# Number of sentences to generate.
numSentences = 12

# Word the generated text starts with.
startWord = 'a'

# Build the edge/vertex data frames and wrap them in a GraphFrame.
markovModel = DFMaker(bookArray)
edgeDF = markovModel.createEdgeDF(sc)
verticiesDF = markovModel.getVerticiesDF(sc)
graphFrame = GraphFrame(verticiesDF, edgeDF)

sentences = ""
startWordRows = verticiesDF[verticiesDF["id"] == startWord].collect()
if startWordRows != []:
    markovChain = MarkovChain(graphFrame, startWord)
    # A state found in string.punctuation counts as the end of a sentence.
    while numSentences > 0:
        currentState = markovChain.getState()
        if currentState in string.punctuation:
            numSentences -= 1
        sentences += markovChain.getState() + " "
        markovChain.nextState()
else:
    print("The word you entered does not appear in any of the texts!")

print(sentences)
print(markovModel.sortedLikeliness("the"))
    nb_train = len(train_instances)
    # print(nb_train)

    test_data_path = data_dir + 'test_lines.txt'
    test_instances = MC_utils.read_instances_lines_from_file(test_data_path)
    nb_test = len(test_instances)
    # print(nb_test)
    print("---------------------@Build knowledge-------------------------------")
    MAX_SEQ_LENGTH, item_dict, reversed_item_dict, item_probs, item_freq_dict, user_dict = MC_utils.build_knowledge(train_instances+test_instances)

    if not os.path.exists(o_dir):
        os.makedirs(o_dir)
    saved_file = os.path.join(o_dir, 'transition_matrix_MC.npz')
    # print("Save model in ", saved_file)
    transition_matrix = sp.load_npz(saved_file)
    mc_model = MarkovChain(item_dict, reversed_item_dict, item_freq_dict, transition_matrix, mc_order)

    if ex_file is not None:
        ex_instances = MC_utils.read_instances_lines_from_file(ex_file)
    else :
        ex_instances = test_instances
    for i in random.sample(ex_instances, nb_predict):
        elements = i.split('|')
        b_seq = elements[1:]
        # prev_basket = [item for item in re.split('[\\s]+',b_seq[-2].strip())]
        prev_item = []
        for prev_basket in b_seq[:-1]:
            prev_item += re.split('[\\s]+', prev_basket.strip())
        target_basket = [item for item in re.split('[\\s]+',b_seq[-1].strip())]
        topk_item = mc_model.top_predicted_item(prev_item, topk)
        correct_set = set(topk_item).intersection(set(target_basket))
Exemple #19
0
from MarkovChain import MarkovChain

if __name__ == "__main__":
    # Demo script: build a MarkovChain from a transition matrix and a list
    # of state names, then query and generate states.
    # NOTE(review): the matrix below is 3x3 while 64 state names are
    # passed -- confirm what MarkovChain expects.
    transition_matrix = [[0.8, 0.19, 0.01], [0.2, 0.7, 0.1], [0.1, 0.2, 0.7]]

    # NOTE(review): 'BGG_BBG', 'BGG_BGB' and 'BGG_BBB' each appear twice in
    # the list, and no 'GBG_BBG' / 'GBG_BGB' / 'GBG_BBB' entries exist --
    # possibly a typo in the fourth row; verify.
    weather_chain = MarkovChain(
        transition_matrix=transition_matrix,
        states=[
            'GGG_GGG', 'GGG_BGG', 'GGG_GBG', 'GGG_GGB', 'GGG_GBB', 'GGG_BBG',
            'GGG_BGB', 'GGG_BBB', 'GGB_GGG', 'GGB_BGG', 'GGB_GBG', 'GGB_GGB',
            'GGB_GBB', 'GGB_BGB', 'GGB_BBG', 'GGB_BBB', 'GBG_GGG', 'GBG_BGG',
            'GBG_GBG', 'GBG_GGB', 'GBG_GBB', 'BGG_BBG', 'BGG_BGB', 'BGG_BBB',
            'BGG_GGG', 'BGG_BGG', 'BGG_GBG', 'BGG_GGB', 'BGG_GBB', 'BGG_BBG',
            'BGG_BGB', 'BGG_BBB', 'GBB_GGG', 'GBB_BGG', 'GBB_GBG', 'GBB_GGB',
            'GBB_GBB', 'GBB_BBG', 'GBB_BGB', 'GBB_BBB', 'BBG_GGG', 'BBG_BGG',
            'BBG_GBG', 'BBG_GGB', 'BBG_GBB', 'BBG_BBG', 'BBG_BGB', 'BBG_BBB',
            'BGB_GGG', 'BGB_BGG', 'BGB_GBG', 'BGB_GGB', 'BGB_GBB', 'BGB_BBG',
            'BGB_BGB', 'BGB_BBB', 'BBB_GGG', 'BBB_BGG', 'BBB_GBG', 'BBB_GGB',
            'BBB_GBB', 'BBB_BBG', 'BBB_BGB', 'BBB_BBB'
        ])

    # Print the state that follows 'GGG_GGG'.
    print(weather_chain.next_state(current_state='GGG_GGG'))

    # NOTE(review): 'Snowy' is not one of the state names listed above.
    print(weather_chain.next_state(current_state='Snowy'))

    # The return value of generate_states is discarded here.
    weather_chain.generate_states(current_state='Snowy', no=10)

    #env = MarkovChain()

    #RL = QLearningTable(actions=list(range(env.n_actions)))
    #update()
#!/usr/bin/env python3
from sys import argv
import sqlite3
from datetime import datetime
from MarkovChain import MarkovChain
from config import COMMENT_FILE, MARKOV_DB, MIN_LEN, LOG_FILE

# Module-level chain shared by makeDatabase, printComments and singleComment;
# it is constructed from the MARKOV_DB setting at import time.
mc = MarkovChain(MARKOV_DB)


def makeDatabase():
    """Feed the contents of COMMENT_FILE to the chain, then call dumpdb()."""
    with open(COMMENT_FILE, 'r') as source:
        corpus = source.read()
    mc.generateDatabase(corpus)
    mc.dumpdb()


def printComments(qty):
    """Print ``qty`` generated comments, numbered from 0.

    Each comment is at least MIN_LEN characters long before being truncated
    to its first 140 characters for display.

    Args:
        qty: Number of comments to print.
    """
    for i in range(qty):
        # Reuse singleComment() instead of duplicating its retry loop.
        print(str(i) + " - " + singleComment()[:140])


def singleComment():
    """Return one generated string that is at least MIN_LEN characters long.

    Strings are requested from the module-level chain until one is long
    enough.
    """
    while True:
        candidate = mc.generateString()
        if len(candidate) >= MIN_LEN:
            return candidate