Esempio n. 1
0
    def getD2Star(self,seqA,seqB,k,r,flag,sequences,kmersetdic,kmer_pro):
        """Compute a D2*-based similarity score between two sequences.

        Args:
            seqA, seqB: the two sequences being compared.
            k: k-mer word length.
            r: order of the Markov model used for probability estimation.
            flag: when False, count statistics are built from the pair's own
                k-mer dictionary; when True, the precomputed ``kmersetdic``
                and ``kmer_pro`` are used.
            sequences: full sequence dataset (not read in this method; kept
                for interface compatibility with callers).
            kmersetdic: precomputed k-mer index dictionary (flag=True path).
            kmer_pro: k-mer probability dictionary read in the denominator.

        Returns:
            1 / (D2* sum + eps); np.spacing(1) guards against division by
            zero when the accumulated statistic is 0.
        """
        # The Sequence helpers operate on a list of sequences.
        pair = [seqA, seqB]
        sq = Sequence.Sequence()
        # k-mer set and per-pair index dictionary
        kmerSet, dic = sq.getSeqKerSet(pair, k)

        if not flag:
            # NOTE(review): this branch passes ``dic`` where the True branch
            # passes (kmersetdic, kmer_pro), yet ``kmer_pro`` is still read
            # in the loop below — confirm that is intended for flag=False.
            lisFeaA = sq.getD2SCount(seqA, pair, k, r, flag, dic)
            lisFeaB = sq.getD2SCount(seqB, pair, k, r, flag, dic)
        else:
            lisFeaA = sq.getD2SCount(seqA, pair, k, r, flag, kmersetdic, kmer_pro)
            lisFeaB = sq.getD2SCount(seqB, pair, k, r, flag, kmersetdic, kmer_pro)

        # Accumulate the D2* statistic over every k-mer key.
        lenA = len(seqA)
        lenB = len(seqB)
        total = 0.0
        for key in lisFeaA:  # was dict.keys(lisFeaA): non-idiomatic, same keys
            total += (lisFeaA[key] * lisFeaB[key]) / math.sqrt(
                lenA * kmer_pro[key] * lenB * kmer_pro[key])
        return 1 / (total + np.spacing(1))
Esempio n. 2
0
    def make_model(self, uid, span_in_weeks):
        """Build a Markov location model for ``uid`` from location snapshots.

        Looks back ``span_in_weeks`` weeks from the newest ref_time in
        location_snapshot, pulls the (timestamp, place-name) rows in that
        window and feeds them to markov.Markov with fixed 15-minute bins.
        Returns the model, or None when the data cannot be read.
        (Python 2 source: print statements.)
        """

        # check to see if the uid is known
        try:
            self.__cursor.execute('select uid from usr where uid=?', (uid, ))
        except sqlite3.IntegrityError:
            # NOTE(review): a SELECT does not normally raise IntegrityError;
            # an unknown uid simply returns no rows here — confirm intent.
            print "make_model called for unknown uid %d" % uid
            return None

        # what's the latest time in the database for this user
        max_time = None
        try:
            self.__cursor.execute(
                'select max(ref_time) from location_snapshot where uid=?',
                (uid, ))
            mPt = self.__cursor.fetchone()

            # max(ref_time) comes back as text; parse it into a datetime
            max_time = datetime.strptime(str(mPt[0]), "%Y-%m-%d %H:%M:%S")
        except sqlite3.IntegrityError:
            print "could not get max ref_time from database"
            return None

        # count how much data is available for this user in the time range of
        # now to now - span_in_days
        d1 = max_time - timedelta(weeks=span_in_weeks)
        d2 = max_time

        print "D1: ", d1
        print "D2: ", d2

        data = None
        try:
            self.__cursor.execute(
                'select count(*) from location_snapshot where uid=? and ref_time>=? and ref_time<=?',
                (uid, d1, d2))
            num_entries = int(self.__cursor.fetchone()[0])

            self.__cursor.execute('select count(*) from location where uid=?',
                                  (uid, ))
            num_places = int(self.__cursor.fetchone()[0])

            # get actual data points
            self.__cursor.execute(
                'select loc.ref_time as "ts [timestamp]", position.name from location_snapshot loc, location position where loc.uid=? and loc.ref_time>? and loc.ref_time<? and loc.uid=position.uid and loc.location_id=position.lid',
                (uid, d1, d2))

            data = self.__cursor.fetchall()

        except sqlite3.IntegrityError:
            # NOTE(review): if the first execute above raises, num_entries /
            # num_places are unbound here and this print raises NameError.
            print "bad count for num_entries and/or num_places %d/%d" % (
                num_entries, num_places)
            return None

        print "num_places: %d" % num_places
        print "num_entries: %d" % num_entries

        # number of 15-minute bins in one week
        steps = int((24.0 * 7. * 60.0) / 15.0)
        m = markov.Markov(steps, num_places, 15, data)

        return m
Esempio n. 3
0
    def load(self, filename):
        """Restore a markov chain that was previously saved to disk.

        Args:
            filename: path to the saved chain file.
        """
        # Any cached generator refers to the old chain; invalidate it first.
        self.generator = None
        # Replace the chain, then populate it from the file.
        self.markov = markov.Markov()
        self.markov.load(filename)
Esempio n. 4
0
def generate_poem(subject):
    """Generate a poem of up to 100 words from the Shakespeare corpus.

    Args:
        subject: subdirectory of ./shakespeare/ to train the chain on.
    """
    corpus_dir = os.path.join("./shakespeare/", subject)
    chain = markov.Markov(order=1)
    chain.walk_directory(corpus_dir)
    return chain.generate_output(max_words=100)
Esempio n. 5
0
def cmd_markov(client, msg):
    """Handle the markov command: reply with generated text for a member.

    The text after the command (``msg.content[8:]``) names the target
    member; empty means the message author. Responds with a markov-generated
    sentence from the target's persisted chain file, or an explanatory
    message when the member/file is unknown.
    """
    yield from discord_typing(client, msg)

    tmp = msg.content[8:]
    target = ""

    if tmp == "Maki":
        response = "My markovs always say the same thing"
    else:
        if tmp == "":
            # No name given: use the author's own markov file.
            target = "{}-{}".format(msg.server.id, msg.author.id)
        else:
            try:
                target = "{}-{}".format(msg.server.id,
                                        msg.server.get_member_named(tmp).id)
            except AttributeError:
                # get_member_named returned None: unknown member.
                # Bug fix: this was misspelled "reponse", so ``response``
                # stayed unset and the final send raised NameError.
                response = "I can't find `{}`".format(tmp)

    if target != "":
        mfile = "./persist/markovs/" + target
        if os.path.isfile(mfile):
            # NOTE(review): the file handle is handed to Markov and never
            # closed here — confirm Markov reads eagerly or closes it.
            mc = markov.Markov(open(mfile))
            response = mc.generate_text(random.randint(20, 40))
        else:
            response = "I haven't seen `{}` speak yet.".format(tmp)

    yield from discord_send(client, msg, response)
Esempio n. 6
0
    def make_model(self, uid, span_in_weeks, bin_size_minutes):
        """Build a Markov model from binned location_state history.

        Walks back ``span_in_weeks`` weeks from the newest ref_time for
        ``uid``, samples the user's location every ``bin_size_minutes``
        minutes via __get_location, and feeds the (timestamp, name) pairs
        into markov.Markov.  Returns the model, or None on failure.
        (Python 2 source: print statements.)
        """

        print "make_model called"

        # check to see if the uid is known
        try:
            self.__cursor.execute('select uid from usr where uid=?', (uid,))
        except sqlite3.IntegrityError:
            # NOTE(review): a SELECT does not normally raise IntegrityError;
            # an unknown uid simply returns no rows here — confirm intent.
            print "make_model called for unknown uid %d" % uid
            return None

        print "make_model uid known"
        # what's the latest time in the database for this user
        max_time = None
        try:
            print "before select: ", uid, " ", type(uid)
            self.__cursor.execute('select max(ref_time) from location_state where uid=?', (uid,))
            print "after select"
            mPt = self.__cursor.fetchone()
            print "mPt: ", mPt

            # max(ref_time) comes back as text; parse it into a datetime
            max_time = datetime.strptime(str(mPt[0]), "%Y-%m-%d %H:%M:%S")
        except sqlite3.IntegrityError:
            print "could not get max ref_time from database"
            return None
        except sqlite3.OperationalError:
            # Table missing: create it and bail out for this call.
            print "location state database does not exist"
            self.__create_location_state_table()
            return None

        print "max time: ", max_time

        # count how much data is available for this user in the time range of
        # now to now - span_in_days
        d1 = max_time - timedelta(weeks=span_in_weeks)
        d2 = max_time

        print "D1: ", d1
        print "D2: ", d2

        # Sample the user's location once per bin across [d1, d2].
        data = []
        present = d1
        while(present<=d2):
            ts, name = self.__get_location(uid, present)
            data.append( (ts,name) )
            present = present + timedelta(minutes=bin_size_minutes)

        try:
            self.__cursor.execute('select count(*) from location where uid=?', (uid,))
            num_places = int(self.__cursor.fetchone()[0])
        except sqlite3.IntegrityError:
            # NOTE(review): num_entries is never defined in this method —
            # this print would raise NameError if this branch is reached.
            print "bad count for num_entries and/or num_places %d/%d" % ( num_entries, num_places )
            return None

        print "num_places: %d" % num_places

        # number of bins in one week
        steps = int((24.0 * 7. *60.0 ) / bin_size_minutes)
        m = markov.Markov(steps, num_places, bin_size_minutes, data)

        return m
Esempio n. 7
0
def make_sentence():
    """Generate a sentence from the corpus, using a Markov chain object.

    Regenerates until the candidate is under 256 characters (short
    sentences tend to come out better), then regenerates again while it
    contains quotation marks (those tend to be dodgy), and finally runs
    the result through error_correction.
    """
    chain = markov.Markov(TRAINING_PATH, CHAIN_LENGTH)
    candidate = chain.generate_sentence()
    while len(candidate) >= 256:
        candidate = chain.generate_sentence()
    while '"' in candidate:
        candidate = chain.generate_sentence()
    return error_correction(candidate)
Esempio n. 8
0
    def train(self, n, stream, noparagraphs=False):
        """Train a new markov chain, overwriting the existing one.

        Args:
            n: chain order passed to markov.Markov.
            stream: token source handed to the tokeniser.
            noparagraphs: forwarded to the tokeniser.
        """
        tokens = tokenise.Tokeniser(stream=stream,
                                    noparagraphs=noparagraphs)
        # Replace the chain first, then train it on the fresh token stream.
        self.markov = markov.Markov(n)
        self.markov.train(tokens)
        # The old generator belongs to the old chain; drop it.
        self.generator = None
Esempio n. 9
0
def generateMarkovText():
    '''
    Generate the Markov Text.
    '''
    raw_text = textEntry.get('1.0', END)
    # Refuse to train on too little input.
    if count_words(raw_text) <= 100:
        messagebox.showinfo("Generate Markov Text", "We need more words.")
        return
    chain = markov.Markov(raw_text, 50)
    chain.train()
    generated = chain.generate()
    messagebox.showinfo("Generate Markov Text", generated)
Esempio n. 10
0
def main(maxWordInSentence, dictFile, genNSentences=50):
    """Generate sentences and collect the tweetable ones.

    Args:
        maxWordInSentence: sentence-length cap passed to markov.Markov.
        dictFile: path of the Markov dictionary file.
        genNSentences: how many sentences to generate.

    Returns:
        The list of generated sentences that fit in a tweet (<= 140 chars)
        and end with a period.  (Bug fix: the list was previously built
        and then discarded.)
    """
    markovObj = markov.Markov(dictFile=dictFile,
                              maxWordInSentence=maxWordInSentence)

    twitterText = []

    for _ in range(genNSentences):
        text = markovObj.genText()
        print(text)
        if len(text) <= 140 and text.endswith('.'):
            twitterText.append(text)

    return twitterText
Esempio n. 11
0
 def __init__(self,
              client,
              help_text="Help text not configured",
              markov_file='main_log.log'):
     """Initialise the bot: read config, build the text generator, log in.

     Args:
         client: chat client instance (e.g. discord.Client()).
         help_text: text shown by the help command.
         markov_file: log file used to train the Markov text generator.
     """
     self.admins = []
     self.head_admins = []
     self.email = ""
     self.password = ""
     self.help_text = help_text
     self.client = client  #discord.Client()
     self.logger = None
     # read_conf() presumably fills the fields initialised above — confirm.
     self.read_conf()
     # NOTE(review): the file handle passed to Markov is never closed here
     # — confirm markov.Markov reads eagerly or closes it itself.
     self.textGen = markov.Markov(open(markov_file))
     self.login()
Esempio n. 12
0
def main():
    """Tokenise testie.txt into a dictionary file, then print generated text.

    Sentences that do not end with a period are silently skipped.
    """
    # Build the Markov dictionary file from the source text.
    dict_builder = genDict.GenDict(textFile='testie.txt',
                                   outputFile='markovdictfile.txt')
    dict_builder.fileToTokens()
    dict_builder.writeDictFile()

    # Generate ten candidate sentences from the freshly written dictionary.
    chain = markov.Markov(dictFile='markovdictfile.txt')
    for _ in range(10):
        sentence = chain.generateText()
        if sentence.endswith("."):
            print(sentence)
Esempio n. 13
0
def main():
    """Build a Markov dictionary from the input files named on the command line.

    Each file is read as UTF-8, falling back to windows-1252 when decoding
    fails; the resulting dictionary is written to ``dictFile`` and a summary
    line is printed.
    """
    (keyLen, fileList, dictFile) = checkargs()

    markovObj = markov.Markov(keyLen)

    for path in fileList:  # renamed from 'file': avoid shadowing the builtin
        try:
            markovObj.readFile(path, "utf-8")
        except UnicodeDecodeError:
            # Bug fix: was a bare except (which also swallows
            # KeyboardInterrupt, missing-file errors, etc.); only a decode
            # failure should trigger the legacy-encoding fallback.
            markovObj.readFile(path, "windows-1252")

    markovObj.outputDict(dictFile)

    print( "Generated Markov dictionary %s with processing %s input lines and %s input words " % ( dictFile, str(markovObj.getLineCount()), str(markovObj.getWordCount()) ) )
Esempio n. 14
0
def genDict():
    '''
    Generate the Markov Chain transition matrix. **This function should be run once to generate the dictionary file,
    after the file is created it does not need to be run unless changes are made
    '''
    out_file = "MarkovDict.txt"
    markovObj = markov.Markov()  # Create new Markov object
    # File of dialog from the Cornell Movie Dialog Corpus.
    fileList = ['formatted_movie_lines.txt']
    for path in fileList:
        try:
            markovObj.readFile(path, "utf-8")
        except UnicodeDecodeError:
            # Bug fix: was a bare except; only fall back to the legacy
            # encoding on an actual decode failure.
            markovObj.readFile(path, "windows-1252")

    markovObj.outputDict(out_file)  # generate the dictionary
    # Bug fix: the summary previously hard-coded "dict.txt" instead of the
    # file actually written (MarkovDict.txt).
    print( "Generated Markov dictionary %s with processing %s input lines and %s input words " % ( out_file, str(markovObj.getLineCount()), str(markovObj.getWordCount()) ) )
Esempio n. 15
0
    def __init__(self):
        """Create the bot with a fresh chain and restore last-seen tweet IDs.

        Raises:
            FileNotFoundError: when the bot data file is missing (i.e. the
                setup script has not been run); logged and re-raised.
        """
        self.markov = markov.Markov()
        self.api = None
        self.last_id_seen = None
        self.last_reply_id_seen = None

        # Restore the last tweet/reply IDs from the JSON state dump.
        try:
            with open(bot_data_file, 'r') as dump:
                saved = json.load(dump)
                self.last_id_seen = saved['last_id_seen']
                self.last_reply_id_seen = saved['last_reply_id_seen']
        except FileNotFoundError as error:
            twitterbot_logger.error("Could not find bot_data file.  Have you run the setup script yet?",
                                    exc_info=error)
            raise error
Esempio n. 16
0
def main():
    """Generate a short lowercase Markov sentence and tweet it.

    settings.txt supplies the four OAuth credentials, one per line.
    (Python 2 source: print statement and the ``file()`` builtin.)
    """
    settings = [line.rstrip('\n') for line in file('settings.txt')]
    source = file('corpus/all.txt')
    generator = markov.Markov(source)
    # Tweets are capped at 140 chars; pick a random target length.
    length = random.randint(20, 140)
    output = generator.generate_markov_text(length)
    output = output.lower()
    print output

    # and now to sing:
    # wasn't sure which key was which -
    # https://gist.github.com/smartboyathome/2599146
    token = settings[0]
    token_key = settings[1]
    con_secret = settings[2]
    con_secret_key = settings[3]
    # NOTE(review): OAuth receives con_secret_key before con_secret — the
    # reverse of the variable order above.  Per the linked gist this may be
    # deliberate; confirm which settings line is the consumer key vs secret.
    twitter = Twitter(auth=OAuth(token, token_key, con_secret_key, con_secret))
    twitter.statuses.update(status=output)
Esempio n. 17
0
def buildDict(inp, dict, k):
    """Build/extend a prefix -> Markov-entry mapping from a character stream.

    Slides a window of k+1 characters over ``inp`` (read one character at a
    time); for each window the first k characters form the prefix key and
    the last character is recorded as a suffix on that key's Markov entry.

    Args:
        inp: file-like object read one character at a time until EOF.
        dict: mapping to populate (name shadows the builtin; kept for
            interface compatibility with existing callers).
        k: prefix length.

    Returns:
        The same mapping, updated in place.
    """
    q = []  # sliding window holding the last k+1 characters
    c = inp.read(1)
    while c:  # empty string from read() means EOF
        q.append(c)
        if len(q) >= k + 1:
            # Prefix = the oldest k characters; suffix = the newest one.
            a = q.pop(0) + "".join(q[:k - 1])
            suffix = q[k - 1]
            if a in dict:  # was `a not in dict.keys()`: same test, idiomatic
                dict[a].add(suffix)
            else:
                mark = markov.Markov(a)
                mark.add(suffix)
                dict[a] = mark
        c = inp.read(1)
    return dict
Esempio n. 18
0
    def getD2SCount_pre(self, sequence, sequences, k, r, flag, kmersetdic,
                        kmer_pro):
        """Centre the k-mer counts of ``sequence`` for the D2S statistic.

        For each k-mer present in the counts, the observed count is
        replaced by count - n * P(kmer) (zero counts stay zero), where
        n = len(sequence) and P comes from ``kmer_pro``.

        Args:
            sequence: sequence whose counts are centred.
            sequences: full dataset, used only to build single-sequence
                Markov probabilities when ``flag`` is False.
            k: k-mer length.
            r: Markov model order.
            flag: False -> estimate probabilities from the sequence itself.
            kmersetdic: precomputed k-mer index dictionary.
            kmer_pro: precomputed k-mer probability dictionary.
        """
        ses = []
        ses.append(sequence)
        # get the k-mer count statistics for this single sequence
        lis, count = self.getSeqCount_pre(ses, k, kmersetdic)

        ma = mk.Markov()
        prodic = {}
        if flag == False:
            # NOTE(review): the loop below reads ``kmer_pro`` regardless of
            # ``flag`` and overwrites ``prodic`` for every key present in
            # the counts — confirm this per-sequence estimate is intended.
            prodic = ma.get_Single_kmer_Pro(sequence, sequences, k, r)
#
        n = len(sequence)
        for key in lis[0].keys():
            if lis[0][key] == 0:
                prodic[key] = 0
            else:
                prodic[key] = lis[0][key] - n * kmer_pro[key]
        return self.addfloat(prodic)
Esempio n. 19
0
def main(inpath, outpath, numdocs, minwords, maxwords):
    """Write ``numdocs`` fake documents as gzipped JSON lines.

    Each line is a JSON object with generated "text" of between minwords
    and maxwords words, random "level"/"source" fields and a sequential
    "id".  (Python 2 source: xrange and print statement.)
    """
    with open(inpath) as f:
        generator = markov.Markov(f)

    with gzip.open(outpath, 'w') as f:
        for i in xrange(numdocs):
            # progress marker every 1000 documents
            if i % 1000 == 0:
                print i
            json.dump(
                {
                    "text":
                    generator.generate_markov_text(
                        random.randint(minwords, maxwords)),
                    "level":
                    random.randint(1, 5),
                    "source":
                    random.randint(1, 20),
                    "id":
                    i
                }, f)
            # one JSON object per line (JSON-lines format)
            f.write('\n')
Esempio n. 20
0
def parseMessage(bot, update):
    """Telegram message handler: POS-tag the text and bank words by class.

    Accumulates nouns/verbs/adjectives/adverbs/numbers into module-level
    banks (everything else goes to ``garbage``); once the banks are full
    enough, with 1-in-8 probability builds a Markov sentence from them,
    posts it, and clears most banks.  (Python 2 source: print statements.)
    """

    print "parse"
    msg = update.message.text
    msgTokens = word_tokenize(msg)
    tags = nltk.pos_tag(msgTokens)
    print "done parsing"
    print tags

    #Add parts of speech to appropriate lists.
    for t in tags:
        ltag = (t[0].lower(), t[1])
        #nltk seems to not like contractions
        #also, don't store unreasonable length strings
        if "'" in ltag[0] or len(ltag[0]) > maxStringLen or len(ltag[0]) < 3:
            continue
        elif ltag[1] in {'NN', 'NNS', 'NNP', 'NNPS'}:
            nouns.values.append(ltag)
        elif ltag[1] in {'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'}:
            verbs.values.append(ltag)
        elif ltag[1] in {'JJ', 'JJR', 'JJS'}:
            adjectives.values.append(ltag)
        elif ltag[1] in {'RB', 'RBR', 'RBS', 'WRB'}:
            adverbs.values.append(ltag)
        elif t[1] in {'CD'}:
            numbers.values.append(ltag)
        else:
            garbage.values.append(ltag)

    #If we have enough values and it's been at least 20 messages, 1/n chance of posting.
    # NOTE(review): no message counter is visible here, so the "at least 20
    # messages" part of the comment above is not enforced by this code.
    if randint(0, 7) == 0 and len(nouns.values) > 5 and len(
            verbs.values) > 3 and len(adjectives.values) > 1:
        m = markov.Markov(nouns, adjectives, verbs, adverbs, numbers, pronouns,
                          prepositions, conjunctions, vowels, garbage)
        bot.sendMessage(update.message.chat_id, text=m.string)
        # Reset the banks after posting.
        # NOTE(review): numbers (and pronouns etc.) are not cleared here —
        # confirm that is intentional.
        nouns.values = []
        verbs.values = []
        adverbs.values = []
        garbage.values = []
        adjectives.values = []
Esempio n. 21
0
    def getD2StarCount_suf(self, sequence, sequences, k, r, flag, kmersetdic,
                           kmer_pro):
        """Standardise the k-mer counts of ``sequence`` for the D2* statistic.

        Each non-zero count becomes (count - n*P) / sqrt(n * (P + eps)),
        with n = len(sequence), P = kmer_pro[key], and eps = np.spacing(1)
        guarding against division by zero.  Zero counts stay zero.
        """
        n = len(sequence)
        ses = []
        ses.append(sequence)
        # get the k-mer count statistics for this single sequence
        lis, count = self.getSeqCount(ses, k, kmersetdic)
        ma = mk.Markov()
        prodic = {}
        if flag == False:
            # NOTE(review): the loop below reads ``kmer_pro`` regardless of
            # ``flag`` and overwrites ``prodic`` for every key present in
            # the counts — confirm this per-sequence estimate is intended.
            prodic = ma.get_Single_kmer_Pro(sequence, sequences, k, r)
#        else:
##            prodic=ma.get_Mul_kmer_Pro(sequence,sequences,k,r)
#            prodic=kmer_pro
        n = len(sequence)
        for key in lis[0].keys():
            if lis[0][key] == 0:
                prodic[key] = 0
            else:
                prodic[key] = (lis[0][key] - n * kmer_pro[key]) / math.sqrt(
                    n * (kmer_pro[key] + np.spacing(1)))
        return self.addfloat(prodic)
Esempio n. 22
0
def main():
    '''
    This is the main function to call BotBuddy
    '''
    # **ONLY RUN ONCE — comment out after the initial run and the file is
    # generated. Running this line will take about 3-5 min.
    genDict()
    chain = markov.Markov(dictFile=dictFile, maxWordInSentence=20)

    print(NAME, ": My name is BotBuddy, what is your name?")
    user_name = input("Enter your name: ")
    print(NAME, ": Hi ", user_name, "! Let's chat :)")

    # Chat loop: classify each input and respond until the user says goodbye.
    while True:
        user_input = input('\n' + user_name + ": ").lower()
        print()
        parsed = parseInput(user_input)
        pronoun, verb, noun = getPOS(parsed)

        if user_input in GOODBYE_INPUTS:
            print(NAME + ': ' + random.choice(GOODBYE_RESPONSES))
            break
        if greeting(user_input):
            print(NAME + ': ' + random.choice(GREETING_RESPONSES))
        elif re.match(r".*knock[- ]?knock joke", user_input):
            knockknock(user_name)
        else:
            # Fall through to a generated response.
            print(NAME + ': ' + genResponse(pronoun, verb, noun, chain, user_input))
Esempio n. 23
0
def main():
    """Start the Telegram bot and run a console debug loop.

    Registers parseMessage for plain messages and /apologize as a command,
    starts polling, then — when ``debug`` is set — reads console commands:
    list / forceprint / quit / help.  (Python 2 source: print statements
    and raw_input.)
    """

    #Get bot token.
    Config = ConfigParser.ConfigParser()
    Config.read("./jabberwocky.cfg")

    #Create event handler
    updater = Updater(Config.get("BotApi", "Token"))

    dp = updater.dispatcher

    #Add handlers
    dp.addTelegramMessageHandler(parseMessage)
    dp.addTelegramCommandHandler("apologize", apologize)

    #Start up bot.
    updater.start_polling()

    while True:
        if debug:
            cmd = raw_input("Enter command...\n")

            # Dump the current word banks.
            if cmd == 'list':
                print 'Nouns:'
                print nouns.values

                print 'Verbs:'
                print verbs.values

                print 'Adverbs:'
                print adverbs.values

                print 'Numbers:'
                print numbers.values

                print 'Adjectives:'
                print adjectives.values

                print 'Garbage:'
                print garbage.values

            #Force generation of a random sentence.
            elif cmd == 'forceprint':
                m = markov.Markov(nouns, adjectives, verbs, adverbs, numbers,
                                  pronouns, prepositions, conjunctions, vowels,
                                  garbage)
                print m.string

            #Shutdown bot.
            elif cmd == 'quit':
                updater.stop()
                sys.exit()

            elif cmd == 'help':
                print 'Commands are: list, forceprint, quit.'

            else:
                print 'Commmand not recognized.'

        else:
            # Not in debug mode: just idle while the updater polls.
            time.sleep(1)
Esempio n. 24
0
#!/usr/bin/python
# -*- coding: utf-8 -*-

from flask import Flask
from flask import jsonify
from flask import request
from flask import render_template
import os
import json
import markov

app = Flask(__name__)
# Chat transcript rendered on the index page.
messages = []

# Module-level Markov responder shared by all requests.
mark = markov.Markov()


@app.route("/")
def main():
    return render_template("index.html", data=messages)


@app.route('/chat', methods=['POST'])
def process():
    """POST /chat: run the user's text through the Markov bot, return JSON."""
    data = json.loads(request.data)
    text = data['text']
    response = mark.main(text)
    # Prefix the bot's (Sinhala) name.  str.decode here implies this file
    # is Python 2 source — on Python 3 this line would raise AttributeError.
    response = 'ගුණේ: '.decode('utf-8') + response
    response = {'response': response}
    return jsonify(**response)
Esempio n. 25
0
import markov as mc

# Train a Markov chain on the fan-fiction corpus and print one generated
# chain of text.
with open('harry_potter_fanfic.txt') as f:
    text = f.read()

module1 = mc.Markov(text)

print(module1.gen_chain())
Esempio n. 26
0
def calculate_2d_map(ind=0):
    """Sweep two gate voltages over a 25x25 grid and save the currents.

    Builds a 5-element wire potential (barrier/dot/barrier/dot/barrier),
    constructs a (1, 1) Markov graph model, evaluates calculate_current at
    each (V_d1, V_d2) grid point, and dumps inputs and outputs to a .npy
    file.  Returns the elapsed wall-clock time in seconds.

    Args:
        ind: unused here — presumably a worker/index argument for batch
            runs; confirm with callers.
    """
    st = time.time()
    physics_model = {}
    # multiple of eV
    physics_model['E_scale'] = 1
    # multiple of nm
    physics_model['dx_scale'] = 1
    physics_model['kT'] = 350e-6

    # just initial param to generate the graph object
    #b1 = [get_random(-200e-3,sigma_mean=0.02),get_random(-0.6),get_random(0.05),1]
    #d1 = [200e-3,get_random(-0.1),get_random(0.05),1]
    #b2 = [get_random(-250e-3,sigma_mean=0.02),get_random(0.0),get_random(0.05),1]
    #d2 = [200e-3,get_random(0.1),get_random(0.05),1]
    #b3 = [get_random(-200e-3,sigma_mean=0.02),get_random(0.6),get_random(0.05),1]

    # Fixed barrier (b*) and dot (d*) parameter vectors; d1[0]/d2[0] are
    # overwritten by the sweep below.
    b1 = [-200e-3, -0.6, 0.05, 1]
    d1 = [200e-3, -0.2, 0.05, 1]
    b2 = [-250e-3, 0.0, 0.05, 1]
    d2 = [200e-3, 0.2, 0.05, 1]
    b3 = [-200e-3, 0.6, 0.05, 1]

    x = np.linspace(-1, 1, 100)
    physics_model['x'] = x
    physics_model['list_b'] = [b1, d1, b2, d2, b3]
    physics_model['V'] = potential_profile.V_x_wire(x, physics_model['list_b'])

    # NOTE(review): uniform over (5e-3, 5e-3) is a constant — was a range
    # intended here?
    physics_model['K_onsite'] = np.random.uniform(5e-3, 5e-3)
    physics_model['sigma'] = x[1] - x[0]
    physics_model['x_0'] = 0.001 * (x[1] - x[0])
    physics_model['mu_l'] = (300.01e-3, 300.0e-3)
    physics_model['battery_weight'] = 100
    physics_model['short_circuit_current'] = 1
    physics_model['QPC_current_scale'] = 1e-4

    graph_model = (1, 1)
    tf_strategy = 'simple'

    graph = markov.Markov(graph_model, physics_model, tf_strategy)
    graph.find_n_dot_estimate()

    # Grid resolution per gate voltage axis.
    N_v = 25

    V_d1_vec = np.linspace(50e-3, 300e-3, N_v)
    V_d2_vec = np.linspace(50e-3, 300e-3, N_v)
    output_vec = []

    # Sweep both dot gate voltages; rebuild the potential at each point.
    for i in range(N_v):
        print(i)
        for j in range(N_v):
            d1[0] = V_d1_vec[i]
            d2[0] = V_d2_vec[j]
            physics_model['list_b'] = [b1, d1, b2, d2, b3]
            physics_model['V'] = potential_profile.V_x_wire(
                x, physics_model['list_b'])
            output_vec += [calculate_current((graph, physics_model))]

    # data is a dictionary with two keys, 'input' and 'output'
    # data['input'] = {physics_model, graph_model, tf_strategy}
    # data['output'] : list with output from calculate current
    data = {}
    data['input'] = {
        'physics_model': physics_model,
        'graph_model': graph_model,
        'tf_strategy': tf_strategy,
        'V_d1_vec': V_d1_vec,
        'V_d2_vec': V_d2_vec
    }

    data['output'] = output_vec

    import datetime
    dt = str(datetime.datetime.now())
    #np.save('/Users/ssk4/data/double_dot_' + str(N_v) + '_grid_' + dt + '.npy',data)
    # during testing
    np.save('/Users/ssk4/data/double_dot_test1.npy', data)

    return (time.time() - st)
Esempio n. 27
0
 def test_markov(self):
     """A chain trained on 'ab' should predict 'b' after seeing 'a'."""
     chain = mk.Markov('ab')
     self.assertEqual(chain.predict('a'), 'b')
Esempio n. 28
0
def init_fake_tweets():
    """(Re)build the module-level Markov text generator from the tweet dump."""
    global markov_obj
    source_path = "./raw_tweets_text.txt"
    markov_obj = markov.Markov(source_path)
Esempio n. 29
0
import os
import time
from sqlite3 import dbapi2 as sqlite3
from flask import Flask, g, request, render_template, redirect, url_for
import random
import json
import markov
from HTMLParser import HTMLParser

app = Flask(__name__)

#declare global objects
# presumably filled by init_tweets() below — verify against that function
filtered_tweets = []
# fake-tweet generator trained on the raw tweet dump
markov_obj = markov.Markov("./raw_tweets_text.txt")
# for unescaping HTML entities in tweet text
html_parser = HTMLParser()

#set path to database
app.config.update(
    dict(DATABASE=os.path.join(app.root_path,
                               'AlternativeTweetsLeaderboard.db')))


#initialize the server
@app.cli.command('init')
def init():
    """Flask CLI command: set up the database and both tweet pools.

    (Python 2 source: print statements.)
    """
    print "Initializing..."
    init_db()
    init_tweets()
    init_fake_tweets()
    print "Initialized the server"
Esempio n. 30
0
        print("----------------D2-----------------")
        print("k=", k)
        feature = "d" + str(k) + "countLis"

        sim = []
        for i in range(len(testLis)):
            tmp = 1 / (dis.getD2_feature(
                eval(feature)[testLis[i][0]],
                eval(feature)[testLis[i][1]]))
            sim.append(tmp)
        auc = roc_auc_score(testlabel, sim)
        print(auc)

        for r in range(0, 2):
            ## 获取kmer 概率集合
            mar = markov.Markov()
            ## 获得概率
            kmer_pro = mar.get_Mulk_Mul_kmer_Pro(datasets, k, k, r)
            dicp = "d" + str(k) + "dic"
            #######d2s d2star特征--------------------
            d2scountLis = sq.getD2SMul_SeqCount(datasets, k, r, True,
                                                eval(dicp), kmer_pro)
            d2starcountLis = sq.getD2Star_Mul_seq_Count(
                datasets, k, r, True, eval(dicp), kmer_pro)

            print("----------------D2s-----------------")

            print("k=", k)

            feature = "d" + str(2) + "scountLis"