def getD2Star(self, seqA, seqB, k, r, flag, sequences, kmersetdic, kmer_pro):
    """Return a D2Star-based dissimilarity score between seqA and seqB.

    seqA, seqB : the two sequences to compare
    k          : k-mer length
    r          : markov-model order used by the count routines
    flag       : False -> derive the k-mer dictionary from this pair only;
                 True  -> use the precomputed `kmersetdic` / `kmer_pro`
    sequences  : full sequence collection (kept for interface compatibility)
    kmersetdic : precomputed k-mer set dictionary (used when flag is True)
    kmer_pro   : k-mer probability table used in the normalisation term

    Returns 1 / (D2Star + eps), so more-similar pairs score LOWER.
    """
    # work on the pair as a two-element list
    seq_pair = [seqA, seqB]
    Sq = Sequence.Sequence()
    # k-mer key set / dictionary derived from just this pair
    kmerSet, dic = Sq.getSeqKerSet(seq_pair, k)
    # (removed an unused markov.Markov() instance; its only uses were
    # commented out)
    if flag == False:
        lisFeaA = Sq.getD2SCount(seqA, seq_pair, k, r, flag, dic)
        lisFeaB = Sq.getD2SCount(seqB, seq_pair, k, r, flag, dic)
    else:
        lisFeaA = Sq.getD2SCount(seqA, seq_pair, k, r, flag, kmersetdic, kmer_pro)
        lisFeaB = Sq.getD2SCount(seqB, seq_pair, k, r, flag, kmersetdic, kmer_pro)
    # compute the D2Star statistic
    su = 0.0
    lenA = len(seqA)
    lenB = len(seqB)
    # fix: iterate the mapping directly instead of the `dict.keys(lisFeaA)`
    # anti-idiom
    for key in lisFeaA:
        su += (lisFeaA[key] * lisFeaB[key]) / math.sqrt(
            lenA * kmer_pro[key] * lenB * kmer_pro[key])
    # np.spacing(1) (machine epsilon) guards against division by zero
    return 1 / (su + np.spacing(1))
def make_model(self, uid, span_in_weeks):
    # Build a markov.Markov model from this user's location snapshots over
    # the last `span_in_weeks` weeks (relative to the newest snapshot).
    # Returns the model, or None on any database problem.
    # NOTE(review): Python 2 code (print statements).
    # check to see if the uid is known
    try:
        self.__cursor.execute('select uid from usr where uid=?', (uid, ))
    except sqlite3.IntegrityError:
        # NOTE(review): a SELECT normally cannot raise IntegrityError;
        # this guard probably never fires — confirm the intended exception.
        print "make_model called for unknown uid %d" % uid
        return None
    # what's the latest time in the database for this user
    max_time = None
    try:
        self.__cursor.execute(
            'select max(ref_time) from location_snapshot where uid=?',
            (uid, ))
        mPt = self.__cursor.fetchone()
        # ref_time is stored as "YYYY-MM-DD HH:MM:SS" text
        max_time = datetime.strptime(str(mPt[0]), "%Y-%m-%d %H:%M:%S")
    except sqlite3.IntegrityError:
        print "could not get max ref_time from database"
        return None
    # count how much data is available for this user in the time range of
    # now to now - span_in_days
    d1 = max_time - timedelta(weeks=span_in_weeks)  # window start
    d2 = max_time                                   # window end (newest)
    print "D1: ", d1
    print "D2: ", d2
    data = None
    try:
        self.__cursor.execute(
            'select count(*) from location_snapshot where uid=? and ref_time>=? and ref_time<=?',
            (uid, d1, d2))
        num_entries = int(self.__cursor.fetchone()[0])
        self.__cursor.execute('select count(*) from location where uid=?',
                              (uid, ))
        num_places = int(self.__cursor.fetchone()[0])
        # get actual data points: (timestamp, place-name) pairs in the window
        self.__cursor.execute(
            'select loc.ref_time as "ts [timestamp]", position.name from location_snapshot loc, location position where loc.uid=? and loc.ref_time>? and loc.ref_time<? and loc.uid=position.uid and loc.location_id=position.lid',
            (uid, d1, d2))
        data = self.__cursor.fetchall()
    except sqlite3.IntegrityError:
        # NOTE(review): if the FIRST execute above raises, num_entries and
        # num_places are unbound here and this print raises NameError.
        print "bad count for num_entries and/or num_places %d/%d" % (
            num_entries, num_places)
        return None
    print "num_places: %d" % num_places
    print "num_entries: %d" % num_entries
    # number of 15-minute bins in one week
    steps = int((24.0 * 7. * 60.0) / 15.0)
    m = markov.Markov(steps, num_places, 15, data)
    return m
def load(self, filename):
    """Load a markov chain from *filename*, discarding any cached generator."""
    self.generator = None
    self.markov = markov.Markov()
    self.markov.load(filename)
def generate_poem(subject):
    """Build a poem (up to 100 words) from the Shakespeare corpus for *subject*."""
    corpus_dir = os.path.join("./shakespeare/", subject)
    chain = markov.Markov(order=1)
    chain.walk_directory(corpus_dir)
    return chain.generate_output(max_words=100)
def cmd_markov(client, msg):
    """Reply with markov-generated text for the requesting (or named) user."""
    yield from discord_typing(client, msg)
    tmp = msg.content[8:]  # argument text after the command prefix
    target = ""
    if tmp == "Maki":
        response = "My markovs always say the same thing"
    else:
        if tmp == "":
            # no argument: use the message author's own log
            target = "{}-{}".format(msg.server.id, msg.author.id)
        else:
            try:
                target = "{}-{}".format(msg.server.id,
                                        msg.server.get_member_named(tmp).id)
            except AttributeError:
                # get_member_named returned None -> unknown member.
                # fix: was misspelled `reponse`, which left `response`
                # unbound and crashed the final send with NameError
                response = "I can't find `{}`".format(tmp)
        if target != "":
            mfile = "./persist/markovs/" + target
            if os.path.isfile(mfile):
                # fix: close the log file instead of leaking the handle
                with open(mfile) as log_fh:
                    mc = markov.Markov(log_fh)
                    response = mc.generate_text(random.randint(20, 40))
            else:
                response = "I haven't seen `{}` speak yet.".format(tmp)
    yield from discord_send(client, msg, response)
def make_model(self, uid, span_in_weeks, bin_size_minutes):
    # Build a markov.Markov model from this user's location state, sampled
    # every `bin_size_minutes` over the last `span_in_weeks` weeks.
    # Returns the model, or None on any database problem.
    # NOTE(review): Python 2 code (print statements).
    print "make_model called"
    # check to see if the uid is known
    try:
        self.__cursor.execute('select uid from usr where uid=?', (uid,))
    except sqlite3.IntegrityError:
        # NOTE(review): a SELECT normally cannot raise IntegrityError;
        # this guard probably never fires.
        print "make_model called for unknown uid %d" % uid
        return None
    print "make_model uid known"
    # what's the latest time in the database for this user
    max_time = None
    try:
        print "before select: ", uid, " ", type(uid)
        self.__cursor.execute('select max(ref_time) from location_state where uid=?', (uid,))
        print "after select"
        mPt = self.__cursor.fetchone()
        print "mPt: ", mPt
        # ref_time is stored as "YYYY-MM-DD HH:MM:SS" text
        max_time = datetime.strptime(str(mPt[0]), "%Y-%m-%d %H:%M:%S")
    except sqlite3.IntegrityError:
        print "could not get max ref_time from database"
        return None
    except sqlite3.OperationalError:
        # table missing: create it lazily and bail out for this call
        print "location state database does not exist"
        self.__create_location_state_table()
        return None
    print "max time: ", max_time
    # count how much data is available for this user in the time range of
    # now to now - span_in_days
    d1 = max_time - timedelta(weeks=span_in_weeks)  # window start
    d2 = max_time                                   # window end (newest)
    print "D1: ", d1
    print "D2: ", d2
    # sample the user's location at fixed bin_size_minutes intervals
    data = []
    present = d1
    while(present<=d2):
        ts, name = self.__get_location(uid, present)
        data.append( (ts,name) )
        present = present + timedelta(minutes=bin_size_minutes)
    try:
        self.__cursor.execute('select count(*) from location where uid=?', (uid,))
        num_places = int(self.__cursor.fetchone()[0])
    except sqlite3.IntegrityError:
        # NOTE(review): `num_entries` is never defined in this function, so
        # this print would raise NameError if the handler ever ran.
        print "bad count for num_entries and/or num_places %d/%d" % ( num_entries, num_places )
        return None
    print "num_places: %d" % num_places
    # number of bin_size_minutes bins in one week
    steps = int((24.0 * 7. *60.0 ) / bin_size_minutes)
    m = markov.Markov(steps, num_places, bin_size_minutes, data)
    return m
def make_sentence():
    """Generate a sentence from the corpus, using a Markov chain object."""
    chain = markov.Markov(TRAINING_PATH, CHAIN_LENGTH)
    candidate = chain.generate_sentence()
    # shorter sentences tend to come out better, so regenerate long ones
    while len(candidate) >= 256:
        candidate = chain.generate_sentence()
    # quotation marks are often unbalanced, so skip sentences containing them
    while '"' in candidate:
        candidate = chain.generate_sentence()
    return error_correction(candidate)
def train(self, n, stream, noparagraphs=False):
    """Train a new markov chain of order *n* from *stream*, replacing the
    current chain and invalidating any cached generator."""
    tokens = tokenise.Tokeniser(stream=stream, noparagraphs=noparagraphs)
    self.markov = markov.Markov(n)
    self.markov.train(tokens)
    self.generator = None
def generateMarkovText():
    '''
    Generate the Markov Text.
    '''
    source = textEntry.get('1.0', END)
    # require a reasonably sized corpus before attempting generation
    if count_words(source) <= 100:
        messagebox.showinfo("Generate Markov Text", "We need more words.")
        return
    model = markov.Markov(source, 50)
    model.train()
    generated = model.generate()
    messagebox.showinfo("Generate Markov Text", generated)
def main(maxWordInSentence, dictFile, genNSentences=50):
    """Generate sentences and collect the tweet-sized ones.

    A sentence is kept only when it fits in 140 characters and ends with
    a full stop; every generated sentence is printed regardless.
    """
    generator = markov.Markov(dictFile=dictFile,
                              maxWordInSentence=maxWordInSentence)
    twitterText = []
    for _ in range(genNSentences):
        sentence = generator.genText()
        print(sentence)
        if len(sentence) <= 140 and sentence.endswith('.'):
            twitterText.append(sentence)
def __init__(self, client, help_text="Help text not configured", markov_file='main_log.log'):
    """Initialise bot state, read the config, build the markov text
    generator from *markov_file*, and log in."""
    self.admins = []
    self.head_admins = []
    self.email = ""
    self.password = ""
    self.help_text = help_text
    self.client = client  # discord.Client()
    self.logger = None
    self.read_conf()
    # NOTE(review): the handle passed to Markov is never closed here —
    # presumably Markov consumes it immediately; confirm before changing.
    self.textGen = markov.Markov(open(markov_file))
    self.login()
def main():
    """Build a markov dictionary from 'testie.txt', then print the generated
    outputs that look like complete sentences."""
    builder = genDict.GenDict(textFile='testie.txt', outputFile='markovdictfile.txt')
    builder.fileToTokens()
    builder.writeDictFile()
    generator = markov.Markov(dictFile='markovdictfile.txt')
    for _ in range(10):
        candidate = generator.generateText()
        # only keep outputs that end like a sentence
        if candidate.endswith("."):
            print(candidate)
def main():
    """Build a markov dictionary from the input files named on the command line.

    Reads each file as UTF-8, falling back to windows-1252 when decoding
    fails, then writes the dictionary and prints a summary.
    """
    (keyLen, fileList, dictFile) = checkargs()
    markovObj = markov.Markov(keyLen)
    # loop variable renamed from `file` to avoid shadowing the builtin
    for path in fileList:
        try:
            markovObj.readFile(path, "utf-8")
        except UnicodeDecodeError:
            # fix: was a bare `except:`; only fall back to windows-1252 on a
            # genuine decoding failure instead of swallowing every error
            markovObj.readFile(path, "windows-1252")
    markovObj.outputDict(dictFile)
    print(
        "Generated Markov dictionary %s with processing %s input lines and %s input words "
        % (dictFile, str(markovObj.getLineCount()), str(markovObj.getWordCount())))
def genDict():
    '''
    Generate the Markov Chain transition matrix.

    **This function should be run once to generate the dictionary file; after
    the file is created it does not need to be run unless changes are made.
    '''
    markovObj = markov.Markov()  # Create new Markov object
    # file of dialog from the Cornell Movie Dialog Corpus
    fileList = ['formatted_movie_lines.txt']
    for path in fileList:
        try:
            markovObj.readFile(path, "utf-8")
        except UnicodeDecodeError:
            # fix: was a bare `except:`; only retry with windows-1252 on a
            # genuine decoding failure
            markovObj.readFile(path, "windows-1252")
    markovObj.outputDict("MarkovDict.txt")  # generate the dictionary
    # fix: the summary previously reported "dict.txt" although the
    # dictionary is actually written to "MarkovDict.txt"
    print(
        "Generated Markov dictionary %s with processing %s input lines and %s input words "
        % ("MarkovDict.txt", str(markovObj.getLineCount()), str(markovObj.getWordCount())))
def __init__(self):
    """Create the twitter bot and restore the last-seen tweet IDs from the
    JSON dump file; raise if the dump is missing (setup not run)."""
    self.markov = markov.Markov()
    self.api = None
    self.last_id_seen = None
    self.last_reply_id_seen = None
    # Attempt to collect the last IDs seen from the JSON dump file.
    try:
        with open(bot_data_file, 'r') as dump:
            saved = json.load(dump)
        self.last_id_seen = saved['last_id_seen']
        self.last_reply_id_seen = saved['last_reply_id_seen']
    except FileNotFoundError as error:
        twitterbot_logger.error("Could not find bot_data file. Have you run the setup script yet?", exc_info=error)
        raise error
def main(): settings = [line.rstrip('\n') for line in file('settings.txt')] source = file('corpus/all.txt') generator = markov.Markov(source) length = random.randint(20, 140) output = generator.generate_markov_text(length) output = output.lower() print output # and now to sing: # wasn't sure which key was which - # https://gist.github.com/smartboyathome/2599146 token = settings[0] token_key = settings[1] con_secret = settings[2] con_secret_key = settings[3] twitter = Twitter(auth=OAuth(token, token_key, con_secret_key, con_secret)) twitter.statuses.update(status=output)
def buildDict(inp, dict, k):
    """Build an order-k Markov dictionary from a character stream.

    inp:  file-like object, read one character at a time until EOF.
    dict: mapping from k-character prefix -> markov.Markov state; mutated
          in place and also returned. (Parameter name kept for interface
          compatibility, although it shadows the builtin.)
    k:    prefix length (order of the chain).
    """
    window = []  # sliding window holding the last k+1 characters
    c = inp.read(1)
    while c:  # empty string from read() means EOF
        window.append(c)
        if len(window) >= k + 1:
            # the first k characters form the prefix; the (k+1)-th is the
            # suffix that follows it
            prefix = window.pop(0) + "".join(window[:k - 1])
            suffix = window[k - 1]
            # fix idiom: membership test on the dict directly instead of
            # `in dict.keys()`
            if prefix not in dict:
                dict[prefix] = markov.Markov(prefix)
            dict[prefix].add(suffix)
        c = inp.read(1)
    return dict
def getD2SCount_pre(self, sequence, sequences, k, r, flag, kmersetdic, kmer_pro):
    """Return the centred k-mer counts N_w - n*p_w for `sequence` (D2S step).

    flag False -> derive background probabilities from a single-sequence
    markov model; otherwise the caller-supplied `kmer_pro` is used directly
    in the centring term.
    """
    # fix: `n` was never assigned (the assignment was commented out), so the
    # loop below raised NameError for any non-zero count
    n = len(sequence)
    # per-k-mer counts for this single sequence
    lis, count = self.getSeqCount_pre([sequence], k, kmersetdic)
    ma = mk.Markov()
    prodic = {}
    if flag == False:
        # single-sequence markov background probabilities
        prodic = ma.get_Single_kmer_Pro(sequence, sequences, k, r)
    for key in lis[0].keys():
        if lis[0][key] == 0:
            prodic[key] = 0
        else:
            # centre the observed count by its expectation
            prodic[key] = lis[0][key] - n * kmer_pro[key]
    return self.addfloat(prodic)
def main(inpath, outpath, numdocs, minwords, maxwords):
    # Generate `numdocs` synthetic documents from a markov model trained on
    # `inpath` and write them as newline-delimited JSON into a gzip file.
    # NOTE(review): Python 2 code (xrange, print statement).
    with open(inpath) as f:
        generator = markov.Markov(f)
    with gzip.open(outpath, 'w') as f:
        for i in xrange(numdocs):
            if i % 1000 == 0:
                print i  # progress marker every 1000 documents
            json.dump(
                {
                    # random word count in [minwords, maxwords]
                    "text": generator.generate_markov_text(
                        random.randint(minwords, maxwords)),
                    "level": random.randint(1, 5),
                    "source": random.randint(1, 20),
                    "id": i
                },
                f)
            f.write('\n')  # one JSON object per line
def parseMessage(bot, update):
    # POS-tag the incoming Telegram message, bucket its words into the
    # module-level part-of-speech lists, and occasionally post a generated
    # markov sentence back to the chat.
    # NOTE(review): Python 2 code (print statements).
    print "parse"
    msg = update.message.text
    msgTokens = word_tokenize(msg)
    tags = nltk.pos_tag(msgTokens)
    print "done parsing"
    print tags
    #Add parts of speech to appropriate lists.
    for t in tags:
        # (word lowercased, POS tag)
        ltag = (t[0].lower(), t[1])
        #nltk seems to not like contractions
        #also, don't store unreasonable length strings
        if "'" in ltag[0] or len(ltag[0]) > maxStringLen or len(ltag[0]) < 3:
            continue
        elif ltag[1] in {'NN', 'NNS', 'NNP', 'NNPS'}:
            nouns.values.append(ltag)
        elif ltag[1] in {'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'}:
            verbs.values.append(ltag)
        elif ltag[1] in {'JJ', 'JJR', 'JJS'}:
            adjectives.values.append(ltag)
        elif ltag[1] in {'RB', 'RBR', 'RBS', 'WRB'}:
            adverbs.values.append(ltag)
        elif t[1] in {'CD'}:
            numbers.values.append(ltag)
        else:
            garbage.values.append(ltag)
    #If we have enough values and it's been at least 20 messages, 1/n chance of posting.
    # NOTE(review): the code actually posts with probability 1/8
    # (randint(0, 7) == 0); there is no 20-message counter here — the
    # comment above appears stale.
    if randint(0, 7) == 0 and len(nouns.values) > 5 and len(
            verbs.values) > 3 and len(adjectives.values) > 1:
        m = markov.Markov(nouns, adjectives, verbs, adverbs, numbers,
                          pronouns, prepositions, conjunctions, vowels,
                          garbage)
        bot.sendMessage(update.message.chat_id, text=m.string)
        # reset most buckets after posting (numbers/pronouns/etc. are kept)
        nouns.values = []
        verbs.values = []
        adverbs.values = []
        garbage.values = []
        adjectives.values = []
def getD2StarCount_suf(self, sequence, sequences, k, r, flag, kmersetdic, kmer_pro):
    """Return the D2Star-normalised centred k-mer counts for `sequence`.

    For each k-mer w with observed count N_w and background probability p_w:
    (N_w - n*p_w) / sqrt(n*p_w), where n = len(sequence).

    flag False -> derive background probabilities from a single-sequence
    markov model; otherwise the caller-supplied `kmer_pro` drives both the
    centring and the scaling.
    """
    # fix: `n` was assigned twice; keep a single assignment
    n = len(sequence)
    # per-k-mer counts for this single sequence
    lis, count = self.getSeqCount([sequence], k, kmersetdic)
    ma = mk.Markov()
    prodic = {}
    if flag == False:
        # single-sequence markov background probabilities
        prodic = ma.get_Single_kmer_Pro(sequence, sequences, k, r)
    for key in lis[0].keys():
        if lis[0][key] == 0:
            prodic[key] = 0
        else:
            # centre and scale; np.spacing(1) avoids division by zero
            prodic[key] = (lis[0][key] - n * kmer_pro[key]) / math.sqrt(
                n * (kmer_pro[key] + np.spacing(1)))
    return self.addfloat(prodic)
def main():
    ''' This is the main function to call BotBuddy '''
    # **ONLY RUN ONCE** — comment out after the initial run once the file
    # is generated; building the dictionary takes roughly 3-5 minutes.
    genDict()
    bot_brain = markov.Markov(dictFile=dictFile, maxWordInSentence=20)
    print(NAME, ": My name is BotBuddy, what is your name?")
    userName = input("Enter your name: ")
    print(NAME, ": Hi ", userName, "! Let's chat :)")
    while True:
        userInput = input('\n' + userName + ": ").lower()
        print()
        pronoun, verb, noun = getPOS(parseInput(userInput))
        if userInput in GOODBYE_INPUTS:
            # user said goodbye -> farewell and stop the loop
            print(NAME + ': ' + random.choice(GOODBYE_RESPONSES))
            break
        elif greeting(userInput):
            # respond to a greeting
            print(NAME + ': ' + random.choice(GREETING_RESPONSES))
        elif re.match(r".*knock[- ]?knock joke", userInput):
            # the user asked for a knock-knock joke
            knockknock(userName)
        else:
            # fall through to a generated markov response
            print(NAME + ': ' + genResponse(pronoun, verb, noun, bot_brain, userInput))
def main():
    # Start the Telegram bot and, in debug mode, run a small interactive
    # console for inspecting the POS buckets and forcing output.
    # NOTE(review): Python 2 code (print statements, raw_input).
    #Get bot token.
    Config = ConfigParser.ConfigParser()
    Config.read("./jabberwocky.cfg")
    #Create event handler
    updater = Updater(Config.get("BotApi", "Token"))
    dp = updater.dispatcher
    #Add handlers
    dp.addTelegramMessageHandler(parseMessage)
    dp.addTelegramCommandHandler("apologize", apologize)
    #Start up bot.
    updater.start_polling()
    while True:
        if debug:
            cmd = raw_input("Enter command...\n")
            # dump the current contents of every POS bucket
            if cmd == 'list':
                print 'Nouns:'
                print nouns.values
                print 'Verbs:'
                print verbs.values
                print 'Adverbs:'
                print adverbs.values
                print 'Numbers:'
                print numbers.values
                print 'Adjectives:'
                print adjectives.values
                print 'Garbage:'
                print garbage.values
            #Force generation of a random sentence.
            elif cmd == 'forceprint':
                m = markov.Markov(nouns, adjectives, verbs, adverbs, numbers,
                                  pronouns, prepositions, conjunctions,
                                  vowels, garbage)
                print m.string
            #Shutdown bot.
            elif cmd == 'quit':
                updater.stop()
                sys.exit()
            elif cmd == 'help':
                print 'Commands are: list, forceprint, quit.'
            else:
                # NOTE(review): "Commmand" typo is in a runtime string;
                # left untouched here.
                print 'Commmand not recognized.'
        else:
            # non-debug: idle while the updater threads do the work
            time.sleep(1)
#!/usr/bin/python # -*- coding: utf-8 -*- from flask import Flask from flask import jsonify from flask import request from flask import render_template import os import json import markov app = Flask(__name__) messages = [] mark = markov.Markov() @app.route("/") def main(): return render_template("index.html", data=messages) @app.route('/chat', methods=['POST']) def process(): data = json.loads(request.data) text = data['text'] response = mark.main(text) response = 'ගුණේ: '.decode('utf-8') + response response = {'response': response} return jsonify(**response)
import markov as mc

# Train a chain on the fan-fiction corpus and print one generated chain.
with open('harry_potter_fanfic.txt') as corpus_file:
    text = corpus_file.read()

module1 = mc.Markov(text)
print(module1.gen_chain())
def calculate_2d_map(ind=0):
    # Sweep the two "dot" gate voltages over an N_v x N_v grid, computing the
    # current at each grid point with a fixed markov graph, and save the
    # inputs/outputs to disk. Returns the elapsed wall-clock time in seconds.
    # `ind` is currently unused.
    st = time.time()
    physics_model = {}
    # multiple of eV
    physics_model['E_scale'] = 1
    # multiple of nm
    physics_model['dx_scale'] = 1
    physics_model['kT'] = 350e-6
    # just initial param to generate the graph object
    #b1 = [get_random(-200e-3,sigma_mean=0.02),get_random(-0.6),get_random(0.05),1]
    #d1 = [200e-3,get_random(-0.1),get_random(0.05),1]
    #b2 = [get_random(-250e-3,sigma_mean=0.02),get_random(0.0),get_random(0.05),1]
    #d2 = [200e-3,get_random(0.1),get_random(0.05),1]
    #b3 = [get_random(-200e-3,sigma_mean=0.02),get_random(0.6),get_random(0.05),1]
    # barrier (b*) and dot (d*) gate parameter lists; presumably
    # [amplitude, position, width, flag] — confirm against V_x_wire.
    b1 = [-200e-3, -0.6, 0.05, 1]
    d1 = [200e-3, -0.2, 0.05, 1]
    b2 = [-250e-3, 0.0, 0.05, 1]
    d2 = [200e-3, 0.2, 0.05, 1]
    b3 = [-200e-3, 0.6, 0.05, 1]
    x = np.linspace(-1, 1, 100)
    physics_model['x'] = x
    physics_model['list_b'] = [b1, d1, b2, d2, b3]
    physics_model['V'] = potential_profile.V_x_wire(x, physics_model['list_b'])
    # NOTE(review): uniform over [5e-3, 5e-3] is degenerate — always 5e-3.
    # Looks like a randomisation that was pinned for testing; confirm.
    physics_model['K_onsite'] = np.random.uniform(5e-3, 5e-3)
    physics_model['sigma'] = x[1] - x[0]
    physics_model['x_0'] = 0.001 * (x[1] - x[0])
    physics_model['mu_l'] = (300.01e-3, 300.0e-3)
    physics_model['battery_weight'] = 100
    physics_model['short_circuit_current'] = 1
    physics_model['QPC_current_scale'] = 1e-4
    graph_model = (1, 1)
    tf_strategy = 'simple'
    graph = markov.Markov(graph_model, physics_model, tf_strategy)
    graph.find_n_dot_estimate()
    # grid resolution for each swept gate voltage
    N_v = 25
    V_d1_vec = np.linspace(50e-3, 300e-3, N_v)
    V_d2_vec = np.linspace(50e-3, 300e-3, N_v)
    output_vec = []
    for i in range(N_v):
        print(i)  # progress: one line per outer grid row
        for j in range(N_v):
            # update the two dot amplitudes and recompute the potential
            d1[0] = V_d1_vec[i]
            d2[0] = V_d2_vec[j]
            physics_model['list_b'] = [b1, d1, b2, d2, b3]
            physics_model['V'] = potential_profile.V_x_wire(
                x, physics_model['list_b'])
            output_vec += [calculate_current((graph, physics_model))]
    # data is a dictionary with two keys, 'input' and 'output'
    # data['input'] = {physics_model, graph_model, tf_strategy}
    # data['output'] : list with output from calculate current
    data = {}
    data['input'] = {
        'physics_model': physics_model,
        'graph_model': graph_model,
        'tf_strategy': tf_strategy,
        'V_d1_vec': V_d1_vec,
        'V_d2_vec': V_d2_vec
    }
    data['output'] = output_vec
    import datetime
    # NOTE(review): `dt` is only used by the commented-out save path below.
    dt = str(datetime.datetime.now())
    #np.save('/Users/ssk4/data/double_dot_' + str(N_v) + '_grid_' + dt + '.npy',data)
    # during testing
    np.save('/Users/ssk4/data/double_dot_test1.npy', data)
    return (time.time() - st)
def test_markov(self):
    """A chain trained on 'ab' should predict 'b' following 'a'."""
    model = mk.Markov('ab')
    predicted = model.predict('a')
    self.assertEqual(predicted, 'b')
def init_fake_tweets():
    """(Re)build the module-level markov model from the raw tweets dump."""
    global markov_obj
    markov_obj = markov.Markov("./raw_tweets_text.txt")
# Flask app prologue for the "Alternative Tweets" leaderboard: module-level
# globals plus the CLI command that initialises the server.
# NOTE(review): Python 2 code (HTMLParser module, print statements).
import os
import time
from sqlite3 import dbapi2 as sqlite3
from flask import Flask, g, request, render_template, redirect, url_for
import random
import json
import markov
from HTMLParser import HTMLParser

app = Flask(__name__)

#declare global objects
filtered_tweets = []
# markov model trained on the raw tweet dump at import time
markov_obj = markov.Markov("./raw_tweets_text.txt")
html_parser = HTMLParser()

#set path to database
app.config.update(
    dict(DATABASE=os.path.join(app.root_path,
                               'AlternativeTweetsLeaderboard.db')))


#initialize the server
@app.cli.command('init')
def init():
    # one-shot setup: database, real tweets, then generated fakes
    print "Initializing..."
    init_db()
    init_tweets()
    init_fake_tweets()
    print "Initialized the server"
print("----------------D2-----------------") print("k=", k) feature = "d" + str(k) + "countLis" sim = [] for i in range(len(testLis)): tmp = 1 / (dis.getD2_feature( eval(feature)[testLis[i][0]], eval(feature)[testLis[i][1]])) sim.append(tmp) auc = roc_auc_score(testlabel, sim) print(auc) for r in range(0, 2): ## 获取kmer 概率集合 mar = markov.Markov() ## 获得概率 kmer_pro = mar.get_Mulk_Mul_kmer_Pro(datasets, k, k, r) dicp = "d" + str(k) + "dic" #######d2s d2star特征-------------------- d2scountLis = sq.getD2SMul_SeqCount(datasets, k, r, True, eval(dicp), kmer_pro) d2starcountLis = sq.getD2Star_Mul_seq_Count( datasets, k, r, True, eval(dicp), kmer_pro) print("----------------D2s-----------------") print("k=", k) feature = "d" + str(2) + "scountLis"