def main():
    parser = argparse.ArgumentParser(description='run the Markov model')
    parser.add_argument('--mode', type=str, choices=['prepro', 'markov'])
    parser.add_argument('--train_json', type=str, default=train_record, help='the processed train json file')
    parser.add_argument('--test_json', type=str, default=test_record, help='the processed test json file')
    parser.add_argument('--status_label', type=str, default=status_label, help='the status label')
    parser.add_argument('--data_dir', type=str, default=data_dir, help='where to read data')
    parser.add_argument('--pred_dir', type=str, default=pred_dir, help='the dir to save prediction results')
    # for preprocessing
    parser.add_argument('--class_num', type=int, default=18, help='the number of classes')
    parser.add_argument('--min_length', type=int, default=2, help='flows shorter than this are filtered out')
    parser.add_argument('--max_packet_length', type=int, default=6000, help='the largest packet length')
    parser.add_argument('--split_ratio', type=float, default=0.8, help='ratio of the train set of the target app')
    parser.add_argument('--keep_ratio', type=float, default=1.0, help='ratio of examples to keep (for small-dataset tests)')
    parser.add_argument('--markov_models', type=str, default='SLC-LR', help="markov methods, separated by '#'")
    config = parser.parse_args()

    if config.mode == 'prepro':
        preprocess.preprocess(config)
    elif config.mode == 'markov':
        Markov.markov(config)
    else:
        raise ValueError("unknown mode; only 'prepro' and 'markov' are supported")
def privmsg(self, user, channel, msg):
    # Terminate if the message was not sent by a user.
    if not user:
        return

    # Try a keyword match (e.g. "!command arguments").
    keyword = re.match(r"!(\w*)", msg)
    if keyword is not None:
        print "I should be doing something with keyword %s" % (keyword.groups()[0],)
        args = re.match(r"!\w* (.*)", msg)
        if args is not None:
            print "and something with arguments %s" % (args.groups()[0],)
        return

    # Respond when the bot is addressed directly, e.g. "nickname: hello".
    m = re.match(self.nickname + r"[:,]{0,1} (.*)", msg)
    if self.nickname in msg and m is not None and m.groups()[0]:
        msg = m.groups()[0]
        Markov.learn(msg, self.factory.chainLen)
        sentence = Markov.talk(msg, self.factory.chainLen, self.factory.maxWords)
        if sentence is None:
            # Nothing to say; don't reply with "None".
            return
        self.msg(self.factory.channel, user.split('!', 1)[0] + ": " + sentence)
def load(self, personality, cacheDir):
    """Load a personality into memory by loading its Markov chains."""
    self.brains[personality] = dict()
    self.brains[personality]["fwd"] = Markov.MarkovChain(
        cacheDir + personality + "-fwd", 1, False)
    self.brains[personality]["rev"] = Markov.MarkovChain(
        cacheDir + personality + "-rev", 1, False)
def random_text(df, start_words, num_clusters=5, num_sentences=100, len_sentences=30):
    column_name = 'cluster_{}'.format(num_clusters)
    text = []
    cluster_num = []
    for c_num in xrange(num_clusters):
        print '\n \nGenerating sentences for cluster: {}\n'.format(c_num)
        df_c = df[df[column_name] == c_num].reset_index()
        # Sample random rows (with replacement) from this cluster.
        rand_rows = np.random.randint(1, len(df_c), num_sentences)
        df_c = df_c.iloc[rand_rows, :]
        for file_name in df_c.file:
            file_ = open(file_name)
            markov = Markov.Markov(file_)
            try:
                out = markov.generate_markov_text(20, start_words)
                print '\n\t{}'.format(out)
                text.append(out)
                cluster_num.append(c_num)
            except Exception:
                continue
            finally:
                file_.close()
    out_file_name = 'random_sentence_{}.csv'.format(start_words.strip())
    pd.DataFrame({
        'cluster_num': cluster_num,
        'random_sentence': text
    }).to_csv(out_file_name)
    return
def main():
    halfwidth = 0.5
    iterations = 100
    xs = CrossSection.CrossSection(xS=0.5, nu=1.0, xF=0.5, xG=0)
    print "Cross Section: %s" % xs

    Chart = Gnuplot.Gnuplot()
    title = "100 iterations max"
    Chart.title(title)
    Chart.xlabel("Number of Bins (length of vectors)")
    Chart.ylabel("Dominant eigenvalue")
    Chart("set yrange[0.35:]")

    Histories = [500, 1000, 5000, 10000]
    Zones = [5, 10, 20, 30, 50, 75, 100, 200, 500]
    for h in Histories:
        eValues = []
        for z in Zones:
            geo = Geometry.Geometry(z, [[-halfwidth, halfwidth]])
            print "geo: %s\nHistories per iteration = %i" % (geo, h)
            uni = scipy.ones(geo.bins)
            uSource = fissionSource.histogramSource(uni, geo)
            mark = Markov.Markov(geo, xs, h)
            amc = arnoldiMC.arnoldiMC(mark)
            amc.arnoldi(uSource, iterations)
            eValues.append(amc.eValue)
        eData = Gnuplot.Data(Zones, eValues, with_="lines",
                             title="# histories: %i" % h)
        Chart.replot(eData)
def __init__(self):
    self.model_states = {'RHN': 0, 'RMN': 1, 'WHN': 2, 'WMN': 3,
                         'RHF': 4, 'RMF': 5, 'WHF': 6, 'WMF': 7}
    # First index: read/write; second index: near/far.
    self.rw_states = {0: {0: (4, 5), 1: (0, 1)},
                      1: {0: (6, 7), 1: (2, 3)}}
    self.hit_states = (0, 2, 4, 6)
    self.MM = Markov.MarkovModel(len(self.model_states))
    self.granularity = np.uint64(6)
    self.last_address = -1
    self.tmp_trans = None
def create_twitter_markov(api):
    try:
        with open(FILE_NAME, 'r') as f:
            text = f.read().replace('\n', '')
    except IOError:
        # The word file doesn't exist yet: build it, then read it.
        create_word_file(api)
        with open(FILE_NAME, 'r') as f:
            text = f.read().replace('\n', '')
    return Markov.get_model_from_block(text)
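# A minimal sketch of the helper assumed above -- Markov.get_model_from_block
# is not shown in this listing, so the body below is an illustrative guess,
# not the project's actual implementation: it maps each word in the block to
# the list of words observed to follow it.
def get_model_from_block(block):
    model = {}
    words = block.split()
    for current_word, next_word in zip(words, words[1:]):
        model.setdefault(current_word, []).append(next_word)
    return model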
def makeChain(name, sourceDir, cacheDir):
    """Parse the source text to generate new Markov chains."""
    sourceFile = sourceDir + name + ".txt"
    fwd = Markov.MarkovChain(cacheDir + name + "-fwd", 1, True)
    rev = Markov.MarkovChain(cacheDir + name + "-rev", 1, True)
    try:
        for line in open(sourceFile):
            words = line.split()
            if len(words) > 0:
                fwd.add(words)
                words.reverse()
                rev.add(words)
    except IOError:
        print "The source named '%s' does not exist" % (name,)
    finally:
        fwd.close()
        rev.close()
def main():
    # Create lyric fetcher
    fetcher = LyricFetcher()

    # Get artist name
    artistname = input("Enter name of Artist: ")
    while artistname.lower() != "quit":
        # Get lyrics for the artist
        lyrics = fetcher.getLyrics(artistname)

        # Feed into the model and generate new lyrics
        model = Markov(lyrics)
        print("\nBigram\n")
        print(paragraphtorap(model.bigram_generate()))
        print("\nTrigram\n")
        print(paragraphtorap(model.ngram_generate(3)))
        print("\n4-gram\n")
        print(paragraphtorap(model.ngram_generate(4)))

        # Start again
        artistname = input("Enter name of Artist: ")
def __init__(self, variable, w, bins=[], permutations=0, nlag=0):
    self.variable = variable
    n, T = self.variable.shape
    self.w = w
    lag = slag(w, variable)
    regRel = variable / lag
    self.regRel = regRel
    self.lag = lag
    baseRel = variable / mean(variable)
    self.baseRel = baseRel
    rangeT = arange(T)
    if bins:
        self.bins = bins
    else:
        self.quints = [Markov.Quintilizer(baseRel[:, time]).quintiles
                       for time in range(T)]
        self.bins = self.quints
    bins = self.bins
    results = [self.traceValue(variable[:, t], lag[:, t], bins[t])
               for t in rangeT]
    self.traces = [i[0] for i in results]
    self.tMats = [i[1] for i in results]
    self.tOrig = results
    if permutations:
        TP = []
        for p in range(permutations):
            variable = permutate(variable)
            wy = slag(w, variable)
            results = [self.traceValue(variable[:, t], wy[:, t], bins[t])
                       for t in rangeT]
            traces = [i[0] for i in results]
            TP.append(traces)
        self.TP = array(TP)
        # Pseudo p-value: the share of permuted traces below the observed traces.
        comp = self.TP < self.traces
        self.comp = sum(comp)
        self.pvalue = self.comp * 1. / permutations
        self.permutations = permutations
    else:
        self.permutations = 0
def makeWordModel(filename):
    infile = open(filename)
    model = Markov()
    for line in infile:
        words = line.split()
        for w in words:
            model.add(w)
    infile.close()
    # Add a sentinel at the end of the text
    model.add(None)
    model.reset()
    return model
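# A minimal sketch of the Markov class that makeWordModel assumes -- the real
# class is not shown here, so the interface below (add/reset plus a
# hypothetical randomNext for generation) is inferred from usage and purely
# illustrative: a first-order chain over words, with None as the sentinel.
import random

class Markov(object):
    def __init__(self):
        self.transitions = {}   # word -> list of successor words
        self.prev = None

    def add(self, word):
        self.transitions.setdefault(self.prev, []).append(word)
        self.prev = word

    def reset(self):
        self.prev = None

    def randomNext(self):
        word = random.choice(self.transitions.get(self.prev, [None]))
        self.prev = word
        return word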
def density_plots():
    diff, hl = mk_p.get().split(":")[1].strip().split(",")
    diff = int(diff)
    hl = int(hl)
    br_d, br_u = Markov.get_brackets(self.ts, diff, hl)
    fig, ax = plt.subplots(2, 2, figsize=(10, 10))
    ax[0, 0].plot(self.ts)
    ax[0, 0].fill_between(range(0, len(self.ts)), br_d, br_u,
                          color=(0.5, 0.5, 0.5, 0.2))
    uda = Markov.ud_array(self.ts, br_d, br_u)
    u, d, m = Markov.udm_lengths(uda)
    ax[0, 1].hist(u, bins=range(0, max(u)))
    ax[1, 0].hist(d, bins=range(0, max(d)))
    ax[1, 1].hist(m, bins=range(0, max(m)))
    ax[0, 0].set_title("bins")
    ax[0, 1].set_title("U")
    ax[1, 0].set_title("D")
    ax[1, 1].set_title("M")
    plt.show()
def testtrackHistory():
    """Track a single neutron history through a simple slab geometry."""
    geo = Geometry.Geometry(10, [[-1, 1]])
    xs = CrossSection.CrossSection()
    mark = Markov.Markov(geo, xs, histories=1000)
    n = particle.neutron()
    bank = fissionBank.fissionBank()
    bank.append(n, 5)
    newbank = fissionBank.fissionBank()
    print "xs: %s" % xs
    for neut in bank[:1]:
        neut.setRandDirection()
        mark.trackHistory(neut, newbank, 1.0)
def main(self):
    bins = 50
    halfwidth = 0.5
    histories = 10000
    iterations = 50
    restarts = 5
    geo = Geometry.Geometry(bins, [[-halfwidth, halfwidth]])
    xs = CrossSection.CrossSection(xS=0.5, nu=1.0, xF=0.5, xG=0)
    self.mark = Markov.Markov(geo, xs, histories)
    self.mark.score = self.score

    # Build the transport matrix Q one column at a time by transporting a
    # point source placed in each spatial bin.
    self.Q = scipy.zeros((bins, 0))
    for i in xrange(bins):
        print "I am at %i" % i
        point = scipy.zeros((bins))
        point[i] = 1
        pSource = fissionSource.histogramSource(point, geo)
        self.response = fissionBank.fissionBank()
        self.mark.transport(pSource)
        q = fissionSource.histogramSource(self.response, geo)
        q = q * (1.0 / self.mark.histories)
        self.printVector(q)
        self.Q = scipy.column_stack((self.Q, q))

    q = scipy.ones(bins)
    q = q / scipy.linalg.norm(q, 2)
    print "Calling Deterministic Arnoldi"
    adtm = ArnoldiDtm.Arnoldi(self.Q, iterations, restarts)
    eValues, eVectors = adtm.Arnoldi(q)
    print "Eigenvalues: "
    self.printVector(eValues)
    print "Dominant eigenvector: "
    self.printVector(eVectors[:, -1])
    print "\nAll eigenvectors: "
    self.printM(eVectors)

    Chart = Gnuplot.Gnuplot()
    Chart.title("Histories per 'vector': %i, bins = %i" % (histories, bins))
    length = len(eValues) - 1
    for i in xrange(5):
        data = Gnuplot.Data(eVectors[:, length - i], with_='lines',
                            title='vector %i' % i)
        Chart.replot(data)
def testtransport():
    """Transport a small bank of neutrons and report leakage and weight loss."""
    geo = Geometry.Geometry(10, [[-1, 1]])
    xs = CrossSection.CrossSection()
    mark = Markov.Markov(geo, xs, histories=1000)
    bank = fissionBank.fissionBank()
    n = particle.neutron()
    bank.append(n, 10)
    for neut in bank:
        neut.setRandDirection()
    newbank = mark.transport(bank, 1)
    print "leak-left: %.4f, leak-right: %.4f, weight-killed: %.4f" % (
        mark.minLeakage, mark.maxLeakage, mark.wgtKilled)
def main():
    histories = 10000
    iterations = 10
    xs = CrossSection.CrossSection(xS=0.5, nu=1.0, xF=0.5, xG=0)
    geo = Geometry.Geometry(10, [[-0.5, 0.5]])
    uni = scipy.ones(geo.bins)
    point = scipy.zeros(geo.bins)
    point[0] = 1
    uSource = fissionSource.histogramSource(uni, geo)
    pSource = fissionSource.histogramSource(point, geo)
    uSource = uSource / scipy.linalg.norm(uSource, 2)
    mark = Markov.Markov(geo, xs, histories)
    amc = arnoldiMC.arnoldiMC(mark)
    # calc_eVector(amc)
    EigenPairs(amc)
def testRoulette():
    """Russian-roulette a large bank and check that total weight is conserved."""
    geo = Geometry.Geometry(10, [[-1, 1]])
    xs = CrossSection.CrossSection()
    mark = Markov.Markov(geo, xs, histories=1000)
    n = particle.neutron()
    bank = fissionBank.fissionBank()
    bank.append(n, 1000000)
    print "weight-in: %s" % (len(bank))
    for i in xrange(len(bank)):
        bank[i] = mark.russianRoulette(bank[i])
    print "\nweight-out: %s, +- %.4f" % (bankWeight(bank),
                                         math.sqrt(len(bank)))
def __init__(self): """Initiate bot by loading JSON, setting a lock and connecting to the API. """ print("Initiating bot uwu") self.json_lock = threading.Lock() self.markov_lock = threading.Lock() try: with open("data.json", "r") as f: self.data = json.load(f) except IOError: self.data = { "base": input("What account is your ebook based on? "), "keys": { "consumer_token": input("Consumer key "), "consumer_secret": input("Consumer secret "), }, "last_id": 1, "last_reply": 1, "uid": 0, } self.dump() self.api = self.connect() if self.data["uid"] == 0: try: self.data["uid"] = self.api.lookup_users( screen_names=[self.data["base"]])[0].id except tweepy.TweepError as e: print("Couldn't get uid twt") exit() d = date.now() self.wait = 3.6e3 - (60 * d.minute + d.second) self.chain = Markov.Chain() # This really long regex array filters out tags, websites, tickers, # weird quotes, long white space, and beginning spaces. self.ignore = [ r"[ |\.]?(@[A-Za-z0-9_]{1,15})(?![A-Z0-9a-z])", r" ?(https?|www)[A-Za-z0-9:\/\.\-_?=%@~\+]*", r" ?\$[A-Za-z]{1,6}(?![A-Za-z])", r'(?<=\s)"\s', r"^ ", r'"', ] self.space_filter = (r"(?<= ) {1,}", ) self.special = ",.?!:;"
def dummy(self, hw, Chart, title='None'):
    """
    hw: halfwidth of the geometry
    Chart: Gnuplot object where plotting occurs
    title: title of the plot; if None, the title will be the eigenvalue
    """
    valuesvsBins = []
    for bin in self.Bins:
        print "Bins = %i" % bin
        geo = Geometry.Geometry(bin, [[-hw, hw]])
        mark = Markov.Markov(geo, self.xs, self.histories)
        amc = arnoldiMC.arnoldiMC(mark)
        uSource = fissionSource.histogramSource(scipy.ones(bin))
        Values, Vectors = amc.arnoldi(uSource, self.iterations)
        valuesvsBins.append(Values[-1])
    title = 'hw = %.2f' % hw
    kData = Gnuplot.Data(self.Bins, valuesvsBins, with_='lines', title=title)
    Chart.replot(kData)
    return Chart
def Orthogonal(self, hw, bins):
    """
    Orthogonal will run Arnoldi's method and determine whether the basis
    vectors are orthogonal.

    hw: halfwidth of the geometry
    bins: number of spatial bins
    """
    geo = Geometry.Geometry(bins, [[-hw, hw]])
    mark = Markov.Markov(geo, self.xs, self.histories)
    amc = arnoldiMC.arnoldiMC(mark)
    uSource = fissionSource.histogramSource(scipy.ones(bins))
    Values, Vectors = amc.ERAM(uSource, 5, self.iterations)
    # Values, Vectors = amc.arnoldi(uSource, self.iterations)
    n = len(amc.Q)
    # Gram matrix of the basis vectors; it should be the identity
    # if the basis is orthonormal.
    O = scipy.zeros((n, n))
    for i in xrange(n):
        for j in xrange(n):
            O[i, j] = scipy.dot(amc.Q[i], amc.Q[j])
    print "Orthogonality:"
    amc.printM(O)
def main():
    histories = 1000
    iterations = 50
    trials = 50
    halfWidth = 0.5
    xs = CrossSection.CrossSection(xS=0.5, nu=1.0, xF=0.5, xG=0)
    print "Cross Section: %s" % xs
    geo = Geometry.Geometry(100, [[-halfWidth, halfWidth]])
    mark = Markov.Markov(geo, xs, histories)
    uni = scipy.ones(geo.bins)
    uSource = fissionSource.histogramSource(uni, geo)

    Chart = Gnuplot.Gnuplot()
    Chart.xlabel('Iteration')
    Chart.ylabel('Eigenvalue estimate')
    Chart.title('histories per Arnoldi iteration: %i' % histories)
    for j in xrange(5):
        eValues = []
        for i in xrange(trials):
            amc = arnoldiMC.arnoldiMC(mark)
            amc.arnoldi(uSource, iterations)
            print "eValue = %s" % amc.eValue
            eValues.append(amc.eValue)
        average_eValue = sum(eValues) / trials

        # Calculate the sample variance of the eigenvalue estimates.
        var_eValue = 0
        for value in eValues:
            var_eValue += math.pow((value - average_eValue), 2)
        var_eValue = var_eValue / (trials - 1)

        eData = Gnuplot.Data(eValues, with_='lines',
                             title="%6.4f, %6.4f" % (average_eValue,
                                                     math.sqrt(var_eValue)))
        Chart.replot(eData)
    # Analyze the data
    total = len(data)
    prev = data.pop()
    freq['-']['-'] += 1
    up = False
    while data:
        curr = data.pop()
        if prev[-1] < curr[-1] and up:
            freq['+']['+'] += 1.0
            up = True
        elif prev[-1] > curr[-1] and up:
            freq['+']['-'] += 1.0
            up = False
        elif prev[-1] < curr[-1]:
            freq['-']['+'] += 1.0
            up = True
        else:
            freq['-']['-'] += 1.0
            up = False
        prev = curr

    # Normalize each row of counts into transition probabilities.
    for k in freq:
        count = sum([i for i in freq[k].values()])
        for k2 in freq[k]:
            freq[k][k2] = freq[k][k2] / count
    return freq


if __name__ == '__main__':
    mchains = [(symbol, Markov.Markov(analyzer, symbol=symbol))
               for symbol in ['IBM', 'YHOO', 'GOOG']]
word_count = sum(
    len(line.split())
    for line in open(r"C:\Users\owner\school\Fire_On_The_Mountain.txt"))
with open("Fire_On_The_Mountain.txt", 'r') as foo:
    lines = len(foo.readlines())
avg_words = int(round(word_count / lines))
#print()
#print("num words =", word_count)
#print("num lines =", lines)
#print("avg words per line =", avg_words)
#print()

message = Markov.read_file('Scarlet_Fire.txt')
chain = Markov.build_chain(message)
#message = Markov.generate_message(chain, avg_words)
#print(message)
stanza = Markov.generate_message(chain, avg_words * 22)

unique_words = lyrical.make_unique_list()
(first_rhyme, second_rhyme) = lyrical.choose_rhymes(unique_words)
lyrical.do_rhymes(first_rhyme, second_rhyme)
#print(stanza)


def play_with_string(stanza):
    #print(stanza)
import sys

from PyQt5.QtGui import QColor
from PyQt5.QtCore import Qt
from pagesetting import *
from pagemain import *
from Markov import *


class Pages:
    def __init__(self):
        self.pageSetting = Ui_pageSetting()
        self.pageMain = Ui_pageMain()


pages = Pages()
markov = Markov()
countN = 0
start = [-1, -1]


def main():
    global countN
    arr = []
    app = QApplication(sys.argv)
    pageMain = QMainWindow()
    pages.pageMain.setupUi(pageMain)
    pageMain.show()
    pages.pageMain.btn_map.clicked.connect(lambda: setParams(pageMain))
    QApplication.processEvents()
from Markov import *
from collections import Counter

city_weather = {
    'New York': 'rainy',
    'Chicago': 'snowy',
    'Seattle': 'rainy',
    'Boston': 'hailing',
    'Miami': 'windy',
    'Los Angeles': 'cloudy',
    'San Francisco': 'windy',
}

# Simulate each city's weather seven days out, 100 trials per city.
city_simulation_result = {}
for city in city_weather:
    mk = Markov(day_zero_weather=city_weather[city])
    city_simulation_result[city] = Counter(
        mk.get_weather_for_day(day=7, trials=100))

for city in city_simulation_result:
    print(city + ": " + str(dict(city_simulation_result[city])))

print("\n")
print("Most likely weather in seven days")
print("----------------------------------")
for city in city_simulation_result:
    print(city + ": " + max(city_simulation_result[city],
                            key=city_simulation_result[city].get))
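# A minimal sketch of the Markov class the script above assumes -- the real
# class lives in the Markov module and is not shown here. The transition
# table below is invented purely for illustration; only the constructor
# argument and get_weather_for_day signature are taken from the usage above.
import random

class Markov(object):
    # Hypothetical transition probabilities: state -> [(next_state, prob), ...]
    TRANSITIONS = {
        'sunny':   [('sunny', 0.7), ('cloudy', 0.3)],
        'cloudy':  [('cloudy', 0.5), ('rainy', 0.3), ('sunny', 0.2)],
        'rainy':   [('rainy', 0.5), ('cloudy', 0.3), ('snowy', 0.2)],
        'snowy':   [('snowy', 0.5), ('cloudy', 0.5)],
        'hailing': [('hailing', 0.4), ('rainy', 0.6)],
        'windy':   [('windy', 0.4), ('cloudy', 0.6)],
    }

    def __init__(self, day_zero_weather):
        self.day_zero_weather = day_zero_weather

    def _step(self, state):
        # Draw the next state from this state's transition distribution.
        r, acc = random.random(), 0.0
        for nxt, p in self.TRANSITIONS[state]:
            acc += p
            if r < acc:
                return nxt
        return state

    def get_weather_for_day(self, day, trials):
        # Return one simulated day-`day` outcome per trial.
        results = []
        for _ in range(trials):
            state = self.day_zero_weather
            for _ in range(day):
                state = self._step(state)
            results.append(state)
        return results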
    return freq


def analyze(freq):
    for k in freq:
        count = sum([i for i in freq[k].values()])
        for k2 in freq[k]:
            freq[k][k2] = freq[k][k2] / count
    return freq


def construct_sentence(start, stop, markov):
    sentence = start
    current_word = start
    while current_word != stop:
        try:
            next_word = markov.next(current_word)
            sentence += ' ' + next_word
            current_word = next_word
        except KeyError:
            break
    return sentence


if __name__ == '__main__':
    freq = dict()
    filenames = ['Ulysses.txt']
    for f in filenames:
        freq = train(f, freq)
    M = Markov.Markov(analyze, freq)
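# `train` is called above but not shown in this fragment. A minimal sketch of
# what it plausibly does -- accumulate word-bigram counts from a file into the
# nested `freq` dict that analyze() later normalizes -- purely illustrative,
# not the project's actual implementation:
def train(filename, freq):
    words = open(filename).read().split()
    for current_word, next_word in zip(words, words[1:]):
        freq.setdefault(current_word, {})
        freq[current_word][next_word] = freq[current_word].get(next_word, 0) + 1.0
    return freq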
def main(WHICHSAMPLES, wf, WHICHCLASS, dbs=None, appslist=None):
    PACKETS = []
    with open(WHICHCLASS + 'Occ.txt') as packseq:
        for line in packseq:
            PACKETS.append(line.replace('\n', ''))

    allnodes = PACKETS
    allnodes.append('selfdefined')
    allnodes.append('obfuscated')

    # Build the CSV header: one column per ordered pair of nodes.
    Header = ['filename']
    for i in range(0, len(allnodes)):
        for j in range(0, len(allnodes)):
            Header.append(allnodes[i] + 'To' + allnodes[j])
    print 'Header is long ', len(Header)

    Fintime = []
    dbcounter = 0
    for v in range(0, len(WHICHSAMPLES)):
        numApps = os.listdir('graphs/' + WHICHSAMPLES[v] + '/')
        DatabaseRes = [Header]
        leng = len(numApps)
        checks = [0, 999, 1999, 2999, 3999, 4999, 5999, 6999, 7999, 8999,
                  9999, 10999, 11999, 12999]
        for i in range(0, len(numApps)):
            if i in checks:
                print 'starting ', i + 1, ' of ', leng
            if wf == 'Y':
                with open(WHICHCLASS + '/' + WHICHSAMPLES[v] + '/' +
                          str(numApps[i])) as callseq:
                    specificapp = []
                    for line in callseq:
                        specificapp.append(line)
            else:
                specificapp = []
                for line in dbs[dbcounter][i]:
                    specificapp.append(line)
            Startime = time()
            MarkMat = mk.main(specificapp, allnodes, wf)
            MarkRow = []
            if wf == 'Y':
                MarkRow.append(numApps[i])
            else:
                MarkRow.append(appslist[dbcounter][i])
            # Flatten the Markov transition matrix into one CSV row.
            for r in range(0, len(MarkMat)):
                for c in range(0, len(MarkMat)):
                    MarkRow.append(MarkMat[r][c])
            DatabaseRes.append(MarkRow)
            Fintime.append(time() - Startime)
        dbcounter += 1
        f = open('Features/' + WHICHCLASS + '/' + WHICHSAMPLES[v] + '.csv', 'w')
        for line in DatabaseRes:
            f.write(str(line) + '\n')
        f.close()
import sys
import os

from CMBellbot import *
import Markov
from twisted.internet import reactor

if __name__ == "__main__":
    chLen = 2
    try:
        chan = sys.argv[1]
    except IndexError:
        print "Please specify a channel name."
        print "Example:\n$ python main.py bellman"
        exit()
    # Reload any previously learned Markov data before connecting.
    if os.path.exists("markovmind.data"):
        f = open("markovmind.data")
        i = 0
        for row in f:
            i += 1
            Markov.learn(row, chLen, False)
        f.close()
        print "Mind is up again and %i rows loaded!" % (i,)
    reactor.connectTCP('se.quakenet.org', 6667, CMBellbotFactory('#' + chan))
    reactor.run()
def __init__(self, tagSets):
    self.model = Markov.MarkovModel()
    self.tagSets = tagSets
    self.fullRules = []
    self.partRules = []
def load(self, brain):
    self.brain = Markov.MarkovChain(self.cache + brain + "-fwd", 1, False)
print 'starting ', i + 1, ' of ', leng
if wf == 'Y':
    with open('family/' + str(numApps[i])) as callseq:  # Families/Trail1
        specificapp = []
        for line in callseq:
            specificapp.append(line.replace('\n', ''))
    #print "specificapp: ", specificapp
else:
    specificapp = []
    for line in dbs[dbcounter][i]:
        specificapp.append(line)
Startime = time()
MarkMat = mk.main(specificapp, allnodes, wf)
MarkRow = []
if wf == 'Y':
    MarkRow.append(numApps[i])
else:
    MarkRow.append(appslist[dbcounter][i])
# Flatten the Markov transition matrix into one CSV row.
for r in range(0, len(MarkMat)):
    for c in range(0, len(MarkMat)):
        MarkRow.append(MarkMat[r][c])
DatabaseRes.append(MarkRow)
Fintime.append(time() - Startime)
dbcounter += 1
f = open('Features/' + WHICHCLASS + '/' + 'result' + '.csv', 'w')
for line in DatabaseRes:
            rhymes[key] = 0
            break
        else:
            margin += 1
    except:
        print(line)
    return rhymes


def generate_versified_sentence():
    while True:
        line = Markov.model.make_sentence()
        line = versify(line)
        rhymes = count_rhymes(line)
        if max([rhymes[key] for key in rhymes.keys()]) > 0:
            return line


def generate_stanzas_random(amount=3):
    result = ""
    for i in range(amount):
        result += generate_versified_sentence() + '\n\n'
    return result


if __name__ == '__main__':
    Markov.load_model()
    print(generate_stanzas_random())
def __init__(self, fcp):
    self.fcp = fcp
    self.to_solve_mdp = Markov.Markov(fcp)
    self.val_act_dict = {}