def start(conn, n):
    cursor = conn.cursor()
    mg = mk.MarkovGenerator()
    #email = emails[random.randint(0, len(emails) - 1)]
    with open(data_path + "poll_replies.txt", 'w+') as f_out:
        total_replies = 0
        query = "SELECT id, state, county, city from users"
        cursor.execute(query)
        users = cursor.fetchall()
        query = "SELECT id, state, county, city from polls"
        cursor.execute(query)
        polls = cursor.fetchall()
        while total_replies < n:
            poll = polls[random.randint(0, len(polls) - 1)]
            reply_count = 0
            for user in users:
                # Only users sharing the poll's state, county, or city may reply
                if user[1] == poll[1] or user[2] == poll[2] or user[3] == poll[3]:
                    if random.randint(0, 2) == 1:
                        content = mg.generate_sentence(random.randint(2, 5))
                        f_out.write(str(user[0]) + "\n")
                        f_out.write(str(poll[0]) + "\n")
                        f_out.write(content + "\n")
                        total_replies += 1
                        reply_count += 1
                        # Cap each poll at 10 generated replies
                        if reply_count == 10:
                            break
def generate_markov_status():
    # Generates a tweet via markov chains and a given corpus. Randomly chooses a length
    # between 50 and 140 characters and an ngram length of 2-4 for variation.
    # Runs until it generates a tweet containing "toast" or "bread".
    with open(corpus_file) as f:
        corpus = f.read()
    gen = markov.MarkovGenerator(corpus,
                                 length=random.randint(50, 140),
                                 ngram=random.randint(2, 4))
    status = gen.generate_words()
    status_tokenized = nltk.word_tokenize(status)
    status_tokenized_clean = [
        word for word in nltk.word_tokenize(status)
        if word not in string.punctuation
    ]
    # Pick up to a third of the non-punctuation tokens to swap for similar words
    rand_words = random.sample(
        status_tokenized_clean,
        random.randint(1, int(len(status_tokenized_clean) / 3)))
    text = nltk.Text(word.lower() for word in nltk.corpus.brown.words())
    for i in range(len(status_tokenized)):
        word = status_tokenized[i]
        if word in rand_words:
            # Note: Text.similar() prints its matches and returns None
            status_tokenized[i] = text.similar(word)
    while not ("toast" in status or "bread" in status):
        status = gen.generate_words()
    return status
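# A minimal sketch of one way to actually substitute similar words: Text.similar()
# above only prints its results, whereas nltk.text.ContextIndex.similar_words()
# returns a list that can be sampled from. The function and variable names below
# are illustrative additions, not part of the original code.
import random
import nltk


def replace_with_similar(tokens, words_to_swap):
    # Build a context index over the lowercased Brown corpus, as in the example above
    idx = nltk.text.ContextIndex([w.lower() for w in nltk.corpus.brown.words()])
    out = []
    for tok in tokens:
        if tok in words_to_swap:
            candidates = idx.similar_words(tok)
            # Fall back to the original token when no similar word is found
            out.append(random.choice(candidates) if candidates else tok)
        else:
            out.append(tok)
    return out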
def start(conn, n):
    #email = emails[random.randint(0, len(emails) - 1)]
    cursor = conn.cursor()
    mg = mk.MarkovGenerator()
    with open(data_path + "posts.txt", 'w+') as f_out:
        query = "SELECT id, state, county, city from users"
        cursor.execute(query)
        users = cursor.fetchall()
        for i in range(n):
            state = -1
            county = -1
            city = -1
            user = users[random.randint(0, len(users) - 1)]
            title = mg.generate_sentence(1)
            content = mg.generate_sentence(random.randint(2, 5))
            # Randomly scope the post to the user's state, county, or city (or no location)
            location = random.randint(1, 4)
            if location == 1:
                state = user[1]
            if location == 2:
                county = user[2]
            if location == 3:
                city = user[3]
            # Each post is written as six lines: user id, title, content, state, county, city
            f_out.write(str(user[0]) + "\n")
            f_out.write(title + "\n")
            f_out.write(content + "\n")
            f_out.write(str(state) + "\n")
            f_out.write(str(county) + "\n")
            f_out.write(str(city) + "\n")
def post(size_sent, bot_data):
    """
    Invokes the post generator.

    :param size_sent: number of sentences in the post
    :type size_sent: int
    :param bot_data: bot data
    :type bot_data: dict
    """
    logger = Logger(get_save_path(bot_data, type_log='posts'))
    if bot_data['active_posts'] == 1:
        database = 'AGE{}SEX{}'.format(bot_data['age'], bot_data['gender'])
        generator = markov.MarkovGenerator(
            markov.MarkovGenerator.GenerateStrategy(database=database,
                                                    window_size=3))
        tmp_post = generator.generate_sentence(size_sent=size_sent)
        post = orthography.orthography(tmp_post)
        logger.save_log(log_message=post)
        methods.create_post(post,
                            login=bot_data['login'],
                            password=bot_data['password'])
    else:
        logger.save_log(log_message='flag "active_posts" in mysql: {}'.format(
            bot_data['active_posts']))
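# A hypothetical call sketching the shape bot_data is expected to have: the function
# above indexes it by string keys ('active_posts', 'age', 'gender', 'login', 'password'),
# so a dict-like row (e.g. from a MySQL DictCursor) is assumed. All values are made up.
example_bot_data = {
    'active_posts': 1,
    'age': 25,
    'gender': 1,
    'login': 'example_bot',
    'password': 'secret',
}
post(size_sent=3, bot_data=example_bot_data)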
def main(lineMax=5000,
         logfileName='ircsimul.log',
         writeStdOut=False,
         realTime=False,
         logInitialPopulation=False):
    # create character maps for various text processing/writing functions
    helpers.makeTransMaps()

    # load up markov generator
    markovGenerator = markov.MarkovGenerator(sourcefileName, reasonsfileName)

    # load channel
    channel = Channel(channelName, markovGenerator, initialUserCount)

    # open log
    fileObjectList = []
    fileObjectList.append(open(logfileName, 'wt', encoding='utf8'))
    if writeStdOut:
        fileObjectList.append(sys.stdout)
    log = Log(fileObjectList)

    # get current date
    date = datetime.datetime.utcnow()
    daycache = date.day

    # create queue
    queue = PriorityQueue()

    # write opening of log
    log.writeLogOpening(date)

    # populates channel with initialPopulation users
    if logInitialPopulation:
        for i in range(0, initialPopulation):
            event = JoinEvent(date, channel.selectOfflineUser(), channel)
            queue.put(event)
    else:
        for i in range(0, initialPopulation):
            channel.setOnline(channel.selectOfflineUser())

    # bulk of messages
    currentEvent = None
    while True:
        if not lineMax == -1:
            if log.totalLines >= lineMax - 1:
                break

        # empty queue
        try:
            while not queue.empty():
                currentEvent = queue.get()
                if currentEvent:
                    line = currentEvent.process()
                    if line:
                        now = datetime.datetime.utcnow()
                        if realTime and (currentEvent.date > now):
                            delta = currentEvent.date - datetime.datetime.utcnow()
                            print(str(delta.total_seconds()))
                            time.sleep(delta.total_seconds())
                            log.write(line)
                            log.flush()
                        else:
                            log.write(line)
                else:
                    break
        except Empty:
            pass

        # check if day changed, if so, write day changed message
        # TODO: make this event based
        if daycache != date.day:
            log.writeDayChange(date)
            daycache = date.day

        # generate line
        determineType = random()
        if determineType > joinPartProbability:
            # user message
            user = channel.selectOnlineUser()
            event = MessageEvent(
                date, user, flavourText(markovGenerator.generateMessage(), user))
        elif determineType > actionProbability:
            # random join/part event
            user = channel.selectUser()
            if user in channel.online:
                if random() < quitProbability:
                    event = QuitEvent(date, user, markovGenerator.generateReason(),
                                      channel)
                else:
                    event = LeaveEvent(date, user, markovGenerator.generateReason(),
                                       channel)
            else:
                event = JoinEvent(date, user, channel)
        elif determineType > kickProbability:
            # user action
            # TODO: implement variable user action text
            event = UserActionEvent(date, channel.selectOnlineUser(), "does action")
        else:
            # kick event
            event = KickEvent(date, channel.selectOnlineUser(),
                              channel.selectOnlineUser(),
                              markovGenerator.generateReason(), channel)
        queue.put(event)

        # makes sure some amount of peeps are online or offline
        # TODO: check if population checks could be made obsolete by having the next join/parts already cached
        # TODO: move to channel class later?
        if channel.onlineUsers < minOnline:
            event = JoinEvent(date, channel.selectOfflineUser(), channel)
            queue.put(event)
        if (channel.userCount - channel.onlineUsers) < minOffline:
            if random() < quitProbability:
                event = QuitEvent(date, user, markovGenerator.generateReason(), channel)
            else:
                event = LeaveEvent(date, user, markovGenerator.generateReason(), channel)
            queue.put(event)

        # TODO: is += possible here?
        date = date + datetime.timedelta(seconds=choice(timeSpan) *
                                         (sin((date.hour) / 24 * pi) + 1.5))

    # write log closing message
    log.writeLogClosing(date)

    # close log file
    log.lfs[0].close()
DB_FILE = "words.db"

supported_ngram_sizes = [3, 4, 5]

source_files = [
    "./data/portmanteau_and_markov.txt",
]

gens = {}
for source_file in source_files:
    corpus_name = utils.filename_from_path(source_file)
    # Track already-generated words in SQLite so the same word is never served twice
    seen = DbSet(DB_FILE, table=corpus_name, col="word")
    gens[corpus_name] = {
        n: iter(markov.MarkovGenerator(n, source_file, already_seen=seen))
        for n in supported_ngram_sizes
    }


@app.route('/word/<corpus>/<int:ngram_size>')
def get_portmanteau(corpus, ngram_size):
    if ngram_size in supported_ngram_sizes:
        word = next(gens[corpus][ngram_size])
        print(f"N = {ngram_size}, CORPUS = {corpus}: SENDING '{word}'.")
        return word
    else:
        abort(404)


@app.route('/portmantauthor/register_user/<username>')
def register_user(username):
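# Separate from the truncated register_user handler above: a minimal usage sketch for
# the /word route, assuming `app` is the Flask application defined in this module and
# that utils.filename_from_path() maps "./data/portmanteau_and_markov.txt" to the corpus
# name "portmanteau_and_markov" (an assumption about that helper, not confirmed here).
with app.test_client() as client:
    resp = client.get('/word/portmanteau_and_markov/3')
    print(resp.status_code, resp.get_data(as_text=True))
    # Unsupported n-gram sizes are rejected with 404
    assert client.get('/word/portmanteau_and_markov/6').status_code == 404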
# info: http://www.decontextualize.com/teaching/dwwp/topics-n-grams-and-markov-chains/
import sys
import markov

generator = markov.MarkovGenerator(n=3, max=500)

for line in sys.stdin:
    line = line.strip()
    generator.feed(line)

for i in range(5):
    print(generator.generate())


def feed(self, text):
    tokens = self.tokenize(text)
    # discard if line is too short
    if len(tokens) < self.n:
        return
    # store the first ngram of this line
    beginning = tuple(tokens[:self.n])
    self.beginnings.append(beginning)
    for i in range(len(tokens) - self.n):
        gram = tuple(tokens[i:i + self.n])
        next = tokens[i + self.n]  # get the element after the gram
        if gram in self.ngrams:
            self.ngrams[gram].append(next)
        else:
            # first occurrence of this gram: start its list of possible next tokens
            self.ngrams[gram] = [next]
def setupCulture(ident, countrycodes, srccount, depth, ranks=None):
    print "Setup Culture : ", ident, "-" * 10

    cachedPlacenames = os.path.join('data', ident + '_placenames.json')
    if os.path.exists(cachedPlacenames):
        print "Reading cached placenames ", cachedPlacenames
        #result = pickle.load( open(cachedCulture, 'rb'))
        result = Culture()
        gen = markov.MarkovGenerator()
        gen.loadCached(cachedPlacenames)
        result.placeNameGenerator = gen
    else:
        placeNameSrcList = os.path.join('data', ident + '_cities.txt')
        if not os.path.exists(placeNameSrcList):
            cities = filterCities(countrycodes)
            fp = open(placeNameSrcList, "wt")
            for city in cities:
                fp.write(city + '\n')
            fp.close()
        else:
            # Read cached filtered names
            cities = []
            for line in open(placeNameSrcList):
                cities.append(string.strip(line))

        print "Setup Culture", ident, len(cities), "cities "

        # Make a city generator
        gen = markov.MarkovGenerator(depth=depth)
        random.shuffle(cities)
        trainCities = cities[:srccount]
        for w in trainCities:
            wseq = list(w)
            gen.trainOne(wseq)
        gen.trainFinish()

        result = Culture()
        result.placeNameGenerator = gen
        #pickle.dump( result, open(cachedCulture, 'wb' ))
        gen.cache(cachedPlacenames)

    # Rank
    if ranks:
        result.ranks = ranks

    # Test cities
    # targetNum = 20
    # uniqCount = 0
    # for i in range(targetNum):
    #     #city = result.genPlaceName()
    #     city = result.genContinentName()
    #     print city.title()

    CULTURES[ident] = result
    return result