def start(conn, n):
    cursor = conn.cursor()
    mg = mk.MarkovGenerator()
    #email = emails[random.randint(0, len(emails) - 1)]
    with open(data_path + "poll_replies.txt", 'w+') as f_out:
        total_replies = 0

        query = "SELECT id, state, county, city from users"
        cursor.execute(query)
        users = cursor.fetchall()

        query = "SELECT id, state, county, city from polls"
        cursor.execute(query)
        polls = cursor.fetchall()

        while total_replies < n:
            poll = polls[random.randint(0, len(polls) - 1)]
            reply_count = 0
            for user in users:
                if user[1] == poll[1] or user[2] == poll[2] or user[3] == poll[3]:
                    # Reply with roughly a one-in-three chance.
                    if random.randint(0, 2) == 1:
                        content = mg.generate_sentence(random.randint(2, 5))
                        f_out.write(str(user[0]) + "\n")
                        f_out.write(str(poll[0]) + "\n")
                        f_out.write(content + "\n")
                        total_replies += 1
                        reply_count += 1

                        if reply_count == 10:
                            break
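
Each reply above is written as three consecutive lines: user id, poll id, content. A minimal reader for that format (a sketch; load_poll_replies is a hypothetical helper, assuming the same data_path) could look like:

def load_poll_replies(path):
    # Each record is three consecutive lines: user id, poll id, content.
    replies = []
    with open(path) as f_in:
        lines = [line.rstrip("\n") for line in f_in]
    for i in range(0, len(lines) - 2, 3):
        replies.append((int(lines[i]), int(lines[i + 1]), lines[i + 2]))
    return replies

replies = load_poll_replies(data_path + "poll_replies.txt")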
Example #2
def generate_markov_status():
    # Generates a tweet via markov chains and a given corpus. Randomly chooses a
    # length between 50 and 140 characters and an ngram length from 2-4 for
    # variation. Runs until it generates a tweet containing "toast" or "bread".
    with open(corpus_file) as f:
        corpus = f.read()
    gen = markov.MarkovGenerator(corpus,
                                 length=random.randint(50, 140),
                                 ngram=random.randint(2, 4))
    status = ""

    status = gen.generate_words()
    status_tokenized = nltk.word_tokenize(status)
    status_tokenized_clean = [
        word for word in nltk.word_tokenize(status)
        if word not in string.punctuation
    ]
    rand_words = random.sample(
        status_tokenized_clean,
        random.randint(1, int(len(status_tokenized_clean) / 3)))
    text = nltk.Text(word.lower() for word in nltk.corpus.brown.words())
    for i in range(len(status_tokenized)):
        word = status_tokenized[i]
        if word in rand_words:
            # Caveat: nltk.Text.similar() prints its suggestions and returns
            # None, so this assignment does not put a real word back.
            status_tokenized[i] = text.similar(word)

    while not ("toast" in status) or ("bread" in status):
        status = gen.generate_words()
    return status
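
The substituted tokens above are never joined back into status, and nltk.Text.similar() prints its suggestions rather than returning them. A sketch of the missing join step, assuming a hypothetical similar_word(word) helper that returns a single replacement string:

import nltk

def substitute_and_join(status, rand_words, similar_word):
    # similar_word is a hypothetical helper: word -> replacement word.
    tokens = nltk.word_tokenize(status)
    replaced = [similar_word(w) if w in rand_words else w for w in tokens]
    return " ".join(replaced)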
Example #3
def start(conn, n):
	#email = emails[random.randint(0, len(emails) - 1)]
	cursor = conn.cursor()

	mg = mk.MarkovGenerator()
	with open(data_path + "posts.txt", 'w+') as f_out:
		query = "SELECT id, state, county, city from users"
		cursor.execute(query)
		users = cursor.fetchall()

		for i in range(n):
			state = -1
			county = -1
			city = -1

			user = users[random.randint(0, len(users) - 1)]

			title = mg.generate_sentence(1)

			content = mg.generate_sentence(random.randint(2, 5))

			location = random.randint(1, 4)

			# location: 1 = state, 2 = county, 3 = city, 4 = no location.
			if location == 1:
				state = user[1]
			elif location == 2:
				county = user[2]
			elif location == 3:
				city = user[3]

			f_out.write(str(user[0]) + "\n")
			f_out.write(title + "\n")
			f_out.write(content + "\n")
			f_out.write(str(state) + "\n")
			f_out.write(str(county) + "\n")
			f_out.write(str(city) + "\n")
Example #4
def post(size_sent, bot_data):
    """
    Вызывает генератор постов

    :param size_sent: Количесвто предложений в посте
    :type size_sent: int
    :param bot_data: данные бота
    :type bot_data: list
    """
    logger = Logger(get_save_path(bot_data, type_log='posts'))
    if bot_data['active_posts'] == 1:
        database = 'AGE{}SEX{}'.format(bot_data['age'], bot_data['gender'])
        generator = markov.MarkovGenerator(
            markov.MarkovGenerator.GenerateStrategy(database=database,
                                                    window_size=3))
        tmp_post = generator.generate_sentence(size_sent=size_sent)
        post = orthography.orthography(tmp_post)
        logger.save_log(log_message=post)
        methods.create_post(post,
                            login=bot_data['login'],
                            password=bot_data['password'])
    else:
        logger.save_log(log_message='flag "active_posts" in mysql: {}'.format(
            bot_data['active_posts']))
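
For reference, the structure post() expects of bot_data is implicit in the body above; a purely hypothetical example value (all placeholders, not from the source):

example_bot_data = {
    'active_posts': 1,          # 1 enables post generation
    'age': 25,                  # selects the AGE{}SEX{} markov database
    'gender': 'M',
    'login': 'bot_login',       # credentials for methods.create_post
    'password': 'bot_password',
}
post(size_sent=3, bot_data=example_bot_data)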
Example #5
def main(lineMax=5000,
         logfileName='ircsimul.log',
         writeStdOut=False,
         realTime=False,
         logInitialPopulation=False):
    # create character maps for various text processing/writing functions
    helpers.makeTransMaps()

    # load up markov generator
    markovGenerator = markov.MarkovGenerator(sourcefileName, reasonsfileName)

    # load channel
    channel = Channel(channelName, markovGenerator, initialUserCount)

    # open log
    fileObjectList = []
    fileObjectList.append(open(logfileName, 'wt', encoding='utf8'))
    if writeStdOut:
        fileObjectList.append(sys.stdout)
    log = Log(fileObjectList)

    # get current date
    date = datetime.datetime.utcnow()

    daycache = date.day

    # create queue
    queue = PriorityQueue()

    # write opening of log
    log.writeLogOpening(date)

    # populates channel with initialPopulation users
    if logInitialPopulation:
        for i in range(0, initialPopulation):
            event = JoinEvent(date, channel.selectOfflineUser(), channel)
            queue.put(event)
    else:
        for i in range(0, initialPopulation):
            channel.setOnline(channel.selectOfflineUser())

    # bulk of messages
    currentEvent = None
    while True:
        if lineMax != -1 and log.totalLines >= lineMax - 1:
            break
        # empty queue
        try:
            while not queue.empty():
                currentEvent = queue.get()
                if currentEvent:
                    line = currentEvent.process()
                    if line:
                        now = datetime.datetime.utcnow()
                        if realTime and (currentEvent.date > now):
                            delta = currentEvent.date - now
                            print(str(delta.total_seconds()))
                            time.sleep(delta.total_seconds())
                            log.write(line)
                            log.flush()
                        else:
                            log.write(line)
                else:
                    break
        except Empty:
            pass

        # check if day changed, if so, write day changed message
        # TODO: make this event based
        if daycache != date.day:
            log.writeDayChange(date)
            daycache = date.day

        # generate line
        determineType = random()
        if determineType > joinPartProbability:
            # user message
            user = channel.selectOnlineUser()
            event = MessageEvent(
                date, user, flavourText(markovGenerator.generateMessage(),
                                        user))
        elif determineType > actionProbability:
            # random join/part event
            user = channel.selectUser()
            if user in channel.online:
                if random() < quitProbability:
                    event = QuitEvent(date, user,
                                      markovGenerator.generateReason(),
                                      channel)
                else:
                    event = LeaveEvent(date, user,
                                       markovGenerator.generateReason(),
                                       channel)
            else:
                event = JoinEvent(date, user, channel)
        elif determineType > kickProbability:
            # user action
            # TODO: implement variable user action text
            event = UserActionEvent(date, channel.selectOnlineUser(),
                                    "does action")
        else:
            # kick event
            event = KickEvent(date, channel.selectOnlineUser(),
                              channel.selectOnlineUser(),
                              markovGenerator.generateReason(), channel)
        queue.put(event)

        # makes sure some amount of peeps are online or offline
        # TODO: check if population checks could be made obsolete by having the next join/parts already cached
        # TODO: move to channel class later?
        if channel.onlineUsers < minOnline:
            event = JoinEvent(date, channel.selectOfflineUser(), channel)
            queue.put(event)
        if (channel.userCount - channel.onlineUsers) < minOffline:
            if random() < quitProbability:
                event = QuitEvent(date, user, markovGenerator.generateReason(),
                                  channel)
            else:
                event = LeaveEvent(date, user,
                                   markovGenerator.generateReason(), channel)
            queue.put(event)

        # advance the simulated clock; the gap length varies with the hour of day
        date += datetime.timedelta(seconds=choice(timeSpan) *
                                   (sin(date.hour / 24 * pi) + 1.5))

    # write log closing message
    log.writeLogClosing(date)

    # close log file
    log.lfs[0].close()
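
The event-type dispatch above draws one uniform sample and walks it down descending thresholds, so the constants must satisfy joinPartProbability > actionProbability > kickProbability. A minimal sketch of the same scheme with assumed example values (not from the source):

from random import random

joinPartProbability = 0.10   # sample > 0.10 -> message (90% of draws)
actionProbability = 0.05     # 0.05 < sample <= 0.10 -> join/part (5%)
kickProbability = 0.02       # 0.02 < sample <= 0.05 -> user action (3%)
                             # sample <= 0.02 -> kick (2%)

def pick_event_type():
    sample = random()
    if sample > joinPartProbability:
        return "message"
    elif sample > actionProbability:
        return "join/part"
    elif sample > kickProbability:
        return "action"
    return "kick"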
Example #6
DB_FILE = "words.db"


supported_ngram_sizes = [3, 4, 5]
source_files = [
    "./data/portmanteau_and_markov.txt",
]

gens = {}


for source_file in source_files:
    corpus_name = utils.filename_from_path(source_file)
    seen = DbSet(DB_FILE, table=corpus_name, col="word")
    gens[corpus_name] = {
        n: iter(markov.MarkovGenerator(n, source_file, already_seen=seen))
        for n in supported_ngram_sizes
    }


@app.route('/word/<corpus>/<int:ngram_size>')
def get_portmanteau(corpus, ngram_size):
    if ngram_size in supported_ngram_sizes:
        word = next(gens[corpus][ngram_size])
        print(f"N = {ngram_size}, CORPUS = {corpus}: SENDING '{word}'.")
        return word
    else:
        abort(404)

@app.route('/portmantauthor/register_user/<username>')
def register_user(username):
    ...
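
The routes above can be exercised without a running server through Flask's built-in test client. A sketch, assuming this module is importable and utils.filename_from_path yields "portmanteau_and_markov" for the source file:

with app.test_client() as client:
    # Supported ngram size: the handler returns one generated word.
    resp = client.get('/word/portmanteau_and_markov/3')
    assert resp.status_code == 200
    print(resp.get_data(as_text=True))

    # Unsupported ngram size: the handler aborts with 404.
    resp = client.get('/word/portmanteau_and_markov/7')
    assert resp.status_code == 404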
Example #7
#  info: http://www.decontextualize.com/teaching/dwwp/topics-n-grams-and-markov-chains/
import sys
import markov

generator = markov.MarkovGenerator(n=3, max=500)

for line in sys.stdin:
    line = line.strip()
    generator.feed(line)

for i in range(5):
    print(generator.generate())


def feed(self, text):
    tokens = self.tokenize(text)

    # discard if line is too short
    if len(tokens) < self.n:
        return

    # store the first ngram of this line
    beginning = tuple(tokens[:self.n])
    self.beginnings.append(beginning)

    for i in range(len(tokens) - self.n):
        gram = tuple(tokens[i:i + self.n])
        next_token = tokens[i + self.n]  # the element that follows this gram

        if gram in self.ngrams:
            self.ngrams[gram].append(next_token)
        else:
            self.ngrams[gram] = [next_token]
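
For context, a generate counterpart that consumes the beginnings and ngrams built by feed might look like the following sketch (everything beyond the attributes used above, including joining tokens with spaces, is an assumption):

def generate(self):
    # Start from a randomly chosen line opening, then extend one token at a
    # time by sampling the successors recorded for the current n-gram.
    from random import choice

    current = choice(self.beginnings)
    output = list(current)
    for _ in range(self.max):
        if current in self.ngrams:
            next_token = choice(self.ngrams[current])
            output.append(next_token)
            current = tuple(output[-self.n:])
        else:
            break
    return ' '.join(output)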
Example #8
def setupCulture( ident, countrycodes, srccount, depth, ranks=None ):


    print "Setup Culture : ", ident, "-"*10
    cachedPlacenames = os.path.join( 'data', ident+'_placenames.json')

    if os.path.exists( cachedPlacenames ):
        print "Reading cached placenames ", cachedPlacenames
        #result = pickle.load( open(cachedCulture, 'rb'))
        result = Culture()
        gen = markov.MarkovGenerator( )
        gen.loadCached(cachedPlacenames)
        result.placeNameGenerator = gen

    else:
        placeNameSrcList = os.path.join( 'data', ident+'_cities.txt' )

        if not os.path.exists( placeNameSrcList ):
            cities = filterCities( countrycodes )

            with open(placeNameSrcList, "wt") as fp:
                for city in cities:
                    fp.write(city + '\n')
        else:
            # Read cached filtered names
            cities = []
            for line in open( placeNameSrcList ):
                cities.append(line.strip())

        print "Setup Culture", ident, len(cities), "cities "

        # Make a city generator
        gen = markov.MarkovGenerator( depth=depth )

        random.shuffle(cities)

        trainCities = cities[:srccount]
        for w in trainCities:
            wseq = list(w)
            gen.trainOne( wseq )

        gen.trainFinish()

        result = Culture()
        result.placeNameGenerator = gen

        #pickle.dump( result, open(cachedCulture, 'wb' ))
        gen.cache( cachedPlacenames )

    # Rank
    if ranks:
        result.ranks = ranks

    # Test cities
    # targetNum = 20
    # uniqCount = 0
    # for i in range(targetNum):
    #     #city = result.genPlaceName()
    #     city = result.genContinentName()
    #     print city.title()

    CULTURES[ident] = result

    return result
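
A hypothetical call, purely illustrative (the identifier, country codes, and counts are assumptions, not from the source):

# Train a 'nordic' place-name culture from cities in Norway, Sweden and
# Denmark, using 2000 training names and a depth-3 character model.
nordic = setupCulture('nordic', ['NO', 'SE', 'DK'], srccount=2000, depth=3)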