---------
"""

# Stdlib:
import re
import unicodedata

# Internal:
from munin.provider import Provider
import munin.stopwords

# External
import guess_language
# Module-level side effect: switch on guess_language's enchant option at
# import time, before any language guessing happens.
guess_language.use_enchant(True)


def normalize_unicode_glyphs(string):
    """Return *string* converted to Unicode normalization form NFKC."""
    return unicodedata.normalize('NFKC', string)


def strip_stopwords(words):
    """Guess the language of *words* and load the matching stopword set.

    The words are joined with single spaces and handed to
    ``guess_language.guess_language``.  *words* is returned unchanged when
    the language code is ``'UNKNOWN'`` or when
    ``munin.stopwords.load_stopwords`` yields nothing truthy for that code.

    NOTE(review): the visible part of this function ends right after those
    two early returns; the step that actually removes stopwords is
    presumably cut off from this view - confirm against the full file.
    """
    # guess_language is called with one text, so the words are joined first.
    text = ' '.join(words)
    language_code = guess_language.guess_language(text)
    if language_code == 'UNKNOWN':
        # No usable language guess: hand the input back untouched.
        return words

    stopwords = munin.stopwords.load_stopwords(language_code)
    if not stopwords:
        # Nothing loaded for this language code: nothing to filter against.
        return words
Reference
---------
"""

# Stdlib:
import re
import unicodedata

# Internal:
from munin.provider import Provider
import munin.stopwords

# External
import guess_language
# Module-level side effect: switch on guess_language's enchant option at
# import time, before any language guessing happens.
guess_language.use_enchant(True)


def normalize_unicode_glyphs(string):
    """Return *string* converted to Unicode normalization form NFKC."""
    return unicodedata.normalize('NFKC', string)


def strip_stopwords(words):
    """Guess the language of *words* and load the matching stopword set.

    The words are joined with single spaces and handed to
    ``guess_language.guess_language``.  *words* is returned unchanged when
    the language code is ``'UNKNOWN'`` or when
    ``munin.stopwords.load_stopwords`` yields nothing truthy for that code.

    NOTE(review): the visible part of this function ends right after those
    two early returns; the step that actually removes stopwords is
    presumably cut off from this view - confirm against the full file.
    """
    # guess_language is called with one text, so the words are joined first.
    text = ' '.join(words)
    language_code = guess_language.guess_language(text)
    if language_code == 'UNKNOWN':
        # No usable language guess: hand the input back untouched.
        return words

    stopwords = munin.stopwords.load_stopwords(language_code)
    if not stopwords:
        # Nothing loaded for this language code: nothing to filter against.
        return words
import glob
import itertools
import os
import subprocess
import re
import sys
import time
import winreg

import pywintypes
import win32api
import win32gui
import psutil
import guess_language
# Module-level side effect: switch off guess_language's enchant option at
# import time.
guess_language.use_enchant(False)

import sublib
import common
from util.update_text import decompose_opcodes, undo_space_changes
from util.detect_encoding import detect_encoding


def get_name(path):
    """Return the last component of *path* with its final extension removed."""
    return os.path.splitext(os.path.basename(path))[0]


def poll_window(path, app_name, timeout=30):
    # NOTE(review): only the signature and the opening of the nested Checker
    # class are visible in this view; the rest of the body is cut off, so
    # nothing about its behavior is documented here.
    class Checker:
        def __init__(self, filename, app_name):
def find_title(ltit, otit, ltop, otop):
    '''
    Build a new headline by swapping one topic for another in a real title.

    Steps:
      1. Pick a random category and a random topic in it ("topic 1") whose
         name is not in ``ltop``; retry with new category draws if needed.
      2. Remove that category and pick a second random category.
      3. From the second category's topics drop every topic whose name is
         still present in the first category's remaining list, is topic 1
         itself, or is listed in ``otop``.
      4. Pick a random "topic 2" from what is left and fetch a headline for
         it through ``get_headline``.
      5. Replace the first whole-word, case-insensitive occurrence of
         topic 2's name in that headline with topic 1's name.
      6. If the result is not in ``ltit``, record the choice in ``otit``,
         ``ltop`` and ``otop`` and persist the trimmed histories as JSON.

    Args:
        ltit: Collection of recently generated titles (membership test only).
        otit: List of original headlines already used; appended to on
            success and passed to ``get_headline``.
        ltop: List of topic names recently used as topic 1; appended to on
            success.
        otop: List of topic names recently used as topic 2; appended to on
            success.

    Returns:
        The generated headline on success, otherwise ``False`` (no unused
        topic within 100 category draws, no topic left after filtering,
        ``get_headline`` returned ``False``, or the generated headline is
        already in ``ltit``).

    Side effects:
        On success rewrites ``original_titles.txt``, ``last_topics.txt`` and
        ``original_topics.txt`` in the current working directory.
    '''
    # NOTE(review): presumably guess_language's use_enchant; the import is
    # outside this block - confirm.
    use_enchant(True)
    categories = ['w', 'n', 'b', 't', 'e', 's']

    # --- Topic 1: a topic that was not used recently -----------------------
    valid_topic = False
    count = 0
    while not valid_topic:
        category = random.choice(categories)
        topics_1 = get_topics(category)
        # The loop condition leaves at least one topic in the list, as the
        # original code did.
        while len(topics_1) > 1 and not valid_topic:
            topic_1 = random.choice(topics_1)
            topics_1.remove(topic_1)
            if topic_1['name'] not in ltop:
                valid_topic = True
            else:
                log.info('Skipping recently used topic: %s', topic_1['name'])
        count += 1
        if not valid_topic and count >= 100:
            # Give up for real: previously this only logged and the loop
            # kept spinning when every topic had been used recently.
            log.warning('Exited script with no new topics found')
            return False

    categories.remove(category)
    log.info('Category 1: %s', category)
    log.info('Topic 1: %s', topic_1['name'])

    # --- Topic 2: from a different category, filtered -----------------------
    category = random.choice(categories)
    log.info('Category 2: %s', category)
    topics_2 = get_topics(category)

    # Build a new list instead of removing items from topics_2 while
    # iterating over it; in-place removal skips the element that follows
    # each removed one, letting unwanted topics through.
    leftover_names_1 = {topic['name'] for topic in topics_1}
    candidates = []
    for top_2 in topics_2:
        name = top_2['name']
        if name in leftover_names_1:
            log.info('Remove duplicate topic: %s', name)
        elif name == topic_1['name']:
            log.info('Removed chosen topic 1 from topic list 2: %s',
                     topic_1['name'])
        elif name in otop:
            log.info('Removed recently used topic as original: %s', name)
        else:
            candidates.append(top_2)

    if not candidates:
        log.info('No topics left after filtering')
        return False
    topic_2 = random.choice(candidates)
    log.info('Topic 2: %s', topic_2['name'])

    # --- Headline generation -------------------------------------------------
    headline = get_headline(topic_2, otit, topic_1['name'])
    if headline is False:
        log.info('Invalid headline discarded')
        return False
    log.info('Original title: %s', headline)

    # Escape the topic name so regex metacharacters in it are matched
    # literally, and use a callable replacement so backslashes in topic 1's
    # name are not interpreted as template escapes.
    regex = re.compile(r'\b{0}\b'.format(re.escape(topic_2['name'])),
                       re.IGNORECASE)
    new_headline = regex.sub(lambda _match: topic_1['name'], headline,
                             count=1)

    if new_headline in ltit:
        # Log the generated headline (the value actually compared), not the
        # original one.
        log.info('Generated headline tweeted recently: %s', new_headline)
        return False

    # --- Record the choice and persist trimmed histories ---------------------
    otit.append(headline)
    ltop.append(topic_1['name'])
    otop.append(topic_2['name'])
    for filename, history, keep in (('original_titles.txt', otit, 240),
                                    ('last_topics.txt', ltop, 40),
                                    ('original_topics.txt', otop, 30)):
        with open(filename, 'w') as history_file:
            json.dump(history[-keep:], history_file, ensure_ascii=False)
    return new_headline