def get_user_bios(twitter, recent):
    """Collect candidate bios from the friends list of the ``verified`` account.

    Requests up to 200 users through ``twitter.get_friends_list``, walks them
    in reverse order and keeps the cleaned description of each user that
    passes every filter below.

    Args:
        twitter: Client object exposing ``get_friends_list``.
        recent: Previously used texts; descriptions that ``isTooSimilar``
            reports as too close to any of them are skipped.

    Returns:
        list: The accepted descriptions, shuffled in place before returning.
    """
    response = twitter.get_friends_list(
        screen_name='verified',
        skip_status=True,
        include_user_entities=False,
        count=200,
    )

    accepted = []
    for account in reversed(response['users']):
        bio = clean_description(account['description'])

        # enforce length
        if not bio or len(bio) < 30 or len(bio.split()) < 6:
            continue
        # respect privacy
        if account['protected'] or not account['verified']:
            continue
        # no bad words in user name
        if blacklist.isOffensive(account['name']):
            continue
        # no bad words in description
        if blacklist.isOffensive(bio):
            continue
        # avoid non-english
        if 'en' not in account['lang'] or isNotEnglish(bio):
            continue
        # avoid repeating recent tweets
        if isTooSimilar(bio, recent):
            continue
        # avoid repeating bios already collected in this run
        if isTooSimilar(bio, accepted):
            continue

        accepted.append(bio)

    print(str(len(accepted)) + " bios to tweet")
    random.shuffle(accepted)
    return accepted
def hasAdultContent(data):
    """Report whether any entry in ``data['data']`` looks like adult content.

    An entry is flagged when its lower-cased ``'rating'`` contains ``'r'``,
    ``'nc'`` or ``'x'``, or when its ``'source'`` / ``'username'`` value (if
    the key is present) is reported offensive by ``blacklist.isOffensive``.

    Args:
        data: Mapping whose ``'data'`` value is an iterable of dict-like
            entries, each carrying a ``'rating'`` string.

    Returns:
        bool: True as soon as one entry is flagged, otherwise False.
    """
    rating_markers = ('r', 'nc', 'x')
    text_fields = ('source', 'username')

    for gif in data['data']:
        rating = gif['rating'].lower()
        if any(marker in rating for marker in rating_markers):
            return True
        # The text fields are optional, so only check the ones present.
        for field in text_fields:
            if field in gif and blacklist.isOffensive(gif[field]):
                return True
    return False
def isValidTrack(track, max_length, recentTracks):
    """Check whether ``track`` can be used.

    Args:
        track: Object exposing ``title``, ``description``, ``duration``,
            ``embeddable_by``, ``state`` and ``track_type`` attributes.
        max_length: Longest title length that is still accepted.
        recentTracks: Container of titles that were used recently.

    Returns:
        bool: True only when the track passes every check below and its
        ``track_type`` is ``"original"`` or ``"remix"``.
    """
    # offensive title or description
    if blacklist.isOffensive(track.title) or blacklist.isOffensive(track.description):
        return False
    # title too long
    if len(track.title) > max_length:
        return False
    # duration below the 60000 threshold
    if track.duration < 60000:
        return False
    # must be embeddable by everyone
    if track.embeddable_by != "all":
        return False
    # must be in the "finished" state
    if track.state != "finished":
        return False
    # already used recently
    if track.title in recentTracks:
        return False
    return track.track_type in ("original", "remix")
def getRandomWords(wordList=None):
    """Fetch random nouns from the Wordnik API and collect the inoffensive ones.

    Args:
        wordList: Optional list that receives the accepted words (it is
            extended in place). When omitted, a fresh list is created for
            this call.

    Returns:
        list: ``wordList`` (or the fresh list) with every returned word that
        ``blacklist.isOffensive`` does not flag appended to it.

    Raises:
        AssertionError: If the API call returns nothing.
    """
    # A mutable default ([]) is created once and shared by every call that
    # omits the argument, so words from earlier calls would pile up in later
    # results. Build the list per call instead.
    if wordList is None:
        wordList = []

    # get a list of random words from the wordnik API
    wordnik = swagger.ApiClient(config.wordnik_key, 'http://api.wordnik.com/v4')
    wordsApi = WordsApi.WordsApi(wordnik)
    # Named `results` rather than `random` to avoid shadowing the stdlib module.
    results = wordsApi.getRandomWords(
        includePartOfSpeech='noun',
        minCorpusCount=2000,
        minDictionaryCount=12,
        hasDictionaryDef='true',
        maxLength=10)

    # Raise explicitly: an `assert` statement is removed under `python -O`,
    # which would let an empty/None response slip through. The exception type
    # is kept so existing handlers still work.
    if not results:
        raise AssertionError("Wordnik API error")

    # filter out offensive words
    for entry in results:
        if not blacklist.isOffensive(entry.word):
            wordList.append(entry.word)
    return wordList
def getHypernyms(wordnik, recent, word):
    """Return the usable hypernyms that the Wordnik API reports for ``word``.

    Args:
        wordnik: API client handed to ``WordApi.WordApi`` and ``isNoun``.
        recent: Iterable of recent strings; a hypernym that occurs inside any
            of them is skipped.
        word: The word to look up.

    Returns:
        list: Hypernyms that neither contain nor are contained in ``word``,
        do not occur in ``recent``, are not offensive and pass ``isNoun``.
        Empty when the query returns nothing.
    """
    related = WordApi.WordApi(wordnik).getRelatedWords(
        word=word, relationshipTypes='hypernym')
    if not related:
        return []

    accepted = []
    for group in related:
        for candidate in group.words:
            # Checks run cheapest-first and short-circuit, so `isNoun` is
            # only consulted for candidates that survive the local filters.
            if (candidate in word or word in candidate
                    or any(candidate in text for text in recent)
                    or blacklist.isOffensive(candidate)
                    or not isNoun(wordnik, candidate)):
                continue
            accepted.append(candidate)
    return accepted
def getHypernyms(wordnik, recent, word):
    """Look up hypernyms of ``word`` via Wordnik and filter out unusable ones.

    Args:
        wordnik: API client handed to ``WordApi.WordApi`` and ``isNoun``.
        recent: Iterable of recent strings; hypernyms found inside any of
            them are dropped.
        word: The word whose hypernyms are requested.

    Returns:
        list: The hypernyms that pass every filter, in the order the API
        returned them; an empty list when the query yields nothing.
    """
    def is_usable(candidate):
        # Same filter order as the lookup loop always had; `isNoun` comes
        # last so it only runs for candidates that passed everything else.
        if candidate in word or word in candidate:
            return False
        if any(candidate in text for text in recent):
            return False
        if blacklist.isOffensive(candidate):
            return False
        return isNoun(wordnik, candidate)

    # get hypernyms for the given word
    related = WordApi.WordApi(wordnik).getRelatedWords(
        word=word, relationshipTypes='hypernym')
    if not related:
        return []

    return [
        candidate
        for group in related
        for candidate in group.words
        if is_usable(candidate)
    ]
def getRandomWords(wordnik, recent):
    """Fetch random nouns from the Wordnik API and drop unusable ones.

    Args:
        wordnik: API client handed to ``WordsApi.WordsApi``.
        recent: Iterable of recent strings; a word that occurs inside any of
            them is skipped.

    Returns:
        list: Words that are not offensive and do not occur in ``recent``.

    Raises:
        AssertionError: If the API call returns nothing.
    """
    # get a list of random words from the wordnik API
    # Named `results` rather than `random` to avoid shadowing the stdlib module.
    results = WordsApi.WordsApi(wordnik).getRandomWords(
        includePartOfSpeech='noun',
        minCorpusCount=1000,
        minDictionaryCount=10,
        hasDictionaryDef='true',
        maxLength=10)

    # Raise explicitly: an `assert` statement is removed under `python -O`,
    # which would let an empty/None response slip through. The exception type
    # is kept so existing handlers still work.
    if not results:
        raise AssertionError("Wordnik API error")

    # filter out offensive words and words that were used recently
    wordList = []
    for entry in results:
        if blacklist.isOffensive(entry.word):
            continue
        if any(entry.word in text for text in recent):
            continue
        wordList.append(entry.word)
    return wordList
def getRandomWords(wordnik, recent):
    """Fetch random nouns from the Wordnik API and drop unusable ones.

    Args:
        wordnik: API client handed to ``WordsApi.WordsApi``.
        recent: Iterable of recent strings; a word that occurs inside any of
            them is skipped.

    Returns:
        list: Words that are not offensive and do not occur in ``recent``.

    Raises:
        AssertionError: If the API call returns nothing.
    """
    # get a list of random words from the wordnik API
    # Named `results` rather than `random` to avoid shadowing the stdlib module.
    results = WordsApi.WordsApi(wordnik).getRandomWords(
        includePartOfSpeech='noun',
        minCorpusCount=1000,
        minDictionaryCount=10,
        hasDictionaryDef='true',
        maxLength=10)

    # Raise explicitly: an `assert` statement is removed under `python -O`,
    # which would let an empty/None response slip through. The exception type
    # is kept so existing handlers still work.
    if not results:
        raise AssertionError("Wordnik API error")

    # filter out offensive words and words that were used recently
    wordList = []
    for entry in results:
        if blacklist.isOffensive(entry.word):
            continue
        if any(entry.word in text for text in recent):
            continue
        wordList.append(entry.word)
    return wordList
def get_new_words(crossword_hints):
    """Append new topic/hint entries built from random Wikipedia pages.

    Requests ten random page titles, discards titles and summaries that match
    the rejection markers below, trims both strings, and appends a dict with
    the keys ``'topic'``, ``'hint'`` and ``'crossword'`` for every survivor.

    Args:
        crossword_hints: List that receives the new entries (mutated in
            place).

    Returns:
        The same ``crossword_hints`` list.
    """
    bracket_re = re.compile(r' \([^)]*\)')
    topic_head_re = re.compile(r'^([^,\(]*)')
    hint_head_re = re.compile(r'^([^.;:!?\(]*)')

    # titles containing any of these are rejected right away as too vague
    # ('iowa' is excluded because there are too many of them)
    vague_topic_markers = (' in ', 'list of ', 'iowa')

    # hints containing any of these are too vague to solve
    vague_hint_markers = (
        ' commune ',
        ' parish ',
        ' district ',
        ' town ',
        ' rayon of ',
        ' municipality in ',
        ' county ',
        ' actor',
        ' actress',
        ' singer',
        ' a swimmer ',
        'football player',
        'footballer',
        'figure skater',
        'racing driver',
        'soccer player',
        'basketball player',
        'baseball player',
        'tennis player',
        'cricketer',
        'politician',
        'common year',
        ' a year in ',
        ' a list of ',
    )

    try:
        print_safe("Getting random topics from Wikipedia...")
        candidates = wikipedia.random(pages=10)
    except HTTPTimeoutError:
        # a timeout simply means there is nothing new to add this round
        candidates = []

    for wiki_topic in candidates:
        lowered_title = wiki_topic.lower()
        if any(marker in lowered_title for marker in vague_topic_markers):
            continue
        if blacklist.isOffensive(wiki_topic):
            continue

        # clean up the topic string: drop bracketed parts, keep the text
        # before the first comma or parenthesis
        topic = bracket_re.sub('', wiki_topic)
        topic = topic_head_re.search(topic).group(0).strip()

        # reject too short or too long
        if not (3 <= len(topic) <= 20) or len(topic.split()) > 4:
            continue

        print_safe("Getting summary for " + wiki_topic + "...")
        summary = get_summary(wiki_topic)
        if not summary:
            continue

        # avoid disambiguation blurbs
        if 'this article is about' in summary.lower():
            continue

        # get the description of the topic
        hint = substring_after(summary, [' is ', ' was ', ' are ', ' were '])

        hint_lower = hint.lower()
        if any(marker in hint_lower for marker in vague_hint_markers):
            continue
        if blacklist.isOffensive(hint):
            continue

        # clean up the hint string: normalise abbreviations first so their
        # dots are not mistaken for the end of the sentence
        hint = re.sub(r'U\.S\.', 'US', hint)
        hint = re.sub(r'D\.C\.', 'DC', hint)
        hint = bracket_re.sub('', hint)
        hint = hint_head_re.search(hint).group(0).strip()

        # too long or too short
        if len(hint) < 5 or len(hint) > 36 or len(hint.split()) < 4:
            continue

        crossword_hints.append({
            'topic': topic,
            'hint': hint,
            'crossword': get_crossword_string(topic),
        })

    return crossword_hints