def main():
    environment = defaults.get_environment()
    db = DB(environment, today())
    _ = FindBotsBehaviour(environment, db)
    db.disconnect()

def main():
    environment = defaults.get_environment()
    db = DB(environment, today())
    db_summary = DBSummary(environment)
    api = TwitterAPI(environment, db_summary)

    commands = db.get_commands(screen_names=db_summary.get_account_screen_names())
    processed_commands = db_summary.get_processed_commands(since_id=db.get_baseline_tweet_id())

    for command in commands:
        if command.id in processed_commands:
            logger.info(f'Skipping {command.id}. Already processed: {command.text}')
            continue

        # "+screen_name CATEGORY [t N] [dl N]" sets a tweeter's category with an
        # optional retweet threshold and daily retweet limit.
        m = re.match(r'\+([a-zA-Z0-9_]+) ([A-Z][AB]?)( t ([0-9]+))?( dl ([0-9]+))?',
                     command.text)
        if m:
            screen_name = m.group(1)
            category = m.group(2)
            rt_threshold = m.group(4)
            rt_daily_limit = m.group(6)
            db.set_tweeter_category(screen_name=screen_name,
                                    category=category,
                                    rt_threshold=rt_threshold,
                                    rt_daily_limit=rt_daily_limit)
            status_text = f'+{screen_name} set to {category}'
            if rt_threshold is not None:
                status_text += f' rt threshold {rt_threshold}'
            if rt_daily_limit is not None:
                status_text += f' dl {rt_daily_limit}'
            save_command(command, status_text, db_summary, api.polling_api())
        elif command.text.lower()[:5] == 'add #':
            # "add #tag" pulls word stats for the tag and reports its score.
            tag_name = command.text[5:]
            logger.info(f'Adding {tag_name}')
            call('python3.7 words.py ' + tag_name, shell=True)
            tag = db.get_tag_ranges(tag=f'#{tag_name}',
                                    min_override=db.get_baseline_tweet_id())
            print(tag.name_scores)
            name_score = tag.name_scores[-2] if len(tag.name_scores) > 1 else None
            score_text = '{} / {} = {:.1f}'.format(
                name_score.total_score,
                name_score.status_count,
                name_score.total_score / max(name_score.status_count, 1)
            ) if name_score is not None else ''
            status_text = f'-{tag_name} added. {score_text} {tag.state}'
            save_command(command, status_text, db_summary, api.polling_api())
        elif command.text[:2] not in ('To', 'RT'):
            logger.info(f'Unknown command {command.id}: {command.text}')

    db_summary.disconnect()
    db.disconnect()

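# A minimal, standalone illustration (hedged: the input string is made up) of
# the command grammar parsed above, "+screen_name CATEGORY [t N] [dl N]":
# >>> import re
# >>> m = re.match(r'\+([a-zA-Z0-9_]+) ([A-Z][AB]?)( t ([0-9]+))?( dl ([0-9]+))?',
# ...              '+some_user A t 5 dl 10')
# >>> m.group(1), m.group(2), m.group(4), m.group(6)
# ('some_user', 'A', '5', '10')
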
def main():
    environment = defaults.get_environment()
    db = DB(environment, today())
    _ = DraftTrends(environment, db)

def main():
    environment = defaults.get_environment()
    db = DB(environment, today())
    db_summary = DBSummary(environment)
    tl = TwitterList(environment, db, db_summary)
    tl.add_to_lists()
    tl.remove_from_lists()
    db.disconnect()

def main():
    environment = defaults.get_environment()
    db = DB(environment, today())

    # Map each hashtag seen over the past week (up to yesterday) to a word,
    # keyed on its lowercased form.
    start_date = str(datetime.date.today() - timedelta(days=7))
    end_date = str(datetime.date.today() - timedelta(days=1))
    logger.info(f'Dates: {start_date} {end_date}')

    words = {}
    rows = db.get_top_hashtags(start_date, end_date)
    for row in rows:
        words[row[0].lower()] = row[0]
    logger.info(f'{len(words)} words')

    for word, hashtag in words.items():
        db.set_word_hashtag(word, hashtag)
        logger.debug(f'{word:>30} {hashtag}')

    db.disconnect()

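# Hedged illustration of the dedup above (example values are made up): keys
# are lowercased, so the last casing returned for a hashtag wins.
# >>> words = {}
# >>> for tag in ['Sudan', 'SUDAN', 'Khartoum']:
# ...     words[tag.lower()] = tag
# >>> words
# {'sudan': 'SUDAN', 'khartoum': 'Khartoum'}
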
def __init__(self, environment, hashtag, tags_list):
    self.env = environment
    self.hashtag = hashtag
    self.boring_words = {}
    self.banned_tags = {}
    self.data = {}
    self.t_new = 0
    self.t_foreign = 0
    self.t_skip = 0
    self.t_retweet = 0
    self.ns_score_log = []
    self.retweets = []
    self.date = today()
    self.db = DB(environment, self.date)
    self.db_summary = DBSummary(environment)
    self.load_metadata()

    self.CONSUMER_KEY = self.env.consumer_key
    self.CONSUMER_SECRET = self.env.consumer_secret
    self.current_token = -1
    # Match hashtags (including Arabic diacritics) not preceded by a word character.
    self.hash_tags_re = re.compile(r'(?i)(?<!\w)#[\w\u064b-\u0657]+', re.UNICODE)

    # One API client per stored token, so requests can be spread across tokens.
    self.twitters = list()
    for token in self.db_summary.get_all_tokens():
        api = twitter.Api(consumer_key=self.CONSUMER_KEY,
                          consumer_secret=self.CONSUMER_SECRET,
                          access_token_key=token.key,
                          access_token_secret=token.secret,
                          sleep_on_rate_limit=True)
        self.twitters.append(api)

    self.today_skey = 0
    self.score_names = False
    if hashtag == 'trends':
        if os.path.isfile('metadata/name_score.csv'):
            self.score_names = True
            logger.info('metadata/name_score.csv will be used for name scoring.')
        else:
            logger.info('Warning: metadata/name_score.csv does not exist, '
                        'so name scoring is disabled.')

    self.batch_id = self.db.get_next_batch_id()
    self.baseline_tweet_id = self.db.get_baseline_tweet_id()
    self.today_skey = self.db.get_date_skey(self.date)
    self.loop_pos = -1
    self.all_trends = None

    if hashtag == 'trends':
        self.all_trends = self.db.get_trends()
        self.loop_pos = 0
        # If no explicit tags were given, pull every trend flagged for tracking.
        if tags_list is None:
            tags_list = []
            for (tag, result) in self.all_trends.items():
                if result in ('AUTO_ADD', 'MAN_ADD'):
                    tags_list.append({'tag': tag})
        orig_tags_list = tags_list
        tags_list = []
        for tagdata in orig_tags_list:
            tags_list.append(self.db.get_tag_ranges(tagdata['tag'],
                                                    self.baseline_tweet_id))
        print('Tags_list:', tags_list)
        self.pull_trends(tags_list)
        self.write_data()
    elif hashtag == 'home_timeline':
        status_count = self.pull_data(hashtag)
        logger.info('{} statuses pulled.'.format(status_count))
        self.write_data()
    elif hashtag == 'lists':
        lists = self.twitters[self.db_summary.polling_token_index].GetLists(
            screen_name=self.env.polling_account)
        logger.info('{} lists for account {}.'.format(len(lists),
                                                      self.env.polling_account))
        for lst in lists:
            status_count = self.pull_data(lst.slug)
            logger.info('{} statuses pulled for list {}.'.format(status_count,
                                                                 lst.slug))
        self.write_data()

    self.db.disconnect()

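# Hedged sketch (not in the original class): self.current_token starts at -1
# and self.twitters holds one client per token, which suggests a round-robin
# accessor along these lines to spread calls across rate limits.
# def next_api(self):
#     self.current_token = (self.current_token + 1) % len(self.twitters)
#     return self.twitters[self.current_token]
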
def main():
    env = defaults.get_environment()
    db = DB(env, today())
    promotion = Promotion()
    demotedate_c = (datetime.date.today() - timedelta(days=30)).strftime('%Y-%m-%d')
    demotedate_d = (datetime.date.today() - timedelta(days=90)).strftime('%Y-%m-%d')

    # Promote to C.
    logger.info('Tweeter Promotion %s' % today())
    rows = db.get_tweeter_promotion_stats()

    # If a tweep has more than POWER_TWEEP followers, mark it F when its score
    # is negative. This cannot be done for all tweeps: it would create too many
    # category F's, and we don't want to waste resources storing tweeps we may
    # never encounter again.
    logger.info(f'Starting loop of {len(rows)} records.')
    row_count = len(rows)
    current_row = 0
    for (screen_name, pos, neg, blocked, category, relevance_score,
         followers_count, name, location, time_zone) in rows:
        tweeter = promotion.add(screen_name=screen_name,
                                name=name,
                                category=category,
                                relevance_score=relevance_score,
                                location=location,
                                time_zone=time_zone,
                                followers_count=followers_count)
        if blocked > 3 and blocked > pos and relevance_score <= -10:
            tweeter.new_category = 'B'
        elif neg > pos and (category is not None or relevance_score != 0
                            or followers_count >= POWER_TWEEP):
            if neg > 3:
                tweeter.adjust_score(-2)
            else:
                tweeter.adjust_score(-1)
        else:  # pos >= neg
            if pos > 3:
                tweeter.adjust_score(2)
            elif pos > 1:
                tweeter.adjust_score(1)
        current_row += 1
        if current_row % 100 == 0:
            logger.info(f'{current_row:4d}/{row_count} {category} {screen_name}')

    # Promote top tweeps: recent trenders not already in category C or better.
    logger.info('Promoting top tweeps.')
    db.c.execute('select screen_name from dim_tweeter where category <= ?', ('C',))
    rows = db.c.fetchall()
    famous = [row[0] for row in rows]
    trenders = rank_words(f'{env.bot_data_directory}/trenders_published_%s.txt', 7)
    non_famous = [trender for trender in trenders if trender not in famous]
    for screen_name in non_famous[:50]:
        tweeter = promotion.add(screen_name=screen_name)
        tweeter.adjust_score(1)

    logger.info('Saving changes.')
    promotion.save_all()

    # Demote stale D's to E, then stale C's to D.
    db.set_tweeter_category_by_date(date_category_was_set=demotedate_d,
                                    current_category='D', new_category='E')
    db.set_tweeter_category_by_date(date_category_was_set=demotedate_c,
                                    current_category='C', new_category='D')

    db.disconnect()

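# Hedged summary of the promotion rules above as a pure function; the name
# score_adjustment and the (new_category, delta) return shape are illustrative,
# not part of the original code.
def score_adjustment(pos, neg, blocked, category, relevance_score, followers_count):
    """Return (new_category, score_delta) mirroring the loop in main()."""
    if blocked > 3 and blocked > pos and relevance_score <= -10:
        return 'B', 0                       # heavily blocked and irrelevant
    if neg > pos and (category is not None or relevance_score != 0
                      or followers_count >= POWER_TWEEP):
        return None, -2 if neg > 3 else -1  # net-negative signals
    if pos > 3:
        return None, 2                      # strongly positive
    if pos > 1:
        return None, 1                      # mildly positive
    return None, 0                          # no change
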
def main():
    environment = defaults.get_environment()
    db = DB(environment, today())
    db_summary = DBSummary(environment)
    jdata = Publisher.get_pending(environment)

    trenders_published = list()
    trenders_all = list()
    already_processed = list()

    if 'tweets' in jdata:
        for tweet in jdata['tweets']:
            tweet_status = db_summary.get_tweet_status(tweet['t_id'])
            if tweet_status is None:
                db_summary.save_tweet(tweet)
                for item in tweet['items']:
                    db_summary.save_tweet_item(tweet, item)
                    if tweet['type'] == 'trenders' and item['selected'] == 'Y':
                        trenders_all.append(item['tweet_text'][1:])
                        if tweet['status'] == 'pend-post':
                            trenders_published.append(item['tweet_text'][1:])
            elif tweet_status in ['posted', 'rejected']:
                tweet['status'] = tweet_status
                already_processed.append(tweet)

    if len(trenders_published) > 0:
        with open(f'{environment.bot_data_directory}/trenders_all_{yesterday_file()}.txt',
                  'a') as f:
            for sn in trenders_all:
                f.write('%s\n' % sn)
        with open(f'{environment.bot_data_directory}/trenders_published_{yesterday_file()}.txt',
                  'a') as f:
            for sn in trenders_published:
                f.write('%s\n' % sn)

    db_summary.disconnect()

    trend_date = now()
    if 'trends' in jdata and len(jdata['trends']) > 0:
        relevant_words = db.get_relevant_words()
        generic_words = db.get_generic_words()
        trs = list()
        for trend in jdata['trends']:
            tag = '#' + trend['hashtag'].lower()
            tr = {'hashtag': trend['hashtag'],
                  'status': 'posted',
                  'trend_at': trend_date}
            trs.append(tr)

            tag_discovery_result = db.get_tag_discovery_result(tag)
            status = nvl(tag_discovery_result, 'NONE')
            if trend['status'] == 'pend-post' and status in ('NONE', 'AUTO_DEL', 'MAN_DEL'):
                logger.info('Adding: ' + tag)
                db.save_tag_discovery(tag, 'MAN_ADD')
            elif trend['status'] == 'pend-del' and status in ('AUTO_ADD', 'MAN_ADD'):
                logger.info('Deleting: ' + tag)
                db.save_tag_discovery(tag, 'MAN_DEL')

            # Trend relevance.
            if 'relevance' in trend:
                relevance = relevant_words.get(trend['hashtag'].lower(), 'neutral')
                if trend['relevance'] != relevance:
                    new_relevance = None if trend['relevance'] == 'neutral' else trend['relevance']
                    db.set_word_relevance(trend['hashtag'], new_relevance)

            # Trend generic.
            if 'generic' in trend:
                generic = generic_words.get(trend['hashtag'].lower(), '')
                if trend['generic'] != generic:
                    new_generic = None if trend['generic'] == 'neutral' else trend['generic']
                    db.set_word_generic(trend['hashtag'], new_generic)

        data = {'trends': trs}
        Publisher.publish(environment, data, 'trends')
        db.commit()
        # Touch a marker file so the daily stats job knows to recompute.
        open(f'{environment.temp_file_directory}/compute_daily', 'a').close()

    if 'categories' in jdata and len(jdata['categories']) > 0:
        for cat in jdata['categories']:
            db.set_tweeter_category(cat['screen_name'], cat['category'])
            logger.info(f"Category for {cat['screen_name']} changed to {cat['category']}")
        db.commit()

    if 'words' in jdata and len(jdata['words']) > 0:
        for word in jdata['words']:
            category = word['category']
            if category == '':
                category = None
            db.set_word_generic(word['word'], category)
            logger.info(f"Generic for {word['word']} changed to {category}")

    db.disconnect()

    if len(already_processed) > 0:
        data = {'tweets': already_processed}
        Publisher.publish(environment, data, 'posted')

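# Hedged recap of the trend-state transitions applied in main() above, as a
# pure function; the name resolve_tag_state is illustrative, not from the
# original code.
def resolve_tag_state(decision, current):
    """Return the new tag_discovery state, or None for no change."""
    if decision == 'pend-post' and current in ('NONE', 'AUTO_DEL', 'MAN_DEL'):
        return 'MAN_ADD'   # approved: start (or resume) tracking the tag
    if decision == 'pend-del' and current in ('AUTO_ADD', 'MAN_ADD'):
        return 'MAN_DEL'   # rejected: stop tracking the tag
    return None            # anything else is left untouched
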
def main():
    env = defaults.get_environment()
    _ = NewDimDB(env, today())