def main():
    """Run the bot-finding behaviour analysis against today's database."""
    environment = defaults.get_environment()
    database = DB(environment, today())
    FindBotsBehaviour(environment, database)
    database.disconnect()
def main():
    """Copy the standard database, active from the date given on the command line."""
    environment = defaults.get_environment()
    arg_parser = argparse.ArgumentParser(description='Copy the standard database.')
    arg_parser.add_argument('date', help='the date from which the new database is active')
    parsed = arg_parser.parse_args()
    NewDB(environment, parsed.date)
def main():
    """Process commands tweeted at the bot's own accounts.

    Supported commands:
      ``+<screen_name> <category> [t <rt_threshold>] [dl <rt_daily_limit>]``
          set a tweeter's category, optionally with retweet threshold/daily limit.
      ``add #<tag>``
          start tracking a hashtag (runs words.py, then reports its score).
    Replies ('To...') and retweets ('RT...') are ignored; anything else is
    logged as an unknown command.
    """
    environment = defaults.get_environment()
    db = DB(environment, today())
    db_summary = DBSummary(environment)
    api = TwitterAPI(environment, db_summary)
    commands = db.get_commands(screen_names=db_summary.get_account_screen_names())
    processed_commands = db_summary.get_processed_commands(since_id=db.get_baseline_tweet_id())
    # Raw string: '\+' in a plain string literal is an invalid escape sequence
    # (DeprecationWarning in Python 3.6+). Compiled once, outside the loop.
    command_re = re.compile(r'\+([a-zA-Z0-9_]+) ([A-Z][AB]?)( t ([0-9]+))?( dl ([0-9]+))?')
    for command in commands:
        if command.id in processed_commands:
            logger.info(f'Skipping {command.id}. Already processed: {command.text}')
            continue
        m = command_re.match(command.text)
        if m:
            screen_name = m.group(1)
            category = m.group(2)
            rt_threshold = m.group(4)       # None when the optional 't n' part is absent
            rt_daily_limit = m.group(6)     # None when the optional 'dl n' part is absent
            db.set_tweeter_category(screen_name=screen_name,
                                    category=category,
                                    rt_threshold=rt_threshold,
                                    rt_daily_limit=rt_daily_limit)
            status_text = f'+{screen_name} set to {category}'
            if rt_threshold is not None:
                status_text += f' rt threshold {rt_threshold}'
            if rt_daily_limit is not None:
                status_text += f' dl {rt_daily_limit}'
            save_command(command, status_text, db_summary, api.polling_api())
        elif command.text.lower()[:5] == 'add #':
            tag_name = command.text[5:]
            logger.info(f'Adding {tag_name}')
            # SECURITY FIX: tag_name comes from an untrusted tweet; the original
            # built a shell string with shell=True, allowing command injection.
            # An argument list with shell=False passes tag_name as a single argv.
            call(['python3.7', 'words.py', tag_name])
            tag = db.get_tag_ranges(tag=f'#{tag_name}', min_override=db.get_baseline_tweet_id())
            print(tag.name_scores)
            # Use the second-to-last score window (the last one may be partial).
            name_score = tag.name_scores[-2] if len(tag.name_scores) > 1 else None
            score_text = '{} / {} = {:.1f}'.format(
                name_score.total_score, name_score.status_count,
                name_score.total_score / max(name_score.status_count, 1)
            ) if name_score is not None else ''
            # NOTE(review): original source had a literal line break inside this
            # f-string; reconstructed as '\n' — confirm against the posted output.
            status_text = f'-{tag_name} added. \n{score_text} {tag.state}'
            save_command(command, status_text, db_summary, api.polling_api())
        else:
            if command.text[:2] not in ('To', 'RT'):
                logger.info(f'Unknown command {command.id}: {command.text}')
    db_summary.disconnect()
    db.disconnect()
def main():
    """Draft trend statistics for today.

    CONSISTENCY FIX: every sibling entry point that opens a DB also calls
    db.disconnect(); this one omitted it, leaking the connection until
    process exit.
    """
    environment = defaults.get_environment()
    db = DB(environment, today())
    DraftTrends(environment, db)
    db.disconnect()
def main():
    """Maintain Twitter list membership: add new qualifiers, remove lapsed ones."""
    environment = defaults.get_environment()
    database = DB(environment, today())
    summary = DBSummary(environment)
    twitter_list = TwitterList(environment, database, summary)
    twitter_list.add_to_lists()
    twitter_list.remove_from_lists()
    database.disconnect()
def main():
    """Download Twitter data: trends, lists, the home timeline, or named hashtags."""
    environment = defaults.get_environment()
    arg_parser = argparse.ArgumentParser(description='Download Twitter data.')
    arg_parser.add_argument('command', help='trends, lists or comma separated hashtags')
    parsed = arg_parser.parse_args()
    if parsed.command in ('trends', 'lists', 'home_timeline'):
        # A recognised bulk command: no explicit tag list.
        command, tags_list = parsed.command, None
        logger.info('Trendlist')
    else:
        # Anything else is a comma-separated hashtag list, fetched as 'trends'.
        command = 'trends'
        tags_list = [{'tag': '#{}'.format(name)} for name in parsed.command.split(',')]
    Words(environment, command, tags_list)
def main():
    """Find bots based on activity on the date supplied on the command line."""
    arg_parser = argparse.ArgumentParser(
        description='Find bots based on activity on the given day.')
    arg_parser.add_argument('date', metavar='yyyy-mm-dd', help='the date to process')
    parsed = arg_parser.parse_args()
    environment = defaults.get_environment()
    database = DB(environment, parsed.date)
    FindBots(database, parsed.date)
    database.disconnect()
def main():
    """Record the canonical (mixed-case) spelling of each top hashtag from the past week."""
    environment = defaults.get_environment()
    db = DB(environment, today())
    start_date = str(datetime.date.today() - timedelta(days=7))
    end_date = str(datetime.date.today() - timedelta(days=1))
    logger.info(f'Dates: {start_date} {end_date}')
    # Map lowercase word -> original hashtag casing; later rows win, exactly
    # as the original assignment loop behaved.
    canonical = {row[0].lower(): row[0] for row in db.get_top_hashtags(start_date, end_date)}
    logger.info(f'{len(canonical)} words')
    for word, hashtag in canonical.items():
        db.set_word_hashtag(word, hashtag)
        logger.debug(f'{word:>30} {hashtag}')
    db.disconnect()
def main():
    """Nightly tweeter promotion/demotion.

    Scores every tweeter from the promotion stats, promotes recent trenders
    who are not already well-categorised, then ages out stale C and D
    categorisations after 30/90 days respectively.
    """
    env = defaults.get_environment()
    db = DB(env, today())
    promotion = Promotion()
    demotedate_c = (datetime.date.today() - timedelta(days=30)).strftime('%Y-%m-%d')
    demotedate_d = (datetime.date.today() - timedelta(days=90)).strftime('%Y-%m-%d')
    # Promote to C
    logger.info("Tweeter Promotion %s" % today())
    rows = db.get_tweeter_promotion_stats()
    # If a person has more than POWER_TWEEP followers, mark it F when negative -
    # cannot do it for all tweeps because we would get too many category F's,
    # and we don't want to store tweeps we may never encounter again.
    logger.info(f'Starting loop of {len(rows)} records.')
    row_count = len(rows)
    current_row = 0
    for (screen_name, pos, neg, blocked, category, relevance_score,
         followers_count, name, location, time_zone) in rows:
        tweeter = promotion.add(screen_name=screen_name, name=name, category=category,
                                relevance_score=relevance_score, location=location,
                                time_zone=time_zone, followers_count=followers_count)
        # BUG FIX: relevance_score can be NULL (None) from the database - the
        # commented-out guard in the original shows this was a known issue -
        # and 'None <= -10' raises TypeError in Python 3. Coalesce to 0 for
        # the comparisons, as the original author's disabled fix intended.
        score = relevance_score if relevance_score is not None else 0
        if blocked > 3 and blocked > pos and score <= -10:
            tweeter.new_category = 'B'
        elif neg > pos and (category is not None or score != 0
                            or followers_count >= POWER_TWEEP):
            tweeter.adjust_score(-2 if neg > 3 else -1)
        else:  # pos >= neg
            if pos > 3:
                tweeter.adjust_score(2)
            elif pos > 1:
                tweeter.adjust_score(1)
        current_row += 1
        if current_row % 100 == 0:
            logger.info(f'{current_row:4d}/{row_count} {category} {screen_name}')
    # Promote top tweeps: recent trenders not already in category C or better.
    logger.info('Promoting top tweeps.')
    db.c.execute('select screen_name from dim_tweeter where category <= ?', ('C', ))
    # Set for O(1) membership tests in the filter below.
    famous = {row[0] for row in db.c.fetchall()}
    trenders = rank_words(
        f'{env.bot_data_directory}/trenders_published_%s.txt', 7)
    non_famous = [trender for trender in trenders if trender not in famous]
    for screen_name in non_famous[:50]:
        tweeter = promotion.add(screen_name=screen_name)
        tweeter.adjust_score(1)
    logger.info('Saving changes.')
    promotion.save_all()
    # Demote from D
    db.set_tweeter_category_by_date(date_category_was_set=demotedate_d,
                                    current_category='D', new_category='E')
    # Demote from C
    db.set_tweeter_category_by_date(date_category_was_set=demotedate_c,
                                    current_category='C', new_category='D')
    db.disconnect()
def main():
    """Post (or reject) every pending tweet through the Twitter API.

    Walks the pending queue in order, sleeping env.tweet_delay between posts.
    Retweets go through Publish.retweet(); other tweet types are rendered as
    text plus an image and posted directly. Each tweet's status is saved as
    it is handled. Nothing is actually posted unless env.post == 'post'.
    """
    env = defaults.get_environment()
    db_summary = DBSummary(env)
    tweets = db_summary.get_pending_tweets()
    token = db_summary.get_default_token()
    twitter_api = twitter.Api(consumer_key=env.consumer_key,
                              consumer_secret=env.consumer_secret,
                              access_token_key=token.key,
                              access_token_secret=token.secret)
    pub = Publish(env, db_summary, twitter_api)
    tcount = 0
    for tweet in tweets:
        tcount += 1
        if tweet.status == 'pend-rej':
            tweet.status = 'rejected'
            tweet.posted_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        elif tweet.type == 'retweet':
            if tcount > 1:
                time.sleep(env.tweet_delay)  # pace posts after the first one
            result = dict()
            if env.post == 'post':
                result = pub.retweet(tweet)
            else:
                logger.info('Tweet %s not posted due to env setting.', tweet.tweet_id)
            if 'status' in result and result['status'] != 'OK':
                # API-level failure: record the message as the status.
                tweet.status = result['message']
            else:
                if tweet.status == 'pend-unpost':
                    tweet.status = 'unposted'
                else:
                    tweet.status = 'posted'
                tweet.posted_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        else:
            if tcount > 1:
                time.sleep(env.tweet_delay)
            action = tweet.type
            # Shrink the tweet until it fits: start with 10 items and drop one
            # at a time while the rendered text is over 145 characters.
            i = 10
            tweet_text = pub.write_tweet(tweet, i)
            while len(tweet_text) > 145 and i > 1:
                i -= 1
                tweet_text = pub.write_tweet(tweet, i)
            if not env.production:
                # Defang hashtags/mentions so test runs don't ping real entities.
                tweet_text = tweet_text.replace('#', '-').replace('@', '+')
            img_file = pub.draw_tweet(tweet, pub.scoreWeight[action])
            logger.info('%3d %s', len(tweet_text), tweet_text)
            if env.post == 'post':
                # BUG FIX: the image file handle was opened and never closed
                # (resource leak); 'with' guarantees it is closed after upload.
                with open(img_file, 'rb') as photo:
                    result = twitter_api.PostUpdate(media=photo, status=tweet_text)
                print(result)
                logger.info('Posted!')
                tweet.status = 'posted'
                tweet.posted_at = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        tweet.save_status()
    db_summary.disconnect()
def main():
    """Pull pending approvals from the cloud and apply them locally.

    Handles four payload sections: 'tweets' (approved/rejected drafts, with
    trender names appended to the daily trender files), 'trends' (hashtag
    tracking decisions plus relevance/generic flags), 'categories' (manual
    tweeter categorisation) and 'words' (generic-word flags). Tweets already
    posted/rejected locally are echoed back so the cloud can settle them.
    """
    environment = defaults.get_environment()
    db = DB(environment, today())
    db_summary = DBSummary(environment)
    jdata = Publisher.get_pending(environment)
    trenders_published = list()
    trenders_all = list()
    already_processed = list()
    if 'tweets' in jdata:
        for tweet in jdata['tweets']:
            tweet_status = db_summary.get_tweet_status(tweet['t_id'])
            if tweet_status is None:
                db_summary.save_tweet(tweet)
                for item in tweet['items']:
                    db_summary.save_tweet_item(tweet, item)
                    if tweet['type'] == 'trenders' and item['selected'] == 'Y':
                        # item['tweet_text'] starts with '@'; strip it.
                        trenders_all.append(item['tweet_text'][1:])
                        if tweet['status'] == 'pend-post':
                            trenders_published.append(item['tweet_text'][1:])
            elif tweet_status in ['posted', 'rejected']:
                tweet['status'] = tweet_status
                already_processed.append(tweet)
    if len(trenders_published) > 0:
        with open(f'{environment.bot_data_directory}/trenders_all_{yesterday_file()}.txt',
                  'a') as f:
            for sn in trenders_all:
                f.write("%s\n" % sn)
        with open(f'{environment.bot_data_directory}/trenders_published_{yesterday_file()}.txt',
                  'a') as f:
            for sn in trenders_published:
                f.write("%s\n" % sn)
    db_summary.disconnect()
    trend_date = now()
    if 'trends' in jdata:
        if len(jdata['trends']) > 0:
            relevant_words = db.get_relevant_words()
            generic_words = db.get_generic_words()
            trs = list()
            for trend in jdata['trends']:
                tag = '#' + trend['hashtag'].lower()
                tr = {
                    'hashtag': trend['hashtag'],
                    'status': 'posted',
                    'trend_at': trend_date
                }
                trs.append(tr)
                tag_discovery_result = db.get_tag_discovery_result(tag)
                status = nvl(tag_discovery_result, 'NONE')
                if trend['status'] == 'pend-post' and status in ('NONE', 'AUTO_DEL', 'MAN_DEL'):
                    logger.info('Adding: ' + tag)
                    db.save_tag_discovery(tag, 'MAN_ADD')
                elif trend['status'] == 'pend-del' and status in ('AUTO_ADD', 'MAN_ADD'):
                    logger.info('Deleting: ' + tag)
                    db.save_tag_discovery(tag, 'MAN_DEL')
                # Trend relevance: only write a change when it differs from
                # the current value; 'neutral' maps to NULL.
                if 'relevance' in trend:
                    relevance = (relevant_words[trend['hashtag'].lower()]
                                 if trend['hashtag'].lower() in relevant_words else 'neutral')
                    if trend['relevance'] != relevance:
                        new_relevance = (None if trend['relevance'] == 'neutral'
                                         else trend['relevance'])
                        db.set_word_relevance(trend['hashtag'], new_relevance)
                # Trend generic: same change-only pattern as relevance.
                if 'generic' in trend:
                    generic = (generic_words[trend['hashtag'].lower()]
                               if trend['hashtag'].lower() in generic_words else '')
                    if trend['generic'] != generic:
                        new_relevance = (None if trend['generic'] == 'neutral'
                                         else trend['generic'])
                        db.set_word_generic(trend['hashtag'], new_relevance)
            data = {'trends': trs}
            Publisher.publish(environment, data, 'trends')
            db.commit()
            # Touch the marker file that triggers the daily compute job.
            open(f'{environment.temp_file_directory}/compute_daily', 'a').close()
    if 'categories' in jdata:
        if len(jdata['categories']) > 0:
            for cat in jdata['categories']:
                db.set_tweeter_category(cat['screen_name'], cat['category'])
                # BUG FIX: logger.info was called with bare positional strings
                # ('Category for', name, 'changed to', cat) instead of a
                # %-format string, which makes the logging module raise a
                # formatting error and lose the message.
                logger.info("Category for %s changed to %s",
                            cat['screen_name'], cat['category'])
            db.commit()
    if 'words' in jdata:
        if len(jdata['words']) > 0:
            for word in jdata['words']:
                category = word['category']
                if category == '':
                    category = None
                db.set_word_generic(word['word'], category)
                # BUG FIX: same bare-positional-arguments logging defect.
                logger.info("Generic for %s changed to %s", word['word'], category)
    db.disconnect()
    if len(already_processed) > 0:
        data = {'tweets': already_processed}
        Publisher.publish(environment, data, 'posted')
def main():
    # Draft stat tweets for the given day, push them to the cloud for approval
    # in batches, then collect application metrics and publish those too.
    parser = argparse.ArgumentParser(description='Draft stats for the given day and push to cloud for approval.')
    parser.add_argument('date', metavar='yyyy-mm-dd',
                        help='the date to process')
    args = parser.parse_args()
    environment = defaults.get_environment()
    db = DB(environment, args.date)
    db_summary = DBSummary(environment)
    date_skey = db.get_date_skey(args.date)
    actions = list()
    action = {'type': 'trends'}
    actions.append(action)
    action = {'type': 'mentions'}
    actions.append(action)
    action_ind = 0
    tweets = list()
    stat_tweet_count = 0
    # NOTE(review): 'actions' is passed into Stats and the loop bound is
    # re-evaluated each pass, so Stats presumably appends further actions
    # (e.g. 'trenders' entries carrying a 'trend' key) — confirm in Stats.
    while action_ind < len(actions):
        action = actions[action_ind]['type']
        stats = Stats(args.date, action, db, actions, environment,
                      actions[action_ind]['trend'] if action == 'trenders' else None)
        i = 100
        is_tweetable = True
        if action == "trenders":
            tweet = stats.write_tweet(i)
            # Drop unpostable trender tweets and cap them per day.
            if not stats.is_trenders_tweet_postable(tweet) or stat_tweet_count >= DAILY_STAT_TWEET_LIMIT:
                is_tweetable = False
        elif action == "trends":
            tweet = stats.write_tweet(i)
        elif action == "mentions":
            tweet = stats.write_tweet(i)
        # NOTE(review): nesting below reconstructed from whitespace-mangled
        # source; verify that skipped (not tweetable) tweets are indeed
        # neither saved nor published.
        if is_tweetable:
            db_summary.save_tweet(tweet)
            stat_tweet_count += 1
            if tweet is not None:
                tweets.append(tweet)
                if len(tweets) >= 2:
                    # Publish drafts in batches of two, pausing between pushes.
                    data = {'tweets': tweets, 'date': args.date}
                    Publisher.publish(environment, data, 'draft')
                    tweets = list()
                    time.sleep(10)
        action_ind += 1
    db_summary.disconnect()
    # Now get app metrics
    rows = db.get_tweeter_category_counts()
    metric_dict = {'date': args.date, 'other': 0}
    for cat, count in rows:
        if cat is None:
            cat = ' '  # uncategorised bucket
        if cat in ('A', 'B', 'C', 'D', 'E', 'F', 'R', ' '):
            metric_dict[cat] = count
        else:
            metric_dict['other'] += count
    # Get count of total tweets and tweets by category
    rows = db.get_tweeter_category_tweet_counts(date_skey)
    metric_dict['tweets_total'] = 0
    metric_dict['tweets_other'] = 0
    for cat, count in rows:
        metric_dict['tweets_total'] += count
        if cat is None:
            cat = ' '
        if cat in ('A', 'B', 'C', 'D', 'E', 'F', 'R', ' '):
            metric_dict['tweets_' + cat] = count
        else:
            metric_dict['tweets_other'] += count
    # Add file sizes
    metric_dict['fact_db_size'] = os.path.getsize(environment.database)
    metric_dict['dim_db_size'] = os.path.getsize(environment.dimension_database)
    metric_dict['summ_db_size'] = os.path.getsize(environment.summary_database)
    followers_count = db.get_tweeter_followers_count('pakpolstats')
    metric_dict['account_followers'] = followers_count
    # Final push: any remaining drafts plus the metrics payload.
    data = {'tweets': tweets, 'metrics': metric_dict, 'date': args.date}
    Publisher.publish(environment, data, 'draft')
def main():
    """Create a fresh dimension database dated today."""
    environment = defaults.get_environment()
    NewDimDB(environment, today())