Example #1
def main():
    environment = defaults.get_environment()
    db = DB(environment, today())

    _ = FindBotsBehaviour(environment, db)

    db.disconnect()
Example #2
def main():
    # parser = argparse.ArgumentParser(description='Draft stats for the given day and push to cloud for approval.')
    # parser.add_argument('date', metavar='yyyy-mm-dd',
    #                     help='the date to process')
    #
    # args = parser.parse_args()

    environment = defaults.get_environment()
    db = DB(environment, today())
    db_summary = DBSummary(environment)

    api = TwitterAPI(environment, db_summary)

    commands = db.get_commands(screen_names=db_summary.get_account_screen_names())
    processed_commands = db_summary.get_processed_commands(since_id=db.get_baseline_tweet_id())

    for command in commands:
        if command.id in processed_commands:
            logger.info(f'Skipping {command.id}. Already processed: {command.text}')
        else:
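            # Commands look like '+screen_name C t 5 dl 10': set the tweeter's
            # category, with an optional retweet threshold (t) and daily retweet limit (dl).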
            m = re.match(r'\+([a-zA-Z0-9_]+) ([A-Z][AB]?)( t ([0-9]+))?( dl ([0-9]+))?', command.text)
            if m:
                screen_name = m.group(1)
                category = m.group(2)
                rt_threshold = m.group(4)
                rt_daily_limit = m.group(6)

                db.set_tweeter_category(screen_name=screen_name,
                                        category=category,
                                        rt_threshold=rt_threshold,
                                        rt_daily_limit=rt_daily_limit)

                status_text = f'+{screen_name} set to {category}'
                if rt_threshold is not None:
                    status_text += f' rt threshold {rt_threshold}'
                if rt_daily_limit is not None:
                    status_text += f' dl {rt_daily_limit}'
                save_command(command, status_text, db_summary, api.polling_api())
            elif command.text.lower()[:5] == 'add #':
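                # 'add #<tag>': run words.py for the new tag, then report its
                # name score in the resulting status text.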
                tag_name = command.text[5:]
                logger.info(f'Adding {tag_name}')
                call('python3.7 words.py ' + tag_name, shell=True)
                tag = db.get_tag_ranges(tag=f'#{tag_name}', min_override=db.get_baseline_tweet_id())
                print(tag.name_scores)
                name_score = tag.name_scores[-2] if len(tag.name_scores) > 1 else None
                score_text = '{} / {} = {:.1f}'.format(name_score.total_score,
                                                       name_score.status_count,
                                                       name_score.total_score / max(name_score.status_count, 1)
                                                       ) if name_score is not None else ''
                status_text = f'-{tag_name} added. {score_text} {tag.state}'
                save_command(command, status_text, db_summary, api.polling_api())
            else:
                if command.text[:2] not in ('To', 'RT'):
                    logger.info(f'Unknown command {command.id}: {command.text}')

    db_summary.disconnect()
    db.disconnect()
Example #3
def main():
    # parser = argparse.ArgumentParser(description='Draft stats for the given day and push to cloud for approval.')
    # parser.add_argument('date', metavar='yyyy-mm-dd',
    #                     help='the date to process')
    #
    # args = parser.parse_args()

    environment = defaults.get_environment()
    db = DB(environment, today())

    _ = DraftTrends(environment, db)
Example #4
def main():
    environment = defaults.get_environment()
    db = DB(environment, today())
    db_summary = DBSummary(environment)

    tl = TwitterList(environment, db, db_summary)

    tl.add_to_lists()
    tl.remove_from_lists()

    db.disconnect()
Example #5
def main():
    environment = defaults.get_environment()
    db = DB(environment, today())

    start_date = str(datetime.date.today() - timedelta(days=7))
    end_date = str(datetime.date.today() - timedelta(days=1))
    logger.info(f'Dates: {start_date} {end_date}')
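    # Map each hashtag, lowercased, to the casing it appears with in the top-hashtag rows.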
    words = {}
    rows = db.get_top_hashtags(start_date, end_date)
    for row in rows:
        words[row[0].lower()] = row[0]

    logger.info(f'{len(words)} words')

    for word, hashtag in words.items():
        db.set_word_hashtag(word, hashtag)
        logger.debug(f'{word:>30} {hashtag}')

    db.disconnect()
Example #6
    def __init__(self, environment, hashtag, tags_list):
        self.env = environment
        self.hashtag = hashtag

        self.boring_words = {}
        self.banned_tags = {}
        self.data = {}

        self.t_new = 0
        self.t_foreign = 0
        self.t_skip = 0
        self.t_retweet = 0

        # self.ns_tweet_count = []
        # self.ns_total_score = []
        self.ns_score_log = []
        # self.ns_index = -1

        self.retweets = []

        self.date = today()
        self.db = DB(environment, self.date)
        # self.c = self.db.connect(self.date)
        self.db_summary = DBSummary(environment)
        self.load_metadata()

        self.CONSUMER_KEY = self.env.consumer_key
        self.CONSUMER_SECRET = self.env.consumer_secret
        self.current_token = -1

        self.hash_tags_re = re.compile(r'(?i)(?<!\w)#[\w\u064b-\u0657]+',
                                       re.UNICODE)
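        # The \u064b-\u0657 range in the pattern above covers Arabic diacritics,
        # so vowelled Arabic hashtags are matched as well.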

        self.twitters = list()
        for token in self.db_summary.get_all_tokens():
            api = twitter.Api(consumer_key=self.CONSUMER_KEY,
                              consumer_secret=self.CONSUMER_SECRET,
                              access_token_key=token.key,
                              access_token_secret=token.secret,
                              sleep_on_rate_limit=True)
            self.twitters.append(api)

        self.today_skey = 0

        self.score_names = False
        if hashtag == 'trends':
            if os.path.isfile('metadata/name_score.csv'):
                self.score_names = True
                logger.info(
                    "metadata/name_score.csv will be used for name scoring.")
            else:
                logger.warning(
                    "metadata/name_score.csv does not exist so name scoring is disabled."
                )

        self.batch_id = self.db.get_next_batch_id()
        self.baseline_tweet_id = self.db.get_baseline_tweet_id()

        self.today_skey = self.db.get_date_skey(self.date)
        self.loop_pos = -1

        self.all_trends = None
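        # 'hashtag' doubles as a mode switch: 'trends' pulls every discovered
        # trend, 'home_timeline' pulls the home timeline, and 'lists' walks the
        # polling account's Twitter lists.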
        if hashtag == 'trends':
            self.all_trends = self.db.get_trends()
            self.loop_pos = 0
            if tags_list is None:
                tags_list = []
                for (tag, result) in self.all_trends.items():
                    if result in ('AUTO_ADD', 'MAN_ADD'):
                        tags_list.append({'tag': tag})
            orig_tags_list = tags_list
            tags_list = []
            for tagdata in orig_tags_list:
                tags_list.append(
                    self.db.get_tag_ranges(tagdata['tag'],
                                           self.baseline_tweet_id))
            print('Tags_list:', tags_list)
            self.pull_trends(tags_list)
            self.write_data()
        elif hashtag == 'home_timeline':
            status_count = self.pull_data(hashtag)
            logger.info('{} statuses pulled.'.format(status_count))
            self.write_data()
        elif hashtag == 'lists':
            lists = self.twitters[
                self.db_summary.polling_token_index].GetLists(
                    screen_name=self.env.polling_account)
            logger.info('{} lists for account {}.'.format(
                len(lists), self.env.polling_account))
            for tw_list in lists:
                status_count = self.pull_data(tw_list.slug)
                logger.info('{} statuses pulled for list {}.'.format(
                    status_count, tw_list.slug))
            self.write_data()

        self.db.disconnect()
Example #7
def main():
    env = defaults.get_environment()
    db = DB(env, today())
    # db = DB(env, '2018-12-25')
    promotion = Promotion()

    demotedate_c = (datetime.date.today() -
                    timedelta(days=30)).strftime('%Y-%m-%d')
    demotedate_d = (datetime.date.today() -
                    timedelta(days=90)).strftime('%Y-%m-%d')
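    # These cut-offs are used at the end of main() to demote tweeters whose
    # category was last set more than 30 days (C) or 90 days (D) ago.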

    # Promote to C
    logger.info("Tweeter Promotion  %s" % today())

    rows = db.get_tweeter_promotion_stats()

    # If a person has more than POWER_TWEEP followers, mark them as F when they score negatively.
    # We cannot do this for all tweeps because it would produce too many category F's, and we don't
    # want to waste resources storing tweeps we may never encounter in future.
    logger.info(f'Starting loop of {len(rows)} records.')
    row_count = len(rows)
    current_row = 0
    for screen_name, pos, neg, blocked, category, relevance_score, followers_count, name, location, time_zone in rows:
        tweeter = promotion.add(screen_name=screen_name,
                                name=name,
                                category=category,
                                relevance_score=relevance_score,
                                location=location,
                                time_zone=time_zone,
                                followers_count=followers_count)
        # Guard against NULL relevance scores so the comparisons below don't fail.
        if relevance_score is None:
            relevance_score = 0
        if blocked > 3 and blocked > pos and relevance_score <= -10:
            tweeter.new_category = 'B'
        elif neg > pos and (category is not None or relevance_score != 0
                            or followers_count >= POWER_TWEEP):
            if neg > 3:
                tweeter.adjust_score(-2)
            else:
                tweeter.adjust_score(-1)
        else:  # pos >= neg
            if pos > 3:
                tweeter.adjust_score(2)
            elif pos > 1:
                tweeter.adjust_score(1)
        current_row += 1
        if current_row % 100 == 0:
            logger.info(
                f'{current_row:4d}/{row_count} {category} {screen_name}')

    # Promote top tweeps
    logger.info('Promoting top tweeps.')
    db.c.execute('select screen_name from dim_tweeter where category <= ?',
                 ('C', ))
    rows = db.c.fetchall()
    famous = [row[0] for row in rows]

    trenders = rank_words(
        f'{env.bot_data_directory}/trenders_published_%s.txt', 7)
    non_famous = [trender for trender in trenders if trender not in famous]
    for screen_name in non_famous[:50]:
        tweeter = promotion.add(screen_name=screen_name)
        tweeter.adjust_score(1)

    logger.info('Saving changes.')
    promotion.save_all()

    # Demote from D
    db.set_tweeter_category_by_date(date_category_was_set=demotedate_d,
                                    current_category='D',
                                    new_category='E')

    # Demote from C
    db.set_tweeter_category_by_date(date_category_was_set=demotedate_c,
                                    current_category='C',
                                    new_category='D')

    db.disconnect()
Example #8
def main():
    # parser = argparse.ArgumentParser(description='Draft stats for the given day and push to cloud for approval.')
    # parser.add_argument('date', metavar='yyyy-mm-dd',
    #                     help='the date to process')
    #
    # args = parser.parse_args()

    environment = defaults.get_environment()
    db = DB(environment, today())
    db_summary = DBSummary(environment)

    jdata = Publisher.get_pending(environment)

    # c = db_connect(env.summary_database)
    trenders_published = list()
    trenders_all = list()
    already_processed = list()
    if 'tweets' in jdata:
        for tweet in jdata['tweets']:
            tweet_status = db_summary.get_tweet_status(tweet['t_id'])
            if tweet_status is None:
                db_summary.save_tweet(tweet)
                for item in tweet['items']:
                    db_summary.save_tweet_item(tweet, item)
                    if tweet['type'] == 'trenders' and item['selected'] == 'Y':
                        trenders_all.append(item['tweet_text'][1:])
                        if tweet['status'] == 'pend-post':
                            trenders_published.append(item['tweet_text'][1:])
            elif tweet_status in ['posted', 'rejected']:
                tweet['status'] = tweet_status
                already_processed.append(tweet)

        if len(trenders_published) > 0:
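            # Append the selected trender screen names to the day's files; the
            # published list is what rank_words reads when promoting top tweeps.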
            with open(
                    f'{environment.bot_data_directory}/trenders_all_{yesterday_file()}.txt',
                    'a') as f:
                for sn in trenders_all:
                    f.write("%s\n" % sn)
            with open(
                    f'{environment.bot_data_directory}/trenders_published_{yesterday_file()}.txt',
                    'a') as f:
                for sn in trenders_published:
                    f.write("%s\n" % sn)

    db_summary.disconnect()

    trend_date = now()
    # now = now()
    # yesterday = (datetime.datetime.now() - timedelta(days=2)).strftime('%Y-%m-%d')
    if 'trends' in jdata:
        if len(jdata['trends']) > 0:
            # c = db_connect()

            # t = (yesterday,)
            # c.execute('SELECT max(max_id) max_id FROM tag_history where date <= ?', t)
            # row = c.fetchone()
            # max_id = 0
            # if row != None:
            #     max_id = row[0]

            relevant_words = db.get_relevant_words()

            generic_words = db.get_generic_words()

            trs = list()
            for trend in jdata['trends']:
                tag = '#' + trend['hashtag'].lower()
                tr = {
                    'hashtag': trend['hashtag'],
                    'status': 'posted',
                    'trend_at': trend_date
                }
                trs.append(tr)
                tag_discovery_result = db.get_tag_discovery_result(tag)
                status = nvl(tag_discovery_result, 'NONE')

                if trend['status'] == 'pend-post' and status in ('NONE',
                                                                 'AUTO_DEL',
                                                                 'MAN_DEL'):
                    logger.info('Adding: ' + tag)
                    db.save_tag_discovery(tag, 'MAN_ADD')
                elif trend['status'] == 'pend-del' and status in ('AUTO_ADD',
                                                                  'MAN_ADD'):
                    logger.info('Deleting: ' + tag)
                    db.save_tag_discovery(tag, 'MAN_DEL')

                # Trend relevance
                if 'relevance' in trend:
                    relevance = relevant_words.get(trend['hashtag'].lower(), 'neutral')
                    if trend['relevance'] != relevance:
                        new_relevance = None if trend['relevance'] == 'neutral' else trend['relevance']
                        db.set_word_relevance(trend['hashtag'], new_relevance)

                # Trend generic
                if 'generic' in trend:
                    generic = generic_words.get(trend['hashtag'].lower(), '')
                    if trend['generic'] != generic:
                        new_generic = None if trend['generic'] == 'neutral' else trend['generic']
                        db.set_word_generic(trend['hashtag'], new_generic)

            data = {'trends': trs}
            Publisher.publish(environment, data, 'trends')

            db.commit()
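            # Touch a marker file, presumably picked up by a daily compute job.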
            open(f'{environment.temp_file_directory}/compute_daily',
                 'a').close()

    if 'categories' in jdata:
        if len(jdata['categories']) > 0:
            for cat in jdata['categories']:
                db.set_tweeter_category(cat['screen_name'], cat['category'])
                logger.info(f"Category for {cat['screen_name']} changed to {cat['category']}")

            db.commit()

    if 'words' in jdata:
        if len(jdata['words']) > 0:
            for word in jdata['words']:
                category = word['category']
                if category == '':
                    category = None
                db.set_word_generic(word['word'], category)
                logger.info(f"Generic for {word['word']} changed to {category}")

    db.disconnect()

    if len(already_processed) > 0:
        data = {'tweets': already_processed}
        Publisher.publish(environment, data, 'posted')
Example #9
def main():
    env = defaults.get_environment()

    _ = NewDimDB(env, today())