Example #1
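# Export tweets for print: page through all ExportedTweet records and render
# each page of tweets into a templated file under output_dir.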
def main(config: dict, output_dir: Path):
    session = create_session(config['db']['url'])
    context = config['print_export']['context']
    default_tz = dateutil.tz.gettz(config['print_export']['default_tz'])
    if not default_tz:
        raise ValueError('Invalid time zone')

    exported_tweets = session.query(ExportedTweet).all()
    tweets_per_page = config['print_export']['tweets_per_page']

    output_filename_template = string.Template(
        config['print_export']['output_filename_template'])
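    # Write one output file per page of tweets_per_page tweets.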
    for i in range(len(exported_tweets) // tweets_per_page + 1):
        start = i * tweets_per_page
        end = start + tweets_per_page
        tweets = exported_tweets[start:end]
        if not tweets:
            break  # no tweets left; fall through to session.close()
        dict_tweets = [t.to_dict(default_tz) for t in tweets]
        output_filename = output_filename_template.substitute(i=f'{i:03}',
                                                              **dict_tweets[0])
        output_path = output_dir / output_filename
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with output_path.open('w') as f:
            render_template(
                f,
                tweets=dict_tweets,
                context=context,
                package=config['print_export'].get('template_package'),
                path=config['print_export'].get('template_path'),
            )
    session.close()
Example #2
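# One-off migration: copy rows from the old PostgreSQL database (accessed
# directly via psycopg2) into the SQLAlchemy-managed database, table by table.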
def main(config: dict):
    if not config['db'].get('host'):
        logger.error('Old database is not configured. Nothing to do')
        return

    conn = psycopg2.connect(
        host=config['db']['host'],
        database=config['db']['database'],
        user=config['db']['user'],
        password=config['db']['password'],
    )
    cur = conn.cursor()

    session = create_session(config['db']['url'])
    session.configure(autoflush=False, expire_on_commit=False)

    migrate_page_urls(session, cur, config['db']['table_urls'])
    migrate_archived_page_urls(session, cur, config['db']['table_archives'])
    migrate_pages(session, cur, config['db']['table_pages'])
    migrate_page_lines(session, cur, config['db']['table_lines'])
    migrate_parsed_page_lines(session, cur, config['db']['table_parsed'])
    migrate_tweets(session, cur, config['db']['table_reviewed'])
    migrate_posted_tweets(session, cur, config['db']['table_posted'])
    migrate_exported_tweets(session, cur, config['db']['table_print_export'])

    session.close()
    conn.close()
Example #3
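# For each configured feed and archive date, look up the closest archived
# snapshot URL and store it as an ArchivedPageURL, skipping combinations that
# are already in the database.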
def main(config: dict):
    feeds = config['feeds']
    dates = [
        datetime.datetime.fromisoformat(d) for d in config['archive_dates']
    ]
    session = create_session(config['db']['url'])
    for feed in feeds:
        if not feed.get('name') or not feed.get('url'):
            continue
        for date in dates:
            if count(
                    session.query(ArchivedPageURL).filter(
                        ArchivedPageURL.feed_url == feed['url'],
                        ArchivedPageURL.date == date,
                    )):
                continue
            archived_url = find_closest_snapshot_url(feed['url'], date)
            archived_page_url = ArchivedPageURL(
                feed_url=feed['url'],
                archived_url=archived_url,
                date=date,
            )
            session.add(archived_page_url)
            session.commit()
    session.close()
Example #4
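# Post one randomly chosen approved-but-not-yet-posted tweet, optionally
# asking for confirmation first, then record it and update the profile.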
def main(config: dict, secrets: dict, interactive: bool, dry_run: bool):
    session = create_session(config['db']['url'])
    approved_tweets = session.query(Tweet).filter(
        Tweet.status == TweetReviewStatus.approved)
    posted_tweets = session.query(PostedTweet).all()
    posted_tweets_parsed = [t.parsed for t in posted_tweets]
    pending_tweets = [
        t for t in approved_tweets if t.parsed not in posted_tweets_parsed
    ]
    total_approved_tweets = count(approved_tweets)
    total_posted_tweets = len(posted_tweets)
    total_pending_tweets = len(pending_tweets)

    logger.info('Number of approved tweets: %d', total_approved_tweets)
    logger.info('Number of posted tweets:   %d', total_posted_tweets)
    logger.info('Number of tweets to post:  %d', total_pending_tweets)

    if not total_pending_tweets:
        logger.warning('Nothing to do, all tweets have already been posted')
        session.close()
        return

    i = random.randint(0, total_pending_tweets - 1)
    tweet = pending_tweets[i]
    template_str = deep_get(config, ['post_tweet', 'tweet_template'],
                            default='${text} ${url}')
    text = Template(template_str).substitute(text=tweet.text, url=tweet.url)

    logger.warning(
        '%d/%d/%d posting tweet "%s"',
        i,
        total_pending_tweets,
        total_approved_tweets,
        text,
    )
    if interactive:
        inp = input('Are you sure you want to post this tweet? [y/N] ')
        if inp != 'y':
            print('Bailing out!')
            session.close()
            return
    status_id = post_tweet(text, secrets, dry_run)
    if not status_id:
        session.close()
        return

    posted_tweet = PostedTweet.from_tweet(tweet, text, status_id)
    session.add(posted_tweet)
    session.commit()

    name = config['post_tweet']['profile_name']
    description = Template(
        config['post_tweet']['profile_description_template']).substitute(
            n_posted=total_posted_tweets + 1, n_approved=total_approved_tweets)
    logger.warning(
        'Updating profile, name: "%s", description: "%s"',
        name,
        description,
    )
    update_profile(name, description, secrets, dry_run)
    session.close()
Example #5
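# Download the text of pages selected for download since the configured date,
# deduplicating URLs across feeds and shuffling the download order.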
def main(
    config: dict,
    cache_path: str,
    dry_run: bool,
):
    since = deep_get(
        config,
        ['download_pages', 'since'],
        default=None,
        process=datetime.datetime.fromisoformat,
    )
    wait_interval = (
        deep_get(config, ['download_pages', 'wait_min'],
                 default=0,
                 process=int),
        deep_get(config, ['download_pages', 'wait_max'],
                 default=0,
                 process=int),
    )
    timeout = deep_get(config, ['download_pages', 'timeout'],
                       default=10,
                       process=int)
    session = create_session(config['db']['url'])
    logger.info(
        'Selecting pages to download since %s',
        since or 'the beginning of time',
    )
    page_urls_by_feeds = select_page_urls_to_download(session, since)
    print_stats(page_urls_by_feeds)
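    # Deduplicate page URLs across feeds, keeping the first feed that
    # references each URL.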
    page_urls_with_feed_names = {}
    for feed_name, page_urls in page_urls_by_feeds.items():
        for page_url in page_urls:
            if page_url not in page_urls_with_feed_names:
                page_urls_with_feed_names[page_url] = feed_name
    total = len(page_urls_with_feed_names)
    logger.info('Pages to download: %d', total)
    if dry_run:
        logger.warning('This is just a dry run, not downloading any pages')
        session.close()
        return
    page_urls_with_feed_names_list = list(page_urls_with_feed_names.items())
    random.shuffle(page_urls_with_feed_names_list)
    for i, (page_url, feed_name) in enumerate(page_urls_with_feed_names_list):
        logger.info('%d/%d Downloading %s', i + 1, total, page_url)
        text = download_page_text(
            cache_path=cache_path,
            feed_name=feed_name,
            page_url=page_url,
            wait_interval=wait_interval,
            timeout=timeout,
        )
        if text is not None:
            page = Page(url=page_url, text=text)
            session.add(page)
    session.commit()
    session.close()
Example #6
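# Export tweets for print: approved tweets when `approved` is set, otherwise
# already posted tweets; texts that were exported before are skipped.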
def main(config: dict, cache_path: Path, approved: bool = False):
    session = create_session(config['db']['url'])
    if approved:
        tweets = session.query(Tweet).filter(
            Tweet.status == TweetReviewStatus.approved)
    else:
        tweets = session.query(PostedTweet).all()
    for tweet in tweets:
        exported_tweet = print_export_tweet(cache_path, tweet)
        if exported_tweet and not count(
                session.query(ExportedTweet).filter(
                    ExportedTweet.text == exported_tweet.text)):
            session.add(exported_tweet)
            session.flush()
    session.commit()
    session.close()
Example #7
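# For every archived feed snapshot, collect the page URLs the feed links to,
# caching the result as CSV, and save them to the database.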
def main(config: dict, cache_path: str):
    feeds = config['feeds']
    timeout = deep_get(
        config, ['download_feeds', 'timeout'], default=30, process=int
    )
    session = create_session(config['db']['url'])
    for feed in feeds:
        if not feed.get('name') or not feed.get('url'):
            continue
        for archived_page_url in session.query(ArchivedPageURL).filter(
            ArchivedPageURL.feed_url == feed['url'],
            ArchivedPageURL.archived_url.isnot(None),
        ):
            logger.info(
                'Found archived feed %s %s',
                archived_page_url.date,
                archived_page_url.archived_url,
            )
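            # Page URLs for each snapshot are cached as a one-column CSV file.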
            cache_file_path = (
                Path(cache_path)
                / 'feeds'
                / safe_filename(archived_page_url.archived_url)
                / 'feed_pages.csv'
            )
            if cache_file_path.is_file():
                logger.info('Reading from cache')
                with cache_file_path.open('r') as f:
                    page_urls = [
                        clean_url(page_url) for (page_url,) in csv.reader(f)
                    ]
            else:
                try:
                    page_urls = download_feed(
                        archived_page_url.archived_url, timeout=timeout
                    )
                except Exception:
                    logger.error(
                        'Failed to download %s', archived_page_url.archived_url
                    )
                    continue  # page_urls would be undefined below
                cache_file_path.parent.mkdir(parents=True, exist_ok=True)
                with cache_file_path.open('w') as f:
                    writer = csv.writer(f, lineterminator='\n')
                    writer.writerows((page_url,) for page_url in page_urls)
            save_page_urls(
                session, feed['name'], page_urls, archived_page_url.date
            )
    session.close()
Example #8
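# Walk backwards through the covid_chance timeline, up to max_pages pages,
# checking the tweets that were posted.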
def main(config: dict, secrets: dict, dry_run: bool):
    session = create_session(config['db']['url'])
    api = twitter.Api(
        consumer_key=secrets['consumer_key'],
        consumer_secret=secrets['consumer_secret'],
        access_token_key=secrets['access_token_key'],
        access_token_secret=secrets['access_token_secret'],
        tweet_mode='extended',
    )
    screen_name = 'covid_chance'
    max_pages = 100
    max_id = None
    for _ in range(max_pages):
        last_id = check_posted_tweets(session, api, screen_name, max_id)
        if not last_id:
            break
        max_id = last_id - 1
    session.close()
Example #9
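# Print per-feed statistics (page, line, parsed and approved counts) as CSV
# on standard output.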
def main(config: dict):
    session = create_session(config['db']['url'])
    feeds = config['feeds']
    writer = csv.DictWriter(
        sys.stdout,
        fieldnames=(
            'feed_name',
            'n_pages',
            'n_lines',
            'n_parsed',
            'n_approved',
        ),
        quoting=csv.QUOTE_NONNUMERIC,
        lineterminator='\n',
    )
    writer.writeheader()
    for feed in feeds:
        if feed.get('name'):
            feed_stats = calc_feed_stats(session, feed['name'])
            writer.writerow(feed_stats)
    session.close()
Example #10
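# Download and save all configured feeds concurrently in a thread pool.
# Note: the single SQLAlchemy session is shared by all worker threads; this
# is only safe if download_and_save_feed serializes access to it.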
def main(config: dict):
    feeds = config['feeds']
    timeout = deep_get(config, ['download_feeds', 'timeout'],
                       default=30,
                       process=int)
    session = create_session(config['db']['url'])
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(
                download_and_save_feed,
                session,
                feed_name=feed['name'],
                feed_url=feed['url'],
                timeout=timeout,
            ) for feed in feeds if feed.get('name') and feed.get('url')
        ]
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as e:
                logger.error('Exception: %s', e)
    session.close()
Example #11
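# Interactively review parsed tweets: approve, reject, edit, skip, step back
# or quit; each decision is committed immediately.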
def main(config, review_all: bool, incl_approved: bool):
    session = create_session(config['db']['url'])

    parsed_page_lines = session.query(ParsedPageLine).filter(
        ParsedPageLine.parsed != '')
    reviewed_tweets = session.query(Tweet).filter(
        Tweet.status != TweetReviewStatus.none)
    approved_tweets = [
        t for t in reviewed_tweets if t.status == TweetReviewStatus.approved
    ]
    rejected_tweets = [
        t for t in reviewed_tweets if t.status == TweetReviewStatus.rejected
    ]
    if review_all:
        pending_parsed_page_lines = parsed_page_lines
    else:
        # Reuse the reviewed-tweets query instead of repeating it.
        reviewed_tweets_parsed = [tweet.parsed for tweet in reviewed_tweets]
        pending_parsed_page_lines = parsed_page_lines.filter(
            ParsedPageLine.parsed.notin_(reviewed_tweets_parsed))
    pending_tweets = [
        Tweet.from_parsed_page_line(parsed_page_line)
        for parsed_page_line in pending_parsed_page_lines
    ]
    if not review_all:
        if incl_approved:
            pending_tweets += approved_tweets
        else:
            invalid_approved_tweets = [t for t in approved_tweets if t.invalid]
            pending_tweets += invalid_approved_tweets
    total_pending_tweets = len(pending_tweets)

    logger.info('Number of matching lines:   %d',
                session.query(PageLine).count())
    logger.info('Number of parsed tweets:    %d', count(parsed_page_lines))
    logger.info('Number of approved tweets:  %d', len(approved_tweets))
    logger.info('Number of rejected tweets:  %d', len(rejected_tweets))
    logger.info('Number of tweets to review: %d', total_pending_tweets)

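    # Review loop: i is the cursor; 's' moves forward without a decision and
    # 'p' steps back to the previous tweet.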
    i = 0
    while i < len(pending_tweets):
        tweet = pending_tweets[i]
        print_tweet(
            tweet,
            i=i + 1,
            total=total_pending_tweets,
            highlight=True,
        )
        inp = None
        while inp is None or (inp not in ('y', 'n', 'e', 'q', 's', 'p', '')):
            inp = rlinput('Do you like this tweet? '
                          '"y" or Enter = yes, '
                          '"n" = no, '
                          '"e" = edit, '
                          '"s" = skip (ask next time again), '
                          '"p" = show previous tweet, '
                          '"q" = quit \n'
                          '> ')
        if inp == 'q':
            break
        if inp == 's':
            i = i + 1
            continue
        if inp == 'p':
            i = max(i - 1, 0)
            continue
        if inp in ('y', ''):
            tweet.status = TweetReviewStatus.approved
        elif inp == 'n':
            tweet.status = TweetReviewStatus.rejected
        elif inp == 'e':
            edited_text = None
            while edited_text is None:
                edited_text = rlinput(
                    'Enter new text or delete it to reject the tweet.\n> ',
                    tweet.edited or tweet.parsed,
                )
            tweet.edited = edited_text
            if edited_text == '':
                tweet.status = TweetReviewStatus.rejected
            else:
                tweet.status = TweetReviewStatus.approved
        else:
            raise NotImplementedError('Invalid input')
        if inspect(tweet).transient:
            session.add(tweet)
        session.commit()
        i = i + 1
    session.close()