コード例 #1
0
def _commit_pending_deletions(pending, deleted):
    """Commit the session and record which article ids were really deleted.

    On success the ids in ``pending`` are moved to ``deleted``. On an
    IntegrityError the traceback is printed, the session is rolled back and
    ``pending`` is emptied, because the rollback undid those deletions.

    Returns True when the commit succeeded, False otherwise.
    """
    try:
        dbs.commit()
    except sqlalchemy.exc.IntegrityError:
        traceback.print_exc()
        dbs.rollback()
        pending.clear()
        return False

    deleted.extend(pending)
    pending.clear()
    return True


def delete_articles_older_than(DAYS, print_progress_for_every_article=False):
    """Delete unreferenced articles older than ``DAYS`` days, with their cache lines.

    Articles for which ``is_the_article_referenced`` returns a truthy value
    are kept. For every other article the matching ``ArticlesCache`` rows and
    the article itself are deleted through the ``dbs`` session. Work is
    committed every ``BATCH_COMMIT_SIZE`` articles and once more after the
    loop, so the trailing partial batch is persisted as well.

    Args:
        DAYS: age threshold, forwarded to ``Article.all_older_than(days=DAYS)``.
        print_progress_for_every_article: when true, print one progress line
            (position and id) per article examined.

    Returns:
        None. Progress and the list of committed article ids are printed.
    """
    print(f"Finding articles older than {DAYS} days...")
    all_articles = Article.all_older_than(days=DAYS)
    print(f" ... article count: {len(all_articles)}")

    referenced_in_this_batch = 0
    pending = []  # ids deleted in the session but not yet committed
    deleted = []  # ids whose deletion has been committed

    for i, each in enumerate(all_articles, start=1):
        if print_progress_for_every_article:
            print(f"#{i} -- ID: {each.id}")

        # NOTE(review): meaning of the second positional argument (True) is
        # defined by is_the_article_referenced, which is not visible here.
        if is_the_article_referenced(each, True):
            referenced_in_this_batch += 1
        else:
            try:
                article_id = each.id
                articles_cache = ArticlesCache.query.filter_by(
                    article_id=article_id).all()
                for each_cache_line in articles_cache:
                    print(
                        f"... ID: {each.id} deleting also cache line: {each_cache_line}"
                    )
                    dbs.delete(each_cache_line)

                dbs.delete(each)
                pending.append(article_id)
            except sqlalchemy.exc.IntegrityError:
                traceback.print_exc()
                dbs.rollback()
                # The rollback discarded every uncommitted deletion, so none
                # of the pending ids may be reported as deleted.
                pending.clear()

        # Checked for every article, referenced or not, so a referenced
        # article sitting on the batch boundary cannot skip the commit.
        if i % BATCH_COMMIT_SIZE == 0:
            print(
                f"Keeping {referenced_in_this_batch} articles from the last {BATCH_COMMIT_SIZE} batch..."
            )
            if _commit_pending_deletions(pending, deleted):
                print(
                    f"... the rest of {BATCH_COMMIT_SIZE-referenced_in_this_batch} are now deleted!!!"
                )
            referenced_in_this_batch = 0

    # Persist the deletions queued after the last full batch.
    if pending:
        _commit_pending_deletions(pending, deleted)

    print(f'Deleted: {deleted}')
コード例 #2
0
# NOTE(review): presumably filled further down the script; the code that
# uses it is not in view here.
deleted = []

# Step 1: gather every URL found in the extra data of the user activity
# records; records without a URL (falsy result) are ignored.
print("1. finding urls in activity data...")
all_activity_data = UserActivityData.query.all()
all_urls = set(
    filter(
        None,
        (activity.find_url_in_extra_data() for activity in all_activity_data),
    )
)
print(f" ... url count: {len(all_urls)}")

# Step 2: load the candidate articles, i.e. those older than DAYS days.
print(f"2. finding articles older than {DAYS} days...")
all_articles = Article.all_older_than(days=DAYS)
print(f" ... article count: {len(all_articles)}")

# Walk every candidate article and report the ones that will not be deleted.
# NOTE(review): the loop body appears to continue beyond this excerpt; only
# the visible part is documented.
i = 0
for each in all_articles:
    i += 1  # 1-based position of the article within all_articles
    # Records returned by UserArticle.find_by_article for this article.
    info = UserArticle.find_by_article(each)
    # True when the article's URL string is among the URLs in all_urls.
    url_found = each.url.as_string() in all_urls

    # Either condition is reported as a reason not to delete the article.
    if info or url_found:
        if info:
            print(f"WON'T DELETE info! {each.id} {each.title}")
            # Print one line per record found for the article.
            for ainfo in info:
                print(ainfo.user_info_as_string())
        if url_found:
            print(f"WON'T DELETE url_found! {each.id} {each.title}")