def delete_articles_older_than(DAYS, print_progress_for_every_article=False):
    """Delete articles older than DAYS days, together with their cache rows.

    Articles reported as referenced by ``is_the_article_referenced`` are kept.
    Deletions are flushed to the database in batches of ``BATCH_COMMIT_SIZE``;
    a final commit after the loop flushes the remaining partial batch.

    :param DAYS: age threshold in days; only articles older than this are
        considered for deletion.
    :param print_progress_for_every_article: if True, print each examined
        article's running index and ID.
    """
    print(f"Finding articles older than {DAYS} days...")
    all_articles = Article.all_older_than(days=DAYS)
    print(f" ... article count: {len(all_articles)}")

    i = 0
    referenced_in_this_batch = 0
    deleted = []
    for each in all_articles:
        i += 1
        if print_progress_for_every_article:
            print(f"#{i} -- ID: {each.id}")

        # Keep articles that are still referenced elsewhere.
        if is_the_article_referenced(each, True):
            referenced_in_this_batch += 1
            continue

        try:
            # Delete cache rows pointing at this article first, so removing
            # the article itself does not trip a foreign-key constraint.
            articles_cache = ArticlesCache.query.filter_by(
                article_id=each.id).all()
            if articles_cache:
                for each_cache_line in articles_cache:
                    print(
                        f"... ID: {each.id} deleting also cache line: {each_cache_line}"
                    )
                    dbs.delete(each_cache_line)

            deleted.append(each.id)
            dbs.delete(each)

            if i % BATCH_COMMIT_SIZE == 0:
                print(
                    f"Keeping {referenced_in_this_batch} articles from the last {BATCH_COMMIT_SIZE} batch..."
                )
                dbs.commit()
                print(
                    f"... the rest of {BATCH_COMMIT_SIZE-referenced_in_this_batch} are now deleted!!!"
                )
                referenced_in_this_batch = 0
        except sqlalchemy.exc.IntegrityError:
            # Best-effort: log the conflict, roll back this article's pending
            # deletes, and move on to the next candidate.
            traceback.print_exc()
            dbs.rollback()

    # BUG FIX: commit the final partial batch. Previously, any deletions made
    # after the last full BATCH_COMMIT_SIZE boundary were never committed and
    # were lost when the session ended.
    dbs.commit()
    print(f'Deleted: {deleted}')
deleted = [] print("1. finding urls in activity data...") all_urls = set() all_activity_data = UserActivityData.query.all() for each in all_activity_data: url = each.find_url_in_extra_data() if url: all_urls.add(url) print(f" ... url count: {len(all_urls)}") # print(f"2. finding articles older than {DAYS} days...") all_articles = Article.all_older_than(days=DAYS) print(f" ... article count: {len(all_articles)}") i = 0 for each in all_articles: i += 1 info = UserArticle.find_by_article(each) url_found = each.url.as_string() in all_urls if info or url_found: if info: print(f"WON'T DELETE info! {each.id} {each.title}") for ainfo in info: print(ainfo.user_info_as_string()) if url_found: print(f"WON'T DELETE url_found! {each.id} {each.title}")