def check_results_of_post_url(data: dict, sitename: str) -> bool:
    """
    Downloads a URL with gallery-dl, then checks if the
    downloaded filenames, file content and anchor entries match what was provided by the caller.

    Parameters:
        data: dict with the keys 'url' (the URL passed to gallery-dl), 'filenames'
            (mapping of file name, relative to <rootpath>/test/data/gallery-dl/, to an
            iterable of regex patterns that must each be found in that file) and
            'anchors' (iterable of values expected in the 'entry' column of the
            'archive' table of <rootpath>/test/anchor.db).
        sitename: site name, used only in log messages and in the names of the
            log/output files handed to gallery-dl.

    Returns:
        True if gallery-dl returned no error text and every expected file, file content
        pattern and anchor entry was found; False otherwise. All checks are always run,
        a failure does not stop the remaining ones.
    """
    url = data['url']
    filenames = data['filenames']
    anchors = data['anchors']
    rootpath = db.get_rootpath()

    log.info("hydownloader-test", f'Testing downloading of posts for site {sitename}')
    log_file = rootpath+f"/logs/test-site-{sitename}-gallery-dl.txt"
    result_txt = gallery_dl_utils.run_gallery_dl(
        url=url,
        ignore_anchor=False,
        metadata_only=False,
        log_file=log_file,
        console_output_file=rootpath+f"/test/test-site-{sitename}-gallery-dl-output.txt",
        unsupported_urls_file=rootpath+f"/test/test-site-{sitename}-unsupported-urls-gallery-dl.txt",
        overwrite_existing=False,
        subscription_mode=False,
        test_mode=True
    )

    result = True
    # A truthy return value is treated as an error description.
    if result_txt:
        log.error("hydownloader-test", f"Error returned for {sitename} download: {result_txt}")
        result = False
    else:
        log.info("hydownloader-test", f"Return code for {sitename} download OK")

    for fname in filenames:
        abs_fname = rootpath+"/test/data/gallery-dl/"+fname
        if not os.path.isfile(abs_fname):
            log.error("hydownloader-test", f"Missing expected file: {fname}")
            result = False
            continue
        log.info("hydownloader-test", f"Found expected file: {fname}")
        # Read the file lazily and at most once: files without any content pattern
        # are never opened (they may not be decodable as text), and files with several
        # patterns are not re-read for each of them.
        file_text = None
        for content in filenames[fname]:
            if file_text is None:
                with open(abs_fname) as f:
                    file_text = f.read()
            if re.search(content, file_text):
                log.info("hydownloader-test", "Expected file content found")
            else:
                log.error("hydownloader-test", f"Expected file content ({content}) NOT found")
                result = False

    conn = sqlite3.connect(rootpath+"/test/anchor.db")
    try:
        conn.row_factory = sqlite3.Row
        c = conn.cursor()
        for anchor in anchors:
            # The query is guarded per anchor so that one failing query is logged
            # and the remaining anchors are still checked.
            try:
                c.execute('select entry from archive where entry = ?', (anchor,))
                if c.fetchall():
                    log.info("hydownloader-test", f"Expected anchor {anchor} found in database")
                else:
                    log.error("hydownloader-test", f"Expected anchor {anchor} NOT found in database")
                    result = False
            except sqlite3.OperationalError as e:
                log.error("hydownloader-test", "Error while trying to query anchor database - download failed?", e)
                result = False
    finally:
        # Always release the database handle, even if something unexpected is raised above.
        conn.close()
    return result
def subscription_worker() -> None:
    """
    Main loop of the subscription worker thread.

    Every 2 seconds it asks the database for due subscriptions and checks them one by one
    with gallery-dl until none are due, the worker is paused, or thread shutdown was
    requested (_end_threads_flag). For each check it records the check time on the
    subscription, processes the additional data produced by the download, stores a
    subscription check entry and updates the worker status text.

    _sub_worker_ended_flag is set to False on start and to True when the loop is left
    because _end_threads_flag was set. Any uncaught exception is logged as fatal and
    triggers shutdown().
    """
    global _sub_worker_ended_flag
    try:
        log.info("hydownloader", "Starting subscription worker thread...")
        with _worker_lock:
            _sub_worker_ended_flag = False
        while True:
            time.sleep(2)
            with _worker_lock:
                if _end_threads_flag:
                    break
            subs_due = db.get_due_subscriptions()
            if not subs_due:
                with _worker_lock:
                    if _sub_worker_paused_flag:
                        set_subscription_worker_status("paused")
                    else:
                        set_subscription_worker_status("nothing to do: checked for due subscriptions, found none")
            sub = subs_due[0] if subs_due else None
            while sub:
                with _worker_lock:
                    if _end_threads_flag:
                        break
                    if _sub_worker_paused_flag:
                        set_subscription_worker_status("paused")
                        break
                # A subscription that was never checked before uses the initial file limit below.
                initial_check = sub['last_check'] is None
                url = urls.subscription_data_to_url(sub['downloader'], sub['keywords'])
                check_started_time = time.time()
                status_msg = f"checking subscription: {sub['id']} (downloader: {sub['downloader']}, keywords: {sub['keywords']})"
                set_subscription_worker_status(status_msg)
                # Upper-case only the first character: str.capitalize() would also lowercase
                # the rest of the message and mangle the case of the keywords in the log.
                log.info(f"subscription-{sub['id']}", status_msg[:1].upper() + status_msg[1:])
                if initial_check:
                    log.info(f"subscription-{sub['id']}", "This is the first check for this subscription")
                result = gallery_dl_utils.run_gallery_dl(
                    url=url,
                    ignore_anchor=False,
                    metadata_only=False,
                    log_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-gallery-dl-latest.txt",
                    old_log_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-gallery-dl-old.txt",
                    console_output_file=db.get_rootpath()+f"/temp/subscription-{sub['id']}-gallery-dl-output.txt",
                    unsupported_urls_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-unsupported-urls-gallery-dl-latest.txt",
                    old_unsupported_urls_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-unsupported-urls-gallery-dl-old.txt",
                    overwrite_existing=False,
                    filter_=sub['filter'],
                    chapter_filter=None,
                    subscription_mode=True,
                    abort_after=sub['abort_after'],
                    max_file_count=sub['max_files_initial'] if initial_check else sub['max_files_regular']
                )
                # A truthy result is treated as an error description; only error-free
                # checks update the time of the last successful check.
                if result:
                    log.warning(f"subscription-{sub['id']}", "Error: "+result)
                else:
                    sub['last_successful_check'] = check_started_time
                # The last check time is recorded whether or not the check succeeded.
                sub['last_check'] = check_started_time
                new_files, skipped_files = process_additional_data(subscription_id=sub['id'])
                check_ended_time = time.time()
                db.add_subscription_check(sub['id'], new_files=new_files, already_seen_files=skipped_files, time_started=check_started_time, time_finished=check_ended_time, status=result)
                db.add_or_update_subscriptions([sub])
                status_msg = f"finished checking subscription: {sub['id']} (downloader: {sub['downloader']}, keywords: {sub['keywords']}), new files: {new_files}, skipped: {skipped_files}"
                set_subscription_worker_status(status_msg)
                log.info(f"subscription-{sub['id']}", status_msg[:1].upper() + status_msg[1:])
                # Query again instead of walking the old list, so the due list is always current.
                subs_due = db.get_due_subscriptions()
                sub = subs_due[0] if subs_due else None
            # The inner loop may have been left because of a shutdown request; leave the outer loop too.
            with _worker_lock:
                if _end_threads_flag:
                    break
        with _worker_lock:
            if _end_threads_flag:
                log.info("hydownloader", "Stopping subscription worker thread")
                _sub_worker_ended_flag = True
    except Exception as e:
        log.fatal("hydownloader", "Uncaught exception in subscription worker thread", e)
        shutdown()
def url_queue_worker() -> None:
    """
    Main loop of the single URL queue worker thread.

    Every 2 seconds it asks the database for queued URLs and downloads them one by one
    with gallery-dl until none are left, the worker is paused, or thread shutdown was
    requested (_end_threads_flag). For each URL it stores the status (0 and 'ok' on
    success, 1 and the error text otherwise), the processing time and the new/already
    seen file counts back into the database and updates the worker status text.

    _url_worker_ended_flag is set to False on start and to True when the loop is left
    because _end_threads_flag was set. Any uncaught exception is logged as fatal and
    triggers shutdown().
    """
    global _url_worker_ended_flag
    try:
        log.info("hydownloader", "Starting single URL queue worker thread...")
        with _worker_lock:
            _url_worker_ended_flag = False
        while True:
            time.sleep(2)
            with _worker_lock:
                if _end_threads_flag:
                    break
            urls_to_dl = db.get_urls_to_download()
            if not urls_to_dl:
                with _worker_lock:
                    if _url_worker_paused_flag:
                        set_url_worker_status("paused")
                    else:
                        set_url_worker_status("nothing to do: checked for queued URLs, found none")
            urlinfo = urls_to_dl[0] if urls_to_dl else None
            while urlinfo:
                with _worker_lock:
                    if _end_threads_flag:
                        break
                    if _url_worker_paused_flag:
                        set_url_worker_status("paused")
                        break
                check_time = time.time()
                status_msg = f"downloading URL: {urlinfo['url']}"
                set_url_worker_status(status_msg)
                # Upper-case only the first character: str.capitalize() would also lowercase
                # the rest of the message, so the logged URL would no longer match the real one.
                log.info("single url downloader", status_msg[:1].upper() + status_msg[1:])
                result = gallery_dl_utils.run_gallery_dl(
                    url=urlinfo['url'],
                    ignore_anchor=urlinfo['ignore_anchor'],
                    metadata_only=urlinfo['metadata_only'],
                    log_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-gallery-dl-latest.txt",
                    old_log_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-gallery-dl-old.txt",
                    console_output_file=db.get_rootpath()+f"/temp/single-url-{urlinfo['id']}-gallery-dl-output.txt",
                    unsupported_urls_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-unsupported-urls-gallery-dl-latest.txt",
                    old_unsupported_urls_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-unsupported-urls-gallery-dl-old.txt",
                    overwrite_existing=urlinfo['overwrite_existing'],
                    filter_=urlinfo['filter'],
                    chapter_filter=None,
                    subscription_mode=False,
                    max_file_count=urlinfo['max_files']
                )
                # A truthy result is treated as an error description.
                if result:
                    log.warning("single url downloader", f"Error while downloading {urlinfo['url']}: {result}")
                    urlinfo['status'] = 1
                    urlinfo['status_text'] = result
                else:
                    urlinfo['status'] = 0
                    urlinfo['status_text'] = 'ok'
                # The processing time is recorded whether or not the download succeeded.
                urlinfo['time_processed'] = check_time
                new_files, skipped_files = process_additional_data(url_id=urlinfo['id'])
                urlinfo['new_files'] = new_files
                urlinfo['already_seen_files'] = skipped_files
                db.add_or_update_urls([urlinfo])
                status_msg = f"finished checking URL: {urlinfo['url']}, new files: {new_files}, skipped: {skipped_files}"
                set_url_worker_status(status_msg)
                log.info("single url downloader", status_msg[:1].upper() + status_msg[1:])
                # Query again instead of walking the old list, so the queue is always current.
                urls_to_dl = db.get_urls_to_download()
                urlinfo = urls_to_dl[0] if urls_to_dl else None
            # The inner loop may have been left because of a shutdown request; leave the outer loop too.
            with _worker_lock:
                if _end_threads_flag:
                    break
        with _worker_lock:
            if _end_threads_flag:
                log.info("hydownloader", "Stopping single URL queue worker thread")
                _url_worker_ended_flag = True
    except Exception as e:
        log.fatal("hydownloader", "Uncaught exception in URL worker thread", e)
        shutdown()