Example #1
def check_results_of_post_url(data: dict, sitename: str) -> bool:
    """
    Downloads a URL with gallery-dl, then checks if the
    downloaded filenames, file content and anchor entries match what was provided by the caller.
    """
    url = data['url']
    filenames = data['filenames']
    anchors = data['anchors']
    log.info("hydownloader-test", f'Testing downloading of posts for site {sitename}')
    log_file = db.get_rootpath()+f"/logs/test-site-{sitename}-gallery-dl.txt"
    result_txt = gallery_dl_utils.run_gallery_dl(
        url=url,
        ignore_anchor=False,
        metadata_only=False,
        log_file=log_file,
        console_output_file=db.get_rootpath()+f"/test/test-site-{sitename}-gallery-dl-output.txt",
        unsupported_urls_file=db.get_rootpath()+f"/test/test-site-{sitename}-unsupported-urls-gallery-dl.txt",
        overwrite_existing=False,
        subscription_mode=False,
        test_mode=True
    )
    result = True
    if result_txt:
        log.error("hydownloader-test", f"Error returned for {sitename} download: {result_txt}")
        result = False
    else:
        log.info("hydownloader-test", f"Return code for {sitename} download OK")
    # each expected filename maps to a list of regexes that must all match the file's contents
    for fname in filenames:
        abs_fname = db.get_rootpath()+"/test/data/gallery-dl/"+fname
        if not os.path.isfile(abs_fname):
            log.error("hydownloader-test", f"Missing expected file: {fname}")
            result = False
        else:
            log.info("hydownloader-test", f"Found expected file: {fname}")
            # read the file once, then check each expected content pattern against it
            with open(abs_fname) as f:
                file_content = f.read()
            for content in filenames[fname]:
                if re.search(content, file_content):
                    log.info("hydownloader-test", "Expected file content found")
                else:
                    log.error("hydownloader-test", f"Expected file content ({content}) NOT found")
                    result = False
    # the anchor database is where gallery-dl records already-downloaded entries;
    # verify that all expected anchors are present
    conn = sqlite3.connect(db.get_rootpath()+"/test/anchor.db")
    conn.row_factory = sqlite3.Row
    c = conn.cursor()
    for anchor in anchors:
        try:
            c.execute('select entry from archive where entry = ?', (anchor,))
            if c.fetchall():
                log.info("hydownloader-test", f"Expected anchor {anchor} found in database")
            else:
                log.error("hydownloader-test", f"Expected anchor {anchor} NOT found in database")
                result = False
        except sqlite3.OperationalError as e:
            log.error("hydownloader-test", "Error while trying to query anchor database - download failed?", e)
            result = False
    conn.close()
    return result
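
For reference, a minimal sketch of how this helper might be invoked. The URL, filename-to-regex mapping and anchor IDs below are made-up placeholders, not real test data:

data = {
    'url': 'https://example.com/posts/12345',  # hypothetical post URL
    # each downloaded file maps to a list of regexes that must match its contents
    'filenames': {
        'example/12345_p0.json': [r'"id": 12345'],
    },
    # anchor entries gallery-dl is expected to have written to the archive database
    'anchors': ['example12345_p0'],
}
ok = check_results_of_post_url(data, 'example')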
Example #2
def subscription_worker() -> None:
    global _sub_worker_ended_flag
    try:
        log.info("hydownloader", "Starting subscription worker thread...")
        with _worker_lock:
            _sub_worker_ended_flag = False
        while True:
            time.sleep(2)
            with _worker_lock:
                if _end_threads_flag:
                    break
            subs_due = db.get_due_subscriptions()
            if not subs_due:
                with _worker_lock:
                    if _sub_worker_paused_flag:
                        set_subscription_worker_status("paused")
                    else:
                        set_subscription_worker_status("nothing to do: checked for due subscriptions, found none")
            # process due subscriptions one at a time until none remain
            sub = subs_due[0] if subs_due else None
            while sub:
                with _worker_lock:
                    if _end_threads_flag:
                        break
                    if _sub_worker_paused_flag:
                        set_subscription_worker_status("paused")
                        break
                # a subscription that has never been checked gets its initial (usually larger) file limit
                initial_check = sub['last_check'] is None
                url = urls.subscription_data_to_url(sub['downloader'], sub['keywords'])
                check_started_time = time.time()
                status_msg = f"checking subscription: {sub['id']} (downloader: {sub['downloader']}, keywords: {sub['keywords']})"
                set_subscription_worker_status(status_msg)
                log.info(f"subscription-{sub['id']}", status_msg.capitalize())
                if initial_check:
                    log.info(f"subscription-{sub['id']}", "This is the first check for this subscription")
                result = gallery_dl_utils.run_gallery_dl(
                    url=url,
                    ignore_anchor=False,
                    metadata_only=False,
                    log_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-gallery-dl-latest.txt",
                    old_log_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-gallery-dl-old.txt",
                    console_output_file=db.get_rootpath()+f"/temp/subscription-{sub['id']}-gallery-dl-output.txt",
                    unsupported_urls_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-unsupported-urls-gallery-dl-latest.txt",
                    old_unsupported_urls_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-unsupported-urls-gallery-dl-old.txt",
                    overwrite_existing=False,
                    filter_=sub['filter'],
                    chapter_filter=None,
                    subscription_mode=True,
                    abort_after=sub['abort_after'],
                    max_file_count=sub['max_files_initial'] if initial_check else sub['max_files_regular']
                )
                if result:
                    log.warning(f"subscription-{sub['id']}", f"Error: {result}")
                else:
                    sub['last_successful_check'] = check_started_time
                sub['last_check'] = check_started_time
                new_files, skipped_files = process_additional_data(subscription_id = sub['id'])
                check_ended_time = time.time()
                db.add_subscription_check(sub['id'], new_files=new_files, already_seen_files=skipped_files, time_started=check_started_time, time_finished=check_ended_time, status=result)
                db.add_or_update_subscriptions([sub])
                status_msg = f"finished checking subscription: {sub['id']} (downloader: {sub['downloader']}, keywords: {sub['keywords']}), new files: {new_files}, skipped: {skipped_files}"
                set_subscription_worker_status(status_msg)
                log.info(f"subscription-{sub['id']}", status_msg.capitalize())
                subs_due = db.get_due_subscriptions()
                sub = subs_due[0] if subs_due else None
            with _worker_lock:
                if _end_threads_flag:
                    break
        with _worker_lock:
            if _end_threads_flag:
                log.info("hydownloader", "Stopping subscription worker thread")
                _sub_worker_ended_flag = True
    except Exception as e:
        log.fatal("hydownloader", "Uncaught exception in subscription worker thread", e)
        shutdown()
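
If it helps to see the lifecycle, here is a minimal sketch of how such a worker might be started and stopped. The flag and lock names mirror those used above; the startup wiring itself is an illustration, not the project's actual entry point:

import threading

# start the worker in the background (assumed to live in the same module as the flags)
worker = threading.Thread(target=subscription_worker, name="subscription-worker")
worker.start()

# ... later, request a clean shutdown: the worker polls _end_threads_flag under _worker_lock
with _worker_lock:
    _end_threads_flag = True
worker.join()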
Example #3
def url_queue_worker() -> None:
    global _url_worker_ended_flag
    try:
        log.info("hydownloader", "Starting single URL queue worker thread...")
        with _worker_lock:
            _url_worker_ended_flag = False
        while True:
            time.sleep(2)
            with _worker_lock:
                if _end_threads_flag:
                    break
            urls_to_dl = db.get_urls_to_download()
            if not urls_to_dl:
                with _worker_lock:
                    if _url_worker_paused_flag:
                        set_url_worker_status("paused")
                    else:
                        set_url_worker_status("nothing to do: checked for queued URLs, found none")
            # process queued URLs one at a time until the queue is empty
            urlinfo = urls_to_dl[0] if urls_to_dl else None
            while urlinfo:
                with _worker_lock:
                    if _end_threads_flag:
                        break
                    if _url_worker_paused_flag:
                        set_url_worker_status("paused")
                        break
                check_time = time.time()
                status_msg = f"downloading URL: {urlinfo['url']}"
                set_url_worker_status(status_msg)
                log.info("single url downloader", status_msg.capitalize())
                result = gallery_dl_utils.run_gallery_dl(
                    url=urlinfo['url'],
                    ignore_anchor=urlinfo['ignore_anchor'],
                    metadata_only=urlinfo['metadata_only'],
                    log_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-gallery-dl-latest.txt",
                    old_log_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-gallery-dl-old.txt",
                    console_output_file=db.get_rootpath()+f"/temp/single-url-{urlinfo['id']}-gallery-dl-output.txt",
                    unsupported_urls_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-unsupported-urls-gallery-dl-latest.txt",
                    old_unsupported_urls_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-unsupported-urls-gallery-dl-old.txt",
                    overwrite_existing=urlinfo['overwrite_existing'],
                    filter_=urlinfo['filter'],
                    chapter_filter=None,
                    subscription_mode=False,
                    max_file_count=urlinfo['max_files']
                )
                if result:
                    log.warning("single url downloader", f"Error while downloading {urlinfo['url']}: {result}")
                    urlinfo['status'] = 1
                    urlinfo['status_text'] = result
                else:
                    urlinfo['status'] = 0
                    urlinfo['status_text'] = 'ok'
                urlinfo['time_processed'] = check_time
                new_files, skipped_files = process_additional_data(url_id = urlinfo['id'])
                urlinfo['new_files'] = new_files
                urlinfo['already_seen_files'] = skipped_files
                db.add_or_update_urls([urlinfo])
                status_msg = f"finished checking URL: {urlinfo['url']}, new files: {new_files}, skipped: {skipped_files}"
                set_url_worker_status(status_msg)
                log.info("single url downloader", status_msg.capitalize())
                urls_to_dl = db.get_urls_to_download()
                urlinfo = urls_to_dl[0] if urls_to_dl else None
            with _worker_lock:
                if _end_threads_flag:
                    break
        with _worker_lock:
            if _end_threads_flag:
                log.info("hydownloader", "Stopping single URL queue worker thread")
                _url_worker_ended_flag = True
    except Exception as e:
        log.fatal("hydownloader", "Uncaught exception in URL worker thread", e)
        shutdown()
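
The worker consumes rows returned by db.get_urls_to_download(), so queueing a URL amounts to inserting a row with the fields the loop reads. A hedged sketch, assuming db.add_or_update_urls() accepts a plain dict and that this covers the required fields (the exact schema is an assumption):

queued = {
    'url': 'https://example.com/posts/12345',  # hypothetical URL to download
    'ignore_anchor': False,       # honor the anchor database (skip already-seen files)
    'metadata_only': False,       # download the files, not just metadata
    'overwrite_existing': False,
    'filter': None,               # optional gallery-dl filter expression
    'max_files': None,            # no file count limit
}
db.add_or_update_urls([queued])  # the worker picks this up on its next poll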