def get_conf(name: str) -> Union[str, int, bool]:
    check_init()
    if name in _config:
        return _config[name]
    if name in C.DEFAULT_CONFIG:
        log.warning("hydownloader", f'Configuration key not found in user config, default value was used: {name}')
        return C.DEFAULT_CONFIG[name]
    log.fatal("hydownloader", f'Invalid configuration key: {name}')

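# Illustrative sketch (not part of hydownloader): the lookup order implemented by get_conf()
# above, reduced to plain dictionaries. The keys and values here are made up for the example;
# only the fallback logic (user config -> shipped defaults -> hard error) mirrors the function.
def _lookup_example(name, user_config, default_config):
    # a user-provided value wins
    if name in user_config:
        return user_config[name]
    # otherwise fall back to the shipped default, which the real code logs as a warning
    if name in default_config:
        return default_config[name]
    # unknown keys are a hard error in the real code (log.fatal), modeled here as an exception
    raise KeyError(f"Invalid configuration key: {name}")

# Example: _lookup_example("some-key", {"some-key": 42}, {"some-key": 0, "other-key": True}) == 42
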
def check_db_version() -> None:
    c = get_conn().cursor()
    c.execute('select version from version')
    v = c.fetchall()
    if len(v) != 1:
        log.fatal("hydownloader", "Invalid version table in hydownloader database")
    if v[0]['version'] != __version__:
        log.fatal("hydownloader", "Unsupported hydownloader database version found")

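# Minimal, self-contained sketch of the single-row version table that check_db_version() expects.
# The table and column names match the query above; everything else (in-memory DB, sample value)
# is illustrative only.
import sqlite3

def _version_check_example():
    conn = sqlite3.connect(':memory:')
    conn.row_factory = sqlite3.Row
    c = conn.cursor()
    c.execute('create table version(version text)')
    c.execute('insert into version(version) values (?)', ('0.1.0',))  # sample value, not a real hydownloader version
    c.execute('select version from version')
    rows = c.fetchall()
    # exactly one row is expected; a mismatch with the running version would be fatal in the real code
    assert len(rows) == 1
    return rows[0]['version']
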
def subscription_data_to_url(downloader: str, keywords: str) -> str:
    """
    This function takes a hydownloader downloader name (not the same as a gallery-dl downloader name!)
    and some keywords and generates a (usually gallery) URL for gallery-dl to download.
    In Hydrus terms, this does the same thing as a GUG (gallery URL generator).
    """
    if downloader == "gelbooru":
        return f"https://gelbooru.com/index.php?page=post&s=list&tags={keywords}"
    if downloader == "pixivuser":
        return f"https://www.pixiv.net/en/users/{keywords}"
    if downloader == "pixivranking":
        return f"https://www.pixiv.net/ranking.php?mode={keywords}"
    if downloader == "pixivtagsearch":
        return f"https://www.pixiv.net/en/tags/{keywords}/artworks?s_mode=s_tag"
    if downloader == "raw":
        return keywords
    if downloader == "nijieuser":
        return f"https://nijie.info/members.php?id={keywords}"
    if downloader == "lolibooru":
        return f"https://lolibooru.moe/post?tags={keywords}"
    if downloader == "patreon":
        return f"https://www.patreon.com/{keywords}/posts"
    if downloader == "danbooru":
        return f"https://danbooru.donmai.us/posts?tags={keywords}"
    if downloader == "3dbooru":
        return f"http://behoimi.org/post/index?tags={keywords}"
    if downloader == "sankaku":
        return f"https://chan.sankakucomplex.com/?tags={keywords}&commit=Search"
    if downloader == "artstationuser":
        return f"https://www.artstation.com/{keywords}"
    if downloader == "idolcomplex":
        return f"https://idol.sankakucomplex.com/?tags={keywords}&commit=Search"
    if downloader == "twitter":
        return f"https://twitter.com/{keywords}"
    if downloader == "tumblr":
        return f"https://{keywords}.tumblr.com"
    if downloader == "deviantartuser":
        return f"https://deviantart.com/{keywords}"
    if downloader == "fanbox":
        return f"https://{keywords}.fanbox.cc"
    if downloader == "fantia":
        return f"https://fantia.jp/fanclubs/{keywords}"
    if downloader == "webtoons":
        return f"https://webtoons.com/{keywords}"
    if downloader == "kemonoparty":
        return f"https://kemono.party/{keywords}"
    if downloader == "baraag":
        return f"https://baraag.net/@{keywords}"
    if downloader == "hentaifoundry":
        return f"https://www.hentai-foundry.com/user/{keywords}/profile"
    if downloader == "yandere":
        return f"https://yande.re/post?tags={keywords}"
    log.fatal("hydownloader", f"Invalid downloader: {downloader}")

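# A compact sketch of the same GUG-style idea using a template table instead of an if-chain.
# The two templates below are copied from the function above; the helper itself is illustrative
# and not how hydownloader is actually structured.
_URL_TEMPLATES = {
    "gelbooru": "https://gelbooru.com/index.php?page=post&s=list&tags={keywords}",
    "danbooru": "https://danbooru.donmai.us/posts?tags={keywords}",
}

def _gug_example(downloader: str, keywords: str) -> str:
    if downloader == "raw":
        return keywords  # "raw" passes the keywords through unchanged, as above
    template = _URL_TEMPLATES.get(downloader)
    if template is None:
        raise ValueError(f"Invalid downloader: {downloader}")
    return template.format(keywords=keywords)

# _gug_example("danbooru", "blue_sky") -> "https://danbooru.donmai.us/posts?tags=blue_sky"
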
def update_anchor(path: str, hydrus_master_db: str, sites: str, unrecognized_urls_file: Optional[str], recognized_urls_file: Optional[str]) -> None:
    """
    This function goes through all URLs in a Hydrus database, and tries to match them to known
    site-specific URL patterns to generate anchor database entries that gallery-dl can recognize.
    For some sites, the anchor format differs from the gallery-dl default, these are set in gallery-dl-config.json.
    """
    log.init(path, True)
    db.init(path)
    if not os.path.isfile(hydrus_master_db):
        log.fatal("hydownloader-anchor-exporter", "The given client.master.db file does not exist!")
    hydrus_db = sqlite3.connect(hydrus_master_db)
    hydrus_db.row_factory = sqlite3.Row
    anchor_init_needed = not os.path.isfile(path + "/anchor.db")
    anchor_db = sqlite3.connect(path + "/anchor.db")
    hc = hydrus_db.cursor()
    ac = anchor_db.cursor()
    if anchor_init_needed:
        ac.execute('CREATE TABLE archive (entry PRIMARY KEY) WITHOUT ROWID')
        anchor_db.commit()
    ac.execute('select * from archive')
    known_anchors = {row[0] for row in ac.fetchall()}
    log.info("hydownloader-anchor-exporter", "Querying Hydrus database for URLs...")
    hc.execute('select * from url_domains natural inner join urls')
    rows = hc.fetchall()
    all_rows = len(rows)
    processed = 0
    suspicious_urls = set()
    recognized_urls = set()
    sites_to_keywords: dict[str, Tuple[list[str], list[str]]] = {
        'pixiv': (["pixi"], []),
        'gelbooru': (["gelbooru"], []),
        'nijie': (["nijie"], []),
        'lolibooru': (['lolibooru'], []),
        'danbooru': (['danbooru'], []),
        '3dbooru': (['behoimi'], []),
        'sankaku': (['sankaku'], ["idol."]),
        'idolcomplex': (["idol.sankaku"], []),
        'artstation': (["artstation"], []),
        'twitter': (["twitter", "nitter"], []),
        'deviantart': (['deviantart'], []),
        'tumblr': (["tumblr"], [])
    }
    siteset = {x.strip() for x in sites.split(',') if x.strip()}
    if sites == "all":
        siteset = set(sites_to_keywords.keys())
    anchors: Counter[str] = collections.Counter()
    for site in siteset:
        if not site in sites_to_keywords:
            log.fatal('hydownloader-anchor-exporter', f'Unsupported site: {site}')

    def process_url(url):
        patterns = urls.anchor_patterns_from_url(url)
        if patterns:
            recognized_urls.add(url)
            anchors[patterns[0]] += 1
        else:
            suspicious_urls.add(url)

    log.info("hydownloader-anchor-exporter", "Processing URLs...")
    for row in rows:
        processed += 1
        if processed % 1000 == 0:
            print(f"Processed {processed}/{all_rows} URLs")
        for site in siteset:
            accepts, rejects = sites_to_keywords[site]
            url_ok = False
            for accept in accepts:
                if accept in row['url']:
                    url_ok = True
                    break
            if url_ok:
                for reject in rejects:
                    if reject in row['url']:
                        url_ok = False
            if url_ok:
                process_url(row['url'])
    log.info("hydownloader-anchor-exporter", "Done processing URLs")
    if unrecognized_urls_file:
        log.info("hydownloader-anchor-exporter", "Writing unrecognized URLs...")
        with open(unrecognized_urls_file, 'w') as f:
            for url in sorted(suspicious_urls):
                f.write(url.strip() + '\n')
        log.info("hydownloader-anchor-exporter", "Done writing unrecognized URLs")
    if recognized_urls_file:
        log.info("hydownloader-anchor-exporter", "Writing recognized URLs...")
        with open(recognized_urls_file, 'w') as f:
            for url in sorted(recognized_urls):
                f.write(url.strip() + '\n')
        log.info("hydownloader-anchor-exporter", "Done writing recognized URLs")
    log.info("hydownloader-anchor-exporter", "Inserting new anchors...")
    anchor_count = len(anchors.keys())
    processed = 0
    new_anchor_rows = 0
    for anchor in anchors:
        processed += 1
        if processed % 50 == 0:
            print(f"Inserting new anchors {processed}/{anchor_count}")
        final_anchors = [anchor]
        if anchor.startswith("nijie"):
            for i in range(anchors[anchor]):
                final_anchors.append(anchor + "_" + str(i))
        if anchor.startswith("twitter") or anchor.startswith("tumblr"):
            for i in range(anchors[anchor] + 1):
                final_anchors.append(anchor + "_" + str(i))
        if anchor.startswith("pixiv"):
            for i in range(anchors[anchor]):
                final_anchors.append(anchor + "_p{:02d}".format(i))
        for f_a in final_anchors:
            if f_a in known_anchors:
                continue
            ac.execute('insert into archive(entry) values (?)', (f_a,))
            new_anchor_rows += 1
    log.info("hydownloader-anchor-exporter", f"Done inserting new anchors, added {new_anchor_rows} entries in total")
    anchor_db.commit()
    anchor_db.close()
    hydrus_db.close()

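# Self-contained sketch of the per-site anchor expansion done in the loop above: a single matched
# gallery URL can correspond to several downloaded files, so extra suffixed anchor entries are
# generated from the occurrence count. The anchor strings and counts below are made up.
import collections

def _expand_anchors_example(anchors: "collections.Counter[str]") -> list[str]:
    expanded = []
    for anchor, count in anchors.items():
        final = [anchor]
        if anchor.startswith("nijie"):
            final += [f"{anchor}_{i}" for i in range(count)]
        if anchor.startswith("twitter") or anchor.startswith("tumblr"):
            final += [f"{anchor}_{i}" for i in range(count + 1)]
        if anchor.startswith("pixiv"):
            final += [f"{anchor}_p{i:02d}" for i in range(count)]
        expanded.extend(final)
    return expanded

# _expand_anchors_example(collections.Counter({"pixiv12345": 2})) ->
# ["pixiv12345", "pixiv12345_p00", "pixiv12345_p01"]
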
def update_anchor(path: str, hydrus_db_folder: str, sites: str, unrecognized_urls_file: Optional[str], recognized_urls_file: Optional[str], fill_known_urls: bool, keep_old_hydrus_url_data: bool) -> None:
    """
    This function goes through all URLs in a Hydrus database, and tries to match them to known
    site-specific URL patterns to generate anchor database entries that gallery-dl can recognize.
    For some sites, the anchor format differs from the gallery-dl default, these are set in gallery-dl-config.json.
    If enabled, also fills up the known_urls table in the hydownloader DB with all URLs known by Hydrus.
    """
    log.init(path, True)
    db.init(path)
    if not os.path.isfile(hydrus_db_folder + "/client.master.db"):
        log.fatal("hydownloader-anchor-exporter", "The client.master.db database was not found at the given location!")
    hydrus_db = sqlite3.connect("file:" + hydrus_db_folder + "/client.master.db?mode=ro", uri=True)
    hydrus_db.row_factory = sqlite3.Row
    anchor_init_needed = not os.path.isfile(path + "/anchor.db")
    anchor_db = sqlite3.connect(path + "/anchor.db")
    hc = hydrus_db.cursor()
    ac = anchor_db.cursor()
    if anchor_init_needed:
        ac.execute('CREATE TABLE archive (entry PRIMARY KEY) WITHOUT ROWID')
        anchor_db.commit()
    ac.execute('select * from archive')
    known_anchors = {row[0] for row in ac.fetchall()}
    log.info("hydownloader-anchor-exporter", "Querying Hydrus database for URLs...")
    hc.execute('select * from url_domains natural inner join urls')
    rows = hc.fetchall()
    all_rows = len(rows)
    processed = 0
    suspicious_urls = set()
    recognized_urls = set()
    current_url_ids = set()
    deleted_url_ids = set()
    if fill_known_urls:
        if not os.path.isfile(hydrus_db_folder + "/client.db"):
            log.fatal("hydownloader-anchor-exporter", "The client.db database was not found at the given location!")
        client_db = sqlite3.connect("file:" + hydrus_db_folder + "/client.db?mode=ro", uri=True)
        client_db.row_factory = sqlite3.Row
        cc = client_db.cursor()
        log.info("hydownloader-anchor-exporter", "Querying Hydrus database for current URL IDs...")
        cc.execute('select * from current_files natural inner join url_map')
        for row in cc.fetchall():
            current_url_ids.add(row['url_id'])
        log.info("hydownloader-anchor-exporter", "Querying Hydrus database for deleted URL IDs...")
        cc.execute('select * from deleted_files natural inner join url_map')
        for row in cc.fetchall():
            deleted_url_ids.add(row['url_id'])
        client_db.close()
        if keep_old_hydrus_url_data:
            log.info("hydownloader-anchor-exporter", "Old Hydrus URL data will NOT be deleted from the shared hydownloader database")
        else:
            log.info("hydownloader-anchor-exporter", "Deleting old Hydrus URL data from shared hydownloader database...")
            db.delete_all_hydrus_known_urls()
    sites_to_keywords: dict[str, Tuple[list[str], list[str]]] = {
        'pixiv': (["pixi"], []),
        'gelbooru': (["gelbooru"], []),
        'nijie': (["nijie"], []),
        'lolibooru': (['lolibooru'], []),
        'danbooru': (['danbooru'], []),
        '3dbooru': (['behoimi'], []),
        'sankaku': (['sankaku'], ["idol."]),
        'idolcomplex': (["idol.sankaku"], []),
        'artstation': (["artstation"], []),
        'twitter': (["twitter", "nitter"], []),
        'deviantart': (['deviantart'], []),
        'tumblr': (["tumblr"], []),
        'hentaifoundry': (["hentai-foundry"], []),
        'yandere': (["yande.re"], [])
    }
    siteset = {x.strip() for x in sites.split(',') if x.strip()}
    if sites == "all":
        siteset = set(sites_to_keywords.keys())
    anchors: Counter[str] = collections.Counter()
    for site in siteset:
        if not site in sites_to_keywords:
            log.fatal('hydownloader-anchor-exporter', f'Unsupported site: {site}')

    def process_url(url):
        patterns = urls.anchor_patterns_from_url(url)
        if patterns:
            recognized_urls.add(url)
            anchors[patterns[0]] += 1
        else:
            suspicious_urls.add(url)

    log.info("hydownloader-anchor-exporter", "Processing URLs...")
    for row in rows:
        processed += 1
        if processed % 1000 == 0:
            print(f"Processed {processed}/{all_rows} URLs", file=sys.stderr)
        if fill_known_urls:
            known_url_status = 1
            is_current = row['url_id'] in current_url_ids
            is_deleted = row['url_id'] in deleted_url_ids
            if is_current and is_deleted:
                known_url_status = 4
            elif is_deleted:
                known_url_status = 3
            elif is_current:
                known_url_status = 2
            db.add_hydrus_known_url(row['url'], known_url_status)
        for site in siteset:
            accepts, rejects = sites_to_keywords[site]
            url_ok = False
            for accept in accepts:
                if accept in row['url']:
                    url_ok = True
                    break
            if url_ok:
                for reject in rejects:
                    if reject in row['url']:
                        url_ok = False
            if url_ok:
                process_url(row['url'])
    log.info("hydownloader-anchor-exporter", "Done processing URLs")
    if unrecognized_urls_file:
        log.info("hydownloader-anchor-exporter", "Writing unrecognized URLs...")
        with open(unrecognized_urls_file, 'w', encoding='utf-8') as f:
            for url in sorted(suspicious_urls):
                f.write(url.strip() + '\n')
        log.info("hydownloader-anchor-exporter", "Done writing unrecognized URLs")
    if recognized_urls_file:
        log.info("hydownloader-anchor-exporter", "Writing recognized URLs...")
        with open(recognized_urls_file, 'w', encoding='utf-8') as f:
            for url in sorted(recognized_urls):
                f.write(url.strip() + '\n')
        log.info("hydownloader-anchor-exporter", "Done writing recognized URLs")
    log.info("hydownloader-anchor-exporter", "Inserting new anchors...")
    anchor_count = len(anchors.keys())
    processed = 0
    new_anchor_rows = 0
    for anchor in anchors:
        processed += 1
        if processed % 50 == 0:
            print(f"Inserting new anchors {processed}/{anchor_count}", file=sys.stderr)
        final_anchors = [anchor]
        if anchor.startswith("nijie"):
            for i in range(anchors[anchor]):
                final_anchors.append(anchor + "_" + str(i))
        if anchor.startswith("twitter") or anchor.startswith("tumblr"):
            for i in range(anchors[anchor] + 1):
                final_anchors.append(anchor + "_" + str(i))
        if anchor.startswith("pixiv"):
            for i in range(anchors[anchor]):
                final_anchors.append(anchor + "_p{:02d}".format(i))
        for f_a in final_anchors:
            if f_a in known_anchors:
                continue
            ac.execute('insert into archive(entry) values (?)', (f_a,))
            new_anchor_rows += 1
    log.info("hydownloader-anchor-exporter", f"Done inserting new anchors, added {new_anchor_rows} entries in total")
    anchor_db.commit()
    anchor_db.close()
    hydrus_db.close()
    db.shutdown()

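# Sketch of the known_url_status values assigned above when fill_known_urls is enabled.
# The numeric codes are taken from the loop body; the helper itself is illustrative.
def _known_url_status_example(is_current: bool, is_deleted: bool) -> int:
    if is_current and is_deleted:
        return 4  # URL is attached to both current and deleted files
    if is_deleted:
        return 3  # deleted files only
    if is_current:
        return 2  # current files only
    return 1      # known to Hydrus, but attached to neither current nor deleted files

# _known_url_status_example(True, False) == 2
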
def subscription_worker() -> None:
    global _sub_worker_ended_flag
    try:
        log.info("hydownloader", "Starting subscription worker thread...")
        with _worker_lock:
            _sub_worker_ended_flag = False
        while True:
            time.sleep(2)
            with _worker_lock:
                if _end_threads_flag:
                    break
            subs_due = db.get_due_subscriptions()
            if not subs_due:
                with _worker_lock:
                    if _sub_worker_paused_flag:
                        set_subscription_worker_status("paused")
                    else:
                        set_subscription_worker_status("nothing to do: checked for due subscriptions, found none")
            sub = subs_due[0] if subs_due else None
            while sub:
                with _worker_lock:
                    if _end_threads_flag:
                        break
                    if _sub_worker_paused_flag:
                        set_subscription_worker_status("paused")
                        break
                initial_check = sub['last_check'] is None
                url = urls.subscription_data_to_url(sub['downloader'], sub['keywords'])
                check_started_time = time.time()
                status_msg = f"checking subscription: {sub['id']} (downloader: {sub['downloader']}, keywords: {sub['keywords']})"
                set_subscription_worker_status(status_msg)
                log.info(f"subscription-{sub['id']}", status_msg.capitalize())
                if initial_check:
                    log.info(f"subscription-{sub['id']}", "This is the first check for this subscription")
                result = gallery_dl_utils.run_gallery_dl(
                    url=url,
                    ignore_anchor=False,
                    metadata_only=False,
                    log_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-gallery-dl-latest.txt",
                    old_log_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-gallery-dl-old.txt",
                    console_output_file=db.get_rootpath()+f"/temp/subscription-{sub['id']}-gallery-dl-output.txt",
                    unsupported_urls_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-unsupported-urls-gallery-dl-latest.txt",
                    old_unsupported_urls_file=db.get_rootpath()+f"/logs/subscription-{sub['id']}-unsupported-urls-gallery-dl-old.txt",
                    overwrite_existing=False,
                    filter_=sub['filter'],
                    chapter_filter=None,
                    subscription_mode=True,
                    abort_after=sub['abort_after'],
                    max_file_count=sub['max_files_initial'] if initial_check else sub['max_files_regular']
                )
                if result:
                    log.warning(f"subscription-{sub['id']}", "Error: " + result)
                else:
                    sub['last_successful_check'] = check_started_time
                sub['last_check'] = check_started_time
                new_files, skipped_files = process_additional_data(subscription_id=sub['id'])
                check_ended_time = time.time()
                db.add_subscription_check(sub['id'], new_files=new_files, already_seen_files=skipped_files, time_started=check_started_time, time_finished=check_ended_time, status=result)
                db.add_or_update_subscriptions([sub])
                status_msg = f"finished checking subscription: {sub['id']} (downloader: {sub['downloader']}, keywords: {sub['keywords']}), new files: {new_files}, skipped: {skipped_files}"
                set_subscription_worker_status(status_msg)
                log.info(f"subscription-{sub['id']}", status_msg.capitalize())
                subs_due = db.get_due_subscriptions()
                sub = subs_due[0] if subs_due else None
            with _worker_lock:
                if _end_threads_flag:
                    break
        with _worker_lock:
            if _end_threads_flag:
                log.info("hydownloader", "Stopping subscription worker thread")
                _sub_worker_ended_flag = True
    except Exception as e:
        log.fatal("hydownloader", "Uncaught exception in subscription worker thread", e)
        shutdown()

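# Self-contained sketch of the polling worker pattern used by subscription_worker() and
# url_queue_worker(): a loop that sleeps, checks shared pause/end flags under a lock, and then
# drains whatever work is currently due. The queue and flag names here are stand-ins, not
# hydownloader's actual module state.
import threading
import time

_example_lock = threading.Lock()
_example_end = False
_example_paused = False
_example_queue: list[str] = []

def _worker_example() -> None:
    while True:
        time.sleep(0.1)
        with _example_lock:
            if _example_end:
                break
            if _example_paused:
                continue
        # drain the currently queued work, re-checking the flags before each item
        while _example_queue:
            with _example_lock:
                if _example_end or _example_paused:
                    break
                item = _example_queue.pop(0)
            print(f"processing {item}")  # the real workers run gallery-dl and record results here
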
def url_queue_worker() -> None:
    global _url_worker_ended_flag
    try:
        log.info("hydownloader", "Starting single URL queue worker thread...")
        with _worker_lock:
            _url_worker_ended_flag = False
        while True:
            time.sleep(2)
            with _worker_lock:
                if _end_threads_flag:
                    break
            urls_to_dl = db.get_urls_to_download()
            if not urls_to_dl:
                with _worker_lock:
                    if _url_worker_paused_flag:
                        set_url_worker_status("paused")
                    else:
                        set_url_worker_status("nothing to do: checked for queued URLs, found none")
            urlinfo = urls_to_dl[0] if urls_to_dl else None
            while urlinfo:
                with _worker_lock:
                    if _end_threads_flag:
                        break
                    if _url_worker_paused_flag:
                        set_url_worker_status("paused")
                        break
                check_time = time.time()
                status_msg = f"downloading URL: {urlinfo['url']}"
                set_url_worker_status(status_msg)
                log.info("single url downloader", status_msg.capitalize())
                result = gallery_dl_utils.run_gallery_dl(
                    url=urlinfo['url'],
                    ignore_anchor=urlinfo['ignore_anchor'],
                    metadata_only=urlinfo['metadata_only'],
                    log_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-gallery-dl-latest.txt",
                    old_log_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-gallery-dl-old.txt",
                    console_output_file=db.get_rootpath()+f"/temp/single-url-{urlinfo['id']}-gallery-dl-output.txt",
                    unsupported_urls_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-unsupported-urls-gallery-dl-latest.txt",
                    old_unsupported_urls_file=db.get_rootpath()+f"/logs/single-urls-{urlinfo['id']}-unsupported-urls-gallery-dl-old.txt",
                    overwrite_existing=urlinfo['overwrite_existing'],
                    filter_=urlinfo['filter'],
                    chapter_filter=None,
                    subscription_mode=False,
                    max_file_count=urlinfo['max_files']
                )
                if result:
                    log.warning("single url downloader", f"Error while downloading {urlinfo['url']}: {result}")
                    urlinfo['status'] = 1
                    urlinfo['status_text'] = result
                else:
                    urlinfo['status'] = 0
                    urlinfo['status_text'] = 'ok'
                urlinfo['time_processed'] = check_time
                new_files, skipped_files = process_additional_data(url_id=urlinfo['id'])
                urlinfo['new_files'] = new_files
                urlinfo['already_seen_files'] = skipped_files
                db.add_or_update_urls([urlinfo])
                status_msg = f"finished checking URL: {urlinfo['url']}, new files: {new_files}, skipped: {skipped_files}"
                set_url_worker_status(status_msg)
                log.info("single url downloader", status_msg.capitalize())
                urls_to_dl = db.get_urls_to_download()
                urlinfo = urls_to_dl[0] if urls_to_dl else None
            with _worker_lock:
                if _end_threads_flag:
                    break
        with _worker_lock:
            if _end_threads_flag:
                log.info("hydownloader", "Stopping single URL queue worker thread")
                _url_worker_ended_flag = True
    except Exception as e:
        log.fatal("hydownloader", "Uncaught exception in URL worker thread", e)
        shutdown()

def check_init() -> None:
    if not _inited:
        log.fatal("hydownloader", "Database used but not initialized")

def run_job(path: str, job: str, config: Optional[str], verbose: bool, do_it: bool, no_stop_on_missing_metadata: bool) -> None:
    log.init(path, True)
    db.init(path)
    config_path = db.get_rootpath() + '/hydownloader-import-jobs.json'
    data_path = db.get_datapath()
    if config:
        config_path = config
    if not os.path.isfile(config_path):
        log.fatal("hydownloader-importer", f"Configuration file not found: {config_path}")
    jobs = json.load(open(config_path, 'r', encoding='utf-8-sig'))
    if not job in jobs:
        log.fatal("hydownloader-importer", f"Job not found in configuration file: {job}")
    jd = jobs[job]
    force_add_metadata = jd.get('forceAddMetadata', True)
    force_add_files = jd.get('forceAddFiles', False)
    client = hydrus.Client(jd['apiKey'], jd['apiURL'])
    log.info("hydownloader-importer", f"Starting import job: {job}")

    # iterate over all files in the data directory
    for root, dirs, files in os.walk(data_path):
        for fname in files:
            # json files hold metadata, don't import them to Hydrus
            if fname.endswith('.json'):
                continue

            # set up some variables
            # some will be used later in the code, some are meant to be used in user-defined expressions
            abspath = root + "/" + fname
            path = os.path.relpath(abspath, start=data_path)
            split_path = os.path.split(path)
            fname_noext, fname_ext = os.path.splitext(fname)
            if fname_ext.startswith('.'):
                fname_ext = fname_ext[1:]

            # find the path of the associated json metadata file, check if it exists
            # for pixiv ugoira, the same metadata file belongs both to the .webm and the .zip,
            # so this needs special handling
            json_path = abspath + '.json'
            if not os.path.isfile(json_path) and abspath.endswith('.webm'):
                json_path = abspath[:-4] + "zip.json"
            json_exists = True
            if not os.path.isfile(json_path):
                json_exists = False
                printerr(f"Warning: no metadata file found for {path}")
                if not no_stop_on_missing_metadata:
                    sys.exit(1)

            generated_urls = set()
            generated_tags: set[tuple[str, str]] = set()
            matched = False  # will be true if at least 1 filter group matched the file
            json_data = None  # this will hold the associated json metadata (if available)
            if verbose:
                printerr(f"Processing file: {path}...")

            # iterate over all filter groups, do they match this file?
            for group in jd['groups']:
                # evaluate filter, load json metadata if the filter matches and we haven't loaded it yet
                should_process = False
                try:
                    should_process = eval(group['filter'])
                except:
                    printerr(f"Failed to evaluate filter: {group['filter']}")
                    sys.exit(1)
                if not json_data and json_exists:
                    try:
                        json_data = json.load(open(json_path, encoding='utf-8-sig'))
                    except json.decoder.JSONDecodeError:
                        printerr(f"Failed to parse JSON: {json_path}")
                        sys.exit(1)
                if not should_process:
                    continue
                matched = True

                # get the data for this file from the additional_data db table and process it
                # set up some variables that user-defined expressions will be able to use
                additional_data_dicts = db.get_additional_data_for_file(path)
                if not additional_data_dicts and path.endswith('.webm'):
                    additional_data_dicts = db.get_additional_data_for_file(path[:-4] + "zip")
                extra_tags: defaultdict[str, list[str]] = defaultdict(list)
                min_time_added = -1
                max_time_added = -1
                for d in additional_data_dicts:
                    parse_additional_data(extra_tags, d['data'])
                    if min_time_added == -1 or min_time_added > d['time_added']:
                        min_time_added = d['time_added']
                    if max_time_added == -1 or max_time_added < d['time_added']:
                        max_time_added = d['time_added']
                sub_ids = []
                url_ids = []
                for d in additional_data_dicts:
                    if d['subscription_id']:
                        sub_ids.append(str(d['subscription_id']))
                    if d['url_id']:
                        url_ids.append(str(d['url_id']))

                # execute user-defined tag and url generator expressions
                has_error = False
                for dtype, d in [('tag', x) for x in group.get('tags', [])] + [('url', x) for x in group.get('urls', [])]:
                    skip_on_error = d.get("skipOnError", False)
                    allow_empty = d.get("allowEmpty", False)
                    rule_name = d.get("name")
                    generated_results = []
                    # if the expression is a single string
                    if isinstance(d["values"], str):
                        try:
                            eval_res = eval(d["values"])
                            # check result type: must be string or iterable of strings
                            if isinstance(eval_res, str):
                                generated_results = [eval_res]
                            else:
                                for eval_res_str in eval_res:
                                    if not isinstance(eval_res_str, str):
                                        printerr(f"Invalid result type ({str(type(eval_res_str))}) while evaluating expression: {d['values']}")
                                        sys.exit(1)
                                    else:
                                        generated_results.append(eval_res_str)
                        except Exception as e:
                            if verbose:
                                printerr(f"Failed to evaluate expression: {d['values']}")
                                print(e)
                            has_error = True
                    else:
                        # multiple expressions (array of strings)
                        for eval_expr in d["values"]:
                            try:
                                eval_res = eval(eval_expr)
                                # check result type: must be string or iterable of strings
                                if isinstance(eval_res, str):
                                    generated_results = [eval_res]
                                else:
                                    for eval_res_str in eval_res:
                                        if not isinstance(eval_res_str, str):
                                            printerr(f"Invalid result type ({str(type(eval_res_str))}) while evaluating expression: {eval_expr}")
                                            sys.exit(1)
                                        else:
                                            generated_results.append(eval_res_str)
                            except Exception as e:
                                if verbose:
                                    printerr(f"Failed to evaluate expression: {eval_expr}")
                                    printerr(e)
                                has_error = True
                    # check for empty results or failed evaluation, as necessary
                    if not generated_results and not allow_empty:
                        printerr(f"Error: the rule named {rule_name} yielded no results but this is not allowed")
                        sys.exit(1)
                    if has_error:
                        printerr(f"Warning: an expression failed to evaluate in the rule named {rule_name}")
                        if not skip_on_error:
                            sys.exit(1)
                    # save results of the currently evaluated expressions
                    if dtype == 'url':
                        generated_urls.update(generated_results)
                    else:
                        for repo in d["tagRepos"]:
                            generated_tags.update((repo, tag) for tag in generated_results)

            if matched:
                printerr(f"File matched: {path}...")
                if not os.path.getsize(abspath):
                    print(f"Found truncated file: {abspath}")
                    sys.exit(1)
                if verbose:
                    printerr("Generated URLs:")
                    for url in generated_urls:
                        printerr(url)
                    printerr("Generated tags:")
                    for repo, tag in sorted(list(generated_tags), key=lambda x: x[0]):
                        printerr(f"{repo} <- {tag}")
                if verbose:
                    printerr('Hashing...')

                # calculate hash, check if Hydrus already knows the file
                already_added = False
                if do_it:
                    hasher = hashlib.sha256()
                    with open(abspath, 'rb') as hashedfile:
                        buf = hashedfile.read(65536 * 16)
                        while len(buf) > 0:
                            hasher.update(buf)
                            buf = hashedfile.read(65536 * 16)
                    hexdigest = hasher.hexdigest()
                    if client.file_metadata(hashes=[hexdigest], only_identifiers=True):
                        printerr("File is already in Hydrus")
                        already_added = True

                # send file, tags, metadata to Hydrus as needed
                if not already_added or force_add_files:
                    if verbose:
                        printerr("Sending file to Hydrus...")
                    if do_it:
                        client.add_file(abspath)
                if not already_added or force_add_metadata:
                    if verbose:
                        printerr("Associating URLs...")
                    if do_it:
                        client.associate_url(hashes=[hexdigest], add=generated_urls)
                    if verbose:
                        printerr("Adding tags...")
                    tag_dict = defaultdict(list)
                    for repo, tag in generated_tags:
                        tag_dict[repo].append(tag)
                    if do_it:
                        client.add_tags(hashes=[hexdigest], service_to_tags=tag_dict)
            else:
                if verbose:
                    printerr(f"Skipping due to no matching filter: {path}")

    log.info("hydownloader-importer", f"Finished import job: {job}")
    db.shutdown()
