#!/usr/bin/env python
"""Create test data for extractor tests"""

import os
import sys
import datetime

ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.realpath(ROOTDIR))
from gallery_dl import config, extractor, job, util
from test.test_results import setup_test_config

# collect one (index, extractor, url, result) tuple per extractor test
# (imports and the head of this comprehension are reconstructed; the
# original text began mid-comprehension)
tests = [
    (idx, extr, url, result)
    for extr in extractor.extractors()
    for idx, (url, result) in enumerate(extr._get_tests())
    if result
]

# setup target directory
path = util.path("archive", "testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)

for idx, extr, url, result in tests:
    # filename
    name = "{}-{}-{}.json".format(extr.category, extr.subcategory, idx)
    print(name)

    # config values
    setup_test_config()

    if "options" in result:
        for key, value in result["options"]:
            key = key.split(".")
            config.set(key[:-1], key[-1], value)
    if "range" in result:
        config.set((), "image-range", result["range"])
        config.set((), "chapter-range", result["range"])

    # write test data
    try:
        with open(os.path.join(path, name), "w") as outfile:
            job.DataJob(url, file=outfile, ensure_ascii=False).run()
    except KeyboardInterrupt:
        sys.exit()
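# A minimal sketch (hypothetical option key and value) of the dotted-key
# handling above: "options" entries use keys like "extractor.pixiv.tags",
# which are split into a config path plus a final key before being passed
# to config.set().
from gallery_dl import config

dotted_key, value = "extractor.pixiv.tags", "original"  # hypothetical test option
parts = dotted_key.split(".")
config.set(parts[:-1], parts[-1], value)  # path ["extractor", "pixiv"], key "tags"
assert config.get(("extractor", "pixiv"), "tags") == "original"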
def import_posts(key):
    conn = psycopg2.connect(
        host=config.database_host,
        dbname=config.database_dbname,
        user=config.database_user,
        password=config.database_password,
        cursor_factory=RealDictCursor)

    # run gallery-dl against the feed in "null" output mode: messages are
    # collected in j.data instead of any files being downloaded
    dlconfig.set(('output',), "mode", "null")
    dlconfig.set(('extractor', 'subscribestar'), "cookies", {"auth_token": key})
    dlconfig.set(('extractor', 'subscribestar'), "proxy", get_proxy())
    j = job.DataJob("https://subscribestar.adult/feed")
    j.run()

    for message in j.data:
        try:
            if message[0] == Message.Directory:
                post = message[-1]
                file_directory = f"files/subscribestar/{post['author_name']}/{post['post_id']}"
                attachments_directory = f"attachments/subscribestar/{post['author_name']}/{post['post_id']}"

                # skip artists on the do-not-post list
                cursor1 = conn.cursor()
                cursor1.execute(
                    "SELECT * FROM dnp WHERE id = %s AND service = 'subscribestar'",
                    (post['author_name'],))
                bans = cursor1.fetchall()
                if len(bans) > 0:
                    continue

                check_for_flags('subscribestar', post['author_name'],
                                str(post['post_id']))

                # skip posts that were already imported
                cursor2 = conn.cursor()
                cursor2.execute(
                    "SELECT * FROM booru_posts WHERE id = %s AND service = 'subscribestar'",
                    (str(post['post_id']),))
                existing_posts = cursor2.fetchall()
                if len(existing_posts) > 0:
                    continue

                stripped_content = strip_tags(post['content'])
                post_model = {
                    'id': str(post['post_id']),
                    '"user"': post['author_name'],
                    'service': 'subscribestar',
                    'title': (stripped_content[:60] + '..') if len(stripped_content) > 60 else stripped_content,
                    'content': post['content'],
                    'embed': {},
                    'shared_file': False,
                    'added': datetime.datetime.now(),
                    'published': post['date'],
                    'edited': None,
                    'file': {},
                    'attachments': []
                }

                # the first Url message for this post becomes the primary
                # file; every further one becomes an attachment
                for attachment in filter(
                        lambda msg: post['post_id'] == msg[-1]['post_id']
                        and msg[0] == Message.Url, j.data):
                    if len(post_model['file'].keys()) == 0:
                        filename, _ = download_file(
                            join(config.download_path, file_directory),
                            attachment[-1]['url'],
                            name=attachment[-1]['filename'] + '.'
                            + attachment[-1]['extension'])
                        post_model['file']['name'] = (
                            attachment[-1]['filename'] + '.'
                            + attachment[-1]['extension'])
                        post_model['file']['path'] = f'/{file_directory}/{filename}'
                    else:
                        filename, _ = download_file(
                            join(config.download_path, attachments_directory),
                            attachment[-1]['url'],
                            name=attachment[-1]['filename'] + '.'
                            + attachment[-1]['extension'])
                        post_model['attachments'].append({
                            'name': attachment[-1]['filename'] + '.'
                            + attachment[-1]['extension'],
                            'path': f'/{attachments_directory}/{filename}'
                        })

                # JSON-encode the nested structures for the jsonb columns
                post_model['embed'] = json.dumps(post_model['embed'])
                post_model['file'] = json.dumps(post_model['file'])
                for i in range(len(post_model['attachments'])):
                    post_model['attachments'][i] = json.dumps(
                        post_model['attachments'][i])

                columns = post_model.keys()
                data = ['%s'] * len(post_model.values())
                data[-1] = '%s::jsonb[]'  # attachments
                query = "INSERT INTO booru_posts ({fields}) VALUES ({values})".format(
                    fields=','.join(columns),
                    values=','.join(data))
                cursor3 = conn.cursor()
                cursor3.execute(query, list(post_model.values()))
                conn.commit()
        except DownloaderException:
            continue
    conn.close()
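# Standalone sketch of the INSERT construction above, with a reduced column
# set (assumption: the full query uses every post_model key in insertion
# order). The trailing placeholder is cast so psycopg2 stores the list of
# JSON-encoded attachment strings as a Postgres jsonb[] column.
columns = ['id', '"user"', 'service', 'attachments']
placeholders = ['%s'] * len(columns)
placeholders[-1] = '%s::jsonb[]'  # attachments
query = "INSERT INTO booru_posts ({fields}) VALUES ({values})".format(
    fields=','.join(columns), values=','.join(placeholders))
print(query)
# INSERT INTO booru_posts (id,"user",service,attachments) VALUES (%s,%s,%s,%s::jsonb[])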
def import_posts(import_id, key):
    # collect feed messages with gallery-dl without downloading anything
    dlconfig.set(('output',), "mode", "null")
    dlconfig.set(('extractor', 'subscribestar'), "cookies", {"auth_token": key})
    dlconfig.set(('extractor', 'subscribestar'), "proxy", get_proxy())
    j = job.DataJob("https://subscribestar.adult/feed")
    j.run()

    conn = get_conn()
    user_id = None
    for message in j.data:
        try:
            if message[0] == Message.Directory:
                post = message[-1]
                user_id = post['author_name']
                post_id = post['post_id']
                file_directory = f"files/subscribestar/{user_id}/{post_id}"
                attachments_directory = f"attachments/subscribestar/{user_id}/{post_id}"

                if is_artist_dnp('subscribestar', user_id):
                    log(import_id,
                        f"Skipping post {post_id} from user {user_id}: user is in the do-not-post list")
                    continue

                if post_exists('subscribestar', user_id, str(post_id)) \
                        and not post_flagged('subscribestar', user_id, str(post_id)):
                    log(import_id,
                        f"Skipping post {post_id} from user {user_id} because it already exists")
                    continue

                log(import_id, f"Starting import: {post_id}")

                stripped_content = strip_tags(post['content'])
                post_model = {
                    'id': str(post_id),
                    '"user"': user_id,
                    'service': 'subscribestar',
                    'title': (stripped_content[:60] + '..') if len(stripped_content) > 60 else stripped_content,
                    'content': post['content'],
                    'embed': {},
                    'shared_file': False,
                    'added': datetime.datetime.now(),
                    'published': parse_date(post['date']),
                    'edited': None,
                    'file': {},
                    'attachments': []
                }

                # the first Url message for this post becomes the primary
                # file; every further one becomes an attachment
                for attachment in filter(
                        lambda msg: post_id == msg[-1]['post_id']
                        and msg[0] == Message.Url, j.data):
                    if len(post_model['file'].keys()) == 0:
                        filename, _ = download_file(
                            join(config.download_path, file_directory),
                            attachment[-1]['url'],
                            name=attachment[-1]['filename'] + '.'
                            + attachment[-1]['extension'])
                        post_model['file']['name'] = (
                            attachment[-1]['filename'] + '.'
                            + attachment[-1]['extension'])
                        post_model['file']['path'] = f'/{file_directory}/{filename}'
                    else:
                        filename, _ = download_file(
                            join(config.download_path, attachments_directory),
                            attachment[-1]['url'],
                            name=attachment[-1]['filename'] + '.'
                            + attachment[-1]['extension'])
                        post_model['attachments'].append({
                            'name': attachment[-1]['filename'] + '.'
                            + attachment[-1]['extension'],
                            'path': f'/{attachments_directory}/{filename}'
                        })

                # JSON-encode the nested structures for the jsonb columns
                post_model['embed'] = json.dumps(post_model['embed'])
                post_model['file'] = json.dumps(post_model['file'])
                for i in range(len(post_model['attachments'])):
                    post_model['attachments'][i] = json.dumps(
                        post_model['attachments'][i])

                columns = post_model.keys()
                data = ['%s'] * len(post_model.values())
                data[-1] = '%s::jsonb[]'  # attachments
                # upsert so a re-import of a flagged post overwrites the stored row
                query = "INSERT INTO posts ({fields}) VALUES ({values}) ON CONFLICT (id, service) DO UPDATE SET {updates}".format(
                    fields=','.join(columns),
                    values=','.join(data),
                    updates=','.join(
                        [f'{column}=EXCLUDED.{column}' for column in columns]))
                cursor3 = conn.cursor()
                cursor3.execute(query, list(post_model.values()))
                conn.commit()
                log(import_id,
                    f"Finished importing {post_id} from user {user_id}",
                    to_client=False)
        except Exception:
            log(import_id,
                f"Error while importing {post_id} from user {user_id}",
                'exception')
            conn.rollback()
            continue

    log(import_id, "Finished scanning for posts.")
    index_artists()
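# Sketch (hypothetical post data) of the message-stream shape the filter above
# relies on: gallery-dl's DataJob records each message as a tuple whose first
# element is its type and whose last element is the metadata dict, with one
# Directory message per post followed by one Url message per downloadable file
# carrying the same post_id.
from gallery_dl.extractor.message import Message

feed = [
    (Message.Directory, {'post_id': 101, 'author_name': 'artist'}),
    (Message.Url, 'https://example.com/a.jpg',
     {'post_id': 101, 'filename': 'a', 'extension': 'jpg',
      'url': 'https://example.com/a.jpg'}),
    (Message.Url, 'https://example.com/b.jpg',
     {'post_id': 101, 'filename': 'b', 'extension': 'jpg',
      'url': 'https://example.com/b.jpg'}),
]
files = [msg for msg in feed
         if msg[0] == Message.Url and msg[-1]['post_id'] == 101]
assert [f[-1]['filename'] for f in files] == ['a', 'b']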
#!/usr/bin/env python
import sys
import os.path
import datetime

ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.realpath(ROOTDIR))
from gallery_dl import extractor, job, config

tests = [([url[0] for url in extr.test if url[1]], extr)
         for extr in extractor.extractors()
         if hasattr(extr, "test")]

if len(sys.argv) > 1:
    tests = [(urls, extr) for urls, extr in tests
             if extr.category in sys.argv]

path = os.path.join(ROOTDIR, "archive/testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)
config.load()

for urls, extr in tests:
    for i, url in enumerate(urls):
        name = "%s-%s-%d.json" % (extr.category, extr.subcategory, i)
        print(name)
        with open(os.path.join(path, name), "w") as outfile:
            job.DataJob(url, file=outfile).run()
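# Usage sketch: with no arguments the script dumps test data for every
# extractor that defines test URLs; category names given on the command line
# restrict the run to those extractors. The script path and categories below
# are hypothetical examples:
#
#   $ python scripts/create_test_data.py pixiv nhentai
#   pixiv-user-0.json
#   nhentai-gallery-0.json
#   ...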
#!/usr/bin/env python
import sys
import os.path
import datetime

ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.realpath(ROOTDIR))
from gallery_dl import extractor, job, config

tests = [([url[0] for url in extr.test], extr)
         for extr in extractor.extractors()
         if hasattr(extr, "test")]

if len(sys.argv) > 1:
    tests = [(urls, extr) for urls, extr in tests
             if extr.category in sys.argv]

path = os.path.join(ROOTDIR, "archive/testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)
config.load()

for urls, extr in tests:
    for i, url in enumerate(urls):
        name = "%s-%s-%d.json" % (extr.category, extr.subcategory, i)
        print(name)
        with open(os.path.join(path, name), "w") as outfile:
            job.DataJob(url, outfile).run()