Example #1
0
         for idx, (url, result) in enumerate(extr._get_tests()) if result]

# setup target directory

path = util.path("archive", "testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)

for idx, extr, url, result in tests:

    # output filename: <category>-<subcategory>-<index>.json
    name = "{}-{}-{}.json".format(extr.category, extr.subcategory, idx)
    print(name)

    # reset config to the test defaults before applying per-test options
    setup_test_config()

    if "options" in result:
        for optname, optvalue in result["options"]:
            # "a.b.c" addresses nested config: path ["a", "b"], key "c"
            *cfgpath, cfgkey = optname.split(".")
            config.set(cfgpath, cfgkey, optvalue)
    if "range" in result:
        config.set((), "image-range", result["range"])
        config.set((), "chapter-range", result["range"])

    # run the extractor and capture its output as JSON test data
    try:
        with open(os.path.join(path, name), "w") as outfile:
            job.DataJob(url, file=outfile, ensure_ascii=False).run()
    except KeyboardInterrupt:
        sys.exit()
Example #2
0
def import_posts(key):
    """Import every post from the authenticated subscribestar feed.

    Crawls https://subscribestar.adult/feed with gallery-dl, downloads each
    post's files/attachments, and inserts one row per post into the
    ``booru_posts`` table.  Posts by banned artists (``dnp`` table) and posts
    that were already imported are skipped.

    key -- subscribestar ``auth_token`` cookie value used for authentication
    """
    conn = psycopg2.connect(host=config.database_host,
                            dbname=config.database_dbname,
                            user=config.database_user,
                            password=config.database_password,
                            cursor_factory=RealDictCursor)

    # BUG FIX: ('output') is just the parenthesized string 'output', not a
    # one-element tuple, so gallery-dl walked the path character by character
    # and the option was never applied; the path must be ('output',).
    dlconfig.set(('output',), "mode", "null")
    dlconfig.set(('extractor', 'subscribestar'), "cookies",
                 {"auth_token": key})
    dlconfig.set(('extractor', 'subscribestar'), "proxy", get_proxy())
    j = job.DataJob("https://subscribestar.adult/feed")
    j.run()

    try:
        for message in j.data:
            try:
                if message[0] != Message.Directory:
                    continue
                post = message[-1]

                file_directory = f"files/subscribestar/{post['author_name']}/{post['post_id']}"
                attachments_directory = f"attachments/subscribestar/{post['author_name']}/{post['post_id']}"

                # skip artists on the do-not-post list
                cursor1 = conn.cursor()
                cursor1.execute(
                    "SELECT * FROM dnp WHERE id = %s AND service = 'subscribestar'",
                    (post['author_name'], ))
                if cursor1.fetchall():
                    continue

                check_for_flags('subscribestar', post['author_name'],
                                str(post['post_id']))

                # skip posts that already exist
                cursor2 = conn.cursor()
                cursor2.execute(
                    "SELECT * FROM booru_posts WHERE id = %s AND service = 'subscribestar'",
                    (str(post['post_id']), ))
                if cursor2.fetchall():
                    continue

                stripped_content = strip_tags(post['content'])
                post_model = {
                    'id': str(post['post_id']),
                    '"user"': post['author_name'],
                    'service': 'subscribestar',
                    # plain-text title truncated to 60 chars + '..'
                    'title': (stripped_content[:60] + '..')
                    if len(stripped_content) > 60 else stripped_content,
                    'content': post['content'],
                    'embed': {},
                    'shared_file': False,
                    'added': datetime.datetime.now(),
                    'published': post['date'],
                    'edited': None,
                    'file': {},
                    'attachments': []
                }

                # all Url messages belonging to this post, evaluated eagerly
                # so `post` is still bound to the current post
                post_files = [
                    msg for msg in j.data
                    if msg[0] == Message.Url
                    and msg[-1]['post_id'] == post['post_id']
                ]
                for attachment in post_files:
                    fname = (attachment[-1]['filename'] + '.' +
                             attachment[-1]['extension'])
                    if not post_model['file']:
                        # first downloaded file becomes the post's primary file
                        filename, _ = download_file(
                            join(config.download_path, file_directory),
                            attachment[-1]['url'],
                            name=fname)
                        post_model['file']['name'] = fname
                        # BUG FIX: the stored path previously ended in the
                        # literal text '(unknown)' while the filename returned
                        # by download_file went unused
                        post_model['file'][
                            'path'] = f'/{file_directory}/{filename}'
                    else:
                        filename, _ = download_file(
                            join(config.download_path, attachments_directory),
                            attachment[-1]['url'],
                            name=fname)
                        post_model['attachments'].append({
                            'name': fname,
                            # BUG FIX: same '(unknown)' placeholder as above
                            'path': f'/{attachments_directory}/{filename}'
                        })

                # serialize the JSON-typed columns
                post_model['embed'] = json.dumps(post_model['embed'])
                post_model['file'] = json.dumps(post_model['file'])
                post_model['attachments'] = [
                    json.dumps(attachment)
                    for attachment in post_model['attachments']
                ]

                columns = post_model.keys()
                placeholders = ['%s'] * len(post_model)
                # 'attachments' is the last key of post_model (dict insertion
                # order) and maps to a jsonb[] column → needs an explicit cast
                placeholders[-1] = '%s::jsonb[]'
                query = "INSERT INTO booru_posts ({fields}) VALUES ({values})".format(
                    fields=','.join(columns), values=','.join(placeholders))
                cursor3 = conn.cursor()
                cursor3.execute(query, list(post_model.values()))
                conn.commit()
            except DownloaderException:
                # a failed download skips this post instead of aborting the run
                continue
    finally:
        # close the connection even if an unexpected exception escapes
        conn.close()
Example #3
0
def import_posts(import_id, key):
    """Import subscribestar feed posts into the ``posts`` table.

    Crawls the authenticated feed with gallery-dl, downloads each post's
    files/attachments, and upserts one row per post, skipping do-not-post
    artists and unflagged posts that already exist.

    import_id -- identifier used to tag the log lines of this import run
    key       -- subscribestar ``auth_token`` cookie value
    """
    # BUG FIX: ('output') is the plain string 'output', not a one-element
    # tuple, so the config path was wrong and the option never applied.
    dlconfig.set(('output',), "mode", "null")
    dlconfig.set(('extractor', 'subscribestar'), "cookies",
                 {"auth_token": key})
    dlconfig.set(('extractor', 'subscribestar'), "proxy", get_proxy())
    j = job.DataJob("https://subscribestar.adult/feed")
    j.run()

    conn = get_conn()
    user_id = None
    # BUG FIX: initialized so the except-handler's log line cannot hit a
    # NameError when an exception fires before the first assignment below
    post_id = None
    for message in j.data:
        try:
            if message[0] != Message.Directory:
                continue
            post = message[-1]

            user_id = post['author_name']
            post_id = post['post_id']
            file_directory = f"files/subscribestar/{user_id}/{post_id}"
            attachments_directory = f"attachments/subscribestar/{user_id}/{post_id}"

            if is_artist_dnp('subscribestar', user_id):
                log(
                    import_id,
                    f"Skipping post {post_id} from user {user_id} is in do not post list"
                )
                continue

            if post_exists('subscribestar', user_id,
                           str(post_id)) and not post_flagged(
                               'subscribestar', user_id, str(post_id)):
                log(
                    import_id,
                    f'Skipping post {post_id} from user {user_id} because already exists'
                )
                continue

            log(import_id, f"Starting import: {post_id}")

            stripped_content = strip_tags(post['content'])
            post_model = {
                'id': str(post_id),
                '"user"': user_id,
                'service': 'subscribestar',
                # plain-text title truncated to 60 chars + '..'
                'title': (stripped_content[:60] + '..')
                if len(stripped_content) > 60 else stripped_content,
                'content': post['content'],
                'embed': {},
                'shared_file': False,
                'added': datetime.datetime.now(),
                'published': parse_date(post['date']),
                'edited': None,
                'file': {},
                'attachments': []
            }

            # all Url messages that belong to this post, evaluated eagerly
            post_files = [
                msg for msg in j.data
                if msg[0] == Message.Url and msg[-1]['post_id'] == post_id
            ]
            for attachment in post_files:
                fname = (attachment[-1]['filename'] + '.' +
                         attachment[-1]['extension'])
                if not post_model['file']:
                    # first downloaded file becomes the post's primary file
                    filename, _ = download_file(
                        join(config.download_path, file_directory),
                        attachment[-1]['url'],
                        name=fname)
                    post_model['file']['name'] = fname
                    # BUG FIX: the stored path previously ended in the literal
                    # text '(unknown)'; use the filename from download_file
                    post_model['file'][
                        'path'] = f'/{file_directory}/{filename}'
                else:
                    filename, _ = download_file(
                        join(config.download_path, attachments_directory),
                        attachment[-1]['url'],
                        name=fname)
                    post_model['attachments'].append({
                        'name': fname,
                        # BUG FIX: same '(unknown)' placeholder as above
                        'path': f'/{attachments_directory}/{filename}'
                    })

            # serialize the JSON-typed columns
            post_model['embed'] = json.dumps(post_model['embed'])
            post_model['file'] = json.dumps(post_model['file'])
            post_model['attachments'] = [
                json.dumps(attachment)
                for attachment in post_model['attachments']
            ]

            columns = post_model.keys()
            placeholders = ['%s'] * len(post_model)
            # 'attachments' is last in post_model and maps to a jsonb[]
            # column, so its placeholder needs an explicit cast
            placeholders[-1] = '%s::jsonb[]'
            # BUG FIX: PostgreSQL upserts require "DO UPDATE SET"; the bare
            # "UPDATE SET" was a syntax error and every insert would fail
            query = "INSERT INTO posts ({fields}) VALUES ({values}) ON CONFLICT (id, service) DO UPDATE SET {updates}".format(
                fields=','.join(columns),
                values=','.join(placeholders),
                updates=','.join(
                    f'{column}=EXCLUDED.{column}' for column in columns))
            cursor = conn.cursor()
            cursor.execute(query, list(post_model.values()))
            conn.commit()

            log(import_id,
                f"Finished importing {post_id} from user {user_id}",
                to_client=False)
        except Exception:
            log(import_id,
                f"Error while importing {post_id} from user {user_id}",
                'exception')
            conn.rollback()
            continue

    log(import_id, "Finished scanning for posts.")
    index_artists()
Example #4
0
#!/usr/bin/env python
"""Dump the JSON data produced by every extractor test URL into an archive."""

import sys
import os.path
import datetime

ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.realpath(ROOTDIR))
from gallery_dl import extractor, job, config

# (urls, extractor) pairs; keep only test entries that declare a result
tests = [([entry[0] for entry in extr.test if entry[1]], extr)
         for extr in extractor.extractors() if hasattr(extr, "test")]

# optional command-line filter by extractor category
if len(sys.argv) > 1:
    tests = [pair for pair in tests if pair[1].category in sys.argv]

path = os.path.join(ROOTDIR, "archive/testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)
config.load()

for urls, extr in tests:
    for index, url in enumerate(urls):
        filename = "%s-%s-%d.json" % (extr.category, extr.subcategory, index)
        print(filename)
        with open(os.path.join(path, filename), "w") as outfile:
            job.DataJob(url, file=outfile).run()
Example #5
0
#!/usr/bin/env python
"""Archive the JSON output of every extractor test URL under a dated folder."""

import sys
import os.path
import datetime

ROOTDIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, os.path.realpath(ROOTDIR))
from gallery_dl import extractor, job, config

# every extractor that declares tests, paired with all of its test URLs
tests = []
for extr in extractor.extractors():
    if hasattr(extr, "test"):
        tests.append(([entry[0] for entry in extr.test], extr))

# restrict to the categories named on the command line, if any were given
if len(sys.argv) > 1:
    tests = [(urls, extr) for urls, extr in tests if extr.category in sys.argv]

path = os.path.join(ROOTDIR, "archive/testdb", str(datetime.date.today()))
os.makedirs(path, exist_ok=True)
config.load()

for urls, extr in tests:
    for num, url in enumerate(urls):
        name = "%s-%s-%d.json" % (extr.category, extr.subcategory, num)
        print(name)
        with open(os.path.join(path, name), "w") as outfile:
            job.DataJob(url, outfile).run()