def thread_function(q: Queue, thread_lock: threading.Lock, count: int, total: int, client: StashInterface):
    log.LogDebug(f"Created {threading.current_thread().name}")
    # Drain the queue; get_nowait() avoids the race between empty() and get()
    while True:
        try:
            image = q.get_nowait()
        except Empty:  # requires: from queue import Empty
            break

        image_data = {
            'id': image.get('id'),
            'title': image.get('title')
        }
        if image.get('rating'):
            image_data['rating'] = image.get('rating')
        if image.get('studio'):
            image_data['studio_id'] = image.get('studio').get('id')
        if image.get('performers'):
            performer_ids = [p.get('id') for p in image.get('performers')]
            image_data['performer_ids'] = performer_ids
        if image.get('tags'):
            tag_ids = [t.get('id') for t in image.get('tags')]
            image_data['tag_ids'] = tag_ids
        if image.get('galleries'):
            gallery_ids = [g.get('id') for g in image.get('galleries')]
            image_data['gallery_ids'] = gallery_ids

        client.updateImage(image_data)

        # `count` is passed by value, so each thread tracks its own progress;
        # a shared counter object would be needed for exact global progress
        with thread_lock:
            count += 1
            log.LogProgress(count / total)

        q.task_done()
    log.LogDebug(f"{threading.current_thread().name} finished")
    return True
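
A minimal launch sketch for the worker above (get_images and the thread count of 8 are hypothetical):

q = Queue()
for image in get_images(client):  # hypothetical helper returning the images to update
    q.put(image)

thread_lock = threading.Lock()
total = q.qsize()
threads = [
    threading.Thread(target=thread_function, args=(q, thread_lock, 0, total, client))
    for _ in range(8)
]
for t in threads:
    t.start()
q.join()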
Example #2
def main():
    # Avoid shadowing the built-in input()
    json_input = None

    if len(sys.argv) < 2:
        json_input = readJSONInput()
        log.LogDebug("Raw input: %s" % json.dumps(json_input))
    else:
        log.LogDebug("Using command line inputs")
        mode = sys.argv[1]
        log.LogDebug("Command line inputs: {}".format(sys.argv[1:]))

        json_input = {}
        json_input['args'] = {"mode": mode}

        # just some hard-coded values
        json_input['server_connection'] = {
            "Scheme": "http",
            "Port": 9999,
        }

    output = {}
    run(json_input, output)

    out = json.dumps(output)
    print(out + "\n")
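
readJSONInput is not shown in these examples; since Stash plugins receive their input as a JSON document on stdin, it is likely a thin wrapper along these lines:

def readJSONInput():
    # Stash passes the plugin input as JSON on stdin
    return json.loads(sys.stdin.read())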
Example #3
def download(url, downloaded):
    config_path = os.path.join(plugin_folder, 'config.ini')
    log.LogDebug(f"Reading config file at: {config_path}")
    config = configparser.ConfigParser()
    config.read(config_path)
    download_dir = str(pathlib.Path(config.get('PATHS', 'downloadDir') + '/%(id)s.%(ext)s').absolute())
    log.LogDebug("Downloading " + url + " to: " + download_dir)

    ydl = youtube_dl.YoutubeDL({
        'outtmpl': download_dir,
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
    })

    with ydl:
        try:
            meta = ydl.extract_info(url=url, download=True)
            log.LogDebug(meta['id'])
            log.LogDebug("Download finished!")
            downloaded.append({
                "url": url,
                "id": meta.get('id'),
                "title": meta.get('title'),
            })
        except Exception as e:
            log.LogWarning(str(e))
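
A minimal usage sketch for download (the URL is a placeholder):

downloaded = []
download("https://www.youtube.com/watch?v=XXXXXXXXXXX", downloaded)
for entry in downloaded:
    log.LogInfo(f"Downloaded {entry['id']}: {entry['title']}")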
def __copy_tags(client, galleries):
    # TODO: Multithreading
    count = 0
    for gallery in galleries:
        # Truthiness check also guards against an empty scene list
        if gallery.get('scenes'):
            if len(gallery.get('scenes')) > 1:
                log.LogInfo(f'Gallery {gallery.get("id")} has multiple scenes, only copying tags from first scene')
            # Select first scene from gallery scenes
            scene_id = gallery.get('scenes')[0].get('id')
            scene = client.getSceneById(scene_id)
            gallery_data = {
                'id': gallery.get('id'),
                'title': scene.get('title')
            }
            if scene.get('details'):
                gallery_data['details'] = scene.get('details')
            if scene.get('url'):
                gallery_data['url'] = scene.get('url')
            if scene.get('date'):
                gallery_data['date'] = scene.get('date')
            if scene.get('rating'):
                gallery_data['rating'] = scene.get('rating')
            if scene.get('studio'):
                gallery_data['studio_id'] = scene.get('studio').get('id')
            if scene.get('tags'):
                tag_ids = [t.get('id') for t in scene.get('tags')]
                gallery_data['tag_ids'] = tag_ids
            if scene.get('performers'):
                performer_ids = [p.get('id') for p in scene.get('performers')]
                gallery_data['performer_ids'] = performer_ids

            client.updateGallery(gallery_data)
            log.LogDebug(f'Copied information to gallery {gallery.get("id")}')
            count += 1
    return count
Example #5
    def scan_for_new_files(self):
        try:
            query = """
                    mutation {
                        metadataScan (
                            input: {
                                useFileMetadata: true
                                scanGenerateSprites: false
                                scanGeneratePreviews: false
                                scanGenerateImagePreviews: false
                                stripFileExtension: false
                            }
                        )
                    }
            """
            result = self.__callGraphQL(query)
        except ConnectionError:
            # Fall back to a minimal scan input if the full mutation is rejected
            query = """
                    mutation {
                        metadataScan (
                            input: {
                                useFileMetadata: true
                            }
                        )
                    }
            """
            result = self.__callGraphQL(query)
        log.LogDebug("ScanResult: " + str(result))
Example #6
def image_studio_copy(client):
    galleries = client.findGalleries()

    # List of gallery ids for each studio
    # {'studio_id': [gallery_ids]}
    studio_mapping = {}

    # Get studio from each gallery and add it to the mapping
    for gallery in galleries:
        studio = gallery.get('studio')
        if studio is not None:
            # setdefault creates the id list the first time a studio is seen
            studio_mapping.setdefault(studio.get('id'), []).append(int(gallery.get('id')))

    log.LogDebug(f'Found {len(studio_mapping)} studios with galleries')

    # Bulk update all images in galleries for each studio
    for studio, gallery_ids in studio_mapping.items():
        studio_id = int(studio)
        log.LogDebug(
            f'There are {len(gallery_ids)} galleries with studio id {studio_id}')

        # Get images with gallery ids
        image_filter = {
            "galleries": {
                "value": gallery_ids,
                "modifier": "INCLUDES"
            }
        }

        images = client.findImages(image_filter)
        log.LogDebug(
            f'There is a total of {len(images)} images with studio id {studio_id}'
        )

        # Only update images with no studio or different studio
        to_update = [
            int(image.get('id')) for image in images
            if (image.get('studio') is None
                or int(image.get('studio').get('id')) != studio_id)
        ]
        log.LogInfo(f'Adding studio {studio_id} to {len(to_update)} images')

        # Bulk update images with studio_id
        client.updateImageStudio(image_ids=to_update, studio_id=studio_id)
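
updateImageStudio is not shown in these examples. A plausible sketch as a client method, assuming the server exposes a bulkImageUpdate mutation (verify the mutation and its input fields against your Stash version's GraphQL schema):

    def updateImageStudio(self, image_ids, studio_id):
        # Sketch only: bulkImageUpdate and its input fields are assumptions
        query = """
            mutation($input: BulkImageUpdateInput!) {
                bulkImageUpdate(input: $input) {
                    id
                }
            }
        """
        variables = {'input': {'ids': image_ids, 'studio_id': studio_id}}
        return self.__callGraphQL(query, variables)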
def backupDB(client):
    configuration = client.getConfiguration()
    dbPath = configuration['general']['databasePath']
    dbName = os.path.basename(dbPath) + "_ReScrape.sqlite"
    dbDir = os.path.dirname(dbPath)  # Directory containing the database
    # os.path.join keeps the path cross-platform
    newPath = os.path.join(dbDir, dbName)
    log.LogDebug("Making a backup of your database...")
    copyfile(dbPath, newPath)
Example #8
def add_ph_urls(client):
    count = 0

    scenes = client.findScenesByPathRegex(
        r"-ph[a-z0-9]{13}\.(?:[mM][pP]4|[wW][mM][vV])$")

    for scene in scenes:
        if scene.get('url') is None or scene.get('url') == "":
            try:
                ph_id = os.path.splitext(scene.get('path').split('-ph')[1])[0]
            except IndexError:
                log.LogDebug(f"Error, skipping scene {scene.get('id')}")
                continue
            url = f"https://www.pornhub.com/view_video.php?viewkey=ph{ph_id}"

            scene_data = {'id': scene.get('id'), 'url': url}

            # Required, would be cleared otherwise
            if scene.get('rating'):
                scene_data['rating'] = scene.get('rating')

            tag_ids = [t.get('id') for t in scene.get('tags')]
            scene_data['tag_ids'] = tag_ids

            performer_ids = [p.get('id') for p in scene.get('performers')]
            scene_data['performer_ids'] = performer_ids

            if scene.get('studio'):
                scene_data['studio_id'] = scene.get('studio').get('id')

            if scene.get('gallery'):
                scene_data['gallery_id'] = scene.get('gallery').get('id')

            client.updateScene(scene_data)
            log.LogDebug(f'Set url for scene {scene.get("id")}')
            count += 1

    log.LogInfo(f"Set urls for {count} scene(s)")
Example #9
    def _dispatch(self, method, params, kwargs):
        """Logs method invocation and exceptions to Windows Event Viewer.

        This is the entry point into this class.

        Args:
          method: method name, string.
          params: list of parameters to method.
          kwargs: keyword arguments to method.

        Returns:
          return value from method.

        Raises:
          common.InvalidMethod:
            if an unknown or invalid (starts with "_") method is invoked.
          other:
            any other exception that a called method may raise.
        """

        # Prevent private and non-existent methods from being accessed via XML-RPC.
        if method.startswith('_') or not hasattr(self, method):
            raise common.InvalidMethod(method)

        # Authentication
        if self.auth_class is not None:
            self._VerifyAuth(method, params, kwargs)
            params = params[self.auth_class.REQUIRED_PARAM_COUNT:]

        self._LogUse(method, params, kwargs)

        # Since XML-RPC Server returns generic exceptions, here we catch all so
        # that we can nicely log them to the Windows Event Viewer and then
        # reraise.
        # pylint: disable-msg=W0703
        try:
            log.LogDebug('Calling method %s(%s)' % (method, params))
            t1 = time.time()
            ret = getattr(self, method)(*params, **kwargs)
            t2 = time.time()
            log.LogDebug('Execution time for method(%s) is %f' %
                         (method, t2 - t1))
        except common.Error as e:
            log.NormalException(method, e)
            raise
        return ret
Example #10
    def __findScenesByPathRegex(self, regex, page=1):
        query = """
            query findScenesByPathRegex($filter: FindFilterType!) {
                findScenesByPathRegex(filter:$filter)  {
                    count
                    scenes {
                        title
                        id
                        url
                        rating
                        galleries {id}
                        studio {id}
                        tags {id}
                        performers {id}
                        path
                    }
                }
            }
        """

        variables = {
            "filter": {
                "q": regex,
                "per_page": 100,
                "page": page
            }
        }

        result = self.__callGraphQL(query, variables)
        log.LogDebug(f"Regex found {result.get('findScenesByPathRegex').get('count')} scene(s) on page {page}")

        scenes = result.get('findScenesByPathRegex').get('scenes')

        # If page is full, also scan the next page:
        if len(scenes) == 100:
            scenes.extend(self.__findScenesByPathRegex(regex, page + 1))

        if page == 1:
            log.LogDebug(f"Regex found a total of {len(scenes)} scene(s)")
        return scenes
    def __init__(self, conn):
        self.port = conn['Port']
        scheme = conn['Scheme']

        # Session cookie for authentication
        self.cookies = {'session': conn.get('SessionCookie').get('Value')}

        domain = conn.get('Domain') or 'localhost'

        # Stash GraphQL endpoint
        self.url = f"{scheme}://{domain}:{self.port}/graphql"
        log.LogDebug(f"Using Stash GraphQL endpoint at {self.url}")
Example #12
def __bulk_scrape_scene_url(client, scenes, delay=5):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    # Number of scraped scenes
    count = 0

    total = len(scenes)
    # Index for progress bar
    i = 0

    # Scrape scene with existing metadata
    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i/total)

        if delay:
            last_request = wait(delay, last_request, time.time())

        # Create dict with scene data
        scene_data = {
            'id': scene.get('id'),
        }

        # Extract scraper ID if appended to control tag, then scrape scene
        if '_' in control_tag:
            scraper_id = control_tag.split('_')[-1]
            scraped_data = client.scrapeScene(scene_data, scraper_id)
        else:
            scraped_data = client.scrapeScene(scene_data)

        # No data has been found for this scene
        if scraped_data is None or not any(scraped_data.values()):
            log.LogInfo(f"Could not get data for scene {scene.get('id')}")
            continue

        # Create dict with update data
        update_data = {
            'id': scene.get('id')
        }
        if scraped_data.get('url'):
            update_data['url'] = scraped_data.get('url')

        # Update scene with scraped scene data
        client.updateScene(update_data)
        log.LogDebug(f"Scraped data for scene {scene.get('id')}")
        count += 1

    return count
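
The wait helper is not shown in these examples. Based on its call sites (a later example assigns its return value back to last_request), a plausible sketch:

def wait(delay, last_request, current_time):
    # Sleep until at least `delay` seconds have passed since the last request,
    # then return the timestamp to use as the new last_request
    elapsed = current_time - last_request
    if elapsed < delay:
        time.sleep(delay - elapsed)
    return time.time()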
def copy_all_tags(client):
    log.LogWarning("#######################################")
    log.LogWarning("Warning! This task will copy all information to all galleries with attached scenes")
    log.LogWarning("You have 30 seconds to cancel this task before it starts copying")
    log.LogWarning("#######################################")

    time.sleep(30)
    log.LogInfo("Start copying information. This may take a while depending on the amount of galleries")
    # Get all galleries
    galleries = client.findGalleriesByTags([])
    log.LogDebug(f"Found {len(galleries)} galleries")
    count = __copy_tags(client, galleries)

    log.LogInfo(f'Copied scene information to {count} galleries')
def copy_tags(client):
    tag = client.findTagIdWithName(control_tag)
    if tag is None:
        sys.exit(f"Tag {control_tag} does not exist. Please create it via the 'Create CopyTags tag' task")

    tag_ids = [tag]

    galleries = client.findGalleriesByTags(tag_ids)

    log.LogDebug(f"Found {len(galleries)} galleries with {control_tag} tag")

    count = __copy_tags(client, galleries)

    log.LogInfo(f'Copied scene information to {count} galleries')
Example #15
    def __init__(self, conn):
        self.port = conn['Port']
        scheme = conn['Scheme']

        # Session cookie for authentication
        self.cookies = {
            'session': conn.get('SessionCookie').get('Value')
        }

        # Use the host from the config; if Stash listens on all interfaces
        # (0.0.0.0), that address is not directly connectable, so use localhost
        host = conn.get('Host') or 'localhost'
        if '0.0.0.0' in host:
            host = 'localhost'

        # Stash GraphQL endpoint
        self.url = f"{scheme}://{host}:{self.port}/graphql"
        log.LogDebug(f"Using Stash GraphQL endpoint at {self.url}")
    def createTagWithName(self, name):
        query = """
            mutation tagCreate($input:TagCreateInput!) {
                tagCreate(input: $input){
                    id
                }
            }
        """
        variables = {'input': {'name': name}}

        result = self.__callGraphQL(query, variables)
        if result.get('tagCreate'):
            log.LogDebug(f"Created tag: {name}")
            return result.get('tagCreate').get("id")
        else:
            log.LogError(f"Could not create tag: {name}")
            return None
    def createMovie(self, name, url=None):
        query = """
        mutation($name: String!, $url: String) {
            movieCreate(input: { name: $name, url: $url }) {
                id
            }
        }
        """
        variables = {'name': name, 'url': url}

        result = self.__callGraphQL(query, variables)
        if result.get("movieCreate"):
            log.LogDebug(f"Created movie: {name}")
            return result.get("movieCreate").get("id")
        else:
            log.LogError(f"Could not create movie: {name}")
            return None
    def createPerformerByName(self, name):
        query = """
            mutation($name: String!) {
                performerCreate(input: { name: $name }) {
                    id
                }
            }
        """

        variables = {'name': name}

        result = self.__callGraphQL(query, variables)
        if result.get('performerCreate'):
            log.LogDebug(f"Created performer: {name}")
            return result.get('performerCreate').get('id')
        else:
            log.LogError(f"Could not create performer: {name}")
            return None
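
createStudio is called by the bulk scraper in a later example but is not shown. A sketch following the same pattern as the create mutations above (the exact StudioCreateInput shape is an assumption):

    def createStudio(self, name, url=None):
        query = """
            mutation($name: String!, $url: String) {
                studioCreate(input: { name: $name, url: $url }) {
                    id
                }
            }
        """
        variables = {'name': name, 'url': url}

        result = self.__callGraphQL(query, variables)
        if result.get('studioCreate'):
            log.LogDebug(f"Created studio: {name}")
            return result.get('studioCreate').get('id')
        else:
            log.LogError(f"Could not create studio: {name}")
            return None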
Example #19
def __bulk_scrape(client,
                  scenes,
                  create_missing_performers=False,
                  create_missing_tags=False,
                  create_missing_studios=False,
                  delay=5):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    missing_scrapers = list()

    # Number of scraped scenes
    count = 0

    total = len(scenes)
    # Index for progress bar
    i = 0

    # Scrape if url not in missing_scrapers
    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i / total)

        if scene.get('url') is None or scene.get('url') == "":
            log.LogInfo(f"Scene {scene.get('id')} is missing url")
            continue
        if urlparse(scene.get("url")).netloc not in missing_scrapers:
            if delay:
                last_request = wait(delay, last_request, time.time())
            scraped_data = client.scrapeSceneURL(scene.get('url'))
            # If result is null, add url to missing_scrapers
            if scraped_data is None:
                log.LogWarning(
                    f"Missing scraper for {urlparse(scene.get('url')).netloc}")
                missing_scrapers.append(urlparse(scene.get('url')).netloc)
                continue
            # No data has been found for this scene
            if not any(scraped_data.values()):
                log.LogInfo(f"Could not get data for scene {scene.get('id')}")
                continue

            # Create dict with scene data
            update_data = {'id': scene.get('id')}
            if scraped_data.get('title'):
                update_data['title'] = scraped_data.get('title')
            if scraped_data.get('details'):
                update_data['details'] = scraped_data.get('details')
            if scraped_data.get('date'):
                update_data['date'] = scraped_data.get('date')
            if scraped_data.get('image'):
                update_data['cover_image'] = scraped_data.get('image')
            if scraped_data.get('tags'):
                tag_ids = list()
                for tag in scraped_data.get('tags'):
                    if tag.get('stored_id'):
                        tag_ids.append(tag.get('stored_id'))
                    else:
                        if create_missing_tags and tag.get('name') != "":
                            # Capitalize each word
                            tag_name = " ".join(
                                x.capitalize()
                                for x in tag.get('name').split(" "))
                            log.LogInfo(f'Create missing tag: {tag_name}')
                            tag_id = client.createTagWithName(tag_name)
                            tag_ids.append(tag_id)
                if len(tag_ids) > 0:
                    update_data['tag_ids'] = tag_ids

            if scraped_data.get('performers'):
                performer_ids = list()
                for performer in scraped_data.get('performers'):
                    if performer.get('stored_id'):
                        performer_ids.append(performer.get('stored_id'))
                    else:
                        if create_missing_performers and performer.get(
                                'name') != "":
                            performer_name = " ".join(
                                x.capitalize()
                                for x in performer.get('name').split(" "))
                            log.LogInfo(
                                f'Create missing performer: {performer_name}')
                            performer_id = client.createPerformerByName(
                                performer_name)
                            performer_ids.append(performer_id)
                if len(performer_ids) > 0:
                    update_data['performer_ids'] = performer_ids

            if scraped_data.get('studio'):
                studio = scraped_data.get('studio')
                if studio.get('stored_id'):
                    update_data['studio_id'] = studio.get('stored_id')
                else:
                    if create_missing_studios:
                        studio_name = " ".join(
                            x.capitalize()
                            for x in studio.get('name').split(" "))
                        log.LogInfo(f'Creating missing studio {studio_name}')
                        studio_url = '{uri.scheme}://{uri.netloc}'.format(
                            uri=urlparse(scene.get('url')))
                        studio_id = client.createStudio(
                            studio_name, studio_url)
                        update_data['studio_id'] = studio_id

            # Update scene with scraped scene data
            client.updateScene(update_data)
            log.LogDebug(f"Scraped data for scene {scene.get('id')}")
            count += 1

    return count
Example #20
def tag_scenes(client):
    endRegex = r'\.(?:[mM][pP]4|[wW][mM][vV])$'
    beginRegex = ".*("
    if not os.path.isfile(downloaded_json) and os.path.isfile(downloaded_backup_json):
        shutil.copyfile(downloaded_backup_json, downloaded_json)
    with open(downloaded_json) as json_file:
        data = json.load(json_file)
        for i in range(0, len(data)):
            if i < len(data) - 1:
                beginRegex += data[i]['id'] + "|"
            else:
                beginRegex += data[i]['id'] + ").*"
        log.LogDebug(beginRegex + endRegex)
        scenes = client.findScenesByPathRegex(beginRegex)

        total = len(scenes)
        i = 0
        for scene in scenes:
            i += 1
            log.LogProgress(i/total)
            log.LogDebug("ScenePath: " + scene.get('path'))
            basename = os.path.basename(scene.get('path'))
            filename = os.path.splitext(basename)[0]

            found_video = None
            for video in data:
                if video['id'] in filename:
                    found_video = video
                    break
            if found_video is not None:
                scene_data = {
                    'id': scene.get('id'),
                    'url': found_video['url'],
                    'title': found_video['title']
                }

                # Required, would be cleared otherwise
                if scene.get('rating'):
                    scene_data['rating'] = scene.get('rating')

                tag_ids = []
                for t in scene.get('tags'):
                    tag_ids.append(t.get('id'))
                tag_ids.append(get_scrape_tag(client))
                scene_data['tag_ids'] = tag_ids

                performer_ids = []
                for p in scene.get('performers'):
                    performer_ids.append(p.get('id'))
                scene_data['performer_ids'] = performer_ids

                if scene.get('studio'):
                    scene_data['studio_id'] = scene.get('studio').get('id')

                if scene.get('gallery'):
                    scene_data['gallery_id'] = scene.get('gallery').get('id')

                client.updateScene(scene_data)
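
To make the regex built in tag_scenes concrete, an equivalent construction with str.join on illustrative ids:

data = [{'id': 'abc123'}, {'id': 'def456'}]
beginRegex = ".*(" + "|".join(video['id'] for video in data) + ").*"
endRegex = r'\.(?:[mM][pP]4|[wW][mM][vV])$'
print(beginRegex + endRegex)
# -> .*(abc123|def456).*\.(?:[mM][pP]4|[wW][mM][vV])$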
def findScene(client, tags_name):
    log.LogDebug("Searching scenes...")

    tags_id = client.findTagIdWithName(tags_name)
    log.LogDebug("Your tag ID: " + str(tags_id))

    scenes = client.findScenesByTags([tags_id])
    log.LogDebug("Number of scenes with this tag: " + str(len(scenes)))

    # Get the ID of the tag written by this plugin
    pluginsTagsID = get_scrape_tag(client)
    for scene_fromTags in scenes:
        log.LogDebug("Scene: " + str(scene_fromTags))
        # Scenes need to have a URL set
        if scene_fromTags['url'] is None:
            continue
        # Only use the JavLibrary scraper
        if 'javlibrary' not in scene_fromTags['url']:
            continue
        # findScenesByTags only returns URL and ID, so use getSceneById to get the full scene
        scene = client.getSceneById(scene_fromTags['id'])

        # Current tags present in scene
        already_watched = False
        sceneTags = []
        for tags in scene.get('tags'):
            if tags['id'] == pluginsTagsID:
                log.LogDebug("Already watched")
                already_watched = True
                break
            else:
                sceneTags.append(tags['id'])
        if already_watched:
            continue
        # The URL check is redundant given the check above
        if scene['url'] is not None and scene['tags'] is not None:
            # Scrape the scene (what happens if the URL has no scraper?)
            scrapedData = client.scrapeSceneURL(scene['url'])
            if scrapedData is None:
                log.LogDebug("Error when scraping?")
                continue
            scrapedTags = []
            if scrapedData['tags'] is None:
                log.LogDebug("No tags from scraping")
                continue
            for tags in scrapedData['tags']:
                # Only take tags that already exist in the database
                if tags['stored_id'] is None:
                    continue
                scrapedTags.append(tags['stored_id'])
            # Still needed: every scraped tag may be missing a stored_id
            if not scrapedTags:
                log.LogDebug("No tags from scraping")
                continue

            # Compare
            log.LogDebug("Current Tags:" + str(sceneTags))
            log.LogDebug("Scraped Tags:" + str(scrapedTags))
            # Remove duplicate
            unique_Tags = list(set(sceneTags + scrapedTags))
            log.LogDebug("Unique Tags:" + str(unique_Tags))
            # Look for tags not in the current tag list.
            new_tags = [t for t in unique_Tags if t not in sceneTags]
            if not new_tags:
                log.LogDebug("No new tags")
            else:
                log.LogDebug("New Tags:" + str(new_tags))

            # Get all data needed for the update (borrowed from niemand's plugins)
            scene_data = {
                'id': scene.get('id'),
                'url': scene.get('url'),
                'title': scene.get('title')
            }

            if scrapedData.get('details'):
                scene_data['details'] = scrapedData.get('details')

            performer_ids = []
            for p in scene.get('performers'):
                performer_ids.append(p.get('id'))
            scene_data['performer_ids'] = performer_ids

            if scene.get('studio'):
                scene_data['studio_id'] = scene.get('studio').get('id')

            if scene.get('gallery'):
                scene_data['gallery_id'] = scene.get('gallery').get('id')

            if scene.get('rating'):
                scene_data['rating'] = scene.get('rating')

            if not new_tags:
                # No new tags, but still add our custom tag so the scene is not scanned again
                tag_ids = [t.get('id') for t in scene.get('tags')]
                # Add our custom tag
                tag_ids.append(pluginsTagsID)
                scene_data['tag_ids'] = tag_ids
            else:
                new_tags.append(pluginsTagsID)
                scene_data['tag_ids'] = new_tags + sceneTags
                log.LogDebug("Updated scene: " + str(scene.get('title')))
            client.updateScene(scene_data)
Example #22
def __bulk_scrape(client,
                  entities: Dict[Entity, array.array],
                  create_missing_performers=False,
                  create_missing_tags=False,
                  create_missing_studios=False,
                  create_missing_movies=False,
                  delay=5) -> None:
    last_request = -1
    # Unpack the entity dict and iterate over each type (scenes, galleries);
    # entities is non-empty and contains at least one non-empty entity type
    for entity_class, entity_array in entities.items():
        log.LogInfo(f"Scraping {entity_class.value}")
        # Fetch available url scrapers for entity type
        if entity_class is Entity.Scene:
            supported_scrapers = client.sceneScraperURLs()
        elif entity_class is Entity.Gallery:
            supported_scrapers = client.galleryScraperURLs()
        else:
            raise TypeError(f"Unexpected Entity type: {entity_class}")

        if delay > 0:
            # Initialize last request with current time + delay time
            last_request = time.time() + delay

        missing_scrapers = list()

        # Number of scraped scenes
        count = 0

        total = len(entity_array)
        # Index for progress bar
        i = 0

        # Scrape if url not in missing_scrapers
        for entity in entity_array:
            # Update status bar
            i += 1
            log.LogProgress(i / total)

            if entity.get('url') is None or entity.get('url') == "":
                # Skip the scene/gallery if it does not have a url
                log.LogInfo(
                    f"{entity_class.name} {entity.get('id')} is missing url")
                continue

            # URL domain name (with any www. prefix stripped)
            url_netloc = urlparse(entity.get("url")).netloc.split('www.')[-1]
            if url_netloc not in missing_scrapers:
                if delay:
                    last_request = wait(delay, last_request, time.time())

                # The queries have different fields, so there cannot be a single scrapeURL function
                if entity_class is Entity.Scene:
                    scraped_data = client.scrapeSceneURL(entity.get('url'))
                elif entity_class is Entity.Gallery:
                    scraped_data = client.scrapeGalleryURL(entity.get('url'))
                else:
                    raise TypeError(f"Unexpected Entity type: {entity_class}")
                if scraped_data is None:
                    if url_netloc not in supported_scrapers:
                        # If the result is null and the url is not in the list of
                        # supported scrapers, remember it; faster than re-checking
                        # the scraper list for every entity
                        log.LogWarning(
                            f"{entity_class.name} {entity.get('id')}: "
                            f"Missing scraper for {url_netloc}")
                        log.LogDebug(f"Full url: {entity.get('url')}")
                        missing_scrapers.append(url_netloc)
                    else:
                        log.LogInfo(
                            f"Could not scrape {entity_class.name.lower()} {entity.get('id')}"
                        )
                        log.LogDebug("Return data was None")
                    continue
                # No data has been found for this entity
                if not any(scraped_data.values()):
                    log.LogInfo(
                        f"Could not get data for {entity_class.name.lower()} {entity.get('id')}"
                    )
                    continue

                update_entity(
                    client=client,
                    entity=entity,
                    entity_type=entity_class,
                    scraped_data=scraped_data,
                    create_missing_tags=create_missing_tags,
                    create_missing_performers=create_missing_performers,
                    create_missing_studios=create_missing_studios,
                    create_missing_movies=create_missing_movies)

                log.LogDebug(
                    f"Scraped data for {entity_class.name.lower()} {entity.get('id')}"
                )
                count += 1

        log.LogInfo(f"Scraped data for {count} {entity_class.value}")
Example #23
def __bulk_create_performer(client, scenes, create_missing_performers, parse_performer_pattern, delay):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    # Number of created performers
    count = 0

    total = len(scenes)
    # Index for progress bar
    i = 0

    # List all performers in the database
    all_performers = client.listPerformers()

    # Compile the performer pattern once, outside the loop
    performer_regex = re.compile(parse_performer_pattern)

    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i / total)

        if scene.get('path') is None or scene.get('path') == "":
            log.LogInfo(f"Scene {scene.get('id')} is missing path")
            continue

        # Parse performer name from the scene's file basename
        scene_basename = os.path.basename(scene['path'])
        log.LogInfo(f"Scene basename is: {scene_basename}")
        parsed_performer_regex = performer_regex.search(scene_basename)
        if parsed_performer_regex is None:
            log.LogInfo(f"No Performer found Scene {scene.get('id')} filename")
            continue
        parsed_performer_name = ' '.join(parsed_performer_regex.groups())
        log.LogInfo(f"Parsed performer name is: {parsed_performer_name}")

        # If performer name successfully parsed from scene basename
        if parsed_performer_name:
            # Create dict with scene data
            update_data = {
                'id': scene.get('id')
            }

            # List all performers currently attached to scene
            scene_performers = [sp['name'].lower() for sp in scene['performers']]
            log.LogInfo(f"Current scene performers are: {scene_performers}")

            # Check if performer is already attached to the scene
            if parsed_performer_name.lower() in scene_performers:
                continue
            else:
                # Start from the scene's existing performer ids so they are not
                # cleared when performer_ids is written back to the scene
                performer_ids = [sp['id'] for sp in scene['performers']]
                # Check if performer already exists in database
                for performer in all_performers:
                    if performer['name'] and parsed_performer_name.lower() == performer['name'].lower():
                        performer_ids.append(performer['id'])
                        break
                    if performer['aliases'] and parsed_performer_name.lower() in [p.strip().lower() for p in performer['aliases'].replace('/', ',').split(',')]:
                        performer_ids.append(performer['id'])
                        break
                else:
                    # for/else: this branch runs only when the loop found no matching performer
                    # Create performer if not in database
                    if create_missing_performers and parsed_performer_name != "":
                        performer_name = " ".join(x.capitalize() for x in parsed_performer_name.split(" "))
                        log.LogInfo(f'Create missing performer: {performer_name}')
                        performer_id = client.createPerformerByName(performer_name)
                        performer_ids.append(performer_id)
                        # Add newly created performer to all performers list
                        all_performers.append({'id':performer_id, 'name':performer_name, 'aliases':''})

                # Add found/created performer IDs to scene update data
                if len(performer_ids) > 0:
                    update_data['performer_ids'] = performer_ids
                    log.LogInfo(f"Performer IDs found: {performer_ids}")

                # Update scene with parsed performer data
                client.updateScene(update_data)
                log.LogDebug(f"Updated performer data for scene {scene.get('id')}")
                count += 1

    return count
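
For reference, how a capture-group parse_performer_pattern maps a filename to a performer name (the pattern and filename are hypothetical):

import re

parse_performer_pattern = r'^([A-Z][a-z]+)\.([A-Z][a-z]+)\.'
match = re.compile(parse_performer_pattern).search("Jane.Doe.Scene.Title.mp4")
if match:
    print(' '.join(match.groups()))  # -> Jane Doe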