Example #1
def bulk_create_performer(client, create_missing_performers=False, parse_performer_pattern=r'^.*[ \._]([A-Z][a-zA-Z]+)[ \._]([A-Z][a-zA-Z]+)[ \._].*$', delay=5):
    try:
        create_missing_performers = bool(config.create_missing_performers)
        parse_performer_pattern = config.parse_performer_pattern
        delay = int(config.delay)
    except AttributeError as e:
        log.LogWarning(e)
        log.LogWarning("Using defaults for missing config values")
    except ValueError as e:
        log.LogWarning(e)
        log.LogWarning("Using defaults for wrong values")

    log.LogInfo('##### Bulk Create Performer #####')
    log.LogInfo(f'create_missing_performers: {create_missing_performers}')
    log.LogInfo(f'parse_performer_pattern: {parse_performer_pattern}')
    log.LogInfo(f'delay: {delay}')
    log.LogInfo('#############################')

    # Search for all scenes with scrape tag
    tag = client.findTagIdWithName(control_tag)
    if tag is None:
        sys.exit("Scrape Tag does not exist. Please create it via the 'Create scrape tag' task")

    tag_ids = [tag]
    scenes = client.findScenesByTags(tag_ids)
    log.LogInfo(f'Found {len(scenes)} scenes with scrape tag')
    count = __bulk_create_performer(client, scenes, create_missing_performers, parse_performer_pattern, delay)
    log.LogInfo(f'Created {count} performers')
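As a quick illustration (not part of the plugin code above), this is what the default parse_performer_pattern captures from a file name; the file name below is invented for the demonstration.

import re

pattern = r'^.*[ \._]([A-Z][a-zA-Z]+)[ \._]([A-Z][a-zA-Z]+)[ \._].*$'
# Invented example file name; the two capture groups hold the performer's name
match = re.match(pattern, 'Studio.2021.01.01.Jane.Doe.1080p.mp4')
if match:
    first_name, last_name = match.groups()
    print(first_name, last_name)  # -> Jane Doe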
Example #2
def bulk_scrape(client, create_missing_performers=False, create_missing_tags=False, create_missing_studios=False, delay=5):
    try:
        create_missing_studios = bool(config.create_missing_studios)
        create_missing_tags = bool(config.create_missing_tags)
        create_missing_performers = bool(config.create_missing_performers)
        delay = int(config.delay)
    except AttributeError as e:
        log.LogWarning(e)
        log.LogWarning("Using defaults for missing config values")
    except ValueError as e:
        log.LogWarning(e)
        log.LogWarning("Using defaults for wrong values")

    log.LogInfo('##### Bulk URL Scraper #####')
    log.LogInfo(f'create_missing_performers: {create_missing_performers}')
    log.LogInfo(f'create_missing_tags: {create_missing_tags}')
    log.LogInfo(f'create_missing_studios: {create_missing_studios}')
    log.LogInfo(f'delay: {delay}')
    log.LogInfo('#############################')

    # Search for all scenes with scrape tag
    tag = client.findTagIdWithName(control_tag)
    if tag is None:
        sys.exit("Tag scrape does not exist. Please create it via the 'Create scrape tag' task")

    tag_ids = [tag]
    scenes = client.findScenesByTags(tag_ids)
    log.LogInfo(f'Found {len(scenes)} scenes with scrape tag')
    count = __bulk_scrape(client, scenes, create_missing_performers, create_missing_tags, create_missing_studios, delay)
    log.LogInfo(f'Scraped data for {count} scenes')
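The try/except block above reads its overrides from a config module that is not shown in these examples. A minimal sketch of what such a module might contain (the values are illustrative assumptions, not the plugin's defaults):

# config.py (hypothetical contents)
create_missing_performers = True
create_missing_tags = True
create_missing_studios = False
delay = 5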
Example #3
def copy_all_tags(client):
    log.LogWarning("#######################################")
    log.LogWarning("Warning! This task will copy all information to all galleries with attached scenes")
    log.LogWarning("You have 30 seconds to cancel this task before it starts copying")
    log.LogWarning("#######################################")

    time.sleep(30)
    log.LogInfo("Start copying information. This may take a while depending on the amount of galleries")
    # Get all galleries
    galleries = client.findGalleriesByTags([])
    log.LogDebug(f"Found {len(galleries)} galleries")
    count = __copy_tags(client, galleries)

    log.LogInfo(f'Copied scene information to {count} galleries')
Example #4
    def RemoveSessionsFromServer(self, nodeID, sessionIDs):
        """
        On proxy: remove the sessions 'sessionIDs' from server node 'nodeID'
        by faking TransportClosed messages. This call comes from server 'nodeID', which has detected
        that the sessions are no longer relevant to it. Note that the sessions are _not_ removed
        from the proxy and that "race conditions" are OK: in the worst case, a relevant session that
        was removed will be re-JITed to the server again, ahead of a notification or call.
        """
        if machobase.mode != 'proxy':
            raise RuntimeError(
                'RemoveSessionsFromServer should only be called on a proxy')
        log.LogInfo('CTXSESS: RemoveSessionsFromServer(nodeID=', nodeID,
                    '), with ', len(sessionIDs), ' session IDs')
        mn = sm.services['machoNet']
        serverTID = mn.transportIDbySolNodeID.get(nodeID, None)
        if serverTID is not None:
            serverTransport = mn.transportsByID[serverTID]
            for sid in sessionIDs:
                sess = sessionsBySID.get(sid, None)
                if sess is not None:
                    uthread.worker('SessionMgr::RemoveSessionsFromServer',
                                   serverTransport.RemoveSessionFromServer,
                                   sess)

        else:
            log.LogWarning(
                'RemoveSessionsFromServer() called with unknown or non-server nodeID ',
                nodeID)
Example #5
def download(url, downloaded):
    config_path = os.path.join(plugin_folder, 'config.ini')
    log.LogDebug(f"Reading config file at: {config_path}")
    config = configparser.ConfigParser()
    config.read(config_path)
    download_dir = str(pathlib.Path(config.get('PATHS', 'downloadDir') + '/%(id)s.%(ext)s').absolute())
    log.LogDebug("Downloading " + url + " to: " + download_dir)

    ydl = youtube_dl.YoutubeDL({
        'outtmpl': download_dir,
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
    })

    with ydl:
        try:
            meta = ydl.extract_info(url=url, download=True)
            log.LogDebug(meta['id'])
            log.LogDebug("Download finished!")
            downloaded.append({
                "url": url,
                "id": meta.get('id'),
                "title": meta.get('title'),
            })
        except Exception as e:
            log.LogWarning(str(e))
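download() expects a config.ini with a [PATHS] section containing a downloadDir key. A minimal sketch of creating that file with configparser (the directory is a placeholder, not a path the plugin prescribes):

import configparser

config = configparser.ConfigParser()
config['PATHS'] = {'downloadDir': '/tmp/downloads'}  # placeholder directory
with open('config.ini', 'w') as config_file:
    config.write(config_file)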
Example #6
    def RemoveSessionsFromServer(self, nodeID, sessionIDs):
        if machobase.mode != 'proxy':
            raise RuntimeError(
                'RemoveSessionsFromServer should only be called on a proxy')
        log.LogInfo('CTXSESS: RemoveSessionsFromServer(nodeID=', nodeID,
                    '), with ', len(sessionIDs), ' session IDs')
        mn = sm.services['machoNet']
        serverTID = mn.transportIDbySolNodeID.get(nodeID, None)
        if serverTID is not None:
            serverTransport = mn.transportsByID[serverTID]
            for sid in sessionIDs:
                sess = sessionsBySID.get(sid, None)
                if sess is not None:
                    uthread.worker('SessionMgr::RemoveSessionsFromServer',
                                   serverTransport.RemoveSessionFromServer,
                                   sess)

        else:
            log.LogWarning(
                'RemoveSessionsFromServer() called with unknown or non-server nodeID ',
                nodeID)
Example #7
def __bulk_scrape(client,
                  scenes,
                  create_missing_performers=False,
                  create_missing_tags=False,
                  create_missing_studios=False,
                  delay=5):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    missing_scrapers = list()

    # Number of scraped scenes
    count = 0

    total = len(scenes)
    # Index for progress bar
    i = 0

    # Scrape if url not in missing_scrapers
    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i / total)

        if scene.get('url') is None or scene.get('url') == "":
            log.LogInfo(f"Scene {scene.get('id')} is missing url")
            continue
        if urlparse(scene.get("url")).netloc not in missing_scrapers:
            if delay:
                last_request = wait(delay, last_request, time.time())
            scraped_data = client.scrapeSceneURL(scene.get('url'))
            # If result is null, add url to missing_scrapers
            if scraped_data is None:
                log.LogWarning(
                    f"Missing scraper for {urlparse(scene.get('url')).netloc}")
                missing_scrapers.append(urlparse(scene.get('url')).netloc)
                continue
            # No data has been found for this scene
            if not any(scraped_data.values()):
                log.LogInfo(f"Could not get data for scene {scene.get('id')}")
                continue

            # Create dict with scene data
            update_data = {'id': scene.get('id')}
            if scraped_data.get('title'):
                update_data['title'] = scraped_data.get('title')
            if scraped_data.get('details'):
                update_data['details'] = scraped_data.get('details')
            if scraped_data.get('date'):
                update_data['date'] = scraped_data.get('date')
            if scraped_data.get('image'):
                update_data['cover_image'] = scraped_data.get('image')
            if scraped_data.get('tags'):
                tag_ids = list()
                for tag in scraped_data.get('tags'):
                    if tag.get('stored_id'):
                        tag_ids.append(tag.get('stored_id'))
                    else:
                        if create_missing_tags and tag.get('name') != "":
                            # Capitalize each word
                            tag_name = " ".join(
                                x.capitalize()
                                for x in tag.get('name').split(" "))
                            log.LogInfo(f'Create missing tag: {tag_name}')
                            tag_id = client.createTagWithName(tag_name)
                            tag_ids.append(tag_id)
                if len(tag_ids) > 0:
                    update_data['tag_ids'] = tag_ids

            if scraped_data.get('performers'):
                performer_ids = list()
                for performer in scraped_data.get('performers'):
                    if performer.get('stored_id'):
                        performer_ids.append(performer.get('stored_id'))
                    else:
                        if create_missing_performers and performer.get(
                                'name') != "":
                            performer_name = " ".join(
                                x.capitalize()
                                for x in performer.get('name').split(" "))
                            log.LogInfo(
                                f'Create missing performer: {performer_name}')
                            performer_id = client.createPerformerByName(
                                performer_name)
                            performer_ids.append(performer_id)
                if len(performer_ids) > 0:
                    update_data['performer_ids'] = performer_ids

            if scraped_data.get('studio'):
                studio = scraped_data.get('studio')
                if studio.get('stored_id'):
                    update_data['studio_id'] = studio.get('stored_id')
                else:
                    if create_missing_studios:
                        studio_name = " ".join(
                            x.capitalize()
                            for x in studio.get('name').split(" "))
                        log.LogInfo(f'Creating missing studio {studio_name}')
                        studio_url = '{uri.scheme}://{uri.netloc}'.format(
                            uri=urlparse(scene.get('url')))
                        studio_id = client.createStudio(
                            studio_name, studio_url)
                        update_data['studio_id'] = studio_id

            # Update scene with scraped scene data
            client.updateScene(update_data)
            log.LogDebug(f"Scraped data for scene {scene.get('id')}")
            count += 1

    return count
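The wait() helper used for rate limiting above is not shown in these examples. One possible implementation, assuming it sleeps until delay seconds have passed since the previous request and returns the new timestamp:

import time

def wait(delay, last_request, now):
    # Assumed behaviour: sleep off whatever part of the delay has not elapsed yet
    remaining = delay - (now - last_request)
    if remaining > 0:
        time.sleep(remaining)
    return time.time()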
Example #8
def doIndefiniteTask():
    log.LogWarning("Sleeping indefinitely")
    while True:
        time.sleep(1)
Example #9
def __bulk_scrape(client,
                  entities: Dict[Entity, array.array],
                  create_missing_performers=False,
                  create_missing_tags=False,
                  create_missing_studios=False,
                  create_missing_movies=False,
                  delay=5) -> None:
    last_request = -1
    # Unpack entity dict and iterate over each type (scenes, galleries)
    # entities is non-empty and contains at least one non-empty entity type
    for entity_class, entity_array in entities.items():
        log.LogInfo(f"Scraping {entity_class.value}")
        # Fetch available url scrapers for entity type
        if entity_class is Entity.Scene:
            supported_scrapers = client.sceneScraperURLs()
        elif entity_class is Entity.Gallery:
            supported_scrapers = client.galleryScraperURLs()
        else:
            raise TypeError(f"Unexpected Entity type: {entity_class}")

        if delay > 0:
            # Initialize last request with current time + delay time
            last_request = time.time() + delay

        missing_scrapers = list()

        # Number of scraped entities
        count = 0

        total = len(entity_array)
        # Index for progress bar
        i = 0

        # Scrape if url not in missing_scrapers
        for entity in entity_array:
            # Update status bar
            i += 1
            log.LogProgress(i / total)

            if entity.get('url') is None or entity.get('url') == "":
                # Skip the scene/gallery if it does not have an url
                log.LogInfo(
                    f"{entity_class.name} {entity.get('id')} is missing url")
                continue

            # URL domain name (with any leading "www." stripped)
            url_netloc = urlparse(entity.get("url")).netloc.split('www.')[-1]
            if url_netloc not in missing_scrapers:
                if delay:
                    last_request = wait(delay, last_request, time.time())

                # The queries have different fields per type, so there cannot be a single scrapeURL function
                if entity_class is Entity.Scene:
                    scraped_data = client.scrapeSceneURL(entity.get('url'))
                elif entity_class is Entity.Gallery:
                    scraped_data = client.scrapeGalleryURL(entity.get('url'))
                else:
                    raise TypeError(f"Unexpected Entity type: {entity_class}")
                if scraped_data is None:
                    if url_netloc not in supported_scrapers:
                        # If the result is null and the url is not in the list of supported scrapers, add the domain to missing_scrapers;
                        # this is faster than checking the list of supported scrapers every time
                        log.LogWarning(
                            f"{entity_class.name} {entity.get('id')}: "
                            f"Missing scraper for {url_netloc}")
                        log.LogDebug(f"Full url: {entity.get('url')}")
                        missing_scrapers.append(url_netloc)
                    else:
                        log.LogInfo(
                            f"Could not scrape {entity_class.name.lower()} {entity.get('id')}"
                        )
                        log.LogDebug("Return data was None")
                    continue
                # No data has been found for this scene/gallery
                if not any(scraped_data.values()):
                    log.LogInfo(
                        f"Could not get data for {entity_class.name.lower()} {entity.get('id')}"
                    )
                    continue

                update_entity(
                    client=client,
                    entity=entity,
                    entity_type=entity_class,
                    scraped_data=scraped_data,
                    create_missing_tags=create_missing_tags,
                    create_missing_performers=create_missing_performers,
                    create_missing_studios=create_missing_studios,
                    create_missing_movies=create_missing_movies)

                log.LogDebug(
                    f"Scraped data for {entity_class.name.lower()} {entity.get('id')}"
                )
                count += 1

        log.LogInfo(f"Scraped data for {count} {entity_class.value}")