def bulk_create_performer(client, create_missing_performers=False,
                          parse_performer_pattern=r'^.*[ \._]([A-Z][a-zA-Z]+)[ \._]([A-Z][a-zA-Z]+)[ \._].*$',
                          delay=5):
    try:
        create_missing_performers = bool(config.create_missing_performers)
        parse_performer_pattern = config.parse_performer_pattern
        delay = int(config.delay)
    except AttributeError as e:
        log.LogWarning(e)
        log.LogWarning("Using defaults for missing config values")
    except ValueError as e:
        log.LogWarning(e)
        log.LogWarning("Using defaults for wrong values")

    log.LogInfo('##### Bulk Create Performer #####')
    log.LogInfo(f'create_missing_performers: {create_missing_performers}')
    log.LogInfo(f'parse_performer_pattern: {parse_performer_pattern}')
    log.LogInfo(f'delay: {delay}')
    log.LogInfo('#############################')

    # Search for all scenes with scrape tag
    tag = client.findTagIdWithName(control_tag)
    if tag is None:
        sys.exit("Scrape Tag does not exist. Please create it via the 'Create scrape tag' task")

    tag_ids = [tag]
    scenes = client.findScenesByTags(tag_ids)
    log.LogInfo(f'Found {len(scenes)} scenes with scrape tag')

    count = __bulk_create_performer(client, scenes, create_missing_performers, parse_performer_pattern, delay)
    log.LogInfo(f'Created {count} performers')
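# A quick, hypothetical illustration (not part of the plugin) of what the
# default parse_performer_pattern above captures: the last pair of capitalized
# words delimited by spaces, dots, or underscores in a filename, which the task
# treats as a performer's first and last name. The filename below is made up.
import re

pattern = r'^.*[ \._]([A-Z][a-zA-Z]+)[ \._]([A-Z][a-zA-Z]+)[ \._].*$'
match = re.match(pattern, 'My.Video.With.Jane.Doe.1080p.mp4')
if match:
    print(match.group(1), match.group(2))  # prints: Jane Doe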
def bulk_scrape(client, create_missing_performers=False, create_missing_tags=False,
                create_missing_studios=False, delay=5):
    try:
        create_missing_studios = bool(config.create_missing_studios)
        create_missing_tags = bool(config.create_missing_tags)
        create_missing_performers = bool(config.create_missing_performers)
        delay = int(config.delay)
    except AttributeError as e:
        log.LogWarning(e)
        log.LogWarning("Using defaults for missing config values")
    except ValueError as e:
        log.LogWarning(e)
        log.LogWarning("Using defaults for wrong values")

    log.LogInfo('##### Bulk URL Scraper #####')
    log.LogInfo(f'create_missing_performers: {create_missing_performers}')
    log.LogInfo(f'create_missing_tags: {create_missing_tags}')
    log.LogInfo(f'create_missing_studios: {create_missing_studios}')
    log.LogInfo(f'delay: {delay}')
    log.LogInfo('#############################')

    # Search for all scenes with scrape tag
    tag = client.findTagIdWithName(control_tag)
    if tag is None:
        sys.exit("Scrape tag does not exist. Please create it via the 'Create scrape tag' task")

    tag_ids = [tag]
    scenes = client.findScenesByTags(tag_ids)
    log.LogInfo(f'Found {len(scenes)} scenes with scrape tag')

    count = __bulk_scrape(client, scenes, create_missing_performers, create_missing_tags, create_missing_studios, delay)
    log.LogInfo(f'Scraped data for {count} scenes')
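# bulk_create_performer() and bulk_scrape() above read their settings from a
# config module that is not included in this excerpt. A minimal sketch of the
# attributes they reference; the values below simply mirror the defaults in the
# function signatures and are not the plugin's actual shipped configuration.
# config.py
create_missing_performers = False
create_missing_tags = False
create_missing_studios = False
parse_performer_pattern = r'^.*[ \._]([A-Z][a-zA-Z]+)[ \._]([A-Z][a-zA-Z]+)[ \._].*$'
delay = 5  # seconds to wait between scrape requests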
def copy_all_tags(client):
    log.LogWarning("#######################################")
    log.LogWarning("Warning! This task will copy all information to all galleries with attached scenes")
    log.LogWarning("You have 30 seconds to cancel this task before it starts copying")
    log.LogWarning("#######################################")
    time.sleep(30)
    log.LogInfo("Start copying information. This may take a while depending on the number of galleries")

    # Get all galleries
    galleries = client.findGalleriesByTags([])
    log.LogDebug(f"Found {len(galleries)} galleries")

    count = __copy_tags(client, galleries)
    log.LogInfo(f'Copied scene information to {count} galleries')
def RemoveSessionsFromServer(self, nodeID, sessionIDs):
    """
    On proxy: remove the sessions 'sessionIDs' from server node 'nodeID' by
    faking TransportClosed messages. This call comes from server 'nodeID',
    which has detected that the sessions are no longer relevant to it.
    Note that the sessions are _not_ removed from the proxy and that "race
    conditions" are OK: in the worst case a removed but still relevant session
    will be re-JITed to the server again, ahead of a notification or call.
    """
    if machobase.mode != 'proxy':
        raise RuntimeError('RemoveSessionsFromServer should only be called on a proxy')
    log.LogInfo('CTXSESS: RemoveSessionsFromServer(nodeID=', nodeID, '), with ', len(sessionIDs), ' session IDs')
    mn = sm.services['machoNet']
    serverTID = mn.transportIDbySolNodeID.get(nodeID, None)
    if serverTID is not None:
        serverTransport = mn.transportsByID[serverTID]
        for sid in sessionIDs:
            sess = sessionsBySID.get(sid, None)
            if sess is not None:
                uthread.worker('SessionMgr::RemoveSessionsFromServer',
                               serverTransport.RemoveSessionFromServer, sess)
    else:
        log.LogWarning('RemoveSessionsFromServer() called with unknown or non-server nodeID ', nodeID)
def download(url, downloaded):
    config_path = os.path.join(plugin_folder, 'config.ini')
    log.LogDebug(f"Reading config file at: {config_path}")
    config = configparser.ConfigParser()
    config.read(config_path)

    download_dir = str(pathlib.Path(config.get('PATHS', 'downloadDir') + '/%(id)s.%(ext)s').absolute())
    log.LogDebug("Downloading " + url + " to: " + download_dir)

    ydl = youtube_dl.YoutubeDL({
        'outtmpl': download_dir,
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
    })
    with ydl:
        try:
            meta = ydl.extract_info(url=url, download=True)
            log.LogDebug(meta['id'])
            log.LogDebug("Download finished!")
            downloaded.append({
                "url": url,
                "id": meta.get('id'),
                "title": meta.get('title'),
            })
        except Exception as e:
            log.LogWarning(str(e))
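# Minimal usage sketch for download() above (an assumption based only on its
# signature, not documented plugin behavior): the caller passes in a list and
# the function appends a metadata dict for each URL it manages to download.
# The URLs below are placeholders.
downloaded = []
for url in ('https://example.com/video1', 'https://example.com/video2'):
    download(url, downloaded)
for item in downloaded:
    print(item['id'], item['title'])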
def RemoveSessionsFromServer(self, nodeID, sessionIDs):
    if machobase.mode != 'proxy':
        raise RuntimeError('RemoveSessionsFromServer should only be called on a proxy')
    log.LogInfo('CTXSESS: RemoveSessionsFromServer(nodeID=', nodeID, '), with ', len(sessionIDs), ' session IDs')
    mn = sm.services['machoNet']
    serverTID = mn.transportIDbySolNodeID.get(nodeID, None)
    if serverTID is not None:
        serverTransport = mn.transportsByID[serverTID]
        for sid in sessionIDs:
            sess = sessionsBySID.get(sid, None)
            if sess is not None:
                uthread.worker('SessionMgr::RemoveSessionsFromServer',
                               serverTransport.RemoveSessionFromServer, sess)
    else:
        log.LogWarning('RemoveSessionsFromServer() called with unknown or non-server nodeID ', nodeID)
def __bulk_scrape(client, scenes, create_missing_performers=False, create_missing_tags=False,
                  create_missing_studios=False, delay=5):
    last_request = -1
    if delay > 0:
        # Initialize last request with current time + delay time
        last_request = time.time() + delay

    missing_scrapers = list()

    # Number of scraped scenes
    count = 0
    total = len(scenes)
    # Index for progress bar
    i = 0

    # Scrape if url not in missing_scrapers
    for scene in scenes:
        # Update status bar
        i += 1
        log.LogProgress(i / total)

        if scene.get('url') is None or scene.get('url') == "":
            log.LogInfo(f"Scene {scene.get('id')} is missing url")
            continue
        if urlparse(scene.get("url")).netloc not in missing_scrapers:
            if delay:
                # Keep the returned timestamp so the next iteration honors the delay
                last_request = wait(delay, last_request, time.time())
            scraped_data = client.scrapeSceneURL(scene.get('url'))
            # If result is null, add url to missing_scrapers
            if scraped_data is None:
                log.LogWarning(f"Missing scraper for {urlparse(scene.get('url')).netloc}")
                missing_scrapers.append(urlparse(scene.get('url')).netloc)
                continue
            # No data has been found for this scene
            if not any(scraped_data.values()):
                log.LogInfo(f"Could not get data for scene {scene.get('id')}")
                continue

            # Create dict with scene data
            update_data = {'id': scene.get('id')}
            if scraped_data.get('title'):
                update_data['title'] = scraped_data.get('title')
            if scraped_data.get('details'):
                update_data['details'] = scraped_data.get('details')
            if scraped_data.get('date'):
                update_data['date'] = scraped_data.get('date')
            if scraped_data.get('image'):
                update_data['cover_image'] = scraped_data.get('image')

            if scraped_data.get('tags'):
                tag_ids = list()
                for tag in scraped_data.get('tags'):
                    if tag.get('stored_id'):
                        tag_ids.append(tag.get('stored_id'))
                    else:
                        if create_missing_tags and tag.get('name') != "":
                            # Capitalize each word
                            tag_name = " ".join(x.capitalize() for x in tag.get('name').split(" "))
                            log.LogInfo(f'Create missing tag: {tag_name}')
                            tag_id = client.createTagWithName(tag_name)
                            tag_ids.append(tag_id)
                if len(tag_ids) > 0:
                    update_data['tag_ids'] = tag_ids

            if scraped_data.get('performers'):
                performer_ids = list()
                for performer in scraped_data.get('performers'):
                    if performer.get('stored_id'):
                        performer_ids.append(performer.get('stored_id'))
                    else:
                        if create_missing_performers and performer.get('name') != "":
                            performer_name = " ".join(x.capitalize() for x in performer.get('name').split(" "))
                            log.LogInfo(f'Create missing performer: {performer_name}')
                            performer_id = client.createPerformerByName(performer_name)
                            performer_ids.append(performer_id)
                if len(performer_ids) > 0:
                    update_data['performer_ids'] = performer_ids

            if scraped_data.get('studio'):
                studio = scraped_data.get('studio')
                if studio.get('stored_id'):
                    update_data['studio_id'] = studio.get('stored_id')
                else:
                    if create_missing_studios:
                        studio_name = " ".join(x.capitalize() for x in studio.get('name').split(" "))
                        log.LogInfo(f'Creating missing studio {studio_name}')
                        studio_url = '{uri.scheme}://{uri.netloc}'.format(uri=urlparse(scene.get('url')))
                        studio_id = client.createStudio(studio_name, studio_url)
                        update_data['studio_id'] = studio_id

            # Update scene with scraped scene data
            client.updateScene(update_data)
            log.LogDebug(f"Scraped data for scene {scene.get('id')}")
            count += 1

    return count
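# The __bulk_scrape() helpers in this excerpt rely on a wait() function that is
# not shown. The sketch below is an assumption inferred from the call sites
# (last_request starts at time.time() + delay and is replaced by wait()'s
# return value): it treats the second argument as the earliest time the next
# request may be sent, sleeps until then, and returns the next allowed time.
import time

def wait(delay, next_allowed, now):
    # Sleep until the rate-limit window has passed
    if now < next_allowed:
        time.sleep(next_allowed - now)
    # The next request is allowed `delay` seconds from now
    return time.time() + delay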
def doIndefiniteTask():
    log.LogWarning("Sleeping indefinitely")
    while True:
        time.sleep(1)
def __bulk_scrape(client, entities: Dict[Entity, array.array], create_missing_performers=False,
                  create_missing_tags=False, create_missing_studios=False, create_missing_movies=False,
                  delay=5) -> None:
    last_request = -1

    # Unpack the entity dict and iterate over each type (scenes, galleries).
    # entities is non-empty and contains at least one non-empty entity type.
    for entity_class, entity_array in entities.items():
        log.LogInfo(f"Scraping {entity_class.value}")

        # Fetch available url scrapers for entity type
        if entity_class is Entity.Scene:
            supported_scrapers = client.sceneScraperURLs()
        elif entity_class is Entity.Gallery:
            supported_scrapers = client.galleryScraperURLs()
        else:
            raise TypeError(f"Unexpected Entity type: {entity_class}")

        if delay > 0:
            # Initialize last request with current time + delay time
            last_request = time.time() + delay

        missing_scrapers = list()

        # Number of scraped scenes
        count = 0
        total = len(entity_array)
        # Index for progress bar
        i = 0

        # Scrape if url not in missing_scrapers
        for entity in entity_array:
            # Update status bar
            i += 1
            log.LogProgress(i / total)

            if entity.get('url') is None or entity.get('url') == "":
                # Skip the scene/gallery if it does not have an url
                log.LogInfo(f"{entity_class.name} {entity.get('id')} is missing url")
                continue

            # URL domain name (without www.)
            url_netloc = urlparse(entity.get("url")).netloc.split('www.')[-1]
            if url_netloc not in missing_scrapers:
                if delay:
                    last_request = wait(delay, last_request, time.time())

                # The query has different fields, so there cannot be one scrapeURL function
                if entity_class is Entity.Scene:
                    scraped_data = client.scrapeSceneURL(entity.get('url'))
                elif entity_class is Entity.Gallery:
                    scraped_data = client.scrapeGalleryURL(entity.get('url'))
                else:
                    raise TypeError(f"Unexpected Entity type: {entity_class}")

                if scraped_data is None:
                    if url_netloc not in supported_scrapers:
                        # If the result is null and the url is not in the list of supported
                        # scrapers, add it to missing_scrapers. Faster than checking the list
                        # of supported scrapers on every iteration.
                        log.LogWarning(
                            f"{entity_class.name} {entity.get('id')}: "
                            f"Missing scraper for {url_netloc}")
                        log.LogDebug(f"Full url: {entity.get('url')}")
                        missing_scrapers.append(url_netloc)
                    else:
                        log.LogInfo(
                            f"Could not scrape {entity_class.name.lower()} {entity.get('id')}")
                        log.LogDebug("Return data was None")
                    continue

                # No data has been found for this scene
                if not any(scraped_data.values()):
                    log.LogInfo(
                        f"Could not get data for {entity_class.name.lower()} {entity.get('id')}")
                    continue

                update_entity(
                    client=client,
                    entity=entity,
                    entity_type=entity_class,
                    scraped_data=scraped_data,
                    create_missing_tags=create_missing_tags,
                    create_missing_performers=create_missing_performers,
                    create_missing_studios=create_missing_studios,
                    create_missing_movies=create_missing_movies)

                log.LogDebug(
                    f"Scraped data for {entity_class.name.lower()} {entity.get('id')}")
                count += 1

        log.LogInfo(f"Scraped data for {count} {entity_class.value}")
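# The generalized __bulk_scrape() above is keyed by an Entity enum and calls an
# update_entity() helper, neither of which is included in this excerpt. A
# minimal sketch of what the enum likely looks like, inferred from the .name
# and .value usages above (the plugin's actual definition may differ):
from enum import Enum

class Entity(Enum):
    Scene = "scenes"
    Gallery = "galleries"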