def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Fill ``metadata`` for a scene from the JSON document behind its URL.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``.  Title, summary, tagline/collection, release date,
    genres, actors and artwork are all read from that JSON document.

    Args:
        metadata: Metadata object to populate; also the return value.
        lang: Not used by this function.
        siteNum: Site index, used for the fallback tagline.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.
        art: List of artwork URLs; extended in place by this function.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])

    basePageElements = getJSONfromPage(sceneURL)
    detailsPageElements = basePageElements['video']

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    metadata.summary = PAutils.cleanHTML(detailsPageElements['description'])

    # Studio
    metadata.studio = 'Top Web Models'

    # Tagline and Collection(s)
    metadata.collections.clear()
    if 'sites' in detailsPageElements and detailsPageElements['sites']:
        # Insert a space before every capital that follows a word character
        # so a run-together site name becomes separate words.
        tagline = re.sub(r"(\w)([A-Z])", r"\1 \2", detailsPageElements['sites'][0]['name'])
    else:
        # Missing or empty site list: fall back to the configured site name
        tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Date
    date = detailsPageElements['release_date']
    if date:
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements['tags']:
        movieGenres.addGenre(genreLink['name'])

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements['models']:
        movieActors.addActor(actorLink['name'], actorLink['thumb'])

    # Posters
    art.append(detailsPageElements['thumb'])
    art.append(basePageElements['file_poster'])

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: report the failure and keep going
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Scrape a scene page and copy its details into ``metadata``.

    ``metadata.id`` is a ``|``-separated string: field 0 is the encoded scene
    URL, field 2 the tagline and field 3 the encoded poster URL.

    Returns the populated ``metadata`` object.
    """
    idParts = str(metadata.id).split('|')

    sceneURL = PAutils.Decode(idParts[0])
    if not sceneURL.startswith('http'):
        # Relative path: prefix it with the site's base URL
        sceneURL = PAsearchSites.getSearchBaseURL(siteID) + sceneURL
    tagline = idParts[2]
    scenePoster = PAutils.Decode(idParts[3])

    response = PAutils.HTTPRequest(sceneURL)
    page = HTML.ElementFromString(response.text)

    # Title
    titleNode = page.xpath('//h2[@class="scene-h2-heading"]')[0]
    metadata.title = titleNode.text_content().strip()

    # Summary
    summaryNode = page.xpath('//p[@class="indie-model-p"]')[0]
    metadata.summary = summaryNode.text_content().strip()

    # Studio
    metadata.studio = 'Vivid Entertainment'

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    releasedNode = page.xpath('//h5[contains(text(), "Released:")]')[0]
    releasedText = releasedNode.text_content().replace('Released:', '').strip()
    if releasedText:
        metadata.originally_available_at = datetime.strptime(releasedText, '%b %d, %Y')
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for categoryNode in page.xpath('//h5[contains(text(),"Categories:")]/a'):
        movieGenres.addGenre(categoryNode.text_content().strip())

    # Actors (the page offers no actor photos, so an empty URL is passed)
    movieActors.clearActors()
    for performerNode in page.xpath('//h4[contains(text(),"Starring:")]/a'):
        movieActors.addActor(performerNode.text_content().strip(), '')

    # Posters
    art = [scenePoster]

    Log('Artwork found: %d' % len(art))
    for order, artUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(artUrl, metadata):
            continue

        # Download image file for analysis
        try:
            download = PAutils.HTTPRequest(artUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(download.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[artUrl] = Proxy.Media(download.content, sort_order=order)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[artUrl] = Proxy.Media(download.content, sort_order=order)
        except:
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a scene or full-movie page and copy its details into ``metadata``.

    Pages whose URL contains ``'p**n-movie'`` are treated as full movies and
    use different selectors for actors and release date than scene pages.

    Args:
        metadata: Metadata object to populate; also the return value.
        siteNum: Site index used for the base URL and the tagline.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    isMovie = 'p**n-movie' in sceneURL

    # Title
    metadata.title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    # Summary
    # The selector previously started with '///', which is not a valid XPath
    # expression; the resulting error was swallowed and no summary was set.
    summaryElements = detailsPageElements.xpath('//span[@class="full"]')
    if summaryElements:
        metadata.summary = summaryElements[0].text_content().strip()

    # Studio
    metadata.studio = 'Marc Dorcel'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    movieGenres.addGenre('French p**n')

    movieName = detailsPageElements.xpath('//span[@class="movie"]/a')
    if movieName:
        # Scene belongs to a movie: group it under the movie's title as well
        metadata.collections.add(movieName[0].text_content().strip())
        movieGenres.addGenre('Blockbuster Movie')

    # Actors
    movieActors.clearActors()
    if not isMovie:
        actors = detailsPageElements.xpath('//div[@class="actress"]/a')
    else:
        actors = detailsPageElements.xpath('//div[@class="actor thumbnail "]/a/div[@class="name"]')

    if actors:
        if not isMovie:
            # Cast size only implies a genre for a single scene
            if len(actors) == 3:
                movieGenres.addGenre('Threesome')
            if len(actors) == 4:
                movieGenres.addGenre('Foursome')
            if len(actors) > 4:
                movieGenres.addGenre('Orgy')

        for actorLink in actors:
            actorName = actorLink.text_content().strip()
            actorPhotoURL = ''

            movieActors.addActor(actorName, actorPhotoURL)

    # Release Date
    if not isMovie:
        dateElements = detailsPageElements.xpath('//span[@class="publish_date"]')
    else:
        dateElements = detailsPageElements.xpath('//span[@class="out_date"]')
    if dateElements:
        date = dateElements[0].text_content().replace('Year :', '').strip()
        if date:
            date_object = parse(date)
            metadata.originally_available_at = date_object
            metadata.year = metadata.originally_available_at.year

    # Director
    # Only create the entry when the page names a director, so that no
    # nameless director is attached to the item.
    directorElements = detailsPageElements.xpath('//span[@class="director"]')
    if directorElements:
        movieDirector = directorElements[0].text_content().replace('Director :', '').strip()
        if movieDirector:
            director = metadata.directors.new()
            director.name = movieDirector

    # Poster
    art = []
    xpaths = [
        '//div[contains(@class, "photos")]//source/@data-srcset'
    ]
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if ',' in img:
                # srcset with several candidates: take the URL of the last one
                img = img.split(',')[-1].split()[0]

            # Drop the trailing '_<suffix>' that follows the third underscore
            # (extension kept).  NOTE(review): presumably a size/crop suffix;
            # confirm against live URLs.
            trash = '_' + img.split('_', 3)[-1].rsplit('.', 1)[0]
            img = img.replace(trash, '', 1)

            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: report the failure and keep going
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Scrape a video page and copy its details into ``metadata``.

    The page path is decoded from the first ``|``-separated field of
    ``metadata.id`` and appended to the site's base URL.  Each actor's own
    page is fetched as well to obtain a photo URL.

    Returns the populated ``metadata`` object.
    """
    encodedPath = str(metadata.id).split("|")[0]
    pageURL = PAsearchSites.getSearchBaseURL(siteID) + PAutils.Decode(encodedPath)
    page = HTML.ElementFromURL(pageURL)

    # Title
    titleNodes = page.xpath('//div[@class="video-rating-and-details"]//h1[@class="heading heading--2 video-title"]')
    metadata.title = titleNodes[0].text_content()

    # Studio
    metadata.studio = 'BadoinkVR'

    # Summary
    summaryNodes = page.xpath('//p[@class="video-description"]')
    metadata.summary = summaryNodes[0].text_content().strip()

    # Tagline and Collection
    siteName = PAsearchSites.getSearchSiteName(siteID)
    metadata.collections.clear()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Genres
    movieGenres.clearGenres()
    for tagNode in page.xpath('//a[@class="video-tag"]'):
        movieGenres.addGenre(tagNode.text_content())

    # Actors
    movieActors.clearActors()
    for performerNode in page.xpath('//a[contains(@class,"video-actor-link")]'):
        performerName = performerNode.text_content()
        performerPage = HTML.ElementFromURL(PAsearchSites.getSearchBaseURL(siteID) + performerNode.get("href"))
        photoSrc = performerPage.xpath('//img[@class="girl-details-photo"]')[0].get("src")
        # Keep only the part of the image URL before any query string
        movieActors.addActor(performerName, photoSrc.split('?')[0])

    # Posters/Background
    # Validating against an empty key list before new images are stored
    keepKeys = list()
    metadata.posters.validate_keys(keepKeys)
    metadata.art.validate_keys(keepKeys)

    for order, galleryNode in enumerate(page.xpath('//div[contains(@class,"gallery-item")]'), 1):
        galleryURL = galleryNode.get("data-big-image")
        galleryRequest = HTTP.Request(galleryURL, headers={'Referer': 'http://www.google.com'})
        metadata.posters[galleryURL] = Proxy.Preview(galleryRequest.content, sort_order=order)

    backgroundSrc = page.xpath('//img[@class="video-image"]')[0].get("src")
    backgroundURL = backgroundSrc.split('?')[0]
    backgroundRequest = HTTP.Request(backgroundURL, headers={'Referer': 'http://www.google.com'})
    metadata.art[backgroundURL] = Proxy.Preview(backgroundRequest.content, sort_order=1)

    # Date
    try:
        uploadNode = page.xpath('.//div[@class="video-details"]//p[@class="video-upload-date"]')[0]
        # Text is split on ':' and the part after the first colon is the date
        dateText = uploadNode.text_content().split(":")[1].strip()
        Log('DateFixed: ' + dateText)
        metadata.originally_available_at = datetime.strptime(dateText, '%B %d, %Y')
        metadata.year = metadata.originally_available_at.year
    except:
        Log("No date found")

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Populate ``metadata`` purely from values packed into ``metadata.id``.

    No page is fetched.  ``metadata.id`` is a ``|``-separated string: field 0
    is the encoded title, field 2 the encoded description, field 3 the date,
    field 4 a comma-separated actor list and field 5 the encoded poster URL.
    ``art`` is extended in place with the poster URL.

    Returns the populated ``metadata`` object.
    """
    fields = str(metadata.id).split('|')
    title = PAutils.Decode(fields[0])
    description = PAutils.Decode(fields[2])
    releaseDate = fields[3]
    actorList = fields[4]
    poster = PAutils.Decode(fields[5])

    metadata.collections.clear()
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title
    metadata.title = title

    # Summary
    metadata.summary = description

    # Studio
    metadata.studio = 'PureCFNM'

    # Tagline and Collection(s)
    subSite = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = subSite
    metadata.collections.add(subSite)

    # Genres: fixed per sub-site, matched case-insensitively on the site name
    genresBySubSite = {
        'AmateurCFNM'.lower(): ['CFNM'],
        'CFNMGames'.lower(): ['CFNM', 'Femdom'],
        'GirlsAbuseGuys'.lower(): ['CFNM', 'Femdom', 'Male Humiliation'],
        'HeyLittleDick'.lower(): ['CFNM', 'Femdom', 'Small Penis Humiliation'],
        'LadyVoyeurs'.lower(): ['CFNM', 'Voyeur'],
        'PureCFNM'.lower(): ['CFNM'],
    }
    for genreName in genresBySubSite.get(subSite.lower(), []):
        movieGenres.addGenre(genreName)

    # Release Date
    metadata.originally_available_at = parse(releaseDate)
    metadata.year = metadata.originally_available_at.year

    # Actors
    names = actorList.split(',')
    if names:
        # Group-size genre is one larger than the number of listed names
        listed = len(names)
        if listed == 2:
            movieGenres.addGenre('Threesome')
        elif listed == 3:
            movieGenres.addGenre('Foursome')
        elif listed > 3:
            movieGenres.addGenre('Group')

        for rawName in names:
            # A single-space string is passed as the photo URL placeholder
            movieActors.addActor(rawName.strip(), ' ')

    # Posters
    art.append(poster)

    Log('Artwork found: %d' % len(art))
    for order, artUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(artUrl, metadata):
            continue

        # Download image file for analysis
        try:
            download = PAutils.HTTPRequest(artUrl)
            width, height = Image.open(StringIO(download.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[artUrl] = Proxy.Media(download.content, sort_order=order)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[artUrl] = Proxy.Media(download.content, sort_order=order)
        except:
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Scrape a scene page and copy its details into ``metadata``.

    ``metadata.id`` is a ``|``-separated string: field 0 is the encoded scene
    URL, field 2 the encoded title and field 3 the encoded release date.
    Genres, actors and artwork are read from the fetched page; no summary is
    set.

    Returns the populated ``metadata`` object.
    """
    idParts = str(metadata.id).split('|')

    sceneURL = PAutils.Decode(idParts[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL

    response = PAutils.HTTPRequest(sceneURL)
    page = HTML.ElementFromString(response.text)

    # Studio
    metadata.studio = 'Holly Randall Productions'

    # Title
    metadata.title = PAutils.Decode(idParts[2]).strip()

    # Summary not available

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Genres
    movieGenres.clearGenres()
    for tagNode in page.xpath('//ul[@class="tags"]/li/a'):
        movieGenres.addGenre(tagNode.text_content().strip())

    # Release Date
    metadata.originally_available_at = parse(PAutils.Decode(idParts[3]))
    metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    infoText = page.xpath('//div[@class="info"]/p')[0].text_content()
    # The fourth line of the info paragraph carries the 'Featuring:' list
    featuringLine = infoText.split('\n')[3]
    for rawName in featuringLine.replace('Featuring:', '').split(','):
        movieActors.addActor(rawName.strip(), '')

    # Posters
    art = []
    posterSelectors = [
        '//img[contains(@class, "update_thumb")]/@src0_3x',
    ]
    for selector in posterSelectors:
        for relativePath in page.xpath(selector):
            art.append(PAsearchSites.getSearchBaseURL(siteNum) + relativePath)

    Log('Artwork found: %d' % len(art))
    for order, artUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(artUrl, metadata):
            continue

        # Download image file for analysis
        try:
            download = PAutils.HTTPRequest(artUrl)
            width, height = Image.open(StringIO(download.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[artUrl] = Proxy.Media(download.content, sort_order=order)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[artUrl] = Proxy.Media(download.content, sort_order=order)
        except:
            pass

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Scrape a scene page and copy its details into ``metadata``.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``.  The 'Lesbian' genre is always added, and a group-size
    genre is added when the comma-separated cast lists three or more names.

    Returns the populated ``metadata`` object.
    """
    idParts = str(metadata.id).split('|')

    sceneURL = PAutils.Decode(idParts[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteID) + sceneURL

    response = PAutils.HTTPRequest(sceneURL)
    page = HTML.ElementFromString(response.text)

    # Title
    metadata.title = page.xpath('//div[@class="fltWrap"]/h1/span')[0].text_content().strip()

    # Summary
    summaryText = page.xpath('//p[@class="description"]')[0].text_content()
    metadata.summary = summaryText.replace('Description:', '').strip()

    # Studio
    metadata.studio = 'ClubFilly'

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteID)
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date
    releaseText = page.xpath('//div[@class="fltRight"]')[0].text_content()
    releaseText = releaseText.replace('Release Date :', '').strip()
    if releaseText:
        metadata.originally_available_at = datetime.strptime(releaseText, '%Y-%m-%d')
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    movieGenres.addGenre('Lesbian')

    # Actors
    movieActors.clearActors()
    starringText = page.xpath('//p[@class="starring"]')[0].text_content()
    names = starringText.replace('Starring:', '').strip().split(',')
    if names:
        castSize = len(names)
        if castSize == 3:
            movieGenres.addGenre('Threesome')
        if castSize == 4:
            movieGenres.addGenre('Foursome')
        if castSize > 4:
            movieGenres.addGenre('Orgy')

        for rawName in names:
            movieActors.addActor(rawName.strip(), '')

    # Posters
    posterSelectors = [
        '//ul[@id="lstSceneFocus"]/li/img/@src'
    ]
    art = []
    for selector in posterSelectors:
        art.extend(page.xpath(selector))

    Log('Artwork found: %d' % len(art))
    for order, artUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(artUrl, metadata):
            continue

        # Download image file for analysis
        try:
            download = PAutils.HTTPRequest(artUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(download.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[artUrl] = Proxy.Media(download.content, sort_order=order)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[artUrl] = Proxy.Media(download.content, sort_order=order)
        except:
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a scene page and copy its details into ``metadata``.

    ``metadata.id`` is a ``|``-separated string: field 0 is the encoded scene
    URL and field 2 the release date.  The configured site name is used as
    studio, tagline and collection.

    Returns the populated ``metadata`` object.
    """
    idParts = str(metadata.id).split('|')

    sceneURL = PAutils.Decode(idParts[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    releaseDate = idParts[2]

    response = PAutils.HTTPRequest(sceneURL)
    page = HTML.ElementFromString(response.text)

    # Title
    titleNode = page.xpath('//div[contains(@class, "right-info")]//h1')[0]
    metadata.title = titleNode.text_content().strip()

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection
    metadata.collections.clear()
    metadata.tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.collections.add(metadata.studio)

    # Release Date
    if releaseDate:
        metadata.originally_available_at = parse(releaseDate)
        metadata.year = metadata.originally_available_at.year

    # Summary: use the "full" span when present, else the whole description
    fullDescription = page.xpath('//div[@class="description"]//span[contains(@class, "full")]')
    if fullDescription:
        summaryNode = fullDescription[0]
    else:
        summaryNode = page.xpath('//div[@class="description"]')[0]
    metadata.summary = summaryNode.text_content().strip()

    # Genres
    movieGenres.clearGenres()
    for tagNode in page.xpath('//div[@class="tag-list"]//a'):
        movieGenres.addGenre(tagNode.text_content().strip())

    # Actors
    movieActors.clearActors()
    for performerNode in page.xpath('//div[contains(@class, "right-info")]//div[@class="info"]//a'):
        movieActors.addActor(performerNode.text_content().strip(), '')

    # Posters
    # First entry: the text between the first and last single quote of the
    # player's style attribute, minus any query string.
    playerStyle = page.xpath('//div[@id="player"]/@style')[0]
    quotedStart = playerStyle.find('\'') + 1
    quotedEnd = playerStyle.rfind('\'')
    art = [playerStyle[quotedStart:quotedEnd].split('?', 1)[0]]

    for galleryHref in page.xpath('//div[@class="gallery-item"]//a/@href'):
        art.append(galleryHref.split('?', 1)[0])

    Log('Artwork found: %d' % len(art))
    for order, artUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(artUrl, metadata):
            continue

        # Download image file for analysis
        try:
            download = PAutils.HTTPRequest(artUrl)
            width, height = Image.open(StringIO(download.content)).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[artUrl] = Proxy.Media(download.content, sort_order=order)
            if width > 100 and order > 1:
                # Item is an art item (the first image is never used as art)
                metadata.art[artUrl] = Proxy.Media(download.content, sort_order=order)
        except:
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Scrape a scene page and copy its details into ``metadata``.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``.  The site name is only added as a collection when the
    ``collections_addsitename`` preference is set.

    Args:
        metadata: Metadata object to populate; also the return value.
        lang: Not used by this function.
        siteNum: Site index used for the URL prefix and the tagline.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchSearchURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    # Summary
    # Selectors are listed from most to least specific; stop at the first one
    # that matches so the broad fallback cannot override a specific hit.
    summary_xpaths = [
        '//div[@class="p-desc"]',
        '//div[contains(@class, "desc")]'
    ]
    for xpath in summary_xpaths:
        summaryElements = detailsPageElements.xpath(xpath)
        if summaryElements:
            metadata.summary = summaryElements[0].text_content().replace('Read More »', '').strip()
            break

    # Studio
    metadata.studio = 'Score Group'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    if Prefs['collections_addsitename']:
        metadata.collections.add(metadata.tagline)

    # Release Date
    # The date is read from the second "value" span, so at least two spans
    # must exist before indexing.
    dateElements = detailsPageElements.xpath('//div/span[@class="value"]')
    if len(dateElements) > 1:
        date = dateElements[1].text_content().strip()
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//div/span[@class="value"]/a'):
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''

        movieActors.addActor(actorName, actorPhotoURL)

    if siteNum == 1344:
        # This site index always gets a fixed performer added
        movieActors.addActor('Christy Marks', '')

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="mb-3"]/a'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Posters/Background
    art = []

    match = re.search(r'posterImage: \'(.*)\'', req.text)
    if match:
        art.append(match.group(1))

    xpaths = [
        '//div[contains(@class, "thumb")]/img/@src',
        '//div[contains(@class, "p-image")]/a/img/@src',
        '//div[contains(@class, "dl-opts")]/a/img/@src',
        '//div[contains(@class, "p-photos")]/div/div/a/@href',
        '//div[contains(@class, "gallery")]/div/div/a/@href'
    ]
    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            if not poster.startswith('http'):
                poster = 'http:' + poster

            if 'shared-bits' not in poster:
                art.append(poster)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: report the failure and keep going
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a model/scene page and copy its details into ``metadata``.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``.  Title and release date both come from the page
    ``<title>``, split on the word 'Released'.  Extra artwork is collected
    from gallery/preview pages returned by a search for the actor names.

    Args:
        metadata: Metadata object to populate; also the return value.
        siteNum: Site index used for the tagline and the artwork search.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])

    # Numeric id taken from '-<digits>.' in the URL; 0 when absent
    sceneID = 0
    regex = re.search(r'-([0-9]{1,})\.', sceneURL)
    if regex:
        sceneID = int(regex.group(1))

    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    titleDate = detailsPageElements.xpath('//title')[0].text_content().split('Released')
    metadata.title = titleDate[0].strip()

    # Summary
    metadata.summary = detailsPageElements.xpath('//div[@id="Bio"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'First Time Videos'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = titleDate[-1].replace('!', '').strip()
    if date:
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    genres = []
    if tagline == 'FTVGirls':
        genres = ['Teen', 'Solo', 'Public']
    elif tagline == 'FTVMilfs':
        genres = ['MILF', 'Solo', 'Public']

    for genreName in genres:
        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    actors = []
    # Evaluate the thumbnail list once instead of once per actor
    thumbs = detailsPageElements.xpath('//div[@id="Thumbs"]/img/@src')
    for idx, actorLink in enumerate(detailsPageElements.xpath('//div[@id="ModelDescription"]//h1')):
        actorName = actorLink.text_content().replace('\'s Statistics', '').strip()
        actors.append(actorName)

        # Look for "<name> <Capitalised word>" in the summary to recover a
        # longer name.  The name is escaped so regex metacharacters in it
        # cannot break or alter the pattern.
        regex = re.search(r'\s(%s [A-Z]\w{1,})\s' % re.escape(actorName), metadata.summary)
        if regex:
            actorName = regex.group(1)

        # Thumbnails are matched to actors by position; tolerate a shorter list
        actorPhotoURL = thumbs[idx] if idx < len(thumbs) else ''

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//img[@id="Magazine"]/@src',
        '//div[@class="gallery"]//div[@class="row"]//@href',
        '//div[@class="thumbs_horizontal"]//@href',
        '//a[img[@class="t"]]/@href',
    ]

    scenes = photoLookup(sceneID)
    googleResults = PAutils.getFromGoogleSearch(' '.join(actors).strip(), siteNum)
    for photoURL in googleResults:
        if 'galleries' not in photoURL and 'preview' not in photoURL:
            continue

        # Fetch each result page at most once, even when several scene tags
        # match the same URL.
        if not any(scene in photoURL or scene == 'none' for scene in scenes):
            continue

        photoReq = PAutils.HTTPRequest(photoURL)
        photoPageElements = HTML.ElementFromString(photoReq.text)
        for xpath in xpaths:
            for img in photoPageElements.xpath(xpath):
                art.append(img)

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.google.com'})
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: report the failure and keep going
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Scrape a scene page and copy its details into ``metadata``.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``.  ``art`` is extended in place with the frame images found
    on the page.

    Args:
        metadata: Metadata object to populate; also the return value.
        lang: Not used by this function.
        siteNum: Site index used for the base URL and the extra collection.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.
        art: List of artwork URLs; extended in place by this function.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h1[@class="description"]/text()')[0].strip()

    # Tagline and Collection(s)
    # Start from an empty set so repeated updates do not pile up collections
    metadata.collections.clear()
    metadata.collections.add(detailsPageElements.xpath('//div[@class="studio"]//span/text()')[1].strip())
    if 'filthykings' in sceneURL:
        metadata.collections.add(PAsearchSites.getSearchSiteName(siteNum))

    # Studio
    metadata.studio = 'AdultEmpireCash'

    # Summary
    summary = detailsPageElements.xpath('//div[@class="synopsis"]/p/text()')
    if summary:
        metadata.summary = summary[0].strip()

    # Director
    directorElement = detailsPageElements.xpath('//div[@class="director"]/text()')
    if directorElement:
        director = metadata.directors.new()
        directorName = directorElement[0].strip()
        director.name = directorName

    # Release Date
    date = detailsPageElements.xpath('//div[@class="release-date"]/text()')[0].strip()
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//div[@class="video-performer"]//img'):
        actorName = actorLink.get('title')
        actorPhotoURL = actorLink.get('data-bgsrc')

        movieActors.addActor(actorName, actorPhotoURL)

    # One specific scene gets an extra performer.  Both substrings must be
    # tested: the earlier `'filthykings' and '796896' in sceneURL` was always
    # true on its left side, so only the id was actually checked.
    if 'filthykings' in sceneURL and '796896' in sceneURL:
        movieActors.addActor('Alice Visby', '')

    # Genres
    movieGenres.clearGenres()
    for genreName in detailsPageElements.xpath('//div[@class="tags"]//a/text()'):
        movieGenres.addGenre(genreName)

    # Posters
    for poster in detailsPageElements.xpath('//div[@id="dv_frames"]//img/@src'):
        # Swap '320' for '1280' in the frame URL.  NOTE(review): presumably
        # selects a larger rendition; confirm against live URLs.
        img = poster.replace('320', '1280')
        art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 10:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: report the failure and keep going
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def processActors(self, metadata):
    """Turn every entry of ``self.actorsTable`` into a role on ``metadata``.

    Each entry's name is normalised, optionally replaced through the
    studio-specific and global replacement tables in ``PAdatabaseActors``, and
    then added via ``metadata.roles.new()``.  A replacement name containing
    commas is expanded into one role per name.
    """
    ignoredNames = ('Bad Name', 'Test Model Name')

    def addRole(name, photo):
        # Log the final pairing and attach it to the metadata item
        Log('Actor: %s %s' % (name, photo))

        role = metadata.roles.new()
        role.name = name
        role.photo = photo

    for entry in self.actorsTable:
        # Normalise the name: turn the byte pair '\xc2\xa0' (UTF-8
        # non-breaking space) into a plain space, drop commas, trim and
        # title-case it.
        actorName = entry['name'].replace('\xc2\xa0', ' ').replace(',', '').strip().title()
        actorPhoto = entry['photo'].strip()

        # Collapse any run of whitespace to a single space
        actorName = ' '.join(actorName.split())

        # Skip an actor completely; this could be used to filter out male actors if desired
        if actorName in ignoredNames:
            continue

        # First studio index whose studio list contains this item's studio
        searchStudioIndex = next(
            (
                studioIndex
                for studioIndex, studioList in PAdatabaseActors.ActorsStudioIndexes.items()
                if metadata.studio in studioList
            ),
            None,
        )

        # All alias comparisons use the lower-cased name as it was before any
        # replacement was applied.
        searchActorName = actorName.lower()

        # Studio-specific replacements
        if searchStudioIndex is not None and searchStudioIndex in PAdatabaseActors.ActorsReplaceStudios:
            studioReplacements = PAdatabaseActors.ActorsReplaceStudios[searchStudioIndex]
            for canonicalName, aliases in studioReplacements.items():
                if searchActorName != canonicalName.lower() and searchActorName not in map(str.lower, aliases):
                    continue

                actorName = canonicalName
                if searchStudioIndex == 32 and actorName != 'QueenSnake':
                    actorName = '%s QueenSnake' % actorName

                break

        # Global replacements
        for canonicalName, aliases in PAdatabaseActors.ActorsReplace.items():
            if searchActorName != canonicalName.lower() and searchActorName not in map(str.lower, aliases):
                continue

            actorName = canonicalName
            break

        if ',' in actorName:
            # One role per comma-separated name, each with a database photo
            for splitName in actorName.split(','):
                actorName = splitName.strip()
                actorPhoto = actorDBfinder(actorName)

                addRole(actorName, actorPhoto)
        else:
            # Check the supplied photo with a HEAD request; fall back to the
            # actor database when there is no photo or the request is not ok.
            req = None
            if actorPhoto:
                req = PAutils.HTTPRequest(actorPhoto, 'HEAD', bypass=False)

            if not req or not req.ok:
                actorPhoto = actorDBfinder(actorName)

            if actorPhoto:
                actorPhoto = PAutils.getClearURL(actorPhoto)

            addRole(actorName, actorPhoto)
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` from Algolia index lookups.

    ``metadata.id`` is a ``|``-separated string: field 0 is the numeric id,
    field 2 the type (``'scenes'`` or another index suffix such as
    ``'movies'``) and field 3 the release date.  The item itself, the scenes
    sharing its ``url_title`` and every actor are each fetched through
    ``getAlgolia``.

    Returns the populated ``metadata`` object.
    """
    idParts = str(metadata.id).split('|')
    sceneID = int(idParts[0])
    sceneType = idParts[2]
    if sceneType == 'scenes':
        idField = 'clip_id'
    else:
        idField = 'movie_id'
    sceneDate = idParts[3]

    apiKEY = getAPIKey(siteNum)

    url = PAsearchSites.getSearchSearchURL(siteNum) + '?x-algolia-application-id=TSMKFA364Q&x-algolia-api-key=' + apiKEY
    results = getAlgolia(url, 'all_' + sceneType, 'filters=%s=%d' % (idField, sceneID), PAsearchSites.getSearchBaseURL(siteNum))
    details = results[0]

    # Scenes found by the same url_title, numbered from 1 in clip_id order
    related = getAlgolia(url, 'all_scenes', 'query=%s' % details['url_title'], PAsearchSites.getSearchBaseURL(siteNum))
    related = sorted(related, key=lambda item: item['clip_id'])
    numberedScenes = list(enumerate(related, 1))

    # Title: a scene that has siblings gets its position appended
    title = None
    if sceneType == 'scenes' and len(numberedScenes) > 1:
        for position, candidate in numberedScenes:
            if candidate['clip_id'] == sceneID:
                title = '%s, Scene %d' % (details['title'], position)
                break
    if not title:
        title = details['title']

    metadata.title = title

    # Summary
    metadata.summary = details['description'].replace('</br>', '\n').replace('<br>', '\n')

    # Studio
    metadata.studio = details['network_name']

    # Tagline and Collection(s)
    metadata.collections.clear()
    for key in ['studio_name', 'serie_name']:
        if key in details:
            metadata.collections.add(details[key])

    titleHasMarker = ':' in details['title'] or '#' in details['title']
    if titleHasMarker and len(numberedScenes) > 1:
        if 'movie_title' in details:
            metadata.collections.add(details['movie_title'])

    # Release Date
    metadata.originally_available_at = parse(sceneDate)
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for category in details['categories']:
        if category['name']:
            movieGenres.addGenre(category['name'])

    if sceneType == 'movies':
        # A movie also collects the genres of each related scene
        for _, candidate in numberedScenes:
            for category in candidate['categories']:
                if category['name']:
                    movieGenres.addGenre(category['name'])

    # Actors: entries with gender 'female' are added before all others
    movieActors.clearActors()
    femaleActors = []
    otherActors = []
    for actorLink in details['actors']:
        actorName = actorLink['name']

        actorData = getAlgolia(url, 'all_actors', 'filters=actor_id=' + actorLink['actor_id'], PAsearchSites.getSearchBaseURL(siteNum))[0]
        actorPhotoURL = ''
        if 'pictures' in actorData and actorData['pictures']:
            # Last key in sorted order selects the picture variant
            bestKey = sorted(actorData['pictures'].keys())[-1]
            actorPhotoURL = 'https://images-fame.gammacdn.com/actors' + actorData['pictures'][bestKey]

        bucket = femaleActors if actorLink['gender'] == 'female' else otherActors
        bucket.append((actorName, actorPhotoURL))

    for actorName, actorPhotoURL in femaleActors + otherActors:
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    coverTemplate = 'https://images-fame.gammacdn.com/movies/{0}/{0}_{1}_front_400x625.jpg'

    if not PAsearchSites.getSearchBaseURL(siteNum).endswith(('girlsway.com', 'puretaboo.com')):
        art.append(coverTemplate.format(details['movie_id'], details['url_title'].lower().replace('-', '_')))
        if 'url_movie_title' in details:
            art.append(coverTemplate.format(details['movie_id'], details['url_movie_title'].lower().replace('-', '_')))

    if 'pictures' in details and details['pictures']:
        # Indexing the result of keys() requires it to be a list
        bestKey = details['pictures']['nsfw']['top'].keys()[0]
        pictureURL = 'https://images-fame.gammacdn.com/movies/' + details['pictures'][bestKey]

        # Movies list the picture last, scenes list it first
        if sceneType == 'movies':
            art.append(pictureURL)
        else:
            art.insert(0, pictureURL)

    Log('Artwork found: %d' % len(art))
    for order, artUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(artUrl, metadata):
            continue

        # Download image file for analysis
        try:
            download = PAutils.HTTPRequest(artUrl)
            width, height = Image.open(StringIO(download.content)).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[artUrl] = Proxy.Media(download.content, sort_order=order)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[artUrl] = Proxy.Media(download.content, sort_order=order)
        except:
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` for a Karups scene from its details page.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``; a relative URL is prefixed with the site's base URL.

    Args:
        metadata: Metadata object, filled in place.
        siteNum: Site index passed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = metadata.id.split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//h1//span[@class="title"]')[0].text_content().strip()

    # Summary (optional on the page)
    summaryNodes = detailsPageElements.xpath(
        '//div[@class="content-information-description"]//p')
    if summaryNodes:
        metadata.summary = summaryNodes[0].text_content().strip()

    # Studio
    metadata.studio = 'Karups'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = detailsPageElements.xpath(
        '//h1//span[@class="sup-title"]//span')[0].text_content().strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    # The date node also contains the tagline and a fixed label; drop both.
    date = detailsPageElements.xpath(
        '//span[@class="date"]/span[@class="content"]')[0].text_content()
    date = date.replace(tagline, '').replace('Video added on', '').strip()
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    # Fixed genre per sub-site; an unknown tagline simply gets no genre
    # instead of hitting an unbound ``genres`` variable.
    genresByTagline = {
        'KarupsHA': ['Amateur'],
        'KarupsPC': [],
        'KarupsOW': ['MILF'],
    }
    for genre in genresByTagline.get(tagline, []):
        movieGenres.addGenre(genre)

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//span[@class="models"]//a'):
        actorName = actorLink.text_content().strip()
        actorPageURL = actorLink.get('href')
        req = PAutils.HTTPRequest(actorPageURL)
        actorPageElements = HTML.ElementFromString(req.text)
        # The thumbnail is optional: fall back to an empty photo URL rather
        # than aborting the whole update with an IndexError.
        actorPhotos = actorPageElements.xpath(
            '//div[@class="model-thumb"]//img/@src')
        actorPhotoURL = actorPhotos[0] if actorPhotos else ''
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//div[@class="video-player"]//video/@poster',
        '//img[@class="poster"]/@src',
        '//div[@class="video-thumbs"]//img/@src',
    ]
    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # Best effort: one broken image must not abort the update.
            pass

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Populate ``metadata`` for a FuelVirtual scene.

    ``metadata.id`` is split on ``|``: field 0 is the encoded members-area
    path of the scene and field 2 is the release date string.

    Returns the same ``metadata`` object after filling it in place.
    """
    id_fields = str(metadata.id).split('|')
    base_url = PAsearchSites.getSearchBaseURL(siteID)
    sceneURL = '%s/membersarea/%s' % (base_url, PAutils.Decode(id_fields[0]))
    release = id_fields[2]
    response = PAutils.HTTPRequest(sceneURL)
    page = HTML.ElementFromString(response.text)

    # Title: the part of <title> before the first '-'
    page_title = page.xpath('//title')[0].text_content()
    metadata.title = page_title.split('-')[0].strip()

    # Studio
    metadata.studio = 'FuelVirtual'

    # Tagline and Collection(s)
    metadata.collections.clear()
    site_name = PAsearchSites.getSearchSiteName(siteID).strip()
    metadata.tagline = site_name
    metadata.collections.add(site_name)

    # Release Date
    if release:
        metadata.originally_available_at = parse(release)
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    category_links = page.xpath(
        '//td[@class="plaintext"]/a[@class="model_category_link"]')
    for link in category_links:
        movieGenres.addGenre(link.text_content().strip())
    movieGenres.addGenre('18-Year-Old')

    # Actors
    movieActors.clearActors()
    performers = page.xpath('//div[@id="description"]//td[@align="left"]/a')
    head_count = len(performers)
    if head_count == 3:
        movieGenres.addGenre('Threesome')
    elif head_count == 4:
        movieGenres.addGenre('Foursome')
    elif head_count > 4:
        movieGenres.addGenre('Orgy')
    for performer in performers:
        movieActors.addActor(performer.text_content().strip(), '')

    # Posters: thumbnails from the video page, then from the photo page
    thumb_xpath = '//a[@class="jqModal"]/img/@src'
    art = [base_url + src for src in page.xpath(thumb_xpath)]

    photo_response = PAutils.HTTPRequest(sceneURL.replace('vids', 'highres'))
    photo_page = HTML.ElementFromString(photo_response.text)
    art.extend(base_url + src for src in photo_page.xpath(thumb_xpath))

    Log('Artwork found: %d' % len(art))
    for position, image_url in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(image_url, metadata):
            continue
        # Download image file for analysis
        try:
            download = PAutils.HTTPRequest(
                image_url, headers={'Referer': 'http://www.google.com'})
            w, h = Image.open(StringIO(download.content)).size
            # Add the image proxy items to the collection
            if w > 1 or h > w:
                # Item is a poster
                metadata.posters[image_url] = Proxy.Media(download.content, sort_order=position)
            if w > 100 and w > h:
                # Item is an art item
                metadata.art[image_url] = Proxy.Media(download.content, sort_order=position)
        except:
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` for a TrueAmateurs scene from its details page.

    Args:
        metadata: Metadata object, filled in place; field 0 of its
            ``|``-separated ``id`` is the encoded scene URL.
        lang: Unused by this function.
        siteNum: Site index passed to ``PAsearchSites.getSearchBaseURL``.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    metadata.collections.clear()
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title
    metadata.title = detailsPageElements.xpath(
        '//h2[@class="wxt7nk-4 fSsARZ"]')[0].text_content().replace(
            'SML-', '').replace('Trailer', '').strip()

    # Studio
    metadata.studio = 'TrueAmateurs'

    # Tagline and Collection(s)
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.tagline)

    # Genres
    for genreLink in detailsPageElements.xpath(
            '//div[@class="tjb798-2 flgKJM"]/span[1]/a'):
        genreName = genreLink.text_content().replace(',', '').strip().lower()
        movieGenres.addGenre(genreName)

    # Release Date
    date = detailsPageElements.xpath(
        '//div[@class="tjb798-3 gFvmpb"]/span[last()]')
    if date:
        date = date[0].text_content().strip().replace('Release Date:', '')
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    actors = detailsPageElements.xpath('//a[@class="wxt7nk-6 czvZQW"]')
    if len(actors) == 3:
        movieGenres.addGenre('Threesome')
    if len(actors) == 4:
        movieGenres.addGenre('Foursome')
    if len(actors) > 4:
        movieGenres.addGenre('Orgy')

    for actorLink in actors:
        # The name comes from the link text. Previously it was never
        # assigned, and the resulting NameError was silently swallowed, so
        # no actor was ever added.
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''
        try:
            actorPageURL = actorLink.get('href')
            req = PAutils.HTTPRequest(actorPageURL)
            actorPage = HTML.ElementFromString(req.text)
            actorPhotoURL = 'http:' + actorPage.xpath(
                '//div[@class="profilePic_in"]//img/@src')[0]
        except Exception:
            # The photo is optional; keep the actor even if it cannot be fetched.
            pass
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = ['//div[@class="tg5e7m-2 evtSOm"]/img/@src']
    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # Best effort: one broken image must not abort the update.
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` for a scene looked up through the search API.

    The scene identifier is field 0 of the ``|``-separated ``metadata.id``;
    the first hit's ``_source`` document supplies every field.

    Args:
        metadata: Metadata object, filled in place.
        siteNum: Site index passed to ``PAsearchSites.getSearchSearchURL``.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneID = metadata_id[0]
    detailsPageElements = getDataFromAPI(
        PAsearchSites.getSearchSearchURL(siteNum), 'identifier',
        sceneID)['hits']['hits'][0]['_source']

    # Title
    metadata.title = detailsPageElements['name']

    # Summary
    metadata.summary = detailsPageElements['description']

    # Studio
    metadata.studio = detailsPageElements['studio']['name'].title()

    # Tagline and Collection(s)
    # Clear first so a refresh does not accumulate stale collection entries.
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)
    seriesScene = detailsPageElements['series']['name']
    if seriesScene:
        metadata.collections.add(seriesScene.title())

    # Release Date
    date = detailsPageElements['releaseDate']
    date_object = datetime.strptime(date, '%Y-%m-%d')
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements['actors']:
        actorName = actorLink['name']
        actorPhotoURL = 'https://i.bang.com/pornstars/%d.jpg' % actorLink['id']
        movieActors.addActor(actorName, actorPhotoURL)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements['genres']:
        genreName = genreLink['name']
        movieGenres.addGenre(genreName)

    # Posters: DVD front cover first, then every screenshot
    dvdID = detailsPageElements['dvd']['id']
    art = ['https://i.bang.com/covers/%d/front.jpg' % dvdID]
    for img in detailsPageElements['screenshots']:
        art.append('https://i.bang.com/screenshots/%d/movie/1/%d.jpg' %
                   (dvdID, img['screenId']))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # Best effort: one broken image must not abort the update.
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` for a scene from its details page.

    Args:
        metadata: Metadata object, filled in place; field 0 of its
            ``|``-separated ``id`` is the encoded scene URL.
        siteNum: Site index passed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = metadata.id.split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath('//div[contains(@class, "u-mb--six ")]')[0].text_content().strip()

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Release Date
    date = detailsPageElements.xpath('//span[@class="date-display-single"] | //span[@class="u-inline-block u-mr--nine"] | //div[@class="video-meta-date"] | //div[@class="date"]')[0].text_content().strip()
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//a[contains(@href, "/list/category/")]'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//a[contains(@href, "/pornstars/model/")]'):
        actorPageURL = PAsearchSites.getSearchBaseURL(siteNum) + actorLink.get('href')
        req = PAutils.HTTPRequest(actorPageURL)
        actorPage = HTML.ElementFromString(req.text)
        actorName = actorLink.text_content().strip()
        actorPhotoURL = actorPage.xpath('//div[contains(@class, "u-ratio--model-poster")]//img/@data-src')[0]
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters/Background
    art = []
    xpaths = [
        '//div[contains(@class, "splash-screen")]/@style',
        '//a[contains(@class, "u-ratio--lightbox")]/@href',
    ]
    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            if poster.startswith('background-image'):
                # Inline style value: keep only the URL inside ``url(...)``.
                # The extracted value must be assigned back, otherwise the
                # whole style string would be used as the image URL.
                poster = poster.split('url(')[1].split(')')[0]
            art.append(poster)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and idx > 1:
                # Item is an art item (the first image is poster-only)
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # Best effort: one broken image must not abort the update.
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Populate ``metadata`` for a SinsLife scene from its details page.

    Args:
        metadata: Metadata object, filled in place; field 0 of its
            ``|``-separated ``id`` is the encoded scene URL.
        lang: Unused by this function.
        siteNum: Site index passed to ``PAsearchSites.getSearchSiteName``.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.
        art: Caller-supplied list of image URLs; the scene image found on
            the page is appended to it.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = 'http:' + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    movieGenres.clearGenres()

    # Title
    metadata.title = detailsPageElements.xpath(
        '//div[@class="section"]//h1')[0].text_content().strip().title()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[4]/div/div[2]/div/div/div[2]/div[2]/p')[0].text_content().strip()

    # Studio
    metadata.studio = 'SinsLife'

    # Release Date
    # Remove the label as a substring. ``str.strip('Release Date:')`` treats
    # its argument as a set of characters and would also eat leading letters
    # of the month (e.g. "December" -> "cember").
    date = detailsPageElements.xpath(
        '//div[4]/div/div[2]/div/div/div[2]/div[1]/div/div[1]'
    )[0].text_content().replace('Release Date:', '').strip()
    if date:
        date_object = datetime.strptime(date, '%B %d, %Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath(
        '//div[4]/div/div[2]/div/div/div[2]/div[3]/ul/li/a/..')
    if len(actors) == 3:
        movieGenres.addGenre('Threesome')
    if len(actors) == 4:
        movieGenres.addGenre('Foursome')
    if len(actors) > 4:
        movieGenres.addGenre('Orgy')
    for actorLink in actors:
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    imageNodes = detailsPageElements.xpath(
        '//div[4]/div/div[2]/div/div/div[1]/div[2]/div/div[1]/img')
    if imageNodes:
        twitterBG = imageNodes[0].get('src')
        if twitterBG:
            art.append('https:' + twitterBG)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # Best effort: one broken image must not abort the update.
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` from a scene details page.

    Field 0 of the ``|``-separated ``metadata.id`` is the encoded scene URL.
    Genres and actors are read from comma-separated text on the page, and
    three thumbnail URLs are derived from the first image's path.

    Returns the same ``metadata`` object after filling it in place.
    """
    id_fields = str(metadata.id).split('|')
    scene_url = PAutils.Decode(id_fields[0])
    response = PAutils.HTTPRequest(scene_url)
    page = HTML.ElementFromString(response.text)

    # Title
    heading = page.xpath('//h1')[0]
    metadata.title = heading.text_content().strip()

    # Summary
    description = page.xpath('//div[@class="customcontent"]//div')[0]
    metadata.summary = description.text_content().strip()

    # Tagline and Collection(s)
    metadata.collections.clear()
    site_name = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.studio = site_name
    metadata.collections.add(site_name)

    # Release Date
    release = page.xpath('//div[@class="date"]')[0].text_content().strip()
    if release:
        metadata.originally_available_at = parse(release)
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    genre_text = page.xpath('//h4')[0].text_content()
    for raw_genre in genre_text.split(','):
        movieGenres.addGenre(raw_genre.strip())

    # Actors
    movieActors.clearActors()
    actor_text = page.xpath('//div[@class="customcontent"]//h3')[0].text_content()
    for raw_actor in actor_text.split(','):
        movieActors.addActor(raw_actor.replace(' ', '').strip(), '')

    # Posters: thumb_1..thumb_3 next to the first image on the page
    thumb_prefix = page.xpath('//center/img/@src')[0].replace('thumb_1.jpg', '')
    base_url = PAsearchSites.getSearchBaseURL(siteNum)
    art = ['%s/%sthumb_%d.jpg' % (base_url, thumb_prefix, number)
           for number in range(1, 4)]

    Log('Artwork found: %d' % len(art))
    for position, image_url in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(image_url, metadata):
            continue
        # Download image file for analysis
        try:
            download = PAutils.HTTPRequest(image_url)
            w, h = Image.open(StringIO(download.content)).size
            # Add the image proxy items to the collection
            if w > 1:
                # Item is a poster
                metadata.posters[image_url] = Proxy.Media(download.content, sort_order=position)
            if w > 100:
                # Item is an art item
                metadata.art[image_url] = Proxy.Media(download.content, sort_order=position)
        except:
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` from a scene details page.

    Field 0 of the ``|``-separated ``metadata.id`` is the encoded scene URL.
    No actors are read from the page (the actor list is only cleared); the
    poster URL is taken from the ``poster="..."`` attribute found in the
    player's inline script.

    Returns the same ``metadata`` object after filling it in place.
    """
    id_fields = str(metadata.id).split('|')
    scene_url = PAutils.Decode(id_fields[0])
    response = PAutils.HTTPRequest(scene_url)
    page = HTML.ElementFromString(response.text)

    # Title
    heading = page.xpath('//h1')[0]
    metadata.title = heading.text_content().strip()

    # Summary
    blurb = page.xpath('//div[@class="videoDetails clear"]/p')[0]
    metadata.summary = blurb.text_content().strip()

    # Tagline and Collection(s)
    metadata.collections.clear()
    site_name = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.studio = site_name
    metadata.collections.add(site_name)

    # Genres
    movieGenres.clearGenres()
    for item in page.xpath('//div[@class="featuring clear"]//li[./a]'):
        movieGenres.addGenre(item.text_content().strip())

    # Actors
    movieActors.clearActors()

    # Posters
    art = []
    for script in page.xpath('//div[@class="player"]/script'):
        found = re.search(r'(?<=(poster=")).*?(?=")', script.text_content())
        if not found:
            continue
        poster = found.group(0)
        if 'http' not in poster:
            poster = PAsearchSites.getSearchBaseURL(siteNum) + poster
        art.append(poster)

    Log('Artwork found: %d' % len(art))
    for position, image_url in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(image_url, metadata):
            continue
        # Download image file for analysis
        try:
            download = PAutils.HTTPRequest(image_url)
            w, h = Image.open(StringIO(download.content)).size
            # Add the image proxy items to the collection
            if h > 1:
                # Item is a poster
                metadata.posters[image_url] = Proxy.Media(download.content, sort_order=position)
            if w > 100:
                # Item is an art item
                metadata.art[image_url] = Proxy.Media(download.content, sort_order=position)
        except:
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` for a Meana Wolf scene from its details page.

    ``metadata.id`` is split on ``|``: field 0 is the encoded scene URL and
    field 2 the encoded poster URL.

    Args:
        metadata: Metadata object, filled in place.
        siteNum: Site index passed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = metadata.id.split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    scenePoster = PAutils.Decode(metadata_id[2])
    if 'http' not in scenePoster:
        # A relative poster path needs the site's base URL in front of it
        # (not the site's display name) to become a fetchable URL.
        scenePoster = PAsearchSites.getSearchBaseURL(siteNum) + scenePoster
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//div[@class="trailerArea"]/h3')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="trailerContent"]/p')[0].text_content().strip()

    # Studio
    metadata.studio = 'Meana Wolf'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = detailsPageElements.xpath(
        '//div[@class="videoContent"]/ul/li[2]')[0].text_content().replace(
            'ADDED:', '').strip()
    if date:
        date_object = datetime.strptime(date, '%B %d, %Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieActors.clearActors()
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//div[@class="videoContent"]/ul/li[position()=last()]/a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors
    for actorLink in detailsPageElements.xpath(
            '//div[@class="videoContent"]/ul/li[3]/a'):
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''
        try:
            actorPageURL = actorLink.get('href')
            req = PAutils.HTTPRequest(actorPageURL)
            actorPage = HTML.ElementFromString(req.text)
            actorPhotoURL = actorPage.xpath(
                '//div[@class="modelBioPic"]/img/@src0_3x')[0]
            if 'http' not in actorPhotoURL:
                actorPhotoURL = PAsearchSites.getSearchBaseURL(
                    siteNum) + actorPhotoURL
        except Exception:
            # The photo is optional; keep the actor even if it cannot be fetched.
            pass
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = [scenePoster]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # Best effort: one broken image must not abort the update.
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` for a Girls Rimming scene from its details page.

    ``metadata.id`` is split on ``|``: field 0 is the encoded scene URL and
    field 2 the release date string. The page's ``keywords`` meta tag holds
    both genres and actors; entries containing ``' Id '`` are treated as
    actors, everything else as a genre.

    Args:
        metadata: Metadata object, filled in place.
        siteNum: Site index passed to the ``PAsearchSites``/``PAutils`` lookups.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//h2[@class="title"]/text()')[0]

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//meta[@name="description"]/@content')[0]

    # Studio
    metadata.studio = 'Girls Rimming'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    actors = []
    genres = detailsPageElements.xpath(
        '//meta[@name="keywords"]/@content')[0].split(',')
    for genreLink in genres:
        genreName = genreLink.strip()
        if ' Id ' in genreName:
            actors.append(genreName)
        else:
            movieGenres.addGenre(genreName.title())
    movieGenres.addGenre('Rim Job')

    # Actors
    movieActors.clearActors()
    for actorLink in actors:
        actorName = actorLink.split(' Id ')[0].strip()
        actorPhotoURL = ''

        actorPageURL = '%s/tour/models/%s.html' % (
            PAsearchSites.getSearchBaseURL(siteNum),
            actorName.lower().replace(' ', '-'))
        data = PAutils.HTTPRequest(actorPageURL)
        # NOTE(review): ``data`` looks like a response object (``.text`` is
        # read below), so comparing it to a string may never match - confirm
        # against PAutils.HTTPRequest.
        if not data or data == 'Page not found':
            googleResults = PAutils.getFromGoogleSearch(actorName, siteNum)
            for actorURL in googleResults:
                actorURL = actorURL.lower()
                if '/models/' in actorURL:
                    data = PAutils.HTTPRequest(actorURL)
                    break

        # The photo is optional: a failed lookup or a page without a model
        # picture leaves the empty default instead of aborting the update.
        if data:
            actorPage = HTML.ElementFromString(data.text)
            actorPhotos = actorPage.xpath(
                '//div[contains(@class, "model_picture")]//img/@src0_3x')
            if actorPhotos:
                actorPhotoURL = actorPhotos[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: first player image only, or nothing when the page has none
    art = detailsPageElements.xpath('//div[@id="fakeplayer"]//img/@src0_3x')[:1]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # Best effort: one broken image must not abort the update.
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` for a MomPOV scene from its details page.

    Args:
        metadata: Metadata object, filled in place; field 0 of its
            ``|``-separated ``id`` is the encoded scene URL.
        lang: Unused by this function.
        siteNum: Site index passed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector; cleared, then given the fixed genre.
        movieActors: Actor collector; only cleared (no actors are read).

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    titleNodes = detailsPageElements.xpath('//a[@class="title"]')
    if titleNodes:
        metadata.title = titleNodes[0].text_content().strip()
    else:
        # Fall back to the Open Graph title. A <meta> element has no text,
        # so the value has to be read from its ``content`` attribute.
        metadata.title = detailsPageElements.xpath(
            '//meta[@property="og:title"]/@content')[0].strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="entry_content"]/p')[0].text_content().strip()

    # Studio
    metadata.studio = 'MomPOV'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = detailsPageElements.xpath(
        '//div[@class="date_holder"]')[0].text_content().strip()
    if date:
        date_object = datetime.strptime(date, '%b %Y %d')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    movieGenres.addGenre('MILF')

    # Actors
    movieActors.clearActors()

    # Posters
    art = []
    xpaths = ['//div[@id="inner_content"]/div[1]/a/img/@src']
    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 and height >= width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # Best effort: one broken image must not abort the update.
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` for a Wicked Pictures scene or full DVD.

    Field 0 of the ``|``-separated ``metadata.id`` is the encoded page URL.
    A URL containing ``/video/`` is handled as a single scene (extra details
    come from the linked DVD page and photo page); anything else is handled
    as a full DVD page.

    Args:
        metadata: Metadata object, filled in place.
        siteNum: Site index passed to ``PAsearchSites.getSearchBaseURL``.
        movieGenres: Genre collector; cleared and refilled.
        movieActors: Actor collector; cleared and refilled.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    # Base URL used to resolve the relative links found on the page.
    urlBase = PAsearchSites.getSearchBaseURL(siteNum)
    if not sceneURL.startswith('http'):
        sceneURL = urlBase + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    art = []
    metadata.collections.clear()
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title
    metadata.title = detailsPageElements.xpath('//h1//span')[0].text_content().strip().title().replace('Xxx', 'XXX')

    # Studio
    metadata.studio = 'Wicked Pictures'

    # Release Date
    date = detailsPageElements.xpath('//li[@class="updatedOn"] | //li[@class="updatedDate"]')[0].text_content().replace('Updated', '').replace('|', '').strip()
    if date:
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Scene update
    if '/video/' in sceneURL:
        # Genres
        for genreLink in detailsPageElements.xpath('//div[contains(@class, "sceneColCategories")]/a'):
            genreName = genreLink.text_content().strip()
            movieGenres.addGenre(genreName)

        # Actors
        for actorLink in detailsPageElements.xpath('//div[contains(@class, "sceneColActors")]//a'):
            actorName = actorLink.text_content().strip()
            actorPhotoURL = ''
            try:
                actorPageURL = urlBase + actorLink.get('href')
                req = PAutils.HTTPRequest(actorPageURL)
                actorPage = HTML.ElementFromString(req.text)
                actorPhotoURL = actorPage.xpath('//img[@class="actorPicture"]/@src')[0]
            except Exception:
                # The photo is optional; keep the actor without it.
                pass
            movieActors.addActor(actorName, actorPhotoURL)

        # Background: preview picture URL embedded in an inline script.
        # NOTE(review): the script is picked by position (8th <script>) -
        # this breaks if the page layout changes.
        script_text = detailsPageElements.xpath('//script')[7].text_content()
        marker = 'picPreview":"'
        alpha = script_text.find(marker)
        if alpha != -1:
            start = alpha + len(marker)
            omega = script_text.find('"', start)
            previewBG = script_text[start:omega].replace(r'\/', '/')
            art.append(previewBG)

        # Get dvd page for some info
        dvdPageURL = urlBase + detailsPageElements.xpath('//div[@class="content"]//a[contains(@class,"dvdLink")]/@href')[0]
        req = PAutils.HTTPRequest(dvdPageURL)
        dvdPageElements = HTML.ElementFromString(req.text)

        # Tagline and Collection(s)
        tagline = dvdPageElements.xpath('//h3[@class="dvdTitle"]')[0].text_content().strip().title().replace('Xxx', 'XXX')
        metadata.tagline = tagline
        metadata.collections.add(tagline)

        # Summary
        try:
            metadata.summary = dvdPageElements.xpath('//p[@class="descriptionText"]')[0].text_content().strip()
        except Exception:
            pass

        # Director
        director = metadata.directors.new()
        try:
            directors = dvdPageElements.xpath('//ul[@class="directedBy"]')
            for dirname in directors:
                director.name = dirname.text_content().strip()
        except Exception:
            pass

        # DVD cover
        dvdCover = dvdPageElements.xpath('//img[@class="dvdCover"]/@src')[0]
        art.append(dvdCover)

        # Extra photos for the completist
        photoPageURL = urlBase + detailsPageElements.xpath('//div[contains(@class, "picturesItem")]//a')[0].get('href').split('?')[0]
        req = PAutils.HTTPRequest(photoPageURL)
        photoPageElements = HTML.ElementFromString(req.text)

        # good 2:3 poster picture
        poster = photoPageElements.xpath('//div[@class="previewImage"]//img/@src')[0]
        art.append(poster)

        # more Pictures
        extraPix = photoPageElements.xpath('//li[@class="preview"]//a[@class="imgLink pgUnlocked"]/@href')
        art.extend(extraPix)

    # Full DVD update
    else:
        # Genres
        for genreLink in detailsPageElements.xpath('//p[@class="dvdCol"]/a'):
            genreName = genreLink.text_content().strip()
            movieGenres.addGenre(genreName)

        # Actors
        for actorLink in detailsPageElements.xpath('//div[@class="actorCarousel"]//a'):
            actorName = actorLink.xpath('.//span')[0].text_content().strip()
            # The ``@src`` XPath already yields the attribute value as a
            # string, so no further ``.get('src')`` is needed.
            actorPhotos = actorLink.xpath('.//img/@src')
            actorPhotoURL = actorPhotos[0] if actorPhotos else ''
            movieActors.addActor(actorName, actorPhotoURL)

        # Tagline/collections
        tagline = 'Wicked Pictures'
        metadata.tagline = tagline
        metadata.collections.add(tagline)

        # Summary
        try:
            metadata.summary = detailsPageElements.xpath('//p[@class="descriptionText"]')[0].text_content().strip()
        except Exception:
            pass

        # Director
        director = metadata.directors.new()
        try:
            directors = detailsPageElements.xpath('//ul[@class="directedBy"]')
            for dirname in directors:
                director.name = dirname.text_content().strip()
        except Exception:
            pass

        # Backgrounds
        scenePreviews = detailsPageElements.xpath('//div[@class="sceneContainer"]//img[contains(@id,"clip")]/@data-original')
        for scenePreview in scenePreviews:
            previewIMG = scenePreview.split('?')[0]
            art.append(previewIMG)

        # DVD cover
        dvdCover = detailsPageElements.xpath('//img[@class="dvdCover"]/@src')[0]
        art.append(dvdCover)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # Best effort: one broken image must not abort the update.
            pass

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Fill in metadata for a LittleCaprice scene from its details page.

    The scene URL is decoded from the first '|'-separated field of
    ``metadata.id``.  Title, summary, release date, actors and artwork are
    scraped from that page; the only genres added are group-size genres
    derived from the number of listed models.

    Args:
        metadata: metadata object to populate; the code sets ``studio``,
            ``title``, ``summary``, ``tagline``, ``collections``,
            ``originally_available_at``, ``year``, ``posters`` and ``art``.
        siteID: site identifier handed to ``PAsearchSites`` to look up the
            site name and base URL.
        movieGenres: genre collector (``clearGenres`` / ``addGenre``).
        movieActors: actor collector (``clearActors`` / ``addActor``).

    Returns:
        The same ``metadata`` object, updated in place.
    """
    Log('******UPDATE CALLED*******')
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    detailsPageElements = HTML.ElementFromURL(sceneURL)
    baseURL = PAsearchSites.getSearchBaseURL(siteID)

    def absoluteURL(link):
        # Only site-relative links get the base URL prepended; prefixing an
        # already absolute link would produce an unusable URL.
        if link.startswith('http'):
            return link
        return baseURL + link

    movieGenres.clearGenres()

    # Studio
    metadata.studio = 'LittleCaprice'

    # Title
    metadata.title = detailsPageElements.xpath('//h1[@class="entry-title"]')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="et_pb_text et_pb_module et_pb_bg_layout_light et_pb_text_align_left"]/p'
    )[0].text_content().strip()

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteID).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date (the meta tag holds an ISO timestamp; keep the date part)
    date = detailsPageElements.xpath(
        '//meta[@property="article:published_time"]'
    )[0].get("content").split("T")[0]
    if date:
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath(
        '//div[contains(@class,"et_pb_text_align_left")]/ul/li[contains(.,"Models")]/a'
    )

    # The page has no genre tags; derive a group-size genre from the cast.
    if len(actors) == 3:
        movieGenres.addGenre("Threesome")
    elif len(actors) == 4:
        movieGenres.addGenre("Foursome")
    elif len(actors) > 4:
        movieGenres.addGenre("Orgy")

    for actorLink in actors:
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''
        try:
            actorPage = HTML.ElementFromURL(actorLink.get("href"))
            actorPhotoURL = actorPage.xpath('//img[@class="model-page"]')[0].get("src")
            actorPhotoURL = actorPhotoURL.replace("media.", '')
            if 'http' not in actorPhotoURL:
                actorPhotoURL = baseURL + actorPhotoURL
        except Exception:
            # A missing or unreachable actor page must not abort the whole
            # scene update; the actor is still added, just without a photo.
            actorPhotoURL = ''
            Log('Unable to load actor photo for: %s' % actorName)

        movieActors.addActor(actorName, actorPhotoURL)

    ### Posters and artwork ###
    art = []

    # Video trailer background image
    art.extend(detailsPageElements.xpath('//meta[@property="og:image"]/@content')[:1])

    # Photos
    for photoLink in detailsPageElements.xpath('//span[@class="et_pb_image_wrap "]/img/@src'):
        art.append(absoluteURL(photoLink))

    # Scene photos page (best effort: not every scene links to one)
    try:
        galleryLinks = detailsPageElements.xpath(
            '//div[contains(@class, "et_pb_text_align_left")]/ul/li[contains(., "Pictures:")]/a/@href'
        )
        if galleryLinks:
            photoPage = HTML.ElementFromURL(absoluteURL(galleryLinks[0]))
            for unlockedPhoto in photoPage.xpath('//div[@class="et_pb_gallery_image landscape"]/a/@href'):
                art.append(absoluteURL(unlockedPhoto))
    except Exception:
        Log('Unable to load scene photos page')

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        try:
            # Download each image once and reuse the bytes for both the size
            # check and the stored media.
            imageData = HTTP.Request(posterUrl, headers={'Referer': 'http://www.google.com'}).content
            width, height = Image.open(StringIO(imageData)).size

            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(imageData, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(imageData, sort_order=idx)
        except Exception:
            # One bad image should not prevent the remaining artwork from loading.
            Log('Unable to process artwork: %s' % posterUrl)

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Populate ``metadata`` for one scene from its details page.

    ``metadata.id`` is split on '|': field 0 is the encoded scene URL (made
    absolute with the site's base URL when it does not start with 'http')
    and field 2 is the release date string.

    Args:
        metadata: metadata object updated in place.
        siteID: site identifier used for the ``PAsearchSites`` lookups.
        movieGenres: genre collector (``clearGenres`` / ``addGenre``).
        movieActors: actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    id_fields = str(metadata.id).split('|')
    scene_url = PAutils.Decode(id_fields[0])
    if not scene_url.startswith('http'):
        scene_url = PAsearchSites.getSearchBaseURL(siteID) + scene_url
    release_date = id_fields[2]
    response = PAutils.HTTPRequest(scene_url)
    page = HTML.ElementFromString(response.text)

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteID)

    # Title
    heading = page.xpath('//div[@class="col-xs-12 video-title"]//h3')[0]
    metadata.title = heading.text_content().strip()

    # Summary: the fifth bare text node of the info column
    info_nodes = page.xpath('//div[@class="col-sm-6 col-md-6 vidpage-info"]/text()')
    metadata.summary = info_nodes[4].strip()

    # Tagline and Collection(s)
    metadata.collections.clear()
    site_name = PAsearchSites.getSearchSiteName(siteID).strip()
    metadata.tagline = site_name
    metadata.collections.add(site_name)

    # Release Date
    if release_date:
        metadata.originally_available_at = parse(release_date)
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for tag in page.xpath('//div[@class="videopage-tags"]/a'):
        movieGenres.addGenre(tag.text_content().strip())

    # Actors
    movieActors.clearActors()
    for card in page.xpath('//div[@class="col-xs-6 col-sm-4 col-md-3"]'):
        name_nodes = card.xpath('.//div[@class="vidpage-mobilePad"]//a//strong/text()')
        performer = name_nodes[0].strip()

        # The photo is optional; fall back to an empty URL when it is absent.
        photo_url = ''
        try:
            photo_url = card.xpath('.//img[@class="img-responsive imgHover"]/@src')[0]
        except:
            pass

        movieActors.addActor(performer, photo_url)

    # Photos
    art = []
    try:
        art.append(page.xpath('//div[@class="col-xs-12 col-sm-6 col-md-6 vidCover"]//img/@src')[0])
    except:
        pass

    for thumb in page.xpath('//div[@class="vid-flex-container"]//span'):
        # NOTE(review): the attribute is spelled "rsc", not "src" - confirm
        # against the page markup before changing it.
        thumb_sources = thumb.xpath('.//img/@rsc')
        # Dropping the "_thumb" marker turns the thumbnail URL into the
        # presumably full-size one.
        art.append(thumb_sources[0].replace('_thumb', ''))

    Log('Artwork found: %d' % len(art))
    for position, image_url in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(image_url, metadata):
            continue

        # Download image file for analysis
        try:
            download = PAutils.HTTPRequest(image_url, headers={'Referer': 'http://www.google.com'})
            picture = Image.open(StringIO(download.content))
            width, height = picture.size

            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[image_url] = Proxy.Media(download.content, sort_order=position)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[image_url] = Proxy.Media(download.content, sort_order=position)
        except:
            pass

    return metadata
# Ordered (censored, replacement) pairs for titles.  Order is kept exactly
# as the replacements were originally chained.
_R18_TITLE_WORDS = (
    ('R**e', 'Rape'),
    ('S********l', 'Schoolgirl'),
    ('S***e', 'Slave'),
    ('M****t', 'Molest'),
    ('F***e', 'Force'),
    ('G*******g', 'Gang Bang'),
    # NOTE(review): this pair maps the pattern to itself (a no-op);
    # the intended replacement word is not known from this file.
    ('G******g', 'G******g'),
    ('K*d', 'Descendant'),
    ('C***d', 'Descendant'),
    ('T*****e', 'Torture'),
    ('T******e', 'Tentacle'),
    ('D**g', 'Drug'),
    ('P****h', 'Punish'),
    ('S*****t', 'Student'),
    ('V*****e', 'Violate'),
    ('V*****t', 'Violent'),
    ('B***d', 'Blood'),
    ('M************n', 'Mother and Son'),
)

# Ordered (censored, replacement) pairs for lower-cased genre names.
_R18_GENRE_WORDS = (
    ('r**e', 'rape'),
    ('s********l', 'schoolgirl'),
    ('s***e', 'slave'),
    ('m****ter', 'molester'),
    ('g*******g', 'gang bang'),
    # NOTE(review): no-op pair, see the title table.
    ('g******g', 'g******g'),
    ('k*d', 'descendant'),
    ('c***d', 'descendant'),
    ('f***e', 'force'),
    ('t*****e', 'torture'),
    ('t******e', 'tentacle'),
    ('d**g', 'drug'),
    ('p****h', 'punish'),
    ('s*****t', 'student'),
    ('v*****e', 'violate'),
    ('v*****t', 'violent'),
    ('b***d', 'blood'),
)


def _uncensor(text, replacements):
    """Undo the site's self-censoring in ``text`` using ordered pairs.

    Replacements are only attempted when ``text`` contains '**', so
    single-star patterns such as 'K*d' are only expanded in strings that
    also contain a double-star run.
    """
    if '**' not in text:
        return text
    for censored, plain in replacements:
        text = text.replace(censored, plain)
    return text


def update(metadata, siteID, movieGenres, movieActors):
    """Populate ``metadata`` for a JAV title from its product page.

    The scene URL is decoded from the first '|'-separated field of
    ``metadata.id`` and made absolute with the site's base URL when needed.
    The title is stored as "<ID> <title>", where the ID is the page's
    "DVD ID" or, when that starts with '--', its "Content ID".

    Args:
        metadata: metadata object updated in place.
        siteID: site identifier used for the ``PAsearchSites`` lookup.
        movieGenres: genre collector (``clearGenres`` / ``addGenre``).
        movieActors: actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteID) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    javID = detailsPageElements.xpath('//dt[text()="DVD ID:"]/following-sibling::dd[1]')[0].text_content().strip()
    if javID.startswith('--'):
        javID = detailsPageElements.xpath('//dt[text()="Content ID:"]/following-sibling::dd[1]')[0].text_content().strip()

    if ' ' in javID:
        # upper must be *called*; referencing the bound method and then
        # calling .replace on it raises AttributeError.
        javID = javID.upper().replace(' ', '-')

    # Title
    JavTitle = detailsPageElements.xpath("//cite[@itemprop='name']")[0].text_content().strip()

    # Undoing the Self Censoring R18.com does to their tags and titles
    JavTitle = _uncensor(JavTitle, _R18_TITLE_WORDS)

    metadata.title = javID + ' ' + JavTitle

    # Summary
    try:
        description = detailsPageElements.xpath('//div[@class="cmn-box-description01"]')[0].text_content()
        metadata.summary = description.replace('Product Description', '', 1).strip()
    except Exception:
        # The description box is optional; leave the summary untouched.
        pass

    # Studio
    metadata.studio = detailsPageElements.xpath('//dd[@itemprop="productionCompany"]')[0].text_content().strip()

    # Director ('----' is the page's placeholder for "none listed")
    directorName = detailsPageElements.xpath('//dd[@itemprop="director"]')[0].text_content().strip()
    if directorName != '----':
        # Create the entry only when there is a name to store, so no empty
        # director record is left behind.
        director = metadata.directors.new()
        director.name = directorName

    # Release Date: normalise the page's month spellings to '%b' abbreviations
    date = detailsPageElements.xpath('//dd[@itemprop="dateCreated"]')[0].text_content().strip()
    date = date.replace('.', '').replace(',', '')
    date = date.replace('Sept', 'Sep').replace('June', 'Jun').replace('July', 'Jul')
    date_object = datetime.strptime(date, '%b %d %Y')
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    for actor in detailsPageElements.xpath('//div[@itemprop="actors"]//span[@itemprop="name"]'):
        fullActorName = actor.text_content().strip()
        if fullActorName == '----':
            continue

        splitActorName = fullActorName.split('(')
        mainName = splitActorName[0].strip()

        # The photo block may be missing for an actor; do not let that abort
        # the update.
        photoMatches = detailsPageElements.xpath(
            '//div[@id="%s"]//img[contains(@alt, "%s")]/@src' % (mainName.replace(' ', ''), mainName)
        )
        actorPhotoURL = photoMatches[0] if photoMatches else ''
        # 'nowprinting.gif' is the site's placeholder image.
        if actorPhotoURL.rsplit('/', 1)[-1] == 'nowprinting.gif':
            actorPhotoURL = ''

        # "Name (Name)" collapses to "Name"; any other alias form is kept whole.
        if len(splitActorName) > 1 and mainName == splitActorName[1][:-1]:
            actorName = mainName
        else:
            actorName = fullActorName

        movieActors.addActor(actorName, actorPhotoURL)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//a[@itemprop="genre"]'):
        genreName = genreLink.text_content().lower().strip()
        movieGenres.addGenre(_uncensor(genreName, _R18_GENRE_WORDS))

    metadata.collections.add('Japan Adult Video')

    # Posters
    art = []
    xpaths = [
        '//img[@itemprop="image"]/@src',
        '//img[contains(@alt, "cover")]/@src',
        '//section[@id="product-gallery"]//img/@data-src',
    ]
    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(image.content)).size

            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and idx > 1:
                # Item is an art item (the first image is never used as art)
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # One bad image should not prevent the remaining artwork from loading.
            Log('Unable to process artwork: %s' % posterUrl)

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Populate ``metadata`` for a ManyVids video from its page.

    ``metadata.id`` is split on '|': field 0 is the video path component
    appended to "<base URL>/video/" and field 2 is the release date string.
    The uploader's username is used as tagline, collection and sole actor.

    Args:
        metadata: metadata object updated in place.
        siteID: site identifier used to look up the base URL.
        movieGenres: genre collector (``clearGenres`` / ``addGenre``).
        movieActors: actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = metadata.id.split('|')
    sceneURL = PAsearchSites.getSearchBaseURL(siteID) + '/video/' + metadata_id[0]
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h2[@class="h2 m-0"]')[0].text_content().strip()

    # Summary: all description blocks separated by blank lines.  Building it
    # with a join means the value is always defined, even with no blocks.
    paragraphs = detailsPageElements.xpath('//div[@class="desc-text"]')
    metadata.summary = '\n\n'.join(paragraph.text_content() for paragraph in paragraphs).strip()

    # Studio
    metadata.studio = 'ManyVids'

    # Collections / Tagline
    metadata.collections.clear()
    tagline = detailsPageElements.xpath('//a[contains(@class,"username ")]')[0].text_content().strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="tags"]/a'):
        movieGenres.addGenre(genreLink.text_content().strip())

    # Actors: the uploader is the only credited performer.  Reuse the
    # stripped username so the actor name matches the tagline exactly.
    movieActors.clearActors()
    actorName = tagline
    avatarNodes = detailsPageElements.xpath('//div[@class="pr-2"]/a/img')
    actorPhotoURL = (avatarNodes[0].get('src') or '') if avatarNodes else ''
    movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = ['//div[@id="rmpPlayer"]/@data-video-screenshot']
    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(image.content)).size

            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # One bad image should not prevent the remaining artwork from loading.
            Log('Unable to process artwork: %s' % posterUrl)

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Populate ``metadata`` for a SinsVR scene from its details page.

    The scene URL is decoded from the first '|'-separated field of
    ``metadata.id``.

    Args:
        metadata: metadata object updated in place.
        lang: not used by this function.
        siteNum: site identifier used for the ``PAsearchSites`` lookups.
        movieGenres: genre collector (``clearGenres`` / ``addGenre``).
        movieActors: actor collector (``clearActors`` / ``addActor``).
        art: list of artwork URLs; gallery and player images found on the
            page are appended to it before the artwork is downloaded.

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title: keep the part of <title> before the bullet, without the
    # whitespace that preceded the bullet.
    pageTitle = detailsPageElements.xpath('//title')[0].text_content().strip()
    metadata.title = pageTitle.split('•')[0].strip()

    # Summary
    metadata.summary = ''.join(
        paragraph.text_content()
        for paragraph in detailsPageElements.xpath('//li/div[@class="small"]/p')
    )

    # Studio
    metadata.studio = 'SinsVR'

    # Tagline and Collection
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = detailsPageElements.xpath('//span//time')[0].text_content().strip()
    if date:
        date_object = datetime.strptime(date, '%b %d, %Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for tag in detailsPageElements.xpath('//div[@class="tags-item"]'):
        movieGenres.addGenre(tag.text_content().strip())

    # Actors
    movieActors.clearActors()
    baseURL = PAsearchSites.getSearchBaseURL(siteNum)
    for actorLink in detailsPageElements.xpath(
        '//div/strong[text()="Starring"]//following-sibling::span/a[@class="tiny-link"]'
    ):
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''
        try:
            actorPageURL = baseURL + actorLink.get('href')
            req = PAutils.HTTPRequest(actorPageURL)
            actorPage = HTML.ElementFromString(req.text)
            # Use the whole src value; indexing the string with [0] would
            # keep only its first character.
            photoNode = actorPage.xpath('//div[@class="model-header__photo"]/img')[0]
            actorPhotoURL = photoNode.get('src') or ''
        except Exception:
            # A missing or unreachable actor page must not abort the scene
            # update; the actor is still added, just without a photo.
            actorPhotoURL = ''
            Log('Unable to load actor photo for: %s' % actorName)

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: gallery thumbnails, switched to the larger gallery variant
    for poster in detailsPageElements.xpath(
        '//div[contains(@class, "tn-photo__container")]/div/a/div/img/@src'
    ):
        if poster.startswith('http'):
            art.append(poster.replace('sceneGallerySmall', 'sceneGallery'))

    # Player poster image (skipped when the player element or its poster
    # attribute is absent)
    for player in detailsPageElements.xpath('//dl8-video')[:1]:
        img = player.get('poster')
        if img:
            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size

            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception:
            # One bad image should not prevent the remaining artwork from loading.
            Log('Unable to process artwork: %s' % posterUrl)

    return metadata