def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Find a model by name and append candidate matches to ``results``.

    The model index is fetched first. Every model link whose text occurs
    inside ``searchTitle`` has its model page fetched, and then:

    * if that page links to scenes, one result per scene block is appended,
      scored by date distance when ``searchDate`` is given and by title
      distance otherwise;
    * if it does not, a single result for the model itself is appended, using
      the ``Last-Modified`` header of the model thumbnail as a guessed date.

    Args:
        results: Container exposing ``Append``; receives the matches.
        encodedTitle: Not used by this function.
        searchTitle: Text being searched for; expected to contain the model name.
        siteNum: Site identifier handed to the ``PAsearchSites`` lookups.
        lang: Language value copied onto every result.
        searchDate: Date string to match against; may be empty or ``None``.

    Returns:
        The ``results`` object that was passed in.
    """
    url = PAsearchSites.getSearchSearchURL(siteNum)
    if siteNum != 978:
        # First letter of model's name (for site 978 all models are on one page)
        url += searchTitle[0:1]

    req = PAutils.HTTPRequest(url)
    searchResults = HTML.ElementFromString(req.text)

    for searchResult in searchResults.xpath('//a[starts-with(@href, "/free/girl/")][strong]'):
        modelName = searchResult.text_content().strip()
        if modelName not in searchTitle:
            continue

        modelHref = searchResult.xpath('@href')[0]
        modelReq = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + modelHref)
        modelResults = HTML.ElementFromString(modelReq.text)

        if modelResults.xpath('.//a[starts-with(@href, "/free/scene/")][strong]'):
            for modelResult in modelResults.xpath('//div[contains(@class, "videos")]'):
                titleLine = modelResult.xpath('.//h5')[0].text_content().strip()
                href = modelResult.xpath('.//a[starts-with(@href, "/free/scene/")][strong]/@href')[0]
                hrefParts = href.split('/')

                titleNoFormatting = titleLine.split('-')[1].strip()
                curID = PAutils.Encode(hrefParts[3] + '/' + hrefParts[4])
                releaseDate = parse(titleLine.split('ADDED:')[1].split('-')[0].strip()).strftime('%Y-%m-%d')

                if searchDate:
                    score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
                else:
                    # Without a date to compare, rank the scenes by title instead.
                    score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())

                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s' % (curID, siteNum, releaseDate),
                        name='%s [%s] %s' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum), releaseDate),
                        score=score,
                        lang=lang))
        else:
            curID = PAutils.Encode(modelHref.split('/')[3])
            modelThumbnail = PAsearchSites.getSearchBaseURL(siteNum) + searchResult.xpath('./img/@src')[0]
            image = PAutils.HTTPRequest(modelThumbnail, headers={'Referer': 'http://www.google.com'})

            releaseDate = parse(image.headers['Last-Modified']).strftime('%Y-%m-%d')
            if releaseDate == '2016-01-01':
                # Oldest image date, so could be anything pre 2016
                releaseDate = ''

            score = 100 - Util.LevenshteinDistance(modelName, searchTitle)
            if searchDate:
                searchDateObject = parse(searchDate)
                if releaseDate and searchDateObject.year >= 2016:
                    # Very good chance we can match on the date
                    delta = searchDateObject - parse(releaseDate)
                    if abs(delta.days) < 7:
                        score = 100  # High level of confidence right here
                    else:
                        score = score - 5
                elif releaseDate == '' and searchDateObject.year < 2016:
                    # Older video
                    score = score - 10
                else:
                    score = score - 50
            # With no search date there is nothing to compare against, so the
            # plain title score is kept as is.

            results.Append(
                MetadataSearchResult(
                    # An empty string keeps the literal text "None" out of the id.
                    id='%s|%d|%s' % (curID, siteNum, searchDate or ''),
                    name='%s [%s] %s' % (modelName, PAsearchSites.getSearchSiteName(siteNum), releaseDate),
                    score=score,
                    lang=lang))

    return results
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a members-area scene page.

    ``metadata.id`` is split on ``|``: field 0 is the encoded scene path and
    field 2 the release date chosen at search time. Title, genres, actors and
    artwork come from the scene page; more artwork is read from the page whose
    URL is the scene URL with ``vids`` replaced by ``highres``.

    Args:
        metadata: Object to populate; its ``id`` carries the encoded fields.
        lang: Not used by this function.
        siteNum: Site identifier handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector (``clearGenres`` / ``addGenre``).
        movieActors: Actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    baseURL = PAsearchSites.getSearchBaseURL(siteNum)
    sceneURL = '%s/membersarea/%s' % (baseURL, PAutils.Decode(metadata_id[0]))
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//title')[0].text_content().split('-')[0].strip()

    # Studio
    metadata.studio = 'FuelVirtual'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = tagline
    if Prefs['collections_addsitename']:
        metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//td[@class="plaintext"]/a[@class="model_category_link"]'):
        movieGenres.addGenre(genreLink.text_content().strip())
    movieGenres.addGenre('18-Year-Old')

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//div[@id="description"]//td[@align="left"]/a')
    if actors:
        if len(actors) == 3:
            movieGenres.addGenre('Threesome')
        if len(actors) == 4:
            movieGenres.addGenre('Foursome')
        if len(actors) > 4:
            movieGenres.addGenre('Orgy')

        for actorLink in actors:
            movieActors.addActor(actorLink.text_content().strip(), '')

    # Posters
    art = [baseURL + img for img in detailsPageElements.xpath('//a[@class="jqModal"]/img/@src')]

    photoPageUrl = sceneURL.replace('vids', 'highres')
    req = PAutils.HTTPRequest(photoPageUrl)
    photoPage = HTML.ElementFromString(req.text)
    art.extend(baseURL + img for img in photoPage.xpath('//a[@class="jqModal"]/img/@src'))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: log the failure and move on to the next image.
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def search(results, lang, siteNum, searchData):
    """Search the site for scenes and append the matches to ``results``.

    Two strategies are used:

    1. The site search page is queried with ``searchData.encoded``; each
       ``div.scene`` whose title link contains ``/scenes/`` becomes a result,
       scored by date distance when ``searchData.date`` is set and by title
       distance otherwise.
    2. Two scene URLs are guessed from the encoded title (``video---`` and
       ``movie---`` variants); each one that loads and parses is appended
       with a score of 100.

    Args:
        results: Container exposing ``Append``; receives the matches.
        lang: Language value copied onto every result.
        siteNum: Site identifier handed to the ``PAsearchSites`` lookups.
        searchData: Object providing ``encoded``, ``title`` and ``date``.

    Returns:
        The ``results`` object that was passed in.
    """
    siteName = PAsearchSites.getSearchSiteName(siteNum)

    req = PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded)
    searchResults = HTML.ElementFromString(req.text)
    for searchResult in searchResults.xpath('//div[@class="scene"]'):
        url = searchResult.xpath('.//a[@data-track="TITLE_LINK"]/@href')[0]
        if '/scenes/' not in url:
            continue

        curID = PAutils.Encode(url)
        titleNoFormatting = searchResult.xpath('.//a[@data-track="TITLE_LINK"]')[0].text_content()
        releaseDate = parse(
            searchResult.xpath('./span[@class="scene-date"]')[0].text_content().strip()
        ).strftime('%Y-%m-%d')

        if searchData.date:
            score = 100 - Util.LevenshteinDistance(searchData.date, releaseDate)
        else:
            score = 100 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

        results.Append(
            MetadataSearchResult(
                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                name='%s [%s] %s' % (titleNoFormatting, siteName, releaseDate),
                score=score,
                lang=lang))

    # search for exact scene name
    urlTitle = searchData.encoded.replace('%20', '-')
    baseURL = PAsearchSites.getSearchBaseURL(siteNum)
    urls = [
        baseURL + '/scenes/video---' + urlTitle + '_vids.html',
        baseURL + '/scenes/movie---' + urlTitle + '_vids.html',
    ]
    for url in urls:
        try:
            sceneReq = PAutils.HTTPRequest(url)
            scenePage = HTML.ElementFromString(sceneReq.text)
            titleNoFormatting = scenePage.xpath(
                '//div[@class="content-desc content-new-scene"]//h1')[0].text_content().strip()
            # Use the same YYYY-MM-DD form as the search results above, so the
            # id and display name never carry a raw datetime representation.
            releaseDate = parse(
                scenePage.xpath('//meta[@itemprop="uploadDate"]')[0].get('content')
            ).strftime('%Y-%m-%d')
        except Exception as e:
            # The URL is only a guess built from the title, so a miss is not an error.
            Log('Direct scene lookup failed for %s: %s' % (url, e))
            continue

        results.Append(
            MetadataSearchResult(
                id='%s|%d|%s' % (PAutils.Encode(url), siteNum, releaseDate),
                name='%s [%s] %s' % (titleNoFormatting, siteName, releaseDate),
                score=100,
                lang=lang))

    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a scene page (studio is set to 'Cumbizz').

    ``metadata.id`` is split on ``|``: field 0 is the encoded scene URL (made
    absolute with the site base URL when it does not start with ``http``) and
    field 2 is the encoded title.

    Args:
        metadata: Object to populate; its ``id`` carries the encoded fields.
        siteNum: Site identifier handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector (``clearGenres`` / ``addGenre``).
        movieActors: Actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.Decode(metadata_id[2])

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="container text-center"]//h2')[0].text_content().strip()

    # Studio
    metadata.studio = 'Cumbizz'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//span[@class="label label-primary"]/a'):
        movieGenres.addGenre(genreLink.text_content().strip().lower())

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//div[@class="breadcrumbs"]/a'):
        actorName = str(actorLink.text_content().strip())
        movieActors.addActor(actorName, '')

    # Posters
    xpaths = [
        '//section[@class="har_section har_image_bck har_wht_txt har_fixed"]/@data-image',
        '//img[@class="vidgal unos"]/@src',
        '//img[@class="vidgal dos"]/@src',
        '//img[@class="vidgal tres"]/@src',
        '//img[@class="vidgal quatros"]/@src',
    ]
    art = []
    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.google.com'})
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: log the failure and move on to the next image.
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a JSON search response.

    ``metadata.id`` is split on ``|``: field 0 is the encoded URL of the JSON
    search request that produced the match and field 2 is the numeric scene
    id. The request is repeated and the entry of ``results`` whose ``id``
    equals the scene id supplies every value. If the response has no
    ``results`` key or the id is not found, ``metadata`` is returned unchanged.

    Args:
        metadata: Object to populate; its ``id`` carries the encoded fields.
        lang: Not used by this function.
        siteNum: Site identifier handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector; receives only group-size genres here.
        movieActors: Actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    searchURL = PAutils.Decode(metadata_id[0])
    sceneID = int(metadata_id[2])
    req = PAutils.HTTPRequest(searchURL)

    searchResults = req.json()
    if 'results' not in searchResults:
        return metadata

    detailsPageElements = None
    for searchResult in searchResults['results']:
        if searchResult['id'] == sceneID:
            detailsPageElements = searchResult
            break

    if not detailsPageElements:
        return metadata

    # Title
    metadata.title = detailsPageElements['title']

    # Summary
    # must strip HTML tags
    metadata.summary = re.sub(r'<.*?>', '', detailsPageElements['long_description']).strip()

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Release date
    date_object = parse(detailsPageElements['release_date'])
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    # Only the group-size genres derived from the actor count below are added.
    movieGenres.clearGenres()

    # Actors
    movieActors.clearActors()
    if 'actors' in detailsPageElements:
        actors = detailsPageElements['actors']
        if len(actors) == 3:
            movieGenres.addGenre('Threesome')
        if len(actors) == 4:
            movieGenres.addGenre('Foursome')
        if len(actors) > 4:
            movieGenres.addGenre('Orgy')

        for actorLink in actors:
            actorName = actorLink['name']
            actorPhotoURL = actorLink['thumb']['image']

            if actorPhotoURL.endswith('noimageavailable.gif'):
                # Placeholder thumbnail: query the actor search by first name
                # and take the thumbnail of the entry with the same id.
                actorSearchURL = (
                    PAsearchSites.getSearchBaseURL(siteNum)
                    + '/api/v2/search/actors?thumb_size=355x475&query='
                    + actorName.split()[0]
                )
                actorResults = PAutils.HTTPRequest(actorSearchURL).json()
                for actorResult in actorResults.get('results', []):
                    if actorResult['id'] == actorLink['id']:
                        actorPhotoURL = actorResult['thumb']['image']
                        break

            movieActors.addActor(actorName, actorPhotoURL)

    # Director
    metadata.directors.clear()
    if 'directors' in detailsPageElements:
        for directorLink in detailsPageElements['directors']:
            director = metadata.directors.new()
            director.name = directorLink['name']

    # Posters
    art = [detailsPageElements['thumb']['image']]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: log the failure and move on to the next image.
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Fill ``metadata`` from a scene page (studio is set to '5Kporn').

    Field 0 of ``metadata.id`` (split on ``|``) is the encoded scene URL.
    Every request, including the per-actor model pages and the artwork
    downloads, is sent with a fixed ``nats`` cookie.

    Args:
        metadata: Object to populate; its ``id`` carries the encoded scene URL.
        siteID: Site identifier handed to ``PAsearchSites.getSearchSiteName``.
        movieGenres: Genre collector; only cleared here.
        movieActors: Actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    cookies = {'nats': 'MC4wLjMuNTguMC4wLjAuMC4w'}
    req = PAutils.HTTPRequest(sceneURL, cookies=cookies)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    # Strip so the padding around the '|' separator does not end up in the title.
    metadata.title = detailsPageElements.xpath('//title')[0].text_content().split('|')[0].strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[contains(@class, "video-summary")]//p[@class=""]')[0].text_content().strip()

    # Studio
    metadata.studio = '5Kporn'

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.tagline = PAsearchSites.getSearchSiteName(siteID)
    metadata.collections.add(metadata.tagline)

    # Date
    date = detailsPageElements.xpath('//h5[contains(., "Published")]')
    if date:
        date = date[0].text_content().replace('Published:', '').strip()
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//h5[contains(., "Starring")]/a'):
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''

        # The portrait is on the model's own page.
        modelURL = actorLink.xpath('.//@href')[0]
        modelReq = PAutils.HTTPRequest(modelURL, cookies=cookies)
        actorsPageElements = HTML.ElementFromString(modelReq.text)
        img = actorsPageElements.xpath('//img[@class="model-image"]/@src')
        if img:
            actorPhotoURL = img[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    xpaths = [
        '//div[contains(@class, "gal")]//img/@src',
    ]
    art = []
    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl, headers={'Referer': sceneURL}, cookies=cookies)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: log the failure and move on to the next image.
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a scene page that names its series.

    Field 0 of ``metadata.id`` (split on ``|``) is the encoded scene URL. The
    tagline and collection come from the page's ``p.series`` element, genres
    from the ``keywords`` meta tag, and actors (with photos) from the
    ``model-wrap`` list.

    Args:
        metadata: Object to populate; its ``id`` carries the encoded scene URL.
        siteNum: Site identifier handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector (``clearGenres`` / ``addGenre``).
        movieActors: Actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    # Summary
    # Every paragraph is followed by a blank line, including the last one.
    metadata.summary = ''.join(
        desc.text_content().strip() + '\n\n'
        for desc in detailsPageElements.xpath('//div[@class="description"]//p')
    )

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = detailsPageElements.xpath('//p[@class="series"]')[0].text_content().strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = detailsPageElements.xpath('//span[@class="date"]')[0].text_content().strip()
    if date:
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    keywords = detailsPageElements.xpath('//meta[@name="keywords"]/@content')[0]
    for genreLink in keywords.split(','):
        movieGenres.addGenre(genreLink.strip())

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//div[@class="model-wrap"]//li')
    if actors:
        if len(actors) == 3:
            movieGenres.addGenre('Threesome')
        if len(actors) == 4:
            movieGenres.addGenre('Foursome')
        if len(actors) > 4:
            movieGenres.addGenre('Orgy')

        for actorLink in actors:
            actorName = actorLink.xpath('.//h5')[0].text_content()
            actorPhotoURL = actorLink.xpath('.//img/@src')[0]
            movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//div[@class="photo-wrap"]//@href',
    ]
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if 'http' not in img:
                img = PAsearchSites.getSearchBaseURL(siteNum) + img
            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.google.com'})
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: log the failure and move on to the next image.
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a scene page (studio is set to 'ClubFilly').

    Field 0 of ``metadata.id`` (split on ``|``) is the encoded scene URL; it
    is made absolute with the site base URL when it does not start with
    ``http``. Actors are read from a comma-separated "Starring:" line.

    Args:
        metadata: Object to populate; its ``id`` carries the encoded scene URL.
        siteNum: Site identifier handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector (``clearGenres`` / ``addGenre``).
        movieActors: Actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//div[@class="fltWrap"]/h1/span')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//p[@class="description"]')[0].text_content().replace('Description:', '').strip()

    # Studio
    metadata.studio = 'ClubFilly'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = detailsPageElements.xpath(
        '//div[@class="fltRight"]')[0].text_content().replace('Release Date :', '').strip()
    if date:
        date_object = datetime.strptime(date, '%Y-%m-%d')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    movieGenres.addGenre('Lesbian')

    # Actors
    movieActors.clearActors()
    actorText = detailsPageElements.xpath(
        '//p[@class="starring"]')[0].text_content().replace('Starring:', '').strip()
    # ''.split(',') yields [''], so blank entries are dropped: otherwise an
    # empty actor would be added and the group-size genres miscounted.
    actors = [name.strip() for name in actorText.split(',') if name.strip()]
    if actors:
        if len(actors) == 3:
            movieGenres.addGenre('Threesome')
        if len(actors) == 4:
            movieGenres.addGenre('Foursome')
        if len(actors) > 4:
            movieGenres.addGenre('Orgy')

        for actorName in actors:
            movieActors.addActor(actorName, '')

    # Posters
    art = []
    xpaths = ['//ul[@id="lstSceneFocus"]/li/img/@src']
    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: log the failure and move on to the next image.
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a video page (studio is set to 'ManyVids').

    ``metadata.id`` is split on ``|``: field 0 is the video id appended to
    ``<base>/video/`` and field 2 is the release date chosen at search time.
    The uploader's user name is used as tagline, collection and sole actor.

    Args:
        metadata: Object to populate; its ``id`` carries the fields above.
        lang: Not used by this function.
        siteNum: Site identifier handed to ``PAsearchSites.getSearchBaseURL``.
        movieGenres: Genre collector (``clearGenres`` / ``addGenre``).
        movieActors: Actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = metadata.id.split('|')
    sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + '/video/' + metadata_id[0]
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h2[@class="h2 m-0"]')[0].text_content().strip()

    # Summary
    # All description blocks are joined with blank lines; with none present
    # the summary is simply empty.
    paragraphs = detailsPageElements.xpath('//div[@class="desc-text"]')
    metadata.summary = '\n\n'.join(paragraph.text_content() for paragraph in paragraphs).strip()

    # Studio
    metadata.studio = 'ManyVids'

    # Collections / Tagline
    metadata.collections.clear()
    uploader = detailsPageElements.xpath('//a[contains(@class, "username ")]')[0].text_content().strip()
    metadata.tagline = uploader
    metadata.collections.add(uploader)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="tags"]/a'):
        movieGenres.addGenre(genreLink.text_content().strip())

    # Actors
    movieActors.clearActors()
    photoElements = detailsPageElements.xpath('//div[@class="pr-2"]/a/img')
    actorPhotoURL = photoElements[0].get('src') if photoElements else ''
    # Same element as the tagline, so the stripped user name is reused.
    movieActors.addActor(uploader, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//div[@id="rmpPlayer"]/@data-video-screenshot'
    ]
    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: log the failure and move on to the next image.
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a scene or movie page (studio 'Marc Dorcel').

    Field 0 of ``metadata.id`` (split on ``|``) is the encoded page URL; it is
    made absolute with the site base URL when it does not start with ``http``.
    URLs containing ``p**n-movie`` are treated as movie pages: they use a
    different actor selector and date element, and get no group-size genres.

    Args:
        metadata: Object to populate; its ``id`` carries the encoded page URL.
        siteNum: Site identifier handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector (``clearGenres`` / ``addGenre``).
        movieActors: Actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    isScene = 'p**n-movie' not in sceneURL

    # Title
    metadata.title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    # Summary
    # The selector must start with '//': a leading '///' is not valid XPath,
    # so the summary could never be read.
    summaryElements = detailsPageElements.xpath('//span[@class="full"]')
    if summaryElements:
        metadata.summary = summaryElements[0].text_content().strip()

    # Studio
    metadata.studio = 'Marc Dorcel'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    movieGenres.addGenre('French p**n')

    movieName = detailsPageElements.xpath('//span[@class="movie"]/a')
    if movieName:
        metadata.collections.add(movieName[0].text_content().strip())
    movieGenres.addGenre('Blockbuster Movie')

    # Actors
    movieActors.clearActors()
    if isScene:
        actors = detailsPageElements.xpath('//div[@class="actress"]/a')
    else:
        actors = detailsPageElements.xpath('//div[@class="actor thumbnail "]/a/div[@class="name"]')

    if actors:
        if isScene:
            if len(actors) == 3:
                movieGenres.addGenre('Threesome')
            if len(actors) == 4:
                movieGenres.addGenre('Foursome')
            if len(actors) > 4:
                movieGenres.addGenre('Orgy')

        for actorLink in actors:
            movieActors.addActor(actorLink.text_content().strip(), '')

    # Release Date
    if isScene:
        date = detailsPageElements.xpath('//span[@class="publish_date"]')[0].text_content().strip()
    else:
        date = detailsPageElements.xpath(
            '//span[@class="out_date"]')[0].text_content().replace('Year :', '').strip()
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Director
    # Start from an empty list and only create an entry when the page names a
    # director, so no blank or repeated entries are left behind.
    metadata.directors.clear()
    directorElements = detailsPageElements.xpath('//span[@class="director"]')
    if directorElements:
        director = metadata.directors.new()
        director.name = directorElements[0].text_content().replace('Director :', '').strip()

    # Poster
    art = []
    xpaths = [
        '//div[contains(@class, "photos")]//source/@data-srcset'
    ]
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if ',' in img:
                # srcset: keep the URL part of the last candidate
                img = img.split(',')[-1].split()[0]

            # Drop everything from the third underscore up to the extension.
            trash = '_' + img.split('_', 3)[-1].rsplit('.', 1)[0]
            img = img.replace(trash, '', 1)

            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: log the failure and move on to the next image.
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a scene page (studio is set to 'Hegre').

    Field 0 of ``metadata.id`` (split on ``|``) is the encoded scene URL; it
    is made absolute with the site base URL when it does not start with
    ``http``. The title comes from the first ``<h1>``, falling back to the
    ``og:title`` and then ``twitter:title`` meta tags. Two artwork URLs are
    derived from the ``twitter:image`` meta tag.

    Args:
        metadata: Object to populate; its ``id`` carries the encoded scene URL.
        siteNum: Site identifier handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector (``clearGenres`` / ``addGenre``).
        movieActors: Actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    title = ''
    headings = detailsPageElements.xpath('//h1')
    if headings:
        title = headings[0].text_content().strip()
    if not title:
        # <meta> elements have no text; their value is in the content attribute.
        for metaXPath in ('//meta[@property="og:title"]/@content',
                          '//meta[@name="twitter:title"]/@content'):
            content = detailsPageElements.xpath(metaXPath)
            if content and content[0].strip():
                title = content[0].strip()
                break
    if title:
        metadata.title = title

    # Summary
    summary = detailsPageElements.xpath(
        '//div[@class="record-description-content record-box-content"]')[0].text_content().strip()
    # Keep the text before 'Runtime'. split() returns the whole text when the
    # marker is absent, whereas slicing with find() == -1 drops the last character.
    metadata.summary = summary.split('Runtime')[0].strip()

    # Studio
    metadata.studio = 'Hegre'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = detailsPageElements.xpath('//span[@class="date"]')[0].text_content().strip()
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//a[@class="tag"]'):
        movieGenres.addGenre(genreLink.text_content().strip().lower())

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//a[@class="record-model"]')
    if actors:
        if len(actors) == 3:
            movieGenres.addGenre('Threesome')
        if len(actors) == 4:
            movieGenres.addGenre('Foursome')
        if len(actors) > 4:
            movieGenres.addGenre('Orgy')

        for actorLink in actors:
            actorName = actorLink.get('title').strip()
            actorPhotoURL = actorLink.xpath('.//img/@src')[0].replace('240x', '480x')
            movieActors.addActor(actorName, actorPhotoURL)

    # Director
    # Cleared first so the fixed director is not added again on every run.
    metadata.directors.clear()
    director = metadata.directors.new()
    director.name = 'Petter Hegre'
    director.photo = 'https://img.discogs.com/TafxhnwJE2nhLodoB6UktY6m0xM=/fit-in/180x264/filters:strip_icc():format(jpeg):mode_rgb():quality(90)/discogs-images/A-2236724-1305622884.jpeg.jpg'

    # Posters
    twitterImage = detailsPageElements.xpath('//meta[@name="twitter:image"]/@content')[0]
    art = [
        twitterImage.replace('board-image', 'poster-image').replace('1600x', '640x'),
        twitterImage.replace('1600x', '1920x'),
    ]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.google.com'})
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: log the failure and move on to the next image.
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from the site's JSON API.

    ``metadata.id`` is split on ``|``: field 0 is the scene id, field 2 the
    scene title and field 3 an encoded JSON list of artwork objects, each
    with a ``url`` key. Scene details, tags and models are fetched through
    ``getJSONfromAPI``. For site 1027 the tags are stored as actors instead
    of genres. A fixed, site-specific actor is added for the sites listed in
    ``baseActors``.

    Args:
        metadata: Object to populate; its ``id`` carries the fields above.
        lang: Not used by this function.
        siteNum: Site identifier handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector (``clearGenres`` / ``addGenre``).
        movieActors: Actor collector (``clearActors`` / ``addActor``).

    Returns:
        The ``metadata`` object that was passed in.
    """
    metadata_id = str(metadata.id).split('|')
    sceneID = metadata_id[0]
    title = metadata_id[2].strip()

    apiurl = getAPIURL(
        PAsearchSites.getSearchBaseURL(siteNum) + '/scene/' + sceneID + '/' + urllib.quote(title))
    apiurl = PAsearchSites.getSearchSearchURL(siteNum) + apiurl
    detailsPageElements = getJSONfromAPI(apiurl + updatequery.format(sceneID))[0]

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements['title'].title(), siteNum)

    # Summary
    metadata.summary = detailsPageElements['description']

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)

    # Release Date
    date = detailsPageElements['sites']['collection'][sceneID]['publishDate']
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    movieActors.clearActors()
    if 'tags' in detailsPageElements:
        genres = detailsPageElements['tags']['collection']
        # A list here is skipped; only a mapping of tags is processed.
        if not isinstance(genres, list):
            for tag in genres.values():
                genre = tag['alias']
                if not genre:
                    continue
                if siteNum == 1027:
                    movieActors.addActor(genre.replace('-', ' '), '')
                else:
                    movieGenres.addGenre(genre)

    # Actors
    actors = getJSONfromAPI(apiurl + modelquery + sceneID)
    if not isinstance(actors, list):
        for actorEntry in actors.values():
            for model in actorEntry['modelId']['collection'].values():
                actorName = model['stageName']
                if actorName:
                    movieActors.addActor(actorName, '')

    # Fixed actor per site
    baseActors = {
        1024: 'Aletta Ocean',
        1025: 'Eva Lovia',
        1026: 'Romi Rain',
        1030: 'Dani Daniels',
        1031: 'Chloe Toy',
        1033: 'Katya Clover',
        1035: 'Lisey Sweet',
        1037: 'Gina Gerson',
        1038: 'Valentina Nappi',
        1039: 'Vina Sky',
        1058: 'Vicki Valkyrie',
        1075: 'Dillion Harper',
        1191: 'Lilu Moon',
    }
    baseactor = baseActors.get(siteNum, '')
    if baseactor:
        # Sites without a fixed actor must not contribute a nameless entry.
        movieActors.addActor(baseactor, '')

    # Posters
    art = []
    artobj = json.loads(PAutils.Decode(metadata_id[3]))
    if artobj:
        for artItem in artobj:
            art.append(artItem['url'])

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height and idx > 1:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best effort: log the failure and move on to the next image.
            Log('Unable to process artwork %s: %s' % (posterUrl, e))

    return metadata
def getJSONfromAPI(url):
    """Request ``url`` and return ``['response']['collection']`` of its JSON body."""
    payload = PAutils.HTTPRequest(url).json()
    return payload['response']['collection']
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Scrape a scene page and copy its details into ``metadata``.

    Field 0 of the pipe-separated ``metadata.id`` is the encoded scene URL
    and field 2 an encoded poster URL.

    Args:
        metadata: object to fill (title, studio, tagline, collections,
            date, posters, art).
        lang: not used by this function.
        siteNum: site index used for base-URL and site-name lookups.
        movieGenres: genre collector (added to, not cleared here).
        movieActors: actor collector; cleared and refilled.
        art: caller-supplied list of image URLs; the poster from the id is
            appended to it.

    Returns:
        The ``metadata`` object that was passed in.
    """
    idFields = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(idFields[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)

    # Title (left untouched when the heading cannot be read)
    try:
        headings = page.xpath('//h1[@class="entry-title"]')
        metadata.title = headings[-1].text_content().strip()
    except:
        pass

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Collections / Tagline
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Genres
    genreXPath = '//div[@class="tags-list"]/a//i[@class="fa fa-folder-open"]/..'
    for genreNode in page.xpath(genreXPath):
        movieGenres.addGenre(genreNode.text_content().replace('Movies', '').strip().lower())

    # Release Date
    dateText = page.xpath('//div[@id="video-date"]')[0].text_content().strip()
    if dateText:
        dateText = dateText.replace('Date:', '').strip()
        metadata.originally_available_at = parse(dateText)
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    actorNodes = page.xpath('//div[@id="video-actors"]//a')
    if actorNodes:
        # Cast size implies a group genre
        castSize = len(actorNodes)
        if castSize == 3:
            movieGenres.addGenre('Threesome')
        elif castSize == 4:
            movieGenres.addGenre('Foursome')
        elif castSize > 4:
            movieGenres.addGenre('Orgy')

        for actorNode in actorNodes:
            movieActors.addActor(actorNode.text_content(), '')

    # Posters
    idPoster = PAutils.Decode(idFields[2])
    if idPoster:
        art.append(idPoster)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            response = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(response.content)).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(response.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(response.content, sort_order=idx)
        except:
            pass

    return metadata
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Append matching scenes to ``results``.

    When the first word of ``searchTitle`` is all digits it is used as a
    scene id and that scene page is fetched directly (score 100).
    Otherwise the site search is queried with ``encodedTitle`` and every
    tile is scored by date distance (if both dates are known) or by title
    distance.

    Returns:
        The ``results`` object that was passed in.
    """
    siteBase = PAsearchSites.getSearchBaseURL

    firstWord = searchTitle.split(' ')[0]
    if unicode(firstWord, 'utf8').isdigit():
        # Direct lookup by scene id
        sceneURL = siteBase(siteNum) + '/vrpornvideo/' + firstWord
        page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)

        titleNoFormatting = page.xpath('//h1[contains(@class, "video-title")]')[0].text_content()
        curID = PAutils.Encode(siteBase(siteNum) + '/vrpornvideo/' + firstWord)
        girlName = ''

        uploadDates = page.xpath('//p[@itemprop="uploadDate"]/@content')
        releaseDate = parse(uploadDates[0]).strftime('%Y-%m-%d') if uploadDates else ''

        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (curID, siteNum),
                name='[%s] %s in %s %s' % (PAsearchSites.getSearchSiteName(siteNum), girlName, titleNoFormatting, releaseDate),
                score=100,
                lang=lang))
        return results

    # Title search
    listing = HTML.ElementFromString(
        PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle).text)
    for tile in listing.xpath('//div[@class="tile-grid-item"]'):
        titleNoFormatting = tile.xpath('.//a[contains(@class, "video-card-title")]/@title')[0]
        curID = PAutils.Encode(tile.xpath('.//a[contains(@class, "video-card-title")]/@href')[0])

        uploadDates = tile.xpath('.//span[@class="video-card-upload-date"]/@content')
        releaseDate = parse(uploadDates[0]).strftime('%Y-%m-%d') if uploadDates else ''

        girlName = tile.xpath('.//a[@class="video-card-link"]')[0].text_content()

        if searchDate and releaseDate:
            distance = Util.LevenshteinDistance(searchDate, releaseDate)
        else:
            distance = Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())

        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (curID, siteNum),
                name='[%s] %s in %s %s' % (PAsearchSites.getSearchSiteName(siteNum), girlName, titleNoFormatting, releaseDate),
                score=100 - distance,
                lang=lang))

    return results
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Scrape a scene page into ``metadata`` (title, summary, genres, images).

    Field 0 of the pipe-separated ``metadata.id`` is the encoded scene URL;
    field 2 must exist because it is read into ``sceneDate``.

    Args:
        metadata: object to fill.
        lang: not used by this function.
        siteNum: site index used for base-URL and site-name lookups.
        movieGenres: genre collector; cleared and refilled.
        movieActors: actor collector; cleared (no actors are added).

    Returns:
        The ``metadata`` object that was passed in.
    """
    idFields = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(idFields[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    # NOTE(review): sceneDate is read but never applied to the metadata
    # below - confirm whether a release date was meant to be set from it.
    sceneDate = idFields[2]
    page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)
    movieActors.clearActors()

    # Title
    titleNode = page.xpath('//div[contains(@class, "title")]/h2')[0]
    metadata.title = titleNode.text_content().strip()

    # Summary
    summaryNode = page.xpath('//div[@class="description"]/div[@class="desc-text"]')[0]
    metadata.summary = summaryNode.text_content().strip()

    # Studio/Tagline/Collection
    metadata.collections.clear()
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Genres
    movieGenres.clearGenres()
    for tagNode in page.xpath('//ul[contains(@class, "tags")]//a'):
        movieGenres.addGenre(tagNode.text_content().strip())

    # Posters/Background
    imageXPaths = (
        '//meta[@property="og:image"]/@content',
        '//div[contains(@class, "thumbnails")]//img/@src',
    )
    # Query strings are dropped from every image URL
    art = [
        imageURL.split('?')[0]
        for imageXPath in imageXPaths
        for imageURL in page.xpath(imageXPath)
    ]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            response = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(response.content)).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(response.content, sort_order=idx)
            if width > 100 and idx > 1:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(response.content, sort_order=idx)
        except:
            pass

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Scrape a scene page and its actor pages into ``metadata``.

    Field 0 of the pipe-separated ``metadata.id`` is the encoded scene URL.

    Args:
        metadata: object to fill (title, summary, studio, tagline,
            collections, date, posters, art).
        siteID: site index used for base-URL and site-name lookups.
        movieGenres: genre collector; cleared and refilled.
        movieActors: actor collector; cleared and refilled. Each actor's
            own page is requested to obtain a photo URL.

    Returns:
        The ``metadata`` object that was passed in.
    """
    idFields = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(idFields[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteID) + sceneURL
    page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)

    # Title
    metadata.title = page.xpath('//h1[contains(@class, "video-title")]')[0].text_content()

    # Summary
    metadata.summary = page.xpath('//p[@class="video-description"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'BadoinkVR'

    # Tagline and Collection
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteID)
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date
    uploadDates = page.xpath('//p[@itemprop="uploadDate"]/@content')
    if uploadDates:
        metadata.originally_available_at = parse(uploadDates[0])
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for tagNode in page.xpath('//a[@class="video-tag"]'):
        movieGenres.addGenre(tagNode.text_content().strip())

    # Actors
    movieActors.clearActors()
    for actorNode in page.xpath('//a[contains(@class,"video-actor-link")]'):
        actorName = actorNode.text_content().strip()

        profileURL = PAsearchSites.getSearchBaseURL(siteID) + actorNode.get('href')
        profile = HTML.ElementFromString(PAutils.HTTPRequest(profileURL).text)
        photoSrc = profile.xpath('//img[@class="girl-details-photo"]/@src')[0]

        # The photo URL is stored without its query string
        movieActors.addActor(actorName, photoSrc.split('?')[0])

    # Posters
    imageXPaths = (
        '//div[contains(@class,"gallery-item")]/@data-big-image',
        '//img[@class="video-image"]/@src',
    )
    art = [
        imageURL.split('?')[0]
        for imageXPath in imageXPaths
        for imageURL in page.xpath(imageXPath)
    ]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            response = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(response.content)).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(response.content, sort_order=idx)
            if idx > 1 and width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(response.content, sort_order=idx)
        except:
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Scrape a scene page and its actor pages into ``metadata``.

    Field 0 of the pipe-separated ``metadata.id`` is the encoded scene URL.

    Args:
        metadata: object to fill (title, summary, studio, tagline,
            collections, date, posters, art).
        lang: not used by this function.
        siteNum: site index used for base-URL and site-name lookups.
        movieGenres: genre collector; cleared and refilled.
        movieActors: actor collector; cleared and refilled. Each actor's
            own page is requested to obtain a photo URL.
        art: caller-supplied list of image URLs; images found on the page
            are appended to it.

    Returns:
        The ``metadata`` object that was passed in.

    Raises:
        IndexError: when the title, summary, date or an actor photo node is
            missing from the fetched page.
    """
    metadata_id = metadata.id.split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[contains(@class, "u-mb--six ")]')[0].text_content().strip()

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Release Date (first node matching any of the known date containers)
    date = detailsPageElements.xpath(
        '//span[@class="date-display-single"] | //span[@class="u-inline-block u-mr--nine"] | //div[@class="video-meta-date"] | //div[@class="date"]'
    )[0].text_content().strip()
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//a[contains(@href, "/list/category/")]'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//a[contains(@href, "/pornstars/model/")]'):
        actorPageURL = PAsearchSites.getSearchBaseURL(siteNum) + actorLink.get('href')
        req = PAutils.HTTPRequest(actorPageURL)
        actorPage = HTML.ElementFromString(req.text)

        actorName = actorLink.text_content().strip()
        actorPhotoURL = actorPage.xpath(
            '//div[contains(@class, "u-ratio--model-poster")]//img/@data-src')[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters/Background
    xpaths = [
        '//div[contains(@class, "splash-screen")]/@style',
        '//a[contains(@class, "u-ratio--lightbox")]/@href',
    ]
    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            if poster.startswith('background-image'):
                # The style attribute wraps the URL in "url(...)". The
                # extracted value must be assigned back (it used to be
                # discarded, so the raw CSS text was queued as a URL).
                # Optional CSS quotes around the URL are stripped as well.
                poster = poster.split('url(')[1].split(')')[0].strip('\'" ')

            art.append(poster)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100 and idx > 1:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                # Best effort: an image that cannot be fetched or decoded
                # is skipped.
                pass

    return metadata
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Append scenes found through ``PAutils.getFromGoogleSearch`` to ``results``.

    Result URLs containing ``/view/`` (and not ``photoset``) are treated as
    scene pages and fetched one by one. URLs containing ``/model/`` are
    treated as listing pages whose items are added when their scene URL is
    not already among the scene pages. Scores use date distance when
    ``searchDate`` is given, title distance otherwise.

    Returns:
        The ``results`` object that was passed in.
    """

    def cleanURL(rawURL):
        # Drop the query string and the first 'dev.' occurrence
        return rawURL.split('?')[0].replace('dev.', '', 1)

    def addResult(url, title, dateText):
        releaseDate = parse(dateText).strftime('%Y-%m-%d')
        if searchDate:
            distance = Util.LevenshteinDistance(searchDate, releaseDate)
        else:
            distance = Util.LevenshteinDistance(searchTitle.lower(), title.lower())

        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (PAutils.Encode(url), siteNum),
                name='%s [%s] %s' % (title, PAsearchSites.getSearchSiteName(siteNum), releaseDate),
                score=100 - distance,
                lang=lang))

    googleResults = PAutils.getFromGoogleSearch(searchTitle, siteNum)

    # Pass 1: collect unique scene page URLs
    scenePages = []
    for rawURL in googleResults:
        candidate = cleanURL(rawURL)
        isScene = '/view/' in candidate and 'photoset' not in candidate
        if isScene and candidate not in scenePages:
            scenePages.append(candidate)

    # Pass 2: expand model pages into their listed scenes
    for rawURL in googleResults:
        if '/model/' not in rawURL:
            continue
        modelPage = HTML.ElementFromString(PAutils.HTTPRequest(rawURL).text)
        for item in modelPage.xpath('//div[contains(@class, "content-item")]'):
            itemURL = cleanURL(item.xpath('.//h3//@href')[0])
            if itemURL in scenePages:
                continue
            itemTitle = item.xpath('.//h3')[0].text_content().strip()
            itemDate = item.xpath('.//span[@class="pub-date"]')[0].text_content().strip()
            addResult(itemURL, itemTitle, itemDate)

    # Pass 3: fetch every scene page found in pass 1
    for pageURL in scenePages:
        scenePage = HTML.ElementFromString(PAutils.HTTPRequest(pageURL).text)
        pageTitle = scenePage.xpath('//h1')[0].text_content().strip()
        pageDate = scenePage.xpath('//span[@class="date"]')[0].text_content().strip()
        addResult(pageURL, pageTitle, pageDate)

    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a scene page (regular or ``webmasters`` variant) into ``metadata``.

    Field 0 of the pipe-separated ``metadata.id`` is the encoded scene URL.
    Field 2, when present, is a release date used as a fallback.

    For ``webmasters`` URLs the release date is looked up on the page of
    the last actor processed; for other URLs the date from the id is used.

    Args:
        metadata: object to fill (title, summary, studio, tagline,
            collections, date, posters, art).
        siteNum: site index used for base-URL and site-name lookups.
        movieGenres: genre collector; cleared and refilled.
        movieActors: actor collector; cleared and refilled.

    Returns:
        The ``metadata`` object that was passed in.

    Raises:
        IndexError: when the title node or the keywords meta tag is missing.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    # The id may carry only two fields. Default the date to '' so that the
    # fallbacks below never reference an undefined name.
    sceneDate = metadata_id[2] if len(metadata_id) > 2 else ''
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title (the heading text starts with the numeric scene id)
    if 'webmasters' in sceneURL:
        resultTitleID = detailsPageElements.xpath('//h1/text()')[0].title()
    else:
        resultTitleID = detailsPageElements.xpath('//h4/span')[0].text_content().title()
    metadata.title = re.sub(r'^\d+', '', resultTitleID)
    sceneID = re.sub(r'\D.*', '', resultTitleID)

    # Summary
    try:
        if 'webmasters' in sceneURL:
            metadata.summary = detailsPageElements.xpath(
                '//div[@class="row gallery-description"]//div')[1].text_content().strip()
        else:
            metadata.summary = detailsPageElements.xpath('//div[@class="row"]//a/@title')[0].strip()
    except Exception:
        # Summary is optional
        pass

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Actors
    if 'webmasters' in sceneURL:
        # NOTE(review): 'spam' looks like a typo for 'span' - confirm
        # against the live markup before changing the selector.
        actors = detailsPageElements.xpath('//spam[@class="key-words"]//a')
    else:
        actors = detailsPageElements.xpath('//h5//a')

    # Remove Actor Names from Genre List
    genres = detailsPageElements.xpath('//meta[@name="keywords"]/@content')[0].replace('Aussie Ass', '')
    genres = re.sub(r'id.\d*', '', genres, flags=re.IGNORECASE)

    # Page of the last actor processed; the date lookup below reads it.
    actorsPageElements = None
    for actorLink in actors:
        actorName = actorLink.text_content().title()
        genres = genres.replace(actorName, '')
        modelURL = actorLink.xpath('./@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        actorsPageElements = HTML.ElementFromString(req.text)

        # Reset per actor so one actor's photo is never reused for the
        # next, and tolerate a page without a photo.
        actorPhotoURL = ''
        photos = actorsPageElements.xpath('//img[contains(@id,"set-target")]/@src')
        if photos and photos[0]:
            actorPhotoURL = photos[0]
            if 'http' not in actorPhotoURL:
                actorPhotoURL = PAsearchSites.getSearchBaseURL(siteNum) + actorPhotoURL

        movieActors.addActor(actorName, actorPhotoURL)

    # Date
    date = ''
    try:
        if 'webmasters' in sceneURL:
            pageResults = int(actorsPageElements.xpath(
                '//span[@class="number_item "]')[0].text_content().strip())
            if not pageResults:
                pageResults = 1

            for x in range(pageResults):
                if x == 1:
                    # Only the second listing page is ever fetched.
                    nextPage = actorsPageElements.xpath('//a[contains(@class,"in_stditem")]/@href')[1]
                    req = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + nextPage)
                    actorsPageElements = HTML.ElementFromString(req.text)

                for sceneElements in actorsPageElements.xpath('//div[@class="box"]'):
                    if sceneID in sceneElements.xpath('.//a/text()')[1]:
                        # Prefer the date inside the matched box; fall back
                        # to the first date on the page (the old behaviour)
                        # when the box holds none.
                        dateNodes = (sceneElements.xpath('.//span[@class="video-date"]')
                                     or actorsPageElements.xpath('.//span[@class="video-date"]'))
                        date = dateNodes[0].text_content().strip()
                        break

                if date:
                    break
        else:
            date = sceneDate
    except Exception:
        # No actor page, no page counter, unexpected markup...
        date = sceneDate

    if not date:
        date = sceneDate

    if date:
        # Round-trip through a date-only string so any time component is
        # dropped.
        date = parse(date).strftime('%d-%m-%Y')
        date_object = datetime.strptime(date, '%d-%m-%Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres (entries emptied by the removals above are skipped)
    for genre in genres.split(','):
        genreName = genre.strip()
        if genreName:
            movieGenres.addGenre(genreName)

    # Posters
    art = []
    xpaths = [
        '//img[contains(@alt,"content")]/@src',
        '//div[@class="box"]//img/@src',
    ]

    altURL = ''
    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if 'http' not in img:
                if 'join' in img:
                    break
                elif 'webmasters' in sceneURL:
                    img = sceneURL + '/' + img
                else:
                    img = PAsearchSites.getSearchBaseURL(siteNum) + img
            art.append(img)

        # After the regular page has been scanned once, switch to the
        # webmasters page of the same scene for the remaining xpath.
        if 'webmasters' not in sceneURL:
            altURL = PAsearchSites.getSearchBaseURL(siteNum) + '/webmasters/' + sceneID
            req = PAutils.HTTPRequest(altURL)
            detailsPageElements = HTML.ElementFromString(req.text)
            sceneURL = altURL

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1 or height > width:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except Exception:
                # Best effort: an image that cannot be fetched or decoded
                # is skipped.
                pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a scene page into ``metadata`` (no actors are added).

    Field 0 of the pipe-separated ``metadata.id`` is the encoded scene URL.

    Args:
        metadata: object to fill (title, summary, studio, tagline,
            collections, date, posters, art).
        siteNum: site index used for base-URL and site-name lookups.
        movieGenres: genre collector; cleared and refilled.
        movieActors: actor collector; only cleared.

    Returns:
        The ``metadata`` object that was passed in.
    """
    idFields = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(idFields[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)

    # Title (site suffix removed)
    ogTitle = page.xpath('//meta[@property="og:title"]/@content')[0]
    metadata.title = ogTitle.replace(' - Mormon Girlz', '').strip()

    # Summary
    summaryNode = page.xpath('//*[contains(@id, "post-")]/aside[2]/div/div[1]')[0]
    metadata.summary = summaryNode.text_content().strip()

    # Studio
    metadata.studio = 'MormonGirlz'

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date
    published = page.xpath('//meta[@property="article:published_time"]/@content')[0].strip()
    if published:
        metadata.originally_available_at = parse(published)
        metadata.year = metadata.originally_available_at.year

    # Genres: taken from "more of ..." headings
    movieGenres.clearGenres()
    genreXPaths = (
        '//h1[contains(text(), "more of") and not(contains(text(), "Mormon Girls"))]',
    )
    for genreXPath in genreXPaths:
        for heading in page.xpath(genreXPath):
            movieGenres.addGenre(heading.text.replace('more of', '').strip())

    # Actors
    movieActors.clearActors()

    # Posters
    art = []
    for imageXPath in ('//*[@class="ngg-gallery-thumbnail"]/a/@href',):
        art.extend(page.xpath(imageXPath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            response = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(response.content)).size
            # Add the image proxy items to the collection
            if width > 1 and height >= width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(response.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(response.content, sort_order=idx)
        except:
            pass

    return metadata
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Append matching scenes to ``results``.

    When ``searchTitle`` starts with digits they are used as a scene id and
    the ``/webmasters/<id>`` page is fetched directly (score 100).
    Otherwise the title is turned into a model-page slug (three spellings
    are tried) and every scene listed on that page, and on its second page
    when a page counter is present, is scored.

    Args:
        results: result container; ``Append`` is called on it.
        encodedTitle: ignored on input; reused locally for the slug.
        searchTitle: text to search for.
        siteNum: site index used for URL and site-name lookups.
        lang: language passed through to each result.
        searchDate: optional ``YYYY-MM-DD`` date used for scoring and as a
            fallback release date.

    Returns:
        The ``results`` object that was passed in.
    """
    sceneID = re.sub(r'\D.*', '', searchTitle)
    if sceneID:
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + '/webmasters/' + sceneID
        req = PAutils.HTTPRequest(sceneURL)
        searchResults = HTML.ElementFromString(req.text)

        titleNoFormatting = re.sub(r'^\d+', '', searchResults.xpath('//h1/text()')[0].title())
        curID = PAutils.Encode(sceneURL)

        score = 100

        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (curID, siteNum),
                name='%s [%s]' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum)),
                score=score,
                lang=lang))
    else:
        # Handle 3 Types of Links: First, Last; First Only; First-Last
        try:
            encodedTitle = re.search(r'^\S*.\S*', searchTitle).group(0).replace(' ', '').lower()
            req = PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + '.html')
            searchResults = HTML.ElementFromString(req.text)
            if searchResults.xpath('//html')[0].text_content() == 'Page not found':
                raise Exception
        except Exception:
            try:
                encodedTitle = re.search(r'^\S*.\S*', searchTitle).group(0).replace(' ', '-').lower()
                req = PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + '.html')
                searchResults = HTML.ElementFromString(req.text)
                if searchResults.xpath('//html')[0].text_content() == 'Page not found':
                    raise Exception
            except Exception:
                # Last resort: first word only; the result is used as-is.
                encodedTitle = re.search(r'^\S*', searchTitle).group(0).lower()
                req = PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle + '.html')
                searchResults = HTML.ElementFromString(req.text)

        try:
            pageResults = int(searchResults.xpath('//span[@class="number_item "]')[0].text_content().strip())
        except Exception:
            # No page counter: treat as a single page
            pageResults = 1

        for x in range(pageResults):
            if x == 1:
                # Only the second listing page is ever fetched.
                nextPage = searchResults.xpath('//a[contains(@class,"in_stditem")]/@href')[1]
                req = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + nextPage)
                searchResults = HTML.ElementFromString(req.text)

            for searchResult in searchResults.xpath('//div[@class="infos"]'):
                resultTitleID = searchResult.xpath('.//span[@class="video-title"]')[0].text_content().strip().title()
                titleNoFormatting = re.sub(r'^\d+', '', resultTitleID)
                resultID = re.sub(r'\D.*', '', resultTitleID)

                sceneURL = searchResult.xpath('.//a/@href')[0]
                curID = PAutils.Encode(sceneURL)

                date = searchResult.xpath('.//span[@class="video-date"]')[0].text_content().strip()
                # A second, unconditional parse(date) used to follow this
                # if/else. It overwrote the searchDate fallback and raised
                # on an empty date, so it has been removed.
                if date:
                    releaseDate = parse(date).strftime('%Y-%m-%d')
                else:
                    releaseDate = parse(searchDate).strftime('%Y-%m-%d') if searchDate else ''
                # Only a date that really came from the page is shown or
                # used for date scoring.
                displayDate = releaseDate if date else ''

                if sceneID == resultID:
                    score = 100
                elif searchDate and displayDate:
                    score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
                else:
                    score = 100 - Util.LevenshteinDistance(searchTitle.lower(), titleNoFormatting.lower())

                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s' % (curID, siteNum, releaseDate),
                        name='%s [%s] %s' % (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum), displayDate),
                        score=score,
                        lang=lang))

    return results
def search(results, lang, siteNum, searchData):
    """Query the site's JSON search and append its hits to ``results``.

    When the first query has no hits and a filename is available, a title
    (and date) is extracted from the filename, written back to
    ``searchData`` and the query is repeated.

    Args:
        results: result container; ``Append`` is called on it.
        lang: language passed through to each result.
        siteNum: site index used for URL and site-name lookups.
        searchData: search state; ``encoded``, ``title``, ``date`` and
            ``filename`` are read, and ``title``, ``date`` and ``encoded``
            may be rewritten.

    Returns:
        The ``results`` object that was passed in.
    """
    searchURL = PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded
    req = PAutils.HTTPRequest(searchURL)
    searchResults = req.json()

    # `and` binds tighter than `or`, so the unparenthesised original ran
    # the filename regexes when the 'results' key was missing even without
    # a filename. The filename is now required in both cases.
    noHits = 'results' not in searchResults or not searchResults['results']
    if noHits and searchData.filename:
        # femjoy.17.03.12.maria.rya.girl.in.the.mirror.mp4
        # try to extract as much of the title as possible without including the model
        m = re.search(r'femjoy\.(\d{2}\.\d{2}\.\d{2})\.(.+)', searchData.filename, re.IGNORECASE)
        if m:
            searchData.date = parse('20' + m.group(1)).strftime('%Y-%m-%d')
            searchWords = m.group(2).split('.')
            wordCount = len(searchWords)
            if wordCount > 2:
                # Skip the first two words (treated as the model's name)
                searchData.title = ' '.join(searchWords[2:])
            else:
                searchData.title = searchWords[-1]
        else:
            # Belinda & Fiva - Give me your hand 29-Mar-2010.mp4
            # take everything from after the dash and before the date
            m = re.search(r'.+ - (.+) (\d{2}-[a-z]{3}-\d{4})', searchData.filename, re.IGNORECASE)
            if m:
                searchData.date = parse(m.group(2)).strftime('%Y-%m-%d')
                searchData.title = m.group(1)

        searchData.encoded = urllib.quote(searchData.title)
        searchURL = PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded
        req = PAutils.HTTPRequest(searchURL)
        searchResults = req.json()

    if 'results' in searchResults:
        # Every hit shares the encoded search URL; the scene id is the
        # third id field.
        curID = PAutils.Encode(searchURL)
        for searchResult in searchResults['results']:
            titleNoFormatting = searchResult['title']
            sceneID = searchResult['id']

            date = searchResult['release_date']
            if date:
                releaseDate = parse(date).strftime('%Y-%m-%d')
            else:
                releaseDate = searchData.dateFormat() if searchData.date else ''
            displayDate = releaseDate if date else ''

            actorsName = [actorLink['name'] for actorLink in searchResult['actors']]
            actorsString = getActorsString(actorsName)

            if searchData.date and displayDate:
                score = 100 - Util.LevenshteinDistance(searchData.date, releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

            results.Append(
                MetadataSearchResult(
                    id='%s|%d|%d' % (curID, siteNum, sceneID),
                    name='%s - %s [%s] %s' % (titleNoFormatting, actorsString, PAsearchSites.getSearchSiteName(siteNum), displayDate),
                    score=score,
                    lang=lang))

    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a scene page into ``metadata``; the actor is fixed.

    Field 0 of the pipe-separated ``metadata.id`` is the encoded scene URL.

    Args:
        metadata: object to fill (title, summary, studio, tagline,
            collections, date, posters, art).
        siteNum: site index used for base-URL and site-name lookups.
        movieGenres: genre collector; only cleared.
        movieActors: actor collector; cleared, then 'Riley Reid' is added.

    Returns:
        The ``metadata`` object that was passed in.
    """
    idFields = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(idFields[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)
    movieGenres.clearGenres()

    # Title: last '|'-separated part of the <title> text
    pageTitle = page.xpath('//title')[0].text_content()
    metadata.title = pageTitle.split('|')[-1].strip()

    # Summary
    metadata.summary = page.xpath('//meta[@name="description"]')[0].get('content').strip()

    # Studio
    metadata.studio = 'ReidMyLips'

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date (located by an absolute path into the page layout)
    dateXPath = '/html/body/div/div[4]/div[4]/div/main/div[2]/div[2]/div/div/div[2]/div/div/div[4]/p/span'
    dateText = page.xpath(dateXPath)[0].text_content().strip()
    if len(dateText) > 0:
        metadata.originally_available_at = datetime.strptime(dateText, '%B %d, %Y')
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    movieActors.addActor('Riley Reid', '')

    # Posters (query strings are dropped from the download links)
    downloadLinks = page.xpath(
        '//div[@id="pro-gallery-margin-container"]//a[@class="block-fullscreen gallery-item-social-download pull-right gallery-item-social-button"]//@href'
    )
    art = [link.split('?')[0] for link in downloadLinks]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            response = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(response.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(response.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(response.content, sort_order=idx)
        except:
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` purely from the fields packed into ``metadata.id``.

    No scene page is requested. The pipe-separated id supplies the encoded
    title (field 0), encoded description (field 2), release date (field 3)
    and encoded poster URL (field 4).

    Args:
        metadata: object to fill (title, summary, studio, tagline,
            collections, date, posters, art).
        lang: not used by this function.
        siteNum: site index used for the site-name lookup.
        movieGenres: genre collector; cleared, then filled from a fixed
            per-site list.
        movieActors: actor collector; only cleared.

    Returns:
        The ``metadata`` object that was passed in.
    """
    idFields = str(metadata.id).split('|')
    sceneTitle = PAutils.Decode(idFields[0])
    sceneDescription = PAutils.Decode(idFields[2])
    sceneDate = idFields[3]
    scenePoster = PAutils.Decode(idFields[4])

    metadata.collections.clear()
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title
    metadata.title = sceneTitle

    # Summary
    metadata.summary = sceneDescription

    # Studio
    metadata.studio = 'Thick Cash'

    # Tagline and Collection(s)
    subSite = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = subSite
    if Prefs['collections_addsitename']:
        metadata.collections.add(subSite)

    # Genres: fixed list per sub-site, matched case-insensitively
    fixedGenres = {
        'Family Lust'.lower(): ['Family Roleplay'],
        'Over 40 Handjobs'.lower(): ['MILF', 'Handjob'],
        'Ebony Tugs'.lower(): ['Ebony', 'Handjob'],
        'Teen Tugs'.lower(): ['Teen', 'Handjob'],
    }
    for genreName in fixedGenres.get(subSite.lower(), []):
        movieGenres.addGenre(genreName)

    # Release Date
    metadata.originally_available_at = parse(sceneDate)
    metadata.year = metadata.originally_available_at.year

    # Posters
    art = [scenePoster]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            response = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(response.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(response.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(response.content, sort_order=idx)
        except:
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Scrape a scene page and its actor pages into ``metadata``.

    Field 0 of the pipe-separated ``metadata.id`` is the encoded scene URL.

    Args:
        metadata: object to fill (title, summary, studio, tagline,
            collections, date, posters, art).
        siteNum: site index used for base-URL and site-name lookups.
        movieGenres: genre collector; cleared and refilled ('Foot Sex' is
            always added).
        movieActors: actor collector; cleared and refilled. Actor photos
            are looked up best-effort on each actor's page.

    Returns:
        The ``metadata`` object that was passed in.
    """
    idFields = metadata.id.split('|')
    sceneURL = PAutils.Decode(idFields[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title
    metadata.title = page.xpath('//div[@class="main-info-left"]/h1')[0].text_content().strip()

    # Summary
    metadata.summary = page.xpath('//p[@class="description"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'Love Her Feet'

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date
    dateText = page.xpath('//div[@class="date"]')[0].text_content().strip()
    if dateText:
        metadata.originally_available_at = datetime.strptime(dateText, '%B %d, %Y')
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for tagNode in page.xpath('//div[@class="video-tags"]/a'):
        movieGenres.addGenre(tagNode.text_content().strip())
    movieGenres.addGenre('Foot Sex')

    # Actors
    movieActors.clearActors()
    actorNodes = page.xpath('//div[@class="featured"]/a')
    if actorNodes:
        # Cast size implies a group genre
        castSize = len(actorNodes)
        if castSize == 3:
            movieGenres.addGenre('Threesome')
        elif castSize == 4:
            movieGenres.addGenre('Foursome')
        elif castSize > 4:
            movieGenres.addGenre('Orgy')

        for actorNode in actorNodes:
            actorName = actorNode.text_content().strip()
            actorPhotoURL = ''

            # Any failure while fetching the photo leaves whatever value
            # actorPhotoURL had reached at that point.
            try:
                profile = HTML.ElementFromString(PAutils.HTTPRequest(actorNode.get('href')).text)
                actorPhotoURL = profile.xpath('//div[@class="picture"]/img')[0].get("src0_3x")
                if 'http' not in actorPhotoURL:
                    actorPhotoURL = PAsearchSites.getSearchBaseURL(siteNum) + actorPhotoURL
            except:
                pass

            movieActors.addActor(actorName, actorPhotoURL)

    # Photos
    art = []
    for imageXPath in ('//meta[@property="og:image"]/@content', '//div[@class="photos"]/a/img/@src'):
        art.extend(page.xpath(imageXPath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            response = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(response.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(response.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(response.content, sort_order=idx)
        except:
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` for an Angela White scene.

    ``metadata.id`` is split on ``|``: field 0 decodes to the scene URL,
    field 2 decodes to the title and field 3 decodes to the release date.
    Summary, genres and artwork are read from the scene page; actor names
    are derived from the title.

    Args:
        metadata: Metadata object to populate.
        lang: Not used by this function.
        siteNum: Site index handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector; cleared and refilled here.
        movieActors: Actor collector; cleared and refilled here.

    Returns:
        The same ``metadata`` object.
    """
    idParts = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(idParts[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)

    # Studio
    metadata.studio = 'Angela White'

    # Title (carried in the id rather than scraped)
    metadata.title = PAutils.Decode(idParts[2]).strip()

    # Summary
    summaryNode = page.xpath('//div[@class="desc"]/p')[0]
    metadata.summary = summaryNode.text_content().strip()

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Genres: comma-separated keywords meta tag with the dots removed
    movieGenres.clearGenres()
    keywords = page.xpath('//meta[@name="keywords"]/@content')[0]
    for keyword in keywords.replace('.', '').split(','):
        movieGenres.addGenre(keyword.strip())

    # Release Date
    releaseDate = parse(PAutils.Decode(idParts[3]))
    metadata.originally_available_at = releaseDate
    metadata.year = metadata.originally_available_at.year

    # Actors: title minus 'BTS' and digits, split on ' X ', lower-cased
    movieActors.clearActors()
    withoutBTS = metadata.title.replace('BTS', '')
    nameSource = ''.join(ch for ch in withoutBTS if not ch.isdigit()).strip()
    for rawName in nameSource.split(' X '):
        movieActors.addActor(rawName.strip().lower(), '')

    # Posters
    art = [
        PAsearchSites.getSearchBaseURL(siteNum) + src
        for src in page.xpath(
            '//img[contains(@class, "tour-area-thumb")]/@data-src')
    ]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis; failures are ignored so the
        # remaining images are still processed.
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            isPoster = width > 1 or height > width
            isArt = width > 100 and width > height
            # Add the image proxy items to the collection
            if isPoster:
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if isArt:
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a scene page and the site's model index.

    The scene URL is decoded from the first ``|``-separated field of
    ``metadata.id``. URLs containing ``/en/`` get their title passed through
    ``PAutils.parseTitle`` and use ``' and '`` as the performer separator;
    all other URLs use ``' y '``.

    Args:
        metadata: Metadata object to populate.
        siteNum: Site index handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector; cleared and refilled here.
        movieActors: Actor collector; cleared and refilled here.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)
    isEnglish = '/en/' in sceneURL

    # Title
    title = detailsPageElements.xpath('//title')[0].text_content().split('|')[0].split('-')[0].strip()
    if isEnglish:
        title = PAutils.parseTitle(title, siteNum)
    metadata.title = title

    # Summary
    metadata.summary = detailsPageElements.xpath('//div[@class="description clearfix"]')[0].text_content().split(':')[-1].strip().replace('\n', ' ')

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.studio = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="categories"]/a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    if '&' in metadata.title:
        actors = metadata.title.split('&')
    else:
        separator = ' and ' if isEnglish else ' y '
        actors = detailsPageElements.xpath('//span[@class="site-name"]')[0].text_content().split(separator)

    baseURL = PAsearchSites.getSearchBaseURL(siteNum)
    indexCache = {}

    def modelsForLetter(letter):
        # Each per-letter index page is fetched and parsed at most once.
        if letter not in indexCache:
            indexReq = PAutils.HTTPRequest('%s/actrices/%s' % (baseURL, letter))
            indexPage = HTML.ElementFromString(indexReq.text)
            indexCache[letter] = indexPage.xpath('//div[@class="c-boxlist__box--image"]//parent::a')
        return indexCache[letter]

    # When the whole title is itself a listed model name it replaces every
    # split fragment. The answer is the same for every fragment, so it is
    # computed once instead of once per actor.
    titleIsModel = False
    if metadata.title:
        titleLower = metadata.title.lower()
        for model in modelsForLetter(metadata.title[0].lower()):
            if model.text_content().strip().lower() == titleLower:
                titleIsModel = True
                break

    for actorLink in actors:
        actorName = metadata.title if titleIsModel else actorLink.strip()

        # Hard-coded corrections for names the index spells differently.
        if 'africa' in actorName.lower():
            actorName = 'Africat'
        elif metadata.title == 'MAMADA ARGENTINA':
            actorName = 'Alejandra Argentina'
        elif actorName == 'Alika':
            actorName = 'Alyka'

        # A title such as 'Name & ' yields an empty fragment; skip it
        # rather than fail on actorName[0].
        if not actorName:
            continue

        actorPhotoURL = ''
        for model in modelsForLetter(actorName[0].lower()):
            if model.text_content().strip().lower() == actorName.lower():
                photoSrc = model.xpath('.//img/@src')
                if photoSrc:
                    actorPhotoURL = photoSrc[0].strip()
                break

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    # The poster URL sits inside an inline script as `posterImage: "<url>"`.
    scripts = detailsPageElements.xpath('//div[@class="top-area-content"]/script')
    if scripts:
        posterImage = re.search(r'(?<=posterImage:\s").*(?=")', scripts[0].text_content().strip())
        if posterImage:
            art.append(posterImage.group(0))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if height > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # One bad image must not stop the remaining artwork.
            Log('Unable to download artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a scene details page.

    ``metadata.id`` is split on ``|``: field 0 decodes to the scene URL and
    field 2, when present, is a fallback release date used if the page has
    no ``uploadDate`` meta value.

    Args:
        metadata: Metadata object to populate.
        lang: Not used by this function.
        siteNum: Site index handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector; cleared and refilled here.
        movieActors: Actor collector; cleared and refilled here.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    sceneDate = metadata_id[2] if len(metadata_id) > 2 else ''
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//div[@class="content-desc content-new-scene"]//h1'
    )[0].text_content().replace('Video -', '').replace('Movie -', '').strip()

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Summary (optional: a scene without a description keeps its old one)
    try:
        metadata.summary = detailsPageElements.xpath(
            '//div[@class="content-desc content-new-scene"]//p'
        )[0].text_content().strip()
    except Exception:
        pass

    # Genres
    movieGenres.clearGenres()
    for genre in detailsPageElements.xpath(
            '//ul[contains(@class, "scene-tags")]/li'):
        genreName = genre.xpath('.//a')[0].text_content().lower()
        movieGenres.addGenre(genreName)

    # Release Date
    # Prefer the page's uploadDate; fall back to the date stored in the id.
    # The fallback must parse sceneDate: `date` is empty on that path.
    dateNodes = detailsPageElements.xpath('//meta[@itemprop="uploadDate"]')
    date = dateNodes[0].get('content') if dateNodes else None
    date_object = None
    if date:
        date_object = datetime.strptime(date, '%m/%d/%Y')
    elif sceneDate:
        date_object = parse(sceneDate)
    if date_object is not None:
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    for actorPage in detailsPageElements.xpath(
            '//ul[@id="featured_pornstars"]//div[@class="model"]'):
        actorName = actorPage.xpath('.//h3')[0].text_content().strip()
        actorPhotoURL = actorPage.xpath('.//img/@src')[0]
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    # Only the first trailer image is used; the slice yields an empty list
    # (instead of an IndexError) when the page has none.
    art = detailsPageElements.xpath(
        '//div[@id="trailer_player_finished"]//img/@src')[:1]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except Exception as e:
            # One bad image must not stop the remaining artwork.
            Log('Unable to download artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Fill ``metadata`` from an ExploitedX scene or model page.

    ``metadata.id`` is split on ``|``: field 0 decodes to the page id and
    field 2, when present, is the release date. An id containing ``/`` is
    treated as a scene (``/free/scene/<id>/ec``); otherwise it is treated
    as a model page (``/free/girl/<id>/ec``).

    Args:
        metadata: Metadata object to populate.
        siteID: Site index handed to the ``PAsearchSites`` lookups.
        movieGenres: Genre collector; cleared and refilled here.
        movieActors: Actor collector; cleared and refilled here.

    Returns:
        The same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    curID = PAutils.Decode(metadata_id[0])
    # Always bind sceneDate: an id with fewer than three fields would
    # otherwise hit an unbound local at the Release Date check below.
    sceneDate = metadata_id[2] if len(metadata_id) > 2 else ''
    isScene = '/' in curID

    if isScene:
        # Viewing a Scene
        sceneURL = PAsearchSites.getSearchBaseURL(
            siteID) + '/free/scene/' + curID + '/ec'
    else:
        # Viewing a Model
        sceneURL = PAsearchSites.getSearchBaseURL(
            siteID) + '/free/girl/' + curID + '/ec'
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h3/text()[1]')[0].strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//meta[@name="description"]/@content')[0].strip()

    # Studio
    metadata.studio = 'ExploitedX'

    # Collections / Tagline
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteID)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//a[starts-with(@href, "/free/keywords")]'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    if isScene:
        # Viewing a Scene: follow each distinct model link once.
        seenModelURLs = set()
        for modelLink in detailsPageElements.xpath(
                '//a[starts-with(@href, "/free/girl/")]'):
            modelLinkUrl = PAsearchSites.getSearchBaseURL(
                siteID) + modelLink.xpath('./@href')[0]
            if modelLinkUrl in seenModelURLs:
                continue
            seenModelURLs.add(modelLinkUrl)

            modelReq = PAutils.HTTPRequest(modelLinkUrl)
            modelPageElements = HTML.ElementFromString(modelReq.text)
            actorName = modelPageElements.xpath(
                '//meta[@name="twitter:description"]/@content')[0].strip()
            actorPhotoURL = modelPageElements.xpath(
                '//meta[@name="twitter:image"]/@content')[0].strip()
            movieActors.addActor(actorName, actorPhotoURL)
    else:
        # Viewing a Model: the page itself describes the only actor.
        actorName = detailsPageElements.xpath(
            '//meta[@name="twitter:description"]/@content')[0].strip()
        actorPhotoURL = detailsPageElements.xpath(
            '//meta[@name="twitter:image"]/@content')[0].strip()
        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//meta[@name="twitter:image"]/@content',
    ]
    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(
                posterUrl, headers={'Referer': 'http://www.google.com'})
            im = StringIO(image.content)
            resized_image = Image.open(im)
            width, height = resized_image.size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except Exception as e:
            # One bad image must not stop the remaining artwork.
            Log('Unable to download artwork %s: %s' % (posterUrl, e))

    return metadata