Beispiel #1
0
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Look up a model on the site's model index and append candidate results.

    The model index page is fetched and every listed model whose name occurs
    in ``searchTitle`` is followed. If the model page links to scenes, one
    candidate per scene block is appended; otherwise a single candidate is
    built for the model itself, dated from the ``Last-Modified`` header of
    the model thumbnail.

    Args:
        results: container the candidates are added to via ``results.Append``.
        encodedTitle: not used by this function.
        searchTitle: title being searched for.
        siteNum: site number handed to the ``PAsearchSites`` lookups.
        lang: language passed through to each ``MetadataSearchResult``.
        searchDate: date string to match against; may be empty or ``None``.

    Returns:
        The ``results`` container that was passed in.
    """
    url = PAsearchSites.getSearchSearchURL(siteNum)
    if siteNum != 978:
        # Site 978 has all models on one page; the other sites are indexed by
        # the first letter of the model's name
        url += searchTitle[0:1]

    req = PAutils.HTTPRequest(url)
    searchResults = HTML.ElementFromString(req.text)
    siteName = PAsearchSites.getSearchSiteName(siteNum)

    for searchResult in searchResults.xpath('//a[starts-with(@href, "/free/girl/")][strong]'):
        modelName = searchResult.text_content().strip()
        if modelName not in searchTitle:
            continue

        modelHref = searchResult.xpath('@href')[0]
        modelReq = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + modelHref)
        modelResults = HTML.ElementFromString(modelReq.text)

        if modelResults.xpath('.//a[starts-with(@href, "/free/scene/")][strong]'):
            for sceneBlock in modelResults.xpath('//div[contains(@class, "videos")]'):
                titleLine = sceneBlock.xpath('.//h5')[0].text_content().strip()
                href = sceneBlock.xpath('.//a[starts-with(@href, "/free/scene/")][strong]/@href')[0]
                hrefParts = href.split('/')

                sceneTitle = titleLine.split('-')[1].strip()
                sceneID = PAutils.Encode(hrefParts[3] + '/' + hrefParts[4])
                releaseDate = parse(titleLine.split('ADDED:')[1].split('-')[0].strip()).strftime('%Y-%m-%d')

                if searchDate:
                    score = 100 - Util.LevenshteinDistance(searchDate, releaseDate)
                else:
                    # No date to compare against: rank scenes by title instead
                    score = 100 - Util.LevenshteinDistance(searchTitle.lower(), sceneTitle.lower())

                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s' % (sceneID, siteNum, releaseDate),
                        name='%s [%s] %s' % (sceneTitle, siteName, releaseDate),
                        score=score,
                        lang=lang))
        else:
            modelID = PAutils.Encode(modelHref.split('/')[3])
            modelThumbnail = PAsearchSites.getSearchBaseURL(siteNum) + searchResult.xpath('./img/@src')[0]
            image = PAutils.HTTPRequest(modelThumbnail, headers={'Referer': 'http://www.google.com'})
            releaseDate = parse(image.headers['Last-Modified']).strftime('%Y-%m-%d')

            if releaseDate == '2016-01-01':  # Oldest image date, so could be anything pre 2016
                releaseDate = ''

            score = 100 - Util.LevenshteinDistance(modelName, searchTitle)
            if searchDate:
                searchDateObject = parse(searchDate)
                if releaseDate and searchDateObject.year >= 2016:
                    # Very good chance we can match on the date
                    delta = searchDateObject - parse(releaseDate)
                    if abs(delta.days) < 7:
                        score = 100  # High level of confidence right here
                    else:
                        score = score - 5
                elif releaseDate == '' and searchDateObject.year < 2016:
                    # Older video
                    score = score - 10
                else:
                    score = score - 50
            # Without a search date there is nothing to confirm or contradict,
            # so the title-based score is left untouched

            results.Append(
                MetadataSearchResult(
                    # The searched date (not the thumbnail date) is stored in
                    # the id; an empty string is stored when none was given
                    id='%s|%d|%s' % (modelID, siteNum, searchDate or ''),
                    name='%s [%s] %s' % (modelName, siteName, releaseDate),
                    score=score,
                    lang=lang))

    return results
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a scene's members-area page.

    ``metadata.id`` is split on ``|``: the first field is decoded into the
    scene path under ``/membersarea/`` and the third field is the scene date.

    Args:
        metadata: metadata object to populate.
        lang: not used by this function.
        siteNum: site number handed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector (cleared, then filled).
        movieActors: actor collector (cleared, then filled).

    Returns:
        The populated ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    baseURL = PAsearchSites.getSearchBaseURL(siteNum)
    sceneURL = '%s/membersarea/%s' % (baseURL, PAutils.Decode(metadata_id[0]))
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//title')[0].text_content().split('-')[0].strip()

    # Studio
    metadata.studio = 'FuelVirtual'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = tagline
    if Prefs['collections_addsitename']:
        metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//td[@class="plaintext"]/a[@class="model_category_link"]'):
        movieGenres.addGenre(genreLink.text_content().strip())
    movieGenres.addGenre('18-Year-Old')

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//div[@id="description"]//td[@align="left"]/a')
    # The number of performers implies an extra genre
    if len(actors) == 3:
        movieGenres.addGenre('Threesome')
    elif len(actors) == 4:
        movieGenres.addGenre('Foursome')
    elif len(actors) > 4:
        movieGenres.addGenre('Orgy')

    for actorLink in actors:
        movieActors.addActor(actorLink.text_content().strip(), '')

    # Posters: thumbnails from the video page, then from the photo page
    imageXPath = '//a[@class="jqModal"]/img/@src'
    art = [baseURL + img for img in detailsPageElements.xpath(imageXPath)]

    photoPageUrl = sceneURL.replace('vids', 'highres')
    req = PAutils.HTTPRequest(photoPageUrl)
    photoPage = HTML.ElementFromString(req.text)
    art.extend(baseURL + img for img in photoPage.xpath(imageXPath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best-effort: report the failure and move on
            Log('Failed to process artwork %s: %r' % (posterUrl, e))

    return metadata
def search(results, lang, siteNum, searchData):
    """Search the site and append matching scenes to ``results``.

    Two strategies are used: the site's search page is scraped for scene
    entries, and then two direct scene URLs built from the encoded title
    (``video---`` and ``movie---`` variants) are probed.

    Args:
        results: container the candidates are added to via ``results.Append``.
        lang: language passed through to each ``MetadataSearchResult``.
        siteNum: site number handed to the ``PAsearchSites`` lookups.
        searchData: search descriptor; ``encoded``, ``title`` and ``date``
            are read from it.

    Returns:
        The ``results`` container that was passed in.
    """
    siteName = PAsearchSites.getSearchSiteName(siteNum)

    req = PAutils.HTTPRequest(PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded)
    searchResults = HTML.ElementFromString(req.text)
    for searchResult in searchResults.xpath('//div[@class="scene"]'):
        url = searchResult.xpath('.//a[@data-track="TITLE_LINK"]/@href')[0]
        if '/scenes/' not in url:
            continue

        curID = PAutils.Encode(url)
        titleNoFormatting = searchResult.xpath('.//a[@data-track="TITLE_LINK"]')[0].text_content()
        dateText = searchResult.xpath('./span[@class="scene-date"]')[0].text_content().strip()
        releaseDate = parse(dateText).strftime('%Y-%m-%d')

        if searchData.date:
            score = 100 - Util.LevenshteinDistance(searchData.date, releaseDate)
        else:
            score = 100 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

        results.Append(
            MetadataSearchResult(
                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                name='%s [%s] %s' % (titleNoFormatting, siteName, releaseDate),
                score=score,
                lang=lang))

    # search for exact scene name
    urlTitle = searchData.encoded.replace('%20', '-')
    baseURL = PAsearchSites.getSearchBaseURL(siteNum)
    urls = [
        baseURL + '/scenes/video---' + urlTitle + '_vids.html',
        baseURL + '/scenes/movie---' + urlTitle + '_vids.html',
    ]

    for url in urls:
        try:
            sceneReq = PAutils.HTTPRequest(url)
            scenePage = HTML.ElementFromString(sceneReq.text)

            curID = PAutils.Encode(url)
            titleNoFormatting = scenePage.xpath(
                '//div[@class="content-desc content-new-scene"]//h1')[0].text_content().strip()
            # Format the date the same way as the search-page results so the
            # id and display name carry a plain YYYY-MM-DD string
            releaseDate = parse(
                scenePage.xpath('//meta[@itemprop="uploadDate"]')[0].get('content')).strftime('%Y-%m-%d')

            results.Append(
                MetadataSearchResult(
                    id='%s|%d|%s' % (curID, siteNum, releaseDate),
                    name='%s [%s] %s' % (titleNoFormatting, siteName, releaseDate),
                    score=100,
                    lang=lang))
        except Exception as e:
            # A guessed URL that does not resolve to a scene page is expected;
            # note it and try the next variant
            Log('Direct scene lookup failed for %s: %r' % (url, e))

    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a scene page.

    ``metadata.id`` is split on ``|``: the first field decodes to the scene
    URL (prefixed with the site base URL when it is not absolute) and the
    third field decodes to the title.

    Args:
        metadata: metadata object to populate.
        siteNum: site number handed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector (cleared, then filled).
        movieActors: actor collector (cleared, then filled).

    Returns:
        The populated ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.Decode(metadata_id[2])

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="container text-center"]//h2')[0].text_content().strip()

    # Studio
    metadata.studio = 'Cumbizz'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//span[@class="label label-primary"]/a'):
        movieGenres.addGenre(genreLink.text_content().strip().lower())

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//div[@class="breadcrumbs"]/a'):
        movieActors.addActor(str(actorLink.text_content().strip()), '')

    # Posters
    xpaths = [
        '//section[@class="har_section har_image_bck har_wht_txt har_fixed"]/@data-image',
        '//img[@class="vidgal unos"]/@src',
        '//img[@class="vidgal dos"]/@src',
        '//img[@class="vidgal tres"]/@src',
        '//img[@class="vidgal quatros"]/@src',
    ]
    art = [img for xpath in xpaths for img in detailsPageElements.xpath(xpath)]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best-effort: report the failure and move on
            Log('Failed to process artwork %s: %r' % (posterUrl, e))

    return metadata
Beispiel #5
0
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a JSON search response.

    ``metadata.id`` is split on ``|``: the first field decodes to the search
    URL that is requested again, and the third field is the integer id of
    the scene to pick out of the ``results`` list of the JSON response.
    ``metadata`` is returned unchanged when the response has no ``results``
    key or no entry with that id.

    Args:
        metadata: metadata object to populate.
        lang: not used by this function.
        siteNum: site number handed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector (cleared; only actor-count genres are added).
        movieActors: actor collector (cleared, then filled).

    Returns:
        The ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    searchURL = PAutils.Decode(metadata_id[0])
    sceneID = int(metadata_id[2])
    req = PAutils.HTTPRequest(searchURL)
    searchResults = req.json()
    if 'results' not in searchResults:
        return metadata

    detailsPageElements = None
    for searchResult in searchResults['results']:
        if searchResult['id'] == sceneID:
            detailsPageElements = searchResult
            break

    if not detailsPageElements:
        return metadata

    # Title
    metadata.title = detailsPageElements['title']

    # Summary (must strip HTML tags)
    metadata.summary = re.sub(r'<.*?>', '', detailsPageElements['long_description']).strip()

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Release date
    date_object = parse(detailsPageElements['release_date'])
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres: nothing is read from the response here; only the actor-count
    # genres below are added
    movieGenres.clearGenres()

    # Actors
    movieActors.clearActors()
    if 'actors' in detailsPageElements:
        actors = detailsPageElements['actors']
        if len(actors) == 3:
            movieGenres.addGenre('Threesome')
        elif len(actors) == 4:
            movieGenres.addGenre('Foursome')
        elif len(actors) > 4:
            movieGenres.addGenre('Orgy')

        for actorLink in actors:
            actorName = actorLink['name']
            actorPhotoURL = actorLink['thumb']['image']

            if actorPhotoURL.endswith('noimageavailable.gif'):
                # Placeholder image: retry through the actor search endpoint,
                # querying by the first word of the name
                actorSearchURL = PAsearchSites.getSearchBaseURL(
                    siteNum) + '/api/v2/search/actors?thumb_size=355x475&query=' + actorName.split()[0]
                actorSearchResults = PAutils.HTTPRequest(actorSearchURL).json()
                if 'results' in actorSearchResults:
                    for actorResult in actorSearchResults['results']:
                        if actorResult['id'] == actorLink['id']:
                            actorPhotoURL = actorResult['thumb']['image']
                            break

            movieActors.addActor(actorName, actorPhotoURL)

    # Director
    metadata.directors.clear()
    if 'directors' in detailsPageElements:
        for directorLink in detailsPageElements['directors']:
            director = metadata.directors.new()
            director.name = directorLink['name']

    # Posters
    art = [detailsPageElements['thumb']['image']]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best-effort: report the failure and move on
            Log('Failed to process artwork %s: %r' % (posterUrl, e))

    return metadata
def update(metadata, siteID, movieGenres, movieActors):
    """Fill ``metadata`` from a scene page, fetching each actor's page for a photo.

    The first ``|``-separated field of ``metadata.id`` decodes to the scene
    URL. Every request is sent with a fixed ``nats`` cookie.

    Args:
        metadata: metadata object to populate.
        siteID: site number handed to ``PAsearchSites.getSearchSiteName``.
        movieGenres: genre collector (cleared; nothing is added).
        movieActors: actor collector (cleared, then filled).

    Returns:
        The populated ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])

    cookies = {'nats': 'MC4wLjMuNTguMC4wLjAuMC4w'}
    req = PAutils.HTTPRequest(sceneURL, cookies=cookies)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title: text before the first "|" of the page title, without the
    # whitespace that surrounds the separator
    metadata.title = detailsPageElements.xpath('//title')[0].text_content().split('|')[0].strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[contains(@class, "video-summary")]//p[@class=""]')[0].text_content()

    # Studio
    metadata.studio = '5Kporn'

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.tagline = PAsearchSites.getSearchSiteName(siteID)
    metadata.collections.add(metadata.tagline)

    # Date
    date = detailsPageElements.xpath('//h5[contains(., "Published")]')
    if date:
        date = date[0].text_content().replace('Published:', '').strip()
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//h5[contains(., "Starring")]/a'):
        actorName = actorLink.text_content().strip()

        # The photo lives on the model's own page
        modelURL = actorLink.xpath('.//@href')[0]
        modelReq = PAutils.HTTPRequest(modelURL, cookies=cookies)
        actorsPageElements = HTML.ElementFromString(modelReq.text)

        img = actorsPageElements.xpath('//img[@class="model-image"]/@src')
        actorPhotoURL = img[0] if img else ''

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    xpaths = [
        '//div[contains(@class, "gal")]//img/@src',
    ]
    art = [img for xpath in xpaths for img in detailsPageElements.xpath(xpath)]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl, headers={'Referer': sceneURL}, cookies=cookies)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best-effort: report the failure and move on
            Log('Failed to process artwork %s: %r' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a scene page that carries a series name.

    The first ``|``-separated field of ``metadata.id`` decodes to the scene
    URL. The series paragraph of the page becomes tagline and collection;
    genres come from the ``keywords`` meta tag.

    Args:
        metadata: metadata object to populate.
        siteNum: site number handed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector (cleared, then filled).
        movieActors: actor collector (cleared, then filled).

    Returns:
        The populated ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    # Summary: each paragraph followed by a blank line
    metadata.summary = ''.join(
        desc.text_content().strip() + '\n\n'
        for desc in detailsPageElements.xpath('//div[@class="description"]//p'))

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = detailsPageElements.xpath('//p[@class="series"]')[0].text_content().strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = detailsPageElements.xpath('//span[@class="date"]')[0].text_content().strip()
    if date:
        date_object = parse(date)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    keywords = detailsPageElements.xpath('//meta[@name="keywords"]/@content')[0]
    for genreLink in keywords.split(','):
        movieGenres.addGenre(genreLink.strip())

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//div[@class="model-wrap"]//li')
    # The number of performers implies an extra genre
    if len(actors) == 3:
        movieGenres.addGenre('Threesome')
    elif len(actors) == 4:
        movieGenres.addGenre('Foursome')
    elif len(actors) > 4:
        movieGenres.addGenre('Orgy')

    for actorLink in actors:
        actorName = actorLink.xpath('.//h5')[0].text_content()
        actorPhotoURL = actorLink.xpath('.//img/@src')[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//div[@class="photo-wrap"]//@href',
    ]

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if 'http' not in img:
                # Relative link: prefix the site base URL
                img = PAsearchSites.getSearchBaseURL(siteNum) + img

            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best-effort: report the failure and move on
            Log('Failed to process artwork %s: %r' % (posterUrl, e))

    return metadata
Beispiel #8
0
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a ClubFilly scene page.

    The first ``|``-separated field of ``metadata.id`` decodes to the scene
    URL (prefixed with the site base URL when it is not absolute). Actors are
    read from the comma-separated "Starring:" paragraph.

    Args:
        metadata: metadata object to populate.
        siteNum: site number handed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector (cleared, then filled).
        movieActors: actor collector (cleared, then filled).

    Returns:
        The populated ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//div[@class="fltWrap"]/h1/span')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//p[@class="description"]')[0].text_content().replace('Description:', '').strip()

    # Studio
    metadata.studio = 'ClubFilly'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = detailsPageElements.xpath(
        '//div[@class="fltRight"]')[0].text_content().replace('Release Date :', '').strip()
    if date:
        date_object = datetime.strptime(date, '%Y-%m-%d')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    movieGenres.addGenre('Lesbian')

    # Actors
    movieActors.clearActors()
    actorText = detailsPageElements.xpath(
        '//p[@class="starring"]')[0].text_content().replace('Starring:', '').strip()
    # Splitting an empty string yields [''], so blank entries are dropped to
    # avoid registering a nameless actor or miscounting performers
    actors = [name.strip() for name in actorText.split(',') if name.strip()]
    if len(actors) == 3:
        movieGenres.addGenre('Threesome')
    elif len(actors) == 4:
        movieGenres.addGenre('Foursome')
    elif len(actors) > 4:
        movieGenres.addGenre('Orgy')

    for actorName in actors:
        movieActors.addActor(actorName, '')

    # Posters
    xpaths = ['//ul[@id="lstSceneFocus"]/li/img/@src']
    art = [img for xpath in xpaths for img in detailsPageElements.xpath(xpath)]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best-effort: report the failure and move on
            Log('Failed to process artwork %s: %r' % (posterUrl, e))

    return metadata
Beispiel #9
0
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a ManyVids video page.

    ``metadata.id`` is split on ``|``: the first field is appended to
    ``/video/`` under the site base URL and the third field is the scene
    date. The uploader's user name is used as tagline, collection and as the
    single actor.

    Args:
        metadata: metadata object to populate.
        lang: not used by this function.
        siteNum: site number handed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector (cleared, then filled).
        movieActors: actor collector (cleared, then filled).

    Returns:
        The populated ``metadata`` object.
    """
    metadata_id = metadata.id.split('|')
    sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + '/video/' + metadata_id[0]
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h2[@class="h2 m-0"]')[0].text_content().strip()

    # Summary: all description blocks separated by blank lines; an empty
    # string when the page has none
    paragraphs = detailsPageElements.xpath('//div[@class="desc-text"]')
    metadata.summary = '\n\n'.join(paragraph.text_content() for paragraph in paragraphs).strip()

    # Studio
    metadata.studio = 'ManyVids'

    # Collections / Tagline
    metadata.collections.clear()
    userLink = detailsPageElements.xpath('//a[contains(@class, "username ")]')[0]
    tagline = userLink.text_content().strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="tags"]/a'):
        movieGenres.addGenre(genreLink.text_content().strip())

    # Actors
    movieActors.clearActors()
    actorName = userLink.text_content()
    actorPhotoURL = ''

    avatar = detailsPageElements.xpath('//div[@class="pr-2"]/a/img')
    if avatar:
        actorPhotoURL = avatar[0].get('src')

    movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    xpaths = [
        '//div[@id="rmpPlayer"]/@data-video-screenshot'
    ]
    art = [poster for xpath in xpaths for poster in detailsPageElements.xpath(xpath)]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best-effort: report the failure and move on
            Log('Failed to process artwork %s: %r' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from a Marc Dorcel scene or movie page.

    The first ``|``-separated field of ``metadata.id`` decodes to the page
    URL (prefixed with the site base URL when it is not absolute). Actor,
    date and group-genre handling differ depending on whether the URL
    contains the movie path marker.

    Args:
        metadata: metadata object to populate.
        siteNum: site number handed to the ``PAsearchSites`` lookups.
        movieGenres: genre collector (cleared, then filled).
        movieActors: actor collector (cleared, then filled).

    Returns:
        The populated ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # NOTE(review): the literal below contains asterisks and looks like it was
    # mangled by a word filter; as written it is unlikely to occur in a real
    # URL, in which case the movie branches never run - confirm the intended
    # path segment.
    isMovie = 'p**n-movie' in sceneURL

    # Title
    metadata.title = detailsPageElements.xpath('//h1')[0].text_content().strip()

    # Summary (optional on the page)
    summaryNodes = detailsPageElements.xpath('//span[@class="full"]')
    if summaryNodes:
        metadata.summary = summaryNodes[0].text_content().strip()

    # Studio
    metadata.studio = 'Marc Dorcel'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    movieGenres.addGenre('French p**n')

    movieName = detailsPageElements.xpath('//span[@class="movie"]/a')
    if movieName:
        metadata.collections.add(movieName[0].text_content().strip())
    movieGenres.addGenre('Blockbuster Movie')

    # Actors
    movieActors.clearActors()
    if isMovie:
        actors = detailsPageElements.xpath('//div[@class="actor thumbnail "]/a/div[@class="name"]')
    else:
        actors = detailsPageElements.xpath('//div[@class="actress"]/a')
        # Group genres are only derived from the cast size of single scenes
        if len(actors) == 3:
            movieGenres.addGenre('Threesome')
        elif len(actors) == 4:
            movieGenres.addGenre('Foursome')
        elif len(actors) > 4:
            movieGenres.addGenre('Orgy')

    for actorLink in actors:
        movieActors.addActor(actorLink.text_content().strip(), '')

    # Release Date
    if isMovie:
        date = detailsPageElements.xpath('//span[@class="out_date"]')[0].text_content().replace('Year :', '').strip()
    else:
        date = detailsPageElements.xpath('//span[@class="publish_date"]')[0].text_content().strip()
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Director: only add an entry when the page actually names one
    metadata.directors.clear()
    directorNodes = detailsPageElements.xpath('//span[@class="director"]')
    if directorNodes:
        director = metadata.directors.new()
        director.name = directorNodes[0].text_content().replace('Director :', '').strip()

    # Poster
    art = []

    xpaths = [
        '//div[contains(@class, "photos")]//source/@data-srcset'
    ]

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if ',' in img:
                # Several candidates in the srcset: keep the URL of the last one
                img = img.split(',')[-1].split()[0]

            # Drop the suffix that follows the third underscore (up to the
            # file extension) from the image URL
            trash = '_' + img.split('_', 3)[-1].rsplit('.', 1)[0]
            img = img.replace(trash, '', 1)

            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
        except Exception as e:
            # Artwork is best-effort: report the failure and move on
            Log('Failed to process artwork %s: %r' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from the scene details page referenced by ``metadata.id``.

    The first ``|``-separated field of ``metadata.id`` is decoded with
    ``PAutils.Decode`` into the scene URL, which is made absolute against the
    site's search base URL when it does not start with ``http``. Title,
    summary, studio, tagline/collection, release date, genres, actors,
    director and artwork are then written to ``metadata``, ``movieGenres``
    and ``movieActors``.

    Returns the same ``metadata`` object.

    Raises IndexError when the summary block, the date span or an actor
    thumbnail is missing from the page.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title: prefer the visible <h1>, then fall back to the social-media
    # <meta> tags. A <meta> element has no text content, so its value has to
    # be read from the @content attribute.
    title = ''
    headings = detailsPageElements.xpath('//h1')
    if headings:
        title = headings[0].text_content().strip()
    if not title:
        for metaXPath in ('//meta[@property="og:title"]/@content',
                          '//meta[@name="twitter:title"]/@content'):
            metaValues = detailsPageElements.xpath(metaXPath)
            if metaValues and metaValues[0].strip():
                title = metaValues[0].strip()
                break
    if title:
        metadata.title = title

    # Summary: everything before the 'Runtime' marker. str.find returns -1
    # when the marker is absent; slicing with -1 would drop the last
    # character, so only cut when the marker was actually found.
    summary = detailsPageElements.xpath(
        '//div[@class="record-description-content record-box-content"]'
    )[0].text_content().strip()
    runtimeIndex = summary.find('Runtime')
    if runtimeIndex != -1:
        summary = summary[:runtimeIndex]
    metadata.summary = summary.strip()

    # Studio
    metadata.studio = 'Hegre'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    date = detailsPageElements.xpath(
        '//span[@class="date"]')[0].text_content().strip()
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//a[@class="tag"]'):
        genreName = genreLink.text_content().strip().lower()

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//a[@class="record-model"]')
    if actors:
        # The number of performers implies an extra genre
        if len(actors) == 3:
            movieGenres.addGenre('Threesome')
        if len(actors) == 4:
            movieGenres.addGenre('Foursome')
        if len(actors) > 4:
            movieGenres.addGenre('Orgy')

        for actorLink in actors:
            actorName = actorLink.get('title').strip()
            # Swap the thumbnail size token for a larger variant
            actorPhotoURL = actorLink.xpath('.//img/@src')[0].replace(
                '240x', '480x')

            movieActors.addActor(actorName, actorPhotoURL)

    # Director
    director = metadata.directors.new()
    director.name = 'Petter Hegre'
    director.photo = 'https://img.discogs.com/TafxhnwJE2nhLodoB6UktY6m0xM=/fit-in/180x264/filters:strip_icc():format(jpeg):mode_rgb():quality(90)/discogs-images/A-2236724-1305622884.jpeg.jpg'

    # Posters: both candidates are derived from the twitter:image URL. Look
    # it up once and skip artwork entirely when the tag is missing instead
    # of aborting the whole update with an IndexError.
    art = []
    twitterImages = detailsPageElements.xpath(
        '//meta[@name="twitter:image"]/@content')
    if twitterImages:
        boardImage = twitterImages[0]
        art.append(
            boardImage.replace('board-image', 'poster-image').replace(
                '1600x', '640x'))
        art.append(boardImage.replace('1600x', '1920x'))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(
                    posterUrl, headers={'Referer': 'http://www.google.com'})
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except Exception as e:
                # Artwork is best-effort: record the failure and carry on
                Log('Failed to load artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` for a scene using the site's JSON API.

    ``metadata.id`` is split on ``|``: field 0 is the scene ID, field 2 the
    scene title and field 3 an encoded JSON list of artwork entries (each
    with a ``url`` key). Title, summary, studio, collection, release date,
    genres and actors are read from the API responses; the artwork URLs come
    from the ID itself.

    Returns the same ``metadata`` object.
    """
    idFields = str(metadata.id).split('|')
    sceneID = idFields[0]
    title = idFields[2].strip()

    scenePageURL = (PAsearchSites.getSearchBaseURL(siteNum) + '/scene/' +
                    sceneID + '/' + urllib.quote(title))
    apiurl = getAPIURL(scenePageURL)
    apiurl = PAsearchSites.getSearchSearchURL(siteNum) + apiurl
    sceneData = getJSONfromAPI(apiurl + updatequery.format(sceneID))[0]

    # Title
    metadata.title = PAutils.parseTitle(sceneData['title'].title(), siteNum)

    # Summary
    metadata.summary = sceneData['description']

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)

    # Release Date
    publishDate = sceneData['sites']['collection'][sceneID]['publishDate']
    metadata.originally_available_at = parse(publishDate)
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    movieActors.clearActors()

    # The tag collection is only walked when it is not a list; an absent
    # 'tags' key is treated like a list and skipped.
    tagCollection = sceneData['tags']['collection'] if 'tags' in sceneData else []
    if not isinstance(tagCollection, list):
        for tagInfo in tagCollection.values():
            alias = tagInfo['alias']
            if not alias:
                continue

            if siteNum == 1027:
                # For this site the tag aliases are registered as actors
                movieActors.addActor(alias.replace('-', ' '), '')
            else:
                movieGenres.addGenre(alias)

    # Actors
    modelData = getJSONfromAPI(apiurl + modelquery + sceneID)
    if not isinstance(modelData, list):
        for modelEntry in modelData.values():
            for model in modelEntry['modelId']['collection'].values():
                stageName = model['stageName']
                if stageName:
                    movieActors.addActor(stageName, '')

    # Single-performer sites: the site's own performer is always added
    # (an empty name is passed for every other site)
    siteActors = {
        1024: 'Aletta Ocean',
        1025: 'Eva Lovia',
        1026: 'Romi Rain',
        1030: 'Dani Daniels',
        1031: 'Chloe Toy',
        1033: 'Katya Clover',
        1035: 'Lisey Sweet',
        1037: 'Gina Gerson',
        1038: 'Valentina Nappi',
        1039: 'Vina Sky',
        1058: 'Vicki Valkyrie',
        1075: 'Dillion Harper',
        1191: 'Lilu Moon',
    }
    movieActors.addActor(siteActors.get(siteNum, ''), '')

    # Posters
    artEntries = json.loads(PAutils.Decode(idFields[3]))
    art = [entry['url'] for entry in artEntries] if artEntries else []

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if idx > 1 and width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass

    return metadata
def getJSONfromAPI(url):
    """Request ``url`` and return ``['response']['collection']`` of its JSON body."""
    payload = PAutils.HTTPRequest(url).json()
    return payload['response']['collection']
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Fill ``metadata`` from the scene page referenced by ``metadata.id``.

    Field 0 of the ``|``-separated ID is the encoded scene URL and field 2
    an encoded image URL, which is appended to the caller-supplied ``art``
    list when non-empty. Title, studio, tagline/collection, genres, release
    date, actors and artwork are written to the passed-in objects.

    Returns the same ``metadata`` object.
    """
    idFields = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(idFields[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)

    # Title (left untouched when the heading cannot be read)
    try:
        headings = page.xpath('//h1[@class="entry-title"]')
        metadata.title = headings[-1].text_content().strip()
    except:
        pass

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Collections / Tagline
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Genres: tag links that contain a folder icon
    folderTags = page.xpath(
        '//div[@class="tags-list"]/a//i[@class="fa fa-folder-open"]/..')
    for tagLink in folderTags:
        tagText = tagLink.text_content().replace('Movies', '')
        movieGenres.addGenre(tagText.strip().lower())

    # Release Date
    dateText = page.xpath('//div[@id="video-date"]')[0].text_content().strip()
    if dateText:
        dateText = dateText.replace('Date:', '').strip()
        metadata.originally_available_at = parse(dateText)
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()

    actorLinks = page.xpath('//div[@id="video-actors"]//a')
    # The number of performers implies an extra genre
    actorCount = len(actorLinks)
    if actorCount == 3:
        movieGenres.addGenre('Threesome')
    elif actorCount == 4:
        movieGenres.addGenre('Foursome')
    elif actorCount > 4:
        movieGenres.addGenre('Orgy')

    for actorLink in actorLinks:
        movieActors.addActor(actorLink.text_content(), '')

    # Posters
    idImage = PAutils.Decode(idFields[2])
    if idImage:
        art.append(idImage)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass

    return metadata
Beispiel #15
0
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Append matching scenes to ``results`` and return it.

    When the first word of ``searchTitle`` is all digits it is used as a
    scene ID: the ``/vrpornvideo/<id>`` page is fetched directly and added
    with a score of 100. Otherwise the site's search URL is queried with
    ``encodedTitle`` and every result tile is scored by Levenshtein distance
    (dates when both are known, lower-cased titles otherwise).
    """
    siteName = PAsearchSites.getSearchSiteName
    words = searchTitle.split(' ')

    if unicode(words[0], 'utf8').isdigit():
        # Direct lookup by scene ID
        sceneID = words[0]
        searchTitle = searchTitle.replace(sceneID, '', 1).strip()
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + '/vrpornvideo/' + sceneID

        scenePage = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)
        titleNoFormatting = scenePage.xpath(
            '//h1[contains(@class, "video-title")]')[0].text_content()
        curID = PAutils.Encode(sceneURL)

        uploadDates = scenePage.xpath('//p[@itemprop="uploadDate"]/@content')
        releaseDate = parse(uploadDates[0]).strftime('%Y-%m-%d') if uploadDates else ''

        # No performer name is read from the scene page
        resultName = '[%s] %s in %s %s' % (siteName(siteNum), '',
                                           titleNoFormatting, releaseDate)
        results.Append(
            MetadataSearchResult(id='%s|%d' % (curID, siteNum),
                                 name=resultName,
                                 score=100,
                                 lang=lang))
        return results

    # Title search through the site's own search page
    searchURL = PAsearchSites.getSearchSearchURL(siteNum) + encodedTitle
    listing = HTML.ElementFromString(PAutils.HTTPRequest(searchURL).text)
    titleAnchor = './/a[contains(@class, "video-card-title")]'

    for tile in listing.xpath('//div[@class="tile-grid-item"]'):
        titleNoFormatting = tile.xpath(titleAnchor + '/@title')[0]
        curID = PAutils.Encode(tile.xpath(titleAnchor + '/@href')[0])

        uploadDates = tile.xpath(
            './/span[@class="video-card-upload-date"]/@content')
        releaseDate = parse(uploadDates[0]).strftime('%Y-%m-%d') if uploadDates else ''

        girlName = tile.xpath('.//a[@class="video-card-link"]')[0].text_content()

        # Compare dates when both are available, titles otherwise
        if searchDate and releaseDate:
            distance = Util.LevenshteinDistance(searchDate, releaseDate)
        else:
            distance = Util.LevenshteinDistance(searchTitle.lower(),
                                                titleNoFormatting.lower())

        resultName = '[%s] %s in %s %s' % (siteName(siteNum), girlName,
                                           titleNoFormatting, releaseDate)
        results.Append(
            MetadataSearchResult(id='%s|%d' % (curID, siteNum),
                                 name=resultName,
                                 score=100 - distance,
                                 lang=lang))

    return results
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from the scene page referenced by ``metadata.id``.

    Field 0 of the ``|``-separated ID is decoded into the scene URL (made
    absolute against the site's base URL when needed). Title, summary,
    studio/tagline/collection, genres and artwork are written to
    ``metadata`` and ``movieGenres``; ``movieActors`` is only cleared.

    Returns the same ``metadata`` object.

    Raises IndexError when the title or summary element is missing.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    # Only the first ID field is needed here. Further fields are not
    # indexed, so a two-field ID cannot raise IndexError.
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)
    movieActors.clearActors()

    # Title
    metadata.title = detailsPageElements.xpath(
        '//div[contains(@class, "title")]/h2')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="description"]/div[@class="desc-text"]')[0].text_content(
        ).strip()

    # Studio/Tagline/Collection
    metadata.collections.clear()
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//ul[contains(@class, "tags")]//a'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Posters/Background
    art = []
    xpaths = [
        '//meta[@property="og:image"]/@content',
        '//div[contains(@class, "thumbnails")]//img/@src',
    ]
    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            # Drop the query string from the image URL
            poster = poster.split('?')[0]

            art.append(poster)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100 and idx > 1:
                    # Item is an art item (the first image is poster-only)
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except Exception as e:
                # Artwork is best-effort: record the failure and carry on
                Log('Failed to load artwork %s: %s' % (posterUrl, e))

    return metadata
Beispiel #17
0
def update(metadata, siteID, movieGenres, movieActors):
    """Fill ``metadata`` from the scene page referenced by ``metadata.id``.

    Field 0 of the ``|``-separated ID is decoded into the scene URL (made
    absolute against the site's base URL when needed). Each actor link on
    the page is followed to fetch that actor's photo. Title, summary,
    studio, tagline/collection, release date, genres, actors and artwork
    are written to the passed-in objects.

    Returns the same ``metadata`` object.
    """
    idFields = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(idFields[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteID) + sceneURL
    page = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)

    # Title
    titleNode = page.xpath('//h1[contains(@class, "video-title")]')[0]
    metadata.title = titleNode.text_content()

    # Summary
    summaryNode = page.xpath('//p[@class="video-description"]')[0]
    metadata.summary = summaryNode.text_content().strip()

    # Studio
    metadata.studio = 'BadoinkVR'

    # Tagline and Collection
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteID)
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date (skipped when the page carries no upload date)
    uploadDates = page.xpath('//p[@itemprop="uploadDate"]/@content')
    if uploadDates:
        metadata.originally_available_at = parse(uploadDates[0])
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for tagLink in page.xpath('//a[@class="video-tag"]'):
        movieGenres.addGenre(tagLink.text_content().strip())

    # Actors: the photo comes from each actor's own page
    movieActors.clearActors()
    for actorLink in page.xpath('//a[contains(@class,"video-actor-link")]'):
        actorName = actorLink.text_content().strip()

        actorPageURL = PAsearchSites.getSearchBaseURL(siteID) + actorLink.get('href')
        actorPage = HTML.ElementFromString(PAutils.HTTPRequest(actorPageURL).text)
        photoSources = actorPage.xpath('//img[@class="girl-details-photo"]/@src')
        actorPhotoURL = photoSources[0].split('?')[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters: gallery images first, then the main video image, each with
    # its query string removed
    imageXPaths = (
        '//div[contains(@class,"gallery-item")]/@data-big-image',
        '//img[@class="video-image"]/@src',
    )
    art = [
        source.split('?')[0]
        for imageXPath in imageXPaths
        for source in page.xpath(imageXPath)
    ]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(
                posterUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100 and idx > 1:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass

    return metadata
Beispiel #18
0
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    """Fill ``metadata`` from the scene page referenced by ``metadata.id``.

    Field 0 of the ``|``-separated ID is decoded into the scene URL (made
    absolute against the site's base URL when needed). Each actor link is
    followed to fetch that actor's photo. Artwork URLs found on the page
    are appended to the caller-supplied ``art`` list before downloading.

    Returns the same ``metadata`` object.

    Raises IndexError when the title, summary, date or an actor photo is
    missing from the fetched pages.
    """
    metadata_id = metadata.id.split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//h1')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[contains(@class, "u-mb--six ")]')[0].text_content().strip()

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Release Date: the first element matching any of the known date
    # containers is used
    date = detailsPageElements.xpath(
        '//span[@class="date-display-single"] | //span[@class="u-inline-block u-mr--nine"] | //div[@class="video-meta-date"] | //div[@class="date"]'
    )[0].text_content().strip()
    date_object = parse(date)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//a[contains(@href, "/list/category/")]'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Actors: the photo comes from each actor's own page
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath(
            '//a[contains(@href, "/pornstars/model/")]'):
        actorPageURL = PAsearchSites.getSearchBaseURL(siteNum) + actorLink.get(
            'href')
        req = PAutils.HTTPRequest(actorPageURL)
        actorPage = HTML.ElementFromString(req.text)

        actorName = actorLink.text_content().strip()
        actorPhotoURL = actorPage.xpath(
            '//div[contains(@class, "u-ratio--model-poster")]//img/@data-src'
        )[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters/Background
    xpaths = [
        '//div[contains(@class, "splash-screen")]/@style',
        '//a[contains(@class, "u-ratio--lightbox")]/@href',
    ]
    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            if poster.startswith('background-image'):
                # The splash screen gives an inline style, not a URL: keep
                # only what is inside url(...) and drop optional quotes.
                # Strings are immutable, so the result must be re-assigned.
                poster = poster.split('url(')[1].split(')')[0].strip('\'" ')

            art.append(poster)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100 and idx > 1:
                    # Item is an art item (the first image is poster-only)
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except Exception as e:
                # Artwork is best-effort: record the failure and carry on
                Log('Failed to load artwork %s: %s' % (posterUrl, e))

    return metadata
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Append scenes found through a Google site search to ``results``.

    Google hits containing ``/view/`` (and not ``photoset``) are treated as
    scene pages; hits containing ``/model/`` are treated as model pages
    whose listed scenes are added unless already collected as direct hits.
    Each result is scored by Levenshtein distance on the release date when
    ``searchDate`` is given, otherwise on the lower-cased title.

    Returns ``results``.
    """
    def cleanURL(rawURL):
        # Drop the query string and the first 'dev.' occurrence
        return rawURL.split('?')[0].replace('dev.', '', 1)

    def addResult(url, title, dateText):
        # Score one scene and append it to the result list
        sceneKey = PAutils.Encode(url)
        released = parse(dateText).strftime('%Y-%m-%d')

        if searchDate:
            distance = Util.LevenshteinDistance(searchDate, released)
        else:
            distance = Util.LevenshteinDistance(searchTitle.lower(),
                                                title.lower())

        displayName = '%s [%s] %s' % (
            title, PAsearchSites.getSearchSiteName(siteNum), released)
        results.Append(
            MetadataSearchResult(id='%s|%d' % (sceneKey, siteNum),
                                 name=displayName,
                                 score=100 - distance,
                                 lang=lang))

    googleResults = PAutils.getFromGoogleSearch(searchTitle, siteNum)

    # Pass 1: collect unique direct scene URLs
    directScenes = []
    for hit in googleResults:
        candidate = cleanURL(hit)
        isScene = '/view/' in candidate and 'photoset' not in candidate
        if isScene and candidate not in directScenes:
            directScenes.append(candidate)

    # Pass 2: expand model pages into the scenes they list
    for hit in googleResults:
        if '/model/' not in hit:
            continue

        modelPage = HTML.ElementFromString(PAutils.HTTPRequest(hit).text)
        for item in modelPage.xpath('//div[contains(@class, "content-item")]'):
            itemURL = cleanURL(item.xpath('.//h3//@href')[0])
            if itemURL in directScenes:
                # Will be handled from its own page in pass 3
                continue

            itemTitle = item.xpath('.//h3')[0].text_content().strip()
            itemDate = item.xpath(
                './/span[@class="pub-date"]')[0].text_content().strip()
            addResult(itemURL, itemTitle, itemDate)

    # Pass 3: fetch every direct scene page for its title and date
    for sceneURL in directScenes:
        scenePage = HTML.ElementFromString(PAutils.HTTPRequest(sceneURL).text)

        pageTitle = scenePage.xpath('//h1')[0].text_content().strip()
        pageDate = scenePage.xpath(
            '//span[@class="date"]')[0].text_content().strip()
        addResult(sceneURL, pageTitle, pageDate)

    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Fill ``metadata`` from the scene page referenced by ``metadata.id``.

    Field 0 of the ``|``-separated ID is the encoded scene URL; field 2,
    when present, is a fallback release date. The page layout differs
    depending on whether the URL contains ``webmasters``, so most lookups
    have two variants. Actor pages are fetched for photos and, for
    ``webmasters`` URLs, scanned for the scene's release date.

    Returns the same ``metadata`` object.

    Raises IndexError when the title, keywords meta tag or an actor image
    is missing from the fetched pages.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    # The date field is optional; default to '' so the fallbacks below never
    # touch an unbound name when the ID has fewer than three fields.
    sceneDate = metadata_id[2] if len(metadata_id) > 2 else ''
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title: the heading starts with the numeric scene ID, which is removed
    # from the title and kept separately as sceneID
    if 'webmasters' in sceneURL:
        resultTitleID = detailsPageElements.xpath('//h1/text()')[0].title()
    else:
        resultTitleID = detailsPageElements.xpath(
            '//h4/span')[0].text_content().title()
    metadata.title = re.sub(r'^\d+', '', resultTitleID)

    # Everything from the first non-digit onwards is dropped
    sceneID = re.sub(r'\D.*', '', resultTitleID)

    # Summary (left untouched when it cannot be read)
    try:
        if 'webmasters' in sceneURL:
            metadata.summary = detailsPageElements.xpath(
                '//div[@class="row gallery-description"]//div'
            )[1].text_content().strip()
        else:
            metadata.summary = detailsPageElements.xpath(
                '//div[@class="row"]//a/@title')[0].strip()
    except Exception:
        pass

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Actors
    if 'webmasters' in sceneURL:
        # NOTE(review): 'spam' looks like a typo for 'span' - confirm against
        # the live markup before changing the selector.
        actors = detailsPageElements.xpath('//spam[@class="key-words"]//a')
    else:
        actors = detailsPageElements.xpath('//h5//a')

    # Remove Actor Names from Genre List
    genres = detailsPageElements.xpath(
        '//meta[@name="keywords"]/@content')[0].replace('Aussie Ass', '')
    genres = re.sub(r'id.\d*', '', genres, flags=re.IGNORECASE)

    for actorLink in actors:
        actorName = actorLink.text_content().title()
        genres = genres.replace(actorName, '')

        modelURL = actorLink.xpath('./@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        # Intentionally left bound after the loop: the date lookup below
        # scans the last actor's page.
        actorsPageElements = HTML.ElementFromString(req.text)

        # Reset per actor so an empty image never inherits the previous
        # actor's photo
        actorPhotoURL = ''
        img = actorsPageElements.xpath(
            '//img[contains(@id,"set-target")]/@src')[0]
        if img:
            actorPhotoURL = img
            if 'http' not in actorPhotoURL:
                actorPhotoURL = PAsearchSites.getSearchBaseURL(
                    siteNum) + actorPhotoURL

        movieActors.addActor(actorName, actorPhotoURL)

    # Date
    date = ''

    try:
        if 'webmasters' in sceneURL:
            # Raises (and falls back to sceneDate) when there were no actors
            pageResults = int(actorsPageElements.xpath(
                '//span[@class="number_item "]')[0].text_content().strip())

            if not pageResults:
                pageResults = 1

            for x in range(pageResults):
                if x == 1:
                    # Only the second listing page is ever loaded
                    req = PAutils.HTTPRequest(
                        PAsearchSites.getSearchBaseURL(siteNum) +
                        actorsPageElements.xpath(
                            '//a[contains(@class,"in_stditem")]/@href')[1])
                    actorsPageElements = HTML.ElementFromString(req.text)

                for sceneElements in actorsPageElements.xpath(
                        '//div[@class="box"]'):
                    if sceneID in sceneElements.xpath('.//a/text()')[1]:
                        # Prefer the date inside the matched box; fall back
                        # to the first date on the page if the box has none.
                        dateNodes = sceneElements.xpath(
                            './/span[@class="video-date"]'
                        ) or actorsPageElements.xpath(
                            '//span[@class="video-date"]')
                        date = dateNodes[0].text_content().strip()
                        break

                if date:
                    # Found: no need to fetch or rescan further pages
                    break
        else:
            date = sceneDate
    except Exception:
        date = sceneDate

    if date:
        # Round-trip through a day-month-year string to keep only the date
        date = parse(date).strftime('%d-%m-%Y')
        date_object = datetime.strptime(date, '%d-%m-%Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    for genre in genres.split(','):
        movieGenres.addGenre(genre.strip())

    # Posters
    art = []
    xpaths = [
        '//img[contains(@alt,"content")]/@src',
        '//div[@class="box"]//img/@src',
    ]

    altURL = ''

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if 'http' not in img:
                if 'join' in img:
                    # Stop collecting from this xpath at the first relative
                    # 'join' image
                    break
                elif 'webmasters' in sceneURL:
                    img = sceneURL + "/" + img
                else:
                    img = PAsearchSites.getSearchBaseURL(siteNum) + img
            art.append(img)
        if 'webmasters' not in sceneURL:
            # Switch to the scene's webmasters page so the next xpath is
            # evaluated against it
            altURL = PAsearchSites.getSearchBaseURL(
                siteNum) + "/webmasters/" + sceneID
            req = PAutils.HTTPRequest(altURL)
            detailsPageElements = HTML.ElementFromString(req.text)
            sceneURL = altURL

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1 or height > width:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except Exception as e:
                # Artwork is best-effort: record the failure and carry on
                Log('Failed to load artwork %s: %s' % (posterUrl, e))

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` from the scene page referenced by ``metadata.id``.

    The first '|'-separated field of the id is decoded into the scene URL
    (prefixed with the site's base URL when it is not absolute). Title,
    summary, release date, genres and gallery images are read from that page.

    Returns the same ``metadata`` object.
    """
    idParts = str(metadata.id).split('|')
    pageURL = PAutils.Decode(idParts[0])
    if not pageURL.startswith('http'):
        pageURL = PAsearchSites.getSearchBaseURL(siteNum) + pageURL
    response = PAutils.HTTPRequest(pageURL)
    page = HTML.ElementFromString(response.text)

    # Title (the site suffix is removed from the og:title value)
    ogTitle = page.xpath('//meta[@property="og:title"]/@content')[0]
    metadata.title = ogTitle.replace(' - Mormon Girlz', '').strip()

    # Summary
    summaryNode = page.xpath(
        '//*[contains(@id, "post-")]/aside[2]/div/div[1]')[0]
    metadata.summary = summaryNode.text_content().strip()

    # Studio
    metadata.studio = 'MormonGirlz'

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date
    published = page.xpath(
        '//meta[@property="article:published_time"]/@content')[0].strip()
    if published:
        metadata.originally_available_at = parse(published)
        metadata.year = metadata.originally_available_at.year

    # Genres: taken from "more of ..." headings, minus the "more of" prefix
    movieGenres.clearGenres()
    genreHeadings = page.xpath(
        '//h1[contains(text(), "more of") and not(contains(text(), "Mormon Girls"))]'
    )
    for heading in genreHeadings:
        movieGenres.addGenre(heading.text.replace('more of', '').strip())

    # Actors
    movieActors.clearActors()

    # Posters
    art = list(page.xpath('//*[@class="ngg-gallery-thumbnail"]/a/@href'))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis; failures are ignored per image
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 and height >= width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass

    return metadata
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    """Append search matches for a scene ID or a model name to ``results``.

    When ``searchTitle`` starts with digits, those digits are used as a scene
    ID: the "/webmasters/<id>" page is fetched and appended as a single
    result with score 100. Otherwise the title is turned into a model page
    slug, and every scene listed on that page is scored and appended.

    ``encodedTitle`` is not read; the slug is always rebuilt from
    ``searchTitle``.

    Returns ``results``.
    """
    # Leading digits of the title (everything from the first non-digit on is
    # dropped); empty when the title does not start with a digit.
    sceneID = re.sub(r'\D.*', '', searchTitle)

    if sceneID:
        sceneURL = PAsearchSites.getSearchBaseURL(
            siteNum) + "/webmasters/" + sceneID
        req = PAutils.HTTPRequest(sceneURL)
        searchResults = HTML.ElementFromString(req.text)
        titleNoFormatting = re.sub(
            r'^\d+', '',
            searchResults.xpath('//h1/text()')[0].title())
        curID = PAutils.Encode(sceneURL)

        results.Append(
            MetadataSearchResult(
                id='%s|%d' % (curID, siteNum),
                name='%s [%s]' %
                (titleNoFormatting, PAsearchSites.getSearchSiteName(siteNum)),
                score=100,
                lang=lang))
        return results

    # Handle 3 Types of Links: First, Last; First Only; First-Last
    # The first two words are tried joined directly, then joined with '-'.
    searchResults = None
    for separator in ('', '-'):
        try:
            slug = re.search(r'^\S*.\S*', searchTitle).group(0).replace(
                ' ', separator).lower()
            req = PAutils.HTTPRequest(
                PAsearchSites.getSearchSearchURL(siteNum) + slug + ".html")
            candidate = HTML.ElementFromString(req.text)
            if candidate.xpath(
                    '//html')[0].text_content() != 'Page not found':
                searchResults = candidate
                break
        except Exception:
            # Any failure for this slug form just moves on to the next one.
            pass

    if searchResults is None:
        # Last resort: first word only. Errors here propagate to the caller.
        slug = re.search(r'^\S*', searchTitle).group(0).lower()
        req = PAutils.HTTPRequest(
            PAsearchSites.getSearchSearchURL(siteNum) + slug + ".html")
        searchResults = HTML.ElementFromString(req.text)

    try:
        pageResults = int(searchResults.xpath(
            '//span[@class="number_item "]')[0].text_content().strip())
    except Exception:
        pageResults = 1

    for x in range(pageResults):
        if x == 1:
            # NOTE(review): only the second page is ever fetched; for
            # pageResults > 2 the later iterations re-read this same page.
            # Confirm the pagination markup before extending this.
            nextPage = searchResults.xpath(
                '//a[contains(@class,"in_stditem")]/@href')[1]
            req = PAutils.HTTPRequest(
                PAsearchSites.getSearchBaseURL(siteNum) + nextPage)
            searchResults = HTML.ElementFromString(req.text)

        for searchResult in searchResults.xpath('//div[@class="infos"]'):
            resultTitleID = searchResult.xpath(
                './/span[@class="video-title"]')[0].text_content().strip(
                ).title()

            # Split "<digits><title>" into its title and ID parts
            titleNoFormatting = re.sub(r'^\d+', '', resultTitleID)
            resultID = re.sub(r'\D.*', '', resultTitleID)

            sceneURL = searchResult.xpath('.//a/@href')[0]
            curID = PAutils.Encode(sceneURL)

            date = searchResult.xpath(
                './/span[@class="video-date"]')[0].text_content().strip()

            # Prefer the date shown on the page; fall back to the search
            # date. (Previously the page date was re-parsed unconditionally
            # afterwards, which discarded this fallback and failed on an
            # empty date.)
            if date:
                releaseDate = parse(date).strftime('%Y-%m-%d')
            else:
                releaseDate = parse(searchDate).strftime(
                    '%Y-%m-%d') if searchDate else ''
            displayDate = releaseDate if date else ''

            # NOTE(review): sceneID is always empty on this path, so the
            # first test only matches results whose title has no leading
            # digits.
            if sceneID == resultID:
                score = 100
            elif searchDate and displayDate:
                score = 100 - Util.LevenshteinDistance(
                    searchDate, releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(
                    searchTitle.lower(), titleNoFormatting.lower())

            results.Append(
                MetadataSearchResult(
                    id='%s|%d|%s' % (curID, siteNum, releaseDate),
                    name='%s [%s] %s' %
                    (titleNoFormatting,
                     PAsearchSites.getSearchSiteName(siteNum),
                     releaseDate),
                    score=score,
                    lang=lang))

    return results
Beispiel #23
0
def search(results, lang, siteNum, searchData):
    """Query the site's JSON search endpoint and append matches to ``results``.

    The first query uses ``searchData.encoded``. If it yields nothing and a
    filename is available, a title and date are extracted from the filename
    (written back onto ``searchData``) and the query is repeated.

    Returns ``results``.
    """
    searchURL = PAsearchSites.getSearchSearchURL(siteNum) + searchData.encoded
    req = PAutils.HTTPRequest(searchURL)
    searchResults = req.json()

    # The filename fallback needs BOTH "no results" and a filename. Without
    # the explicit grouping, `and` binds tighter than `or`, so a response
    # lacking the 'results' key entered this branch even with no filename.
    noResults = 'results' not in searchResults or not searchResults['results']
    if noResults and searchData.filename:
        # femjoy.17.03.12.maria.rya.girl.in.the.mirror.mp4
        # try to extract as much of the title as possible without including the model
        m = re.search(r'femjoy\.(\d{2}\.\d{2}\.\d{2})\.(.+)',
                      searchData.filename, re.IGNORECASE)
        if m:
            searchData.date = parse('20' + m.group(1)).strftime('%Y-%m-%d')
            searchWords = m.group(2).split('.')
            if len(searchWords) > 2:
                # Skip the first two words
                searchData.title = ' '.join(searchWords[2:])
            else:
                searchData.title = searchWords[-1]
        else:
            # Belinda & Fiva - Give me your hand 29-Mar-2010.mp4
            # take everything from after the dash and before the date
            m = re.search(r'.+ - (.+) (\d{2}-[a-z]{3}-\d{4})',
                          searchData.filename, re.IGNORECASE)
            if m:
                searchData.date = parse(m.group(2)).strftime('%Y-%m-%d')
                searchData.title = m.group(1)

        searchData.encoded = urllib.quote(searchData.title)
        searchURL = PAsearchSites.getSearchSearchURL(
            siteNum) + searchData.encoded
        req = PAutils.HTTPRequest(searchURL)
        searchResults = req.json()

    if 'results' in searchResults:
        # Every result carries the search URL as its encoded ID; the scene is
        # told apart by the numeric id appended as the third field.
        curID = PAutils.Encode(searchURL)

        for searchResult in searchResults['results']:
            titleNoFormatting = searchResult['title']
            sceneID = searchResult['id']

            date = searchResult['release_date']
            if date:
                releaseDate = parse(date).strftime('%Y-%m-%d')
            else:
                releaseDate = searchData.dateFormat(
                ) if searchData.date else ''
            displayDate = releaseDate if date else ''

            actorsName = [
                actorLink['name'] for actorLink in searchResult['actors']
            ]
            actorsString = getActorsString(actorsName)

            # Score on the date when both sides have one, else on the title
            if searchData.date and displayDate:
                score = 100 - Util.LevenshteinDistance(searchData.date,
                                                       releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(
                    searchData.title.lower(), titleNoFormatting.lower())

            results.Append(
                MetadataSearchResult(
                    id='%s|%d|%d' % (curID, siteNum, sceneID),
                    name='%s - %s [%s] %s' %
                    (titleNoFormatting, actorsString,
                     PAsearchSites.getSearchSiteName(siteNum), displayDate),
                    score=score,
                    lang=lang))

    return results
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` from the scene page referenced by ``metadata.id``.

    The first '|'-separated field of the id is decoded into the scene URL
    (prefixed with the site's base URL when it is not absolute). The actor is
    fixed; title, summary, date and gallery images come from the page.

    Returns the same ``metadata`` object.
    """
    idParts = str(metadata.id).split('|')
    pageURL = PAutils.Decode(idParts[0])
    if not pageURL.startswith('http'):
        pageURL = PAsearchSites.getSearchBaseURL(siteNum) + pageURL
    response = PAutils.HTTPRequest(pageURL)
    page = HTML.ElementFromString(response.text)

    movieGenres.clearGenres()

    # Title: last '|'-separated segment of the <title> text
    titleText = page.xpath('//title')[0].text_content()
    metadata.title = titleText.split('|')[-1].strip()

    # Summary
    descriptionMeta = page.xpath('//meta[@name="description"]')[0]
    metadata.summary = descriptionMeta.get('content').strip()

    # Studio
    metadata.studio = 'ReidMyLips'

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date
    dateNode = page.xpath(
        '/html/body/div/div[4]/div[4]/div/main/div[2]/div[2]/div/div/div[2]/div/div/div[4]/p/span'
    )[0]
    dateText = dateNode.text_content().strip()
    if dateText:
        metadata.originally_available_at = datetime.strptime(
            dateText, '%B %d, %Y')
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    movieActors.addActor('Riley Reid', '')

    # Posters: gallery download links with their query string removed
    downloadLinks = page.xpath(
        '//div[@id="pro-gallery-margin-container"]//a[@class="block-fullscreen gallery-item-social-download  pull-right  gallery-item-social-button"]//@href'
    )
    art = [link.split('?')[0] for link in downloadLinks]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis; failures are ignored per image
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` purely from the fields packed into ``metadata.id``.

    No page is fetched for the scene itself: title, description, date and
    poster URL are read from the '|'-separated id (fields 0, 2, 3 and 4).
    Genres are chosen from the sub-site name.

    Returns the same ``metadata`` object.
    """
    idParts = str(metadata.id).split('|')
    title = PAutils.Decode(idParts[0])
    description = PAutils.Decode(idParts[2])
    dateText = idParts[3]
    posterSource = PAutils.Decode(idParts[4])

    metadata.collections.clear()
    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title
    metadata.title = title

    # Summary
    metadata.summary = description

    # Studio
    metadata.studio = 'Thick Cash'

    # Tagline and Collection(s)
    subSite = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = subSite
    if Prefs['collections_addsitename']:
        metadata.collections.add(subSite)

    # Genres: fixed list per sub-site, matched case-insensitively
    genresBySite = (
        ('Family Lust', ['Family Roleplay']),
        ('Over 40 Handjobs', ['MILF', 'Handjob']),
        ('Ebony Tugs', ['Ebony', 'Handjob']),
        ('Teen Tugs', ['Teen', 'Handjob']),
    )
    subSiteKey = subSite.lower()
    for siteLabel, genreNames in genresBySite:
        if subSiteKey == siteLabel.lower():
            for genreName in genreNames:
                movieGenres.addGenre(genreName)
            break

    # Release Date
    metadata.originally_available_at = parse(dateText)
    metadata.year = metadata.originally_available_at.year

    # Posters
    art = [posterSource]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis; failures are ignored per image
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` from the scene page referenced by ``metadata.id``.

    The first '|'-separated field of the id is decoded into the scene URL
    (prefixed with the site's base URL when it is not absolute). Each actor's
    own page is fetched to look up a photo; a failed lookup still adds the
    actor.

    Returns the same ``metadata`` object.
    """
    idParts = metadata.id.split('|')
    pageURL = PAutils.Decode(idParts[0])
    if not pageURL.startswith('http'):
        pageURL = PAsearchSites.getSearchBaseURL(siteNum) + pageURL
    response = PAutils.HTTPRequest(pageURL)
    page = HTML.ElementFromString(response.text)

    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title
    titleNode = page.xpath('//div[@class="main-info-left"]/h1')[0]
    metadata.title = titleNode.text_content().strip()

    # Summary
    summaryNode = page.xpath('//p[@class="description"]')[0]
    metadata.summary = summaryNode.text_content().strip()

    # Studio
    metadata.studio = 'Love Her Feet'

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date
    dateText = page.xpath('//div[@class="date"]')[0].text_content().strip()
    if dateText:
        metadata.originally_available_at = datetime.strptime(
            dateText, '%B %d, %Y')
        metadata.year = metadata.originally_available_at.year

    # Genres: page tags plus one fixed genre
    movieGenres.clearGenres()
    for tagLink in page.xpath('//div[@class="video-tags"]/a'):
        movieGenres.addGenre(tagLink.text_content().strip())
    movieGenres.addGenre('Foot Sex')

    # Actors
    movieActors.clearActors()
    actorLinks = page.xpath('//div[@class="featured"]/a')

    # The number of featured actors adds a group-size genre
    actorCount = len(actorLinks)
    if actorCount == 3:
        movieGenres.addGenre('Threesome')
    elif actorCount == 4:
        movieGenres.addGenre('Foursome')
    elif actorCount > 4:
        movieGenres.addGenre('Orgy')

    for actorLink in actorLinks:
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''
        # The photo is assigned step by step so that a failure part-way
        # leaves whatever value had been reached at that point.
        try:
            profileResponse = PAutils.HTTPRequest(actorLink.get('href'))
            profilePage = HTML.ElementFromString(profileResponse.text)
            actorPhotoURL = profilePage.xpath(
                '//div[@class="picture"]/img')[0].get("src0_3x")
            if 'http' not in actorPhotoURL:
                actorPhotoURL = PAsearchSites.getSearchBaseURL(
                    siteNum) + actorPhotoURL
        except:
            pass

        movieActors.addActor(actorName, actorPhotoURL)

    # Photos
    art = []
    for imageXPath in ('//meta[@property="og:image"]/@content',
                       '//div[@class="photos"]/a/img/@src'):
        art.extend(page.xpath(imageXPath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis; failures are ignored per image
        try:
            image = PAutils.HTTPRequest(
                posterUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` from the scene page and the fields in ``metadata.id``.

    The id is '|'-separated: field 0 is the encoded scene URL, field 2 the
    encoded title and field 3 the encoded release date. Actor names are
    derived from the title by dropping 'BTS' and digits and splitting on
    ' X '.

    Returns the same ``metadata`` object.
    """
    idParts = str(metadata.id).split('|')
    pageURL = PAutils.Decode(idParts[0])
    if not pageURL.startswith('http'):
        pageURL = PAsearchSites.getSearchBaseURL(siteNum) + pageURL
    response = PAutils.HTTPRequest(pageURL)
    page = HTML.ElementFromString(response.text)

    # Studio
    metadata.studio = 'Angela White'

    # Title
    metadata.title = PAutils.Decode(idParts[2]).strip()

    # Summary
    summaryNode = page.xpath('//div[@class="desc"]/p')[0]
    metadata.summary = summaryNode.text_content().strip()

    # Tagline and Collection(s)
    metadata.collections.clear()
    siteName = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Genres: comma-separated keywords with periods removed
    movieGenres.clearGenres()
    keywords = page.xpath('//meta[@name="keywords"]/@content')[0]
    for keyword in keywords.replace('.', '').split(','):
        movieGenres.addGenre(keyword.strip())

    # Release Date
    metadata.originally_available_at = parse(PAutils.Decode(idParts[3]))
    metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    withoutBTS = metadata.title.replace('BTS', '')
    namesOnly = ''.join(ch for ch in withoutBTS if not ch.isdigit()).strip()
    for namePart in namesOnly.split(' X '):
        movieActors.addActor(namePart.strip().lower(), '')

    # Posters: thumbnail paths are relative to the site's base URL
    art = [
        PAsearchSites.getSearchBaseURL(siteNum) + thumbPath
        for thumbPath in page.xpath(
            '//img[contains(@class, "tour-area-thumb")]/@data-src')
    ]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue
        # Download image file for analysis; failures are ignored per image
        try:
            image = PAutils.HTTPRequest(posterUrl)
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1 or height > width:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except:
            pass

    return metadata
def update(metadata, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` from the scene page referenced by ``metadata.id``.

    The first '|'-separated field of the id is decoded into the scene URL.
    Actor names come from the title (split on '&') or from the page's
    "site-name" element, and each name is looked up on the site's per-letter
    "/actrices/<letter>" index to find a photo.

    Returns the same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    # '/en/' pages get their title passed through parseTitle and join actor
    # names with ' and '; all other pages use the raw title and ' y '.
    isEnPage = '/en/' in sceneURL

    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    pageTitle = detailsPageElements.xpath('//title')[0].text_content().split('|')[0].split('-')[0].strip()
    if isEnPage:
        metadata.title = PAutils.parseTitle(pageTitle, siteNum)
    else:
        metadata.title = pageTitle

    # Summary
    metadata.summary = detailsPageElements.xpath('//div[@class="description clearfix"]')[0].text_content().split(':')[-1].strip().replace('\n', ' ')

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.studio = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="categories"]/a'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    if '&' in metadata.title:
        actors = metadata.title.split('&')
    else:
        separator = ' and ' if isEnPage else ' y '
        actors = detailsPageElements.xpath('//span[@class="site-name"]')[0].text_content().split(separator)

    modelsXPath = '//div[@class="c-boxlist__box--image"]//parent::a'
    # Model links per index letter, so each index page is downloaded once.
    modelsByLetter = {}

    # Does the whole title name a model? This does not depend on the actor
    # being processed, so it is resolved once instead of once per actor.
    titleLetter = metadata.title[0].lower()
    req = PAutils.HTTPRequest('%s/actrices/%s' % (PAsearchSites.getSearchBaseURL(siteNum), titleLetter))
    modelsByLetter[titleLetter] = HTML.ElementFromString(req.text).xpath(modelsXPath)
    titleLower = metadata.title.lower()
    titleIsModel = False
    for model in modelsByLetter[titleLetter]:
        if model.text_content().strip().lower() == titleLower:
            titleIsModel = True
            break

    for actorLink in actors:
        actorName = actorLink.strip()
        if titleIsModel:
            actorName = metadata.title

        # Site-specific name corrections
        if 'africa' in actorName.lower():
            actorName = 'Africat'
        elif metadata.title == 'MAMADA ARGENTINA':
            actorName = 'Alejandra Argentina'
        elif actorName == 'Alika':
            actorName = 'Alyka'

        if not actorName:
            # An empty fragment (e.g. from a trailing '&') has no first
            # letter to look up; skip it rather than fail the whole update.
            continue

        actorLetter = actorName[0].lower()
        if actorLetter not in modelsByLetter:
            req = PAutils.HTTPRequest('%s/actrices/%s' % (PAsearchSites.getSearchBaseURL(siteNum), actorLetter))
            modelsByLetter[actorLetter] = HTML.ElementFromString(req.text).xpath(modelsXPath)

        actorPhotoURL = ''
        for model in modelsByLetter[actorLetter]:
            if model.text_content().strip().lower() == actorName.lower():
                actorPhotoURL = model.xpath('.//img/@src')[0].strip()
                break

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []

    # The poster URL is embedded in an inline script as posterImage: "..."
    img = detailsPageElements.xpath('//div[@class="top-area-content"]/script')[0].text_content().strip()
    posterImage = re.search(r'(?<=posterImage:\s").*(?=")', img)
    if posterImage:
        img = posterImage.group(0)
        art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except Exception:
                # Artwork is best-effort: a bad image must not fail the update
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    """Populate ``metadata`` from the scene page referenced by ``metadata.id``.

    The id is '|'-separated: field 0 is the encoded scene URL (prefixed with
    the site's base URL when it is not absolute) and field 2, when present,
    is a release date used if the page itself provides none.

    Returns the same ``metadata`` object.
    """
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    # The date field is optional; treat a short id as "no fallback date"
    sceneDate = metadata_id[2] if len(metadata_id) > 2 else ''
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//div[@class="content-desc content-new-scene"]//h1')[0].text_content(
        ).replace('Video -', '').replace('Movie -', '').strip()

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Summary (optional: a page without one keeps the existing summary)
    try:
        metadata.summary = detailsPageElements.xpath(
            '//div[@class="content-desc content-new-scene"]//p'
        )[0].text_content().strip()
    except Exception:
        pass

    # Genres
    movieGenres.clearGenres()
    for genre in detailsPageElements.xpath(
            '//ul[contains(@class, "scene-tags")]/li'):
        genreName = genre.xpath('.//a')[0].text_content().lower()

        movieGenres.addGenre(genreName)

    # Release Date
    date = detailsPageElements.xpath('//meta[@itemprop="uploadDate"]')[0].get(
        'content')
    if date:
        date_object = datetime.strptime(date, '%m/%d/%Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year
    elif sceneDate:
        # Fall back to the date carried in the id. (This branch used to parse
        # `date`, which is always empty here.)
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Actors
    movieActors.clearActors()
    for actorPage in detailsPageElements.xpath(
            '//ul[@id="featured_pornstars"]//div[@class="model"]'):
        actorName = actorPage.xpath('.//h3')[0].text_content().strip()
        actorPhotoURL = actorPage.xpath('.//img/@src')[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = [
        detailsPageElements.xpath(
            '//div[@id="trailer_player_finished"]//img/@src')[0]
    ]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except Exception:
                # Artwork is best-effort: a bad image must not fail the update
                pass

    return metadata
Beispiel #30
0
def update(metadata, siteID, movieGenres, movieActors):
    """Populate ``metadata`` from the scene or model page identified by its id.

    ``metadata.id`` is split on ``'|'``: the first field is the encoded page
    id and the optional third field is the release date string carried over
    from the search step. An id containing ``'/'`` is treated as a scene,
    anything else as a model page.

    Args:
        metadata: metadata object to fill in (title, summary, studio,
            tagline, collections, dates, posters and art are written).
        siteID: site number used to look up the base URL and site name.
        movieGenres: genre collector; cleared and refilled here.
        movieActors: actor collector; cleared and refilled here.

    Returns:
        The same ``metadata`` object, updated in place.
    """
    metadata_id = str(metadata.id).split('|')
    curID = PAutils.Decode(metadata_id[0])
    # The date field is optional; default to None so the later check cannot
    # hit an unbound name when the id carries fewer than three fields.
    sceneDate = metadata_id[2] if len(metadata_id) > 2 else None

    isScene = '/' in curID
    baseURL = PAsearchSites.getSearchBaseURL(siteID)
    if isScene:  # Viewing a Scene
        sceneURL = baseURL + '/free/scene/' + curID + '/ec'
    else:  # Viewing a Model
        sceneURL = baseURL + '/free/girl/' + curID + '/ec'

    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//h3/text()[1]')[0].strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//meta[@name="description"]/@content')[0].strip()

    # Studio
    metadata.studio = 'ExploitedX'

    # Collections / Tagline
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteID)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//a[starts-with(@href, "/free/keywords")]'):
        movieGenres.addGenre(genreLink.text_content().strip())

    # Actors
    movieActors.clearActors()
    if isScene:  # Viewing a Scene
        seenModelUrls = set()
        for modelLink in detailsPageElements.xpath(
                '//a[starts-with(@href, "/free/girl/")]'):
            modelLinkUrl = baseURL + modelLink.xpath('./@href')[0]
            # The same model can be linked several times on one page; fetch
            # and register each model only once.
            if modelLinkUrl in seenModelUrls:
                continue
            seenModelUrls.add(modelLinkUrl)

            modelReq = PAutils.HTTPRequest(modelLinkUrl)
            modelPageElements = HTML.ElementFromString(modelReq.text)

            actorName = modelPageElements.xpath(
                '//meta[@name="twitter:description"]/@content')[0].strip()
            actorPhotoURL = modelPageElements.xpath(
                '//meta[@name="twitter:image"]/@content')[0].strip()

            movieActors.addActor(actorName, actorPhotoURL)
    else:  # Viewing a Model
        actorName = detailsPageElements.xpath(
            '//meta[@name="twitter:description"]/@content')[0].strip()
        actorPhotoURL = detailsPageElements.xpath(
            '//meta[@name="twitter:image"]/@content')[0].strip()

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//meta[@name="twitter:image"]/@content',
    ]

    for xpath in xpaths:
        art.extend(detailsPageElements.xpath(xpath))

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            continue

        # Download image file for analysis
        try:
            image = PAutils.HTTPRequest(
                posterUrl, headers={'Referer': 'http://www.google.com'})
            width, height = Image.open(StringIO(image.content)).size
            # Add the image proxy items to the collection
            if width > 1:
                # Item is a poster
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            if width > 100 and width > height:
                # Item is an art item (landscape images only)
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
        except Exception as e:
            # Artwork is best-effort: a failed download or undecodable image
            # must not abort the metadata update, but should be visible.
            Log('Failed to process artwork %s: %s' % (posterUrl, e))

    return metadata