Example #1
def update(metadata, lang, siteNum, movieGenres, movieActors):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    if len(metadata_id) > 3:
        Log('Switching to Data18Content')
        siteData18Content.update(metadata, lang, siteNum, movieGenres, movieActors)
        return metadata
    
    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content(), siteNum)

    # Summary
    summary = detailsPageElements.xpath('//div[@class="gen12"]/p[contains(., "Description")]')[0].text_content().split(':', 1)[1].strip()
    if len(summary) > 1:
        metadata.summary = summary

    # Studio
    try:
        studio = detailsPageElements.xpath('//i[contains(., "Network")]//preceding-sibling::a[1]')[0].text_content().strip()
    except:
        try:
            studio = detailsPageElements.xpath('//i[contains(., "Studio")]//preceding-sibling::a[1]')[0].text_content().strip()
        except:
            try:
                studio = detailsPageElements.xpath('//i[contains(., "Site")]//preceding-sibling::a[1]')[0].text_content().strip()
            except:
                studio = ''

    if studio:
        metadata.studio = studio

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)
    try:
        tagline = detailsPageElements.xpath('//p[contains(., "Serie")]//a[@title]')[0].text_content().strip()
        metadata.collections.add(tagline)
    except:
        pass

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year
    else:
        date_object = parse(detailsPageElements.xpath('//div[@class="gen12"]/p[contains(., "Release")]')[0].text_content().split(':')[2].strip())
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//p[./b[contains(., "Categories")]]//a'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath('//div[./p[span[@class="gen11"]]]//a')
    for actorLink in actors:
        actorName = actorLink.text_content().strip()
        actorPhotoURL = ''

        if actorName:
            movieActors.addActor(actorName, actorPhotoURL)

    # Director
    metadata.directors.clear()
    director = metadata.directors.new()
    try:
        directorName = detailsPageElements.xpath('//p[./b[contains(., "Director")]]')[0].text_content().split(':')[2].strip()
        if not directorName == 'Unknown':
            director.name = directorName
    except:
        pass

    # Posters
    art = []
    xpaths = [
        '//a[@data-featherlight="image"]/@href',
        '//img[contains(@src, "th5")]/@src',
    ]

    try:
        for xpath in xpaths:
            for img in detailsPageElements.xpath(xpath):
                art.append(img.replace('/th5', ''))
    except:
        pass

    images = []
    posterExists = False
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl, headers={'Referer': 'http://www.data18.com'})
                images.append(image)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > width:
                    # Item is a poster
                    posterExists = True
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    if not posterExists:
        for idx, image in enumerate(images, 1):
            try:
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[art[idx - 1]] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
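
The update()/search() pairs in these examples communicate through a pipe-delimited metadata.id string ('encodedURL|siteNum|releaseDate|...'). A minimal sketch of that round trip, assuming PAutils.Encode/Decode are URL-safe Base64 wrappers (an assumption; only the pipe-joined layout is visible above):

import base64

def encode(value):
    # stand-in for PAutils.Encode (assumed URL-safe Base64)
    return base64.urlsafe_b64encode(value.encode('utf-8')).decode('ascii')

def decode(value):
    # stand-in for PAutils.Decode
    return base64.urlsafe_b64decode(value.encode('ascii')).decode('utf-8')

# Build the id the way search() does: encoded URL, site number, release date
scene_url = 'https://www.example.com/movies/12345'  # hypothetical URL
metadata_id = '%s|%d|%s' % (encode(scene_url), 99, '2021-03-14')

# Parse it back the way update() does
parts = metadata_id.split('|')
print('%s %d %s' % (decode(parts[0]), int(parts[1]), parts[2]))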
Example #2
def search(results, lang, siteNum, searchData):
    searchResults = []
    siteResults = []
    temp = []
    count = 0

    sceneID = None
    parts = searchData.title.split()
    if unicode(parts[0], 'UTF-8').isdigit():
        sceneID = parts[0]

        if int(sceneID) > 100:
            searchData.title = searchData.title.replace(sceneID, '', 1).strip()
            movieURL = '%s/movies/%s' % (PAsearchSites.getSearchBaseURL(siteNum), sceneID)
            searchResults.append(movieURL)

    searchData.encoded = searchData.title.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum), searchData.encoded)
    req = PAutils.HTTPRequest(searchURL, headers={'Referer': 'http://www.data18.com'})
    searchPageElements = HTML.ElementFromString(req.text)

    for searchResult in searchPageElements.xpath('//a[contains(@href, "movies")]//parent::div[contains(@style, "float: left; padding")]'):
        movieURL = searchResult.xpath('.//*[img]/@href')[0]
        urlID = re.sub(r'.*/', '', movieURL)

        if movieURL not in searchResults:
            titleNoFormatting = PAutils.parseTitle(searchResult.xpath('.//*[contains(@href, "movies")]')[1].text_content(), siteNum)
            curID = PAutils.Encode(movieURL)
            siteResults.append(movieURL)

            date = searchResult.text

            if date and not date == 'unknown':
                try:
                    releaseDate = datetime.strptime(date, '%Y%m%d').strftime('%Y-%m-%d')
                except:
                    releaseDate = ''
            else:
                releaseDate = searchData.dateFormat() if searchData.date else ''
            displayDate = releaseDate if date else ''

            if sceneID == urlID:
                score = 100
            elif searchData.date and displayDate:
                score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
            else:
                score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

            if score > 70:
                sceneURL = PAutils.Decode(curID)
                req = PAutils.HTTPRequest(sceneURL)
                detailsPageElements = HTML.ElementFromString(req.text)

                # Studio
                try:
                    studio = detailsPageElements.xpath('//i[contains(., "Network")]//preceding-sibling::a[1]')[0].text_content().strip()
                except:
                    try:
                        studio = detailsPageElements.xpath('//i[contains(., "Studio")]//preceding-sibling::a[1]')[0].text_content().strip()
                    except:
                        try:
                            studio = detailsPageElements.xpath('//i[contains(., "Site")]//preceding-sibling::a[1]')[0].text_content().strip()
                        except:
                            studio = ''

                if score == 80:
                    count += 1
                    temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
                else:
                    results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
                
                #Split Scenes
                sceneCount = detailsPageElements.xpath('//text()[contains(., "Related Scenes")]')[0][-2]
                if sceneCount.isdigit():
                    sceneCount = int(sceneCount)
                else:
                    sceneCount = 0
                for sceneNum in range(1, sceneCount + 1):
                    section = "Scene " + str(sceneNum)
                    scene = PAutils.Encode(detailsPageElements.xpath('//a[contains(., "%s")]/@href' % (section))[0])
                    if score == 80:
                        count += 1
                        temp.append(MetadataSearchResult(id='%s|%d|%s|%s|%d' % (scene, siteNum, releaseDate, titleNoFormatting, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))
                    else:
                        results.Append(MetadataSearchResult(id='%s|%d|%s|%s|%d' % (scene, siteNum, releaseDate, titleNoFormatting, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))
            else:
                if score == 80:
                    count += 1
                    temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s %s' % (titleNoFormatting, displayDate), score=score, lang=lang))
                else:
                    results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s %s' % (titleNoFormatting, displayDate), score=score, lang=lang))

    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for movieURL in googleResults:
        if ('/movies/' in movieURL and '.html' not in movieURL and movieURL not in searchResults and movieURL not in siteResults):
            searchResults.append(movieURL)

    for movieURL in searchResults:
        req = PAutils.HTTPRequest(movieURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', movieURL)

        try:
            siteName = detailsPageElements.xpath('//i[contains(., "Network")]//preceding-sibling::a[1]')[0].text_content().strip()
        except:
            try:
                siteName = detailsPageElements.xpath('//i[contains(., "Studio")]//preceding-sibling::a[1]')[0].text_content().strip()
            except:
                try:
                    siteName = detailsPageElements.xpath('//i[contains(., "Site")]//preceding-sibling::a[1]')[0].text_content().strip()
                except:
                    siteName = ''

        titleNoFormatting = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content(), siteNum)
        curID = PAutils.Encode(movieURL)

        try:
            date = detailsPageElements.xpath('//p[contains(., "Release")]')[0].text_content().split(':')[2].strip()
        except:
            date = ''

        if date and not date == 'unknown':
            releaseDate = parse(date).strftime('%Y-%m-%d')
        else:
            releaseDate = searchData.dateFormat() if searchData.date else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif searchData.date and displayDate:
            score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchData.title.lower(), titleNoFormatting.lower())

        # Studio
        try:
            studio = detailsPageElements.xpath('//i[contains(., "Network")]//preceding-sibling::a[1]')[0].text_content().strip()
        except:
            try:
                studio = detailsPageElements.xpath('//i[contains(., "Studio")]//preceding-sibling::a[1]')[0].text_content().strip()
            except:
                try:
                    studio = detailsPageElements.xpath('//i[contains(., "Site")]//preceding-sibling::a[1]')[0].text_content().strip()
                except:
                    studio = ''

        if score == 80:
            count += 1
            temp.append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
        else:
            results.Append(MetadataSearchResult(id='%s|%d|%s' % (curID, siteNum, releaseDate), name='%s [%s] %s' % (titleNoFormatting, studio, displayDate), score=score, lang=lang))
        
        #Split Scenes
        sceneCount = detailsPageElements.xpath('//text()[contains(., "Related Scenes")]')[0][-2]
        if sceneCount.isdigit():
            sceneCount = int(sceneCount)
        else:
            sceneCount = 0
        for sceneNum in range(1, sceneCount + 1):
            section = "Scene " + str(sceneNum)
            scene = PAutils.Encode(detailsPageElements.xpath('//a[contains(., "%s")]/@href' % (section))[0])
            if score == 80:
                count += 1
                temp.append(MetadataSearchResult(id='%s|%d|%s|%s|%d' % (scene, siteNum, releaseDate, titleNoFormatting, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))
            else:
                results.Append(MetadataSearchResult(id='%s|%d|%s|%s|%d' % (scene, siteNum, releaseDate, titleNoFormatting, sceneNum), name='%s [%s][%s] %s' % (titleNoFormatting, section, studio, displayDate), score=score, lang=lang))

    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=79, lang=lang))
        else:
            results.Append(MetadataSearchResult(id=result.id, name=result.name, score=result.score, lang=lang))

    return results
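
Match scoring throughout these search() functions starts from a fixed ceiling (100 for a direct scene-ID hit, 80 otherwise) and subtracts an edit distance between what was searched and what the site returned. A rough standalone sketch, with a plain dynamic-programming edit distance standing in for the framework's Util.LevenshteinDistance:

def levenshtein(a, b):
    # classic dynamic-programming edit distance (stand-in for Util.LevenshteinDistance)
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1, cur[j - 1] + 1, prev[j - 1] + (ca != cb)))
        prev = cur
    return prev[-1]

def score_result(search_title, result_title, search_date=None, result_date=None):
    # A date match is preferred when both sides have a date, mirroring the logic above
    if search_date and result_date:
        return 80 - levenshtein(search_date, result_date)
    return 80 - levenshtein(search_title.lower(), result_title.lower())

print(score_result('Some Movie Title', 'Some Movie Title'))  # 80
print(score_result('Some Movie Title', 'Some Other Title'))  # lower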
def update(metadata, lang, siteNum, movieGenres, movieActors):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content(), siteNum).replace('’', '\'')

    # Summary
    metadata.summary = detailsPageElements.xpath('//div[@class="vdoDesc"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'Bang Bros'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = detailsPageElements.xpath('//a[contains(@href, "/websites")]')[1].text_content().strip()
    metadata.tagline = tagline
    if Prefs['collections_addsitename']:
        metadata.collections.add(metadata.tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[contains(@class, "vdoTags")]//a'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//div[@class="vdoCast"]//a[contains(@href, "/model")]'):
        actorName = actorLink.text_content().strip()

        actorPageURL = actorLink.get('href')
        req = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + actorPageURL)
        actorPage = HTML.ElementFromString(req.text)
        actorPhotoURL = 'http:' + actorPage.xpath('//div[@class="profilePic_in"]//img/@src')[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//img[contains(@id, "player-overlay-image")]/@src',
        '//div[@class="WdgtPic modal-overlay"]//img/@src'
    ]
    for xpath in xpaths:
        for poster in detailsPageElements.xpath(xpath):
            if not poster.startswith('http'):
                poster = 'http:' + poster
            if 'big' not in poster:
                (poster, filename) = poster.rsplit('/', 1)
                poster = poster + '/big' + filename

            art.append(poster)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def search(results, lang, siteNum, searchData):
    searchResults = []
    siteResults = []
    temp = []
    directID = False
    count = 0

    sceneID = None
    parts = searchData.title.split()
    if unicode(parts[0], 'UTF-8').isdigit():
        sceneID = parts[0]

        if int(sceneID) > 100:
            searchData.title = searchData.title.replace(sceneID, '', 1).strip()
            movieURL = '%s/%s' % (PAsearchSites.getSearchBaseURL(siteNum),
                                  sceneID)
            searchResults.append(movieURL)
            directID = True

    searchData.encoded = searchData.title.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum),
                          searchData.encoded)
    req = PAutils.HTTPRequest(
        searchURL, headers={'Referer': 'http://www.data18.empirestores.co'})
    searchPageElements = HTML.ElementFromString(req.text)
    if not directID:
        for searchResult in searchPageElements.xpath('//a[@class="boxcover"]'):
            movieURL = '%s%s' % (PAsearchSites.getSearchBaseURL(siteNum),
                                 searchResult.xpath('./@href')[0])
            urlID = searchResult.xpath('./@href')[0].split("/")[1]
            if movieURL not in searchResults:
                titleNoFormatting = PAutils.parseTitle(
                    searchResult.xpath('./span/span/text()')[0].strip(),
                    siteNum)
                curID = PAutils.Encode(movieURL)
                siteResults.append(movieURL)

                # Preliminary score from the title only; the release date is not
                # known until the details page is fetched below
                if sceneID == urlID:
                    score = 100
                else:
                    score = 80 - Util.LevenshteinDistance(
                        searchData.title.lower(), titleNoFormatting.lower())

                if score > 70:
                    sceneURL = PAutils.Decode(curID)
                    req = PAutils.HTTPRequest(sceneURL)
                    detailsPageElements = HTML.ElementFromString(req.text)

                    #Find date on movie specific page
                    date = detailsPageElements.xpath(
                        '//div[@class="release-date" and ./span[contains(., "Released:")]]/text()'
                    )[0].strip()
                    if date and not date == 'unknown':
                        try:
                            releaseDate = datetime.strptime(
                                date, '%b %d, %Y').strftime('%Y-%m-%d')
                        except:
                            releaseDate = ''
                    else:
                        releaseDate = searchData.dateFormat(
                        ) if searchData.date else ''
                    displayDate = releaseDate if date else ''

                    # Studio
                    try:
                        studio = detailsPageElements.xpath(
                            '//div[@class="studio"]/a/text()')[0].strip()
                    except:
                        studio = ''
                    if sceneID == urlID:
                        score = 100
                    elif searchData.date and displayDate:
                        score = 80 - Util.LevenshteinDistance(
                            searchData.date, releaseDate)
                    else:
                        score = 80 - Util.LevenshteinDistance(
                            searchData.title.lower(),
                            titleNoFormatting.lower())

                    if score == 80:
                        count += 1
                        temp.append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s [%s] %s' %
                                (titleNoFormatting, studio, displayDate),
                                score=score,
                                lang=lang))
                    else:
                        results.Append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s [%s] %s' %
                                (titleNoFormatting, studio, displayDate),
                                score=score,
                                lang=lang))

                    # Split Scenes
                    scenes = detailsPageElements.xpath(
                        '//div[@class="item-grid item-grid-scene"]/div/a/@href'
                    )
                    sceneCount = len(scenes)
                    for sceneNum in range(0, sceneCount):
                        section = "Scene %d" % (sceneNum + 1)
                        if score == 80:
                            count += 1
                            temp.append(
                                MetadataSearchResult(
                                    id='%s|%d|%s|%d' %
                                    (curID, siteNum, releaseDate, sceneNum),
                                    name='%s [%s][%s] %s' %
                                    (titleNoFormatting, section, studio,
                                     displayDate),
                                    score=score,
                                    lang=lang))
                        else:
                            results.Append(
                                MetadataSearchResult(
                                    id='%s|%d|%s|%d' %
                                    (curID, siteNum, releaseDate, sceneNum),
                                    name='%s [%s][%s] %s' %
                                    (titleNoFormatting, section, studio,
                                     displayDate),
                                    score=score,
                                    lang=lang))
                else:
                    if score == 80:
                        count += 1
                        temp.append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s %s' %
                                (titleNoFormatting, displayDate),
                                score=score,
                                lang=lang))
                    else:
                        results.Append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s %s' %
                                (titleNoFormatting, displayDate),
                                score=score,
                                lang=lang))

    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for movieURL in googleResults:
        if ('/movies/' in movieURL and '.html' not in movieURL
                and movieURL not in searchResults
                and movieURL not in siteResults):
            searchResults.append(movieURL)

    for movieURL in searchResults:
        req = PAutils.HTTPRequest(movieURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', movieURL)
        titleNoFormatting = PAutils.parseTitle(
            detailsPageElements.xpath('//h1[@class="description"]/text()')
            [0].strip(), siteNum)
        curID = PAutils.Encode(movieURL)

        date = detailsPageElements.xpath(
            '//div[@class="release-date" and ./span[contains(., "Released:")]]/text()'
        )[0].strip()
        if date and not date == 'unknown':
            try:
                releaseDate = datetime.strptime(
                    date, '%b %d, %Y').strftime('%Y-%m-%d')
            except:
                releaseDate = ''
        else:
            releaseDate = searchData.dateFormat() if searchData.date else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif searchData.date and displayDate:
            score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchData.title.lower(),
                                                  titleNoFormatting.lower())

        # Studio
        try:
            studio = detailsPageElements.xpath(
                '//div[@class="studio"]/a/text()')[0].strip()
        except:
            studio = ''

        if score == 80:
            count += 1
            temp.append(
                MetadataSearchResult(id='%s|%d|%s' %
                                     (curID, siteNum, releaseDate),
                                     name='%s [%s] %s' %
                                     (titleNoFormatting, studio, displayDate),
                                     score=score,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id='%s|%d|%s' %
                                     (curID, siteNum, releaseDate),
                                     name='%s [%s] %s' %
                                     (titleNoFormatting, studio, displayDate),
                                     score=score,
                                     lang=lang))

        # Split Scenes
        scenes = detailsPageElements.xpath(
            '//div[@class="item-grid item-grid-scene"]/div/a/@href')
        sceneCount = len(scenes)
        for sceneNum in range(1, sceneCount + 1):
            section = "Scene %d" % (sceneNum)
            if score == 80:
                count += 1
                temp.append(
                    MetadataSearchResult(
                        id='%s|%d|%s|%d' %
                        (curID, siteNum, releaseDate, sceneNum),
                        name='%s [%s][%s] %s' %
                        (titleNoFormatting, section, studio, displayDate),
                        score=score,
                        lang=lang))
            else:
                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s|%d' %
                        (curID, siteNum, releaseDate, sceneNum),
                        name='%s [%s][%s] %s' %
                        (titleNoFormatting, section, studio, displayDate),
                        score=score,
                        lang=lang))

    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=79,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=result.score,
                                     lang=lang))

    return results
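
Both search() variants collect results that land exactly on the 80 baseline in temp and count them; if more than one candidate ties at 80, they are all re-emitted at 79 so no ambiguous match wins automatically. A compact sketch of that final pass (plain dicts stand in for MetadataSearchResult):

def append_tied_results(results, tied_at_baseline, demoted_score=79):
    # Mirrors the closing loop above: demote the score only when several results tied
    demote = len(tied_at_baseline) > 1
    for r in tied_at_baseline:
        score = demoted_score if demote else r['score']
        results.append(dict(r, score=score))
    return results

final = append_tied_results([], [
    {'id': 'a', 'name': 'Movie A', 'score': 80},
    {'id': 'b', 'name': 'Movie B', 'score': 80},
])
print([(r['name'], r['score']) for r in final])  # [('Movie A', 79), ('Movie B', 79)]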
Example #5
def update(metadata, siteNum, movieGenres, movieActors):
    metadata_id = str(metadata.id).split('|')
    sceneName = metadata_id[0]
    sceneDate = metadata_id[2]
    sceneType = metadata_id[3]

    dbURL = getDBURL(PAsearchSites.getSearchBaseURL(siteNum))
    detailsPageElements = getDataFromAPI(dbURL, sceneType, sceneName, siteNum)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements['title'], siteNum)

    # Summary
    metadata.summary = detailsPageElements['description']

    # Studio
    metadata.studio = 'TeamSkeet'

    # Collections / Tagline
    siteName = detailsPageElements['site']['name'] if 'site' in detailsPageElements else PAsearchSites.getSearchSiteName(siteNum)
    metadata.collections.clear()
    metadata.tagline = siteName
    metadata.collections.add(siteName)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    if 'tags' in detailsPageElements and detailsPageElements['tags']:
        for genreLink in detailsPageElements['tags']:
            genreName = genreLink.strip()

            movieGenres.addGenre(genreName)

    if siteName == 'Sis Loves Me':
        movieGenres.addGenre('Step Sister')
    elif siteName == 'DadCrush' or siteName == 'DaughterSwap':
        movieGenres.addGenre('Step Dad')
        movieGenres.addGenre('Step Daughter')
    elif siteName == 'PervMom':
        movieGenres.addGenre('Step Mom')
    elif siteName == 'Family Strokes':
        movieGenres.addGenre('Taboo Family')
    elif siteName == 'Foster Tapes':
        movieGenres.addGenre('Taboo Sex')
    elif siteName == 'BFFs':
        movieGenres.addGenre('Teen')
        movieGenres.addGenre('Group Sex')
    elif siteName == 'Shoplyfter':
        movieGenres.addGenre('Strip')
    elif siteName == 'ShoplyfterMylf':
        movieGenres.addGenre('Strip')
        movieGenres.addGenre('MILF')
    elif siteName == 'Exxxtra Small':
        movieGenres.addGenre('Teen')
        movieGenres.addGenre('Small T**s')
    elif siteName == 'Little Asians':
        movieGenres.addGenre('Asian')
        movieGenres.addGenre('Teen')
    elif siteName == 'TeenJoi':
        movieGenres.addGenre('Teen')
        movieGenres.addGenre('JOI')
    elif siteName == 'Black Valley Girls':
        movieGenres.addGenre('Teen')
        movieGenres.addGenre('Ebony')
    elif siteName == 'Thickumz':
        movieGenres.addGenre('Thick')
    elif siteName == 'Dyked':
        movieGenres.addGenre('Hardcore')
        movieGenres.addGenre('Teen')
        movieGenres.addGenre('Lesbian')
    elif siteName == 'Teens Love Black Cocks':
        movieGenres.addGenre('Teens')
        movieGenres.addGenre('BBC')
    elif siteName == 'Teen Curves':
        movieGenres.addGenre('Big Ass')
    elif siteName == 'Titty Attack':
        movieGenres.addGenre('Big T**s')
    elif siteName == 'Teeny Black':
        movieGenres.addGenre('Teen')
        movieGenres.addGenre('Ebony')
    elif siteName == 'Teens Do P**n':
        movieGenres.addGenre('Teen')
    elif siteName == 'Teen Pies':
        movieGenres.addGenre('Teen')
        movieGenres.addGenre('Creampie')
    elif siteName == 'POV Life':
        movieGenres.addGenre('POV')
    elif siteName == 'Ginger Patch':
        movieGenres.addGenre('Redhead')
    elif siteName == 'Innocent High':
        movieGenres.addGenre('School Girl')
    elif siteName == 'Oye Loca':
        movieGenres.addGenre('Latina')

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements['models']
    for actorLink in actors:
        actorData = getDataFromAPI(dbURL, 'modelscontent', actorLink['modelId'], siteNum)

        if actorData:
            actorName = actorData['name']
            actorPhotoURL = actorData['img']

            movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = [
        detailsPageElements['img']
    ]

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
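
The long elif chain that injects per-site default genres above could also be written as a lookup table. A sketch of that data-driven form (the mappings are a subset of the chain above; add_genre stands in for movieGenres.addGenre):

# Per-site default genres, keyed by site name
DEFAULT_GENRES = {
    'Sis Loves Me': ['Step Sister'],
    'DadCrush': ['Step Dad', 'Step Daughter'],
    'Family Strokes': ['Taboo Family'],
    'BFFs': ['Teen', 'Group Sex'],
    'Little Asians': ['Asian', 'Teen'],
}

def add_default_genres(site_name, add_genre):
    # add_genre is any callable, e.g. movieGenres.addGenre
    for genre in DEFAULT_GENRES.get(site_name, []):
        add_genre(genre)

collected = []
add_default_genres('BFFs', collected.append)
print(collected)  # ['Teen', 'Group Sex']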
Example #6
def getSearchSettings(mediaTitle):
    Log('mediaTitle w/ possible abbreviation: %s' % mediaTitle)

    for abbreviation, full in PAsiteList.abbreviations:
        r = re.compile(abbreviation, flags=re.IGNORECASE)
        if r.match(mediaTitle):
            mediaTitle = r.sub(full, mediaTitle, 1)
            break

    Log('mediaTitle w/ possible abbreviation fixed: %s' % mediaTitle)

    result = {
        'siteNum': None,
        'siteName': None,
        'searchTitle': None,
        'searchDate': None,
    }

    # Remove Site from Title
    siteNum = getSiteNumByFilter(mediaTitle)
    if siteNum is not None:
        Log('^^^^^^^ siteNum: %d' % siteNum)
        Log('^^^^^^^ Shortening Title')

        title = mediaTitle
        site = getSearchSiteName(siteNum).lower()

        title = re.sub(r'[^a-zA-Z0-9#& ]', ' ', title)
        site = re.sub(r'\W', '', site)

        matched = False
        while (' ' in title):
            title = title.replace(' ', '', 1)
            if title.lower().startswith(site):
                matched = True
                break

        if matched:
            searchTitle = re.sub(site, '', title, 1, flags=re.IGNORECASE)
            searchTitle = ' '.join(searchTitle.split())
        else:
            searchTitle = mediaTitle

        searchTitle = PAutils.parseTitle(searchTitle, siteNum)

        Log('Search Title (before date processing): %s' % searchTitle)

        # Search Type
        searchDate = None
        regex = [(r'\b\d{4} \d{2} \d{2}\b', '%Y %m %d'),
                 (r'\b\d{2} \d{2} \d{2}\b', '%y %m %d')]
        date_obj = None
        for r, dateFormat in regex:
            date = re.search(r, searchTitle)
            if date:
                try:
                    date_obj = datetime.strptime(date.group(), dateFormat)
                except:
                    pass

                if date_obj:
                    searchDate = date_obj.strftime('%Y-%m-%d')
                    searchTitle = ' '.join(
                        re.sub(r, '', searchTitle, 1).split())
                    break

        searchTitle = searchTitle[0].upper() + searchTitle[1:]

        result['siteNum'] = siteNum
        result['siteName'] = site
        result['searchTitle'] = searchTitle
        result['searchDate'] = searchDate

    return result
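
A quick standalone run of the date-extraction step in getSearchSettings, on a hypothetical title already reduced to letters, digits, and spaces (as the earlier re.sub produces); the sample input is invented:

import re
from datetime import datetime

def extract_date(search_title):
    # Same two patterns as above: 'YYYY MM DD' first, then 'YY MM DD'
    patterns = [(r'\b\d{4} \d{2} \d{2}\b', '%Y %m %d'),
                (r'\b\d{2} \d{2} \d{2}\b', '%y %m %d')]
    for pattern, date_format in patterns:
        match = re.search(pattern, search_title)
        if match:
            try:
                date_obj = datetime.strptime(match.group(), date_format)
            except ValueError:
                continue
            cleaned = ' '.join(re.sub(pattern, '', search_title, 1).split())
            return cleaned, date_obj.strftime('%Y-%m-%d')
    return search_title, None

print(extract_date('Example Site 21 03 14 Some Scene Name'))
# ('Example Site Some Scene Name', '2021-03-14')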
Example #7
def update(metadata, lang, siteNum, movieGenres, movieActors):
    splitScene = False
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    if len(metadata_id) > 3:
        Log('Split Scene: %d' % int(metadata_id[3]))
        splitScene = True

    # Title
    metadata.title = PAutils.parseTitle(
        detailsPageElements.xpath('//h1/text()')[0], siteNum).strip()
    if splitScene:
        metadata.title = '%s [Scene %s]' % (metadata.title, metadata_id[3])

    # Summary
    summary = ''
    try:
        summary = '\n'.join([
            line.text_content().strip() for line in detailsPageElements.xpath(
                '//div[@class="product-details-container"]/div[@class="row breakout bg-lightgrey"]//h4/p'
            )
        ])
    except:
        pass
    metadata.summary = summary

    # Studio
    try:
        studio = detailsPageElements.xpath(
            '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Studio:")]/a/text()'
        )[0].strip()
    except:
        studio = ''

    if studio:
        metadata.studio = studio

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = ''
    try:
        tagline = re.sub(
            r'\(.*\)', '',
            detailsPageElements.xpath(
                '//div[@class="container"]/h2/a[@label="Series"]/text()')
            [0].strip().split('"')[1]).strip()
        metadata.tagline = tagline
        metadata.collections.add(tagline)
    except:
        if splitScene:
            metadata.collections.add(
                PAutils.parseTitle(
                    detailsPageElements.xpath('//h1/text()')[0],
                    siteNum).strip())
        else:
            metadata.collections.add(studio)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//div[@class="col-sm-4 m-b-2"]/ul/li//a[@label="Category"]'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()

    actors = []
    if splitScene:
        actorNames = detailsPageElements.xpath(
            '//div[@class="container"]/div[@class="row"][./div[@class="col-sm-6 text-right text-left-xs m-b-1"]][%d]/div[2]/div/a'
            % int(metadata_id[3]))
        for name in actorNames:
            try:
                actors.append(name)
            except:
                pass
    else:
        actors = detailsPageElements.xpath(
            '//div[@class="col-sm-4 m-b-2"]/ul/li/a[@label="Performers - detail"]'
        )

    for actorLink in actors:
        actorName = actorLink.text_content().strip()
        try:
            actorPhotoURL = detailsPageElements.xpath(
                '//div[@class="itempage"]/div/div[@class="row"]/div[@class="col-sm-3 col-md-4 col-lg-3 m-b-2"]/div/a[@label="Performer"][contains(., "%s")]//img/@src'
                % actorName)[0].strip()
        except:
            continue
        if actorName:
            movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    cover = '//div[@class="boxcover-container"]/a/img/@src'
    splitscenes = ''
    if splitScene:
        splitscenes = '//div[@class="product-details-container"]/div[@class="container"]/div[@class="row"][./div[@class="col-sm-9 col-md-10"]][%d]/div[@class="col-sm-9 col-md-10"]/div/div/a/@href' % int(
            metadata_id[3])
    try:
        if splitScene:
            art = art + detailsPageElements.xpath(splitscenes)
        art.append(detailsPageElements.xpath(cover)[0])
    except:
        pass

    images = []
    posterExists = False
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(
                    posterUrl,
                    headers={'Referer': 'http://www.data18.empirestores.co'})
                images.append(image)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                posterExists = True
                metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
                metadata.art[posterUrl] = Proxy.Media(image.content,
                                                      sort_order=idx)
            except:
                pass

    if not posterExists:
        for idx, image in enumerate(images, 1):
            try:
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[art[idx - 1]] = Proxy.Media(
                        image.content, sort_order=idx)
            except:
                pass

    return metadata
def search(results, lang, siteNum, searchData):
    searchResults = []

    googleResults = PAutils.getFromGoogleSearch(searchData.title,
                                                siteNum,
                                                lang='enes')
    for sceneURL in googleResults:
        sceneURL = sceneURL.replace('index.php/', '')
        sceneURL = sceneURL.replace('es/', '')
        if '/tags/' not in sceneURL and '/actr' not in sceneURL and '?pag' not in sceneURL and '/xvideos' not in sceneURL and '/tag/' not in sceneURL and sceneURL not in searchResults:
            searchResults.append(sceneURL)
            if '/en/' in sceneURL:
                searchResults.append(sceneURL.replace('en/', ''))

    for sceneURL in searchResults:
        req = PAutils.HTTPRequest(sceneURL)
        detailsPageElements = HTML.ElementFromString(req.text)

        try:
            if '/en/' in sceneURL:
                language = 'English'
                titleNoFormatting = PAutils.parseTitle(
                    detailsPageElements.xpath('//title')
                    [0].text_content().split('|')[0].split('-')[0].strip(),
                    siteNum)
            else:
                language = 'Español'
                titleNoFormatting = detailsPageElements.xpath('//title')[
                    0].text_content().split('|')[0].split('-')[0].strip()

            curID = PAutils.Encode(sceneURL)

            date = detailsPageElements.xpath(
                '//div[@class="released-views"]/span')[0].text_content().strip(
                )
            if date:
                releaseDate = datetime.strptime(
                    date, '%d/%m/%Y').strftime('%Y-%m-%d')
            else:
                releaseDate = searchData.dateFormat(
                ) if searchData.date else ''
            displayDate = releaseDate if date else ''

            if searchData.date and displayDate:
                score = 100 - Util.LevenshteinDistance(searchData.date,
                                                       releaseDate)
            else:
                score = 100 - Util.LevenshteinDistance(
                    searchData.title.lower(), titleNoFormatting.lower())

            results.Append(
                MetadataSearchResult(
                    id='%s|%d' % (curID, siteNum),
                    name='%s {%s} [%s] %s' %
                    (titleNoFormatting, language,
                     PAsearchSites.getSearchSiteName(siteNum), displayDate),
                    score=score,
                    lang=lang))
        except:
            pass

    return results
Example #9
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    metadata_id = str(metadata.id).split('|')
    videoId = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]

    splitted = videoId.split(':')
    modelId = splitted[0]
    scene = splitted[-1]
    sceneNum = int(scene.replace('scene', ''))

    detailsPageElements = getGraphQL(findVideoQuery, 'videoId', videoId,
                                     siteNum)['video']['find']['result']

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements['title'], siteNum)

    # Summary
    summary = detailsPageElements['description']['long'].strip()
    if not summary.endswith('.'):
        summary = summary + '.'

    metadata.summary = summary

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for key, value in genresDB.items():
        if key.lower() == PAsearchSites.getSearchSiteName(siteNum).lower():
            for genreName in value:
                movieGenres.addGenre(genreName)
            break

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements['talent']:
        actorPhoto = []
        actorName = actorLink['talent']['name']

        actorPhoto.append('/members/models/%s/profile-sm.jpg' %
                          actorLink['talent']['talentId'])
        actorPhotoURL = getGraphQL(
            assetQuery, 'paths', actorPhoto,
            siteNum)['asset']['batch']['result'][0]['serve']['uri']

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    images = []
    images.append('/members/models/%s/scenes/%s/videothumb.jpg' %
                  (modelId, scene))
    for idx in range(1, detailsPageElements['galleryCount'] + 1):
        path = '/members/models/%s/scenes/%s/photos/thumbs/%s-%s-%d-%d.jpg' % (
            modelId, scene, PAsearchSites.getSearchSiteName(siteNum).lower(),
            modelId, sceneNum, idx)
        images.append(path)

    posters = getGraphQL(assetQuery, 'paths', images,
                         siteNum)['asset']['batch']['result']

    for poster in posters:
        if poster:
            art.append(poster['serve']['uri'])

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        # Remove Timestamp and Token from URL
        cleanUrl = posterUrl.split('?')[0]
        art[idx - 1] = cleanUrl
        if not PAsearchSites.posterAlreadyExists(cleanUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > width:
                    # Item is a poster
                    metadata.posters[cleanUrl] = Proxy.Media(image.content,
                                                             sort_order=idx)
                if width > height:
                    # Item is an art item
                    metadata.art[cleanUrl] = Proxy.Media(image.content,
                                                         sort_order=idx)
            except:
                pass

    return metadata
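
getGraphQL is called above but not shown in this listing; the general shape of such a helper is a POST of a query plus variables to the site's GraphQL endpoint. A minimal sketch with requests, where the endpoint URL, the query text, and the error handling are assumptions rather than the agent's actual implementation:

import requests

def get_graphql(endpoint, query, variable_name, value, headers=None):
    # Send one GraphQL operation and return the 'data' payload (or raise on errors)
    payload = {'query': query, 'variables': {variable_name: value}}
    resp = requests.post(endpoint, json=payload, headers=headers or {}, timeout=30)
    resp.raise_for_status()
    body = resp.json()
    if body.get('errors'):
        raise RuntimeError(body['errors'])
    return body['data']

# Hypothetical usage, mirroring the call shape used above (query text is invented)
findVideoQuery = '''
query FindVideo($videoId: ID!) {
  video { find(input: {videoId: $videoId}) { result { title } } }
}
'''
# data = get_graphql('https://example.com/graphql', findVideoQuery, 'videoId', 'model:scene1')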
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    try:
        sceneDate = metadata_id[2]
    except:
        pass
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    movieGenres.clearGenres()
    movieActors.clearActors()

    # Title
    if 'webmasters' in sceneURL:
        resultTitleID = detailsPageElements.xpath('//h1/text()')[0]
    else:
        resultTitleID = detailsPageElements.xpath('//h4/span')[0].text_content()

    sceneID = re.sub(r'\D.*', '', resultTitleID)
    metadata.title = PAutils.parseTitle(re.sub(r'^\d+', '', resultTitleID), siteNum)

    # Summary
    try:
        if 'webmasters' in sceneURL:
            metadata.summary = detailsPageElements.xpath('//div[@class="row gallery-description"]//div')[1].text_content().strip()
        else:
            metadata.summary = detailsPageElements.xpath('//div[@class="row"]//a/@title')[0].strip()
    except:
        pass

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = metadata.studio
    metadata.collections.add(metadata.studio)

    # Actors
    if 'webmasters' in sceneURL:
        actors = detailsPageElements.xpath('//spam[@class="key-words"]//a')
    else:
        actors = detailsPageElements.xpath('//h5//a')

    actorPhotoURL = ''

    # Remove Actor Names from Genre List
    genres = detailsPageElements.xpath('//meta[@name="keywords"]/@content')[0].replace('Aussie Ass', '')
    genres = re.sub(r'id.\d*', '', genres, flags=re.IGNORECASE)

    if actors:
        for actorLink in actors:
            actorName = actorLink.text_content().title()
            genres = genres.replace(actorName, '')

            modelURL = actorLink.xpath('./@href')[0]
            req = PAutils.HTTPRequest(modelURL)
            actorsPageElements = HTML.ElementFromString(req.text)

            img = actorsPageElements.xpath('//img[contains(@id, "set-target")]/@src')[0]
            if img:
                actorPhotoURL = img
                if 'http' not in actorPhotoURL:
                    actorPhotoURL = PAsearchSites.getSearchBaseURL(siteNum) + actorPhotoURL

            movieActors.addActor(actorName, actorPhotoURL)

    # Date
    date = ""

    try:
        if 'webmasters' in sceneURL:

            pageResults = int(actorsPageElements.xpath('//span[@class="number_item "]')[0].text_content().strip())

            if not pageResults:
                pageResults = 1

            for x in range(pageResults):
                if x == 1:
                    req = PAutils.HTTPRequest(PAsearchSites.getSearchBaseURL(siteNum) + actorsPageElements.xpath('//a[contains(@class, "in_stditem")]/@href')[1])
                    actorsPageElements = HTML.ElementFromString(req.text)

                for sceneElements in actorsPageElements.xpath('//div[@class="box"]'):
                    if sceneID in sceneElements.xpath('.//a/text()')[1]:
                        date = actorsPageElements.xpath('.//span[@class="video-date"]')[0].text_content().strip()
                        break
        else:
            date = sceneDate
    except:
        date = sceneDate

    if date:
        date = parse(date).strftime('%d-%m-%Y')
        date_object = datetime.strptime(date, '%d-%m-%Y')
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    for genreLink in genres.split(','):
        genreName = genreLink.strip()

        movieGenres.addGenre(genreName)

    # Posters
    xpaths = [
        '//img[contains(@alt, "content")]/@src',
        '//div[@class="box"]//img/@src',
    ]

    altURL = ""

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if 'http' not in img:
                if 'join' in img:
                    break
                elif 'webmasters' in sceneURL:
                    img = sceneURL + "/" + img
                else:
                    img = PAsearchSites.getSearchBaseURL(siteNum) + img
            art.append(img)
        if 'webmasters' not in sceneURL:
            altURL = PAsearchSites.getSearchBaseURL(siteNum) + "/webmasters/" + sceneID
            req = PAutils.HTTPRequest(altURL)
            detailsPageElements = HTML.ElementFromString(req.text)
            sceneURL = altURL

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1 or height > width:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100 and width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
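
The genre handling above starts from the page's keywords meta tag and strips out the site name, internal id tokens, and performer names so only real tags remain. A standalone sketch of that filtering (the sample keywords string is invented):

import re

def keywords_to_genres(keywords, actor_names, site_name=''):
    # Drop the site name, id tokens, and performer names; keep the rest as genres
    cleaned = keywords.replace(site_name, '')
    cleaned = re.sub(r'id.\d*', '', cleaned, flags=re.IGNORECASE)
    for name in actor_names:
        cleaned = cleaned.replace(name, '')
    return [g.strip() for g in cleaned.split(',') if g.strip()]

print(keywords_to_genres('Example Site, Jane Doe, Outdoor, id 12345, Blonde',
                         ['Jane Doe'], 'Example Site'))
# ['Outdoor', 'Blonde']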
Example #11
def update(metadata, lang, siteNum, movieGenres, movieActors):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(
        detailsPageElements.xpath('//h3[@class="mas_title"]')
        [0].text_content().strip(), siteNum)

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//p[@class="mas_longdescription"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'Deranged Dollars'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = detailsPageElements.xpath('//title')[0].text_content().split(
        '|')[1].strip().replace('.com', '')
    metadata.tagline = tagline
    metadata.collections.add(metadata.tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//p[@class="tags"]/a'):
        genreName = PAutils.parseTitle(genreLink.text_content().strip(),
                                       siteNum)

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath(
        '//div[@class="lch"]/span')[0].text_content().rsplit(',', 2)[0]
    if ':' in actors:
        actors = re.split(',|&|/|And', actors.split(':', 1)[1])
    else:
        actors = re.split(',|&|/|And', actors)

    modelURL = PAsearchSites.getSearchSearchURL(siteNum) + '?models'
    req = PAutils.HTTPRequest(modelURL)
    modelPageElements = HTML.ElementFromString(req.text)
    models = modelPageElements.xpath('//div[@class="item"]')
    modelURL = PAsearchSites.getSearchSearchURL(siteNum) + '?models/2'
    req = PAutils.HTTPRequest(modelURL)
    modelPageElements = HTML.ElementFromString(req.text)
    models += modelPageElements.xpath('//div[@class="item"]')

    for actorLink in actors:
        actorName = actorLink.strip()
        actorName = re.sub(r'\W', ' ',
                           actorName).replace('Nurses',
                                              '').replace('Nurse', '')

        actorPhotoURL = ''
        for model in models:
            if ':' in model.text_content().strip():
                if actorName in model.text_content().split(':', 1)[1].strip():
                    actorName = model.text_content().split(':', 1)[1].strip()
                    actorPhotoURL = PAsearchSites.getSearchSearchURL(
                        siteNum) + model.xpath('.//@src')[0]
                    break
            else:
                if actorName in model.text_content().strip():
                    actorName = model.text_content().strip()
                    actorPhotoURL = PAsearchSites.getSearchSearchURL(
                        siteNum) + model.xpath('.//@src')[0]
                    break

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//div[@class="stills clearfix"]//img/@src',
        '//div[@class="mainpic"]//script/text()',
    ]

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            if '\'' in img:
                img = img.split('\'')[1]
            if 'http' not in img:
                img = PAsearchSites.getSearchSearchURL(siteNum) + img

            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    return metadata
Example #12
def search(results, encodedTitle, searchTitle, siteNum, lang, searchDate):
    searchResults = []
    siteResults = []
    temp = []
    count = 0

    sceneID = None
    splited = searchTitle.split(' ')
    if unicode(splited[0], 'UTF-8').isdigit():
        sceneID = splited[0]

        if int(sceneID) > 100:
            searchTitle = searchTitle.replace(sceneID, '', 1).strip()
            sceneURL = '%s/content/%s' % (
                PAsearchSites.getSearchBaseURL(siteNum), sceneID)
            searchResults.append(sceneURL)

    encodedTitle = searchTitle.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum),
                          encodedTitle)
    req = PAutils.HTTPRequest(searchURL,
                              headers={'Referer': 'http://www.data18.com'})
    searchPageElements = HTML.ElementFromString(req.text)

    for searchResult in searchPageElements.xpath(
            '//p[@class="genmed"]//parent::div'):
        sceneURL = searchResult.xpath(
            './/*[contains(@href, "content")]/@href')[0]

        if sceneURL not in searchResults:
            urlID = re.sub(r'.*/', '', sceneURL)

            try:
                siteName = searchResult.xpath(
                    './/*[contains(., "Network")]')[0].text_content().replace(
                        'Network:', '').strip()
            except:
                try:
                    siteName = searchResult.xpath('.//*[contains(., "Studio")]'
                                                  )[0].text_content().replace(
                                                      'Studio:', '').strip()
                except:
                    siteName = ''

            try:
                subSite = searchResult.xpath(
                    './/p[@class][contains(., "Site:")]')[0].text_content(
                    ).replace('Site:', '').strip()
            except:
                subSite = ''

            if siteName:
                siteDisplay = '%s/%s' % (siteName,
                                         subSite) if subSite else siteName
            else:
                siteDisplay = subSite

            titleNoFormatting = PAutils.parseTitle(
                searchResult.xpath('.//*[contains(@href, "content")]')
                [1].text_content(), siteNum)
            curID = PAutils.Encode(sceneURL)
            siteResults.append(sceneURL)

            try:
                date = searchResult.xpath(
                    './/p[@class="genmed"]')[0].text_content().strip()
                date = re.sub(r'^#(.*?)\s', '', date)
            except:
                date = ''

            if date and not date == 'unknown':
                date = date.replace('Sept', 'Sep')
                releaseDate = parse(date).strftime('%Y-%m-%d')
            else:
                releaseDate = parse(searchDate).strftime(
                    '%Y-%m-%d') if searchDate else ''
            displayDate = releaseDate if date else ''

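            # An exact content-ID match scores 100; otherwise score by how
            # closely the release date or title matches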
            if sceneID == urlID:
                score = 100
            elif searchDate and displayDate:
                score = 80 - Util.LevenshteinDistance(searchDate, releaseDate)
            else:
                score = 80 - Util.LevenshteinDistance(
                    searchTitle.lower(), titleNoFormatting.lower())

            if score == 80:
                count += 1
                temp.append(
                    MetadataSearchResult(
                        id='%s|%d|%s' % (curID, siteNum, releaseDate),
                        name='%s [%s] %s' %
                        (titleNoFormatting, siteDisplay, displayDate),
                        score=score,
                        lang=lang))
            else:
                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s' % (curID, siteNum, releaseDate),
                        name='%s [%s] %s' %
                        (titleNoFormatting, siteDisplay, displayDate),
                        score=score,
                        lang=lang))

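    # Fall back to a Google site search for scene URLs the on-site search missed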
    googleResults = PAutils.getFromGoogleSearch(searchTitle, siteNum)
    for sceneURL in googleResults:
        if ('/content/' in sceneURL and '.html' not in sceneURL
                and sceneURL not in searchResults
                and sceneURL not in siteResults):
            searchResults.append(sceneURL)

    for sceneURL in searchResults:
        req = PAutils.HTTPRequest(sceneURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', sceneURL)

        try:
            siteName = detailsPageElements.xpath(
                '//i[contains(., "Network")]//preceding-sibling::a[1]'
            )[0].text_content().strip()
        except:
            try:
                siteName = detailsPageElements.xpath(
                    '//i[contains(., "Studio")]//preceding-sibling::a[1]'
                )[0].text_content().strip()
            except:
                siteName = ''

        try:
            subSite = detailsPageElements.xpath(
                '//i[contains(., "Site")]//preceding-sibling::a[1]'
            )[0].text_content().strip()
        except:
            subSite = ''

        if siteName:
            siteDisplay = '%s/%s' % (siteName,
                                     subSite) if subSite else siteName
        else:
            siteDisplay = subSite

        titleNoFormatting = PAutils.parseTitle(
            detailsPageElements.xpath('//h1')[0].text_content(), siteNum)
        curID = PAutils.Encode(sceneURL)

        try:
            date = detailsPageElements.xpath(
                '//span[@class][./*[contains(.., "date")]]')[0].text_content(
                ).split(':', 2)[-1].strip()
        except:
            date = ''

        if date and not date == 'unknown':
            releaseDate = parse(date).strftime('%Y-%m-%d')
        else:
            releaseDate = parse(searchDate).strftime(
                '%Y-%m-%d') if searchDate else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif searchDate and displayDate:
            score = 80 - Util.LevenshteinDistance(searchDate, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchTitle.lower(),
                                                  titleNoFormatting.lower())

        if score == 80:
            count += 1
            temp.append(
                MetadataSearchResult(
                    id='%s|%d|%s' % (curID, siteNum, releaseDate),
                    name='%s [%s] %s' %
                    (titleNoFormatting, siteDisplay, displayDate),
                    score=score,
                    lang=lang))
        else:
            results.Append(
                MetadataSearchResult(
                    id='%s|%d|%s' % (curID, siteNum, releaseDate),
                    name='%s [%s] %s' %
                    (titleNoFormatting, siteDisplay, displayDate),
                    score=score,
                    lang=lang))

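    # When several results tie at the maximum score of 80, demote them all to 79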
    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=79,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=result.score,
                                     lang=lang))

    return results
Example #13
def update(metadata, siteNum, movieGenres, movieActors):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(
        detailsPageElements.xpath('//h1')[0].text_content(), siteNum)

    # Summary
    try:
        metadata.summary = detailsPageElements.xpath(
            '//div[@class="gen12"]/p[contains(., "Story")]')[0].text_content(
            ).split('\n', 2)[-1]
    except:
        pass

    # Studio
    try:
        metadata.studio = detailsPageElements.xpath(
            '//i[contains(., "Network")]//preceding-sibling::a[1]'
        )[0].text_content().strip()
    except:
        try:
            metadata.studio = detailsPageElements.xpath(
                '//i[contains(., "Studio")]//preceding-sibling::a[1]'
            )[0].text_content().strip()
        except:
            pass

    # Tagline and Collection(s)
    metadata.collections.clear()
    try:
        tagline = detailsPageElements.xpath(
            '//i[contains(., "Site")]//preceding-sibling::a[1]'
        )[0].text_content().strip()
        if not metadata.studio:
            metadata.studio = tagline
        else:
            metadata.tagline = tagline
        metadata.collections.add(tagline)
    except:
        metadata.collections.add(metadata.studio)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//div[./b[contains(., "Categories")]]//a'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath(
        '//p[contains(., "Starring")]//following-sibling::a[1]')
    if actors:
        for actorLink in actors:
            actorName = actorLink.text_content().strip()
            actorPhotoURL = ''

            movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//img[contains(@src, "th8")]/@src',
    ]

    try:
        req = PAutils.HTTPRequest(
            detailsPageElements.xpath('//@href[contains(., "viewer")]')[0])
        photoPageElements = HTML.ElementFromString(req.text)
        for xpath in xpaths:
            for img in photoPageElements.xpath(xpath):
                art.append(img.replace('/th8', ''))
    except:
        pass

    try:
        img = detailsPageElements.xpath('//div[@id="moviewrap"]//@src')[0]
        art.append(img)
    except:
        pass

    images = []
    posterExists = False
    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(
                    posterUrl, headers={'Referer': 'http://www.data18.com'})
                images.append(image)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > width:
                    # Item is a poster
                    posterExists = True
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > height:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    if not posterExists:
        for idx, image in enumerate(images, 1):
            try:
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[art[idx - 1]] = Proxy.Media(
                        image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    metadata_id = metadata.id.split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    if not sceneURL.startswith('http'):
        sceneURL = PAsearchSites.getSearchBaseURL(siteNum) + sceneURL
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(
        detailsPageElements.xpath('//h1[@class="watchpage-title"]')
        [0].text_content().strip(), siteNum)

    # Studio
    metadata.studio = 'AnalVids'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = detailsPageElements.xpath(
        '//a[@class="watchpage-studioname"]/text()')[0].strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    releaseDate = detailsPageElements.xpath(
        '//span[@class="scene-description__detail"]//a/text()')[0]
    date_object = parse(releaseDate)
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    genres = detailsPageElements.xpath('//dd/a[contains(@href, "/niche/")]')

    for genreLink in genres:
        genreName = genreLink.text_content().title()
        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    actors = detailsPageElements.xpath(
        '//dd/a[contains(@href, "model") and not(contains(@href, "forum"))]')
    for actorLink in actors:
        actorName = actorLink.text_content()

        actorPageURL = actorLink.get('href')
        req = PAutils.HTTPRequest(actorPageURL)
        actorPage = HTML.ElementFromString(req.text)
        actorPhotoURL = actorPage.xpath(
            '//div[@class="model--avatar"]//img/@src')[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Director
    director = metadata.directors.new()
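    # Giorgio Grandi's studios default to him as director; any director listed
    # on the page overrides this below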
    if tagline == 'Giorgio Grandi' or tagline == 'Giorgio\'s Lab':
        director.name = 'Giorgio Grandi'
    try:
        directors = detailsPageElements.xpath('//p[@class="director"]/a')
        for dirname in directors:
            director.name = dirname.text_content().strip()
    except:
        pass

    # Posters/Background
    art.append(
        detailsPageElements.xpath('//div[@id="player"]/@style')[0].split(
            'url(')[1].split(')')[0])

    for img in detailsPageElements.xpath(
            '//div[contains(@class, "thumbs2 gallery")]//img/@src'):
        art.append(img.split('?')[0])
        art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    return metadata
Example #15
def search(results, lang, siteNum, searchData):
    searchResults = []
    siteResults = []
    temp = []
    directID = False
    count = 0
    scene_matched = False

    sceneID = None
    parts = searchData.title.split()
    if unicode(parts[0], 'UTF-8').isdigit():
        sceneID = parts[0]

        if int(sceneID) > 100:
            searchData.title = searchData.title.replace(sceneID, '', 1).strip()
            movieURL = '%s/%s' % (PAsearchSites.getSearchBaseURL(siteNum),
                                  sceneID)
            searchResults.append(movieURL)
            directID = True

    scene_break = re.search(r'.*(?=Scene\s\d)', searchData.title)
    if scene_break:
        scene_break = (scene_break.group().strip(), searchData.title[-1])
        searchData.title = scene_break[0]

    searchData.encoded = searchData.title.replace(' ', '+')
    searchURL = '%s%s' % (PAsearchSites.getSearchSearchURL(siteNum),
                          searchData.encoded)
    req = PAutils.HTTPRequest(
        searchURL, headers={'Referer': 'http://www.data18.empirestores.co'})
    searchPageElements = HTML.ElementFromString(req.text)
    if not directID:
        for searchResult in searchPageElements.xpath(
                '//div[@class="product-card"]'):
            if scene_matched:
                break
            movieURL = '%s%s' % (
                PAsearchSites.getSearchBaseURL(siteNum),
                searchResult.xpath(
                    './div[@class="boxcover-container"]/a/@href')[0].strip())
            urlID = searchResult.xpath(
                './div[@class="boxcover-container"]/a/@href')[0].split("/")[1]
            if movieURL not in searchResults:
                titleNoFormatting = PAutils.parseTitle(
                    searchResult.xpath(
                        './div[@class="product-details"]/div/a/text()')
                    [0].strip(), siteNum)
                curID = PAutils.Encode(movieURL)
                siteResults.append(movieURL)

                if sceneID == urlID:
                    score = 100
                else:
                    score = 80 - Util.LevenshteinDistance(
                        searchData.title.lower(), titleNoFormatting.lower())

                if score > 70:
                    sceneURL = PAutils.Decode(curID)
                    req = PAutils.HTTPRequest(sceneURL)
                    detailsPageElements = HTML.ElementFromString(req.text)

                    # Find date on movie specific page
                    date = detailsPageElements.xpath(
                        '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Released:")]/text()'
                    )[0].strip()
                    if date and not date == 'unknown':
                        try:
                            releaseDate = datetime.strptime(
                                date, '%b %d, %Y').strftime('%Y-%m-%d')
                        except:
                            releaseDate = ''
                    else:
                        releaseDate = searchData.dateFormat(
                        ) if searchData.date else ''
                    displayDate = releaseDate if date else ''

                    # Studio
                    try:
                        studio = detailsPageElements.xpath(
                            '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Studio:")]/a/text()'
                        )[0].strip()
                    except:
                        studio = ''

                    if sceneID == urlID:
                        score = 100
                    elif searchData.date and displayDate:
                        score = 80 - Util.LevenshteinDistance(
                            searchData.date, releaseDate)
                    else:
                        score = 80 - Util.LevenshteinDistance(
                            searchData.title.lower(),
                            titleNoFormatting.lower())

                    if score == 80:
                        count += 1
                        temp.append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s [%s] %s' %
                                (titleNoFormatting, studio, displayDate),
                                score=score,
                                lang=lang))
                    else:
                        results.Append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s [%s] %s' %
                                (titleNoFormatting, studio, displayDate),
                                score=score,
                                lang=lang))

                    # Split Scenes
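                    # Multi-scene releases also emit one result per scene,
                    # with the scene number appended to the result id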
                    scenes = detailsPageElements.xpath(
                        '//div[@class="product-details-container"]/div[@class="container"]/div[@class="row"]'
                    )
                    sceneCount = (len(scenes) - 1) / 2
                    for sceneNum in range(0, sceneCount):
                        section = 'Scene %d' % (sceneNum + 1)
                        actorNames = ', '.join(
                            detailsPageElements.xpath(
                                '//div[@class="container"]/div[@class="row"][./div[@class="col-sm-6 text-right text-left-xs m-b-1"]][%d]/div[2]/div/a/text()'
                                % (sceneNum + 1)))

                        if scene_break and titleNoFormatting.replace(
                                '-',
                                ' ') == scene_break[0] and sceneNum + 1 == int(
                                    scene_break[1]):
                            scene_matched = True
                            results.Append(
                                MetadataSearchResult(
                                    id='%s|%d|%s|%d' %
                                    (curID, siteNum, releaseDate,
                                     sceneNum + 1),
                                    name='%s/#%d[%s][%s] %s' %
                                    (titleNoFormatting, sceneNum + 1,
                                     actorNames, studio, displayDate),
                                    score=100,
                                    lang=lang))
                            break
                        elif score == 80:
                            count += 1
                            temp.append(
                                MetadataSearchResult(
                                    id='%s|%d|%s|%d' %
                                    (curID, siteNum, releaseDate,
                                     sceneNum + 1),
                                    name='%s/#%d[%s][%s] %s' %
                                    (titleNoFormatting, sceneNum + 1,
                                     actorNames, studio, displayDate),
                                    score=score,
                                    lang=lang))
                        else:
                            results.Append(
                                MetadataSearchResult(
                                    id='%s|%d|%s|%d' %
                                    (curID, siteNum, releaseDate,
                                     sceneNum + 1),
                                    name='%s/#%d[%s][%s] %s' %
                                    (titleNoFormatting, sceneNum + 1,
                                     actorNames, studio, displayDate),
                                    score=score,
                                    lang=lang))
                else:
                    if score == 80:
                        count += 1
                        temp.append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s' % (titleNoFormatting),
                                score=score,
                                lang=lang))
                    else:
                        results.Append(
                            MetadataSearchResult(
                                id='%s|%d|%s' % (curID, siteNum, releaseDate),
                                name='%s' % (titleNoFormatting),
                                score=score,
                                lang=lang))

    googleResults = PAutils.getFromGoogleSearch(searchData.title, siteNum)
    for movieURL in googleResults:
        if ('/movies/' in movieURL and '.html' not in movieURL
                and movieURL not in searchResults
                and movieURL not in siteResults):
            searchResults.append(movieURL)

    for movieURL in searchResults:
        req = PAutils.HTTPRequest(movieURL)
        detailsPageElements = HTML.ElementFromString(req.text)
        urlID = re.sub(r'.*/', '', movieURL)
        titleNoFormatting = PAutils.parseTitle(
            detailsPageElements.xpath('//h1/text()')[0].strip(), siteNum)
        curID = PAutils.Encode(movieURL)

        date = detailsPageElements.xpath(
            '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Released:")]/text()'
        )[0].strip()
        if date and not date == 'unknown':
            try:
                releaseDate = datetime.strptime(
                    date, '%b %d %Y').strftime('%Y-%m-%d')
            except:
                releaseDate = ''
        else:
            releaseDate = searchData.dateFormat() if searchData.date else ''
        displayDate = releaseDate if date else ''

        if sceneID == urlID:
            score = 100
        elif searchData.date and displayDate:
            score = 80 - Util.LevenshteinDistance(searchData.date, releaseDate)
        else:
            score = 80 - Util.LevenshteinDistance(searchData.title.lower(),
                                                  titleNoFormatting.lower())

        # Studio
        try:
            studio = detailsPageElements.xpath(
                '//ul[@class="list-unstyled m-b-2"]/li[contains(., "Studio:")]/a/text()'
            )[0].strip()
        except:
            studio = ''

        if score == 80:
            count += 1
            temp.append(
                MetadataSearchResult(id='%s|%d|%s' %
                                     (curID, siteNum, releaseDate),
                                     name='%s [%s] %s' %
                                     (titleNoFormatting, studio, displayDate),
                                     score=score,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id='%s|%d|%s' %
                                     (curID, siteNum, releaseDate),
                                     name='%s [%s] %s' %
                                     (titleNoFormatting, studio, displayDate),
                                     score=score,
                                     lang=lang))

        # Split Scenes
        scenes = detailsPageElements.xpath(
            '//div[@class="product-details-container"]/div[@class="container"]/div[@class="row"]'
        )
        sceneCount = (len(scenes) - 1) / 2
        for sceneNum in range(0, sceneCount):
            actorNames = ', '.join(
                detailsPageElements.xpath(
                    '//div[@class="container"]/div[@class="row"][./div[@class="col-sm-6 text-right text-left-xs m-b-1"]][%d]/div[2]/div/a/text()'
                    % (sceneNum + 1)))
            if score == 80:
                count += 1
                temp.append(
                    MetadataSearchResult(
                        id='%s|%d|%s|%d' %
                        (curID, siteNum, releaseDate, sceneNum + 1),
                        name='%s/#%d[%s][%s] %s' %
                        (titleNoFormatting, sceneNum + 1, actorNames, studio,
                         displayDate),
                        score=score,
                        lang=lang))
            else:
                results.Append(
                    MetadataSearchResult(
                        id='%s|%d|%s|%d' %
                        (curID, siteNum, releaseDate, sceneNum + 1),
                        name='%s/#%d[%s][%s] %s' %
                        (titleNoFormatting, sceneNum + 1, actorNames, studio,
                         displayDate),
                        score=score,
                        lang=lang))

    for result in temp:
        if count > 1 and result.score == 80:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=79,
                                     lang=lang))
        else:
            results.Append(
                MetadataSearchResult(id=result.id,
                                     name=result.name,
                                     score=result.score,
                                     lang=lang))

    return results
Example #16
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath('//div[@class="update_title"]')[0].text_content().strip()

    # Summary
    metadata.summary = detailsPageElements.xpath('//span[@class="update_description"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'New Sensations'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//span[@class="update_tags"]/a'):
        genreName = PAutils.parseTitle(genreLink.text_content().replace('-', '').strip(), siteNum)

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//span[@class="update_models"]/a'):
        actorName = actorLink.text_content().strip()

        modelURL = actorLink.xpath('.//@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        actorPageElements = HTML.ElementFromString(req.text)
        actorPhotoURL = actorPageElements.xpath('//div[@class="cell_top cell_thumb"]/img/@src0_1x')[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    xpaths = [
        '//div[@class="mejs-layers"]//img/@src',
    ]

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            art.append(img)

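    # actorPageElements still holds the last model page fetched above (only set
    # when at least one actor was found); scan its DVD listing for this title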
    for scene in actorPageElements.xpath('//div[@class="table dvd_info"]'):
        resultTitle = scene.xpath('.//div[@class="update_title"]')[0].text_content()
        if resultTitle.lower() == metadata.title.lower():
            for img in scene.xpath('.//div[@class="cell"]//@src0_3x'):
                art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = detailsPageElements.xpath(
        '//h3')[0].text_content().strip()

    # Summary
    metadata.summary = ' '.join(
        detailsPageElements.xpath(
            '//div[@class="videoDetails clear"]//p/span//text()')).replace(
                'FULL VIDEO', '')

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum)
    metadata.studio = tagline
    if Prefs['collections_addsitename']:
        metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath(
            '//li[contains(., "Tags")]//parent::ul//a'):
        genreName = PAutils.parseTitle(genreLink.text_content().strip(),
                                       siteNum)

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//li[@class="update_models"]'):
        actorName = actorLink.text_content().strip()

        modelURL = actorLink.xpath('.//@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        actorPageElements = HTML.ElementFromString(req.text)
        actorPhotoURL = actorPageElements.xpath(
            '//div[@class="profile-pic"]//@src0_3x')[0]
        if 'http' not in actorPhotoURL:
            actorPhotoURL = PAsearchSites.getSearchBaseURL(
                siteNum) + actorPhotoURL

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//div[@class="player_thumbs"]//@src0_3x',
        '//div[@class="player full_width"]/script/text()',
    ]

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            search = re.search(r'(?<=src0_3x=").*?(?=")', img)
            if search:
                img = search.group(0)
            if 'http' not in img:
                img = PAsearchSites.getSearchBaseURL(siteNum) + img
            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])

    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    if '/en/' in sceneURL:
        metadata.title = PAutils.parseTitle(
            detailsPageElements.xpath('//title')[0].text_content().split('|')
            [0].split('-')[0].strip(), siteNum)
    else:
        metadata.title = detailsPageElements.xpath(
            '//title')[0].text_content().split('|')[0].split('-')[0].strip()

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="description clearfix"]')[0].text_content().split(
            ':')[-1].strip().replace('\n', ' ')

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.studio = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="categories"]/a'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    if '/en/' in sceneURL:
        if '&' in metadata.title:
            actors = metadata.title.split('&')
        else:
            actors = detailsPageElements.xpath(
                '//span[@class="site-name"]')[0].text_content().split(' and ')
    else:
        if '&' in metadata.title:
            actors = metadata.title.split('&')
        else:
            actors = detailsPageElements.xpath(
                '//span[@class="site-name"]')[0].text_content().split(' y ')

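    # Resolve each performer against the site's alphabetical actress index to
    # get the canonical name and headshot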
    for actorLink in actors:
        actorName = actorLink.strip()

        modelURL = '%s/actrices/%s' % (PAsearchSites.getSearchBaseURL(siteNum),
                                       metadata.title[0].lower())
        req = PAutils.HTTPRequest(modelURL)
        modelPageElements = HTML.ElementFromString(req.text)
        for model in modelPageElements.xpath(
                '//div[@class="c-boxlist__box--image"]//parent::a'):
            if model.text_content().strip().lower() == metadata.title.lower():
                actorName = metadata.title
                break

        if 'africa' in actorName.lower():
            actorName = 'Africat'
        elif metadata.title == 'MAMADA ARGENTINA':
            actorName = 'Alejandra Argentina'
        elif actorName == 'Alika':
            actorName = 'Alyka'

        modelURL = '%s/actrices/%s' % (PAsearchSites.getSearchBaseURL(siteNum),
                                       actorName[0].lower())
        req = PAutils.HTTPRequest(modelURL)
        modelPageElements = HTML.ElementFromString(req.text)

        actorPhotoURL = ''
        for model in modelPageElements.xpath(
                '//div[@class="c-boxlist__box--image"]//parent::a'):
            if model.text_content().strip().lower() == actorName.lower():
                actorPhotoURL = model.xpath('.//img/@src')[0].strip()
                break

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []

    img = detailsPageElements.xpath(
        '//div[@class="top-area-content"]/script')[0].text_content().strip()
    posterImage = re.search(r'(?<=posterImage:\s").*(?=")', img)
    if posterImage:
        img = posterImage.group(0)
        art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(
        detailsPageElements.xpath('//span[@class="vdetitle"] | //h1')
        [0].text_content().strip(), siteNum)

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//span[@class="vdtx"] | //p[@class="videoDetail"]')[0].text_content(
        ).strip().replace('\n', '')

    # Studio
    metadata.studio = 'BangBros'

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = detailsPageElements.xpath(
        '//script[@type="text/javascript"][contains(., "siteName")]'
    )[0].text_content().split('siteName = \'')[-1].split('\'')[0].strip()
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    genres = detailsPageElements.xpath(
        '//meta[@http-equiv="keywords"]/@content')[0].split(',')
    for genreLink in genres:
        if tagline.replace(' ',
                           '').lower() not in genreLink.replace(' ',
                                                                '').lower():
            genreName = genreLink.strip()

            movieGenres.addGenre(genreName)

    # Posters
    xpaths = [
        '//div[@class="hideWhilePlaying"]/img/@src',
    ]

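    # Mia Khalifa scenes carry a shoot id in the player script that maps to a
    # fixed members image URL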
    if tagline == 'Mia Khalifa':
        movieActors.addActor('Mia Khalifa', '')
        shootId = detailsPageElements.xpath(
            '//script[@type="text/javascript"][contains(., "siteName")]'
        )[0].text_content().split('com/')[-1].split('/')[0].strip()

        art.append(
            'http://images.miakhalifa.com/shoots/%s/members/626x420.jpg' %
            shootId)

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            img = re.sub(r'////', 'http://', img)
            if 'http' not in img:
                img = 'http:' + img

            art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    return metadata
Example #20
def update(metadata, siteNum, movieGenres, movieActors):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])

    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//title')[0].text_content().strip(), siteNum)

    # Summary
    metadata.summary = detailsPageElements.xpath('//div[@class="player-info-desc"]')[0].text_content().strip()

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = PAsearchSites.getSearchSiteName(siteNum).strip()
    metadata.studio = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//span[@class="tags"]/a'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//div[@class="player-info-row"]/a'):
        actorName = actorLink.text_content().strip()

        modelURL = actorLink.xpath('.//@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        actorPageElements = HTML.ElementFromString(req.text)
        actorPhotoURL = actorPageElements.xpath('//div[@class="pornstar-bio-left"]//@src')[0]

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []
    xpaths = [
        '//div[@id="player"]//script',
    ]

    for xpath in xpaths:
        for img in detailsPageElements.xpath(xpath):
            match = re.search(r'(?<=(image: ")).*(?=")', img.text_content())
            if match:
                img = match.group(0)

                art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    splitScene = False
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    if len(metadata_id) > 3:
        Log('Split Scene: %d' % int(metadata_id[3]))
        splitScene = True

    # Title
    metadata.title = PAutils.parseTitle(
        detailsPageElements.xpath('//h1[@class="description"]/text()')[0],
        siteNum).strip()
    if splitScene:
        metadata.title = ("%s [Scene %s]" % (metadata.title, metadata_id[3]))

    # Summary
    metadata.summary = detailsPageElements.xpath(
        '//div[@class="synopsis"]')[0].text_content().strip()

    # Studio
    try:
        studio = detailsPageElements.xpath(
            '//div[@class="studio"]/a/text()')[0].strip()
    except:
        studio = ''

    if studio:
        metadata.studio = studio

    # Tagline and Collection(s)
    metadata.collections.clear()
    tagline = ''
    try:
        tagline = detailsPageElements.xpath(
            '//p[contains(text(), "A scene from")]/a/text()')[0].strip()
        metadata.collections.add(tagline)
    except:
        try:
            tagline = detailsPageElements.xpath(
                '//a[@data-label="Series List"]/h2/text()')[0].strip().replace(
                    "Series:", "").replace("(%s)" % studio, "").strip()
            metadata.collections.add(tagline)
        except:
            metadata.collections.add(metadata.studio)
    Log("Tagline: %s" % tagline)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//div[@class="categories"]/a'):
        genreName = genreLink.text_content().strip()
        movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()

    actors = []
    if splitScene:
        actorNames = detailsPageElements.xpath(
            '//div[@class="item-grid item-grid-scene"]/div[@class="grid-item"][%d]/div/div[@class="scene-cast-list"]/a/text()'
            % int(metadata_id[3]))
        for name in actorNames:
            actors.append(
                detailsPageElements.xpath(
                    '//div[@class="video-performer"]/a[./img[@title="%s"]]/span/span'
                    % (name))[0])
    else:
        actors = detailsPageElements.xpath(
            '//div[@class="video-performer"]/a/span/span')

    for actorLink in actors:
        actorName = actorLink.text_content().strip()
        actorPhotoURL = detailsPageElements.xpath(
            '//div[@class="video-performer"]/a/img[@title="%s"]/@data-bgsrc' %
            (actorName))[0].strip()
        if actorName:
            movieActors.addActor(actorName, actorPhotoURL)

    # Director
    metadata.directors.clear()
    director = metadata.directors.new()
    try:
        directorName = detailsPageElements.xpath(
            '//div[@class="director"]/a/text()')[0].strip()
        if not directorName == 'Unknown':
            director.name = directorName
    except:
        pass

    # Posters
    art = []
    cover = '//div[@id="video-container-details"]/div/section/a/picture/source[1]/@data-srcset'
    scene = '//div[@class="item-grid item-grid-scene"]/div/a/img/@src'
    gallery = '//div[@id="video-container-details"]/div/section/div[2]/div[2]/a[@data-label="Gallery"]/@href'
    gallery_image = '//div[@class="item-grid item-grid-gallery"]/div[@class="grid-item"]/a/img/@data-src'
    try:
        art.append(detailsPageElements.xpath(cover)[0])
        gallery = detailsPageElements.xpath(gallery)
        if gallery:
            req = PAutils.HTTPRequest(
                '%s%s' % (PAsearchSites.getSearchBaseURL(siteNum), gallery[0]))
            galleryPageElement = HTML.ElementFromString(req.text)
            art = art + galleryPageElement.xpath(gallery_image)
        if splitScene:
            art.append(detailsPageElements.xpath(scene)[int(metadata_id[3])])
    except:
        pass

    images = []
    posterExists = False
    Log('Artwork found: %d' % len(art))
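    # Limit how many posters (20) and background art images (5) are kept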
    numPosters = 20
    numArt = 5
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
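                # Randomly skip roughly half of the images to keep the number
                # of downloads manageable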
                if random.randint(0, 1) == 0 and idx != 0:
                    continue
                image = PAutils.HTTPRequest(
                    posterUrl,
                    headers={'Referer': 'http://www.data18.empirestores.co'})
                images.append(image)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > width:
                    # Item is a poster
                    if numPosters != 0:
                        numPosters = numPosters - 1
                    else:
                        continue
                    posterExists = True
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > height:
                    # Item is an art item
                    if numArt != 0:
                        numArt = numArt - 1
                    else:
                        continue
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    if not posterExists:
        for idx, image in enumerate(images, 1):
            try:
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1:
                    # Item is a poster
                    metadata.posters[art[idx - 1]] = Proxy.Media(
                        image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors, art):
    metadata_id = str(metadata.id).split('|')
    sceneURL = PAutils.Decode(metadata_id[0])
    sceneDate = metadata_id[2]
    req = PAutils.HTTPRequest(sceneURL)
    detailsPageElements = HTML.ElementFromString(req.text)

    # Title
    metadata.title = PAutils.parseTitle(detailsPageElements.xpath('//h1')[0].text_content().strip(), siteNum)

    # Summary
    metadata.summary = detailsPageElements.xpath('//span[@class="grisoscuro"]')[0].text_content().strip()

    # Studio
    metadata.studio = 'FAKings'

    # Collections / Tagline
    metadata.collections.clear()
    tagline = PAutils.parseTitle(detailsPageElements.xpath('//strong[contains(., "Serie")]//following-sibling::a')[0].text_content().strip(), siteNum)
    metadata.tagline = tagline
    metadata.collections.add(tagline)

    # Genres
    movieGenres.clearGenres()
    for genreLink in detailsPageElements.xpath('//strong[contains(., "Categori")]//following-sibling::a'):
        genreName = genreLink.text_content().strip()

        movieGenres.addGenre(genreName)

    # Release Date
    if sceneDate:
        date_object = parse(sceneDate)
        metadata.originally_available_at = date_object
        metadata.year = metadata.originally_available_at.year

    # Posters
    img = ''

    # Actors
    movieActors.clearActors()
    for actorLink in detailsPageElements.xpath('//strong[contains(., "Actr")]//following-sibling::a'):
        actorName = actorLink.text_content().strip()

        modelURL = actorLink.xpath('.//@href')[0]
        req = PAutils.HTTPRequest(modelURL)
        actorPageElements = HTML.ElementFromString(req.text)
        actorPhotoURL = actorPageElements.xpath('//div[@class="zona-imagen"]//img[@class]/@src')[0].strip()

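        # Reuse the scene thumbnail from the actress listing entry that links
        # back to this scene URL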
        if not img:
            for scene in actorPageElements.xpath('//div[@class="zona-listado2"]'):
                if sceneURL == scene.xpath('.//@href')[0]:
                    img = scene.xpath('.//img[@class]/@src')[0].strip()

                    art.append(img)
                    break

        movieActors.addActor(actorName, actorPhotoURL)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if height > 1:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content, sort_order=idx)
                if width > 100:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content, sort_order=idx)
            except:
                pass

    return metadata
def update(metadata, lang, siteNum, movieGenres, movieActors):
    metadata_id = str(metadata.id).split('|')
    sceneName = PAutils.Decode(metadata_id[0])
    sceneURL = PAsearchSites.getSearchBaseURL(
        siteNum) + '/graphql?query=' + update_query % (
            sceneName, PAsearchSites.getSearchSiteName(siteNum).upper())

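    # Scene details come from the site's GraphQL endpoint; update_query and
    # getDatafromAPI are helpers defined elsewhere in this agent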
    detailsPageElements = getDatafromAPI(sceneURL)
    video = detailsPageElements['findOneVideo']
    pictureset = video['carousel']

    # Title
    metadata.title = PAutils.parseTitle(video['title'], siteNum)

    # Summary
    metadata.summary = video['description']

    # Director
    if video['directors']:
        director = metadata.directors.new()
        director.name = video['directors'][0]['name']

    # Studio
    metadata.studio = PAsearchSites.getSearchSiteName(siteNum).title()

    # Tagline and Collection(s)
    metadata.collections.clear()
    metadata.collections.add(metadata.studio)

    # Release Date
    date_object = parse(video['releaseDate'])
    metadata.originally_available_at = date_object
    metadata.year = metadata.originally_available_at.year

    # Genres
    if video['categories']:
        movieGenres.clearGenres()
        for tag in video['categories']:
            genreName = tag['name']

            movieGenres.addGenre(genreName)

    # Actors
    movieActors.clearActors()
    actors = video['models']
    for actor in actors:
        actorName = actor['name']
        actorPhotoURL = ''
        if actor['images']:
            actorPhotoURL = actor['images']['listing'][0]['highdpi']['double']

        movieActors.addActor(actorName, actorPhotoURL)

    # Posters
    art = []

    for name in ['movie', 'poster']:
        if name in video['images'] and video['images'][name]:
            image = video['images'][name][-1]
            if 'highdpi' in image:
                art.append(image['highdpi']['3x'])
            else:
                art.append(image['src'])
            break

    for image in pictureset:
        img = image['listing'][0]['highdpi']['triple']

        art.append(img)

    Log('Artwork found: %d' % len(art))
    for idx, posterUrl in enumerate(art, 1):
        if not PAsearchSites.posterAlreadyExists(posterUrl, metadata):
            # Download image file for analysis
            try:
                image = PAutils.HTTPRequest(posterUrl)
                im = StringIO(image.content)
                resized_image = Image.open(im)
                width, height = resized_image.size
                # Add the image proxy items to the collection
                if width > 1 or height > width:
                    # Item is a poster
                    metadata.posters[posterUrl] = Proxy.Media(image.content,
                                                              sort_order=idx)
                if width > 100 and width > height and idx > 1:
                    # Item is an art item
                    metadata.art[posterUrl] = Proxy.Media(image.content,
                                                          sort_order=idx)
            except:
                pass

    return metadata