Python MetaData Exemples, app.core.model.meta_data.MetaData Python Exemples

Exemple #1

0

Afficher le fichier

def search(meta_info, user_setting):
    """Return a one-element list of meta data for the media in ``meta_info``.

    The cache is consulted first via ``check_cache``; when it yields nothing,
    an empty ``MetaData`` object stands in for data that would be fetched
    from the internet.

    meta_info: object read through ``.get`` for 'video_title' and 'part_file'.
    user_setting: not used by this function.
    """
    plugin_name = config.get_info('en').get('name')
    video_title = meta_info.get('video_title')
    # The next three locals are read/initialised but not used further here.
    part_file = meta_info.get('part_file')
    movie_type = ''
    title_style = ''
    log('info', 'title:%s' % video_title, plugin_name)

    # code is the formatted media name which is used as the cache lookup key
    code = 'formatted file name'
    cached = check_cache(code, get_info('en').get('name'))
    if not cached:
        # nothing cached: fall back to a fresh MetaData (internet lookup goes here)
        return [MetaData()]
    return [cached]

Exemple #2

0

Afficher le fichier

Fichier : arzon.py Projet : gsetant/adultscraperx

    def analysis_media_html_byxpath(self, html, q):
        """
        Parse an arzon.jp detail page into a MetaData object using XPath.

        html: parsed document exposing ``.xpath()`` (presumably an lxml
            element -- confirm against the caller).
        q: query string; its upper-cased, cleaned form becomes ``media.number``.
        return: the populated MetaData. Fields whose nodes are not found on
            the page are simply not assigned.
        """
        media = MetaData()
        number = self.tools.cleanstr(q.upper())
        media.number = number

        xpath_title = "//div[@class='detail_title_new2']/table/tr/td[2]/h1"
        title = html.xpath(xpath_title)
        if len(title) > 0:
            title = self.tools.cleanstr(title[0].text)
            media.title = title

        xpath_poster = "//table[@class='item_detail']//tr[1]//td[1]//a//img[@class='item_img']/@src"
        poster = html.xpath(xpath_poster)
        if len(poster) > 0:
            poster = self.tools.cleanstr(poster[0])
            media.poster = 'https:%s' % poster
            media.thumbnail = 'https:%s' % poster

        xpath_summary = "//table[@class='item_detail']//tr[2]//td[@class='text']//div[@class='item_text']/text()"
        summary = html.xpath(xpath_summary)
        # The summary is taken from the second text node, so at least two
        # nodes must exist before indexing.
        if len(summary) > 1:
            summary = self.tools.cleanstr(summary[1])
            media.summary = summary

        xpath_studio = "//div[@class='item_register']/table[@class='item']//tr[2]/td[2]/a"
        studio = html.xpath(xpath_studio)
        if len(studio) > 0:
            studio = self.tools.cleanstr(studio[0].text)
            media.studio = studio

        xpath_directors = "//table[@class='item']//tr[5]//td[2]/a"
        directors = html.xpath(xpath_directors)
        if len(directors) > 0:
            directors = self.tools.cleanstr(directors[0].text)
            media.directors = directors

        xpath_collections = "//table[@class='item']//tr[4]//td[2]//a"
        collections = html.xpath(xpath_collections)
        # Guard against a missing link as well as a link without text.
        if len(collections) > 0 and collections[0].text is not None:
            collections = self.tools.cleanstr(collections[0].text)
            media.collections = collections

        xpath_year = "//table[@class='item']//tr[6]/td[2]/text()"
        year = html.xpath(xpath_year)
        if len(year) > 0:
            year = self.tools.cleanstr(year[0])
            media.year = self.tools.formatdatetime(year)

        # Same table cell as the year above.
        xpath_originally_available_at = "//table[@class='item']//tr[6]/td[2]/text()"
        originally_available_at = html.xpath(xpath_originally_available_at)
        if len(originally_available_at) > 0:
            originally_available_at = self.tools.cleanstr(
                originally_available_at[0])
            media.originally_available_at = self.tools.formatdatetime(
                originally_available_at)

        xpath_category = "//div[@id='adultgenre2']//table//tr/td[2]//ul//li/a"
        categorys = html.xpath(xpath_category)
        category_list = []
        for category in categorys:
            category_list.append(self.tools.cleanstr(category.text))
        categorys = ','.join(category_list)
        if len(categorys) > 0:
            media.category = categorys

        actor = {}
        xpath_actor_name = "//div[@class='item_register']//table[@class='item']//tr[1]/td[2]//a"
        xpath_actor_url = "//div[@class='item_register']//table[@class='item']//tr[1]/td[2]/a/@href"
        xpath_actor_image = "//table[@class='p_list1']//img/@src"

        actor_name = html.xpath(xpath_actor_name)
        actor_url = html.xpath(xpath_actor_url)

        if len(actor_name) > 0:
            for i, actorname in enumerate(actor_name):
                # Default to no image; only a successful fetch of the actor
                # page that actually contains an image overrides it. This
                # avoids reusing the previous actor's image (or hitting an
                # undefined name) when a fetch fails.
                actor_image = ''
                # The name and href queries are not identical, so the two
                # lists may differ in length.
                if i < len(actor_url):
                    actor_page = self.get_html_byurl(
                        'https://www.arzon.jp%s' % actor_url[i])
                    if actor_page['issuccess']:
                        actorimageurl = actor_page['html'].xpath(
                            xpath_actor_image)
                        if len(actorimageurl) > 0:
                            actor_image = 'https:%s' % actorimageurl[0]
                actor.update({actorname.text: actor_image})
            media.actor = actor

        return media

Exemple #3

0

Afficher le fichier

Fichier : Fc2Club.py Projet : gsetant/adultscraperx

    def analysisMediaHtmlByxpath(self, html, q):
        """
        Parse an FC2 club detail page into a MetaData object using XPath.

        html: parsed document exposing ``.xpath()`` (presumably an lxml
            element -- confirm against the caller).
        q: query string; its upper-cased, cleaned form becomes ``media.number``.
        return: the populated MetaData.

        The title, poster, collection and actor lookups index the first
        match directly, so an IndexError propagates when one is missing.
        """
        media = MetaData()
        media.number = self.tools.cleanstr(q.upper())

        # The page heading is used for both the title and the summary.
        heading = html.xpath("/html/body/div[2]/div/div[1]/h3")[0].text
        media.title = heading
        media.summary = heading

        # The image src is appended to the site host to form the poster URL.
        url_prefix = 'https://' + self.basicUrl
        slider_images = html.xpath("//*[@id='slider']/ul[1]/li[1]/img")
        cover = url_prefix + slider_images[0].attrib['src']
        media.poster = cover
        media.thumbnail = cover

        # Fixed values: studio is constant and no director is scraped.
        media.studio = 'FC2'
        media.directors = ''

        series_links = html.xpath("/html/body/div[2]/div/div[1]/h5[3]/a[1]")
        media.collections = series_links[0].text

        # No release date is scraped for this site.
        media.year = ''
        media.originally_available_at = ''

        genre_links = html.xpath("/html/body/div[2]/div/div[1]/h5[6]/a")
        genres = ','.join(
            [self.tools.cleanstr(link.text) for link in genre_links])
        if genres:
            media.category = genres

        # Only the first actor link is used, stored as a plain string.
        performer = html.xpath("/html/body/div[2]/div/div[1]/h5[5]/a")[0].text
        if not performer == '':
            media.actor = performer

        return media

Exemple #4

0

Afficher le fichier

Fichier : javbus.py Projet : gsetant/adultscraperx

    def analysis_media_html_byxpath(self, html, q):
        """
        Parse a javbus detail page into a MetaData object using XPath.

        html: parsed document exposing ``.xpath()`` (presumably an lxml
            element -- confirm against the caller).
        q: query string; its upper-cased, cleaned form becomes ``media.number``.
        return: the populated MetaData. Fields whose nodes are not found on
            the page are simply not assigned.
        """
        media = MetaData()
        number = self.tools.cleanstr(q.upper())
        media.number = number

        xpath_title = "//div[@class='container']/h3/text()"
        title = html.xpath(xpath_title)
        if len(title) > 0:
            title = self.tools.cleantitlenumber(self.tools.cleanstr(title[0]),
                                                number)
            media.title = title

        xpath_poster = "//div[@class='col-md-9 screencap']/a[@class='bigImage']/img/@src"
        poster = html.xpath(xpath_poster)
        if len(poster) > 0:
            poster = self.checkUrl + self.tools.cleanstr(poster[0])
            media.poster = poster
            media.thumbnail = poster

        xpath_studio = "//div[@class='col-md-3 info']/p[5]/a/text()"
        studio = html.xpath(xpath_studio)
        if len(studio) > 0:
            studio = self.tools.cleanstr(studio[0])
            media.studio = studio

        xpath_directors = "//div[@class='col-md-3 info']/p[4]/a/text()"
        directors = html.xpath(xpath_directors)
        if len(directors) > 0:
            directors = self.tools.cleanstr(directors[0])
            media.directors = directors

        xpath_collections = "//div[@class='col-md-3 info']/p[6]/a/text()"
        collections = html.xpath(xpath_collections)
        if len(collections) > 0:
            collections = self.tools.cleanstr(collections[0])
            media.collections = collections

        xpath_year = "/html/body/div[@class='container']/div[@class='row movie']/div[@class='col-md-3 info']/p[2]/text()"
        year = html.xpath(xpath_year)
        if len(year) > 0:
            year = self.tools.cleanstr(year[0])
            media.originally_available_at = year
            media.year = year

        xpath_category = "/html/body/div[@class='container']/div[@class='row movie']/div[@class='col-md-3 info']/p[8]/span[@class='genre']/a"
        categorys = html.xpath(xpath_category)
        category_list = []
        for category in categorys:
            category_list.append(self.tools.cleanstr(category.text))
        categorys = ','.join(category_list)
        if len(categorys) > 0:
            media.category = categorys

        actor = {}
        xpath_actor_name = "/html/body/div[5]/div[1]/div[2]/p/span/a"
        xpath_actor_url = "//div[@id='star-div']//img/@src"

        actor_name = html.xpath(xpath_actor_name)
        actor_url = html.xpath(xpath_actor_url)
        if len(actor_name) > 0:
            for i, actorname in enumerate(actor_name):
                # Names and images come from different parts of the page, so
                # there may be fewer images than names; treat a missing image
                # like the site's 'nowprinting' placeholder.
                image = actor_url[i] if i < len(actor_url) else ''
                if image == '' or 'nowprinting' in image:
                    actor.update({actorname.text: ''})
                else:
                    actor.update({actorname.text: self.checkUrl + image})
            media.actor = actor

        return media

Exemple #5

0

Afficher le fichier

Fichier : caribbean.py Projet : gsetant/adultscraperx

    def analysisMediaHtmlByxpath(self, html, q):
        """
        Parse a caribbean detail page into a MetaData object using XPath.

        html: parsed document exposing ``.xpath()`` (presumably an lxml
            element -- confirm against the caller).
        q: query string; its upper-cased, cleaned form becomes ``media.number``
            and is also embedded in the poster URL.
        return: the populated MetaData.

        Title, summary and release date index the first match directly, so
        an IndexError propagates when one of them is missing.
        """
        media = MetaData()
        number = self.tools.cleanstr(q.upper())
        media.number = number
        media.web = 'caribbean'

        title_nodes = html.xpath(
            "//*[@id='moviepages']/div/div[1]/div[1]/div[2]/h1")
        media.title = title_nodes[0].text

        summary_nodes = html.xpath("//*[@id='moviepages']/div/div[1]/div[1]/p")
        media.summary = summary_nodes[0].text

        # Poster and thumbnail share one URL built from the host and number.
        cover_url = 'https://%s/moviepages/%s/images/l_l.jpg' % (
            self.basicUrl, number)
        media.poster = cover_url
        media.thumbnail = cover_url

        # Fixed values for this site.
        media.studio = 'Caribbeancom'
        media.directors = ''
        media.collections = 'Caribbeancom'

        release_nodes = html.xpath(
            "//*[@id='moviepages']/div/div[1]/div[1]/ul/li[2]/span[2]")
        release = release_nodes[0].text
        media.year = release
        media.originally_available_at = release

        genre_links = html.xpath(
            "//*[@id='moviepages']/div/div[1]/div[1]/ul/li[4]/span[2]/a")
        genres = ','.join(
            [self.tools.cleanstr(link.text) for link in genre_links])
        if genres:
            media.category = genres

        # Every actor is mapped to the same fixed logo image; the mapping is
        # assigned even when it is empty.
        performer_nodes = html.xpath(
            "//*[@id='moviepages']/div/div[1]/div[1]/ul/li[1]/span[2]/a/span")
        media.actor = {
            node.text: 'https://images.d2pass.com/images/d2p_toolbar/images/d2p_logo.png'
            for node in performer_nodes
        }
        return media

Exemple #6

0

Afficher le fichier

Fichier : caribbeancompr.py Projet : gsetant/adultscraperx

    def analysisMediaHtmlByxpath(self, html, q):
        """
        Parse a caribbeancompr detail page into a MetaData object using XPath.

        html: parsed document exposing ``.xpath()`` (presumably an lxml
            element -- confirm against the caller).
        q: query string; its upper-cased, cleaned form becomes ``media.number``
            and is also embedded in the poster URL.
        return: the populated MetaData.

        Title and summary index the first match directly, so an IndexError
        propagates when either is missing; the other scraped fields are
        skipped when absent.
        """
        media = MetaData()
        number = self.tools.cleanstr(q.upper())
        media.number = number
        media.web = 'caribbeancompr'

        title_nodes = html.xpath(
            "//*[@id='moviepages']/div/div[2]/div[1]/div/div[2]/h1")
        media.title = title_nodes[0].text

        summary_nodes = html.xpath(
            "//*[@id='moviepages']/div/div[2]/div[1]/div/p")
        media.summary = summary_nodes[0].text

        # Poster and thumbnail share one URL built from the host and number.
        cover_url = 'https://%s/moviepages/%s/images/l_l.jpg' % (
            self.basicUrl, number)
        media.poster = cover_url
        media.thumbnail = cover_url

        # Fixed values for this site.
        media.studio = 'Caribbeancompr'
        media.directors = ''
        media.collections = 'Caribbeancompr'

        release_texts = html.xpath(
            "//li[@class='movie-spec'][2]/span[@class='spec-content']/text()")
        if release_texts:
            release = self.tools.cleanstr(release_texts[0])
            media.year = release
            media.originally_available_at = release

        genre_texts = html.xpath(
            "//li[@class='movie-spec'][5]/span[@class='spec-content']/a/text()")
        genres = ','.join([self.tools.cleanstr(text) for text in genre_texts])
        if genres:
            media.category = genres

        # Actors are recorded by name with an empty image URL.
        performer_links = html.xpath(
            "//li[@class='movie-spec'][1]/span[@class='spec-content']/a")
        if performer_links:
            media.actor = {link.text: '' for link in performer_links}

        return media

Exemple #7

0

Afficher le fichier

    def analysis_media_html_byxpath(self, html, q):
        """
        Parse a pacopacomama detail page into a MetaData object using XPath.

        html: parsed document exposing ``.xpath()`` (presumably an lxml
            element -- confirm against the caller).
        q: query string; its upper-cased, cleaned form becomes ``media.number``
            and is also embedded in the poster and thumbnail URLs.
        return: the populated MetaData. Scraped fields whose nodes are not
            found on the page are simply not assigned.
        """
        media = MetaData()
        number = self.tools.cleanstr(q.upper())
        media.number = number

        xpath_title = "//div[@id='main']/h1/text()"
        title = html.xpath(xpath_title)
        if len(title) > 0:
            title = self.tools.cleantitlenumber(self.tools.cleanstr(title[0]),
                                                number)
            media.title = title

        xpath_summary = "//dd[@class='comment']/div/text()"
        summary = html.xpath(xpath_summary)
        if len(summary) > 0:
            summary = summary[0]
            media.summary = summary

        # Image URLs are built from the number rather than scraped.
        media.poster = 'https://www.pacopacomama.com/moviepages/%s/images/poster_en.jpg' % number
        media.thumbnail = 'https://www.pacopacomama.com/moviepages/%s/images/l/1.jpg' % number

        # Fixed values for this site.
        studio = 'PacoPacoMama'
        media.studio = studio

        directors = ''
        media.directors = directors

        collections = 'PacoPacoMama'
        media.collections = collections

        xpath_year = "//div[@class='movie-info']/dl[3]/dd"
        year = html.xpath(xpath_year)
        if len(year) > 0:
            year = self.tools.cleanstr(year[0].text)
            media.year = year
            media.originally_available_at = year

        xpath_category = "//div[@class='clearfix']/table/tr[4]/td[2]/a/text()"
        categorys = html.xpath(xpath_category)
        category_list = []
        for category in categorys:
            category_list.append(self.tools.cleanstr(category))
        categorys = ','.join(category_list)
        if len(categorys) > 0:
            media.category = categorys

        actor = {}
        xpath_actor_name = "//div[@class='clearfix']/table/tr[1]/td[2]/a/text()"
        xpath_actor_url = "//div[@class='clearfix']/img[@class='lefty']/@src"
        actor_name = html.xpath(xpath_actor_name)
        if len(actor_name) > 0:
            # The page carries a single actor image that is shared by every
            # listed actor. It is looked up only when there are actors, and a
            # missing image yields an empty URL instead of aborting the parse.
            actor_image = html.xpath(xpath_actor_url)
            if len(actor_image) > 0:
                actor_url = 'https://www.pacopacomama.com%s' % actor_image[0]
            else:
                actor_url = ''
            for actorname in actor_name:
                actor.update({self.tools.cleanstr2(actorname): actor_url})

            media.actor = actor

        return media

Exemple #8

0

Afficher le fichier

Fichier : HeyzoOfficial.py Projet : gsetant/adultscraperx

    def analysis_media_html_byxpath(self, html, q):
        """
        Parse a Heyzo detail page into a MetaData object using XPath.

        html: parsed document exposing ``.xpath()`` (presumably an lxml
            element -- confirm against the caller).
        q: query string; 'heyzo-' plus its upper-cased, cleaned form becomes
            ``media.number``, while the raw value is embedded in the poster
            and thumbnail URLs.
        return: the populated MetaData. Scraped fields whose nodes are not
            found on the page are simply not assigned.
        """
        media = MetaData()
        number = 'heyzo-%s' % self.tools.cleanstr(q.upper())
        media.number = number

        title_texts = html.xpath(
            "//div[@id='wrapper']/article/section[1]/div[@id='movie']/h1/text()")
        if title_texts:
            cleaned_title = self.tools.cleanstr(title_texts[0])
            media.title = self.tools.cleantitlenumber(cleaned_title, number)

        memo_texts = html.xpath("//p[@class='memo']/text()")
        if memo_texts:
            media.summary = memo_texts[0]

        # Image URLs are built from the raw query rather than scraped.
        media.poster = 'https://www.heyzo.com/contents/3000/%s/images/player_thumbnail.jpg' % q
        media.thumbnail = 'https://www.heyzo.com/contents/3000/%s/gallery/001.jpg' % q

        media.studio = 'Heyzo'

        series_texts = html.xpath("//tr[@class='table-series']/td[2]/text()")
        if series_texts:
            series = self.tools.cleanstr(series_texts[0])
            # '-----' in the series cell is skipped rather than stored.
            if series != '-----':
                media.collections = series

        release_texts = html.xpath(
            "//tr[@class='table-release-day']/td[2]/text()")
        if release_texts:
            release = self.tools.cleanstr(release_texts[0])
            media.year = release
            media.originally_available_at = release

        keyword_texts = html.xpath(
            "//ul[@class='tag-keyword-list']/li/a/text()")
        genres = ','.join(
            [self.tools.cleanstr(text) for text in keyword_texts])
        if genres:
            media.category = genres

        # Actors are recorded by name with an empty image URL.
        performer_names = html.xpath(
            "//tr[@class='table-actor']/td//a/span/text()")
        if performer_names:
            media.actor = {name: '' for name in performer_names}

        return media

Exemple #9

0

Afficher le fichier

    def analysis_media_html_byxpath(self, html, q):
        """
        Parse a detail page into a MetaData object using XPath.

        html: parsed document exposing ``.xpath()`` (presumably an lxml
            element -- confirm against the caller).
        q: query string; its upper-cased form is the title and its
            upper-cased, cleaned form is ``media.number``.
        return: the populated MetaData. Scraped fields whose nodes are not
            found on the page are simply not assigned.
        """
        media = MetaData()
        # The title is not scraped; the upper-cased query is used instead.
        media.title = q.upper()
        media.number = self.tools.cleanstr(q.upper())

        image_sources = html.xpath(
            "//div[@class='column']/img[@class='image']/@src")
        if image_sources:
            cover = self.tools.cleanstr(image_sources[0])
            media.poster = cover
            media.thumbnail = cover

        description_texts = html.xpath(
            "//p[@class='level has-text-grey-dark']/text()")
        if description_texts:
            # Concatenating with '' is kept from the original implementation.
            media.summary = description_texts[0] + ''

        date_texts = html.xpath("//p[@class='subtitle is-6']/a/text()")
        if date_texts:
            release = self.tools.dateconvert(date_texts[0])
            media.year = release
            media.originally_available_at = release

        tag_texts = html.xpath("//div[@class='tags']//a/text()")
        genres = ','.join([self.tools.cleanstr(text) for text in tag_texts])
        if genres:
            media.category = genres

        # Actors are recorded by name with an empty image URL.
        performer_links = html.xpath("//a[@class='panel-block']")
        if performer_links:
            media.actor = {link.text: '' for link in performer_links}

        return media

Exemple #10

0

Afficher le fichier

Fichier : onePondo.py Projet : gsetant/adultscraperx

    def analysis_media_html_byxpath(self, browser, q):
        """
        Load a 1pondo movie page in ``browser`` and scrape it into a MetaData.

        browser: driver exposing ``get()`` and ``find_elements_by_xpath()``
            (presumably a Selenium WebDriver -- confirm against the caller).
        q: movie id; embedded in the page URL, and its upper-cased, cleaned
            form becomes ``media.number`` and part of the image URLs.
        return: the populated MetaData, or an empty list when the "see more"
            button is not found on the page.

        Title, summary, collection and date index the first match directly,
        so an IndexError propagates when one of them is missing.
        """
        media = MetaData()
        browser.get("https://www.1pondo.tv/movies/%s/" % q)

        see_more = browser.find_elements_by_xpath(
            "//button[@class='button-flat button-medium button-icon--right see-more']")
        if not see_more:
            return []
        see_more[0].click()
        # Pause for one second after the click before reading the page.
        time.sleep(1)

        number = self.tools.cleanstr(q.upper())
        media.number = number
        media.web = 'onePondo'

        # title
        headings = browser.find_elements_by_xpath("//h1[@class='h1--dense']")
        media.title = headings[0].text

        # summary
        paragraphs = browser.find_elements_by_xpath(
            "//div[@class='movie-info section divider']/div[@class='movie-detail']/p")
        media.summary = paragraphs[0].text

        # Poster and thumbnail share one URL built from the number.
        sample_url = 'https://www.1pondo.tv/assets/sample/%s/str.jpg' % number
        media.poster = sample_url
        media.thumbnail = sample_url

        media.studio = '一本道'

        # collection: third spec entry
        series_spans = browser.find_elements_by_xpath(
            "//li[@class='movie-detail__spec'][3]/span[@class='spec-content']")
        media.collections = series_spans[0].text

        # release date: first spec entry
        date_spans = browser.find_elements_by_xpath(
            "//li[@class='movie-detail__spec'][1]/span[@class='spec-content']")
        media.year = date_spans[0].text
        media.originally_available_at = date_spans[0].text

        # categories
        tag_links = browser.find_elements_by_xpath(
            "//span[@class='spec-content']/a[@class='spec__tag']")
        genres = ','.join([self.tools.cleanstr(link.text) for link in tag_links])
        if genres:
            media.category = genres

        # actors: second spec entry; the mapping is assigned even when empty
        performer_spans = browser.find_elements_by_xpath(
            "//li[@class='movie-detail__spec'][2]/span[@class='spec-content']")
        media.actor = {
            self.tools.cleanstr2(span.text): '' for span in performer_spans
        }

        return media

Exemple #11

0

Afficher le fichier

Fichier : mgstage.py Projet : gsetant/adultscraperx

    def analysis_media_html_byxpath(self, browser, q):
        """
        Scrape the mgstage detail page currently loaded in ``browser``.

        browser: driver exposing ``find_elements_by_xpath()`` (presumably a
            Selenium WebDriver already pointed at the detail page -- confirm
            against the caller).
        q: not used by this method.
        return: the populated MetaData.

        The detail table, title, summary, poster and sample image index the
        first match directly, so an IndexError propagates when one of them
        is missing from the page.
        """
        media = MetaData()
        infos_xpath = "//div[@class='detail_data']"
        infos = browser.find_elements_by_xpath(infos_xpath)
        for info in infos[0].text.split('\n'):
            # Each line is split into a label and a value at the FIRST
            # full-width colon only, so a value that itself contains the
            # colon is kept whole.
            label, separator, value = info.partition('：')
            if not separator:
                continue
            keyword = self.tools.cleanstr(label)

            if keyword == '出演':  # actor
                actor_name = self.tools.cleanstr(value)
                media.actor = {self.tools.cleanstr2(actor_name): ''}

            elif keyword == 'メーカー':  # studio
                media.studio = self.tools.cleanstr(value)

            elif keyword == '品番':  # number
                media.number = self.tools.cleanstr(value)

            elif keyword == '配信開始日':  # release date
                release = self.tools.formatdatetime(self.tools.cleanstr(value))
                media.year = release
                media.originally_available_at = release

            elif keyword == 'シリーズ':  # series
                media.collections = self.tools.cleanstr2(
                    self.tools.cleanstr(value))

            elif keyword == 'ジャンル':  # genres
                # Space-separated list; empty fragments are dropped.
                categorys = ','.join(
                    self.tools.cleanstr(item)
                    for item in value.split(' ') if item != '')
                if len(categorys) > 0:
                    media.category = categorys

        # title
        title_xpath = "//h1[@class='tag']"
        title = browser.find_elements_by_xpath(title_xpath)
        media.title = self.tools.cleanstr(title[0].text)

        # Click the 'introduction_all' element unless it is hidden; skip the
        # click entirely when the element is not on the page.
        more_xpath = "//p[@id='introduction_all']"
        more = browser.find_elements_by_xpath(more_xpath)
        if len(more) > 0 and more[0].get_attribute("style") != 'display: none;':
            more[0].click()

        summary_xpath = "//p[@class='txt introduction']"
        summary = browser.find_elements_by_xpath(summary_xpath)
        media.summary = summary[0].text

        poster_xpath = "//a[@id='EnlargeImage']"
        poster = browser.find_elements_by_xpath(poster_xpath)
        media.poster = poster[0].get_attribute('href')

        # The first sample photo is used as the thumbnail.
        art_xpath = "//div[@class='detail_left']/dl[@id='sample-photo']/dd/ul/li[1]/a[@class='sample_image']"
        art = browser.find_elements_by_xpath(art_xpath)
        media.thumbnail = art[0].get_attribute('href')

        return media

Exemple #12

0

Afficher le fichier

Fichier : main.py Projet : gsetant/NeteaseCloudMusic

def search(meta_info, user_setting):
    """Look up album/artist metadata for a media item.

    The function resolves a search name from ``meta_info``, derives a cache
    code from it, and returns the cached entry when one exists. Otherwise it
    searches online, keeps the highest scored result if it reaches the
    required score, and converts that result into a ``MetaData`` holding one
    ``Album`` and a list of ``Artist`` objects.

    :param meta_info: mapping read with ``.get``; the keys used here are
        ``media_type`` (``'album'`` or ``'artist'``), ``name`` and
        ``file_name``.
    :param user_setting: mapping read with ``.get``; a truthy ``hotComment``
        appends the collected reviews to the album summary.
    :return: a list with one ``MetaData`` on success, an empty list when the
        media type is unsupported or no result scores high enough, or the
        value returned by ``check_cache`` (returned as-is) on a cache hit.
    """
    score = MATCH_SCORE
    plugin_name = config.get_info('en').get('name')
    meta_data_list = []

    media_type = meta_info.get('media_type')
    if media_type == 'album':
        if meta_info.get('name'):
            file_name = meta_info.get('name')
            score = MATCH_ALBUM
        else:
            file_name = clear_file_name(meta_info.get('file_name'))
    elif media_type == 'artist':
        if meta_info.get('file_name'):
            file_name = clear_file_name(meta_info.get('file_name'))
        else:
            # NOTE(review): MATCH_ARTIST is only applied when no file name is
            # available -- confirm this asymmetry with the album branch.
            file_name = ''
            score = MATCH_ARTIST
    else:
        # Only albums and artists are handled.
        return meta_data_list

    log('info', 'title:%s' % file_name, plugin_name)
    code = get_code(file_name, meta_info)

    cache_data = check_cache(code, get_info('en').get('name'))
    if cache_data:
        return cache_data

    # Nothing cached: search for meta data from internet.
    results = search_song_by_code(code)
    sort_result(results, code)
    result = get_highest_result(results)
    if not result or result.get('score') < score:
        return meta_data_list

    # These helpers are expected to enrich ``result`` in place with the
    # 'artists', 'album' and 'reviews' entries read below.
    get_artist_info(result)
    get_album_info(result)
    get_hot_comment(result)

    meta_data = MetaData()

    # Album
    album_info = result.get('album')
    album = Album()
    album.title = album_info.get('name')
    if album_info.get('description'):
        album.summary = album_info.get('description')
    album.studio = album_info.get('company')
    album.tags = album_info.get('tags')
    album.collections = album_info.get('subType')
    album.poster = get_picture_base64(album_info.get('picUrl'))
    publish_time = album_info.get('publishTime')
    if publish_time is not None:
        # publishTime is divided by 1000 before being read as a timestamp
        # (presumably milliseconds since the epoch).
        album.originally_available_at = datetime.datetime.fromtimestamp(
            publish_time / 1000).strftime('%Y-%m-%d')

    # Top comments first, then hot comments; either list may be missing.
    reviews = result['reviews']
    for comment_key in ('topComments', 'hotComments'):
        for review in reviews.get(comment_key) or []:
            album.reviews.append(
                review.get('user').get('nickname') + ': ' + review.get('content'))

    if user_setting.get('hotComment'):
        # The summary may still be unset/None when the album has no
        # description, so start from an empty string in that case.
        summary = getattr(album, 'summary', None) or ''
        summary += '\n 网易云热门评论： '
        summary += ''.join(
            '\n ' + review.replace('\n', '') for review in album.reviews)
        album.summary = summary
    meta_data.album = album

    # Artist
    rank_type = ['', '华语', '欧美', '韩国', '日本']
    for result_artist in result.get('artists') or []:
        artist = Artist()
        cover = result_artist.get('cover')
        artist.poster = get_picture_base64(cover)
        artist.art = get_picture_base64(cover)

        title = result_artist.get('name')
        if title in ('Various Artists', '[Unknown Artist]'):
            title = '未知艺术家'
        artist.title = title

        if result_artist.get('identifyTag'):
            artist.tags = ','.join(result_artist.get('identifyTag'))

        artist.summary = ''
        if result_artist.get('briefDesc'):
            artist.summary = result_artist.get('briefDesc')

        rank = result_artist.get('rank')
        if rank:
            # Fall back to an empty region name for unknown rank types
            # instead of failing the whole search.
            region_index = rank.get('type')
            if isinstance(region_index, int) and 0 <= region_index < len(rank_type):
                region = rank_type[region_index]
            else:
                region = ''
            artist.summary += '\n歌手排行：' + str(region) + '地区：' + str(rank.get('rank'))
        if result_artist.get('albumSize'):
            artist.summary += '\n歌手专辑数：' + str(result_artist.get('albumSize'))
        if result_artist.get('musicSize'):
            artist.summary += '\n歌手音乐数：' + str(result_artist.get('musicSize'))
        if result_artist.get('mvSize'):
            artist.summary += '\n歌手MV数：' + str(result_artist.get('mvSize'))
        meta_data.artist.append(artist)

    meta_data.code = code
    meta_data_list.append(meta_data)
    return meta_data_list

Exemple #13

0

Afficher le fichier

Fichier : heydougaOfficial.py Projet : gsetant/adultscraperx

    def analysis_media_html_byxpath(self, browser, q):
        """
        Open the heydouga movie page for ``q`` and collect its data by xpath.
        browser:<object> must provide get(url) and find_elements_by_xpath(xpath)
                (presumably a Selenium WebDriver -- TODO confirm)
        q:<str> movie path inserted into the page URL; '/' becomes '-' in the number
        return:<MetaData>
        Fields whose page element is missing are left untouched.
        """

        def first_text(xpath):
            # Text of the first element matching xpath, or '' when none match.
            elements = browser.find_elements_by_xpath(xpath)
            return elements[0].text if elements else ''

        # The image path is built from the LAST code-like match found in q.
        imgnumber = ''
        for match in re.finditer(
                r'[0-9]{4}\D[0-9]{1,5}|[0-9]{4}\D(Q|q)[0-9]{1,5}|[0-9]{4}\D(.{3})\D[0-9]{4}|[0-9]{4}\D(.{3})\D[0-9]{6}\D[0-9]{3}',
                q, re.IGNORECASE):
            imgnumber = match.group()

        browser.get('https://www.heydouga.com/moviepages/%s/index.html' % q)

        media = MetaData()
        media.number = q.replace('/', '-')

        title = first_text("//div[@id='title-bg']/h1")
        if title:
            media.title = self.tools.cleanstr(title)

        summary = first_text("//div[@class='movie-description']/p")
        if summary:
            media.summary = self.tools.cleanstr(summary)

        # Poster and thumbnail share the same player image.
        thumb_url = 'https://www.heydouga.com/contents/%s/player_thumb.jpg' % self.format(
            imgnumber)
        media.poster = thumb_url
        media.thumbnail = thumb_url

        # Studio and collection are fixed values for this site.
        media.studio = 'heydouga'
        media.collections = 'heydouga'

        year = first_text("//div[@id='movie-info']//li[1]/span[2]")
        if year:
            media.year = year
            media.originally_available_at = year

        category_elements = browser.find_elements_by_xpath(
            "//ul[@id='movie_tag_list']/li/a")
        categorys = ','.join(
            self.tools.cleanstr(element.text) for element in category_elements)
        if categorys:
            media.category = categorys

        # Actor names share one element, separated by spaces; no image URLs
        # are collected, so every name maps to ''.
        actor_elements = browser.find_elements_by_xpath(
            "//div[@id='movie-info']/ul/li[2]/span[2]/a")
        if actor_elements:
            media.actor = {
                actorname: '' for actorname in actor_elements[0].text.split(' ')
            }

        return media

Exemple #14

0

Afficher le fichier

Fichier : javr.py Projet : gsetant/adultscraperx

    def analysis_media_html_byxpath(self, html, q):
        """
        Parse a javr detail page into a MetaData object using xpath queries.
        html:<object> must provide xpath(expr) returning a list
             (presumably an lxml element -- TODO confirm)
        q:<str> query code; upper-cased and cleaned to become media.number
        return:<MetaData>
        """
        media = MetaData()
        media.number = self.tools.cleanstr(q.upper())
        media.web = 'javr'

        # Find the studio among the metadata paragraphs. Defaults to '' so the
        # later uses never hit an unbound name when no 'Studio:' label exists.
        studio = ''
        xpath_p = "//div[@class='post-metadata']/p"
        paragraph_count = len(html.xpath(xpath_p))
        for position in range(1, paragraph_count + 1):  # xpath indices are 1-based
            label = html.xpath('%s[%s]/b/text()' % (xpath_p, position))
            if label and label[0] == 'Studio:':
                texts = html.xpath('%s[%s]//text()' % (xpath_p, position))
                # The studio name is the third text node of that paragraph.
                if len(texts) > 2:
                    studio = texts[2]

        title_nodes = html.xpath("//h1[@class='entry-title1']/text()")
        if title_nodes:
            title = title_nodes[0].replace('Watch XXX Japanese P**n - ', '')
            if studio:
                title = title.replace(studio, '')
            # The page offers no separate summary, so the title is reused.
            media.title = title
            media.summary = title

        # Skip inline "data:image" placeholders; the last real URL wins.
        for post_url in html.xpath("//img[@id='myvidcover']/@src"):
            if 'data:image' not in post_url:
                media.poster = post_url
                media.thumbnail = post_url

        media.studio = studio
        media.directors = ''

        category_nodes = html.xpath(
            "//div[@class='categories tags cactus-info']/a/text()")
        categorys = ','.join(
            self.tools.cleanstr(category) for category in category_nodes)
        if categorys:
            media.category = categorys

        actor_name = html.xpath("//div[@class='channel-content']//a/h4/text()")
        actor_url = html.xpath(
            "//div[@class='post-metadata sp-style style-5']//a/img/@data-src")
        if actor_name:
            actor = {}
            for i, actorname in enumerate(actor_name):
                # Actors without a matching image get the site logo instead.
                if i < len(actor_url):
                    actor[actorname] = actor_url[i]
                else:
                    actor[actorname] = 'https://ravecloud.xyz/2019/02/javraveclublogo_41.png'
            media.actor = actor

        return media

Exemple #15

0

Afficher le fichier

Fichier : tenMusume.py Projet : gsetant/adultscraperx

    def analysis_media_html_byxpath(self, html, q):
        """
        Parse a 10musume detail page into a MetaData object using xpath queries.
        html:<object> must provide xpath(expr) returning a list
             (presumably an lxml element -- TODO confirm)
        q:<str> query code; upper-cased and cleaned, then used in the image URLs
        return:<MetaData>
        """
        number = self.tools.cleanstr(q.upper())
        media = MetaData()
        # NOTE(review): media.number is not assigned here although `number`
        # is computed -- confirm whether it should be set.

        title = html.xpath("//dl[@class='list-spec cf']/dd[1]/text()")
        if title:
            media.title = self.tools.cleantitlenumber(
                self.tools.cleanstr(title[0]), number)

        summary = html.xpath(
            "//div[@class='detail-info__item'][2]/p[@class='detail-info__comment']/text()")
        if summary:
            media.summary = summary[0]

        # Image URLs are built from the number rather than scraped.
        media.poster = 'https://www.10musume.com/moviepages//%s/images/list1.jpg' % number
        media.thumbnail = 'https://www.10musume.com/moviepages//%s/images/g_b001.jpg' % number

        # Studio, directors and collection are fixed values for this site.
        media.studio = '素人専門アダルト動画'
        media.directors = ''
        media.collections = '天然むすめ'

        year = html.xpath("//dl[@class='list-spec cf']/dd[2]/text()")
        if year:
            year = self.tools.cleanstr(year[0])
            # Write to the local `media` that is returned, not `self.media`.
            media.year = year
            media.originally_available_at = year

        category_nodes = html.xpath("//dl[@class='list-spec cf']/dd[7]/a/text()")
        categorys = ','.join(
            self.tools.cleanstr(category) for category in category_nodes)
        if categorys:
            media.category = categorys

        # No actor image URLs are collected, so every name maps to ''.
        actor_name = html.xpath("//dl[@class='list-spec cf']/dd[4]/a/text()")
        if actor_name:
            media.actor = {
                self.tools.cleanstr2(actorname): '' for actorname in actor_name
            }

        return media