예제 #1
0
파일: cdw.py 프로젝트: mksingh202/scrapy
    def _parse_features(self, response):
        """Collect product specifications from the spec list on the page.

        Every label under ``ul#productSpecsContainer`` names one spec; the
        element whose ``aria-label`` equals that name holds its value. When
        that element yields more than one cleaned text fragment it is treated
        as a group of sub-features: the ``span.strong`` texts are the titles
        and the texts of the remaining spans are the matching values.

        :param response: response object exposing ``xpath(...).extract()``.
        :return: list of single-entry dicts ``{name: value}``, in the order
            the labels were extracted.
        """
        # XPath template for the element holding the value(s) of one spec.
        value_node = ('//ul[@id="productSpecsContainer"]'
                      '/li/div[contains(@id, "product_spec")]'
                      '/*[@aria-label="%s"]')

        features = []
        features_name = response.xpath(
            '//ul[@id="productSpecsContainer"]'
            '/li//label[contains(@for, "product_spec")]/text()').extract()
        for f_name in features_name:
            # NOTE(review): f_name is interpolated unescaped; a label that
            # contains a double quote would produce a malformed XPath.
            node = value_node % f_name

            f_content = clean_list(
                self, response.xpath(node + '//text()').extract())
            if not f_content:
                # Label without any value text: skip it instead of failing
                # on f_content[0] below.
                continue

            if len(f_content) == 1:
                features.append({f_name: clean_text(self, f_content[0])})
                continue

            f_content_title = clean_list(self, response.xpath(
                node + '//span[@class="strong"]/text()').extract())
            f_content_text = clean_list(self, response.xpath(
                node + '//span[not(contains(@class,"strong"))]'
                       '/text()').extract())

            # Pair titles and values by position. A list.index() lookup would
            # map every repeated title onto the first value; zip also stops
            # at the shorter list instead of raising IndexError.
            for f_c_title, f_c_text in zip(f_content_title, f_content_text):
                features.append({f_c_title.replace(":", ""): f_c_text})

        return features
예제 #2
0
파일: zones.py 프로젝트: mksingh202/scrapy
    def _parse_features(self, response):
        """Build the feature list from the bold labels and summary items.

        Names come from ``span.ppdefaultbold`` texts, values from the ``li``
        texts inside ``div.sumCont``; the two lists are paired by position.

        :param response: response object exposing ``xpath(...).extract()``.
        :return: list of single-entry dicts ``{name: value}``.
        """
        features_name = response.xpath(
            '//span[@class="ppdefaultbold"]/text()').extract()
        features_value = response.xpath(
            '//div[@class="sumCont"]//li/text()').extract()
        # NOTE(review): clean_list is defined elsewhere; pairing is positional,
        # so confirm it does not drop entries and shift the values.
        features_value = clean_list(self, features_value)

        # zip pairs by position: repeated names keep their own value (an
        # index() lookup always returned the first match) and a shorter
        # value list truncates the result instead of raising IndexError.
        return [{f_name: f_value}
                for f_name, f_value in zip(features_name, features_value)]
    def _parse_features(self, response):
        """Build the feature list from the specification table cells.

        Inside the ``div`` whose ``data-ccs-cc-inline-acc-idm`` attribute
        contains "specification", ``td`` cells whose class contains
        "specification-attribute" supply the names and all other ``td``
        cells supply the values; both lists are paired by position.

        :param response: response object exposing ``xpath(...).extract()``.
        :return: list of single-entry dicts ``{name: value}``.
        """
        spec_block = '//div[contains(@data-ccs-cc-inline-acc-idm, "specification")]'
        features_name = response.xpath(
            spec_block +
            '//td[contains(@class, "specification-attribute")]/text()').extract()
        features_value = response.xpath(
            spec_block +
            '//td[not(contains(@class, "specification-attribute"))]/text()').extract()
        # NOTE(review): clean_list is defined elsewhere; pairing is positional,
        # so confirm it does not drop entries and shift the values.
        features_value = clean_list(self, features_value)

        features = []
        # Positional pairing: an index() lookup would give every repeated
        # attribute name the value of its first occurrence, and would raise
        # IndexError when fewer values than names were extracted.
        for f_name, f_value in zip(features_name, features_value):
            features.append({f_name: f_value})

        return features
    def _parse_features(self, response):
        """Extract product specs listed under ``ul#productSpecsContainer``.

        For each spec label, the element with a matching ``aria-label`` is
        read. A single cleaned text fragment becomes ``{label: text}``;
        several fragments are split into sub-features, with ``span.strong``
        texts as titles (colons removed) and the other spans' texts as values.

        :param response: response object exposing ``xpath(...).extract()``.
        :return: list of single-entry dicts ``{name: value}``.
        """
        container = '//ul[@id="productSpecsContainer"]'
        features = []
        features_name = response.xpath(
            container +
            '/li//label[contains(@for, "product_spec")]/text()').extract()

        for f_name in features_name:
            # NOTE(review): the label text is embedded in the XPath unescaped;
            # a double quote inside it would make the expression invalid.
            spec_xpath = (container +
                          '/li/div[contains(@id, "product_spec")]'
                          '/*[@aria-label="%s"]' % f_name)

            f_content = response.xpath(spec_xpath + '//text()').extract()
            f_content = clean_list(self, f_content)

            if len(f_content) > 1:
                f_content_title = response.xpath(
                    spec_xpath + '//span[@class="strong"]/text()').extract()
                f_content_title = clean_list(self, f_content_title)

                f_content_text = response.xpath(
                    spec_xpath +
                    '//span[not(contains(@class,"strong"))]/text()').extract()
                f_content_text = clean_list(self, f_content_text)

                # Walk both lists in lockstep so duplicate titles keep their
                # own value and a missing value cannot raise IndexError.
                for f_c_title, f_c_value in zip(f_content_title,
                                                f_content_text):
                    features.append({f_c_title.replace(":", ""): f_c_value})
            elif f_content:
                features.append({f_name: clean_text(self, f_content[0])})
            # else: the label has no value text at all, so nothing is
            # recorded (previously this case raised IndexError).

        return features
예제 #5
0
    def _parse_features(self, response):
        """Build the feature list from the summary table.

        Within ``div.summaryContainer``, ``td.hdr`` texts are the feature
        names and ``td.value`` texts are the values; they are paired by
        position.

        :param response: response object exposing ``xpath(...).extract()``.
        :return: list of single-entry dicts ``{name: value}``.
        """
        features_name = response.xpath(
            '//div[@class="summaryContainer"]//td[@class="hdr"]/text()'
        ).extract()
        features_value = response.xpath(
            '//div[@class="summaryContainer"]//td[@class="value"]/text()'
        ).extract()
        # NOTE(review): clean_list is defined elsewhere; pairing is positional,
        # so confirm it does not drop entries and shift the values.
        features_value = clean_list(self, features_value)

        features = []
        # Pair by position instead of features_name.index(): index() returns
        # the first match, so repeated headers all received the first value,
        # and a short value list raised IndexError.
        for f_name, f_value in zip(features_name, features_value):
            features.append({f_name: f_value})

        return features
예제 #6
0
    def _parse_features(self, response):
        """Pair specification attribute cells with their value cells.

        Both queries are scoped to the ``div`` whose
        ``data-ccs-cc-inline-acc-idm`` attribute contains "specification".
        Cells whose class contains "specification-attribute" are names; every
        other ``td`` text is a value.

        :param response: response object exposing ``xpath(...).extract()``.
        :return: list of single-entry dicts ``{name: value}``.
        """
        features_name = response.xpath(
            '//div[contains(@data-ccs-cc-inline-acc-idm, "specification")]'
            '//td[contains(@class, "specification-attribute")]/text()'
        ).extract()
        features_value = response.xpath(
            '//div[contains(@data-ccs-cc-inline-acc-idm, "specification")]'
            '//td[not(contains(@class, "specification-attribute"))]/text()'
        ).extract()
        # NOTE(review): clean_list is defined elsewhere; pairing is positional,
        # so confirm it does not drop entries and shift the values.
        features_value = clean_list(self, features_value)

        # Names and values are matched by position. The former index()
        # lookup resolved duplicate names to the first occurrence and failed
        # with IndexError when values ran out; zip avoids both.
        return [
            {f_name: f_value}
            for f_name, f_value in zip(features_name, features_value)
        ]