Ejemplo n.º 1
0
    def getProductDimension(self, instance_of, instance_val):
        """Extract a product dimension string, falling back to weight.

        Args:
            instance_of: Source kind. ``"DOCUMENT"`` resolves
                ``instance_val`` through ``self.getElementValue``;
                ``"STRING"`` uses ``instance_val`` as the text itself.
                Any other value yields ``''``.
            instance_val: Lookup value or raw text, depending on
                ``instance_of``.

        Returns:
            The result of the first pattern in ``self.dimensionPatterns``
            that is found in the cleaned text. When nothing is found, or
            the result is longer than 35 characters, the result of
            ``self.getProductWeight`` on the cleaned text is returned
            instead. ``''`` is returned when there is nothing to report.
        """
        max_dimension_length = 35

        if self.htmlObject is None or not instance_val:
            return ''

        if instance_of == "DOCUMENT":
            content = self.getElementValue(instance_val)
        elif instance_of == "STRING":
            content = instance_val
        else:
            return ''

        # Nothing to clean or search; avoid handing None/'' to the helpers.
        if not content:
            return ''

        content = StringUtil.remove_html_tags(content)
        # Drop a backslash followed by three lowercase alphanumerics
        # (presumably leftover escape sequences -- TODO confirm).
        content = StringUtil.str_cleaner(content, r'\\([a-z0-9]{3})', '')
        # Keep only alphanumerics, whitespace and basic punctuation.
        content = StringUtil.str_cleaner(content,
                                         r'[^0-9a-zA-Z\s\-\(\).,"\'&]+', '')

        dimension = ''
        if content:
            text = str(content)
            for pattern in self.dimensionPatterns:
                if StringUtil.str_find_str(text, pattern):
                    dimension = StringUtil.str_search_str(text, pattern)
                    break

        # `not dimension` covers both '' and None. A result longer than the
        # limit is replaced by the weight lookup as well (presumably such a
        # match is a false positive -- TODO confirm).
        if not dimension or len(dimension) > max_dimension_length:
            dimension = self.getProductWeight(content) or ''

        return dimension
Ejemplo n.º 2
0
 def getProductWeight(self, instance_val):
     """Search ``instance_val`` for an item/shipping weight phrase.

     Args:
         instance_val: Text to search, or ``None``.

     Returns:
         ``''`` when ``instance_val`` is ``None``; otherwise whatever
         ``StringUtil.str_search_str`` returns for the weight pattern
         applied to the cleaned text.
     """
     if instance_val is None:
         return ''

     weight_pattern = (
         r"(item weight|Shipping Weight)(:|:\s|\s:)(\d+(\.\d{1,2})?)(\s|\S)(ounce|pound|lb\s|lbs)"
     )

     # Remove anything that looks like a markup tag.
     without_tags = StringUtil.str_cleaner(instance_val, r'<[^>]*>', '')
     # Remove every pair of consecutive whitespace characters.
     compacted = StringUtil.str_cleaner(without_tags, r'\s\s', '')

     return StringUtil.str_search_str(compacted, weight_pattern)