Example #1
0
File: parser.py Project: sgml/mf2py
 def parse_rels(el):
     """Parse an element for rel microformats.

     Resolves the element's ``href`` against ``self.__url__`` and then:

     * records per-URL details (text, rels and any media/hreflang/type/
       title attributes) in ``self.__parsed__["rel-urls"]``;
     * adds the URL to ``self.__parsed__["rels"]`` under each rel value;
     * appends an entry to ``self.__parsed__["alternates"]`` when one of
       the rel values is "alternate".

     Elements for which ``get_attr(el, 'rel')`` returns None are skipped.
     """
     raw_rels = get_attr(el, 'rel')
     # Check before iterating: looping over None would raise TypeError.
     if raw_rels is None:
         return
     rel_attrs = [text_type(rel) for rel in raw_rels]

     # find the url and normalise it
     url = text_type(urljoin(self.__url__, el.get('href', '')))
     value_dict = self.__parsed__["rel-urls"].get(
         url, self.dict_class())
     if "text" not in value_dict:
         value_dict["text"] = el.get_text().strip()  # 1st one wins
     # url_rels is the same list object stored in value_dict, so the
     # appends in the loop below update the stored entry as well
     url_rels = value_dict.get("rels", [])
     value_dict["rels"] = url_rels
     for knownattr in ("media", "hreflang", "type", "title"):
         x = get_attr(el, knownattr)
         if x is not None:
             value_dict[knownattr] = text_type(x)
     self.__parsed__["rel-urls"][url] = value_dict

     # index the url under every rel value, avoiding duplicates
     for rel_value in rel_attrs:
         value_list = self.__parsed__["rels"].get(rel_value, [])
         if url not in value_list:
             value_list.append(url)
         if rel_value not in url_rels:
             url_rels.append(rel_value)
         self.__parsed__["rels"][rel_value] = value_list

     if "alternate" in rel_attrs:
         alternate_list = self.__parsed__.get("alternates", [])
         alternate_dict = self.dict_class()
         alternate_dict["url"] = url
         other_rels = " ".join(
             [r for r in rel_attrs if r != "alternate"])
         # compare by value: identity against a string literal is
         # implementation-dependent
         if other_rels:
             alternate_dict["rel"] = other_rels
         alternate_dict["text"] = text_type(el.get_text().strip())
         for knownattr in ("media", "hreflang", "type", "title"):
             x = get_attr(el, knownattr)
             if x is not None:
                 alternate_dict[knownattr] = text_type(x)
         alternate_list.append(alternate_dict)
         self.__parsed__["alternates"] = alternate_list
Example #2
0
 def parse_rels(el):
     """Parse an element for rel microformats.

     Resolves the element's ``href`` against ``self.__url__`` and then:

     * records per-URL details (text, rels and any media/hreflang/type/
       title attributes) in ``self.__parsed__["rel-urls"]``;
     * adds the URL to ``self.__parsed__["rels"]`` under each rel value;
     * appends an entry to ``self.__parsed__["alternates"]`` when one of
       the rel values is "alternate".

     Elements for which ``get_attr(el, 'rel')`` returns None are skipped.
     """
     raw_rels = get_attr(el, 'rel')
     # Check before iterating: looping over None would raise TypeError.
     if raw_rels is None:
         return
     rel_attrs = [text_type(rel) for rel in raw_rels]

     # find the url and normalise it
     url = text_type(urljoin(self.__url__, el.get('href', '')))
     value_dict = self.__parsed__["rel-urls"].get(url,
                                                  self.dict_class())
     if "text" not in value_dict:
         value_dict["text"] = el.get_text().strip()  # 1st one wins
     # url_rels is the same list object stored in value_dict, so the
     # appends in the loop below update the stored entry as well
     url_rels = value_dict.get("rels", [])
     value_dict["rels"] = url_rels
     for knownattr in ("media", "hreflang", "type", "title"):
         x = get_attr(el, knownattr)
         if x is not None:
             value_dict[knownattr] = text_type(x)
     self.__parsed__["rel-urls"][url] = value_dict

     # index the url under every rel value, avoiding duplicates
     for rel_value in rel_attrs:
         value_list = self.__parsed__["rels"].get(rel_value, [])
         if url not in value_list:
             value_list.append(url)
         if rel_value not in url_rels:
             url_rels.append(rel_value)
         self.__parsed__["rels"][rel_value] = value_list

     if "alternate" in rel_attrs:
         alternate_list = self.__parsed__.get("alternates", [])
         alternate_dict = self.dict_class()
         alternate_dict["url"] = url
         other_rels = " ".join(
             [r for r in rel_attrs if r != "alternate"])
         # compare by value: identity against a string literal is
         # implementation-dependent
         if other_rels:
             alternate_dict["rel"] = other_rels
         alternate_dict["text"] = text_type(el.get_text().strip())
         for knownattr in ("media", "hreflang", "type", "title"):
             x = get_attr(el, knownattr)
             if x is not None:
                 alternate_dict[knownattr] = text_type(x)
         alternate_list.append(alternate_dict)
         self.__parsed__["alternates"] = alternate_list
Example #3
0
        def handle_microformat(root_class_names, el, value_property=None,
                               simple_value=None):
            """Build the dict for one (possibly nested) h-* microformat.

            Properties and nested children are gathered from the direct
            child elements of ``el`` via ``parse_props``; "name", "photo"
            and "url" fall back to ``implied_properties`` when no explicit
            value was collected. Returns the assembled microformat dict.
            """
            self._default_date = None
            props = {}
            nested = []

            # walk the direct children, merging their properties and
            # collecting any nested microformats
            for node in el.find_all(True, recursive=False):
                node_props, node_children = parse_props(node)
                for prop_name, values in node_props.items():
                    props.setdefault(prop_name, []).extend(values)
                nested.extend(node_children)

            # a complex h-* object may take its "value" from the first
            # explicit property ("name" for p-* or "url" for u-*)
            if value_property and value_property in props:
                simple_value = props[value_property][0]

            # anything still missing is looked up by the implied rules
            if "name" not in props:
                props["name"] = [
                    text_type(n) for n in implied_properties.name(el)]
            for implied in ("photo", "url"):
                if implied in props:
                    continue
                found = getattr(implied_properties, implied)(
                    el, base_url=self.__url__)
                if found is not None:
                    props[implied] = [text_type(u) for u in found]

            # assemble the microformat from its type and properties
            microformat = {
                "type": [text_type(cls) for cls in root_class_names],
                "properties": props,
            }
            if str(el.name) == "area":
                for area_attr in ("shape", "coords"):
                    attr_value = get_attr(el, area_attr)
                    if attr_value is not None:
                        microformat[area_attr] = text_type(attr_value)

            # attach children only when there are some
            if nested:
                microformat["children"] = nested

            # the simple value is what the property would have parsed to
            # had this not been an h-* class
            if isinstance(simple_value, dict):
                # e-* properties give {"html":..., "value":...}, which is
                # folded into the microformat object
                # details: https://github.com/tommorris/mf2py/issues/35
                microformat.update(simple_value)
            elif simple_value is not None:
                microformat["value"] = text_type(simple_value)

            return microformat
Example #4
0
File: parser.py Project: sgml/mf2py
        def handle_microformat(root_class_names,
                               el,
                               value_property=None,
                               simple_value=None):
            """Build the mapping for one (possibly nested) h-* microformat.

            Properties and nested children are gathered from
            ``get_children(el)`` via ``parse_props``; "name", "photo" and
            "url" fall back to ``implied_properties`` when no explicit
            value was collected. Containers are created with
            ``self.dict_class``. Returns the assembled microformat.
            """
            self._default_date = None
            props = self.dict_class()
            nested = []

            # walk the children, merging their properties and collecting
            # any nested microformats
            for node in get_children(el):
                node_props, node_children = parse_props(node)
                for prop_name, values in node_props.items():
                    merged = props.get(prop_name, [])
                    merged.extend(values)
                    props[prop_name] = merged
                nested.extend(node_children)

            # a complex h-* object may take its "value" from the first
            # explicit property ("name" for p-* or "url" for u-*)
            if value_property and value_property in props:
                simple_value = props[value_property][0]

            # anything still missing is looked up by the implied rules
            if "name" not in props:
                props["name"] = [
                    text_type(n) for n in implied_properties.name(el)]
            for implied in ("photo", "url"):
                if implied in props:
                    continue
                found = getattr(implied_properties, implied)(
                    el, base_url=self.__url__)
                if found is not None:
                    props[implied] = [text_type(u) for u in found]

            # assemble the microformat from its type and properties
            type_names = [text_type(cls) for cls in root_class_names]
            microformat = self.dict_class([
                ("type", type_names),
                ("properties", props),
            ])
            if str(el.name) == "area":
                for area_attr in ("shape", "coords"):
                    attr_value = get_attr(el, area_attr)
                    if attr_value is not None:
                        microformat[area_attr] = text_type(attr_value)

            # attach children only when there are some
            if nested:
                microformat["children"] = nested

            # the simple value is what the property would have parsed to
            # had this not been an h-* class
            if isinstance(simple_value, dict):
                # e-* properties give {"html":..., "value":...}, which is
                # folded into the microformat object
                # details: https://github.com/tommorris/mf2py/issues/35
                microformat.update(simple_value)
            elif simple_value is not None:
                microformat["value"] = text_type(simple_value)

            return microformat