def parse_rels(el):
    """Parse an element for rel microformats.

    The element's resolved URL is recorded under every rel value in
    ``self.__parsed__["rels"]``; per-URL details (text, rels and the
    media/hreflang/type/title attributes) go into
    ``self.__parsed__["rel-urls"]``; and when one of the rel values is
    "alternate" an entry is appended to ``self.__parsed__["alternates"]``.

    ``self`` is not a parameter here; it is taken from the enclosing scope.

    :param el: element carrying the ``rel`` attribute; it must support
        ``get`` and ``get_text``.
    :return: None. All results are written into ``self.__parsed__``.
    """
    rel_values = get_attr(el, 'rel')
    # if rel attributes exist
    # NOTE(review): assumes get_attr yields None for a missing attribute;
    # the check has to happen before iterating, otherwise it can never fire.
    if rel_values is None:
        return
    rel_attrs = [text_type(rel) for rel in rel_values]

    # find the url and normalise it
    url = text_type(urljoin(self.__url__, el.get('href', '')))
    value_dict = self.__parsed__["rel-urls"].get(url, self.dict_class())

    if "text" not in value_dict:
        value_dict["text"] = el.get_text().strip()  # 1st one wins

    url_rels = value_dict.get("rels", [])
    value_dict["rels"] = url_rels

    for knownattr in ("media", "hreflang", "type", "title"):
        attr_value = get_attr(el, knownattr)
        if attr_value is not None:
            value_dict[knownattr] = text_type(attr_value)

    self.__parsed__["rel-urls"][url] = value_dict

    for rel_value in rel_attrs:
        value_list = self.__parsed__["rels"].get(rel_value, [])
        if url not in value_list:
            value_list.append(url)
        if rel_value not in url_rels:
            url_rels.append(rel_value)
        self.__parsed__["rels"][rel_value] = value_list

    if "alternate" in rel_attrs:
        alternate_list = self.__parsed__.get("alternates", [])
        alternate_dict = self.dict_class()
        alternate_dict["url"] = url

        other_rels = " ".join(
            [r for r in rel_attrs if not r == "alternate"])
        # compare by value: identity against a "" literal is not reliable
        if other_rels:
            alternate_dict["rel"] = other_rels

        alternate_dict["text"] = text_type(el.get_text().strip())

        for knownattr in ("media", "hreflang", "type", "title"):
            attr_value = get_attr(el, knownattr)
            if attr_value is not None:
                alternate_dict[knownattr] = text_type(attr_value)

        alternate_list.append(alternate_dict)
        self.__parsed__["alternates"] = alternate_list
def parse_rels(el):
    """Parse an element for rel microformats.

    Writes three kinds of results into ``self.__parsed__``:

    * ``"rels"``: for each rel value, the list of URLs seen with it;
    * ``"rel-urls"``: for each URL, a dict holding its text, its rel
      values and any media/hreflang/type/title attributes;
    * ``"alternates"``: one entry per element whose rel values include
      "alternate".

    ``self`` comes from the enclosing scope, not from the parameters.

    :param el: element carrying the ``rel`` attribute; it must support
        ``get`` and ``get_text``.
    :return: None.
    """
    raw_rels = get_attr(el, 'rel')
    # if rel attributes exist
    # NOTE(review): presumably get_attr returns None when the attribute is
    # absent; test that before iterating so the guard is actually reachable.
    if raw_rels is None:
        return
    rel_attrs = [text_type(rel) for rel in raw_rels]

    # find the url and normalise it
    url = text_type(urljoin(self.__url__, el.get('href', '')))
    value_dict = self.__parsed__["rel-urls"].get(url, self.dict_class())

    if "text" not in value_dict:
        value_dict["text"] = el.get_text().strip()  # 1st one wins

    url_rels = value_dict.get("rels", [])
    value_dict["rels"] = url_rels

    for knownattr in ("media", "hreflang", "type", "title"):
        attr_value = get_attr(el, knownattr)
        if attr_value is not None:
            value_dict[knownattr] = text_type(attr_value)

    self.__parsed__["rel-urls"][url] = value_dict

    for rel_value in rel_attrs:
        value_list = self.__parsed__["rels"].get(rel_value, [])
        if url not in value_list:
            value_list.append(url)
        if rel_value not in url_rels:
            url_rels.append(rel_value)
        self.__parsed__["rels"][rel_value] = value_list

    if "alternate" in rel_attrs:
        alternate_list = self.__parsed__.get("alternates", [])
        alternate_dict = self.dict_class()
        alternate_dict["url"] = url

        remaining_rels = " ".join(
            [r for r in rel_attrs if not r == "alternate"])
        # value comparison; `is not ""` tested object identity instead
        if remaining_rels != "":
            alternate_dict["rel"] = remaining_rels

        alternate_dict["text"] = text_type(el.get_text().strip())

        for knownattr in ("media", "hreflang", "type", "title"):
            attr_value = get_attr(el, knownattr)
            if attr_value is not None:
                alternate_dict[knownattr] = text_type(attr_value)

        alternate_list.append(alternate_dict)
        self.__parsed__["alternates"] = alternate_list
def handle_microformat(root_class_names, el, value_property=None,
                       simple_value=None):
    """Build the result dict for a (possibly nested) h-* microformat.

    ``self`` is taken from the enclosing scope; the call resets
    ``self._default_date`` to None before parsing.

    :param root_class_names: iterable of root class names, stored as
        text under "type".
    :param el: the element being parsed.
    :param value_property: optional property name; when that property was
        found explicitly, its first value replaces ``simple_value``.
    :param simple_value: optional value of the element when seen as a
        plain property; a dict is merged into the result, anything else
        is stored as text under "value".
    :return: dict with "type" and "properties", plus "shape", "coords",
        "children" and "value" when applicable.
    """
    properties = {}
    children = []
    self._default_date = None

    # collect properties and nested microformats from each element
    # returned by the non-recursive find_all
    for child in el.find_all(True, recursive=False):
        found_props, found_children = parse_props(child)
        for prop_name, prop_values in found_props.items():
            properties.setdefault(prop_name, []).extend(prop_values)
        children.extend(found_children)

    # complex h-* objects can take their "value" from the
    # first explicit property ("name" for p-* or "url" for u-*)
    if value_property and value_property in properties:
        simple_value = properties[value_property][0]

    # fall back to implied values for anything not found explicitly
    if "name" not in properties:
        properties["name"] = list(
            map(text_type, implied_properties.name(el)))

    if "photo" not in properties:
        implied_photo = implied_properties.photo(el, base_url=self.__url__)
        if implied_photo is not None:
            properties["photo"] = list(map(text_type, implied_photo))

    if "url" not in properties:
        implied_url = implied_properties.url(el, base_url=self.__url__)
        if implied_url is not None:
            properties["url"] = list(map(text_type, implied_url))

    # build microformat with type and properties
    type_names = list(map(text_type, root_class_names))
    microformat = {"type": type_names, "properties": properties}

    # area elements additionally expose their shape and coords
    if str(el.name) == "area":
        for area_attr in ("shape", "coords"):
            area_value = get_attr(el, area_attr)
            if area_value is not None:
                microformat[area_attr] = text_type(area_value)

    # insert children if any
    if children:
        microformat["children"] = children

    # simple value is the parsed property value if it were not
    # an h-* class
    if isinstance(simple_value, dict):
        # for e-* properties, the simple value will be
        # {"html":..., "value":...} which we should fold
        # into the microformat object
        # details: https://github.com/tommorris/mf2py/issues/35
        microformat.update(simple_value)
    elif simple_value is not None:
        microformat["value"] = text_type(simple_value)

    return microformat
def handle_microformat(root_class_names, el, value_property=None,
                       simple_value=None):
    """Build the result mapping for a (possibly nested) h-* microformat.

    ``self`` is taken from the enclosing scope; mappings are created with
    ``self.dict_class`` and ``self._default_date`` is reset to None before
    parsing starts.

    :param root_class_names: iterable of root class names, stored as
        text under "type".
    :param el: the element being parsed.
    :param value_property: optional property name; when that property was
        found explicitly, its first value replaces ``simple_value``.
    :param simple_value: optional value of the element when seen as a
        plain property; a dict is merged into the result, anything else
        is stored as text under "value".
    :return: mapping with "type" and "properties", plus "shape",
        "coords", "children" and "value" when applicable.
    """
    properties = self.dict_class()
    children = []
    self._default_date = None

    # collect properties and nested microformats from every child
    for child in get_children(el):
        found_props, found_children = parse_props(child)
        for prop_name, prop_values in found_props.items():
            merged = properties.get(prop_name, [])
            merged.extend(prop_values)
            properties[prop_name] = merged
        children.extend(found_children)

    # complex h-* objects can take their "value" from the
    # first explicit property ("name" for p-* or "url" for u-*)
    if value_property and value_property in properties:
        simple_value = properties[value_property][0]

    # fall back to implied values for anything not found explicitly
    if "name" not in properties:
        properties["name"] = list(
            map(text_type, implied_properties.name(el)))

    if "photo" not in properties:
        implied_photo = implied_properties.photo(el, base_url=self.__url__)
        if implied_photo is not None:
            properties["photo"] = list(map(text_type, implied_photo))

    if "url" not in properties:
        implied_url = implied_properties.url(el, base_url=self.__url__)
        if implied_url is not None:
            properties["url"] = list(map(text_type, implied_url))

    # build microformat with type and properties
    type_names = list(map(text_type, root_class_names))
    microformat = self.dict_class([
        ("type", type_names),
        ("properties", properties),
    ])

    # area elements additionally expose their shape and coords
    if str(el.name) == "area":
        for area_attr in ("shape", "coords"):
            area_value = get_attr(el, area_attr)
            if area_value is not None:
                microformat[area_attr] = text_type(area_value)

    # insert children if any
    if children:
        microformat["children"] = children

    # simple value is the parsed property value if it were not
    # an h-* class
    if isinstance(simple_value, dict):
        # for e-* properties, the simple value will be
        # {"html":..., "value":...} which we should fold
        # into the microformat object
        # details: https://github.com/tommorris/mf2py/issues/35
        microformat.update(simple_value)
    elif simple_value is not None:
        microformat["value"] = text_type(simple_value)

    return microformat