def validate(self, domitems):
    """Check that ``domitems`` is a dictionary or a list holding only dictionaries.

    The argument is returned unchanged when the check passes.

    Raises:
        Exception: when a list contains an element that is not a
            dictionary, or when a non-list argument is not a dictionary.
    """
    if not helpers.is_list(domitems):
        # Single-item form: the value itself has to be a dictionary.
        if helpers.is_dict(domitems):
            return domitems
        raise Exception("The domitems expects a dictionary element, %s given" % type(domitems))

    # List form: stop at the first element that is not a dictionary.
    for entry in domitems:
        if not helpers.is_dict(entry):
            raise Exception("The domitems list expects all elements to be dictionaries, some aren't")
    return domitems
def domitems(self, domitems):
    """Build the DOM item object(s) described by ``domitems`` and store them in ``self._domitems``.

    * A dictionary is turned into one object and stored as-is.
    * A list of dictionaries is turned into a list of objects, in the same
      order, so that no element of the input is dropped.

    A description containing the key ``'autogen'`` is built with
    ``WDIAutoGen``; any other description is built with ``WDomItem``.
    The keys ``'nested_items'`` and ``'range'`` default to ``None`` and
    ``'parentless'`` defaults to ``False`` when absent.

    An argument that is neither a list nor a dictionary leaves
    ``self._domitems`` untouched.

    Raises:
        Exception: when a list contains an element that is not a dictionary.
        KeyError: when a description lacks a required key (``'name'``,
            ``'url'``, and ``'selector'`` for non-autogen descriptions).
    """
    def _build(spec):
        # One place for the construction logic shared by both input forms.
        if 'autogen' in spec:
            return WDIAutoGen(
                spec['name'],
                spec['url'],
                spec.get('nested_items'),
                spec['autogen'],
                spec.get('range'),
                spec.get('parentless', False),
            )
        return WDomItem(
            spec['name'],
            spec['url'],
            spec['selector'],
            spec.get('nested_items'),
        )

    if helpers.is_list(domitems):
        if not all(helpers.is_dict(spec) for spec in domitems):
            raise Exception("The domitems list expects all elements to be dictionaries, some aren't")
        # Keep every element: assigning inside a loop would retain only the last one.
        self._domitems = [_build(spec) for spec in domitems]
    elif helpers.is_dict(domitems):
        self._domitems = _build(domitems)
def nested_items(self, ni):
    """Add nested item(s) to ``self._nested_items``.

    Accepted forms of ``ni``:

    * ``None`` -- resets ``self._nested_items`` to an empty list.
    * a ``DomItem`` instance -- appended as-is.
    * a dictionary -- an object is built from it and appended.
    * a non-empty list of ``DomItem`` instances and/or dictionaries --
      dictionaries are built into objects, instances are kept, and the
      whole batch is appended at once.

    A dictionary containing ``'autogen'`` is built with ``WDIAutoGen``,
    otherwise with ``DomItem``.  Any other kind of argument, or a list
    holding an element of another type, is ignored without error.

    Raises:
        Exception: when an empty list is supplied, or when building an
            object from a dictionary fails (the original error text is
            embedded in the message).
    """
    from newsline.helpers import helpers

    def _from_spec(spec, explicit_none):
        # ``explicit_none`` selects whether a missing 'nested_items' key is
        # passed to DomItem as None or left out of the call entirely.
        if 'autogen' in spec:
            return WDIAutoGen(
                spec['name'],
                spec['url'],
                spec.get('nested_items'),
                spec['autogen'],
                spec.get('range'),
                spec.get('parentless', False),
            )
        if 'nested_items' in spec:
            return DomItem(spec['name'], spec['url'], spec['selector'], spec['nested_items'])
        if explicit_none:
            return DomItem(spec['name'], spec['url'], spec['selector'], None)
        return DomItem(spec['name'], spec['url'], spec['selector'])

    # The initialization case
    if ni is None:
        self._nested_items = []
        return

    if not hasattr(self, "_nested_items"):
        self._nested_items = []

    if isinstance(ni, DomItem):
        self._nested_items.append(ni)
        return

    if helpers.is_dict(ni):
        try:
            self._nested_items.append(_from_spec(ni, False))
        except Exception as e:
            raise Exception("DomItem nested element exception : %s" % str(e))
        return

    if not helpers.is_list(ni):
        return

    if helpers.is_empty(ni):
        raise Exception("You cannot supply nested_items as empty")

    if not all(isinstance(entry, (DomItem, dict)) for entry in ni):
        return

    try:
        # Build everything first so a failure leaves the stored list unchanged.
        batch = [
            _from_spec(entry, True) if isinstance(entry, dict) else entry
            for entry in ni
        ]
        self._nested_items.extend(batch)
    except Exception as e:
        raise Exception("DomItem nested element exception : %s" % str(e))
def clean(self, domitems):
    """Apply ``self.regexr.remove_double_slash`` to the url(s) in ``domitems``.

    Intended to clean double or trailing slashes out of urls.

    * string -- the function is applied to the string itself.
    * dictionary -- ``helpers.map_dictionary`` is called with the function,
      the dictionary and the key ``"url"``.
    * list -- the same mapping is done for every element and the results
      are returned as a new list.

    Any other type of argument yields ``None``.
    """
    if helpers.is_str(domitems):
        return self.regexr.remove_double_slash(domitems)

    if helpers.is_dict(domitems):
        return helpers.map_dictionary(self.regexr.remove_double_slash, domitems, "url")

    if helpers.is_list(domitems):
        strip_slashes = self.regexr.remove_double_slash
        return [
            helpers.map_dictionary(func=strip_slashes, dictionary=entry, key="url")
            for entry in domitems
        ]

    return None
def decode(self, domitems):
    """Apply ``self.regexr.parse_arabic_urls`` to the url(s) in ``domitems``.

    Intended to turn utf-8/ISO-8859-1 encoded Arabic characters in urls
    into unicode Arabic characters.

    * string -- the function is applied to the string itself.
    * dictionary -- ``helpers.map_dictionary`` is called with the function,
      the dictionary and the key ``"url"``.
    * list -- the same mapping is done for every element and the results
      are returned as a new list.

    Any other type of argument yields ``None``.
    """
    if helpers.is_str(domitems):
        return self.regexr.parse_arabic_urls(domitems)

    if helpers.is_dict(domitems):
        return helpers.map_dictionary(self.regexr.parse_arabic_urls, domitems, "url")

    if helpers.is_list(domitems):
        to_unicode = self.regexr.parse_arabic_urls
        return [
            helpers.map_dictionary(func=to_unicode, dictionary=entry, key="url")
            for entry in domitems
        ]

    return None
def normalize(self, domitems):
    """Apply ``self.remove_rooturl`` to the url(s) in ``domitems``.

    Intended to strip the root url from item urls that carry it.

    * string -- the function is applied to the string itself.
    * dictionary -- ``helpers.map_dictionary`` is called with the function,
      the dictionary and the key ``"url"``.
    * list -- the same mapping is done for every element and the results
      are returned as a new list.

    Any other type of argument yields ``None``.
    """
    if helpers.is_str(domitems):
        return self.remove_rooturl(domitems)

    if helpers.is_dict(domitems):
        return helpers.map_dictionary(self.remove_rooturl, domitems, "url")

    if helpers.is_list(domitems):
        drop_root = self.remove_rooturl
        return [
            helpers.map_dictionary(func=drop_root, dictionary=entry, key="url")
            for entry in domitems
        ]

    return None
def realCaseTest(self):
    """Instantiate a three-level ``DomItem`` tree and print what was built.

    A ``DomItem`` is created with a nested ``pagination`` description that
    itself nests an ``articles`` description.  If the constructor raises,
    the failure is printed and the method returns.  Otherwise the
    attributes of the item are printed, followed by those of its nested
    item and of that item's nested item, whenever the nested value is a
    single item rather than a list.
    """
    def _report(label, item):
        # Prints the four attribute lines shared by every nesting level;
        # ``label`` carries the indentation and the "Nested " prefix.
        self.print_with_color("DARKCYAN", label + "DomItem name: %s" % item.name)
        self.print_with_color("DARKCYAN", label + "DomItem url: %s" % item.url)
        self.print_with_color("DARKCYAN", label + "DomItem selector: %s" % item.domselector)
        self.print_with_color(
            "DARKCYAN", label + "DomItem has_nested_items: %s" % item.has_nested_items)

    try:
        domitem = DomItem(
            'category_item',
            '/category/politics',
            'nav > ul > li > a',
            {
                "name": 'pagination',
                "url": '/category/politics/page1',
                "selector": 'div.pagination > ul > li > a',
                "nested_items": {
                    "name": 'articles',
                    "url": '/article/123123.html',
                    "selector": 'h2 > a'
                }
            })
    except Exception as e:
        self.print_failure("Test failed with :%s" % str(e))
        self.print_seperator()
        return

    self.print_success("Dom Item instantiation successful")
    _report("", domitem)

    if domitem.has_nested_items:
        self.print_success("\tDom Item has nested items")
        from newsline.helpers import helpers

        if helpers.is_list(domitem.nested_items):
            self.print_with_color("DARKCYAN", "\tNested DomItems are many")
        else:
            nitem = domitem.nested_items
            _report("\tNested ", nitem)

            if nitem.has_nested_items:
                self.print_success("\t\tNested Dom Item has nested items")
                # "many" means a list, matching the check done one level up.
                if helpers.is_list(nitem.nested_items):
                    self.print_with_color(
                        "DARKCYAN", "\tNested DomItems nested items are many")
                else:
                    nnitem = nitem.nested_items
                    _report("\t\tNested ", nnitem)

    self.print_success("Test passed successfully")
    self.print_seperator()