def parse(self, lItem):
    """Load the list described by ``lItem`` and wrap it in a ParsingResult.

    The local definition is picked in this order:

    1. the catcher cfg (``__<catcher>.cfg`` under
       ``common.Paths.catchersDir``) when ``lItem['catcher']`` is set;
    2. ``lItem['url']`` itself when its file extension is ``cfg``;
    3. ``lItem['cfg']`` when it is set.

    Whenever the loaded list has at least one rule, the remote content is
    loaded into it through ``__loadRemote``.

    Args:
        lItem: item read through the keys 'url', 'cfg' and 'catcher'. Its
            'url' entry is overwritten with the list's ``start`` value when
            the url points at a cfg file with a non-empty start and rules.

    Returns:
        ParsingResult with code
        * CFGSYNTAX_INVALID and ``None`` when no usable list was produced,
        * WEBREQUEST_FAILED and the list when ``__loadRemote`` returned
          False,
        * SUCCESS and the list otherwise; unless the list's skill contains
          'allowDuplicates', duplicate entries are removed first.
    """
    url = lItem['url']
    cfg = lItem['cfg']
    ext = getFileExtension(url)

    successfullyScraped = True
    tmpList = None

    if lItem['catcher']:
        cfg = os.path.join(common.Paths.catchersDir,
                           '__' + lItem['catcher'] + '.cfg')
        tmpList = self.__loadLocal(cfg, lItem)
        if tmpList and len(tmpList.rules) > 0:
            successfullyScraped = self.__loadRemote(tmpList, lItem)
    elif ext == 'cfg':
        tmpList = self.__loadLocal(url, lItem)
        if tmpList and tmpList.start != '' and len(tmpList.rules) > 0:
            # The cfg names its own start page; scrape that instead.
            lItem['url'] = tmpList.start
            successfullyScraped = self.__loadRemote(tmpList, lItem)
    elif cfg:
        tmpList = self.__loadLocal(cfg, lItem)
        if tmpList and len(tmpList.rules) > 0:
            successfullyScraped = self.__loadRemote(tmpList, lItem)

    # Autoselect: a list that opts in and holds exactly one 'rss' item is
    # replaced by the parsed content of that item. Only the nested result's
    # list is used; its status code is not inspected.
    if (tmpList and tmpList.skill.find('autoselect') != -1
            and len(tmpList.items) == 1):
        m = tmpList.items[0]
        if m['type'] == 'rss':
            common.log('Autoselect - ' + m['title'])
            tmpList = self.parse(m).list

    if not tmpList:
        return ParsingResult(ParsingResult.Code.CFGSYNTAX_INVALID, None)

    # Kept as an equality test on purpose: a None result from __loadRemote
    # is not treated as a failure, exactly as before.
    if successfullyScraped == False:
        return ParsingResult(ParsingResult.Code.WEBREQUEST_FAILED, tmpList)

    # Remove duplicates (same url + cfg). Walking backwards keeps the last
    # occurrence of each pair and lets us delete by index without shifting
    # entries that have not been visited yet.
    if tmpList.skill.find('allowDuplicates') == -1:
        seen = set()
        for i in range(len(tmpList.items) - 1, -1, -1):
            item = tmpList.items[i]
            key = item['url'] + '|' + (item['cfg'] or '')
            if key not in seen:
                seen.add(key)
            elif item['type'] != 'say':
                # Duplicate 'say' items are deliberately left in place.
                del tmpList.items[i]

    return ParsingResult(ParsingResult.Code.SUCCESS, tmpList)
def parse(self, lItem):
    """Load the list described by ``lItem`` and wrap it in a ParsingResult.

    The local definition is picked in this order:

    1. the catcher cfg (``__<catcher>.cfg`` under
       ``common.Paths.catchersDir``) when ``lItem['catcher']`` is set;
    2. ``lItem['url']`` itself when its file extension is ``cfg``;
    3. ``lItem['cfg']`` when it is set.

    Whenever the loaded list has at least one rule, the remote content is
    loaded into it through ``__loadRemote``.

    Args:
        lItem: item read through the keys 'url', 'cfg' and 'catcher'. Its
            'url' entry is overwritten with the list's ``start`` value when
            the url points at a cfg file with a non-empty start and rules.

    Returns:
        ParsingResult with code
        * CFGSYNTAX_INVALID and ``None`` when no usable list was produced,
        * WEBREQUEST_FAILED and the list when ``__loadRemote`` returned
          False,
        * SUCCESS and the list otherwise; unless the list's skill contains
          'allowDuplicates', every duplicate entry is removed first.
    """
    url = lItem['url']
    cfg = lItem['cfg']
    ext = getFileExtension(url)

    successfullyScraped = True
    tmpList = None

    if lItem['catcher']:
        cfg = os.path.join(common.Paths.catchersDir,
                           '__' + lItem['catcher'] + '.cfg')
        tmpList = self.__loadLocal(cfg, lItem)
        if tmpList and len(tmpList.rules) > 0:
            successfullyScraped = self.__loadRemote(tmpList, lItem)
    elif ext == 'cfg':
        tmpList = self.__loadLocal(url, lItem)
        if tmpList and tmpList.start != '' and len(tmpList.rules) > 0:
            # The cfg names its own start page; scrape that instead.
            lItem['url'] = tmpList.start
            successfullyScraped = self.__loadRemote(tmpList, lItem)
    elif cfg:
        tmpList = self.__loadLocal(cfg, lItem)
        if tmpList and len(tmpList.rules) > 0:
            successfullyScraped = self.__loadRemote(tmpList, lItem)

    # Autoselect: a list that opts in and holds exactly one 'rss' item is
    # replaced by the parsed content of that item. Only the nested result's
    # list is used; its status code is not inspected.
    if (tmpList and tmpList.skill.find('autoselect') != -1
            and len(tmpList.items) == 1):
        m = tmpList.items[0]
        if m['type'] == 'rss':
            common.log('Autoselect - ' + m['title'])
            tmpList = self.parse(m).list

    if not tmpList:
        return ParsingResult(ParsingResult.Code.CFGSYNTAX_INVALID, None)

    # Kept as an equality test on purpose: a None result from __loadRemote
    # is not treated as a failure, exactly as before.
    if successfullyScraped == False:
        return ParsingResult(ParsingResult.Code.WEBREQUEST_FAILED, tmpList)

    # Remove duplicates (same url + cfg). Walking backwards keeps the last
    # occurrence of each pair and lets us delete by index without shifting
    # entries that have not been visited yet.
    if tmpList.skill.find('allowDuplicates') == -1:
        seen = set()
        for i in range(len(tmpList.items) - 1, -1, -1):
            item = tmpList.items[i]
            key = item['url'] + '|' + (item['cfg'] or '')
            if key in seen:
                del tmpList.items[i]
            else:
                seen.add(key)

    return ParsingResult(ParsingResult.Code.SUCCESS, tmpList)
def parse(self, lItem):
    """Load the list described by ``lItem`` and wrap it in a ParsingResult.

    The local definition is picked in this order:

    1. the catcher cfg (``__<catcher>.cfg`` under
       ``common.Paths.catchersDir``) when ``lItem["catcher"]`` is set;
    2. ``lItem["url"]`` itself when its file extension is ``cfg``;
    3. ``lItem["cfg"]`` when it is set.

    Whenever the loaded list has at least one rule, the remote content is
    loaded into it through ``__loadRemote``.

    Args:
        lItem: item read through the keys "url", "cfg" and "catcher". Its
            "url" entry is overwritten with the list's ``start`` value when
            the url points at a cfg file with a non-empty start and rules.

    Returns:
        ParsingResult with code
        * CFGSYNTAX_INVALID and ``None`` when no usable list was produced,
        * WEBREQUEST_FAILED and ``None`` when ``__loadRemote`` returned
          False,
        * SUCCESS and the list otherwise; unless the list's skill contains
          "allowDuplicates", every duplicate entry is removed first.
    """
    url = lItem["url"]
    cfg = lItem["cfg"]
    ext = getFileExtension(url)

    successfullyScraped = True
    tmpList = None

    if lItem["catcher"]:
        cfg = os.path.join(
            common.Paths.catchersDir, "__" + lItem["catcher"] + ".cfg"
        )
        tmpList = self.__loadLocal(cfg, lItem)
        if tmpList and len(tmpList.rules) > 0:
            successfullyScraped = self.__loadRemote(tmpList, lItem)
    elif ext == "cfg":
        tmpList = self.__loadLocal(url, lItem)
        if tmpList and tmpList.start != "" and len(tmpList.rules) > 0:
            # The cfg names its own start page; scrape that instead.
            lItem["url"] = tmpList.start
            successfullyScraped = self.__loadRemote(tmpList, lItem)
    elif cfg:
        tmpList = self.__loadLocal(cfg, lItem)
        if tmpList and len(tmpList.rules) > 0:
            successfullyScraped = self.__loadRemote(tmpList, lItem)

    # Autoselect: a list that opts in and holds exactly one "rss" item is
    # replaced by the parsed content of that item. Only the nested result's
    # list is used; its status code is not inspected.
    if (
        tmpList
        and tmpList.skill.find("autoselect") != -1
        and len(tmpList.items) == 1
    ):
        m = tmpList.items[0]
        if m["type"] == "rss":
            common.log("Autoselect - " + m["title"])
            tmpList = self.parse(m).list

    if not tmpList:
        return ParsingResult(ParsingResult.Code.CFGSYNTAX_INVALID, None)

    # Kept as an equality test on purpose: a None result from __loadRemote
    # is not treated as a failure, exactly as before.
    if successfullyScraped == False:
        # NOTE(review): the locally loaded list is discarded here (None is
        # returned as the payload) - confirm callers do not need it.
        return ParsingResult(ParsingResult.Code.WEBREQUEST_FAILED, None)

    # Remove duplicates (same url + cfg). Walking backwards keeps the last
    # occurrence of each pair and lets us delete by index without shifting
    # entries that have not been visited yet.
    if tmpList.skill.find("allowDuplicates") == -1:
        seen = set()
        for i in range(len(tmpList.items) - 1, -1, -1):
            item = tmpList.items[i]
            key = item["url"] + "|" + (item["cfg"] or "")
            if key in seen:
                del tmpList.items[i]
            else:
                seen.add(key)

    return ParsingResult(ParsingResult.Code.SUCCESS, tmpList)
def parse(self, lItem):
    """Load the list described by ``lItem`` and return it.

    The local definition is ``lItem['url']`` when its file extension is
    ``cfg``, otherwise ``lItem['cfg']`` when that is set. After a successful
    local load the remote content is loaded into the list through
    ``__loadRemote`` (for a cfg url only when the list's ``start`` is
    non-empty).

    Args:
        lItem: item read through the keys 'url' and 'cfg'. Its 'url' entry
            is overwritten with the list's ``start`` value when the url
            points at a cfg file with a non-empty start.

    Returns:
        The loaded list, or ``None`` when there is neither a cfg url nor a
        cfg entry, or when no usable list could be loaded. A failed remote
        load is only logged; the list is still returned.
    """
    url = lItem['url']
    cfg = lItem['cfg']
    ext = getFileExtension(url)

    successfullyScraped = True

    if ext == 'cfg':
        tmpList = self.__loadLocal(url, lItem)
        if tmpList and tmpList.start != '':
            # The cfg names its own start page; scrape that instead.
            lItem['url'] = tmpList.start
            successfullyScraped = self.__loadRemote(tmpList, lItem)
    elif cfg:
        tmpList = self.__loadLocal(cfg, lItem)
        if tmpList:
            successfullyScraped = self.__loadRemote(tmpList, lItem)
    else:
        return None

    # Autoselect: a list that opts in and holds exactly one 'rss' item is
    # replaced by the parsed content of that item. The leading truthiness
    # guard matters: a failed local load leaves tmpList falsy, and touching
    # tmpList.skill would raise before the "couldn't be loaded" path below
    # is reached.
    if (tmpList and tmpList.skill.find('autoselect') != -1
            and len(tmpList.items) == 1):
        m = tmpList.items[0]
        if m['type'] == 'rss':
            common.log('Autoselect - ' + m['title'])
            tmpList = self.parse(m)

    if not tmpList:
        common.log("cfg file couldn't be loaded")
        return None

    # Kept as an equality test on purpose: a None result from __loadRemote
    # is not reported as a failure, exactly as before.
    if successfullyScraped == False:
        common.log("cfg file successfully loaded, but scraping failed")

    return tmpList