    def parse(self, lItem):
        url = lItem['url']
        cfg = lItem['cfg']
        ext = getFileExtension(url)

        successfullyScraped = True

        tmpList = None
        if lItem['catcher']:
            catcher = lItem['catcher']
            cfg = os.path.join(common.Paths.catchersDir, '__' + catcher + '.cfg')
            tmpList = self.__loadLocal(cfg, lItem)
            if tmpList and len(tmpList.rules) > 0:
                successfullyScraped = self.__loadRemote(tmpList, lItem)
        else:
            if ext == 'cfg':
                tmpList = self.__loadLocal(url, lItem)
                if tmpList and tmpList.start != '' and len(tmpList.rules) > 0:
                    lItem['url'] = tmpList.start
                    successfullyScraped = self.__loadRemote(tmpList, lItem)
            elif cfg:
                tmpList = self.__loadLocal(cfg, lItem)
                if tmpList and len(tmpList.rules) > 0:
                    successfullyScraped = self.__loadRemote(tmpList, lItem)

        # autoselect
        if tmpList and tmpList.skill.find('autoselect') != -1 and len(tmpList.items) == 1:
            m = tmpList.items[0]
            m_type = m['type']

            if m_type == 'rss':
                common.log('Autoselect - ' + m['title'])
                lItem = m
                tmpList = self.parse(lItem).list

        if not tmpList:
            return ParsingResult(ParsingResult.Code.CFGSYNTAX_INVALID, None)
        if not successfullyScraped:
            return ParsingResult(ParsingResult.Code.WEBREQUEST_FAILED, tmpList)

        # Remove duplicates
        if tmpList.skill.find('allowDuplicates') == -1:
            urls = []
            for i in range(len(tmpList.items) - 1, -1, -1):
                item = tmpList.items[i]

                tmpUrl = item['url']
                tmpCfg = item['cfg']
                if not tmpCfg:
                    tmpCfg = ''

                if (tmpUrl + '|' + tmpCfg) not in urls:
                    urls.append(tmpUrl + '|' + tmpCfg)
                elif item['type'] != 'say':
                    tmpList.items.remove(item)

        return ParsingResult(ParsingResult.Code.SUCCESS, tmpList)
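For reference, a minimal sketch of how this method might be invoked. The lItem keys ('url', 'cfg', 'catcher', 'type', 'title') are taken from the accesses in the code above; the Parser class name, the placeholder URL, and the assumption that ParsingResult exposes the code and list it was constructed with are hypothetical.

# Hypothetical caller; names flagged below are assumptions, not from the source.
lItem = {
    'url': 'http://example.com/channels.cfg',  # placeholder URL (assumed)
    'cfg': None,
    'catcher': None,   # no catcher: take the ext == 'cfg' branch
    'type': 'rss',
    'title': 'Channels',
}

result = Parser().parse(lItem)  # 'Parser' is an assumed class name
if result.code == ParsingResult.Code.SUCCESS:  # .code attribute is assumed
    for item in result.list.items:
        print(item['title'])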
Example 2
    def parse(self, lItem):
        url = lItem['url']
        cfg = lItem['cfg']
        ext = getFileExtension(url)

        successfullyScraped = True

        tmpList = None
        if lItem['catcher']:
            catcher = lItem['catcher']
            cfg = os.path.join(common.Paths.catchersDir,
                               '__' + catcher + '.cfg')
            tmpList = self.__loadLocal(cfg, lItem)
            if tmpList and len(tmpList.rules) > 0:
                successfullyScraped = self.__loadRemote(tmpList, lItem)
        else:
            if ext == 'cfg':
                tmpList = self.__loadLocal(url, lItem)
                if tmpList and tmpList.start != '' and len(tmpList.rules) > 0:
                    lItem['url'] = tmpList.start
                    successfullyScraped = self.__loadRemote(tmpList, lItem)
            elif cfg:
                tmpList = self.__loadLocal(cfg, lItem)
                if tmpList and len(tmpList.rules) > 0:
                    successfullyScraped = self.__loadRemote(tmpList, lItem)

        # autoselect
        if tmpList and tmpList.skill.find('autoselect') != -1 and len(
                tmpList.items) == 1:
            m = tmpList.items[0]
            m_type = m['type']

            if m_type == 'rss':
                common.log('Autoselect - ' + m['title'])
                lItem = m
                tmpList = self.parse(lItem).list

        if not tmpList:
            return ParsingResult(ParsingResult.Code.CFGSYNTAX_INVALID, None)
        if not successfullyScraped:
            return ParsingResult(ParsingResult.Code.WEBREQUEST_FAILED, tmpList)

        # Remove duplicates
        if tmpList.skill.find('allowDuplicates') == -1:
            urls = []
            for i in range(len(tmpList.items) - 1, -1, -1):
                item = tmpList.items[i]
                tmpUrl = item['url']
                tmpCfg = item['cfg']
                if not tmpCfg:
                    tmpCfg = ''
                if (tmpUrl + '|' + tmpCfg) not in urls:
                    urls.append(tmpUrl + '|' + tmpCfg)
                else:
                    tmpList.items.remove(item)

        return ParsingResult(ParsingResult.Code.SUCCESS, tmpList)
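The backwards loop in the duplicate-removal block is the load-bearing detail: removing an element while iterating forwards would shift the indices still to be visited. A standalone sketch of the same pattern (function and variable names here are illustrative, not from the source):

def dedupe_by_key(items, key):
    seen = set()
    for i in range(len(items) - 1, -1, -1):
        k = key(items[i])
        if k in seen:
            del items[i]  # deleting at index i leaves indices < i untouched
        else:
            seen.add(k)

entries = [{'url': 'a', 'cfg': ''}, {'url': 'b', 'cfg': ''}, {'url': 'a', 'cfg': ''}]
dedupe_by_key(entries, lambda e: e['url'] + '|' + (e['cfg'] or ''))
# entries now holds two items; as in the examples above, the last
# occurrence of each url|cfg pair survives.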
Example 3
    def parse(self, lItem):
        url = lItem["url"]
        cfg = lItem["cfg"]
        ext = getFileExtension(url)

        successfullyScraped = True

        tmpList = None
        if lItem["catcher"]:
            catcher = lItem["catcher"]
            cfg = os.path.join(common.Paths.catchersDir, "__" + catcher + ".cfg")
            tmpList = self.__loadLocal(cfg, lItem)
            if tmpList and len(tmpList.rules) > 0:
                successfullyScraped = self.__loadRemote(tmpList, lItem)
        else:
            if ext == "cfg":
                tmpList = self.__loadLocal(url, lItem)
                if tmpList and tmpList.start != "" and len(tmpList.rules) > 0:
                    lItem["url"] = tmpList.start
                    successfullyScraped = self.__loadRemote(tmpList, lItem)
            elif cfg:
                tmpList = self.__loadLocal(cfg, lItem)
                if tmpList and len(tmpList.rules) > 0:
                    successfullyScraped = self.__loadRemote(tmpList, lItem)

        # autoselect
        if tmpList and tmpList.skill.find("autoselect") != -1 and len(tmpList.items) == 1:
            m = tmpList.items[0]
            m_type = m["type"]

            if m_type == "rss":
                common.log("Autoselect - " + m["title"])
                lItem = m
                tmpList = self.parse(lItem).list

        if not tmpList:
            return ParsingResult(ParsingResult.Code.CFGSYNTAX_INVALID, None)
        if not successfullyScraped:
            return ParsingResult(ParsingResult.Code.WEBREQUEST_FAILED, None)

        # Remove duplicates
        if tmpList.skill.find("allowDuplicates") == -1:
            urls = []
            for i in range(len(tmpList.items) - 1, -1, -1):
                item = tmpList.items[i]
                tmpUrl = item["url"]
                tmpCfg = item["cfg"]
                if not tmpCfg:
                    tmpCfg = ""
                if (tmpUrl + "|" + tmpCfg) not in urls:
                    urls.append(tmpUrl + "|" + tmpCfg)
                else:
                    tmpList.items.remove(item)

        return ParsingResult(ParsingResult.Code.SUCCESS, tmpList)
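Example 4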
    def parse(self, lItem):
        url = lItem['url']
        cfg = lItem['cfg']
        ext = getFileExtension(url)

        successfullyScraped = True

        if ext == 'cfg':
            tmpList = self.__loadLocal(url, lItem)
            if tmpList and tmpList.start != '':
                lItem['url'] = tmpList.start
                successfullyScraped = self.__loadRemote(tmpList, lItem)
        elif cfg:
            tmpList = self.__loadLocal(cfg, lItem)
            if tmpList:
                successfullyScraped = self.__loadRemote(tmpList, lItem)
        else:
            return None

        # autoselect
        if tmpList and tmpList.skill.find('autoselect') != -1 and len(tmpList.items) == 1:
            m = tmpList.items[0]
            m_type = m['type']

            if m_type == 'rss':
                common.log('Autoselect - ' + m['title'])
                lItem = m
                tmpList = self.parse(lItem)

        if not tmpList:
            common.log("cfg file couldn't be loaded")
            return None
        if not successfullyScraped:
            common.log("cfg file successfully loaded, but scraping failed")

        return tmpList