Python findall Examples, utils.regexUtils.findall Python Examples

Example #1

0

Show file

File: customReplacements.py Project: rollysalvana/pampereo-xbmc-plugins

 def __replaceSimpleVars(self, data):
     """Substitute every simple-scheme placeholder found in ``data``.

     Each key of ``self.simpleScheme`` is handed to ``findall`` as the
     pattern; every piece of text it returns is replaced in ``data`` by the
     value stored under that key.  The rewritten text is returned.
     """
     for pattern in self.simpleScheme:
         occurrences = findall(data, pattern)
         substitute = self.simpleScheme.get(pattern)
         for occurrence in occurrences:
             data = data.replace(occurrence, substitute)
     return data

Example #2

0

Show file

    def __replaceCatchers(self, data):
        """Expand every catcher directive in ``data`` with its template file.

        The directives are located with the pattern ``self.regex('catch')``.
        For each one that does not start with ``#``, the text between the
        directive's first 7 and last 2 characters is split on commas: the
        first element names the catcher, the remaining ones are positional
        parameters.  The file ``<catcherName>.txt`` from
        ``common.Paths.catchersDir`` is loaded, ``@PARAM1@``, ``@PARAM2@``,
        ... are filled in, line endings are normalised, three fixed
        ``item_*`` lines are appended and the directive is replaced by the
        result.  Directives whose file does not exist are logged and left
        untouched.

        Returns the rewritten text.
        """
        m_reg = findall(data, self.regex('catch'))
        if not m_reg:
            return data

        for idat in m_reg:
            # Directives that start with '#' are skipped unchanged.
            if idat.startswith('#'):
                continue
            # Drop the fixed-length wrapper (presumably '@CATCH(' ... ')@' --
            # TODO confirm against the 'catch' pattern) and split the rest.
            ps = idat[7:-2].strip().split(',')
            catcherName = ps.pop(0).strip()

            # import catcher file and insert parameters
            pathImp = os.path.join(common.Paths.catchersDir,
                                   catcherName + '.txt')
            if not os.path.exists(pathImp):
                common.log('Skipped Catcher: ' + catcherName)
                continue
            dataImp = fu.getFileContent(pathImp)

            # Enumerate instead of popping by index: popping while the index
            # grows skipped every second parameter and raised IndexError as
            # soon as a directive carried two or more parameters.
            for number, param in enumerate(ps, 1):
                dataImp = dataImp.replace('@PARAM' + str(number) + '@',
                                          param.strip())

            dataImp = dataImp.replace('\r\n', '\n')
            dataImp += "\nitem_info_name=type\nitem_info_build=video\nitem_url_build=%s"
            data = data.replace(idat, dataImp)
        return data

Example #3

0

Show file

File: favouritesManager.py Project: gtfamily/gtfamily

    def _findItem(self, item):
        """Locate the stored favourites entry that corresponds to ``item``.

        A pattern is assembled from the item's regex-escaped title, its
        optional cfg and its url, and is searched for in the favourites file
        (or, when ``definedIn`` starts with ``favfolders/``, in the file of
        that name inside the favourites-folders directory).

        Returns ``(cfgFile, data, entry)`` for the first match, or ``None``
        when the file does not exist or nothing matches.
        """
        escTitle = re.escape(item.getInfo('title'))
        escCfg = item.getInfo('cfg')
        if escCfg:
            escCfg = re.escape(escCfg)
        escUrl = re.escape(item.getInfo('url'))

        # Pieces of the entry, later glued together with optional whitespace.
        banner = '########################################################'
        pieces = ['', banner, '# ' + escTitle.upper(), banner,
                  'title=' + escTitle, '.*?']
        if escCfg:
            pieces.append('cfg=' + escCfg)
        pieces.append('url=' + escUrl)
        pattern = '(' + '\s*'.join(pieces) + ')'

        cfgFile = self._favouritesFile
        definedIn = item.getInfo('definedIn')
        if definedIn and definedIn.startswith('favfolders/'):
            folderFile = definedIn.split('/')[1]
            cfgFile = os.path.join(self._favouritesFoldersFolder, folderFile)

        if not os.path.exists(cfgFile):
            return None

        data = fu.getFileContent(cfgFile)
        found = regexUtils.findall(data, pattern)
        if not found:
            return None
        return (cfgFile, data, found[0])

Example #4

0

Show file

 def __replaceSimpleVars(self, data):
     """Apply the simple replacement scheme to ``data`` and return it.

     For every key in ``self.simpleScheme`` the text is searched with
     ``findall`` using the key as pattern, and each returned match is
     replaced by the value the scheme stores for that key.
     """
     for key in self.simpleScheme:
         found = findall(data, key)
         replacement = self.simpleScheme.get(key)
         for text in found:
             data = data.replace(text, replacement)
     return data

Example #5

0

Show file

 def __replaceParameters(self, data, params=[]):
     """Fill the ``@PARAMn@`` placeholders of ``data`` from ``params``.

     The n-th element of ``params`` (counting from 1) replaces every match
     that ``findall`` reports for ``(@PARAMn@)``; the element is stripped
     of surrounding whitespace first.  Returns the rewritten text.
     """
     for number, par in enumerate(params, 1):
         placeholder = '(@PARAM' + str(number) + '@)'
         for hit in findall(data, placeholder) or ():
             data = data.replace(hit, par.strip())
     return data

Example #6

0

Show file

File: customReplacements.py Project: CYBERxNUKE/xbmc-addon

 def __replaceParameters(self, data, params=[]):
     """Insert positional parameters into ``data``.

     Placeholders are numbered from 1: the first entry of ``params`` goes
     into whatever ``findall`` matches for ``(@PARAM1@)``, the second into
     ``(@PARAM2@)`` and so on.  Each value is whitespace-stripped before it
     is inserted.  The updated text is returned.
     """
     for position, value in enumerate(params, 1):
         found = findall(data, '(@PARAM' + str(position) + '@)')
         if not found:
             continue
         for token in found:
             data = data.replace(token, value.strip())
     return data

Example #7

0

Show file

File: favouritesManager.py Project: Anniyan/SportsDevil-Fixes

 def _findItem(self, item):
     """Find the favourites entry belonging to ``item``.

     The favourites file is used unless the item's ``definedIn`` starts
     with ``favfolders/``, in which case the file at that relative path
     below ``self._favouritesFolder`` is used.  The search pattern comes
     from ``self.cfgBuilder.buildItem`` and is built from the escaped title
     and url.

     Returns ``(cfgFile, data, entry)`` for the first match, otherwise
     ``None`` (also when the file does not exist).
     """
     cfgFile = self._favouritesFile
     definedIn = item.getInfo('definedIn')
     if definedIn and definedIn.startswith('favfolders/'):
         cfgFile = os.path.join(self._favouritesFolder, definedIn)

     if not os.path.exists(cfgFile):
         return None

     data = fu.getFileContent(cfgFile)
     buildItem = self.cfgBuilder.buildItem
     escTitle = re.escape(item.getInfo('title'))
     escUrl = re.escape(item.getInfo('url'))
     pattern = buildItem(escTitle, "[^#]*", escUrl)
     hits = regexUtils.findall(data, pattern)
     if not hits:
         return None
     return (cfgFile, data, hits[0])

Example #8

0

Show file

 def install(self, filename):
     """Extract an add-on archive and return the id declared in its addon.xml.

     ``filename`` is passed to ``self.extract`` together with the translated
     ``INSTALL_DIR``.  Among the extracted entries the first one whose
     ``filename`` ends with ``addon.xml`` is read, and the value of its
     ``<addon id="...">`` attribute is returned.

     Returns ``None`` when nothing was extracted, no addon.xml is present or
     no id could be found in it.
     """
     destination = xbmc.translatePath(INSTALL_DIR)
     files = self.extract(filename, destination)
     if not files:
         return None

     # A list comprehension always yields an indexable list.  filter() only
     # does so on Python 2; on Python 3 it returns a lazy iterator that is
     # always truthy and cannot be subscripted.
     addonXml = [entry for entry in files
                 if entry.filename.endswith('addon.xml')]
     if not addonXml:
         return None

     path = os.path.join(destination, addonXml[0].filename)
     content = getFileContent(path)
     addonId = findall(content, '<addon id="([^"]+)"')
     if addonId:
         return addonId[0]
     return None

Example #9

0

Show file

 def _findItem(self, item):
     """Look up ``item`` in the favourites configuration.

     Searches ``self._favouritesFile``, or the file ``definedIn`` below
     ``self._favouritesFolder`` when ``definedIn`` starts with
     ``favfolders/``, using the pattern produced by
     ``self.cfgBuilder.buildItem`` from the escaped title and url.

     Returns a ``(cfgFile, data, entry)`` tuple for the first match, or
     ``None`` if the file is missing or no entry matches.
     """
     cfgFile = self._favouritesFile
     definedIn = item.getInfo('definedIn')
     inFolder = definedIn and definedIn.startswith('favfolders/')
     if inFolder:
         cfgFile = os.path.join(self._favouritesFolder, definedIn)

     if os.path.exists(cfgFile):
         data = fu.getFileContent(cfgFile)
         builder = self.cfgBuilder.buildItem
         titlePart = re.escape(item.getInfo('title'))
         urlPart = re.escape(item.getInfo('url'))
         found = regexUtils.findall(data, builder(titlePart, "[^#]*", urlPart))
         if found:
             return (cfgFile, data, found[0])
     return None

Example #10

0

Show file

File: customReplacements.py Project: rollysalvana/pampereo-xbmc-plugins

    def __replaceFinders(self, data):
        """Rewrite the finder directives of ``data`` into item rules.

        Directives are located with ``self.regex('find')``; those starting
        with ``#`` are ignored.  The text between the first 6 and last 2
        characters of a directive is split on commas into a method name and
        three arguments (single quotes stripped).  For method ``JS1`` the
        directive is replaced by an ``item_infos=``/``item_order=`` pair
        built from those arguments; other methods are left untouched.

        Returns the rewritten text.
        """
        for directive in findall(data, self.regex('find')):
            if directive.startswith('#'):
                continue

            args = directive[6:-2].strip().split(',')
            method = args[0].strip("'")
            jsName = args[1].strip("'")
            idName = args[2].strip("'")
            varName = args[3].strip("'")

            if method != 'JS1':
                continue

            pattern = "javascript[^<]+" + idName + "\s*=\s*[\"']([^\"']+)[\"'][^<]*</script\s*>[^<]*<script[^<]*src=[\"']" + jsName + "[\"']"
            replacement = "item_infos=" + pattern + "\nitem_order=" + varName
            data = data.replace(directive, replacement)
        return data

Example #11

0

Show file

    def __replaceFinders(self, data):
        """Turn finder directives in ``data`` into ``item_infos`` rules.

        Every match of ``self.regex('find')`` that is not prefixed with
        ``#`` is stripped of its first 6 and last 2 characters and split on
        commas, giving a method name followed by three arguments (single
        quotes removed).  Only the ``JS1`` method is handled: the directive
        is replaced by an ``item_infos=<regex>`` line plus an
        ``item_order=<third argument>`` line.

        Returns the rewritten text.
        """
        for directive in findall(data, self.regex('find')):
            if directive.startswith('#'):
                continue

            fields = directive[6:-2].strip().split(',')
            method = fields[0].strip("'")
            jsName = fields[1].strip("'")
            idName = fields[2].strip("'")
            varName = fields[3].strip("'")

            if method != 'JS1':
                continue

            pattern = "(?:java)?scr(?:'\+')?ipt[^<]+" + idName + "\s*=\s*[\"'](?!http://)([^\"']+)[\"'](?!;\s*width='0')[^<]*</scr(?:'\+')?ipt\s*>[^<]*<scr(?:'\+')?ipt[^<]*src=[\"']" + jsName + "[\"']"
            replacement = "item_infos=" + pattern + "\nitem_order=" + varName
            data = data.replace(directive, replacement)
        return data

Example #12

0

Show file

File: customReplacements.py Project: rollysalvana/pampereo-xbmc-plugins

 def __replaceImports(self, pathToImports, data):
     """Inline every import directive of ``data``, including nested ones.

     Directives are located with ``self.regex('import')``; each match is
     indexed as ``match[0]`` (the text that gets replaced) and ``match[1]``
     (the file name).  A directive starting with ``#`` is removed.
     Otherwise the file is looked up in ``self.Paths.modulesDir`` first and
     in ``pathToImports`` second, and its content (with ``\\r\\n``
     normalised to ``\\n``) replaces the directive.  Missing files are
     logged and their directive is left in place.

     The scan repeats so that directives inside imported files are
     resolved as well.  Returns the rewritten text.
     """
     while True:
         m_reg = findall(data, self.regex('import'))
         if not m_reg:
             break

         before = data
         for idat in m_reg:
             if idat[0].startswith('#'):
                 data = data.replace(idat[0], '')
                 continue
             filename = idat[1]
             pathImp = os.path.join(self.Paths.modulesDir, filename)
             if not os.path.exists(pathImp):
                 pathImp = os.path.join(pathToImports, filename)
                 if not os.path.exists(pathImp):
                     common.log('Skipped Import: ' + filename)
                     continue
             dataImp = fu.getFileContent(pathImp)
             dataImp = dataImp.replace('\r\n', '\n')
             data = data.replace(idat[0], dataImp)

         # If a whole pass changed nothing, only unresolvable directives are
         # left; scanning again would find the same ones forever.
         if data == before:
             break
     return data

Example #13

0

Show file

 def __replaceImports(self, pathToImports, data):
     """Inline every import directive of ``data``, including nested ones.

     Directives are located with ``self.regex('import')``; each match is
     indexed as ``match[0]`` (the text that gets replaced) and ``match[1]``
     (the file name).  A directive starting with ``#`` is removed.
     Otherwise the file is looked up in ``common.Paths.modulesDir`` first
     and in ``pathToImports`` second, and its content (with ``\\r\\n``
     normalised to ``\\n``) replaces the directive.  Missing files are
     logged and their directive is left in place.

     The scan repeats so that directives inside imported files are
     resolved as well.  Returns the rewritten text.
     """
     while True:
         m_reg = findall(data, self.regex('import'))
         if not m_reg:
             break

         before = data
         for idat in m_reg:
             if idat[0].startswith('#'):
                 data = data.replace(idat[0], '')
                 continue
             filename = idat[1]
             pathImp = os.path.join(common.Paths.modulesDir, filename)
             if not os.path.exists(pathImp):
                 pathImp = os.path.join(pathToImports, filename)
                 if not os.path.exists(pathImp):
                     common.log('Skipped Import: ' + filename)
                     continue
             dataImp = fu.getFileContent(pathImp)
             dataImp = dataImp.replace('\r\n', '\n')
             data = data.replace(idat[0], dataImp)

         # If a whole pass changed nothing, only unresolvable directives are
         # left; scanning again would find the same ones forever.
         if data == before:
             break
     return data

Example #14

0

Show file

File: customReplacements.py Project: rollysalvana/pampereo-xbmc-plugins

    def __replaceCatchers(self, data):
        """Expand every catcher directive in ``data`` with its template file.

        The directives are located with the pattern ``self.regex('catch')``.
        For each one that does not start with ``#``, the text between the
        directive's first 7 and last 2 characters is split on commas: the
        first element names the catcher, the remaining ones are positional
        parameters.  The file ``<catcherName>.txt`` from
        ``self.Paths.catchersDir`` is loaded, ``@PARAM1@``, ``@PARAM2@``,
        ... are filled in, line endings are normalised, three fixed
        ``item_*`` lines are appended and the directive is replaced by the
        result.  Directives whose file does not exist are logged and left
        untouched.

        Returns the rewritten text.
        """
        m_reg = findall(data, self.regex('catch'))
        if not m_reg:
            return data

        for idat in m_reg:
            # Directives that start with '#' are skipped unchanged.
            if idat.startswith('#'):
                continue
            # Drop the fixed-length wrapper (presumably '@CATCH(' ... ')@' --
            # TODO confirm against the 'catch' pattern) and split the rest.
            ps = idat[7:-2].strip().split(',')
            catcherName = ps.pop(0).strip()

            # import catcher file and insert parameters
            pathImp = os.path.join(self.Paths.catchersDir, catcherName + '.txt')
            if not os.path.exists(pathImp):
                common.log('Skipped Catcher: ' + catcherName)
                continue
            dataImp = fu.getFileContent(pathImp)

            # Enumerate instead of popping by index: popping while the index
            # grows skipped every second parameter and raised IndexError as
            # soon as a directive carried two or more parameters.
            for number, param in enumerate(ps, 1):
                dataImp = dataImp.replace('@PARAM' + str(number) + '@',
                                          param.strip())

            dataImp = dataImp.replace('\r\n', '\n')
            dataImp += "\nitem_info_name=type\nitem_info_build=video\nitem_url_build=%s"
            data = data.replace(idat, dataImp)
        return data

Example #15

0

Show file

File: syncManager.py Project: gtfamily/gtfamily

 def getFilesScrape(self):
     """Download ``self.url`` and scrape the file entries listed on the page.

     Each regex match yields one ``SyncObject`` whose ``name`` is the last
     path segment of the link, ``file`` the link prefixed with
     ``https://github.com`` and with ``blob`` replaced by ``raw``,
     ``checksum`` the part after the first ``-`` of the element id, and
     ``created`` the parsed ``title`` of the ``<time>`` element.

     Returns the list of objects, or ``None`` when the download fails or
     the page contains no matching entries.
     """
     url = self.url
     try:
         f = urllib.urlopen(url)
         try:
             response = f.read()
         finally:
             # Close the handle even when read() fails.
             f.close()
     except Exception:
         # Best effort: a failed download means there is nothing to sync.
         # Unlike a bare except, this lets KeyboardInterrupt and SystemExit
         # propagate.
         return None

     matches = rU.findall(response, '<td class="content"><a href="([^"]+)"[^>]+id="([^"]+)".*?<td class="age"><time [^<]*title="([^"]+)"')
     if not matches:
         return None

     syncObjects = []
     for m in matches:
         obj = SyncObject()
         obj.name = m[0].split('/')[-1]
         obj.file = ('https://github.com' + m[0]).replace('blob', 'raw')
         obj.checksum = m[1].split('-')[1]
         obj.created = github.getUpdatedAtFromString(m[2])
         syncObjects.append(obj)
     return syncObjects

Example #16

0

Show file

    def __loadRemote(self, inputList, lItem):
        """Collect items for the remote page named by ``lItem['url']``.

        Several strategies are tried in order until one produces items:
        parsing the URL string itself, parsing the downloaded HTML, looking
        for an RTMP stream, following an embedding JavaScript to a PHP page,
        resolving "vcods", and finally following a redirect.  The sequence
        runs at most ``maxits`` times per URL; from the second run on the
        page is fetched with the cache ignored and demystification enabled.
        Finding a redirect switches to the new URL and restarts the count.

        Items that were found are appended to ``inputList.items``.

        Returns ``False`` when the downloaded page is an empty string or an
        ``IOError`` is raised, otherwise ``True`` (even if nothing was
        found).
        """

        try:
            inputList.curr_url = lItem['url']

            count = 0
            i = 1
            maxits = 2  # 1 optimistic + 1 demystified
            ignoreCache = False
            demystify = False
            startUrl = inputList.curr_url
            #print inputList, lItem
            while count == 0 and i <= maxits:
                # From the second attempt on: bypass the cache and demystify.
                if i > 1:
                    ignoreCache = True
                    demystify = True

                # Trivial: url is from known streamer
                # (the quoted URL itself is parsed as if it were page content)
                items = self.__parseHtml(inputList.curr_url,
                                         '"' + inputList.curr_url + '"',
                                         inputList.rules, inputList.skill,
                                         inputList.cfg, lItem)
                count = len(items)

                # try to find items in html source code
                if count == 0:
                    referer = ''
                    if lItem['referer']:
                        referer = lItem['referer']
                    data = common.getHTML(inputList.curr_url, referer,
                                          ignoreCache, demystify)
                    if data == '':
                        return False

                    msg = 'Remote URL ' + str(inputList.curr_url) + ' opened'
                    if demystify:
                        msg += ' (demystified)'
                    common.log(msg)

                    # Narrow the page down to the configured section(s): first
                    # the list-level one, then the item-level one.
                    if inputList.section != '':
                        section = inputList.section
                        data = self.__getSection(data, section)

                    if lItem['section']:
                        section = lItem['section']
                        data = self.__getSection(data, section)

                    items = self.__parseHtml(inputList.curr_url, data,
                                             inputList.rules, inputList.skill,
                                             inputList.cfg, lItem)
                    count = len(items)
                    common.log('    -> ' + str(count) + ' item(s) found')

                # find rtmp stream
                #common.log('Find rtmp stream')
                if count == 0:
                    item = self.__findRTMP(data, startUrl, lItem)
                    if item:
                        items = []
                        items.append(item)
                        count = 1

                # find embedding javascripts
                #common.log('Find embedding javascripts')
                if count == 0:
                    item = findJS(data)
                    if item:
                        # Only the first (stream id, script url) pair is used.
                        firstJS = item[0]
                        streamId = firstJS[0]
                        jsUrl = firstJS[1]
                        streamerName = getHostName(jsUrl)
                        jsSource = getHTML(jsUrl, startUrl, True, False)
                        phpUrl = findPHP(jsSource, streamId)
                        if phpUrl:
                            data = getHTML(phpUrl, startUrl, True, True)
                            item = self.__findRTMP(data, phpUrl, lItem)
                            if item:

                                if streamerName:
                                    item['title'] = item['title'].replace(
                                        'RTMP', streamerName)

                                items = []
                                items.append(item)
                                count = 1

                # find vcods
                #common.log('find vcods')
                if count == 0:
                    vcods = findVCods(data)
                    if vcods:
                        sUrl = vcods[0]
                        cod1 = vcods[1]
                        cod2 = vcods[2]
                        swfUrl = vcods[3]
                        unixTS = str(dt.getUnixTimestamp())
                        sUrl = sUrl + '?callback=jQuery1707757964063647694_1347894980192&v_cod1=' + cod1 + '&v_cod2=' + cod2 + '&_=' + unixTS
                        tmpData = getHTML(sUrl, urllib.unquote_plus(startUrl),
                                          True, False)
                        if tmpData and tmpData.find("Bad Request") == -1:
                            newReg = '"result1":"([^\"]+)","result2":"([^\"]+)"'
                            link = regexUtils.findall(tmpData, newReg)
                            if link:
                                # First match: result1 is the play path,
                                # result2 the rtmp url (backslashes removed).
                                _file = link[0][0]
                                rtmp = link[0][1].replace('\\', '')
                                #.replace('/redirect','/vod')
                                item = CListItem()
                                item['title'] = getHostName(
                                    sUrl) + '* - ' + _file
                                item['type'] = 'video'
                                item[
                                    'url'] = rtmp + ' playPath=' + _file + ' swfUrl=' + swfUrl + ' swfVfy=1 live=true pageUrl=' + startUrl
                                item.merge(lItem)
                                items.append(item)
                                count = 1

                # find redirects
                #common.log('find redirects')
                if count == 0:
                    red = self.__findRedirect(startUrl, inputList.curr_url)
                    if startUrl == red:
                        common.log('    -> No redirect found')
                    else:
                        common.log('    -> Redirect: ' + red)
                        inputList.curr_url = red
                        common.log(
                            str(len(inputList.items)) + ' items ' +
                            inputList.cfg + ' -> ' + red)
                        startUrl = red
                        if lItem['referer']:
                            lItem['referer'] = red
                        # Restart the attempt counter for the new URL (it
                        # becomes 1 again after the increment below).
                        i = 0

                i += 1

            if count != 0:
                inputList.items = inputList.items + items

        except IOError:
            if common.enable_debug:
                traceback.print_exc(file=sys.stdout)
            return False
        return True

Example #17

0

Show file

File: parser.py Project: aamkTV/githamtv.github.io

    def __loadRemote(self, inputList, lItem):
        """Collect items for the remote page named by ``lItem['url']``.

        Several strategies are tried in order until one produces items:
        parsing the URL string itself, parsing the downloaded HTML, looking
        for an RTMP stream, following an embedding JavaScript to a PHP page,
        resolving "vcods", and finally following a redirect.  The sequence
        runs at most ``maxits`` times per URL; from the second run on the
        page is fetched with the cache ignored and demystification enabled.
        Finding a redirect switches to the new URL and restarts the count.

        Items that were found are appended to ``inputList.items``.

        Returns ``False`` when the downloaded page is an empty string or an
        ``IOError`` is raised, otherwise ``True`` (even if nothing was
        found).
        """

        try:
            inputList.curr_url = lItem['url']

            count = 0
            i = 1
            maxits = 2      # 1 optimistic + 1 demystified
            ignoreCache = False
            demystify = False
            startUrl = inputList.curr_url
            #print inputList, lItem
            while count == 0 and i <= maxits:
                # From the second attempt on: bypass the cache and demystify.
                if i > 1:
                    ignoreCache = True
                    demystify =  True

                # Trivial: url is from known streamer
                # (the quoted URL itself is parsed as if it were page content)
                items = self.__parseHtml(inputList.curr_url, '"' + inputList.curr_url + '"', inputList.rules, inputList.skill, inputList.cfg, lItem)
                count = len(items)


                # try to find items in html source code
                if count == 0:
                    referer = ''
                    if lItem['referer']:
                        referer = lItem['referer']
                    # Undo URL quoting and HTML entity escaping before fetching.
                    inputList.curr_url = HTMLParser.HTMLParser().unescape(urllib.unquote(inputList.curr_url))
                    data = common.getHTML(inputList.curr_url, None, referer, ignoreCache, demystify)
                    if data == '':
                        return False

                    msg = 'Remote URL ' + str(inputList.curr_url) + ' opened'
                    if demystify:
                        msg += ' (demystified)'
                    common.log(msg)

                    
                    # Narrow the page down to the configured section(s): first
                    # the list-level one, then the item-level one.
                    if inputList.section != '':
                        section = inputList.section
                        data = self.__getSection(data, section)
                        
                    if lItem['section']:
                        section = lItem['section']
                        data = self.__getSection(data, section)
                                                
                    
                    items = self.__parseHtml(inputList.curr_url, data, inputList.rules, inputList.skill, inputList.cfg, lItem)
                    count = len(items)
                    common.log('    -> ' + str(count) + ' item(s) found')

                # find rtmp stream
                #common.log('Find rtmp stream')
                if count == 0:
                    item = self.__findRTMP(data, startUrl, lItem)
                    if item:
                        items = []
                        items.append(item)
                        count = 1

                # find embedding javascripts
                #common.log('Find embedding javascripts')
                if count == 0:
                    item = findJS(data)
                    if item:
                        # Only the first (stream id, script url) pair is used.
                        firstJS = item[0]
                        streamId = firstJS[0]
                        jsUrl = firstJS[1]
                        streamerName = getHostName(jsUrl)
                        jsSource = getHTML(jsUrl, None, startUrl, True, False)
                        phpUrl = findPHP(jsSource, streamId)
                        if phpUrl:
                            data = getHTML(phpUrl, None, startUrl, True, True)
                            item = self.__findRTMP(data, phpUrl, lItem)
                            if item:
                                
                                if streamerName:
                                    item['title'] = item['title'].replace('RTMP', streamerName)
                                
                                items = []
                                items.append(item)
                                count = 1
                            else:
                                # No stream on the PHP page: treat the PHP url
                                # as a redirect target and start over with it.
                                red = phpUrl
                                common.log('    -> Redirect: ' + red)
                                inputList.curr_url = red
                                common.log(str(len(inputList.items)) + ' items ' + inputList.cfg + ' -> ' + red)
                                startUrl = red
                                if lItem['referer']:
                                    lItem['referer'] = red
                                # NOTE(review): this `continue` skips the
                                # `i += 1` at the end of the loop, so the retry
                                # does not count against maxits -- confirm the
                                # loop cannot spin on a repeating phpUrl.
                                continue

                # find vcods
                #common.log('find vcods')
                if count == 0:
                    vcods = findVCods(data)
                    if vcods:
                        sUrl = vcods[0]
                        cod1 = vcods[1]
                        cod2 = vcods[2]
                        swfUrl = vcods[3]
                        unixTS = str(dt.getUnixTimestamp())
                        sUrl = sUrl + '?callback=jQuery1707757964063647694_1347894980192&v_cod1=' + cod1 + '&v_cod2=' + cod2 + '&_=' + unixTS
                        tmpData = getHTML(sUrl, None, urllib.unquote_plus(startUrl), True, False)
                        if tmpData and tmpData.find("Bad Request") == -1:
                            newReg = '"result1":"([^\"]+)","result2":"([^\"]+)"'
                            link = regexUtils.findall(tmpData, newReg)
                            if link:
                                # First match: result1 is the play path,
                                # result2 the rtmp url (backslashes removed).
                                _file = link[0][0]
                                rtmp = link[0][1].replace('\\','')
                                #.replace('/redirect','/vod')
                                item = CListItem()
                                item['title'] = getHostName(sUrl) + '* - ' + _file
                                item['type'] = 'video'
                                item['url'] = rtmp + ' playPath=' + _file + ' swfUrl=' + swfUrl +' swfVfy=1 live=true pageUrl=' + startUrl
                                item.merge(lItem)
                                items.append(item)
                                count = 1  
                        
                        
                        
                # find redirects
                #common.log('find redirects')
                if count == 0:
                    red = self.__findRedirect(startUrl, inputList.curr_url)
                    if startUrl == red:
                        common.log('    -> No redirect found')
                    else:
                        # Decode HTML entities and URL quoting in the target.
                        red = HTMLParser.HTMLParser().unescape(red) 
                        red = urllib.unquote(red)
                        common.log('    -> Redirect: ' + red)
                        inputList.curr_url = red
                        common.log(str(len(inputList.items)) + ' items ' + inputList.cfg + ' -> ' + red)
                        startUrl = red
                        if lItem['referer']:
                            lItem['referer'] = red
                        # Restart the attempt counter for the new URL (it
                        # becomes 1 again after the increment below).
                        i = 0

                i += 1


            if count != 0:
                inputList.items = inputList.items + items


        except IOError:
            if common.enable_debug:
                traceback.print_exc(file = sys.stdout)
            return False
        return True