def getFormData(comPattern, content, posini):
    """Extract the next form matched by ``comPattern`` in ``content``.

    Parameters:
        comPattern: compiled ``CustomRegEx.ExtRegexObject`` used to find the form.
        content: text that is searched.
        posini: position at which the search starts.

    Returns:
        ``None`` when ``comPattern`` does not match, otherwise the tuple
        ``(posfin, formAttr, formFields)``: the end position of the match,
        the attribute dict of the matched tag and an ``OrderedDict`` that
        maps every field name to its attribute dict.

    Raises:
        TypeError: when ``comPattern`` is not a ``CustomRegEx.ExtRegexObject``.
    """
    if not isinstance(comPattern, CustomRegEx.ExtRegexObject):
        raise TypeError('Expecting an CustomRegEx.ExtRegexObject')
    match = comPattern.search(content, posini)
    if not match:
        return None
    posfin = match.end()
    formHtml = match.group()
    formAttr = getAttrDict(formHtml, 0, noTag=False)[1]
    formAttr.pop('*ParamPos*')
    formFields = collections.OrderedDict()

    if formAttr and 'id' in formAttr:
        # Inputs that the page script prepends to this form through
        # $('<input/>').attr({...}).prependTo('#<form id>').
        formId = formAttr['id']
        pattern = r'''\$\(['"]<input/>['"]\)\.attr\(\{(?P<input_attr>[^}]+)\}\)\.prependTo\(['"]#%s['"]\)''' % formId
        prependVars = re.findall(pattern, content)
        for avar in prependVars:
            # Turn the "key: value, key: value" object literal into tag attributes.
            avar = avar.replace(': ', ':').replace(',', '').replace(':', '=')
            avar = '<input ' + avar + ' prepend="">'
            attr = getAttrDict(avar, 0, noTag=True)
            formFields[attr['name']] = attr

    pattern = r'(?#<form<__TAG__="input|select|textarea"=tag name=name>*>)'
    for m in CustomRegEx.finditer(pattern, formHtml):
        tag, name = m.groups()
        p1, p2 = m.span()
        attr = getAttrDict(m.group(), 0, noTag=True)
        attr.pop('*ParamPos*')

        if formFields.get(name, None):
            # Repeated field name: collect every value in a list.
            if 'value' in attr and 'value' in formFields[name]:
                value = formFields[name]['value']
                if isinstance(value, basestring):
                    value = [value]
                value.append(attr['value'])
                formFields[name]['value'] = value
            continue

        formFields[name] = attr
        if 'list' in attr:
            pattern = r'(?#<datalist id="%s"<value=value>*>)' % attr['list']
            attr['value'] = CustomRegEx.findall(pattern, formHtml)
        elif tag == 'select':
            fieldHtml = formHtml[p1:p2]
            pattern = r'(?#<option value=value *=&lvalue&>)'
            options = CustomRegEx.findall(pattern, fieldHtml)
            # findall returns a list of (value, lvalue) tuples, so the labels
            # are taken from the pairs; the previous code called .groups() on
            # that list and raised AttributeError for every <select>.
            attr['lvalue'] = [option[1] for option in options]
            pattern = r'(?#<option value=value>)'
            attr['value'] = CustomRegEx.findall(pattern, fieldHtml)
            pattern = r'(?#<option value=value selected>)'
            selected = CustomRegEx.findall(pattern, fieldHtml)
            # No option marked as selected -> empty default.
            attr['default'] = selected[0] if selected else ''
        elif tag == 'textarea':
            attr['value'] = attr.get('*', '')
    return posfin, formAttr, formFields
def getMenuHeaderFooterOLD(param, args, data, menus):
    """Build the header/footer menu entries described by ``menus``.

    Each element of ``menus`` is a ``(label, regexp)`` pair where ``regexp``
    has the form ``default|values`` (or just ``values``).  For every pair
    whose values pattern matches in ``data`` a ``[paramDict, itemParam, None]``
    triple is appended to the returned list.
    """
    unescape = HTMLParser.HTMLParser().unescape
    menuId = args.get('menu', ['rootmenu'])[0]
    url = args.get("url")[0]
    entries = []
    for index, (opLabel, opregexp) in enumerate(menus):
        head, sep, tail = opregexp.partition('|')
        valuesRegex = tail or head
        defaultRegex = head if sep else ''

        # Slice of data searched for the values pattern.
        start, stop = 0, -1
        if defaultRegex.startswith('(?#<SPAN>)'):
            start = -1
            spanMatch = CustomRegEx.search(defaultRegex, data)
            if spanMatch:
                start, stop = spanMatch.span(0)
        found = CustomRegEx.findall(valuesRegex, data[start:stop])
        if not found:
            continue

        tags = CustomRegEx.compile(valuesRegex).groupindex.keys()
        if len(tags) > 1:
            menuUrl = [item[tags.index('url')] for item in found]
        else:
            menuUrl = found
        if 'label' in tags:
            rawLabels = [item[tags.index('label')] for item in found]
            menuLabel = map(unescape, rawLabels)
        else:
            menuLabel = len(menuUrl) * ['Label placeholder']

        defaultLabel = defaultRegex
        if defaultRegex:
            labelMatch = CustomRegEx.search(defaultRegex, data)
            defaultLabel = unescape(labelMatch.group(1) if labelMatch else '')

        paramDict = dict([(key, value[0])
                          for key, value in args.items()
                          if hasattr(value, "__getitem__")
                          and key not in ["header", "footer"]])
        paramDict.update({'section': param,
                          'url': url,
                          param: index,
                          'menu': menuId,
                          'menulabel': str(menuLabel),
                          'menuurl': str(menuUrl)})
        itemParam = {'isFolder': True, 'label': opLabel + defaultLabel}
        entries.append([paramDict, itemParam, None])
    return entries
def parseClassDefinition(self):
    """Write the module header or the class statement to ``self._log``.

    On the first call (``self._classSignature`` still empty) the file header
    is stored in ``self._classSignature`` and printed with ``self.classUrl``
    filled in.  On later calls the class signature and its first paragraph
    are read from the text returned by ``self._getSectionText('')`` and the
    resulting ``class`` statement is printed.
    """
    if not self._classSignature:
        header = ('# -*- coding: utf-8 -*-\n"""%s"""\n'
                  'from Android import overload\n\n')
        self._classSignature = header
        print(header % self.classUrl, file=self._log)
        return

    texto = self._getSectionText('')
    sigMatch = crgx.search(r'(?#<code class="api-signature" *=label>)', texto)
    words = sigMatch.group('label').replace('\n', '').split(' ')
    # Dropping the empty items collapses runs of spaces.
    classSignature = ' '.join([word for word in words if word])
    self._classSignature = classSignature

    # The description is the first <p> found after the signature.
    remainder = texto[sigMatch.end('label'):]
    paragraph = crgx.search(r'(?#<p>)', remainder).group()
    classDoc = ' '.join(re.findall('>(.*?)<', paragraph, re.DOTALL))
    classDoc = re.sub(r' +', ' ', classDoc.replace('\n', ''))

    classType, className = classSignature.split(' class ')
    indent = '\n '
    classDoc = self.formatDocString(classDoc, indent, 80)
    if 'abstract' in classType:
        template = '\nclass I{0}(object):{1}{2}__metaclass__ = abc.ABCMeta'
    else:
        template = '\nclass {0}(object):{1}'
    self._classname = className
    print(template.format(className, classDoc, indent), file=self._log)
def thevideo(videoId, encHeaders=''):
    """Return the media url reported by thevideo.me for ``videoId``.

    The page is requested once to collect the form variables and cookies,
    the form is then posted back and the url of the last ``label/file``
    pair found in the answer is returned.  ``encHeaders`` is not used.
    """
    headers = {
        'User-Agent': DESKTOP_BROWSER,
        'Referer': 'http://thevideo.me/%s' % videoId
    }
    firstUrl = 'http://thevideo.me/%s<headers>%s' % (videoId, urllib.urlencode(headers))
    content = basicFunc.openUrl(firstUrl)[1]

    # Variables added by script, followed by the inputs declared in the form.
    prependPattern = r'''name: '(?P<var1>[^']+)', value: '(?P<var2>[^']+)' \}\).prependTo\(\"#veriform\"\)'''
    formVars = CustomRegEx.findall(prependPattern, content)
    inputPattern = r"(?#<form .input<name=var1 value=var2>*>)"
    formVars.extend(CustomRegEx.findall(inputPattern, content))

    cookiePattern = r"\$\.cookie\(\'(?P<var1>[^']+)\', \'(?P<var2>[^']+)\'"
    cookieval = CustomRegEx.findall(cookiePattern, content)

    qte = urllib.quote
    fields = [(name, qte(value) if value else '') for name, value in formVars]
    postdata = '&'.join(['='.join(field) for field in fields])
    headers['Cookie'] = '; '.join(['='.join(cookie) for cookie in cookieval])

    secondUrl = 'http://thevideo.me/%s<post>%s<headers>%s' % (
        videoId, postdata, urllib.urlencode(headers))
    content = basicFunc.openUrl(secondUrl)[1]

    sourcePattern = r"label: '(?P<res>[^']+)', file: '(?P<url>[^']+)'"
    sources = CustomRegEx.findall(sourcePattern, content)
    res, href = sources.pop()
    return href
def test_equivNotation(self):
    """
    Equivalent notation using associativity, which is expressed with {}.
    """
    patterns = ('(?#<a href span{src=icon *=label} div.id>)',
                '(?#<a href span.src=icon span.*=label div.id>)')
    grouped, expanded = [CustomRegEx.compile(pattern, 0) for pattern in patterns]
    assert ExtCompObjEquality(grouped, expanded)
def test_cleanvars(self):
    """
    <a href="http://uno.html">texto</a>
    HTML tag with the implicit variable href and a label variable that
    receives the text once the whitespace of its prefix and suffix has been
    removed: when the text of the tag is surrounded by newlines, tabs or
    spaces, the &label& notation stores only the inner text in label.
    """
    plain = CustomRegEx.compile('(?#<a (href) *=label>)', 0)
    cleaned = CustomRegEx.compile('(?#<a (href) *=&label&>)', 0)
    plainText = plain.tags['tagpholder']['*']
    cleanedText = cleaned.tags['tagpholder']['*']
    assert plainText != cleanedText
def test_tripleAsignation(self):
    """
    Equivalent notation using a double assignment to declare both the
    variable and the required parameter.
    """
    separate = '(?#<ese a.*="http//.+?/prueba" a.*=icon href=url>)'
    chained = '(?#<ese a.*="http//.+?/prueba"=icon href=url>)'
    compiledSeparate = CustomRegEx.compile(separate, 0)
    compiledChained = CustomRegEx.compile(chained, 0)
    assert ExtCompObjEquality(compiledSeparate, compiledChained)
def test_equivNotationII(self):
    """
    Equivalent notation using associativity when the same tag appears at
    several levels.
    """
    dotted = ('(?#<table id td.*=grp1 td[2].b.*=grp2 td[2].a.href=grp2a '
              'td[2].a.src=grp2b td[3].*=grp3 td[4].*=grp4>)')
    braced = ('(?#<table id td{1.*=grp1 2{b.*=grp2 a{href=grp2a src=grp2b}} '
              '3.*=grp3 4.*=grp4}>)')
    compiledDotted = CustomRegEx.compile(dotted, 0)
    compiledBraced = CustomRegEx.compile(braced, 0)
    assert ExtCompObjEquality(compiledDotted, compiledBraced)
def test_tag(self):
    """Check tag patterns and the ways of binding the tag name to a variable."""
    html = self.htmlStr
    labels = ['span0', 'bloque1', 'bloque2', 'span3']
    tagVars = ['__TAG__', 'label']
    spanPairs = [('span', 'span0'), ('span', 'bloque1'), ('span', 'bloque2'), ('span', 'span3')]
    namedVars = ['mi_nametag_var', 'label']

    found = CustomRegEx.findall('(?#<span|a *=label>)', html)
    assert found == labels, 'Obtener texto de tags span o a'

    # Parenthesised tag pattern: the tag name is stored in __TAG__.
    compiled = CustomRegEx.compile('(?#<(span|a) *=label>)')
    names = compiled.groupindex.keys()
    assert names == tagVars, 'Al encerrar el tagpattern entre paréntesis el nametag se almacena en la variable __TAG__ '
    found = compiled.findall(html)
    assert found == spanPairs, 'El primer componente de los tuples que conforman answer corresponde al nametag'

    # __TAG__ used as attribute: the tag name goes to a custom variable.
    compiled = CustomRegEx.compile('(?#<span|a __TAG__=mi_nametag_var *=label>)')
    names = compiled.groupindex.keys()
    assert names == namedVars, 'Al utilizar el atributo __TAG__ se puede asignar una variable que contendra el nametag de los tags que cumplen con el pattern buscado'
    found = compiled.findall(html)
    assert found == spanPairs, 'El resultado es el mismo, cambia solo el nombre de la variable asociada al nametag'

    # __TAG__ used as the tag pattern itself.
    compiled = CustomRegEx.compile('(?#<__TAG__ *="[sb].+?"=label>)')
    found = compiled.findall(html)
    assert found == labels, 'Al utilizar __TAG__ como tag attribute se hace el tagpattern = "[a-zA-Z][^\s>]*", para con el primer resultado se asigna "[sb].+?" al *'

    compiled = CustomRegEx.compile('(?#<(__TAG__) *=".+?"=label>)')
    names = compiled.groupindex.keys()
    assert names == tagVars, 'Se puede utiliza (__TAG__) para guardar el nametag en la variable __TAG__'

    compiled = CustomRegEx.compile('(?#<__TAG__ __TAG__=mi_nametag_var *=".+?"=label>)')
    names = compiled.groupindex.keys()
    assert names == namedVars, 'Se puede utiliza __TAG__=nombrevar para guardar el nametag en una variable con nmbre propio'

    compiled = CustomRegEx.compile('(?#<__TAG__ __TAG__=mi_nametag_var *=label>)')
    found = compiled.findall(html)
    anyTagPairs = [('span', 'span0'), ('script', ''), ('bloque', ''), ('span', 'span3')]
    assert found == anyTagPairs, 'Utilizando __TAG__ como tagpattern'

    compiled = CustomRegEx.compile('(?#<__TAG__ __TAG__="span|a"=mi_nametag_var *=label>)')
    found = compiled.findall(html)
    assert found == spanPairs, 'Utilizando __TAG__="span|a"=mi_nametag_var se redefine el tagpattern a "span|a" y se asigna a la variable mi_nametag_var'

    # Binding the tag name twice, with (__TAG__) and with
    # __TAG__=mi_nametag_var, must be rejected.
    with pytest.raises(re.error):
        'Entrega error porque se utiliza (__TAG__) como tagpattern y con __TAG__=mi_nametag_var se intenta asignarle a otra variable'
        CustomRegEx.compile('(?#<(__TAG__) __TAG__=mi_nametag_var *=label>)')
def parseUrlContent(url, data, regexp, compFlags = None, posIni = 0, posFin = 0):
    """Collect every match of ``regexp`` in ``data`` as a list of dicts.

    Parameters:
        url: base url used to make the url-like variables absolute.
        data: text that is scanned.
        regexp: pattern compiled with ``CustomRegEx.compile``; its parse
            directives are read with ``getParseDirectives``.
        compFlags: compile flags, ``None`` meaning ``0``.
        posIni: position at which the scan starts.
        posFin: when not ``0``, the scan stops at the first match that starts
            after this position.

    Returns:
        One ``groupdict()`` per match.  The variables ``url``, ``videoUrl``,
        ``iconImage`` and ``thumbnailImage`` are joined with ``url``; when the
        pattern has a ``label`` variable, all ``label*`` variables except
        ``label2`` are merged into an HTML-unescaped ``label``.  With the
        ``SPAN`` directive a ``span`` entry holds ``str((start, end))`` of the
        chosen group.
    """
    parseDirect = getParseDirectives(regexp)
    nxtposini = parseDirect.get('NXTPOSINI', 0)
    pattern = CustomRegEx.compile(regexp, flags = compFlags or 0)

    matchs = []
    dataLen = len(data)
    while posIni <= dataLen:
        match = pattern.search(data, posIni)
        if not match:
            break
        if posFin != 0 and match.start(0) > posFin:
            break
        matchDict = match.groupdict()
        if 'SPAN' in parseDirect:
            idGroup = parseDirect['SPAN']
            matchDict['span'] = str((match.start(idGroup), match.end(idGroup)))
        matchs.append(matchDict)
        # Always move forward: a zero-width match, or a NXTPOSINI group that
        # did not take part in the match (end() == -1), used to leave the
        # scan position where it was and the loop never finished.
        nextPos = match.end(nxtposini)
        posIni = nextPos if nextPos > posIni else posIni + 1

    patternVars = pattern.groupindex.keys()
    url_vars = ['url', 'videoUrl', 'iconImage', 'thumbnailImage']
    for key in set(url_vars).intersection(patternVars):
        for elem in matchs:
            elem[key] = urlparse.urljoin(url, elem[key])

    if matchs and 'label' in patternVars:
        srchKeys = sorted([key for key in patternVars
                           if key.startswith('label') and key != 'label2'])
        htmlUnescape = HTMLParser.HTMLParser().unescape
        for elem in matchs:
            parts = [elem.pop(key) for key in srchKeys]
            elem['label'] = htmlUnescape(' '.join([part for part in parts if part]))
    return matchs
def parseUrlContent(url, data, regexp, compFlags = None, posIni = 0, posFin = 0):
    """Collect every match of ``regexp`` in ``data`` as a list of dicts.

    Parameters:
        url: base url used to make the url-like variables absolute.
        data: text that is scanned.
        regexp: pattern compiled with ``CustomRegEx.compile``; its parse
            directives are read with ``getParseDirectives``.
        compFlags: compile flags, ``None`` meaning ``0``.
        posIni: position at which the scan starts.
        posFin: when not ``0``, the scan stops at the first match that starts
            after this position.

    Returns:
        One ``groupdict()`` per match.  The variables ``url``, ``videoUrl``,
        ``iconImage`` and ``thumbnailImage`` have ``https:`` replaced by
        ``http:`` and are joined with ``url``; when the pattern has a
        ``label`` variable, all ``label*`` variables except ``label2`` are
        merged into an HTML-unescaped ``label``.  With the ``SPAN`` directive
        a ``span`` entry holds ``str((start, end))`` of the chosen group.
    """
    parseDirect = getParseDirectives(regexp)
    nxtposini = parseDirect.get('NXTPOSINI', 0)
    pattern = CustomRegEx.compile(regexp, flags = compFlags or 0)

    matchs = []
    dataLen = len(data)
    while posIni <= dataLen:
        match = pattern.search(data, posIni)
        if not match:
            break
        if posFin != 0 and match.start(0) > posFin:
            break
        matchDict = match.groupdict()
        if 'SPAN' in parseDirect:
            idGroup = parseDirect['SPAN']
            matchDict['span'] = str((match.start(idGroup), match.end(idGroup)))
        matchs.append(matchDict)
        # Always move forward: a zero-width match, or a NXTPOSINI group that
        # did not take part in the match (end() == -1), used to leave the
        # scan position where it was and the loop never finished.
        nextPos = match.end(nxtposini)
        posIni = nextPos if nextPos > posIni else posIni + 1

    patternVars = pattern.groupindex.keys()
    url_vars = ['url', 'videoUrl', 'iconImage', 'thumbnailImage']
    for key in set(url_vars).intersection(patternVars):
        for elem in matchs:
            target = elem[key]
            # A group that did not take part in the match is None and has
            # no .replace(); urljoin then yields the base url for it.
            if target:
                target = target.replace('https:', 'http:')
            elem[key] = urlparse.urljoin(url, target)

    if matchs and 'label' in patternVars:
        srchKeys = sorted([key for key in patternVars
                           if key.startswith('label') and key != 'label2'])
        htmlUnescape = HTMLParser.HTMLParser().unescape
        for elem in matchs:
            parts = [elem.pop(key) for key in srchKeys]
            elem['label'] = htmlUnescape(' '.join([part for part in parts if part]))
    return matchs
def test_general(self):
    """Check required attributes, attribute variables and the ``.*`` comment collector."""
    html = self.htmlStr
    cases = [
        ('(?#<hijo id="hijo1" *=label>)',
         ['primer hijo'],
         'Comentario y variable independiente'),
        ('(?#<hijo id=varid *=label>)',
         [('hijo1', 'primer hijo'), ('hijo2', ''), ('hijo3', 'tercer hijo')],
         'Utilizando variables para distinguir casos'),
        ('(?#<hijo id="hijo[13]"=varid *=label>)',
         [('hijo1', 'primer hijo'), ('hijo3', 'tercer hijo')],
         'Utilizando variables para distinguir casos'),
        ('(?#<hijo exp *=label>)',
         [''],
         'Utilizando atributos requeridos (exp) para distinguir un caso'),
        ('(?#<hijo exp .*>)',
         [('El primer comentario', 'El segundo comentario', 'El tercer comentario')],
         'Comentarios incluidos en tag'),
    ]
    for pattern, required, message in cases:
        answer = CustomRegEx.findall(pattern, html)
        assert answer == required, message

    # Variables cannot be combined with ".*" as required variable.
    with pytest.raises(re.error):
        'Error porque no se pueden utilizar variables cuando se tiene ".*" como variable requerida'
        CustomRegEx.compile('(?#<span class=var1 .*>)')
def getParseDirectives(regexp):
    """Return the parse directives embedded in ``regexp``.

    Every ``?#<...>`` group of ``regexp`` is inspected.  A group whose text,
    upper-cased and without its trailing digits, is ``SPAN`` or ``NXTPOSINI``
    is a directive; the trailing digits are its integer value (``0`` when
    there are none).

    Returns:
        dict mapping the directive name (``'SPAN'`` / ``'NXTPOSINI'``) to its
        integer value.
    """
    directives = {}
    for rawkey in CustomRegEx.findall(r'\?#<([^>]+)>', regexp):
        # Only trailing digits belong to the value.  strip() also removed
        # leading digits, so a text such as '7SPAN' reached int('N') and
        # raised ValueError.
        key = rawkey.upper().rstrip('0123456789')
        if key in ('SPAN', 'NXTPOSINI'):
            suffix = rawkey[len(key):]
            directives[key] = int(suffix) if suffix else 0
    return directives
def vidto(videoId, headers = None):
    """Return the playable url reported by vidto.me for ``videoId``.

    Parameters:
        videoId: identifier used to build ``http://vidto.me/<videoId>.html``.
        headers: optional dict of extra request headers; it is not modified.

    Returns:
        ``'<href>|<urlencoded User-Agent>'`` where ``href`` is the last
        ``player-url`` link found after posting the hidden form back.

    Raises:
        IndexError: when the answer contains no ``player-url`` link.
    """
    # Work on a copy: the User-Agent used to be written into the caller's dict.
    headers = dict(headers or {})
    headers['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(headers)
    url = 'http://vidto.me/%s.html<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(url)[1]

    pattern = r'(?#<Form method="POST".input<type="hidden" name=name value=value>*>)'
    formVars = CustomRegEx.findall(pattern, content)
    qte = urllib.quote
    postdata = '&'.join(['='.join((name, qte(value) if value else ''))
                         for name, value in formVars])
    urlStr = 'http://vidto.me/%s.html<post>%s<headers>%s' % (videoId, postdata, encodeHeaders)
    content = basicFunc.openUrl(urlStr)[1]

    pattern = r'(?#<a class="player-url" href=url>)'
    sources = CustomRegEx.findall(pattern, content, re.DOTALL)
    href = sources.pop()
    return '%s|%s' % (href, urllib.urlencode({'User-Agent':MOBILE_BROWSER}))
def test_nzone(self):
    """Check how __EZONE__ selects the zones excluded from (or used for) the search."""
    html = self.htmlStr
    # Every (class, text) pair expected when no zone is excluded.
    allspan = [
        ('independiente', 'span0'),
        ('bloque1', 'span1'), ('bloque1', 'span2'),
        ('independiente', 'bloque1'), ('independiente', 'bloque2'),
        ('bloque2', 'span1'), ('bloque2', 'span2'),
        ('independiente', 'span3')
    ]
    outsideZones = [pair for pair in allspan if pair[0] == 'independiente']

    defaultAnswer = CustomRegEx.findall('(?#<span class=test *=label>)', html)
    assert defaultAnswer == outsideZones, 'Por default se excluyen Los tags buscados en self.htmlStr contenidos en zonas <!--xxx--> y script'

    explicitAnswer = CustomRegEx.findall(
        '(?#<span class=test *=label __EZONE__="[!--|script]">)', html)
    assert defaultAnswer == explicitAnswer, 'El resultado por default se obtiene haciendo __NZONE__="[!--|script]" '

    answer = CustomRegEx.findall(
        '(?#<span class=test *=label __EZONE__="">)', html)
    assert answer == allspan, 'Para no tener zonas de exclusi.n se hace __EZONE__=""'

    answer = CustomRegEx.findall(
        '(?#<span class=test *=label __EZONE__="[bloque]">)', html)
    required = [pair for pair in allspan if not pair[1].startswith('bloque')]
    assert answer == required, 'Se personaliza la zona de exclusi.n asignando a __NZONE__="xxx|zzz" donde xxx y zzz son tags'

    answer = CustomRegEx.findall(
        '(?#<span class=test *=label __EZONE__="^[!--|script]">)', html)
    required = [pair for pair in allspan if pair[0].startswith('bloque')]
    assert answer == required, 'Para incluir solo tags buscados en las zonas xxx y zzz se debe hacer __NZONE__="^[xxx|zzz]'

    linkCases = [
        ('(?#<a href=url *=labe>)', []),
        ('(?#<a href=url *=label __EZONE__="^[script]">)',
         [('http://www.eltiempo.com.co', 'El Tiempo')]),
        ('(?#<a href=url *=label __EZONE__="^[!--]">)',
         [('http://www.elheraldo.com.co', 'El Heraldo')]),
    ]
    for pattern, required in linkCases:
        answer = CustomRegEx.findall(pattern, html)
        assert answer == required
def test_namedvarswithpattern(self):
    """
    <a href="http://uno/dos/tres.html">texto</a>
    HTML tag with the variables url and label, which receive the value of
    the href attribute and the text respectively, because href matches the
    pattern "http://uno/.+?/tres.html".
    """
    pattern = '(?#<a href="http://uno/.+?/tres.html" href=url *=label>)'
    expectedVars = [['tagpholder.href', 'url'], ['tagpholder.*', 'label']]
    compiled = CustomRegEx.compile(pattern, 0)
    assert compiled.varList == expectedVars
def _getStringContent(self, pattern, texto): pini = 0 lista = [] while True: answ = crgx.search(pattern, texto[pini:]) if not answ: break fdef = ''.join(answ.groups()).strip('\n ') lista.append(fdef) pini += answ.end() return lista
def _getSectionDelimiters(self, section): sections = self._sections if self._sections is None: content = self.getUrlContent() pattern = r'(?#<h[12] class="api.+?">)' sections = crgx.findall(pattern, content) sections = filter(lambda x: 'Protected' not in x, sections) sections.append(u'<!-- end jd-content -->') self._sections = sections it = itertools.dropwhile(lambda x: section not in x, sections) return (it.next(), it.next())
def vidzi(videoId, headers = None):
    """Return the media url hidden in the packed script of a vidzi.tv page.

    Parameters:
        videoId: identifier used to build ``http://vidzi.tv/<videoId>.html``.
        headers: optional dict of extra request headers; it is not modified.

    Returns:
        The last ``file:"..."`` value (ending in ``mp4`` or ``ed=``) found in
        the unpacked script.

    Raises:
        AttributeError: when the packed script or its argument list is not found.
        IndexError: when the unpacked script contains no matching ``file`` entry.
    """
    strVal = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # Work on a copy: the User-Agent used to be written into the caller's dict.
    headers = dict(headers or {})
    headers['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(headers)
    url = 'http://vidzi.tv/%s.html<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(url)[1]

    pattern = r"(?#<script *='eval.+?'=pack>)"
    packed = CustomRegEx.search(pattern, content).group('pack')
    pattern = "}\((?P<tupla>\'.+?\))(?:,0,{})*\)"
    m = re.search(pattern, packed)
    # Arguments of the packer call: payload, base, number of tags, tag list.
    mgrp = m.group(1).rsplit(',', 3)
    patron, base, nTags = mgrp[0], int(mgrp[1]), int(mgrp[2])
    # SECURITY: eval() executes text downloaded from the remote page.
    # NOTE(review): replace with an explicit parser of the tag-list
    # expression before trusting arbitrary hosts.
    lista = eval(mgrp[3])

    while nTags:
        nTags -= 1
        if nTags < base:
            tag = strVal[nTags]
        else:
            # Floor division: the quotient is used as a string index.
            tag = strVal[nTags // base] + strVal[nTags % base]
        patron = re.sub('\\b' + tag + '\\b', lista[nTags] or tag, patron)

    pattern = 'file:"([^"]+(?:mp4|ed=))"'
    sources = CustomRegEx.findall(pattern,patron)
    return sources.pop()
def test_implicitvars(self):
    """
    <a href="http://uno.html">texto</a>
    HTML tag with the implicit variable href and a label variable that
    receives the text.
    """
    compiled = CustomRegEx.compile('(?#<a (href) *=label>)', 0)
    observed = (compiled.tagPattern, compiled.tags, compiled.varList)
    expectedTags = {'tagpholder': {'*': '', 'href': ''}}
    expectedVars = [['tagpholder.href', 'group1'], ['tagpholder.*', 'label']]
    assert observed == ('a', expectedTags, expectedVars)
def vidto(videoId, headers=None):
    """Return the playable url reported by vidto.me for ``videoId``.

    Parameters:
        videoId: identifier used to build ``http://vidto.me/<videoId>.html``.
        headers: optional dict of extra request headers; it is not modified.

    Returns:
        ``'<href>|<urlencoded User-Agent>'`` where ``href`` is the last
        ``player-url`` link found after posting the hidden form back.

    Raises:
        IndexError: when the answer contains no ``player-url`` link.
    """
    # Copy first so the caller's dict does not receive the User-Agent entry.
    requestHeaders = dict(headers or {})
    requestHeaders['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(requestHeaders)

    pageUrl = 'http://vidto.me/%s.html<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(pageUrl)[1]
    formVars = CustomRegEx.findall(
        r'(?#<Form method="POST".input<type="hidden" name=name value=value>*>)',
        content)

    qte = urllib.quote
    pairs = []
    for name, value in formVars:
        pairs.append('='.join((name, qte(value) if value else '')))
    postdata = '&'.join(pairs)

    postUrl = 'http://vidto.me/%s.html<post>%s<headers>%s' % (videoId, postdata,
                                                             encodeHeaders)
    content = basicFunc.openUrl(postUrl)[1]
    sources = CustomRegEx.findall(r'(?#<a class="player-url" href=url>)',
                                  content, re.DOTALL)
    href = sources.pop()
    return '%s|%s' % (href, urllib.urlencode({'User-Agent': MOBILE_BROWSER}))
def test_namedvars(self):
    """
    <a href="http://uno.html">texto</a>
    HTML tag with the variables url and label, which receive the value of
    the href attribute and the text respectively.
    """
    compiled = CustomRegEx.compile('(?#<a href=url *=label>)', 0)
    observed = (compiled.tagPattern, compiled.tags, compiled.varList)
    expectedTags = {'tagpholder': {'*': '', 'href': ''}}
    expectedVars = [['tagpholder.href', 'url'], ['tagpholder.*', 'label']]
    assert observed == ('a', expectedTags, expectedVars)
def getMediaCode(self):
    """Return the code that ``self.parser`` generates for the media node.

    The regexps of the ``thread`` children of ``media`` are inspected to find
    which of ``url``, ``videoUrl`` and ``videoId`` they provide; a warning is
    appended to ``self.ERRORS`` when they provide none.
    """
    keyValues = set(['url', 'videoUrl', 'videoId'])
    adg = self.addonADG

    # Regexps of the thread children, gathered before any of them is compiled.
    sourceRegexps = []
    for child in adg.getChildren('media'):
        if adg.getThreadAttr(child, 'type') == 'thread':
            sourceRegexps.append(adg.getThreadParam(child, 'regexp'))

    keySet = set()
    for sourceRegexp in sourceRegexps:
        groupNames = CustomRegEx.compile(sourceRegexp).groupindex.keys()
        keySet.update(keyValues.intersection(groupNames))
    if not keySet:
        self.ERRORS += 'WARNING: Sources not send any of ' + str(keyValues) + ' to media' + '\n'

    regexp = adg.getThreadParam('media', 'regexp')
    compflags = adg.getThreadParam('media', 'compflags')
    return self.parser.handle(ntype.MEDIA, keySet, regexp, compflags)
def test_getAttrDict2(self):
    """Attribute values that contain quote characters must be read whole."""
    htmlStr = """<a href0="el 'tiempo com" href1="el 'tiempo" com' href2='el 'tiempo' com' href3=''el tiempo' com' href4='el 'tiempo com''>"""
    attrD = CustomRegEx.ExtRegexParser().getAttrDict(htmlStr)
    expectations = [
        ('href0', "el \'tiempo com", "Error comilla interior simple"),
        ('href1', "el \'tiempo\" com", "Error comillas interiores mixtas"),
        ('href2', "el \'tiempo\' com", "Error comillas interiores"),
        ('href3', "\'el tiempo\' com", "Error comillas interiores ajustadas a la izquierda"),
        ('href4', "el \'tiempo com\'", "Error comillas interiores ajustadas a la derecha"),
    ]
    for name, expected, message in expectations:
        assert attrD[name] == expected, message
def vidzi(videoId, headers=None):
    """Return the media url hidden in the packed script of a vidzi.tv page.

    Parameters:
        videoId: identifier used to build ``http://vidzi.tv/<videoId>.html``.
        headers: optional dict of extra request headers; it is not modified.

    Returns:
        The last ``file:"..."`` value (ending in ``mp4`` or ``ed=``) found in
        the unpacked script.

    Raises:
        AttributeError: when the packed script or its argument list is not found.
        IndexError: when the unpacked script contains no matching ``file`` entry.
    """
    strVal = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # Copy first so the caller's dict does not receive the User-Agent entry.
    requestHeaders = dict(headers or {})
    requestHeaders['User-Agent'] = MOBILE_BROWSER
    url = 'http://vidzi.tv/%s.html<headers>%s' % (
        videoId, urllib.urlencode(requestHeaders))
    content = basicFunc.openUrl(url)[1]

    packed = CustomRegEx.search(r"(?#<script *='eval.+?'=pack>)",
                                content).group('pack')
    m = re.search("}\((?P<tupla>\'.+?\))(?:,0,{})*\)", packed)
    # Arguments of the packer call: payload, base, number of tags, tag list.
    patron, rawBase, rawCount, rawTags = m.group(1).rsplit(',', 3)
    base, nTags = int(rawBase), int(rawCount)
    # SECURITY: eval() executes text downloaded from the remote page.
    # NOTE(review): replace with an explicit parser of the tag-list
    # expression before trusting arbitrary hosts.
    lista = eval(rawTags)

    for index in range(nTags - 1, -1, -1):
        if index < base:
            tag = strVal[index]
        else:
            # Floor division: the quotient is used as a string index.
            tag = strVal[index // base] + strVal[index % base]
        patron = re.sub('\\b' + tag + '\\b', lista[index] or tag, patron)

    sources = CustomRegEx.findall('file:"([^"]+(?:mp4|ed=))"', patron)
    return sources.pop()
def getWebData(url, regexPattern, initConf=None, **kwargs):
    """Generator that downloads ``url`` and yields all matches of ``regexPattern``.

    Yields ``[PROCESS_MESSAGE, (text,), kwargs]`` progress items and finally
    one ``[PROCESS_DATA, (matches,), kwargs]`` item with the ``findall``
    result.  When the download returns an exception object it is raised.
    """
    yield [PROCESS_MESSAGE, ('Contactando sitio web bvc', ), kwargs]

    defaultConf = r'curl --user-agent "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36" --cookie-jar "cookies.lwp" --location'
    net = network.network(initConf if initConf else defaultConf)
    content, end_url = net.openUrl(url)
    if isinstance(content, Exception):
        raise content
    yield [PROCESS_MESSAGE, ('Pagina web entregada', ), kwargs]

    matches = CustomRegEx.compile(regexPattern).findall(content)
    yield [PROCESS_DATA, (matches, ), kwargs]
def thevideo(videoId, encHeaders = ''):
    """Return the media url reported by thevideo.me for ``videoId``.

    The page is requested once to collect the form variables and cookies,
    the form is then posted back and the url of the last ``label/file``
    pair found in the answer is returned.  ``encHeaders`` is not used.
    """
    def joinPairs(pairs, glue):
        # 'a=b' items joined with ``glue``.
        return glue.join(['='.join(pair) for pair in pairs])

    headers = {'User-Agent':DESKTOP_BROWSER,
               'Referer': 'http://thevideo.me/%s' % videoId}
    encodeHeaders = urllib.urlencode(headers)
    content = basicFunc.openUrl(
        'http://thevideo.me/%s<headers>%s' % (videoId, encodeHeaders))[1]

    # Form variables: those added by script first, then the declared inputs.
    formVars = CustomRegEx.findall(
        r'''name: '(?P<var1>[^']+)', value: '(?P<var2>[^']+)' \}\).prependTo\(\"#veriform\"\)''',
        content)
    formVars.extend(CustomRegEx.findall(
        r"(?#<form .input<name=var1 value=var2>*>)", content))
    cookieval = CustomRegEx.findall(
        r"\$\.cookie\(\'(?P<var1>[^']+)\', \'(?P<var2>[^']+)\'", content)

    qte = urllib.quote
    postdata = joinPairs(
        [(var1, qte(var2) if var2 else '') for var1, var2 in formVars], '&')
    headers['Cookie'] = joinPairs(cookieval, '; ')
    encodeHeaders = urllib.urlencode(headers)
    content = basicFunc.openUrl(
        'http://thevideo.me/%s<post>%s<headers>%s' % (videoId, postdata, encodeHeaders))[1]

    sources = CustomRegEx.findall(
        r"label: '(?P<res>[^']+)', file: '(?P<url>[^']+)'", content)
    res, href = sources.pop()
    return href
def handle_media(self, keySet, regexp, compflags):
    """Return the source text of the generated ``media()`` function.

    ``keySet`` tells which of ``url``, ``videoUrl`` and ``videoId`` may reach
    the media node; one section is generated for each, guarded by an
    ``if args.get(...)`` line when more than one key is present.  ``regexp``
    and ``compflags`` are written into the ``url`` section.
    """
    tags = CustomRegEx.compile(regexp).groupindex.keys()
    baseIndent = '\n\t'
    guarded = len(keySet) > 1
    code = ['def media():', baseIndent + 'import urlresolver']

    def emit(indent, *statements):
        # Every statement carries its own leading newline and tabs.
        for statement in statements:
            code.append(indent + statement)

    def openSection(key):
        # Write the guard when needed and return the indent of the section body.
        if not guarded:
            return baseIndent
        emit(baseIndent, 'if args.get("%s", None):' % key)
        return baseIndent + '\t'

    if 'url' in keySet:
        indent = openSection('url')
        emit(indent, 'url = args.get("url")[0]')
        quoted = "'" + regexp.replace("'", "\\'") + "'"
        emit(indent, 'regexp = ' + quoted)
        emit(indent, 'url, data = openUrl(url)')
        emit(indent, 'compflags =' + compflags)
        emit(indent, 'subMenus = parseUrlContent(url, data, regexp, compflags )')
        if 'videourl' in tags:
            emit(indent,
                 'videoUrl = subMenus[0]["videourl"]',
                 'url = urlresolver.HostedMediaFile(url = videoUrl).resolve()')
        elif 'videoUrl' in tags:
            emit(indent, 'url = subMenus[0]["videoUrl"]')

    if 'videoUrl' in keySet:
        indent = openSection('videoUrl')
        emit(indent,
             'videoUrl = args.get("videoUrl")[0]',
             'url = urlresolver.HostedMediaFile(url=videoUrl).resolve()')

    if 'videoId' in keySet:
        indent = openSection('videoId')
        emit(indent,
             'videoId = args.get("videoId")[0]',
             "videoHost = args.get('videoHost')[0]",
             'url = urlresolver.HostedMediaFile(host=videoHost,media_id=videoId).resolve()')

    emit(baseIndent,
         'li = xbmcgui.ListItem(path = url)',
         'if args.get("icondef", None): li.setThumbnailImage(args["icondef"][0])',
         'if args.get("labeldef", None): li.setLabel(args["labeldef"][0])',
         "li.setProperty('IsPlayable', 'true')",
         "li.setProperty('mimetype', 'video/x-msvideo')",
         "return xbmcplugin.setResolvedUrl(handle=addon_handle,succeeded=True,listitem=li)")
    return ''.join(code)
def prepareEqLocals(self, startEq):
    """Return the name space used with the start equation ``startEq``.

    For every process id found inside ``start("id")`` / ``stop("id")`` the
    result holds ``_beg<id>_`` (whether the id is in ``self.activeList``) and
    ``_end<id>_`` (``False``), plus the helper functions ``lt``, ``gt``,
    ``isact``, ``start`` and ``stop``.  ``start`` and ``stop`` read the
    returned dict itself, so later changes to its flags are seen by them.
    """
    pattern = r'[+*]*(?:start|stop)\("(.+?)"\)[*+]*'
    procIds = CustomRegEx.findall(pattern, startEq.replace(' ', ''))

    env = {}
    for procId in procIds:
        env['_beg%s_' % procId] = procId in self.activeList
        env['_end%s_' % procId] = False

    def hasStarted(procId):
        return env['_beg%s_' % procId]

    def hasStopped(procId):
        return env['_beg%s_' % procId] and env['_end%s_' % procId]

    env.update(dict(
        lt=lambda x, n: self.actProcess(x) < n,
        gt=lambda x, n: self.actProcess(x) > n,
        isact=lambda x: x in self.activeList,
        start=hasStarted,
        stop=hasStopped))
    return env
def getMediaCode(self):
    """Return the code that ``self.parser`` generates for the media node.

    The regexps of the ``thread`` children of ``media`` are inspected to find
    which of ``url``, ``videoUrl`` and ``videoId`` they provide; a warning is
    appended to ``self.ERRORS`` when they provide none.
    """
    keyValues = set(['url', 'videoUrl', 'videoId'])
    getParam = self.addonADG.getThreadParam
    getAttr = self.addonADG.getThreadAttr

    # All regexps are read before the first one is compiled.
    threadRegexps = [getParam(node, 'regexp')
                     for node in self.addonADG.getChildren('media')
                     if getAttr(node, 'type') == 'thread']

    keySet = set()
    for threadRegexp in threadRegexps:
        compiled = CustomRegEx.compile(threadRegexp)
        keySet |= keyValues.intersection(compiled.groupindex.keys())

    if not keySet:
        self.ERRORS += 'WARNING: Sources not send any of ' + str(
            keyValues) + ' to media' + '\n'

    return self.parser.handle(ntype.MEDIA, keySet,
                              getParam('media', 'regexp'),
                              getParam('media', 'compflags'))
def test_getAttrDict1(self, cycle): htmlLst = [ '<a', 'href0="eltiempo.com"', 'href1="eltiempo.com\'', 'href2=\'eltiempo.com\'', 'href3=eltiempo.com' ] htmlLst.append(htmlLst.pop(cycle)) htmlStr = ' '.join(htmlLst) + '>' print htmlStr parser = CustomRegEx.ExtRegexParser() tag, attrD = parser.getAttrDict(htmlStr, noTag=False) attrD.pop('*ParamPos*') assert sorted(attrD.keys()) == [ 'href%d' % k for k in range(4) ], "Los attributos reportados no corresponde a los reales" assert set(attrD.values()) == set( ["eltiempo.com"] ), "Por lo menos el valor de un attributo reportado no corresponde al real"
def allmyvideos(videoId, headers = None):
    """Return the playable url reported by allmyvideos.net for ``videoId``.

    Parameters:
        videoId: identifier used to build ``http://allmyvideos.net/<videoId>``.
        headers: optional dict of extra request headers; it is not modified.

    Returns:
        ``'<href>|<urlencoded User-Agent>'`` where ``href`` is the ``file``
        value of the last ``file``/``label`` pair found after posting the
        form back.

    Raises:
        IndexError: when the answer contains no ``file``/``label`` pair.
    """
    # Work on a copy: the User-Agent used to be written into the caller's dict.
    headers = dict(headers or {})
    headers['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(headers)
    url = 'http://allmyvideos.net/%s<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(url)[1]

    pattern = r'(?#<form .input<name=name value=value>*>)'
    formVars = CustomRegEx.findall(pattern, content)
    qte = urllib.quote
    postdata = '&'.join(['='.join((name, qte(value) if value else ''))
                         for name, value in formVars])
    urlStr = 'http://allmyvideos.net/%s<post>%s<headers>%s' % (videoId, postdata, encodeHeaders)
    content = basicFunc.openUrl(urlStr)[1]

    pattern = r'"file" : "(?P<url>[^"]+)".+?"label" : "(?P<label>[^"]+)"'
    sources = re.findall(pattern, content, re.DOTALL)
    href, res = sources.pop()
    return '%s|%s' % (href, urllib.urlencode({'User-Agent':MOBILE_BROWSER}))
def HTMLstruct(self):
    """Show the HTML structure of the text of the current match.

    The text of the ``actMatch`` range of ``self.regexpEd`` is parsed with
    ``ExtRegexParser.htmlStruct``.  When parsing fails an information box is
    shown; otherwise the parsed items are listed in a selection dialog, one
    line per item, prefixed with one space per ``'.'`` in the item path.
    """
    tagSpan = self.regexpEd.getSelRange('actMatch')
    content = self.regexpEd.getContent(*tagSpan)
    try:
        htmlParse = CustomRegEx.ExtRegexParser({}, []).htmlStruct(content, 0)
    except Exception:
        # A bare "except:" also trapped KeyboardInterrupt and SystemExit.
        equis = 'Not HTML conform'
        tkMessageBox.showinfo('Actual match HTMLstruct', equis)
    else:
        fmt = '{:<20} {:<40}'.format
        equis = [
            fmt(x[0].count('.') * ' ' + '*' + x[0].rpartition('.')[2], x[1][:40])
            for x in htmlParse
        ]
        from xbmcgui import Dialog
        Dialog().select('Actual match HTMLstruct', equis)
def parsingUrlData(url, regexPattern, initConf=None, **kwargs):
    """Generator that downloads ``url`` and yields every match of ``regexPattern``.

    Yields ``[PROCESS_MESSAGE, (text,), kwargs]`` progress items and then one
    ``[PROCESS_DATA, (k, match, baseIndex), kwargs]`` item per match, where
    ``k`` counts the matches from 1 and ``baseIndex`` is always ``0``.

    Raises:
        Exception: the exception object returned by the download, if any.
    """
    yield [PROCESS_MESSAGE, ('Contactando sitio web bvc', ), kwargs]
    if not initConf:
        initConf = r'curl --user-agent "Mozilla/5.0 (Windows NT 6.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36" --cookie-jar "cookies.lwp" --location'
    net = network.network(initConf)
    content, end_url = net.openUrl(url)
    # getWebData re-raises an exception object returned in place of the
    # content; the same is done here before the content is searched.
    if isinstance(content, Exception):
        raise content
    yield [PROCESS_MESSAGE, ('Pagina weg entregada', ), kwargs]

    reg = CustomRegEx.compile(regexPattern)
    k = 0
    pos = baseIndex = 0
    contentLen = len(content)
    while pos <= contentLen:
        match = reg.search(content, pos)
        if not match:
            break
        k += 1
        # Always move forward so that a zero-width match cannot be reported
        # again and again at the same position.
        nextPos = match.end(0)
        pos = nextPos if nextPos > pos else pos + 1
        yield [PROCESS_DATA, (k, match, baseIndex), kwargs]
def allmyvideos(videoId, headers=None):
    """Return the playable url reported by allmyvideos.net for ``videoId``.

    Parameters:
        videoId: identifier used to build ``http://allmyvideos.net/<videoId>``.
        headers: optional dict of extra request headers; it is not modified.

    Returns:
        ``'<href>|<urlencoded User-Agent>'`` where ``href`` is the ``file``
        value of the last ``file``/``label`` pair found after posting the
        form back.

    Raises:
        IndexError: when the answer contains no ``file``/``label`` pair.
    """
    # Copy first so the caller's dict does not receive the User-Agent entry.
    requestHeaders = dict(headers or {})
    requestHeaders['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(requestHeaders)

    pageUrl = 'http://allmyvideos.net/%s<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(pageUrl)[1]
    formVars = CustomRegEx.findall(r'(?#<form .input<name=name value=value>*>)',
                                   content)

    qte = urllib.quote
    pairs = []
    for name, value in formVars:
        pairs.append('='.join((name, qte(value) if value else '')))
    postdata = '&'.join(pairs)

    postUrl = 'http://allmyvideos.net/%s<post>%s<headers>%s' % (
        videoId, postdata, encodeHeaders)
    content = basicFunc.openUrl(postUrl)[1]
    sources = re.findall(
        r'"file" : "(?P<url>[^"]+)".+?"label" : "(?P<label>[^"]+)"',
        content, re.DOTALL)
    href, res = sources.pop()
    return '%s|%s' % (href, urllib.urlencode({'User-Agent': MOBILE_BROWSER}))
def test_getAttrDict0(self, cycle): htmlLst = [ '<a', 'href2=eltiempo.com', 'href1', 'href0=\"eltiempo.com\"' ] htmlLst.append(htmlLst.pop(cycle)) htmlStr = ' '.join(htmlLst) + '>' print htmlStr parser = CustomRegEx.ExtRegexParser() htmlStr = 18 * ' ' + htmlStr tag, attrD = parser.getAttrDict(htmlStr, offset=18, noTag=False) attrP = attrD.pop('*ParamPos*') assert tag == 'a', "El tag reportado no corresponde al tag real" assert len(attrD) == 3 assert attrD[ 'href0'] == "eltiempo.com", "Error valor de attributo normal" assert attrD['href1'] == "", "Error atributo sin valor" assert attrD[ 'href2'] == "eltiempo.com", "Error valor de attributo sin comillas" getSlice = htmlStr.__getslice__ print '***' + htmlStr + '***' for k in attrD: print k, attrD[k], getSlice(*attrP[k]), attrP[k] assert all([attrD[k] == getSlice(*attrP[k]) for k in attrD])
def getFormXmlStr(content):
    """Describe every ``<form>`` found in ``content`` as a settings XML string.

    Forms are read one after another with ``getFormData``. Each form becomes
    a ``<category label="Form N">`` element that lists the form attributes
    (read-only text settings with an ``fa_`` id prefix) followed by one
    ``<setting>`` per field:

    * fields whose ``value`` is a single string are rendered according to
      their ``type`` (``hidden``, ``radio``/``checkbox``, ``text``,
      ``submit``, ``file``; anything else as plain text);
    * fields whose ``value`` is not a string are joined with ``'|'`` and
      rendered as ``drpdwnlst`` when they carry ``lvalue``, otherwise as
      ``labelenum``.

    A separator is emitted before the first field flagged ``prepend`` and
    before the first field that is not.

    Parameters:
        content: text scanned for forms.

    Returns:
        The complete XML document as a string.

    Note:
        The field dicts returned by ``getFormData`` are updated in place
        (escaped values and filled-in defaults).
    """
    form_xml = '<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n<settings>\n'
    comPattern = CustomRegEx.compile(r'(?#<form>)')
    k = 0
    posIni = 0
    while True:
        formData = getFormData(comPattern, content, posIni)
        if not formData:
            break
        posIni, formAttr, formFields = formData
        formAttr = dict((key, escapeXml(value)) for key, value in formAttr.items())
        form_xml += '\t<category label="Form %s">\n' % (k + 1)
        if formAttr:
            form_xml += '\t\t<setting type="lsep" label ="Form attributes"/>\n'
            for name, value in sorted(formAttr.items()):
                form_xml += '\t\t<setting id="fa_{0}" type="text" label="{0}" default="{1}" enable="false"/>\n'.format(name, value)

        # bFlag: 0 = no separator yet, 1 = "Prepend Vars" emitted,
        # 2 = "Form Vars" emitted.
        bFlag = 0
        for key in formFields:
            field = formFields[key]
            if 'prepend' in field:
                if bFlag == 0:
                    bFlag = 1
                    form_xml += '\t\t<setting type="lsep" label ="Form Prepend Vars"/>\n'
            elif bFlag < 2:
                bFlag = 2
                form_xml += '\t\t<setting type="lsep" label ="Form Vars"/>\n'

            if isinstance(field.get('value', ''), basestring):
                for fkey in ('name', 'value', 'checked'):
                    if fkey in field:
                        field[fkey] = escapeXml(field[fkey])
                atype = field.get('type', '')
                if atype in ('radio', 'checkbox'):
                    field['checked'] = 'true' if 'checked' in field else 'false'
                    felem = '<setting id="{name}" type="bool" label ="{name}" default="{checked}"/>\n'
                elif atype == 'file':
                    field['defaultValue'] = field.get('defaultValue', '')
                    # Newline-terminated like every other template, so the
                    # next setting starts on its own line.
                    felem = '<setting id="if_{name}" type="file" label="{name}" default="{defaultValue}"/>\n'
                else:
                    # Inputs without a value attribute (common for hidden
                    # and submit) would make format() raise KeyError.
                    field['value'] = field.get('value', '')
                    if atype == 'hidden':
                        felem = '<setting id="{name}" type="text" label="{name}" default="{value}" enable="false"/>\n'
                    elif atype == 'submit':
                        felem = '<setting type="lsep" label ="{value}" noline="true"/>\n'
                    else:
                        felem = '<setting id="{name}" type="text" label="{name}" default="{value}"/>\n'
            else:
                toEscape = ['name', 'value', 'default']
                field['value'] = '|'.join(field['value'])
                if 'lvalue' in field:
                    field['lvalue'] = '|'.join(field['lvalue'])
                    toEscape.append('lvalue')
                field['default'] = field.get('default', '')
                field.update([(fkey, escapeXml(field[fkey])) for fkey in toEscape])
                if 'lvalue' in field:
                    felem = '<setting id="{name}" type="drpdwnlst" label="{name}" lvalues="{lvalue}" values="{value}" default="{default}"/>\n'
                else:
                    felem = '<setting id="{name}" type="labelenum" label="{name}" lvalues="{value}" default="{default}"/>\n'
            form_xml += '\t\t' + felem.format(**field)
        form_xml += '\t</category>\n'
        k += 1
    form_xml += '</settings>\n'
    return form_xml
def openloadORIG(videoId, encHeaders = ''):
    """Earlier variant of the openload.co resolver.

    Downloads the embed page for ``videoId``, takes the first "puzzle"
    script text extracted by ``CustomRegEx``, renames its identifiers to
    single upper-case letters, rewrites several JavaScript constructs into
    Python expressions and finally decodes the last statement to locate the
    media URL.

    Parameters:
        videoId: identifier placed in the embed URL.
        encHeaders: accepted but not used by this function.

    Returns:
        ``'<url>|<url-encoded User-Agent header>'``.

    Note:
        ``codeGlb`` stays empty here: nothing in this function executes the
        earlier statements of the puzzle, so the ``eval`` calls below only
        see an empty namespace.
        NOTE(security): ``eval`` is applied to text derived from the
        downloaded page.
    """
    encodeHeaders = urllib.urlencode({'User-Agent':MOBILE_BROWSER})
    embedUrl = 'https://openload.co/embed/%s/<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(embedUrl)[1]
    puzzle = CustomRegEx.findall(r'(?#<video script.*=puzzle>)', content)[0]

    # Replace every variable (from the front of the alphabet) and every
    # dictionary key (from the back) by a single upper-case letter.
    varTags = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    varNames = sorted(set(re.findall(r'\(([^=)(]+)\) *=', puzzle)))
    keyNames = re.findall(r', *(?P<key>[^: ]+) *:', puzzle)
    keyNames = keyNames + re.findall(r"\(゚Д゚\) *\[[^']+\] *=", puzzle)
    keyNames = sorted(set(keyNames))
    for k, name in enumerate(varNames):
        puzzle = puzzle.replace(name, varTags[k])
    for k, name in enumerate(keyNames):
        puzzle = puzzle.replace(name, varTags[-k - 1])
    puzzle = re.sub(r'[ \x80-\xff]','',puzzle)

    dicMatch = re.search(r'\(([A-Z])\)={', puzzle)
    assert dicMatch, 'No se encontro Id del diccionario'
    dicId = dicMatch.group(1)

    # (pattern, replacement) pairs, applied strictly in this order.
    rewrites = (
        (r"\(\(%s\)\+\'_\'\)" % dicId, "'[object object]_'"),
        (r"\(%s\+([^+)]+)\)" % dicId,
         lambda x: "('[object object]'+str(%s))" % x.group(1)),
        (r"\(%s\)\.(.+?)\b" % dicId,
         lambda x: "(%s)['%s']" % (dicId, x.group(1))),
        (r"(?<=[{,])([^: ]+)(?=:)", lambda x: "'%s'" % x.group(1)),
        (r"\((\(.+?\)|[A-Z])\+\'_\'\)",
         lambda x: "(str(%s)+'_')" % x.group(1)),
        (r"\([^()]+\)\[[A-Z]\]\[[A-Z]\]", "'function'"),
    )
    for rewritePattern, replacement in rewrites:
        puzzle = re.sub(rewritePattern, replacement, puzzle)

    codeGlb = {}
    code = puzzle.split(';')
    code.pop()
    code[0] = code[0][:2] + "'undefined'"

    def evalMatch(x):
        # Replace the matched text by the string form of its evaluation.
        return "%s" % eval(x.group(), codeGlb)

    lastLine = code[-1]
    lastLine = re.sub(r"\(([A-Z]+)\)", lambda x: x.group(1), lastLine)
    lastLine = re.sub(r"\([oc]\^_\^o\)", evalMatch, lastLine)
    innerParens = r"\([^)\]'\[(]+\)"
    # Collapse parenthesised expressions from the innermost outwards.
    while re.search(innerParens, lastLine):
        lastLine = re.sub(innerParens, evalMatch, lastLine)
    lastLine = re.sub(r"[A-Z](?=[^\]\[])", evalMatch, lastLine)
    lastLine = re.sub(r"E\[[\'_A-Z]+\]", evalMatch, lastLine)
    lastLine = lastLine.replace('+', '')
    lastLine = lastLine.decode('unicode-escape')

    urlMatch = re.search(r'http.+?true', lastLine)
    return '%s|%s' % (urlMatch.group(),encodeHeaders)
# handle_apimenu: builds and returns, as a string, the Python source of a
# zero-argument function named `nodeId` (the text starts with
# 'def ' + nodeId + '():' and every later piece is appended to `sourceCode`).
#
# Parameters, as used by the statements below:
#   self        - not referenced by any statement of this function.
#   nodeId      - name given to the generated function.
#   menuId      - only tested for truthiness.
#   paramDict   - dict of node parameters; it is modified (entries are popped):
#                 keys starting with 'op_' are moved into `otherParam` without
#                 the prefix; 'regexp', 'url', 'compflags', 'contextmenus' and
#                 'onlycontext' are popped when handled; 'menu' is read to
#                 decide isFolder ('media' gives False) and, for a '?#<PASS>'
#                 regexp, to emit 'return <menu>()'.
#   menuIcons   - when truthy, icon file names emitted as `iconList`.
#   searchFlag  - when truthy, ' or EMPTYCONTENT' is appended to the final
#                 generated 'return menuContent'.
#   spanFlag    - controls the 'limInf, limSup' / posIni / posFin text; it is
#                 later reassigned from the presence of '?#<SPAN' in regexp.
#
# When more than one `otherParam` key starts with 'addonInfo', each value is
# split at its last '<>' into a key/value pair stored in `addonInfo`; a value
# without a key part becomes `addonInfoDef`.
#
# The generated text refers to names that must exist where it is executed
# (args, openUrl, parseUrlContent, LISTITEM_KEYS, INFOLABELS_KEYS, os, _media,
# EMPTYCONTENT); none of them is defined here.
#
# NOTE(review): `INDENT` is assigned but never read; the literal '\n\t' is
# used instead.
# NOTE(review): `regexp` is only bound when paramDict has a 'regexp' key, yet
# it is read by `regexp.find('?#<PASS>')` -- presumably callers always supply
# it; confirm.
# NOTE(review): popping from paramDict while looping over paramDict.keys()
# relies on keys() returning a list (Python 2).
# NOTE(review): the original line breaks and indentation of this function are
# not present in this file, so the nesting of the `if` bodies cannot be read
# from here; the code below is left exactly as found.
def handle_apimenu(self, nodeId, menuId, paramDict, menuIcons, searchFlag, spanFlag): from basicFunc import INFOLABELS_KEYS otherParam = {} for key in paramDict.keys(): if not key.startswith('op_'): continue modKey = key[3:] otherParam[modKey] = paramDict.pop(key) addonInfoKeys = [key for key in otherParam if key.startswith('addonInfo')] if len(addonInfoKeys) > 1: addonInfo = {} for key in addonInfoKeys: value = otherParam.pop(key) key, value = value.rpartition('<>')[0:3:2] if key: addonInfo[key] = value else: addonInfoDef = value addonInfoFlag = paramDict.has_key('regexp') if addonInfoFlag: regexp = paramDict.pop('regexp') INDENT = '\n\t' sourceCode = 'def ' + nodeId + '():' if regexp.find('?#<PASS>') != -1: sourceCode += '\n\t'+ 'global args' if paramDict.get('url', None):paramDict.pop('url') sourceCode += '\n\t'+ 'url = args.get("url")[0]' suffix = ')' if menuId: if spanFlag: sourceCode += '\n\t'+ 'limInf, limSup = eval(args.get("span", ["(0,0)"])[0])' suffix = ', posIni = limInf, posFin = limSup)' spanFlag = False if addonInfoFlag: spanFlag = regexp.find('?#<SPAN') != -1 regexp = regexp.replace("'", "\\'") sep = "'" sourceCode += '\n\t'+ 'regexp = r' + sep + regexp + sep sourceCode += '\n\t'+ 'url, data = openUrl(url)' if paramDict.get('compflags', None): sourceCode += '\n\t'+ 'compflags = ' + paramDict.pop('compflags') sourceCode += '\n\t'+ 'subMenus = parseUrlContent(url, data, regexp, compflags' + suffix else: sourceCode += '\n\t'+ 'subMenus = parseUrlContent(url, data, regexp' + suffix tags = CustomRegEx.compile(regexp).groupindex.keys() addonInfoFlag = any(map(lambda x: x in INFOLABELS_KEYS, tags)) if regexp.find('?#<PASS>') != -1: sourceCode += '\n\t'+ "args = dict((key, [value]) for key, value in subMenus[0].items())" # if spanFlag: sourceCode += '\n\t'+ "args['span'] = [str(subMenus[0]['span'])]" sourceCode += '\n\t'+ "return " + str(paramDict['menu']) + "()" return sourceCode if menuIcons: iconList = '["' + '", "'.join(menuIcons) + '"]' sourceCode +=
'\n\t' + 'iconList = ' + iconList sourceCode += '\n\t' + 'for k in range(len(subMenus)):' sourceCode += '\n\t\t' + 'kmod = min(k, len(iconList) - 1)' sourceCode += '\n\t\t' + 'subMenus[k]["iconImage"] = os.path.join(_media, iconList[kmod])' contextMenuFlag = paramDict.has_key('contextmenus') if contextMenuFlag: contextMenu = [tuple(elem.split(',')) for elem in paramDict.pop('contextmenus').split('|')] onlyContext = paramDict.pop('onlycontext') if paramDict.has_key('onlycontext') else False sourceCode += '\n\t'+ 'contextMenu = {"lista":' + str(contextMenu) + ', "replaceItems":' + str(onlyContext) + '}' if len(addonInfoKeys) > 1: sourceCode += '\n\t'+ 'addonInfo=' + str(addonInfo) sourceCode += '\n\t'+ 'menuContent = []' sourceCode += '\n\t'+ 'for elem in subMenus:' sourceCode += '\n\t\t'+ 'itemParam = dict([(key,elem.pop(key)) for key in elem.keys() if key in LISTITEM_KEYS])' isFolder = str(paramDict['menu'] != 'media') if paramDict.has_key('menu') else 'True' sourceCode += '\n\t\t'+ 'itemParam["isFolder"] = ' + isFolder sourceCode += '\n\t\t'+ 'otherParam = ' + str(otherParam) if len(addonInfoKeys) > 1: sourceCode += '\n\t\t'+ 'otherParam["addonInfo"] = addonInfo.get(menu, "%s")' % addonInfoDef if contextMenuFlag: sourceCode += '\n\t\t'+ 'otherParam["contextMenu"] = dict(contextMenu)' if addonInfoFlag: sourceCode += '\n\t\t'+ 'otherParam["addonInfo"] = dict([(key,elem.pop(key)) for key in elem.keys() if key in INFOLABELS_KEYS])' if regexp.find('videoUrl') == -1: sourceCode += '\n\t\t'+ 'paramDict = dict([(key, value[0]) for key, value in args.items() if hasattr(value, "__getitem__") and key not in ["header", "footer"]])' else: sourceCode += '\n\t\t'+ 'paramDict = dict([(key, value[0]) for key, value in args.items() if hasattr(value, "__getitem__") and key not in ["url", "header", "footer"]])' sourceCode += '\n\t\t'+ 'paramDict.update(' + str({ key:value for key, value in paramDict.items() if key not in ['header','headregexp','nextregexp', 'iconflag', 'iconimage']})
+ ')' # sourceCode += '\n\t\t'+ 'paramDict = ' + str({ key:value for key, value in paramDict.items() if key not in ['nextregexp', 'iconflag', 'iconimage']}) sourceCode += '\n\t\t'+ 'paramDict.update(elem)' if spanFlag: sourceCode += '\n\t\t'+ 'paramDict["url"] = url' sourceCode += '\n\t\t'+ 'menuContent.append([paramDict, itemParam, otherParam])' sourceCode += '\n\t'+ 'return menuContent' if searchFlag: sourceCode += ' or EMPTYCONTENT' return sourceCode
# getMenuHeaderFooter: builds the list of header/footer menu entries for a
# page and returns it as a list of [paramDict, itemParam, None] triples.
#
# Parameters, as used by the statements below:
#   param - section name; stored under 'section', used as the key that holds
#           the entry index k, and compared with 'footer' to choose the
#           placeholder label ('Next >>>' vs 'Header >>>').
#   args  - mapping whose values are indexable; 'menu' (default
#           ['rootmenu']) and 'url' are read with [0], and every entry except
#           'header'/'footer' is copied (first element only) into paramDict.
#   data  - text searched with the regular expressions.
#   menus - iterable of (opLabel, opregexp) pairs.
#
# For each pair, opregexp is split at its first '|' into `opdefault` and
# `opvalues`; without a '|' the whole string is `opvalues` and `opdefault` is
# ''. When `opdefault` starts with '(?#<SPAN>)' its match span limits the
# slice of `data` that `opvalues` is applied to. Entries whose `opvalues`
# pattern finds nothing are skipped.
#
# The named groups of `opvalues` ('url', 'label', 'varvalue') select which
# columns of the findall result become menuUrl, menuLabel and varValue; the
# named groups of `opdefault` ('label', 'defvalue', 'varname') select how the
# text appended to the entry label is obtained. For 'varname', the query
# string of the current url is parsed and one '?'-prefixed query string per
# varValue is generated.
#
# Each entry's label is '[COLOR yellow]' + opLabel + opdefault + '[/COLOR]';
# 'menulabel' and 'menuurl' carry str(list) encoded with
# base64.urlsafe_b64encode.
#
# NOTE(review): `tags.index(...)` requires groupindex.keys() to be a list
# (Python 2).
# NOTE(review): the bare `except:` around varValue.index(...) also hides a
# NameError when `varValue` was never bound -- confirm that is intended.
# NOTE(review): the original line breaks and indentation of this function are
# not present in this file, so the nesting of the 'url'/'label'/'varvalue'
# branches cannot be read from here; the code below is left exactly as found.
def getMenuHeaderFooter(param, args, data, menus): htmlUnescape = HTMLParser.HTMLParser().unescape menuId = args.get('menu', ['rootmenu'])[0] url = args.get("url")[0] headerFooter = [] for k, elem in enumerate(menus): opLabel, opregexp = elem opdefault, sep, opvalues = opregexp.partition('|') opvalues = opvalues or opdefault opdefault = opdefault if sep else '' pIni, pFin = 0, -1 if opdefault.startswith('(?#<SPAN>)'): pIni, match = -1, CustomRegEx.search(opdefault, data) if match: pIni, pFin = match.span(0) opmenu = CustomRegEx.findall(opvalues, data[pIni:pFin]) if not opmenu: continue tags = CustomRegEx.compile(opvalues).groupindex.keys() if 'url' in tags: menuUrl = [elem[tags.index('url')] for elem in opmenu] if len(tags) > 1 else opmenu[0] if 'label' in tags: menuLabel = map(htmlUnescape, [elem[tags.index('label')] for elem in opmenu]) else: placeHolder = 'Next >>>' if param == 'footer' else 'Header >>>' menuLabel = len(menuUrl)*[placeHolder] if len(opmenu) == 1: opLabel = menuLabel[0] if 'varvalue' in tags: varValue = [elem[tags.index('varvalue')] for elem in opmenu] if len(tags) > 1 else opmenu if opdefault: cmpregex = CustomRegEx.compile(opdefault) tags = cmpregex.groupindex.keys() match = cmpregex.search(data) if tags: if 'label' in tags: opdefault = htmlUnescape(match.group(1) if match else '') elif 'defvalue' in tags: opdefault = htmlUnescape(match.group('defvalue')) elif 'varname' in tags: varName = match.group('varname') urlquery = urlparse.urlsplit(url).query queryDict = dict(urlparse.parse_qsl(urlquery)) opdefault = queryDict.get(varName, '') try: indx = varValue.index(opdefault) except: opdefault = '' else: opdefault = menuLabel[indx] menuUrl = [] for elem in varValue: queryDict[varName] = elem menuUrl.append('?'
+ urllib.urlencode(queryDict)) paramDict = dict([(key, value[0]) for key, value in args.items() if hasattr(value, "__getitem__") and key not in ["header", "footer"]]) paramDict.update({'section':param, 'url':url, param:k, 'menu':menuId}) paramDict['menulabel'] = base64.urlsafe_b64encode(str(menuLabel)) paramDict['menuurl'] = base64.urlsafe_b64encode(str(menuUrl)) label = '[COLOR yellow]' + opLabel + opdefault + '[/COLOR]' itemParam = {'isFolder':True, 'label':label} headerFooter.append([paramDict, itemParam, None]) return headerFooter
def getFormXmlStr(content):
    """Describe every ``<form>`` found in ``content`` as a settings XML string.

    Forms are read one after another with ``getFormData``. Each form becomes
    a ``<category label="Form N">`` element that lists the form attributes
    (read-only text settings with an ``fa_`` id prefix) followed by one
    ``<setting>`` per field:

    * fields whose ``value`` is a single string are rendered according to
      their ``type`` (``hidden``, ``radio``/``checkbox``, ``text``,
      ``submit``, ``file``; anything else as plain text);
    * fields whose ``value`` is not a string are joined with ``'|'`` and
      rendered as ``drpdwnlst`` when they carry ``lvalue``, otherwise as
      ``labelenum``.

    A separator is emitted before the first field flagged ``prepend`` and
    before the first field that is not.

    Parameters:
        content: text scanned for forms.

    Returns:
        The complete XML document as a string.

    Note:
        The field dicts returned by ``getFormData`` are updated in place
        (escaped values and filled-in defaults).
    """
    form_xml = '<?xml version="1.0" encoding="utf-8" standalone="yes"?>\n<settings>\n'
    comPattern = CustomRegEx.compile(r'(?#<form>)')
    k = 0
    posIni = 0
    while True:
        formData = getFormData(comPattern, content, posIni)
        if not formData:
            break
        posIni, formAttr, formFields = formData
        formAttr = dict((key, escapeXml(value)) for key, value in formAttr.items())
        form_xml += '\t<category label="Form %s">\n' % (k + 1)
        if formAttr:
            form_xml += '\t\t<setting type="lsep" label ="Form attributes"/>\n'
            for name, value in sorted(formAttr.items()):
                form_xml += '\t\t<setting id="fa_{0}" type="text" label="{0}" default="{1}" enable="false"/>\n'.format(name, value)

        # bFlag: 0 = no separator yet, 1 = "Prepend Vars" emitted,
        # 2 = "Form Vars" emitted.
        bFlag = 0
        for key in formFields:
            field = formFields[key]
            if 'prepend' in field:
                if bFlag == 0:
                    bFlag = 1
                    form_xml += '\t\t<setting type="lsep" label ="Form Prepend Vars"/>\n'
            elif bFlag < 2:
                bFlag = 2
                form_xml += '\t\t<setting type="lsep" label ="Form Vars"/>\n'

            if isinstance(field.get('value', ''), basestring):
                for fkey in ('name', 'value', 'checked'):
                    if fkey in field:
                        field[fkey] = escapeXml(field[fkey])
                atype = field.get('type', '')
                if atype in ('radio', 'checkbox'):
                    field['checked'] = 'true' if 'checked' in field else 'false'
                    felem = '<setting id="{name}" type="bool" label ="{name}" default="{checked}"/>\n'
                elif atype == 'file':
                    field['defaultValue'] = field.get('defaultValue', '')
                    # Newline-terminated like every other template, so the
                    # next setting starts on its own line.
                    felem = '<setting id="if_{name}" type="file" label="{name}" default="{defaultValue}"/>\n'
                else:
                    # Inputs without a value attribute (common for hidden
                    # and submit) would make format() raise KeyError.
                    field['value'] = field.get('value', '')
                    if atype == 'hidden':
                        felem = '<setting id="{name}" type="text" label="{name}" default="{value}" enable="false"/>\n'
                    elif atype == 'submit':
                        felem = '<setting type="lsep" label ="{value}" noline="true"/>\n'
                    else:
                        felem = '<setting id="{name}" type="text" label="{name}" default="{value}"/>\n'
            else:
                toEscape = ['name', 'value', 'default']
                field['value'] = '|'.join(field['value'])
                if 'lvalue' in field:
                    field['lvalue'] = '|'.join(field['lvalue'])
                    toEscape.append('lvalue')
                field['default'] = field.get('default', '')
                field.update([(fkey, escapeXml(field[fkey])) for fkey in toEscape])
                if 'lvalue' in field:
                    felem = '<setting id="{name}" type="drpdwnlst" label="{name}" lvalues="{lvalue}" values="{value}" default="{default}"/>\n'
                else:
                    felem = '<setting id="{name}" type="labelenum" label="{name}" lvalues="{value}" default="{default}"/>\n'
            form_xml += '\t\t' + felem.format(**field)
        form_xml += '\t</category>\n'
        k += 1
    form_xml += '</settings>\n'
    return form_xml
def openload(videoId, headers = None):
    """Resolve an openload.co video id to a playable ``url|headers`` string.

    Downloads the embed page for ``videoId``, takes the first "puzzle"
    script text extracted by ``CustomRegEx``, renames its identifiers to
    single upper-case letters, rewrites several JavaScript constructs into
    Python expressions, evaluates every statement but the last into a
    private namespace and finally decodes the last statement to locate the
    media URL.

    Parameters:
        videoId: identifier placed in the embed URL.
        headers: optional dict of extra request headers. It is copied, so
            the caller's dict is left untouched; ``User-Agent`` is always
            set to ``MOBILE_BROWSER``.

    Returns:
        ``'<url>|User-Agent=<MOBILE_BROWSER, url-encoded>'``.

    Raises:
        AssertionError: when the dictionary identifier cannot be located in
            the rewritten puzzle.

    NOTE(security): ``eval`` is applied to text derived from the downloaded
    page; this is inherent to the approach and is left as it was.
    """
    # Copy first: assigning into the caller's dict would leak the
    # User-Agent override back to the caller.
    headers = dict(headers or {})
    headers['User-Agent'] = MOBILE_BROWSER
    encodeHeaders = urllib.urlencode(headers)
    urlStr = 'https://openload.co/embed/%s/<headers>%s' % (videoId, encodeHeaders)
    content = basicFunc.openUrl(urlStr)[1]

    varTags = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    pattern = r'(?#<video script.*=puzzle>)'
    puzzle = CustomRegEx.findall(pattern, content)[0]

    # Replace every variable (from the front of the alphabet) and every
    # dictionary key (from the back) by a single upper-case letter.
    varNames = sorted(set(re.findall(r'\(([^=)(]+)\) *=', puzzle)))
    keys1 = re.findall(r', *(?P<key>[^: ]+) *:', puzzle)
    keys2 = re.findall(r"\(゚Д゚\) *\[[^']+\] *=", puzzle)
    keys = sorted(set(keys1 + keys2))
    for k, name in enumerate(varNames):
        puzzle = puzzle.replace(name, varTags[k])
    for k, name in enumerate(keys):
        puzzle = puzzle.replace(name, varTags[-k - 1])
    puzzle = re.sub(r'[ \x80-\xff]','',puzzle)

    pat_dicId = r'\(([A-Z])\)={'
    m = re.search(pat_dicId, puzzle)
    assert m, 'No se encontro Id del diccionario'
    dicId = m.group(1)

    # Rewrites of JavaScript constructs involving the dictionary object.
    dic_pat1 = r"\(\(%s\)\+\'_\'\)" % dicId
    dic_pat2 = r"\(%s\+([^+)]+)\)" % dicId
    dic_pat3 = r"\(%s\)\.(.+?)\b" % dicId
    dic_pat4 = r"(?<=[{,])([^: ]+)(?=:)"
    puzzle = re.sub(dic_pat1, "'[object object]_'", puzzle)
    puzzle = re.sub(dic_pat2, lambda x: "('[object object]'+str((%s)))" % x.group(1), puzzle)
    puzzle = re.sub(dic_pat3, lambda x: "(%s)['%s']" % (dicId, x.group(1)), puzzle)
    puzzle = re.sub(dic_pat4, lambda x: "'%s'" % x.group(1), puzzle)

    # String concatenations, the 'function' placeholder and "a += b".
    pat_str1 = r"\((\(.+?\)|[A-Z])\+\'_\'\)"
    pat_str2 = r"\([^()]+\)\[[A-Z]\]\[[A-Z]\]"
    pat_str3 = r"(?<=;)([^+]+)\+=([^;]+)"
    puzzle = re.sub(pat_str1, lambda x: "(str((%s))+'_')" % x.group(1), puzzle)
    puzzle = re.sub(pat_str2, "'function'", puzzle)
    puzzle = re.sub(pat_str3, lambda x: "%s=%s+%s" % (x.group(1), x.group(1), x.group(2)), puzzle)

    codeGlb = {}
    code = puzzle.split(';')
    code.pop()
    code[0] = code[0][:2] + "'undefined'"

    # Execute every statement but the last as a (possibly chained)
    # assignment into codeGlb.
    for linea in code[:-1]:
        linea = re.sub(r"\(([A-Z]+)\)", lambda x: x.group(1), linea)
        varss = re.split(r"(?<=[_a-zA-Z\]])=(?=[^=])",linea)
        value = eval(varss.pop(), codeGlb)
        for var in varss:
            m = re.match(r"([^\[]+)\[([^\]]+)\]", var)
            if m:
                # Target of the form name[key]: store into the container.
                var, key = m.groups()
                key = eval(key, codeGlb)
                codeGlb[var][key] = value
            else:
                codeGlb[var] = value

    def evalMatch(x):
        # Replace the matched text by the string form of its evaluation.
        return "%s" % eval(x.group(), codeGlb)

    linea = code[-1]
    linea = re.sub(r"\(([A-Z]+)\)", lambda x: x.group(1), linea)
    linea = re.sub(r"\([oc]\^_\^o\)", evalMatch, linea)
    # Collapse parenthesised expressions from the innermost outwards.
    while re.search(r"\([^)\]'\[(]+\)", linea):
        linea = re.sub(r"\([^)\]'\[(]+\)", evalMatch, linea)
    linea = re.sub(r"[A-Z](?=[^\]\[])", evalMatch, linea)
    linea = re.sub(r"E\[[\'_A-Z]+\]", evalMatch, linea)
    linea = linea.replace('+', '')
    linea = linea.decode('unicode-escape')

    m = re.search(r'http.+?true', linea)
    # The response is not used; the request is kept because whatever side
    # effects it has are not visible from this function.
    basicFunc.openUrl(m.group(), True)
    return '%s|%s' % (m.group(),urllib.urlencode({'User-Agent':MOBILE_BROWSER}))