def parseTextToGroups(txt, regex):
    """Match ``regex`` at the beginning of ``txt`` and return its groups.

    The pattern is compiled case-insensitively, with ``.`` matching newlines
    and in multi-line mode.  ``txt`` is passed through ``smart_unicode``
    before matching.

    Returns the tuple of captured groups, or None when the pattern does not
    match at the start of the text.
    """
    flags = re.IGNORECASE | re.DOTALL | re.MULTILINE
    pattern = re.compile(regex, flags)
    # re.match only succeeds if the pattern matches from position 0.
    found = pattern.match(smart_unicode(txt))
    if not found:
        return None
    return found.groups()
def __getCachedSource(self):
    """Return the cached page source, or None when it cannot be read.

    The content of ``self.cachedSourcePath`` is read with ``getFileContent``
    and passed through ``enc.smart_unicode``.  The operation is best-effort:
    if the conversion fails, the unconverted content is returned; if the read
    itself fails, None is returned.
    """
    # Bind the name up front so a failing read cannot leave it undefined
    # when the final return is reached.
    data = None
    try:
        data = getFileContent(self.cachedSourcePath)
        data = enc.smart_unicode(data)
    except Exception:
        # Deliberately best-effort: keep whatever was obtained so far.
        pass
    return data
def appendFileContent(filename, txt):
    """Append ``txt`` to the file at ``filename`` as UTF-8.

    ``txt`` is passed through ``smart_unicode`` and then encoded as UTF-8
    before being written.  The file is opened in append mode.

    Returns True when the text was written, False if any step failed.
    """
    try:
        # The context manager closes the handle even when the conversion or
        # the write raises, so a failure no longer leaks the open file.
        with open(filename, 'a') as f:
            f.write(smart_unicode(txt).encode('utf-8'))
    except Exception:
        return False
    return True
def getSource(self, url, form_data, referer='', ignoreCache=False, demystify=False):
    """Return the page source for ``url``, using the on-disk cache if possible.

    When ``url`` equals the last requested URL and ``ignoreCache`` is false,
    the cached source is returned.  Otherwise the request is delegated to the
    parent class, the result is passed through ``enc.smart_unicode`` and, if
    it is non-empty, the URL and the page are stored as the new cache.
    """
    if url == self.getLastUrl() and not ignoreCache:
        return self.__getCachedSource()

    parent = super(CachedWebRequest, self)
    page = enc.smart_unicode(parent.getSource(url, form_data, referer, demystify))
    if not page:
        # Nothing usable was fetched; leave the existing cache untouched.
        return page

    # Remember which URL the cache belongs to, then store the page itself.
    self.__setLastUrl(url)
    setFileContent(self.cachedSourcePath, page)
    return page
def setFileContent(filename, txt, createFolders=False):
    """Write ``txt`` to ``filename`` as UTF-8, replacing any existing content.

    ``txt`` is passed through ``smart_unicode`` and then encoded as UTF-8
    before being written.

    If ``createFolders`` is true, the directory part of ``filename`` is
    created first when it does not exist yet.

    Returns True when the text was written, False if any step failed.
    """
    try:
        if createFolders:
            folderPath = os.path.dirname(filename)
            # A bare file name has an empty directory part; there is nothing
            # to create then, and os.makedirs('') would raise.
            if folderPath and not os.path.exists(folderPath):
                os.makedirs(folderPath, 0o777)
        # The context manager closes the handle even when the conversion or
        # the write raises, so a failure no longer leaks the open file.
        with open(filename, 'w') as f:
            f.write(smart_unicode(txt).encode('utf-8'))
    except Exception:
        return False
    return True
def getSource(self, url, form_data, referer='', demystify=False):
    """Fetch the page source and either demystify it or strip its comments.

    The request is delegated to the parent class and the result is passed
    through ``enc.smart_unicode``.  An empty result yields None.

    With ``demystify`` set, the page is handed to
    ``decryptionUtils.doDemystify``; otherwise every substring matched by the
    comment pattern is removed from the page.
    """
    parent = super(DemystifiedWebRequest, self)
    page = enc.smart_unicode(parent.getSource(url, form_data, referer))
    if not page:
        return None

    if demystify:
        # Imported lazily, only when demystification is actually requested.
        import decryptionUtils as crypt
        return crypt.doDemystify(page)

    # remove comments
    commentPattern = re.compile('<!--.*?(?!//)--!*>', re.IGNORECASE | re.DOTALL | re.MULTILINE)
    for comment in commentPattern.findall(page):
        # str.replace drops every occurrence of the matched text, not only
        # the one at the position where the pattern matched.
        page = page.replace(comment, '')
    return page
def __getCachedSource(self):
    """Return the content of ``self.cachedSourcePath`` via ``enc.smart_unicode``.

    The file is read with ``getFileContent``; any exception raised by the
    read or the conversion propagates to the caller.
    """
    raw = getFileContent(self.cachedSourcePath)
    return enc.smart_unicode(raw)