def getPendingTxsHashes(self):
    """Collect ``[hash, timestamp]`` pairs for every pending transaction.

    A transaction is kept only when its detail page parsed successfully,
    signalled by the parser exposing a ``tableText`` attribute.

    Returns:
        list[list]: one ``[txHash, timestamp]`` entry per parsable pending tx.
    """
    pendingTxs = []
    htmlParser = HtmlParser()
    for pendingTx in htmlParser.getPendingTxs():
        # renamed from 'hash' to avoid shadowing the builtin
        txHash = self._getHashFromHtml(pendingTx)
        # re-parse the detail page of this specific transaction
        htmlParser = HtmlParser(txHash)
        print(txHash)
        # only keep transactions whose detail page could be parsed
        if hasattr(htmlParser, 'tableText'):
            pendingTxs.append([txHash, htmlParser.getTimestamp()])
    return pendingTxs
def __init__(self):
    # Initialize the program: wire up the collaborators used by the app.
    self.download = Downloader()
    self.parser = HtmlParser()
    self.save = SaveData()
    self.workbook = Workbook()
    self.ch = Choice()
    # user-facing status message ("initialization complete"); left untranslated
    # because it is runtime output, not a comment
    print('初始化完成...')
def updateBlockTable():
    """Refresh the block table: fetch details for each known block id and insert them."""
    for block_id in query.getIdBlocks():
        print(block_id)
        # parse the detail page for this block
        parsed = HtmlParser(str(block_id))
        info = parsed.getBlock()
        # hash is intentionally left empty here; only timestamp/minedIn are stored
        record = Block(id=block_id,
                       hash='',
                       timestamp=info['timestamp'],
                       minedIn=info['minedIn'])
        query.insertBlock(record)
def getTxsData(self):
    """Return gas/block data for the next not-yet-confirmed transaction.

    Falls back to placeholder data (``_getFakeDataTx``) when the transaction
    page could not be parsed (no ``tableText`` attribute on the parser).

    Returns:
        dict: keys ``hash``, ``blockId``, ``gasPrice``, ``gasLimit`` on
        success; otherwise whatever ``_getFakeDataTx`` produces.
    """
    # renamed from 'hash' to avoid shadowing the builtin
    txHash = self._getNotConfirmedTx()
    print(txHash)
    htmlParser = HtmlParser(txHash)
    if hasattr(htmlParser, 'tableText'):
        return {
            'hash': txHash,
            'blockId': htmlParser.getBlockNumber(),
            'gasPrice': htmlParser.getGasPrice(),
            'gasLimit': htmlParser.getGasLimit()
        }
    return self._getFakeDataTx(txHash)
def __init__(self):
    # Number of worker threads to start
    self.pcount = 1
    # Queue for scraped results
    self.dqueue = queue.Queue()
    # Queue for error messages
    self.equeue = queue.Queue()
    self.manager = UrlManager()
    self.downloader = HtmlDownloader()
    self.parser = HtmlParser()
    self.output = DataOutput()
    # self.proxies = getProxy()
    self.proxies = getFromPool2()
    # proxies that turned out to be dead/inactive
    self.inactivepro = []
    self.count = 0
    self.sumSuccess = 0
    self.sumFail = 0
    self.updating = False
def __init__(self):
    # Wire together the four crawler components: URL bookkeeping,
    # HTML parsing, page downloading and result output.
    self.manager = UrlManager()
    self.parser = HtmlParser()
    self.downloader = HtmlDownloader()
    self.output = DataOutput()
def __init__(self):
    # Initialize the program: downloader, parser and MySQL connection.
    self.download = Downloader()
    self.parser = HtmlParser()
    self.mysql = Mysqldb()
def main():
    """Entry point: parse CLI arguments, configure logging, validate all
    paths, then convert the given file or folder to the Unity rich-text
    format (optionally exporting highlighted HTML as well)."""
    # create argument parser
    parser = argparse.ArgumentParser(
        description='Convert ConfigCrusher program measurement results.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # add arguments to parser and parse
    prepareParser(parser)
    args = parser.parse_args()

    # set up logger
    global LOGGER
    LOGGER = logging.getLogger('crusherToJSONLogger')
    LOGGER.setLevel(logging.DEBUG)

    # check if debug should be enabled
    logLevel = logging.DEBUG if args.verbose else logging.INFO

    # channel to stream log events to console
    ch = logging.StreamHandler()
    ch.setLevel(logLevel)
    formatter = logging.Formatter('[%(levelname)s] (%(asctime)s): %(message)s')
    ch.setFormatter(formatter)
    LOGGER.addHandler(ch)

    # log to file if enabled
    logPath = args.logfile
    if len(logPath) > 0:
        if not logPath.endswith(".log"):
            logPath += ".log"
        fileHandler = logging.FileHandler(logPath)
        fileHandler.setFormatter(formatter)
        LOGGER.addHandler(fileHandler)
    LOGGER.info('Logger ready.')

    # validate output folder (ensure a trailing separator for makedirs below)
    outFolder = args.outpath
    if not (outFolder.endswith("/") or outFolder.endswith("\\")):
        outFolder += "/"
    if not os.path.exists(outFolder):
        # NOTE(review): message reconstructed from a line-wrapped source
        LOGGER.warning('The output folder does not exist! Creating it...')
        try:
            os.makedirs(outFolder)
        except Exception:
            LOGGER.exception('Failed to create output folder!')
            return
        outFolder = os.path.normcase(outFolder)
        LOGGER.info('Output folder created: {}'.format(outFolder))
    else:
        # check that path leads to a folder
        if not os.path.isdir(outFolder):
            LOGGER.error('The output folder path does not lead to a folder!')
            return

    # validate color schema file
    schemaPath = args.colorschema
    if not os.path.isfile(schemaPath):
        LOGGER.error(
            'The given schema path is no valid file: {}'.format(schemaPath))
        return

    # check if recursive export is desired
    recursive = bool(args.recursive)
    # check if user wants to overwrite existing files
    overwrite = bool(args.overwrite)
    # export the highlighted HTML code as well if desired
    exportHTML = bool(args.exporthtml)
    if exportHTML:
        LOGGER.info('Additional HTML export enabled.')

    # try to read JSON color schema
    jsonSchema = None
    with open(schemaPath, "r") as file:
        try:
            jsonSchema = json.loads(file.read())
        except Exception as ex:
            LOGGER.error(ex)
    if jsonSchema is None:
        return

    # check if path exists
    filePath = args.path
    if not os.path.exists(filePath):
        # NOTE(review): message reconstructed from a line-wrapped source
        LOGGER.error('Failed to convert! Given path does not exist: {}'.format(
            filePath))
        return None

    # BUGFIX: resultPath was previously unbound when neither branch below
    # fired (path exists but is neither a regular file nor a directory),
    # which crashed the final check with an UnboundLocalError.
    resultPath = None
    # check if path leads to file or folder
    if os.path.isfile(filePath):
        # parses html code to unity rt format
        parser = HtmlParser(colorSchema=jsonSchema)
        # convert a file and export the result
        LOGGER.info('Converting the file...')
        resultPath = convertFile(htmlParser=parser,
                                 filePath=filePath,
                                 outputFolder=outFolder,
                                 exportHTML=exportHTML,
                                 overwrite=overwrite)
    elif os.path.isdir(filePath):
        # convert all files of the folder
        LOGGER.info('Converting the files{}...'.format(
            ' recursively' if recursive else ''))
        resultPath = convertFiles(folderPath=filePath,
                                  outputFolder=outFolder,
                                  jsonSchema=jsonSchema,
                                  exportHTML=exportHTML,
                                  overwrite=overwrite,
                                  recursive=recursive)

    # print result path
    if resultPath is not None:
        LOGGER.info('Result path: ' + os.path.abspath(resultPath))
def convertFiles(folderPath, outputFolder, jsonSchema,
                 exportHTML=False, overwrite=False, recursive=False):
    '''
    Converts all files source code to a syntax highlighted rich text format.
    This method does not check if the given path is valid!
    Returns None on errors, the path to the exported files otherwise.
    '''
    firstOutPath = None
    if folderPath.endswith('/') or folderPath.endswith('\\'):
        folderPath = folderPath[:-1]
    # BUGFIX: measure the prefix length AFTER stripping the trailing
    # separator. Previously pathLength was taken before stripping, so for a
    # folderPath WITHOUT a trailing slash, curDir[pathLength:] began with a
    # separator and os.path.join(srcDirName, ...) silently discarded
    # srcDirName (join treats a leading separator as an absolute component).
    pathLength = len(folderPath)
    srcDirName = os.path.normcase(os.path.basename(folderPath))
    outputFolder = os.path.normcase(os.path.normpath(outputFolder))
    for curDir, subDirs, files in os.walk(folderPath, topdown=True):
        # path of the current directory relative to the source root
        relative = curDir[pathLength:].lstrip('/\\')
        curDir_relative = os.path.normpath(os.path.join(srcDirName, relative))
        LOGGER.info('Entering directory: {}'.format(curDir_relative))
        # create export path
        curOutFolder = os.path.normcase(
            os.path.join(outputFolder, curDir_relative))
        LOGGER.debug('Current output folder: {}'.format(curOutFolder))
        if os.path.exists(curOutFolder):
            if os.path.isfile(curOutFolder):
                LOGGER.error(
                    'Failed to export to: {} (is a file instead of a folder)'.
                    format(os.path.abspath(curOutFolder)))
                return None
        else:
            # create the output folder; os.walk is top-down, so the parent
            # directory was already created in an earlier iteration
            LOGGER.info('Creating folder: {}'.format(curOutFolder))
            try:
                os.mkdir(curOutFolder)
            except Exception:
                LOGGER.exception(
                    'Failed to create an output folder: {}'.format(
                        curOutFolder))
                return None
        if firstOutPath is None:
            firstOutPath = curOutFolder
        # convert and export all the files of this folder
        for file in files:
            # parses html code to unity rt format
            parser = HtmlParser(colorSchema=jsonSchema)
            LOGGER.info('Converting file: {}'.format(file))
            path = convertFile(htmlParser=parser,
                               filePath=os.path.join(curDir, file),
                               outputFolder=curOutFolder,
                               exportHTML=exportHTML,
                               overwrite=overwrite)
            if path is not None:
                LOGGER.info('File exported: {}'.format(path))
        # do not take sub-folders into account if recursion is disabled
        if not recursive:
            break
    return firstOutPath
import pandas as pd

from requestUtil import *
from htmlParser import HtmlParser

# Column names of the dataframe collecting one row per parsed HTML tag
COLUMN_NAMES = ["HTML_ID", "TAG_NAME", "ATTRIBUTE_ID", "ATTRIBUTE_NAME",
                "ATTRIBUTE_CLASS", "ATTRIBUTE_PLACEHOLDER", "IN_FORM",
                "TAG_DEPTH", "TAG_STRING", "LABEL"]

# Initializing dataframe
df = pd.DataFrame(columns=COLUMN_NAMES)

# Read urls from csv file (previous comment said "xslx"; the code reads CSV)
loginurls = pd.read_csv("loginurls.csv")

# Creating parser object
htmlParser = HtmlParser()

# renamed from 'id' to avoid shadowing the builtin
url_id = 1

# Iterating over all login urls
for loginurl in loginurls["LOGIN_URL"]:
    try:
        print("Requesting : " + loginurl)
        src = getHtmlString(loginurl)
        # BUGFIX: call parseHtml on the created instance instead of the
        # class — the instance above was created but never used, and a
        # class-level call on an instance method raised a TypeError that
        # the broad except silently swallowed for every URL.
        df = htmlParser.parseHtml(src, url_id, df, loginurl)
        print("finished parsing html num " + str(url_id))
    except Exception as e:
        print("Could not load: " + loginurl)
    url_id = url_id + 1