def getPendingTxsHashes(self):
    """Collect ``[hash, timestamp]`` pairs for every pending transaction.

    A transaction is kept only when its detail page parsed successfully,
    signalled by the parser exposing a ``tableText`` attribute.

    Returns:
        list[list]: one ``[txHash, timestamp]`` entry per parsable pending tx.
    """
    pendingTxs = []
    htmlParser = HtmlParser()
    for pendingTx in htmlParser.getPendingTxs():
        # renamed from 'hash' to avoid shadowing the builtin
        txHash = self._getHashFromHtml(pendingTx)
        # re-parse the detail page of this specific transaction
        htmlParser = HtmlParser(txHash)
        print(txHash)
        # only keep transactions whose detail page could be parsed
        if hasattr(htmlParser, 'tableText'):
            pendingTxs.append([txHash, htmlParser.getTimestamp()])
    return pendingTxs
def __init__(self):
    # Initialize the program: wire up the collaborators used by the app.
    self.download = Downloader()
    self.parser = HtmlParser()
    self.save = SaveData()
    self.workbook = Workbook()
    self.ch = Choice()
    # user-facing status message ("initialization complete"); left untranslated
    # because it is runtime output, not a comment
    print('初始化完成...')
def updateBlockTable():
    """Refresh the block table: fetch details for each known block id and insert them."""
    for block_id in query.getIdBlocks():
        print(block_id)
        # parse the detail page for this block
        parsed = HtmlParser(str(block_id))
        info = parsed.getBlock()
        # hash is intentionally left empty here; only timestamp/minedIn are stored
        record = Block(id=block_id,
                       hash='',
                       timestamp=info['timestamp'],
                       minedIn=info['minedIn'])
        query.insertBlock(record)
def getTxsData(self):
    """Return gas/block data for the next not-yet-confirmed transaction.

    Falls back to placeholder data (``_getFakeDataTx``) when the transaction
    page could not be parsed (no ``tableText`` attribute on the parser).

    Returns:
        dict: keys ``hash``, ``blockId``, ``gasPrice``, ``gasLimit`` on
        success; otherwise whatever ``_getFakeDataTx`` produces.
    """
    # renamed from 'hash' to avoid shadowing the builtin
    txHash = self._getNotConfirmedTx()
    print(txHash)
    htmlParser = HtmlParser(txHash)
    if hasattr(htmlParser, 'tableText'):
        return {
            'hash': txHash,
            'blockId': htmlParser.getBlockNumber(),
            'gasPrice': htmlParser.getGasPrice(),
            'gasLimit': htmlParser.getGasLimit()
        }
    return self._getFakeDataTx(txHash)
def __init__(self):
    # Number of worker threads to start
    self.pcount = 1
    # Queue for scraped results
    self.dqueue = queue.Queue()
    # Queue for error messages
    self.equeue = queue.Queue()
    self.manager = UrlManager()
    self.downloader = HtmlDownloader()
    self.parser = HtmlParser()
    self.output = DataOutput()
    # self.proxies = getProxy()
    self.proxies = getFromPool2()
    # proxies that turned out to be dead/inactive
    self.inactivepro = []
    self.count = 0
    self.sumSuccess = 0
    self.sumFail = 0
    self.updating = False
def __init__(self):
    # Wire together the four crawler components: URL bookkeeping,
    # HTML parsing, page downloading and result output.
    self.manager = UrlManager()
    self.parser = HtmlParser()
    self.downloader = HtmlDownloader()
    self.output = DataOutput()
def __init__(self):
    # Initialize the program: downloader, parser and MySQL connection.
    self.download = Downloader()
    self.parser = HtmlParser()
    self.mysql = Mysqldb()
def main():
    """Entry point: parse CLI arguments, configure logging, validate all
    paths, then convert the given file or folder to the Unity rich-text
    format (optionally exporting highlighted HTML as well)."""
    # create argument parser
    parser = argparse.ArgumentParser(
        description='Convert ConfigCrusher program measurement results.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # add arguments to parser and parse
    prepareParser(parser)
    args = parser.parse_args()

    # set up logger
    global LOGGER
    LOGGER = logging.getLogger('crusherToJSONLogger')
    LOGGER.setLevel(logging.DEBUG)

    # check if debug should be enabled
    logLevel = logging.DEBUG if args.verbose else logging.INFO

    # channel to stream log events to console
    ch = logging.StreamHandler()
    ch.setLevel(logLevel)
    formatter = logging.Formatter('[%(levelname)s] (%(asctime)s): %(message)s')
    ch.setFormatter(formatter)
    LOGGER.addHandler(ch)

    # log to file if enabled
    logPath = args.logfile
    if len(logPath) > 0:
        if not logPath.endswith(".log"):
            logPath += ".log"
        fileHandler = logging.FileHandler(logPath)
        fileHandler.setFormatter(formatter)
        LOGGER.addHandler(fileHandler)
    LOGGER.info('Logger ready.')

    # validate output folder (ensure a trailing separator for makedirs below)
    outFolder = args.outpath
    if not (outFolder.endswith("/") or outFolder.endswith("\\")):
        outFolder += "/"
    if not os.path.exists(outFolder):
        # NOTE(review): message reconstructed from a line-wrapped source
        LOGGER.warning('The output folder does not exist! Creating it...')
        try:
            os.makedirs(outFolder)
        except Exception:
            LOGGER.exception('Failed to create output folder!')
            return
        outFolder = os.path.normcase(outFolder)
        LOGGER.info('Output folder created: {}'.format(outFolder))
    else:
        # check that path leads to a folder
        if not os.path.isdir(outFolder):
            LOGGER.error('The output folder path does not lead to a folder!')
            return

    # validate color schema file
    schemaPath = args.colorschema
    if not os.path.isfile(schemaPath):
        LOGGER.error(
            'The given schema path is no valid file: {}'.format(schemaPath))
        return

    # check if recursive export is desired
    recursive = bool(args.recursive)
    # check if user wants to overwrite existing files
    overwrite = bool(args.overwrite)
    # export the highlighted HTML code as well if desired
    exportHTML = bool(args.exporthtml)
    if exportHTML:
        LOGGER.info('Additional HTML export enabled.')

    # try to read JSON color schema
    jsonSchema = None
    with open(schemaPath, "r") as file:
        try:
            jsonSchema = json.loads(file.read())
        except Exception as ex:
            LOGGER.error(ex)
    if jsonSchema is None:
        return

    # check if path exists
    filePath = args.path
    if not os.path.exists(filePath):
        # NOTE(review): message reconstructed from a line-wrapped source
        LOGGER.error('Failed to convert! Given path does not exist: {}'.format(
            filePath))
        return None

    # BUGFIX: resultPath was previously unbound when neither branch below
    # fired (path exists but is neither a regular file nor a directory),
    # which crashed the final check with an UnboundLocalError.
    resultPath = None
    # check if path leads to file or folder
    if os.path.isfile(filePath):
        # parses html code to unity rt format
        parser = HtmlParser(colorSchema=jsonSchema)
        # convert a file and export the result
        LOGGER.info('Converting the file...')
        resultPath = convertFile(htmlParser=parser,
                                 filePath=filePath,
                                 outputFolder=outFolder,
                                 exportHTML=exportHTML,
                                 overwrite=overwrite)
    elif os.path.isdir(filePath):
        # convert all files of the folder
        LOGGER.info('Converting the files{}...'.format(
            ' recursively' if recursive else ''))
        resultPath = convertFiles(folderPath=filePath,
                                  outputFolder=outFolder,
                                  jsonSchema=jsonSchema,
                                  exportHTML=exportHTML,
                                  overwrite=overwrite,
                                  recursive=recursive)

    # print result path
    if resultPath is not None:
        LOGGER.info('Result path: ' + os.path.abspath(resultPath))
def convertFiles(folderPath, outputFolder, jsonSchema,
                 exportHTML=False, overwrite=False, recursive=False):
    '''
    Converts all files source code to a syntax highlighted rich text format.
    This method does not check if the given path is valid!
    Returns None on errors, the path to the exported files otherwise.
    '''
    firstOutPath = None
    if folderPath.endswith('/') or folderPath.endswith('\\'):
        folderPath = folderPath[:-1]
    # BUGFIX: measure the prefix length AFTER stripping the trailing
    # separator. Previously pathLength was taken before stripping, so for a
    # folderPath WITHOUT a trailing slash, curDir[pathLength:] began with a
    # separator and os.path.join(srcDirName, ...) silently discarded
    # srcDirName (join treats a leading separator as an absolute component).
    pathLength = len(folderPath)
    srcDirName = os.path.normcase(os.path.basename(folderPath))
    outputFolder = os.path.normcase(os.path.normpath(outputFolder))
    for curDir, subDirs, files in os.walk(folderPath, topdown=True):
        # path of the current directory relative to the source root
        relative = curDir[pathLength:].lstrip('/\\')
        curDir_relative = os.path.normpath(os.path.join(srcDirName, relative))
        LOGGER.info('Entering directory: {}'.format(curDir_relative))
        # create export path
        curOutFolder = os.path.normcase(
            os.path.join(outputFolder, curDir_relative))
        LOGGER.debug('Current output folder: {}'.format(curOutFolder))
        if os.path.exists(curOutFolder):
            if os.path.isfile(curOutFolder):
                LOGGER.error(
                    'Failed to export to: {} (is a file instead of a folder)'.
                    format(os.path.abspath(curOutFolder)))
                return None
        else:
            # create the output folder; os.walk is top-down, so the parent
            # directory was already created in an earlier iteration
            LOGGER.info('Creating folder: {}'.format(curOutFolder))
            try:
                os.mkdir(curOutFolder)
            except Exception:
                LOGGER.exception(
                    'Failed to create an output folder: {}'.format(
                        curOutFolder))
                return None
        if firstOutPath is None:
            firstOutPath = curOutFolder
        # convert and export all the files of this folder
        for file in files:
            # parses html code to unity rt format
            parser = HtmlParser(colorSchema=jsonSchema)
            LOGGER.info('Converting file: {}'.format(file))
            path = convertFile(htmlParser=parser,
                               filePath=os.path.join(curDir, file),
                               outputFolder=curOutFolder,
                               exportHTML=exportHTML,
                               overwrite=overwrite)
            if path is not None:
                LOGGER.info('File exported: {}'.format(path))
        # do not take sub-folders into account if recursion is disabled
        if not recursive:
            break
    return firstOutPath
import pandas as pd

from requestUtil import *
from htmlParser import HtmlParser

# Column names of the dataframe collecting one row per parsed HTML tag
COLUMN_NAMES = ["HTML_ID", "TAG_NAME", "ATTRIBUTE_ID", "ATTRIBUTE_NAME",
                "ATTRIBUTE_CLASS", "ATTRIBUTE_PLACEHOLDER", "IN_FORM",
                "TAG_DEPTH", "TAG_STRING", "LABEL"]

# Initializing dataframe
df = pd.DataFrame(columns=COLUMN_NAMES)

# Read urls from csv file (previous comment said "xslx"; the code reads CSV)
loginurls = pd.read_csv("loginurls.csv")

# Creating parser object
htmlParser = HtmlParser()

# renamed from 'id' to avoid shadowing the builtin
url_id = 1

# Iterating over all login urls
for loginurl in loginurls["LOGIN_URL"]:
    try:
        print("Requesting : " + loginurl)
        src = getHtmlString(loginurl)
        # BUGFIX: call parseHtml on the created instance instead of the
        # class — the instance above was created but never used, and a
        # class-level call on an instance method raised a TypeError that
        # the broad except silently swallowed for every URL.
        df = htmlParser.parseHtml(src, url_id, df, loginurl)
        print("finished parsing html num " + str(url_id))
    except Exception as e:
        print("Could not load: " + loginurl)
    url_id = url_id + 1