Python parserCompanyAndWeb Examples

Programming Language: Python

Namespace/Package Name: scripts.lzParser

Method/Function: parserCompanyAndWeb

Examples at hotexamples.com: 3

Python parserCompanyAndWeb - 3 examples found. These are the top-rated real-world Python examples of scripts.lzParser.parserCompanyAndWeb extracted from open-source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

File: impdb.py Project: strongant/gsorglz

def _importParsedPage(page):
    """Parse one stored page and import the company/website it describes.

    Nothing is imported unless both ``buildCompany`` and ``buildWebsite``
    return a non-None object.
    """
    parseData = parserCompanyAndWeb(page)
    com = buildCompany(parseData['company'])
    web = buildWebsite(parseData['web'])
    if com is None or web is None:
        return

    existing = Company.getOne(Company.coname == com.coname)
    if existing is None:
        # No company with this name was found: import it first, then look it
        # up again by regID so the website can be linked to it.
        impCompanyInfo(com)
        existing = Company.getOne(Company.regID == com.regID)
    web.regID = existing
    impWebsite(web)


def parseWebsite():
    """Parse the pages stored in ``TempLz`` and import what they contain.

    Rows with ``id > 18879`` are read in id order, 5000 per page. Each row
    whose ``lzpage`` is not None is parsed with ``parserCompanyAndWeb`` and
    the resulting company/website pair is imported. The id of every row
    visited is appended to ``../logs/parseweb.log`` and printed.

    The log file is closed (and therefore flushed) even when parsing or
    importing raises, so the ids written so far are not lost.
    """
    # NOTE(review): 18879 looks like a resume point from an earlier run --
    # confirm before reusing this function.
    startAfterId = 18879
    # Fetch 5000 rows per page.
    pagesize = 5000

    with open('../logs/parseweb.log', 'a') as log:
        pending = TempLz.select().where(TempLz.id > startAfterId)
        # Count only the rows that will actually be paged through; counting
        # the whole table would schedule trailing pages that are always empty.
        count = pending.count()
        pagecount = int(math.ceil(float(count) / float(pagesize)))
        for i in range(pagecount):
            datas = pending.order_by(TempLz.id).paginate(i + 1, pagesize)
            for d in datas:
                data = d.lzpage
                if data is not None:
                    _importParsedPage(data)
                log.write(str(d.id) + "\n")
                # Single-argument call form prints the same on Python 2 and 3.
                print(d.id)

Example #2

Show file

File: agent.py Project: strongant/gsorglz

def fetchLzPage(isLzUrl, lzPath, shortUrl, subTask):
    """Download a license page and pass its parsed contents on for judging.

    ``downloadByPath(isLzUrl, lzPath)`` is called first. If ``lzPath`` does
    not exist afterwards, the ``TaskInfo`` row whose id is ``subTask.id`` is
    updated with ``state='5'`` and ``remark=isLzUrl``. Otherwise the file is
    read, parsed with ``parserCompanyAndWeb``, and the built company and
    website objects are handed to ``judgeLzResult`` together with
    ``shortUrl`` and ``subTask``.

    Errors raised while reading or processing the file are printed and not
    re-raised (best-effort behaviour kept from the original).
    """
    # Fetch the page using the full URL of the license page. The return value
    # was never used, so it is no longer bound to a name.
    downloadByPath(isLzUrl, lzPath)
    if not os.path.exists(lzPath):
        # Same output as the Python 2 statement `print "lzpath:", lzPath`.
        print("lzpath: %s" % (lzPath,))
        # Disabled in the original: a debug log saying the license page could
        # not be accessed, and an update of Website.updateDate to the current
        # time for subTask.webId.webId.
        # NOTE(review): state '5' presumably marks the task as failed or
        # unreachable -- confirm against the TaskInfo state definitions.
        q = TaskInfo.update(state='5', remark=isLzUrl).where(TaskInfo.id == subTask.id)
        q.execute()
        return

    try:
        # The context manager closes the file even if parsing fails.
        with open(lzPath, 'r') as f:
            page = f.read()
        parseData = parserCompanyAndWeb(page)
        com = buildCompany(parseData['company'])
        tempBuildWeb = buildWebsite(parseData['web'])
        judgeLzResult(com, tempBuildWeb, shortUrl, subTask)
    except Exception as e:
        print(e)

Example #3

Show file

File: impdb.py Project: strongant/gsorglz

    # print TempLz.select().where(TempLz.lzpage.is_null()).count()==0

    # lzpage = TempLz.getOne(TempLz.id==1).lzpage
    # c = TempLz.select(fn.Count(fn.Distinct(TempLz.name))).scalar()
    #TempLz.select().where(TempLz.lzpage.is_null(False)).order_by(TempLz.id).paginate(1, 10)
    #parseWebsite()
    # lzpage = TempLz.select().where(TempLz.name=="上海昊锌科技有限公司")
    # for l in lzpage:
    #     page = l.lzpage
    #     parseData = parserCompanyAndWeb(page)
    #     w =  buildWebsite(parseData['web'])
    #     print parseData['web']
    # str = u'\u57df\u540d:http://www.haoxinkj.com/'
    #
    # arrs = str.split(':')

    # str = u'\u57df\u540d:http://www.haoxinkj.com/'
    # i = str.find(":")+1
    #
    # print str[i:]
    templz = TempLz.getOne(TempLz.id==5512)
    lzpage = templz.lzpage
    parseData = parserCompanyAndWeb(lzpage)
    web = parseData.get('web')
    tw = buildWebsite(web)