Example #1
0
def load(html, encode, xpaths):
    parser = HtmlParser(html, encode)
    parser.parse()
    for key in xpaths:
        xpath = xpaths.get(key)
        elements = parser.get_element_by_xpath(xpath, encode)
        value = elements[0][2].encode('utf-8')
Example #2
0
def load(id,tm,url,html,encode, xpaths):
    parser = HtmlParser(html,encode)
    parser.parse()

    db_sql =  "insert into job_detail(url,src_desc,type,title,\
    keywords,department,job_require,job_duty,\
    job_welfare,label,company,company_desc,\
    logo,salary,work_experience,\
    edu, field,location,head_count,pub_time) values("

    jd = page_pb2.JobDescription()
    js ="{\"pub_tm\":\"" + tm + "\","
    js = js + "\"url\":\"" + url + "\","
    for key in xpaths:
#        print "[ON]handle " + key
        xpath=xpaths.get(key)
        elements = parser.get_element_by_xpath(xpath,encode)
        if (len(elements) == 0):
            print "[ERR] " + key
            continue
        value = elements[0][2].encode('utf-8')
        js += "\"" + key + "\":\"" + value + "\","
#        set_pb(jd,key,value)
    fp=open("./data/"+id+".dat",'w')
    fp.write(js.rstrip(',') + "}")
    fp.close()
Example #3
0
def load(id,html,encode, xpaths):
    parser = HtmlParser(html,encode)
    parser.parse()
    jd = page_pb2.JobDescription()
    js ="{";
    for key in xpaths:
#        print "[ON]handle " + key
        xpath=xpaths.get(key)
        elements = parser.get_element_by_xpath(xpath,encode)
        if (len(elements) == 0):
            print "[ERR] " + key
            continue
        value = elements[0][2].encode('utf-8')
        js += "\"" + key + "\":\"" + value + "\","
#        set_pb(jd,key,value)
    fp=open("./data/"+id+".dat",'w')
    fp.write(js.rstrip(',') + "}")
    fp.close()
Example #4
0
def load(id, html, encode, xpaths):
    parser = HtmlParser(html, encode)
    parser.parse()
    jd = page_pb2.JobDescription()
    js = "{"
    for key in xpaths:
        #        print "[ON]handle " + key
        xpath = xpaths.get(key)
        elements = parser.get_element_by_xpath(xpath, encode)
        if (len(elements) == 0):
            print "[ERR] " + key
            continue
        value = elements[0][2].encode('utf-8')
        js += "\"" + key + "\":\"" + value + "\","


#        set_pb(jd,key,value)
    fp = open("./data/" + id + ".dat", 'w')
    fp.write(js.rstrip(',') + "}")
    fp.close()