Example #1
0
def crawl(vehicle, verify_code=None):
    """Fetch traffic violations for *vehicle* from the 'beijing_new' source.

    Posts the vehicle fields together with the captcha answer to
    ``request_url`` and scrapes the violation rows out of the returned page.

    Args:
        vehicle: object exposing ``engine_num`` and ``license_plate_num``.
        verify_code: object exposing ``verify_code`` (the captcha answer) and
            ``headers`` (sent along with the request).

    Returns:
        dict with ``name`` and ``version`` plus either ``violations`` (a list
        of ``dict(Violation)`` entries, possibly empty) or ``error``
        ('VEHICLE INFO ERROR').

    Raises:
        WrongVerifyCode: when no captcha answer is available, or when the
            page reports that the captcha was entered incorrectly.
    """
    # The signature defaults verify_code to None; treat that the same as a
    # missing answer instead of failing with AttributeError below.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {'name': 'beijing_new', 'version': 1}

    # sf and cxlm are module-level values that are only read here.
    data = {
        "sf": sf,
        "fdjhhm": vehicle.engine_num.encode("utf-8"),
        "carnono": vehicle.license_plate_num[-6:],
        "cxlm": cxlm,
        "yzm": verify_code.verify_code,
    }
    # One extra field is sent, and which one depends on the sf value.
    if sf == 11:
        data["chfs"] = ""
    else:
        data["hpzllb"] = "02"
    print(data)
    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)
    page = second_resp.content

    # Captcha rejected by the site.
    if u"验证码输入有误" in page:
        raise WrongVerifyCode()
    # Plate / engine number wrong or incomplete.
    if u"您输入的车牌号或发动机号有误" in page or u"您没有输入完整的车牌号和发动机号" in page:
        result['error'] = 'VEHICLE INFO ERROR'
        return result
    # Site explicitly reports that there are no unprocessed records.
    if u"您没有未接受处理的违法记录" in page or u"您没有未接受处理的在京违法记录" in page:
        result['violations'] = []
        return result

    # Every table row whose first cell starts with a YYYY-MM-DD date is
    # treated as one violation record.
    violations = []
    for tr in reg(page, r"<tr[\s\S]+?(<td>\d{4}-\d{2}-\d{2}[\s\S]+?)</tr>"):
        violation = Violation()
        tds = reg(tr[0], r"<td.*?>([\s\S]+?)</td>")
        violation.fine = tds[4][0]
        violation.address = tds[1][0]
        violation.time = tds[0][0]
        # The violation description is wrapped in a link inside the cell.
        violation.violation_type = reg(tds[2][0], r"<a.+?>(.+?)</a>")[0][0]
        # Only the exact "not processed" status text marks a record as open.
        violation.handled = tds[5][0].encode("utf-8") != "未处理"
        violation.point = tds[3][0]
        violation.agency = ""
        violations.append(dict(violation))

        print(violation.time)
        print(violation.address)
        print(violation.violation_type)
        print(violation.fine)
        print(violation.point)
        print(violation.handled)
        print(" -------- ")
    result['violations'] = violations
    return result
Example #2
0
def crawl(vehicle, verify_code=None):
    global gg_value
    # do some request with verify_code
    # if verify code is wrong, raise WrongVerifyCode
    # raise WrongVerifyCode()
    
    if verify_code.verify_code == None:
        raise WrongVerifyCode()
    
    result = dict()
    result['name'] = 'dongguan'
    result['version'] = 1
    
    data = {"action":"Illagel",
                "headno":vehicle.license_plate_num[:-6],
                "no":vehicle.license_plate_num[-6:],
                "back4":vehicle.body_num,
                "fdjh6":vehicle.engine_num,
                "validate":verify_code.verify_code,
                "type":"02",
                "tele":"18607325868",
                "gg":gg_value}
    
    for k in data.keys():
        print k, data[k] 
    
    
    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)

    if second_resp.content.__contains__(u"验证码错误"):
        raise WrongVerifyCode()
    if second_resp.content.__contains__(u"发动机号错误") or second_resp.content.__contains__(u"车架号错误") or second_resp.content.__contains__(u"车辆信息错误"):
        result['error'] = 'VEHICLE INFO ERROR'
        return result
    if second_resp.content.__contains__(u"没有违章信息"):
        result['violations'] = []
        return result
    
    tr_list = reg(second_resp.content, r"<tr><td>(.+?</table></td>.+?)</td></tr>")
    violations = []
    for tr in tr_list:
        violation = Violation()
        violation.fine = reg(tr[0], r"<td>(\d{1,6}?)</td><td>")[0][0]
        sub_table_content = reg(tr[0], r'<table class="illegal_table".+?>(.+?)</table>')[0][0]
        sub_info_list = reg(sub_table_content, r'<td[^r]*?>(.+?)</td>')
        violation.address = sub_info_list[2][0]
        violation.time = sub_info_list[0][0].replace("年","-").replace("月","-").replace("日","").replace("时",":").replace("分",":").replace("秒","")
        violation.violation_type = sub_info_list[3][0]
        violation.handled = False
        point = sub_info_list[1][0][1]
        if point == "7":
            point = "12"
        violation.point = point
        violation.agency = ""
        violations.append(dict(violation))
    result['violations'] = violations
    return result
Example #3
0
def crawl(vehicle, verify_code=None):
    """Fetch traffic violations for *vehicle* from the 'huizhou' source.

    Posts the vehicle fields together with the captcha answer to
    ``request_url`` and scrapes the violation rows out of the returned page.

    Args:
        vehicle: object exposing ``license_plate_num`` and ``body_num``.
        verify_code: object exposing ``verify_code`` (the captcha answer) and
            ``headers`` (sent along with the request).

    Returns:
        dict with ``name`` and ``version`` plus either ``violations`` (a list
        of ``dict(Violation)`` entries, possibly empty) or ``error``
        ('VEHICLE INFO ERROR').

    Raises:
        WrongVerifyCode: when no captcha answer is available, or when the
            page reports a captcha error.
    """
    # The signature defaults verify_code to None; treat that the same as a
    # missing answer instead of failing with AttributeError below.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {"name": "huizhou", "version": 1}

    # The plate is split into its last six characters ("no") and whatever
    # precedes them ("headno").
    data = {
        "action": "Illagel",
        "headno": vehicle.license_plate_num[:-6],
        "no": vehicle.license_plate_num[-6:],
        "back4": vehicle.body_num,
        "validate": verify_code.verify_code,
        "type": "02",
    }
    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)
    page = second_resp.content

    # Captcha rejected by the site.
    if u"验证码错误!0" in page:
        raise WrongVerifyCode()
    # Frame number rejected.
    if u"车架号错误" in page:
        result["error"] = "VEHICLE INFO ERROR"
        return result
    # Site explicitly reports that there is no violation information.
    if u"没有违章信息" in page:
        result["violations"] = []
        return result

    # Date/time unit characters are rewritten, in this order, into a
    # "Y-M-D H:M:S"-style string.
    time_replacements = (
        ("年", "-"),
        ("月", "-"),
        ("日", ""),
        ("时", ":"),
        ("分", ":"),
        ("秒", ""),
    )

    violations = []
    for tr in reg(page, r"<tr><td>(.+?</table></td>.+?)</td></tr>"):
        violation = Violation()
        # The fine is taken from the second all-digit cell of the row.
        violation.fine = reg(tr[0], r"<td>(\d+?)</td>")[1][0]
        # Details live in a nested table inside the row.
        sub_table_content = reg(tr[0], r'<table class="illegal_table".+?>(.+?)</table>')[0][0]
        sub_info_list = reg(sub_table_content, r"<td[^3]*?>(.+?)</td>")
        violation.address = sub_info_list[2][0]
        when = sub_info_list[0][0]
        for old, new in time_replacements:
            when = when.replace(old, new)
        violation.time = when
        violation.violation_type = sub_info_list[3][0]
        violation.handled = False
        # The point value is the second character of the cell text.
        violation.point = sub_info_list[1][0][1]
        violation.agency = ""
        violations.append(dict(violation))
    result["violations"] = violations
    return result
Example #4
0
def crawl(vehicle, verify_code=None):
    """Fetch traffic violations for *vehicle* from the 'zhengzhou' source.

    Posts the vehicle fields together with the captcha answer to
    ``request_url`` and scrapes the violation rows out of the returned page.

    Args:
        vehicle: object exposing ``license_plate_num`` and ``body_num``.
        verify_code: object exposing ``verify_code`` (the captcha answer) and
            ``headers`` (sent along with the request).

    Returns:
        dict with ``name``, ``version`` and ``violations`` (a list of
        ``dict(Violation)`` entries, possibly empty).  No ``point`` value is
        assigned to the records of this source.

    Raises:
        WrongVerifyCode: when no captcha answer is available, or when the
            page reports that the captcha was filled in incorrectly.
    """
    # The signature defaults verify_code to None; treat that the same as a
    # missing answer instead of failing with AttributeError below.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {'name': 'zhengzhou', 'version': 1}

    # view_state is a module-level value that is only read here.
    data = {
        "__VIEWSTATE": view_state,
        "txtHphm": vehicle.license_plate_num,
        "txtClsbdh": vehicle.body_num,
        "ddlHpzl": "02",
        "txtYzm": verify_code.verify_code,
        "Button1": " 查 询 ",
    }
    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)
    page = second_resp.content

    # Captcha rejected by the site.
    if u"验证码填写有误" in page:
        raise WrongVerifyCode()
    # Both "no violations" and "vehicle not found" yield an empty list.
    if u"恭喜您,没有您的违法信息" in page or u"没有找到相关的车辆信息" in page:
        result['violations'] = []
        return result

    # Rows whose first centred cell starts with a YYYY-MM-DD date are
    # treated as violation records.
    violations = []
    for tr in reg(page, r"<tr>(<td align='center'>\d{4}-\d{2}-\d{2}.+?)</tr>"):
        violation = Violation()
        td_infos = reg(tr[0], r"<td.*?>(.+?)</td>")
        violation.fine = td_infos[4][0].strip()
        violation.address = td_infos[1][0].strip()
        violation.time = td_infos[0][0].strip()
        violation.violation_type = td_infos[2][0].strip()
        # A record is open only when its status cell is exactly the
        # "not paid" text.
        # NOTE(review): the literal is a plain (non-u) string; if the cell
        # text is unicode under Python 2 this never matches -- confirm.
        violation.handled = td_infos[5][0].strip() != "未交款"
        violation.agency = ""
        violations.append(dict(violation))
    result['violations'] = violations
    return result
Example #5
0
def crawl(vehicle, verify_code=None):
    """Fetch traffic violations for *vehicle* from the 'jilin' source.

    Posts the vehicle fields together with the captcha answer to
    ``request_url`` and scrapes the rows of the ``wzjl_table`` table.

    Args:
        vehicle: object exposing ``license_plate_num`` and ``body_num``.
        verify_code: object exposing ``verify_code`` (the captcha answer) and
            ``headers`` (sent along with the request).

    Returns:
        dict with ``name`` and ``version`` plus either ``violations`` (a list
        of ``dict(Violation)`` entries) or ``error`` ('VEHICLE INFO ERROR').

    Raises:
        WrongVerifyCode: when no captcha answer is available, or when the
            page reports a captcha error.
    """
    # The signature defaults verify_code to None; treat that the same as a
    # missing answer instead of failing with AttributeError below.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {'name': 'jilin', 'version': 1}

    # The "engine" field carries the last four characters of body_num; the
    # matching error message below talks about the vehicle identification
    # code, so this appears deliberate.
    data = {
        "province": vehicle.license_plate_num[:-6],
        "hphm": vehicle.license_plate_num[-6:],
        "engine": vehicle.body_num[-4:],
        "yzm": verify_code.verify_code.strip(),
        "hpzl": "02",
    }
    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)
    page = second_resp.content

    # Captcha rejected by the site.
    if u"验证码错误" in page:
        raise WrongVerifyCode()
    # Last four characters of the identification code rejected.
    if u"车辆识别代号后四位输入有误" in page:
        result['error'] = 'VEHICLE INFO ERROR'
        return result

    # Only the first matched result table is scraped.
    result_table = reg(page, r'<table id="wzjl_table"[\s\S]+?>([\s\S]+?)</table>')
    violations = []
    for tr in reg(result_table[0][0], r"<tr>([\s\S]+?)</tr>"):
        violation = Violation()
        tds = reg(tr[0], r"<td[\s\S]+?>([\s\S]+?)</td>")
        violation.fine = tds[4][0].strip()
        violation.address = tds[3][0].strip()
        violation.time = tds[2][0].strip()
        violation.violation_type = tds[6][0].strip()
        violation.handled = False
        violation.point = tds[5][0].strip()
        violation.agency = ""
        violations.append(dict(violation))
    result['violations'] = violations
    return result
Example #6
0
def crawl(vehicle, verify_code=None):
    """Fetch traffic violations for *vehicle* from the 'guangdong' source.

    Posts the vehicle fields together with the captcha answer to
    ``request_url``, then follows the detail link of every listed record
    and scrapes the violation fields from the detail page.

    Args:
        vehicle: object exposing ``license_plate_num`` and ``body_num``.
        verify_code: object exposing ``verify_code`` (the captcha answer) and
            ``headers`` (sent along with every request).

    Returns:
        dict with ``name`` and ``version`` plus either ``violations`` (a list
        of ``dict(Violation)`` entries, possibly empty) or ``error``
        ('NETWORK ERROR').

    Raises:
        WrongVerifyCode: when no captcha answer is available.
    """
    # The signature defaults verify_code to None; treat that the same as a
    # missing answer instead of failing with AttributeError below.
    if verify_code is None or verify_code.verify_code is None:
        raise WrongVerifyCode()

    result = {'name': 'guangdong', 'version': 1}

    # NOTE(review): "fdjh" is filled from body_num exactly like "CJHM";
    # confirm this is intended rather than an engine-number field.
    data = {
        "province": vehicle.license_plate_num[:-6].encode("gb2312"),
        "hphm": vehicle.license_plate_num[-6:],
        "CJHM": vehicle.body_num[-6:],
        "fdjh": vehicle.body_num[-6:],
        "mofei": verify_code.verify_code,
        "hpzl": "02",
        # Two random two-digit values are sent as "x" and "y".
        "x": random.randint(10, 99),
        "y": random.randint(10, 99),
    }

    print(data)

    second_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers, proxies=proxies)
    print(second_resp.content)
    page = second_resp.content

    # Site explicitly reports that there are no unprocessed records.
    if u"车辆目前无未处理的违章记录" in page:
        result['violations'] = []
        return result
    # Query limit reached or the system asks to wait before retrying.
    if u"您查询的次数过多" in page or u"系统繁忙,请等待30秒后再查" in page:
        result['error'] = 'NETWORK ERROR'
        return result

    violations = []
    further_request_url_prefix = "http://www.ttdaiban.com/"
    # Rows containing a "YYYY-MM-DD HH:MM" timestamp are violation records.
    for tr in reg(page, r'<tr.*?>[\s\S]+?(<td.+\d{4}-\d{2}-\d{2} \d{2}:\d{2}[\s\S]+?)</tr>'):
        tds = reg(tr[0], r'<td.*?>(.*?)</td>')
        # The second cell links (relative, "../...") to the detail page.
        further_request_url_suffix = reg(tds[1][0], r"<a.+'../(.+?)'")[0][0]
        further_resp = fetch_http(
            further_request_url_prefix + further_request_url_suffix,
            "get",
            headers=verify_code.headers,
            proxies=proxies)
        # Detail cells have the form "<label>:<value>"; only the value part
        # is captured.
        sec_tds = reg(further_resp.content.encode("utf-8"), r"<td.*?>.+?:(.+?)</td>")
        violation = Violation()
        violation.fine = sec_tds[6][0].strip()
        # The address is the non-blank text following the closing bracket.
        violation.address = reg(sec_tds[3][0], r".*】([\S]+)")[0][0].strip()
        violation.time = sec_tds[2][0].strip()
        violation.violation_type = sec_tds[4][0].strip()
        violation.handled = False
        # A digit is pulled out of the bracketed points text.
        violation.point = reg(sec_tds[5][0], r"\[.*?(\d+?).*?分\]")[0][0].strip()
        violation.agency = ""
        violations.append(dict(violation))

        print(violation.time)
        print(violation.address)
        print(violation.violation_type)
        print(violation.fine)
        print(violation.point)
        print(" -------- ")
    result['violations'] = violations
    return result
Example #7
0
def crawl(vehicle):
    """Fetch traffic violations for *vehicle* from the 'jiangxi' source.

    A preliminary GET to ``pre_request_url`` supplies a cookie and a ``g``
    token; the query itself is POSTed to ``post_url`` (up to five tries) and
    the answer is scraped with regular expressions.

    Args:
        vehicle: object exposing ``license_plate_num`` and ``body_num``.

    Returns:
        dict with ``name`` and ``version`` plus either ``violations`` (a list
        of ``dict(Violation)`` entries, possibly empty) or ``error``
        ('NET ERROR' / 'VEHICLE INFO ERROR').

    Side effects:
        Sets ``Cookie`` and ``X-Requested-With`` on the module-level
        ``headers`` mapping and prints every POST response body.
    """
    result = {'name': 'jiangxi', 'version': 1}

    car_type = "02"
    car_no = vehicle.license_plate_num.encode("utf-8")
    body_no = vehicle.body_num
    # Field list kept exactly as written, including the repeated "x" key
    # (the later car_no entry is the one that survives).
    data = {
        "a": car_type,
        "b": car_no,
        "c": body_no,
        "x": car_type,
        "y": car_no,
        "z": body_no,
        "w": car_type,
        "e": car_no,
        "s": body_no,
        "v": car_type,
        "x": car_no,
        "q": body_no,
        "j": car_type,
        "k": car_no,
        "l": body_no + "3a=" + car_type,
        "b4": car_no,
        "c=" + body_no + "4a": car_type,
        "gb": car_no,
        "gz": car_no,
        "cc": body_no + "2a=" + car_type,
        "cb": car_no,
        "c1": body_no + "vb=" + car_type,
        "av": car_no,
        "ab": body_no,
        "pageRecords": "100",
        "currPage": "1",
    }

    # The first request yields the session cookie and the "g" token.
    pre_response = fetch_http(url=pre_request_url, method='get', proxies=proxies)
    token_match = reg(pre_response.content, r"g\s+=\s*'(\w+)'")
    headers["Cookie"] = pre_response.headers['Set-Cookie']
    data["g"] = token_match[0][0]

    # Hand-built "k=v&k=v" request body.
    payload = "&".join([name + "=" + data[name] for name in data])

    headers['X-Requested-With'] = 'XMLHttpRequest'
    for _attempt in range(5):
        base_response = fetch_http(
            url=post_url,
            method='post',
            data=payload,
            headers=headers,
            proxies=proxies)
        print(base_response.content)
        body = base_response.content
        # Retry while the answer says "info does not exist" or "timed out".
        if u"该信息不存在" not in body and u"查询超时" not in body:
            break
    else:
        # All five attempts produced a retryable answer.
        result['error'] = 'NET ERROR'
        return result

    content = base_response.content

    # Daily query quota exceeded for this machine.
    if u'很抱歉,您的计算机已超过今天最大查询次数,请明天再使用' in content:
        result['error'] = 'NET ERROR'
        return result

    # Plate / vehicle type or frame-number suffix rejected.
    plate_rejected = u'您输入车牌号码或机动车类型不正确' in content
    if plate_rejected or u'您输入车架号后6位不正确' in content:
        result['error'] = 'VEHICLE INFO ERROR'
        return result

    # Site explicitly reports that the vehicle has no violations.
    if u'该车辆暂无违法信息' in content:
        result['violations'] = []
        return result

    # Count every timestamp key and keep only those seen exactly once,
    # newest first.
    occurrences = {}
    for stamp_match in reg(content, r"\d{4}-\d{2}-\d{2}\s{1}\d{2}:\d{2}"):
        stamp_key = stamp_match[0]
        occurrences[stamp_key] = occurrences.get(stamp_key, 0) + 1
    time_list = sorted(
        [stamp for stamp in occurrences if occurrences[stamp] == 1],
        reverse=True)

    address_reg_result = reg(
        content,
        r"' style=\"padding-left:5px\" align='left'>(.+?)</td>")
    code_reg_result = reg(
        content,
        r"<span title=''>(.+?)</span>")
    agency_reg_result = reg(
        content,
        r'style="padding-left:5px" align="left">(.+?)</td>')

    # The i-th timestamp is paired with the i-th code, address and agency.
    violations = []
    for idx, stamp in enumerate(time_list):
        violation = Violation()
        violation.time = stamp
        violation.violation_type = code_reg_result[idx][0]
        violation.address = address_reg_result[idx][0]
        violation.agency = agency_reg_result[idx][0]
        violation.handled = False
        violations.append(dict(violation))

    result['violations'] = violations
    return result
Example #8
0
def crawl(vehicle, verify_code=None):
    # do some request with verify_code
    # if verify code is wrong, raise WrongVerifyCode
    # raise WrongVerifyCode()
    
    if verify_code.verify_code == None:
        raise WrongVerifyCode()
    
    result = dict()
    result['name'] = 'guangzhou'
    result['version'] = 1
    
    captcha_data = {"captchaId":verify_code.verify_code}
    count = 0
    while count < 5:
        count += 1
        captcha_resp = fetch_http(captcha_confirm_url, "post", data=captcha_data, headers=verify_code.headers)
        print "captcha_content : ", captcha_resp.content
        if captcha_resp.content.__contains__("fail"):
            print "captcha retry ~!"
            continue
        else:
            break
    else:
        raise WrongVerifyCode()
    
    pre_data = {"hpzl":"02",
                    "hphm":vehicle.license_plate_num,
                    "fdjh":vehicle.engine_num,
                    "clsbdh":vehicle.body_num,
                    "captcha":verify_code.verify_code}
    count = 0
    while count < 5:
        count += 1
        pre_resp = fetch_http(pre_request_url, "post", data=pre_data, headers=verify_code.headers)
        print "pre_content", pre_resp.content
        if pre_resp.content != "":
            break
    if pre_resp.content.__contains__("_error"):
        result['error'] = 'VEHICLE INFO ERROR'
        return result
    key = reg(pre_resp.content, r"(?<=key:)(.+)(?=\$\d+)")[0][0]
    total = reg(pre_resp.content, r"(?<=\$)(\d+)")[0][0]
    if total == "0":
        result['violations'] = []
        return result
    
    data = {"platenumtype":"02",
                    "platenum":vehicle.license_plate_num,
                    "engineno":vehicle.engine_num,
                    "vehicleidnum":vehicle.body_num,
                    "key":key}
    count = 0
    while count < 5:
        count += 1
        main_resp = fetch_http(request_url, "post", data=data, headers=verify_code.headers)
        print "main_content : ", main_resp.content
        if main_resp.content.__contains__('"data":"",'):
            print "retry ~!"
            continue
        else:
            break
    vio_data = json.loads(main_resp.content)
    violations = []
    for elem in vio_data["data"]:
        violation = Violation()
        violation.fine = elem["FKJE"]
        violation.point = elem["WFJFS"]
        violation.address = elem["WFDZ"]
        violation.time = elem["WFSJ"]
        violation.violation_type = elem["WFXWMC"]
        violation.handled = False
        violation.agency = ""
        violations.append(dict(violation))
    result['violations'] = violations
    return result