Example #1
0
def user_scrape(D, token, stime=""):
    """Scrape one day of follower statistics and write it to ``user_info.csv``.

    A header row is written first.  The user-analysis page is then fetched
    with ``D.get`` and one record is pulled out of it: the record whose date
    equals ``stime``, or, when ``stime`` is empty, the record that is
    immediately followed by the closing ``] } ]`` of the embedded data
    (presumably the most recent day -- confirm against the page layout).
    Finally the per-source ``new_user`` count is requested for each source id
    and the assembled row is written.

    Args:
        D: object exposing ``get(url)`` that returns the response body.
        token: session token interpolated into every request URL.
        stime: date string to look up; it is interpolated into a regular
            expression as-is.  Empty selects the trailing record.

    Returns:
        The statistics date of the row that was written.

    Raises:
        AssertionError: if no statistics date could be extracted.
    """
    FIELDS = [u"统计时间", u"新关注人数", u"取消关注人数", u"净增关注人数", u"累积关注人数", u"公众号搜索", u"二维码扫描", u"图文页右上角菜单", u"名片分享", u"其他"]
    # (CSV column, pattern applied to the extracted record), in query order.
    SUMMARY_PATTERNS = [
        (u"统计时间", r'date:\s"([^"]+)"'),
        (u"新关注人数", r"new_user:\s([^\s]+)\s"),
        (u"取消关注人数", r"cancel_user:\s([^,]+),"),
        (u"净增关注人数", r"netgain_user:\s([^,]+),"),
        (u"累积关注人数", r"cumulate_user:\s([^,]+),"),
    ]
    # (value of the ``source`` query parameter, CSV column it fills).
    SOURCES = [
        ("1", u"公众号搜索"),
        ("30", u"二维码扫描"),
        ("43", u"图文页右上角菜单"),
        ("17", u"名片分享"),
        ("0", u"其他"),
    ]

    writer = UnicodeWriter("user_info.csv", "gbk")
    writer.writerow(FIELDS)

    url_total = "https://mp.weixin.qq.com/misc/useranalysis?&token=%s&lang=zh_CN"
    html = D.get(url_total % token)
    if not stime:
        m = common_re(r'\{\s*(date:\s"[^\}]+)\}\s*\]\s*\}\s*\]', html)
    else:
        m = common_re(r'\{\s*(date:\s"%s"[^\}]+)\}' % stime, html)

    bag = {}
    for field, pattern in SUMMARY_PATTERNS:
        # Without a record there is nothing to search; leave the cell blank.
        bag[field] = common_re(pattern, m) if m else ""

    stat_date = bag[u"统计时间"]
    if not stat_date:
        # Raised explicitly (instead of a bare ``assert``) so the check still
        # runs under ``python -O``; the exception type is unchanged.
        raise AssertionError("could not extract the statistics date from the user analysis page")

    gain_url = "https://mp.weixin.qq.com/misc/useranalysis?&begin_date=%s&end_date=%s&source=%s&token=%s&lang=zh_CN&f=json&ajax=1"
    for source_id, field in SOURCES:
        gain_html = D.get(gain_url % (stat_date, stat_date, source_id, token))
        bag[field] = common_re(r'"new_user":([^,]+),', gain_html)

    writer.writerow([bag.get(field) for field in FIELDS])
    return stat_date
Example #2
0
def get_last_trade_date():
    """Return the date shown on the eastmoney LHB page, or ``''`` on failure.

    The page is fetched with a fresh ``download()`` object and the value of
    the attribute following ``"readonly"`` is extracted with ``common_re``.
    When nothing is extracted an error line is printed and an empty string is
    returned.
    """
    page = download().get('http://data.eastmoney.com/stock/lhb.html')
    trade_date = common_re(r'"readonly"\svalue\="([^"]+)"', page)
    if not trade_date:
        print('Error: Can not get last trade date!')
        return ''
    return trade_date
Example #3
0
def main():
    """Log in via SSO, carry the session over to the HR site and post a query.

    Steps, in order:
      1. Log in with ``login``; stop silently if that fails.
      2. Fetch the HR SSO servlet page and forward its hidden values to the
         SSO login endpoint.
      3. Post the values returned by the SSO endpoint back to the HR servlet.
      4. Open the clock-in page, then post the ``loadData`` RPC request built
         by ``get_post_xml(month, year)``.

    Prints ``Hello world!`` when the RPC response reports success, otherwise
    prints the raw response.  ``userid``, ``passwd``, ``dynamic_passwd``,
    ``month`` and ``year`` are read from module scope.
    """
    D = download(is_cookie=True)
    logged_in = login(D, userid, passwd, dynamic_passwd)
    if not logged_in:
        # login fail, return
        return

    # The same three hidden inputs are scraped from both intermediate pages.
    iasid_re = r'"IASID"[^>]+value="([^"]+)"'
    timestamp_re = r'"TimeStamp"[^>]+value="([^"]+)"'
    authenticator_re = r'"Authenticator"[^>]+value="([^"]+)"'

    hr_url = 'https://hr.guosen.com.cn/sso/SsoHrssServlet'
    servlet_page = D.get(hr_url)

    sso_request = {}
    sso_request['IASID'] = common_re(iasid_re, servlet_page)
    sso_request['TimeStamp'] = common_re(timestamp_re, servlet_page)
    sso_request['Authenticator'] = common_re(authenticator_re, servlet_page)
    # The return URL is the HR servlet itself.
    sso_request['ReturnURL'] = hr_url
    sso_page = D.post('https://sso.guosen.com.cn/login.aspx', sso_request)

    hr_callback = {}
    hr_callback['IASID'] = common_re(iasid_re, sso_page)
    hr_callback['Result'] = '0'
    hr_callback['TimeStamp'] = common_re(timestamp_re, sso_page)
    hr_callback['UserAccount'] = userid
    hr_callback['ErrorDescription'] = ''
    hr_callback['Authenticator'] = common_re(authenticator_re, sso_page)
    hr_callback['IASUserAccount'] = userid
    D.post(hr_url, hr_callback)

    # The clock-in page is requested before the RPC call; its body is unused.
    D.get('https://hr.guosen.com.cn/hrss/ta/Clockin.jsp?_funcode=E0020902')

    rpc_url = 'https://hr.guosen.com.cn/hrss/dorado/smartweb2.RPC.d?__rpc=true'
    rpc_request = {}
    rpc_request['__type'] = 'loadData'
    rpc_request['__viewInstanceId'] = 'nc.bs.hrss.ta.Clockin~nc.bs.hrss.ta.ClockinViewModel'
    rpc_request['__xml'] = get_post_xml(month, year)
    rpc_response = D.post(rpc_url, rpc_request)

    if 'result succeed="true"' not in rpc_response:
        print(rpc_response)
    else:
        print('Hello world!')
Example #4
0
def login(D, username, passwd):
    """Log in to mp.weixin.qq.com and return the session token.

    The password is sent as its MD5 hex digest.  The token is extracted from
    the login response and then confirmed by loading the home page and
    looking for the verified-account marker.

    Args:
        D: object exposing ``post(url, data=...)`` and ``get(url)`` that
            return the response body.
        username: account name sent as ``username``.
        passwd: plain-text password; it is passed to ``hashlib.md5`` as-is.

    Returns:
        The token string on success, ``False`` otherwise.
    """
    login_url = 'https://mp.weixin.qq.com/cgi-bin/login'
    post_data = {}
    post_data['username'] = username
    post_data['pwd'] = hashlib.md5(passwd).hexdigest()
    post_data['imgcode'] = ''
    post_data['f'] = 'json'
    response = D.post(login_url, data=post_data)

    token = common_re(r'token=([^"]+)"', response)
    if not token:
        # No token in the response: the login was rejected, so do not request
        # the home page with an empty token.
        print('Login fail.')
        return False

    home_url = 'https://mp.weixin.qq.com/cgi-bin/home?t=home/index&lang=zh_CN&token=%s' % token
    html = D.get(home_url)
    if 'success">已认证' in html:
        print('Login succefully!')
        return token
    print('Login fail.')
    return False
Example #5
0
def login(D, userid, passwd, dynamic_passwd):
    """Sign in to the guosen SSO portal; return ``True`` on success.

    The login page is fetched to scrape its hidden form-state inputs
    (``__VIEWSTATE`` and friends), the credentials are posted back to the
    same URL, and the hidden values of that response are posted to
    ``Default.aspx``.  The login counts as successful when the final page
    contains the welcome text and its current-user span is not empty.

    Args:
        D: object exposing ``get(url)`` and ``post(url, data)`` that return
            the response body.
        userid: account id, sent as the user name and both account fields.
        passwd: static password.
        dynamic_passwd: dynamic (one-time) password.

    Returns:
        ``True`` if the final page looks logged in, ``False`` otherwise.  A
        status line is printed in both cases.
    """
    login_url = 'https://sso.guosen.com.cn/Login.aspx'
    login_page = D.get(login_url)

    credentials = {}
    credentials['__LASTFOCUS'] = ''
    # Hidden inputs copied from the login page, in form order.
    for field, pattern in (
        ('__VIEWSTATE', r'id="__VIEWSTATE"\s*value="([^"]+)"'),
        ('__EVENTTARGET', r'id="__EVENTTARGET"\s*value="([^"]+)"'),
        ('__EVENTARGUMENT', r'id="__EVENTARGUMENT"\s*value="([^"]+)"'),
        ('__EVENTVALIDATION', r'id="__EVENTVALIDATION"\s*value="([^"]+)"'),
    ):
        credentials[field] = common_re(pattern, login_page)
    for field, value in (
        ('hidReturnUrl', 'Default.aspx'),
        ('hidIASID', '000'),
        ('txtUserName', userid),
        ('chkRememberAccount', 'on'),
        ('txtPassword', passwd),
        ('chkEnableDynamicCode', 'on'),
        ('txtDynamicPassword', dynamic_passwd),
        ('hidSms', '0'),
        ('txtValidateCode', ''),
        ('btnLogin.x', '0'),
        ('btnLogin.y', '0'),
    ):
        credentials[field] = value
    reply = D.post(login_url, credentials)

    # Hidden values returned by the login post, forwarded to Default.aspx.
    iasid = common_re(r'"IASID"[^>]+value="([^"]+)"', reply)
    timestamp = common_re(r'"TimeStamp"[^>]+value="([^"]+)"', reply)
    authenticator = common_re(r'"Authenticator"[^>]+value="([^"]+)"', reply)

    handoff = {}
    for field, value in (
        ('IASID', iasid),
        ('Result', '0'),
        ('TimeStamp', timestamp),
        ('UserAccount', userid),
        ('ErrorDescription', ''),
        ('Authenticator', authenticator),
        ('IASUserAccount', userid),
    ):
        handoff[field] = value
    landing = D.post('https://sso.guosen.com.cn/Default.aspx', handoff)

    if '您好!欢迎访问单点系统' not in landing or '<span id="lblCurrentUser"></span>' in landing:
        print('Login fail...')
        return False
    print('Login sucessfully!')
    return True
Example #6
0
def login(D, userid, passwd, dynamic_passwd):
    """Authenticate against the guosen SSO site and report the outcome.

    Flow: GET ``Login.aspx`` and scrape its hidden form-state inputs, POST
    the credentials to the same URL, then POST the hidden values found in
    that response to ``Default.aspx``.  Success means the resulting page
    contains the welcome text while its current-user span is not empty.

    Args:
        D: object exposing ``get(url)`` and ``post(url, data)`` that return
            the response body.
        userid: account id, sent as the user name and both account fields.
        passwd: static password.
        dynamic_passwd: dynamic (one-time) password.

    Returns:
        ``True`` when the final page looks logged in, else ``False``; a
        status line is printed either way.
    """
    url = 'https://sso.guosen.com.cn/Login.aspx'
    first_page = D.get(url)

    # Scrape the hidden form state before assembling the credential form.
    viewstate = common_re(r'id="__VIEWSTATE"\s*value="([^"]+)"', first_page)
    event_target = common_re(r'id="__EVENTTARGET"\s*value="([^"]+)"', first_page)
    event_argument = common_re(r'id="__EVENTARGUMENT"\s*value="([^"]+)"', first_page)
    event_validation = common_re(r'id="__EVENTVALIDATION"\s*value="([^"]+)"', first_page)

    form = {}
    form['__LASTFOCUS'] = ''
    form['__VIEWSTATE'] = viewstate
    form['__EVENTTARGET'] = event_target
    form['__EVENTARGUMENT'] = event_argument
    form['__EVENTVALIDATION'] = event_validation
    form['hidReturnUrl'] = 'Default.aspx'
    form['hidIASID'] = '000'
    form['txtUserName'] = userid
    form['chkRememberAccount'] = 'on'
    form['txtPassword'] = passwd
    form['chkEnableDynamicCode'] = 'on'
    form['txtDynamicPassword'] = dynamic_passwd
    form['hidSms'] = '0'
    form['txtValidateCode'] = ''
    form['btnLogin.x'] = '0'
    form['btnLogin.y'] = '0'
    second_page = D.post(url, form)

    # Values handed back by the login post, relayed to the SSO landing page.
    iasid = common_re(r'"IASID"[^>]+value="([^"]+)"', second_page)
    timestamp = common_re(r'"TimeStamp"[^>]+value="([^"]+)"', second_page)
    authenticator = common_re(r'"Authenticator"[^>]+value="([^"]+)"', second_page)

    relay = {}
    relay['IASID'] = iasid
    relay['Result'] = '0'
    relay['TimeStamp'] = timestamp
    relay['UserAccount'] = userid
    relay['ErrorDescription'] = ''
    relay['Authenticator'] = authenticator
    relay['IASUserAccount'] = userid
    final_page = D.post('https://sso.guosen.com.cn/Default.aspx', relay)

    succeeded = (
        '您好!欢迎访问单点系统' in final_page
        and '<span id="lblCurrentUser"></span>' not in final_page
    )
    print('Login sucessfully!' if succeeded else 'Login fail...')
    return succeeded