Example #1
0
File: jrj.py Project: baozhen/demo
def data_insert(product,licailist):
    #用于记录日志
    global db
    insert = 0#插入标志,为1才插入
    startdate = product.get('sell_Org_Date')
    enddate = product.get('sell_End_Date')
    currency = product.get('entr_Curncy_Name')
    productdays = product.get('days')
    if productdays == '':
        productdays =0
    rate = product.get('prd_Max_Yld_De')
    name = product.get('prd_Sname')
    BankName = product.get('bank_Name')
    money = product.get('entr_Min_Curncy')
    if money =='':
        money = 0
    endday = product.get('end_Date')
    id1 = str(product.get('inner_Code'))
    url ='http://bankpro.jrj.com.cn/product/'+str(product.get('inner_Code'))+'/'
    html = urllib.urlopen(url).read()
    soup = BeautifulSoup(html)
    increasemoney = re.findall('<td class="txr">委托递增金额(元).*?</td>\s+<td class="cur">(\d+).*?</td>',str(soup))
    try:
        start= re.findall('<td class="cur">收益起始日期</td><td>(.*?)</td>',str(soup))[0]
    except:
        start = ''
    try:
        atype= re.findall('<td class="cur">收益类型</td><td>(.*?)</td>',str(soup))[0]
    except:
        atype = ''
    try:
        amoney = re.findall('<td class="cur">起购金额递增单位</td><td>(.*?)</td>',str(soup))[0]
    except:
        amoney = 0
    try:
        area = re.findall('<td class="cur">销售地区</td><td colspan="5"><span.*?>(.*?)</span></td>',str(soup))[0]
    except:
        area = ''
    #工商银行人民币2012-12-122013-06-252012-12-062012-12-1150000
    #key = BankName+currency+start[:10]+endday+startdate+enddate+money
    key = BankName+currency+startdate+enddate+str(int(money))+str(int(productdays))
    if key not in licailist:
        print "not in"
        insert = 1
    else:
        print key,"already in!"
    try:
        sql= "insert into sohulist(ID,ProductName,BankName,Currency,Duration,Product_StartDate,Sell_StartDate,Sell_EndDate,PayDuration,Return_Rate,Start_Money,Type,Product_EndDate,create_time,Increasing_Unit,Area,source) values('"+id1+"','"+name+"','"+BankName+"','"+currency+"',"+productdays+",'"+start+"','"+startdate+"','"+enddate+"','"+productdays+"',"+rate+","+money+",'"+atype+"','"+endday+"','"+str(datetime.now())[:19]+"','"+amoney+"','"+area+"','jrj')"
    except:
        sql =''
    print 'sql=',sql
    try:
        if insert == 1:
            testlog = log.getLogging('jrj')
            db.execute(sql)
            print "insert ok!"
            testlog.critical(str(datetime.now())[:19]+'\tjrj\t'+key+'\n' )
    except:
        print "insert error"
        pass
Example #2
0
import log
g_log=log.getLogging(__name__)

from concurrent import futures
import time
import threading
import grpc

import bank_pb2
import bank_pb2_grpc
import bank_sql

# from bank import bank_pb2
# from bank import bank_pb2_grpc
# from bank import bank_sql


_ONE_DAY_IN_SECONDS = 60 * 60 * 24


class bank_server(bank_pb2_grpc.bankServicer):
    def __init__(self):
        """Initialise the servicer base class, the SQL backend and a lock."""
        super(bank_server, self).__init__()
        # Backend object from the project-local bank_sql module; run() is
        # called once here (presumably starts/connects it -- TODO confirm).
        self.bankSql=bank_sql.bank()
        self.bankSql.run()
        # Lock acquired by the request handlers (see deposit) before they
        # proceed.
        self.threadLock = threading.Lock()

    def deposit(self, request, context):
        # g_log.info(request)
        g_log.info("deposit: "+ request.account + " " + str(request.value))
        self.threadLock.acquire()
Example #3
0
File: jrj.py Project: baozhen/demo
        tmpstr = tmpstr+str(int(id1.Duration))
        idlist.append(id1.values())
        licailist.append(tmpstr)

    jrjsql = 'select ID from jrjlist;'
    jrjIDs = []

    for jrj in db.query(jrjsql):
        jrjIDs.append(jrj.ID)
    dic =json.loads(html.replace('var bps=','')) 
    products = dic.get('bankProductList')
    for a in products:
        id1 = str(a.get('inner_Code'))
        if id1 in jrjIDs:
            print id1,"already crawlered"
            continue
        if id1 in idlist:
            print id1+' is already in it'
            continue
        print u"insert into jrjlist(ID) values('"+id1+"');"
        db.execute(u"insert into jrjlist(ID) values('"+id1+"');")
        data_insert(a,licailist)
    #'''

# Script entry point: record the start in the 'jrj' log and on stdout, then crawl.
if __name__ == "__main__":
    testlog = log.getLogging('jrj')
    # str(datetime.now())[:19] keeps the first 19 characters of the timestamp
    # text, i.e. drops the fractional seconds.
    testlog.critical(str(datetime.now())[:19]+'\tjrj\tstarted\n' )
    print str(datetime.now())[:19]+'\tjrj\tstarted\n' 
    main()

Example #4
0
# -*- coding: utf8 -*-

__author__ = 'Tan Ying<*****@*****.**>' 

import os
import sys
import log
import json
import config
import commands

logger = log.getLogging('utils.py')

def is_argument_in_dataset(argument, dataset, ignore_case = False):
    """
    Tell whether ``argument`` equals any item of ``dataset``.

    With ``ignore_case`` set, both sides are compared after ``lower()``;
    otherwise plain ``==`` is used. An empty ``dataset`` yields ``False``.
    """
    if ignore_case:
        return any(argument.lower() == entry.lower() for entry in dataset)
    return any(argument == entry for entry in dataset)

def change_list_to_json(list, file):
    string = ''
    if len(list) > 0:
        string = '{\n'
Example #5
0
        url = re.findall('<a href="/(.*?)"',hlist[0],re.S)[0]
        durl = u'http://www.hui800.com/' + url
        dhtml = urllib.urlopen(durl).read().decode('utf-8')

        dlist = re.findall('<body.*?>(.*?)</body>',dhtml,re.S)[0]
        address = re.findall('<a href="(.*?)"',dlist,re.S)[1]
        '''

        hlist = re.findall('<div class="dealsug b615">(.*?)</div>',html,re.S)
        url = re.findall('<a href="/(.*?)"',hlist[0],re.S)[0]
        durl = u'http://www.hui800.com/' + url
        dhtml = urllib.urlopen(durl).read().decode('utf-8')


        dlist = re.findall('<body.*?>(.*?)</body>',dhtml,re.S)[0]
        address = re.findall('<a href="(.*?)"',dlist,re.S)[1]

    except:
        address=''
    return address






# Script entry point: record the start in the 'qq' log, then crawl.
if __name__ == "__main__":
    testlog = log.getLogging('qq')
    # str(datetime.now())[:19] keeps the first 19 characters of the timestamp
    # text, i.e. drops the fractional seconds.
    testlog.critical(str(datetime.now())[:19]+'\tqq\tstarted\n' )
    main()
Example #6
0
def run(html,blist,productid,productname):
    """Store one qq product in ``qqlist`` and, if not yet known, in ``sohulist``.

    Args:
        html: text of the product detail page; its ``<li>`` items are
            extracted by regex and read by position (4, 6, 8, 12).
        blist: positional list of summary fields. Indexes read here:
            0 bank name, 1 sell start date, 2 sell end date (first 10
            characters kept), 3 product end date, 4 currency, 7 start money,
            8 return rate, 10 return type.
        productid: product ID; compared with the IDs in ``qqlist`` and
            ``sohulist`` and written as the ``ID`` column.
        productname: written as ``ProductName`` and to the 'qq' log.

    Returns:
        None when start money is ``'0'`` or ``''`` (nothing is stored) and on
        the normal path; ``0`` when ``productid`` is already in ``qqlist``.
        When the bank name is empty nothing is inserted.
    """
    global db
    # NOTE(review): opened in append mode but neither written to nor closed
    # anywhere in this function.
    logs = open("crawler.log",'a')
    # Maps full bank names to the short names stored in the database.
    bankdic = {
            u'上海浦东发展银行':u'浦发银行',                 
            u'中国光大银行':u'光大银行',          
            u'中国农业银行':u'农业银行',        
            u'中国工商银行':u'工商银行',        
            u'中国建设银行':u'建设银行',        
            u'中国民生银行':u'民生银行',          
            u'中国邮政储蓄银行':u'邮政储蓄银行',      
            u'中国银行':u'中国银行',            
            u'交通银行':u'交通银行',            
            u'兴业银行':u'兴业银行',              
            u'北京银行':u'北京银行',              
            u'华夏银行':u'华夏银行',              
            u'平安银行':u'平安银行',              
            u'广发银行':u'广发银行',              
            u'招商银行':u'招商银行',
            u'上海银行':u'上海银行',   
            u'中信银行':u'中信银行', 
            }
    # NOTE(review): banklist is not referenced again in this function.
    banklist=['中国银行','建设银行','农业银行','工商银行','交通银行','招商银行','民生银行','平安银行','浦发银行','广发银行','光大银行','华夏银行','中信银行','北京银行','上海银行','兴业银行','深发展银行','邮政储蓄银行'] 
    # Drop the company suffix from the bank name.
    bankname=blist[0].replace(u'股份有限公司','')
    #print productid,blist
    sell_startDate = blist[1]
    sell_endDate = blist[2][:10]
    sell_endDate = sell_endDate.replace(' ','')
    #print len(sell_endDate),'qq'
    product_endDate=blist[3]
    currency=blist[4]
    start_money=blist[7]
    print 'start_money= ',start_money
    # Products without a usable start amount are skipped entirely.
    if (start_money=='0') or (start_money==''):
        print 'money error!!!'
        return
    return_rate=blist[8]
    return_type=blist[10]
    hlist = re.findall("<li>(.*?)</li>",html,re.S)
    # Each <li> text starts with a label that is stripped to leave the value.
    product_startDate=hlist[4].replace(u'收益起计:','')
    area=re.findall('<a title="(.*?)" class=',hlist[6])[0]
    
    product_type=hlist[8].replace(u'对象:','')
    duration=hlist[12].replace(u'付息周期:','')
    duration=duration.replace(u'天','')
    duration=duration.replace(u'日','')
    
    # Normalise the duration to days: a month counts as 30, a year as 360;
    # an empty value becomes '0'.
    if u"月" in duration:
        d = int(re.findall('\d+',duration)[0])*30
        duration=str(d)
    elif u"年" in duration:
        d = int(re.findall('\d+',duration)[0])*360
        duration=str(d)
    if len(duration)==0:
        duration=str(0)

    # Build licailist: one de-duplication key per existing sohulist row
    # (bank + currency + sell start + sell end + start money + duration).
    sql = u'select ID,BankName,Currency,Duration,Product_StartDate,Sell_StartDate,Sell_EndDate,Duration,Product_EndDate,Start_Money,Return_Rate from sohulist ;'
    licailist = []
    for id1 in db.query(sql):
        tmplist = []  # NOTE(review): never used
        tmpstr = ''
        tmpstr = tmpstr+id1.BankName
        tmpstr = tmpstr+id1.Currency
        #tmpstr = tmpstr+id1.Product_StartDate
        #tmpstr = tmpstr+id1.Product_EndDate
        tmpstr = tmpstr+id1.Sell_StartDate.replace(' ','')
        tmpstr = tmpstr+id1.Sell_EndDate.replace(' ','')
        tmpstr = tmpstr+str(int(id1.Start_Money))
        tmpstr = tmpstr+str(int(id1.Duration))
        licailist.append(tmpstr)

    #if bankname in bankdic:
    if len(bankname)>0:
        # Use the short bank name when one is known; otherwise keep as is.
        if bankname in bankdic:
            bankname=bankdic.get(bankname)
    
        sell_endDate = sell_endDate[:10]
        sell_endDate = sell_endDate.replace(' ','')
        # Same layout as the keys collected in licailist above.
        key = bankname+currency+sell_startDate+sell_endDate+str(int(start_money))+str(int(duration))
        print 'key =',key
        # Duplicate checks start here.
        #======================
        #======================
        # Step 1: qqlist is keyed by product ID only.
        sql1 = u'select ID from qqlist;'
        idlist = []
        for id1 in db.query(sql1):
            idlist.append(id1.ID)
        if productid in idlist:
            print 'qqlist已经存在'
            return 0
        else:
            # NOTE(review): SQL is assembled by string concatenation from
            # scraped values; quotes in any field would break the statement.
            sql = u"insert into qqlist(ID,ProductName,BankName,Currency,Duration,Product_StartDate,Sell_StartDate,Sell_EndDate,PayDuration,Return_Rate,Type,Start_Money,Area,Product_EndDate,Product_Type) values ('"+productid+"','"+productname+"','"+bankname+"','"+currency+"',"+duration+",'"+product_startDate+"','"+sell_startDate+"','"+sell_endDate+"','"+duration+"','"+return_rate+"','"+return_type+"',"+start_money+",'"+area+"','"+product_endDate+"','"+product_type+"');"
            #print sql    
            try:
                pass
                db.execute(sql)
                print 'qqlist不存在,插入qqlist'
            except:
                # Any failure is only reported; processing continues below.
                print "error sql@@@@@@@: "
        #sqlsohulist= u"select  * from sohulist WHERE BankName='"+bankname+"' and Start_Money="+start_money+" and  Sell_StartDate = '"+sell_startDate+"' and Sell_EndDate  = '"+sell_endDate+"'and Product_StartDate='"+product_startDate+"' and Product_EndDate='"+product_endDate+"';"
        # Step 2: sohulist gets the row only if neither its key nor its ID
        # is already present there.
        if key not in licailist:
            sqlsohu = u"insert into sohulist(ID,ProductName,BankName,Currency,Duration,Product_StartDate,Sell_StartDate,Sell_EndDate,PayDuration,Return_Rate,Type,Start_Money,Area,Product_EndDate,Product_Type,create_time,source) values('"+productid+"','"+productname+"','"+bankname+"','"+currency+"',"+duration+",'"+product_startDate+"','"+sell_startDate+"','"+sell_endDate+"','"+duration+"','"+return_rate+"','"+return_type+"',"+start_money+",'"+area+"','"+product_endDate+"','"+product_type+"','"+str(datetime.now())[:19]+"','qq');"
            #print 'sqlsohu',sqlsohu
            sql2 = u'select ID from sohulist;'
            idlist2 = []
            for id2 in db.query(sql2):
                idlist2.append(id2.ID)
            if productid not in idlist2:
                try:
                    print 'sohu里没有'
                    db.execute(sqlsohu)
                    # A successful insert is recorded in the 'qq' log.
                    testlog = log.getLogging('qq')
                    testlog.critical(str(datetime.now())[:19]+'\tqq\t'+productname+'\n' )
                except:
                    print "error sql: "
            else:
                print 'sohulist idlist里已经有了'
        else:
            print 'sohu里已经有了'
Example #7
0
File: sohu.py Project: baozhen/demo
def run(url,licailist):
    """Scrape one sohu product page and insert it into ``sohulist`` if new.

    Args:
        url: path of the product page; it is appended to
            "http://db.money.sohu.com" and must match ``view/<digits>/<digits>.html``
            so the product ID can be extracted from it.
        licailist: collection of de-duplication keys of rows that already
            exist; a product whose key is in it is not inserted.

    Returns:
        None. Returns early, without inserting, when the start-money text is
        longer than 7 characters.
    """
    print "len of list is ",len(licailist)
    global db
    # NOTE(review): opened in append mode but neither written to nor closed
    # anywhere in this function.
    logs = open("crawler.log",'a')
    # NOTE(review): bankdic is not referenced again; the same two renames are
    # applied with .replace() on the page text below.
    bankdic = {
            u'中国邮政储蓄银行':u'邮政储蓄银行',
            u'平安银行(原)':u'平安银行',
            }
    # Fetch the page, strip every '%', decode from cp936 and normalise two
    # bank names.
    html = urllib.urlopen("http://db.money.sohu.com"+url).read().replace('%','').decode('cp936').replace(u'平安银行(原)','平安银行').replace(u'中国邮政储蓄银行','邮政储蓄银行')
    product_id = re.findall("view/\d+/(\d+).html",url)[0]
    print product_id,
    soup = BeautifulSoup(html)
    # Flatten the first table's markup, then pair consecutive <td> cells;
    # the second cell of each pair is taken as a field value below.
    strs = str(soup.table).replace('\t','').replace('\r\n','').replace(' ','').replace('</p>','').replace('<p>','')
    hlist = re.findall("<td.*?>(.*?)</td>\s+<td.*?>(.*?)</td>",strs,re.S)
    #print hlist
    p = []
    sql = u'select ID from sohulist;'
    idlist = []
    # NOTE(review): this hard-coded ID is always treated as already present;
    # the reason is not visible here.
    idlist.append('00051509')
    insert = 1  # insert flag: the row is written only while this stays 1
    for id2 in db.query(sql):
        idlist.append(id2.ID)
    if product_id in idlist:
        insert = 0
        print "already in"
        
    # p holds the column values by position: product ID first, then the
    # second cell of every pair found in the table.
    p.append(product_id)
    for h in hlist:
        p.append(h[1])
        #sql = u"insert into qqlist(Id,Name,BankName,StartDate,EndDate,Currency,Duration,Return_Rate,StartMoney,Type) values ('"+id1+"','"+name+"','"+bankname+"','"+startdate+"','"+enddate+"','"+currency+"',"+duration+",'"+returnrate+"%%',"+startmoney+",'"+type1+"');"
    # Compute the end day from the start date (YYYY?MM?DD by character
    # position) plus the duration in days.
    t = hlist[4][1]
    start =  datetime(int(t[:4]),int(t[5:7]),int(t[8:10]))
    duration = int(re.findall('\d+',hlist[3][1])[0])
    # For the banks listed here the end day is start + duration; for all
    # others it is one day earlier.
    if p[2] in ['上海银行','农业银行','中国银行','招商银行','浦发银行','广发银行','光大银行','北京银行','建设银行','兴业银行','平安银行','交通银行','华夏银行','中信银行','江苏银行','包商银行','杭州银行','湖北银行','富滇银行','恒丰银行','青岛银行','兰州银行']:
        endday = str(start+ timedelta(duration))[:10]
    else:
        endday = str(start+ timedelta(duration-1))[:10]
        print start,str(duration),endday
    p.append(endday)
    # tmpstr accumulates the comma-separated VALUES list of the INSERT.
    tmpstr=u''
    
    for m in p[:4]:
        tmpstr = tmpstr+"'"+m+"',"
    # Normalise the duration text in p[4] to a number of days: days as is,
    # a month counts as 30, a year as 365.
    temp=p[4]
    if '天' in temp:
        temp=int(filter(str.isdigit,temp))
    if '月' in p[4]:
        temp=int(filter(str.isdigit,temp))*30 
    if '年' in p[4]:
        temp=int(filter(str.isdigit,temp))*365
    duration=str(temp)
    tmpstr = tmpstr+duration+","
    #['2012-12-18', '2012-12-11', '2012-12-17', '93\xe5\xa4\xa9', '4.7', '\xe4\xbf\xa1\xe6\x89\x98\xe7\xb1\xbb']
    for m in p[5:8]:
        tmpstr = tmpstr+"'"+m+"',"
    tmpstr = tmpstr+"'"+duration+"',"
    
    tmpstr = tmpstr+""+p[9]+","
    tmpstr = tmpstr+"'"+p[10]+"',"
    # Start money: the unit "ten thousand yuan" is expanded by appending
    # '0000'; amounts whose text is longer than 7 characters are rejected.
    if '万元' in p[11]:
        tmpstr = tmpstr+""+p[11].replace('万元','0000')+","
        moneynu= p[11].replace('万元','0000')
        if len(moneynu)> len('9999999'):
            return
    else:
        moneynu = re.findall('\d+',p[11])[0]
        if len(moneynu)> len('9999999'):
            return
        tmpstr = tmpstr+""+moneynu+","
    # Sample keys in the older, commented-out layout that follows
    # (bank name and currency, then dates and amount run together):
    #   <bank><currency>2012-12-122013-06-252012-12-062012-12-1150000
    #   <bank><currency>2012-12-072013-04-232012-12-042012-12-061000000
    #key = p[2]+p[3]+p[5]+endday+p[6]+p[7]+moneynu
    # Current key: p[2] + p[3] + first 10 chars of p[6] and p[7] + money + days.
    key = p[2]+p[3]+p[6][:10]+p[7][:10]+str(int(moneynu))+str(int(duration))
    print "key",key
    if key in licailist:
        insert = 0
        print "already in"

    for m in p[12:]:
        tmpstr = tmpstr+"'"+m+"',"
    print type(tmpstr)
    # Positional INSERT (no column list): drop the trailing comma, remove
    # newlines, double any '%', then append create time and source.
    # NOTE(review): SQL is assembled by string concatenation from scraped
    # values; quotes in any field would break the statement.
    sql = u"insert into sohulist values("+tmpstr[:-1].replace('\n','').replace('%','%%')+",'"+str(datetime.now())[:19]+"','sohu')"
    if insert ==1:
        print sql.encode('utf-8')
        try:
            testlog = log.getLogging('sohu')
            db.execute(sql)
            # A successful insert is recorded in the 'sohu' log.
            testlog.critical(str(datetime.now())[:19]+'\tsohu\t'+key+'\n' )
        except:
            # Any failure is only reported; nothing is re-raised.
            print "error"
Example #8
0
File: sohu.py Project: baozhen/demo
            return
        tmpstr = tmpstr+""+moneynu+","
    #工商银行人民币2012-12-122013-06-252012-12-062012-12-1150000
    #工商银行人民币2012-12-072013-04-232012-12-042012-12-061000000
    #key = p[2]+p[3]+p[5]+endday+p[6]+p[7]+moneynu
    key = p[2]+p[3]+p[6][:10]+p[7][:10]+str(int(moneynu))+str(int(duration))
    print "key",key
    if key in licailist:
        insert = 0
        print "already in"

    for m in p[12:]:
        tmpstr = tmpstr+"'"+m+"',"
    print type(tmpstr)
    sql = u"insert into sohulist values("+tmpstr[:-1].replace('\n','').replace('%','%%')+",'"+str(datetime.now())[:19]+"','sohu')"
    if insert ==1:
        print sql.encode('utf-8')
        try:
            testlog = log.getLogging('sohu')
            db.execute(sql)
            testlog.critical(str(datetime.now())[:19]+'\tsohu\t'+key+'\n' )
        except:
            print "error"
        

# Script entry point: record the start in the 'sohu' log and on stdout, then crawl.
if __name__ == "__main__":
    testlog = log.getLogging('sohu')
    # str(datetime.now())[:19] keeps the first 19 characters of the timestamp
    # text, i.e. drops the fractional seconds.
    testlog.critical(str(datetime.now())[:19]+'\tsohu\tstarted\n' )
    print str(datetime.now())[:19]+'\tsohu\tstarted\n' 
    main()