def loadconf():
    global CONFIG  # declared for module-level use; not assigned in this function
    global SQL_CONN
    config = ConfigParser.ConfigParser()
    config.readfp(codecs.open("conf/ef.ini", "r"))
    log.info("conf loaded")
    SQL_CONN = {
        "host": config.get("database", "host"),
        "db": config.get("database", "db"),
        "port": config.getint("database", "port"),
        "user": config.get("database", "user"),
        "passwd": config.get("database", "passwd"),
    }
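# The [database] section loadconf() reads from conf/ef.ini is expected to look like
# the sketch below; the keys match the config.get() calls above, but the values are
# placeholders, not taken from a real config file.
#
#   [database]
#   host   = 127.0.0.1
#   port   = 3306
#   db     = fund
#   user   = fund_user
#   passwd = secret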
def update_all_nav(fundcode=None, start=None, end=None):
    log.info('update_all_nav start')
    global SQL_CONN
    # Avoid mutable / eagerly-evaluated defaults: None means "all funds" / "up to today".
    if end is None:
        end = datetime.now()
    req = 'http://biz.finance.sina.com.cn/fundinfo/open/lsjz.php?fund_code='
    fundstart = {}
    endtime = end.strftime('%Y-%m-%d')
    conn = pydb.connect(**SQL_CONN)
    cur = conn.cursor()
    # Each fund is crawled from its inception date (birthday) up to the end date.
    cur.execute("select code,birthday from fund_info")
    for row in cur.fetchall():
        fundstart[row[0]] = {'startdate1': row[1].strftime('%Y-%m-%d'), 'enddate1': endtime}
    if not fundcode:
        fundcode = fundstart.keys()
    sql4data = '''insert into fund_data ( code , date , nav , tnav)
                  values (%s,%s,%s,%s)
                  on duplicate key update nav=%s, tnav=%s'''
    ind = 1
    count = len(fundcode)
    cur = conn.cursor()
    for fc in fundcode:
        log.info("start %s %s/%s" % (fc, ind, count))
        crawl = crawler()
        crawl.settranslator("xml")
        postdata = fundstart[fc]
        if start is not None:
            postdata['startdate1'] = start.strftime('%Y-%m-%d')
        header = {'Referer': req + fc}  # the original 'Refer' header name was a typo
        crawl.seturi(req + fc, postdata, header)
        crawl.setcfgfile("crawler_allfund_of_sina.xml")
        result = crawl.parse()
        log.debug("data %s %s" % (fc, result))
        if result:
            # Upsert one row per trading day: (code, date, nav, tnav).
            for data in result:
                cur.execute(sql4data, (fc, data['date']) + (data['nav'], data['tnav']) * 2)
        else:
            log.error("no information about %s" % fc)
        warnings = cur.fetchwarnings()
        if warnings:
            log.warn("db: %s" % (warnings,))
        conn.commit()
        log.info("done %s %s/%s" % (fc, ind, count))
        ind = ind + 1
    conn.close()
    log.info('update_all_nav done')
def seturi(self, uri, params=None, headers=None):
    log.info("loading %s %s" % (uri, params))
    self.uri = uri
    if params is not None and isinstance(params, dict):
        params = urllib.urlencode(params)
    # Retry the fetch up to MAX_URL_OPEN times, sleeping between attempts
    # (but not after the last one).
    for i in range(1, self.MAX_URL_OPEN + 1):
        try:
            opener = urllib.URLopener()
            opener.addheader("User-Agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; Embedded Web Browser from: http://bsalsa.com/; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.2; .NET CLR 1.1.4322; Tablet PC 2.0)")
            opener.addheader("Cache-Control", "no-cache")
            for k, v in (headers or {}).iteritems():
                opener.addheader(k, v)
            self.content = opener.open(uri, params).read()
            break
        except Exception, e:
            log.error("%s got error %s" % (i, e))
            if i < self.MAX_URL_OPEN:
                time.sleep(5)
def parse(self):
    self.result = None
    if self.content is None:
        log.error("no content for parsing")
        return
    if self.cfg is None:
        log.error("no config file")
        return
    if self.tran is None:
        log.error("no translator")
        return
    log.info("parsing")
    data = self._parse(self.tran.getroot(), self.cfg)
    log.info("done!")
    _writetologfile(self.uuid, json.dumps(data, ensure_ascii=False, indent=4))
    return data
def update_all_nav2(fundcode=['050001'], start=None, end=datetime.now()):
    log.info('update_all_nav2 start')
    req = 'http://biz.finance.sina.com.cn/fundinfo/open/lsjz.php?fund_code='
    for fc in fundcode:
        crawl = crawler()
        crawl.settranslator("xml")  # parse() needs a translator, as in update_all_nav
        header = {'Referer': req + fc}
        crawl.seturi(req + fc, None, header)
        crawl.setcfgfile("crawler_allfund_of_sina.xml")
        result = crawl.parse()
        log.debug("data %s %s" % (fc, result))
        print result
    log.info('update_all_nav2 done')
def update_fund_info():
    log.info('update_fund_info start')
    global SQL_CONN
    crawl = crawler()
    crawl.settranslator("xml")
    crawl.setcfgfile("crawler_fund_of_163.xml")
    result = crawl.parse()
    sql4info = '''INSERT INTO fund_info ( CODE , NAME , manager , size , company , birthday , `type` , status)
                  VALUES (%s,%s,%s,%s,%s,%s,%s,%s)
                  ON DUPLICATE KEY UPDATE NAME=%s, manager=%s, size=%s, company=%s, birthday=%s, `type`=%s, status=%s'''
    sql4data = '''insert into fund_data ( code , date , nav , tnav)
                  values (%s,%s,%s,%s)
                  on duplicate key update nav=%s, tnav=%s'''
    conn = pydb.connect(**SQL_CONN)
    cur = conn.cursor()
    # parse() returns None when it has nothing to work with; guard before .get().
    array = (result or {}).get('data', [])
    for data in array:
        log.info("update %s %s" % (data['code'], data['name']))
        # Upsert the fund's static info, then its latest NAV row.
        d1 = (data['code'],) + (data['name'], data['manager'], data['size'], data['company'],
                                data['birthday'], data['type'], data['status']) * 2
        log.debug("data %s" % list(d1))
        cur.execute(sql4info, d1)
        d2 = (data['code'], data['date']) + (data['nav'], data['tnav']) * 2
        log.debug("data %s" % list(d2))
        cur.execute(sql4data, d2)
    warnings = cur.fetchwarnings()
    if warnings:
        log.warn("db: %s" % (warnings,))
    conn.commit()
    cur.close()
    conn.close()
    log.info('update_fund_info done')
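# Minimal driver sketch. The call order below is an assumption inferred from the data
# flow in this module (loadconf() fills SQL_CONN, update_fund_info() populates the
# fund_info table, and update_all_nav() reads fund_info to decide which codes and
# start dates to crawl); the real entry point may wire these up differently.
if __name__ == '__main__':
    loadconf()
    update_fund_info()
    update_all_nav()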