def extractDict(self):
    """Scrape ``self.urls`` (a 58.com listing) into ``self.fd`` and return it.

    The page parser is chosen by ``self.kind``: ``"1"`` -> ``self.sell``,
    ``"2"`` -> ``self.rent``, ``"3"`` -> ``self.buy``, anything else ->
    ``self.require``.  Any exception raised while scraping is logged and
    swallowed, so a broken page still produces a dict padded with the
    defaults from ``getDefaultVal(self.kind)``.

    Returns:
        ``None`` when ``checkPath`` reports a truthy value for this URL
        (presumably "already processed" -- confirm against ``checkPath``),
        or when ``isDEV`` is set (the fields are printed instead).
        Otherwise ``self.fd``; this is a fresh empty dict when the parser
        flagged the record with ``is_ok == False``.

    NOTE(review): the block structure was reconstructed from a
    whitespace-collapsed source line; confirm the nesting against the
    original file.
    """
    if checkPath(homepath, self.folder, self.urls):
        return None

    try:
        self.fd["posttime"] = 0
        if self.kind == "1":
            self.sell(self.urls)
        elif self.kind == "2":
            self.rent(self.urls)
        elif self.kind == "3":
            self.buy(self.urls)
        else:
            self.require(self.urls)
        # The city code is the sub-domain of the listing URL
        # (network location with the ".58.com" suffix removed).
        self.fd['city'] = urlparse(self.urls)[1].replace('.58.com', "")
        makePath(homepath, self.folder, self.urls)
        # A disabled "older than seven days" cut-off used to follow here.
    except Exception:
        # Best effort: a page that fails to parse is logged and then
        # completed with default values below.
        msglogger.info("%s 链接采集异常" % self.urls)

    # Fill in every field the parser did not provide.
    defaults = getDefaultVal(self.kind)
    for key, value in defaults.items():
        if key not in self.fd:
            self.fd[key] = value

    if isDEV:
        # Development mode: dump each expected field with its type and
        # return nothing.
        for key in defaults:
            value = self.fd[key]
            print("%s %s %s" % (key, value, type(value)))
        return None

    # 'su' is expanded to 'suzhou'; a missing city also falls back to
    # 'suzhou' (this is what the former bare ``except:`` achieved).
    if 'city' not in self.fd or self.fd['city'] == 'su':
        self.fd['city'] = 'suzhou'
    self.fd["is_checked"] = 1
    self.fd["web_flag"] = "58"

    # Deliberately ``==`` rather than ``is``: a stored 0 is treated like
    # False, exactly as before.
    if self.fd.get('is_ok') == False:  # noqa: E712
        self.fd = {}

    # NOTE(review): the statements that followed this return in the
    # original (a debug dump of the fields and an HTTP POST to
    # site.jjr360.com) were unreachable and have been removed.
    return self.fd
def extractDict(self):
    """Scrape ``self.urls`` into ``self.fd`` and return the field dict.

    The page parser is chosen by ``self.kind``: ``"1"`` -> ``self.sell``,
    ``"2"`` -> ``self.rent``, ``"3"`` -> ``self.buy``, anything else ->
    ``self.require``.  Any exception raised while scraping is logged and
    swallowed; in that case ``house_title`` is set to ``None`` and the
    remaining fields are padded with ``getDefaultVal(self.kind)``.

    Returns:
        ``None`` when ``checkPath`` reports a truthy value for this URL
        (presumably "already processed" -- confirm against ``checkPath``),
        or when ``isDEV`` is set (the fields are printed instead).
        Otherwise ``self.fd`` with ``is_checked`` set to 1 and
        ``web_flag`` set to ``"gj"``.

    NOTE(review): the block structure was reconstructed from a
    whitespace-collapsed source line; confirm the nesting against the
    original file.
    """
    if checkPath(homepath, self.folder, self.urls):
        return None

    try:
        if self.kind == "1":
            self.sell(self.urls)
        elif self.kind == "2":
            self.rent(self.urls)
        elif self.kind == "3":
            self.buy(self.urls)
        else:
            self.require(self.urls)
        makePath(homepath, self.folder, self.urls)
        # A disabled "older than seven days" cut-off used to follow here.
    except Exception:
        # Best effort: mark the title as missing, log the URL, and let the
        # defaults below complete the record.
        self.fd['house_title'] = None
        msglogger.info("%s 链接采集异常" % self.urls)

    # Fill in every field the parser did not provide.
    defaults = getDefaultVal(self.kind)
    for key, value in defaults.items():
        if key not in self.fd:
            self.fd[key] = value

    if isDEV:
        # Development mode: dump each expected field with its type and
        # return nothing.
        for key in defaults:
            value = self.fd[key]
            print("%s %s %s" % (key, value, type(value)))
        return None

    self.fd["is_checked"] = 1
    self.fd["web_flag"] = "gj"

    # NOTE(review): the debug dump and separator print that followed this
    # return in the original were unreachable and have been removed.
    return self.fd