def get_integral_page(self, response):
    """Parse the account's point ("integral") balance and decide whether to go on.

    The balance is extracted from the GBK-encoded ``response.body`` with a
    regular expression, stored through ``sql.update_account_point`` /
    ``handle_db``, and compared against ``settings.INTEGERAL``.

    NOTE(review): a second ``get_integral_page`` is defined later in this
    file; if both live in the same class the later one replaces this one --
    confirm which is intended.

    :param response: response whose ``body`` holds the account page.
    :return: the result of ``self.get_topic(response)`` when the balance is
        above ``settings.INTEGERAL``, when no balance could be parsed, or
        when any step fails; ``None`` when the balance is at or below the
        threshold.
    """
    try:
        print('get_integral_page')
        pattern = u'金币: <a href=".*?">(.*?)</a>'.encode('gbk', 'ignore')
        matches = re.findall(pattern, response.body, re.I)
        # No match used to surface as an IndexError swallowed by the broad
        # handler below; treat it as "balance not found" instead.
        integral = matches[0].replace(' ', '') if matches else ''
        print('integral %s' % (integral,))

        if not integral:
            log.msg('cann\'t get user\'s integral', log.ERROR)
            return self.get_topic(response)

        # Persist the freshly parsed balance before deciding what to do next.
        update_user_integral_sql = sql.update_account_point(
            self.username, self.platform, integral)
        n = handle_db(update_user_integral_sql)
        log.msg(('update user(%s)\'s integral is: %s, %s'
                 % (self.username, integral, n)), log.INFO)

        # Accounts at or below the configurable threshold may not download.
        if int(integral) > settings.INTEGERAL:
            return self.get_topic(response)

        print('return None')
        return None
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None,
        # which made the old log entry read just "None".
        log.msg(traceback.format_exc(), log.ERROR)
        request = self.get_topic(response)
        print('except')
        return request
def get_apk(self, response):
    """Save a downloaded APK to disk and record the download in the database.

    Steps, in order:

    1. write ``response.body`` to a new file inside the directory returned
       by ``utils.make_spiderdir(self.platform, 'download')``;
    2. insert a software-info row and mark the topic URL as crawled;
    3. decrement the account's stored point count by one;
    4. pass the file to ``autocopy.copy`` (failures are logged, not raised);
    5. continue with ``self.repeat_reply(response)``.

    :param response: download response; ``response.request.meta['topic_url']``
        must contain the URL of the topic the file came from.
    :return: whatever ``self.repeat_reply(response)`` returns.
    """
    filename = ''.join([str(random.randrange(1, 100000)), '.apk'])
    # Helper that creates (if needed) and returns the download directory.
    down_dir = utils.make_spiderdir(self.platform, 'download')
    # Some file names may be utf-8 encoded byte strings; decode to unicode.
    try:
        filename = filename.decode('utf-8', 'ignore')
    except (AttributeError, UnicodeError):
        pass
    filename = ''.join(
        [down_dir, os.sep, str(time.time()).split('.')[0], filename])

    # Context manager guarantees the handle is flushed and closed even if
    # the write fails part-way.
    with open(filename, 'wb') as apk_file:
        apk_file.write(response.body)

    # Only touch the database after the file is on disk, so rows always
    # refer to an existing download.
    topic_url = response.request.meta['topic_url']
    hashurl = sql.hash_topic(topic_url)
    updatedate = time.strftime('%Y-%m-%d %H:%M:%S')
    # Backslashes are doubled before the path is embedded in the SQL text.
    # NOTE(review): this escaped path is also what autocopy.copy receives
    # below (unchanged from the original) -- confirm that is intended.
    filename = filename.replace('\\', '\\\\')
    insert_sql = sql.insert_softwareinfo(
        self.platform, hashurl, topic_url, updatedate, filename)
    status = handle_db(insert_sql)
    log.msg(str(status), log.DEBUG)

    # Mark the topic as crawled in the topic table.
    update_topic_url = sql.topicurl_withcrawed(topic_url)
    status = handle_db(update_topic_url)
    log.msg(str(status), log.DEBUG)

    # Reaching this point means the file was downloaded, which costs the
    # account one point.
    # presumably column 5 of the account row is the point balance -- verify
    # against the table schema.
    account_sql = sql.select_accountbyusername(self.username, self.platform)
    point_num = handle_db(account_sql)['data'][0][5]
    point_num -= 1
    update_account_pointsql = sql.update_account_point(
        self.username, self.platform, point_num)
    n = handle_db(update_account_pointsql)
    if n['errorNo'] == 0:
        # Report the balance that was just written; the previous code logged
        # self.reply_num here, which is not the point balance.
        log.msg(('<username: %s \'s integral is : -1 ,now integral is %s>'
                 % (self.username, point_num)), log.INFO)

    # Copying is best-effort: a failure is logged and the crawl continues.
    try:
        autocopy.copy(filename, self.platform)
        log.msg('copy job is successed', log.INFO)
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None.
        log.msg(traceback.format_exc(), log.ERROR)
        log.msg('copy job is failture', log.ERROR)

    request_topic = self.repeat_reply(response)
    return request_topic
def get_integral_page(self, response):
    """Parse and store the account's point ("integral") balance, then continue.

    The balance is extracted from the GBK-encoded ``response.body`` with a
    regular expression and stored through ``sql.update_account_point`` /
    ``handle_db``. Replying and downloading run in parallel, so the
    "too few points to download" cut-off is deliberately not applied here:
    stopping at this point would also stop replying.

    :param response: response whose ``body`` holds the account page.
    :return: the result of ``self.get_topic(response)`` in every case
        (balance found, balance missing, or an error while processing).
    """
    try:
        pattern = u'<li><em>金币</em>(.*?)</li>'.encode('gbk', 'ignore')
        matches = re.findall(pattern, response.body, re.I)
        # No match used to surface as an IndexError swallowed by the broad
        # handler below; treat it as "balance not found" instead.
        integral = matches[0].replace(' ', '') if matches else ''

        if integral:
            update_user_integral_sql = sql.update_account_point(
                self.username, self.platform, integral)
            n = handle_db(update_user_integral_sql)
            log.msg(('update user(%s)\'s integral is: %s, %s'
                     % (self.username, integral, n)), log.INFO)
        else:
            log.msg('cann\'t get user\'s integral', log.ERROR)

        return self.get_topic(response)
    except Exception:
        # format_exc() returns the traceback text; print_exc() returns None,
        # which made the old log entry read just "None".
        log.msg(traceback.format_exc(), log.ERROR)
        return self.get_topic(response)