def set_other_info(self):
    """Compute the time window string and store it in ``self.time_duration``.

    The upper bound comes from ``get_date_with_day_duration(0)`` and the
    lower bound from the same helper called with -7 (the default, also used
    for ``'1w'``) or -180 (when ``self._duration`` is ``'6m'``). Each bound
    is rendered as ``YYYYMMDD000000`` and the two are joined as
    ``<low>_<high>``.
    """
    # Day offset per duration code; any other code falls back to -7.
    offsets = {'1w': -7, '6m': -180}
    stamp_format = "%d%02d%02d000000"

    upper = get_date_with_day_duration(0)
    lower = get_date_with_day_duration(offsets.get(self._duration, -7))

    lower_stamp = stamp_format % (lower.year, lower.month, lower.day)
    upper_stamp = stamp_format % (upper.year, upper.month, upper.day)
    self.time_duration = "{}_{}".format(lower_stamp, upper_stamp)
def report(self):
    """Build the per-channel report for today vs. yesterday and mail it.

    For every entry of ``self._channels`` two lines are produced: one for
    the "check result" section (``<channel>: <percent>``) and one for the
    "data change" section (``<yesterday> ==> <today>``). Channels listed in
    ``Config.SEALED_CHANNELS`` are not evaluated and are reported as N/A.
    The assembled text is sent through ``sendmail`` to ``self._emails``
    with ``self._email_title`` as the subject.
    """
    today = get_date_with_day_duration(0)
    yesterday = get_date_with_day_duration(-1)
    today_str = "%d%02d%02d" % (today.year, today.month, today.day)
    yesterday_str = "%d%02d%02d" % (yesterday.year, yesterday.month, yesterday.day)

    # Collect lines in lists and join once instead of growing strings with +=.
    result_lines = []
    change_lines = []
    for folder in self._channels:
        # Sealed channels are not calculated; only attach the prompt text.
        if folder in Config.SEALED_CHANNELS:
            result_lines.append(
                "%s: N/A(%s)\n" % (folder, Config.PROMPT_INFO['sealed_info'].encode('utf-8')))
            # Terminate the line so the next channel's entry does not get
            # glued onto this one.
            change_lines.append("N/A ==> N/A\n")
            continue

        # A missing/empty result is reported as 'N/A'.
        today_rs = self.get_result_with_folder_and_date(folder, today_str) or 'N/A'
        yesterday_rs = self.get_result_with_folder_and_date(folder, yesterday_str) or 'N/A'

        is_normal = self.check_if_normal(today_rs, yesterday_rs)
        # Channels without a crawl-completeness rate are always treated as normal.
        if folder in Config.CHANNELS_NOT_CALCURATE_FULL_RATE:
            is_normal = True

        # The percentage is the last ':'-separated field of today's result.
        # The guard must look at the result, not at the date string.
        if today_rs != 'N/A':
            percent = today_rs.split(':')[-1]
        else:
            percent = 'N/A'

        result_lines.append("%s: %s\n" % (folder, percent))
        change = "%s ==> %s" % (yesterday_rs, today_rs)
        if not is_normal:
            # Flag the abnormal change on the same line as the values.
            change += "\t <数据变化异常>"
        change_lines.append(change + "\n")

    msg = """时间:%s\n\n检查JD结果:\n %s \n\n 数据变化:\n%s""" % (
        today_str, "".join(result_lines), "".join(change_lines))
    sendmail(self._emails, self._email_title, msg)
def set_file_path(self):
    """Set ``self.id_fn`` and ``self.result_fn`` for the current run.

    ``self.id_fn`` is ``<owner>/<index file name>`` and ``self.result_fn``
    is the result-folder template filled with the owner and a ``YYYYMMDD``
    tag built from ``get_date_with_day_duration(0)``. When
    ``self._duration`` is ``'1w'`` the latest-one-week file name and
    template from ``Config`` are used instead of the regular ones.
    """
    current = get_date_with_day_duration(0)
    date_tag = "%d%02d%02d" % (current.year, current.month, current.day)

    if self._duration == '1w':
        # Latest one week: dedicated index file and result template.
        idx_name = Config.LATEST_ONE_WEEK_IDX_FILE_NAME
        result_template = Config.LATEST_ONE_WEEK_RESULT_FOLDER_TEMPLATE
    else:
        idx_name = Config.IDX_FILE_NAME
        result_template = Config.RESULT_FOLDER_TEMPLATE

    self.id_fn = "%s/%s" % (self._owner, idx_name)
    self.result_fn = result_template % (self._owner, date_tag)
def event_handler(self, evt, msg, **kwargs):
    """Handle an event; on ``'DONE'`` mail ``msg`` and publish the result file.

    Any event other than ``'DONE'`` is ignored. On ``'DONE'``:

    1. ``msg`` is e-mailed via ``util.send_email``.
    2. If ``app/share/<channel>_result.txt`` already exists it is moved to
       a history file ``app/share/<channel>_<YYYYMMDDhhmm>.txt``.
    3. ``self.result_file`` is moved to ``app/share/<channel>_result.txt``.

    :param evt: event name; only ``'DONE'`` triggers any work.
    :param msg: body of the notification e-mail.
    :param kwargs: accepted for interface compatibility; not used here.
    :return: ``None``.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    import subprocess

    if evt != 'DONE':
        return

    util.send_email(["<*****@*****.**>"], "{} 样本对比".format(self.channel), msg)

    # File that holds the final result.
    filedest = 'app/share/%s_result.txt' % self.channel
    if os.path.exists(filedest):
        now = util.get_date_with_day_duration()
        stamp = '%d%02d%02d%02d%02d' % (now.year, now.month, now.day, now.hour, now.minute)
        history_fn = os.path.join(os.path.dirname(filedest),
                                  '%s_%s.txt' % (self.channel, stamp))
        # Move the previous result to a history file. The paths are passed
        # as an argument list (no shell), so whitespace or shell
        # metacharacters in them cannot split or alter the command. As
        # before, the exit status of ``mv`` is not checked.
        subprocess.call(['mv', filedest, history_fn])

    # Put the latest result under the share directory, where it is offered
    # for download.
    subprocess.call(['mv', self.result_file, filedest])
def get_bin_file_and_last_pos(self):
    """Pick the bin file to process next and the offset to start from.

    :return: a ``(path, position)`` tuple:

        * ``(saved file, int(saved position))`` when the saved file's
          mtime is newer than the saved modification time;
        * ``(None, None)`` when the saved file is already this month's
          file (and it was not modified since);
        * ``(this month's file, 0)`` otherwise.
    """
    saved_name, saved_mtime, saved_pos = self._get_file_and_pos_from_saved_file()

    # The previously handled file was updated: keep processing it.
    if saved_name and int(os.stat(saved_name).st_mtime) > saved_mtime:
        return saved_name, int(saved_pos)

    # The previous file is finished; switch to this month's file
    # (files are split per month).
    today = util.get_date_with_day_duration(0)
    monthly_file = "/data/crawler/_files3_/%s/%d/%s_%04d%02d.bin" % (
        self.channel, today.year, self.channel, today.year, today.month)

    # Identical paths mean the saved file is already the newest one and it
    # simply has not been updated.
    if saved_name == monthly_file:
        return None, None
    return monthly_file, 0
def get_ids(self):
    """Collect ids into a temp file, then rotate it in as the index file.

    ``self.get_one()`` is called until ``self.count`` reaches
    ``CVConfig.TOTAL_COUNT``; every truthy id is written on its own line
    to ``self._tmp_idx_fn`` and increments ``self.count``. Afterwards an
    existing ``self._idx_fn`` is renamed to ``<idx_fn>_<YYYYMMDD>`` and the
    temp file is moved to ``self._idx_fn``.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    import subprocess

    now = get_date_with_day_duration(0)
    now_str = "%d%02d%02d" % (now.year, now.month, now.day)

    # Ensure the index file's directory exists. ``os.path.dirname`` is used
    # instead of ``split('/')[0]`` so that a bare file name does not get a
    # directory created in its place and nested/absolute paths work too.
    idx_dir = os.path.dirname(self._idx_fn)
    if idx_dir and not os.path.exists(idx_dir):
        os.makedirs(idx_dir)

    with open(self._tmp_idx_fn, 'wb') as f:
        while self.count < CVConfig.TOTAL_COUNT:
            jlid = self.get_one()
            if not jlid:
                # Single-argument print(...) emits the same text under the
                # Python 2 print statement and the Python 3 function.
                print("FAIL")
                continue
            print("SUCESS, count:{}".format(self.count))
            f.write('%s\n' % jlid)
            self.count += 1

    # Keep the previous index under a dated name, then promote the temp
    # file. Argument lists (no shell) keep unusual characters in the paths
    # from breaking the command; as before, the exit status of ``mv`` is
    # not checked.
    if os.path.exists(self._idx_fn):
        subprocess.call(['mv', self._idx_fn, '%s_%s' % (self._idx_fn, now_str)])
    subprocess.call(['mv', self._tmp_idx_fn, self._idx_fn])
def get_cases(self):
    """Collect ids into ``datas/idx-ing``, then rotate it in as ``datas/idx``.

    ``self.get_one()`` is called until ``self.count`` reaches
    ``JdConfig.TOTAL_IDS``; every truthy id is written as a decimal line
    and increments ``self.count``. Afterwards an existing ``datas/idx`` is
    renamed to ``datas/idx_<YYYYMMDD>`` and the temp file becomes
    ``datas/idx``.
    """
    current = get_date_with_day_duration(0)
    date_tag = "%d%02d%02d" % (current.year, current.month, current.day)

    if not os.path.exists('datas'):
        os.mkdir('datas')

    pending_path = 'datas/idx-ing'
    with open(pending_path, 'wb') as out:
        while self.count < JdConfig.TOTAL_IDS:
            case_id = self.get_one()
            if not case_id:
                # "%s %s" reproduces the space the print statement puts
                # between its two items.
                print("%s %s" % ("fail id: ", case_id))
                continue
            out.write('%d\n' % case_id)
            self.count += 1
            print("%s %s" % ("total count:", self.count))

    # Keep the previous index under a dated name, then promote the temp file.
    if os.path.exists('datas/idx'):
        os.system("mv datas/idx datas/idx_%s" % date_tag)
    os.system('mv %s datas/idx' % pending_path)
def get_today_bin_file_name(self):
    """Return the bin file path for ``self.channel`` and the current month.

    The path is ``IncEtlDispatcher.CRAWLER_BIN_FILE_TEMPLATE`` filled with
    the channel, year, channel, year and month, where the date comes from
    ``util.get_date_with_day_duration(0)``.
    """
    current = util.get_date_with_day_duration(0)
    template_args = (self.channel, current.year, self.channel, current.year, current.month)
    return IncEtlDispatcher.CRAWLER_BIN_FILE_TEMPLATE % template_args
jdid = self.test_page(url, jdid) if jdid: self.counter += 1 sys.stderr.write("OK %d\n" % self.counter) else: sys.stderr.write("FAIL\n") return jdid except Exception as e: print e return None if __name__ == "__main__": wl = JdWLTests() now = get_date_with_day_duration(0) now_str = "%d%02d%02d" % (now.year, now.month, now.day) if not os.path.exists('datas'): os.mkdir('datas') tmp_fn = 'datas/idx-ing' with open(tmp_fn, 'w') as fo: while wl.counter < JdConfig.TOTAL_IDS: jdid = wl.find_one() if jdid is not None: fo.write("%s\n" % jdid) if os.path.exists('datas/idx'): os.system("mv datas/idx datas/idx_%s" % now_str) os.system('mv %s datas/idx' % tmp_fn)