def retrieve_reports(url, market, dst_file, date_val, cols) -> DownloadDetails:
    """Download report entries for one market/date via POST and save them to CSV.

    Returns a DownloadDetails tuple:
      - skipped=False, status=True  -> data downloaded and saved
      - skipped=True,  status=True  -> request OK but the result set was empty
      - skipped=False, status=False -> HTTP error, bad JSON, or save failure
    """
    request_data = dict(market=market, date=date_val)
    response = requests.post(url=url, headers=headers.get_random_headers(), data=request_data)
    if response.status_code == requests.codes.ok:
        text = response.text
        try:
            json_val = json.loads(text)
            results = json_val['results']
        except (ValueError, KeyError) as e:
            # Malformed JSON or missing 'results' key: log and fall through
            # to the failure return at the bottom.
            app_logger.error(e, exc_info=True)
        else:
            app_logger.info("Downloaded entries for date %s OK" % date_val)
            # save if there's some non-empty data
            if results:
                if save_to_file(dst_csv_file_name=dst_file, csv_cols=cols, data=results):
                    app_logger.info("Saved entries for date %s OK" % date_val)
                    return DownloadDetails(skipped=False, status=True)
                # NOTE(review): a failed save falls through to the generic
                # failure return below without its own log line.
            else:
                app_logger.warning("Skipped empty entries for date %s" % date_val)
                return DownloadDetails(skipped=True, status=True)
    else:
        app_logger.error(
            "Data for %s is not available, request returned %d status" %
            (date_val, response.status_code))
    return DownloadDetails(skipped=False, status=False)
def alarm_big_key(begin_time=None):
    """Send every unsent big-key alarm logged since *begin_time* to DingTalk.

    Bug fix: the old default ``datetime.now() - timedelta(hours=12)`` was
    evaluated once at import time, so a long-running process queried an
    ever-more-stale window. The default (12 hours ago) is now computed per call.

    :param begin_time: earliest alarm-log timestamp to send; defaults to
        "now minus 12 hours" at call time.
    :raises Exception: when the pending alarm list cannot be fetched.
    """
    if begin_time is None:
        begin_time = datetime.now() - timedelta(hours=12)
    begin_time_str = begin_time.strftime("%Y-%m-%d %H:%M:%S")
    app_logger.info("开始发送报警")
    title = "大key报警"
    try:
        with get_db() as session:
            alarm_logs = CRUD_Alarm_Log.get_not_sended_log(session, begin_time_str)
    except Exception as ex:
        app_logger.error("获取报警信息失败! ex:{0}".format(ex), exc_info=True)
        raise Exception("获取报警信息失败")
    if len(alarm_logs) == 0:
        app_logger.info("无未发送报警")
    for alarm_log in alarm_logs:
        sleep(2)  # throttle DingTalk notifications
        try:
            dingclient.sendto_ding(title, alarm_log.message)
        except Exception as ex:
            # Best-effort: a failed send is logged and the entry stays unsent.
            app_logger.error("发送报警失败! ex:{0}, message:{1}".format(
                ex, alarm_log.message), exc_info=True)
            continue
        try:
            with get_db() as session:
                CRUD_Alarm_Log.set_log_is_sended(session, alarm_log.log_id)
        except Exception as ex:
            # Sent but not marked: the alarm may be re-sent next run.
            app_logger.error("跟新报警消息状态失败! ex:{0}, message:{1}".format(
                ex, alarm_log.message), exc_info=True)
            continue
def load_last_date(fname):
    """Read the first line of *fname* and parse it as a ``DD-MM-YYYY`` date.

    :param fname: path to a file whose first line holds the date string.
    :return: the parsed ``datetime``, or ``None`` when the file is missing,
        unreadable, or does not contain a valid date.
    """
    try:
        with open(fname) as f:
            val = f.readline().strip()
        return datetime.strptime(val, "%d-%m-%Y")
    except (OSError, ValueError) as e:
        # Narrowed from a bare ``except Exception``: only file-I/O errors and
        # strptime parse failures are expected here; anything else should surface.
        app_logger.info(e, exc_info=True)
        return None
async def check_port(ip, port):
    """Probe a single TCP *port* on *ip*; return {"port": ..., "state": "open"/"close"}.

    Fixes over the original:
      - the bare ``except:`` also swallowed ``asyncio.CancelledError``,
        making the scan uncancellable; only timeout/connection errors are
        treated as "closed" now;
      - the writer/transport was never closed on success (socket leak).
    """
    logger.info("Check {} on {}".format(port, ip))
    conn = asyncio.open_connection(ip, port)
    try:
        reader, writer = await asyncio.wait_for(conn, timeout=3)
    except (asyncio.TimeoutError, OSError):
        # Refused, unreachable, or no answer within 3s -> closed.
        return {"port": port, "state": "close"}
    else:
        writer.close()
        return {"port": port, "state": "open"}
def disable_job(job_id):
    """Mark the analysis job as disabled. (标记job为不可用)

    Always returns 0; failures are logged, never raised.
    """
    app_logger.info("禁用job, jid:{0}".format(job_id))
    try:
        with get_db() as session:
            CRUD_Analysis_Job.disable_job(session, job_id)
    except Exception as ex:
        # Fixed: the failure log labelled a *job* id as "iid" (instance id)
        # and dropped the traceback; use "jid" and exc_info=True for
        # consistency with the other wrappers in this module.
        app_logger.error("禁用job失败, jid:{0}, ex:{1}".format(job_id, ex),
                         exc_info=True)
    return 0
def enable_instance(instance_id):
    """Mark the Redis instance as enabled. (标记实例为可用)

    Always returns 0; failures are logged, never raised.
    """
    app_logger.info("启用实例, iid:{0}".format(instance_id))
    try:
        with get_db() as session:
            CRUD_Instances_Info.enable_instance(session, instance_id)
    except Exception as ex:
        # Fixed: record the traceback (exc_info=True) for consistency with
        # the other error logs in this module.
        app_logger.error("启用实例失败, iid:{0}, ex:{1}".format(instance_id, ex),
                         exc_info=True)
    return 0
def check_keys(keys_info):
    """Scan per-key stats for threshold breaches and build an alarm-log record.

    :param keys_info: list of measurement dicts with "tags" (instance_id,
        instance_name, key, type) and "fields" (bytes) sub-dicts; all entries
        are assumed to belong to one instance (taken from the first element).
    :return: {} when nothing breached a threshold, otherwise a dict with
        "instance_id", "instance_name" and the rendered alarm "message".
    """
    alarm_keys_list = []
    alarm_log_dict = {}
    if not keys_info:
        return alarm_log_dict
    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    instance_id = keys_info[0]["tags"]["instance_id"]
    instance_name = keys_info[0]["tags"]["instance_name"]
    for key in keys_info:
        # Per-type threshold when configured, falling back to the "other"
        # default — this replaces the original duplicated if/elif branches.
        limit = alarm_threshold.get(key["tags"]["type"], alarm_threshold["other"])
        if int(key["fields"]["bytes"]) > limit:
            alarm_keys_list.append({
                "key": key["tags"]["key"],
                "type": key["tags"]["type"],
                "bytes": key["fields"]["bytes"],
            })
    if not alarm_keys_list:
        return alarm_log_dict
    alarm_message_dict = {
        "iid": instance_id,
        "i_name": instance_name,
        "keys": alarm_keys_list,
        "current": current_time,
    }
    # Expose the configured thresholds to the message template as well.
    alarm_message_dict.update(alarm_threshold)
    alarm_message = alarm_template.format(**alarm_message_dict)
    app_logger.info(alarm_message)
    alarm_log_dict = {
        "instance_id": instance_id,
        "instance_name": instance_name,
        "message": alarm_message,
    }
    return alarm_log_dict
def preprocess_data() -> bool:
    """Merge the de-duplicated 4th-column values of every CSV in DATA_DIR
    into PROCESSED_DATA_DIR/single.csv.

    At most two partial Series are kept in memory: each new file's unique
    values are concatenated into the running result and de-duplicated.

    :return: True when a result Series was produced and written, else False.
    """
    app_logger.info("Preparing data started...")
    start = time.time()
    series: List[dd.Series] = []
    for f in os.listdir(constants.DATA_DIR):
        if not f.endswith(".csv"):
            app_logger.warning("non-CSV file found in DATA_DIR: %s" % f)
            continue
        app_logger.info("Processing %s" % f)
        try:
            if len(series) < 2:
                df = dd.read_csv(constants.DATA_DIR / f, header=None)
                if len(df.columns) != PriceDataProcessor.REQUIRED_CSV_FORMAT_COLUMNS_COUNT:
                    app_logger.error(
                        "File %s has insufficient amount of columns: required %d, found %d"
                        % (f, PriceDataProcessor.REQUIRED_CSV_FORMAT_COLUMNS_COUNT,
                           len(df.columns)))
                    continue
                # we are interested in the 4th column's values
                fourth_col: dd.Series = df.iloc[:, 3]
                unique_vals_series = fourth_col.drop_duplicates()
                series.append(unique_vals_series)
            if len(series) == 2:
                # merge two Series into one and remove duplicates
                s = dd.concat(series).drop_duplicates()
                # keep the result Series in the first list's element
                del series[-1]
                series[0] = s
        except Exception as e:
            # Fixed: the old message concatenated "%f" into the string without
            # ever formatting it, so "%f" was logged literally and the file
            # name never appeared.
            app_logger.error("Processing file %s had errors: %s" % (f, e))
        app_logger.info("Processing %s done" % f)
    if series:
        s: dd.Series = series[0]
        s.to_csv(constants.PROCESSED_DATA_DIR / "single.csv",
                 single_file=True, index=False, header=False)
    else:
        app_logger.error("Prepare data: could not generate the result CSV file")
    app_logger.info("Preparing data completed in %s seconds" % str(time.time() - start))
    return bool(series)
def run(self):
    """Scrape council-tax data for every postcode listed in single.csv.

    Reads the first column of PROCESSED_DATA_DIR/single.csv in chunks,
    queries each postcode via ``self.query`` and writes one CSV per
    postcode into RESULTS_DIR, skipping postcodes whose result file
    already exists. Sleeps a random 5-60 s between scrapes.
    """
    postcodes_file_path = PROCESSED_DATA_DIR / "single.csv"
    if not postcodes_file_path.exists():
        app_logger.error("No 'single.csv' file %s found. Exiting..." % postcodes_file_path)
        return
    # order of column names is important
    columns = ["Address", "Postcode", "Council Tax band",
               "Local authority reference number"]
    # chunked read keeps memory bounded; only the first column is needed
    for chunk_df in pd.read_csv(postcodes_file_path, chunksize=100,
                                header=None, usecols=[0]):
        for _, row in chunk_df.iterrows():
            postcode = row[0]
            # spaces are not filesystem-friendly in result file names
            result_file = RESULTS_DIR / "{}.csv".format(postcode.replace(" ", "_"))
            if result_file.exists():
                # existing file doubles as a resume marker across runs
                app_logger.warning("Skipping result file %s, already exists" % result_file)
                continue
            app_logger.info("Scraping %s postcode started" % postcode)
            items = self.query(postcode=postcode)
            result_list = [[result_item.address, postcode,
                            result_item.council_tax_band,
                            result_item.local_auth_ref_number]
                           for result_item in items] if items else []
            # an empty CSV is still written so the postcode is not re-scraped
            result_df = pd.DataFrame(result_list, columns=columns)
            result_df.to_csv(result_file, index=False)
            if items:
                app_logger.info("Scraping %s postcode completed" % postcode)
            else:
                app_logger.info("Scraping %s postcode completed, but it discovered no entries" % postcode)
            # sleep (5, 60) seconds randomly
            secs = random.randint(5, 60)
            app_logger.info("Sleeping %d seconds" % secs)
            time.sleep(secs)
def reflush_redis_instances():
    """Refresh the local instance table from the cloud Redis API.

    Fetches all instance descriptions via ALIREDIS and upserts each one;
    per-row failures are logged and skipped so one bad record does not
    abort the refresh.

    :raises Exception: when the instance list itself cannot be fetched.
    """
    app_logger.info("开始刷新实例!")
    try:
        all_instances_info = ALIREDIS.get_all_instances_info()
    except Exception as ex:
        app_logger.error("获取实例信息失败, ex:{0}".format(ex), exc_info=True)
        raise Exception("获取实例信息失败")
    app_logger.info("刷新实例!完成")
    with get_db() as session:
        for instances_info in all_instances_info:
            app_logger.info(
                "开始更新实例信息,instances_info:{0}".format(instances_info))
            try:
                CRUD_Instances_Info.in_update_notin_insert(session, instances_info)
            except Exception:
                # Fixed: the bound exception variable was never used;
                # exc_info=True already records the full traceback.
                app_logger.error(
                    "更新实例信息失败! instances_info:{0}".format(instances_info),
                    exc_info=True)
    app_logger.info("刷新实例完成")
    return 0
def _create_and_record_job(instance_id):
    """Create a cache-analysis job for one instance and upsert it into the DB.

    Failures are logged (with traceback), never raised — if the DB write
    fails the job must be repaired manually from the log.
    """
    try:
        job_info = ALIREDIS.create_cache_analysisjob(instance_id)
        app_logger.info("新job信息:{0}".format(job_info))
        with get_db() as session:
            CRUD_Analysis_Job.in_update_notin_insert(session, job_info)
    except Exception as ex:
        app_logger.error("创建job,跟新job表失败, ID:{0} ex:{1}".format(
            instance_id, ex), exc_info=True)


def create_analysis_job(instance_id=None):
    """Create cache-analysis jobs.

    With *instance_id* given, creates a job for that single instance;
    otherwise creates one job per instance known to the DB. The duplicated
    create-and-upsert code of the two paths now shares _create_and_record_job
    (this also removes the misleading "实例id为空值" log the single-instance
    path used to emit).

    :raises Exception: when the instance list cannot be fetched.
    """
    if instance_id:
        _create_and_record_job(instance_id)
        return 0
    try:
        with get_db() as session:
            instances = CRUD_Instances_Info.get_all_instances(session)
    except Exception:
        app_logger.error("获取实例列表出错", exc_info=True)
        raise Exception("获取实例列表出错")
    if len(instances) == 0:
        app_logger.info("实例id为空值")
        return 0
    for instance in instances:
        _create_and_record_job(instance.instance_id)
    return 0
def sync_and_udpate_day_job(day=None):
    """Sync today's unfinished analysis jobs: refresh their status, store
    big-key results into InfluxDB, and generate alarm-log records.

    Bug fix: the old default ``day=str(date.today())`` was evaluated once at
    import time, so a long-running scheduler kept querying the start-up day.
    The default is now computed per call.

    :param day: day string (as produced by ``str(date.today())``); defaults
        to today at call time.
    :raises Exception: when the unfinished-job list cannot be fetched.
    """
    if day is None:
        day = str(date.today())
    app_logger.info("开始更新job状态")
    with get_db() as session:
        try:
            jobs = CRUD_Analysis_Job.get_currentday_not_finnish_job(session, day)
        except Exception as ex:
            app_logger.error("获取未完成作业列表失败!", exc_info=True)
            raise Exception("获取未完成作业列表失败!")
    if len(jobs) == 0:
        app_logger.info("无未更新job")
        return 0
    for job in jobs:
        try:
            job_info, job_data = ALIREDIS.get_analysisjob_info(
                job.instance_id, job.job_id)
        except RequestJobError as ex:
            # Job API rejected the call: assume an empty/dead instance and
            # disable both the instance and the job so we stop polling them.
            app_logger.error("获取job信息失败! iid:{0},jid:{1},ex:{2}".format(
                job.instance_id, job.job_id, ex), exc_info=True)
            with get_db() as session:
                CRUD_Instances_Info.disable_instance(session, job.instance_id)
                CRUD_Analysis_Job.disable_job(session, job.job_id)
            app_logger.error(
                "因作业接口调用失败,怀疑实例为空实例,已经禁用实例与作业!! iid:{0},jid:{1}".format(
                    job.instance_id, job.job_id))
            continue
        except Exception as ex:
            app_logger.error("获取job信息失败! iid:{0},jid:{1},ex:{2}".format(
                job.instance_id, job.job_id, ex), exc_info=True)
            continue
        try:
            with get_db() as session:
                instance = CRUD_Instances_Info.get_instance(session, job.instance_id)
        except Exception as ex:
            app_logger.error("获取实例信息失败! iid:{0},jid:{1},ex:{2}".format(
                job.instance_id, job.job_id, ex), exc_info=True)
            continue
        # NOTE(review): len() here suggests get_instance returns a sequence,
        # yet instance.instance_id below treats it as a single record —
        # verify get_instance's return type against its definition.
        if len(instance) == 0:
            continue
        try:
            app_logger.info("开始更新job状态, job_info:{0}".format(job_info))
            with get_db() as session:
                CRUD_Analysis_Job.in_update_notin_insert(session, job_info)
        except Exception as ex:
            app_logger.error("更新job状态失败!job_info:{0}".format(job_info),
                             exc_info=True)
        # Skip the rest when the analysis has not produced data yet.
        if job_data is None:
            continue
        try:
            app_logger.info("开始更新key信息, job_info:{0}, job_data:{1}".format(
                job_info, job_data))
            format_response_dict = Redis_BigKeys.format_big_keys_info(
                job_data, day, instance.instance_id, instance.instance_name)
            if format_response_dict["keys_info"]:
                with get_influxdb() as session:
                    Redis_BigKeys.slave_to_influxdb(
                        session, format_response_dict["keys_info"])
                with get_influxdb() as session:
                    Redis_BigKeys.slave_to_influxdb(
                        session, format_response_dict["keyprefixes"])
        except Exception as ex:
            app_logger.error(
                "存入key信息失败! job_info:{0}, day:{1}, iid:{2}, iname:{3}, ex:{4}".
                format(job_info, day, instance.instance_id,
                       instance.instance_name, ex), exc_info=True)
            continue
        try:
            app_logger.info("更新savedata状态,job_info:{0}".format(job_info))
            with get_db() as session:
                CRUD_Analysis_Job.update_job_save_data_status(
                    session, job.job_id, 1)
        except Exception as ex:
            app_logger.error("更新savedata状态失败! jid:{0},ex:{1}".format(
                job.job_id, ex), exc_info=True)
            continue
        try:
            app_logger.info("生成报警信息,jid:{0}, date:{1}".format(
                job.job_id, format_response_dict["keys_info"]))
            alarm_log_dict = check_keys(format_response_dict["keys_info"])
            # No threshold breached -> nothing to persist.
            if len(alarm_log_dict) == 0:
                continue
            with get_db() as session:
                CRUD_Alarm_Log.insert(session, alarm_log_dict)
        except Exception as ex:
            app_logger.error("保存alarmlog失败, jid:{0},ex:{1}".format(
                job.job_id, ex), exc_info=True)
    return 0
# validate ip try: socket.inet_aton(ip) except socket.error: raise web.HTTPBadRequest(text='Invalid IP') # validate ports if not (1 <= begin_port <= 65535): raise web.HTTPBadRequest(text='Invalid begin port') if not (1 <= end_port <= 65535): raise web.HTTPBadRequest(text='Invalid end port') report = await scan(ip, begin_port, end_port + 1) return web.json_response(report) def create_app(): app = web.Application(middlewares=[json_error_middleware], logger=logger) app.router.add_route('GET', '/{ip}/{begin_port}/{end_port}', handle) return app if __name__ == '__main__': loop = asyncio.get_event_loop() runner = web.AppRunner(create_app()) loop.run_until_complete(runner.setup()) site = web.TCPSite(runner, port=8080) loop.run_until_complete(site.start()) logger.info('Start portscan server on 8080') loop.run_forever()
def fetch_data(self) -> bool:
    """Download every URL in constants.DOWNLOAD_LINKS into DATA_DIR.

    Clears DATA_DIR / PROCESSED_DATA_DIR first, streams each file to disk in
    CHUNK_SIZE pieces, and records failing URLs in ``self.problem_urls``.

    :return: True when at least one URL downloaded without a problem,
        False otherwise (or when there are no links at all).
    """
    self.errors = False
    self.problem_urls = []
    self.clear_dir(constants.DATA_DIR)
    self.clear_dir(constants.PROCESSED_DATA_DIR)
    app_logger.info("Fetching started")
    if not constants.DOWNLOAD_LINKS:
        app_logger.warning("no links to download")
        self.errors = True
        return False
    start = time.time()
    for url in constants.DOWNLOAD_LINKS:
        app_logger.info("downloading %s..." % url)
        name = self.get_file_name(url)
        h = headers.get_random_headers()
        h['Referer'] = self.REFERER
        try:
            resp = requests.get(url=url, stream=True, headers=h)
            if resp.status_code != HTTPStatus.OK:
                # Fixed: the logger object itself was *called* here
                # (``app_logger(...)``), raising TypeError on every bad
                # status code instead of logging it.
                app_logger.error("Request error: bad response code " +
                                 str(resp.status_code))
                self.problem_urls.append(url)
                self.errors = True
                continue
            app_logger.info("saving %s..." % url)
            with open(constants.DATA_DIR / name, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=self.CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)
            app_logger.info("saved")
        except requests.exceptions.RequestException as e:
            app_logger.error("Request error: " + str(e))
            self.problem_urls.append(url)
            self.errors = True
        except Exception as e:
            app_logger.error("General error: " + str(e))
            self.problem_urls.append(url)
            self.errors = True
            # remove file data leftovers in case of errors
            # (it may be corrupted, incomplete, etc)
            self.del_file(constants.DATA_DIR / name)
    app_logger.info("Fetching data completed in %s seconds" %
                    str(time.time() - start))
    # check if at least some urls have been downloaded without problem
    return not self.errors or (len(self.problem_urls) <
                               len(constants.DOWNLOAD_LINKS))