Example #1
def retrieve_reports(url, market, dst_file, date_val, cols) -> DownloadDetails:
    request_data = dict(market=market, date=date_val)

    response = requests.post(url=url,
                             headers=headers.get_random_headers(),
                             data=request_data,
                             timeout=30)  # requests has no default timeout

    if response.status_code == requests.codes.ok:
        text = response.text
        try:
            json_val = json.loads(text)
            results = json_val['results']
        except (ValueError, KeyError) as e:
            app_logger.error(e, exc_info=True)
        else:
            app_logger.info("Downloaded entries for date %s OK" % date_val)
            # save if there's some non-empty data
            if results:
                if save_to_file(dst_csv_file_name=dst_file,
                                csv_cols=cols,
                                data=results):
                    app_logger.info("Saved entries for date %s OK" % date_val)
                    return DownloadDetails(skipped=False, status=True)
            else:
                app_logger.warning("Skipped empty entries for date %s" %
                                   date_val)
                return DownloadDetails(skipped=True, status=True)
    else:
        app_logger.error(
            "Data for %s is not available, request returned %d status" %
            (date_val, response.status_code))

    return DownloadDetails(skipped=False, status=False)
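retrieve_reports returns a DownloadDetails value and delegates writing to save_to_file, neither of which appears in these snippets. A minimal sketch of what they might look like; the field names follow the calls above, but the CSV layout and the dict-per-entry shape of `data` are assumptions:

import csv
from typing import NamedTuple

class DownloadDetails(NamedTuple):
    skipped: bool
    status: bool

def save_to_file(dst_csv_file_name, csv_cols, data) -> bool:
    # hypothetical writer: each entry in `data` is assumed to be a dict
    # keyed by the column names in `csv_cols`
    try:
        with open(dst_csv_file_name, "w", newline="") as f:
            writer = csv.DictWriter(f, fieldnames=csv_cols, extrasaction="ignore")
            writer.writeheader()
            writer.writerows(data)
        return True
    except OSError as e:
        app_logger.error("Saving %s failed: %s" % (dst_csv_file_name, e))
        return False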
Example #2
def alarm_big_key(begin_time=None):
    # default arguments are evaluated once, at definition time, so the
    # 12-hour window must be computed inside the function body
    if begin_time is None:
        begin_time = datetime.now() - timedelta(hours=12)
    begin_time_str = begin_time.strftime("%Y-%m-%d %H:%M:%S")
    app_logger.info("Start sending alarms")
    title = "Big key alarm"
    try:
        with get_db() as session:
            alarm_logs = CRUD_Alarm_Log.get_not_sended_log(
                session, begin_time_str)

    except Exception as ex:
        app_logger.error("Failed to fetch alarm logs! ex:{0}".format(ex),
                         exc_info=True)
        raise Exception("Failed to fetch alarm logs")

    if len(alarm_logs) == 0:
        app_logger.info("No unsent alarms")

    for alarm_log in alarm_logs:
        sleep(2)
        try:
            dingclient.sendto_ding(title, alarm_log.message)
        except Exception as ex:
            app_logger.error("Failed to send alarm! ex:{0}, message:{1}".format(
                ex, alarm_log.message),
                             exc_info=True)
            continue

        try:
            with get_db() as session:
                CRUD_Alarm_Log.set_log_is_sended(session, alarm_log.log_id)
        except Exception as ex:
            app_logger.error("Failed to update alarm status! ex:{0}, message:{1}".format(
                ex, alarm_log.message),
                             exc_info=True)
            continue
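Most of these examples obtain a session from a get_db context manager that is not shown. A plausible sketch on top of SQLAlchemy; the engine URL and the commit/rollback policy are assumptions:

from contextlib import contextmanager
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

engine = create_engine("mysql+pymysql://user:pass@localhost/dbname")  # hypothetical URL
SessionLocal = sessionmaker(bind=engine)

@contextmanager
def get_db():
    # yield a session, commit on success, roll back on error
    session = SessionLocal()
    try:
        yield session
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()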
Example #3
def load_last_date(fname):
    try:
        with open(fname) as f:
            val = f.readline().strip()
            return datetime.strptime(val, "%d-%m-%Y")
    except (OSError, ValueError) as e:
        # a missing file or a malformed date simply means "no saved state";
        # catching bare Exception here would hide unrelated bugs
        app_logger.info(e, exc_info=True)
        return None
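load_last_date implies a writer that produced the one-line file it reads. A minimal counterpart, assuming the same "%d-%m-%Y" format:

def save_last_date(fname, date_val):
    # persist the date in the format that load_last_date expects
    with open(fname, "w") as f:
        f.write(date_val.strftime("%d-%m-%Y"))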
Example #4
async def check_port(ip, port):
    logger.info("Check {} on {}".format(port, ip))
    conn = asyncio.open_connection(ip, port)
    try:
        reader, writer = await asyncio.wait_for(conn, timeout=3)
        writer.close()  # release the socket once the port is known to be open
        return {"port": port, "state": "open"}
    except (asyncio.TimeoutError, OSError):
        # a bare except would also swallow CancelledError; catch only
        # timeouts and connection errors
        return {"port": port, "state": "close"}
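Example #13 below awaits a scan(ip, begin_port, end_port) coroutine that is not included in these snippets. A minimal sketch that fans check_port out over the half-open port range and gathers the results; concurrency here is unbounded, and a real scanner would likely cap it with a semaphore:

async def scan(ip, begin_port, end_port):
    # run check_port concurrently for every port in [begin_port, end_port)
    tasks = [check_port(ip, port) for port in range(begin_port, end_port)]
    return await asyncio.gather(*tasks)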
Example #5
def disable_job(job_id):
    """Mark a job as disabled."""
    app_logger.info("Disabling job, jid:{0}".format(job_id))
    try:
        with get_db() as session:
            CRUD_Analysis_Job.disable_job(session, job_id)
    except Exception as ex:
        app_logger.error("Failed to disable job, jid:{0}, ex:{1}".format(job_id, ex),
                         exc_info=True)
    return 0
Example #6
def enable_instance(instance_id):
    """Mark an instance as enabled."""
    app_logger.info("Enabling instance, iid:{0}".format(instance_id))
    try:
        with get_db() as session:
            CRUD_Instances_Info.enable_instance(session, instance_id)
    except Exception as ex:
        app_logger.error("Failed to enable instance, iid:{0}, ex:{1}".format(instance_id, ex),
                         exc_info=True)
    return 0
Example #7
def check_keys(keys_info):

    alarm_keys_list = []
    alarm_log_dict = {}
    if len(keys_info) == 0:
        return alarm_log_dict

    current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    instance_id = keys_info[0]["tags"]["instance_id"]
    instance_name = keys_info[0]["tags"]["instance_name"]
    for key in keys_info:
        key_type = key["tags"]["type"]
        key_bytes = int(key["fields"]["bytes"])
        # types without a dedicated threshold fall back to the "other" one
        threshold = alarm_threshold.get(key_type, alarm_threshold["other"])
        if key_bytes > threshold:
            alarm_keys_list.append({
                "key": key["tags"]["key"],
                "type": key_type,
                "bytes": key["fields"]["bytes"]
            })

    if len(alarm_keys_list) == 0:
        return alarm_log_dict

    alarm_message_dict = {
        "iid": instance_id,
        "i_name": instance_name,
        "keys": alarm_keys_list,
        "current": current_time
    }
    alarm_message_dict.update(alarm_threshold)
    alarm_message = alarm_template.format(**alarm_message_dict)
    app_logger.info(alarm_message)

    alarm_log_dict = {
        "instance_id": instance_id,
        "instance_name": instance_name,
        "message": alarm_message
    }

    return alarm_log_dict
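check_keys reads module-level alarm_threshold and alarm_template objects defined elsewhere. Their exact contents are unknown; a hedged sketch that satisfies the lookups and the format call above (the byte values and template wording are assumptions):

# byte thresholds per Redis type; "other" is the fallback
alarm_threshold = {
    "string": 10 * 1024 * 1024,
    "hash": 50 * 1024 * 1024,
    "list": 50 * 1024 * 1024,
    "set": 50 * 1024 * 1024,
    "zset": 50 * 1024 * 1024,
    "other": 10 * 1024 * 1024,
}

# may reference any key of the merged dict built in check_keys
# (iid, i_name, keys, current, plus the threshold entries)
alarm_template = (
    "Instance {iid} ({i_name}) has big keys at {current}:\n"
    "{keys}"
)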
Example #8
    def preprocess_data() -> bool:
        app_logger.info("Preparing data started...")

        start = time.time()

        series: List[dd.Series] = []
        for f in os.listdir(constants.DATA_DIR):
            if not f.endswith(".csv"):
                app_logger.warning("non-CSV file found in DATA_DIR: %s" % f)
                continue

            app_logger.info("Processing %s" % f)
            try:
                if len(series) < 2:
                    df = dd.read_csv(constants.DATA_DIR / f, header=None)

                    if len(df.columns) != \
                            PriceDataProcessor.REQUIRED_CSV_FORMAT_COLUMNS_COUNT:
                        app_logger.error(
                            "File %s has the wrong number of columns: "
                            "required %d, found %d" %
                            (f,
                             PriceDataProcessor.REQUIRED_CSV_FORMAT_COLUMNS_COUNT,
                             len(df.columns)))
                        continue

                    # we are interested in the 4th column's values
                    fourth_col: dd.Series = df.iloc[:, 3]
                    unique_vals_series = fourth_col.drop_duplicates()
                    series.append(unique_vals_series)

                if len(series) == 2:
                    # merge two Series into one and remove duplicates
                    s = dd.concat(series).drop_duplicates()

                    # keep the result Series in the first list's element
                    del series[-1]
                    series[0] = s

            except Exception as e:
                app_logger.error("Processing file %f had errors: " + str(e))

            app_logger.info("Processing %s done" % f)

        if series:
            s: dd.Series = series[0]
            s.to_csv(constants.PROCESSED_DATA_DIR / "single.csv",
                     single_file=True,
                     index=False,
                     header=False)
        else:
            app_logger.error(
                "Prepare data: could not generate the result CSV file")

        app_logger.info("Preparing data completed in %s seconds" %
                        str(time.time() - start))
        return bool(series)
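Examples #8 and #14 read paths and download links from a constants module (Example #9 appears to import the same directory names directly). A plausible sketch, assuming pathlib objects, since the / operator used above requires Path instances; the directory names and URLs are placeholders:

from pathlib import Path

DATA_DIR = Path("data")                      # raw downloaded CSVs
PROCESSED_DATA_DIR = Path("processed_data")  # merged output
RESULTS_DIR = Path("results")                # per-postcode scrape results
DOWNLOAD_LINKS = [
    # hypothetical source URLs
    "https://example.com/prices-part1.csv",
    "https://example.com/prices-part2.csv",
]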
Example #9
    def run(self):
        postcodes_file_path = PROCESSED_DATA_DIR / "single.csv"

        if not postcodes_file_path.exists():
            app_logger.error("No 'single.csv' file %s found. Exiting..." % postcodes_file_path)
            return

        # order of column names is important
        columns = ["Address", "Postcode", "Council Tax band", "Local authority reference number"]

        for chunk_df in pd.read_csv(postcodes_file_path, chunksize=100, header=None, usecols=[0]):
            for _, row in chunk_df.iterrows():

                postcode = row[0]
                result_file = RESULTS_DIR / "{}.csv".format(postcode.replace(" ", "_"))
                if result_file.exists():
                    app_logger.warning("Skipping result file %s, already exists" % result_file)
                    continue

                app_logger.info("Scraping %s postcode started" % postcode)
                items = self.query(postcode=postcode)

                result_list = [[result_item.address,
                                postcode,
                                result_item.council_tax_band,
                                result_item.local_auth_ref_number] for result_item in items] if items else []

                result_df = pd.DataFrame(result_list, columns=columns)
                result_df.to_csv(result_file, index=False)
                if items:
                    app_logger.info("Scraping %s postcode completed" % postcode)
                else:
                    app_logger.info("Scraping %s postcode completed, but it discovered no entries" % postcode)

                # sleep a random 5-60 seconds between requests
                secs = random.randint(5, 60)
                app_logger.info("Sleeping %d seconds" % secs)
                time.sleep(secs)
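self.query is not shown; run() only relies on each returned item exposing address, council_tax_band and local_auth_ref_number attributes. A hypothetical container for those fields, matching the attribute reads above:

from dataclasses import dataclass

@dataclass
class ResultItem:
    # the scraper is assumed to yield one ResultItem per property
    # found for a postcode
    address: str
    council_tax_band: str
    local_auth_ref_number: str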
Example #10
def reflush_redis_instances():
    app_logger.info("Start refreshing instances!")
    try:
        all_instances_info = ALIREDIS.get_all_instances_info()
    except Exception as ex:
        app_logger.error("Failed to fetch instance info, ex:{0}".format(ex),
                         exc_info=True)
        raise Exception("Failed to fetch instance info")
    app_logger.info("Fetched instance info")

    with get_db() as session:
        for instances_info in all_instances_info:
            app_logger.info(
                "Updating instance info, instances_info:{0}".format(instances_info))
            try:
                CRUD_Instances_Info.in_update_notin_insert(
                    session, instances_info)
            except Exception as ex:
                app_logger.error(
                    "Failed to update instance info! instances_info:{0}".format(
                        instances_info),
                    exc_info=True)
        app_logger.info("Instance refresh completed")

    return 0
Example #11
def create_analysis_job(instance_id=None):
    instances = []
    if instance_id:
        try:
            job_info = ALIREDIS.create_cache_analysisjob(instance_id)
            # if saving to the database fails, repair manually from the logs
            app_logger.info("New job info:{0}".format(job_info))
            with get_db() as session:
                CRUD_Analysis_Job.in_update_notin_insert(session, job_info)
        except Exception as ex:
            app_logger.error("Failed to create job / update job table, ID:{0} ex:{1}".format(
                instance_id, ex),
                             exc_info=True)
        # a single instance was handled explicitly; nothing left to iterate
        return 0
    else:
        try:
            with get_db() as session:
                instances = CRUD_Instances_Info.get_all_instances(session)
        except Exception as ex:
            app_logger.error("Failed to fetch instance list", exc_info=True)
            raise Exception("Failed to fetch instance list")

    if len(instances) == 0:
        app_logger.info("Instance list is empty")
        return 0

    for instance in instances:
        try:
            job_info = ALIREDIS.create_cache_analysisjob(instance.instance_id)
            app_logger.info("New job info:{0}".format(job_info))
            with get_db() as session:
                CRUD_Analysis_Job.in_update_notin_insert(session, job_info)
        except Exception as ex:
            app_logger.error("Failed to create job / update job table, ID:{0} ex:{1}".format(
                instance.instance_id, ex),
                             exc_info=True)

    return 0
Example #12
def sync_and_udpate_day_job(day=None):
    # compute the default inside the body: a default argument would be
    # evaluated only once, at definition time
    if day is None:
        day = str(date.today())
    app_logger.info("Start updating job statuses")
    with get_db() as session:
        try:
            jobs = CRUD_Analysis_Job.get_currentday_not_finnish_job(
                session, day)
        except Exception as ex:
            app_logger.error("Failed to fetch the list of unfinished jobs!",
                             exc_info=True)
            raise Exception("Failed to fetch the list of unfinished jobs!")

    if len(jobs) == 0:
        app_logger.info("No jobs to update")
        return 0

    for job in jobs:
        try:
            job_info, job_data = ALIREDIS.get_analysisjob_info(
                job.instance_id, job.job_id)
        except RequestJobError as ex:
            app_logger.error("Failed to fetch job info! iid:{0},jid:{1},ex:{2}".format(
                job.instance_id, job.job_id, ex),
                             exc_info=True)
            with get_db() as session:
                CRUD_Instances_Info.disable_instance(session, job.instance_id)
                CRUD_Analysis_Job.disable_job(session, job.job_id)
            app_logger.error(
                "The job API call failed; the instance is suspected to be empty, "
                "so the instance and the job have been disabled!! iid:{0},jid:{1}".format(
                    job.instance_id, job.job_id))
            continue
        except Exception as ex:
            app_logger.error("Failed to fetch job info! iid:{0},jid:{1},ex:{2}".format(
                job.instance_id, job.job_id, ex),
                             exc_info=True)
            continue

        try:
            with get_db() as session:
                instance = CRUD_Instances_Info.get_instance(
                    session, job.instance_id)
        except Exception as ex:
            app_logger.error("Failed to fetch instance info! iid:{0},jid:{1},ex:{2}".format(
                job.instance_id, job.job_id, ex),
                             exc_info=True)
            continue

        # no instance record found
        if not instance:
            continue

        try:
            app_logger.info("Updating job status, job_info:{0}".format(job_info))
            with get_db() as session:
                CRUD_Analysis_Job.in_update_notin_insert(session, job_info)
        except Exception as ex:
            app_logger.error("Failed to update job status! job_info:{0}".format(job_info),
                             exc_info=True)

        # skip if the analysis has not finished yet
        if job_data is None:
            continue

        try:
            app_logger.info("Updating key info, job_info:{0}, job_data:{1}".format(
                job_info, job_data))
            format_response_dict = Redis_BigKeys.format_big_keys_info(
                job_data, day, instance.instance_id, instance.instance_name)

            if format_response_dict["keys_info"]:
                with get_influxdb() as session:
                    Redis_BigKeys.slave_to_influxdb(
                        session, format_response_dict["keys_info"])

                with get_influxdb() as session:
                    Redis_BigKeys.slave_to_influxdb(
                        session, format_response_dict["keyprefixes"])

        except Exception as ex:
            app_logger.error(
                "Failed to save key info! job_info:{0}, day:{1}, iid:{2}, iname:{3}, ex:{4}".
                format(job_info, day, instance.instance_id,
                       instance.instance_name, ex),
                exc_info=True)
            continue

        try:
            app_logger.info("Updating savedata status, job_info:{0}".format(job_info))
            with get_db() as session:
                CRUD_Analysis_Job.update_job_save_data_status(
                    session, job.job_id, 1)
        except Exception as ex:
            app_logger.error("Failed to update savedata status! jid:{0},ex:{1}".format(
                job.job_id, ex),
                             exc_info=True)
            continue

        try:
            app_logger.info("Generating alarm info, jid:{0}, keys_info:{1}".format(
                job.job_id, format_response_dict["keys_info"]))
            alarm_log_dict = check_keys(format_response_dict["keys_info"])
            # if no threshold was triggered, don't save alarm info
            if len(alarm_log_dict) == 0:
                continue

            with get_db() as session:
                CRUD_Alarm_Log.insert(session, alarm_log_dict)
        except Exception as ex:
            app_logger.error("Failed to save alarm log, jid:{0},ex:{1}".format(
                job.job_id, ex),
                             exc_info=True)

    return 0
Example #13
async def handle(request):
    # the route pattern is /{ip}/{begin_port}/{end_port} (see create_app below)
    ip = request.match_info['ip']
    try:
        begin_port = int(request.match_info['begin_port'])
        end_port = int(request.match_info['end_port'])
    except ValueError:
        raise web.HTTPBadRequest(text='Ports must be integers')

    # validate ip
    try:
        socket.inet_aton(ip)
    except socket.error:
        raise web.HTTPBadRequest(text='Invalid IP')

    # validate ports
    if not (1 <= begin_port <= 65535):
        raise web.HTTPBadRequest(text='Invalid begin port')
    if not (1 <= end_port <= 65535):
        raise web.HTTPBadRequest(text='Invalid end port')

    report = await scan(ip, begin_port, end_port + 1)
    return web.json_response(report)


def create_app():
    app = web.Application(middlewares=[json_error_middleware], logger=logger)
    app.router.add_route('GET', '/{ip}/{begin_port}/{end_port}', handle)
    return app


if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    runner = web.AppRunner(create_app())
    loop.run_until_complete(runner.setup())
    site = web.TCPSite(runner, port=8080)
    loop.run_until_complete(site.start())

    logger.info('Portscan server started on port 8080')
    loop.run_forever()
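With the server running, the endpoint can be exercised from Python as well. A small aiohttp client sketch; localhost and the 20-25 port range are just example values:

import aiohttp

async def demo():
    url = "http://localhost:8080/127.0.0.1/20/25"
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            # prints a list of {"port": ..., "state": ...} dicts
            print(await resp.json())

asyncio.run(demo())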
Example #14
    def fetch_data(self) -> bool:
        self.errors = False
        self.problem_urls = []

        self.clear_dir(constants.DATA_DIR)
        self.clear_dir(constants.PROCESSED_DATA_DIR)

        app_logger.info("Fetching started")

        if not constants.DOWNLOAD_LINKS:
            app_logger.warning("no links to download")
            self.errors = True
            return False

        start = time.time()

        for url in constants.DOWNLOAD_LINKS:
            app_logger.info("downloading %s..." % url)

            name = self.get_file_name(url)

            h = headers.get_random_headers()
            h['Referer'] = self.REFERER

            try:
                resp = requests.get(url=url, stream=True, headers=h,
                                    timeout=60)  # requests has no default timeout
                if resp.status_code != HTTPStatus.OK:
                    app_logger.error("Request error: bad response code " +
                                     str(resp.status_code))
                    self.problem_urls.append(url)
                    self.errors = True
                    continue

                app_logger.info("saving %s..." % url)

                with open(constants.DATA_DIR / name, 'wb') as f:
                    for chunk in resp.iter_content(chunk_size=self.CHUNK_SIZE):
                        if chunk:
                            f.write(chunk)

                app_logger.info("saved")

            except requests.exceptions.RequestException as e:
                app_logger.error("Request error: " + str(e))
                self.problem_urls.append(url)
                self.errors = True

                # remove file data leftovers in case of errors
                # (it may be corrupted, incomplete, etc)
                self.del_file(constants.DATA_DIR / name)
            except Exception as e:
                app_logger.error("General error: " + str(e))
                self.problem_urls.append(url)
                self.errors = True

                # remove file data leftovers in case of errors
                # (it may be corrupted, incomplete, etc)
                self.del_file(constants.DATA_DIR / name)

        app_logger.info("Fetching data completed in %s seconds" %
                        str(time.time() - start))

        # succeed if at least some urls were downloaded without problems
        return (not self.errors
                or len(self.problem_urls) < len(constants.DOWNLOAD_LINKS))
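Examples #1 and #14 pull request headers from headers.get_random_headers(). A minimal sketch of such a helper, rotating over a small User-Agent pool; the pool contents are assumptions, and a real module would carry many more entries:

import random

_USER_AGENTS = [
    # an assumed, deliberately small pool
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0",
]

def get_random_headers():
    return {"User-Agent": random.choice(_USER_AGENTS)}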