def stop_datafeed(mlad_ctx):
    """Stop the context's datafeed using its configured stop parameters.

    Returns the (unchanged) context so calls can be chained.
    """
    es = mlad_ctx["es_object"]
    stop_params = mlad_ctx["mlad_properties"]["datafeed_stop_params"]
    MlClient.stop_datafeed(es,
                           datafeed_id=mlad_ctx["datafeed_id"],
                           body=stop_params)
    return mlad_ctx
def create_job(mlad_ctx):
    """Create an anomaly detection job from the context's configured job body.

    The job's dedicated results index is named after the job id. The
    configured body is merged into a new dict instead of being mutated in
    place, so repeated calls (or other jobs sharing the same
    ``mlad_properties``) do not see a stale ``results_index_name``.

    Returns the (unchanged) context so calls can be chained.
    """
    job_id = mlad_ctx["job_id"]
    # Merge rather than mutate the shared config dict (original wrote
    # results_index_name straight into mlad_properties["job_body"]).
    job_body = mlad_ctx["mlad_properties"]["job_body"] | {
        "results_index_name": job_id
    }
    MlClient.put_job(mlad_ctx["es_object"], job_id=job_id, body=job_body)
    return mlad_ctx
def get_existing_model_ids(cls, es_client: Elasticsearch) -> List[str]:
    """Get model IDs for existing ML models.

    Only ids that also appear in this class's manifests are returned.
    """
    ml_client = MlClient(es_client)
    # Hoist the manifest lookup out of the comprehension: the original
    # re-invoked cls.ml_manifests() once per trained model. Assumes
    # ml_manifests() is a side-effect-free getter — confirm if it is not.
    manifests = cls.ml_manifests()
    return [
        m['model_id']
        for m in ml_client.get_trained_models()['trained_model_configs']
        if m['model_id'] in manifests
    ]
def check_model_exists(cls, es_client: Elasticsearch, model_id: str) -> bool:
    """Check if a model exists on a stack by model id."""
    configs = MlClient(es_client).get_trained_models()['trained_model_configs']
    return any(cfg['model_id'] == model_id for cfg in configs)
def start_datafeed(mlad_ctx):
    """Start the context's datafeed over its configured time window.

    Returns the (unchanged) context so calls can be chained.
    """
    time_window = mlad_ctx["mlad_properties"]["datafeed_time"]
    MlClient.start_datafeed(mlad_ctx["es_object"],
                            datafeed_id=mlad_ctx["datafeed_id"],
                            body=time_window)
    return mlad_ctx
def create_datafeed(mlad_ctx):
    """Create a datafeed bound to the context's job id and indices.

    Returns the (unchanged) context so calls can be chained.
    """
    # Build the request body on a copy so the configured template is untouched.
    datafeed_body = dict(mlad_ctx["mlad_properties"]["datafeed_body"])
    datafeed_body["job_id"] = mlad_ctx["job_id"]
    datafeed_body["indices"] = mlad_ctx["datafeed_indices"]
    MlClient.put_datafeed(mlad_ctx["es_object"],
                          datafeed_id=mlad_ctx["datafeed_id"],
                          body=datafeed_body)
    return mlad_ctx
def check_model_files(ctx):
    """Check ML model files on an elasticsearch instance.

    Lists user-installed trained models (warning when more than one DGA
    model is present) and reports whether the DGA support scripts and
    ingest pipelines are installed on the stack.
    """
    from elasticsearch.client import IngestClient, MlClient
    from .misc import get_ml_model_manifests_by_model_id

    es_client: Elasticsearch = ctx.obj['es']
    ml_client = MlClient(es_client)
    ingest_client = IngestClient(es_client)

    def safe_get(func, arg):
        # Treat a 404 from the stack as "resource not installed".
        try:
            return func(arg)
        except elasticsearch.NotFoundError:
            return None

    # Skip built-in models created by '_xpack'; only user-installed ones matter.
    models = [
        m for m in ml_client.get_trained_models().get(
            'trained_model_configs', []) if m['created_by'] != '_xpack'
    ]
    if models:
        if len([m for m in models if m['model_id'].startswith('dga_')]) > 1:
            click.secho(
                'Multiple DGA models detected! It is not recommended to run more than one DGA model at a time',
                fg='yellow')

        # Maps model_id -> release manifest (for the associated release URL).
        manifests = get_ml_model_manifests_by_model_id()

        click.echo(f'DGA Model{"s" if len(models) > 1 else ""} found:')
        for model in models:
            manifest = manifests.get(model['model_id'])
            click.echo(
                f' - {model["model_id"]}, associated release: {manifest.html_url if manifest else None}'
            )
    else:
        click.echo('No DGA Models found')

    # Presence checks for the DGA support artifacts (None means "not found").
    support_files = {
        'create_script': safe_get(es_client.get_script, 'dga_ngrams_create'),
        'delete_script': safe_get(es_client.get_script,
                                  'dga_ngrams_transform_delete'),
        'enrich_pipeline': safe_get(ingest_client.get_pipeline,
                                    'dns_enrich_pipeline'),
        'inference_pipeline': safe_get(ingest_client.get_pipeline,
                                       'dns_dga_inference_enrich_pipeline')
    }

    click.echo('Support Files:')
    for support_file, results in support_files.items():
        click.echo(
            f' - {support_file}: {"found" if results else "not found"}')
def remove_dga_model(ctx,
                     model_id,
                     force,
                     es_client: Elasticsearch = None,
                     ml_client: MlClient = None,
                     ingest_client: IngestClient = None):
    """Remove ML DGA files.

    Deletes the trained model plus its support pipelines and scripts.
    With ``force`` set, deletion is attempted without first checking that
    the model exists. Clients may be injected; otherwise they are built
    from the click context's ES client.
    """
    from elasticsearch.client import IngestClient, MlClient

    es_client = es_client or ctx.obj['es']
    ml_client = ml_client or MlClient(es_client)
    ingest_client = ingest_client or IngestClient(es_client)

    def safe_delete(func, fid, verbose=True):
        # Best-effort delete: a missing resource is reported as False, not an error.
        try:
            func(fid)
        except elasticsearch.NotFoundError:
            return False
        if verbose:
            click.echo(f' - {fid} deleted')
        return True

    model_exists = False
    if not force:
        existing_models = ml_client.get_trained_models()
        model_exists = model_id in [
            m['model_id']
            for m in existing_models.get('trained_model_configs', [])
        ]

    if model_exists or force:
        if model_exists:
            click.secho('[-] Existing model detected - deleting files',
                        fg='yellow')

        # Delete dependents (pipelines, scripts) before the model itself.
        deleted = [
            safe_delete(ingest_client.delete_pipeline,
                        'dns_dga_inference_enrich_pipeline'),
            safe_delete(ingest_client.delete_pipeline, 'dns_enrich_pipeline'),
            safe_delete(es_client.delete_script,
                        'dga_ngrams_transform_delete'),
            # f'{model_id}_dga_ngrams_transform_delete'
            safe_delete(es_client.delete_script, 'dga_ngrams_create'),
            # f'{model_id}_dga_ngrams_create'
            safe_delete(ml_client.delete_trained_model, model_id)
        ]

        if not any(deleted):
            click.echo('No files deleted')
    else:
        click.echo(f'Model: {model_id} not found')
def get_records(mlad_ctx):
    """Fetch anomaly records for the context's job and store them.

    The raw response is stashed under ``mlad_ctx["ad_result"]``; the
    context is returned so calls can be chained. The response has the
    shape (abridged)::

        {"count": 904,
         "records": [{"job_id": ..., "record_score": ...,
                      "partition_field_value": ..., "timestamp": ...,
                      "typical": [...], "actual": [...],
                      "influencers": [...]}]}
    """
    properties = mlad_ctx["mlad_properties"]
    mlad_ctx["ad_result"] = MlClient.get_records(
        mlad_ctx["es_object"],
        job_id=mlad_ctx["job_id"],
        body=properties["get_records_params"])
    return mlad_ctx
def get_all_ml_files(cls, es_client: Elasticsearch) -> dict:
    """Get all scripts, pipelines, and models which start with ml_*."""

    def _ml_prefixed(mapping):
        # Keep only entries whose name starts with 'ml_' (case-insensitive).
        return {
            name: obj
            for name, obj in mapping.items() if name.lower().startswith('ml_')
        }

    pipelines = IngestClient(es_client).get_pipeline()
    scripts = es_client.cluster.state()['metadata']['stored_scripts']
    models = MlClient(
        es_client).get_trained_models()['trained_model_configs']
    manifests = get_ml_model_manifests_by_model_id()

    # Only models with a known release manifest are reported.
    model_files = {}
    for model in models:
        model_id = model['model_id']
        if model_id in manifests:
            model_files[model_id] = {
                'model': model,
                'release': manifests[model_id]
            }

    return {
        'pipeline': _ml_prefixed(pipelines),
        'script': _ml_prefixed(scripts),
        'model': model_files,
    }
def ml_put_calendar_events(self, es_put_api_body):
    """Post new events to the configured ML calendar.

    A falsy payload (no new events from the calendar filter) is skipped
    with a notice instead of issuing a request.
    """
    # Guard clause: nothing to apply when the filter produced no events.
    if not es_put_api_body:
        print('No new events found.')
        return

    MlClient(self.es_auth).post_calendar_events(self.es_calendar_id,
                                                es_put_api_body,
                                                params=None,
                                                headers=None)
    print('POST command sent, applying events to ' + self.es_calendar_id +
          ':\n' + es_put_api_body)
def remove_model(ctx: click.Context, model_id):
    """Remove ML model files.

    Prompts for a model id (from the ids present on the stack) when none
    is given, deletes the trained model, and echoes a result table.
    """
    es_client = MlClient(ctx.obj['es'])
    model_ids = MachineLearningClient.get_existing_model_ids(ctx.obj['es'])

    if not model_id:
        model_id = click.prompt('Model ID to remove',
                                type=click.Choice(model_ids))

    try:
        result = es_client.delete_trained_model(model_id)
    except elasticsearch.ConflictError as e:
        # Dependent pipelines/scripts block deletion; point at the fix.
        click.echo(f'{e}: try running `remove-scripts-pipelines` first')
        ctx.exit(1)

    rows = [{'model_id': model_id, 'status': result}]
    click.echo(Table.from_list(['model_id', 'status'], rows))
    return result
def ml_get_calendar_events(self):
    """Fetch events for the configured ML calendar and return them.

    Each event is stripped of its `calendar_id`/`event_id` fields and its
    epoch-millisecond `start_time`/`end_time` are converted to ISO-8601
    strings. The cleaned event list is printed as JSON and returned.
    """
    # Raw data is retrieved, then filtered down to the events JSON object.
    es_get_api_raw = MlClient(self.es_auth).get_calendar_events(
        self.es_calendar_id, params=None, headers=None)
    # The dumps/loads round trip deep-copies the events so the API response
    # object is not mutated below.
    es_get_api_raw = json.dumps(es_get_api_raw["events"])
    es_get_api_body = json.loads(es_get_api_raw)
    # (Removed unused `utc = pytz.timezone('UTC')` — the value was never read,
    # and dropping it removes a needless pytz dependency in this method.)
    for element in es_get_api_body:
        del element['calendar_id']
        del element['event_id']
        # NOTE(review): utcfromtimestamp() yields a *naive* datetime which
        # .astimezone(tz=None) then interprets as local time — confirm the
        # intended timezone handling.
        element['start_time'] = datetime.datetime.utcfromtimestamp(
            float(element['start_time']) / 1000).astimezone(
                tz=None).isoformat()
        element['end_time'] = datetime.datetime.utcfromtimestamp(
            float(element['end_time']) / 1000).astimezone(tz=None).isoformat()
    print(json.dumps(es_get_api_body))
    return es_get_api_body
def forecast_job(ctx):
    """Request a 1-day forecast (expiring in 3 days) for the context's ML job.

    Retries until the cluster answers a ping AND the forecast request is
    acknowledged, then records the forecast id and a local timestamp under
    ``ctx["forecast"]``. The original retried via self-recursion, which
    risks RecursionError after enough failed attempts; this version loops
    instead (same observable behavior on success).
    """
    while True:
        # Short-circuit: only attempt the forecast when the cluster is up.
        forecast_result = ctx["analy_es_object"].ping() and MlClient.forecast(
            ctx["analy_es_object"],
            job_id=ctx["ml_job_id"],
            params={
                "duration": "1d",
                "expires_in": "3d"
            })
        if forecast_result and forecast_result["acknowledged"]:
            ctx["forecast"]["job_id"] = forecast_result["forecast_id"]
            ctx["forecast"]["job_time"] = datetime.now().strftime(
                "%Y-%m-%d %H:%M:%S")
            return ctx
def delete_ml_job(ctx: click.Context, job_name, job_type, verbose=True):
    """Remove experimental ML jobs.

    Dispatches on ``job_type`` to the matching ML delete API; unknown
    types and not-found/conflict errors are reported via client_error.
    """
    es_client: Elasticsearch = ctx.obj['es']
    ml_client = MlClient(es_client)

    # Map job type -> delete API.
    deleters = {
        'anomaly_detection': ml_client.delete_job,
        'data_frame_analytic': ml_client.delete_data_frame_analytics,
        'datafeed': ml_client.delete_datafeed,
    }

    try:
        deleter = deleters.get(job_type)
        if deleter is None:
            client_error(f'Unknown ML job type: {job_type}')
        else:
            deleter(job_name)
    except (elasticsearch.NotFoundError, elasticsearch.ConflictError) as e:
        client_error(str(e), e, ctx=ctx)

    if verbose:
        click.echo(f'Deleted {job_type} job: {job_name}')
def upload_ml_job(ctx: click.Context, job_file, overwrite):
    """Upload experimental ML jobs.

    Reads a JSON job description (keys: `type`, `name`, `body`) from
    ``job_file`` and uploads it with the ML API matching its type. With
    ``overwrite`` set, an already-existing job is deleted and re-uploaded.
    """
    es_client: Elasticsearch = ctx.obj['es']
    ml_client = MlClient(es_client)

    with open(job_file, 'r') as f:
        job = json.load(f)

    def safe_upload(func):
        # NOTE: reads `name`, `body`, and `job_type` from the enclosing scope;
        # they are bound in the try block below before this is called.
        try:
            func(name, body)
        except (elasticsearch.ConflictError,
                elasticsearch.RequestError) as err:
            # Only "already exists" is recoverable; any other RequestError
            # aborts immediately.
            if isinstance(
                    err, elasticsearch.RequestError
            ) and err.error != 'resource_already_exists_exception':
                client_error(str(err), err, ctx=ctx)

            if overwrite:
                ctx.invoke(delete_ml_job, job_name=name, job_type=job_type)
                func(name, body)
            else:
                client_error(str(err), err, ctx=ctx)

    try:
        job_type = job['type']
        name = job['name']
        body = job['body']

        if job_type == 'anomaly_detection':
            safe_upload(ml_client.put_job)
        elif job_type == 'data_frame_analytic':
            safe_upload(ml_client.put_data_frame_analytics)
        elif job_type == 'datafeed':
            safe_upload(ml_client.put_datafeed)
        else:
            client_error(f'Unknown ML job type: {job_type}')

        click.echo(f'Uploaded {job_type} job: {name}')
    except KeyError as e:
        # Missing one of the required keys in the job file.
        client_error(f'{job_file} missing required info: {e}')
def setup_dga_model(ctx, model_tag, repo, model_dir, overwrite):
    """Upload ML DGA model and dependencies and enrich DNS data.

    Steps: verify license, download/unzip the release artifact when no
    local ``model_dir`` is given, upload the trained model, install the
    painless scripts and ingest pipelines, then print follow-up steps.
    """
    import io
    import requests
    import shutil
    import zipfile

    es_client: Elasticsearch = ctx.obj['es']
    client_info = es_client.info()

    # These ML features require a platinum or enterprise license.
    license_client = LicenseClient(es_client)
    if license_client.get()['license']['type'].lower() not in ('platinum',
                                                               'enterprise'):
        client_error(
            'You must have a platinum or enterprise subscription in order to use these ML features'
        )

    # download files if necessary
    if not model_dir:
        if not model_tag:
            client_error(
                'model-tag or model-dir required to download model files')

        click.echo(f'Downloading artifact: {model_tag}')

        release_url = f'https://api.github.com/repos/{repo}/releases/tags/{model_tag}'
        release = requests.get(release_url)
        release.raise_for_status()
        # Exactly one ML-DGA*.zip asset is expected on the release.
        assets = [
            a for a in release.json()['assets']
            if a['name'].startswith('ML-DGA') and a['name'].endswith('.zip')
        ]
        if len(assets) != 1:
            client_error(
                f'Malformed release: expected 1 match ML-DGA zip, found: {len(assets)}!'
            )

        zipped_url = assets[0]['browser_download_url']
        zipped = requests.get(zipped_url)
        z = zipfile.ZipFile(io.BytesIO(zipped.content))

        # Replace any previous extraction of this tag before unzipping.
        dga_dir = get_path('ML-models', 'DGA')
        model_dir = os.path.join(dga_dir, model_tag)
        os.makedirs(dga_dir, exist_ok=True)
        shutil.rmtree(model_dir, ignore_errors=True)
        z.extractall(dga_dir)
        click.echo(f'files saved to {model_dir}')

        # read files as needed
        z.close()

    def get_model_filename(pattern):
        # Resolve exactly one file in model_dir matching the glob pattern.
        paths = list(Path(model_dir).glob(pattern))
        if not paths:
            client_error(
                f'{model_dir} missing files matching the pattern: {pattern}')
        if len(paths) > 1:
            client_error(
                f'{model_dir} contains multiple files matching the pattern: {pattern}'
            )
        return paths[0]

    @contextmanager
    def open_model_file(name):
        # Yield the parsed JSON for the named expected model file.
        pattern = expected_ml_dga_patterns[name]
        with open(get_model_filename(pattern), 'r') as f:
            yield json.load(f)

    # Model id is the filename of dga_*_model.json minus the trailing '_model.json' part.
    model_id, _ = os.path.basename(
        get_model_filename('dga_*_model.json')).rsplit('_', maxsplit=1)

    click.echo(
        f'Setting up DGA model: "{model_id}" on {client_info["name"]} ({client_info["version"]["number"]})'
    )

    # upload model
    ml_client = MlClient(es_client)
    ingest_client = IngestClient(es_client)

    existing_models = ml_client.get_trained_models()
    if model_id in [
            m['model_id']
            for m in existing_models.get('trained_model_configs', [])
    ]:
        if overwrite:
            # Reuse the removal command to clear the old model and its files.
            ctx.invoke(remove_dga_model,
                       model_id=model_id,
                       es_client=es_client,
                       ml_client=ml_client,
                       ingest_client=ingest_client,
                       force=True)
        else:
            client_error(
                f'Model: {model_id} already exists on stack! Try --overwrite to force the upload'
            )

    click.secho('[+] Uploading model (may take a while)')

    with open_model_file('model') as model_file:
        try:
            ml_client.put_trained_model(model_id=model_id, body=model_file)
        except elasticsearch.ConnectionTimeout:
            msg = 'Connection timeout, try increasing timeout using `es --timeout <secs> experimental setup_dga_model`.'
            client_error(msg)

    # install scripts
    click.secho('[+] Uploading painless scripts')

    with open_model_file('dga_ngrams_create') as painless_install:
        es_client.put_script(id='dga_ngrams_create', body=painless_install)
        # f'{model_id}_dga_ngrams_create'

    with open_model_file('dga_ngrams_transform_delete') as painless_delete:
        es_client.put_script(id='dga_ngrams_transform_delete',
                             body=painless_delete)
        # f'{model_id}_dga_ngrams_transform_delete'

    # Install ingest pipelines
    click.secho('[+] Uploading pipelines')

    def _build_es_script_error(err, pipeline_file):
        # Format a painless script_exception into a readable red message.
        error = err.info['error']
        cause = error['caused_by']
        error_msg = [
            f'Script error while uploading {pipeline_file}: {cause["type"]} - {cause["reason"]}',
            ' '.join(f'{k}: {v}' for k, v in error['position'].items()),
            '\n'.join(error['script_stack'])
        ]
        return click.style('\n'.join(error_msg), fg='red')

    with open_model_file('dns_enrich_pipeline') as ingest_pipeline1:
        try:
            ingest_client.put_pipeline(id='dns_enrich_pipeline',
                                       body=ingest_pipeline1)
        except elasticsearch.RequestError as e:
            if e.error == 'script_exception':
                client_error(_build_es_script_error(e, 'ingest_pipeline1'),
                             e,
                             ctx=ctx)
            else:
                raise
    with open_model_file(
            'dns_dga_inference_enrich_pipeline') as ingest_pipeline2:
        try:
            ingest_client.put_pipeline(id='dns_dga_inference_enrich_pipeline',
                                       body=ingest_pipeline2)
        except elasticsearch.RequestError as e:
            if e.error == 'script_exception':
                client_error(_build_es_script_error(e, 'ingest_pipeline2'),
                             e,
                             ctx=ctx)
            else:
                raise

    click.echo('Ensure that you have updated your packetbeat.yml config file.')
    click.echo(' - reference: ML_DGA.md #2-update-packetbeat-configuration')
    click.echo(
        'Associated rules and jobs can be found under ML-experimental-detections releases in the repo'
    )
    click.echo('To upload rules, run: kibana upload-rule <ml-rule.toml>')
    click.echo(
        'To upload ML jobs, run: es experimental upload-ml-job <ml-job.json>')
def open_job(mlad_ctx):
    """Open the context's anomaly detection job.

    Returns the (unchanged) context so calls can be chained.
    """
    es, job_id = mlad_ctx["es_object"], mlad_ctx["job_id"]
    MlClient.open_job(es, job_id=job_id)
    return mlad_ctx
def close_job(mlad_ctx):
    """Close the context's anomaly detection job.

    Returns the (unchanged) context so calls can be chained.
    """
    es, job_id = mlad_ctx["es_object"], mlad_ctx["job_id"]
    MlClient.close_job(es, job_id=job_id)
    return mlad_ctx
def ml_client(self) -> MlClient:
    """Return an ML API client bound to this instance's ES client."""
    client = MlClient(self.es_client)
    return client