Example #1
def stop_datafeed(mlad_ctx):
    MlClient.stop_datafeed(
        mlad_ctx["es_object"],
        datafeed_id=mlad_ctx["datafeed_id"],
        body=mlad_ctx["mlad_properties"]["datafeed_stop_params"])

    return mlad_ctx
Example #2
def create_job(mlad_ctx):
    job_id = mlad_ctx["job_id"]
    job_body = mlad_ctx["mlad_properties"]["job_body"]
    job_body["results_index_name"] = job_id

    MlClient.put_job(mlad_ctx["es_object"], job_id=job_id, body=job_body)

    return mlad_ctx
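The mlad_* helpers in this section all take and return a shared context dict. A minimal sketch of what that dict might look like, with every key inferred from how the helpers read it (the values here are illustrative, not from the source). Note that these helpers call MlClient methods unbound, passing the Elasticsearch client where `self` would normally go.

from elasticsearch import Elasticsearch

# Illustrative context dict; all keys are inferred from the mlad_* helpers.
mlad_ctx = {
    "es_object": Elasticsearch("http://localhost:9200"),  # assumed local cluster
    "job_id": "20201123-cpu-reqtest",
    "datafeed_id": "datafeed-20201123-cpu-reqtest",
    "datafeed_indices": ["metrics-*"],  # illustrative index pattern
    "mlad_properties": {
        # Minimal anomaly detection job config matching the sample record
        # shown in Example #9 (mean of `cpu`, partitioned by `hostname`).
        "job_body": {
            "analysis_config": {
                "bucket_span": "15m",
                "detectors": [{"function": "mean", "field_name": "cpu",
                               "partition_field_name": "hostname"}],
            },
            "data_description": {"time_field": "@timestamp"},
        },
        "datafeed_body": {"query": {"match_all": {}}},
        "datafeed_time": {"start": "now-7d"},
        "datafeed_stop_params": {"timeout": "30s"},
        "get_records_params": {"record_score": 75},
    },
}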
Example #3
@classmethod
def get_existing_model_ids(cls, es_client: Elasticsearch) -> List[str]:
    """Get model IDs for existing ML models."""
    ml_client = MlClient(es_client)
    return [
        m['model_id']
        for m in ml_client.get_trained_models()['trained_model_configs']
        if m['model_id'] in cls.ml_manifests()
    ]
Example #4
@classmethod
def check_model_exists(cls, es_client: Elasticsearch, model_id: str) -> bool:
    """Check if a model exists on a stack by model id."""
    ml_client = MlClient(es_client)
    return model_id in [
        m['model_id']
        for m in ml_client.get_trained_models()['trained_model_configs']
    ]
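Both methods above take `cls`, and Example #12 below calls the first one as MachineLearningClient.get_existing_model_ids, so they presumably live on that class. A usage sketch under that assumption (the cluster URL and model ID are illustrative):

from elasticsearch import Elasticsearch

# MachineLearningClient is assumed importable from the surrounding project.
es = Elasticsearch("http://localhost:9200")

# List installed models that also appear in the class's manifests.
print(MachineLearningClient.get_existing_model_ids(es))

# Check for one specific model by ID.
if MachineLearningClient.check_model_exists(es, "dga_example_model"):
    print("model already installed")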
Example #5
def start_datafeed(mlad_ctx):
    MlClient.start_datafeed(mlad_ctx["es_object"],
                            datafeed_id=mlad_ctx["datafeed_id"],
                            body=mlad_ctx["mlad_properties"]["datafeed_time"])
    # Alternative: start the datafeed without an explicit time window:
    # MlClient.start_datafeed(
    #     mlad_ctx["es_object"], datafeed_id=mlad_ctx["datafeed_id"])

    return mlad_ctx
Example #6
def create_datafeed(mlad_ctx):
    # Dict union (`|`) requires Python 3.9+ (PEP 584).
    datafeed_body = mlad_ctx["mlad_properties"]["datafeed_body"] | {
        "job_id": mlad_ctx["job_id"],
        "indices": mlad_ctx["datafeed_indices"]
    }

    MlClient.put_datafeed(mlad_ctx["es_object"],
                          datafeed_id=mlad_ctx["datafeed_id"],
                          body=datafeed_body)

    return mlad_ctx
Example #7
def check_model_files(ctx):
    """Check ML model files on an elasticsearch instance."""
    import elasticsearch
    from elasticsearch.client import IngestClient, MlClient
    from .misc import get_ml_model_manifests_by_model_id

    es_client: Elasticsearch = ctx.obj['es']
    ml_client = MlClient(es_client)
    ingest_client = IngestClient(es_client)

    def safe_get(func, arg):
        try:
            return func(arg)
        except elasticsearch.NotFoundError:
            return None

    models = [
        m for m in ml_client.get_trained_models().get(
            'trained_model_configs', []) if m['created_by'] != '_xpack'
    ]

    if models:
        if len([m for m in models if m['model_id'].startswith('dga_')]) > 1:
            click.secho(
                'Multiple DGA models detected! It is not recommended to run more than one DGA model at a time',
                fg='yellow')

        manifests = get_ml_model_manifests_by_model_id()

        click.echo(f'DGA Model{"s" if len(models) > 1 else ""} found:')
        for model in models:
            manifest = manifests.get(model['model_id'])
            click.echo(
                f'    - {model["model_id"]}, associated release: {manifest.html_url if manifest else None}'
            )
    else:
        click.echo('No DGA Models found')

    support_files = {
        'create_script': safe_get(es_client.get_script, 'dga_ngrams_create'),
        'delete_script': safe_get(es_client.get_script,
                                  'dga_ngrams_transform_delete'),
        'enrich_pipeline': safe_get(ingest_client.get_pipeline,
                                    'dns_enrich_pipeline'),
        'inference_pipeline': safe_get(ingest_client.get_pipeline,
                                       'dns_dga_inference_enrich_pipeline'),
    }

    click.echo('Support Files:')
    for support_file, results in support_files.items():
        click.echo(
            f'    - {support_file}: {"found" if results else "not found"}')
Example #8
def remove_dga_model(ctx,
                     model_id,
                     force,
                     es_client: Elasticsearch = None,
                     ml_client: MlClient = None,
                     ingest_client: IngestClient = None):
    """Remove ML DGA files."""
    import elasticsearch
    from elasticsearch.client import IngestClient, MlClient

    es_client = es_client or ctx.obj['es']
    ml_client = ml_client or MlClient(es_client)
    ingest_client = ingest_client or IngestClient(es_client)

    def safe_delete(func, fid, verbose=True):
        try:
            func(fid)
        except elasticsearch.NotFoundError:
            return False
        if verbose:
            click.echo(f' - {fid} deleted')
        return True

    model_exists = False
    if not force:
        existing_models = ml_client.get_trained_models()
        model_exists = model_id in [
            m['model_id']
            for m in existing_models.get('trained_model_configs', [])
        ]

    if model_exists or force:
        if model_exists:
            click.secho('[-] Existing model detected - deleting files',
                        fg='yellow')

        deleted = [
            safe_delete(ingest_client.delete_pipeline,
                        'dns_dga_inference_enrich_pipeline'),
            safe_delete(ingest_client.delete_pipeline, 'dns_enrich_pipeline'),
            safe_delete(es_client.delete_script,
                        'dga_ngrams_transform_delete'),
            # f'{model_id}_dga_ngrams_transform_delete'
            safe_delete(es_client.delete_script, 'dga_ngrams_create'),
            # f'{model_id}_dga_ngrams_create'
            safe_delete(ml_client.delete_trained_model, model_id)
        ]

        if not any(deleted):
            click.echo('No files deleted')
    else:
        click.echo(f'Model: {model_id} not found')
Example #9
def get_records(mlad_ctx):
    job_id = mlad_ctx["job_id"]
    mlad_properties = mlad_ctx["mlad_properties"]

    result = MlClient.get_records(mlad_ctx["es_object"],
                                  job_id=job_id,
                                  body=mlad_properties["get_records_params"])
    """
    {"count":904,"records":[
        {'job_id': '20201123-cpu-reqtest',
         'result_type': 'record',
         'probability': 7.98024424988483e-61,
         'multi_bucket_impact': -5.0,
         'record_score': 98.77009619528458,
         'initial_record_score': 98.77009619528458,
         'bucket_span': 900,
         'detector_index': 0,
         'is_interim': False,
         'timestamp': 1606423500000,
         'partition_field_name': 'hostname',
         'partition_field_value': 'TANet-CCU-ASR9010-01',
         'function': 'mean',
         'function_description': 'mean',
         'typical': [2.6715904907637635],
         'actual': [86.33333333333334],
         'field_name': 'cpu',
         'influencers': [{'influencer_field_name': 'hostname',
           'influencer_field_values': ['TANet-CCU-ASR9010-01']}],
         'hostname': ['TANet-CCU-ASR9010-01']}
    ]}
    """

    mlad_ctx["ad_result"] = result
    return mlad_ctx
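A follow-on sketch showing how the ad_result payload documented above might be consumed; the score threshold and printed fields are illustrative choices:

def report_anomalies(mlad_ctx, min_record_score=75.0):
    # Walk the records returned by get_records() and print those whose
    # record_score clears the (illustrative) threshold.
    for record in mlad_ctx["ad_result"]["records"]:
        if record["record_score"] >= min_record_score:
            print(f'{record["timestamp"]}: '
                  f'{record.get("partition_field_value", "?")} '
                  f'actual={record["actual"]} typical={record["typical"]} '
                  f'score={record["record_score"]:.2f}')
    return mlad_ctx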
Example #10
@classmethod
def get_all_ml_files(cls, es_client: Elasticsearch) -> dict:
    """Get all scripts, pipelines, and models which start with ml_*."""
    pipelines = IngestClient(es_client).get_pipeline()
    scripts = es_client.cluster.state()['metadata']['stored_scripts']
    models = MlClient(es_client).get_trained_models()['trained_model_configs']
    manifests = get_ml_model_manifests_by_model_id()

    files = {
        'pipeline': {
            n: s for n, s in pipelines.items() if n.lower().startswith('ml_')
        },
        'script': {
            n: s for n, s in scripts.items() if n.lower().startswith('ml_')
        },
        'model': {
            m['model_id']: {'model': m, 'release': manifests[m['model_id']]}
            for m in models if m['model_id'] in manifests
        },
    }
    return files
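A usage sketch, assuming the method above is a classmethod on the MachineLearningClient class referenced in Example #12 (the cluster URL is illustrative):

from elasticsearch import Elasticsearch

# MachineLearningClient is assumed from the surrounding project.
es = Elasticsearch("http://localhost:9200")
files = MachineLearningClient.get_all_ml_files(es)
for kind, entries in files.items():
    print(f'{kind}: {", ".join(entries) or "none"}')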
Example #11
def ml_put_calendar_events(self, es_put_api_body):
    # If no new events exist, do nothing.
    if not es_put_api_body:
        print('No new events found.')

    # Otherwise add the new events produced by the calendar filter method.
    else:
        MlClient(self.es_auth).post_calendar_events(self.es_calendar_id, es_put_api_body, params=None, headers=None)
        print('POST command sent, applying events to ' + self.es_calendar_id + ':\n' + es_put_api_body)
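For reference, the Elastic ML calendar API wraps events in an "events" array, so a minimal es_put_api_body might look like the following sketch (the description and epoch-millisecond times are illustrative):

# Illustrative body for post_calendar_events.
es_put_api_body = {
    "events": [
        {
            "description": "scheduled maintenance window",
            "start_time": 1606420800000,  # epoch ms
            "end_time": 1606428000000,    # epoch ms
        }
    ]
}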
Example #12
def remove_model(ctx: click.Context, model_id):
    """Remove ML model files."""
    ml_client = MlClient(ctx.obj['es'])
    model_ids = MachineLearningClient.get_existing_model_ids(ctx.obj['es'])

    if not model_id:
        model_id = click.prompt('Model ID to remove',
                                type=click.Choice(model_ids))

    try:
        result = ml_client.delete_trained_model(model_id)
    except elasticsearch.ConflictError as e:
        click.echo(f'{e}: try running `remove-scripts-pipelines` first')
        ctx.exit(1)

    table = Table.from_list(['model_id', 'status'], [{
        'model_id': model_id,
        'status': result
    }])
    click.echo(table)
    return result
Example #13
def ml_get_calendar_events(self):
    # Raw data is retrieved, then filtered down to the events JSON object.
    es_get_api_raw = MlClient(self.es_auth).get_calendar_events(self.es_calendar_id, params=None, headers=None)
    es_get_api_body = es_get_api_raw["events"]

    # The loop removes fields that are not needed and converts the
    # epoch-millisecond timestamps into ISO 8601 strings in UTC.
    for element in es_get_api_body:
        del element['calendar_id']
        del element['event_id']
        element['start_time'] = datetime.datetime.fromtimestamp(
            float(element['start_time']) / 1000, tz=datetime.timezone.utc).isoformat()
        element['end_time'] = datetime.datetime.fromtimestamp(
            float(element['end_time']) / 1000, tz=datetime.timezone.utc).isoformat()

    print(json.dumps(es_get_api_body))
    return es_get_api_body
Example #14
def forecast_job(ctx):
    forecast_result = ctx["analy_es_object"].ping() and MlClient.forecast(
        ctx["analy_es_object"],
        job_id=ctx["ml_job_id"],
        params={
            "duration": "1d",
            "expires_in": "3d"
        })

    if forecast_result and forecast_result["acknowledged"]:
        ctx["forecast"]["job_id"] = forecast_result["forecast_id"]
        ctx["forecast"]["job_time"] = datetime.now().strftime(
            "%Y-%m-%d %H:%M:%S")
    else:
        # Retry recursively until the forecast is acknowledged; note that
        # this recursion is unbounded if the cluster never acknowledges.
        ctx = forecast_job(ctx)

    return ctx
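The fallback branch above retries by recursing with no cap, so a cluster that never acknowledges the forecast would recurse indefinitely. A bounded variant of the same idea, sketched as an alternative (the retry count and sleep interval are illustrative):

import time
from datetime import datetime

def forecast_job_bounded(ctx, max_attempts=5, wait_seconds=10):
    # Same forecast call as above, retried in a loop with a cap instead
    # of unbounded recursion.
    for _ in range(max_attempts):
        forecast_result = ctx["analy_es_object"].ping() and MlClient.forecast(
            ctx["analy_es_object"],
            job_id=ctx["ml_job_id"],
            params={"duration": "1d", "expires_in": "3d"})

        if forecast_result and forecast_result["acknowledged"]:
            ctx["forecast"]["job_id"] = forecast_result["forecast_id"]
            ctx["forecast"]["job_time"] = datetime.now().strftime(
                "%Y-%m-%d %H:%M:%S")
            return ctx
        time.sleep(wait_seconds)

    raise RuntimeError(f"forecast was not acknowledged after {max_attempts} attempts")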
Example #15
def delete_ml_job(ctx: click.Context, job_name, job_type, verbose=True):
    """Remove experimental ML jobs."""
    es_client: Elasticsearch = ctx.obj['es']
    ml_client = MlClient(es_client)

    try:
        if job_type == 'anomaly_detection':
            ml_client.delete_job(job_name)
        elif job_type == 'data_frame_analytic':
            ml_client.delete_data_frame_analytics(job_name)
        elif job_type == 'datafeed':
            ml_client.delete_datafeed(job_name)
        else:
            client_error(f'Unknown ML job type: {job_type}')
    except (elasticsearch.NotFoundError, elasticsearch.ConflictError) as e:
        client_error(str(e), e, ctx=ctx)

    if verbose:
        click.echo(f'Deleted {job_type} job: {job_name}')
Example #16
def upload_ml_job(ctx: click.Context, job_file, overwrite):
    """Upload experimental ML jobs."""
    es_client: Elasticsearch = ctx.obj['es']
    ml_client = MlClient(es_client)

    with open(job_file, 'r') as f:
        job = json.load(f)

    def safe_upload(func):
        # `name` and `body` are read from the enclosing scope (assigned below).
        try:
            func(name, body)
        except (elasticsearch.ConflictError,
                elasticsearch.RequestError) as err:
            if isinstance(
                    err, elasticsearch.RequestError
            ) and err.error != 'resource_already_exists_exception':
                client_error(str(err), err, ctx=ctx)

            if overwrite:
                ctx.invoke(delete_ml_job, job_name=name, job_type=job_type)
                func(name, body)
            else:
                client_error(str(err), err, ctx=ctx)

    try:
        job_type = job['type']
        name = job['name']
        body = job['body']

        if job_type == 'anomaly_detection':
            safe_upload(ml_client.put_job)
        elif job_type == 'data_frame_analytic':
            safe_upload(ml_client.put_data_frame_analytics)
        elif job_type == 'datafeed':
            safe_upload(ml_client.put_datafeed)
        else:
            client_error(f'Unknown ML job type: {job_type}')

        click.echo(f'Uploaded {job_type} job: {name}')
    except KeyError as e:
        client_error(f'{job_file} missing required info: {e}')
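The uploader above expects job_file to contain JSON with type, name, and body keys (the dispatch on job['type'] shows the accepted values). A minimal sketch of such a file, written as the equivalent Python dict; the name and body fields are illustrative:

# Illustrative job_file contents for upload_ml_job.
example_job = {
    "type": "anomaly_detection",     # or "data_frame_analytic" / "datafeed"
    "name": "experimental-example-job",
    "body": {
        "analysis_config": {
            "bucket_span": "15m",
            "detectors": [{"function": "high_count"}],
        },
        "data_description": {"time_field": "@timestamp"},
    },
}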
Example #17
def setup_dga_model(ctx, model_tag, repo, model_dir, overwrite):
    """Upload ML DGA model and dependencies and enrich DNS data."""
    import io
    import requests
    import shutil
    import zipfile

    es_client: Elasticsearch = ctx.obj['es']
    client_info = es_client.info()
    license_client = LicenseClient(es_client)

    if license_client.get()['license']['type'].lower() not in ('platinum',
                                                               'enterprise'):
        client_error(
            'You must have a platinum or enterprise subscription in order to use these ML features'
        )

    # download files if necessary
    if not model_dir:
        if not model_tag:
            client_error(
                'model-tag or model-dir required to download model files')

        click.echo(f'Downloading artifact: {model_tag}')

        release_url = f'https://api.github.com/repos/{repo}/releases/tags/{model_tag}'
        release = requests.get(release_url)
        release.raise_for_status()
        assets = [
            a for a in release.json()['assets']
            if a['name'].startswith('ML-DGA') and a['name'].endswith('.zip')
        ]

        if len(assets) != 1:
            client_error(
                f'Malformed release: expected 1 match ML-DGA zip, found: {len(assets)}!'
            )

        zipped_url = assets[0]['browser_download_url']
        zipped = requests.get(zipped_url)
        z = zipfile.ZipFile(io.BytesIO(zipped.content))

        dga_dir = get_path('ML-models', 'DGA')
        model_dir = os.path.join(dga_dir, model_tag)
        os.makedirs(dga_dir, exist_ok=True)
        shutil.rmtree(model_dir, ignore_errors=True)
        z.extractall(dga_dir)
        click.echo(f'files saved to {model_dir}')

        # read files as needed
        z.close()

    def get_model_filename(pattern):
        paths = list(Path(model_dir).glob(pattern))
        if not paths:
            client_error(
                f'{model_dir} missing files matching the pattern: {pattern}')
        if len(paths) > 1:
            client_error(
                f'{model_dir} contains multiple files matching the pattern: {pattern}'
            )

        return paths[0]

    @contextmanager
    def open_model_file(name):
        pattern = expected_ml_dga_patterns[name]
        with open(get_model_filename(pattern), 'r') as f:
            yield json.load(f)

    model_id, _ = os.path.basename(
        get_model_filename('dga_*_model.json')).rsplit('_', maxsplit=1)

    click.echo(
        f'Setting up DGA model: "{model_id}" on {client_info["name"]} ({client_info["version"]["number"]})'
    )

    # upload model
    ml_client = MlClient(es_client)
    ingest_client = IngestClient(es_client)

    existing_models = ml_client.get_trained_models()
    if model_id in [
            m['model_id']
            for m in existing_models.get('trained_model_configs', [])
    ]:
        if overwrite:
            ctx.invoke(remove_dga_model,
                       model_id=model_id,
                       es_client=es_client,
                       ml_client=ml_client,
                       ingest_client=ingest_client,
                       force=True)
        else:
            client_error(
                f'Model: {model_id} already exists on stack! Try --overwrite to force the upload'
            )

    click.secho('[+] Uploading model (may take a while)')

    with open_model_file('model') as model_file:
        try:
            ml_client.put_trained_model(model_id=model_id, body=model_file)
        except elasticsearch.ConnectionTimeout:
            msg = 'Connection timeout, try increasing timeout using `es --timeout <secs> experimental setup_dga_model`.'
            client_error(msg)

    # install scripts
    click.secho('[+] Uploading painless scripts')

    with open_model_file('dga_ngrams_create') as painless_install:
        es_client.put_script(id='dga_ngrams_create', body=painless_install)
        # f'{model_id}_dga_ngrams_create'

    with open_model_file('dga_ngrams_transform_delete') as painless_delete:
        es_client.put_script(id='dga_ngrams_transform_delete',
                             body=painless_delete)
        # f'{model_id}_dga_ngrams_transform_delete'

    # Install ingest pipelines
    click.secho('[+] Uploading pipelines')

    def _build_es_script_error(err, pipeline_file):
        error = err.info['error']
        cause = error['caused_by']

        error_msg = [
            f'Script error while uploading {pipeline_file}: {cause["type"]} - {cause["reason"]}',
            ' '.join(f'{k}: {v}' for k, v in error['position'].items()),
            '\n'.join(error['script_stack'])
        ]

        return click.style('\n'.join(error_msg), fg='red')

    with open_model_file('dns_enrich_pipeline') as ingest_pipeline1:
        try:
            ingest_client.put_pipeline(id='dns_enrich_pipeline',
                                       body=ingest_pipeline1)
        except elasticsearch.RequestError as e:
            if e.error == 'script_exception':
                client_error(_build_es_script_error(e, 'ingest_pipeline1'),
                             e,
                             ctx=ctx)
            else:
                raise

    with open_model_file(
            'dns_dga_inference_enrich_pipeline') as ingest_pipeline2:
        try:
            ingest_client.put_pipeline(id='dns_dga_inference_enrich_pipeline',
                                       body=ingest_pipeline2)
        except elasticsearch.RequestError as e:
            if e.error == 'script_exception':
                client_error(_build_es_script_error(e, 'ingest_pipeline2'),
                             e,
                             ctx=ctx)
            else:
                raise

    click.echo('Ensure that you have updated your packetbeat.yml config file.')
    click.echo('    - reference: ML_DGA.md #2-update-packetbeat-configuration')
    click.echo(
        'Associated rules and jobs can be found under ML-experimental-detections releases in the repo'
    )
    click.echo('To upload rules, run: kibana upload-rule <ml-rule.toml>')
    click.echo(
        'To upload ML jobs, run: es experimental upload-ml-job <ml-job.json>')
Example #18
def open_job(mlad_ctx):
    MlClient.open_job(mlad_ctx["es_object"], job_id=mlad_ctx["job_id"])

    return mlad_ctx
Example #19
def close_job(mlad_ctx):
    MlClient.close_job(mlad_ctx["es_object"], job_id=mlad_ctx["job_id"])

    return mlad_ctx
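Taken together, the mlad_* helpers form a pipeline over the shared context dict. A sketch of one plausible end-to-end ordering, assuming the mlad_ctx dict sketched after Example #2 and a datafeed that has finished before the stop call:

from functools import reduce

# Each helper takes and returns the context dict, so the whole anomaly
# detection lifecycle reduces to folding the steps over the context.
steps = [create_job, open_job, create_datafeed, start_datafeed,
         stop_datafeed, close_job, get_records]
mlad_ctx = reduce(lambda ctx, step: step(ctx), steps, mlad_ctx)
print(mlad_ctx["ad_result"]["count"])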
Example #20
def ml_client(self) -> MlClient:
    return MlClient(self.es_client)