Example #1
File: utils.py Project: hysds/hysds
def get_module(m):
    """Import module and return."""

    try:
        return import_module(m)
    except ImportError:
        logger.error('Failed to import module "%s".' % m)
        raise
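A quick usage sketch (assuming get_module from the example above is importable, e.g. from hysds.utils; the module names are illustrative):

json_mod = get_module("json")        # succeeds and returns the imported module
print(json_mod.dumps({"ok": True}))  # -> {"ok": true}
get_module("no_such_module")         # logs the error and re-raises ImportError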
Example #2
def download_file(url, path, cache=False):
    """Download file/dir for input."""

    params = get_download_params(url)
    if cache:
        url_hash = hashlib.md5(url.encode()).hexdigest()

        # get the cache root from the env variable set in the top-level
        # shell script (e.g. celery_worker.sh)
        root_cache_dir = os.environ['HYSDS_ROOT_CACHE_DIR']
        logger.info("download_file(): root_cache_dir: %s" % root_cache_dir)
        hash_dir = os.path.join(root_cache_dir, 'cache', *url_hash[0:4])
        cache_dir = os.path.join(hash_dir, url_hash)
        makedirs(cache_dir)
        signal_file = os.path.join(cache_dir, '.localized')
        if os.path.exists(signal_file):
            logger.info("cache hit for {} at {}".format(url, cache_dir))
        else:
            logger.info("cache miss for {}".format(url))
            try:
                osaka.main.get(url, cache_dir, params=params)
            except Exception as e:
                shutil.rmtree(cache_dir)
                tb = traceback.format_exc()
                raise RuntimeError(
                    "Failed to download {} to cache {}: {}\n{}".format(
                        url, cache_dir, str(e), tb))
            with atomic_write(signal_file, overwrite=True) as f:
                f.write("%sZ\n" % datetime.utcnow().isoformat())
        for i in os.listdir(cache_dir):
            if i == '.localized':
                continue
            cached_obj = os.path.join(cache_dir, i)
            if os.path.isdir(cached_obj):
                dst = os.path.join(path, i) if os.path.isdir(path) else path
                try:
                    os.symlink(cached_obj, dst)
                except OSError:
                    logger.error("Failed to soft link {} to {}".format(
                        cached_obj, dst))
                    raise
            else:
                try:
                    os.symlink(cached_obj, path)
                except OSError:
                    logger.error("Failed to soft link {} to {}".format(
                        cached_obj, path))
                    raise
    else:
        return osaka.main.get(url, path, params=params)
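The cache layout above fans the URL's MD5 digest out into four single-character directories before the full-hash directory. A small sketch of the path computation (the root directory and URL are illustrative):

import hashlib
import os

root_cache_dir = "/data/work/cache-root"   # stand-in for $HYSDS_ROOT_CACHE_DIR
url = "http://example.com/granule.zip"     # illustrative input URL

url_hash = hashlib.md5(url.encode()).hexdigest()
hash_dir = os.path.join(root_cache_dir, "cache", *url_hash[0:4])  # first 4 hex chars nest the tree
cache_dir = os.path.join(hash_dir, url_hash)
print(cache_dir)  # -> /data/work/cache-root/cache/<h0>/<h1>/<h2>/<h3>/<full-md5>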
Example #3
def evaluate_user_rules_job(job_id, alias=STATUS_ALIAS):
    """
    Process all user rules in ES database and check if this job ID matches.
    If so, submit jobs. Otherwise do nothing.
    """

    time.sleep(10)  # sleep 10 seconds to allow ES documents to be indexed
    ensure_job_indexed(job_id, alias)  # ensure job is indexed

    # get all enabled user rules
    query = {
        "query": {
            "term": {
                "enabled": True
            }
        }
    }
    rules = mozart_es.query(index=USER_RULES_JOB_INDEX, body=query)
    logger.info("Total %d enabled rules to check." % len(rules))

    for rule in rules:
        time.sleep(1)  # sleep between queries

        rule = rule['_source']  # extracting _source from the rule itself
        logger.info('rule: %s' % json.dumps(rule, indent=2))

        try:
            updated_query = update_query(job_id, rule)  # check for matching rules
            rule['query'] = updated_query
            rule['query_string'] = json.dumps(updated_query)
        except Exception as e:
            logger.error("unable to update user_rule's query, skipping")
            logger.error(e)
            continue

        rule_name = rule['rule_name']
        final_qs = rule['query_string']
        logger.info("updated query: %s" % json.dumps(final_qs, indent=2))

        # check for matching rules
        try:
            result = mozart_es.es.search(index=alias, body=final_qs)
            if result['hits']['total']['value'] == 0:
                logger.info("Rule '%s' didn't match for %s" % (rule_name, job_id))
                continue
        except ElasticsearchException as e:
            logger.error("Failed to query ES")
            logger.error(e)
            continue

        doc_res = result['hits']['hits'][0]
        logger.info("Rule '%s' successfully matched for %s" % (rule_name, job_id))

        # submit trigger task
        queue_job_trigger(doc_res, rule)
        logger.info("Trigger task submitted for %s: %s" % (job_id, rule['job_type']))
    return True
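The enabled-rules lookup is a plain term query, and mozart_es wraps an elasticsearch-py client (exposed as mozart_es.es above). A minimal sketch of running the same query directly with the 7.x client, where the URL and index name are placeholders:

from elasticsearch import Elasticsearch

es = Elasticsearch(["http://localhost:9200"])             # stand-in for JOBS_ES_URL
query = {"query": {"term": {"enabled": True}}}
resp = es.search(index="user_rules-mozart", body=query)   # hypothetical index name
for hit in resp["hits"]["hits"]:
    rule = hit["_source"]
    print(rule["rule_name"], rule["job_type"])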
Example #4
File: utils.py Project: hysds/hysds
def download_file(url, path, cache=False):
    """Download file/dir for input."""

    params = get_download_params(url)
    if cache:
        url_hash = hashlib.md5(url.encode()).hexdigest()
        hash_dir = os.path.join(app.conf.ROOT_WORK_DIR, "cache",
                                *url_hash[0:4])
        cache_dir = os.path.join(hash_dir, url_hash)
        makedirs(cache_dir)
        signal_file = os.path.join(cache_dir, ".localized")
        if os.path.exists(signal_file):
            logger.info("cache hit for {} at {}".format(url, cache_dir))
        else:
            logger.info("cache miss for {}".format(url))
            try:
                osaka.main.get(url, cache_dir, params=params)
            except Exception as e:
                shutil.rmtree(cache_dir)
                tb = traceback.format_exc()
                raise RuntimeError(
                    "Failed to download {} to cache {}: {}\n{}".format(
                        url, cache_dir, str(e), tb))
            with atomic_write(signal_file, overwrite=True) as f:
                f.write("%sZ\n" % datetime.utcnow().isoformat())
        for i in os.listdir(cache_dir):
            if i == ".localized":
                continue
            cached_obj = os.path.join(cache_dir, i)
            if os.path.isdir(cached_obj):
                dst = os.path.join(path, i) if os.path.isdir(path) else path
                try:
                    os.symlink(cached_obj, dst)
                except OSError:
                    logger.error("Failed to soft link {} to {}".format(
                        cached_obj, dst))
                    raise
            else:
                try:
                    os.symlink(cached_obj, path)
                except OSError:
                    logger.error("Failed to soft link {} to {}".format(
                        cached_obj, path))
                    raise
    else:
        return osaka.main.get(url, path, params=params)
Example #5
File: utils.py Project: hysds/hysds
def get_func(f):
    """Import function and return."""

    if "." in f:
        mod_name, func_name = f.rsplit(".", 1)
        mod = get_module(mod_name)
        try:
            return getattr(mod, func_name)
        except AttributeError:
            logger.error('Failed to get function "%s" from module "%s".' %
                         (func_name, mod_name))
            raise
    else:
        try:
            return eval(f)
        except NameError:
            logger.error('Failed to get function "%s".' % (f))
            raise
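A usage sketch (assuming get_module/get_func from these examples are importable):

join = get_func("os.path.join")   # dotted path: module resolved via get_module()
print(join("a", "b"))             # -> a/b (on POSIX)

length = get_func("len")          # no dot: resolved with eval() against builtins
print(length([1, 2, 3]))          # -> 3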
Example #6
def ensure_dataset_indexed(objectid, system_version, alias):
    """Ensure dataset is indexed."""
    query = {
        "query": {
            "bool": {
                "must": [{
                    'term': {
                        '_id': objectid
                    }
                }, {
                    'term': {
                        'system_version.keyword': system_version
                    }
                }]
            }
        }
    }
    logger.info("ensure_dataset_indexed query: %s" % json.dumps(query))

    try:
        count = grq_es.get_count(index=alias, body=query)
        if count == 0:
            error_message = "Failed to find indexed dataset: %s (%s)" % (
                objectid, system_version)
            logger.error(error_message)
            raise RuntimeError(error_message)
        logger.info("Found indexed dataset: %s (%s)" %
                    (objectid, system_version))

    except ElasticsearchException as e:
        logger.error("Unable to execute query")
        logger.error(e)
Example #7
def fail_job(event, uuid, exc, short_error):
    """Set job status to job-failed."""

    query = {
        "query": {
            "bool": {
                "must": [
                    {"term": {"uuid": uuid}}
                ]
            }
        }
    }
    search_url = "%s/job_status-current/_search" % app.conf["JOBS_ES_URL"]

    headers = {"Content-Type": "application/json"}
    r = requests.post(search_url, data=json.dumps(query), headers=headers)

    if r.status_code != 200:
        logger.error("Failed to query for task UUID %s: %s" % (uuid, r.content))
        return

    result = r.json()
    total = result["hits"]["total"]["value"]
    if total == 0:
        logger.error("Failed to query for task UUID %s: %s" % (uuid, r.content))
        return

    res = result["hits"]["hits"][0]
    job_status = res["_source"]
    job_status["status"] = "job-failed"
    job_status["error"] = exc
    job_status["short_error"] = short_error
    job_status["traceback"] = event.get("traceback", "")

    time_end = datetime.utcnow().isoformat() + "Z"
    job_status.setdefault("job", {}).setdefault("job_info", {})["time_end"] = time_end
    log_job_status(job_status)
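A hypothetical invocation, e.g. from a Celery task-failure handler; all field values below are illustrative:

event = {
    "uuid": "c2b7e4a0-1111-2222-3333-444455556666",
    "traceback": "Traceback (most recent call last): ...",
}
fail_job(
    event,
    uuid=event["uuid"],
    exc="RuntimeError: work directory ran out of disk space",
    short_error="RuntimeError",
)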
Example #8
File: utils.py Project: hysds/hysds
def triage(job, ctx):
    """Triage failed job's context and job json as well as _run.sh."""

    # set time_start if not defined (job failed prior to setting it)
    if "time_start" not in job["job_info"]:
        job["job_info"]["time_start"] = "{}Z".format(
            datetime.utcnow().isoformat("T"))

    # default triage id
    default_triage_id_format = "triaged_job-{job_id}_task-{job[task_id]}"
    default_triage_id_regex = "triaged_job-(?P<job_id>.+)_task-(?P<task_id>[-\\w]+)"

    # if exit code of job command is zero, don't triage anything
    exit_code = job["job_info"]["status"]
    if exit_code == 0:
        logger.info("Job exited with exit code %s. No need to triage." %
                    exit_code)
        return True

    # disable triage
    if ctx.get("_triage_disabled", False):
        logger.info(
            "Flag _triage_disabled set to True. Not performing triage.")
        return True

    # Check if custom triage id format was provided
    if "_triage_id_format" in ctx:
        triage_id_format = ctx["_triage_id_format"]
    else:
        triage_id_format = default_triage_id_format

    # get job info
    job_dir = job["job_info"]["job_dir"]
    job_id = job["job_info"]["id"]
    logger.info("job id: {}".format(job_id))

    # Check if the job_id is a triaged dataset. If so, let's parse out the job_id
    logger.info("Checking to see if the job_id matches the regex: {}".format(
        default_triage_id_regex))
    match = re.search(default_triage_id_regex, job_id)
    if match:
        logger.info(
            "job_id matches the triage dataset regex. Parsing out job_id")
        parsed_job_id = match.groupdict()["job_id"]
        logger.info("extracted job_id: {}".format(parsed_job_id))
    else:
        logger.info(
            "job_id does not match the triage dataset regex: {}".format(
                default_triage_id_regex))
        parsed_job_id = job_id

    # create triage dataset
    # Attempt to first use triage id format from user, but if there is any problem use the default id format instead
    try:
        triage_id = triage_id_format.format(job_id=parsed_job_id,
                                            job=job,
                                            job_context=ctx)
    except Exception as e:
        logger.warning(
            "Failed to apply custom triage id format because of {}: {}. Falling back to default triage id"
            .format(e.__class__.__name__, e))
        triage_id = default_triage_id_format.format(job_id=parsed_job_id,
                                                    job=job,
                                                    job_context=ctx)
    triage_dir = os.path.join(job_dir, triage_id)
    makedirs(triage_dir)

    # create dataset json
    ds_file = os.path.join(triage_dir, "{}.dataset.json".format(triage_id))
    ds = {
        "version": "v{}".format(hysds.__version__),
        "label": "triage for job {}".format(parsed_job_id),
    }
    if "cmd_start" in job["job_info"]:
        ds["starttime"] = job["job_info"]["cmd_start"]
    if "cmd_end" in job["job_info"]:
        ds["endtime"] = job["job_info"]["cmd_end"]
    with open(ds_file, "w") as f:
        json.dump(ds, f, sort_keys=True, indent=2)

    # create met json
    met_file = os.path.join(triage_dir, "{}.met.json".format(triage_id))
    with open(met_file, "w") as f:
        json.dump(job["job_info"], f, sort_keys=True, indent=2)

    # triage job-related files
    for f in glob(os.path.join(job_dir, "_*")):
        if os.path.isdir(f):
            shutil.copytree(f, os.path.join(triage_dir, os.path.basename(f)))
        else:
            shutil.copy(f, triage_dir)

    # triage log files
    for f in glob(os.path.join(job_dir, "*.log")):
        if os.path.isdir(f):
            shutil.copytree(f, os.path.join(triage_dir, os.path.basename(f)))
        else:
            shutil.copy(f, triage_dir)

    # triage additional globs
    for g in ctx.get("_triage_additional_globs", []):
        for f in glob(os.path.join(job_dir, g)):
            f = os.path.normpath(f)
            dst = os.path.join(triage_dir, os.path.basename(f))
            if os.path.exists(dst):
                dst = "{}.{}Z".format(dst, datetime.utcnow().isoformat("T"))
            try:
                if os.path.isdir(f):
                    shutil.copytree(f, dst)
                else:
                    shutil.copy(f, dst)
            except Exception as e:
                tb = traceback.format_exc()
                logger.error(
                    "Skipping copying of {}. Got exception: {}\n{}".format(
                        f, str(e), tb))

    # publish
    prod_json = publish_dataset(triage_dir, ds_file, job, ctx)

    # write published triage to file
    pub_triage_file = os.path.join(job_dir, "_triaged.json")
    with open(pub_triage_file, "w") as f:
        json.dump(prod_json, f, indent=2, sort_keys=True)

    # signal run_job() to continue
    return True
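The custom _triage_id_format is applied with str.format and can reference job_id, job, and job_context as substitution fields. A sketch with illustrative context and job values:

ctx = {
    "_triage_id_format": "triaged_job-{job_id}-{job[job_info][time_start]}",
    "_triage_additional_globs": ["output/*.png"],
}
job = {
    "job_info": {
        "id": "job-abc", "task_id": "t-123", "status": 1,
        "time_start": "2021-01-01T00:00:00Z", "job_dir": "/data/work/job-abc",
    }
}
triage_id = ctx["_triage_id_format"].format(job_id="job-abc", job=job, job_context=ctx)
print(triage_id)  # -> triaged_job-job-abc-2021-01-01T00:00:00Z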
Example #9
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

from aws_requests_auth.boto_utils import BotoAWSRequestsAuth
from elasticsearch import RequestsHttpConnection

from hysds.celery import app
from hysds.log_utils import logger

try:
    from hysds_commons.elasticsearch_utils import ElasticsearchUtility
except (ImportError, ModuleNotFoundError):
    logger.error('Cannot import hysds_commons.elasticsearch_utils')

MOZART_ES = None
GRQ_ES = None


def get_mozart_es():
    global MOZART_ES
    if MOZART_ES is None:
        MOZART_ES = ElasticsearchUtility(app.conf.JOBS_ES_URL, logger)
    return MOZART_ES


def get_grq_es():
    global GRQ_ES

    if GRQ_ES is None:
        # NOTE: the remainder of this function is cut off in the source
        # listing; a minimal completion mirroring get_mozart_es() would be:
        GRQ_ES = ElasticsearchUtility(app.conf.GRQ_ES_URL, logger)
    return GRQ_ES
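Usage sketch: the getters memoize a single client per process, so repeated calls return the same instance (assuming the helpers above are importable):

mozart_es = get_mozart_es()
grq_es = get_grq_es()
assert get_mozart_es() is mozart_es   # same ElasticsearchUtility object is reused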
Example #10
def evaluate_user_rules_dataset(objectid,
                                system_version,
                                alias=DATASET_ALIAS,
                                job_queue=JOBS_PROCESSED_QUEUE):
    """
    Process all user rules in ES database and check if this objectid matches.
    If so, submit jobs. Otherwise do nothing.
    """

    time.sleep(10)  # sleep for 10 seconds; let any documents finish indexing in ES
    ensure_dataset_indexed(objectid, system_version, alias)  # ensure dataset is indexed

    # get all enabled user rules
    query = {"query": {"term": {"enabled": True}}}
    rules = mozart_es.query(index=USER_RULES_DATASET_INDEX, body=query)
    logger.info("Total %d enabled rules to check." % len(rules))

    for document in rules:
        time.sleep(1)  # sleep between queries

        rule = document['_source']
        logger.info("rule: %s" % json.dumps(rule, indent=2))

        try:
            updated_query = update_query(objectid, system_version, rule)
            rule['query'] = updated_query
            rule['query_string'] = json.dumps(updated_query)
        except Exception as e:
            logger.error("unable to update user_rule's query, skipping")
            logger.error(e)
            continue

        rule_name = rule['rule_name']
        job_type = rule['job_type']  # set clean descriptive job name
        final_qs = rule['query_string']
        logger.info("updated query: %s" % json.dumps(final_qs, indent=2))

        # check for matching rules
        try:
            result = grq_es.es.search(index=alias, body=final_qs)
            if result['hits']['total']['value'] == 0:
                logger.info("Rule '%s' didn't match for %s (%s)" %
                            (rule_name, objectid, system_version))
                continue
            doc_res = result['hits']['hits'][0]
            logger.info("Rule '%s' successfully matched for %s (%s)" %
                        (rule_name, objectid, system_version))
        except Exception as e:
            logger.error("Failed to query ES")
            logger.error(e)
            continue

        if job_type.startswith('hysds-io-'):
            job_type = job_type.replace('hysds-io-', '', 1)
        job_name = "%s-%s" % (job_type, objectid)

        queue_dataset_trigger(doc_res, rule, job_name)  # submit trigger task
        logger.info("Trigger task submitted for %s (%s): %s" %
                    (objectid, system_version, job_type))
    return True
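A small sketch of the job-name normalization applied above (values are illustrative):

job_type = "hysds-io-lw-tosca-ingest"            # rule['job_type'] (illustrative)
objectid = "S1-GUNW-example-dataset"             # dataset objectid (illustrative)
if job_type.startswith("hysds-io-"):
    job_type = job_type.replace("hysds-io-", "", 1)
job_name = "%s-%s" % (job_type, objectid)
print(job_name)  # -> lw-tosca-ingest-S1-GUNW-example-dataset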
Example #11
def evaluate_user_rules_dataset(
        objectid,
        system_version,
        es_url=app.conf.GRQ_ES_URL,
        alias=app.conf.DATASET_ALIAS,
        user_rules_idx=app.conf.USER_RULES_DATASET_INDEX,
        job_queue=app.conf.JOBS_PROCESSED_QUEUE):
    """Process all user rules in ES database and check if this objectid matches.
       If so, submit jobs. Otherwise do nothing."""

    # sleep for 10 seconds; let any documents finish indexing in ES
    time.sleep(10)

    # ensure dataset is indexed
    ensure_dataset_indexed(objectid, system_version, es_url, alias)

    # get all enabled user rules
    query = {"query": {"term": {"enabled": True}}}
    r = requests.post(
        '%s/%s/.percolator/_search?search_type=scan&scroll=10m&size=100' %
        (es_url, user_rules_idx),
        data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    count = scan_result['hits']['total']
    scroll_id = scan_result['_scroll_id']
    rules = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=10m' % es_url,
                          data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0:
            break
        for hit in res['hits']['hits']:
            rules.append(hit['_source'])
    logger.info("Got %d enabled rules to check." % len(rules))

    # process rules
    for rule in rules:
        # sleep between queries
        time.sleep(1)

        # check for matching rules
        update_query(objectid, system_version, rule)
        final_qs = rule['query_string']
        try:
            r = requests.post('%s/%s/_search' % (es_url, alias), data=final_qs)
            r.raise_for_status()
        except requests.RequestException:
            logger.error("Failed to query ES:\n%s" % traceback.format_exc())
            continue
        result = r.json()
        if result['hits']['total'] == 0:
            logger.info("Rule '%s' didn't match for %s (%s)" %
                        (rule['rule_name'], objectid, system_version))
            continue
        else:
            doc_res = result['hits']['hits'][0]
        logger.info("Rule '%s' successfully matched for %s (%s)" %
                    (rule['rule_name'], objectid, system_version))
        #logger.info("doc_res: %s" % json.dumps(doc_res, indent=2))

        # set clean descriptive job name
        job_type = rule['job_type']
        if job_type.startswith('hysds-io-'):
            job_type = job_type.replace('hysds-io-', '', 1)
        job_name = "%s-%s" % (job_type, objectid)

        # submit trigger task
        queue_dataset_trigger(doc_res, rule, es_url, job_name)
        logger.info("Trigger task submitted for %s (%s): %s" %
                    (objectid, system_version, rule['job_type']))

    return True
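The scan-and-scroll pagination above (and in the next example) targets the legacy Elasticsearch scan search_type. A hedged refactoring sketch that factors that loop into one helper, using the same endpoints and payloads as the code above (not valid against current Elasticsearch releases):

import json
import requests

def scroll_all_rules(es_url, user_rules_idx, query, scroll="10m", size=100):
    """Yield _source of every hit for query, following the scroll cursor."""
    url = "%s/%s/.percolator/_search?search_type=scan&scroll=%s&size=%d" % (
        es_url, user_rules_idx, scroll, size)
    r = requests.post(url, data=json.dumps(query))
    r.raise_for_status()
    scroll_id = r.json()["_scroll_id"]
    while True:
        r = requests.post("%s/_search/scroll?scroll=%s" % (es_url, scroll),
                          data=scroll_id)
        r.raise_for_status()
        res = r.json()
        scroll_id = res["_scroll_id"]
        if not res["hits"]["hits"]:
            break
        for hit in res["hits"]["hits"]:
            yield hit["_source"]

# rules = list(scroll_all_rules(es_url, user_rules_idx,
#                               {"query": {"term": {"enabled": True}}}))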
Example #12
def evaluate_user_rules_job(job_id,
                            es_url=app.conf.JOBS_ES_URL,
                            alias=app.conf.STATUS_ALIAS,
                            user_rules_idx=app.conf.USER_RULES_JOB_INDEX,
                            job_queue=app.conf.JOBS_PROCESSED_QUEUE):
    """Process all user rules in ES database and check if this job ID matches.
       If so, submit jobs. Otherwise do nothing."""

    # sleep 10 seconds to allow ES documents to be indexed
    time.sleep(10)

    # ensure job is indexed
    ensure_job_indexed(job_id, es_url, alias)

    # get all enabled user rules
    query = {"query": {"term": {"enabled": True}}}
    r = requests.post(
        '%s/%s/.percolator/_search?search_type=scan&scroll=10m&size=100' %
        (es_url, user_rules_idx),
        data=json.dumps(query))
    r.raise_for_status()
    scan_result = r.json()
    count = scan_result['hits']['total']
    scroll_id = scan_result['_scroll_id']
    rules = []
    while True:
        r = requests.post('%s/_search/scroll?scroll=10m' % es_url,
                          data=scroll_id)
        res = r.json()
        scroll_id = res['_scroll_id']
        if len(res['hits']['hits']) == 0:
            break
        for hit in res['hits']['hits']:
            rules.append(hit['_source'])
    logger.info("Got %d enabled rules to check." % len(rules))

    # process rules
    for rule in rules:
        # sleep between queries
        time.sleep(1)

        # check for matching rules
        update_query(job_id, rule)
        final_qs = rule['query_string']
        try:
            r = requests.post('%s/job_status-current/job/_search' % es_url,
                              data=final_qs)
            r.raise_for_status()
        except requests.RequestException:
            logger.error("Failed to query ES:\n%s" % traceback.format_exc())
            continue
        result = r.json()
        if result['hits']['total'] == 0:
            logger.info("Rule '%s' didn't match for %s" %
                        (rule['rule_name'], job_id))
            continue
        else:
            doc_res = result['hits']['hits'][0]
        logger.info("Rule '%s' successfully matched for %s" %
                    (rule['rule_name'], job_id))
        #logger.info("doc_res: %s" % json.dumps(doc_res, indent=2))

        # submit trigger task
        queue_job_trigger(doc_res, rule, es_url)
        logger.info("Trigger task submitted for %s: %s" %
                    (job_id, rule['job_type']))

    return True