def main(args):
    """Search the "message" field of Big Data Store logs for keywords that
    occurred during the run time of a job.

    Looks up the job in the cast-allocation index, then issues a second query
    filtered to the job's time range and compute nodes, aggregating hit counts
    per keyword.

    Returns:
        0 on success, 2 on bad/missing arguments, 3 when the job lookup did
        not return exactly one hit, 4 on an Elasticsearch request error.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding keywords in the "message" field during the run time of a job.'''
    )
    # type=int so the ID/size values are usable as numbers downstream
    # (argparse would otherwise deliver CLI values as strings).
    parser.add_argument('-a', '--allocationid', metavar='int', dest='allocation_id',
                        default=-1, type=int, help='The allocation ID of the job.')
    parser.add_argument('-j', '--jobid', metavar='int', dest='job_id',
                        default=-1, type=int, help='The job ID of the job.')
    parser.add_argument('-s', '--jobidsecondary', metavar='int', dest='job_id_secondary',
                        default=0, type=int,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument(
        '-t', '--target', metavar='hostname:port', dest='target', default=None,
        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".'
    )
    parser.add_argument(
        '-k', '--keywords', metavar='key', dest='keywords', nargs='*', default=['.*'],
        help='A list of keywords to search for in the Big Data Store. Case insensitive regular expressions (default : .*). If your keyword is a phrase (e.g. "xid 13") regular expressions are not supported at this time.'
    )
    parser.add_argument('-v', '--verbose', action='store_true',
                        help='Displays any logs that matched the keyword search.')
    parser.add_argument('--size', metavar='size', dest='size', default=30, type=int,
                        help='The number of results to be returned. (default=30)')
    parser.add_argument(
        '-H', '--hostnames', metavar='host', dest='hosts', nargs='*', default=None,
        help='A list of hostnames to filter the results to (filters on the "hostname" field, job independent).'
    )

    args = parser.parse_args()

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Open a connection to the elastic cluster, if this fails is wrong on the server.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")
    print("Got {0} Hit(s) for specified job, searching for keywords.".format(
        total_hits))

    # Finding no matches with valid search criteria is a legit case; mirror
    # the behavior of the sibling tools and return 0 rather than 3.
    if total_hits is None:
        print("# Sorry. Could not find any matching results.")
        return 0

    if total_hits != 1:
        print(
            "This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    # TODO make this code more fault tolerant
    hits = cast.deep_get(tr_res, "hits", "hits")
    tr_data = cast.deep_get(hits[0], "_source", "data")

    # ---------------------------------------------------------------------------------------------
    # TODO Add utility script to do this.
    # Build the hostnames string; default to the job's compute nodes.
    if args.hosts is None:
        args.hosts = tr_data.get("compute_nodes")

    hostnames = {
        "multi_match": {
            "query": " ".join(args.hosts),
            "type": "best_fields",
            "fields": ["hostname", "source"],
            "tie_breaker": 0.3,
            "minimum_should_match": 1
        }
    }

    # ---------------------------------------------------------------------------------------------
    # TODO Add a utility script to manage this.
    date_format = '%Y-%m-%d %H:%M:%S.%f'
    search_format = 'epoch_millis'

    # Determine the timerange.
    # NOTE: strftime('%s') (epoch seconds) is a glibc extension — not
    # portable to all platforms.
    start_time = datetime.strptime(tr_data.get("begin_time"), date_format)
    timestamp_range = {
        "gte": start_time.strftime('%s000'),
        "format": search_format
    }

    # If a history is present end_time is end_time, otherwise it's now.
    if "history" in tr_data:
        end_time = datetime.strptime(
            tr_data.get("history").get("end_time"), date_format)
        timestamp_range["lte"] = end_time.strftime('%s999')

    timerange = {"range": {"@timestamp": timestamp_range}}

    # ---------------------------------------------------------------------------------------------
    # Build the message query: one "should" clause per keyword plus a filter
    # aggregation (keyed by the keyword) to count matches per keyword.
    keywords = {}
    should_keywords = []
    for key in args.keywords:
        if key.find(" ") == -1:
            # Single words are treated as case-insensitive regexes.
            should = {"regexp": {"message": key.lower()}}
        else:
            # Phrases fall back to a literal match_phrase (no regex support).
            should = {"match_phrase": {"message": key}}

        should_keywords.append(should)
        keywords[key] = {"filter": should}

    # ---------------------------------------------------------------------------------------------
    # Submit the query.
    body = {
        "query": {
            "bool": {
                "must": [timerange, hostnames, {
                    "exists": {
                        "field": "message"
                    }
                }],
                "should": should_keywords,
                "minimum_should_match": 1
            }
        },
        "sort": ["timestamp"],
        "_source": ["timestamp", "message", "hostname"],
        "size": args.size,
        "aggs": keywords
    }

    try:
        key_res = es.search(index="_all", body=body)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    # Print the count table.
    total = cast.deep_get(key_res, 'hits', 'total')
    print("Got {0} keyword hits:\n".format(total))

    aggregations = key_res.get("aggregations")

    # Pad the keyword column to the longest keyword (minimum 7 chars).
    max_width = 7
    for key in args.keywords:
        max_width = max(max_width, len(key))

    print('{0: >{1}} | Count'.format("Keyword", max_width))
    for agg in aggregations:
        print('{0: >{1}} | {2}'.format(
            agg, max_width, cast.deep_get(aggregations, agg, "doc_count")))
    print(" ")

    # Verbosely print the hits.
    if args.verbose:
        hits = key_res.get('hits', {"hits": []})["hits"]
        print("Displaying {0} of {1} logs:".format(len(hits), total))
        for hit in hits:
            source = hit["_source"]
            print("{0} {1} | {2}".format(source.get("timestamp"),
                                         source.get("hostname"),
                                         source.get("message")))
def main(args):
    """Report metrics (min/max/avg/std-dev and optional correlations) for the
    supplied fields over the nodes participating in a job.

    Looks up the job in the cast-allocation index, then runs an aggregation
    query (extended_stats per field plus matrix_stats) over the job's time
    range and compute nodes.

    Returns:
        0 on success, 2 on bad/missing arguments, 3 when the job lookup did
        not return exactly one hit, 4 on an Elasticsearch request error.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding metrics about the nodes participating in the supplied job id.'''
    )
    # type=int is required: args.allocation_id is compared with "> 0" below,
    # which would be a str/int comparison (TypeError on Python 3) otherwise.
    parser.add_argument('-a', '--allocationid', metavar='int', dest='allocation_id',
                        default=-1, type=int, help='The allocation ID of the job.')
    parser.add_argument('-j', '--jobid', metavar='int', dest='job_id',
                        default=-1, type=int, help='The job ID of the job.')
    parser.add_argument('-s', '--jobidsecondary', metavar='int', dest='job_id_secondary',
                        default=0, type=int,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument(
        '-t', '--target', metavar='hostname:port', dest='target', default=None,
        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".'
    )
    parser.add_argument('-H', '--hostnames', metavar='host', dest='hosts',
                        nargs='*', default=None,
                        help='A list of hostnames to filter the results to.')
    parser.add_argument(
        '-f', '--fields', metavar='field', dest='fields', nargs='*', default=None,
        help='A list of fields to retrieve metrics for (REQUIRED).')
    parser.add_argument('-i', '--index', metavar='index', dest='index',
                        default='_all',
                        help='The index to query for metrics records.')
    parser.add_argument(
        '--correlation', action='store_true',
        help="Displays the correlation between the supplied fields over the job run."
    )

    args = parser.parse_args()

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    if args.fields is None:
        print("Fields weren't set for metrics analysis.")
        return 2

    # Open a connection to the elastic cluster, if this fails is wrong on the server.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")
    print("Got {0} Hit(s) for specified job:".format(total_hits))

    # Finding no matches with valid search criteria is a legit case: return 0.
    if total_hits is None:
        print("# Sorry. Could not find any matching results.")
        return 0

    if total_hits != 1:
        print(
            "This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    hits = cast.deep_get(tr_res, "hits", "hits")
    allocation = cast.deep_get(hits[0], "_source", "data")

    # ---------------------------------------------------------------------------------------------
    # Build the hostnames string; default to the job's compute nodes.
    if args.hosts is None:
        args.hosts = allocation.get("compute_nodes")

    hostnames = {
        "multi_match": {
            "query": " ".join(args.hosts),
            "type": "best_fields",
            "fields": ["hostname", "source"],
            "tie_breaker": 0.3,
            "minimum_should_match": 1
        }
    }

    # ---------------------------------------------------------------------------------------------
    date_format = '%Y-%m-%d %H:%M:%S.%f'
    search_format = 'epoch_millis'

    # Determine the timerange.
    # NOTE: strftime('%s') (epoch seconds) is a glibc extension — not
    # portable to all platforms.
    start_time = datetime.strptime(allocation.get("begin_time"), date_format)
    timestamp_range = {
        "gte": "{0}000".format(start_time.strftime('%s')),
        "format": search_format
    }

    # If a history is present end_time is end_time, otherwise it's now.
    if "history" in allocation:
        end_time = datetime.strptime(
            allocation.get("history").get("end_time"), date_format)
        timestamp_range["lte"] = "{0}999".format(end_time.strftime('%s'))

    timerange = {"range": {"@timestamp": timestamp_range}}

    # ---------------------------------------------------------------------------------------------
    # Matrix stats are very interesting..
    stats = {"statistics": {"matrix_stats": {"fields": args.fields}}}
    for field in args.fields:
        stats[field] = {"extended_stats": {"field": field}}

    body = {
        "query": {
            "bool": {
                "must": [hostnames, timerange]
            }
        },
        "aggs": stats,
        "size": 0
    }

    try:
        key_res = es.search(
            index=args.index,  # TODO This should be replaced.
            body=body)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    if args.allocation_id > 0:
        print("\nMetric Analysis for Allocation ID {0} :\n".format(
            args.allocation_id))
    else:
        print("\nMetric Analysis for Job ID {0} - {1} :\n".format(
            args.job_id, args.job_id_secondary))

    # Print the table.
    aggs = cast.deep_get(key_res, "aggregations")
    if aggs is not None:
        max_width = len("Field")
        for agg in aggs:
            max_width = max(max_width, len(agg))

        print("{0:>{1}} | {2: >14} | {3: >14} | {4: >14} | {5: >14} | Count".
              format("Field", max_width, "Min", "Max", "Average", "Std Dev"))
        print_fmt = "{0: >{1}} | {2:>14.3f} | {3:>14.3f} | {4:>14.3f} | {5:>14.3f} | {6}"

        for agg in aggs:
            # The "statistics" (matrix_stats) entry has no min/max keys; the
            # KeyError/ValueError handlers skip it and any malformed entries.
            try:
                print(
                    print_fmt.format(agg, max_width, aggs[agg]["min"],
                                     aggs[agg]["max"], aggs[agg]["avg"],
                                     aggs[agg]["std_deviation"],
                                     aggs[agg]["count"]))
            except ValueError:
                continue
            except KeyError:
                continue

        # Print matrix stats (pairwise field correlations).
        if args.correlation:
            print("\n{0}".format("=" * 80))
            print("Field Correlations:")

            stat_fields = aggs["statistics"].get("fields", [])
            for stat in stat_fields:
                name = stat["name"]
                print("\n{0}:".format(name))

                correlation = stat["correlation"]
                corr_d = sorted(correlation.items(),
                                key=operator.itemgetter(1))
                for field in corr_d:
                    if field[0] != name:
                        print("  {0} : {1}".format(field[0], field[1]))
    else:
        print("No aggregations were found.")

    return 0
def main(args):
    """Run a weighted listing of keyword searches against a job's logs and
    present per-category aggregations of the results.

    Loads a JSON error map, looks up the job in the cast-allocation index,
    then executes one mapping query per error category over the job's time
    range and compute nodes.

    Returns:
        0 on success (or no matches), 2 on bad/missing arguments, 3 when the
        job lookup did not return exactly one hit, 4 on an Elasticsearch
        request error.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool which takes a weighted listing of keyword searches and presents aggregations of this data to the user.'''
    )
    # type=int so the ID/size values are usable as numbers downstream.
    parser.add_argument('-a', '--allocationid', metavar='int', dest='allocation_id',
                        default=-1, type=int, help='The allocation ID of the job.')
    parser.add_argument('-j', '--jobid', metavar='int', dest='job_id',
                        default=-1, type=int, help='The job ID of the job.')
    parser.add_argument('-s', '--jobidsecondary', metavar='int', dest='job_id_secondary',
                        default=0, type=int,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument(
        '-t', '--target', metavar='hostname:port', dest='target', default=None,
        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".'
    )
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='Displays the top --size logs matching the --errormap mappings.')
    parser.add_argument('--size', metavar='size', dest='size', default=10, type=int,
                        help='The number of results to be returned. (default=10)')
    parser.add_argument('-H', '--hostnames', metavar='host', dest='hosts',
                        nargs='*', default=None,
                        help='A list of hostnames to filter the results to.')
    parser.add_argument(
        '--errormap', metavar="file", dest="err_map_file", default=None,
        help='A map of errors to scan the user jobs for, including weights.')

    args = parser.parse_args()

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Load the weighted error mapping.
    error_map = None
    if args.err_map_file:
        error_map = JSONSerializer().loads(open(args.err_map_file).read())

    if error_map is None:
        parser.print_help()
        print("Error map '%s', could not be loaded" % args.err_map_file)
        return 2

    # Open a connection to the elastic cluster, if this fails is wrong on the server.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")

    # Finding no matches with valid search criteria is a legit case.
    # return 0, not 3
    if total_hits is None:
        print("# Sorry. Could not find any matching results.")
        return 0

    if total_hits != 1:
        print(
            "This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    # TODO make this code more fault tolerant
    hits = cast.deep_get(tr_res, "hits", "hits")
    tr_data = cast.deep_get(hits[0], "_source", "data")

    # ---------------------------------------------------------------------------------------------
    # Build the hostnames string; default to the job's compute nodes.
    if args.hosts is None:
        args.hosts = tr_data.get("compute_nodes")

    hostnames = {
        "multi_match": {
            "query": " ".join(args.hosts),
            "type": "best_fields",
            "fields": ["hostname", "source"],
            "tie_breaker": 0.3,
            "minimum_should_match": 1
        }
    }

    # ---------------------------------------------------------------------------------------------
    (ranges, should_match) = cast.build_timestamp_range(
        tr_data.get("begin_time"),
        cast.deep_get(tr_data, "history", "end_time"))
    ranges.append(hostnames)

    # ---------------------------------------------------------------------------------------------
    # Build a body for the mapping query.
    body = {
        "_source": ["@timestamp"],
        "size": args.size,
    }

    # Check the keywords supplied by the json.
    results = {}
    for error in error_map:
        (category, result) = build_mapping_query(es, body.copy(), ranges,
                                                 error)
        results[category] = result
    print(" ")

    # Print the results, categories ordered by descending max score.
    # dict.items() + a single-argument lambda keeps this Python-3 compatible
    # (iteritems() and tuple-parameter lambdas were removed in Python 3).
    for category, response in sorted(
            results.items(),
            key=lambda item: cast.deep_get(item[1], "hits", "max_score"),
            reverse=True):
        # Get aggregations; default to an empty dict so the sort below is
        # safe when the response has no "aggregations" key.
        aggregations = response.get("aggregations", {})
        total = cast.deep_get(response, "hits", "total")

        print("\"{0}\" Max Score : {1}".format(
            category, cast.deep_get(response, "hits", "max_score")))
        print("\"{0}\" Count : {1}".format(category, total))

        if aggregations is not None:
            # Sort aggregations by document count.
            for (aggregation, value) in sorted(
                    aggregations.items(),
                    key=lambda item: item[1].get("doc_count"),
                    reverse=True):
                print(" \"{0}\" : {1}".format(aggregation,
                                              value.get("doc_count")))

        if args.verbose:
            hits = cast.deep_get(response, "hits", "hits")
            print("\nTop {0} \"{1}\" Results:".format(len(hits), category))
            print("-" * 42)
            for hit in hits:
                print(json.dumps(hit["_source"]))
            print("=" * 42)

        print(" ")
def main(args):
    """Report when a job was running, using the big data store.

    Requires at least one of allocation ID or job ID; looks the job up in the
    cast-allocation index and prints its IDs, user and begin/end times (plus
    compute nodes when --verbose is given).

    Returns:
        0 on success (or no matches), 2 on bad/missing arguments, 3 when the
        job lookup returned more than one hit, 4 on an Elasticsearch request
        error.
    """
    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding when a job was running through use of the big data store.'''
    )
    # type=int so the "== -1" sentinel checks below see real integers even
    # when the IDs are supplied on the command line.
    parser.add_argument('-a', '--allocationid', metavar='int', dest='allocation_id',
                        default=-1, type=int, help='The allocation ID of the job.')
    parser.add_argument('-j', '--jobid', metavar='int', dest='job_id',
                        default=-1, type=int, help='The job ID of the job.')
    parser.add_argument('-s', '--jobidsecondary', metavar='int', dest='job_id_secondary',
                        default=0, type=int,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument(
        '-t', '--target', metavar='hostname:port', dest='target', default=None,
        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".'
    )
    parser.add_argument('-H', '--hostnames', metavar='host', dest='hosts',
                        nargs='*', default=None,
                        help='A list of hostnames to filter the results to ')
    parser.add_argument(
        '-v', '--verbose', action='store_true',
        help='Displays additional details about the job in the output.')

    args = parser.parse_args()

    # If allocation_id or job_id wasn't specified, printing help on failure.
    if args.allocation_id == -1 and args.job_id == -1:
        parser.print_help()
        print(
            "Missing either allocationid or jobid. Require 1 of these fields to search."
        )
        return 2

    # If the target wasn't specified check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Set up the fields for the search operation.
    # Copy the shared module-level list: appending to cast.SEARCH_JOB_FIELDS
    # directly would mutate the constant for every later caller.
    fields = list(cast.SEARCH_JOB_FIELDS)
    if args.verbose:
        fields.append("data.compute_nodes")

    # Open a connection to the elastic cluster, if this fails is wrong on the server.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary, fields=fields)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")
    print("# Found {0} matches for specified the job.".format(total_hits))

    if total_hits == 0:
        print("# Sorry. Could not find any matching results.")
        return 0

    if total_hits != 1:
        print(
            "# This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    # TODO make this code more fault tolerant
    hits = cast.deep_get(tr_res, "hits", "hits")
    if len(hits) > 0:
        tr_data = cast.deep_get(hits[0], "_source", "data")

        date_format = '%Y-%m-%d %H:%M:%S.%f'
        print_format = '%Y-%m-%d.%H:%M:%S:%f'

        # Truncate microseconds to milliseconds ([:-3]) for display.
        start_time = datetime.strptime(tr_data["begin_time"],
                                       '%Y-%m-%d %H:%M:%S.%f')
        start_time = '{0}'.format(start_time.strftime(print_format)[:-3])

        # If a history is present end_time is end_time, otherwise it's now.
        if "history" in tr_data:
            end_time = datetime.strptime(tr_data["history"]["end_time"],
                                         date_format)
            end_time = '{0}'.format(end_time.strftime(print_format)[:-3])
        else:
            end_time = "now"

        print("\nallocation-id: {0}".format(tr_data["allocation_id"]))
        print("job-id: {0} - {1}".format(tr_data["primary_job_id"],
                                         tr_data["secondary_job_id"]))
        print("user-name: {0} \nuser-id: {1}".format(tr_data["user_name"],
                                                     tr_data["user_id"]))
        print("begin-time: {0} \nend-time: {1}".format(start_time, end_time))

        if args.verbose:
            nodes = tr_data.get("compute_nodes", [])
            print('hostnames: ')
            for node in nodes:
                print(" - {0}".format(node))