Example #1
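These snippets are standalone main() functions lifted from a family of CAST big data query scripts; the module-level imports are not shown on this page. A plausible common header for running them, assuming the cast helper module ships alongside the scripts and that TARGET_ENV names the "CAST_ELASTIC" variable mentioned in the help text:

import argparse
import json
import operator
import os
import re
from datetime import datetime

from elasticsearch import Elasticsearch, exceptions
from elasticsearch.serializer import JSONSerializer

import cast  # CAST helper module with search_job, deep_get, etc. (assumption: distributed with these scripts).

TARGET_ENV = 'CAST_ELASTIC'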
def main(args):

    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description=
        '''A tool which takes a weighted listing of keyword searches and presents aggregations of this data to the user.'''
    )

    parser.add_argument('-a',
                        '--allocationid',
                        metavar='int',
                        dest='allocation_id',
                        default=-1,
                        help='The allocation ID of the job.')
    parser.add_argument('-j',
                        '--jobid',
                        metavar='int',
                        dest='job_id',
                        default=-1,
                        help='The job ID of the job.')
    parser.add_argument('-s',
                        '--jobidsecondary',
                        metavar='int',
                        dest='job_id_secondary',
                        default=0,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument(
        '-t',
        '--target',
        metavar='hostname:port',
        dest='target',
        default=None,
        help=
        'An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".'
    )
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='Displays the top --size logs matching the --errormap mappings.')
    parser.add_argument(
        '--size',
        metavar='size',
        dest='size',
        default=10,
        help='The number of results to be returned. (default=10)')
    parser.add_argument('-H',
                        '--hostnames',
                        metavar='host',
                        dest='hosts',
                        nargs='*',
                        default=None,
                        help='A list of hostnames to filter the results to.')
    parser.add_argument(
        '--errormap',
        metavar="file",
        dest="err_map_file",
        default=None,
        help='A map of errors to scan the user jobs for, including weights.')

    args = parser.parse_args()

    # If the target wasn't specified, check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Load the weighted error mapping.
    error_map = None
    if args.err_map_file:
        with open(args.err_map_file) as err_map_handle:
            error_map = JSONSerializer().loads(err_map_handle.read())

    if error_map is None:
        parser.print_help()
        print("Error map '%s', could not be loaded" % args.err_map_file)
        return 2

    # Open a connection to the Elasticsearch cluster; if this fails, something is wrong on the server side.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")
    # Finding no matches with valid search criteria is a legit case.
    # return 0, not 3
    if total_hits is None:
        print("# Sorry. Could not find any matching results.")
        return 0
    if total_hits != 1:
        print(
            "This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    # TODO make this code more fault tolerant
    hits = cast.deep_get(tr_res, "hits", "hits")
    tr_data = cast.deep_get(hits[0], "_source", "data")

    # ---------------------------------------------------------------------------------------------

    # Build the hostnames string:
    if args.hosts is None:
        args.hosts = tr_data.get("compute_nodes")

    hostnames = {
        "multi_match": {
            "query": " ".join(args.hosts),
            "type": "best_fields",
            "fields": ["hostname", "source"],
            "tie_breaker": 0.3,
            "minimum_should_match": 1
        }
    }

    # ---------------------------------------------------------------------------------------------

    (ranges, should_match) = cast.build_timestamp_range(
        tr_data.get("begin_time"), cast.deep_get(tr_data, "history",
                                                 "end_time"))

    ranges.append(hostnames)

    # ---------------------------------------------------------------------------------------------
    # Build a body for the mapping query.
    body = {
        "_source": ["@timestamp"],
        "size": args.size,
    }

    # Check the keywords supplied by the JSON error map.
    results = {}
    for error in error_map:
        (category, result) = build_mapping_query(es, body.copy(), ranges,
                                                 error)
        results[category] = result

    print(" ")
    # Print the results.
    for category, response in sorted(
            results.items(),
            key=lambda kv: cast.deep_get(kv[1], "hits", "max_score") or 0,
            reverse=True):

        # Get aggregations.
        aggregations = response.get("aggregations", {})
        total = cast.deep_get(response, "hits", "total")

        print("\"{0}\" Max Score : {1}".format(
            category, cast.deep_get(response, "hits", "max_score")))
        print("\"{0}\" Count : {1}".format(category, total))

        if aggregations:
            # Sort aggregations by document count.
            for (aggregation, value) in sorted(
                    aggregations.items(),
                    key=lambda kv: kv[1].get("doc_count"),
                    reverse=True):
                print("  \"{0}\" : {1}".format(aggregation,
                                               value.get("doc_count")))

        if args.verbose:
            hits = cast.deep_get(response, "hits", "hits")

            print("\nTop {0} \"{1}\" Results:".format(len(hits), category))
            print("-" * 42)
            for hit in hits:
                print(json.dumps(hit["_source"]))
        print("=" * 42)
        print(" ")
Example #2
def main(args):

    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description=
        '''A tool for finding keywords in the "message" field during the run time of a job.'''
    )

    parser.add_argument('-a',
                        '--allocationid',
                        metavar='int',
                        dest='allocation_id',
                        default=-1,
                        help='The allocation ID of the job.')
    parser.add_argument('-j',
                        '--jobid',
                        metavar='int',
                        dest='job_id',
                        default=-1,
                        help='The job ID of the job.')
    parser.add_argument('-s',
                        '--jobidsecondary',
                        metavar='int',
                        dest='job_id_secondary',
                        default=0,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument(
        '-t',
        '--target',
        metavar='hostname:port',
        dest='target',
        default=None,
        help=
        'An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".'
    )
    parser.add_argument(
        '-k',
        '--keywords',
        metavar='key',
        dest='keywords',
        nargs='*',
        default=['.*'],
        help=
        'A list of keywords to search for in the Big Data Store. Case insensitive regular expressions (default : .*). If your keyword is a phrase (e.g. "xid 13") regular expressions are not supported at this time.'
    )
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='Displays any logs that matched the keyword search.')
    parser.add_argument(
        '--size',
        metavar='size',
        dest='size',
        default=30,
        help='The number of results to be returned. (default=30)')
    parser.add_argument(
        '-H',
        '--hostnames',
        metavar='host',
        dest='hosts',
        nargs='*',
        default=None,
        help=
        'A list of hostnames to filter the results to (filters on the "hostname" field, job independent).'
    )

    args = parser.parse_args()

    # If the target wasn't specified, check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Open a connection to the Elasticsearch cluster; if this fails, something is wrong on the server side.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")

    print("Got {0} Hit(s) for specified job, searching for keywords.".format(
        total_hits))
    if total_hits != 1:
        print(
            "This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    # TODO make this code more fault tolerant
    hits = cast.deep_get(tr_res, "hits", "hits")
    tr_data = cast.deep_get(hits[0], "_source", "data")

    # ---------------------------------------------------------------------------------------------
    # TODO Add utility script to do this.

    # Build the hostnames string:
    if args.hosts is None:
        args.hosts = tr_data.get("compute_nodes")

    hostnames = {
        "multi_match": {
            "query": " ".join(args.hosts),
            "type": "best_fields",
            "fields": ["hostname", "source"],
            "tie_breaker": 0.3,
            "minimum_should_match": 1
        }
    }

    # ---------------------------------------------------------------------------------------------
    # TODO Add a utility script to manage this.

    date_format = '%Y-%m-%d %H:%M:%S.%f'
    print_format = '%Y-%m-%d %H:%M:%S:%f'
    search_format = 'epoch_millis'

    # Determine the timerange:
    start_time = datetime.strptime(tr_data.get("begin_time"), date_format)

    timestamp_range = {
        "gte": start_time.strftime('%s000'),
        "format": search_format
    }

    # If a history is present, bound the range by its end_time; otherwise leave it open-ended (the job is still running).
    if "history" in tr_data:
        end_time = datetime.strptime(
            tr_data.get("history").get("end_time"), date_format)
        timestamp_range["lte"] = end_time.strftime('%s999')

    timerange = {"range": {"@timestamp": timestamp_range}}
    # ---------------------------------------------------------------------------------------------

    # Build the message query.
    keywords = {}
    should_keywords = []
    for key in args.keywords:
        if " " not in key:
            should = {"regexp": {"message": key.lower()}}
        else:
            should = {"match_phrase": {"message": key}}

        should_keywords.append(should)

        keywords[key] = {"filter": should}

    # ---------------------------------------------------------------------------------------------

    # Submit the query
    body = {
        "query": {
            "bool": {
                "must":
                [timerange, hostnames, {
                    "exists": {
                        "field": "message"
                    }
                }],
                "should": should_keywords,
                "minimum_should_match": 1
            }
        },
        "sort": ["timestamp"],
        "_source": ["timestamp", "message", "hostname"],
        "size": args.size,
        "aggs": keywords
    }

    try:
        key_res = es.search(index="_all", body=body)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    # Print the count table.
    total = cast.deep_get(key_res, 'hits', 'total')
    print("Got {0} keyword hits:\n".format(total))

    aggregations = key_res.get("aggregations")

    max_width = 7
    for key in args.keywords:
        max_width = max(max_width, len(key))

    print('{0: >{1}} | Count'.format("Keyword", max_width))
    for agg in aggregations:
        print('{0: >{1}} | {2}'.format(
            agg, max_width, cast.deep_get(aggregations, agg, "doc_count")))

    print(" ")

    # Verbosely print the hits
    if args.verbose:
        hits = key_res.get('hits', {"hits": []})["hits"]
        print("Displaying {0} of {1} logs:".format(len(hits), total))
        for hit in hits:
            source = hit["_source"]
            print("{0} {1} | {2}".format(source.get("timestamp"),
                                         source.get("hostname"),
                                         source.get("message")))
Example #3
def main(args):

    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding a list of the supplied user's jobs.''')
    

    parser.add_argument( '-u', '--user', metavar='username', dest='user', default=None,
        help="The user name to perform the query on, either this or -U must be set.")
    parser.add_argument( '-U', '--userid', metavar='userid', dest='userid', default=None,
        help="The user id to perform the query on, either this or -u must be set.")
    parser.add_argument( '--size', metavar='size', dest='size', default=1000,
        help='The number of results to be returned. (default=1000)')
    parser.add_argument( '--state', metavar='state', dest='state', default=None, 
        help='Searches for jobs matching the supplied state.')

    parser.add_argument( '--starttime', metavar='YYYY-MM-DDTHH:MM:SS', dest='starttime', default=None,
        help='A timestamp representing the beginning of the absolute range to look for failed jobs, if not set no lower bound will be imposed on the search.')
    parser.add_argument( '--endtime', metavar='YYYY-MM-DDTHH:MM:SS', dest='endtime', default=None,
        help='A timestamp representing the ending of the absolute range to look for failed jobs, if not set no upper bound will be imposed on the search.')

    # TODO should this be a percentage?
    parser.add_argument( '--commonnodes', metavar='threshold', dest='commonnodes', default=-1, type=int,
        help='Displays a list of nodes that the user jobs had in common if set. Only nodes with collisions exceeding the threshold are shown. (Default: -1)')

    parser.add_argument( '-v', '--verbose', action='store_true',
        help='Displays all retrieved fields from the `cast-allocation` index.')

    parser.add_argument( '-t', '--target', metavar='hostname:port', dest='target', default=None, 
        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')


    args = parser.parse_args()

    # If the target wasn't specified, check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    if args.user is None and args.userid is None:
        parser.print_help()
        print("Missing user, --user or --userid must be supplied.")
        return 2
    
    # Open a connection to the Elasticsearch cluster; if this fails, something is wrong on the server side.
    es = Elasticsearch(
        args.target, 
        sniff_on_start=True,
        sniff_on_connection_fail=True,
        sniffer_timeout=60
    )

    # Amend compute nodes for common node search (copy to avoid mutating the shared list).
    fields = list(cast.USER_JOB_FIELDS)
    if args.commonnodes > 0:
        fields += ["data.compute_nodes"]

    resp = cast.search_user_jobs(es,
        user_name  = args.user,
        user_id    = args.userid,
        job_state  = args.state,
        start_time = args.starttime,
        end_time   = args.endtime,
        size       = args.size,
        fields     = fields)  # Assumption: search_user_jobs accepts a field list, like search_job in Example #7.

    # Parse the response from elasticsearch.
    hits       = cast.deep_get(resp, "hits", "hits")
    total_hits = cast.deep_get(resp, "hits","total")
    node_collisions = {}

    print_fmt="{5: >10} | {0: >5} | {1: >8} | {2: <8} | {3: <26} | {4: <26}"
    print(print_fmt.format("AID", "P Job ID", "S Job ID", "Begin Time", "End Time", "State"))

    hits.sort(key=lambda x: cast.deep_get(x,"_source","data","allocation_id"), reverse=False)

    # Process hits.
    for hit in hits:
        data=cast.deep_get(hit,"_source","data")
        
        if data:
            print( print_fmt.format(
                data.get("allocation_id"), data.get("primary_job_id"), data.get("secondary_job_id"),
                data.get("begin_time"), cast.deep_get(data, "history","end_time"),
                data.get("state")))
            
            # Generate a counter. 
            if args.commonnodes > 0:
                for node in data.get("compute_nodes"):
                    node_collisions[node] = 1 + node_collisions.get(node, 0)


    # Print out common nodes with collisions above threshold.
    if args.commonnodes > 0:
        max_width=4
        collision_found=False

        # get the max width to improve printing. 
        for key in node_collisions:
            max_width=max(len(key),max_width)

        print( "=============================" )
        print( "Nodes common between jobs:" )
        print( "=============================" )
        print("{0:>{1}} : {2}".format("node", max_width, "common count"))
        
        node_count = int(args.commonnodes)
        for key, value in sorted(node_collisions.items(), key=lambda kv: (kv[1], kv[0])):
            if value > node_count:
                collision_found = True
                print("{0:>{1}} : {2}".format(key, max_width, value))
        
        if not collision_found:
            print("No nodes exceeded collision threshold: {0}".format(args.commonnodes))
Example #4
def main(args):

    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding jobs running at the specified time.''')
    parser.add_argument( '-t', '--target', metavar='hostname:port', dest='target', default=None, 
        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')
    parser.add_argument( '-T', '--time', metavar='YYYY-MM-DD HH:MM:SS', dest='timestamp', default="now",
        help='A timestamp representing a point in time to search for all running CSM Jobs. HH, MM, SS are optional, if not set they will be initialized to 0. (default=now)')
    parser.add_argument( '-s', '--size', metavar='size', dest='size', default=1000,
        help='The number of results to be returned. (default=1000)')
    parser.add_argument( '-H', '--hostnames', metavar='host', dest='hosts', nargs='*', default=None,
        help='A list of hostnames to filter the results to.')

    args = parser.parse_args()

    # If the target wasn't specified, check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Parse the user's date.
    date_format=r'(\d{4})-(\d{1,2})-(\d{1,2})[ \.T]*(\d{0,2}):{0,1}(\d{0,2}):{0,1}(\d{0,2})'
    date_print_format='%Y-%m-%d %H:%M:%S'
    date_search_format='"yyyy-MM-dd HH:mm:ss"'

    target_date=args.timestamp
    time_search=re.search(date_format, target_date)

    # Build the target timestamp and verify validity.
    if time_search:
        (year,month,day,hour,minute,second) = time_search.groups()
        date = datetime( year=int(year), month=int(month), day=int(day), 
            hour=int(hour if hour else  0), 
            minute=int(minute if minute else 0), 
            second=int(second if second else 0) )

        target_date=datetime.strftime(date, date_print_format)

    elif target_date == "now":
        target_date=datetime.strftime(datetime.now(), date_print_format)
    else:
        parser.print_help()
        print("Invalid timestamp: {0}".format(target_date))
        return 2

    (time_range, match_min) = cast.build_target_time_search(target_date)

    bool_query={ "should" : time_range, "minimum_should_match" : match_min }

    if args.hosts:
        bool_query["must"] = { 
            "match" : { 
                "data.compute_nodes" : { "query" : " ".join(args.hosts) }
            }
        }

    body={
        "query" : {
            "bool" : bool_query
        },
        "_source" : [ "data.allocation_id", "data.primary_job_id", 
            "data.secondary_job_id", "data.begin_time", "data.history.end_time"],
        "size": args.size
    }
    

    # Open a connection to the elastic cluster.
    es = Elasticsearch(
        args.target, 
        sniff_on_start=True,
        sniff_on_connection_fail=True,
        sniffer_timeout=60
    )

    # Execute the query on the cast-allocation index.
    tr_res = es.search(
        index="cast-allocation",
        body=body
    )

    # Get Hit Data
    hits          = cast.deep_get(tr_res, "hits", "hits")
    total_hits    = cast.deep_get(tr_res, "hits","total")
    hits_displayed= len(hits)

    print("Search found {0} jobs running at '{2}', displaying {1} jobs:\n".format(total_hits, len(hits), target_date)) 

    # Display the results of the search.
    if hits_displayed > 0:
        print_fmt="{0: >13} | {1: >12} | {2: <14} | {3: <26} | {4: <26}"
        print(print_fmt.format("Allocation ID", "Prim. Job ID", "Second. Job ID", "Begin Time", "End Time"))
        for hit in hits:
            data=cast.deep_get(hit, "_source", "data")
            if data:
                print(print_fmt.format(
                    data.get("allocation_id"), data.get("primary_job_id"), data.get("secondary_job_id"),
                    data.get("begin_time"), cast.deep_get(data, "history","end_time")))
        

    return 0
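
The lenient timestamp handling above makes HH, MM and SS optional in the regular expression and defaults them to 0 when absent. A standalone sketch of just that parsing step:

import re
from datetime import datetime

date_format = r'(\d{4})-(\d{1,2})-(\d{1,2})[ \.T]*(\d{0,2}):{0,1}(\d{0,2}):{0,1}(\d{0,2})'

for raw in ("2023-07-04", "2023-07-04 13:45", "2023-07-04T13:45:09"):
    (year, month, day, hour, minute, second) = re.search(date_format, raw).groups()
    date = datetime(year=int(year), month=int(month), day=int(day),
                    hour=int(hour if hour else 0),
                    minute=int(minute if minute else 0),
                    second=int(second if second else 0))
    print(date.strftime('%Y-%m-%d %H:%M:%S'))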
Example #5
def main(args):

    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description=
        '''A tool for finding metrics about the nodes participating in the supplied job id.'''
    )

    parser.add_argument('-a',
                        '--allocationid',
                        metavar='int',
                        dest='allocation_id',
                        default=-1,
                        help='The allocation ID of the job.')
    parser.add_argument('-j',
                        '--jobid',
                        metavar='int',
                        dest='job_id',
                        default=-1,
                        help='The job ID of the job.')
    parser.add_argument('-s',
                        '--jobidsecondary',
                        metavar='int',
                        dest='job_id_secondary',
                        default=0,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument(
        '-t',
        '--target',
        metavar='hostname:port',
        dest='target',
        default=None,
        help=
        'An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".'
    )
    parser.add_argument('-H',
                        '--hostnames',
                        metavar='host',
                        dest='hosts',
                        nargs='*',
                        default=None,
                        help='A list of hostnames to filter the results to.')
    parser.add_argument(
        '-f',
        '--fields',
        metavar='field',
        dest='fields',
        nargs='*',
        default=None,
        help='A list of fields to retrieve metrics for (REQUIRED).')
    parser.add_argument('-i',
                        '--index',
                        metavar='index',
                        dest='index',
                        default='_all',
                        help='The index to query for metrics records.')
    parser.add_argument(
        '--correlation',
        action='store_true',
        help=
        "Displays the correlation between the supplied fields over the job run."
    )

    args = parser.parse_args()

    # If the target wasn't specified, check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    if args.fields is None:
        print("Fields weren't set for metrics analysis.")
        return 2

    # Open a connection to the Elasticsearch cluster; if this fails, something is wrong on the server side.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es, args.allocation_id, args.job_id,
                                 args.job_id_secondary)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")

    print("Got {0} Hit(s) for specified job:".format(total_hits))
    if total_hits == None:
        print("# Sorry. Could not find any matching results.")
        return 0
    if total_hits != 1:
        print(
            "This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    hits = cast.deep_get(tr_res, "hits", "hits")
    allocation = cast.deep_get(hits[0], "_source", "data")

    # ---------------------------------------------------------------------------------------------
    # Build the hostnames string:
    if args.hosts is None:
        args.hosts = allocation.get("compute_nodes")

    hostnames = {
        "multi_match": {
            "query": " ".join(args.hosts),
            "type": "best_fields",
            "fields": ["hostname", "source"],
            "tie_breaker": 0.3,
            "minimum_should_match": 1
        }
    }

    # ---------------------------------------------------------------------------------------------
    date_format = '%Y-%m-%d %H:%M:%S.%f'
    print_format = '%Y-%m-%d %H:%M:%S:%f'
    search_format = 'epoch_millis'

    # Determine the timerange:
    start_time = datetime.strptime(allocation.get("begin_time"), date_format)

    timestamp_range = {
        "gte": "{0}000".format(start_time.strftime('%s')),
        "format": search_format
    }

    # If a history is present, bound the range by its end_time; otherwise leave it open-ended (the job is still running).
    if "history" in allocation:
        end_time = datetime.strptime(
            allocation.get("history").get("end_time"), date_format)
        timestamp_range["lte"] = "{0}999".format(end_time.strftime('%s'))

    timerange = {"range": {"@timestamp": timestamp_range}}
    # ---------------------------------------------------------------------------------------------

    # The matrix_stats aggregation reports cross-field statistics, including correlations.
    stats = {"statistics": {"matrix_stats": {"fields": args.fields}}}

    for field in args.fields:
        stats[field] = {"extended_stats": {"field": field}}

    body = {
        "query": {
            "bool": {
                "must": [hostnames, timerange]
            }
        },
        "aggs": stats,
        "size": 0
    }

    try:
        key_res = es.search(
            index=args.index,  # TODO This should be replaced.
            body=body)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    if args.allocation_id > 0:
        print("\nMetric Analysis for Allocation ID {0} :\n".format(
            args.allocation_id))
    else:
        print("\nMetric Analysis for Job ID {0} - {1} :\n".format(
            args.job_id, args.job_id_secondary))

    # Print the table.
    aggs = cast.deep_get(key_res, "aggregations")
    if aggs is not None:
        max_width = len("Field")
        for agg in aggs:
            max_width = max(max_width, len(agg))

        print("{0:>{1}} | {2: >14} | {3: >14} | {4: >14} | {5: >14} | Count".
              format("Field", max_width, "Min", "Max", "Average", "Std Dev"))

        print_fmt = "{0: >{1}} | {2:>14.3f} | {3:>14.3f} | {4:>14.3f} | {5:>14.3f} | {6}"

        for agg in aggs:
            try:
                print(
                    print_fmt.format(agg, max_width, aggs[agg]["min"],
                                     aggs[agg]["max"], aggs[agg]["avg"],
                                     aggs[agg]["std_deviation"],
                                     aggs[agg]["count"]))
            except (ValueError, KeyError):
                continue

        # Print the matrix stats.
        if args.correlation:
            print("\n{0}".format("=" * 80))
            print("Field Correlations:")
            stat_fields = aggs["statistics"].get("fields", [])
            for stat in stat_fields:
                name = stat["name"]
                print("\n{0}:".format(name))

                correlation = stat["correlation"]
                corr_d = sorted(correlation.items(),
                                key=operator.itemgetter(1))

                for field in corr_d:
                    if field[0] != name:
                        print("  {0} : {1}".format(field[0], field[1]))

    else:
        print("No aggregations were found.")
    return 0
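
The range clause in this example relies on strftime('%s'), a platform-specific extension that is not guaranteed outside glibc systems. A sketch of the same epoch_millis construction using an explicit epoch conversion; treating the stored timestamps as UTC is an assumption of this sketch:

import json
from datetime import datetime

date_format = '%Y-%m-%d %H:%M:%S.%f'
epoch = datetime(1970, 1, 1)

def to_epoch_millis(stamp):
    delta = datetime.strptime(stamp, date_format) - epoch
    return int(delta.total_seconds() * 1000)

timestamp_range = {
    "gte": to_epoch_millis("2023-07-04 10:00:00.000000"),
    "lte": to_epoch_millis("2023-07-04 11:30:00.000000"),
    "format": "epoch_millis",
}
print(json.dumps({"range": {"@timestamp": timestamp_range}}, indent=2))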
Example #6
def main(args):

    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description='''A tool for finding jobs running during the specified time range on a specified node.''')

    parser.add_argument( '-t', '--target', metavar='hostname:port', dest='target', default=None, 
        help='An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".')
    parser.add_argument( '--starttime', metavar='YYYY-MM-DDTHH:MM:SS', dest='starttime', default=None,
        help='A timestamp representing the beginning of the absolute range to look for failed jobs, if not set no lower bound will be imposed on the search.')
    parser.add_argument( '--endtime', metavar='YYYY-MM-DDTHH:MM:SS', dest='endtime', default=None,
        help='A timestamp representing the ending of the absolute range to look for failed jobs, if not set no upper bound will be imposed on the search.')
    parser.add_argument( '-H', '--hostnames', metavar='host', dest='hosts', nargs='*', default=None,
        help='A list of hostnames to filter the results to.')
    parser.add_argument( '-s', '--size', metavar='size', dest='size', default=1000,
        help='The number of results to be returned. (default=1000)')

    args = parser.parse_args()

    # If the target wasn't specified, check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    (time_range, match_min) = cast.build_time_range(args.starttime, args.endtime)

    bool_query={ "should" : time_range, "minimum_should_match" : match_min }

    if args.hosts:
        bool_query["must"] = { 
            "match" : { 
                "data.compute_nodes" : { "query" : " ".join(args.hosts) }
            }
        }

    body={
        "query" : {
            "bool" : bool_query
        },
        "_source" : [ "data.allocation_id", "data.primary_job_id", "data.user_id", "data.user_name",
            "data.secondary_job_id", "data.begin_time", "data.history.end_time"],
        "size": args.size
    }
    

    # Open a connection to the elastic cluster.
    es = Elasticsearch(
        args.target, 
        sniff_on_start=True,
        sniff_on_connection_fail=True,
        sniffer_timeout=60
    )

    # Execute the query on the cast-allocation index.
    tr_res = es.search(
        index="cast-allocation",
        body=body
    )

    # Get Hit Data
    hits          = cast.deep_get(tr_res, "hits", "hits")
    total_hits    = cast.deep_get(tr_res, "hits","total")
    hits_displayed= len(hits)

    print("# Search found {0} jobs running, displaying {1} jobs:\n".format(total_hits, len(hits)))

    # Display the results of the search.
    if hits_displayed > 0:
        print_fmt="{5: <10} | {0: >13} | {1: >12} | {2: <14} | {3: <26} | {4: <26}"
        print(print_fmt.format("Allocation ID", "Prim. Job ID", "Second. Job ID", "Begin Time", "End Time", "User Name"))
        hits.sort(key=lambda x: cast.deep_get(x,"_source","data","allocation_id"), reverse=False)
        for hit in hits:
            data=cast.deep_get(hit, "_source", "data")
            if data:
                print(print_fmt.format(
                    data.get("allocation_id"), data.get("primary_job_id"), data.get("secondary_job_id"),
                    data.get("begin_time"), cast.deep_get(data, "history","end_time"), 
                    data.get("user_name")))
        

    return 0
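
For reference, this is roughly the request body the code above submits, rendered for a hypothetical time range and host list. The shape of cast.build_time_range's return value (a list of "should" range clauses plus a minimum_should_match count) is an assumption based on how it is used:

import json

should_ranges = [{"range": {"data.begin_time": {"lte": "2023-07-04 12:00:00"}}}]
bool_query = {"should": should_ranges, "minimum_should_match": 1}
bool_query["must"] = {
    "match": {"data.compute_nodes": {"query": " ".join(["node01", "node02"])}}
}

body = {
    "query": {"bool": bool_query},
    "_source": ["data.allocation_id", "data.primary_job_id",
                "data.secondary_job_id", "data.begin_time", "data.history.end_time"],
    "size": 1000,
}
print(json.dumps(body, indent=2))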
Example #7
def main(args):

    # Specify the arguments.
    parser = argparse.ArgumentParser(
        description=
        '''A tool for finding when a job was running through use of the big data store.'''
    )

    parser.add_argument('-a',
                        '--allocationid',
                        metavar='int',
                        dest='allocation_id',
                        default=-1,
                        help='The allocation ID of the job.')
    parser.add_argument('-j',
                        '--jobid',
                        metavar='int',
                        dest='job_id',
                        default=-1,
                        help='The job ID of the job.')
    parser.add_argument('-s',
                        '--jobidsecondary',
                        metavar='int',
                        dest='job_id_secondary',
                        default=0,
                        help='The secondary job ID of the job (default : 0).')
    parser.add_argument(
        '-t',
        '--target',
        metavar='hostname:port',
        dest='target',
        default=None,
        help=
        'An Elasticsearch server to be queried. This defaults to the contents of environment variable "CAST_ELASTIC".'
    )
    parser.add_argument('-H',
                        '--hostnames',
                        metavar='host',
                        dest='hosts',
                        nargs='*',
                        default=None,
                        help='A list of hostnames to filter the results to.')
    parser.add_argument(
        '-v',
        '--verbose',
        action='store_true',
        help='Displays additional details about the job in the output.')

    args = parser.parse_args()

    # If neither allocation_id nor job_id was specified, print help and exit.
    if args.allocation_id == -1 and args.job_id == -1:
        parser.print_help()
        print(
            "Missing both allocationid and jobid. At least one of these fields is required to search."
        )
        return 2

    # If the target wasn't specified, check the environment for the target value, printing help on failure.
    if args.target is None:
        if TARGET_ENV in os.environ:
            args.target = os.environ[TARGET_ENV]
        else:
            parser.print_help()
            print("Missing target, '%s' was not set." % TARGET_ENV)
            return 2

    # Set up the fields for the search operation (copy to avoid mutating the shared list).
    fields = list(cast.SEARCH_JOB_FIELDS)
    if args.verbose:
        fields.append("data.compute_nodes")

    # Open a connection to the Elasticsearch cluster; if this fails, something is wrong on the server side.
    es = Elasticsearch(args.target,
                       sniff_on_start=True,
                       sniff_on_connection_fail=True,
                       sniffer_timeout=60)

    # Execute the query on the cast-allocation index.
    try:
        tr_res = cast.search_job(es,
                                 args.allocation_id,
                                 args.job_id,
                                 args.job_id_secondary,
                                 fields=fields)
    except exceptions.RequestError as e:
        cast.print_request_error(e)
        return 4

    total_hits = cast.deep_get(tr_res, "hits", "total")

    print("# Found {0} matches for specified the job.".format(total_hits))
    if total_hits == 0:
        print("# Sorry. Could not find any matching results.")
        return 0
    if total_hits != 1:
        print(
            "# This implementation only supports queries where the hit count is equal to 1."
        )
        return 3

    # TODO make this code more fault tolerant
    hits = cast.deep_get(tr_res, "hits", "hits")
    if len(hits) > 0:
        tr_data = cast.deep_get(hits[0], "_source", "data")

        date_format = '%Y-%m-%d %H:%M:%S.%f'
        print_format = '%Y-%m-%d.%H:%M:%S:%f'
        search_format = '"yyyy-MM-dd HH:mm:ss:SSS"'

        start_time = datetime.strptime(tr_data["begin_time"], date_format)
        start_time = '{0}'.format(start_time.strftime(print_format)[:-3])

        # If a history is present, use its end_time; otherwise fall back to "now".
        if "history" in tr_data:
            end_time = datetime.strptime(tr_data["history"]["end_time"],
                                         date_format)
            end_time = '{0}'.format(end_time.strftime(print_format)[:-3])
        else:
            end_time = "now"

        print("\nallocation-id: {0}".format(tr_data["allocation_id"]))
        print("job-id: {0} - {1}".format(tr_data["primary_job_id"],
                                         tr_data["secondary_job_id"]))
        print("user-name: {0} \nuser-id: {1}".format(tr_data["user_name"],
                                                     tr_data["user_id"]))
        print("begin-time: {0} \nend-time: {1}".format(start_time, end_time))

        if args.verbose:
            nodes = tr_data.get("compute_nodes", [])

            print('hostnames: ')
            for node in nodes:
                print("   - {0}".format(node))