Example #1
def push(td_client, logline: str, logfile_year, rindex):
    """Push the processed metrics data to the metrics service"""

    del logfile_year  # Ignored parameter for now
    try:
        emr = json.loads(logline)

        etimes_dict = emr["etimes"]
        etimes: dict = dict()
        for key, value_any in etimes_dict.items():
            val = value_any["value"]
            if "type" in value_any:
                val_type = value_any["type"]
                etimes[key] = tdp.Any(type=val_type, value=str(val))
            else:
                etimes[key] = tdp.Any(value=str(val))

        values_dict = emr["values"]
        scalars: dict = dict()
        for key in values_dict:
            value_any = values_dict[key]
            val = value_any["value"]
            if "type" in value_any:
                val_type = value_any["type"]
                scalars[key] = tdp.Any(type=val_type, value=val)
            else:
                scalars[key] = tdp.Any(value=val)

        emr_meta = emr["meta"]

        training_id = emr_meta["training_id"]
        if training_id is None and "TRAINING_ID" in os.environ:
            # No trailing comma here; it would silently turn training_id into a tuple.
            training_id = os.environ["TRAINING_ID"]

        emetrics = tdp.EMetrics(
            meta=tdp.MetaInfo(
                training_id=training_id,
                time=int(emr_meta["time"]),
                rindex=int(emr_meta["rindex"]),
            ),
            grouplabel=emr["grouplabel"],
            etimes=etimes,
            values=scalars,
        )

        if td_client is not None:
            td_client.AddEMetrics(emetrics)

        # for now, print to stdout (support old endpoint).
        # TODO: Don't print to stdout for metrics
        json_form = print_json.to_string(emetrics)
        print(json_form)

    except Exception as inst:
        print("Unexpected error when attempting to process evaluation metric record:", sys.exc_info()[0])
        print(inst)
        sys.stdout.flush()

    return rindex + 1
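For orientation, here is a minimal sketch of the record shape this push() expects, with a hypothetical call. The field names mirror the lookups above; the training id and values are made up, and the project's tdp protobuf module is still required to run it.

# Hypothetical input record; field names mirror the keys read in push() above.
sample_logline = json.dumps({
    "meta": {"training_id": "training-abc123",  # made-up id
             "time": 1518004800, "rindex": 1},
    "grouplabel": "test",
    "etimes": {"iteration": {"type": "INT", "value": "100"}},
    "values": {"loss": {"type": "FLOAT", "value": "0.42"}},
})

# td_client may be None; the record is still serialized and printed.
next_rindex = push(None, sample_logline, logfile_year=None, rindex=1)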
Example #2
def emitEvalMetric(
        td_client: tdb.TrainingDataClientBuffered,
        group_label: str,  # group label, likely test or train
        iterStep,
        timestamp,
        values_dict,
        rindex: int,
        event_wall_time: float,
        sub_identifier):
    """Push the processed metrics data to the metrics service"""
    try:
        etimes = dict()

        etimes['iteration'] = tdp.Any(type=tdp.Any.INT, value=str(iterStep))
        etimes['timestamp'] = tdp.Any(type=tdp.Any.STRING, value=timestamp)
        etimes['wall_time'] = tdp.Any(type=tdp.Any.FLOAT,
                                      value=str(event_wall_time))

        etb_logger.debug("Creating emetrics record")
        emetrics = tdp.EMetrics(
            meta=tdp.MetaInfo(
                training_id=os.environ["TRAINING_ID"],
                time=extract_datetime.get_meta_timestamp(),
                rindex=int(rindex),
                subid=sub_identifier,
            ),
            grouplabel=group_label,
            etimes=etimes,
            values=values_dict,
        )

        if td_client is not None:
            etb_logger.debug("Calling AddEMetrics")
            td_client.AddEMetrics(emetrics)

    except Exception as inst:
        etb_logger.error(
            "Unexpected error when attempting to send emetrics: %s",
            sys.exc_info()[0])
        print("Unexpected error when attempting to send emetrics:",
              sys.exc_info()[0])
        print(type(inst))
        print(inst.args)
        traceback.print_exc()
        print(inst)

        sys.stdout.flush()

    return rindex + 1
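A hedged sketch of a call site for this variant, assuming the same tdp module and a TRAINING_ID environment variable (the function reads it unconditionally); every value is illustrative.

values = {"loss": tdp.Any(type=tdp.Any.FLOAT, value="0.42")}  # illustrative metric
rindex = emitEvalMetric(
    td_client=None,                    # the buffered client may be absent locally
    group_label="test",
    iterStep=100,
    timestamp="2018-02-07T12:00:00Z",  # RFC 3339, as the trackers below build it
    values_dict=values,
    rindex=1,
    event_wall_time=1518004800.0,
    sub_identifier="learner-1",        # hypothetical sub-job id
)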
Example #3
def emitEvalMetric(em_file_path: str, log_dir, td_client, group_label, iterStep,
                   timestamp, values_dict, rindex, eventWallTime):
    '''Push the processed metrics data to the metrics service'''
    try:

        etimes = dict()

        etimes['iteration'] = tdp.Any(type=tdp.Any.INT, value=str(iterStep))
        etimes['timestamp'] = tdp.Any(type=tdp.Any.STRING, value=timestamp)

        emetrics = tdp.EMetrics(
            meta=tdp.MetaInfo(
                training_id=os.environ["TRAINING_ID"],
                time=int(eventWallTime),
                rindex=int(rindex)
            ),
            grouplabel=group_label,
            etimes=etimes,
            values=values_dict
        )

        if td_client is not None:
            td_client.AddEMetrics(emetrics)

        json_form = print_json.to_string(emetrics)

        with open(em_file_path, 'a') as em_stream:
            em_stream.write(json_form)
            em_stream.write("\n")

        # for now, print to stdout.
        # TODO: Don't print to stdout for metrics
        print(json_form)

    except Exception as inst:
        print("Unexpected error when attempting to send emetrics:", sys.exc_info()[0])
        print(type(inst))
        print(inst.args)
        traceback.print_exc()
        print(inst)

        sys.stdout.flush()

    return rindex + 1
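The append loop above produces a JSON-lines file, one record per line. Assuming print_json.to_string writes the same field names that Examples #1 and #8 parse back, a line would look roughly like this (wrapped here for readability, values illustrative).

em_line = ('{"meta": {"training_id": "training-abc123", "time": 1518004800, '
           '"rindex": 1}, "grouplabel": "test", '
           '"etimes": {"iteration": {"type": "INT", "value": "100"}}, '
           '"values": {"loss": {"type": "FLOAT", "value": "0.42"}}}')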
Example #4
    def report(self, rindex: int) -> int:
        if self.is_queued_for_report():
            values_dict = {}
            event_list = self.step_to_event_list_dict[self.step_added]
            rfc3339Formatted = None
            event_wall_time = 0.0

            # Transmogrify the ScalarEvent list into a values dictionary
            for event_set in event_list:
                label = event_set.label
                event = event_set.event  # type: event_accumulator.ScalarEvent
                if event_wall_time == 0.0:
                    event_wall_time = float(event.wall_time)
                    if rfc3339Formatted is None:
                        dt = datetime.datetime.fromtimestamp(event.wall_time)
                        rfc3339Formatted = dt.isoformat("T") + "Z"

                values_dict[label] = tdp.Any(type=tdp.Any.FLOAT,
                                             value=str(event.value))

            emitEvalMetric(td_client=self.td_client,
                           group_label=self.group,
                           iterStep=self.step_added,
                           timestamp=rfc3339Formatted,
                           values_dict=values_dict,
                           rindex=rindex,
                           event_wall_time=event_wall_time,
                           sub_identifier=self.sub_identifier)
            rindex += 1
            self.step_added = -1
            self.wall_time = -1.0

        return rindex
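For context, a minimal sketch of the tracker state report() relies on. The attribute names are inferred from the method body and the class name is hypothetical; the real class surely carries more.

class ScalarTracker:  # hypothetical name
    def __init__(self, td_client, group, sub_identifier):
        self.td_client = td_client
        self.group = group                 # group label, e.g. "test" or "train"
        self.sub_identifier = sub_identifier
        self.step_to_event_list_dict = {}  # step -> list of EventSet
        self.step_added = -1               # step queued for reporting, -1 if none
        self.wall_time = -1.0

    def is_queued_for_report(self) -> bool:
        return self.step_added != -1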
Example #5
def emitEvalMetricViaTracker(em_file_path: str, tracker, iterStep, reportInterval,
                             remainderCompare, rindex):

    ea = tracker.ea
    ea.Reload() # loads events from file

    tags = ea.Tags()
    scaler_keys = tags[event_accumulator.SCALARS]
    for scaler_key in scaler_keys:

        scalerEvents = ea.Scalars(scaler_key)

        for event in scalerEvents:
            label_event_list = tracker.iterDict.get(str(event.step))
            if label_event_list is None:
                eventSet = EventSet()
                eventSet.label = scaler_key
                eventSet.event = event
                tracker.iterDict[str(event.step)] = [eventSet]
            else:
                if isInList(label_event_list, scaler_key, event.step):
                    # already been reported, continue
                    continue
                eventSet = EventSet()
                eventSet.label = scaler_key
                eventSet.event = event
                label_event_list.append(eventSet)

    latest_iter = iterStep

    for key in tracker.iterDict.keys():
        labelValueList = {}
        eventList = tracker.iterDict[key]
        foundReportable = False
        rfc3339Formatted = None
        eventWallTime = None

        for eventSet in eventList:
            if eventSet.reported == False:
                foundReportable = True
                label = eventSet.label;
                event = eventSet.event
                iterStep = event.step
                eventWallTime = event.wall_time
                if iterStep > latest_iter:
                    latest_iter = iterStep
                if rfcRFC3339Formatted == None:
                    dt = datetime.datetime.fromtimestamp(event.wall_time)
                    rfcRFC3339Formatted = dt.isoformat("T") + "Z"

                labelValueList[label] = tdp.Any(type=tdp.Any.FLOAT, value=str(event.value))
                eventSet.reported = True

        if reportInterval > 0 and ((iterStep % reportInterval) != remainderCompare):
            foundReportable = False

        if foundReportable:
            emitEvalMetric(em_file_path, tracker.log_dir, tracker.tdClient, tracker.group,
                           iterStep, rfc3339Formatted, labelValueList, rindex, eventWallTime)
            rindex += 1

    return latest_iter, rindex
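EventSet and isInList are used here but never defined in these snippets. A minimal sketch consistent with the call sites; the isInList signature is inferred, so treat both as assumptions.

class EventSet:
    """One TensorBoard scalar event plus the tag it was read under."""

    def __init__(self):
        self.label = None      # scalar tag, e.g. "loss"
        self.event = None      # an event_accumulator.ScalarEvent
        self.reported = False  # flipped to True once emitted


def isInList(label_event_list, label, step) -> bool:
    """Return True if an event with this label at this step is already tracked."""
    return any(es.label == label and es.event.step == step
               for es in label_event_list)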
Example #6
def extract_and_push_emetrics(td_client: tdb.TrainingDataClientBuffered,
                              log_file: str,
                              log_line: str,
                              logfile_year: str,
                              line_index: int,
                              record_index: int,
                              subdir: str,
                              extra_any: Any = None) -> Tuple[int, int]:

    state: ExtraPushData4RegExExtractor = extra_any

    if td_client.em_file_path is None:
        em_file_path = os.path.join(os.path.dirname(log_file),
                                    match_log_file.EMETRICS_FILE_BASE_NAME)
        td_client.set_em_file_path(em_file_path)
        logging.debug("em_file_path: %s, subdir: %s", td_client.em_file_path,
                      subdir)

    # logging.debug("push_log_line.push %d (subdir: %s) %s", line_index, subdir, log_line)
    line_index, _ = push_log_line.push(td_client, log_file, log_line,
                                       logfile_year, line_index, 0, subdir,
                                       state)

    state.line_length_stack.append(len(log_line))
    state.text_window += log_line
    if len(state.line_length_stack) > state.line_lookahead:
        length_first_line = state.line_length_stack[0]
        state.line_length_stack = state.line_length_stack[1:]
        state.text_window = state.text_window[length_first_line:]

    logging.debug("len groups: %d", len(state.groups))
    for group_key in state.groups:
        group = state.groups[group_key]
        name = group_key
        regex_expanded = group["regex_expanded"]
        matches = regex_expanded.match(state.text_window)
        if matches is not None:
            values_dict = matches.groupdict()

            etimes_descriptions: dict = group["etimes"]
            if etimes_descriptions is None:
                logging.warning("Did not find etimes! Found: ")
                for axis_key in group:
                    print("key: " + axis_key)
                    sys.stdout.flush()
                break

            etimes: dict = dict()
            for etime_key in etimes_descriptions:
                item = etimes_descriptions[etime_key]
                valOrRef: str = item["value"]
                if valOrRef.startswith("$"):
                    value_inner = valOrRef[1:]
                    value_actual = values_dict[value_inner]
                else:
                    value_actual = valOrRef
                grpc_value_type = type_string_to_grpc_type(item["type"])
                etimes[etime_key] = tdp.Any(type=grpc_value_type,
                                            value=value_actual)

            if "scalars" in group:
                scalars_descriptions: dict = group["scalars"]
            elif "values" in group:
                scalars_descriptions: dict = group["values"]
            else:
                scalars_descriptions = None

            if scalars_descriptions is None:
                logging.warning("Did not find scalars! Found: ")
                for axis_key in group:
                    print("key: " + axis_key)
                    sys.stdout.flush()
                break

            scalars: dict = dict()
            for scalar_key in scalars_descriptions:
                item = scalars_descriptions[scalar_key]
                valOrRef: str = item["value"]
                if valOrRef.startswith("$"):
                    value_inner = valOrRef[1:]
                    value_actual = values_dict[value_inner]
                else:
                    value_actual = valOrRef
                value_type = item["type"]
                grpc_value_type = type_string_to_grpc_type(value_type)
                scalars[scalar_key] = tdp.Any(type=grpc_value_type,
                                              value=value_actual)

            subid_value = subdir
            if "meta" in group:
                meta_list: dict = group["meta"]
                # if "time" in meta_list:
                #     valOrRef: str = meta_list["time"]
                #     if valOrRef.startswith("$"):
                #         value_ref = valOrRef[1:]
                #         # date_string = values_dict[value_ref]
                #     else:
                #         # date_string = valOrRef
                #         pass
                if "subid" in meta_list:
                    valOrRef: str = meta_list["subid"]
                    if valOrRef.startswith("$"):
                        logging.debug(
                            "resetting subdir(subid): %s, metalist: %r",
                            subid_value, meta_list)
                        value_ref = valOrRef[1:]
                        subid_value = values_dict[value_ref]
                    elif not valOrRef == "":
                        logging.debug(
                            "resetting subdir(subid): %s, metalist: %r",
                            subid_value, meta_list)
                        subid_value = valOrRef

            logging.debug(
                "about to push evaluation metrics with subdir(subid): %s",
                subid_value)

            # At this point, don't keep trying to get a start time if we haven't already
            state.did_get_good_time = True
            emetrics = tdp.EMetrics(meta=tdp.MetaInfo(
                training_id=os.environ["TRAINING_ID"],
                time=edt.get_meta_timestamp(),
                rindex=record_index,
                subid=subid_value),
                                    grouplabel=name,
                                    etimes=etimes,
                                    values=scalars)
            record_index += 1

            if td_client is not None:
                td_client.AddEMetrics(emetrics)

            state.text_window = ""
            state.line_length_stack = []
            break

    return line_index, record_index
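Both this function and extract() in Example #7 below walk a groups mapping taken from the manifest. A hedged sketch of one entry, with the structure inferred from the lookups above and illustrative names: named groups in regex_expanded populate values_dict, and a leading "$" in a value field dereferences one of those groups.

import re

groups = {
    "test": {  # the group key doubles as the grouplabel
        "regex_expanded": re.compile(
            r"Iter (?P<iteration>\d+).*loss = (?P<loss>[0-9.]+)"),
        "etimes": {
            "iteration": {"type": "INT", "value": "$iteration"},  # "$" = regex ref
        },
        "scalars": {  # "values" is accepted as an alias
            "loss": {"type": "FLOAT", "value": "$loss"},
        },
        "meta": {"subid": ""},  # optional; a "$ref" or a literal
    },
}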
Example #7
def extract(em_file_path: str, manifest: str, follow: bool, should_connect: bool=True):
    dir_path = os.path.dirname(os.path.realpath(__file__))
    symbol_dict: Dict[str, str] = read_symbol_libs(dir_path)

    evaluation_metrics_spec = read_extract_description(manifest, symbol_dict)

    logfile = evaluation_metrics_spec["in"]

    job_directory = os.environ["JOB_STATE_DIR"]
    regex = r"\$JOB_STATE_DIR"
    logfile = re.sub(regex, job_directory, logfile, 0)

    # Not sure why I seem to lose the underscore somewhere along the line.
    if "line_lookahead" in evaluation_metrics_spec:
        line_lookahead: int = int(evaluation_metrics_spec["line_lookahead"])
    elif "linelookahead" in evaluation_metrics_spec:
        line_lookahead: int = int(evaluation_metrics_spec["linelookahead"])
    else:
        line_lookahead: int = 4

    groups: dict = evaluation_metrics_spec["groups"]

    line_length_stack: List[int] = []
    text_window = ""
    record_index = 0
    read_pos = 0
    line_index = 1

    learner_job_is_running = True
    logfile_year = None
    start_time = None  # set to a datetime once one is parsed from the log
    did_get_good_time: bool = False

    if should_connect:
        tdClient = connect.get_connection()
    else:
        tdClient = None

    while learner_job_is_running:
        if os.path.exists(logfile):
            if logfile_year is None:
                logfile_year = extract_datetime.get_log_created_year(logfile)

            with open(logfile, 'r') as log_stream:
                log_stream.seek(read_pos)

                try:
                    for line in log_stream:
                        # Do our best to get a good start time.
                        if not did_get_good_time:
                            # keep trying to get a good start time from the log line, until it's pointless
                            start_time, did_get_good_time = \
                                extract_datetime.extract_datetime(line, logfile_year, start_time)

                        line_index = push_log_line.push(tdClient, line, logfile_year, line_index)

                        line_length_stack.append(len(line))
                        text_window += line
                        if len(line_length_stack) > line_lookahead:
                            length_first_line = line_length_stack[0]
                            line_length_stack = line_length_stack[1:]
                            text_window = text_window[length_first_line:]

                        for group_key in groups:
                            group = groups[group_key]
                            name = group_key
                            regex_expanded = group["regex_expanded"]
                            matches = regex_expanded.match(text_window)
                            if matches is not None:
                                values_dict = matches.groupdict()

                                etimes_descriptions: dict = group["etimes"]
                                if etimes_descriptions is None:
                                    print("Did not find etimes! Found: ")
                                    for axis_key in group:
                                        print("key: "+axis_key)
                                        sys.stdout.flush()
                                    break

                                etimes: dict = dict()
                                for etime_key in etimes_descriptions:
                                    item = etimes_descriptions[etime_key]
                                    valOrRef: str = item["value"]
                                    if valOrRef.startswith("$"):
                                        value_inner = valOrRef[1:]
                                        value_actual = values_dict[value_inner]
                                    else:
                                        value_actual = valOrRef
                                    grpc_value_type = type_string_to_grpc_type(item["type"])
                                    etimes[etime_key] = tdp.Any(type=grpc_value_type, value=value_actual)

                                if "scalars" in group:
                                    scalars_descriptions: dict = group["scalars"]
                                elif "values" in group:
                                    scalars_descriptions: dict = group["values"]
                                else:
                                    scalars_descriptions = None

                                if scalars_descriptions is None:
                                    print("Did not find scalars! Found: ")
                                    for axis_key in group:
                                        print("key: "+axis_key)
                                        sys.stdout.flush()
                                    break

                                scalars: dict = dict()
                                for scalar_key in scalars_descriptions:
                                    item = scalars_descriptions[scalar_key]
                                    valOrRef: str = item["value"]
                                    if valOrRef.startswith("$"):
                                        value_inner = valOrRef[1:]
                                        value_actual = values_dict[value_inner]
                                    else:
                                        value_actual = valOrRef
                                    value_type = item["type"]
                                    grpc_value_type = type_string_to_grpc_type(value_type)
                                    scalars[scalar_key] = tdp.Any(type=grpc_value_type, value=value_actual)

                                date_string: str = line
                                if "meta" in group:
                                    meta_list: dict = group["meta"]
                                    if "time" in meta_list:
                                        valOrRef: str = meta_list["time"]
                                        if valOrRef.startswith("$"):
                                            value_ref = valOrRef[1:]
                                            date_string = values_dict[value_ref]
                                        else:
                                            date_string = valOrRef

                                # At this point, don't keep trying to get a start time if we haven't already
                                did_get_good_time = True
                                # TODO: pass in the type specified by the regex
                                line_time, _ = extract_datetime.extract_datetime(date_string, logfile_year, None)
                                # Caveat: timedelta.microseconds is only the sub-second
                                # component; total_seconds() would give the full elapsed time.
                                microseconds = (line_time - start_time).microseconds
                                timestamp = int(microseconds)
                                record_index += 1
                                emetrics = tdp.EMetrics(
                                    meta=tdp.MetaInfo(
                                        training_id=os.environ["TRAINING_ID"],
                                        time=timestamp,
                                        rindex=record_index
                                    ),
                                    grouplabel=name,
                                    etimes=etimes,
                                    values=scalars
                                )

                                json_form = print_json.to_string(emetrics)

                                with open(em_file_path, 'a') as em_stream:
                                    em_stream.write(json_form)
                                    em_stream.write("\n")

                                if tdClient is not None:
                                    tdClient.AddEMetrics(emetrics)

                                # for now, print to stdout (support old endpoint).
                                # TODO: Don't print to stdout for metrics
                                print(json_form)

                                text_window = ""
                                line_length_stack = []
                                break

                except Exception as inst:
                    print("Unexpected error when attempting to process evaluation metric record:",
                          sys.exc_info()[0])
                    print(inst)
                    sys.stdout.flush()

                read_pos = log_stream.tell()

            learner_job_is_running = follow

        # wait a second before reading the file again
        # (unless you want to constantly check the logs for new content?)
        time.sleep(1)
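type_string_to_grpc_type is called throughout but never shown. A minimal sketch that maps manifest type strings onto the tdp.Any enum values used in the other examples; the fallback for unknown names is an assumption.

def type_string_to_grpc_type(type_string: str):
    """Map a manifest type name onto the tdp.Any value-type enum."""
    mapping = {
        "INT": tdp.Any.INT,
        "FLOAT": tdp.Any.FLOAT,
        "STRING": tdp.Any.STRING,
    }
    # Unknown names fall back to STRING here; the real helper may differ.
    return mapping.get(type_string.upper(), tdp.Any.STRING)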
Example #8
def push(td_client: tdb.TrainingDataClientBuffered,
         log_file: str,
         log_line: str,
         logfile_year: str,
         rindex: int,
         rindex2: int,
         subdir: str,
         extra: Any = None) -> Tuple[int, int]:
    """Push the processed metrics data to the metrics service"""

    del log_file  # Ignored parameter for now
    del logfile_year  # Ignored parameter for now
    del extra  # Ignored parameter for now
    try:
        emr = json.loads(log_line)

        logging.debug("Processing etimes...")
        etimes_dict = emr["etimes"]
        etimes: dict = dict()
        for key, value_any in etimes_dict.items():
            val = value_any["value"]
            if "type" in value_any:
                val_type = value_any["type"]
                etimes[key] = tdp.Any(type=val_type, value=str(val))
            else:
                etimes[key] = tdp.Any(value=str(val))

        logging.debug("Processing values...")
        values_dict = emr["values"]
        scalars: dict = dict()
        for key in values_dict:
            value_any = values_dict[key]
            val = value_any["value"]
            if "type" in value_any:
                val_type = value_any["type"]
                scalars[key] = tdp.Any(type=val_type, value=val)
            else:
                scalars[key] = tdp.Any(value=val)

        emr_meta = emr["meta"]

        logging.debug("Getting training_id...")
        training_id = emr_meta["training_id"]
        if training_id is None and "TRAINING_ID" in os.environ:
            # No trailing comma here; it would silently turn training_id into a tuple.
            training_id = os.environ["TRAINING_ID"]

        if "time" in emr_meta and emr_meta["time"] is not None:
            time = emr_meta["time"]
        else:
            time = edt.get_meta_timestamp()

        if "subid" in emr_meta and emr_meta["subid"] is not None:
            subid = emr_meta["subid"]
        else:
            subid = subdir

        logging.debug("Assembling record...")
        emetrics = tdp.EMetrics(
            meta=tdp.MetaInfo(training_id=training_id,
                              time=time,
                              rindex=int(emr_meta["rindex"]),
                              subid=subid),
            grouplabel=emr["grouplabel"],
            etimes=etimes,
            values=scalars,
        )

        if td_client is not None:
            td_client.AddEMetrics(emetrics)

    except Exception as inst:
        print(
            "Unexpected error when attempting to process evaluation metric record:",
            sys.exc_info()[0])
        print(inst)
        sys.stdout.flush()

    return rindex + 1, rindex2
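Finally, a hedged replay sketch: feeding a previously written emetrics file back through this push(), one JSON line at a time. The file and subdirectory names are hypothetical, and td_client is whatever buffered client the caller already holds.

rindex, rindex2 = 1, 0
with open("evaluation-metrics.json") as em_stream:  # hypothetical file name
    for em_line in em_stream:
        rindex, rindex2 = push(td_client, "learner.log", em_line.rstrip("\n"),
                               logfile_year="2018", rindex=rindex,
                               rindex2=rindex2, subdir="learner-1")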