Example 1
0
def push(td_client, logline: str, logfile_year, rindex):
    """Push one processed evaluation-metrics record to the metrics service.

    Parses *logline* as a JSON evaluation-metrics record, converts its
    "etimes" and "values" entries into ``tdp.Any`` protos, sends the
    resulting ``tdp.EMetrics`` to *td_client* (when one is provided), and
    prints the JSON form to stdout to support the old endpoint.

    Args:
        td_client: Training-data service client exposing ``AddEMetrics``,
            or ``None`` to skip the gRPC push.
        logline: One JSON-encoded evaluation-metrics record.
        logfile_year: Ignored for now; kept for interface compatibility.
        rindex: Current record index.

    Returns:
        ``rindex + 1``, whether or not the record was processed
        successfully (processing is best-effort).
    """

    del logfile_year  # Ignored parameter for now
    try:
        # json.loads ignores trailing whitespace, so no newline append needed.
        emr = json.loads(logline)

        def _to_any(entry, stringify):
            # Convert one {"value": ..., "type"?: ...} dict to a tdp.Any.
            # etimes values are stringified; scalar values are passed through.
            val = str(entry["value"]) if stringify else entry["value"]
            if "type" in entry:
                return tdp.Any(type=entry["type"], value=val)
            return tdp.Any(value=val)

        etimes = {key: _to_any(item, True)
                  for key, item in emr["etimes"].items()}
        scalars = {key: _to_any(item, False)
                   for key, item in emr["values"].items()}

        emr_meta = emr["meta"]

        training_id = emr_meta["training_id"]
        # Fall back to the environment when the record carries no id.
        # (Bug fix: the original had a stray trailing comma here, which
        # assigned a 1-tuple instead of the string.)
        if training_id is None and "TRAINING_ID" in os.environ:
            training_id = os.environ["TRAINING_ID"]

        emetrics = tdp.EMetrics(
            meta=tdp.MetaInfo(
                training_id=training_id,
                time=int(emr_meta["time"]),
                rindex=int(emr_meta["rindex"]),
            ),
            grouplabel=emr["grouplabel"],
            etimes=etimes,
            values=scalars,
        )

        if td_client is not None:
            td_client.AddEMetrics(emetrics)

        # for now, print to stdout (support old endpoint).
        # TODO: Don't print to stdout for metrics
        json_form = print_json.to_string(emetrics)
        print(json_form)

    except Exception as inst:
        # Best-effort: a malformed record must not kill the extractor loop.
        print("Unexpected error when attempting to process evaluation metric record:", sys.exc_info()[0])
        print(inst)
        sys.stdout.flush()

    return rindex+1
Example 2
0
    def __write_em_file_path(self, force: bool = False) -> None:
        """Append buffered emetrics records to the emetrics file.

        Serializes each record in ``self.emetrics_file_buf`` as one JSON
        line appended to ``self.em_file_path``, then drops the records
        that were successfully written from the buffer (failed records
        stay buffered for a later attempt).

        Args:
            force: Accepted for interface compatibility with callers that
                signal a forced flush; currently ignored.
        """
        # The force flag is sent by the caller, which is sent by someone else,
        # which signals intent.  Even though it's not being used right now,
        # I'd rather keep the argument for the moment.
        del force

        # Nothing to do when no target path is set or the buffer is empty.
        if self.em_file_path is not None and len(self.emetrics_file_buf):
            lines_written = 0
            try:
                self.logger.info("writing %d records to %s, thread index %d",
                                 len(self.emetrics_file_buf),
                                 self.em_file_path, threading.get_ident())

                # The lines below with "Flush nfs buffer??" are about trying to make sure the nfs cache is flushed.
                # The result of a long struggle, there's probably a better way.

                if self.last_em_file_size > 0:
                    # Flush nfs buffer??
                    # NOTE(review): os.path.exists here presumably forces the
                    # NFS client to refresh its attribute cache — confirm.
                    if not os.path.exists(self.em_file_path):
                        self.logger.error(
                            "file was created, but now it doesn't exist!!! %s",
                            self.em_file_path)

                with open(file=self.em_file_path, mode='a',
                          buffering=-1) as em_stream:
                    # One JSON record per line; count only successful writes
                    # so the finally-clause keeps failed records buffered.
                    for emetrics in self.emetrics_file_buf:
                        try:
                            json_form = print_json.to_string(emetrics)
                            em_stream.write(json_form)
                            em_stream.write("\n")
                            lines_written += 1
                        except OSError as err:
                            self.logger.warning(
                                "Unexpected error writing emetrics file: %s",
                                err)

                # Please keep this in place for now for debugging.
                # # if force:
                # Flush nfs buffer??
                # Re-open and fstat to observe the post-write file size.
                fd = os.open(self.em_file_path, os.O_RDONLY)
                after_stat = os.fstat(fd)
                os.close(fd)

                # Sanity check: the file must not shrink after appending.
                if after_stat.st_size <= self.last_em_file_size:
                    self.logger.error("what?: file grew smaller! b: %d, a: %d",
                                      self.last_em_file_size,
                                      after_stat.st_size)

                self.last_em_file_size = after_stat.st_size

            except OSError as error:  # parent of IOError, OSError *and* WindowsError where available
                self.logger.warning(
                    "Unexpected error opening emetrics file: %s", error)
            finally:
                # Drop only the records that made it to disk.
                self.emetrics_file_buf = self.emetrics_file_buf[lines_written:]
Example 3
0
def emitEvalMetric(em_file_path: str, log_dir, td_client, group_label, iterStep, timestamp, values_dict, rindex, eventWallTime):
    '''Build one evaluation-metrics record and publish it.

    The record is pushed to the training-data service (when a client is
    given), appended as a JSON line to *em_file_path*, and echoed to
    stdout for the legacy endpoint.  Returns the next record index
    (rindex + 1) whether or not publishing succeeded.
    '''
    try:
        # Step coordinates for this record: iteration number and timestamp.
        step_times = {
            'iteration': tdp.Any(type=tdp.Any.INT, value=str(iterStep)),
            'timestamp': tdp.Any(type=tdp.Any.STRING, value=timestamp),
        }

        record = tdp.EMetrics(
            meta=tdp.MetaInfo(
                training_id=os.environ["TRAINING_ID"],
                time=int(eventWallTime),
                rindex=int(rindex)
            ),
            grouplabel=group_label,
            etimes=step_times,
            values=values_dict
        )

        if td_client is not None:
            td_client.AddEMetrics(record)

        serialized = print_json.to_string(record)

        # Append the record to the emetrics file, one JSON object per line.
        with open(em_file_path, 'a') as out_stream:
            out_stream.write(serialized)
            out_stream.write("\n")

        # for now, print to stdout.
        # TODO: Don't print to stdout for metrics
        print(serialized)

    except Exception as inst:
        # Best-effort: log everything we know about the failure and move on.
        print("Unexpected error when attempting to send emetrics:", sys.exc_info()[0])
        print(type(inst))
        print(inst.args)
        traceback.print_exc()
        print(inst)

        sys.stdout.flush()

    return rindex+1
Example 4
0
def extract(em_file_path: str, manifest: str, follow: bool, should_connect: bool=True) -> None:
    """Tail a learner log file and extract evaluation-metric records.

    Loads the extraction spec named by *manifest*, then repeatedly scans
    the configured log file: every raw line is pushed to the log-line
    service, and a sliding text window is matched against each group's
    regex.  Each match is converted to a ``tdp.EMetrics`` record, pushed
    to the training-data service (when connected), appended as a JSON
    line to *em_file_path*, and printed to stdout.

    Args:
        em_file_path: File that receives one JSON record per matched group.
        manifest: Extraction description passed to read_extract_description.
        follow: When True, keep tailing the log indefinitely; when False,
            stop after one pass in which the log file exists.
        should_connect: When True, obtain a training-data service client;
            when False, run without a gRPC connection.
    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    symbol_dict: Dict[str, str] = read_symbol_libs(dir_path)

    evaluation_metrics_spec = read_extract_description(manifest, symbol_dict)

    logfile = evaluation_metrics_spec["in"]

    # Expand the $JOB_STATE_DIR placeholder in the configured log path.
    job_directory = os.environ["JOB_STATE_DIR"]
    regex = r"\$JOB_STATE_DIR"
    logfile = re.sub(regex, job_directory, logfile, 0)

    # Not sure why I seem to loose the under-bar somewhere along the line.
    if "line_lookahead" in evaluation_metrics_spec:
        line_lookahead: int = int(evaluation_metrics_spec["line_lookahead"])
    elif "linelookahead" in evaluation_metrics_spec:
        line_lookahead: int = int(evaluation_metrics_spec["linelookahead"])
    else:
        line_lookahead: int = 4

    groups: dict = evaluation_metrics_spec["groups"]

    # Sliding window over the last `line_lookahead` lines, with per-line
    # lengths tracked so the oldest line can be trimmed off the front.
    line_length_stack: List[int] = []
    text_window = ""
    record_index = 0
    read_pos = 0
    line_index = 1

    learner_job_is_running = True
    logfile_year = None
    start_time: datetime = None
    did_get_good_time: bool = False

    if should_connect:
        tdClient = connect.get_connection()
    else:
        tdClient = None

    # NOTE(review): learner_job_is_running is only updated when the logfile
    # exists, so a permanently missing logfile loops forever even when
    # follow is False — confirm this is intended.
    while learner_job_is_running:
        if os.path.exists(logfile):
            if logfile_year is None:
                logfile_year = extract_datetime.get_log_created_year(logfile)

            # Resume reading from where the previous pass stopped.
            with open(logfile, 'r') as log_stream:
                log_stream.seek(read_pos)

                try:
                    for line in iter(log_stream):
                        # Do our best to get a good start time.
                        if not did_get_good_time:
                            # keep trying to get a good start time from the log line, until it's pointless
                            start_time, did_get_good_time = \
                                extract_datetime.extract_datetime(line, logfile_year, start_time)

                        # Forward every raw line to the log-line service.
                        line_index = push_log_line.push(tdClient, line, logfile_year, line_index)

                        line_length_stack.append(len(line))
                        text_window += line
                        if len(line_length_stack) > line_lookahead:
                            length_first_line = line_length_stack[0]
                            line_length_stack = line_length_stack[1:]
                            text_window = text_window[length_first_line:]

                        for group_key in groups:
                            group = groups[group_key]
                            name = group_key
                            regex_expanded = group["regex_expanded"]
                            matches = regex_expanded.match(text_window)
                            if matches is not None:
                                values_dict = matches.groupdict()

                                # meta_dict_desc = group["meta"]
                                etimes_descriptions: dict = group["etimes"]
                                if etimes_descriptions is None:
                                    print("Did not find etimes! Found: ")
                                    for axis_key in group:
                                        print("key: "+axis_key)
                                        sys.stdout.flush()
                                    break

                                # Build etimes: "$name" values are references
                                # into the regex's named capture groups.
                                etimes: dict = dict()
                                for etime_key in etimes_descriptions:
                                    item = etimes_descriptions[etime_key]
                                    valOrRef: str = item["value"]
                                    if valOrRef.startswith("$"):
                                        value_inner = valOrRef[1:]
                                        value_actual = values_dict[value_inner]
                                    else:
                                        value_actual = valOrRef
                                    grpc_value_type = type_string_to_grpc_type(item["type"])
                                    etimes[etime_key] = tdp.Any(type=grpc_value_type, value=value_actual)

                                # Accept either spelling of the scalar section.
                                if "scalars" in group:
                                    scalars_descriptions: dict = group["scalars"]
                                elif "values" in group:
                                    scalars_descriptions: dict = group["values"]
                                else:
                                    scalars_descriptions = None

                                if scalars_descriptions is None:
                                    print("Did not find scalars! Found: ")
                                    for axis_key in group:
                                        print("key: "+axis_key)
                                        sys.stdout.flush()
                                    break

                                # Build scalar values the same way as etimes.
                                scalars: dict = dict()
                                for scalar_key in scalars_descriptions:
                                    item = scalars_descriptions[scalar_key]
                                    valOrRef: str = item["value"]
                                    if valOrRef.startswith("$"):
                                        value_inner = valOrRef[1:]
                                        value_actual = values_dict[value_inner]
                                    else:
                                        value_actual = valOrRef
                                    value_type = item["type"]
                                    grpc_value_type = type_string_to_grpc_type(value_type)
                                    scalars[scalar_key] = tdp.Any(type=grpc_value_type, value=value_actual)

                                # The record time comes from the group's
                                # meta.time entry when present, else from the
                                # matched line itself.
                                date_string: str = line
                                if "meta" in group:
                                    meta_list: dict = group["meta"]
                                    if "time" in meta_list:
                                        valOrRef: str = meta_list["time"]
                                        if valOrRef.startswith("$"):
                                            value_ref = valOrRef[1:]
                                            date_string = values_dict[value_ref]
                                        else:
                                            date_string = valOrRef

                                # At this point, don't keep trying to get a start time if we haven't already
                                did_get_good_time = True
                                # TODO: pass in the type specified by the regex
                                line_time, _ = extract_datetime.extract_datetime(date_string, logfile_year, None)
                                # NOTE(review): timedelta.microseconds is only
                                # the sub-second component of the delta, not
                                # the total elapsed time — confirm whether
                                # total_seconds()-based math was intended.
                                microseconds = (line_time - start_time).microseconds
                                timestamp = int(microseconds)
                                record_index += 1
                                emetrics = tdp.EMetrics(
                                    meta=tdp.MetaInfo(
                                        training_id=os.environ["TRAINING_ID"],
                                        time=timestamp,
                                        rindex=record_index
                                    ),
                                    grouplabel=name,
                                    etimes=etimes,
                                    values=scalars
                                )

                                json_form = print_json.to_string(emetrics)

                                with open(em_file_path, 'a') as em_stream:
                                    em_stream.write(json_form)
                                    em_stream.write("\n")

                                if tdClient is not None:
                                    tdClient.AddEMetrics(emetrics)

                                # for now, print to stdout (support old endpoint).
                                # TODO: Don't print to stdout for metrics
                                print(json_form)

                                # A match consumes the window; only one group
                                # can match per line.
                                text_window = ""
                                line_length_stack = []
                                break

                except Exception as inst:
                    print("Unexpected error when attempting to process evaluation metric record:",
                          sys.exc_info()[0])
                    print(inst)
                    sys.stdout.flush()

                # Remember where to resume on the next pass.
                read_pos = log_stream.tell()

            learner_job_is_running = follow

        # wait a second before reading the file again
        # (unless you want to constantly check the logs for new content?)
        time.sleep(1)