Esempio n. 1
0
def mk_full_id_dict(_path, _file, _type):
    """Build a dictionary of CSV records keyed by their integer ``ID`` column.

    The file at ``_path + _file`` is read as latin-1 encoded, comma-delimited
    CSV. Column names come from ``SUB_FIELDS``, ``PEER_FIELDS`` or
    ``TGID_FIELDS`` depending on ``_type``; surplus columns of a row are
    collected under the ``'OTHER'`` key.

    Args:
        _path: Directory prefix. It is concatenated with ``_file`` as-is, so
            it must already end with a path separator.
        _file: Name of the CSV file.
        _type: One of ``'subscriber'``, ``'peer'`` or ``'tgid'``.

    Returns:
        A dict mapping ``int(row['ID'])`` to a plain ``dict`` copy of the row.
        Rows whose ``ID`` is missing or not an integer are skipped. If an
        ``IOError`` occurs (for example the file does not exist), whatever has
        been collected so far -- normally an empty dict -- is returned.

    Raises:
        ValueError: If ``_type`` is not one of the supported record types.
    """
    if _type == 'subscriber':
        fields = SUB_FIELDS
    elif _type == 'peer':
        fields = PEER_FIELDS
    elif _type == 'tgid':
        fields = TGID_FIELDS
    else:
        # Previously an unknown type left ``fields`` unbound and surfaced as
        # an UnboundLocalError deep inside the reader setup.
        raise ValueError('Unknown ID file type: {!r}'.format(_type))

    _dict = {}
    try:
        with open(_path + _file, 'r', encoding='latin1') as _handle:
            ids = csv_dict_reader(_handle,
                                  fieldnames=fields,
                                  restkey='OTHER',
                                  dialect='excel',
                                  delimiter=',')
            for row in ids:
                try:
                    _dict[int(row['ID'])] = dict(row)
                except (KeyError, TypeError, ValueError):
                    # No usable integer ID (e.g. a header line or a short
                    # row): skip the record rather than abort the load.
                    continue
    except IOError:
        # Best effort: an unreadable file yields what was gathered so far.
        pass
    return _dict
Esempio n. 2
0
 def _read_file(self, key):
     """Yield the records of the object stored under ``key`` as dictionaries.

     The object is fetched with ``self.boto_client.get_object`` from
     ``self.bucket``. Keys ending in ``.parquet`` are read fully into memory
     and decoded with ``parquet_dict_reader``. Every other object is opened
     with ``gz_open`` in text mode and parsed as space-delimited text whose
     first line is the header; dashes in header names become underscores.

     Once the last record has been yielded, ``self.bytes_processed`` and
     ``self.compressed_bytes_processed`` are increased while holding
     ``THREAD_LOCK``. If the consumer stops iterating early, the counters
     are left untouched.
     """
     resp = self.boto_client.get_object(Bucket=self.bucket, Key=key)
     if key.endswith('.parquet'):
         body = resp['Body'].read()
         reader = parquet_dict_reader(io.BytesIO(body))
         yield from reader
         with THREAD_LOCK:
             self.bytes_processed += len(body)
             self.compressed_bytes_processed += resp['ContentLength']
     else:
         with gz_open(resp['Body'], mode='rt') as gz_f:
             reader = csv_dict_reader(gz_f, delimiter=' ')
             # An empty object has no header line; a csv.DictReader then
             # reports ``fieldnames`` as None, so there is nothing to rename
             # and the reader simply yields no rows.
             header = reader.fieldnames
             if header is not None:
                 reader.fieldnames = [
                     name.replace('-', '_') for name in header
                 ]
             yield from reader
             with THREAD_LOCK:
                 self.bytes_processed += gz_f.tell()
                 self.compressed_bytes_processed += resp['ContentLength']
Esempio n. 3
0
        p.communicate()
    except OSError as ex:
        print_file(COMBINED_OUTPUT_FILE_NAME)
        print '#### ERROR Caught OSError `{0}`.'.format(ex)
        print '&&&& FAILED {0}'.format(BENCHMARK_NAME)
        exit(-1)

    print_file(COMBINED_OUTPUT_FILE_NAME)

    if p.returncode != 0:
        print '#### ERROR Process exited with code {0}.'.format(p.returncode)
        print '&&&& FAILED {0}'.format(BENCHMARK_NAME)
        exit(p.returncode)

    with open(COMBINED_OUTPUT_FILE_NAME) as input_file:
        reader = csv_dict_reader(input_file)

        variable_units = reader.next()  # Get units header row.

        distinguishing_variables = reader.fieldnames

        measured_variables = [("STL Average Throughput", "+"),
                              ("Thrust Average Throughput", "+")]

        for record in reader:
            for variable, directionality in measured_variables:
                # Don't monitor regressions for STL implementations, nvbug 28980890:
                if "STL" in variable:
                    continue
                print "&&&& PERF {0}_{1}_{2}bit_{3}mib_{4} {5} {6}{7}".format(
                    record["Algorithm"], record["Element Type"],
Esempio n. 4
0
    def __init__(self, input_files, output_file, preserve_whitespace=True):
        """Read input files and open the output file and construct a new `io_manager`
        object.

        Every input file is opened and wrapped in a CSV dictionary reader fed
        through `filter_comments`. The header row (variable names) and the row
        after it (variable units) are consumed here and must be identical for
        all input files.

        If `preserve_whitespace` is `False`, leading and trailing whitespace is
        stripped from each CSV cell; in this constructor that means the variable
        names and units are passed through `strip_list` / `strip_dict`.

        Parameters
          input_files :
            Non-empty sequence of paths of the CSV files to read.
          output_file :
            Path of the file to write, or `"-"` to write to `stdout`.
          preserve_whitespace :
            `bool` selecting whether cell whitespace is kept.

        Raises
          AssertionError :
            If `len(input_files) <= 0`, `type(preserve_whitespace) != bool`, or
            the input files disagree on variable names or units.
        """
        assert len(input_files) > 0, "No input files provided."

        assert isinstance(preserve_whitespace, bool)

        self.preserve_whitespace = preserve_whitespace

        self.readers = deque()

        self.variable_names = None
        self.variable_units = None

        self.input_files = deque()

        try:
            for input_file in input_files:
                input_file_object = open(input_file)
                # Track the handle right away so that it gets closed if any
                # of the checks below fail.
                self.input_files.append(input_file_object)

                reader = csv_dict_reader(filter_comments(input_file_object))

                if not self.preserve_whitespace:
                    strip_list(reader.fieldnames)

                if self.variable_names is None:
                    self.variable_names = reader.fieldnames
                else:
                    # Make sure all inputs have the same schema.
                    assert self.variable_names == reader.fieldnames, (
                        "Input file (`" + input_file + "`) variable schema `" +
                        str(reader.fieldnames) +
                        "` does not match the variable schema `" +
                        str(self.variable_names) + "`.")

                # Consume the next row, which should be the second line of
                # the header. The `next` builtin works on both Python 2 and
                # Python 3, unlike the Python 2-only `reader.next()` method.
                variable_units = next(reader)

                if not self.preserve_whitespace:
                    strip_dict(variable_units)

                if self.variable_units is None:
                    self.variable_units = variable_units
                else:
                    # Make sure all inputs have the same units schema.
                    assert self.variable_units == variable_units, (
                        "Input file (`" + input_file + "`) units schema `" +
                        str(variable_units) +
                        "` does not match the units schema `" +
                        str(self.variable_units) + "`.")

                self.readers.append(reader)

            if output_file == "-":  # Output to stdout.
                self.output_file = stdout
            else:  # Output to user-specified file.
                self.output_file = open(output_file, "w")
        except Exception:
            # Do not leak the input files that were already opened.
            for input_file_object in self.input_files:
                input_file_object.close()
            raise

        self.writer = csv_dict_writer(self.output_file,
                                      fieldnames=self.variable_names)
Esempio n. 5
0
  def __init__(self,
               baseline_input_file, observed_input_file,
               output_file,
               preserve_whitespace = False):
    """Read input files and open the output file and construct a new `io_manager`
    object.

    The baseline and observed results files are each opened and wrapped in a
    CSV dictionary reader fed through `filter_comments`. The header row
    (variable names) and the row after it (variable units) of both files are
    consumed here and must match.

    If `preserve_whitespace` is `False`, leading and trailing whitespace is
    stripped from each CSV cell; in this constructor that means the variable
    names and units are passed through `strip_list` / `strip_dict`.

    Parameters
      baseline_input_file :
        Path of the CSV file holding the baseline results.
      observed_input_file :
        Path of the CSV file holding the observed results.
      output_file :
        Path of the file to write, or `"-"` to write to `stdout`.
      preserve_whitespace :
        `bool` selecting whether cell whitespace is kept.

    Raises
      AssertionError :
        If `type(preserve_whitespace) != bool`, or the two input files
        disagree on variable names or units.
    """
    assert isinstance(preserve_whitespace, bool)

    self.preserve_whitespace = preserve_whitespace

    # Handles opened so far, so they can be closed if construction fails.
    opened_files = []

    try:
      # Open baseline results.
      self.baseline_input_file = open(baseline_input_file)
      opened_files.append(self.baseline_input_file)
      self.baseline_reader = csv_dict_reader(
        filter_comments(self.baseline_input_file)
      )

      if not self.preserve_whitespace:
        strip_list(self.baseline_reader.fieldnames)

      self.variable_names = list(self.baseline_reader.fieldnames) # Copy.

      # The `next` builtin works on both Python 2 and Python 3, unlike the
      # Python 2-only `reader.next()` method.
      self.variable_units = next(self.baseline_reader)

      if not self.preserve_whitespace:
        strip_dict(self.variable_units)

      # Open observed results.
      self.observed_input_file = open(observed_input_file)
      opened_files.append(self.observed_input_file)
      self.observed_reader = csv_dict_reader(
        filter_comments(self.observed_input_file)
      )

      if not self.preserve_whitespace:
        strip_list(self.observed_reader.fieldnames)

      # Make sure all inputs have the same variables schema.
      assert self.variable_names == self.observed_reader.fieldnames, (
        "Observed results input file (`" + observed_input_file + "`) " +
        "variable schema `" + str(self.observed_reader.fieldnames) + "` does " +
        "not match the baseline results input file (`" + baseline_input_file +
        "`) variable schema `" + str(self.variable_names) + "`."
      )

      # Consume the next row, which should be the second line of the header.
      observed_variable_units = next(self.observed_reader)

      if not self.preserve_whitespace:
        strip_dict(observed_variable_units)

      # Make sure all inputs have the same units schema.
      assert self.variable_units == observed_variable_units, (
        "Observed results input file (`" + observed_input_file + "`) " +
        "units schema `" + str(observed_variable_units) + "` does not " +
        "match the baseline results input file (`" + baseline_input_file +
        "`) units schema `" + str(self.variable_units) + "`."
      )

      if output_file == "-": # Output to stdout.
        self.output_file = stdout
      else:                  # Output to user-specified file.
        self.output_file = open(output_file, "w")
    except Exception:
      # Do not leak the input files that were already opened.
      for opened_file in opened_files:
        opened_file.close()
      raise

    self.writer = csv_dict_writer(
      self.output_file, fieldnames = self.variable_names
    )
  def __init__(self, input_files, output_file, preserve_whitespace = True):
    """Read input files and open the output file and construct a new `io_manager`
    object.

    Every input file is opened and wrapped in a CSV dictionary reader fed
    through `filter_comments`. The header row (variable names) and the row
    after it (variable units) are consumed here and must be identical for all
    input files.

    If `preserve_whitespace` is `False`, leading and trailing whitespace is
    stripped from each CSV cell; in this constructor that means the variable
    names and units are passed through `strip_list` / `strip_dict`.

    Parameters
      input_files :
        Non-empty sequence of paths of the CSV files to read.
      output_file :
        Path of the file to write, or `"-"` to write to `stdout`.
      preserve_whitespace :
        `bool` selecting whether cell whitespace is kept.

    Raises
      AssertionError :
        If `len(input_files) <= 0`, `type(preserve_whitespace) != bool`, or
        the input files disagree on variable names or units.
    """
    assert len(input_files) > 0, "No input files provided."

    assert isinstance(preserve_whitespace, bool)

    self.preserve_whitespace = preserve_whitespace

    self.readers = deque()

    self.variable_names = None
    self.variable_units = None

    self.input_files = deque()

    try:
      for input_file in input_files:
        input_file_object = open(input_file)
        # Track the handle right away so that it gets closed if any of the
        # checks below fail.
        self.input_files.append(input_file_object)

        reader = csv_dict_reader(filter_comments(input_file_object))

        if not self.preserve_whitespace:
          strip_list(reader.fieldnames)

        if self.variable_names is None:
          self.variable_names = reader.fieldnames
        else:
          # Make sure all inputs have the same schema.
          assert self.variable_names == reader.fieldnames, (
            "Input file (`" + input_file + "`) variable schema `" +
            str(reader.fieldnames) + "` does not match the variable schema `" +
            str(self.variable_names) + "`."
          )

        # Consume the next row, which should be the second line of the header.
        # The `next` builtin works on both Python 2 and Python 3, unlike the
        # Python 2-only `reader.next()` method.
        variable_units = next(reader)

        if not self.preserve_whitespace:
          strip_dict(variable_units)

        if self.variable_units is None:
          self.variable_units = variable_units
        else:
          # Make sure all inputs have the same units schema.
          assert self.variable_units == variable_units, (
            "Input file (`" + input_file + "`) units schema `" +
            str(variable_units) + "` does not match the units schema `" +
            str(self.variable_units) + "`."
          )

        self.readers.append(reader)

      if output_file == "-": # Output to stdout.
        self.output_file = stdout
      else:                  # Output to user-specified file.
        self.output_file = open(output_file, "w")
    except Exception:
      # Do not leak the input files that were already opened.
      for input_file_object in self.input_files:
        input_file_object.close()
      raise

    self.writer = csv_dict_writer(
      self.output_file, fieldnames = self.variable_names
    )
Esempio n. 7
0
    p.communicate()
  except OSError as ex:
    print_file(COMBINED_OUTPUT_FILE_NAME)
    print '#### ERROR Caught OSError `{0}`.'.format(ex)
    print '&&&& FAILED {0}'.format(BENCHMARK_NAME)
    exit(-1)

  print_file(COMBINED_OUTPUT_FILE_NAME)

  if p.returncode != 0:
    print '#### ERROR Process exited with code {0}.'.format(p.returncode)
    print '&&&& FAILED {0}'.format(BENCHMARK_NAME)
    exit(p.returncode)

  with open(COMBINED_OUTPUT_FILE_NAME) as input_file:
    reader = csv_dict_reader(input_file)

    variable_units = reader.next() # Get units header row.

    distinguishing_variables = reader.fieldnames

    measured_variables = [
      ("STL Average Throughput",    "+"),
      ("Thrust Average Throughput", "+")
    ]

    for record in reader:
      for variable, directionality in measured_variables:
        print "&&&& PERF {0}_{1}_{2}bit_{3}mib_{4} {5} {6}{7}".format(
          record["Algorithm"],
          record["Element Type"],