def test_job_conf(self):
    """Check that JobConf answers to the keys listed in mrv2_to_mrv1.

    A JobConf is built from a flat ``[k1, v1, k2, v2, ...]`` list in which
    every key of ``mrv1_to_mrv2`` is mapped to itself.  Each key ``k`` of
    ``mrv2_to_mrv1`` must then yield the value stored under
    ``mrv2_to_mrv1[k]``.
    """
    job_conf = {name: name for name in mrv1_to_mrv2}
    # Flatten the (key, value) pairs into a single alternating list.
    flat_conf = []
    for pair in iteritems(job_conf):
        flat_conf.extend(pair)
    jc = JobConf(flat_conf)
    for k in mrv2_to_mrv1:
        expected = job_conf[mrv2_to_mrv1[k]]
        self.assertEqual(jc[k], expected)
Exemple #2
0
def dump_counters(hs, logger):
    """Log every counter returned by ``hs.get_counters()``.

    The counters are read for the 'mapping' and 'reducing' phases, in that
    order.  For each phase a header line is logged, then one line per
    group, then one line per (counter, value) pair of that group.  All
    messages are emitted at INFO level through ``logger``.
    """
    all_counters = hs.get_counters()
    for phase in ('mapping', 'reducing'):
        logger.info("%s counters:", phase.capitalize())
        # Looked up after the header is logged, as the phase may be missing.
        phase_counters = all_counters[phase]
        for group in phase_counters:
            logger.info("  Group %s", group)
            for name, value in iteritems(phase_counters[group]):
                logger.info("   %s: %s", name, value)
Exemple #3
0
def dump_counters(hs, logger):
    """Log every counter returned by ``hs.get_counters()``.

    The counters are read for the 'mapping' and 'reducing' phases, in that
    order.  For each phase a header line is logged, then one line per
    group, then one line per (counter, value) pair of that group.  All
    messages are emitted at INFO level through ``logger``.
    """
    all_counters = hs.get_counters()
    for phase in ('mapping', 'reducing'):
        logger.info("%s counters:", phase.capitalize())
        # Looked up after the header is logged, as the phase may be missing.
        phase_counters = all_counters[phase]
        for group in phase_counters:
            logger.info("  Group %s", group)
            for name, value in iteritems(phase_counters[group]):
                logger.info("   %s: %s", name, value)
Exemple #4
0
 def test_job_conf(self):
     """Check that JobConf answers to the keys listed in mrv2_to_mrv1.

     A JobConf is built from a flat ``[k1, v1, k2, v2, ...]`` list in which
     every key of ``mrv1_to_mrv2`` is mapped to itself.  Each key ``k`` of
     ``mrv2_to_mrv1`` must then yield the value stored under
     ``mrv2_to_mrv1[k]``.
     """
     job_conf = {name: name for name in mrv1_to_mrv2}
     # Flatten the (key, value) pairs into a single alternating list.
     flat_conf = []
     for pair in iteritems(job_conf):
         flat_conf.extend(pair)
     jc = JobConf(flat_conf)
     for k in mrv2_to_mrv1:
         expected = job_conf[mrv2_to_mrv1[k]]
         self.assertEqual(jc[k], expected)
Exemple #5
0
 def __serialize_as_needed(self, key, value):
     """Return ``(key, value)``, serialized where a serializer is configured.

     Serialization is attempted only when ``AVRO_OUTPUT`` is present in the
     job configuration and this task is either a reducer or map-only.  In
     that case the key (mode ``'K'``) and the value (mode ``'V'``) are each
     passed through the serializer registered for their mode, if any; an
     item whose mode has no serializer is returned unchanged.
     """
     jc = self.job_conf
     wants_avro = AVRO_OUTPUT in jc and (
         self.is_reducer() or self.__is_map_only()
     )
     if not wants_avro:
         return key, value
     out_kv = {'K': key, 'V': value}
     # Only existing keys are reassigned, so mutating while iterating is safe.
     for mode, record in iteritems(out_kv):
         serializer = self.__serializers.get(mode)
         if serializer is None:
             continue
         out_kv[mode] = serializer.serialize(record)
     return out_kv['K'], out_kv['V']
 def check_counts(self, fname, exp_count):
     """Assert that ``count_outputs(fname)`` contains every expected count.

     Each ``(key, value)`` pair of ``exp_count`` must be present in the
     computed counts with an equal value.  When an assertion fails, the
     full computed counts are printed before the error is re-raised.
     """
     count = count_outputs(fname)
     try:
         for key, expected in iteritems(exp_count):
             self.assertTrue(key in count)
             self.assertEqual(count[key], expected)
     except AssertionError:
         # Show what was actually counted to ease debugging, then propagate.
         print(count)
         raise
Exemple #7
0
 def run(self):
     """Count words under ``self.input_path`` and return the counts.

     If the input path is a directory, every entry whose name does not
     start with a dot is handed to ``self._wordcount_file`` together with
     the directory; otherwise the path itself is processed.  When
     ``self.min_occurrence`` is truthy, only the entries whose count is at
     least that threshold are kept.
     """
     wc = {}
     if not os.path.isdir(self.input_path):
         self._wordcount_file(wc, self.input_path)
     else:
         for fn in os.listdir(self.input_path):
             # Skip hidden entries.
             if fn[0] != ".":
                 self._wordcount_file(wc, fn, self.input_path)
     if self.min_occurrence:
         wc = {
             word: n for word, n in iteritems(wc)
             if n >= self.min_occurrence
         }
     return wc
 def run(self):
     """Count words under ``self.input_path`` and return the counts.

     If the input path is a directory, every entry whose name does not
     start with a dot is handed to ``self._wordcount_file`` together with
     the directory; otherwise the path itself is processed.  When
     ``self.min_occurrence`` is truthy, only the entries whose count is at
     least that threshold are kept.
     """
     wc = {}
     if not os.path.isdir(self.input_path):
         self._wordcount_file(wc, self.input_path)
     else:
         for fn in os.listdir(self.input_path):
             # Skip hidden entries.
             if fn[0] != ".":
                 self._wordcount_file(wc, fn, self.input_path)
     if self.min_occurrence:
         wc = {
             word: n for word, n in iteritems(wc)
             if n >= self.min_occurrence
         }
     return wc
Exemple #9
0
 def __run_test(self, mode, mapper_class, context_class):
     """Run a task for ``mode`` and compare its output with ``self.records``.

     A command file is written for ``mode`` and a task is run on it with
     the given mapper and context classes.  The task output is then read
     back from ``<cmd_file>.out``: every OUTPUT command is expected to carry
     a ``(name, color)`` pair.  The collected records must match
     ``self.records`` in number and, once decoded as UTF-8, field by field.
     """
     cmd_file = self.__write_cmd_file(mode)
     factory = pp.Factory(mapper_class=mapper_class)
     pp.run_task(
         factory,
         private_encoding=False,
         context_class=context_class,
         cmd_file=cmd_file,
     )
     out_records = []
     with open(cmd_file + '.out', 'rb') as f:
         stream = BinaryDownStreamAdapter(f)
         for cmd, args in stream:
             if cmd != stream.OUTPUT:
                 continue
             name, color = args
             out_records.append(dict(name=name, favorite_color=color))
     self.assertEqual(len(out_records), len(self.records))
     for actual, wanted in zip(out_records, self.records):
         for field, raw in iteritems(actual):
             self.assertEqual(raw.decode('UTF-8'), wanted[field])
Exemple #10
0
 def spill_all(self):
     """Run the reducer over all buffered data, then empty the buffer.

     The spill counter is incremented by one and the spilled-bytes counter
     by ``self.used_bytes``.  While the reducer runs, ``ctx.writer`` is set
     to ``None`` and ``ctx.get_input_key`` is replaced by a function that
     returns the raw ``ctx._key``.  Both are restored afterwards, even if
     the reducer raises, so the context is never left in its patched state.
     On success ``self.data`` is cleared and ``self.used_bytes`` is reset
     to zero; on failure the exception propagates and the buffer is left
     untouched.
     """
     self.ctx.increment_counter(self.spill_counter, 1)
     self.ctx.increment_counter(self.spilled_bytes_counter, self.used_bytes)
     ctx = self.ctx
     # Save both attributes before touching either one, so that a failure
     # while reading them leaves the context unmodified.
     writer = ctx.writer
     get_input_key = ctx.get_input_key
     try:
         # NOTE(review): presumably a None writer changes where reduce()
         # output goes -- confirm against the Context implementation.
         ctx.writer = None
         # disable auto-deserialize (ctx.key will be called by reduce)
         # FIXME: this might break custom Context implementations
         ctx.get_input_key = types.MethodType(lambda self: self._key, ctx)
         for key, values in iteritems(self.data):
             ctx._key, ctx._values = key, iter(values)
             self.reducer.reduce(ctx)
     finally:
         ctx.writer = writer
         ctx.get_input_key = get_input_key
     self.data.clear()
     self.used_bytes = 0
Exemple #11
0
 def __run_test(self, mode, mapper_class, context_class):
     """Run a task for ``mode`` and compare its output with ``self.records``.

     A command file is written for ``mode`` and a task is run on it with
     the given mapper and context classes.  The task output is then read
     back from ``<cmd_file>.out``: every OUTPUT command is expected to carry
     a ``(name, color)`` pair.  The collected records must match
     ``self.records`` in number and, once decoded as UTF-8, field by field.
     """
     cmd_file = self.__write_cmd_file(mode)
     factory = pp.Factory(mapper_class=mapper_class)
     pp.run_task(
         factory,
         private_encoding=False,
         context_class=context_class,
         cmd_file=cmd_file,
     )
     out_records = []
     with open(cmd_file + '.out', 'rb') as f:
         stream = BinaryDownStreamAdapter(f)
         for cmd, args in stream:
             if cmd != stream.OUTPUT:
                 continue
             name, color = args
             out_records.append(dict(name=name, favorite_color=color))
     self.assertEqual(len(out_records), len(self.records))
     for actual, wanted in zip(out_records, self.records):
         for field, raw in iteritems(actual):
             self.assertEqual(raw.decode('UTF-8'), wanted[field])
Exemple #12
0
def main(exp, res):
    """Compare computed results in ``res`` against expectations in ``exp``.

    Lines read from ``exp`` are split on ';'; the second field is used as
    a key and occurrences of the third field are counted per key.  Lines
    read from ``res`` are split on tabs: the first field is the key and the
    second one is evaluated as a Python expression.  The process exits with
    an error message if the two key sets differ or if any computed value
    differs from the expected counts; otherwise a success message is
    printed.
    """
    expected = {}
    for line in iter_lines(exp):
        fields = line.strip().split(';')
        expected.setdefault(fields[1], Counter())[fields[2]] += 1

    computed = {}
    for line in iter_lines(res):
        fields = line.strip().split('\t')
        # NOTE(review): eval() executes whatever the results file contains.
        # If values are always plain literals, ast.literal_eval would be a
        # safer choice -- confirm the output format before switching.
        computed[fields[0]] = eval(fields[1])

    if set(computed) != set(expected):
        key_report = (sorted(computed), sorted(expected))
        sys.exit(
            "ERROR: computed keys != expected keys: %r != %r" % key_report
        )
    for key, counts in iteritems(expected):
        if computed[key] != counts:
            sys.exit(
                "ERROR: %r: %r != %r" % (key, computed[key], dict(counts))
            )
    print('All is ok!')
Exemple #13
0
def main(exp, res):
    """Compare computed results in ``res`` against expectations in ``exp``.

    Lines read from ``exp`` are split on ';'; the second field is used as
    a key and occurrences of the third field are counted per key.  Lines
    read from ``res`` are split on tabs: the first field is the key and the
    second one is evaluated as a Python expression.  The process exits with
    an error message if the two key sets differ or if any computed value
    differs from the expected counts; otherwise a success message is
    printed.
    """
    expected = {}
    for line in iter_lines(exp):
        fields = line.strip().split(';')
        expected.setdefault(fields[1], Counter())[fields[2]] += 1

    computed = {}
    for line in iter_lines(res):
        fields = line.strip().split('\t')
        # NOTE(review): eval() executes whatever the results file contains.
        # If values are always plain literals, ast.literal_eval would be a
        # safer choice -- confirm the output format before switching.
        computed[fields[0]] = eval(fields[1])

    if set(computed) != set(expected):
        key_report = (sorted(computed), sorted(expected))
        sys.exit(
            "ERROR: computed keys != expected keys: %r != %r" % key_report
        )
    for key, counts in iteritems(expected):
        if computed[key] != counts:
            sys.exit(
                "ERROR: %r: %r != %r" % (key, computed[key], dict(counts))
            )
    print('All is ok!')
Exemple #14
0
    def get_counters(self):
        r"""
        Extract counters information accumulated by this simulator instance.

        The result is a nested dictionary indexed by phase, then by group,
        then by counter name.  The ``'mapping'`` and ``'reducing'`` phases
        are always present, possibly empty.  The expected usage is as
        follows:

        .. code-block:: python

          counters = hs.get_counters()
          for phase in ['mapping', 'reducing']:
              print("{} counters:".format(phase.capitalize()))
              for group in counters[phase]:
                  print("  Group {}".format(group))
                  for c, v in counters[phase][group].items():
                      print("   {}: {}".format(c, v))

        """
        ctable = {'mapping': {}, 'reducing': {}}
        for k, v in iteritems(self.counters):
            # k[0] is the phase; v is ((group, name), value).
            by_group = ctable.setdefault(k[0], {})
            by_name = by_group.setdefault(v[0][0], {})
            # setdefault keeps the first value seen for a given counter.
            by_name.setdefault(v[0][1], v[1])
        return ctable
Exemple #15
0
 def write_header_down_stream(self, down_stream, authorization, job_conf):
     """Send the protocol header to ``down_stream``.

     The header consists of, in order: the authorization data (written by
     ``self.write_authorization``), a START_MESSAGE command with argument
     0, and a SET_JOB_CONF command whose arguments are the entries of
     ``job_conf`` flattened as ``k1, v1, k2, v2, ...``.
     """
     # Flatten in a single linear pass: sum(list_of_lists, []) builds a new
     # list at every step, which is quadratic in the number of entries.
     out_jc = [item for k, v in iteritems(job_conf) for item in (k, v)]
     self.write_authorization(down_stream, authorization)
     down_stream.send(down_stream.START_MESSAGE, 0)
     down_stream.send(down_stream.SET_JOB_CONF, *out_jc)