Esempio n. 1
0
    def __call__(self):
        """Tally per-document counts over all input files and report them.

        The counter names and extractor callables are obtained from
        ``self._get_counters`` using the first document read. Every
        extractor is applied to every document; the results are added to a
        running subtotal and to an overall total.

        Rows handed to ``self.formatter``:

        * every ``show_interval`` documents: the running totals when
          ``cumulative`` is set, otherwise that document's own counts;
        * after each file, if ``show_subtotal`` is set: the subtotals;
        * at the end, if ``show_total`` / ``show_average`` are set: the
          totals and the per-document averages.

        If a subtotal, total or average is requested before any document
        has been read, "No documents to count" is written to stderr.
        """
        consts = CountFormatter
        unit = consts.COUNT_BYTES if self.args.count_bytes else consts.COUNT_ELEMENTS
        self.formatter.start()

        # These stay None until the first document has been seen. Testing
        # for None explicitly (rather than catching NameError on unbound
        # locals) means a genuine NameError raised inside the formatter is
        # no longer swallowed and misreported as "no documents".
        extractors = totals = subtotals = None

        i = 0
        for in_file in self.args.files:
            if i and not self.args.cumulative:
                # Non-cumulative mode: subtotals restart with each file.
                # ``i`` being non-zero guarantees extractors is set.
                subtotals = [0] * len(extractors)
            for doc in read_raw_docs(in_file, on_end='break'):
                if not i:
                    # The very first document determines the counters.
                    names, extractors = self._get_counters(doc)
                    totals = [0] * len(extractors)
                    subtotals = [0] * len(extractors)
                    self.formatter.set_fields(names)

                doc_counts = [extract(doc) for extract in extractors]
                for j, c in enumerate(doc_counts):
                    subtotals[j] += c
                    totals[j] += c
                if self.args.show_interval and (
                        i + 1) % self.args.show_interval == 0:
                    if self.args.cumulative:
                        self.formatter.add_row(totals,
                                               i,
                                               agg=consts.AGG_SUM,
                                               filename=in_file.name,
                                               unit=unit)
                    else:
                        self.formatter.add_row(doc_counts,
                                               i,
                                               filename=in_file.name,
                                               unit=unit)

                i += 1

            if self.args.show_subtotal:
                if subtotals is None:
                    print("No documents to count", file=sys.stderr)
                else:
                    self.formatter.add_row(subtotals,
                                           consts.FILE,
                                           agg=consts.AGG_SUM,
                                           filename=in_file.name,
                                           unit=unit)

        if self.args.show_total or self.args.show_average:
            if totals is None:
                print("No documents to count", file=sys.stderr)
            else:
                if self.args.show_total:
                    self.formatter.add_row(totals,
                                           consts.ALL,
                                           agg=consts.AGG_SUM,
                                           unit=unit)
                if self.args.show_average:
                    # totals is only set once a document was read, so i >= 1.
                    self.formatter.add_row([x / i for x in totals],
                                           consts.ALL,
                                           agg=consts.AGG_AVG,
                                           unit=unit)
        self.formatter.finish()
Esempio n. 2
0
    def _integrate_names(self, unpacker):
        """Yield one dict per raw document read from ``unpacker``.

        Each yielded dict contains, in this order:

        * ``'__version__'``: ``doc.version``;
        * ``'__meta__'``: a dict with ``'fields'`` (built from
          ``self._fields_to_dict`` on the meta class's fields) and
          ``'item'`` (``self._process_annot`` applied to ``doc.doc``);
        * ``'#'``: the document's index, only when ``self.args.numbered``;
        * one entry per store definition, keyed by the store's name.

        Unless ``self.args.hide_instances`` is set, each store gets an
        ``'items'`` list of processed instances (numbered with ``'#'`` when
        ``self.args.numbered``). Each store's ``'fields'`` entry is then
        replaced by a dict built from ``self._fields_to_dict``. When
        ``self.args.reverse_pointers`` is set, the dict is passed to
        ``self._reverse_pointers_with_names`` before being yielded.
        """
        for index, doc in enumerate(read_raw_docs(unpacker)):
            record = {'__version__': doc.version}
            store_defs = list(
                self._process_store_defs(doc.stores, doc.klasses))

            # Build the meta entry step by step: fields first, then item.
            meta = {}
            meta['fields'] = dict(
                self._fields_to_dict(doc.klasses[META_TYPE][1], store_defs))
            meta['item'] = self._process_annot(doc.doc,
                                               doc.klasses[META_TYPE][1])
            record['__meta__'] = meta

            if self.args.numbered:
                record['#'] = index

            for definition, instances in zip(store_defs, doc.instances):
                store_name, store = definition
                record[store_name] = store
                if not self.args.hide_instances:
                    processed = [
                        self._process_annot(instance, store['fields'])
                        for instance in instances
                    ]
                    store['items'] = processed
                    if self.args.numbered:
                        for position, entry in enumerate(processed):
                            entry['#'] = position
                # The raw field definitions are needed above, so they are
                # only converted once the instances have been processed.
                store['fields'] = dict(
                    self._fields_to_dict(store['fields'], store_defs))

            if self.args.reverse_pointers:
                self._reverse_pointers_with_names(record)

            yield record
Esempio n. 3
0
 def _integrate_names(self, unpacker):
   """Yield one dict per raw document read from ``unpacker``.

   Each dict holds ``'__version__'`` (``doc.version``), ``'__meta__'``
   (``self._process_annot`` applied to ``doc.doc``), ``'#'`` (the document
   index, only when ``self.args.numbered``) and one entry per store
   definition, keyed by store name. Unless ``self.args.hide_instances`` is
   set, each store gets an ``'items'`` list of processed instances, numbered
   with ``'#'`` when ``self.args.numbered``. Each store's ``'fields'`` entry
   is finally replaced by a dict built from ``self._fields_to_dict``.
   """
   for index, doc in enumerate(read_raw_docs(unpacker)):
     record = {'__version__': doc.version}
     store_defs = list(self._process_store_defs(doc.stores, doc.klasses))
     record['__meta__'] = self._process_annot(doc.doc, doc.klasses[META_TYPE][1])
     if self.args.numbered:
       record['#'] = index
     for definition, instances in zip(store_defs, doc.instances):
       store_name, store = definition
       record[store_name] = store
       if not self.args.hide_instances:
         processed = [self._process_annot(instance, store['fields']) for instance in instances]
         store['items'] = processed
         if self.args.numbered:
           for position, entry in enumerate(processed):
             entry['#'] = position
       # Raw field definitions are needed above, so convert them last.
       store['fields'] = dict(self._fields_to_dict(store['fields'], store_defs))
     yield record
Esempio n. 4
0
  def __call__(self):
    """Tally per-document counts over all input files and report them.

    The counter names and extractor callables are obtained from
    ``self._get_counters`` using the first document read. Every extractor
    is applied to every document; the results are added to a running
    subtotal and to an overall total.

    Rows handed to ``self.formatter``:

    * every ``show_interval`` documents: the running totals when
      ``cumulative`` is set, otherwise that document's own counts;
    * after each file, if ``show_subtotal`` is set: the subtotals;
    * at the end, if ``show_total`` / ``show_average`` are set: the totals
      and the per-document averages.

    If a subtotal, total or average is requested before any document has
    been read, "No documents to count" is written to stderr.
    """
    consts = CountFormatter
    unit = consts.COUNT_BYTES if self.args.count_bytes else consts.COUNT_ELEMENTS
    self.formatter.start()

    # These stay None until the first document has been seen. Testing for
    # None explicitly (rather than catching NameError on unbound locals)
    # means a genuine NameError raised inside the formatter is no longer
    # swallowed and misreported as "no documents".
    extractors = totals = subtotals = None

    i = 0
    for in_file in self.args.files:
      if i and not self.args.cumulative:
        # Non-cumulative mode: subtotals restart with each file.
        # ``i`` being non-zero guarantees extractors is set.
        subtotals = [0] * len(extractors)
      for doc in read_raw_docs(in_file, on_end='break'):
        if not i:
          # The very first document determines the counters.
          names, extractors = self._get_counters(doc)
          totals = [0] * len(extractors)
          subtotals = [0] * len(extractors)
          self.formatter.set_fields(names)

        doc_counts = [extract(doc) for extract in extractors]
        for j, c in enumerate(doc_counts):
          subtotals[j] += c
          totals[j] += c
        if self.args.show_interval and (i + 1) % self.args.show_interval == 0:
          if self.args.cumulative:
            self.formatter.add_row(totals, i, agg=consts.AGG_SUM, filename=in_file.name, unit=unit)
          else:
            self.formatter.add_row(doc_counts, i, filename=in_file.name, unit=unit)

        i += 1

      if self.args.show_subtotal:
        if subtotals is None:
          print("No documents to count", file=sys.stderr)
        else:
          self.formatter.add_row(subtotals, consts.FILE, agg=consts.AGG_SUM, filename=in_file.name, unit=unit)

    if self.args.show_total or self.args.show_average:
      if totals is None:
        print("No documents to count", file=sys.stderr)
      else:
        if self.args.show_total:
          self.formatter.add_row(totals, consts.ALL, agg=consts.AGG_SUM, unit=unit)
        if self.args.show_average:
          # totals is only set once a document was read, so i >= 1 here.
          self.formatter.add_row([x / i for x in totals], consts.ALL, agg=consts.AGG_AVG, unit=unit)
    self.formatter.finish()
Esempio n. 5
0
 def _headers_only(self, unpacker):
   """Yield ``version``, ``klasses`` and ``stores`` of each raw document.

   The three attributes are yielded one after another, in that order, for
   every document produced by ``read_raw_docs(unpacker)``.
   """
   header_attrs = ('version', 'klasses', 'stores')
   for doc in read_raw_docs(unpacker):
     for attr in header_attrs:
       # Look each attribute up only when it is about to be yielded.
       yield getattr(doc, attr)
Esempio n. 6
0
 def _headers_only(self, unpacker):
     """Yield ``version``, ``klasses`` and ``stores`` of each raw document.

     The three attributes are yielded one after another, in that order,
     for every document produced by ``read_raw_docs(unpacker)``.
     """
     header_attrs = ('version', 'klasses', 'stores')
     for doc in read_raw_docs(unpacker):
         for attr in header_attrs:
             # Look each attribute up only when it is about to be yielded.
             yield getattr(doc, attr)