def __call__(self):
    """Count every document in ``self.args.files`` and report via the formatter.

    The counters (names and extractor callables) come from
    ``self._get_counters`` applied to the first document read.  Each
    extractor is then called on every document, and the results are added
    to a running total and to a subtotal.

    Rows handed to ``self.formatter.add_row``:

    * every ``show_interval``-th document: the running totals when
      ``cumulative`` is set, otherwise that document's own counts;
    * after each file, when ``show_subtotal`` is set: the subtotals (these
      are reset at the start of a file only when ``cumulative`` is unset);
    * at the end: the totals (``show_total``) and the totals divided by the
      number of documents read (``show_average``).

    If no document has been read by the time a subtotal, total or average
    row is due, ``"No documents to count"`` is written to stderr instead.
    """
    consts = CountFormatter
    unit = consts.COUNT_BYTES if self.args.count_bytes else consts.COUNT_ELEMENTS
    self.formatter.start()

    # The real counters are only known once the first document is seen.
    # Bind empty placeholders so "nothing read yet" is an explicit state
    # (tested through ``i``) rather than an unbound local caught as
    # NameError, which would also hide genuine NameErrors raised inside
    # the formatter.
    extractors = ()
    totals = []
    subtotals = []

    i = 0  # number of documents read so far, across all files
    for in_file in self.args.files:
        if i and not self.args.cumulative:
            subtotals = [0] * len(extractors)

        for doc in read_raw_docs(in_file, on_end='break'):
            if not i:
                # First document overall: discover the counters and size
                # the accumulators accordingly.
                names, extractors = self._get_counters(doc)
                totals = [0] * len(extractors)
                subtotals = [0] * len(extractors)
                self.formatter.set_fields(names)

            doc_counts = [extract(doc) for extract in extractors]
            for j, c in enumerate(doc_counts):
                subtotals[j] += c
                totals[j] += c

            if self.args.show_interval and (i + 1) % self.args.show_interval == 0:
                if self.args.cumulative:
                    self.formatter.add_row(totals, i, agg=consts.AGG_SUM,
                                           filename=in_file.name, unit=unit)
                else:
                    self.formatter.add_row(doc_counts, i,
                                           filename=in_file.name, unit=unit)
            i += 1

        if self.args.show_subtotal:
            if i:
                self.formatter.add_row(subtotals, consts.FILE, agg=consts.AGG_SUM,
                                       filename=in_file.name, unit=unit)
            else:
                print("No documents to count", file=sys.stderr)

    if self.args.show_total or self.args.show_average:
        if i:
            if self.args.show_total:
                self.formatter.add_row(totals, consts.ALL, agg=consts.AGG_SUM,
                                       unit=unit)
            if self.args.show_average:
                # ``i`` is non-zero here, so the division is safe.
                self.formatter.add_row([x / i for x in totals], consts.ALL,
                                       agg=consts.AGG_AVG, unit=unit)
        else:
            print("No documents to count", file=sys.stderr)

    self.formatter.finish()
def _integrate_names(self, unpacker):
    """Yield one dict per document read from ``unpacker``.

    Each dict holds, in insertion order:

    * ``'__version__'``: the document's ``version``;
    * ``'__meta__'``: a dict with ``'fields'`` (built by
      ``self._fields_to_dict`` from the ``META_TYPE`` class entry) and
      ``'item'`` (the document's ``doc`` passed through
      ``self._process_annot``);
    * ``'#'``: the document index, only when ``self.args.numbered`` is set;
    * one entry per store, keyed by store name.

    The store dicts produced by ``self._process_store_defs`` are updated in
    place: ``'items'`` is added unless ``self.args.hide_instances`` is set,
    and ``'fields'`` is replaced by its ``self._fields_to_dict`` form.  When
    ``self.args.reverse_pointers`` is set the finished dict is handed to
    ``self._reverse_pointers_with_names`` before being yielded.
    """
    opts = self.args
    for doc_index, doc in enumerate(read_raw_docs(unpacker)):
        record = {'__version__': doc.version}
        store_defs = list(self._process_store_defs(doc.stores, doc.klasses))

        meta_fields = doc.klasses[META_TYPE][1]
        record['__meta__'] = {
            'fields': dict(self._fields_to_dict(meta_fields, store_defs)),
            'item': self._process_annot(doc.doc, meta_fields),
        }
        if opts.numbered:
            record['#'] = doc_index

        for (store_name, store), instances in zip(store_defs, doc.instances):
            record[store_name] = store
            if not opts.hide_instances:
                # Items must be processed against the raw field
                # definitions, i.e. before 'fields' is replaced below.
                raw_fields = store['fields']
                processed = [self._process_annot(instance, raw_fields)
                             for instance in instances]
                store['items'] = processed
                if opts.numbered:
                    for item_index, entry in enumerate(processed):
                        entry['#'] = item_index
            store['fields'] = dict(
                self._fields_to_dict(store['fields'], store_defs))

        if opts.reverse_pointers:
            self._reverse_pointers_with_names(record)
        yield record
def _integrate_names(self, unpacker):
    """Yield one dict per document read from ``unpacker``.

    Each dict holds, in insertion order:

    * ``'__version__'``: the document's ``version``;
    * ``'__meta__'``: the document's ``doc`` passed through
      ``self._process_annot`` with the ``META_TYPE`` class entry;
    * ``'#'``: the document index, only when ``self.args.numbered`` is set;
    * one entry per store, keyed by store name.

    The store dicts produced by ``self._process_store_defs`` are updated in
    place: ``'items'`` is added unless ``self.args.hide_instances`` is set,
    and ``'fields'`` is replaced by its ``self._fields_to_dict`` form.
    """
    opts = self.args
    for doc_index, doc in enumerate(read_raw_docs(unpacker)):
        record = {'__version__': doc.version}
        store_defs = list(self._process_store_defs(doc.stores, doc.klasses))
        record['__meta__'] = self._process_annot(
            doc.doc, doc.klasses[META_TYPE][1])
        if opts.numbered:
            record['#'] = doc_index

        for (store_name, store), instances in zip(store_defs, doc.instances):
            record[store_name] = store
            if not opts.hide_instances:
                # Items must be processed against the raw field
                # definitions, i.e. before 'fields' is replaced below.
                raw_fields = store['fields']
                processed = [self._process_annot(instance, raw_fields)
                             for instance in instances]
                store['items'] = processed
                if opts.numbered:
                    for item_index, entry in enumerate(processed):
                        entry['#'] = item_index
            store['fields'] = dict(
                self._fields_to_dict(store['fields'], store_defs))

        yield record
def __call__(self):
    """Count every document in ``self.args.files`` and report via the formatter.

    The counters (names and extractor callables) come from
    ``self._get_counters`` applied to the first document read.  Each
    extractor is then called on every document, and the results are added
    to a running total and to a subtotal.

    Rows handed to ``self.formatter.add_row``:

    * every ``show_interval``-th document: the running totals when
      ``cumulative`` is set, otherwise that document's own counts;
    * after each file, when ``show_subtotal`` is set: the subtotals (these
      are reset at the start of a file only when ``cumulative`` is unset);
    * at the end: the totals (``show_total``) and the totals divided by the
      number of documents read (``show_average``).

    If no document has been read by the time a subtotal, total or average
    row is due, ``"No documents to count"`` is written to stderr instead.
    """
    consts = CountFormatter
    unit = consts.COUNT_BYTES if self.args.count_bytes else consts.COUNT_ELEMENTS
    self.formatter.start()

    # The real counters are only known once the first document is seen.
    # Bind empty placeholders so "nothing read yet" is an explicit state
    # (tested through ``i``) rather than an unbound local caught as
    # NameError, which would also hide genuine NameErrors raised inside
    # the formatter.
    extractors = ()
    totals = []
    subtotals = []

    i = 0  # number of documents read so far, across all files
    for in_file in self.args.files:
        if i and not self.args.cumulative:
            subtotals = [0] * len(extractors)

        for doc in read_raw_docs(in_file, on_end='break'):
            if not i:
                # First document overall: discover the counters and size
                # the accumulators accordingly.
                names, extractors = self._get_counters(doc)
                totals = [0] * len(extractors)
                subtotals = [0] * len(extractors)
                self.formatter.set_fields(names)

            doc_counts = [extract(doc) for extract in extractors]
            for j, c in enumerate(doc_counts):
                subtotals[j] += c
                totals[j] += c

            if self.args.show_interval and (i + 1) % self.args.show_interval == 0:
                if self.args.cumulative:
                    self.formatter.add_row(totals, i, agg=consts.AGG_SUM,
                                           filename=in_file.name, unit=unit)
                else:
                    self.formatter.add_row(doc_counts, i,
                                           filename=in_file.name, unit=unit)
            i += 1

        if self.args.show_subtotal:
            if i:
                self.formatter.add_row(subtotals, consts.FILE, agg=consts.AGG_SUM,
                                       filename=in_file.name, unit=unit)
            else:
                print("No documents to count", file=sys.stderr)

    if self.args.show_total or self.args.show_average:
        if i:
            if self.args.show_total:
                self.formatter.add_row(totals, consts.ALL, agg=consts.AGG_SUM,
                                       unit=unit)
            if self.args.show_average:
                # ``i`` is non-zero here, so the division is safe.
                self.formatter.add_row([x / i for x in totals], consts.ALL,
                                       agg=consts.AGG_AVG, unit=unit)
        else:
            print("No documents to count", file=sys.stderr)

    self.formatter.finish()
def _headers_only(self, unpacker):
    """Yield ``version``, ``klasses`` and ``stores`` of each document in turn.

    Documents are read from ``unpacker`` with ``read_raw_docs``; for every
    document the three attributes are yielded as three separate values, in
    that order.
    """
    header_attrs = ('version', 'klasses', 'stores')
    for raw_doc in read_raw_docs(unpacker):
        # Look each attribute up only when its value is about to be
        # yielded, keeping the generator as lazy as three plain yields.
        for attr in header_attrs:
            yield getattr(raw_doc, attr)