def _build_collections(self, branch_forms):
    """Assemble per-collection forms from a flat mapping of branch forms.

    Collection names are the text before the first underscore of each branch
    name. A name of the form ``n<Name>`` is dropped from the set when
    ``<Name>`` is itself a collection, because that branch is consumed as the
    counts of ``<Name>`` rather than being a collection of its own.

    Each remaining collection is emitted as one of four shapes:

    * list collection -- offsets ``o<Name>`` exist and there is no branch
      called ``<Name>``: all ``<Name>_*`` branches are zipped with the offsets.
    * list singleton -- offsets exist and a branch called ``<Name>`` exists:
      the branch form itself is used and tagged with the mixin name.
    * singleton -- only a branch called ``<Name>`` exists: returned as is.
    * simple collection -- anything else: ``<Name>_*`` branches are zipped
      without offsets.

    Parameters
    ----------
    branch_forms : dict
        Mapping of branch name to form. It is modified in place: derived
        entries (``o<Name>`` offsets, ``<indexer>G`` global indices, the
        entries of ``self.nested_items`` and ``self.special_items``) are
        added, and the ``parameters`` of list-singleton forms are updated.

    Returns
    -------
    dict
        Mapping of collection name to the form built for it.

    Raises
    ------
    RuntimeError
        If an ``*Idx*`` branch does not name a target collection, names a
        collection that is not present, or names a collection that has no
        offsets (i.e. no ``n<Target>`` counts branch).
    """
    # parse into high-level records (collections, list collections, and singletons)
    collections = set(k.split("_")[0] for k in branch_forms)
    collections -= set(
        k for k in collections if k.startswith("n") and k[1:] in collections
    )

    # Create offsets virtual arrays
    for name in collections:
        if "n" + name in branch_forms:
            branch_forms["o" + name] = transforms.counts2offsets_form(
                branch_forms["n" + name]
            )

    # Create global index virtual arrays for indirection.
    # The list is snapshotted first because the loop adds "<indexer>G" keys,
    # which also contain "Idx" and must not be processed again.
    idxbranches = [k for k in branch_forms if "Idx" in k]
    for name in collections:
        prefix = name + "_"
        for k in idxbranches:
            if not k.startswith(prefix):
                continue
            # "<Name>_<target>Idx..." -> "<Target>" (first letter capitalised)
            target = k[len(prefix) : k.find("Idx")]
            if not target:
                # Previously surfaced as a bare IndexError from target[0].
                raise RuntimeError(
                    f"Parsing indexer {k}, could not determine the target "
                    "collection name"
                )
            target = target[0].upper() + target[1:]
            if target not in collections:
                raise RuntimeError(
                    f"Parsing indexer {k}, expected to find collection "
                    f"{target} but did not"
                )
            if "o" + target not in branch_forms:
                # Previously surfaced as a bare KeyError on "o<Target>".
                raise RuntimeError(
                    f"Parsing indexer {k}, collection {target} has no "
                    f"offsets (missing n{target} branch)"
                )
            branch_forms[k + "G"] = transforms.local2global_form(
                branch_forms[k], branch_forms["o" + target]
            )

    # Create nested indexer from Idx1, Idx2, ... arrays
    for name, indexers in self.nested_items.items():
        if all(idx in branch_forms for idx in indexers):
            branch_forms[name] = transforms.nestedindex_form(
                [branch_forms[idx] for idx in indexers]
            )

    # Create any special arrays (only when every input branch is available)
    for name, (fcn, args) in self.special_items.items():
        if all(k in branch_forms for k in args):
            branch_forms[name] = fcn(*(branch_forms[k] for k in args))

    output = {}
    for name in collections:
        mixin = self.mixins.get(name, "NanoCollection")
        if "o" + name in branch_forms and name not in branch_forms:
            # list collection
            offsets = branch_forms["o" + name]
            content = {
                k[len(name) + 1 :]: branch_forms[k]
                for k in branch_forms
                if k.startswith(name + "_")
            }
            output[name] = zip_forms(
                content, name, record_name=mixin, offsets=offsets
            )
            # The record's docstring is taken from the offsets form.
            output[name]["content"]["parameters"].update(
                {
                    "__doc__": offsets["parameters"]["__doc__"],
                    "collection_name": name,
                }
            )
        elif "o" + name in branch_forms:
            # list singleton, can use branch's own offsets
            output[name] = branch_forms[name]
            output[name]["parameters"].update(
                {"__array__": mixin, "collection_name": name}
            )
        elif name in branch_forms:
            # singleton
            output[name] = branch_forms[name]
        else:
            # simple collection
            output[name] = zip_forms(
                {
                    k[len(name) + 1 :]: branch_forms[k]
                    for k in branch_forms
                    if k.startswith(name + "_")
                },
                name,
                record_name=mixin,
            )
            output[name]["parameters"].update({"collection_name": name})

    return output
def _build_collections(self, branch_forms):
    """Turn a flat branch-name -> form mapping into per-collection forms.

    The prefix before the first underscore of every branch name is treated
    as a collection name; ``n<Name>`` prefixes are discarded when ``<Name>``
    is also a collection, since those branches serve as counts. Derived
    entries are written back into ``branch_forms`` (offsets ``o<Name>``,
    global indices ``<indexer>G`` for ``self.cross_references``, and the
    entries described by ``self.nested_items``, ``self.nested_index_items``
    and ``self.special_items``) before the collections are assembled.

    Cross-references whose target starts with ``"Gen"`` are skipped when no
    ``GenPart`` collection is present. Any other cross-reference whose index
    branch or target offsets are missing is skipped as well, with a
    ``RuntimeWarning`` if ``self.warn_missing_crossrefs`` is set.

    Parameters
    ----------
    branch_forms : dict
        Mapping of branch name to form; mutated in place as described above.

    Returns
    -------
    dict
        Mapping of collection name to its assembled form.
    """
    # Discover the high-level records (collections, list collections, singletons).
    names = set(key.split("_")[0] for key in branch_forms)
    counters = set(
        key for key in names if key.startswith("n") and key[1:] in names
    )
    names -= counters
    no_gen_info = "GenPart" not in names

    def members_of(collection):
        # Forms of every "<collection>_*" branch, keyed by the part after the prefix.
        prefix = collection + "_"
        return {
            key[len(prefix):]: branch_forms[key]
            for key in branch_forms
            if key.startswith(prefix)
        }

    # Offsets virtual arrays, one per collection that has a counts branch.
    for collection in names:
        counts_key = "n" + collection
        if counts_key in branch_forms:
            branch_forms["o" + collection] = transforms.counts2offsets_form(
                branch_forms[counts_key]
            )

    # Global index virtual arrays for indirection.
    for indexer, target in self.cross_references.items():
        if target.startswith("Gen") and no_gen_info:
            continue
        offsets_key = "o" + target
        index_present = indexer in branch_forms
        if index_present and offsets_key in branch_forms:
            branch_forms[indexer + "G"] = transforms.local2global_form(
                branch_forms[indexer], branch_forms[offsets_key]
            )
        elif not self.warn_missing_crossrefs:
            continue
        elif not index_present:
            warnings.warn(
                f"Missing cross-reference index for {indexer} => {target}",
                RuntimeWarning,
            )
        else:
            warnings.warn(
                f"Missing cross-reference target for {indexer} => {target}",
                RuntimeWarning,
            )

    # Nested indexers built from Idx1, Idx2, ... arrays.
    for derived, sources in self.nested_items.items():
        if all(src in branch_forms for src in sources):
            source_forms = [branch_forms[src] for src in sources]
            branch_forms[derived] = transforms.nestedindex_form(source_forms)

    # Nested indexers built from n* counts arrays.
    for derived, (counts_key, target) in self.nested_index_items.items():
        offsets_key = "o" + target
        if counts_key in branch_forms and offsets_key in branch_forms:
            branch_forms[derived] = transforms.counts2nestedindex_form(
                branch_forms[counts_key], branch_forms[offsets_key]
            )

    # Special arrays, computed only when all of their inputs are available.
    for derived, (builder, inputs) in self.special_items.items():
        if all(key in branch_forms for key in inputs):
            branch_forms[derived] = builder(*(branch_forms[key] for key in inputs))

    output = {}
    for collection in names:
        mixin = self.mixins.get(collection, "NanoCollection")
        offsets_key = "o" + collection
        has_offsets = offsets_key in branch_forms

        if collection in branch_forms:
            form = branch_forms[collection]
            if has_offsets:
                # list singleton, can use branch's own offsets
                form["parameters"].update(
                    {"__array__": mixin, "collection_name": collection}
                )
            # otherwise a plain singleton, passed through untouched
            output[collection] = form
        elif has_offsets:
            # list collection
            offsets = branch_forms[offsets_key]
            form = zip_forms(
                members_of(collection),
                collection,
                record_name=mixin,
                offsets=offsets,
            )
            form["content"]["parameters"].update(
                {
                    "__doc__": offsets["parameters"]["__doc__"],
                    "collection_name": collection,
                }
            )
            output[collection] = form
        else:
            # simple collection
            form = zip_forms(members_of(collection), collection, record_name=mixin)
            form["parameters"].update({"collection_name": collection})
            output[collection] = form

    return output