def iterDatasets(self) -> Iterator[FileDataset]:
    # Docstring inherited from RepoConverter.
    # Iterate over reference catalog files.
    for refCat, dimension in self._refCats:
        datasetType = DatasetType(refCat, dimensions=[dimension], universe=self.task.universe,
                                  storageClass="SimpleCatalog")
        if self.subset is None:
            regex = re.compile(r"(\d+)\.fits")
            for fileName in os.listdir(os.path.join(self.root, "ref_cats", refCat)):
                m = regex.match(fileName)
                if m is not None:
                    htmId = int(m.group(1))
                    dataId = self.task.registry.expandDataId({dimension: htmId})
                    yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat, fileName),
                                      refs=DatasetRef(datasetType, dataId))
        else:
            for begin, end in self.subset.skypix[dimension]:
                for htmId in range(begin, end):
                    dataId = self.task.registry.expandDataId({dimension: htmId})
                    yield FileDataset(path=os.path.join(self.root, "ref_cats", refCat,
                                                        f"{htmId}.fits"),
                                      refs=DatasetRef(datasetType, dataId))
    yield from super().iterDatasets()

def testConstructor(self):
    """Test that construction preserves and validates values.
    """
    # Construct an unresolved ref.
    ref = DatasetRef(self.datasetType, self.dataId)
    self.assertEqual(ref.datasetType, self.datasetType)
    self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                     msg=ref.dataId)
    self.assertIsInstance(ref.dataId, DataCoordinate)
    # Constructing an unresolved ref with run and/or components should
    # fail.
    run = "somerun"
    with self.assertRaises(ValueError):
        DatasetRef(self.datasetType, self.dataId, run=run)
    # Passing a data ID that is missing dimensions should fail.
    with self.assertRaises(KeyError):
        DatasetRef(self.datasetType, {"instrument": "DummyCam"})
    # Constructing a resolved ref should preserve run as well as
    # everything else.
    ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run)
    self.assertEqual(ref.datasetType, self.datasetType)
    self.assertEqual(ref.dataId, DataCoordinate.standardize(self.dataId, universe=self.universe),
                     msg=ref.dataId)
    self.assertIsInstance(ref.dataId, DataCoordinate)
    self.assertEqual(ref.id, 1)
    self.assertEqual(ref.run, run)

def testResolving(self):
    ref = DatasetRef(self.datasetType, self.dataId, id=1, run="somerun")
    unresolvedRef = ref.unresolved()
    self.assertIsNotNone(ref.id)
    self.assertIsNone(unresolvedRef.id)
    self.assertIsNone(unresolvedRef.run)
    self.assertNotEqual(ref, unresolvedRef)
    self.assertEqual(ref.unresolved(), unresolvedRef)
    self.assertEqual(ref.datasetType, unresolvedRef.datasetType)
    self.assertEqual(ref.dataId, unresolvedRef.dataId)
    reresolvedRef = unresolvedRef.resolved(id=1, run="somerun")
    self.assertEqual(ref, reresolvedRef)
    self.assertEqual(reresolvedRef.unresolved(), unresolvedRef)
    self.assertIsNotNone(reresolvedRef.run)

def ingestExposureDatasets(self, exposure: RawExposureData, *, run: Optional[str] = None
                           ) -> List[DatasetRef]:
    """Ingest all raw files in one exposure.

    Parameters
    ----------
    exposure : `RawExposureData`
        A structure containing information about the exposure to be
        ingested.  Must have `RawExposureData.records` populated and all
        data ID attributes expanded.
    run : `str`, optional
        Name of a RUN-type collection to write to, overriding
        ``self.butler.run``.

    Returns
    -------
    refs : `list` of `lsst.daf.butler.DatasetRef`
        Dataset references for ingested raws.
    """
    datasets = [FileDataset(path=os.path.abspath(file.filename),
                            refs=[DatasetRef(self.datasetType, d.dataId) for d in file.datasets],
                            formatter=file.FormatterClass)
                for file in exposure.files]
    self.butler.ingest(*datasets, transfer=self.config.transfer, run=run)
    return [ref for dataset in datasets for ref in dataset.refs]

def testDetach(self):
    datasetTypeName = "test"
    storageClass = StorageClass("testref_StructuredData")
    dimensions = self.universe.extract(("instrument", "visit"))
    dataId = dict(instrument="DummyCam", visit=42)
    datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
    ref = DatasetRef(datasetType, dataId, id=1)
    detachedRef = ref.detach()
    self.assertIsNotNone(ref.id)
    self.assertIsNone(detachedRef.id)
    self.assertEqual(ref.datasetType, detachedRef.datasetType)
    self.assertEqual(ref.dataId, detachedRef.dataId)
    self.assertEqual(ref.predictedConsumers, detachedRef.predictedConsumers)
    self.assertEqual(ref.actualConsumers, detachedRef.actualConsumers)
    self.assertEqual(ref.components, detachedRef.components)

def ingestExposureDatasets(self, exposure: RawExposureData, butler: Optional[Butler] = None
                           ) -> List[DatasetRef]:
    """Ingest all raw files in one exposure.

    Parameters
    ----------
    exposure : `RawExposureData`
        A structure containing information about the exposure to be
        ingested.  Must have `RawExposureData.records` populated and all
        data ID attributes expanded.
    butler : `lsst.daf.butler.Butler`, optional
        Butler to use for ingest.  If not provided, ``self.butler`` will
        be used.

    Returns
    -------
    refs : `list` of `lsst.daf.butler.DatasetRef`
        Dataset references for ingested raws.
    """
    if butler is None:
        butler = self.butler
    datasets = [FileDataset(path=os.path.abspath(file.filename),
                            refs=[DatasetRef(self.datasetType, d.dataId) for d in file.datasets],
                            formatter=file.FormatterClass)
                for file in exposure.files]
    butler.ingest(*datasets, transfer=self.config.transfer)
    return [ref for dataset in datasets for ref in dataset.refs]

def getDatasetRef(self, id: int, *, universe: DimensionUniverse) -> Optional[DatasetRef]:
    # Docstring inherited from DatasetRecordStorageManager.
    sql = sqlalchemy.sql.select(
        [
            self._static.dataset.columns.dataset_type_id,
            self._static.dataset.columns[self._collections.getRunForeignKeyName()],
        ]
    ).select_from(
        self._static.dataset
    ).where(
        self._static.dataset.columns.id == id
    )
    row = self._db.query(sql).fetchone()
    if row is None:
        return None
    recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
    if recordsForType is None:
        self.refresh(universe=universe)
        recordsForType = self._byId.get(row[self._static.dataset.columns.dataset_type_id])
        assert recordsForType is not None, "Should be guaranteed by foreign key constraints."
    return DatasetRef(
        recordsForType.datasetType,
        dataId=recordsForType.getDataId(id=id),
        id=id,
        run=self._collections[row[self._collections.getRunForeignKeyName()]].name
    )

def prep(self):
    # Docstring inherited from RepoConverter.
    self.task.log.info(f"Looking for skymaps in root {self.root}.")
    for coaddName, datasetTypeName in SKYMAP_DATASET_TYPES.items():
        if not self.task.isDatasetTypeIncluded(datasetTypeName):
            continue
        try:
            exists = self.butler2.datasetExists(datasetTypeName)
        except AttributeError:
            # This mapper doesn't even define this dataset type.
            continue
        if not exists:
            continue
        instance = self.butler2.get(datasetTypeName)
        name = self.task.useSkyMap(instance, datasetTypeName)
        datasetType = DatasetType(datasetTypeName, dimensions=["skymap"], storageClass="SkyMap",
                                  universe=self.task.universe)
        dataId = DataCoordinate.standardize(skymap=name, universe=self.task.universe)
        struct = FoundSkyMap(name=name, instance=instance, coaddName=coaddName,
                             ref=DatasetRef(datasetType, dataId),
                             filename=self.butler2.getUri(datasetTypeName))
        self._foundSkyMapsByCoaddName[coaddName] = struct
        self.task.log.info("Found skymap %s in %s in %s.", name, datasetTypeName, self.root)
    super().prep()

def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
    # Docstring inherited from DatasetRecordStorageManager.
    staticRow = {
        "dataset_type_id": self._dataset_type_id,
        self._runKeyColumn: run.key,
    }
    dataIds = list(dataIds)
    # Insert into the static dataset table, generating autoincrement
    # dataset_id values.
    with self._db.transaction():
        datasetIds = self._db.insert(self._static.dataset, *([staticRow]*len(dataIds)),
                                     returnIds=True)
        assert datasetIds is not None
        # Combine the generated dataset_id values and data ID fields to
        # form rows to be inserted into the dynamic table.
        protoDynamicRow = {
            "dataset_type_id": self._dataset_type_id,
            self._collections.getCollectionForeignKeyName(): run.key,
        }
        dynamicRows = [
            dict(protoDynamicRow, dataset_id=dataset_id, **dataId.byName())
            for dataId, dataset_id in zip(dataIds, datasetIds)
        ]
        # Insert those rows into the dynamic table.  This is where we'll
        # get any unique constraint violations.
        self._db.insert(self._dynamic, *dynamicRows)
    for dataId, datasetId in zip(dataIds, datasetIds):
        yield DatasetRef(
            datasetType=self.datasetType,
            dataId=dataId,
            id=datasetId,
            run=run.name,
        )

def makeDatasetRef(self, datasetTypeName, dataUnits, storageClass, dataId, id=None):
    """Make a DatasetType and wrap it in a DatasetRef for a test"""
    datasetType = DatasetType(datasetTypeName, dataUnits, storageClass)
    if id is None:
        self.id += 1
        id = self.id
    return DatasetRef(datasetType, dataId, id=id)

def handle(self, path: str, nextDataId2,
           datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
           predicate: Callable[[DataCoordinate], bool]):
    dataId3, calibDate = self.translate(nextDataId2, partial=True)

    def get_detectors(filename):
        fitsData = lsst.afw.fits.Fits(filename, 'r')
        # NOTE: The primary header (HDU=0) does not contain detector data.
        detectors = []
        for i in range(1, fitsData.countHdus()):
            fitsData.setHdu(i)
            metadata = fitsData.readMetadata()
            detectors.append(metadata['CCDNUM'])
        return detectors

    if predicate(dataId3):
        detectors = get_detectors(path)
        refs = []
        for detector in detectors:
            newDataId3 = DataCoordinate.standardize(dataId3,
                                                    graph=self._datasetType.dimensions,
                                                    detector=detector)
            refs.append(DatasetRef(self._datasetType, newDataId3))
        datasets[self._datasetType][calibDate].append(
            FileDataset(refs=refs, path=path, formatter=self._formatter)
        )

def testSorting(self):
    """Can we sort a DatasetRef"""
    ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=1))
    ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10))
    ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=22))

    # Enable detailed diff report
    self.maxDiff = None

    # This will sort them on visit number
    sort = sorted([ref3, ref1, ref2])
    self.assertEqual(sort, [ref1, ref2, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

    # Now include a run
    ref1 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=43), run="b", id=2)
    self.assertEqual(ref1.run, "b")
    ref4 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=10), run="b", id=2)
    ref2 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=4), run="a", id=1)
    ref3 = DatasetRef(self.datasetType, dict(instrument="DummyCam", visit=104), run="c", id=3)

    # This will sort them on run before visit
    sort = sorted([ref3, ref1, ref2, ref4])
    self.assertEqual(sort, [ref2, ref4, ref1, ref3], msg=f"Got order: {[r.dataId for r in sort]}")

    # Now with strings
    with self.assertRaises(TypeError):
        sort = sorted(["z", ref1, "c"])

def ingestStrayLightData(self, butler, directory, *, transfer=None):
    """Ingest externally-produced y-band stray light data files into a
    data repository.

    Parameters
    ----------
    butler : `lsst.daf.butler.Butler`
        Butler initialized with the collection to ingest into.
    directory : `str`
        Directory containing yBackground-*.fits files.
    transfer : `str`, optional
        If not `None`, must be one of 'move', 'copy', 'hardlink', or
        'symlink', indicating how to transfer the files.
    """
    calibrationLabel = "y-LED-encoder-on"
    # LEDs covered up around 2018-01-01, no need for correction after
    # that date.
    datetime_end = datetime.datetime(2018, 1, 1)
    datasets = []
    # TODO: should we use a more generic name for the dataset type?
    # This is just the (rather HSC-specific) name used in Gen2, and while
    # the instances of this dataset are camera-specific, the datasetType
    # (which is used in the generic IsrTask) should not be.
    datasetType = DatasetType("yBackground",
                              dimensions=("physical_filter", "detector", "calibration_label"),
                              storageClass="StrayLightData",
                              universe=butler.registry.dimensions)
    for detector in self.getCamera():
        path = os.path.join(directory, f"ybackground-{detector.getId():03d}.fits")
        if not os.path.exists(path):
            log.warn(f"No stray light data found for detector {detector.getId()} @ {path}.")
            continue
        ref = DatasetRef(datasetType, dataId={"instrument": self.getName(),
                                              "detector": detector.getId(),
                                              "physical_filter": "HSC-Y",
                                              "calibration_label": calibrationLabel})
        datasets.append(FileDataset(ref=ref, path=path,
                                    formatter=SubaruStrayLightDataFormatter))
    with butler.transaction():
        butler.registry.registerDatasetType(datasetType)
        butler.registry.insertDimensionData("calibration_label",
                                            {"instrument": self.getName(),
                                             "name": calibrationLabel,
                                             "datetime_begin": datetime.date.min,
                                             "datetime_end": datetime_end})
        butler.ingest(*datasets, transfer=transfer)

def handle(self, path: str, nextDataId2, datasets: Mapping[DatasetType, List[FileDataset]], *,
           log: Log, predicate: Callable[[DataCoordinate], bool]):
    # Docstring inherited from ParsedPathElementHandler.
    dataId3 = self.translate(nextDataId2, partial=False, log=log)
    if predicate(dataId3):
        datasets[self._datasetType].append(
            FileDataset(refs=[DatasetRef(self._datasetType, dataId3)], path=path)
        )

def comparableRef(self, ref: DatasetRef) -> DatasetRef:
    """Return a DatasetRef that can be compared to a DatasetRef from
    another repository.

    For repositories that do not support round-trip of ID values this
    method returns an unresolved DatasetRef; for round-trip-safe repos it
    returns the ref unchanged.
    """
    return ref if self.datasetsIdType is uuid.UUID else ref.unresolved()

def _makeDSRefVisit(self, dstype, visitId, universe):
    return DatasetRef(datasetType=dstype,
                      dataId=DataCoordinate.standardize(detector="X",
                                                        visit=visitId,
                                                        physical_filter='a',
                                                        abstract_filter='b',
                                                        instrument='TestInstrument',
                                                        universe=universe))

def makeDatasetRef(self, datasetTypeName, dataId=None):
    """Make a simple DatasetRef"""
    if dataId is None:
        dataId = self.dataId
    if datasetTypeName not in self.datasetTypes:
        self.datasetTypes[datasetTypeName] = DatasetType(datasetTypeName, list(dataId.keys()),
                                                         StorageClass())
    datasetType = self.datasetTypes[datasetTypeName]
    return DatasetRef(datasetType, dataId)

def makeDatasetRef(self, datasetTypeName, dimensions, storageClass, dataId, *, id=None, run=None,
                   conform=True):
    """Make a DatasetType and wrap it in a DatasetRef for a test"""
    datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
    if id is None:
        self.id += 1
        id = self.id
    if run is None:
        run = "dummy"
    return DatasetRef(datasetType, dataId, id=id, run=run, conform=conform)

def testAddInputsOutputs(self):
    """Test of addPredictedInput() method.
    """
    quantum = Quantum(taskName="some.task.object", run=None)

    # start with empty
    self.assertEqual(quantum.predictedInputs, dict())
    universe = DimensionUniverse()
    instrument = "DummyCam"
    datasetTypeName = "test_ds"
    storageClass = StorageClass("testref_StructuredData")
    datasetType = DatasetType(datasetTypeName, universe.extract(("instrument", "visit")),
                              storageClass)

    # add one ref
    ref = DatasetRef(datasetType, dict(instrument=instrument, visit=42))
    quantum.addPredictedInput(ref)
    self.assertIn(datasetTypeName, quantum.predictedInputs)
    self.assertEqual(len(quantum.predictedInputs[datasetTypeName]), 1)
    # add second ref
    ref = DatasetRef(datasetType, dict(instrument=instrument, visit=43))
    quantum.addPredictedInput(ref)
    self.assertEqual(len(quantum.predictedInputs[datasetTypeName]), 2)

    # mark last ref as actually used
    self.assertEqual(quantum.actualInputs, dict())
    quantum._markInputUsed(ref)
    self.assertIn(datasetTypeName, quantum.actualInputs)
    self.assertEqual(len(quantum.actualInputs[datasetTypeName]), 1)

    # add couple of outputs too
    self.assertEqual(quantum.outputs, dict())
    ref = DatasetRef(datasetType, dict(instrument=instrument, visit=42))
    quantum.addOutput(ref)
    self.assertIn(datasetTypeName, quantum.outputs)
    self.assertEqual(len(quantum.outputs[datasetTypeName]), 1)
    ref = DatasetRef(datasetType, dict(instrument=instrument, visit=43))
    quantum.addOutput(ref)
    self.assertEqual(len(quantum.outputs[datasetTypeName]), 2)

def findDatastores(self, ref: DatasetRef) -> Iterable[str]:
    # Docstring inherited from DatastoreRegistryBridge
    sql = sqlalchemy.sql.select(
        [self._tables.dataset_location.columns.datastore_name]
    ).select_from(
        self._tables.dataset_location
    ).where(
        self._tables.dataset_location.columns.dataset_id == ref.getCheckedId()
    )
    for row in self._db.query(sql).fetchall():
        yield row[self._tables.dataset_location.columns.datastore_name]
    for name, bridge in self._ephemeral.items():
        if ref in bridge:
            yield name

def handle(self, path: str, nextDataId2,
           datasets: Mapping[DatasetType, Mapping[Optional[str], List[FileDataset]]], *,
           predicate: Callable[[DataCoordinate], bool]):
    # Docstring inherited from ParsedPathElementHandler.
    dataId3, calibDate = self.translate(nextDataId2, partial=False)
    if predicate(dataId3):
        datasets[self._datasetType][calibDate].append(
            FileDataset(
                refs=[DatasetRef(self._datasetType, dataId3)],
                path=path,
                formatter=self._formatter
            )
        )

def _makeDSRefVisit(self, dstype, visitId, universe):
    return DatasetRef(
        datasetType=dstype,
        dataId=DataCoordinate.standardize(
            detector="X",
            visit=visitId,
            physical_filter="a",
            band="b",
            instrument="TestInstrument",
            universe=universe,
        ),
    )

def testConstructor(self):
    """Test of constructor.
    """
    # Quantum specific arguments
    taskName = "some.task.object"  # can't use a real PipelineTask due to inverted package dependency

    quantum = Quantum(taskName=taskName)
    self.assertEqual(quantum.taskName, taskName)
    self.assertEqual(quantum.initInputs, {})
    self.assertEqual(quantum.inputs, NamedKeyDict())
    self.assertEqual(quantum.outputs, {})
    self.assertIsNone(quantum.dataId)

    universe = DimensionUniverse()
    instrument = "DummyCam"
    datasetTypeName = "test_ds"
    storageClass = StorageClass("testref_StructuredData")
    datasetType = DatasetType(datasetTypeName, universe.extract(("instrument", "visit")),
                              storageClass)
    predictedInputs = {
        datasetType: [DatasetRef(datasetType, dict(instrument=instrument, visit=42)),
                      DatasetRef(datasetType, dict(instrument=instrument, visit=43))]
    }
    outputs = {
        datasetType: [DatasetRef(datasetType, dict(instrument=instrument, visit=42)),
                      DatasetRef(datasetType, dict(instrument=instrument, visit=43))]
    }

    quantum = Quantum(taskName=taskName, inputs=predictedInputs, outputs=outputs)
    self.assertEqual(len(quantum.inputs[datasetType]), 2)
    self.assertEqual(len(quantum.outputs[datasetType]), 2)

def testConstructor(self):
    """Test that construction preserves and validates values.
    """
    # Construct an unresolved ref.
    ref = DatasetRef(self.datasetType, self.dataId)
    self.assertEqual(ref.datasetType, self.datasetType)
    self.assertEqual(ref.dataId, self.dataId, msg=ref.dataId)
    self.assertIsInstance(ref.dataId, DataCoordinate)
    self.assertIsNone(ref.components)
    # Constructing an unresolved ref with run and/or components should
    # fail.
    run = "somerun"
    with self.assertRaises(ValueError):
        DatasetRef(self.datasetType, self.dataId, run=run)
    components = {"a": DatasetRef(self.datasetType.makeComponentDatasetType("a"), self.dataId,
                                  id=2, run=run)}
    with self.assertRaises(ValueError):
        DatasetRef(self.datasetType, self.dataId, components=components)
    # Passing a data ID that is missing dimensions should fail.
    with self.assertRaises(KeyError):
        DatasetRef(self.datasetType, {"instrument": "DummyCam"})
    # Constructing a resolved ref should preserve run and components, as
    # well as everything else.
    ref = DatasetRef(self.datasetType, self.dataId, id=1, run=run, components=components)
    self.assertEqual(ref.datasetType, self.datasetType)
    self.assertEqual(ref.dataId, self.dataId, msg=ref.dataId)
    self.assertIsInstance(ref.dataId, DataCoordinate)
    self.assertEqual(ref.id, 1)
    self.assertEqual(ref.run, run)
    self.assertEqual(ref.components, components)
    # Constructing a resolved ref with bad component storage classes
    # should fail.
    with self.assertRaises(ValueError):
        DatasetRef(self.datasetType, self.dataId, id=1, run=run,
                   components={"b": components["a"]})
    # Constructing a resolved ref with unresolved components should fail.
    with self.assertRaises(ValueError):
        DatasetRef(self.datasetType, self.dataId, id=1, run=run,
                   components={"a": components["a"].unresolved()})
    # Constructing a resolved ref with bad component names should fail.
    with self.assertRaises(ValueError):
        DatasetRef(self.datasetType, self.dataId, id=1, run=run,
                   components={"c": components["a"]})

def find(self, collection: CollectionRecord, dataId: DataCoordinate) -> Optional[DatasetRef]:
    # Docstring inherited from DatasetRecordStorageManager.
    assert dataId.graph == self.datasetType.dimensions
    sql = self.select(collection=collection, dataId=dataId, id=SimpleQuery.Select,
                      run=SimpleQuery.Select).combine()
    row = self._db.query(sql).fetchone()
    if row is None:
        return None
    return DatasetRef(
        datasetType=self.datasetType,
        dataId=dataId,
        id=row["id"],
        run=self._collections[row[self._runKeyColumn]].name
    )

def makeDatasetRef(self, datasetTypeName, dataId=None, storageClassName="DefaultStorageClass",
                   run="run2", conform=True):
    """Make a simple DatasetRef"""
    if dataId is None:
        dataId = self.dataId

    # Pretend we have a parent if this looks like a composite
    compositeName, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
    parentStorageClass = DatasetType.PlaceholderParentStorageClass if componentName else None

    datasetType = DatasetType(datasetTypeName, DimensionGraph(self.universe, names=dataId.keys()),
                              StorageClass(storageClassName),
                              parentStorageClass=parentStorageClass)
    return DatasetRef(datasetType, dataId, id=1, run=run, conform=conform)

def makeDatasetRef(self, datasetTypeName, dimensions, storageClass, dataId, id=None, run=None):
    """Make a DatasetType and wrap it in a DatasetRef for a test"""
    datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
    if id is None:
        self.id += 1
        id = self.id
    if run is None:
        run = Run(id=1, collection="dummy")
    return DatasetRef(datasetType, dataId, id=id, run=run)

def testConstructor(self):
    """Test construction preserves values.
    """
    datasetTypeName = "test"
    storageClass = StorageClass("testref_StructuredData")
    dimensions = self.universe.extract(("instrument", "visit"))
    dataId = dict(instrument="DummyCam", visit=42)
    datasetType = DatasetType(datasetTypeName, dimensions, storageClass)
    ref = DatasetRef(datasetType, dataId)
    self.assertEqual(ref.datasetType, datasetType)
    self.assertEqual(ref.dataId, dataId)
    self.assertIsNone(ref.producer)
    self.assertEqual(ref.predictedConsumers, dict())
    self.assertEqual(ref.actualConsumers, dict())
    self.assertEqual(ref.components, dict())

def _makeDatasetRef(self, datasetTypeName, dimensions, storageClass, dataId, *, id=None, run=None,
                    conform=True):
    # helper for makeDatasetRef

    # Pretend we have a parent if this looks like a composite
    compositeName, componentName = DatasetType.splitDatasetTypeName(datasetTypeName)
    parentStorageClass = StorageClass("component") if componentName else None

    datasetType = DatasetType(datasetTypeName, dimensions, storageClass,
                              parentStorageClass=parentStorageClass)
    if id is None:
        self.id += 1
        id = self.id
    if run is None:
        run = "dummy"
    return DatasetRef(datasetType, dataId, id=id, run=run, conform=conform)

def insert(self, run: RunRecord, dataIds: Iterable[DataCoordinate]) -> Iterator[DatasetRef]:
    # Docstring inherited from DatasetRecordStorage.
    staticRow = {
        "dataset_type_id": self._dataset_type_id,
        self._runKeyColumn: run.key,
    }
    # Iterate over data IDs, transforming a possibly-single-pass iterable
    # into a list, and remembering any governor dimension values we see.
    governorValues = GovernorDimensionRestriction.makeEmpty(self.datasetType.dimensions.universe)
    dataIdList = []
    for dataId in dataIds:
        dataIdList.append(dataId)
        governorValues.update_extract(dataId)
    with self._db.transaction():
        # Insert into the static dataset table, generating autoincrement
        # dataset_id values.
        datasetIds = self._db.insert(self._static.dataset, *([staticRow] * len(dataIdList)),
                                     returnIds=True)
        assert datasetIds is not None
        # Update the summary tables for this collection in case this is
        # the first time this dataset type or these governor values will
        # be inserted there.
        self._summaries.update(run, self.datasetType, self._dataset_type_id, governorValues)
        # Combine the generated dataset_id values and data ID fields to
        # form rows to be inserted into the tags table.
        protoTagsRow = {
            "dataset_type_id": self._dataset_type_id,
            self._collections.getCollectionForeignKeyName(): run.key,
        }
        tagsRows = [
            dict(protoTagsRow, dataset_id=dataset_id, **dataId.byName())
            for dataId, dataset_id in zip(dataIdList, datasetIds)
        ]
        # Insert those rows into the tags table.  This is where we'll
        # get any unique constraint violations.
        self._db.insert(self._tags, *tagsRows)
    for dataId, datasetId in zip(dataIdList, datasetIds):
        yield DatasetRef(
            datasetType=self.datasetType,
            dataId=dataId,
            id=datasetId,
            run=run.name,
        )