def mark_file_as_discovered(self, path: GcsfsFilePath) -> None:
    """Record in the operations database that the file at |path| was discovered.

    For an ingest view file, the existing metadata row for |path| is looked
    up and its discovery_time is set to the current UTC time; export_time is
    set to that same time only if it is not already populated. For a raw
    data file, a new DirectIngestRawFileMetadata row is added with the
    current UTC time as its discovery_time.

    All work happens in a single session: any exception triggers a rollback
    and is re-raised, and the session is always closed.

    Args:
        path: Path of the discovered file. Its file name must start with
            DIRECT_INGEST_UNPROCESSED_PREFIX.

    Raises:
        ValueError: If the file name does not carry the unprocessed prefix,
            or if the parsed file type is neither INGEST_VIEW nor RAW_DATA.
    """
    is_unprocessed = path.file_name.startswith(DIRECT_INGEST_UNPROCESSED_PREFIX)
    if not is_unprocessed:
        raise ValueError("Expect only unprocessed paths in this function.")

    parts = filename_parts_from_path(path)
    session = SessionFactory.for_schema_base(OperationsBase)

    try:
        if parts.file_type == GcsfsDirectIngestFileType.INGEST_VIEW:
            ingest_view_row = dao.get_file_metadata_row_for_path(
                session, self.region_code, path
            )
            now = datetime.datetime.utcnow()
            # An export time that is already recorded is left untouched; the
            # discovery time is always overwritten with the current time.
            if not ingest_view_row.export_time:
                ingest_view_row.export_time = now
            ingest_view_row.discovery_time = now
        elif parts.file_type == GcsfsDirectIngestFileType.RAW_DATA:
            raw_file_row = schema.DirectIngestRawFileMetadata(
                region_code=self.region_code,
                file_tag=parts.file_tag,
                normalized_file_name=path.file_name,
                discovery_time=datetime.datetime.utcnow(),
                processed_time=None,
                datetimes_contained_upper_bound_inclusive=parts.utc_upload_datetime,
            )
            session.add(raw_file_row)
        else:
            # Raised inside the try block so that the session is rolled back
            # before the error propagates.
            raise ValueError(f"Unexpected path type: {parts.file_type}")

        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
def mark_file_as_processed(self, path: GcsfsFilePath) -> None:
    """Stamp the metadata row for |path| with the current UTC time as processed_time.

    The row is looked up via dao.get_file_metadata_row_for_path for this
    object's region_code. Any exception triggers a session rollback and is
    re-raised; the session is always closed.

    Args:
        path: Path of the file whose metadata row should be updated.
    """
    session = SessionFactory.for_schema_base(OperationsBase)
    try:
        file_row = dao.get_file_metadata_row_for_path(
            session, self.region_code, path
        )
        processed_at = datetime.datetime.utcnow()
        file_row.processed_time = processed_at
        # Commit stays inside the try block so a failed commit is rolled back.
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()
def get_file_metadata(self, path: GcsfsFilePath) -> DirectIngestFileMetadata:
    """Look up the metadata row for |path| and return it converted to an entity.

    The row is fetched via dao.get_file_metadata_row_for_path for this
    object's region_code, then converted with the helper matching its schema
    class. Any exception triggers a session rollback and is re-raised; the
    session is always closed before the result reaches the caller.

    Args:
        path: Path of the file whose metadata should be fetched.

    Returns:
        The entity produced from the raw file or ingest file metadata row.

    Raises:
        ValueError: If the fetched row is neither a
            schema.DirectIngestRawFileMetadata nor a
            schema.DirectIngestIngestFileMetadata.
    """
    session = SessionFactory.for_schema_base(OperationsBase)
    try:
        metadata = dao.get_file_metadata_row_for_path(
            session, self.region_code, path
        )

        # The conversion runs inside the try block, before the finally
        # clause closes the session.
        if isinstance(metadata, schema.DirectIngestRawFileMetadata):
            return self._raw_file_schema_metadata_as_entity(metadata)

        if isinstance(metadata, schema.DirectIngestIngestFileMetadata):
            return self._ingest_file_schema_metadata_as_entity(metadata)

        raise ValueError(f'Unexpected metadata type: {type(metadata)}')
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()