Ejemplo n.º 1
0
    def test_raw_file_metadata_normalized_file_name_unique_constraint(self):
        """Duplicate normalized_file_name values must be rejected on commit.

        Two DirectIngestRawFileMetadata rows sharing the
        normalized_file_name 'foo.txt' are added to one session. The commit
        is expected to raise IntegrityError, and a fresh session is then
        expected to find no DirectIngestRawFileMetadata rows.
        """
        session = SessionFactory.for_schema_base(OperationsBase)
        try:
            raw_metadata_1 = schema.DirectIngestRawFileMetadata(
                region_code='us_xx_yyyy',
                file_tag='file_tag',
                discovery_time=datetime.datetime(2019, 10, 11),
                normalized_file_name='foo.txt',
                datetimes_contained_upper_bound_inclusive=datetime.datetime(
                    2019, 10, 10),
            )
            raw_metadata_2 = schema.DirectIngestRawFileMetadata(
                region_code='us_xx_yyyy',
                file_tag='file_tag',
                discovery_time=datetime.datetime(2019, 11, 12),
                normalized_file_name='foo.txt',
                datetimes_contained_upper_bound_inclusive=datetime.datetime(
                    2019, 11, 11),
            )

            session.add(raw_metadata_1)
            session.add(raw_metadata_2)

            with self.assertRaises(IntegrityError):
                session.commit()
        finally:
            # Release the session even when an assertion fails. The failed
            # commit leaves it unusable, so it is closed rather than reused.
            session.close()

        # Verify through a brand new session that nothing was persisted.
        session = SessionFactory.for_schema_base(OperationsBase)
        try:
            self.assertEqual([],
                             session.query(
                                 schema.DirectIngestRawFileMetadata).all())
        finally:
            session.close()
Ejemplo n.º 2
0
    def test_raw_file_metadata_normalized_file_name_unique_constraint(
            self) -> None:
        """Check that two rows with one normalized_file_name cannot commit.

        Both rows share region code, file tag and the file name "foo.txt"
        and differ only in their timestamps. Committing them together is
        expected to raise IntegrityError, and a second session is expected
        to see no DirectIngestRawFileMetadata rows afterwards.
        """
        # Fields that are identical on both rows, including the clashing
        # file name.
        common_fields = {
            "region_code": "us_xx_yyyy",
            "file_tag": "file_tag",
            "normalized_file_name": "foo.txt",
        }

        with SessionFactory.using_database(self.database_key,
                                           autocommit=False) as session:
            first_row = schema.DirectIngestRawFileMetadata(
                discovery_time=datetime.datetime(2019, 10, 11),
                datetimes_contained_upper_bound_inclusive=datetime.datetime(
                    2019, 10, 10),
                **common_fields,
            )
            second_row = schema.DirectIngestRawFileMetadata(
                discovery_time=datetime.datetime(2019, 11, 12),
                datetimes_contained_upper_bound_inclusive=datetime.datetime(
                    2019, 11, 11),
                **common_fields,
            )

            for row in (first_row, second_row):
                session.add(row)

            with self.assertRaises(IntegrityError):
                session.commit()

        # A separate session must not see either row.
        with SessionFactory.using_database(self.database_key,
                                           autocommit=False) as session:
            persisted_rows = session.query(
                schema.DirectIngestRawFileMetadata).all()
            self.assertEqual([], persisted_rows)
    def mark_file_as_discovered(self, path: GcsfsFilePath) -> None:
        """Record in the operations database that `path` was discovered.

        For an INGEST_VIEW file, the existing metadata row for `path` is
        fetched and its discovery_time is set to the current UTC time;
        export_time is set to that same time only when it is not already
        set. For a RAW_DATA file, a new DirectIngestRawFileMetadata row is
        added. The change is committed; on any error the session is rolled
        back and the error re-raised. The session is always closed.

        Raises:
            ValueError: if the file name does not start with
                DIRECT_INGEST_UNPROCESSED_PREFIX, or if the file type is
                neither INGEST_VIEW nor RAW_DATA.
        """
        if not path.file_name.startswith(DIRECT_INGEST_UNPROCESSED_PREFIX):
            raise ValueError("Expect only unprocessed paths in this function.")

        filename_parts = filename_parts_from_path(path)
        session = SessionFactory.for_schema_base(OperationsBase)

        try:
            file_type = filename_parts.file_type
            if file_type == GcsfsDirectIngestFileType.INGEST_VIEW:
                existing_row = dao.get_file_metadata_row_for_path(
                    session, self.region_code, path
                )
                now = datetime.datetime.utcnow()
                existing_row.discovery_time = now
                # Keep an export time that was already recorded.
                if not existing_row.export_time:
                    existing_row.export_time = now
            elif file_type == GcsfsDirectIngestFileType.RAW_DATA:
                new_row = schema.DirectIngestRawFileMetadata(
                    region_code=self.region_code,
                    file_tag=filename_parts.file_tag,
                    normalized_file_name=path.file_name,
                    discovery_time=datetime.datetime.utcnow(),
                    processed_time=None,
                    datetimes_contained_upper_bound_inclusive=(
                        filename_parts.utc_upload_datetime
                    ),
                )
                session.add(new_row)
            else:
                raise ValueError(f"Unexpected path type: {file_type}")
            session.commit()
        except Exception:
            # Undo any partial work before propagating the error.
            session.rollback()
            raise
        finally:
            session.close()
Ejemplo n.º 4
0
 def test_raw_file_metadata(self):
     """Persist one raw file metadata row and read it back.

     A single DirectIngestRawFileMetadata row is added and committed. The
     query result is passed through `one` (expected to yield the single
     row), which must equal the row that was added and must have a
     non-None file_id.
     """
     session = SessionFactory.for_schema_base(OperationsBase)
     try:
         raw_metadata = schema.DirectIngestRawFileMetadata(
             region_code='us_xx_yyyy',
             file_tag='file_tag',
             discovery_time=datetime.datetime.now(),
             normalized_file_name='foo.txt',
             datetimes_contained_upper_bound_inclusive=datetime.datetime(
                 2019, 10, 11),
         )
         session.add(raw_metadata)
         session.commit()
         result_metadata = one(
             session.query(schema.DirectIngestRawFileMetadata).all())
         self.assertEqual(result_metadata, raw_metadata)
         self.assertIsNotNone(result_metadata.file_id)
     finally:
         # Release the session even when an assertion above fails.
         session.close()
Ejemplo n.º 5
0
    def mark_raw_file_as_discovered(self, path: GcsfsFilePath) -> None:
        """Add a metadata row recording that raw file `path` was discovered.

        The path is first checked with `_check_is_raw_file_path`, and its
        file name must start with DIRECT_INGEST_UNPROCESSED_PREFIX. A new
        DirectIngestRawFileMetadata row is then added with discovery_time
        set to the current UTC time and processed_time left as None.

        Raises:
            ValueError: if the file name does not start with
                DIRECT_INGEST_UNPROCESSED_PREFIX.
        """
        self._check_is_raw_file_path(path)
        is_unprocessed = path.file_name.startswith(
            DIRECT_INGEST_UNPROCESSED_PREFIX)
        if not is_unprocessed:
            raise ValueError("Expect only unprocessed paths in this function.")

        filename_parts = filename_parts_from_path(path)
        # NOTE(review): presumably the context manager commits on exit -
        # confirm against SessionFactory.using_database.
        with SessionFactory.using_database(self.database_key) as session:
            discovered_row = schema.DirectIngestRawFileMetadata(
                region_code=self.region_code,
                file_tag=filename_parts.file_tag,
                normalized_file_name=path.file_name,
                discovery_time=datetime.datetime.now(tz=pytz.UTC),
                processed_time=None,
                datetimes_contained_upper_bound_inclusive=(
                    filename_parts.utc_upload_datetime),
            )
            session.add(discovered_row)
Ejemplo n.º 6
0
 def test_raw_file_metadata(self) -> None:
     """Store one raw file metadata row and verify it can be queried back.

     The row is added and committed explicitly. The query result is then
     passed through `one` (expected to yield the single row), which must
     equal the stored row and carry a non-None file_id.
     """
     with SessionFactory.using_database(self.database_key,
                                        autocommit=False) as session:
         stored_row = schema.DirectIngestRawFileMetadata(
             region_code="us_xx_yyyy",
             file_tag="file_tag",
             discovery_time=datetime.datetime.now(),
             normalized_file_name="foo.txt",
             datetimes_contained_upper_bound_inclusive=datetime.datetime(
                 2019, 10, 11),
         )
         session.add(stored_row)
         session.commit()

         all_rows = session.query(schema.DirectIngestRawFileMetadata).all()
         fetched_row = one(all_rows)
         self.assertEqual(fetched_row, stored_row)
         self.assertIsNotNone(fetched_row.file_id)