Esempio n. 1
0
def empty_creds_fs() -> GCSFileStore:
    """Build a ``GCSFileStore`` whose client and bucket are replaced by mocks.

    ``GCSFileStore._initialize_creds_fs`` is patched out while the store is
    constructed. The private ``_credentials_path``, ``_client`` and ``_bucket``
    attributes are then assigned by hand: the client is a ``mock.Mock`` specced
    on ``storage.Client`` and the bucket is a ``MockedBucket`` named
    ``"fake_bucket"`` holding one ``"example.mp4"`` blob created with
    ``exists=False``.
    """
    fake_creds = "/fake/path/to/creds.json"
    init_target = "cdptools.file_stores.gcs_file_store.GCSFileStore._initialize_creds_fs"

    with mock.patch(init_target):
        store = GCSFileStore(fake_creds)
        store._credentials_path = fake_creds
        store._client = mock.Mock(storage.Client)
        missing_blob = MockedBlob("example.mp4", exists=False)
        store._bucket = MockedBucket("fake_bucket", [missing_blob])

    return store
Esempio n. 2
0
def no_creds_fs() -> GCSFileStore:
    """Return a ``GCSFileStore`` built from a bucket name only, with no credentials argument."""
    bucket = "fake-cdp-instance.appspot.com"
    return GCSFileStore(bucket)
Esempio n. 3
0
def test_gcs_file_store_init(bucket_name, credentials_path):
    """Smoke test: constructing a ``GCSFileStore`` from the given arguments must not raise."""
    # Only construction is under test; the instance is discarded immediately.
    init_args = (bucket_name, credentials_path)
    GCSFileStore(*init_args)
Esempio n. 4
0
def test_download_transcripts(example_transcript, order_by_field):
    """Check that ``download_transcripts`` saves one transcript plus a manifest.

    ``CloudFirestoreDatabase.select_rows_as_list`` and
    ``CloudFirestoreDatabase.select_row_by_id`` are patched to hand back the
    canned rows defined below, and ``requests.get`` is patched to return a
    ``MockedStreamedRead`` wrapping the ``example_transcript`` fixture.

    The test then asserts that the returned map has one entry, that the save
    directory contains exactly two files, and that the file referenced by the
    first ``local_path`` entry of ``transcript_manifest.csv`` holds the same
    JSON as ``example_transcript``.
    """
    # Canned results for successive ``select_rows_as_list`` calls, in call order.
    event_rows = [{
        "event_id": "1",
        "legistar_event_id": 4023,
        "event_datetime": datetime(2019, 7, 15, 9, 30),
        "agenda_file_uri": "doesnt-matter",
        "minutes_file_uri": None,
        "video_uri": "doesnt-matter",
        "created": datetime(2019, 7, 20, 1, 53, 14, 77790),
        "body_id": "1",
        "legistar_event_link": "doesnt-matter",
        "source_uri": "http://www.seattlechannel.org/CouncilBriefings?videoid=x105823",
    }]
    transcript_rows = [{
        "transcript_id": "1",
        "confidence": 0.9498944201984921,
        "event_id": "1",
        "created": datetime(2019, 7, 20, 1, 53, 18, 611107),
        "file_id": "1",
    }]
    body_rows = [{
        "body_id": "1",
        "name": "Council Briefing",
        "created": datetime(2019, 7, 20, 1, 53, 13, 821791),
        "description": None,
    }]

    # Canned result for every ``select_row_by_id`` call.
    file_row = {
        "file_id": "1",
        "content_type": None,
        "filename": "example_transcript_sentences.json",
        "created": datetime(2019, 7, 20, 1, 53, 10, 726978),
        "description": None,
        "uri": example_transcript,
    }

    # Patchers are only created here; they take effect when entered below.
    patch_select_rows = mock.patch.object(
        CloudFirestoreDatabase,
        "select_rows_as_list",
        side_effect=[event_rows, transcript_rows, body_rows],
    )
    patch_select_by_id = mock.patch.object(
        CloudFirestoreDatabase,
        "select_row_by_id",
        return_value=file_row,
    )
    # Interrupt the request to open the file stream
    patch_requests_get = mock.patch("requests.get")

    with tempfile.TemporaryDirectory() as tmpdir:
        with patch_select_rows, patch_select_by_id, patch_requests_get as mocked_request:
            mocked_request.return_value = MockedStreamedRead(example_transcript)

            # Initialize objects
            db = CloudFirestoreDatabase("fake-cdp-instance")
            fs = GCSFileStore("fake-cdp-instance.appspot.com")

            # Get the event corpus map
            event_corpus_map = transcript_tools.download_transcripts(
                db=db,
                fs=fs,
                order_by_field=order_by_field,
                save_dir=tmpdir,
            )

            # Assert structure
            assert len(event_corpus_map) == 1

            # It should have one transcript and the manifest CSV
            saved_files = list(Path(tmpdir).iterdir())
            assert len(saved_files) == 2

            # Assert that the transcript path in the manifest is also correct
            manifest_path = Path(tmpdir) / "transcript_manifest.csv"
            manifest = pd.read_csv(manifest_path)
            with open(manifest.local_path[0], "r") as copied_file:
                copied_transcript = json.load(copied_file)

            with open(example_transcript, "r") as original_file:
                original_transcript = json.load(original_file)

            assert copied_transcript == original_transcript
Esempio n. 5
0
def test_download_most_recent_transcripts(example_transcript):
    """Check that ``download_most_recent_transcripts`` returns a single-entry map.

    ``CloudFirestoreDatabase.select_rows_as_list`` is patched to return the four
    canned row lists below, one per successive call, and
    ``GCSFileStore.download_file`` is patched to return the
    ``example_transcript`` fixture.
    """
    # Canned results for successive ``select_rows_as_list`` calls, in call order.
    transcript_rows = [{
        "transcript_id": "9183055d-300d-4204-8741-57cebbb280a9",
        "confidence": 0.9498944201984921,
        "event_id": "0a8fcd28-b920-4088-bd73-ceacb304db0f",
        "created": datetime(2019, 7, 20, 1, 53, 18, 611107),
        "file_id": "76b7b54d-2f9b-4cad-b0b8-039a51937c15",
    }]
    event_rows = [{
        "event_id": "0a8fcd28-b920-4088-bd73-ceacb304db0f",
        "legistar_event_id": 4023,
        "event_datetime": datetime(2019, 7, 15, 9, 30),
        "agenda_file_uri": "http://legistar2.granicus.com/seattle/meetings/2019/7/4023_A_Council_Briefing_19-07-15_Council_Briefing.pdf",  # noqa: E501
        "minutes_file_uri": None,
        "video_uri": "http://video.seattle.gov:8080/media/council/brief_071519_2011955V.mp4",
        "created": datetime(2019, 7, 20, 1, 53, 14, 77790),
        "body_id": "f0867cf1-7bb0-4f28-83c9-a8cac6152ea4",
        "legistar_event_link": "https://seattle.legistar.com/MeetingDetail.aspx?LEGID=4023&GID=393&G=FFE3B678-CEF6-4197-84AC-5204EA4CFC0C",  # noqa: E501
        "source_uri": "http://www.seattlechannel.org/CouncilBriefings?videoid=x105823",
    }]
    body_rows = [{
        "body_id": "f0867cf1-7bb0-4f28-83c9-a8cac6152ea4",
        "name": "Council Briefing",
        "created": datetime(2019, 7, 20, 1, 53, 13, 821791),
        "description": None,
    }]
    file_rows = [{
        "file_id": "76b7b54d-2f9b-4cad-b0b8-039a51937c15",
        "content_type": None,
        "filename": "fc52ca9f9febd50ece14f46170014936f76f3d0227688ff96fcf7e369404eee7_ts_sentences_transcript_0.json",  # noqa: E501
        "created": datetime(2019, 7, 20, 1, 53, 10, 726978),
        "description": None,
        "uri": "gs://stg-cdp-seattle.appspot.com/fc52ca9f9febd50ece14f46170014936f76f3d0227688ff96fcf7e369404eee7_ts_sentences_transcript_0.json",  # noqa: E501
    }]

    select_rows_target = (
        "cdptools.databases.cloud_firestore_database.CloudFirestoreDatabase.select_rows_as_list"
    )
    download_target = "cdptools.file_stores.gcs_file_store.GCSFileStore.download_file"

    with tempfile.TemporaryDirectory() as tmpdir:
        # The objects are created before any patch is entered.
        db = CloudFirestoreDatabase("stg-cdp-seattle")
        fs = GCSFileStore("stg-cdp-seattle.appspot.com")

        # Mock interactions: keyword arguments to ``mock.patch`` configure the
        # replacement mock directly.
        patch_db_select = mock.patch(
            select_rows_target,
            side_effect=[transcript_rows, event_rows, body_rows, file_rows],
        )
        patch_download = mock.patch(download_target, return_value=example_transcript)

        with patch_db_select, patch_download:
            # Get the event corpus map
            event_corpus_map = transcript_tools.download_most_recent_transcripts(
                db, fs, tmpdir
            )

            # Assert structure
            assert len(event_corpus_map) == 1
Esempio n. 6
0
def no_creds_fs() -> GCSFileStore:
    """Return a ``GCSFileStore`` built from a bucket name only, with no credentials argument."""
    bucket = "stg-cdp-seattle.appspot.com"
    return GCSFileStore(bucket)