Exemple #1
0
def get_expired_result_sets(
        *,
        db_session: Session = Depends(deps.db_session),
        result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
) -> Any:
    """Return every ResultSet that the CRUD layer reports as expired.

    The database session and the CRUD object are both injected through
    ``Depends``; the lookup itself is delegated to
    ``result_set_crud.get_expired``.
    """
    expired_result_sets = result_set_crud.get_expired(db_session=db_session)
    return expired_result_sets
Exemple #2
0
def create_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_notifier: ResultSetNotifier = Depends(deps.result_set_notifier),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    result_set_in: schemas.ResultSetCreate,
) -> Any:
    """Store a new ResultSet and send a notification when one is warranted.

    A notification is sent only when the stored ResultSet has results and
    the submitted job has ``notify_if_results`` set.

    Raises:
        HTTPException: with status 400 when creation (or notification) raises
            JobVersionNotFound, ResultSetResultsLimitExceeded or
            ResultSizeExceeded.
    """
    try:
        stored = result_set_crud.create(db_session, obj_in=result_set_in)
        # Guard clause: nothing to announce without results or without opt-in.
        if not stored.results or not result_set_in.job.notify_if_results:
            return stored
        notification = schemas.ResultSetNotification(
            job=result_set_in.job,
            graph_spec=result_set_in.graph_spec,
            created=result_set_in.created,
            num_results=len(result_set_in.results),
            result_set_id=str(stored.result_set_id),
        )
        result_set_notifier.notify(notification=notification)
        return stored
    except (
            JobVersionNotFound,
            ResultSetResultsLimitExceeded,
            ResultSizeExceeded,
    ) as ex:
        bad_request = HTTPException(status_code=HTTP_400_BAD_REQUEST,
                                    detail=str(ex))
        raise bad_request from ex
Exemple #3
0
def delete_expired_result_sets(
        *,
        db_session: Session = Depends(deps.db_session),
        result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
) -> Any:
    """Remove all expired ResultSets and report how many were pruned.

    The value returned by ``result_set_crud.delete_expired`` is wrapped in a
    ``schemas.ResultSetsPruneResult`` as its ``num_pruned`` field.
    """
    pruned_count = result_set_crud.delete_expired(db_session=db_session)
    prune_summary = schemas.ResultSetsPruneResult(num_pruned=pruned_count)
    return prune_summary
Exemple #4
0
def result_set_crud() -> CRUDResultSet:
    """Build a CRUDResultSet from a freshly constructed APIServiceConfig.

    The result-count and result-size limits are read from the config, and the
    job CRUD dependency comes from ``job_crud()``.
    """
    settings = APIServiceConfig()
    results_limit = settings.max_result_set_results
    size_limit_bytes = settings.max_result_size_bytes
    return CRUDResultSet(
        max_result_set_results=results_limit,
        max_result_size_bytes=size_limit_bytes,
        job_crud=job_crud(),
    )
Exemple #5
0
def create_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    result_set_in: schemas.ResultSetCreate,
) -> Any:
    """Create a ResultSet from the submitted payload.

    Returns:
        Whatever ``result_set_crud.create`` returns for ``result_set_in``.

    Raises:
        HTTPException: with status 400 when creation raises
            JobVersionNotFound, ResultSetResultsLimitExceeded or
            ResultSizeExceeded; the original exception is attached as the
            explicit cause.
    """
    try:
        return result_set_crud.create(db_session, obj_in=result_set_in)
    except (
        JobVersionNotFound,
        ResultSetResultsLimitExceeded,
        ResultSizeExceeded,
    ) as ex:
        # Chain explicitly so the traceback shows this as a deliberate
        # translation rather than an error raised while handling another.
        raise HTTPException(status_code=HTTP_400_BAD_REQUEST, detail=str(ex)) from ex
Exemple #6
0
def get_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    result_set_id: str,
) -> Any:
    """Get a ResultSet by id.

    Returns:
        Whatever ``result_set_crud.get`` returns for ``result_set_id``.

    Raises:
        HTTPException: with status 404 when the lookup raises
            ResultSetNotFound; the original exception is attached as the
            explicit cause.
    """
    try:
        return result_set_crud.get(db_session, result_set_id=result_set_id)
    except ResultSetNotFound as ex:
        # Chain explicitly so the traceback shows this as a deliberate
        # translation rather than an error raised while handling another.
        raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail=str(ex)) from ex
Exemple #7
0
def get_job_latest_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    job_name: str,
) -> Any:
    """Return the most recent result set for the named job.

    The lookup is delegated to ``result_set_crud.get_latest_for_active_job``.

    Raises:
        HTTPException: with status 404 when the lookup raises
            ResultSetNotFound.
    """
    try:
        latest_result_set = result_set_crud.get_latest_for_active_job(
            db_session, job_name=job_name
        )
    except ResultSetNotFound as ex:
        not_found = HTTPException(status_code=HTTP_404_NOT_FOUND, detail=str(ex))
        raise not_found from ex
    else:
        return latest_result_set
Exemple #8
0
def get_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    result_set_id: str,
    result_format: schemas.ResultSetFormat = schemas.ResultSetFormat.json,
) -> Any:
    """Fetch a ResultSet by id, optionally rendered as CSV.

    When ``result_format`` is ``schemas.ResultSetFormat.csv`` the result set
    is converted through ``to_api_schema().to_csv()`` and returned as a
    ``text/csv`` response; otherwise the result set object itself is returned.

    Raises:
        HTTPException: with status 404 when the lookup raises
            ResultSetNotFound.
    """
    try:
        found = result_set_crud.get(db_session, result_set_id=result_set_id)
    except ResultSetNotFound as ex:
        not_found = HTTPException(status_code=HTTP_404_NOT_FOUND, detail=str(ex))
        raise not_found from ex

    wants_csv = result_format == schemas.ResultSetFormat.csv
    if not wants_csv:
        return found
    csv_body = found.to_api_schema().to_csv()
    return Response(content=csv_body, media_type="text/csv")
Exemple #9
0
def get_job_latest_result_set(
    *,
    db_session: Session = Depends(deps.db_session),
    result_set_crud: CRUDResultSet = Depends(deps.result_set_crud),
    job_name: str,
    result_format: schemas.ResultSetFormat = schemas.ResultSetFormat.json,
    response: Response,
    if_none_match: Optional[str] = Header(None),
) -> Any:
    """Get the latest result set of a Job, with conditional-request support.

    An ETag is derived from the result set's ``created`` value (base64 of its
    string form). When the ``If-None-Match`` header equals that ETag, an empty
    304 response is returned. Otherwise the result set is returned, rendered
    as ``text/csv`` when ``result_format`` is ``schemas.ResultSetFormat.csv``.

    The ``Cache-Control`` and ``ETag`` headers are attached to every
    successful response, including the 304 and CSV ones.

    Raises:
        HTTPException: with status 404 when the lookup raises
            ResultSetNotFound.
    """
    try:
        result_set = result_set_crud.get_latest_for_active_job(db_session, job_name=job_name)
    except ResultSetNotFound as ex:
        raise HTTPException(status_code=HTTP_404_NOT_FOUND, detail=str(ex)) from ex

    # NOTE(review): the tag is emitted and compared as a bare string, exactly
    # as before; RFC 7232 entity-tags are normally quoted - confirm intent.
    etag = base64.b64encode(str(result_set.created).encode()).decode()
    caching_headers = {
        "Cache-Control": "public, must-revalidate, proxy-revalidate, max-age=30",
        "ETag": etag,
    }
    # Headers set on the injected response are only merged by FastAPI when
    # the endpoint returns a non-Response value (the plain result_set path).
    for header_name, header_value in caching_headers.items():
        response.headers[header_name] = header_value

    # Responses built here bypass that merge, so they carry the caching
    # headers explicitly.
    if etag == if_none_match:
        return Response(status_code=HTTP_304_NOT_MODIFIED, headers=caching_headers)
    if result_format == schemas.ResultSetFormat.csv:
        return Response(
            content=result_set.to_api_schema().to_csv(),
            media_type="text/csv",
            headers=caching_headers,
        )
    return result_set
Exemple #10
0
    def test_views_with_two_fresh_result_sets_one_missing_an_account_other_expired(
            self):
        """Check the job views when an old and a fresh result set coexist.

        Two result sets are stored for the same job. The first is timestamped
        one second older than max_result_age_sec and covers two accounts; the
        second is timestamped now and covers only one of those accounts.

        The ``test_job_latest`` view is expected to return only the rows of
        the fresh set (one account), while ``test_job_all`` is expected to
        return the rows of both sets."""
        # NOTE(review): this value looks masked/redacted; it is interpolated
        # verbatim into the CREATE ROLE statement below - confirm the real
        # role name.
        db_ro_user = "******"
        job_crud = CRUDJob(
            db_ro_user=db_ro_user,
            result_expiration_sec_default=int(1e6),
            result_expiration_sec_limit=int(1e6),
            max_graph_age_sec_default=int(1e6),
            max_graph_age_sec_limit=int(1e6),
            max_result_age_sec_default=int(1e6),
            max_result_age_sec_limit=int(1e6),
            account_id_key="test_account_id",
        )
        result_set_crud = CRUDResultSet(
            max_result_set_results=int(1e6),
            max_result_size_bytes=int(1e6),
            job_crud=job_crud,
        )
        with temp_db_session() as session:
            session.execute(f"CREATE ROLE {db_ro_user}")
            job_create = schemas.JobCreate(
                name="test_job",
                description="A Test Job",
                graph_spec=schemas.JobGraphSpec(graph_names=["test"]),
                category=schemas.Category.gov,
                severity=schemas.Severity.info,
                query=
                "select ?test_account_id ?foo ?boo where {?test_account_id ?foo ?boo} limit 10",
                max_graph_age_sec=int(1e6),
                result_expiration_sec=int(1e6),
                max_result_age_sec=int(1e6),
                notify_if_results=False,
            )
            created_timestamp = job_crud.create(
                db_session=session, job_create_in=job_create).created
            # Activate the job version that was just created.
            job_update = schemas.JobUpdate(active=True)
            _job = job_crud.update_version(
                db_session=session,
                job_name="test_job",
                created=created_timestamp,
                job_update=job_update,
            )
            job = schemas.Job.from_orm(_job)

            account_id_a = "012345678901"
            account_id_b = "567890123456"

            # First result set: timestamped one second past max_result_age_sec,
            # with two rows for each of the two accounts.
            result_set_1_time = datetime.now() - timedelta(
                seconds=job_create.max_result_age_sec + 1)
            result_set_1_graph_spec = schemas.ResultSetGraphSpec(
                graph_uris_load_times={"test": result_set_1_time.timestamp()})
            results_1 = [
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "oldhello_a",
                        "boo": "oldthere_a"
                    },
                ),
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "oldboo_a",
                        "boo": "oldfoo_a"
                    },
                ),
                schemas.Result(
                    account_id=account_id_b,
                    result={
                        "foo": "oldhello_b",
                        "boo": "oldthere_b"
                    },
                ),
                schemas.Result(
                    account_id=account_id_b,
                    result={
                        "foo": "oldboo_b",
                        "boo": "oldfoo_b"
                    },
                ),
            ]
            result_set_1_create = ResultSetCreate(
                job=job,
                graph_spec=result_set_1_graph_spec,
                results=results_1,
                created=result_set_1_time,
            )
            result_set_crud.create(db_session=session,
                                   obj_in=result_set_1_create)

            # Second result set: timestamped now, with rows for account A only.
            result_set_2_time = datetime.now()
            result_set_2_graph_spec = schemas.ResultSetGraphSpec(
                graph_uris_load_times={"test": result_set_2_time.timestamp()})
            results_2 = [
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "newhello_a",
                        "boo": "newthere_a"
                    },
                ),
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "newboo_a",
                        "boo": "newfoo_a"
                    },
                ),
            ]
            result_set_2_create = ResultSetCreate(
                job=job,
                graph_spec=result_set_2_graph_spec,
                results=results_2,
                created=result_set_2_time,
            )
            result_set_crud.create(db_session=session,
                                   obj_in=result_set_2_create)

            # The latest view should contain only the fresh set's rows; both
            # sides are sorted so row order does not matter.
            latest_results = session.execute("select * from test_job_latest")
            latest_rows = latest_results.fetchall()
            self.assertSequenceEqual(
                sorted(latest_rows),
                sorted([
                    (result_set_2_time, account_id_a, "newhello_a",
                     "newthere_a"),
                    (result_set_2_time, account_id_a, "newboo_a", "newfoo_a"),
                ]),
            )
            # The all view should contain every row from both result sets.
            all_results = session.execute("select * from test_job_all")
            all_rows = all_results.fetchall()
            self.assertSequenceEqual(
                sorted(all_rows),
                sorted([
                    (result_set_1_time, account_id_a, "oldhello_a",
                     "oldthere_a"),
                    (result_set_1_time, account_id_a, "oldboo_a", "oldfoo_a"),
                    (result_set_1_time, account_id_b, "oldhello_b",
                     "oldthere_b"),
                    (result_set_1_time, account_id_b, "oldboo_b", "oldfoo_b"),
                    (result_set_2_time, account_id_a, "newhello_a",
                     "newthere_a"),
                    (result_set_2_time, account_id_a, "newboo_a", "newfoo_a"),
                ]),
            )
Exemple #11
0
    def test_views_with_expired_result_set(self):
        """Check the job views when the only result set is too old.

        A single result set is stored, timestamped one second older than
        max_result_age_sec. The ``test_job_latest`` view is expected to return
        no rows, while ``test_job_all`` is expected to return all of the
        stored rows."""
        # NOTE(review): this value looks masked/redacted; it is interpolated
        # verbatim into the CREATE ROLE statement below - confirm the real
        # role name.
        db_ro_user = "******"
        job_crud = CRUDJob(
            db_ro_user=db_ro_user,
            result_expiration_sec_default=int(1e6),
            result_expiration_sec_limit=int(1e6),
            max_graph_age_sec_default=int(1e6),
            max_graph_age_sec_limit=int(1e6),
            max_result_age_sec_default=int(1e6),
            max_result_age_sec_limit=int(1e6),
            account_id_key="test_account_id",
        )
        result_set_crud = CRUDResultSet(
            max_result_set_results=int(1e6),
            max_result_size_bytes=int(1e6),
            job_crud=job_crud,
        )
        with temp_db_session() as session:
            session.execute(f"CREATE ROLE {db_ro_user}")
            job_create = schemas.JobCreate(
                name="test_job",
                description="A Test Job",
                graph_spec=schemas.JobGraphSpec(graph_names=["test"]),
                category=schemas.Category.gov,
                severity=schemas.Severity.info,
                query=
                "select ?test_account_id ?foo ?boo where {?test_account_id ?foo ?boo} limit 10",
                max_graph_age_sec=int(1e6),
                result_expiration_sec=int(1e6),
                max_result_age_sec=int(1e6),
                notify_if_results=False,
            )
            created_timestamp = job_crud.create(
                db_session=session, job_create_in=job_create).created
            # Activate the job version that was just created.
            job_update = schemas.JobUpdate(active=True)
            _job = job_crud.update_version(
                db_session=session,
                job_name="test_job",
                created=created_timestamp,
                job_update=job_update,
            )
            job = schemas.Job.from_orm(_job)

            account_id_a = "012345678901"
            account_id_b = "567890123456"

            # The only result set: timestamped one second past
            # max_result_age_sec, with two rows for each of the two accounts.
            result_set_1_time = datetime.now() - timedelta(
                seconds=job_create.max_result_age_sec + 1)
            result_set_1_graph_spec = schemas.ResultSetGraphSpec(
                graph_uris_load_times={"test": result_set_1_time.timestamp()})
            results_1 = [
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "oldhello_a",
                        "boo": "oldthere_a"
                    },
                ),
                schemas.Result(
                    account_id=account_id_a,
                    result={
                        "foo": "oldboo_a",
                        "boo": "oldfoo_a"
                    },
                ),
                schemas.Result(
                    account_id=account_id_b,
                    result={
                        "foo": "oldhello_b",
                        "boo": "oldthere_b"
                    },
                ),
                schemas.Result(
                    account_id=account_id_b,
                    result={
                        "foo": "oldboo_b",
                        "boo": "oldfoo_b"
                    },
                ),
            ]
            result_set_1_create = ResultSetCreate(
                job=job,
                graph_spec=result_set_1_graph_spec,
                results=results_1,
                created=result_set_1_time,
            )
            result_set_crud.create(db_session=session,
                                   obj_in=result_set_1_create)

            # The latest view should be empty. Assert on the fetched rows
            # rather than rowcount, which is not dependable for SELECT
            # statements across drivers.
            latest_results = session.execute("select * from test_job_latest")
            latest_rows = latest_results.fetchall()
            self.assertEqual(latest_rows, [])
            # The all view should still contain every stored row; both sides
            # are sorted so row order does not matter.
            all_results = session.execute("select * from test_job_all")
            all_rows = all_results.fetchall()
            self.assertSequenceEqual(
                sorted(all_rows),
                sorted([
                    (result_set_1_time, account_id_a, "oldhello_a",
                     "oldthere_a"),
                    (result_set_1_time, account_id_a, "oldboo_a", "oldfoo_a"),
                    (result_set_1_time, account_id_b, "oldhello_b",
                     "oldthere_b"),
                    (result_set_1_time, account_id_b, "oldboo_b", "oldfoo_b"),
                ]),
            )