def connect_to_ceph(
    ceph_bucket_prefix: str,
    environment: str,
    bucket: Optional[str] = None,
) -> CephStore:
    """Create a CephStore for Thoth SLI metrics and connect it.

    The store prefix is derived by ``_get_sli_metrics_prefix`` from the
    bucket prefix and the environment.

    :param ceph_bucket_prefix: bucket prefix forwarded to the prefix helper.
    :param environment: environment name forwarded to the prefix helper.
    :param bucket: bucket handed to ``CephStore``; ``None`` is passed through
        unchanged.
    :return: the ``CephStore`` instance after ``connect()`` has been called.
    """
    sli_store = CephStore(
        prefix=_get_sli_metrics_prefix(
            ceph_bucket_prefix=ceph_bucket_prefix,
            environment=environment,
        ),
        bucket=bucket,
    )
    sli_store.connect()
    return sli_store
def test_init_kwargs(self):
    """Check that keyword arguments passed to CephStore end up as attributes.

    Also verifies the prefix is kept and that a freshly constructed adapter
    does not report itself as connected.
    """
    adapter = CephStore(_BUCKET_PREFIX, **CEPH_INIT_KWARGS)

    for name, expected in CEPH_INIT_KWARGS.items():
        actual = getattr(adapter, name)
        assert actual == expected, (
            f"Ceph attribute {name!r} has value {actual!r} but expected is {expected!r}"
        )

    assert adapter.prefix == _BUCKET_PREFIX
    assert not adapter.is_connected()
def store_thoth_sli_on_ceph(
    ceph_sli: CephStore,
    metric_class: str,
    metrics_df: pd.DataFrame,
    ceph_path: str,
    is_public: bool = False,
) -> None:
    """Serialize an SLI DataFrame to CSV and store it as a blob on Ceph.

    The CSV is written without index and without header, using a backtick
    as the field separator.

    :param ceph_sli: store whose ``store_blob`` receives the CSV text.
    :param metric_class: metric class name; used only in the final log line.
    :param metrics_df: DataFrame to serialize.
    :param ceph_path: object key under which the blob is stored.
    :param is_public: only selects the wording ("public"/"private") of the
        log message; the same ``store_blob`` call is made either way.
    """
    # Serialize first so a failing conversion happens before anything is logged.
    csv_payload = metrics_df.to_csv(index=False, sep="`", header=False)

    visibility = "public" if is_public else "private"
    _LOGGER.info(f"Storing on {visibility} bucket... {ceph_path}")

    ceph_sli.store_blob(blob=csv_payload, object_key=ceph_path)
    _LOGGER.info(f"Succesfully stored Thoth weekly SLI metrics for {metric_class} at {ceph_path}")
def _fixture_adapter():
    """Yield an unconnected CephStore while the S3 mock is active.

    The mock is started before the adapter is created and stopped once the
    consumer of this generator is done, even if the consumer raised.
    """
    # Keep one mock object: calling ``mock_s3()`` again would create a second,
    # never-started instance, and stopping that one does not reliably undo
    # the patches installed by the instance that was actually started.
    # NOTE(review): assumes ``mock_s3`` is moto's S3 mock - confirm.
    s3_mock = mock_s3()
    s3_mock.start()
    try:
        yield CephStore(_BUCKET_PREFIX, **CEPH_INIT_KWARGS)
    finally:
        s3_mock.stop()
def create_s3_adapter(ceph_bucket_prefix: str, repo: str) -> CephStore:
    """Build a CephStore adapter for the deployment metrics of one repository.

    The prefix has the form
    ``<ceph_bucket_prefix>/aicoe-ci/deployment-metrics/<repo>``.

    :param ceph_bucket_prefix: leading part of the store prefix.
    :param repo: repository name used as the last prefix segment.
    :return: a ``CephStore`` instance; ``connect()`` is not called here.
    """
    return CephStore(prefix=f"{ceph_bucket_prefix}/aicoe-ci/deployment-metrics/{repo}")
def connect_to_ceph(
    cls,
    ceph_bucket_prefix: str,
    processed_data_name: str,
    environment: str,
    bucket: Optional[str] = None,
) -> CephStore:
    """Create a CephStore for processed data and connect it.

    The store prefix comes from ``cls._get_processed_data_prefix``.

    :param ceph_bucket_prefix: bucket prefix forwarded to the prefix helper.
    :param processed_data_name: data set name forwarded to the prefix helper.
    :param environment: environment name forwarded to the prefix helper.
    :param bucket: bucket handed to ``CephStore``; ``None`` is passed through
        unchanged.
    :return: the ``CephStore`` instance after ``connect()`` has been called.
    """
    data_store = CephStore(
        prefix=cls._get_processed_data_prefix(
            ceph_bucket_prefix=ceph_bucket_prefix,
            processed_data_name=processed_data_name,
            environment=environment,
        ),
        bucket=bucket,
    )
    data_store.connect()
    return data_store
def store_csv_from_dataframe(
    csv_from_df: str,
    ceph_sli: CephStore,
    file_name: str,
    ceph_path: str,
    is_public: bool = False,
) -> None:
    """Store already-serialized CSV text as a blob on Ceph.

    :param csv_from_df: CSV text, e.g. the result of ``pd.DataFrame.to_csv()``.
    :param ceph_sli: store whose ``store_blob`` receives the CSV text.
    :param file_name: name used only in the final log line.
    :param ceph_path: object key under which the blob is stored.
    :param is_public: only selects the wording ("public"/"private") of the
        log message; the same ``store_blob`` call is made either way.
    """
    bucket_kind = "public" if is_public else "private"
    _LOGGER.info(f"Storing on {bucket_kind} bucket... {ceph_path}")

    ceph_sli.store_blob(blob=csv_from_df, object_key=ceph_path)
    _LOGGER.info(f"Succesfully stored {file_name} at {ceph_path}")
def test_init_env(self):
    """Check that CephStore picks up its configuration from the environment.

    Each key of ``CEPH_INIT_ENV`` is translated to an attribute name through
    ``CEPH_ENV_MAP`` and the adapter attribute is compared to the env value.
    """
    adapter = CephStore(_BUCKET_PREFIX)
    assert adapter.prefix == _BUCKET_PREFIX

    for env_name, expected in CEPH_INIT_ENV.items():
        attribute = CEPH_ENV_MAP[env_name]
        actual = getattr(adapter, attribute)
        assert actual == expected, (
            f"Ceph attribute {attribute!r} has value {actual!r} but expected is "
            f"{expected!r} (env: {env_name!r})"
        )
def retrieve_thoth_sli_from_ceph(ceph_sli: CephStore, ceph_path: str, total_columns: List[str]) -> pd.DataFrame:
    """Retrieve a stored Thoth SLI CSV from Ceph as a DataFrame.

    The blob is decoded as UTF-8 and parsed as header-less CSV with a
    backtick separator, using ``total_columns`` as the column names.

    :param ceph_sli: store whose ``retrieve_blob`` is queried.
    :param ceph_path: object key of the blob to retrieve.
    :param total_columns: column names for the resulting DataFrame.
    :return: the parsed data, or an empty DataFrame with ``total_columns``
        as columns when retrieval, decoding or parsing fails.
    """
    _LOGGER.info(f"Retrieving... \n{ceph_path}")
    try:
        retrieved_data = ceph_sli.retrieve_blob(object_key=ceph_path).decode('utf-8')
        # Log the decoded text itself; interpolating the StringIO wrapper
        # would only print its object repr.
        _LOGGER.debug(f"retrieved data:\n {retrieved_data}")
        last_data = pd.read_csv(StringIO(retrieved_data), names=total_columns, sep="`")
    except Exception as e:
        # Deliberately broad: any failure is treated as "nothing stored yet"
        # and the caller gets an empty frame instead of an exception.
        _LOGGER.warning(f"No file could be retrieved from Ceph: {e}")
        last_data = pd.DataFrame(columns=total_columns)

    return last_data
def _fixture_connected_adapter():
    """Yield a CephStore adapter connected through ``connected_ceph_adapter``.

    The adapter is built from ``_BUCKET_PREFIX`` and ``CEPH_INIT_KWARGS`` and
    handed to the context manager with ``raw_ceph=True``; whatever the
    context manager yields is passed on to the consumer.
    """
    with connected_ceph_adapter(
        CephStore(_BUCKET_PREFIX, **CEPH_INIT_KWARGS),
        raw_ceph=True,
    ) as connected:
        yield connected
import os
from typing import List, Tuple, Union

from thoth.storages import CephStore

# Single store shared by all calls; it is created and connected at import time.
store = CephStore(prefix='data/thoth/ash-api/ETM/words')
store.connect()


def retrieve_n_most_similar(token, n) -> Union[List, Tuple[dict, int]]:
    """Return the first ``n`` entries of the token's ``most_similar`` list.

    :param token: document id looked up in the store.
    :param n: maximum number of entries to return.
    :return: a list with at most ``n`` entries, or, when the token has no
        document in the store, the tuple ``({'error': ...}, 404)``.
    """
    if not store.document_exists(token):
        return {'error': f'The token: {token} is not in the vocabulary.'}, 404

    token_dict = store.retrieve_document(token)
    return list(token_dict['most_similar'][:n])


def get(token):
    """Retrieve the 25 most similar tokens to the ``token`` path param.

    :return: ``({'token': ..., 'most-similar-tokens': [...]}, 200)`` on
        success, or ``({'error': ...}, 404)`` when the token is unknown.
        NOTE(review): the tuple is presumably interpreted as
        (body, status code) by the web framework - confirm.
    """
    similar = retrieve_n_most_similar(token, n=25)

    # The helper signals an unknown token with an (error, 404) tuple; pass it
    # on as the response instead of nesting it inside a 200 payload.
    if isinstance(similar, tuple):
        return similar

    return {
        'token': token,
        'most-similar-tokens': similar,
    }, 200