def test_validate_table(self):
    """Check that DatasetTransformations refuses invalid table arguments.

    Constructing with ``INVALID_TABLE`` must raise ``LookupError`` and
    constructing with ``{"dimensions": True}`` must raise ``TypeError``.
    """
    rejected_tables = (
        (LookupError, INVALID_TABLE),
        (TypeError, {"dimensions": True}),
    )
    for expected_error, bad_table in rejected_tables:
        with self.assertRaises(expected_error):
            DatasetTransformations(bad_table)
def metadata_main() -> None:
    """
    Entry point for the metadata transformation service.

    Loads JSON from ``args.filename``, converts it with the converter selected
    by ``args.metadata_format`` (``'c'`` -> ``cantabular_metadata``,
    ``'o'`` -> ``ons_metadata``, case-insensitive) and, when the result is
    non-empty, submits the derived variable metadata requests through a
    ``NomisMetadataApiConnector``.

    :raises ValueError: If ``args.metadata_format`` is neither ``'c'`` nor ``'o'``.
    """
    logger.info("Commencing metadata transformation service.")

    with FileReader(args.filename) as reader:
        file_data = reader.load_json()

    # Normalise the format flag once so the comparison is case-insensitive.
    metadata_format = args.metadata_format.lower()
    if metadata_format == 'c':
        uuids_metadata = cantabular_metadata(file_data)
    elif metadata_format == 'o':
        uuids_metadata = ons_metadata(file_data)
    else:
        raise ValueError("Unrecognised metadata format.")

    # Nothing to send: report it and stop.
    if len(uuids_metadata) == 0:
        logger.info("No metadata appended.")
        return

    variable_metadata_requests = DatasetTransformations.variable_metadata_request(
        uuids_metadata)

    with NomisMetadataApiConnector(
            config.get_credentials('nomis_metadata'),
            config.get_client('nomis_metadata')) as metadata_connector:
        uuids = metadata_connector.add_new_metadata(
            variable_metadata_requests, return_uuids=True)

    logger.info(
        f"METADATA TRANSFORMATION SUCCESS. "
        f"Metadata was created for entities with the following UUIDS: {uuids}"
    )
def handle_variables(connector: NomisApiConnector,
                     transformations: DatasetTransformations,
                     variables: List[str]) -> None:
    """
    Create every listed variable that the connector reports as missing.

    For each missing variable this sends, in order: the variable creation
    request(s) whose ``name`` matches, the type requests whose ``reference``
    matches, and the category requests whose ``code`` and ``title`` match the
    variable's category index and labels in ``transformations.table``.

    :param connector: An open, initialised instance of `NomisApiConnector`.
    :param transformations: An initialised instance of `DatasetTransformations` with a valid table attribute.
    :param variables: A list of variables to be assigned to the dataset.
    """
    logger.debug("\n-----VARIABLE CREATION-----")

    # Build all request bodies up front; they are filtered per variable below.
    variable_requests = transformations.variable_creation()
    type_requests = transformations.type_creation()
    category_requests = transformations.category_creation(
        get_type_ids(type_requests))

    for variable in variables:
        # Variables that already exist are left untouched.
        if connector.get_variable(variable, return_bool=True):
            continue

        # Create the variable itself.
        for candidate in variable_requests:
            if candidate["name"] == variable:
                connector.create_variable(variable, candidate)

        # Create the variable's types.
        matching_types = [
            type_request for type_request in type_requests
            if type_request["reference"] == variable
        ]
        connector.create_variable_type(variable, matching_types)

        # Create the categories, ordered by the table's category index. The
        # label lookup stays behind the code comparison so it only happens for
        # requests whose code already matches.
        category_info = transformations.table["dimension"][variable]["category"]
        matching_categories = [
            category_request
            for code in category_info["index"]
            for category_request in category_requests
            if code == category_request["code"]
            and category_info["label"][code] == category_request["title"]
        ]
        connector.create_variable_category(variable, matching_categories)
def create_dataset(connector: NomisApiConnector,
                   transformations: DatasetTransformations) -> None:
    """
    Create the dataset identified by ``args.dataset_id`` through the connector.

    The request body is produced by ``transformations.dataset_creation`` from
    ``args.dataset_id`` and ``args.dataset_title``.

    :param connector: An open, initialised instance of `NomisApiConnector`.
    :param transformations: An initialised instance of `DatasetTransformations` with a valid table attribute.
    """
    dataset_id = args.dataset_id
    request_body = transformations.dataset_creation(dataset_id,
                                                    args.dataset_title)
    connector.create_dataset(dataset_id, request_body)
def dataset_transformations(connector: NomisApiConnector, exists: bool,
                            data: Tuple[pyjstat.Dataset, List[str]]) -> None:
    """
    Run the dataset transformation operations against the connector.

    Geography variables (those listed by ``config.get_geography()``) are
    stripped from the table and from ``variables`` and used as the dimension
    key; when there is none, the first variable is used as the key. The
    dataset is then created (or validated against the existing one), its
    dimensions are assigned and its observations are written.

    Note that both members of ``data`` are modified in place: geography
    variables are removed from the ``variables`` list, and their entries are
    deleted from ``table["dimension"]`` and ``table["id"]``.

    :param connector: An open, initialised instance of `NomisApiConnector`.
    :param exists: A bool indicating whether or not the dataset currently exists; if `True`, the dimensions are
        checked against the existing dataset before updating it. If `False`, a new dataset is created.
    :param data: A tuple containing the required data. That is, a pyjstat dataset corresponding with the query made to
        cantabular, and the list of variables to be assigned to the dataset.
    :raises KeyError: If the dataset exists and ``check_dataset_dimensions`` returns `False`.
    :raises IndexError: If no variable is a geography and ``variables`` is empty.
    """
    logger.info("Commencing dataset transformations.")
    table, variables = data

    # Check variables against known geographies. If geography then remove from list and make key.
    geography_variables = config.get_geography()
    key = None
    geography_flag = False
    table_geography = None

    # Iterate over a snapshot: the body removes entries from `variables`, and
    # removing from a list while iterating over that same list makes the
    # iterator skip the element that follows each removal.
    for variable in list(variables):
        if variable in geography_variables:
            geography_flag = True
            key = variable
            # Keep a copy of the table as it is before this geography is removed.
            table_geography = copy.deepcopy(table)
            del table["dimension"][variable]
            table["id"].remove(variable)
            variables.remove(variable)

    # If no variables are geography then make first variable key
    if not geography_flag:
        key = variables[0]

    transformations = DatasetTransformations(table, geography_flag,
                                             table_geography)

    # Create the dataset if it doesn't exist, otherwise check its dimensions match
    if not exists:
        non_assigned_variables = variables
        create_dataset(connector, transformations)
        handle_variables(connector, transformations, non_assigned_variables)
    else:
        are_dimensions_same = check_dataset_dimensions(connector, variables)
        if are_dimensions_same is False:
            raise KeyError(
                "ERROR: Dimensions are not the same as existing dataset.")

    handle_dimensions(connector, transformations, key)
    handle_observations(connector, transformations)
def handle_observations(
    connector: NomisApiConnector,
    transformations: DatasetTransformations,
) -> None:
    """
    Overwrite the observations of the dataset identified by ``args.dataset_id``.

    The payload is produced by ``transformations.observations`` for that
    dataset id.

    :param connector: An open, initialised instance of `NomisApiConnector`.
    :param transformations: An initialised instance of `DatasetTransformations` with a valid table attribute.
    """
    logger.debug("\n-----APPENDING OBSERVATIONS-----")

    dataset_id = args.dataset_id
    observations = transformations.observations(dataset_id)
    connector.overwrite_dataset_observations(dataset_id, observations)
def handle_dimensions(
    connector: NomisApiConnector,
    transformations: DatasetTransformations,
    key: Union[str, None],
) -> None:
    """
    Assign dimensions to the dataset identified by ``args.dataset_id``.

    The dimension payload is produced by ``transformations.assign_dimensions``
    from the given key.

    :param connector: An open, initialised instance of `NomisApiConnector`.
    :param transformations: An initialised instance of `DatasetTransformations` with a valid table attribute.
    :param key: Key value for dimensions.
    """
    logger.debug("\n-----ASSIGNING DIMENSIONS-----")

    dimensions = transformations.assign_dimensions(key)
    connector.assign_dimensions_to_dataset(args.dataset_id, dimensions)
def test_dataset_creation(self) -> None:
    """Exercise DatasetTransformations.dataset_creation().

    Covers three things: invalid arguments raise the expected exceptions,
    calling through the class gives the same result as calling through an
    instance, and a valid call returns a dict carrying the given id and title.
    """
    transformations = self.valid_dataset_transformations

    # Invalid arguments: 42 as the title, then an empty string as the id.
    with self.assertRaises(TypeError):
        transformations.dataset_creation(VALID_ID, 42)
    with self.assertRaises(ValueError):
        transformations.dataset_creation("", VALID_TITLE)

    # The same valid call, made through the instance and through the class.
    from_instance = transformations.dataset_creation(VALID_ID, VALID_TITLE)
    from_class = DatasetTransformations.dataset_creation(
        VALID_ID, VALID_TITLE)
    self.assertEqual(from_instance, from_class)

    # Type and value checks on the returned dataset.
    self.assertIsInstance(from_instance, dict)
    self.assertEqual(from_instance["id"], VALID_ID)
    self.assertEqual(from_instance["title"], VALID_TITLE)
def setUp(self) -> None:
    """Create the DatasetTransformations fixture from VALID_TABLE."""
    fixture = DatasetTransformations(VALID_TABLE)
    self.valid_dataset_transformations = fixture
class TestDatasetTransformations(unittest.TestCase):
    """Unit tests for the DatasetTransformations class."""

    def setUp(self) -> None:
        """Create the DatasetTransformations fixture from VALID_TABLE."""
        fixture = DatasetTransformations(VALID_TABLE)
        self.valid_dataset_transformations = fixture

    def test_validate_table(self):
        """Check that DatasetTransformations refuses invalid table arguments.

        Constructing with ``INVALID_TABLE`` must raise ``LookupError`` and
        constructing with ``{"dimensions": True}`` must raise ``TypeError``.
        """
        rejected_tables = (
            (LookupError, INVALID_TABLE),
            (TypeError, {"dimensions": True}),
        )
        for expected_error, bad_table in rejected_tables:
            with self.assertRaises(expected_error):
                DatasetTransformations(bad_table)

    def test_dataset_creation(self) -> None:
        """Exercise DatasetTransformations.dataset_creation().

        Covers three things: invalid arguments raise the expected exceptions,
        calling through the class gives the same result as calling through an
        instance, and a valid call returns a dict carrying the given id and
        title.
        """
        transformations = self.valid_dataset_transformations

        # Invalid arguments: 42 as the title, then an empty string as the id.
        with self.assertRaises(TypeError):
            transformations.dataset_creation(VALID_ID, 42)
        with self.assertRaises(ValueError):
            transformations.dataset_creation("", VALID_TITLE)

        # The same valid call, made through the instance and through the class.
        from_instance = transformations.dataset_creation(VALID_ID, VALID_TITLE)
        from_class = DatasetTransformations.dataset_creation(
            VALID_ID, VALID_TITLE)
        self.assertEqual(from_instance, from_class)

        # Type and value checks on the returned dataset.
        self.assertIsInstance(from_instance, dict)
        self.assertEqual(from_instance["id"], VALID_ID)
        self.assertEqual(from_instance["title"], VALID_TITLE)

    def test_variable_creation(self) -> None:
        """Exercise variable_creation() on the valid fixture.

        The result must be a list of dicts, each named after a dimension of
        the fixture's table.
        """
        created = self.valid_dataset_transformations.variable_creation()
        self.assertIsInstance(created, list)
        for entry in created:
            self.assertIsInstance(entry, dict)
            self.assertIn(
                entry["name"],
                self.valid_dataset_transformations.table["dimension"])

    def test_category_creation(self) -> None:
        """Exercise category_creation(): the result is a list of dicts."""
        categories = self.valid_dataset_transformations.category_creation(
            ["10000"])
        self.assertIsInstance(categories, list)
        for category in categories:
            self.assertIsInstance(category, dict)

    def test_assign_dimensions(self) -> None:
        """Exercise assign_dimensions(): the result is a list of dicts."""
        dimensions = self.valid_dataset_transformations.assign_dimensions(
            "SEX")
        self.assertIsInstance(dimensions, list)
        for dimension in dimensions:
            self.assertIsInstance(dimension, dict)

    def test_observations(self) -> None:
        """Exercise observations().

        A non-string id must raise ``TypeError`` and an empty id must raise
        ``ValueError``; a valid id yields a dict whose ``dataset`` entry is
        that id.
        """
        transformations = self.valid_dataset_transformations

        with self.assertRaises(TypeError):
            transformations.observations(12345)
        with self.assertRaises(ValueError):
            transformations.observations("")

        observations = transformations.observations(VALID_ID)
        self.assertIsInstance(observations, dict)
        self.assertEqual(observations["dataset"], VALID_ID)

    def test_variable_metadata_request(self):
        """Exercise variable_metadata_request().

        Passing the metadata object on its own (not wrapped in a list) must
        raise ``TypeError``; passing it in a list yields a list of dicts whose
        ``belongsTo`` entry equals the object's ``uuid``.
        """
        transformations = self.valid_dataset_transformations

        with self.assertRaises(TypeError):
            transformations.variable_metadata_request(VALID_UUID_MD)

        metadata_requests = transformations.variable_metadata_request(
            [VALID_UUID_MD])
        self.assertIsInstance(metadata_requests, list)
        for metadata_request in metadata_requests:
            self.assertIsInstance(metadata_request, dict)
            self.assertEqual(metadata_request['belongsTo'],
                             VALID_UUID_MD.uuid)