def test_make_lower(self, _, value, value_name, expected_outcome):
    lower_case = Validator(value, value_name).make_lower()

    self.assertIsInstance(lower_case, Validator)
    self.assertEqual(first=lower_case.value, second=expected_outcome)
def test_get_dict_values(self, _, value, value_name, expected_outcome):
    dict_values = Validator(value, value_name).get_dict_values()

    self.assertIsInstance(dict_values, Validator)
    self.assertEqual(first=dict_values.value, second=expected_outcome)
def test_make_singular(self, _, value, value_name, expected_outcome):
    singular = Validator(value, value_name).make_singular()

    self.assertIsInstance(singular, Validator)
    self.assertEqual(first=singular.value, second=expected_outcome)
def test_check_subset_of_list_exception(self, _, value, value_name, superset, superset_name, expected_exception):
    with self.assertRaises(expected_exception):
        Validator(value, value_name).check_subset_of_list(superset, superset_name)
def test_check_no_intersection_with_list_exception(self, _, value, value_name, other_list, list_name, expected_exception):
    with self.assertRaises(expected_exception):
        Validator(value, value_name).check_no_intersection_with_list(other_list, list_name)
def test_override_value(self, _, value, value_name, override_flag, override_value, expected_outcome):
    updated_value = Validator(value, value_name).override_value(override_flag, override_value)

    self.assertIsInstance(updated_value, Validator)
    self.assertEqual(first=updated_value.value, second=expected_outcome)
def test_discard_dict_keys_none_value(self, _, value, value_name, expected_outcome):
    update_dict = Validator(value, value_name).discard_dict_keys_none_value()

    self.assertIsInstance(update_dict, Validator)
    self.assertEqual(first=update_dict.value, second=expected_outcome)
def test_set_default_value_if_none(self, _, value, value_name, default, expected_outcome):
    updated_value = Validator(value, value_name).set_default_value_if_none(default)

    self.assertIsInstance(updated_value, Validator)
    self.assertEqual(first=updated_value.value, second=expected_outcome)
def test_filter_list_using_first_character(self, _, value, value_name, first_character, expected_outcome):
    updated_list = Validator(value, value_name).filter_list_using_first_character(first_character)

    self.assertIsInstance(updated_list, Validator)
    self.assertEqual(first=updated_list.value, second=expected_outcome)
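# The extra leading "_" argument in each test above is a per-case name, which suggests the
# methods are driven by a parameterisation decorator. A minimal sketch of that wiring,
# assuming the `parameterized` package, an importable Validator (import path assumed) and
# that make_lower() simply lower-cases string values:
import unittest

from parameterized import parameterized

from lusidtools.cocoon.validator import Validator  # assumed import path


class ValidatorMakeLowerTests(unittest.TestCase):
    @parameterized.expand(
        [
            # (case name, value, value_name, expected outcome) - hypothetical cases
            ("mixed_case", "Transactions", "file_type", "transactions"),
            ("already_lower", "quotes", "file_type", "quotes"),
        ]
    )
    def test_make_lower(self, _, value, value_name, expected_outcome):
        lower_case = Validator(value, value_name).make_lower()
        self.assertIsInstance(lower_case, Validator)
        self.assertEqual(first=lower_case.value, second=expected_outcome)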
def verify_all_required_attributes_mapped(
    mapping: dict,
    model_object_name: str,
    exempt_attributes: list = None,
    key_separator: str = ".",
) -> None:
    """
    Verifies that all required attributes are included in the mapping. Passes silently if they
    are and raises an exception otherwise.

    :param dict mapping: The required mapping
    :param str model_object_name: The name of the lusid.models object that the mapping is for
    :param list[str] exempt_attributes: The attributes that are exempt from needing to be in the required mapping
    :param str key_separator: The separator to use to join the required attributes together

    :return: None
    """
    # Check that the provided model name actually exists
    model_object = getattr(lusid.models, model_object_name, None)

    if model_object is None:
        raise TypeError("The provided model_object is not a lusid.models object")

    # Convert a None to an empty list
    exempt_attributes = (
        Validator(exempt_attributes, "exempt_attributes")
        .set_default_value_if_none([])
        .value
    )

    # Get the required attributes for this model
    required_attributes = get_required_attributes_model_recursive(
        model_object=model_object, key_separator=key_separator
    )

    # Remove the exempt attributes, including nested ones. For example, if "identifiers" is
    # exempt then "identifiers.value" is also removed. Iterate over a copy so that removing
    # items does not skip elements of the list being modified.
    for attribute in list(required_attributes):
        if attribute.split(key_separator)[0] in exempt_attributes:
            required_attributes.remove(attribute)

    missing_attributes = set(required_attributes) - set(mapping.keys())

    if len(missing_attributes) > 0:
        raise ValueError(
            f"The required attributes {str(missing_attributes)} are missing from the mapping. "
            f"Please add them."
        )
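# A hypothetical usage sketch for the function above. "Transaction" is assumed to be a valid
# lusid.models class name and the mapping keys below are placeholders rather than the model's
# real required attributes; the call raises ValueError listing whichever required attributes
# are still missing from the mapping, and TypeError if the model name does not exist.
example_mapping_required = {
    "code": "txn_id",
    "type": "txn_type",
    "transaction_date": "trade_date",
}

verify_all_required_attributes_mapped(
    mapping=example_mapping_required,
    model_object_name="Transaction",
    exempt_attributes=["properties", "instrument_identifiers"],
)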
def validate_mapping_file_structure(mapping: dict, columns: list, file_type: str):
    """
    Takes a mapping structure and checks that each column referenced by the mapping exists
    in the source data.

    :param dict mapping: Mapping containing the full mapping structure
    :param list columns: Columns from the source data to search in
    :param str file_type: Type of file being upserted

    :return: None
    """
    # file_type
    domain_lookup = load_json_file("config/domain_settings.json")
    file_type_check = (
        Validator(file_type, "file_type")
        .make_singular()
        .make_lower()
        .check_allowed_value(list(domain_lookup.keys()))
        .value
    )

    # required
    if "required" in mapping[file_type].keys():
        for field in mapping[file_type]["required"]:
            if isinstance(mapping[file_type]["required"][field], dict):
                check_mapping_fields_exist(
                    mapping[file_type]["required"][field]["column"].values(),
                    columns,
                    "required",
                )
            else:
                check_mapping_fields_exist(
                    mapping[file_type]["required"].values(), columns, "required"
                )
    else:
        raise ValueError("'required' mapping field not provided in mapping")

    # optional
    if "optional" in mapping[file_type].keys():
        check_mapping_fields_exist(
            mapping[file_type]["optional"].values(), columns, "optional"
        )

    # identifier_mapping
    if "identifier_mapping" in mapping[file_type].keys():
        check_mapping_fields_exist(
            mapping[file_type]["identifier_mapping"].values(),
            columns,
            "identifier_mapping",
        )
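# A hypothetical usage sketch for validate_mapping_file_structure. The file type, mapping
# values and column names are placeholders; the shape (required / optional /
# identifier_mapping nested under the file type key) is what the function expects.
example_mapping = {
    "instruments": {
        "required": {"name": "instrument_name"},
        "optional": {"look_through_portfolio_id": "fund_code"},
        "identifier_mapping": {"Figi": "figi"},
    }
}
example_columns = ["instrument_name", "fund_code", "figi"]

# Passes silently when every mapped column exists in example_columns; check_mapping_fields_exist
# raises for any that are missing, and a missing "required" block raises ValueError.
validate_mapping_file_structure(example_mapping, example_columns, "instruments")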
def test_check_allowed_value_exception(self, _, value, value_name, allowed_values, expected_exception):
    with self.assertRaises(expected_exception):
        Validator(value, value_name).check_allowed_value(allowed_values)
def test_check_no_intersection_with_list_success(self, _, value, value_name, other_list, list_name):
    Validator(value, value_name).check_no_intersection_with_list(other_list, list_name)
def test_check_subset_of_list_success(self, _, value, value_name, superset, superset_name):
    Validator(value, value_name).check_subset_of_list(superset, superset_name)
def test_check_allowed_value_success(self, _, value, value_name, allowed_values):
    Validator(value, value_name).check_allowed_value(allowed_values)
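# Minimal sketch of the fluent pattern the tests above exercise: every method returns a
# Validator-like object so calls can be chained, transformations update .value in place, and
# check_* methods raise on failure. This is an illustrative re-implementation inferred from
# the tests, not the library's actual Validator class.
class FluentValidatorSketch:
    def __init__(self, value, value_name):
        self.value = value
        self.value_name = value_name

    def make_lower(self):
        self.value = self.value.lower()
        return self

    def set_default_value_if_none(self, default):
        if self.value is None:
            self.value = default
        return self

    def check_allowed_value(self, allowed_values):
        if self.value not in allowed_values:
            raise ValueError(
                f"{self.value_name} with value {self.value} is not one of {allowed_values}"
            )
        return self


# Chained usage mirroring the style used in load_from_data_frame below
example_file_type = (
    FluentValidatorSketch("Quotes", "file_type")
    .make_lower()
    .set_default_value_if_none("instruments")
    .check_allowed_value(["quotes", "instruments", "transactions"])
    .value
)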
def load_from_data_frame(
    api_factory: lusid.utilities.ApiClientFactory,
    scope: str,
    data_frame: pd.DataFrame,
    mapping_required: dict,
    mapping_optional: dict,
    file_type: str,
    identifier_mapping: dict = None,
    property_columns: list = None,
    properties_scope: str = None,
    batch_size: int = None,
    remove_white_space: bool = True,
    instrument_name_enrichment: bool = False,
    sub_holding_keys: list = None,
    holdings_adjustment_only: bool = False,
    thread_pool_max_workers: int = 5,
):
    """
    Parameters
    ----------
    api_factory : lusid.utilities.ApiClientFactory
        The api factory to use
    scope : str
        The scope of the resource to load the data into
    data_frame : pd.DataFrame
        The DataFrame containing the data
    mapping_required : dict{str, str}
        The dictionary mapping the DataFrame columns to LUSID's required attributes
    mapping_optional : dict{str, str}
        The dictionary mapping the DataFrame columns to LUSID's optional attributes
    file_type : str
        The type of file e.g. transactions, instruments, holdings, quotes, portfolios
    identifier_mapping : dict{str, str}
        The dictionary mapping of LUSID instrument identifiers to identifiers in the DataFrame
    property_columns : list
        The columns to create properties for
    properties_scope : str
        The scope to add the properties to
    batch_size : int
        The size of the batch to use when using upsert calls e.g. upsert instruments, upsert quotes etc.
    remove_white_space : bool
        Remove whitespace either side of each value in the DataFrame
    instrument_name_enrichment : bool
        Request additional identifier information from OpenFIGI
    sub_holding_keys : list
        The sub holding keys to use for this request. Can be a list of property keys or a list of
        columns in the DataFrame to use to create sub holdings
    holdings_adjustment_only : bool
        Whether to use the adjust_holdings api call rather than set_holdings when working with holdings
    thread_pool_max_workers : int
        The maximum number of workers to use in the thread pool used by the function

    Returns
    -------
    responses: dict
        The responses from loading the data into LUSID

    Examples
    --------

    * Loading Instruments

    .. code-block:: none

        result = lusidtools.cocoon.load_from_data_frame(
            api_factory=api_factory,
            scope=scope,
            data_frame=instr_df,
            mapping_required=mapping["instruments"]["required"],
            mapping_optional={},
            file_type="instruments",
            identifier_mapping=mapping["instruments"]["identifier_mapping"],
            property_columns=mapping["instruments"]["properties"],
            properties_scope=scope
        )

    * Loading Instrument Properties

    .. code-block:: none

        result = lusidtools.cocoon.load_from_data_frame(
            api_factory=api_factory,
            scope=scope,
            data_frame=strat_properties,
            mapping_required=strat_mapping,
            mapping_optional={},
            file_type="instrument_property",
            property_columns=["block tag"],
            properties_scope=scope
        )

    * Loading Portfolios

    .. code-block:: none

        result = lusidtools.cocoon.load_from_data_frame(
            api_factory=api_factory,
            scope=scope,
            data_frame=portfolios,
            mapping_required=mapping["portfolios"]["required"],
            mapping_optional={},
            file_type="portfolios"
        )

    * Loading Transactions

    .. code-block:: none

        result = lusidtools.cocoon.load_from_data_frame(
            api_factory=api_factory,
            scope=scope,
            data_frame=txn_df,
            mapping_required=mapping["transactions"]["required"],
            mapping_optional=mapping["transactions"]["optional"],
            file_type="transactions",
            identifier_mapping=mapping["transactions"]["identifier_mapping"],
            property_columns=mapping["transactions"]["properties"],
            properties_scope=scope
        )

    * Loading Quotes

    .. code-block:: none

        result = lpt.load_from_data_frame(
            api_factory=api_factory,
            scope=scope,
            data_frame=df_adjusted_quotes,
            mapping_required=mapping["quotes"]["required"],
            mapping_optional={},
            file_type="quotes"
        )

    * Loading Holdings

    .. code-block:: none

        result = lpt.load_from_data_frame(
            api_factory=api_factory,
            scope=holdings_scope,
            data_frame=seg_df,
            mapping_required=mapping["holdings"]["required"],
            mapping_optional=mapping["holdings"]["optional"],
            identifier_mapping=holdings_mapping["holdings"]["identifier_mapping"],
            file_type="holdings"
        )
    """

    # A mapping between the file type and relevant attributes e.g. domain, top_level_model etc.
    domain_lookup = cocoon.utilities.load_json_file("config/domain_settings.json")

    # Convert the file type to lower case & singular as well as checking it is of the allowed value
    file_type = (
        Validator(file_type, "file_type")
        .make_singular()
        .make_lower()
        .check_allowed_value(list(domain_lookup.keys()))
        .value
    )

    # Ensure that it is a single index DataFrame
    Validator(data_frame.index, "data_frame_index").check_is_not_instance(pd.MultiIndex)

    # Set defaults aligned with the data type of each argument, this allows users to provide None
    identifier_mapping = (
        Validator(identifier_mapping, "identifier_mapping")
        .set_default_value_if_none(default={})
        .discard_dict_keys_none_value()
        .value
    )

    properties_scope = (
        Validator(properties_scope, "properties_scope")
        .set_default_value_if_none(default=scope)
        .value
    )

    property_columns = (
        Validator(property_columns, "property_columns")
        .set_default_value_if_none(default=[])
        .value
    )

    sub_holding_keys = (
        Validator(sub_holding_keys, "sub_holding_keys")
        .set_default_value_if_none(default=[])
        .value
    )

    batch_size = (
        Validator(batch_size, "batch_size")
        .set_default_value_if_none(domain_lookup[file_type]["default_batch_size"])
        .override_value(
            not domain_lookup[file_type]["batch_allowed"],
            domain_lookup[file_type]["default_batch_size"],
        )
        .value
    )

    # Discard mappings where the provided value is None
    mapping_required = (
        Validator(mapping_required, "mapping_required")
        .discard_dict_keys_none_value()
        .value
    )

    mapping_optional = (
        Validator(mapping_optional, "mapping_optional")
        .discard_dict_keys_none_value()
        .value
    )

    required_call_attributes = domain_lookup[file_type]["required_call_attributes"]
    if "scope" in required_call_attributes:
        required_call_attributes.remove("scope")

    # Check that all required parameters exist
    Validator(
        required_call_attributes, "required_attributes_for_call"
    ).check_subset_of_list(list(mapping_required.keys()), "required_mapping")

    # Verify that all the required attributes for this top level model exist in the provided required mapping
    cocoon.utilities.verify_all_required_attributes_mapped(
        mapping=mapping_required,
        model_object_name=domain_lookup[file_type]["top_level_model"],
        exempt_attributes=["identifiers", "properties", "instrument_identifiers"],
    )

    # Create the thread pool to use with the async_tools.run_in_executor decorator to make sync functions awaitable
    thread_pool = ThreadPool(thread_pool_max_workers).thread_pool

    if instrument_name_enrichment:
        loop = cocoon.async_tools.start_event_loop_new_thread()

        data_frame, mapping_required = asyncio.run_coroutine_threadsafe(
            cocoon.instruments.enrich_instruments(
                api_factory=api_factory,
                data_frame=data_frame,
                instrument_identifier_mapping=identifier_mapping,
                mapping_required=mapping_required,
                constant_prefix="$",
                **{"thread_pool": thread_pool},
            ),
            loop,
        ).result()

        # Stop the additional event loop
        cocoon.async_tools.stop_event_loop_new_thread(loop)

    """
    Unnest and populate defaults where a mapping is provided with column and/or default fields
    in a nested dictionary e.g.

    {'name': {
        'column': 'instrument_name',
        'default': 'unknown_name'
        }
    }

    rather than simply {'name': 'instrument_name'}
    """
    (
        data_frame,
        mapping_required,
    ) = cocoon.utilities.handle_nested_default_and_column_mapping(
        data_frame=data_frame, mapping=mapping_required, constant_prefix="$"
    )
    (
        data_frame,
        mapping_optional,
    ) = cocoon.utilities.handle_nested_default_and_column_mapping(
        data_frame=data_frame, mapping=mapping_optional, constant_prefix="$"
    )

    # Get all the DataFrame columns as well as those that contain at least one null value
    data_frame_columns = list(data_frame.columns.values)
    nan_columns = [
        column for column in data_frame_columns if data_frame[column].isna().any()
    ]

    # Validate that none of the provided columns are missing or invalid
    Validator(
        mapping_required, "mapping_required"
    ).get_dict_values().filter_list_using_first_character("$").check_subset_of_list(
        data_frame_columns, "DataFrame Columns"
    ).check_no_intersection_with_list(
        nan_columns, "Columns with Missing Values"
    )

    Validator(
        mapping_optional, "mapping_optional"
    ).get_dict_values().filter_list_using_first_character("$").check_subset_of_list(
        data_frame_columns, "DataFrame Columns"
    )

    Validator(
        identifier_mapping, "identifier_mapping"
    ).get_dict_values().filter_list_using_first_character("$").check_subset_of_list(
        data_frame_columns, "DataFrame Columns"
    )

    Validator(property_columns, "property_columns").check_subset_of_list(
        data_frame_columns, "DataFrame Columns"
    )

    # Convert higher level data types such as dictionaries and lists to strings
    data_frame = data_frame.applymap(cocoon.utilities.convert_cell_value_to_string)

    if remove_white_space:
        column_list = [property_columns]
        for col in [mapping_optional, mapping_required, identifier_mapping]:
            column_list.append(col.values())

        column_list = list(set([item for sublist in column_list for item in sublist]))
        data_frame = strip_whitespace(data_frame, column_list)

    # Get the types of the attributes on the top level model for this request
    open_api_types = getattr(
        lusid.models, domain_lookup[file_type]["top_level_model"]
    ).openapi_types

    # If there is a sub_holding_keys attribute and it has a dict type this means the sub_holding_keys
    # need to have a property definition and be populated with values from the provided DataFrame columns
    if (
        "sub_holding_keys" in open_api_types.keys()
        and "dict" in open_api_types["sub_holding_keys"]
    ):
        Validator(sub_holding_keys, "sub_holding_key_columns").check_subset_of_list(
            data_frame_columns, "DataFrame Columns"
        )

        # Check for and create missing property definitions for the sub-holding-keys
        data_frame = cocoon.properties.create_missing_property_definitions_from_file(
            api_factory=api_factory,
            properties_scope=properties_scope,
            domain="Transaction",
            data_frame=data_frame,
            property_columns=sub_holding_keys,
        )

    # Check for and create missing property definitions for the properties
    if domain_lookup[file_type]["domain"] is not None:
        data_frame = cocoon.properties.create_missing_property_definitions_from_file(
            api_factory=api_factory,
            properties_scope=properties_scope,
            domain=domain_lookup[file_type]["domain"],
            data_frame=data_frame,
            property_columns=property_columns,
        )

    # Start a new event loop in a new thread, this is required to run inside a Jupyter notebook
    loop = cocoon.async_tools.start_event_loop_new_thread()

    # Keyword arguments to be used in requests to the LUSID API
    keyword_arguments = {
        "scope": scope,
        # This handles that identifiers need to be specified differently based on the request type,
        # allowing users to provide either the entire key e.g. "Instrument/default/Figi" or just
        # the code "Figi" for any request
        "full_key_format": domain_lookup[file_type]["full_key_format"],
        # Gets the allowed unique identifiers
        "unique_identifiers": cocoon.instruments.get_unique_identifiers(
            api_factory=api_factory
        ),
        "holdings_adjustment_only": holdings_adjustment_only,
        "thread_pool": thread_pool,
    }

    # Get the responses from LUSID
    responses = asyncio.run_coroutine_threadsafe(
        _construct_batches(
            api_factory=api_factory,
            data_frame=data_frame,
            mapping_required=mapping_required,
            mapping_optional=mapping_optional,
            property_columns=property_columns,
            properties_scope=properties_scope,
            instrument_identifier_mapping=identifier_mapping,
            batch_size=batch_size,
            file_type=file_type,
            domain_lookup=domain_lookup,
            sub_holding_keys=sub_holding_keys,
            **keyword_arguments,
        ),
        loop,
    ).result()

    # Stop the additional event loop
    cocoon.async_tools.stop_event_loop_new_thread(loop)

    return {file_type + "s": responses}
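# A hypothetical end-to-end call for illustration. The api_factory construction, DataFrame
# contents and mapping values are placeholders; in practice mapping_required must cover every
# required attribute of the relevant lusid.models object. The responses come back keyed by the
# singular file type plus "s", so an "instruments" load is read from result["instruments"].
import pandas as pd

example_df = pd.DataFrame(
    [{"instrument_name": "Example Corp", "figi": "BBG000000000"}]
)

result = load_from_data_frame(
    api_factory=api_factory,  # assumed to be an already-configured lusid.utilities.ApiClientFactory
    scope="example-scope",
    data_frame=example_df,
    mapping_required={"name": "instrument_name"},
    mapping_optional={},
    file_type="instruments",
    identifier_mapping={"Figi": "figi"},
)
instrument_responses = result["instruments"]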