Example #1
    def test_make_lower(self, _, value, value_name, expected_outcome):

        lower_case = Validator(value, value_name).make_lower()

        self.assertIsInstance(lower_case, Validator)

        self.assertEqual(first=lower_case.value, second=expected_outcome)
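
A note on the signature: the leading "_" parameter absorbs the human-readable case name injected by a parameterization decorator, a pattern shared by every test method on this page. A minimal sketch of how the cases for this test might be declared, assuming the parameterized package and illustrative data (the class name and import path are assumptions, not the library's actual layout):

import unittest

from parameterized import parameterized

from lusidtools.cocoon.validator import Validator  # assumed import path


class MakeLowerTests(unittest.TestCase):  # hypothetical test class

    @parameterized.expand([
        # (case name consumed by "_", value, value_name, expected_outcome)
        ("mixed_case", "Instruments", "file_type", "instruments"),
        ("already_lower", "quotes", "file_type", "quotes"),
    ])
    def test_make_lower(self, _, value, value_name, expected_outcome):
        lower_case = Validator(value, value_name).make_lower()
        self.assertEqual(lower_case.value, expected_outcome)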
Example #2
    def test_get_dict_values(self, _, value, value_name, expected_outcome):

        dict_values = Validator(value, value_name).get_dict_values()

        self.assertIsInstance(dict_values, Validator)

        self.assertEqual(first=dict_values.value, second=expected_outcome)
Example #3
    def test_make_singular(self, _, value, value_name, expected_outcome):

        singular = Validator(value, value_name).make_singular()

        self.assertIsInstance(singular, Validator)

        self.assertEqual(first=singular.value, second=expected_outcome)
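
A hedged sketch of the normalisation this enables: file types arrive in arbitrary plural forms and, as in load_from_data_frame further down, are reduced to a lower case singular form (illustrative values, expected output under that assumption):

file_type = Validator("Transactions", "file_type").make_singular().make_lower().value
print(file_type)  # expected: "transaction"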
Example #4
    def test_check_subset_of_list_exception(self, _, value, value_name,
                                            superset, superset_name,
                                            expected_exception):

        with self.assertRaises(expected_exception):
            Validator(value,
                      value_name).check_subset_of_list(superset, superset_name)
Example #5
    def test_check_no_intersection_with_list_exception(self, _, value,
                                                       value_name, other_list,
                                                       list_name,
                                                       expected_exception):

        with self.assertRaises(expected_exception):
            Validator(value, value_name).check_no_intersection_with_list(
                other_list, list_name)
Example #6
    def test_override_value(self, _, value, value_name, override_flag,
                            override_value, expected_outcome):

        updated_value = Validator(value, value_name).override_value(
            override_flag, override_value)

        self.assertIsInstance(updated_value, Validator)

        self.assertEqual(first=updated_value.value, second=expected_outcome)
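
Read together with the batch_size handling in load_from_data_frame further down, override_value appears to replace the wrapped value when the flag is truthy and keep it otherwise. A minimal sketch under that assumption, with illustrative numbers:

# Force batch_size down to a fixed value when batching is not allowed (hedged semantics)
batch_size = Validator(2000, "batch_size").override_value(True, 500).value
print(batch_size)  # expected: 500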
Example #7
    def test_discard_dict_keys_none_value(self, _, value, value_name,
                                          expected_outcome):

        update_dict = Validator(value,
                                value_name).discard_dict_keys_none_value()

        self.assertIsInstance(update_dict, Validator)

        self.assertEqual(first=update_dict.value, second=expected_outcome)
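
A minimal sketch of the expected behaviour, assuming keys whose value is None are dropped so that unset optional mappings simply disappear:

mapping = {"name": "instrument_name", "isin": None}
cleaned = Validator(mapping, "identifier_mapping").discard_dict_keys_none_value().value
print(cleaned)  # expected: {"name": "instrument_name"}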
Example #8
    def test_set_default_value_if_none(self, _, value, value_name, default,
                                       expected_outcome):

        updated_value = Validator(
            value, value_name).set_default_value_if_none(default)

        self.assertIsInstance(updated_value, Validator)

        self.assertEqual(first=updated_value.value, second=expected_outcome)
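
A minimal sketch, assuming the default is substituted only when the wrapped value is None:

scope = Validator(None, "properties_scope").set_default_value_if_none("default-scope").value
print(scope)  # expected: "default-scope"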
Example #9
    def test_filter_list_using_first_character(self, _, value, value_name,
                                               first_character,
                                               expected_outcome):

        updated_list = Validator(
            value,
            value_name).filter_list_using_first_character(first_character)

        self.assertIsInstance(updated_list, Validator)

        self.assertEqual(first=updated_list.value, second=expected_outcome)
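
Based on how load_from_data_frame chains this after get_dict_values, the method appears to discard items that begin with the given character, so that "$"-prefixed constants are excluded before column-name validation. A hedged sketch under that assumption:

values = ["instrument_name", "$USD", "price"]
columns = Validator(values, "mapping_values").filter_list_using_first_character("$").value
print(columns)  # expected: ["instrument_name", "price"]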
Example #10
def verify_all_required_attributes_mapped(
    mapping: dict,
    model_object_name: str,
    exempt_attributes: list = None,
    key_separator: str = ".",
) -> None:
    """
    Verifies that all required attributes are included in the mapping, passes silently if they are and raises an exception
    otherwise

    :param dict mapping: The required mapping
    :param str model_object_name: The name of the lusid.models object that the mapping is for
    :param list[str] exempt_attributes: The attributes that are exempt from needing to be in the required mapping
    :param str key_separator: The separator to use to join the required attributes together

    :return: None
    """

    # Check that the provided model name actually exists
    model_object = getattr(lusid.models, model_object_name, None)

    if model_object is None:
        raise TypeError(
            f"The provided model_object_name '{model_object_name}' is not a lusid.models object"
        )

    # Convert a None to an empty list
    exempt_attributes = (
        Validator(exempt_attributes, "exempt_attributes")
        .set_default_value_if_none([])
        .value
    )

    # Gets the required attributes for this model
    required_attributes = get_required_attributes_model_recursive(
        model_object=model_object, key_separator=key_separator
    )

    # Remove the exempt attributes, including their nested attributes, for example
    # if "identifiers" is exempt then "identifiers.value" is also removed.
    # A list comprehension is used rather than removing items while iterating,
    # which would skip elements.
    required_attributes = [
        attribute
        for attribute in required_attributes
        if attribute.split(key_separator)[0] not in exempt_attributes
    ]

    missing_attributes = set(required_attributes) - set(mapping.keys())

    if len(missing_attributes) > 0:
        raise ValueError(
            f"The required attributes {str(missing_attributes)} are missing from the mapping. "
            "Please add them."
        )
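
A hedged usage sketch: TransactionRequest is a lusid.models class, but the mapping keys here are illustrative. An incomplete mapping raises a ValueError naming the missing attributes; a complete one passes silently.

verify_all_required_attributes_mapped(
    mapping={
        "transaction_id": "txn_id",
        "type": "txn_type",
        "transaction_date": "trade_date",
    },
    model_object_name="TransactionRequest",
    exempt_attributes=["properties", "instrument_identifiers"],
)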
Example #11
def validate_mapping_file_structure(mapping: dict, columns: list, file_type: str):
    """
    This function takes a mapping structure and checks that each of the
    :param dict mapping: mapping containing full mapping structure
    :param columns: columns from source data to search in
    :param str file_type: type of file being upserted
    :return:
    """
    domain_lookup = load_json_file("config/domain_settings.json")

    # Check that the file type is allowed once converted to its lower case singular form
    (
        Validator(file_type, "file_type")
        .make_singular()
        .make_lower()
        .check_allowed_value(list(domain_lookup.keys()))
    )

    # required
    if "required" in mapping[file_type].keys():
        for field in mapping[file_type]["required"]:
            if isinstance(mapping[file_type]["required"][field], dict):
                check_mapping_fields_exist(
                    mapping[file_type]["required"][field]["column"].values(),
                    columns,
                    "required",
                )
            else:
                check_mapping_fields_exist(
                    mapping[file_type]["required"].values(), columns, "required"
                )
    else:
        raise ValueError(f"'required' mapping field not provided in mapping")

    # optional
    if "optional" in mapping.keys():
        check_mapping_fields_exist(
            mapping[file_type]["optional"].values(), columns, "optional"
        )

    # identifier_mapping
    if "identifier_mapping" in mapping[file_type].keys():
        check_mapping_fields_exist(
            mapping[file_type]["identifier_mapping"].values(),
            columns,
            "identifier_mapping",
        )
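
A hedged usage sketch with an illustrative mapping and column list; it assumes the packaged config/domain_settings.json is available and accepts "instruments" as a file type once normalised:

mapping = {
    "instruments": {
        "required": {"name": "instrument_name"},
        "optional": {"look_through_portfolio_id": "portfolio_id"},
    }
}
validate_mapping_file_structure(
    mapping=mapping,
    columns=["instrument_name", "portfolio_id"],
    file_type="instruments",
)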
Example #12
    def test_check_allowed_value_exception(self, _, value, value_name,
                                           allowed_values, expected_exception):

        with self.assertRaises(expected_exception):
            Validator(value, value_name).check_allowed_value(allowed_values)
Example #13
    def test_check_no_intersection_with_list_success(self, _, value,
                                                     value_name, other_list,
                                                     list_name):

        Validator(value, value_name).check_no_intersection_with_list(
            other_list, list_name)
Example #14
    def test_check_subset_of_list_success(self, _, value, value_name, superset,
                                          superset_name):

        Validator(value,
                  value_name).check_subset_of_list(superset, superset_name)
Example #15
    def test_check_allowed_value_success(self, _, value, value_name,
                                         allowed_values):

        Validator(value, value_name).check_allowed_value(allowed_values)
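
Taken together with the exception test in Example #12, check_allowed_value appears to pass silently for values inside the allowed list and raise otherwise. A minimal sketch with illustrative values:

# Passes silently; an unknown file type would raise instead
Validator("quote", "file_type").check_allowed_value(["quote", "instrument", "holding"])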
Example #16
def load_from_data_frame(
    api_factory: lusid.utilities.ApiClientFactory,
    scope: str,
    data_frame: pd.DataFrame,
    mapping_required: dict,
    mapping_optional: dict,
    file_type: str,
    identifier_mapping: dict = None,
    property_columns: list = None,
    properties_scope: str = None,
    batch_size: int = None,
    remove_white_space: bool = True,
    instrument_name_enrichment: bool = False,
    sub_holding_keys: list = None,
    holdings_adjustment_only: bool = False,
    thread_pool_max_workers: int = 5,
):
    """

    Parameters
    ----------
    api_factory : lusid.utilities.ApiClientFactory
        The api factory to use
    scope : str
        The scope of the resource to load the data into
    data_frame : pd.DataFrame
        The DataFrame containing the data
    mapping_required : dict{str, str}
        The dictionary mapping the DataFrame columns to LUSID's required attributes
    mapping_optional : dict{str, str}
        The dictionary mapping the DataFrame columns to LUSID's optional attributes
    file_type : str
        The type of file e.g. transactions, instruments, holdings, quotes, portfolios
    identifier_mapping : dict{str, str}
        The dictionary mapping of LUSID instrument identifiers to identifiers in the DataFrame
    property_columns : list
        The columns to create properties for
    properties_scope : str
        The scope to add the properties to
    batch_size : int
        The size of the batch to use when using upsert calls e.g. upsert instruments, upsert quotes etc.
    remove_white_space : bool
        Whether to remove whitespace from either side of each value in the DataFrame
    instrument_name_enrichment : bool
        Whether to request additional instrument identifier information from OpenFIGI
    sub_holding_keys : list
        The sub holding keys to use for this request. Can be a list of property keys or a list of
        columns in the dataframe to use to create sub holdings
    holdings_adjustment_only : bool
        Whether to use the adjust_holdings api call rather than set_holdings when working with holdings
    thread_pool_max_workers : int
        The maximum number of workers to use in the thread pool used by the function

    Returns
    -------
    responses: dict
        The responses from loading the data into LUSID

    Examples
    --------

    * Loading Instruments

    .. code-block:: none

        result = lusidtools.cocoon.load_from_data_frame(
            api_factory=api_factory,
            scope=scope,
            data_frame=instr_df,
            mapping_required=mapping["instruments"]["required"],
            mapping_optional={},
            file_type="instruments",
            identifier_mapping=mapping["instruments"]["identifier_mapping"],
            property_columns=mapping["instruments"]["properties"],
            properties_scope=scope
        )

    * Loading Instrument Properties

    .. code-block:: none

        result = lusidtools.cocoon.load_from_data_frame(
            api_factory=api_factory,
            scope=scope,
            data_frame=strat_properties,
            mapping_required=strat_mapping,
            mapping_optional={},
            file_type="instrument_property",
            property_columns=["block tag"],
            properties_scope=scope
        )

    * Loading Portfolios

    .. code-block:: none

        result = lusidtools.cocoon.load_from_data_frame(
            api_factory=api_factory,
            scope=scope,
            data_frame=portfolios,
            mapping_required=mapping["portfolios"]["required"],
            mapping_optional={},
            file_type="portfolios"
        )

    * Loading Transactions

    .. code-block:: none

        result = lusidtools.cocoon.load_from_data_frame(
            api_factory=api_factory,
            scope=scope,
            data_frame=txn_df,
            mapping_required=mapping["transactions"]["required"],
            mapping_optional=mapping["transactions"]["optional"],
            file_type="transactions",
            identifier_mapping=mapping["transactions"]["identifier_mapping"],
            property_columns=mapping["transactions"]["properties"],
            properties_scope=scope
        )


    * Loading Quotes

    .. code-block:: none

        result = lpt.load_from_data_frame(
            api_factory=api_factory,
            scope=scope,
            data_frame=df_adjusted_quotes,
            mapping_required=mapping["quotes"]["required"],
            mapping_optional={},
            file_type="quotes"
        )

    * Loading Holdings

    .. code-block:: none

        result = lpt.load_from_data_frame(
            api_factory=api_factory,
            scope=holdings_scope,
            data_frame=seg_df,
            mapping_required=mapping["holdings"]["required"],
            mapping_optional=mapping["holdings"]["optional"],
            identifier_mapping=holdings_mapping["holdings"]["identifier_mapping"],
            file_type="holdings"
        )

    """

    # A mapping between the file type and relevant attributes e.g. domain, top_level_model etc.
    domain_lookup = cocoon.utilities.load_json_file("config/domain_settings.json")

    # Convert the file type to lower case & singular as well as checking it is of the allowed value
    file_type = (
        Validator(file_type, "file_type")
        .make_singular()
        .make_lower()
        .check_allowed_value(list(domain_lookup.keys()))
        .value
    )

    # Ensures that it is a single index dataframe
    Validator(data_frame.index, "data_frame_index").check_is_not_instance(pd.MultiIndex)

    # Set defaults aligned with the data type of each argument, this allows for users to provide None
    identifier_mapping = (
        Validator(identifier_mapping, "identifier_mapping")
        .set_default_value_if_none(default={})
        .discard_dict_keys_none_value()
        .value
    )

    properties_scope = (
        Validator(properties_scope, "properties_scope")
        .set_default_value_if_none(default=scope)
        .value
    )

    property_columns = (
        Validator(property_columns, "property_columns")
        .set_default_value_if_none(default=[])
        .value
    )

    sub_holding_keys = (
        Validator(sub_holding_keys, "sub_holding_keys")
        .set_default_value_if_none(default=[])
        .value
    )

    batch_size = (
        Validator(batch_size, "batch_size")
        .set_default_value_if_none(domain_lookup[file_type]["default_batch_size"])
        .override_value(
            not domain_lookup[file_type]["batch_allowed"],
            domain_lookup[file_type]["default_batch_size"],
        )
        .value
    )

    # Discard mappings where the provided value is None
    mapping_required = (
        Validator(mapping_required, "mapping_required")
        .discard_dict_keys_none_value()
        .value
    )

    mapping_optional = (
        Validator(mapping_optional, "mapping_optional")
        .discard_dict_keys_none_value()
        .value
    )

    required_call_attributes = domain_lookup[file_type]["required_call_attributes"]
    if "scope" in required_call_attributes:
        required_call_attributes.remove("scope")

    # Check that all required parameters exist
    Validator(
        required_call_attributes, "required_attributes_for_call"
    ).check_subset_of_list(list(mapping_required.keys()), "required_mapping")

    # Verify that all the required attributes for this top level model exist in the provided required mapping
    cocoon.utilities.verify_all_required_attributes_mapped(
        mapping=mapping_required,
        model_object_name=domain_lookup[file_type]["top_level_model"],
        exempt_attributes=["identifiers", "properties", "instrument_identifiers"],
    )

    # Create the thread pool to use with the async_tools.run_in_executor decorator to make sync functions awaitable
    thread_pool = ThreadPool(thread_pool_max_workers).thread_pool

    if instrument_name_enrichment:
        loop = cocoon.async_tools.start_event_loop_new_thread()

        data_frame, mapping_required = asyncio.run_coroutine_threadsafe(
            cocoon.instruments.enrich_instruments(
                api_factory=api_factory,
                data_frame=data_frame,
                instrument_identifier_mapping=identifier_mapping,
                mapping_required=mapping_required,
                constant_prefix="$",
                **{"thread_pool": thread_pool},
            ),
            loop,
        ).result()

        # Stop the additional event loop
        cocoon.async_tools.stop_event_loop_new_thread(loop)

    """
    Unnest and populate defaults where a mapping is provided with column and/or default fields in a nested dictionary
    
    e.g.
    {'name': {
        'column': 'instrument_name',
        'default': 'unknown_name'
        }
    }
    
    rather than simply
    {'name': 'instrument_name'}
    """
    (
        data_frame,
        mapping_required,
    ) = cocoon.utilities.handle_nested_default_and_column_mapping(
        data_frame=data_frame, mapping=mapping_required, constant_prefix="$"
    )
    (
        data_frame,
        mapping_optional,
    ) = cocoon.utilities.handle_nested_default_and_column_mapping(
        data_frame=data_frame, mapping=mapping_optional, constant_prefix="$"
    )

    # Get all the DataFrame columns as well as those that contain at least one null value
    data_frame_columns = list(data_frame.columns.values)
    nan_columns = [
        column for column in data_frame_columns if data_frame[column].isna().any()
    ]

    # Validate that none of the provided columns are missing or invalid
    Validator(
        mapping_required, "mapping_required"
    ).get_dict_values().filter_list_using_first_character("$").check_subset_of_list(
        data_frame_columns, "DataFrame Columns"
    ).check_no_intersection_with_list(
        nan_columns, "Columns with Missing Values"
    )

    Validator(
        mapping_optional, "mapping_optional"
    ).get_dict_values().filter_list_using_first_character("$").check_subset_of_list(
        data_frame_columns, "DataFrame Columns"
    )

    Validator(
        identifier_mapping, "identifier_mapping"
    ).get_dict_values().filter_list_using_first_character("$").check_subset_of_list(
        data_frame_columns, "DataFrame Columns"
    )

    Validator(property_columns, "property_columns").check_subset_of_list(
        data_frame_columns, "DataFrame Columns"
    )

    # Converts higher level data types such as dictionaries and lists to strings
    data_frame = data_frame.applymap(cocoon.utilities.convert_cell_value_to_string)

    if remove_white_space:
        column_list = [property_columns]
        for col in [mapping_optional, mapping_required, identifier_mapping]:
            column_list.append(col.values())

        column_list = list(set([item for sublist in column_list for item in sublist]))
        data_frame = strip_whitespace(data_frame, column_list)

    # Get the types of the attributes on the top level model for this request
    open_api_types = getattr(
        lusid.models, domain_lookup[file_type]["top_level_model"]
    ).openapi_types

    # If the top level model has a sub_holding_keys attribute with a dict type, the sub-holding keys
    # require a property definition and are populated with values from the provided DataFrame columns
    if (
        "sub_holding_keys" in open_api_types.keys()
        and "dict" in open_api_types["sub_holding_keys"]
    ):

        Validator(sub_holding_keys, "sub_holding_key_columns").check_subset_of_list(
            data_frame_columns, "DataFrame Columns"
        )

        # Check for and create missing property definitions for the sub-holding-keys
        data_frame = cocoon.properties.create_missing_property_definitions_from_file(
            api_factory=api_factory,
            properties_scope=properties_scope,
            domain="Transaction",
            data_frame=data_frame,
            property_columns=sub_holding_keys,
        )

    # Check for and create missing property definitions for the properties
    if domain_lookup[file_type]["domain"] is not None:
        data_frame = cocoon.properties.create_missing_property_definitions_from_file(
            api_factory=api_factory,
            properties_scope=properties_scope,
            domain=domain_lookup[file_type]["domain"],
            data_frame=data_frame,
            property_columns=property_columns,
        )

    # Start a new event loop in a new thread, this is required to run inside a Jupyter notebook
    loop = cocoon.async_tools.start_event_loop_new_thread()

    # Keyword arguments to be used in requests to the LUSID API
    keyword_arguments = {
        "scope": scope,
        # This handles that identifiers need to be specified differently based on the request type, allowing users
        # to provide either the entire key e.g. "Instrument/default/Figi" or just the code "Figi" for any request
        "full_key_format": domain_lookup[file_type]["full_key_format"],
        # Gets the allowed unique identifiers
        "unique_identifiers": cocoon.instruments.get_unique_identifiers(
            api_factory=api_factory
        ),
        "holdings_adjustment_only": holdings_adjustment_only,
        "thread_pool": thread_pool,
    }

    # Get the responses from LUSID
    responses = asyncio.run_coroutine_threadsafe(
        _construct_batches(
            api_factory=api_factory,
            data_frame=data_frame,
            mapping_required=mapping_required,
            mapping_optional=mapping_optional,
            property_columns=property_columns,
            properties_scope=properties_scope,
            instrument_identifier_mapping=identifier_mapping,
            batch_size=batch_size,
            file_type=file_type,
            domain_lookup=domain_lookup,
            sub_holding_keys=sub_holding_keys,
            **keyword_arguments,
        ),
        loop,
    ).result()

    # Stop the additional event loop
    cocoon.async_tools.stop_event_loop_new_thread(loop)

    return {file_type + "s": responses}
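
load_from_data_frame drives most of its behaviour from config/domain_settings.json. The exact file ships with the library, but based on the keys read above, one entry might look roughly like this (a hedged reconstruction with illustrative values, not the packaged config):

domain_lookup_example = {
    "transaction": {
        "domain": "Transaction",  # property domain; None skips property creation
        "top_level_model": "TransactionRequest",
        "required_call_attributes": ["portfolio_code", "scope"],
        "batch_allowed": True,
        "default_batch_size": 2000,
        "full_key_format": True,
    }
}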