def _create_premis_events(dataset_id, workspace, config):
    """Write one PREMIS event per provenance entry of a dataset.

    The dataset metadata is fetched from Metax using the connection
    settings found in the configuration file. Every object in the
    ``research_dataset.provenance`` list is then turned into a call to
    ``premis_event.premis_event``, which creates the event document in
    the workspace.

    Text fields are localized according to the languages of the
    dataset. When a provenance object lacks ``temporal.start_date`` the
    event datetime is set to ``'OPEN'``.

    :param dataset_id: dataset identifier
    :param workspace: SIP creation directory
    :param config: path to configuration file
    :returns: ``None``
    """
    settings = Configuration(config)
    metax_client = Metax(
        settings.get('metax_url'),
        settings.get('metax_user'),
        settings.get('metax_password'),
        verify=settings.getboolean('metax_ssl_verification'))
    dataset = metax_client.get_dataset(dataset_id)

    languages = get_dataset_languages(dataset)

    def _localize(translations):
        """Pick the translation that best suits the dataset languages."""
        return get_localized_value(translations, languages=languages)

    for entry in dataset["research_dataset"]["provenance"]:
        kind = _localize(entry["preservation_event"]["pref_label"])

        # The start date is optional in the provenance metadata
        try:
            timestamp = entry["temporal"]["start_date"]
        except KeyError:
            timestamp = 'OPEN'

        detail = _localize(entry["description"])
        outcome = _localize(entry["event_outcome"]["pref_label"])
        outcome_detail = _localize(entry["outcome_description"])

        premis_event.premis_event(
            workspace=workspace,
            event_type=kind,
            event_datetime=timestamp,
            event_detail=detail,
            event_outcome=outcome,
            event_outcome_detail=outcome_detail)
def find_dir_use_category(parent_path, dirpath2usecategory, languages):
    """Resolve the use category that applies to a directory path.

    Every directory path listed in ``dirpath2usecategory`` is scored
    against ``parent_path`` with ``_match_paths``. The use category of
    the path with the highest positive score is selected; if several
    paths share the highest score, the one iterated first is kept.

    :param parent_path: path to the parent directory of the file
    :param dirpath2usecategory: Dictionary, which maps research_dataset
                                directory paths to the corresponding
                                use_categories.
    :param languages: A list of ISO 639-1 formatted language codes of
                      the dataset
    :returns: localized ``pref_label`` of the selected use category, or
              ``None`` if no path scored above zero or the selected use
              category is empty
    """
    best_score = 0
    best_category = None

    for candidate_path, category in dirpath2usecategory.items():
        score = _match_paths(parent_path, candidate_path)

        # Strict comparison: ties never replace an earlier candidate
        if score > best_score:
            best_score, best_category = score, category

    if not best_category:
        return None

    return get_localized_value(best_category["pref_label"],
                               languages=languages)
def test_get_localized_value():
    """Test selection of localized values.

    The value of the first matching language in the priority list
    should be returned. If none of the requested languages is
    available, 'und' or 'zxx' is used as a fallback, and ``KeyError``
    is raised when even that fails.

    :returns: ``None``
    """
    cases = [
        # Language priority decides which translation is returned
        ("testa", ["en", "fi"], "Test in English"),
        ("testa", ["fi", "en"], "Testi suomeksi"),
        # Use 'und' or 'zxx' if no localized value exists for 'fi' or 'en'
        ("testb", ["en", "fi"], "#1,234,567"),
    ]
    for key, languages, expected in cases:
        localized = get_localized_value(DATASET[key], languages=languages)
        assert localized == expected

    # Nothing usable is available for this entry
    with pytest.raises(KeyError):
        get_localized_value(DATASET["testc"], languages=["en", "fi"])
    def get_provenance_ids(self):
        """Collect the event identifiers of the dataset provenance.

        The metadata references produced by the
        ``create_provenance_information`` input are read, and for each
        referenced identifier the corresponding PREMIS event file in
        the SIP creation directory is parsed to find out its event
        type. Event files that do not exist are ignored. The provenance
        list of the dataset is then read from Metax, and each provenance
        object is mapped to the identifier of the event that has the
        same (localized) event type.

        A ``KeyError`` is raised if no event file was found for the
        event type of some provenance object.

        :returns: list of provenance IDs
        """
        settings = Configuration(self.config)
        client = Metax(
            settings.get('metax_url'),
            settings.get('metax_user'),
            settings.get('metax_password'),
            verify=settings.getboolean('metax_ssl_verification'))
        dataset = client.get_dataset(self.dataset_id)
        dataset_languages = get_dataset_languages(dataset)

        # The path of the Luigi task input already contains the
        # workspace path, so only the file name is passed on.
        references = read_md_references(
            self.workspace,
            os.path.basename(
                self.input()['create_provenance_information'].path))
        event_ids = get_md_references(references)

        # Map the event type found in each event file to its identifier
        id_by_event_type = {}
        for reference in event_ids:
            # The first character of the reference is not part of the
            # file name
            xml_name = reference[1:] + "-PREMIS%3AEVENT-amd.xml"
            xml_path = os.path.join(self.sip_creation_path, xml_name)
            if os.path.exists(xml_path):
                document_root = ET.parse(encode_path(xml_path)).getroot()
                type_elements = document_root.xpath("//premis:eventType",
                                                    namespaces=NAMESPACES)
                id_by_event_type[type_elements[0].text] = reference

        return [
            id_by_event_type[get_localized_value(
                provenance["preservation_event"]["pref_label"],
                languages=dataset_languages)]
            for provenance in dataset["research_dataset"]["provenance"]
        ]
def find_file_use_category(identifier, dataset_metadata):
    """Find the use category of a file listed in dataset metadata.

    The ``files`` list of ``research_dataset`` is searched for an entry
    whose ``identifier`` equals the given identifier. The first match
    wins and the ``pref_label`` of its ``use_category`` is localized
    according to the languages of the dataset.

    :param identifier: file identifier
    :param dataset_metadata: dataset metadata dictionary
    :returns: localized `use_category` label of the file, or ``None``
              if the dataset has no ``files`` list or the file is not
              in it
    """
    languages = get_dataset_languages(dataset_metadata)
    research_dataset = dataset_metadata['research_dataset']

    # Datasets without directly listed files have nothing to search
    if 'files' not in research_dataset:
        return None

    for entry in research_dataset['files']:
        if entry['identifier'] == identifier:
            label = entry['use_category']['pref_label']
            return get_localized_value(label, languages=languages)

    # Nothing found
    return None