def __get_only_retrieved_evaluated(
            self, uris: pd.DataFrame,
            all_properties_dbpedia: dict) -> List[PropertiesDict]:
        """
        Build one PropertiesDict per row of `uris`, keeping only the retrieved
        properties whose value is not None

        Args:
            uris: table with a 'uri' column; a row whose uri is missing
                yields an empty PropertiesDict
            all_properties_dbpedia: maps every uri to a dict whose keys are
                property uris and whose values may be None

        Returns:
            List of PropertiesDict, one per row of `uris`, in the same order.
            Keys are property uris when `self.prop_as_uri` is set, otherwise
            the labels found in the class properties table
        """
        results = []

        for uri in uris['uri']:

            # Nothing was retrieved for this row: emit an empty representation
            if not pd.notna(uri):
                results.append(PropertiesDict({}))
                continue

            retrieved = all_properties_dbpedia[uri]

            evaluated = {}
            for k in retrieved:
                value = retrieved[k]

                # Properties without a value are dropped in this mode
                if value is None:
                    continue

                if self.prop_as_uri:
                    key = k
                else:
                    # Look up the label matching the property uri in the class properties table
                    matching_rows = self.__class_properties.query('uri == @k')
                    key = matching_rows['label'].values[0]

                evaluated[key] = value

            results.append(PropertiesDict(evaluated))

        return results
    def __get_original_retrieved(
            self, uris, all_properties_dbpedia,
            raw_source: RawInformationSource) -> List[PropertiesDict]:
        """
        Build one PropertiesDict per content holding exactly the properties found
        in the local source, valued with what was retrieved from dbpedia

        Every key of the raw content is kept. If the key is a known property uri
        or a known property label, its value is the dbpedia one ('' when dbpedia
        has None for it). Any other key gets ''.

        Args:
            uris: table with a 'uri' column, aligned row by row with `raw_source`;
                a row whose uri is missing yields an empty PropertiesDict
            all_properties_dbpedia: maps every uri to a dict
                {property uri: value or None}
            raw_source: iterable of raw contents (mappings keyed by property
                uri or label)

        Returns:
            List of PropertiesDict, one per (uri, raw content) pair. Keys are
            property uris when `self.prop_as_uri` is set, labels otherwise
        """
        # Built once up front so each key check below is a constant-time lookup
        # instead of converting and scanning a whole column for every key
        known_uris = set(self.__class_properties['uri'])
        known_labels = set(self.__class_properties['label'])

        prop_dict_list = []

        for uri, raw_content in zip(uris['uri'], raw_source):

            if pd.notna(uri):

                content_properties_dbpedia = all_properties_dbpedia[uri]
                content_properties_source = raw_content

                # Get all properties from source, those that have value in dbpedia will have value,
                # those that don't have value in dbpedia will be ''
                content_properties_final = {}
                for k in content_properties_source:
                    if k in known_uris:
                        value = content_properties_dbpedia[k]

                        if self.prop_as_uri:
                            key = k
                        else:
                            key = self.__class_properties.query(
                                'uri == @k')['label'].values[0]
                    elif k in known_labels:
                        # Named prop_uri so the content uri of the outer loop is not shadowed
                        prop_uri = self.__class_properties.query(
                            'label == @k')['uri'].values[0]

                        value = content_properties_dbpedia[prop_uri]

                        if self.prop_as_uri:
                            key = prop_uri
                        else:
                            key = k
                    else:
                        # Key unknown to the class properties table: nothing to retrieve
                        value = None
                        key = k

                    content_properties_final[key] = '' if value is None else value

                prop_content = PropertiesDict(content_properties_final)
            else:
                prop_content = PropertiesDict({})

            prop_dict_list.append(prop_content)

        return prop_dict_list
    def get_properties(
            self, raw_source: RawInformationSource) -> List[PropertiesDict]:
        """
        Extract the exogenous properties of every content of the local dataset

        When a field name list is configured, only those fields are taken and
        the ones whose raw value is None (or absent) are skipped; otherwise the
        raw content is used as it is. In 'only_retrieved_evaluated' mode the
        properties whose value is '' are filtered out as well.

        Args:
            raw_source: iterable of raw contents (mappings field -> value)

        Returns:
            List of PropertiesDict, one per raw content, in iteration order
        """
        logger.info("Extracting exogenous properties from local dataset")

        extracted = []
        for raw_content in raw_source:

            # Start from the whole raw content, narrow it down if fields were requested
            selected = raw_content
            if self.__field_name_list is not None:
                selected = {}
                for field in self.__field_name_list:
                    if raw_content.get(field) is not None:
                        selected[field] = raw_content[field]

            if self.mode == 'only_retrieved_evaluated':
                non_empty = {}
                for field in selected:
                    value = selected[field]
                    if value != '':
                        non_empty[field] = value
                selected = non_empty

            extracted.append(PropertiesDict(selected))

        return extracted
Esempio n. 4
0
    def get_properties(self, name: str, raw_content: Dict[str, object]) -> PropertiesDict:
        """
        Retrieve the properties of a single content according to the configured mode

        The retrieval strategy is chosen by `self.mode`, one of
        'only_retrieved_evaluated', 'all_retrieved', 'original_retrieved' or 'all'.
        Any other mode results in an empty set of properties.

        Args:
            name (str): string identifier of the returned properties object
            raw_content: represent a row in the dataset that
                is being processed

        Returns:
            PropertiesDict
        """
        logger.info("Extracting exogenous properties")

        # The modes are mutually exclusive, so at most one branch can run
        prop_dict = {}
        if self.mode == 'only_retrieved_evaluated':
            prop_dict = self.__get_only_retrieved_evaluated(raw_content)
        elif self.mode == 'all_retrieved':
            prop_dict = self.__get_all_properties_retrieved(raw_content)
        elif self.mode == 'original_retrieved':
            prop_dict = self.__get_original_retrieved(raw_content)
        elif self.mode == 'all':
            prop_dict = self.__get_all_properties(raw_content)

        # Diagnostic output goes through the logger rather than stdout
        logger.debug("Retrieved properties: %s", prop_dict)

        return PropertiesDict(name, prop_dict)
Esempio n. 5
0
    def get_properties(self, name: str,
                       raw_content: Dict[str, object]) -> PropertiesDict:
        """
        Build the properties of a single content from its raw row

        With no field name list configured, every key of `raw_content` is used.
        Otherwise the configured names are taken positionally from the start of
        the list, and no more names are considered than `raw_content` has keys.
        Values are converted to strings; a name that is not a key of
        `raw_content` gets ''. In 'only_retrieved_evaluated' mode the properties
        whose value is '' are left out.

        Args:
            name (str): string identifier of the returned properties object
            raw_content: row of the dataset that is being processed

        Returns:
            PropertiesDict
        """
        logger.info("Extracting exogenous properties")

        if self.__field_name_list is None:
            field_names = list(raw_content.keys())
        else:
            # zip stops at the shorter input, which caps the number of configured
            # names at the number of keys in the raw content
            field_names = [
                wanted
                for _, wanted in zip(raw_content.keys(), self.__field_name_list)
            ]

        skip_empty = self.mode == 'only_retrieved_evaluated'

        collected = {}
        for field_name in field_names:
            if field_name in raw_content:
                text = str(raw_content[field_name])
            else:
                text = ''

            if skip_empty and text == '':
                continue

            collected[field_name] = text

        return PropertiesDict(name, collected)
Esempio n. 6
0
    def test_append_remove_exo(self):
        """
        Tests for append, remove and get methods of the content's exogenous instances

        Covers three scenarios: appending and removing a single representation by id,
        appending a list of representations with explicit ids, and appending a list
        of representations without ids (retrieved by position).
        """
        exo_features = dict()
        exo_features["test_key"] = "test_value"

        content_exo_repr = PropertiesDict(exo_features)
        content_exo_repr2 = PropertiesDict({"test_key2": 'test_value2'})

        content1 = Content("001")
        content1.append_exogenous_representation(content_exo_repr, "test_exo")

        # content2 gets an extra representation that is removed right after,
        # so it must end up equal to content1
        content2 = Content("002")
        content2.append_exogenous_representation(content_exo_repr, "test_exo")
        content_exo_repr = PropertiesDict(exo_features)
        content2.append_exogenous_representation(content_exo_repr, "test_exo2")
        content2.remove_exogenous_representation("test_exo2")
        self.assertEqual(content1.exogenous_rep_container,
                         content2.exogenous_rep_container)
        self.assertEqual(content1.get_exogenous_representation("test_exo"),
                         content2.get_exogenous_representation("test_exo"))

        # test append list of representations
        content3 = Content("003")

        content3.append_exogenous_representation(
            [content_exo_repr, content_exo_repr2], ["id1", "id2"])
        self.assertEqual(len(content3.exogenous_rep_container), 2)
        self.assertEqual(
            content3.get_exogenous_representation("id1").value,
            content_exo_repr.value)
        self.assertEqual(
            content3.get_exogenous_representation("id2").value,
            content_exo_repr2.value)

        # test append list of representations without id
        # The assertions target content4, the content built without ids
        content4 = Content("004")
        content4.append_exogenous_representation(
            [content_exo_repr, content_exo_repr2])
        self.assertEqual(len(content4.exogenous_rep_container), 2)
        self.assertEqual(
            content4.get_exogenous_representation(0).value,
            content_exo_repr.value)
        self.assertEqual(
            content4.get_exogenous_representation(1).value,
            content_exo_repr2.value)
Esempio n. 7
0
    def test_append_remove_exo(self):
        """
        Checks that a content which had an exogenous instance appended and then
        removed compares equal to a content that never received it
        """
        features = {"test_key": "test_value"}

        shared_repr = PropertiesDict(features)

        reference_content = Content("001")
        reference_content.append_exogenous(shared_repr, "test_exo")

        modified_content = Content("002")
        modified_content.append_exogenous(shared_repr, "test_exo")

        # Extra representation that is appended and immediately removed
        temporary_repr = PropertiesDict(features)
        modified_content.append_exogenous(temporary_repr, "test_exo2")
        modified_content.remove_exogenous("test_exo2")

        self.assertEqual(reference_content.exogenous_rep_container,
                         modified_content.exogenous_rep_container)
        self.assertEqual(reference_content.get_exogenous("test_exo"),
                         modified_content.get_exogenous("test_exo"))
    def __get_all_properties_retrieved(
            self, uris, all_properties_dbpedia) -> List[PropertiesDict]:
        """
        Build one PropertiesDict per row of `uris` holding every retrieved
        property, with '' in place of the values that are None

        Args:
            uris: table with a 'uri' column; a row whose uri is missing
                yields an empty PropertiesDict
            all_properties_dbpedia: maps every uri to a dict
                {property uri: value or None}

        Returns:
            List of PropertiesDict, one per row of `uris`, in the same order.
            Keys are property uris when `self.prop_as_uri` is set, otherwise
            the labels found in the class properties table
        """
        results = []

        for uri in uris['uri']:

            # Nothing was retrieved for this row: emit an empty representation
            if not pd.notna(uri):
                results.append(PropertiesDict({}))
                continue

            retrieved = all_properties_dbpedia[uri]

            filled = {}
            for prop_uri in retrieved:
                # Properties retrieved without a value are kept, as an empty string
                value = retrieved.get(prop_uri)
                if value is None:
                    value = ''

                if self.prop_as_uri:
                    key = prop_uri
                else:
                    # Look up the label matching the property uri in the class properties table
                    matching_rows = self.__class_properties.query(
                        'uri == @prop_uri')
                    key = matching_rows['label'].values[0]

                filled[key] = value

            results.append(PropertiesDict(filled))

        return results
    def __get_all_properties(
            self, uris, all_properties_dbpedia,
            raw_source: RawInformationSource) -> List[PropertiesDict]:
        """
        Build one PropertiesDict per content merging the properties of the local
        source with all the properties retrieved from dbpedia

        For a source key that is a known property uri or label, the dbpedia value
        is used unless it is None, in which case the source value is kept. Source
        keys unknown to the class properties table keep their source value. The
        dbpedia properties not matched by any source key are then added, with ''
        in place of None.

        Args:
            uris: table with a 'uri' column, aligned row by row with `raw_source`;
                a row whose uri is missing yields an empty PropertiesDict
            all_properties_dbpedia: maps every uri to a dict
                {property uri: value or None}; it is not modified
            raw_source: iterable of raw contents (mappings keyed by property
                uri or label)

        Returns:
            List of PropertiesDict, one per (uri, raw content) pair. Keys are
            property uris when `self.prop_as_uri` is set, labels otherwise
        """
        # Built once up front so each key check below is a constant-time lookup
        # instead of converting and scanning a whole column for every key
        known_uris = set(self.__class_properties['uri'])
        known_labels = set(self.__class_properties['label'])

        prop_dict_list = []

        for uri, raw_content in zip(uris['uri'], raw_source):

            if pd.notna(uri):

                # Shallow copy: matched entries are popped below, and popping from
                # the caller's dict would corrupt it and raise KeyError for any
                # later row that shares the same uri
                content_properties_dbpedia = dict(all_properties_dbpedia[uri])
                content_properties_source = raw_content

                # Get all properties from source + all properties from dbpedia
                # if there are some properties in source that are also in dbpedia
                # the dbpedia value will overwrite the local source value
                content_properties_final = {}
                for k in content_properties_source:
                    if k in known_uris:
                        value = content_properties_dbpedia.pop(k)

                        if self.prop_as_uri:
                            key = k
                        else:
                            key = self.__class_properties.query(
                                'uri == @k')['label'].values[0]

                    elif k in known_labels:
                        # Named prop_uri so the content uri of the outer loop is not shadowed
                        prop_uri = self.__class_properties.query(
                            'label == @k')['uri'].values[0]

                        value = content_properties_dbpedia.pop(prop_uri)

                        if self.prop_as_uri:
                            key = prop_uri
                        else:
                            key = k
                    else:
                        value = content_properties_source[k]
                        key = k

                    # dbpedia had no value for this property: fall back to the source one
                    if value is None:
                        value = content_properties_source[k]

                    content_properties_final[key] = value

                # Whatever is left was retrieved from dbpedia but is not in the source
                for k in content_properties_dbpedia:
                    value = content_properties_dbpedia[k]

                    if value is None:
                        value = ''

                    if self.prop_as_uri:
                        key = k
                    else:
                        key = self.__class_properties.query(
                            'uri == @k')['label'].values[0]

                    content_properties_final[key] = value

                prop_content = PropertiesDict(content_properties_final)
            else:
                prop_content = PropertiesDict({})

            prop_dict_list.append(prop_content)

        return prop_dict_list