Beispiel #1
0
class GraphIC:
    """
    Graph-based information content (IC) of concepts in a knowledge graph.

    The IC of a concept is the negative natural logarithm of the ratio
    between the concept's frequency and the entity count ``N``, both
    obtained from a ``StatSPARQL`` helper. Every value computed here is
    kept in an in-memory dictionary and also appended to ``ic_file`` so it
    can be reloaded by a later instance.
    """
    def __init__(self, ic_file):
        """
        Load previously saved IC values and fetch the entity count.
        :param ic_file: path of the file holding saved IC values
        """
        self._ic_file = ic_file
        self._graph_ic = self.graph_ic_reader(ic_file)
        stats = StatSPARQL()
        self._graph_stats = stats
        # Denominator of the concept probability used in concept_ic.
        self._N = stats.entity_N()

    def concept_ic(self, concept):
        """
        Return the IC value of a concept, computing and saving it on a miss.
        :param concept: identifier of the concept (its uri)
        :return: the cached IC if known; otherwise 0.0 when the reported
            frequency is zero, else -log(frequency / N)
        """
        known = self._graph_ic
        if concept in known:
            return known[concept]

        occurrences = int(self._graph_stats.concept_freq(concept))
        # A zero frequency has no defined logarithm; it is mapped to 0.0.
        ic_value = (
            0.0 if occurrences == 0
            else -math.log(1.0 * occurrences / self._N)
        )

        # Persist first, then remember the value in memory.
        record = {'concept': concept, 'ic': str(ic_value)}
        self.graph_ic_writer(self._ic_file, [record])
        known[concept] = ic_value
        return ic_value

    def graph_ic_reader(self, filename):
        """
        Read the saved IC values from a file.
        :param filename: the file containing IC values of concepts
        :return: a dictionary mapping each concept to its IC as a float
        """
        table = {}
        for entry in FileIO.read_json_file(filename):
            key = entry['concept']
            table[key] = float(entry['ic'])
        return table

    def graph_ic_writer(self, filename, data):
        """
        Append IC records to a file so later lookups can skip recomputation.
        :param filename: the file that stores IC values
        :param data: list of records with 'concept' and 'ic' keys
        :return: None
        """
        FileIO.append_json_file(filename, data)
Beispiel #2
0
class GraphIC:

    """
    Compute and cache graph-based IC (information content) of concepts.

    IC is obtained from the proportion of instances tagged with a concept:
    the concept frequency divided by the entity count, passed through a
    negative natural logarithm. Results are memoised in a dictionary and
    appended to the IC file given at construction.
    """

    def __init__(self, ic_file):
        """
        Set up the IC cache and the statistics helper.
        :param ic_file: the file from which saved IC values are read and
            to which new ones are appended
        """
        self._ic_file = ic_file
        self._graph_ic = self.graph_ic_reader(ic_file)
        sparql_stats = StatSPARQL()
        self._graph_stats = sparql_stats
        self._N = sparql_stats.entity_N()

    def concept_ic(self, concept):
        """
        Look up the IC of a concept, computing it when it is not cached.
        :param concept: a id of concept, here is the uri of concept
        :return: the ic value of the concept (0.0 for a zero frequency)
        """
        try:
            return self._graph_ic[concept]
        except KeyError:
            # Not cached yet: fall through and compute it below.
            pass

        frequency = int(self._graph_stats.concept_freq(concept))
        if frequency == 0:
            result = 0.0
        else:
            probability = 1.0 * frequency / self._N
            result = -math.log(probability)

        # The IC is stored as a string in the file and as a float in memory.
        new_rows = [{'concept': concept, 'ic': str(result)}]
        self.graph_ic_writer(self._ic_file, new_rows)
        self._graph_ic[concept] = result
        return result

    def graph_ic_reader(self, filename):
        """
        Load the saved IC values.
        :param filename: the file containing IC values of concepts
        :return: a dictionary concept:IC with IC converted to float
        """
        rows = FileIO.read_json_file(filename)
        return dict((row['concept'], float(row['ic'])) for row in rows)

    def graph_ic_writer(self, filename, data):
        """
        Save IC values for faster access later.
        :param filename: the file that IC records are appended to
        :param data: the records to append
        :return: None
        """
        FileIO.append_json_file(filename, data)
Beispiel #3
0
 def __init__(self, ic_file):
     """
     Remember the IC file, load its saved values and fetch the entity count.
     :param ic_file: file passed to graph_ic_reader to load saved IC values
     """
     self._ic_file = ic_file
     self._graph_ic = self.graph_ic_reader(ic_file)
     # Keep one statistics helper and reuse it for the entity count.
     stats = StatSPARQL()
     self._graph_stats = stats
     self._N = stats.entity_N()
Beispiel #4
0
 def __init__(self, ic_file):
     """
     Initialise the instance from a file of saved IC values.
     :param ic_file: the file whose contents graph_ic_reader loads
     """
     self._ic_file = ic_file
     self._graph_ic = self.graph_ic_reader(ic_file)
     sparql_stats = StatSPARQL()
     self._graph_stats = sparql_stats
     # Value returned by entity_N(); presumably the total entity count.
     self._N = sparql_stats.entity_N()