Beispiel #1
0
    def expand_terms(self, terms, limit_per_term=10):
        """
        Extend a list of weighted terms with their one-step ConceptNet
        neighbors, each at a reduced weight, and normalize the result.

        Looking up neighbors improves the recall of the AssocSpace: a term
        that is too infrequent to have its own vector can still be found
        through the terms it is connected to, which approximates the vector
        the infrequent term would have had.

        For each `(term, weight)` pair, up to `limit_per_term` edges are
        looked up. A neighbor receives `weight * edge weight * 0.1`, with the
        sign flipped when the edge's relation starts with '/r/Not'.

        Returns a list of `(term, weight)` pairs -- the original terms
        followed by the neighbors -- scaled so the absolute weights sum to 1,
        or an empty list when the total absolute weight is 0.
        """
        self.load()
        weighted = terms[:]
        for base_term, base_weight in terms:
            edges = self.finder.lookup(base_term, limit=limit_per_term)
            for edge in edges:
                starts_here = field_match(edge['start'], base_term)
                if not starts_here and not field_match(edge['end'], base_term):
                    # Neither endpoint is the term we looked up.
                    continue
                neighbor = edge['end'] if starts_here else edge['start']

                contribution = base_weight * edge['weight'] * 0.1
                if edge['rel'].startswith('/r/Not'):
                    # Negated relations count against the neighbor.
                    contribution *= -1
                weighted.append((neighbor, contribution))

        norm = sum(abs(value) for (_, value) in weighted)
        if norm == 0:
            return []
        return [(name, value / norm) for (name, value) in weighted]
Beispiel #2
0
    def expand_terms(self, terms, limit_per_term=10):
        """
        Extend a list of weighted terms with their one-step ConceptNet
        neighbors, each at a reduced weight, and normalize the result.

        Looking up neighbors improves the recall of the AssocSpace: a term
        that is too infrequent to have its own vector can still be found
        through the terms it is connected to, which approximates the vector
        the infrequent term would have had.

        For each `(term, weight)` pair, up to `limit_per_term` edges are
        looked up. A neighbor receives `weight * edge weight * 0.1`, with the
        sign flipped when the edge's relation starts with '/r/Not'.

        Returns a list of `(term, weight)` pairs -- the original terms
        followed by the neighbors -- scaled so the absolute weights sum to 1,
        or an empty list when the total absolute weight is 0.
        """
        self.load()
        weighted = terms[:]
        for base_term, base_weight in terms:
            edges = self.finder.lookup(base_term, limit=limit_per_term)
            for edge in edges:
                starts_here = field_match(edge['start'], base_term)
                if not starts_here and not field_match(edge['end'], base_term):
                    # Neither endpoint is the term we looked up.
                    continue
                neighbor = edge['end'] if starts_here else edge['start']

                contribution = base_weight * edge['weight'] * 0.1
                if edge['rel'].startswith('/r/Not'):
                    # Negated relations count against the neighbor.
                    contribution *= -1
                weighted.append((neighbor, contribution))

        norm = sum(abs(value) for (_, value) in weighted)
        if norm == 0:
            return []
        return [(name, value / norm) for (name, value) in weighted]
Beispiel #3
0
def expand_terms(terms, limit_per_term=20):
    """
    Collect the one-step neighbors of each term, with raw and normalized
    weights.

    For every term, up to `limit_per_term` edges are fetched from the
    module-level `FINDER`. An edge contributes its far endpoint as a
    neighbor when one endpoint matches the term (per `field_match`) and the
    second component of the other endpoint's `split_uri` result is 'en'
    (presumably the language code -- confirm against `split_uri`).

    A neighbor's weight is the edge weight capped at 10, negated when the
    edge's relation starts with '/r/Not'.

    Returns a list of `(neighbor, weight, normalized_weight)` tuples, where
    `normalized_weight` is `weight` divided by the sum of all absolute
    weights, or an empty list when that sum is 0. The input terms
    themselves are not included in the result.
    """
    results = []
    for term in terms:
        for edge in FINDER.lookup(term, limit=limit_per_term):
            if (field_match(edge['start'], term)
                    and split_uri(edge['end'])[1] == 'en'):
                neighbor = edge['end']
            elif (field_match(edge['end'], term)
                    and split_uri(edge['start'])[1] == 'en'):
                neighbor = edge['start']
            else:
                # The edge does not touch the term, or the neighbor is not
                # an 'en' URI.
                continue

            # Cap the edge weight at 10; `1.0 *` keeps the result a float.
            neighbor_weight = 1.0 * min(10, edge['weight'])
            if edge['rel'].startswith('/r/Not'):
                neighbor_weight *= -1
            results.append((neighbor, neighbor_weight))

    total_weight = sum(abs(weight) for (_, weight) in results)
    if total_weight == 0:
        return []
    return [
        (neighbor, weight, weight / total_weight)
        for (neighbor, weight) in results
    ]
Beispiel #4
0
def transform_directed_edge(edge, node):
    """
    Annotate `edge` in place with which endpoint is `node` and which is the
    other one, then return the same `edge` object.

    The endpoint whose '@id' matches `node` (per `field_match`) is stored
    under 'node' and the opposite endpoint under 'other'. The start of the
    edge is checked before the end.

    Raises ValueError if neither endpoint matches `node`.
    """
    if field_match(edge['start']['@id'], node):
        own_key, other_key = 'start', 'end'
    elif field_match(edge['end']['@id'], node):
        own_key, other_key = 'end', 'start'
    else:
        message = ("Neither the start nor end of this edge matches "
                   "the node %r: %r")
        raise ValueError(message % (node, edge))

    edge['node'] = edge[own_key]
    edge['other'] = edge[other_key]
    return edge
def transform_directed_edge(edge, node):
    """
    Annotate `edge` in place with which endpoint is `node` and which is the
    other one, then return the same `edge` object.

    The endpoint whose '@id' matches `node` (per `field_match`) is stored
    under 'node' and the opposite endpoint under 'other'. The start of the
    edge is checked before the end.

    Raises ValueError if neither endpoint matches `node`.
    """
    # Try the edge as written first, then the reversed orientation.
    for own_key, other_key in (('start', 'end'), ('end', 'start')):
        if field_match(edge[own_key]['@id'], node):
            edge['node'] = edge[own_key]
            edge['other'] = edge[other_key]
            return edge

    raise ValueError(
        "Neither the start nor end of this edge matches "
        "the node %r: %r" % (node, edge)
    )
Beispiel #6
0
 def passes_filter(label, filter):
     """
     Decide whether `label` is accepted by `filter`.

     A `filter` of None accepts every label and yields True. Otherwise the
     result is whatever `field_match(label, filter)` returns.
     """
     return filter is None or field_match(label, filter)
Beispiel #7
0
 def passes_filter(label, filter):
     """
     Decide whether `label` is accepted by `filter`.

     A `filter` of None accepts every label and yields True. Otherwise the
     result is whatever `field_match(label, filter)` returns.
     """
     # No filter given: nothing to check.
     if filter is None:
         return True
     return field_match(label, filter)