def expand_terms(self, terms, limit_per_term=10):
    """
    Augment a list of weighted terms with their one-step ConceptNet
    neighbors, each at a reduced weight, and normalize the result.

    Terms that are too infrequent to have their own vector in the
    AssocSpace can still be found through their neighbors, which improves
    recall; the combination approximates the vector such a term would have.

    `terms` is a list of `(term, weight)` pairs. For each term, up to
    `limit_per_term` edges are looked up. A neighbor's weight is the term's
    weight times the edge weight times 0.1, negated when the edge's relation
    starts with '/r/Not'.

    Returns a list of `(term, weight)` pairs (the originals followed by the
    neighbors) whose absolute weights sum to 1, or an empty list when the
    total absolute weight is zero.
    """
    self.load()

    # Work on a copy so the caller's list is not extended.
    weighted = terms[:]
    for source_term, source_weight in terms:
        edges = self.finder.lookup(source_term, limit=limit_per_term)
        for edge in edges:
            # Pick whichever endpoint is *not* the term we looked up.
            if field_match(edge['start'], source_term):
                other_end = edge['end']
            elif field_match(edge['end'], source_term):
                other_end = edge['start']
            else:
                continue

            contribution = source_weight * edge['weight'] * 0.1
            if edge['rel'].startswith('/r/Not'):
                # Negative relations push the neighbor away.
                contribution *= -1
            weighted.append((other_end, contribution))

    norm = sum(abs(value) for (_, value) in weighted)
    if norm == 0:
        return []
    return [(name, value / norm) for (name, value) in weighted]
def expand_terms(terms, limit_per_term=20):
    """
    Collect the neighbors of each term that are one edge away, with weights.

    `terms` is an iterable of terms; each one is passed to `FINDER.lookup`
    (with `limit=limit_per_term`) and compared against the edge endpoints
    with `field_match`. An edge contributes its opposite endpoint as a
    neighbor only when the second component of that endpoint's `split_uri`
    result is 'en' (presumably the language code -- confirm against
    `split_uri`).

    A neighbor's weight is the edge weight capped at 10, as a float, and is
    negated when the edge's relation starts with '/r/Not'.

    Returns a list of `(neighbor, weight, normalized_weight)` triples, where
    `normalized_weight` is `weight` divided by the sum of all absolute
    weights. Returns an empty list when that sum is zero (including when no
    neighbors were found).
    """
    results = []
    # Iterate `terms` directly: nothing below mutates it, so no defensive
    # copy is needed and any iterable (not only a sliceable one) works.
    for term in terms:
        for edge in FINDER.lookup(term, limit=limit_per_term):
            # Keep the endpoint opposite to the looked-up term, and only if
            # that endpoint's second URI component is 'en'.
            if (
                field_match(edge['start'], term)
                and split_uri(edge['end'])[1] == 'en'
            ):
                neighbor = edge['end']
            elif (
                field_match(edge['end'], term)
                and split_uri(edge['start'])[1] == 'en'
            ):
                neighbor = edge['start']
            else:
                continue

            # Cap very heavy edges so a single edge cannot dominate.
            neighbor_weight = 1.0 * min(10, edge['weight'])
            if edge['rel'].startswith('/r/Not'):
                neighbor_weight *= -1
            results.append((neighbor, neighbor_weight))

    total_weight = sum(abs(weight) for (_, weight) in results)
    if total_weight == 0:
        return []
    return [
        (neighbor, weight, weight / total_weight)
        for (neighbor, weight) in results
    ]
def transform_directed_edge(edge, node):
    """
    Annotate `edge` in place with 'node' and 'other' keys relative to `node`.

    The endpoint ('start' or 'end') whose '@id' matches `node` according to
    `field_match` is stored under 'node', and the opposite endpoint under
    'other'. 'start' is checked first. The same (mutated) edge is returned.

    Raises ValueError when neither endpoint matches `node`.
    """
    # Try each orientation in order: (matching endpoint, opposite endpoint).
    for this_side, that_side in (('start', 'end'), ('end', 'start')):
        if field_match(edge[this_side]['@id'], node):
            edge['node'] = edge[this_side]
            edge['other'] = edge[that_side]
            return edge

    raise ValueError("Neither the start nor end of this edge matches "
                     "the node %r: %r" % (node, edge))
def transform_directed_edge(edge, node):
    """
    Record on `edge` which endpoint is `node` and which is the other one.

    Checks the '@id' of the 'start' endpoint, then of the 'end' endpoint,
    against `node` using `field_match`. The matching endpoint is stored
    under the 'node' key and the remaining endpoint under the 'other' key.
    The edge is modified in place and also returned.

    Raises ValueError if neither endpoint's '@id' matches `node`.
    """
    if field_match(edge['start']['@id'], node):
        matched_key, opposite_key = 'start', 'end'
    elif field_match(edge['end']['@id'], node):
        matched_key, opposite_key = 'end', 'start'
    else:
        message = (
            "Neither the start nor end of this edge matches "
            "the node %r: %r" % (node, edge)
        )
        raise ValueError(message)

    edge['node'] = edge[matched_key]
    edge['other'] = edge[opposite_key]
    return edge
def passes_filter(label, filter):
    """
    Decide whether `label` is accepted by `filter`.

    A `filter` of None accepts everything (returns True). Otherwise the
    result of `field_match(label, filter)` is returned unchanged.
    """
    # Short-circuit: `field_match` is only consulted when a filter is given.
    return filter is None or field_match(label, filter)