def __init__(self, result_limit=5000, expansion=True, show_query=False):
    """
    Store the option flags and build the helper components of the instance.

    :param result_limit: value handed unchanged to ``QueryGraph``
    :param expansion: flag kept as ``self._expansion`` (defaults to True)
    :param show_query: flag kept as ``self._show_query`` (defaults to False)
    """
    # Option flags are recorded exactly as received.
    self._expansion, self._show_query = expansion, show_query

    # Helper components; each one is constructed once per instance.
    self._linker = NameSPARQL()
    self._extracter = Extraction()
    self._yago = YagoTypeSimilarity()
    # Only the query graph depends on a constructor argument.
    self._query_graph = QueryGraph(result_limit)
def test_extraction():
    """
    Check chunk and word extraction on the 'abstract' feature of the
    DBpedia resource ``Yin_and_yang``.

    The test expects 'Chinese' among the extracted chunks and 'philosophy'
    among the extracted words.
    """
    from sematch.nlp import Extraction
    from sematch.sparql import EntityFeatures

    resource_uri = 'http://dbpedia.org/resource/Yin_and_yang'
    features = EntityFeatures().features(resource_uri)
    assert features is not None

    extractor = Extraction()
    # Both extraction calls work on the same abstract text.
    abstract = features['abstract']
    assert 'Chinese' in extractor.extract_chunks_doc(abstract)
    assert 'philosophy' in extractor.extract_words_doc(abstract)
def test_extraction():
    """
    Smoke-test the ``Extraction`` helpers on the features of the DBpedia
    resource ``Technical_University_of_Madrid``.

    Every helper is only required to return something other than ``None``.
    """
    from sematch.nlp import Extraction
    from sematch.semantic.sparql import EntityFeatures

    uri = 'http://dbpedia.org/resource/Technical_University_of_Madrid'
    features = EntityFeatures().features(uri)
    extractor = Extraction()

    # Noun, verb and chunk extraction all run on the same abstract text.
    abstract = features['abstract']
    assert extractor.extract_nouns(abstract) is not None
    assert extractor.extract_verbs(abstract) is not None
    assert extractor.extract_chunks_doc(abstract) is not None

    # The 'category' feature is first turned into category features,
    # which are then converted to words.
    category_feats = extractor.category_features(features['category'])
    assert extractor.category2words(category_feats) is not None
def __init__(self, result_limit=5000, expansion=False, show_query=False):
    """
    Semantic search of entities and concepts.

    :param result_limit: maximum number of retrieved entities
    :param expansion: whether to conduct concept expansion
    :param show_query: whether the SPARQL query is shown
    """
    # Option flags are recorded exactly as received.
    self._expansion, self._show_query = expansion, show_query

    # Helper components; each one is constructed once per instance.
    self._linker = NameSPARQL()
    self._extracter = Extraction()
    self._yago = YagoTypeSimilarity()
    # The result limit is passed straight through to the query graph.
    self._query_graph = QueryGraph(result_limit)
def test_sim_graph():
    """
    Build a word similarity graph from the 'abstract' feature of the DBpedia
    resource ``Tom_Cruise`` and rank its words with ``page_rank``.

    The test passes when the (up to) ten top-ranked entries can be split
    into a words tuple and a scores tuple.
    """
    from collections import Counter

    from sematch.semantic.graph import SimGraph
    from sematch.semantic.similarity import WordNetSimilarity
    from sematch.nlp import Extraction, lemmatization
    from sematch.sparql import EntityFeatures

    entity = EntityFeatures().features('http://dbpedia.org/resource/Tom_Cruise')
    tokens = Extraction().extract_words_sent(entity['abstract'])
    # Lemmatize, then drop duplicates so each word appears once.
    vocabulary = list(set(lemmatization(tokens)))

    similarity = WordNetSimilarity()
    graph = SimGraph(vocabulary, similarity.word_similarity)
    # presumably a word -> score mapping, since it is fed to Counter; verify
    ranks = graph.page_rank()

    top_entries = Counter(ranks).most_common(10)
    # Unpacking into two names requires at least one ranked entry.
    top_words, _top_scores = zip(*top_entries)
    assert top_words is not None
def __init__(self):
    """
    Build the helper components of the instance.

    Takes no options: every component, including the query graph, is
    created with its default constructor arguments.
    """
    # Each helper is constructed once and kept on a private attribute.
    self._linker = NameSPARQL()
    self._extracter = Extraction()
    self._yago = YagoTypeSimilarity()
    self._query_graph = QueryGraph()