def __init__(self, result_limit=5000, expansion=True, show_query=False):
    """
    Store the option flags and create the helper objects used by this instance.

    :param result_limit: forwarded unchanged to ``QueryGraph``
    :param expansion: stored on ``self._expansion``
        (presumably toggles concept expansion -- confirm against usage)
    :param show_query: stored on ``self._show_query``
        (presumably toggles printing of the generated query -- confirm against usage)
    """
    # Plain option flags first.
    self._expansion = expansion
    self._show_query = show_query

    # Build the collaborators in a fixed order, then attach them.
    name_linker = NameSPARQL()
    extracter = Extraction()
    yago_similarity = YagoTypeSimilarity()
    query_graph = QueryGraph(result_limit)

    self._linker = name_linker
    self._extracter = extracter
    self._yago = yago_similarity
    self._query_graph = query_graph
def __init__(self, result_limit=5000, expansion=False, show_query=False):
    """
    Semantic search of entities and concepts.

    :param result_limit: maximum number of retrieved entities
    :param expansion: whether to conduct concept expansion
    :param show_query: whether the SPARQL query is shown
    """
    # Plain option flags first.
    self._expansion = expansion
    self._show_query = show_query

    # Build the collaborators in a fixed order, then attach them.
    name_linker = NameSPARQL()
    extracter = Extraction()
    yago_similarity = YagoTypeSimilarity()
    query_graph = QueryGraph(result_limit)  # result_limit is handed to the query graph

    self._linker = name_linker
    self._extracter = extracter
    self._yago = yago_similarity
    self._query_graph = query_graph
def test_yago_concept_similarity():
    """Smoke-test YagoTypeSimilarity lookups and similarity calls.

    Every check only requires a non-None result; no numeric score is pinned.
    """
    from sematch.semantic.similarity import YagoTypeSimilarity

    similarity = YagoTypeSimilarity()

    # Look up the YAGO concepts for three occupation words.
    dancer_concepts = similarity.word2yago('dancer')
    actor_concepts = similarity.word2yago('actor')
    singer_concepts = similarity.word2yago('singer')
    first_actor = actor_concepts[0]

    assert similarity.yago2synset(first_actor) is not None

    # Concept-to-concept similarity, each measured against the first actor concept.
    for concepts in (dancer_concepts, singer_concepts):
        assert similarity.yago_similarity(concepts[0], first_actor, 'wpath') is not None

    assert similarity.word2yago('university') is not None

    # YAGO class URIs checked in sequence; the Organization URI is looked up twice.
    class_uris = (
        'http://dbpedia.org/class/yago/EducationalInstitution108276342',
        'http://dbpedia.org/class/yago/Organization108008335',
        'http://dbpedia.org/class/yago/Institution108053576',
        'http://dbpedia.org/class/yago/Organization108008335',
    )
    for uri in class_uris:
        assert similarity.yago2synset(uri) is not None

    # 'wpath' uses corpus-based IC from the Brown corpus,
    # 'wpath_graph' uses graph-based IC from DBpedia.
    for method in ('wpath', 'wpath_graph'):
        assert similarity.word_similarity('dancer', 'actor', method) is not None
simfile.write('\n\n') sim_ref = np.array(contents)[:, 2].astype(float) / 4.0 corr = pearson_correlation(sim_cal, sim_ref) with open('results.txt', 'a') as resfile: resfile.write( 'pearson correlation in dataset [%s] for FastText embedding is %f\n' % ('STS-131', corr)) # part 8 with open('datasets/stss-131.csv', newline='') as csvfile: contents = list(csv.reader(csvfile, delimiter=';')) from sematch.semantic.similarity import YagoTypeSimilarity sim = YagoTypeSimilarity() sim_cal = np.array(sentence_similarity_dataset_yago(contents, sim)).reshape(-1, ) with open('sentence_similarity.txt', 'a') as simfile: simfile.write('Using Yago concepts\n') simfile.write('s1; s2; human_sim; method_sim\n\n') for i, pair in enumerate(contents): simfile.write('%s;%s;%s;%f\n' % (pair[0], pair[1], pair[2], sim_cal[i] * 4)) simfile.write('\n\n') sim_ref = np.array(contents)[:, 2].astype(float) / 4.0 corr = pearson_correlation(sim_cal, sim_ref) with open('results.txt', 'a') as resfile:
def __init__(self):
    """Create the four helper objects this instance keeps as private attributes."""
    # Build the collaborators in a fixed order, then attach them.
    name_linker = NameSPARQL()
    extracter = Extraction()
    yago_similarity = YagoTypeSimilarity()
    query_graph = QueryGraph()  # constructed with its default arguments

    self._linker = name_linker
    self._extracter = extracter
    self._yago = yago_similarity
    self._query_graph = query_graph
def __init__(self):
    """Create a ``YagoTypeSimilarity`` instance and keep it on ``self._yago``."""
    yago_similarity = YagoTypeSimilarity()
    self._yago = yago_similarity
# ---------------------------------------------------------------- ''' Description ------------------------------------------------------------------------ Function will define YAGO concepts and calculate the similarity score between sentence 1 and sentence 2 (very similar to PartialSim-function). Inputs ------------------------------------------------------------------------------ s1 sentence 1 (string) s2 sentence 2 (string) method "wpath" or "wpath_graph" (string) Outputs ---------------------------------------------------------------------------- Returns the similarity value in numeric format (between 1 and 0). ''' #Load YAGO sim_yago = YagoTypeSimilarity() #Function for calculating the sentence similarities using YAGO concepts def task4Yago(s1, s2, method): #Format the input sentences to desired form s1 = s1.lower() s2 = s2.lower() #Separate sentence into words. Aka list of words. s1_words = word_tokenize(s1) s2_words = word_tokenize(s2) #POS tags for each word in sentence. pos1 = pos_tag(s1_words) pos2 = pos_tag(s2_words) #Remove stop words from the pos, tagged sentences pos1 = [word for word in pos1 if word[0] not in stopwords.words('english')]