def calculateMaxAccuracy(expectedWord, finalReorderedTargetsFileName, predicted_words=None, limitSuggestions=10):
    """Return [best similarity, best word] for expectedWord over a candidate set.

    Candidates come either from predicted_words (when no file name is given)
    or from the first limitSuggestions lines of the reordered-targets file,
    where each line is tab-separated and the candidate is the first field.
    Any "-" suffix on expectedWord is stripped before comparison.
    """
    if "-" in expectedWord:
        expectedWord = expectedWord[:expectedWord.index("-")]

    best_score = 0.0
    best_word = None

    def consider(candidate):
        # Track the running maximum word2vec similarity and its word.
        nonlocal best_score, best_word
        score = conceptnet_util.getWord2VecSimilarity(expectedWord, candidate, True)
        if score > best_score:
            best_score = score
            best_word = candidate

    if finalReorderedTargetsFileName is None:
        # In-memory candidate list: no suggestion limit is applied on this
        # path (matches the original behavior).
        for phrase in predicted_words:
            consider(phrase)
    else:
        with open(finalReorderedTargetsFileName, 'r') as targets_file:
            for rank, row in enumerate(targets_file):
                if rank == limitSuggestions:
                    break
                consider(row.split("\t")[0].strip())

    return [best_score, best_word]
def createObjective(m, seeds, targets, variables, objective, targetsToCentralities, seedsDetected_weights):
    """Extend a Gurobi linear objective with seed->target PSL-style rule terms.

    For each detected seed (keys of seedsDetected_weights, assumed to carry a
    one-character index suffix — TODO confirm) and each target word, a rule
    variable is created when the ConceptNet similarity exceeds
    util.SIMILARITY_THRESHOLD_ONEWORDRULE_PSL_ONE.  Each rule variable r is
    constrained to r >= 0 and r >= seeds[seed] - targets[target], i.e. the
    hinge max(seed - target, 0), and contributes weight * r to the objective.

    Returns the augmented objective expression; mutates the model m by adding
    variables and constraints.
    """
    globalWeightSum = 0
    tupleOfObjectives = []
    #print(seedsDetected_weights.keys());
    for seed_index in seedsDetected_weights.keys():
        # Strip the trailing index character to recover the bare seed word.
        seedWithoutIndex = seed_index[:len(seed_index) - 1]
        tuplesOfConstraints = []
        for targetWord in targetsToCentralities.keys():
            centralityOfTarget = targetsToCentralities[targetWord]
            #try:
            sim_word1 = conceptnet_util.getSimilarity(seedWithoutIndex, targetWord, True)
            word2vecsimilarity = conceptnet_util.getWord2VecSimilarity(
                seedWithoutIndex, targetWord, True)
            #except KeyError as e:
            #print("seedWithoutIndex:%s, targetWord:%s\n" % (seedWithoutIndex,targetWord));
            #raise e;
            if word2vecsimilarity == conceptnet_util.NOT_FOUND_IN_CORPUS_CODE:
                # No word2vec vector available: fall back to ConceptNet alone.
                similarity = sim_word1
            else:
                # Weighted blend of ConceptNet and word2vec similarities.
                similarity= (sim_word1*util.CONCEPTNET_SIMILARITY_WEIGHT+\
                             word2vecsimilarity*util.WORD2VEC_SIMILARITY_WEIGHT)/\
                            (util.CONCEPTNET_SIMILARITY_WEIGHT+util.WORD2VEC_SIMILARITY_WEIGHT)
            # NOTE(review): the rule-firing test uses the raw ConceptNet score
            # (sim_word1), not the blended similarity — confirm this is intended.
            if sim_word1 > util.SIMILARITY_THRESHOLD_ONEWORDRULE_PSL_ONE:
                ruleVar = targetWord + "_" + seed_index
                ruleVariable = m.addVar(name=ruleVar)
                ## use normalized value for centrality
                # Less-central (rarer) targets get a larger bonus, discouraging
                # overly popular targets from dominating.
                penaltyForPopularity = 0.5 * util.computeNormalizedValue(
                    1.0 / centralityOfTarget, 3.0, 0.0)
                weight = similarity + penaltyForPopularity
                #objective+= weight*ruleVariable;
                globalWeightSum = globalWeightSum + weight
                tupleOfObjectives.append((ruleVariable, weight))
                tuplesOfConstraints.append(
                    (ruleVariable, seed_index, targetWord))
        # Variables must be registered with the model before constraints
        # referencing them can be added.
        m.update()
        #### Add all the constraints in one shot
        for tupleC in tuplesOfConstraints:
            #max(seeds[seed1]-targets[target1],0);
            m.addConstr(tupleC[0], GRB.GREATER_EQUAL, 0)
            m.addConstr(tupleC[0], GRB.GREATER_EQUAL,
                        seeds[tupleC[1]] - targets[tupleC[2]])
    ################################################
    ###### IMPORTANT: Normalize the weights
    ###### before creating the objective function
    ################################################
    # NOTE(review): normalization by globalWeightSum is commented out below,
    # so raw weights are used despite the banner above.
    for tupleO in tupleOfObjectives:
        weight = tupleO[1]  #/globalWeightSum;
        objective += weight * tupleO[0]
    return objective
def calculateRelativeAccuracy(expectedWord, detections, limitSuggestions=20):
    """Return [best similarity, best word] over the first limitSuggestions detections.

    Compares expectedWord (with any "-" suffix stripped) against each detected
    word via word2vec similarity and keeps the maximum.

    NOTE(review): this file defines calculateRelativeAccuracy three times with
    different signatures; later definitions shadow this one at import time.
    """
    if "-" in expectedWord:
        expectedWord = expectedWord[:expectedWord.index("-")]
    maxSimilarity = 0
    similarWord = None
    # BUG FIX: the original re-initialized i = 0 inside the loop body, so the
    # counter never advanced past the reset and limitSuggestions was never
    # enforced. The counter must be initialized once, before the loop.
    i = 0
    for word in detections:
        if i == limitSuggestions:
            break
        similarity = conceptnet_util.getWord2VecSimilarity(expectedWord, word.strip(), True)
        if similarity > maxSimilarity:
            maxSimilarity = similarity
            similarWord = word.strip()
        i = i + 1
    return [maxSimilarity, similarWord]
def calculateRelativeAccuracy(expectedWord, finalReorderedTargetsFileName, limitSuggestions=50):
    """Return [best similarity, best word] among the top limitSuggestions file lines.

    Each line of the reordered-targets file is tab-separated; the candidate is
    the first field. Any "-" suffix on expectedWord is stripped first.
    """
    if "-" in expectedWord:
        expectedWord = expectedWord[:expectedWord.index("-")]

    best_score = 0
    best_word = None
    with open(finalReorderedTargetsFileName, 'r') as targets_file:
        for rank, row in enumerate(targets_file):
            if rank == limitSuggestions:
                break
            candidate = row.split("\t")[0].strip()
            score = conceptnet_util.getWord2VecSimilarity(expectedWord, candidate, True)
            if score > best_score:
                best_score = score
                best_word = candidate
    return [best_score, best_word]
def calculateAverageAccuracy(expectedWord, finalReorderedTargetsFileName, limitSuggestions=10):
    """Return the mean word2vec similarity between expectedWord and the top
    limitSuggestions candidates in the reordered-targets file.

    Each line is tab-separated; the candidate is the first field. Any "-"
    suffix on expectedWord is stripped before comparison. Returns 0.0 when
    the file contains no lines.
    """
    if "-" in expectedWord:
        expectedWord = expectedWord[:expectedWord.index("-")]
    avgSimilarity = 0.0
    with open(finalReorderedTargetsFileName, 'r') as f:
        i = 0.0
        for line in f:
            tokens = line.split("\t")
            if i == limitSuggestions:
                break
            similarity = conceptnet_util.getWord2VecSimilarity(
                expectedWord, tokens[0].strip(), True)
            avgSimilarity = avgSimilarity + similarity
            i = i + 1
    # BUG FIX: the original divided unconditionally, raising ZeroDivisionError
    # on an empty targets file. Treat "no candidates" as zero accuracy.
    if i == 0:
        return 0.0
    avgSimilarity = avgSimilarity / i
    return avgSimilarity
def calculateRelativeAccuracy(expectedWord, finalReorderedTargetsFileName, limitSuggestions=100):
    """Return the maximum word2vec similarity between expectedWord and the
    first limitSuggestions candidates in the reordered-targets file.

    Unlike the two-element variants above/below in this module, this version
    returns only the scalar maximum, not the matching word.
    """
    if "-" in expectedWord:
        expectedWord = expectedWord[:expectedWord.index("-")]

    best_score = 0
    with open(finalReorderedTargetsFileName, 'r') as targets_file:
        for rank, row in enumerate(targets_file):
            if rank == limitSuggestions:
                break
            candidate = row.split("\t")[0].strip()
            score = conceptnet_util.getWord2VecSimilarity(expectedWord, candidate, True)
            best_score = max(best_score, score)
    return best_score
def createPSLBasedObjectiveFunction(m,objective,variables,seeds,targets,targetsToCentralities,\
                                    seedHypernymFilterSet, seedsDetected_weights, orderedSeedWordsList, \
                                    sortedScoreAndIndexList, targetWordsList, targetWordsDictonary, \
                                    pairwiseDistancesTargetWords):
    """Build the PSL-style Gurobi objective over targets using two rule families.

    For each of the top util.MAX_TARGETS_PSL_ONE targets (in sortedScoreAndIndexList
    order) this adds:
      1. target<->target rules: pairs of hinge variables forcing highly similar
         targets (blended distance/word2vec similarity > 0.92) toward equal values;
      2. seed->target rules: one hinge variable per sufficiently similar seed,
         weighted by blended similarity plus a centrality-based bonus.
    Each hinge variable r gets r >= 0 and r >= lhs - rhs constraints, and
    contributes weight * r to the objective.

    Mutates the model m and the `variables` set; returns the augmented objective.
    """
    globalWeightSum = 0
    tupleOfObjectives = []
    turnOn2WordRules = False  # NOTE(review): never read in this function
    # Iterate over the targets
    for indexTarget in range(0, len(sortedScoreAndIndexList)):
        if indexTarget > util.MAX_TARGETS_PSL_ONE:
            break
        indexAndScore = sortedScoreAndIndexList[indexTarget]
        targetWord = targetWordsList[indexAndScore[0]]
        # Per-seed combined similarity scores for this target.
        comb_similarity_list = targetWordsDictonary[targetWord]
        nonzeroSeedIndices = np.nonzero(comb_similarity_list)[0]  # NOTE(review): unused
        centralityOfTarget = targetsToCentralities[targetWord]
        tuplesOfConstraints = []
        word2vecKeyFoundCode = conceptnet_util.getWord2VecKeyFoundCode(
            targetWord)
        if word2vecKeyFoundCode == conceptnet_util.TOO_RARE_WORD_CODE:
            # Too-rare words are pinned to 0 and excluded from all rules.
            m.addConstr(targets[targetWord], GRB.LESS_EQUAL, 0)
            continue
        '''
        ###########################################################
        ###### Get top (500) similar other targets.
        ###### if the top targets are on the current
        ###### suggested_targets list=> add wt: target_i -> target_j wt: target_j -> target_i forces them to be similar
        ###########################################################
        '''
        # Indices of other targets ordered by ascending pairwise distance
        # (i.e. most similar first).
        similarIndicesList = np.argsort(
            pairwiseDistancesTargetWords[indexTarget, :])
        topk = 0
        tuplesOfConstraints = []
        for topIndex in similarIndicesList:
            indexOfSimilarTerm = sortedScoreAndIndexList[topIndex][0]
            term = targetWordsList[indexOfSimilarTerm]
            ## normalize u.v/|u|.|v|
            word2vecKeyFoundCode = conceptnet_util.getWord2VecKeyFoundCode(
                term)
            if word2vecKeyFoundCode == conceptnet_util.TOO_RARE_WORD_CODE:
                continue
            word2vecsimilarity = conceptnet_util.getWord2VecSimilarity(
                targetWord, term, True)
            if word2vecsimilarity == conceptnet_util.NOT_FOUND_IN_CORPUS_CODE:
                # Distance-based similarity only, rescaled from [-1,1] to [0,1].
                similarity = util.computeNormalizedValue(1-\
                    pairwiseDistancesTargetWords[indexTarget,topIndex],1,-1)
            else:
                # Average of the normalized distance-based similarity and the
                # word2vec similarity.
                similarity = util.computeNormalizedValue(1-\
                    pairwiseDistancesTargetWords[indexTarget,topIndex],1,-1)+word2vecsimilarity
                similarity = similarity / 2.0
            ruleVar1 = targetWord + "_" + term
            ruleVar2 = term + "_" + targetWord
            if similarity <= 0.92 or topk >= util.TOP_K_SIMILAR_TARGETS_PSL_ONE:
                #if VERBOSE:
                #    print('%s not-matches %s:%g' % (targetWord,term,similarity));
                # The candidate list is sorted by distance, so once similarity
                # drops below the threshold no later candidate can pass: break.
                break
            if (term != targetWord) and (term in targets) and (ruleVar1 not in variables):
                weight = similarity
                # Symmetric pair of rules: target->term and term->target,
                # together pulling the two target scores toward each other.
                ruleVariable1 = m.addVar(name=ruleVar1)
                globalWeightSum = globalWeightSum + weight
                tupleOfObjectives.append((ruleVariable1, weight))
                tuplesOfConstraints.append((ruleVariable1, targetWord, term))
                variables.add(ruleVar1)
                ruleVariable2 = m.addVar(name=ruleVar2)
                globalWeightSum = globalWeightSum + weight
                tupleOfObjectives.append((ruleVariable2, weight))
                tuplesOfConstraints.append((ruleVariable2, term, targetWord))
                variables.add(ruleVar2)
                topk = topk + 1
                #if VERBOSE:
                #    print('%s matches %s:%g' % (targetWord,term,similarity));
        # Register the new variables before constraining them.
        m.update()
        #### Add all the constraints in one shot
        for tupleC in tuplesOfConstraints:
            #max(targets[target1]-targets[target2],0);
            m.addConstr(tupleC[0], GRB.GREATER_EQUAL, 0)
            m.addConstr(tupleC[0], GRB.GREATER_EQUAL,
                        targets[tupleC[1]] - targets[tupleC[2]])
        #######################################################
        ####### n=#seeds, Get all n rules for each target.
        ####### For all seeds, for which similarity exceeds a threshold
        #######################################################
        tuplesOfConstraints = []
        for seedIndex in range(len(orderedSeedWordsList)):
            seed = orderedSeedWordsList[seedIndex]
            sim_word1 = comb_similarity_list[seedIndex]
            word2vecsimilarity = conceptnet_util.getWord2VecSimilarity(
                targetWord, seed, True)
            if word2vecsimilarity == conceptnet_util.NOT_FOUND_IN_CORPUS_CODE:
                similarity = sim_word1
            else:
                similarity= (sim_word1*util.CONCEPTNET_SIMILARITY_WEIGHT+\
                             word2vecsimilarity*util.WORD2VEC_SIMILARITY_WEIGHT)/\
                            (util.CONCEPTNET_SIMILARITY_WEIGHT+util.WORD2VEC_SIMILARITY_WEIGHT)
            # Hypernym, then dont consider
            # NOTE(review): this skips seeds NOT in seedHypernymFilterSet, which
            # reads as "keep only filtered seeds" — confirm the set's polarity.
            if seed not in seedHypernymFilterSet:
                continue
            if sim_word1 > util.SIMILARITY_THRESHOLD_ONEWORDRULE_PSL_ONE:
                seed1 = seed + "1"
                ruleVar = targetWord + "_" + seed1
                ruleVariable = m.addVar(name=ruleVar)
                ## use normalized value for centrality
                penaltyForPopularity = 0.5 * util.computeNormalizedValue(
                    1.0 / centralityOfTarget, 3.0, 0.0)
                weight = similarity + penaltyForPopularity  #1.0/centralityOfTarget;
                #objective+= weight*ruleVariable;
                globalWeightSum = globalWeightSum + weight
                tupleOfObjectives.append((ruleVariable, weight))
                tuplesOfConstraints.append((ruleVariable, seed1, targetWord))
        m.update()
        #### Add all the constraints in one shot
        for tupleC in tuplesOfConstraints:
            #max(seeds[seed1]-targets[target1],0);
            m.addConstr(tupleC[0], GRB.GREATER_EQUAL, 0)
            m.addConstr(tupleC[0], GRB.GREATER_EQUAL,
                        seeds[tupleC[1]] - targets[tupleC[2]])
    ################################################
    ###### IMPORTANT: Normalize the weights
    ###### before creating the objective function
    ################################################
    # NOTE(review): normalization by globalWeightSum is commented out below,
    # so raw weights are used despite the banner above.
    for tupleO in tupleOfObjectives:
        weight = tupleO[1]  #/globalWeightSum;
        objective += weight * tupleO[0]
    return objective