def CalcConnFileFScore(): print "Calcing FScore for:", config.get_string('CONN_FILE'); setConnTups = predicate.ReadConnectionsFileToTupSet(); setGoldConns = LoadGoldStringConnSet(); if config.get_bool('ANALYZE_ON_HARD'): lEasy = data.file_to_obj(config.get_string('EASY_CONNECTIONS_LIST_FILE')); iTruePos = 0; iFalsePos = 0; for sFrom, sTo in setConnTups: if config.get_bool('ANALYZE_ON_HARD'): if sTo in lEasy: continue; if (sFrom, sTo) in setGoldConns: iTruePos += 1; else: iFalsePos += 1; iFalseNeg = len(setGoldConns)-iTruePos; if config.get_bool('ANALYZE_ON_HARD'): iFalseNeg = len(setGoldConns)-iTruePos-len(lEasy); iTrueNeg = 0; fPrecision = float(iTruePos)/float(iTruePos+iFalsePos) if iTruePos > 0 else 0; fRecall = float(iTruePos)/float(iTruePos+iFalseNeg) if iTruePos > 0 else 0; fScore = 2*fPrecision*fRecall/(fPrecision+fRecall) if (fPrecision*fRecall) > 0 else 0; print "TP:", iTruePos, "FP:", iFalsePos, "TN:", iTrueNeg, "FN:", iFalseNeg; print "Precision:", fPrecision; print "Recall:", fRecall; print "FScore:", fScore;
def GetPredPos(self, bIgnoreDir = False):
    """Return True iff the prediction score clears the configured
    classifier's decision threshold."""
    if config.get_bool('SVM'):
        fThreshold = config.get_int('SVM_THRESHOLD')
    elif config.get_bool('LOG_LINEAR'):
        fThreshold = 0.5
    else:
        assert False  # exactly one classifier type must be configured
    return self.GetPred(bIgnoreDir) > fThreshold
def GetWordAndDistance(self, iPathIndex, iWordIndex, iPathLen, depTree):
    """Look up the word at iWordIndex plus its distances to the path
    terminals; optionally mask PDDL-object words and apply stemming."""
    iLeftDis, iRightDis = self.CalcDisToTerminals(iPathIndex, iPathLen)
    sWord = depTree.dIndexToWord[iWordIndex]
    bMaskObjWords = config.get_bool('FEATURES:DONT_GEN_FEATURES_WITH_PDDL_OBJECT_WORDS')
    if bMaskObjWords and obj_extract.IsPddlWord(sWord):
        sWord = 'OBJ_WORD'
    # never stem the OBJ_WORD placeholder itself
    if sWord != 'OBJ_WORD' and config.get_bool('FEATURES:USE_STEMMER'):
        sWord = Stemmer.stem_word(sWord)
    return sWord, iLeftDis, iRightDis
def GetPredPos(self, bIgnoreDir = False):
    """Return the thresholded prediction; with bIgnoreDir a positive
    prediction in either direction counts."""
    if bIgnoreDir:
        return self.bPredPos or self.pddlconn.pddlconnReverse.sample.bPredPos
    if config.get_bool('SVM'):
        fThreshold = config.get_int('SVM_THRESHOLD')
    elif config.get_bool('LOG_LINEAR'):
        fThreshold = 0.5
    else:
        assert False  # no classifier type configured
    return self.fPred > fThreshold
def GetPos(self, bIgnoreDir = False):
    """Return the positive label for this sample from the configured
    label source (gold deps or manual text annotation).

    Self-connections are never positive; with bIgnoreDir a positive in
    either direction counts."""
    textconn = self.pddlconn.textconn
    # don't allow same from/to
    if textconn.sFrom == textconn.sTo:
        return False
    if bIgnoreDir:
        return self.bPos or self.pddlconn.pddlconnReverse.sample.bPos
    if config.get_bool('TRAIN_ON_GOLD_DEP'):
        # the two label sources are mutually exclusive
        assert (not config.get_bool('TRAIN_ON_MANUAL_TEXT_ANNOT'))
        return self.bGoldPos
    elif config.get_bool('TRAIN_ON_MANUAL_TEXT_ANNOT'):
        return self.bManualPos
    else:
        assert False
def TrainAndTestFromGranular(lTrainGranular, lTestGranular):
    """Train/test the model, collapsing granular samples either before or
    after training depending on COLLAPSE_FIRST.

    Returns (collapsed test samples, learned feature weights)."""
    if config.get_bool('COLLAPSE_FIRST'):
        assert not config.get_bool('TEST_AND_TRAIN_ON_BOTH_HALVES')
        lTrainCollapsed = Sample.CollapseSamples(lTrainGranular)
        lTestCollapsed = Sample.CollapseSamples(lTestGranular)
        dFeatureWeights = TrainAndTest(lTrainCollapsed, lTestCollapsed)
    elif config.get_bool('TEST_AND_TRAIN_ON_BOTH_HALVES'):
        # run both directions; only the first run's weights are kept
        dFeatureWeights = TrainAndTest(lTrainGranular, lTestGranular)
        TrainAndTest(lTestGranular, lTrainGranular)
        lTestCollapsed = Sample.CollapseSamples(lTrainGranular + lTestGranular)
    else:
        dFeatureWeights = TrainAndTest(lTrainGranular, lTestGranular)
        lTestCollapsed = Sample.CollapseSamples(lTestGranular)
    return lTestCollapsed, dFeatureWeights
def GetNumPos(self):
    """Positive-reward count; any earlier negative zeroes it unless
    simple pos/neg counting is enabled."""
    if config.get_bool('REWARDS_SIMPLE_POS_NEG'):
        return self.iNumPos
    if self.iNumEarlierNeg > 0:
        return 0
    return self.iNumPos
def GetNumNeg(self):
    """Negative-reward count; any positive zeroes it unless simple
    pos/neg counting is enabled."""
    iTotalNeg = self.iNumNoReachNeg + self.iNumEarlierNeg
    if config.get_bool('REWARDS_SIMPLE_POS_NEG'):
        return iTotalNeg
    if self.GetNumPos() > 0:
        return 0
    return iTotalNeg
def ComputeCompressedReward(self, dConnRewards, lAllPreds):
    """Fold this trace's subgoal sequence into dConnRewards: observed
    transitions are positives, and several config-gated negative signals
    are added for skipped or unreached predicates."""
    if len(self.lFfPreds) == 0:
        return
    bUseEarlierNegs = config.get_bool('REWARD_USE_EARLIER_NEGS')
    sPrev = self.lFfPreds[0].GetObject()
    setEarlier = set([sPrev])
    for predSubgoal in self.lFfPreds:
        sCur = predSubgoal.GetObject()
        if sPrev != sCur:
            dConnRewards[(sPrev, sCur)].iNumPos += 1
            if bUseEarlierNegs:
                # add all the negs from earlier: any not-yet-seen predicate
                # that was not the chosen next hop
                for sNegPred in lAllPreds:
                    if sNegPred not in setEarlier and sNegPred != sCur:
                        dConnRewards[(sNegPred, sCur)].iNumEarlierNeg += 1
        setEarlier.add(sCur)
        sPrev = sCur
    if config.get_bool('REWARD_USE_TARGET_NO_REACH_NEGS') and not self.bSuccess:
        # failed trace: last-reached -> target is a no-reach negative
        sTarget = self.predTarget.GetObject()
        sLastReached = self.lFfPreds[-1].GetObject()
        if sLastReached != sTarget:
            dConnRewards[(sLastReached, sTarget)].iNumNoReachNeg += 1
    if config.get_bool('REWARD_USE_NEXT_HOP_NO_REACH_NEGS') and not self.bSuccess:
        # failed trace: penalize the hop into the first failed subgoal
        predPrev = None
        for predCur in self.lSubgoalPreds:
            if not predCur.result.IsKnown():
                break
            if not predCur.result.IsSuccess():
                if predPrev != None:
                    dConnRewards[(predPrev.GetObject(), predCur.GetObject())].iNumNoReachNeg += 1
                break
            predPrev = predCur
    if config.get_bool('REWARD_USE_PRED_POS'):
        # credit every consecutive pair of successful subgoals
        predPrev = None
        for predCur in self.lSubgoalPreds:
            if not predCur.result.IsKnown():
                break
            if not predCur.result.IsSuccess():
                break
            if predPrev != None:
                dConnRewards[(predPrev.GetObject(), predCur.GetObject())].iNumPos += 1
            predPrev = predCur
def SplitTrainTest(lSamples): if config.get_bool('SPLIT_BY_EASY_HARD'): setEasy = set(data.file_to_obj(config.get_string('EASY_CONNECTIONS_LIST_FILE'))); lTrain = filter(lambda samp: samp.pddlconn.sPddlTo in setEasy, lSamples); lTest = filter(lambda samp: samp.pddlconn.sPddlTo not in setEasy, lSamples); print "NUM Train:", len(lTrain), "Test:", len(lTest); return lTrain, lTest; if config.get_bool('SPLIT_BY_FIRST_30'): setFirst30 = set(data.file_to_obj(config.get_string('FIRST_30_CONNECTIONS_LIST_FILE'))); lTrain = filter(lambda samp: samp.pddlconn.sPddlTo in setFirst30, lSamples); lTest = filter(lambda samp: samp.pddlconn.sPddlTo not in setFirst30, lSamples); print "NUM Train:", len(lTrain), "Test:", len(lTest); return lTrain, lTest; if config.get_bool('SPLIT_BY_SENTENCE'): lRandomIndexes, iSplit = SplitSampleBySentence(lSamples); else: lRandomIndexes = range(len(lSamples)); random.shuffle(lRandomIndexes); iSplit = len(lSamples)/2; if config.get_bool('TRAIN_AND_TEST_ON_ALL'): assert(not config.get_bool('TRAIN_ON_HALF_TEST_ON_ALL')); lTrainIndexes = range(len(lSamples)); lTestIndexes = range(len(lSamples)); elif config.get_bool('TRAIN_ON_HALF_TEST_ON_ALL'): lTrainIndexes = lRandomIndexes[:iSplit]; lTestIndexes = range(len(lSamples)); elif config.get_bool('TRAIN_ON_HALF_TEST_ON_HALF'): lTrainIndexes = lRandomIndexes[:iSplit]; lTestIndexes = lRandomIndexes[iSplit:]; else: assert False, 'No Test Train Split Method Specified'; lTrain = map(lambda x:lSamples[x], lTrainIndexes); lTest = map(lambda x:lSamples[x], lTestIndexes); return lTrain, lTest;
def GetSvmLine(self):
    """Render this sample as one svm-light input line: label, sparse
    binary features, then a trailing human-readable comment."""
    bPos = self.GetPos(bIgnoreDir = config.get_bool('IGNORE_DIR_FOR_TRAINING'))
    sLine = '+1' if bPos else '-1'  #str(self.GetType())
    sFeatureComment = ''
    for iFeature in self.features.GetFeatureIndexList():
        sLine += ' ' + str(iFeature) + ':1'
        sFeatureComment += ' ' + str(iFeature) + '->' + FeatureSpace.FeatureString(iFeature)
    sLine += ('# conn:' + self.pddlconn.textconn.sText + ' Features: ' + sFeatureComment
              + ' ***Sentence--> ' + self.pddlconn.textconn.sentence.sText)
    return sLine
def LoadFullRewards(): dConnRewards = reward.LoadFullRewardsDict(); #print "Len:", len(dConnRewards); #for (sFrom, sTo), cr in dConnRewards.items(): # print sFrom, sTo, "Pos:", cr.iNumPos, "NE:", cr.iNumEarlierNeg, "NR:", cr.iNumNoReachNeg; #lGranularSamples = lGranularSamples[:100]; assert(config.get_bool('COLLAPSE_FIRST')); #add the reward data to the samples themselves lSentences = Sentence.ReadSentencesFromTextFile(); lGranularSamples = Sentence.GenAllGranularSamplesFromList(lSentences, 'sentences.log'); lCollapsedSamples = Sample.CollapseSamples(lGranularSamples); setEasy = set(data.file_to_obj(config.get_string('EASY_CONNECTIONS_LIST_FILE'))); lTrainCollapsed = filter(lambda sample: sample.pddlconn.sPddlTo in setEasy, lCollapsedSamples); lTestCollapsed = filter(lambda sample: sample.pddlconn.sPddlTo not in setEasy, lCollapsedSamples); lTrainingSamples = GenerateTrainingSamplesFromRewards(dConnRewards, lTrainCollapsed); if config.get_bool('SVM'): svm.Train(lTrainingSamples); svm.Test(lTestCollapsed); elif config.get_bool('LOG_LINEAR'): log_linear.TrainAndTestLogLinear(lTrainingSamples, lTestCollapsed); else: assert False; # remove the duplicates setAlreadySeen = set(); lTestNoDups = []; for sample in lTestCollapsed: tKey = (sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo); if tKey not in setAlreadySeen: setAlreadySeen.add(tKey); lTestNoDups.append(sample); fScore, fPrec, fRecall = AnalyzePredsSimple(lTestNoDups); #Sample.CollapsedSample.WriteConnections(lTestNoDups, config.get_string('SVM_REWARD_CONNECTIONS_FILE'), # bAppend=False, bWritePredictions = True, bPosOnly = True); #fOut = open('debug.txt', 'w'); #for sample in lTestCollapsed: # print >> fOut, "Sample:", sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo, sample.fPred; print "Precision: ", fPrec print "Recall: ", fRecall print "F-Score: ", fScore;
def Train(lSamples, j = 1, bSuppress = False):
    """Write lSamples to the SVM training file and invoke the external
    svm_perf_learn / svm_learn binary to produce model.svm."""
    sTrainFile = config.get_string('SVM_TRAIN')
    # NOTE(review): sModelFile is read but the commands below hard-code
    # 'model.svm' as the output -- confirm this is intended
    sModelFile = config.get_string('SVM_MODEL')
    Sample.WriteSvmFile(lSamples, sTrainFile)
    if config.get_bool('USE_SVM_PERF'):
        # version used for good svm results
        sCmd = "../../bin/svm_perf_learn -c 10 -w 3 -l 1 -b 0 " + sTrainFile + " " + "model.svm"
    else:
        sCmd = "../../bin/svm_learn -c 10 -b 0 -m 1000 -j " + str(j) + ' ' + sTrainFile + " " + "model.svm"
    iRetVal = PrintAndRun(sCmd, bSuppress = bSuppress)
def TrainAndTestSvm(lTrainGranular, lTestGranular):
    """Train and evaluate the SVM, collapsing samples before or after
    training per COLLAPSE_FIRST.

    Returns (collapsed test samples, normalized feature weights)."""
    if config.get_bool('COLLAPSE_FIRST'):
        assert not config.get_bool('TEST_AND_TRAIN_ON_BOTH_HALVES')
        lTrainCollapsed = Sample.CollapseSamples(lTrainGranular)
        lTestCollapsed = Sample.CollapseSamples(lTestGranular)
        svm.Train(lTrainCollapsed)
        svm.Test(lTestCollapsed)
    elif config.get_bool('TEST_AND_TRAIN_ON_BOTH_HALVES'):
        # train/test in both directions, then collapse everything
        svm.Train(lTrainGranular)
        svm.Test(lTestGranular)
        svm.Train(lTestGranular)
        svm.Test(lTrainGranular)
        lTestCollapsed = Sample.CollapseSamples(lTrainGranular + lTestGranular)
    else:
        svm.Train(lTrainGranular)
        svm.Test(lTestGranular)
        lTrainCollapsed = Sample.CollapseSamples(lTrainGranular)
        lTestCollapsed = Sample.CollapseSamples(lTestGranular)
    fThreshold, dFeatureWeights = svm.GetNormalizedWeights()
    return lTestCollapsed, dFeatureWeights
def Evaluate(lSamples): bCollapseFirst = config.get_bool('COLLAPSE_FIRST'); lFScores = []; lPrecisions = []; lRecalls = []; dFalsePosCounts = collections.defaultdict(lambda:PredData(bPos = True)); dFalseNegCounts = collections.defaultdict(lambda:PredData(bPos = False)); dTruePosCounts = collections.defaultdict(lambda:PredData(bPos = True)); dTotalCounts = collections.defaultdict(lambda:0); for iIter in range(config.get_int('NUM_ITER')): lTrain, lTest = SplitTrainTest(lSamples); if config.get_bool('SVM'): assert not config.get_bool('LOG_LINEAR'); lTest, dFeatureWeights = TrainAndTestSvm(lTrain, lTest); elif config.get_bool('LOG_LINEAR'): lTest, dFeatureWeights = log_linear.TrainAndTestFromGranular(lTrain, lTest); else: assert False; if config.get_bool('WRITE_TRUE_POS_AND_FALSE_NEG'): UpdateBadPredCounts(dFalsePosCounts, dFalseNegCounts, dTruePosCounts, dTotalCounts, dFeatureWeights, lTest); fScore, fPrec, fRecall = AnalyzePredsSimple(lTest); lFScores.append(fScore); lPrecisions.append(fPrec); lRecalls.append(fRecall); if config.get_bool('WRITE_TRUE_POS_AND_FALSE_NEG'): WriteBadPredCounts(dFalsePosCounts, dFalseNegCounts, dTruePosCounts, dTotalCounts); for fScore in lFScores: print "FScore is:", fScore; print "Average Precision: ", np.average(lPrecisions), "\tStd: ", np.std(lPrecisions); print "Average Recall: ", np.average(lRecalls), "\tStd: ", np.std(lRecalls); print "Average F-Score: ", np.average(lFScores), "\tStd: ", np.std(lFScores);
def WriteFirst30SvmConnectionsFile(lGranularSamples):
    """Write SVM-predicted positive connections for the first-30 split:
    held-out predictions first, then train-on-train predictions appended."""
    assert config.get_bool('SPLIT_BY_FIRST_30')
    lTrainGranular, lTestGranular = SplitTrainTest(lGranularSamples)
    sOutFile = config.get_string('FIRST_30_SVM_CONNECTIONS_FILE')
    lTestCollapsed, dFeatureWeights = TrainAndTestSvm(lTrainGranular, lTestGranular)
    fScore, fPrec, fRecall = AnalyzePredsSimple(lTestCollapsed)
    Sample.CollapsedSample.WriteConnections(lTestCollapsed, sOutFile,
                                            bAppend=False, bWritePredictions = True, bPosOnly = True)
    # note that this one is train on train and test on train (yes those words are correct)
    lTrainCollapsed, dFeatureWeights = TrainAndTestSvm(lTrainGranular, lTrainGranular)
    fScore, fPrec, fRecall = AnalyzePredsSimple(lTrainCollapsed)
    Sample.CollapsedSample.WriteConnections(lTrainCollapsed, sOutFile,
                                            bAppend=True, bWritePredictions = True, bPosOnly = True)
def listener():
    """Receive bencoded UDP packets on the configured address/port forever,
    dispatching each decoded message to handle().

    Unknown senders are either given a temporary peer record (when the
    temp_peer option is on) or ignored.  Errors on a single packet are
    logged and the loop continues; the original silently swallowed them
    (`except Exception as e: pass`), which hid decode and signature bugs.
    """
    import traceback  # local import: logging aid for the fixed except block
    addr = config.get("addr")
    port = config.get_int("port", 0)
    sock.bind((addr, port))
    while True:
        try:
            # Clean temporary peers
            peers.clean_temp()
            packet, peer_addr = sock.recvfrom(2048)
            message = bencode.decode(packet)
            if config.get_bool("dumpraw"):
                print(packet)
            key = config.get("network-key")
            if key:
                # NOTE(review): assert is stripped under -O; consider
                # raising an explicit error instead
                assert NetworkKey.check_signature(message)
            peer = peers.find_by_addr(peer_addr)
            if not peer:
                if config.get_bool("temp_peer"):
                    peer = peers.create_temp(peer_addr)
                else:
                    continue
            if config.get_bool("dump"):
                print(message)
            #print(message, peer_addr, peer.alias)
            handle(message, peer)
            peer.last_received = time.time()
        except Exception:
            # Keep the listener alive on bad packets, but surface the
            # error instead of silently discarding it.
            traceback.print_exc()
def Run():
    """Top-level dispatch: execute the single action selected by config."""
    # stand-alone actions that need no samples
    if config.get_bool('CALC_CONN_FILE_FSCORE'):
        CalcConnFileFScore()
        return
    elif config.get_bool('CALC_EASY_HARD_CONNECTIONS'):
        CalcEasyHardConnections()
        return
    elif config.get_bool('LOAD_FULL_REWARDS'):
        LoadFullRewards()
        return
    lSentences = Sentence.ReadSentencesFromTextFile()
    lGranularSamples = Sentence.GenAllGranularSamplesFromList(lSentences, 'sentences.log')
    if config.get_bool('CALC_ALL_TEXT_FSCORE'):
        CalcAllTextFScore(lGranularSamples)
    elif config.get_string('GRANULAR_SAMPLE_FILE') != '':
        Sample.GranularSample.WriteList(lGranularSamples,
                                        config.get_string('GRANULAR_SAMPLE_FILE'))
    elif config.get_string('SENTENCES_AND_FEATURES_FILE') != '':
        Sample.GranularSample.WriteDebugFromList(lGranularSamples,
                                                 config.get_string('SENTENCES_AND_FEATURES_FILE'))
    elif config.get_string('SVM_CONNECTIONS_FILE') != '':
        WriteSvmConnectionsFile(lGranularSamples)
    elif config.get_string('FIRST_30_SVM_CONNECTIONS_FILE') != '':
        WriteFirst30SvmConnectionsFile(lGranularSamples)
    elif config.get_string('COLLAPSED_MANUAL_TEXT_CONNECTIONS_FILE'):
        lCollapsed = Sample.CollapseSamples(lGranularSamples)
        Sample.CollapsedSample.WriteConnections(
            lCollapsed, config.get_string('COLLAPSED_MANUAL_TEXT_CONNECTIONS_FILE'),
            bPosOnly = True)
    elif config.get_string('COLLAPSED_ALL_TEXT_CONNECTIONS_FILE'):
        lCollapsed = Sample.CollapseSamples(lGranularSamples)
        Sample.CollapsedSample.WriteConnections(
            lCollapsed, config.get_string('COLLAPSED_ALL_TEXT_CONNECTIONS_FILE'),
            bPosOnly = False)
    elif config.get_bool('TRAIN_ON_REWARD_EVAL_ON_GOLD'):
        TrainOnRewardEvalOnGold(lGranularSamples)
    else:
        Evaluate(lGranularSamples)
def Test(lSamples, bSuppress = False):
    """Classify lSamples with the trained SVM model, storing each sample's
    raw prediction score in sample.fPred."""
    sTestFile = config.get_string('SVM_TEST')
    sModelFile = config.get_string('SVM_MODEL')
    sPredFile = config.get_string('SVM_PRED')
    Sample.WriteSvmFile(lSamples, sTestFile)
    if config.get_bool('USE_SVM_PERF'):
        sBinary = "../../bin/svm_perf_classify"
    else:
        sBinary = "../../bin/svm_classify"
    iRetVal = PrintAndRun(sBinary + " " + sTestFile + " " + sModelFile + " " + sPredFile,
                          bSuppress = bSuppress)
    # one prediction line per sample, in order
    sPredLines = open(sPredFile).readlines()
    lPreds = [float(sLine) for sLine in sPredLines]
    assert (len(lPreds) == len(lSamples))
    for fPred, sample in zip(lPreds, lSamples):
        sample.fPred = fPred
def TrainOnRewardEvalOnGold(lGranularSamples): #lGranularSamples = lGranularSamples[:100]; assert(config.get_bool('COLLAPSE_FIRST')); dConnRewards = LoadRewardsDict(); fNegMultiplier = PrintDebugInfo(dConnRewards); #add the reward data to the samples themselves lCollapsedSamples = Sample.CollapseSamples(lGranularSamples); lNewSamples = []; for sample in lCollapsedSamples: dRewards = dConnRewards[sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo]; if dRewards['iNumPos'] > 0: for iIter in range(dRewards['iNumPos']): sampleNew = copy.copy(sample); sampleNew._bPos = True; lNewSamples.append(sampleNew); else: for iIter in range(int(math.ceil(dRewards['iNumNeg']*fNegMultiplier))): sampleNew = copy.copy(sample); sampleNew._bPos = False; lNewSamples.append(sampleNew); lCollapsedSamples = lNewSamples; lFScores = []; lPrecisions = []; lRecalls = []; lTrainCollapsed, lTestCollapsed = SplitTrainTest(lCollapsedSamples); svm.Train(lTrainCollapsed); svm.Test(lTestCollapsed); # remove the duplicates setAlreadySeen = set(); lTestNoDups = []; for sample in lTestCollapsed: tKey = (sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo); if tKey not in setAlreadySeen: setAlreadySeen.add(tKey); lTestNoDups.append(sample); fScore, fPrec, fRecall = AnalyzePredsSimple(lTestNoDups); Sample.CollapsedSample.WriteConnections(lTestNoDups, config.get_string('SVM_REWARD_CONNECTIONS_FILE'), bAppend=False, bWritePredictions = True, bPosOnly = True); fOut = open('debug.txt', 'w'); for sample in lTestCollapsed: print >> fOut, "Sample:", sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo, sample.fPred; print "Precision: ", fPrec print "Recall: ", fRecall print "F-Score: ", fScore;
def __init__(self, sConnText, sentence):
    """Parse a textual connection spec of the form '[+][*]from:i|to:j'.

    A leading '+' marks a positive; a leading '*' marks the connection
    either irrelevant or positive depending on ASTERIX_IS_BAD."""
    self.sentence = sentence
    self.bIrrelevant = False
    self.sText = sConnText
    if self.sText.startswith('+'):
        self.bPos = True
        # skip first char
        self.sText = self.sText[1:]
    else:
        self.bPos = False
    if self.sText.startswith('*'):
        if config.get_bool('ASTERIX_IS_BAD'):
            self.bIrrelevant = True
        else:
            self.bPos = True
        self.sText = self.sText[1:]
    lParts = self.sText.split('|')
    assert (len(lParts) == 2)
    self.sFrom, self.iFrom = lParts[0].split(':')
    self.sTo, self.iTo = lParts[1].split(':')
    self.iFrom = int(self.iFrom)
    self.iTo = int(self.iTo)
def FindLeastCommonDepFromMatrix(self, iFrom, iTo): self.bUseMatrix = True; if not hasattr(self, 'mDist'): self.BuildMatrix(); lPath = self.GetShortestPath(iFrom, iTo); if lPath == None: print "BadMatrix:", iFrom, iTo; print "\tSentence:", self.sentence.sText; #self.PrintDeps('\t'); return None, None; # add ifrom and ito if config.get_bool('FEATURES:ADD_TERMINAL_WORDS'): lPath = [iFrom] + lPath + [iTo]; if len(lPath) == 0: return iFrom, []; # print '----------'; # print self.sentence.sText; # print map(lambda x:self.dIndexToWord[x], lPath); for iIndex in range(len(lPath)-1): if (lPath[iIndex], lPath[iIndex+1]) not in self.dTupToDep: # print self.dIndexToWord[lPath[iIndex]]; return lPath[iIndex], lPath; return lPath[-1], lPath;
def AnalyzePredsSimple(lSamples): if config.get_bool('FORCE_SINGLE_DIR'): dSamples = {}; for sample in lSamples: tKey = (sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo); assert(tKey not in dSamples); dSamples[tKey] = sample; iNumTotal = 0; iNumCorrect = 0; iTruePos = 0; iFalsePos = 0; iTrueNeg = 0; iFalseNeg = 0; iThres = 0; if config.get_bool('SVM'): fThres = config.get_int('SVM_THRESHOLD'); elif config.get_bool('LOG_LINEAR'): fThres = 0.5 else: assert False; if config.get_bool('CALC_FSCORE_ON_GOLD'): setGoldStringConns = LoadGoldStringConnSet() iNumGold = len(setGoldStringConns); if config.get_bool('ANALYZE_ON_HARD'): lEasy = data.file_to_obj(config.get_string('EASY_CONNECTIONS_LIST_FILE')); fPredMin = sys.float_info.max; fPredMax = -sys.float_info.max; for sample in lSamples: if config.get_bool('ANALYZE_ON_HARD'): if sample.pddlconn.sPddlTo in lEasy: continue; if config.get_bool('TRAIN_ON_REWARD_EVAL_ON_GOLD'): bActual = sample.GetGoldPos(bIgnoreDir = config.get_bool('IGNORE_DIR_FOR_EVAL')); else: bActual = sample.GetPos(bIgnoreDir = config.get_bool('IGNORE_DIR_FOR_EVAL')); if config.get_bool('FORCE_SINGLE_DIR'): fPred = sample.fPred; tReverseKey = (sample.pddlconn.sPddlTo, sample.pddlconn.sPddlFrom); fReversePred = dSamples[tReverseKey].fPred if tReverseKey in dSamples else -sys.maxint; bNormalPred = (float(sample.fPred) > fThres); bPred = ((float(sample.fPred) > fThres) and (float(fPred) >= float(fReversePred))); if tReverseKey not in dSamples: print "FORCE-MISSING"; elif (bNormalPred == bActual) and (bPred != bActual): print "FORCE-BAD:", sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo, fPred, fReversePred; elif (bNormalPred != bActual) and (bPred == bActual): print "FORCE-GOOD:", sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo, fPred, fReversePred; else: print "FORCE-NEITHER:", sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo, fPred, fReversePred; else: bPred = sample.GetPredPos(bIgnoreDir = config.get_bool('IGNORE_DIR_FOR_EVAL')); fPredMin = 
min(fPredMin, sample.fPred); fPredMax = max(fPredMax, sample.fPred); iNumTotal += 1; if bPred == bActual: iNumCorrect += 1; if bPred: if bActual: iTruePos += 1; else: iFalsePos += 1; else: if bActual: iFalseNeg += 1; else: iTrueNeg += 1; if config.get_bool('CALC_FSCORE_ON_GOLD'): iFalseNeg = iNumGold - iTruePos; if config.get_bool('ANALYZE_ON_HARD'): iFalseNeg = iNumGold - iTruePos - len(lEasy); fPrecision = float(iTruePos)/float(iTruePos+iFalsePos) if iTruePos > 0 else 0; fRecall = float(iTruePos)/float(iTruePos+iFalseNeg) if iTruePos > 0 else 0; fScore = 2*fPrecision*fRecall/(fPrecision+fRecall) if (fPrecision*fRecall) > 0 else 0; print "FPred: min:", fPredMin, "max:", fPredMax; print "FScore:", fScore, fPrecision, fRecall; print "Frac Correct:", float(iNumCorrect)/float(iNumTotal), iNumCorrect, iNumTotal; print "TP:", iTruePos, "FP:", iFalsePos, "TN:", iTrueNeg, "FN:", iFalseNeg; print "FracPos:", float(iTruePos+iFalsePos)/float(iTrueNeg+iFalseNeg+iTruePos+iFalsePos); return fScore, fPrecision, fRecall;
def print_debug(msg, *args, **dargs):
    """Print a debug line to the main window when debug output is enabled."""
    if not config.get_bool("nigiri", "show_debug"):
        return
    main_window.print_text("=== Debug: " + msg)
def print_debug(msg, *args, **dargs):
    """Emit a debug message to the main window if the show_debug option
    is turned on; otherwise do nothing."""
    if config.get_bool("nigiri", "show_debug"):
        main_window.print_text("=== Debug: " + msg)
def GetSvmLine(self):
    """Return this sample's svm-light line: a +1/-1 label followed by its
    feature string."""
    bPos = self.GetPos(bIgnoreDir = config.get_bool('IGNORE_DIR_FOR_TRAINING'))
    sLabel = '+1' if bPos else '-1'  #str(self.GetType())
    return sLabel + self.GetFeatureString()
def GetFeatureTupList(self):
    """Return (feature, weight) tuples sorted by feature.

    Weights are either the constant 1, or the count normalized by the
    number of granular samples when the weighting flag is set."""
    #return sorted(list(self.setFeatures))
    lCounts = self.GetFeatureCounts()
    if config.get_bool('FEATURES:WEIGHT_COLLAPSE_FIRST_BY_NUM_SENTENCES'):
        fTotal = float(len(self.lGranularSamples))
        lTups = [(sFeature, float(iCount) / fTotal) for (sFeature, iCount) in lCounts]
    else:
        lTups = [(sFeature, 1) for (sFeature, iCount) in lCounts]
    return sorted(lTups, key=lambda tup: tup[0])
def collect_binaries(patterns):
    """Collect binaries matching the whitespace-separated patterns from
    the install 'bin' directory and post-process them (stripping and
    debug-info extraction according to config)."""
    source_dir = path.join(_info.install_dir, 'bin')
    collected = _collect_artifacts(patterns.split(), source_dir, 'bin')
    _postprocess(collected,
                 config.get_bool('strip_binaries'),
                 config.get_bool('extract_debug_info'))
def test_get_bool_where_env_variable_is_not_set(env_key_name):
    """get_bool returns None when the variable is absent from the environment."""
    assert config.get_bool(env_key_name) is None
def test_get_bool_true(env_key_name, env_value, expected_value):
    """get_bool parses a set environment variable into the expected boolean."""
    os.environ[env_key_name] = env_value
    assert config.get_bool(env_key_name) == expected_value
def test_get_bool_incorrect_env_setting(env_key_name):
    """get_bool raises ValueError when the value is not a recognized boolean."""
    os.environ[env_key_name] = "kitten"
    with pytest.raises(ValueError):
        config.get_bool(env_key_name)
def GenFeatures(self):
    """Generate classification features for this sample from its
    sentence's dependency parse.

    Emits sentence-novelty, path-word, path-dependency-type, word-x-dep
    cross, and subj/obj-presence features, each gated by a FEATURES:*
    config flag.  Sets self.bLoopy and returns None when no dependency
    path exists between the connection endpoints.

    Fix: under FEATURES:USE_NON_SENTENCE_DIR_TOO the extra PathDep
    feature previously repeated sSentenceDir, making it byte-identical to
    the base PathDep feature; it now omits sSentenceDir, mirroring the
    PathWord handling."""
    sample = self.sample
    pddlconn = sample.pddlconn
    textconn = pddlconn.textconn
    sentence = textconn.sentence
    bUseStemmer = config.get_bool('FEATURES:USE_STEMMER')  # NOTE(review): unused here
    if (config.get_bool('FEATURES:OLD_SENTENCE') and not sentence.bIsInOld):
        self.AddFeature('NEW_SENTENCE')
    # direction of the connection within the raw sentence text
    if config.get_bool('FEATURES:USE_SENTENCE_DIR'):
        sSentenceDir = 'SentForw::' if textconn.iFrom < textconn.iTo else 'SentBack::'
    else:
        sSentenceDir = ''
    if not config.get_bool('FEATURES:USE_DEPS'):
        return
    iCommonDep, lPath = sentence.deptree.FindLeastCommonDep(textconn.iFrom, textconn.iTo)
    if iCommonDep == None:
        # cyclic / disconnected parse: no usable path
        self.bLoopy = True
        return None
    iPathLen = len(lPath)
    bNonSentenceDirToo = config.get_bool('FEATURES:USE_NON_SENTENCE_DIR_TOO')
    if config.get_bool('FEATURES:USE_PATH_WORDS'):
        # add features for all words along path
        for i in range(iPathLen):
            iWordIndex = lPath[i]
            sWord, iLeftDis, iRightDis = self.GetWordAndDistance(i, iWordIndex,
                                                                 iPathLen, sentence.deptree)
            if sWord == 'OBJ_WORD':
                continue
            self.AddFeature('PathWord::%s::%d::%d::%s'
                            % (sWord, iLeftDis, iRightDis, sSentenceDir))
            if bNonSentenceDirToo:
                self.AddFeature('PathWord::%s::%d::%d' % (sWord, iLeftDis, iRightDis))
    if config.get_bool('FEATURES:USE_PATH_DEP_TYPES') and len(lPath) > 0:
        # add features for dep types along the path
        bUsePathDir = config.get_bool('FEATURES:USE_PATH_DIR')
        sPathDirFor = 'Forw::' if bUsePathDir else ''
        sPathDirBack = 'Back::' if bUsePathDir else ''
        bCrossFeatures = config.get_bool('FEATURES:USE_WORD_CROSS_DEPTYPE_FEATURES')
        iPrev = lPath[0]
        bContainsSubj = False
        bContainsObj = False
        bContainsPrepObj = False
        for i in range(1, len(lPath)):
            iPathCur = lPath[i]
            dep = None
            sPathDir = ''
            if (iPrev, iPathCur) in sentence.deptree.dTupToDep:
                sPathDir = sPathDirFor
                dep = sentence.deptree.dTupToDep[(iPrev, iPathCur)]
                sDepType = dep.sType
                bContainsSubj |= IsSubjDepType(sDepType)
                bContainsObj |= IsObjDepType(sDepType)
                bContainsPrepObj |= IsPrepObjDepType(sDepType)
            if (iPathCur, iPrev) in sentence.deptree.dTupToDep:
                # a reverse edge overrides a forward edge when both exist
                sPathDir = sPathDirBack
                dep = sentence.deptree.dTupToDep[(iPathCur, iPrev)]
                sDepType = dep.sType
                bContainsSubj |= IsSubjDepType(sDepType)
                bContainsObj |= IsObjDepType(sDepType)
                bContainsPrepObj |= IsPrepObjDepType(sDepType)
            if dep != None:
                sLWord, iLLDis, iLRDis = self.GetWordAndDistance(i - 1, iPrev,
                                                                 iPathLen, sentence.deptree)
                sRWord, iRLDis, iRRDis = self.GetWordAndDistance(i, iPathCur,
                                                                 iPathLen, sentence.deptree)
                # dep feature
                self.AddFeature('PathDep::%s::%d::%d::%s::%s'
                                % (sDepType, iLLDis, iRRDis, sPathDir, sSentenceDir))
                if bNonSentenceDirToo:
                    # BUGFIX: omit sSentenceDir here (was a duplicate of
                    # the base feature above), mirroring the PathWord case
                    self.AddFeature('PathDep::%s::%d::%d::%s'
                                    % (sDepType, iLLDis, iRRDis, sPathDir))
                # dep*word / word*dep feature
                if bCrossFeatures:
                    self.AddFeature('PathWordXDep::%s::%s::%d::%d::%s::%s'
                                    % (sLWord, sDepType, iLLDis, iRRDis, sPathDir, sSentenceDir))
                    self.AddFeature('PathDepXWord::%s::%s::%d::%d::%s::%s'
                                    % (sDepType, sRWord, iLLDis, iRRDis, sPathDir, sSentenceDir))
            iPrev = iPathCur
        if config.get_bool('FEATURES:CHECK_CONTAINS_BOTH_SUBJ_AND_OBJ'):
            if bContainsObj and bContainsSubj:
                self.AddFeature('CONTAINS_BOTH_SUBJ_AND_OBJ')
            else:
                self.AddFeature('DOESNT_CONTAIN_BOTH_SUBJ_AND_OBJ')
            if bContainsPrepObj and bContainsSubj:
                self.AddFeature('CONTAINS_BOTH_SUBJ_AND_PREP_OBJ')
            else:
                self.AddFeature('DOESNT_CONTAIN_BOTH_SUBJ_AND_PREP_OBJ')