def ParseParaStr(self, ParaStr):
    """Parse a 'key1=v1,key2=v2' parameter string into a cxConfC object."""
    conf = cxConfC()
    # one split per comma-separated pair; each pair must contain exactly one '='
    conf.hConf = dict(Pair.split('=') for Pair in ParaStr.split(','))
    return conf
def ParseParaStr(self, ParaStr):
    """Turn a comma-separated key=value string into a cxConfC configuration."""
    hPara = {}
    for Pair in ParaStr.split(','):
        Key, Value = Pair.split('=')
        hPara[Key] = Value
    conf = cxConfC()
    conf.hConf = hPara
    return conf
def SetConf(self, ConfIn):
    """Read query/output paths and per-query limits from the conf file."""
    Reader = cxConfC(ConfIn)
    self.InQuery = Reader.GetConf('in')
    # stemmed query file defaults to the raw query file when not given
    self.InStemQuery = Reader.GetConf('stemmedq', self.InQuery)
    self.OutName = Reader.GetConf('out')
    self.MinPhraseLen = int(Reader.GetConf('minphraselen', self.MinPhraseLen))
    self.NumOfObjPerQ = int(Reader.GetConf('numofobj', self.NumOfObjPerQ))
def SetConf(self, ConfIn):
    """Configure the evaluator: base conf, qrel file list, evaluation depth.

    Calls Prepare() at the end so the loaded qrels are ready for use.
    Returns True on success.
    """
    cxBaseC.SetConf(self, ConfIn)
    conf = cxConfC(ConfIn)
    self.lQRelIn = conf.GetConf("qrel", self.lQRelIn)
    # GetConf may return a single string when only one qrel is configured;
    # normalize to a list so downstream code can iterate uniformly.
    # (isinstance instead of the original `type(...) != list` anti-pattern.)
    if not isinstance(self.lQRelIn, list):
        self.lQRelIn = [self.lQRelIn]
    self.Depth = int(conf.GetConf("evadepth", self.Depth))
    self.Prepare()
    return True
def SetConf(self, ConfIn):
    """Load doc texts, configure the object center, and load object ctf stats."""
    cfg = cxConfC(ConfIn)
    self.DocTextDir = cfg.GetConf('doctextdir')
    self.LoadDocText()
    self.ObjCenter.SetConf(ConfIn)
    # object collection-term-frequency file
    self.CtfCenter.Load(cfg.GetConf('objctf'))
def Init(self):
    """Set the default members: evaluator, searcher, and language-model setup."""
    cxBaseC.Init(self)
    self.Evaluator = AdhocEvaC()
    self.Searcher = IndriSearchCenterC()
    # language-model defaults: kernel density estimation
    self.cLmName = "kde"
    self.LmClass = KernelDensityLmC
    # word2vec model not configured/loaded yet
    self.Word2VecInName = ""
    self.Word2VecModel = None
    # holds the lm parameter configuration
    self.ParaConf = cxConfC()
def Process(self, TrainQueryIn, TestQueryIn, ParaStr, EvaOutName):
    """Train a ranking model on TrainQueryIn, score TestQueryIn, and write
    per-query evaluation results (plus the mean) to EvaOutName.

    ParaStr is a 'key=value,key=value' string carrying datadir / optmethod /
    convergethreshold overrides. Returns True when the pipeline finishes.
    """
    logging.info('training using [%s] testing using [%s] eva out to [%s]', TrainQueryIn, TestQueryIn, EvaOutName)
    conf = cxConfC()
    conf.ParseParaStr(ParaStr)
    self.DataDir = conf.GetConf('datadir', self.DataDir)
    OptMethod = conf.GetConf('optmethod', 'BFGS')
    ConvergeThreshold = conf.GetConf('convergethreshold', 1e-05)
    logging.info('pipe start training')
    # per-query lists of doc feature data for train and test splits
    llTrainQDocData = self.ReadTargetQDocData(TrainQueryIn, self.DataDir)
    llTestQDocData = self.ReadTargetQDocData(TestQueryIn, self.DataDir)
    # w: learned linear weight vector (numpy array)
    w = self.Learner.Train(llTrainQDocData, OptMethod, ConvergeThreshold)
    logging.info('trained w:\n%s', np.array2string(w, precision=6))
    logging.info('pipe start testing')
    # test query ids come from the first tab-separated column of the query file
    lQid = [line.split('\t')[0] for line in open(TestQueryIn).read().splitlines()]
    # score each test doc with the learned weights: [[docno, score], ...] per query
    llDocScore = [[[data.DocNo, data.X.dot(w)] for data in lTestQDocData]
                  for lTestQDocData in llTestQDocData]
    logging.info('pipe start evaluating')
    lEvaRes = []
    for qid, lDocScore in zip(lQid, llDocScore):
        # rank docs by descending score before evaluating
        lDocScore.sort(key=lambda item: item[1], reverse=True)
        lDocNo = [item[0] for item in lDocScore]
        EvaRes = self.Evaluator.EvaluatePerQ(qid, "", lDocNo)
        lEvaRes.append(EvaRes)
    # append the mean over all queries as a pseudo-query 'mean'
    MeanEvaRes = AdhocMeasureC.AdhocMeasureMean(lEvaRes)
    lEvaRes.append(MeanEvaRes)
    lQid.append('mean')
    out = open(EvaOutName, 'w')
    for qid, EvaRes in zip(lQid, lEvaRes):
        print >>out, qid + '\t' + EvaRes.dumps()
    out.close()
    logging.info('finished, eva res [%s]', lEvaRes[-1].dumps())
    return True
def SetConf(self, ConfIn):
    """Load term ctf stats, object center, object rank inputs and io paths."""
    cfg = cxConfC(ConfIn)
    self.CtfCenter.Load(cfg.GetConf('termctf'))
    self.ObjCenter.SetConf(ConfIn)
    # two object ranking sources: facc-based and google-based
    self.QFaccObjRankName = cfg.GetConf('faccrank')
    self.QGoogleObjRankName = cfg.GetConf('googlerank')
    self.ObjRankDepth = int(cfg.GetConf('objrankdepth', self.ObjRankDepth))
    self.InName = cfg.GetConf('in')
    self.OutName = cfg.GetConf('out')
def AdhocEvaUnitTest(ConfIn = ""): #UnitTest add hoc eva #input: trec type input + qrel #output: evaluation result if "" == ConfIn: print "conf:\nin\nqrel\nevadepth\nout\n" return False conf = cxConfC(ConfIn) InName = conf.GetConf('in') OutName = conf.GetConf('out') AdhocEva = AdhocEvaC(ConfIn) MeanRes = AdhocMeasureC() cnt = 0 CurrentQid = "" lDocNo = [] out = open(OutName,'w') for line in open(InName): line = line.strip() vCol = line.split() ThisQid = vCol[0] DocNo = vCol[2] if CurrentQid == "": CurrentQid = ThisQid if CurrentQid != ThisQid: EvaRes = AdhocEva.EvaluatePerQ(CurrentQid, lDocNo) OutStr = CurrentQid + " %s" %(EvaRes.dumps()) #this is bad MeanRes = MeanRes + EvaRes cnt += 1 print >> out, OutStr CurrentQid = ThisQid lDocNo = [] lDocNo.append(DocNo) EvaRes = AdhocEva.EvaluatePerQ(CurrentQid, lDocNo) OutStr = CurrentQid + " %s" %(EvaRes.dumps()) MeanRes = MeanRes + EvaRes cnt += 1 print >> out, OutStr MeanRes = MeanRes / cnt print >> out,"mean %s" %(MeanRes.dumps()) out.close() return True
if __name__=='__main__': import sys,os from AdhocEva.RankerEvaluator import RankerEvaluatorC if 2 != len(sys.argv): print 'I evaluate Boe exp model ' print 'in\nout' BoePRFRerankerC.ShowConf() RankerEvaluatorC.ShowConf() sys.exit() root = logging.getLogger() root.setLevel(logging.DEBUG) ch = logging.StreamHandler(sys.stdout) # ch.setLevel(logging.DEBUG) formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') ch.setFormatter(formatter) root.addHandler(ch) conf = cxConfC(sys.argv[1]) QIn = conf.GetConf('in') EvaOut = conf.GetConf('out') Ranker = BoePRFRerankerC(sys.argv[1]) Evaluator = RankerEvaluatorC(sys.argv[1]) Evaluator.Evaluate(QIn, Ranker.Rank, EvaOut)
# NOTE(review): this chunk begins mid-definition -- the `def` that encloses the
# loop below is outside the visible source, and `self`, `out` and `SVMInName`
# are bound there. Indentation is reconstructed; confirm against the full file.
for line in open(SVMInName):
    # one LeToR instance per input line; dump the processed result
    LeToRData = LeToRDataBaseC(line.strip())
    LeToRData = self.ProcessOneInstance(LeToRData)
    print >> out, LeToRData.dumps()
out.close()
print "finished"


if __name__ == '__main__':
    import sys
    # exactly one argument (the conf file) is required
    if 2 != len(sys.argv):
        ExtractDocVecFeatureToSVMDataC.ShowConf()
        print "in\nout"
        sys.exit()
    # route log records to stdout at INFO level
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.DEBUG)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    ch.setFormatter(formatter)
    root.addHandler(ch)
    conf = cxConfC(sys.argv[1])
    SVMInName = conf.GetConf('in')
    OutName = conf.GetConf('out')
    # extract doc-vector features from the svm-format input and write them out
    Extractor = ExtractDocVecFeatureToSVMDataC(sys.argv[1])
    Extractor.Process(SVMInName, OutName)
def SetConf(self, ConfIn):
    """Read the facc data directory path from the conf file."""
    self.FaccDir = cxConfC(ConfIn).GetConf('faccdir')
def SetConf(self, ConfIn):
    """Load method names, baseline names, and evaluation-result input paths."""
    cfg = cxConfC(ConfIn)
    # each defaults to an empty list when the key is absent
    self.lName = cfg.GetConf('methodname', [])
    self.lBaseName = cfg.GetConf('baseline', [])
    self.lResInName = cfg.GetConf('methodevares', [])