def ParseParaStr(self, ParaStr):
    """Parse a comma-separated 'key=value' string into a cxConfC object.

    Malformed pairs (no '=' or multiple '=') surface as a ValueError
    from dict(), exactly as before.
    """
    lPairs = [Item.split('=') for Item in ParaStr.split(',')]
    Parsed = cxConfC()
    Parsed.hConf = dict(lPairs)
    return Parsed
 def ParseParaStr(self, ParaStr):
     """Build a cxConfC whose hConf dict comes from a 'k=v,k=v' string."""
     NewConf = cxConfC()
     # each comma-separated item is split on '=' into a [key, value] pair
     NewConf.hConf = dict([Pair.split('=') for Pair in ParaStr.split(',')])
     return NewConf
 def SetConf(self, ConfIn):
     """Read query/output/phrase settings from the configuration ConfIn."""
     Cfg = cxConfC(ConfIn)
     self.InQuery = Cfg.GetConf('in')
     # stemmed query falls back to the raw query when not configured
     self.InStemQuery = Cfg.GetConf('stemmedq', self.InQuery)
     self.OutName = Cfg.GetConf('out')
     # numeric options keep their current values as defaults
     self.MinPhraseLen = int(Cfg.GetConf('minphraselen', self.MinPhraseLen))
     self.NumOfObjPerQ = int(Cfg.GetConf('numofobj', self.NumOfObjPerQ))
Example #4
0
    def SetConf(self, ConfIn):
        """Load qrel file list and evaluation depth, then prepare the evaluator."""
        cxBaseC.SetConf(self, ConfIn)
        Cfg = cxConfC(ConfIn)
        QRel = Cfg.GetConf("qrel", self.lQRelIn)
        # a single qrel path may come back as a bare string; normalize to a list
        self.lQRelIn = QRel if type(QRel) == list else [QRel]
        self.Depth = int(Cfg.GetConf("evadepth", self.Depth))
        self.Prepare()
        return True
Example #5
0
 def SetConf(self, ConfIn):
     """Load document texts, configure the object center, and load object ctf."""
     Cfg = cxConfC(ConfIn)
     # document texts are read eagerly from the configured directory
     self.DocTextDir = Cfg.GetConf('doctextdir')
     self.LoadDocText()
     self.ObjCenter.SetConf(ConfIn)
     self.CtfCenter.Load(Cfg.GetConf('objctf'))
    def Init(self):
        """Set up evaluation/search helpers and language-model defaults."""
        cxBaseC.Init(self)
        # evaluation and retrieval back-ends
        self.Evaluator = AdhocEvaC()
        self.Searcher = IndriSearchCenterC()
        # word2vec model is loaded later from the configured name
        self.Word2VecInName = ""
        self.Word2VecModel = None
        # language-model defaults: kernel density estimation
        self.cLmName = "kde"
        self.LmClass = KernelDensityLmC
        # raw parameter configuration for the lm
        self.ParaConf = cxConfC()
 def Process(self, TrainQueryIn, TestQueryIn, ParaStr, EvaOutName):
     """Train a ranking model, score the test queries, evaluate, and dump results.

     TrainQueryIn/TestQueryIn: query files, one tab-separated line per query
         (first column is the qid).
     ParaStr: 'key=value,...' overrides for datadir/optmethod/convergethreshold.
     EvaOutName: output path; one 'qid<TAB>evares' line per query plus a
         final 'mean' line.
     Returns True on completion.
     """
     logging.info('training using [%s] testing using [%s] eva out to [%s]',
                  TrainQueryIn, TestQueryIn, EvaOutName)

     conf = cxConfC()
     conf.ParseParaStr(ParaStr)

     self.DataDir = conf.GetConf('datadir', self.DataDir)
     OptMethod = conf.GetConf('optmethod', 'BFGS')
     ConvergeThreshold = conf.GetConf('convergethreshold', 1e-05)

     logging.info('pipe start training')

     llTrainQDocData = self.ReadTargetQDocData(TrainQueryIn, self.DataDir)
     llTestQDocData = self.ReadTargetQDocData(TestQueryIn, self.DataDir)

     w = self.Learner.Train(llTrainQDocData, OptMethod, ConvergeThreshold)
     logging.info('trained w:\n%s', np.array2string(w, precision=6))

     logging.info('pipe start testing')

     # fix: close the query file instead of leaking the handle
     with open(TestQueryIn) as QFile:
         lQid = [line.split('\t')[0] for line in QFile.read().splitlines()]
     # score every test doc with the learned weights: [DocNo, x . w]
     llDocScore = [[[data.DocNo, data.X.dot(w)] for data in lTestQDocData]
                   for lTestQDocData in llTestQDocData]

     logging.info('pipe start evaluating')

     lEvaRes = []
     for qid, lDocScore in zip(lQid, llDocScore):
         lDocScore.sort(key=lambda item: item[1], reverse=True)
         lDocNo = [item[0] for item in lDocScore]
         lEvaRes.append(self.Evaluator.EvaluatePerQ(qid, "", lDocNo))

     MeanEvaRes = AdhocMeasureC.AdhocMeasureMean(lEvaRes)
     lEvaRes.append(MeanEvaRes)
     lQid.append('mean')

     # fix: 'with' guarantees the output file is closed even on error;
     # out.write replaces the Python-2-only 'print >> out' statement
     with open(EvaOutName, 'w') as out:
         for qid, EvaRes in zip(lQid, lEvaRes):
             out.write(qid + '\t' + EvaRes.dumps() + '\n')

     logging.info('finished, eva res [%s]', lEvaRes[-1].dumps())
     return True
    def SetConf(self, ConfIn):
        """Configure term ctf, object center, rank inputs, and I/O names."""
        Cfg = cxConfC(ConfIn)
        self.CtfCenter.Load(Cfg.GetConf('termctf'))
        self.ObjCenter.SetConf(ConfIn)
        # two candidate object rankings: Facc annotation and Google
        self.QFaccObjRankName = Cfg.GetConf('faccrank')
        self.QGoogleObjRankName = Cfg.GetConf('googlerank')
        self.ObjRankDepth = int(Cfg.GetConf('objrankdepth', self.ObjRankDepth))
        self.InName = Cfg.GetConf('in')
        self.OutName = Cfg.GetConf('out')
Example #9
0
def AdhocEvaUnitTest(ConfIn = ""):
    """Unit test for ad hoc evaluation.

    Input: a TREC-format ranking (conf 'in': qid in col 0, docno in col 2,
    rows grouped by qid) plus a qrel.
    Output: per-query evaluation lines and a final 'mean' line to conf 'out'.
    Returns False (after printing usage) when no conf is given, True otherwise.
    """
    if "" == ConfIn:
        print("conf:\nin\nqrel\nevadepth\nout\n")
        return False

    conf = cxConfC(ConfIn)
    InName = conf.GetConf('in')
    OutName = conf.GetConf('out')
    AdhocEva = AdhocEvaC(ConfIn)

    MeanRes = AdhocMeasureC()
    cnt = 0
    CurrentQid = ""
    lDocNo = []

    # fix: 'with' closes both files (the input handle leaked before);
    # out.write replaces the Python-2-only 'print >> out' statement
    with open(OutName, 'w') as out, open(InName) as InFile:
        for line in InFile:
            vCol = line.strip().split()
            ThisQid = vCol[0]
            DocNo = vCol[2]
            if CurrentQid == "":
                CurrentQid = ThisQid
            if CurrentQid != ThisQid:
                # query boundary reached: evaluate the finished query
                EvaRes = AdhocEva.EvaluatePerQ(CurrentQid, lDocNo)
                out.write(CurrentQid + " %s" % (EvaRes.dumps()) + '\n')
                MeanRes = MeanRes + EvaRes
                cnt += 1
                CurrentQid = ThisQid
                lDocNo = []
            lDocNo.append(DocNo)
        # flush the last query, which has no boundary line after it
        EvaRes = AdhocEva.EvaluatePerQ(CurrentQid, lDocNo)
        out.write(CurrentQid + " %s" % (EvaRes.dumps()) + '\n')
        MeanRes = MeanRes + EvaRes
        cnt += 1

        MeanRes = MeanRes / cnt
        out.write("mean %s" % (MeanRes.dumps()) + '\n')
    return True
Example #10
0
        
if __name__ == '__main__':
    import sys, os
    from AdhocEva.RankerEvaluator import RankerEvaluatorC

    # usage: exactly one argument, the conf file
    if len(sys.argv) != 2:
        print('I evaluate Boe exp model ')
        print('in\nout')
        BoePRFRerankerC.ShowConf()
        RankerEvaluatorC.ShowConf()
        sys.exit()

    # route debug-level logging to stdout with timestamps
    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    ch = logging.StreamHandler(sys.stdout)
    ch.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    root.addHandler(ch)

    conf = cxConfC(sys.argv[1])
    QIn = conf.GetConf('in')
    EvaOut = conf.GetConf('out')

    # rerank the input queries and evaluate the ranking
    Ranker = BoePRFRerankerC(sys.argv[1])
    Evaluator = RankerEvaluatorC(sys.argv[1])
    Evaluator.Evaluate(QIn, Ranker.Rank, EvaOut)
            
        for line in open(SVMInName):
            LeToRData = LeToRDataBaseC(line.strip())
            LeToRData = self.ProcessOneInstance(LeToRData)
            print >> out, LeToRData.dumps()
        out.close()
        print "finished"

if __name__ == '__main__':
    import sys

    # usage: a single conf-file argument is required
    if len(sys.argv) != 2:
        ExtractDocVecFeatureToSVMDataC.ShowConf()
        print("in\nout")
        sys.exit()

    # info-level logging to stdout with timestamps
    root = logging.getLogger()
    root.setLevel(logging.INFO)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.DEBUG)
    ch.setFormatter(logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
    root.addHandler(ch)

    conf = cxConfC(sys.argv[1])
    SVMInName = conf.GetConf('in')
    OutName = conf.GetConf('out')

    # extract doc-vector features from the SVM-format input
    Extractor = ExtractDocVecFeatureToSVMDataC(sys.argv[1])
    Extractor.Process(SVMInName, OutName)
Example #12
0
 def SetConf(self, ConfIn):
     """Read the Facc annotation directory from the configuration."""
     self.FaccDir = cxConfC(ConfIn).GetConf('faccdir')
    def SetConf(self,ConfIn):
        conf = cxConfC(ConfIn)
#         self.MaxP = float(conf.GetConf('maxp',self.MaxP))
        self.lName = conf.GetConf('methodname',[])
        self.lBaseName = conf.GetConf('baseline',[])
        self.lResInName = conf.GetConf('methodevares',[])