def evaluateVSM(targeEventFile, collFolder,k,relevTh,vsmClassifierFileName,topK):
    """Evaluate the first ``k`` collection files with a VSM classifier.

    The classifier is prepared by ``Evaluate.buildVSMClassifier`` from the
    given arguments; this function only loads the collection texts and hands
    them to ``Evaluate.evaluateFC``.

    Args:
        targeEventFile: Passed through to ``buildVSMClassifier``.
        collFolder: Path prefix of the collection files. File ``j`` is read
            from ``collFolder + str(j) + '.txt'``, so the prefix must already
            end with a path separator.
        k: Number of collection files to read (indices ``0 .. k-1``).
        relevTh: Passed through to ``buildVSMClassifier``.
        vsmClassifierFileName: Passed through to ``buildVSMClassifier``.
        topK: Passed through to ``buildVSMClassifier``.

    Returns:
        Whatever ``Evaluate.evaluateFC`` returns for the loaded documents.
    """
    evaluator = Evaluate()
    evaluator.buildVSMClassifier(targeEventFile,vsmClassifierFileName,relevTh,topK)
    collFiles = []
    for j in range(k):
        fn = collFolder + str(j) + '.txt'
        # Close each file as soon as it is read; the previous version left
        # every handle open, leaking k descriptors per call.
        with codecs.open(fn, encoding='utf-8') as f:
            ftext = f.read()
        # evaluateFC is given objects carrying the page text in ``.text``.
        o = myObj()
        o.text = ftext
        collFiles.append(o)
    res = evaluator.evaluateFC(collFiles)
    return res
Beispiel #2
0
def evaluateVSM(targeEventFile, collFolder,k,relevTh,vsmClassifierFileName,topK):
    """Evaluate the first ``k`` collection files with a VSM classifier.

    The classifier is prepared by ``Evaluate.buildVSMClassifier`` from the
    given arguments; this function only loads the collection texts and hands
    them to ``Evaluate.evaluateFC``.

    Args:
        targeEventFile: Passed through to ``buildVSMClassifier``.
        collFolder: Path prefix of the collection files. File ``j`` is read
            from ``collFolder + str(j) + '.txt'``, so the prefix must already
            end with a path separator.
        k: Number of collection files to read (indices ``0 .. k-1``).
        relevTh: Passed through to ``buildVSMClassifier``.
        vsmClassifierFileName: Passed through to ``buildVSMClassifier``.
        topK: Passed through to ``buildVSMClassifier``.

    Returns:
        Whatever ``Evaluate.evaluateFC`` returns for the loaded documents.
    """
    evaluator = Evaluate()
    evaluator.buildVSMClassifier(targeEventFile,vsmClassifierFileName,relevTh,topK)
    collFiles = []
    for j in range(k):
        fn = collFolder + str(j) + '.txt'
        # Close each file as soon as it is read; the previous version left
        # every handle open, leaking k descriptors per call.
        with codecs.open(fn, encoding='utf-8') as f:
            ftext = f.read()
        # evaluateFC is given objects carrying the page text in ``.text``.
        o = myObj()
        o.text = ftext
        collFiles.append(o)
    res = evaluator.evaluateFC(collFiles)
    return res
def evaluateClassifier(classifierFile,cf,k):
    """Evaluate the first ``k`` collection files and write the results to disk.

    Builds a classifier through ``Evaluate.buildClassifier``, reads the
    collection texts, passes them to ``Evaluate.evaluateFC``, writes one
    result per line to ``cf + 'evaluationRes_Classf.txt'`` and prints the sum
    of the results.

    Args:
        classifierFile: Passed through to ``buildClassifier``.
        cf: Path prefix of the collection files. File ``j`` is read from
            ``cf + str(j) + '.txt'`` and the result file is written under the
            same prefix, so it must already end with a path separator.
        k: Number of collection files to read (indices ``0 .. k-1``).
    """
    evaluator = Evaluate()
    # NOTE(review): "posFile" and "negFolder" are literal placeholder strings
    # in the original code; presumably buildClassifier only needs them when
    # classifierFile cannot be loaded -- confirm against Evaluate.
    evaluator.buildClassifier("posFile","negFolder",classifierFile)
    collFiles = []
    for j in range(k):
        fn = cf + str(j) + '.txt'
        # Close each input as soon as it is read instead of leaking handles.
        with codecs.open(fn, encoding='utf-8') as f:
            ftext = f.read()
        o = myObj()
        o.text = ftext
        collFiles.append(o)
    res = evaluator.evaluateFC(collFiles)
    # ``with`` guarantees the result file is closed even if the write fails.
    with open(cf+'evaluationRes_Classf.txt','w') as f:
        f.write('\n'.join([str(r) for r in res]))
    # Parenthesised single-argument form prints identically on Python 2 and
    # is also valid Python 3 syntax.
    print(sum(res))
Beispiel #4
0
def evaluateClassifier(classifierFile,cf,k):
    """Evaluate the first ``k`` collection files and write the results to disk.

    Builds a classifier through ``Evaluate.buildClassifier``, reads the
    collection texts, passes them to ``Evaluate.evaluateFC``, writes one
    result per line to ``cf + 'evaluationRes_Classf.txt'`` and prints the sum
    of the results.

    Args:
        classifierFile: Passed through to ``buildClassifier``.
        cf: Path prefix of the collection files. File ``j`` is read from
            ``cf + str(j) + '.txt'`` and the result file is written under the
            same prefix, so it must already end with a path separator.
        k: Number of collection files to read (indices ``0 .. k-1``).
    """
    evaluator = Evaluate()
    # NOTE(review): "posFile" and "negFolder" are literal placeholder strings
    # in the original code; presumably buildClassifier only needs them when
    # classifierFile cannot be loaded -- confirm against Evaluate.
    evaluator.buildClassifier("posFile","negFolder",classifierFile)
    collFiles = []
    for j in range(k):
        fn = cf + str(j) + '.txt'
        # Close each input as soon as it is read instead of leaking handles.
        with codecs.open(fn, encoding='utf-8') as f:
            ftext = f.read()
        o = myObj()
        o.text = ftext
        collFiles.append(o)
    res = evaluator.evaluateFC(collFiles)
    # ``with`` guarantees the result file is closed even if the write fails.
    with open(cf+'evaluationRes_Classf.txt','w') as f:
        f.write('\n'.join([str(r) for r in res]))
    # Parenthesised single-argument form prints identically on Python 2 and
    # is also valid Python 3 syntax.
    print(sum(res))
Beispiel #5
0
    elif ct == 'e':
        #eventRelevantPages = eventFC(crawlParams)
        pagesDir = outputDir + "/event-webpages/"
        logDataFilename = pagesDir + "event-logData.txt"
        outputURLsFilename = pagesDir + "event-Output-URLs.txt"
        evalFilename = pagesDir + "event-evaluateData.txt"
        rp = eventFC(crawlParams)

    #if not os.path.exists(outputDir):
    #    os.makedirs(outputDir)
    if not os.path.exists(pagesDir):
        os.makedirs(pagesDir)
    f = open(logDataFilename, "w")
    furl = open(outputURLsFilename, "w")

    for p in rp:
        f.write(str(p.pageId) + "," + str(p.pageUrl[2]) + "\n")
        #furl.write(p.pageUrl[1].encode("utf-8")+","+str(p.estimatedScore)+"\n")
        furl.write(p.pageUrl[1].encode("utf-8") + "\n")
        ftext = open(pagesDir + str(p.pageId) + ".txt", "w")
        ftext.write(p.text.encode("utf-8"))
        ftext.close()
    f.close()
    furl.close()

    res = evaluator.evaluateFC(rp)
    writeEvaluation(res, evalFilename)
    print sum(res)
    print len(res)
        #eventRelevantPages = eventFC(crawlParams)
        pagesDir=outputDir+"/event-webpages/"
        logDataFilename=pagesDir+"event-logData.txt"
        outputURLsFilename=pagesDir+"event-Output-URLs.txt"
        evalFilename=pagesDir+"event-evaluateData.txt"
        rp = eventFC(crawlParams)
        
    
    #if not os.path.exists(outputDir):
    #    os.makedirs(outputDir)
    if not os.path.exists(pagesDir):
        os.makedirs(pagesDir)
    f = open(logDataFilename,"w")
    furl = open(outputURLsFilename,"w")
    
    for p in rp:
        f.write(str(p.pageId) + "," + str(p.pageUrl[2]) + "\n")
        #furl.write(p.pageUrl[1].encode("utf-8")+","+str(p.estimatedScore)+"\n")
        furl.write(p.pageUrl[1].encode("utf-8")+"\n")
        ftext = open(pagesDir+str(p.pageId) + ".txt", "w")
        ftext.write(p.text.encode("utf-8"))
        ftext.close()
    f.close()
    furl.close()
    
    res = evaluator.evaluateFC(rp)
    writeEvaluation(res,evalFilename)    
    print sum(res)
    print len(res)