def evaluateVSM(targeEventFile, collFolder, k, relevTh, vsmClassifierFileName, topK):
    """Evaluate a crawled collection against a VSM (vector-space-model) classifier.

    Builds (or reloads) the VSM classifier from the target-event file, reads each
    collection file ``<collFolder><j>.txt`` for ``j`` in ``0..k-1``, wraps its text
    in a lightweight object, and returns the per-document evaluation results.

    :param targeEventFile: path to the target-event seed file used to build the classifier
    :param collFolder: folder (path prefix) holding the collection files named '<j>.txt'
    :param k: number of collection files to evaluate
    :param relevTh: relevance threshold passed to the classifier builder
    :param vsmClassifierFileName: path used to persist/load the pickled classifier
    :param topK: number of top terms kept when building the classifier
    :return: list of per-document relevance results from Evaluate.evaluateFC
    """
    evaluator = Evaluate()
    evaluator.buildVSMClassifier(targeEventFile, vsmClassifierFileName, relevTh, topK)
    collFiles = []
    for j in range(k):
        fn = collFolder + str(j) + '.txt'
        f = codecs.open(fn, encoding='utf-8')
        try:
            ftext = f.read()
        finally:
            f.close()  # fix: the original leaked this handle on every iteration
        o = myObj()
        o.text = ftext
        collFiles.append(o)
    res = evaluator.evaluateFC(collFiles)
    return res
def evaluateClassifier(classifierFile,cf,k): evaluator = Evaluate() evaluator.buildClassifier("posFile","negFolder",classifierFile) collFiles = [] for j in range(k): fn = cf+str(j)+'.txt' f = codecs.open(fn, encoding='utf-8') ftext = f.read() o = myObj() o.text = ftext collFiles.append(o) res = evaluator.evaluateFC(collFiles) f = open(cf+'evaluationRes_Classf.txt','w') f.write('\n'.join([str(r) for r in res])) f.close() print sum(res)
elif ct == 'e':
    # Event-focused crawl mode ('e'): run the focused crawler, persist every
    # fetched page under pagesDir, then evaluate the resulting collection.
    #eventRelevantPages = eventFC(crawlParams)
    pagesDir = outputDir + "/event-webpages/"
    logDataFilename = pagesDir + "event-logData.txt"
    outputURLsFilename = pagesDir + "event-Output-URLs.txt"
    evalFilename = pagesDir + "event-evaluateData.txt"
    # rp: crawled page objects — each exposes pageId, pageUrl and text below.
    rp = eventFC(crawlParams)
    #if not os.path.exists(outputDir):
    #    os.makedirs(outputDir)
    if not os.path.exists(pagesDir):
        os.makedirs(pagesDir)
    f = open(logDataFilename, "w")        # log: "<pageId>,<pageUrl[2]>" per page
    furl = open(outputURLsFilename, "w")  # one crawled URL per line
    for p in rp:
        f.write(str(p.pageId) + "," + str(p.pageUrl[2]) + "\n")
        #furl.write(p.pageUrl[1].encode("utf-8")+","+str(p.estimatedScore)+"\n")
        furl.write(p.pageUrl[1].encode("utf-8") + "\n")
        # Persist the page text as '<pageId>.txt', UTF-8 encoded (Python 2:
        # text-mode file accepts the encoded byte string).
        ftext = open(pagesDir + str(p.pageId) + ".txt", "w")
        ftext.write(p.text.encode("utf-8"))
        ftext.close()
    f.close()
    furl.close()
    # Score the crawled pages, persist the evaluation, and print the totals.
    # NOTE(review): 'evaluator' and 'writeEvaluation' are defined elsewhere in
    # this script — confirm they are in scope when this branch runs.
    res = evaluator.evaluateFC(rp)
    writeEvaluation(res, evalFilename)
    print sum(res)
    print len(res)
#eventRelevantPages = eventFC(crawlParams) pagesDir=outputDir+"/event-webpages/" logDataFilename=pagesDir+"event-logData.txt" outputURLsFilename=pagesDir+"event-Output-URLs.txt" evalFilename=pagesDir+"event-evaluateData.txt" rp = eventFC(crawlParams) #if not os.path.exists(outputDir): # os.makedirs(outputDir) if not os.path.exists(pagesDir): os.makedirs(pagesDir) f = open(logDataFilename,"w") furl = open(outputURLsFilename,"w") for p in rp: f.write(str(p.pageId) + "," + str(p.pageUrl[2]) + "\n") #furl.write(p.pageUrl[1].encode("utf-8")+","+str(p.estimatedScore)+"\n") furl.write(p.pageUrl[1].encode("utf-8")+"\n") ftext = open(pagesDir+str(p.pageId) + ".txt", "w") ftext.write(p.text.encode("utf-8")) ftext.close() f.close() furl.close() res = evaluator.evaluateFC(rp) writeEvaluation(res,evalFilename) print sum(res) print len(res)