#!/usr/bin/env python
# -*- coding: utf-8 -*-
# NOTE(review): the declaration below sits on the third line, so the
# interpreter ignores it (only lines 1-2 are searched for an encoding cookie);
# the utf-8 declaration above is the one in effect. Kept for history only.
#coding=gbk
#
# One-off driver: builds an XmlConverter and converts a small slice of queries.
# The statements were previously collapsed onto the shebang line, which turned
# the whole script into a single comment; they are restored one per line here.
from PreProcesser import XmlConverter

xmlConverter = XmlConverter()

# Document conversion is switched off in this script; re-enable by uncommenting.
#xmlConverter.convertDoc(99900, 99900)

# Presumably a (start, end) query index range -- confirm against PreProcesser.
xmlConverter.convertQuery(19, 21)
# NOTE(review): this physical line is a whitespace-collapsed fragment. It opens
# with the tail of a definition whose header is not visible (`f.write(content)`)
# and stops inside the query loop, so the original nesting cannot be recovered
# from here. Restore the line breaks from the upstream file before running it.
#
# Visible statements of the `__main__` driver, in order:
#   - reload(sys) followed by sys.setdefaultencoding('utf8') (a Python 2-only
#     idiom), then initDirectories() and handleArgv(), both defined outside
#     this view.
#   - when Config.FEEDBACK_MODE == 1: a FeedbackQueryGenerator is built,
#     genFeedbackQuery() is called, and sys.exit(0) follows (presumably inside
#     that branch -- confirm).
#   - when Config.PRE_PROCESS_ON == 1: XmlConverter.convertDoc(0,
#     Config.DATA_SIZE) and convertQuery(0, Config.QUERY_SIZE) are called.
#   - otherwise: for d in range(0, Config.TEST_DATA_SIZE) the result of
#     docReader.loadSegDoc(d) is passed to docModeler.genModelByDocArr(words, d)
#     and a progress message is printed with the Python 2 print statement.
#     queryResult is initialised and a loop over q in range(1, 11) resets
#     scoreList; the remainder of that loop is not visible.
f.write(content) if __name__ == "__main__": reload(sys) sys.setdefaultencoding('utf8') initDirectories() handleArgv() if Config.FEEDBACK_MODE == 1: feedbackGenerator = FeedbackQueryGenerator() feedbackGenerator.genFeedbackQuery() sys.exit(0) if Config.PRE_PROCESS_ON == 1: xmlConverter = XmlConverter() xmlConverter.convertDoc(0, Config.DATA_SIZE) xmlConverter.convertQuery(0, Config.QUERY_SIZE) else: docReader = DocReader() docModeler = DocModeler() for d in range(0, Config.TEST_DATA_SIZE): words = docReader.loadSegDoc(d) docModeler.genModelByDocArr(words, d) print "Done generate %d model" % (d) queryResult = [] for q in range(1, 11): scoreList = []
# NOTE(review): this physical line is a whitespace-collapsed fragment. It opens
# in the middle of an if/else inside a definition whose header is not visible,
# and it is cut off after the feature-extraction step, so the original nesting
# cannot be recovered from here. Restore the line breaks from the upstream file
# before running it.
#
# Leading fragment (tail of an unseen writer routine):
#   - one branch writes the header row 'NewsId,Agency\n' to fw;
#   - the else branch strips "\n" from eachLine, writes
#     '%s,%s\n' % (eachLine, clfResults[resultCounter]) to fw, and increments
#     resultCounter.
#
# Visible statements of the `__main__` driver, in order:
#   - reload(sys) followed by sys.setdefaultencoding('utf8') (a Python 2-only
#     idiom), then initDirectories() and handleArgv(), both defined outside
#     this view.
#   - when Config.PRE_PROCESS_ON == 1: XmlConverter.convertDoc(0,
#     Config.DATA_SIZE), convertQuery(0, Config.QUERY_SIZE) and
#     convertTestData(0, Config.TEST_DATA_SIZE) are called.
#   - otherwise: DocReader, DocModeler, TrainDataReader and FeatureBasedModeler
#     are instantiated; trainDataReader.getTrainAnswers() is unpacked into
#     (Y, trainDataIdxs); featureModeler.extractFeaturesMatrix(trainDataIdxs, Y)
#     is unpacked into (tfidfMat, Y), rebinding Y; a progress message is printed
#     after each step with the Python 2 print statement.
fw.write('NewsId,Agency\n') else: eachLine = eachLine.replace("\n", "") content = '%s,%s\n' % (eachLine, clfResults[resultCounter]) fw.write(content) resultCounter += 1 if __name__ == "__main__": reload(sys) sys.setdefaultencoding('utf8') initDirectories() handleArgv() if Config.PRE_PROCESS_ON == 1: xmlConverter = XmlConverter() xmlConverter.convertDoc(0, Config.DATA_SIZE) xmlConverter.convertQuery(0, Config.QUERY_SIZE) xmlConverter.convertTestData(0, Config.TEST_DATA_SIZE) else: docReader = DocReader() docModeler = DocModeler() trainDataReader = TrainDataReader() featureModeler = FeatureBasedModeler() Y, trainDataIdxs = trainDataReader.getTrainAnswers() print "Get Train Answers Done" tfidfMat, Y = featureModeler.extractFeaturesMatrix(trainDataIdxs, Y) print "Calc data features done"
# NOTE(review): this physical line is a whitespace-collapsed fragment holding
# the import block and the `__main__` driver of a script. It ends on a `for`
# header with no body, so the original nesting cannot be recovered from here.
# As written, the embedded `#` before the convertDoc call also turns the rest
# of the physical line into a comment. Restore the line breaks from the
# upstream file before running it.
#
# Visible statements, in order:
#   - imports: sys, os, numpy, Config, XmlConverter and DocReader from
#     PreProcesser, DocModeler from Modeler.
#   - reload(sys) followed by sys.setdefaultencoding('utf8') (a Python 2-only
#     idiom).
#   - when Config.PRE_PROCESS_ON == 1: an XmlConverter is built; the
#     convertDoc(0, Config.DATA_SIZE) call is commented out and
#     convertQuery(0, Config.QUERY_SIZE) is called.
#   - otherwise: for d in range(0, Config.TEST_DATA_SIZE) the result of
#     docReader.loadSegDoc(d) is passed to docModeler.genModelByDocArr(words, d)
#     and a progress message is printed with the Python 2 print statement.
#     scoreList is initialised, and for q in range(5, 6) (i.e. only q == 5)
#     docReader.loadQuery(q) is loaded before a second loop over the documents
#     begins; its body is not visible.
import sys import os import numpy import Config from PreProcesser import XmlConverter from PreProcesser import DocReader from Modeler import DocModeler if __name__ == "__main__": reload(sys) sys.setdefaultencoding('utf8') if Config.PRE_PROCESS_ON == 1: xmlConverter = XmlConverter() #xmlConverter.convertDoc(0, Config.DATA_SIZE) xmlConverter.convertQuery(0, Config.QUERY_SIZE) else: docReader = DocReader() docModeler = DocModeler() for d in range(0, Config.TEST_DATA_SIZE): words = docReader.loadSegDoc(d) docModeler.genModelByDocArr(words, d) print "Done generate %d model" % (d) scoreList = [] for q in range(5, 6): query = docReader.loadQuery(q) for d in range(0, Config.TEST_DATA_SIZE):