Esempio n. 1
0
#!/usr/bin/env python 
# -*- coding: utf-8 -*-
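# NOTE(review): a gbk "coding" declaration on this line is ignored by Python, which only honours an encoding declaration on line 1 or 2 (PEP 263).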

from PreProcesser import XmlConverter


# Ad-hoc driver: build an XmlConverter and run the query conversion with the
# arguments (19, 21).
# NOTE(review): the two arguments are presumably a start/end query index
# range -- confirm against XmlConverter.convertQuery.
xmlConverter = XmlConverter()
# The document conversion call is commented out, so only queries are converted.
#xmlConverter.convertDoc(99900, 99900)
xmlConverter.convertQuery(19,21)
Esempio n. 2
0
			f.write(content)

if __name__ == "__main__":
	# Python 2 idiom: site.py removes sys.setdefaultencoding at startup, so
	# sys is reloaded to get it back before switching the default to UTF-8.
	reload(sys)
	sys.setdefaultencoding('utf8')

	initDirectories()
	handleArgv()

	# Feedback mode: generate the feedback query and exit; nothing below runs.
	if Config.FEEDBACK_MODE == 1:
		FeedbackQueryGenerator().genFeedbackQuery()
		sys.exit(0)

	if Config.PRE_PROCESS_ON == 1:
		# Pre-processing pass: convert the documents first, then the queries.
		converter = XmlConverter()
		converter.convertDoc(0, Config.DATA_SIZE)
		converter.convertQuery(0, Config.QUERY_SIZE)
	else:
		docReader = DocReader()
		docModeler = DocModeler()

		# Load each segmented document and hand it to the modeler together
		# with its index.
		for docIdx in range(Config.TEST_DATA_SIZE):
			segWords = docReader.loadSegDoc(docIdx)
			docModeler.genModelByDocArr(segWords, docIdx)
			print("Done generate %d model" % docIdx)

		queryResult = []
		# q runs over 1..10; each iteration starts with an empty score list.
		for q in range(1, 11):
			scoreList = []
Esempio n. 3
0
                        fw.write('NewsId,Agency\n')
                    else:
                        eachLine = eachLine.replace("\n", "")
                        content = '%s,%s\n' % (eachLine, clfResults[resultCounter])
                        fw.write(content)
                        resultCounter += 1

if __name__ == "__main__":
	# Script entry point. Depending on Config.PRE_PROCESS_ON it either runs the
	# XML conversion steps or loads the training answers and extracts the
	# feature matrix.
	#
	# The block is indented with tabs only. It previously mixed tabs with
	# 16-space lines, which parsed only because Python 2 expands a tab to 8
	# columns and which raises TabError on Python 3 or under `python -tt`.

	# Python 2 idiom: site.py removes sys.setdefaultencoding at startup, so
	# sys is reloaded to get it back before switching the default to UTF-8.
	reload(sys)
	sys.setdefaultencoding('utf8')

	initDirectories()
	handleArgv()

	if Config.PRE_PROCESS_ON == 1:
		# Pre-processing pass: documents, queries, then the test data.
		xmlConverter = XmlConverter()
		xmlConverter.convertDoc(0, Config.DATA_SIZE)
		xmlConverter.convertQuery(0, Config.QUERY_SIZE)
		xmlConverter.convertTestData(0, Config.TEST_DATA_SIZE)

	else:
		docReader = DocReader()
		docModeler = DocModeler()
		trainDataReader = TrainDataReader()

		featureModeler = FeatureBasedModeler()
		# getTrainAnswers() yields the answers (Y) and the matching training
		# data indexes.
		Y, trainDataIdxs = trainDataReader.getTrainAnswers()
		# Parenthesised single-argument print gives the same output on
		# Python 2 and is also valid Python 3 syntax.
		print("Get Train Answers Done")

		# Y is rebound to the value returned alongside the feature matrix.
		# NOTE(review): presumably a TF-IDF matrix, judging by the name --
		# confirm against FeatureBasedModeler.extractFeaturesMatrix.
		tfidfMat, Y = featureModeler.extractFeaturesMatrix(trainDataIdxs, Y)
		print("Calc data features done")
Esempio n. 4
0
import sys
import os
import numpy

import Config
from PreProcesser import XmlConverter
from PreProcesser import DocReader
from Modeler import DocModeler

if __name__ == "__main__":
	reload(sys)
	sys.setdefaultencoding('utf8')

	if Config.PRE_PROCESS_ON == 1:
		xmlConverter = XmlConverter()
		#xmlConverter.convertDoc(0, Config.DATA_SIZE)
		xmlConverter.convertQuery(0, Config.QUERY_SIZE)

	else:
		docReader = DocReader()
		docModeler = DocModeler()

		for d in range(0, Config.TEST_DATA_SIZE):
			words = docReader.loadSegDoc(d)
			docModeler.genModelByDocArr(words, d)
			print "Done generate %d model" % (d)

		scoreList = []
		for q in range(5, 6):
			query = docReader.loadQuery(q)
			for d in range(0, Config.TEST_DATA_SIZE):