content = '%s,%s\n' % (eachLine, clfResults[resultCounter]) fw.write(content) resultCounter += 1 if __name__ == "__main__": reload(sys) sys.setdefaultencoding('utf8') initDirectories() handleArgv() if Config.PRE_PROCESS_ON == 1: xmlConverter = XmlConverter() xmlConverter.convertDoc(0, Config.DATA_SIZE) xmlConverter.convertQuery(0, Config.QUERY_SIZE) xmlConverter.convertTestData(0, Config.TEST_DATA_SIZE) else: docReader = DocReader() docModeler = DocModeler() trainDataReader = TrainDataReader() featureModeler = FeatureBasedModeler() Y, trainDataIdxs = trainDataReader.getTrainAnswers() print "Get Train Answers Done" tfidfMat, Y = featureModeler.extractFeaturesMatrix(trainDataIdxs, Y) print "Calc data features done" ''' print tfidfMat.shape transformer = random_projection.SparseRandomProjection(n_components=700000)