def extractData(self,name,training,common,svmFormat=False,classType='Coarse'):
    """Dump the feature vectors of collection ``name`` to a text file.

    The feature names are read, one per line, from
    ``DBStore.commonRoot + '/words' + common + '.txt'``.  For every record
    returned by ``DBStore.queryDB(name)`` one output line is written: the
    record's ``classType + 'Code'`` value followed by the value stored under
    each feature name, every field terminated by a single space.

    Args:
        name: Collection name passed to ``DBStore.queryDB``; also used in the
            output file name.
        training: When true the file goes under ``DBStore.trainingRoot``,
            otherwise under ``DBStore.testingRoot``.
        common: Suffix selecting the ``words<common>.txt`` feature-name file.
        svmFormat: When true each feature is written as ``index:value`` with
            a 1-based index and the file is named ``vector_SVM_<name>.txt``;
            otherwise bare values are written to ``vector_<name>.txt``.
        classType: Prefix of the record key holding the class label
            (``classType + 'Code'``).

    Side effects:
        Prints the 0-based index of each processed record to stdout as a
        progress indicator, and (over)writes the output file.
    """
    helperFile = DBStore.commonRoot + '/words' + common + '.txt'
    root = DBStore.trainingRoot if training else DBStore.testingRoot
    prefix = '/vector_SVM_' if svmFormat else '/vector_'
    outputFile = root + prefix + name + '.txt'

    # Read and strip the feature names once, and release the helper file
    # immediately; the original kept the handle open and never closed it.
    # Reading it before opening the output also avoids truncating an existing
    # output file when the helper file is missing.
    with open(helperFile, 'r') as helper:
        featureNames = [line.rstrip() for line in helper]

    # The context manager guarantees the output is flushed and closed even if
    # a record lacks one of the expected keys and the loop raises.
    with open(outputFile, 'w') as output:
        for i, question in enumerate(DBStore.queryDB(name)):
            print(i)  # progress indicator; same output on Python 2 and 3
            fields = [str(question[classType + 'Code'])]
            if svmFormat:
                fields.extend(
                    str(j) + ":" + str(question[word])
                    for j, word in enumerate(featureNames, 1)
                )
            else:
                fields.extend(str(question[word]) for word in featureNames)
            # Every field, including the last one, is followed by one space,
            # exactly as in the original field-by-field writes.
            output.write(" ".join(fields) + " \n")
# Feature switches.  Every key whose value is true contributes a '_<key>'
# suffix to the database name assembled below.
insert = {'unigram':False,'hypernym':False,'head':True,'whWord':True}
classType = 'Coarse'

# Database name: 'QAnlp' plus one suffix per enabled switch, in the order in
# which the dict yields its items.
dbName = 'QAnlp'
for key,value in insert.iteritems():
    if not value:
        continue
    dbName += '_' + key
DBStore.init(dbName)

# Helper objects created once the database has been initialised.
dataprep = DataRetrieval()
termExtractor = BagOfWords()
featureExtractor = FeatureExtractor()

# Collection being processed and the word-list suffix that goes with it.
colName = '5500'
common = '5500'
training = True
questions = DBStore.queryDB('raw'+colName)

# -----------------------------------------------------------------------------
# Insert Raw File to Database
# -----------------------------------------------------------------------------
# All steps below are currently disabled; uncomment a line to run that step.
#dataprep.insertFile2Database(colName,training)
#parsingBerkeley()
#featureInit()
#whWordExtraction()
#headWordExtraction()
#hypernimExtraction()
#termExtractor.bagOfWordBuilder(questions,'words'+colName,insert)
#featureInsertion()

# Rebind the collection name; presumably used by statements that follow this
# section (not visible here).
colName = '10'
# Feature switches.  Every key whose value is true contributes a "_<key>"
# suffix to the database name assembled below; here all four are enabled.
insert = {"unigram": True, "hypernym": True, "head": True, "whWord": True}
classType = "Coarse"

# Database name: "QAnlp" plus one suffix per enabled switch, in the order in
# which the dict yields its items.
dbName = "QAnlp"
for key, value in insert.iteritems():
    if not value:
        continue
    dbName += "_" + key
DBStore.init(dbName)

# Helper objects created once the database has been initialised.
dataprep = DataRetrieval()
termExtractor = BagOfWords()
featureExtractor = FeatureExtractor()

# Collection being processed and the word-list suffix that goes with it.
colName = "5500"
common = "5500"
training = True
questions = DBStore.queryDB("raw" + colName)

# -----------------------------------------------------------------------------
# Insert Raw File to Database
# -----------------------------------------------------------------------------
# Only featureInit() and featureInsertion() are active; the commented-out
# steps can be re-enabled by uncommenting them.
# dataprep.insertFile2Database(colName,training)
# parsingBerkeley()
featureInit()
# whWordExtraction()
# headWordExtraction()
# hypernimExtraction()
featureInsertion()

# Rebind the collection name and word-list suffix; presumably used by
# statements that follow this section (not visible here).
colName = "10"
common = "5500"