Beispiel #1
0
 def extractData(self,name,training,common,svmFormat=False,classType='Coarse'):
     """Dump the feature vectors of collection ``name`` to a text file.

     One line is written per record returned by ``DBStore.queryDB(name)``:
     the record's ``classType + 'Code'`` value followed by the value stored
     under every word listed in the helper word file. Every field,
     including the last one on a line, is followed by a single space.

     Args:
         name: Collection name passed to ``DBStore.queryDB``; also part of
             the output file name.
         training: If true the file is written below
             ``DBStore.trainingRoot``, otherwise below
             ``DBStore.testingRoot``.
         common: Selects the word list
             ``DBStore.commonRoot + '/words' + common + '.txt'``
             (one word per line, trailing whitespace is stripped).
         svmFormat: If true each feature is written as ``index:value``
             with indices starting at 1 and the file is named
             ``vector_SVM_<name>.txt``; otherwise only the values are
             written, to ``vector_<name>.txt``.
         classType: Prefix of the label field; the label is read from
             ``question[classType + 'Code']``.
     """
     helperFile = DBStore.commonRoot + '/words'+common+'.txt'
     if training:
         root = DBStore.trainingRoot
     else:
         root = DBStore.testingRoot
     if svmFormat:
         prefix = '/vector_SVM_'
     else:
         prefix = '/vector_'
     outputFile = root + prefix + name + '.txt'

     # Read the word list once, strip the line endings once, and release the
     # handle immediately. Doing this before the output file is opened means
     # a missing word file does not leave an empty vector file behind.
     with open(helperFile,'r') as helper:
         helperWord = [word.rstrip() for word in helper]

     query = DBStore.queryDB(name)
     labelKey = classType+'Code'
     # The context manager closes the output even if a record lacks a key.
     with open(outputFile,'w') as output:
         for i, question in enumerate(query):
             # Progress indicator: index of the record being written.
             print(i)
             fields = [str(question[labelKey])]
             if svmFormat:
                 fields.extend(str(j)+":"+str(question[word])
                               for j, word in enumerate(helperWord, 1))
             else:
                 fields.extend(str(question[word]) for word in helperWord)
             # The trailing space before the newline is part of the format.
             output.write(" ".join(fields) + " \n")
Beispiel #2
0
# Driver script: picks the feature groups, initialises the matching database
# and queries the raw question collection. The pipeline stages further down
# appear to be switched on and off by (un)commenting their calls.

# Feature groups; every key whose value is true is appended to the DB name.
insert = {'unigram':False,'hypernym':False,'head':True,'whWord':True}
classType = 'Coarse'
dbName = 'QAnlp'
# items() yields the same pairs as iteritems() and also exists on Python 3.
# NOTE(review): the suffixes are appended in dict iteration order, so the
# resulting name depends on that order -- confirm it matches the database
# that already exists before running under a different interpreter.
for key,value in insert.items():
    if value:
        dbName = dbName + '_' + key

DBStore.init(dbName)
dataprep = DataRetrieval()
termExtractor = BagOfWords()
featureExtractor = FeatureExtractor()
# colName selects the 'raw<colName>' collection queried below; common is
# presumably the suffix of the shared word list -- TODO confirm with callers.
colName = '5500'
common = '5500'
training = True
questions = DBStore.queryDB('raw'+colName)
#===============================================================================
# Insert Raw File to Database
#===============================================================================


#dataprep.insertFile2Database(colName,training)
#parsingBerkeley()
#featureInit()
#whWordExtraction()
#headWordExtraction()
#hypernimExtraction()
#termExtractor.bagOfWordBuilder(questions,'words'+colName,insert)
#featureInsertion()

# Rebinds colName for the code that follows (not visible in this excerpt).
colName = '10'
Beispiel #3
0
# Driver script: picks the feature groups, initialises the matching database,
# queries the raw question collection and then runs the enabled pipeline
# stages (featureInit and featureInsertion in this configuration).

# Feature groups; every key whose value is true is appended to the DB name.
insert = {"unigram": True, "hypernym": True, "head": True, "whWord": True}
classType = "Coarse"
dbName = "QAnlp"
# items() yields the same pairs as iteritems() and also exists on Python 3.
# NOTE(review): the suffixes are appended in dict iteration order, so the
# resulting name depends on that order -- confirm it matches the database
# that already exists before running under a different interpreter.
for key, value in insert.items():
    if value:
        dbName = dbName + "_" + key

DBStore.init(dbName)
dataprep = DataRetrieval()
termExtractor = BagOfWords()
featureExtractor = FeatureExtractor()
# colName selects the "raw<colName>" collection queried below; common is
# presumably the suffix of the shared word list -- TODO confirm with callers.
colName = "5500"
common = "5500"
training = True
questions = DBStore.queryDB("raw" + colName)
# ===============================================================================
# Insert Raw File to Database
# ===============================================================================


# dataprep.insertFile2Database(colName,training)
# parsingBerkeley()
featureInit()
# whWordExtraction()
# headWordExtraction()
# hypernimExtraction()
featureInsertion()

# Rebinds colName/common for the code that follows (not visible in this
# excerpt).
colName = "10"
common = "5500"