def main():
    """Build the global top-word list from the training files and print a feature sample.

    Reads the user's input through ``parser.getInput()``, parses the files
    listed under its ``'train'`` key, stores the 500 most frequent
    non-stopword tokens in the module-level ``top_words``, then runs
    ``featureAggregator`` over the parsed rows and prints the first ten
    results.
    """
    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    pdata = parser.parseFiles(fileList)

    # Index 3 of every parsed row is the text that gets tokenized.  Rows are
    # joined with a space so the last token of one row cannot fuse with the
    # first token of the next.
    allsent = ' '.join(f[3] for f in pdata)

    # Load the stopword list once instead of once per token, and test the
    # lower-cased token so capitalised stopwords ("The", "And") are dropped
    # as well (the NLTK English list is all lower case).
    stop_set = set(stopwords.words('english'))
    lowered = (w.lower() for w in word_tokenize(allsent))
    all_words = FreqDist(w for w in lowered if w not in stop_set)

    # Rank explicitly rather than slicing keys(): only NLTK 2 returned keys
    # sorted by frequency, and dict views cannot be sliced on Python 3.
    # Ties are broken alphabetically so the result is deterministic.
    global top_words
    top_words = sorted(all_words, key=lambda w: (-all_words[w], w))[:500]

    featdata = featureAggregator(pdata)

    print(featdata[:10])
def main():
    """Classify every test item and write the predicted labels to disk.

    Obtains a classifier from ``getClassifier()``, parses the files listed
    under the ``'test'`` key of the user's input, extracts features with
    ``extractor.featureAggregator`` and classifies element 4 of each feature
    row.  For every row a line ``<row[0]>\\t<row[1]>\\t<label>`` is written
    to ``../../output.txt`` and echoed to stdout, where the label is ``1``
    for ``'pos'``, ``0`` for ``'neutral'`` and ``-1`` for anything else.
    """
    classifier = getClassifier()

    userInput = getInput()
    fileList = parser.getFiles(userInput['test'])
    pdata = parser.parseFiles(fileList)

    featdata = extractor.featureAggregator(pdata)

    print(pdata[4])

    # Classifier output -> numeric label; any other class maps to '-1'.
    label_for = {'pos': '1', 'neutral': '0'}

    # The context manager guarantees the file is flushed and closed even if
    # classification raises part-way through the loop.
    with open('../../output.txt', 'w') as outputFileObj:
        for featdatarow in featdata:
            label = label_for.get(classifier.classify(featdatarow[4]), '-1')

            # Single formatted argument so the echo is identical under both
            # the print statement and the print function.
            print("%s %s %s" % (featdatarow[0], featdatarow[1], label))

            outputRow = '\t'.join(
                (str(featdatarow[0]), str(featdatarow[1]), label)) + '\n'
            outputFileObj.write(outputRow)
def main():
    """Train on the user's files, show one sample item and report accuracy.

    Parses the files listed under the ``'train'`` key of the user's input,
    stores the 500 most frequent non-stopword tokens in the module-level
    ``top_words``, extracts features with ``extractor.featureAggregator``,
    prints item 20 of the result together with its feature dictionary, and
    finally prints the accuracy values returned by ``splitfeatdata`` along
    with their mean.
    """
    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    parsedata = parser.parseFiles(fileList)

    # Index 3 of every parsed row is the text that gets tokenized.  Rows are
    # joined with a space so the last token of one row cannot fuse with the
    # first token of the next.
    allsent = ' '.join(f[3] for f in parsedata)

    # Load the stopword list once instead of once per token, and test the
    # lower-cased token so capitalised stopwords ("The", "And") are dropped
    # as well (the NLTK English list is all lower case).
    stop_set = set(stopwords.words('english'))
    lowered = (w.lower() for w in word_tokenize(allsent))
    all_words = FreqDist(w for w in lowered if w not in stop_set)

    # Rank explicitly rather than slicing keys(): only NLTK 2 returned keys
    # sorted by frequency, and dict views cannot be sliced on Python 3.
    # Ties are broken alphabetically so the result is deterministic.
    global top_words
    top_words = sorted(all_words, key=lambda w: (-all_words[w], w))[:500]

    featdata = extractor.featureAggregator(parsedata)

    # Item 20 is the fixed sample shown to the user; the column headers
    # below name its first four fields, and element 4 holds its features.
    sample = featdata[20]

    print("Sample Data Item:\n\n")

    print("%20s %4s %4s %20s" % ("FILENAME", "LINENUM", "VOTE", "SENTENCE"))
    print("-" * 79)
    print("%10s %4s %4s %20s" % (sample[0], sample[1], sample[2], sample[3]))

    print("\n\nFeatures of this Data Item")
    print("-" * 79)
    for key, val in sample[4].items():
        print("%50s : %10s" % (key, val))

    allacc = splitfeatdata(featdata)

    print("\n\n")
    print("-" * 60)
    print("Accuracy Values: %s" % (allacc))
    print("==" * 60)
    # float() keeps the mean from being truncated by Python 2 integer
    # division should the accuracy values ever be integers.
    print("Overall Classifier Accuracy %4.4f " % (float(sum(allacc)) / len(allacc)))
Esempio n. 4
0
def main():
    """Build the global top-word list from the training files and print a feature sample.

    Reads the user's input through ``parser.getInput()``, parses the files
    listed under its ``'train'`` key, stores the 500 most frequent
    non-stopword tokens in the module-level ``top_words``, then runs
    ``featureAggregator`` over the parsed rows and prints the first ten
    results.
    """
    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    pdata = parser.parseFiles(fileList)

    # Index 3 of every parsed row is the text that gets tokenized.  Rows are
    # joined with a space so the last token of one row cannot fuse with the
    # first token of the next.
    allsent = ' '.join(f[3] for f in pdata)

    # Load the stopword list once instead of once per token, and test the
    # lower-cased token so capitalised stopwords ("The", "And") are dropped
    # as well (the NLTK English list is all lower case).
    stop_set = set(stopwords.words('english'))
    lowered = (w.lower() for w in word_tokenize(allsent))
    all_words = FreqDist(w for w in lowered if w not in stop_set)

    # Rank explicitly rather than slicing keys(): only NLTK 2 returned keys
    # sorted by frequency, and dict views cannot be sliced on Python 3.
    # Ties are broken alphabetically so the result is deterministic.
    global top_words
    top_words = sorted(all_words, key=lambda w: (-all_words[w], w))[:500]

    featdata = featureAggregator(pdata)

    print(featdata[:10])