def main():
    """Build the global ``top_words`` list and print a slice of the features.

    Steps, in order:
      1. Ask ``parser`` for the user input and resolve the file list from
         its ``'train'`` entry.
      2. Parse those files and concatenate element 3 of every parsed
         record into one string.
      3. Tokenize that string, drop English stopwords, and count the
         lower-cased tokens with ``FreqDist``.
      4. Store the first 500 keys of the distribution in the module-level
         ``top_words``.
      5. Run ``featureAggregator`` on the parsed data and print its first
         ten items.
    """
    global top_words

    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    pdata = parser.parseFiles(fileList)

    # Single join instead of repeated ``+=`` on a growing string.
    allsent = ''.join(f[3] for f in pdata)

    # Fetch the stopword list once (not once per token) and keep it in a
    # set so each membership test is constant time.
    english_stopwords = set(stopwords.words('english'))

    # Lower-case *before* filtering: NLTK's English stopword list is
    # lower-case, so testing the raw token would let capitalised stopwords
    # ("The", "And") through and count them under their lower-cased form.
    lowered = (w.lower() for w in word_tokenize(allsent))
    all_words = FreqDist(w for w in lowered if w not in english_stopwords)

    # NOTE(review): slicing ``keys()`` assumes a FreqDist whose keys() is a
    # list ordered by decreasing frequency (older NLTK releases) -- confirm
    # against the installed NLTK version.
    top_words = all_words.keys()[:500]

    # presumably featureAggregator reads the global ``top_words`` set above;
    # verify against its definition.
    featdata = featureAggregator(pdata)

    print(featdata[:10])
def main():
    """Build ``top_words``, show one sample data item and report accuracy.

    Steps, in order:
      1. Ask ``parser`` for the user input, resolve the file list from its
         ``'train'`` entry and parse the files.
      2. Concatenate element 3 of every parsed record, tokenize the result,
         drop English stopwords and count the lower-cased tokens.
      3. Store the first 500 keys of the distribution in the module-level
         ``top_words``.
      4. Run ``extractor.featureAggregator`` on the parsed data and print
         one sample item (elements 0-3 as a table row, element 4 as a
         key/value listing).
      5. Pass the feature data to ``splitfeatdata`` and print the returned
         accuracy values together with their mean.
    """
    global top_words

    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    parsedata = parser.parseFiles(fileList)

    # Single join instead of repeated ``+=`` on a growing string.
    allsent = ''.join(f[3] for f in parsedata)

    # Fetch the stopword list once (not once per token) and keep it in a
    # set so each membership test is constant time.
    english_stopwords = set(stopwords.words('english'))

    # Lower-case *before* filtering: NLTK's English stopword list is
    # lower-case, so testing the raw token would let capitalised stopwords
    # through and count them under their lower-cased form.
    lowered = (w.lower() for w in word_tokenize(allsent))
    all_words = FreqDist(w for w in lowered if w not in english_stopwords)

    # NOTE(review): slicing ``keys()`` assumes a FreqDist whose keys() is a
    # list ordered by decreasing frequency (older NLTK releases) -- confirm
    # against the installed NLTK version.
    top_words = all_words.keys()[:500]

    featdata = extractor.featureAggregator(parsedata)

    print("Sample Data Item:\n\n")

    print("%20s %4s %4s %20s" % ("FILENAME", "LINENUM", "VOTE", "SENTENCE"))
    print("-" * 79)

    if len(featdata) > 0:
        # Item 20 is the usual sample; fall back to the last item when the
        # data set is smaller so a short run does not die with IndexError.
        sample = featdata[min(20, len(featdata) - 1)]
        print("%10s %4s %4s %20s" % (sample[0], sample[1], sample[2], sample[3]))

        print("\n\nFeatures of this Data Item")
        print("-" * 79)
        for key, val in sample[4].items():
            print("%50s : %10s" % (key, val))

    allacc = splitfeatdata(featdata)

    print("\n\n")
    print("-" * 60)
    # Wrap in a 1-tuple so a tuple-valued ``allacc`` is formatted as one
    # value instead of being unpacked as several format arguments.
    print("Accuracy Values: %s" % (allacc,))
    print("==" * 60)
    if len(allacc) > 0:
        # float() keeps this a true division under Python 2 even if the
        # accuracies are integers; the guard avoids ZeroDivisionError when
        # no accuracy values were produced.
        overall = float(sum(allacc)) / len(allacc)
        print("Overall Classifier Accuracy %4.4f " % overall)
Example #3
0
def main():
    """Interactive entry point: open a SQLite file and run one menu action.

    Prompts for a database file name, connects to it, and offers three
    options:
      1. print the result of ``getAttributeClosure(cursor)``;
      2. print the result of ``compareFDs(cursor)``;
      3. repeatedly ask for an input table and an FD table and convert the
         input table with ``convertbcnf`` or ``convert3nf``.

    Any other menu answer does nothing. The connection is always closed
    before returning, including on the early-exit paths and on errors.
    """
    database = raw_input("Enter name of .db file >")
    conn = sqlite3.connect(database)
    try:
        cursor = conn.cursor()

        cursor.execute("SELECT COUNT(*) FROM SQLITE_MASTER;")
        # Each logical table is expected to come as a pair (input table +
        # FD table), so halve the schema entry count. ``//`` makes the
        # integer division explicit.
        nTables = cursor.fetchone()[0] // 2

        initScreen = raw_input("Please select an option:\n"
                               "1. Compute attribute closure\n"
                               "2. Compare FD tables\n"
                               "3. Normalize a table\n")

        if initScreen == '1':
            print(getAttributeClosure(cursor))

        elif initScreen == '2':
            print(compareFDs(cursor))

        elif initScreen == '3':
            # At most one pass per table pair; the user can stop earlier.
            for _ in range(nTables):
                inTable = raw_input("Enter name of input table or q to quit >")

                if inTable == 'q':  # user wishes to quit
                    break

                inFDtable = raw_input("Enter name of FD table >")
                inRows, inFDs, fdDict = getInput(inTable, inFDtable, cursor)

                os.system("clear")
                choice = raw_input(
                    "Please choose one of the following "
                    "(entering anything else will terminate the program):\n"
                    "1. Convert to BCNF\n2. Convert to 3NF\n")

                # NOTE(review): ``inTable[5:]`` drops the first five
                # characters of the table name -- presumably a fixed
                # prefix; confirm against the table naming convention.
                if choice == '1':
                    convertbcnf(inRows, inFDs, cursor, conn, fdDict, inTable[5:])
                elif choice == '2':
                    convert3nf(inRows, inFDs, cursor, conn, fdDict, inTable[5:])
                else:
                    # Anything else terminates; ``finally`` still closes
                    # the connection.
                    return
    finally:
        conn.close()
Example #4
0
def main():
    """Compute the global ``top_words`` and print the first ten feature items.

    The training file list is resolved from the ``'train'`` entry of the
    user input returned by ``parser.getInput()``. Element 3 of each parsed
    record is joined into one string, tokenized, stripped of English
    stopwords and counted in lower-case form. The first 500 keys of that
    frequency distribution are published as the module-level ``top_words``
    before ``featureAggregator`` is run on the parsed data.
    """
    global top_words

    userInput = parser.getInput()
    fileList = parser.getFiles(userInput['train'])
    pdata = parser.parseFiles(fileList)

    # Build the corpus text in one pass rather than by repeated ``+=``.
    allsent = ''.join(record[3] for record in pdata)

    # The stopword list does not change between tokens: load it once and
    # use a set for constant-time lookups.
    stop_set = set(stopwords.words('english'))

    # Compare the lower-cased token against the (lower-case) stopword
    # list; checking the raw token lets capitalised stopwords such as
    # "The" slip through and be counted as "the".
    tokens = (tok.lower() for tok in word_tokenize(allsent))
    all_words = FreqDist(tok for tok in tokens if tok not in stop_set)

    # NOTE(review): relies on ``FreqDist.keys()`` being a sliceable list
    # ordered by decreasing frequency (older NLTK releases) -- confirm
    # against the installed NLTK version.
    top_words = all_words.keys()[:500]

    # presumably featureAggregator consumes the global ``top_words``;
    # verify against its definition.
    featdata = featureAggregator(pdata)

    print(featdata[:10])