Ejemplo n.º 1
0
def main():
    """Process each conversation export: print stats and pickle its corpus.

    Input paths are every command-line argument after the script name; when
    none are given, three hard-coded default exports are used.  For each
    path the data is loaded with ``getJson``, a ``Chat`` is built from the
    participant names found in ``data[0]["participants"]``, its statistics
    are printed, and the three values returned by
    ``Chat.createCorpusAndTokens`` are pickled into the current directory.

    When the module-level ``doModel`` flag is truthy the modelling helpers
    (``doHTBasic``, ``tryBagOfWords``, ``testAcc``, ``multiClassAcc``) are
    run as well and their results printed.

    The interactive ``pdb`` breakpoints only fire when the module-level
    ``debug`` flag is ``True`` so that unattended runs are not blocked.
    """
    if len(sys.argv) < 2:
        chowderPath = '/home/avanroi1/messages/inbox/chowder_g03zkk7sug'
        weirdPathName = '/home/avanroi1/messages/inbox/mualphanugammaomicron_baafcd34pg'
        groupmePathName = '/home/avanroi1/groupmeData/theboys'
        #groupmePathName = '/home/avanroi1/groupmeData/allupcs'
        datafs = [chowderPath, weirdPathName, groupmePathName]
    else:
        # Everything after the script name is treated as an input path.
        datafs = sys.argv[1:]

    # Loop-invariant, so it is called once instead of once per input.  The
    # result is currently unused (only commented-out code consumed
    # ``args.username``); the call is kept in case parsing has side effects
    # such as argument validation -- TODO confirm and drop if not needed.
    parseArgs()

    # Output files paired with the value written to each; filled in per input.
    pickleTargets = ('./conversation.pk', './vocab.pk', './userOrder.pk')

    for dataPath in datafs:
        data = getJson(dataPath)
        # GroupMe variant that used to be sketched here:
        #gmdata = getJson(groupmePathName)
        #gcids,gcnames = startGroupMe(gmdata)
        #c2= Chat(gcids,gmdata,isGM=True)
        #c2.updateNames(gcnames)
        nameVec = data[0]["participants"]
        print(nameVec)
        nameVec = [participant['name'] for participant in nameVec]
        if debug is True:
            print(nameVec)

        c2 = Chat(nameVec, data)
        c2.popSelfDict()
        c2.updateUserStats()
        c2.printStats()
        corpus, vocab, userOrder = c2.createCorpusAndTokens()

        if debug is True:
            # Inspect the freshly built corpus before it is written out.
            pdb.set_trace()

        # NOTE(review): the output paths are the same for every input, so
        # each iteration overwrites the previous input's pickles and only
        # the last input survives.  Left as-is because downstream readers
        # may depend on these exact file names.
        for outPath, payload in zip(pickleTargets, (corpus, vocab, userOrder)):
            # A dedicated handle name keeps the outer loop variable intact.
            with open(outPath, 'wb') as outFile:
                pk.dump(payload, outFile)

        if debug is True:
            pdb.set_trace()

        #model = Word2Vec(corpus, size=100, window=5, min_count=1, workers=4)
        #model.save("word2vec.model")
        if doModel:
            # ``modelData`` is kept separate from ``data`` so the loaded
            # export is not clobbered by the modelling output.
            bestmodel, modelData, targs = doHTBasic(c2)
            model, train, test = tryBagOfWords(c2, bestModel=bestmodel)

            whatevs = bestmodel.predict(["Miss you mangos"])
            print(whatevs)
            print(bestmodel.best_score_)
            print(doPipeLineAcc(bestmodel, modelData, targs))
            print(bestmodel.best_params_)
            # The return values were never read (both were assigned to the
            # same local), so the calls are kept without binding them.
            testAcc(model, train)
            testAcc(model, test)
            multiClassAcc(model, train)
            print("Test set results")

            multiClassAcc(model, test)