def main():
    """Run the chat-analysis pipeline over one or more exported conversations.

    Input paths are taken from the command line (every argument after the
    script name). When no argument is given, three hard-coded default
    conversation directories are used instead.

    For each input path the function:
      1. loads the conversation with ``getJson``;
      2. builds a ``Chat`` from the participant names found in
         ``data[0]["participants"]`` and prints its statistics;
      3. pickles the corpus, vocabulary and user order to
         ``./conversation.pk``, ``./vocab.pk`` and ``./userOrder.pk``
         (the same file names are reused for every path, so a later path
         overwrites the output of an earlier one);
      4. if the module-level ``doModel`` flag is truthy, fits and evaluates
         the classifiers and prints their scores.

    NOTE(review): this layout assumes the whole pipeline (including the
    ``doModel`` section) runs once per input path -- confirm against the
    intended nesting.
    """
    default_paths = [
        '/home/avanroi1/messages/inbox/chowder_g03zkk7sug',
        '/home/avanroi1/messages/inbox/mualphanugammaomicron_baafcd34pg',
        # Alternative GroupMe export: '/home/avanroi1/groupmeData/allupcs'
        '/home/avanroi1/groupmeData/theboys',
    ]
    # Everything after the script name is an input path; fall back to the
    # defaults only when no path was supplied.
    datafs = list(sys.argv[1:]) if len(sys.argv) >= 2 else default_paths

    for path in datafs:
        data = getJson(path)

        # args.username is not consumed yet (appending it to nameVec is
        # disabled); the call is kept where it was in case parseArgs has
        # side effects.
        args = parseArgs()

        participants = data[0]["participants"]
        print(participants)
        nameVec = [person['name'] for person in participants]
        if debug is True:
            print(nameVec)

        c2 = Chat(nameVec, data)
        c2.popSelfDict()
        c2.updateUserStats()
        c2.printStats()

        corpus, vocab, userOrder = c2.createCorpusAndTokens()

        # NOTE(review): interactive breakpoint left in place on purpose to
        # preserve current behaviour; remove for unattended runs.
        pdb.set_trace()

        # Use a dedicated handle name so the loop variable is not rebound.
        outputs = (
            ('./conversation.pk', corpus),
            ('./vocab.pk', vocab),
            ('./userOrder.pk', userOrder),
        )
        for out_name, payload in outputs:
            with open(out_name, 'wb') as out_file:
                pk.dump(payload, out_file)

        # NOTE(review): second interactive breakpoint, see above.
        pdb.set_trace()

        if doModel:
            bestmodel, model_data, targs = doHTBasic(c2)
            model, train, test = tryBagOfWords(c2, bestModel=bestmodel)

            whatevs = bestmodel.predict(["Miss you mangos"])
            print(whatevs)
            print(bestmodel.best_score_)
            print(doPipeLineAcc(bestmodel, model_data, targs))
            print(bestmodel.best_params_)

            # The return values were previously both stored in the same
            # unused local; the calls are kept, the dead bindings dropped.
            testAcc(model, train)
            testAcc(model, test)

            multiClassAcc(model, train)
            print("Test set results")
            multiClassAcc(model, test)