def saveModelStuff(model, modelType, Xtest, ytest, Xtrain, ytrain, filename, clustered=False):
    """Score a fitted model on the test and train sets and save the results.

    The outputs are written into a fresh sub-folder named
    ``util.string_appendDateAndTime(modelType)`` below the directory returned
    by ``learn_util.makePathToTrajectories(filename)``:

    * ``ACTUALS-TEST.txt``     -- ``ytest``
    * ``PREDICTIONS-TEST.txt`` -- ``model.predict(Xtest)``
    * ``SCORE-TEST.txt``       -- ``model.score(Xtest, ytest)``
    * ``SCORE-TRAIN.txt``      -- ``model.score(Xtrain, ytrain)``
    * ``MODEL``                -- the model itself, via ``joblib.dump``

    Args:
        model: fitted estimator exposing ``predict`` and ``score``.
        modelType: label used for the output sub-folder name and the summary
            line.
        Xtest: test inputs; must expose ``.shape``.
        ytest: test targets.
        Xtrain: train inputs (only scored, predictions are not saved).
        ytrain: train targets.
        filename: passed to ``learn_util.makePathToTrajectories`` to locate
            the output directory.
        clustered: accepted for caller compatibility; not used here.

    Returns:
        None. When ``Xtest`` holds no data a message is printed and nothing
        is scored or written.
    """
    # Any zero-length dimension means there is nothing to predict on. The
    # previous check (shape == (0,)) only caught 1-D empty arrays and let
    # 2-D inputs with zero rows fall through to model.predict().
    if 0 in Xtest.shape:
        print("Xtest is empty")
        return

    print("Done fitting model, getting predictions...", time.ctime())
    predictions = model.predict(Xtest)
    print("Done with predictions, scoring...", time.ctime())
    score = model.score(Xtest, ytest)

    # Train-set predictions are not computed or saved here; only the train
    # score is kept as a check against the test score.
    print("Scoring check...", time.ctime())
    check = model.score(Xtrain, ytrain)
    print("Done with all testing, saving outputs.", time.ctime())

    # Plain concatenation is kept on purpose: the helper's return value is
    # used as a prefix exactly as before.
    subfolder = util.string_appendDateAndTime(modelType) + '/'
    path = learn_util.makePathToTrajectories(filename) + subfolder
    if not os.path.exists(path):
        os.makedirs(path)

    np.savetxt(path + 'ACTUALS-TEST.txt', ytest)
    np.savetxt(path + 'PREDICTIONS-TEST.txt', predictions)
    np.savetxt(path + 'SCORE-TEST.txt', np.array([score]))
    np.savetxt(path + 'SCORE-TRAIN.txt', np.array([check]))
    joblib.dump(model, path + 'MODEL')

    print('model ', modelType, ': score = ', score, 'train_score = ', check)
def printClusters(clusterToData, clusterStats, est, dataType, featureList):
    """Print a per-cluster report and dump the clustering to a JSON file.

    For every cluster, each feature statistic in ``clusterStats[cluster]`` is
    printed and also recorded in the dumped dict, followed by a listing of
    the cluster's members. The dumped dict starts as a deep copy of
    ``clusterToData`` and additionally receives the per-feature entries and
    a ``'command-line'`` entry holding ``COMMAND_LINE``.

    Args:
        clusterToData: dict mapping a cluster name (a string, since it is
            concatenated into the report and the dump keys) to an iterable
            of its members.
        clusterStats: dict mapping each cluster name to a dict of
            feature name -> statistic. Features that are numeric (per
            ``c.KMEANS_NUM_FEATURES``) or ingredient constraints are
            treated as a single average; every other feature is treated as
            a dict of name -> value and printed as a distribution.
        est: accepted for caller compatibility; not used here.
        dataType: ``c.KMEANS_RECIPE_DATATYPE`` or ``c.KMEANS_ALIAS_DATATYPE``;
            selects the output directory.
        featureList: list of feature-name strings; used to pick the numeric
            features and to build the output file name.

    Raises:
        ValueError: if ``dataType`` is neither of the two known data types
            (previously this surfaced as an UnboundLocalError on the output
            path). The report is still printed before the error is raised.
    """
    # NOTE: every print below passes exactly one argument, so the call form
    # behaves the same as the Python 2 print statement. A blank line is
    # print("") because print() would output "()" on Python 2.
    dict2dump = copy.deepcopy(clusterToData)
    numtypeFeatures = getListIntersections(featureList, c.KMEANS_NUM_FEATURES)

    for cluster, recipeList in clusterToData.items():
        print(10 * '-' + cluster + ' ' + 10 * '-')
        clusterData = clusterStats[cluster]
        for f, v in clusterData.items():
            if f in numtypeFeatures or isIngredientContraint(f):
                # Undo the scaling applied to these two features; v is a
                # local rebinding, clusterStats itself is left untouched for
                # plain numbers.
                if f == c.KMEANS_FEATURE_TOTALTIME:
                    v /= TOTAL_TIME_ADJUSTMENT
                if f == c.KMEANS_FEATURE_NUM_INGREDIENTS:
                    v /= NUM_INGREDIENT_ADJUSTMENT
                dict2dump[cluster + ' ' + f + ' average score'] = v
                print("The cluster's %s average score was: %f" % (f, v))
            else:
                dict2dump[cluster + ' ' + f + ' distribution'] = v
                print("The cluster's %s distribution was: " % f)
                for name, prob in v.items():
                    print(" %s: %f%%" % (name, prob))
        print("")
        for recipe in recipeList:
            print(recipe)
        print("")

    dict2dump['command-line'] = COMMAND_LINE

    # "<dataType>-cluster-<f1>&<f2>&..." -- same result as appending '&'
    # after every feature and stripping the last one.
    tempFilename = dataType + '-cluster-' + '&'.join(featureList)
    toFilename = util.string_appendDateAndTime(tempFilename)

    # Alias is tested first so that, exactly as with the original pair of
    # independent ifs, it wins in the degenerate case where both constants
    # compare equal.
    if dataType == c.KMEANS_ALIAS_DATATYPE:
        jsonToFilePath = os.path.join(c.PATH_TO_ROOT, "res/kmeans-results/alias", toFilename)
    elif dataType == c.KMEANS_RECIPE_DATATYPE:
        jsonToFilePath = os.path.join(c.PATH_TO_ROOT, "res/kmeans-results/recipe", toFilename)
    else:
        raise ValueError("Unknown dataType for cluster dump: %r" % (dataType,))

    util.dumpJSONDict(jsonToFilePath, dict2dump)
    print('Clustering dumped into: ' + jsonToFilePath)
def dumpAliasDataToJSONFiles(aliasData):
    """Write ``aliasData`` as JSON into the ``aliasdata`` resources folder.

    The file name is ``"aliasData"`` passed through
    ``util.string_appendDateAndTime``; the file is placed under
    ``c.PATH_TO_RESOURCES/aliasdata`` and written with ``util.dumpJSONDict``.
    """
    stampedName = util.string_appendDateAndTime("aliasData")
    targetPath = os.path.join(c.PATH_TO_RESOURCES, "aliasdata", stampedName)
    util.dumpJSONDict(targetPath, aliasData)