def parse(filename, show=False, external=True):
    """Load an experiment-results json file and optionally plot the scores.

    TODO: the file layout described below is assumed, not validated.

    The json file is assumed to be a dict with three entries:
      - 'tests': a one-element list of dicts. The inner dict has experiment
        names as the keys and dicts with various performance metrics as
        values.
      - 'parameters': a dict of parameters used to generate the data.
      - 'data': a dict of relevant statistics regarding the model used to
        generate the data.
    In the case of real data, 'parameters' and 'data' will be empty dicts.
    The entries are looked up through the module constants TEST, PARAMS and
    DATA.

    Args:
        filename: path of the json file to read. When the plot is saved, it
            is written to ``filename + '.pdf'``.
        show: if true, display the plot with util.show(); otherwise save it
            with util.savefig().
        external: flag for whether this function is being called directly
            from the command line (ie it is being called by main()); set
            this to False if you want to call it from anywhere else.

    Returns:
        A tuple (names, scores, tests, parameters, data), where:
          - names is a list of all experiment names in the file, excluding
            those in HIDDEN, ordered by ascending 'Predicted_Mean';
          - scores is a list, in the same order as names, of the prediction
            rates ('Predicted_Mean') achieved by the experiments;
          - tests, parameters and data are the corresponding fields taken
            directly from the json file (tests is the first element of the
            'tests' list).
    """
    with open(filename, 'r') as f:
        data = json.load(f)

    names = []
    scores = []
    for test in data[TEST]:
        # A key function replaces the old cmp-based comparison: it yields
        # the same stable ascending order and also works on Python 3,
        # where sorted() no longer accepts cmp=.
        ordered = sorted(test.keys(),
                         key=lambda name: test[name]['Predicted_Mean'])
        for key in ordered:
            if key not in HIDDEN:
                names.append(str(key))
                scores.append(test[key]['Predicted_Mean'])

    # NOTE(review): all plotting is assumed to be gated on `external`, as
    # the docstring describes -- confirm against the intended behaviour.
    if external:
        util.figure(figsize=(12, 7))
        util.plot_dist(scores, labels=names)
        if data[DATA]:
            for datum in data[DATA]:
                if datum in ('sig_words', 'sig_topics'):
                    # Plots the constant 0 with a label carrying the
                    # statistic; apparently only there to add an entry to
                    # the legend drawn below.
                    util.plot(0, label=str(datum) + " " + str(data[DATA][datum]))
            util.legend(loc='best')
        if show:
            util.show()
        else:
            util.savefig(filename + '.pdf')

    return names, scores, data[TEST][0], data[PARAMS], data[DATA]
def showSigWords(file='lda/trained/'):
    """Plot the distribution of significant-word counts over all topics.

    Reads the matrix stored in ``file + 'final.beta'`` via get_matrix(),
    raises e to the power of each row, sorts the resulting values in
    descending order and passes them to util.get_sig_words(). The values
    collected for all rows are plotted with util.plot_dist() and the figure
    is displayed with pylab.show().

    Args:
        file: directory prefix (including the trailing separator) under
            which 'final.beta' is located.
    """
    # NOTE(review): presumably each row of final.beta holds one topic's log
    # word probabilities, hence the exponentiation -- confirm.
    beta = get_matrix(file + 'final.beta')

    def descending_weights(row):
        # Each exponentiated row is 2-D with a single row, so [0] pulls out
        # the flat list of values.
        weights = np.array(np.power(math.e, row))[0].tolist()
        # Ascending sort followed by a reversal, exactly as before.
        return sorted(weights)[::-1]

    sig_words_per_topic = [
        util.get_sig_words([descending_weights(beta[topic_index])])
        for topic_index in range(len(beta))
    ]

    util.plot_dist(sig_words_per_topic)
    pylab.show()
def parse(filename, show):
    """Plot the 'Predicted_Mean' of every experiment in a json results file.

    NOTE(review): this is a second definition named ``parse``. If both are
    at module level, this one replaces the earlier
    ``parse(filename, show=False, external=True)`` once the module is
    loaded -- confirm which definition is meant to be kept.

    Args:
        filename: path of the json file to read. When the plot is saved, it
            is written to ``filename + '.pdf'``.
        show: if true, display the plot with util.show(); otherwise save it
            with util.savefig().

    Returns:
        The full object decoded from the json file.
    """
    with open(filename, 'r') as handle:
        results = json.load(handle)

    # Gather (name, score) pairs in one pass so that names and scores stay
    # aligned and are evaluated in the same order as a plain loop would.
    pairs = [
        (str(experiment), run[experiment]['Predicted_Mean'])
        for run in results[TEST]
        for experiment in run.keys()
    ]
    labels = [pair[0] for pair in pairs]
    rates = [pair[1] for pair in pairs]

    util.plot_dist(rates, labels=labels)
    if not show:
        util.savefig(filename + '.pdf')
    else:
        util.show()
    return results