def evaluate(self, data, sess):
    res = []
    all_labels = []
    all_scores = []
    sample = 0
    for idts, idbs, id_labels in data:
        sample += 1
        cur_scores = self.eval_batch(idts, idbs, sess)
        assert len(id_labels) == len(cur_scores)  # equal to 20
        all_labels.append(id_labels)
        all_scores.append(cur_scores)
        ranks = (-cur_scores).argsort()
        ranked_labels = id_labels[ranks]
        res.append(ranked_labels)
    e = Evaluation(res)
    MAP = e.MAP()
    MRR = e.MRR()
    P1 = e.Precision(1)
    P5 = e.Precision(5)
    if 'mlp_dim' in self.args and self.args.mlp_dim != 0:
        loss1 = dev_entropy_loss(all_labels, all_scores)
    else:
        loss1 = devloss1(all_labels, all_scores)
    loss0 = devloss0(all_labels, all_scores)
    loss2 = devloss2(all_labels, all_scores)
    return MAP, MRR, P1, P5, loss0, loss1, loss2
def run(configFile, name):
    OutputPath.init(configFile)
    thread = ThreadWritableObject(configFile, name)
    thread.start()
    sys.stdout = thread
    sys.stderr = thread  # was `sys.errout`, which is not a real attribute and silently did nothing
    try:
        db = Database(configFile, 'specials')
        db.initialize()
        evaluation = Evaluation(configFile, db)
        evaluation.updateOverdue()
        path = OutputPath.getSharePath()
        sharePath = getProperty(configFile, 'output-share-file')
        cmd = '/bin/rm -f {1} && /bin/ln -s {0} {1}'.format(path, sharePath)
        runCommand(cmd)
        data = evaluation.output()
        with open(path, 'w') as fp:
            fp.write(reprDict(data))
    except KeyboardInterrupt:
        pass
    except Exception, e:
        print 'Error occurred at', datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        traceback.print_exc(file=sys.stdout)
def run(configFile, name, content, savefile):
    OutputPath.init(configFile)
    try:
        db = Database(configFile, 'specials')
        db.initialize()
        evaluation = Evaluation(configFile, db)
        data = evaluation.search(content)
        if savefile is not None:
            with open(savefile, 'w') as fp:
                fp.write(reprDict(data))
        else:
            print reprDict(data)
        return 0
    except KeyboardInterrupt:
        pass
    except Exception, e:
        print 'Error occurred at', datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        traceback.print_exc(file=sys.stdout)
def __init__(self, name, param=10, link_method=1, granularity=1):
    self.name = name
    self.agent = None
    self.param = param
    self.link_method = link_method
    self.granularity = granularity
    self.evaluation = Evaluation()
def evaluate(all_ranked_labels):
    evaluator = Evaluation(all_ranked_labels)
    MAP = evaluator.MAP() * 100
    MRR = evaluator.MRR() * 100
    P1 = evaluator.Precision(1) * 100
    P5 = evaluator.Precision(5) * 100
    return MAP, MRR, P1, P5
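# Several of the snippets above assume an Evaluation class that takes a list of
# ranked binary relevance arrays (one per query, already sorted by score) and
# exposes MAP(), MRR() and Precision(k). That class is not included in this
# listing; the following is only a minimal sketch of what such metrics usually
# compute, under that assumption. The name RankingEvaluationSketch and its
# internals are illustrative, not the original API.
import numpy as np

class RankingEvaluationSketch(object):
    def __init__(self, ranked_label_lists):
        # each element is an array of 0/1 labels, ordered by descending score
        self.data = [np.asarray(labels) for labels in ranked_label_lists]

    def MAP(self):
        aps = []
        for labels in self.data:
            hits = np.where(labels == 1)[0]
            if len(hits) == 0:
                aps.append(0.0)
                continue
            # precision at each rank that holds a relevant item, averaged per query
            precisions = [(i + 1.0) / (rank + 1.0) for i, rank in enumerate(hits)]
            aps.append(np.mean(precisions))
        return np.mean(aps)

    def MRR(self):
        rrs = []
        for labels in self.data:
            hits = np.where(labels == 1)[0]
            rrs.append(1.0 / (hits[0] + 1) if len(hits) else 0.0)
        return np.mean(rrs)

    def Precision(self, k):
        return np.mean([np.mean(labels[:k]) for labels in self.data])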
def main():
    # Step 1: Collect data (uncomment this if you don't have the data).
    # Change movie names in config.properties for the ones you want.
    print("---- Starting Scraping Module ----")
    scrapper = RTScrapper()
    scrapper.main()
    print("\n")
    print("---- Completed Scraping of Movie Reviews ----")

    # Step 2: Create a complete DataFrame with all the information.
    connector = Organizer()
    connector.connectDFtoReview(df_filename="DFM2R.pkl", reviewFolder="ScrappedData")
    print("\n")
    print("---- Starting Ranking Module ----")

    # Step 3: Run weighted PageRank for scores.
    # ss = getConfigParams()
    summarySize, Scores_folder = getConfigParams()
    for Measure in MeaureTypes:
        print("-- Using " + str(Measure) + " --")
        ranker = RankSentences(Measuretype=Measure, summarySize=summarySize,
                               filename="CompleteData.pkl")
        ranker.main()
    print("\n")
    print("---- Completed Ranking of Sentences ----")

    # Final step: evaluate.
    print("\n")
    print("---- Evaluating Summaries ----")
    evaluate = Evaluation()
    evaluate.main(folderName=Scores_folder)
    print("\n")
    print("---- Completed Evaluation ----")
def _plot_epoch_samples(self, generator, discriminator):
    samples = []
    predictions = []
    for batch in tf.data.Dataset.from_tensor_slices(
            self.epoch_sample_input).batch(self._config.batch_size):
        new_samples = generator(batch, training=True)
        disc_input = tf.concat(
            [batch, new_samples], axis=-1
        ) if self._config.conditioned_discriminator else new_samples
        new_predictions = logistic(discriminator(disc_input, training=True))
        samples.append(new_samples)
        predictions.append(new_predictions)

    images_per_sample = 2
    rows_and_cols = (self._epoch_images_shape[0],
                     self._epoch_images_shape[1] * images_per_sample)
    samples = tf.concat(samples, axis=0)
    predictions = tf.concat(predictions, axis=0)
    for i in range(samples.shape[0]):
        plt.subplot(*rows_and_cols, (i * images_per_sample) + 1)
        Evaluation.plot_image(self.epoch_sample_input[i])
        plt.subplot(*rows_and_cols, (i * images_per_sample) + 2)
        Evaluation.plot_image(samples[i], np.round(predictions[i].numpy(), 5))
    return samples, predictions
def eval_classifier(classifierToUse, featuresToUse, testOrTrain="train"):
    print("Chosen feature: {0}".format(featuresToUse))
    print("Chosen classifier: {0}".format(classifierToUse))
    fe = FeatureExtractor(featuresToUse)
    dataset = DataSet(fe)
    classifier = Classifier()
    evaluate = Evaluation()
    print "Test or train: %s" % testOrTrain
    for feature_class, files in getTestData(testOrTrain).items():
        print "%s" % testOrTrain
        for f in files:
            dataset.addFile(feature_class, f)
    print "Dataset initialized"
    print_class_stats(dataset.classes)
    print "Test set created."
    a_train, a_test, c_train, c_test = train_test_split(
        dataset.featureVector, dataset.classes, test_size=0.9)
    c_pred = classifier.classification(a_train, a_test, c_train, c_test,
                                       classifierToUse)
    evaluate.evaluate(c_pred, c_test, featuresToUse, classifierToUse)
def __init__(self, config, model_name):
    """
    Initialize model class
    :param config: experiment configuration
    :param model_name: model name
    """
    super(HML, self).__init__()
    self.config = config
    self.device = torch.device("cpu")
    self.model_name = model_name

    self.item_emb = ItemEmbedding(config)
    self.user_emb = UserEmbedding(config)
    self.mp_learner = MetapathLearner(config)
    self.meta_learner = MetaLearner(config)

    self.mp_lr = config['mp_lr']
    self.local_lr = config['local_lr']
    self.emb_dim = self.config['embedding_dim']
    self.cal_metrics = Evaluation()

    self.ml_weight_len = len(self.meta_learner.update_parameters())
    self.ml_weight_name = list(self.meta_learner.update_parameters().keys())
    self.mp_weight_len = len(self.mp_learner.update_parameters())
    self.mp_weight_name = list(self.mp_learner.update_parameters().keys())

    self.transformer_liners = self.transform_mp2task()
    self.meta_optimizer = torch.optim.Adam(self.parameters(), lr=config['lr'])
def main(args):
    # Load configuration
    config = Configuration(args.yaml_path)

    print("Loading Probase...")
    probase = Probase(config)

    print("Loading dataset...")
    dataset = Data(config)

    print("Loading NLP utility...")
    nlp = NLP('en')

    print("Loading feature extractor...")
    features = Feature(config, probase, nlp=nlp)

    print("Extracting vector features")
    features.extract_vector_features(dataset)

    print("Extracting statistical vector features")
    features.extract_statistical_features(dataset)

    print("Evaluating classifiers")
    ev = Evaluation(config, dataset)
    ev.full_evaluation(features.X, features.y)
def _plot_epoch_samples(self, generator, discriminator): hires_samples = [] lowres_samples = [] predictions = [] for batch in tf.data.Dataset.from_tensor_slices( self.epoch_sample_input).batch(self._config.batch_size): new_lowres_samples = self._low_res_generator(batch, training=True) new_samples = generator(new_lowres_samples, training=True) new_predictions = logistic( discriminator(new_samples, training=True)) hires_samples.append(new_samples) lowres_samples.append(new_lowres_samples) predictions.append(new_predictions) hires_samples = tf.concat(hires_samples, axis=0) lowres_samples = tf.concat(lowres_samples, axis=0) predictions = tf.concat(predictions, axis=0) rows_and_cols = (self._epoch_images_shape[0], self._epoch_images_shape[1] * 2) for i in range(hires_samples.shape[0]): plt.subplot(*rows_and_cols, 2 * i + 1) Evaluation.plot_image( lowres_samples[i], "{}x{}".format(lowres_samples.shape[1], lowres_samples.shape[2])) plt.subplot(*rows_and_cols, 2 * i + 2) Evaluation.plot_image( hires_samples[i], "{}x{}: {:.5f}".format(hires_samples.shape[1], hires_samples.shape[2], predictions[i].numpy())) return (hires_samples, lowres_samples), predictions
def __init__(self, world, reset_callback=None, reward_callback=None,
             observation_callback=None, info_callback=None, done_callback=None):
    logger.debug("Simsim Env")
    self._world = world
    self._drones = world.get_drones()
    self._n_drone = world.n_drone
    self._evaluation = Evaluation(world)

    # scenario callbacks
    self.reset_callback = reset_callback
    self.reward_callback = reward_callback
    self.observation_callback = observation_callback
    self.info_callback = info_callback
    self.done_callback = done_callback

    self._action_dim = 4
    self._obs_dim = np.reshape(self.get_obs(), -1).shape[0] / self._n_drone
    self._action_max = np.array([20, 20, 0.2, np.pi / 5])      # environment configuration
    self._action_min = np.array([-20, -20, -0.2, -np.pi / 5])  # environment configuration
def evaluate(self, args, data, cnn): res = [] for idts, idbs, labels in data: xt = self.embedding.forward(idts.ravel()) xt = xt.reshape((idts.shape[0], idts.shape[1], self.embedding.n_d)) xb = self.embedding.forward(idbs.ravel()) xb = xb.reshape((idbs.shape[0], idbs.shape[1], self.embedding.n_d)) titles = Variable(torch.from_numpy(xt)).float() bodies = Variable(torch.from_numpy(xb)).float() if args.cuda: titles = titles.cuda() bodies = bodies.cuda() outputs = cnn(titles, bodies) pos = outputs[0].view(1, outputs[0].size(0)) scores = torch.mm(pos, outputs[1:].transpose(1, 0)).squeeze() if args.cuda: scores = scores.data.cpu().numpy() else: scores = scores.data.numpy() assert len(scores) == len(labels) ranks = (-scores).argsort() ranked_labels = labels[ranks] res.append(ranked_labels) e = Evaluation(res) MAP = e.MAP() * 100 MRR = e.MRR() * 100 P1 = e.Precision(1) * 100 P5 = e.Precision(5) * 100 return MAP, MRR, P1, P5
def on_epoch_end(self, epoch, logs=None):
    modelName = os.path.join(
        self.foldPath, self.category + "_weights_" + str(epoch) + ".hdf5")
    keras.models.save_model(self.model, modelName)
    print "Saving model to ", modelName
    print "Running evaluation ........."
    xEval = Evaluation(self.category, None)
    xEval.init_from_model(self.model)
    start = time()
    neScore, categoryDict = xEval.eval(self.multiOut, details=True)
    end = time()
    print "Evaluation Done", str(neScore), " cost ", end - start, " seconds!"
    for key in categoryDict.keys():
        scores = categoryDict[key]
        print key, ' score ', sum(scores) / len(scores)
    with open(self.valLog, 'a+') as xfile:
        xfile.write(modelName + ", Score " + str(neScore) + "\n")
        for key in categoryDict.keys():
            scores = categoryDict[key]
            xfile.write(key + ": " + str(sum(scores) / len(scores)) + "\n")
        # the with-block closes the file; an explicit close() is not needed
def evaluate(self, data, session): # return for each query the labels, ranked results, and scores eval_func = self.score_func all_ranked_labels = [] all_ranked_ids = [] all_ranked_scores = [] query_ids = [] all_MAP, all_MRR, all_Pat1, all_Pat5 = [], [], [], [] for idts, idbs, labels, pid, qids in data: scores = eval_func(idts, idbs, session) assert len(scores) == len(labels) ranks = (-scores).argsort() ranked_scores = np.array(scores)[ranks] ranked_labels = labels[ranks] ranked_ids = np.array(qids)[ranks] query_ids.append(pid) all_ranked_labels.append(ranked_labels) all_ranked_ids.append(ranked_ids) all_ranked_scores.append(ranked_scores) this_ev = Evaluation([ranked_labels]) all_MAP.append(this_ev.MAP()) all_MRR.append(this_ev.MRR()) all_Pat1.append(this_ev.Precision(1)) all_Pat5.append(this_ev.Precision(5)) print 'average all ... ', sum(all_MAP) / len(all_MAP), sum( all_MRR) / len(all_MRR), sum(all_Pat1) / len(all_Pat1), sum( all_Pat5) / len(all_Pat5) return all_MAP, all_MRR, all_Pat1, all_Pat5, all_ranked_labels, all_ranked_ids, query_ids, all_ranked_scores
def pipeline(tx_train, y_train, tx_val, y_val, degrees, gamma, lambda_, epochs, verbose):
    """Run the model training and evaluation on the given parameters."""
    # Perform data cleaning (missing values, constant features, outliers, standardization)
    data_cleaner = DataCleaning()
    tx_train = data_cleaner.fit_transform(tx_train)
    tx_val = data_cleaner.transform(tx_val)

    # Perform feature engineering
    feature_generator = FeatureEngineering()
    x_train = feature_generator.fit_transform(tx=tx_train, degree=degrees)
    x_val = feature_generator.transform(tx=tx_val)

    # Initialize values
    initial_w = np.zeros(x_train.shape[1])

    # Train model
    w, _ = reg_logistic_regression(y_train, x_train, lambda_, initial_w, epochs, gamma, verbose)

    # Perform inference on validation
    pred = predict_labels(weights=w, data=x_val, logistic=True)

    evaluator = Evaluation(y_val, pred)
    return evaluator.get_f1(), evaluator.get_accuracy()
def __init__(self, root):
    self.labelusr = tk.Label(root, text='学号:')  # "student ID"
    self.labelusr.grid(row=0, sticky=tk.W)
    self.username = tk.StringVar()
    tk.Entry(root, textvariable=self.username).grid(row=0, column=1)

    self.labelpw = tk.Label(root, text='密码:')  # "password"
    self.labelpw.grid(row=1, sticky=tk.W)
    self.password = tk.StringVar()
    tk.Entry(root, textvariable=self.password, show='*').grid(row=1, column=1)

    self.labelcode = tk.Label(root, text='验证码:')  # "captcha"
    self.labelcode.grid(row=2, sticky=tk.W)
    self.code = tk.StringVar()
    tk.Entry(root, textvariable=self.code).grid(row=2, column=1)

    self.button1 = tk.Button(root, text="登陆", command=self.prelogin)  # "log in"
    self.button1.grid(row=3, column=0)
    self.button2 = tk.Button(root, text="更换验证码", command=self.prechange)  # "refresh captcha"
    self.button2.grid(row=3, column=2)

    self.info = tk.LabelFrame(root, text='信息栏: ')  # "info panel"
    self.error = tk.StringVar()
    self.info.grid(row=4, column=1)
    self.Labelerr = tk.Label(self.info, textvariable=self.error,
                             wraplength=130, height=2)  # the height of the message box can be adjusted here
    self.Labelerr.grid()
    self.eva = Evaluation(self.error)

    self.labelimg = tk.Label(root)
    self.labelimg.grid(row=2, column=2)
    self.prechange()
def maximize_activations(generator, discriminator, args): tf.logging.fatal("Maximizing feature activations") block = 0 feature = 0 learning_rate = 1e-2 adam = tf.train.AdamOptimizer(learning_rate=learning_rate) samples = 1 disc_input = tf.Variable(tf.random_normal([samples, 256, 256, 3])) _ = discriminator(disc_input) disc_summary = [] discriminator.summary(print_fn=disc_summary.append) disc_summary = "\n".join(disc_summary) tf.logging.info("Discriminator model:\n{}".format(disc_summary)) epochs = 1000 epochs_per_image = epochs // 10 inputs = [] activations = [] for i in trange(epochs + 1): with tf.GradientTape() as tape: inputs.append(disc_input.numpy() if ( i % epochs_per_image) == 0 else None) _ = discriminator(disc_input) activation = discriminator.discriminators[0].activations[ block][:, :, :, feature] activations.append(activation if (i % epochs_per_image) == 0 else None) loss = tf.reduce_sum(activation) gradient = tape.gradient(-loss, disc_input) adam.apply_gradients([(gradient, disc_input)]) if (i + 1) % epochs_per_image == 0: tf.logging.info("{}/{}: Activations: {}".format( i + 1, epochs, ", ".join([ "{:.1f}".format(tf.reduce_sum(activation[j])) for j in range(samples) ]))) tf.logging.info("Plotting...") plt.figure(figsize=(32, 24)) plt.tight_layout() columns = epochs // epochs_per_image + 1 for i in range(columns): for j in range(samples): plt.subplot(2 * samples, columns, 1 + i + j * 2 * columns) Evaluation.plot_image( inputs[epochs_per_image * i][j], title="Epoch {}".format(i * epochs_per_image) if j == 0 else "") plt.subplot(2 * samples, columns, columns + 1 + i + j * 2 * columns) Evaluation.plot_image( tf.expand_dims(-activations[epochs_per_image * i][j], -1), title=str( tf.reduce_sum(activations[epochs_per_image * i][j]).numpy())) plt.suptitle("Learning rate {}, block {}, feature {}".format( learning_rate, block, feature)) plt.savefig("block-{}-feature-{}.png".format(block, feature))
def evaluate_accuracy(contingency_table: np.ndarray, evaluation: Evaluation,
                      is_sampled_graph: bool = False) -> np.ndarray:
    """Evaluates the accuracy of partitioning.

    Parameters
    ----------
    contingency_table : np.ndarray (int)
        the contingency table (confusion matrix) comparing the true block assignment to the
        algorithmically determined block assignment
    evaluation : Evaluation
        stores evaluation results
    is_sampled_graph : bool
        True if evaluation is for a sampled graph. Default = False

    Returns
    -------
    joint_prob : np.ndarray (float)
        the normalized contingency table
    """
    # the joint probability of the two partitions is just the normalized contingency table
    joint_prob = contingency_table / sum(sum(contingency_table))
    accuracy = sum(joint_prob.diagonal())
    print('Accuracy (with optimal partition matching): {}'.format(accuracy))
    print()
    if is_sampled_graph:
        evaluation.sampled_graph_accuracy = accuracy
    else:
        evaluation.accuracy = accuracy
    return joint_prob
def evaluate_partition(true_b: np.ndarray, alg_b: np.ndarray, evaluation: Evaluation):
    """Evaluate the output partition against the truth partition and report the correctness
    metrics. Compare the partitions using only the nodes that have known truth block assignment.

    Parameters
    ----------
    true_b : ndarray (int)
        array of truth block assignment for each node. If the truth block is not known for a
        node, -1 is used to indicate unknown blocks.
    alg_b : ndarray (int)
        array of output block assignment for each node. The length of this array corresponds to
        the number of nodes observed and processed so far.
    evaluation : Evaluation
        stores evaluation results; the goodness-of-partitioning measures are filled in on this
        object and persisted via evaluation.save()
    """
    contingency_table, N = create_contingency_table(true_b, alg_b, evaluation)
    joint_prob = evaluate_accuracy(contingency_table, evaluation)
    evaluate_pairwise_metrics(contingency_table, N, evaluation)
    evaluate_entropy_metrics(joint_prob, evaluation)
    evaluation.save()
def calc_entropy(p_marginal_truth: np.ndarray, p_marginal_alg: np.ndarray, idx_truth: np.ndarray, idx_alg: np.ndarray, evaluation: Evaluation, is_subgraph: bool = False) -> Evaluation: """Calculates the entropy of the truth and algorithm partitions. Parameters --------- p_marginal_truth : np.ndarray (float) the marginal probabilities of the truth partition p_marginal_alg : np.ndarray (float) the marginal probabilities of the algorithm partition idx_truth : np.ndarray (int) the indexes of the non-zero marginal probabilities of the truth partition idx_alg : np.ndarray (int) the indexes of the non-zero marginal probabilities of the algorithm partition is_subgraph : bool True if evaluation is for a subgraph. Default = False Returns ------ evaluation : Evaluation the evaluation object, updated with the entropy metrics """ # compute entropy of the non-partition2 and the partition2 version entropy_truth = -np.sum(p_marginal_truth[idx_truth] * np.log(p_marginal_truth[idx_truth])) print('Entropy of truth partition: {}'.format(abs(entropy_truth))) entropy_alg = -np.sum(p_marginal_alg[idx_alg] * np.log(p_marginal_alg[idx_alg])) print('Entropy of alg. partition: {}'.format(abs(entropy_alg))) if is_subgraph: evaluation.subgraph_entropy_truth = entropy_truth evaluation.subgraph_entropy_algorithm = entropy_alg else: evaluation.entropy_truth = entropy_truth evaluation.entropy_algorithm = entropy_alg return evaluation
def crossValidationIris(k=5): testData1 = np.load("iris_data/competitionData.npy") testData2 = np.load("iris_data/evaluationData.npy") trainData = np.load("iris_data/trainingData.npy") wholeData = np.concatenate((trainData, testData1, testData2)).astype(float) folds = k attTypes = [1, 1, 1, 1] trainingSets = np.split(wholeData, [(i + 1) * round(len(wholeData) / folds) for i in range(folds - 1)]) for set in trainingSets: print('\n\n\n', set, '\n\n\n') for idx, testData in enumerate(trainingSets): print(np.r_[0:idx, idx + 1:len(trainingSets) + 1]) trainingData = np.concatenate( [trainingSets[i] for i in np.r_[0:idx, idx + 1:len(trainingSets)]]) print('testData: ', len(testData)) print('trainingData: ', len(trainingData)) print('K FOLD #' + str(idx)) start = time.time() model = Naive(trainingData, attTypes) timer = time.time() print('Time to train: ', timer - start) start = time.time() evalModel = Evaluation(model, testData, CLASS_AMM_IRIS) evalModel.normalPrint() print('Took ', time.time() - start, 's')
def get_evaluation(evaluation_type, build_dictionaries=False):
    evaluation = Evaluation(build_dictionaries=build_dictionaries)
    if evaluation_type == "-g":
        evaluation.show_precision_recall()
    elif evaluation_type == "-m":
        map_value = evaluation.mean_average_precision()
        print("MAP : %s" % map_value)
def evaluate_model(args):
    hparams = PARAMS_MAP[args.model]
    hparams = collections.namedtuple("HParams", sorted(hparams.keys()))(**hparams)

    model = Evaluation(hparams)
    model.run_evaluate(args.evaluate)
def run_stochastic_gradient_descent(tx_train, y_train, tx_val, y_val):
    """It performs training and evaluation of least squares with stochastic gradient descent."""
    print('\nTraining with Stochastic Gradient Descent')

    initial_w = np.zeros((tx_train.shape[1]))
    gamma = 0.005
    max_iter = 3000

    # Train the model
    w, _ = least_squares_SGD(y=y_train, tx=tx_train, initial_w=initial_w,
                             max_iters=max_iter, gamma=gamma, verbose=False)

    # Perform predictions
    y_pred = predict_labels(weights=w, data=tx_val, logistic=False)

    # Evaluate
    evaluation = Evaluation(y_actual=y_val, y_pred=y_pred)
    acc = evaluation.get_accuracy()
    f1 = evaluation.get_f1()
    print('Accuracy: {acc}, F1: {f1}'.format(acc=acc, f1=f1))
    return acc, f1
def run_regularized_logistic_regression(tx_train, y_train, tx_val, y_val):
    """It performs training and evaluation of regularized logistic regression."""
    print('\nTraining with regularized logistic regression')

    # Initialize parameters
    initial_w = np.zeros((tx_train.shape[1]))
    gamma = 1e-6
    max_iter = 1000
    lambda_ = 0.00001

    # Train the model
    w, _ = reg_logistic_regression(y=y_train, tx=tx_train, initial_w=initial_w,
                                   max_iters=max_iter, gamma=gamma, lambda_=lambda_)

    # Perform predictions
    y_pred = predict_labels(weights=w, data=tx_val, logistic=True)

    # Evaluate
    evaluation = Evaluation(y_actual=y_val, y_pred=y_pred)
    acc = evaluation.get_accuracy()
    f1 = evaluation.get_f1()
    print('Accuracy: {acc}, F1: {f1}'.format(acc=acc, f1=f1))
    return acc, f1
def valence_validation(self):
    self.get_single_validate_data_provider(self.valence_validate_tfrecords)
    predictions = self.get_predictions
    validation = Evaluation(self.single_validate_data_provider, self.batch_size,
                            self.epochs, self.num_classes, self.learning_rate,
                            predictions, 1811, 'valence',
                            './ckpt/valence/model.ckpt')
    validation.start_evaluation()
def evaluate_entropy_metrics(joint_prob: np.ndarray, evaluation: Evaluation, is_sampled_graph: bool = False): """Evaluates the entropy (information theoretics based) goodness of partition metrics. Parameters --------- joint_prob : np.ndarray the normalized contingency table evaluation : Evaluation stores the evaluation metrics is_sampled_graph : bool = False True if evaluation is for a sampled_graph. Default = False """ # compute the information theoretic metrics marginal_prob_b2 = np.sum(joint_prob, 0) marginal_prob_b1 = np.sum(joint_prob, 1) idx_truth = np.nonzero(marginal_prob_b1) idx_alg = np.nonzero(marginal_prob_b2) evaluation = calc_entropy(marginal_prob_b1, marginal_prob_b2, idx_truth, idx_alg, evaluation, is_sampled_graph) evaluation = calc_conditional_entropy(joint_prob, marginal_prob_b1, marginal_prob_b2, idx_truth, idx_alg, evaluation, is_sampled_graph) if is_sampled_graph: if evaluation.sampled_graph_entropy_truth > 0: fraction_missed_info = ( evaluation.sampled_graph_entropy_truth_given_algorithm / evaluation.sampled_graph_entropy_truth) else: fraction_missed_info = 0 if evaluation.sampled_graph_entropy_algorithm > 0: fraction_err_info = ( evaluation.sampled_graph_entropy_algorithm_given_truth / evaluation.sampled_graph_entropy_algorithm) else: fraction_err_info = 0 evaluation.sampled_graph_missed_info = fraction_missed_info evaluation.sampled_graph_erroneous_info = fraction_err_info else: if evaluation.entropy_truth > 0: fraction_missed_info = evaluation.entropy_truth_given_algorithm / evaluation.entropy_truth else: fraction_missed_info = 0 if evaluation.entropy_algorithm > 0: fraction_err_info = evaluation.entropy_algorithm_given_truth / evaluation.entropy_algorithm else: fraction_err_info = 0 evaluation.missed_info = fraction_missed_info evaluation.erroneous_info = fraction_err_info print('Fraction of missed information: {}'.format( abs(fraction_missed_info))) print('Fraction of erroneous information: {}'.format( abs(fraction_err_info)))
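# evaluate_entropy_metrics above relies on calc_conditional_entropy, which is not
# included in this listing. The quantities it needs follow directly from the
# normalized contingency table: H(T|A) = -sum_ij p(t_i, a_j) * log(p(t_i, a_j) / p(a_j)),
# and symmetrically H(A|T). The helper below is only a sketch of that computation
# under those standard definitions; its name and signature are assumptions, not the
# original implementation.
import numpy as np

def conditional_entropies_sketch(joint_prob):
    """Return (H(truth | alg), H(alg | truth)) from a normalized contingency table."""
    p_truth = joint_prob.sum(axis=1)   # marginal over algorithm blocks (row sums)
    p_alg = joint_prob.sum(axis=0)     # marginal over truth blocks (column sums)
    rows, cols = np.nonzero(joint_prob)  # zero cells contribute nothing and would break log()
    p = joint_prob[rows, cols]
    h_truth_given_alg = -np.sum(p * np.log(p / p_alg[cols]))
    h_alg_given_truth = -np.sum(p * np.log(p / p_truth[rows]))
    return h_truth_given_alg, h_alg_given_truth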
def create_contingency_table(true_b: np.ndarray, alg_b: np.ndarray, evaluation: Evaluation) -> Tuple[np.ndarray, int]: """Creates the contingency table for the block assignment of the truth and algorithmically determined partitions.. Parameters --------- true_b : ndarray (int) array of truth block assignment for each node. If the truth block is not known for a node, -1 is used to indicate unknown blocks. alg_b : ndarray (int) array of output block assignment for each node. The length of this array corresponds to the number of nodes observed and processed so far. evaluation : Evaluation stores the evaluation results Returns ------ contingency_table : np.ndarray (int) the contingency table (confusion matrix) comparing the true block assignment to the algorithmically determined block assignment N : int the """ blocks_b1 = true_b blocks_b1_set = set(true_b) blocks_b1_set.discard(-1) # -1 is the label for 'unknown' num_blocks_truth = len(blocks_b1_set) blocks_b2 = alg_b num_blocks_alg = max(blocks_b2) + 1 evaluation.num_blocks_algorithm = num_blocks_alg evaluation.num_blocks_truth = num_blocks_truth print('\nPartition Correctness Evaluation\n') print('Number of nodes: {}'.format(len(alg_b))) print('Number of partitions in truth partition: {}'.format(num_blocks_truth)) print('Number of partitions in alg. partition: {}'.format(num_blocks_alg)) # populate the confusion matrix between the two partitions contingency_table = np.zeros((num_blocks_truth, num_blocks_alg)) for i in range(len(alg_b)): # evaluation based on nodes observed so far if true_b[i] != -1: # do not include nodes without truth in the evaluation contingency_table[blocks_b1[i], blocks_b2[i]] += 1 N = contingency_table.sum() if num_blocks_truth > num_blocks_alg: # transpose matrix for linear assignment (this implementation assumes #col >= #row) contingency_table = contingency_table.transpose() contingency_table_before_assignment = np.array(contingency_table) # associate the labels between two partitions using linear assignment contingency_table, indexes = associate_labels(contingency_table, contingency_table_before_assignment) # fill in the un-associated columns contingency_table = fill_unassociated_columns(contingency_table, contingency_table_before_assignment, indexes) if num_blocks_truth > num_blocks_alg: # transpose back contingency_table = contingency_table.transpose() print('Contingency Table: \n{}'.format(contingency_table)) return contingency_table, N
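# create_contingency_table above calls associate_labels and fill_unassociated_columns,
# neither of which appears in this listing. The association step is typically a
# maximum-weight bipartite matching between truth and algorithm blocks; the sketch
# below shows that step with SciPy's Hungarian solver. The function name and the
# column-reordering convention are assumptions, and the handling of unmatched columns
# (the role of fill_unassociated_columns) is deliberately left out.
import numpy as np
from scipy.optimize import linear_sum_assignment

def associate_labels_sketch(contingency_table):
    """Reorder columns so that matched truth/algorithm blocks lie on the diagonal."""
    # maximizing the diagonal count == minimizing the negated table
    row_ind, col_ind = linear_sum_assignment(-contingency_table)
    indexes = list(zip(row_ind, col_ind))
    reordered = np.zeros_like(contingency_table)
    for row, col in indexes:
        reordered[:, row] = contingency_table[:, col]  # move matched column to position `row`
    return reordered, indexes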
def _plot_hq_epoch_samples(self, generated_samples, discriminator_probabilities):
    for i in range((self._epoch_images_shape[0] + 1) // 2 *
                   self._epoch_images_shape[1] // 2):
        plt.subplot((self._epoch_images_shape[0] + 1) // 2,
                    self._epoch_images_shape[1] // 2, i + 1)
        Evaluation.plot_image(
            generated_samples[i],
            np.round(discriminator_probabilities[i].numpy(), 5))
def evaluate_random(train_spec, current_network):
    logger.info("evaluating against random agent...")
    agent_a = RandomAgent()
    current_prediction_network = train_spec.prediction_network(current_network)
    agent_b = AlphaZeroAgent(current_prediction_network, train_spec.game_engine(),
                             num_simulations=train_spec.num_simulations)
    evaluation = Evaluation(train_spec.game_engine(), agent_a, agent_b, competitive=True)
    scores = evaluation.play_n_games(train_spec.num_random_evaluation_games)
    logger.info(f"Eval scores vs random agent {scores}")
def add_result(self, ranking, sample):
    for size in self.thresholds:
        recommendation = ranking[:size]
        self.recommended[size] = self.recommended[size].union(recommendation)
        predicted = RecommendationResult(dict.fromkeys(recommendation, 1))
        real = RecommendationResult(sample)
        evaluation = Evaluation(predicted, real, self.repository_size)
        self.precision[size].append(evaluation.run(Precision()))
        self.recall[size].append(evaluation.run(Recall()))
        self.f05[size].append(evaluation.run(F_score(0.5)))
        self.fpr[size].append(evaluation.run(FPR()))
def test_all_data(self):
    eval_id = Evaluation.create("Evaluation Create Test", self.card_id).id()
    EvalItemData.create_or_update(self.item1_id, eval_id, "Item 1 Value")
    EvalItemData.create_or_update(self.item2_id, eval_id, "Item 2 Value")
    CommentData.create_or_update(eval_id, "Comments")
    TextLineData.create_or_update(self.text_line1_id, eval_id, "Text Line 1 Value")

    all_data = Evaluation.find_by_id(eval_id).all_data()
    self.assertEqual("Item 1 Value", all_data["items"][self.item1_id])
    self.assertEqual("Item 2 Value", all_data["items"][self.item2_id])
    self.assertEqual("", all_data["items"][self.item3_id])
    self.assertEqual("Comments", all_data["comments"])
    self.assertEqual("Text Line 1 Value", all_data["text"][self.text_line1_id])
    self.assertEqual("", all_data["text"][self.text_line2_id])
def eval_comp(config_name, trial, i, log_i): global xp, testcases config = configs[config_name] for key in log_i._logs.keys(): print key, len(log_i._logs[key]) if i == 0: config.gui = gui config.env_cfg["gui"] = gui xp = ToolsExperiment(config, log_dir=log_dir + config_name + "/") else: xp.ag.fast_forward(log_i) xp.ag.eval_mode() evaluation = Evaluation(xp.log, xp.ag, xp.env, testcases, modes=["inverse"]) result = evaluation.evaluate() return result
def evaluate(self, data_stream, train=False, file_pred=None, file_targets=None): loss = 0. num_examples = 0 iterator = data_stream.get_epoch_iterator() if train: print 'Train evaluation started' i = 0 for inputs in iterator: inputs = dict(zip(data_stream.sources, inputs)) x_mask_val = inputs['features_mask'] x_val = inputs['features'] y_val = inputs['phonemes'] y_mask_val = inputs['phonemes_mask'] for batch_ind in xrange(inputs['features'].shape[1]): if x_val.ndim == 2: input_beam = numpy.tile(x_val[:, batch_ind][:, None], (1, self.beam_size)) else: input_beam = numpy.tile(x_val[:, batch_ind, :][:, None, :], (1, self.beam_size, 1)) input_mask_beam = numpy.tile(x_mask_val[:, batch_ind][:, None], (1, self.beam_size)) predictions, _ = self.beam_search.search( {self.x: input_beam, self.x_mask: input_mask_beam}, self.eol_symbol, 100) predictions = [self.phoneme_dict[phone_ind] for phone_ind in predictions[0] if self.phoneme_dict[phone_ind] not in self.black_list][1:-1] targets = y_val[:sum(y_mask_val[:, batch_ind]), batch_ind] targets = [self.phoneme_dict[phone_ind] for phone_ind in targets if self.phoneme_dict[phone_ind] not in self.black_list][1:-1] predictions = [x[0] for x in groupby(predictions)] targets = [x[0] for x in groupby(targets)] i += 1 if file_pred: file_pred.write(' '.join(predictions) + '(%d)\n' % i) if file_targets: file_targets.write(' '.join(targets) + '(%d)\n' %i) loss += Evaluation.wer([predictions], [targets]) num_examples += 1 print '.. found sequence example:', ' '.join(predictions) print '.. real output was: ', ' '.join(targets) if train: break if train: print 'Train evaluation finished' per = loss.sum() / num_examples return {'per': per}
def evaluate(self):
    """Evaluate additional data from MTurk.

    The following statistics are computed:
    * Working time
    * Feedback
    * HIT statistics
    * Working time per worker
    * Worker statistics
    """
    evaluation = Evaluation(self.task)
    evaluation.workingTime()
    evaluation.extractFeedback()
    evaluation.HITStatistics()
    evaluation.workingTimePerWorker()
    evaluation.workerStatistics()
def add_result(self, ranking, sample):
    predicted = RecommendationResult(dict.fromkeys(ranking, 1))
    real = RecommendationResult(sample)
    evaluation = Evaluation(predicted, real, self.repository_size)
    self.precision.append(evaluation.run(Precision()))
    self.recall.append(evaluation.run(Recall()))
    self.fpr.append(evaluation.run(FPR()))
    self.f05.append(evaluation.run(F_score(0.5)))
    self.mcc.append(evaluation.run(MCC()))
def add_result(self, ranking, sample):
    for size in self.accuracy.keys():
        predicted = RecommendationResult(dict.fromkeys(ranking[:size], 1))
        real = RecommendationResult(sample)
        evaluation = Evaluation(predicted, real, self.repository_size)
        self.accuracy[size].append(evaluation.run(Accuracy()))
        self.precision[size].append(evaluation.run(Precision()))
        self.recall[size].append(evaluation.run(Recall()))
        self.f1[size].append(evaluation.run(F_score(1)))
        self.f05[size].append(evaluation.run(F_score(0.5)))
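# Several snippets above build an Evaluation from a predicted and a real
# RecommendationResult plus the repository size, then call run() with metric objects
# such as Precision(), Recall(), F_score(beta), FPR() or Accuracy(). Those classes are
# not included here. The sketch below only illustrates, under that assumption, how such
# set-based metrics are commonly derived from the two item sets; the function name and
# internals are hypothetical, not the original API.
def sketch_set_metrics(predicted_items, real_items, repository_size, beta=0.5):
    predicted = set(predicted_items)
    real = set(real_items)
    tp = len(predicted & real)           # recommended and actually relevant
    fp = len(predicted - real)           # recommended but not relevant
    fn = len(real - predicted)           # relevant but missed
    tn = repository_size - tp - fp - fn  # everything else in the repository
    precision = tp / float(len(predicted)) if predicted else 0.0
    recall = tp / float(len(real)) if real else 0.0
    fpr = fp / float(fp + tn) if (fp + tn) else 0.0
    if precision + recall == 0:
        f_beta = 0.0
    else:
        f_beta = (1 + beta ** 2) * precision * recall / (beta ** 2 * precision + recall)
    accuracy = (tp + tn) / float(repository_size) if repository_size else 0.0
    return {'precision': precision, 'recall': recall, 'fpr': fpr,
            'accuracy': accuracy, 'f_beta': f_beta}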
def evaluate(self, data_stream, train=False, file_pred=None, file_targets=None): loss = 0. num_examples = 0 iterator = data_stream.get_epoch_iterator() if train: print 'Train evaluation started' i = 0 for inputs in iterator: inputs = dict(zip(data_stream.sources, inputs)) x_mask_val = inputs['features_mask'] x_val = inputs['features'] #transpose y_val = inputs['phonemes'] y_mask_val = inputs['phonemes_mask'] y_hat = self.prediction_func(x_val, x_mask_val) y_predict = numpy.argmax(y_hat, axis=2) for batch in xrange(inputs['features'].shape[0]): y_val_cur = y_val[:sum(y_mask_val[:, batch]), batch] predicted = y_predict[:sum(x_mask_val[:, batch]), batch] predicted = ctc_strip(predicted) predictions = [self.phoneme_dict[phone_ind] for phone_ind in predicted if self.phoneme_dict[phone_ind] not in self.black_list] targets = [self.phoneme_dict[phone_ind] for phone_ind in y_val_cur if self.phoneme_dict[phone_ind] not in self.black_list] predictions = [x[0] for x in groupby(predictions)] targets = [x[0] for x in groupby(targets)] i += 1 if file_pred: file_pred.write(' '.join(predictions) + '(%d)\n' % i) if file_targets: file_targets.write(' '.join(targets) + '(%d)\n' %i) loss += Evaluation.wer([predictions], [targets]) num_examples += 1 print '.. found sequence example:', ' '.join(predictions) print '.. real output was: ', ' '.join(targets) if train: break if train: print 'Train evaluation finished' per = loss.sum() / num_examples return {'per': per}
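# The two evaluate() snippets above accumulate Evaluation.wer([predictions], [targets])
# and divide by the number of examples to report a phone error rate (PER). That static
# method is not included in this listing; below is only a minimal sketch of the usual
# word/phone error rate, i.e. the Levenshtein distance between hypothesis and reference
# divided by the reference length. The function name and return type are illustrative
# assumptions (the original appears to return something summable via .sum()).
import numpy as np

def wer_sketch(hypotheses, references):
    """Return an array with one error rate per (hypothesis, reference) pair."""
    rates = []
    for hyp, ref in zip(hypotheses, references):
        # dynamic-programming edit distance over token sequences
        d = np.zeros((len(ref) + 1, len(hyp) + 1), dtype=np.int32)
        d[:, 0] = np.arange(len(ref) + 1)
        d[0, :] = np.arange(len(hyp) + 1)
        for i in range(1, len(ref) + 1):
            for j in range(1, len(hyp) + 1):
                cost = 0 if ref[i - 1] == hyp[j - 1] else 1
                d[i, j] = min(d[i - 1, j] + 1,         # deletion
                              d[i, j - 1] + 1,         # insertion
                              d[i - 1, j - 1] + cost)  # substitution
        rates.append(d[len(ref), len(hyp)] / float(max(len(ref), 1)))
    return np.array(rates)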
def setUp(self):
    super(EvalItemDataTest, self).setUp()
    self.card_id = ReportCard.create('Item Data Test Card').id()
    self.evaluation_id = Evaluation.create('Item Data Test Evaluation', self.card_id).id()
    self.category_id = EvalCategory.create('Item Data Test Category', self.card_id).id()
    self.item_id = EvalItem.create('Item Data Test Item', self.category_id).id()
def plot_para_trending(self, collection, query_part, metric, outfmt='png'): """ Generate the performance trendings based on the parameters of the model and the number of terms in the query @Input: @collection: path to the collections (evaluation results) @query_part: e.g. title, desc, title+desc @metric: e.g. map, p@20 @Output: plots in files """ # We assume that there is ONLY ONE parameter in the model!! collection_name = collection.split('/')[-1] json_output_fn = os.path.join(self.output_root, 'performance_analysis', collection_name+'_'+query_part+'_'+metric+'.json') with open('g.json') as f: models = [{ele['name']:ele['paras']} for ele in json.load(f)['methods']] query_instance = Query(collection) eval_instance = Evaluation(collection) query_nums = query_instance.get_queries_lengths(query_part) print collection_name, query_part, query_nums #plot related num_cols = 2 num_rows = int(math.ceil(len(query_nums)*1.0/num_cols)) fig, axs = plt.subplots(nrows=num_rows, ncols=num_cols, sharex=False, sharey=False, figsize=(num_cols, num_rows)) font = {'size' : 4} plt.rc('font', **font) row_idx = 0 col_idx = 0 json_output = [] for i in query_nums: qids = [q['num'] for q in query_instance.get_queries_of_length(i, query_part)] # we assume that the model parameters can be normalized to [0, 1] ax = axs[row_idx][col_idx] col_idx += 1 if col_idx >= num_cols: row_idx += 1 col_idx = 0 markers = ['ro', 'bs', 'kv', 'gx'] this_json_output = {'qLen': i, 'qids': qids, 'details': []} for model_idx, model in enumerate(models): for para_key, para_value in model.items(): avg_perform = [] orig_x = para_value.values()[0] x = [para*1.0/max(para_value.values()[0]) for para in para_value.values()[0]] for para in para_value.values()[0]: method_str = para_key+','+para_value.iterkeys().next()+':'+str(para) avg_perform.append( np.mean([v[metric] if v else 0.0 for v in eval_instance.get_all_performance_of_some_queries(method = method_str, qids = qids, return_all_metrics = False).values()]) ) ax.plot(x, avg_perform, markers[model_idx], ls='-', label=model.keys()[0]) zipped = zip(orig_x, x, avg_perform) zipped.sort(key=itemgetter(2,1,0), reverse=True) this_json_output['details'].append({'model':model.keys()[0], 'para': zipped[0][0], 'performance': round(zipped[0][2], 4)}) #print model.keys()[0], zipped[0] json_output.append(this_json_output) ax.set_title('qLen=%d' % i) plt.legend() plt.savefig(os.path.join(self.output_root, 'performance_analysis', collection_name+'_'+query_part+'_'+metric+'.'+outfmt), format=outfmt, bbox_inches='tight', dpi=400) with open(json_output_fn, 'wb') as jo: json.dump(json_output, jo, indent=2)
def classify(self) : t1 = time.time() # Schedule a crawl job with the query try : crawler = Search(self.search_query) crawler.googleSearch() except Exception as e : print e print "Error in initializing Google search" t2 = time.time() print "Google search done in " + str(t2-t1) + " secs" # Extract data crawled try : crawler.get_crawled_urls() except Exception as e : print e print "Error in extracting crawl data" t3 = time.time() print "Test data extraction done in " + str(t3-t2) + " secs" # Preprocess test data try : preproc_test = Preprocessor(crawler.all_urls) preproc_test.preprocessor_main() except Exception as e : print e print "Error in preprocessing crawl data" t4 = time.time() print "Test data preprocessing done in " + str(t4-t3) + " secs" # Send a search request to Dig server with the query dig_search = Dig_Search(self.search_query) dig_search.search_request() t5 = time.time() print "Dig Search done in " + str(t5-t4) + " secs" # Extract results returned by search query dig_search.dig_extraction() t6 = time.time() print "Dig extraction done in " + str(t6-t5) + " secs" # Preprocess the search results try : preproc_train = Preprocessor(dig_search.urls_dig) preproc_train.preprocessor_main() dig_search.filter_dig_result(preproc_train.data) except Exception as e : print e print "Error in preprocessing training data" t7 = time.time() print "Training data preprocessing done in " + str(t7-t6) + " secs" # Compute tfidf vectors of data try : tfidf_train = Tfidf_Vectorize(dig_search.urls_dig) tfidf_train.tfidf_vectorize_train() tfidf_train.tfidf_vectorize_test(preproc_test.data) except Exception as e : print e print "Error in computing tfidf vectorization" t9 = time.time() print "Tfidf computation done in " + str(t9-t7) + " secs" # Compute similarity of training data with its centroid vector try : sim_train = Similarity(tfidf_train.tfidf_centroid_train, tfidf_train.features_train, tfidf_train.tfidf_train) similarity_train = sim_train.similarity_main() except Exception as e : print e print "Error in computing cosine similarity" t10 = time.time() print "Training data similarity computation done in " + str(t10-t9) + " secs" # Compute similarity of test data with training data try : sim_test = Similarity(tfidf_train.tfidf_centroid_train, tfidf_train.features_train, tfidf_train.tfidf_test) similarity_test = sim_test.similarity_main() except Exception as e : print e print "Error in computing cosine similarity" t11 = time.time() print "Similarity computation done in " + str(t11-t10) + " secs" print "Total time = " + str(t11-t1) evaluator = Evaluation(similarity_train, similarity_test) urls_classified = evaluator.compare_similarity(preproc_test) classified_output = self.formatOutput(urls_classified) return classified_output
def plot_rel_prob(self, query_length, x_func, _method, plot_ratio=True, plot_total_or_avg=True, plot_rel_or_all=True, performance_as_legend=True, drawline=True, numbins=60, xlimit=0, ylimit=0, zoom_x=0, compact_x=False, curve_fitting=False, draw_individual=False, draw_all=True, oformat='eps'): """ plot the P(D=1|TF=x) Input: @query_length: only plot the queries of length, 0 for all queries. @x_func: how to get the x-axis of the figure. By default, this should be TF values. But we are flexible with other options, e.g. tf/dl @_method: Which method is going to be plot. The parameters should also be attached, e.g. dir,mu:2500 @plot_ratio: When this is false, plot the y-axis as the number of relevant documents; When this is true, plot the y-axis as the #rel_docs/#docs @plot_total_or_avg: When this is true, plot the y-axis as the collection total ; When this is false, plot the collection average. Only available when plot_ratio is false is only available for collection-wise @plot_rel_or_all: When this is true, plot the y-axis as the number of relevant docs ; When this is false, plot the number of all docs. Only available when plot_ratio is false is only available for collection-wise @performance_as_legend: whether to add performance(e.g. MAP) as part of the legend @drawline: draw the data points as line(true) or dots(false) @numbins: the number of bins if we choose to plot x points as bins, 0 for no bins @xlimit: the limit of xaxis, any value larger than this value would not be plotted. default 0, meaning plot all data. @ylimit: the limit of yaxis, any value larger than this value would not be plotted. default 0, meaning plot all data. @zoom: whether zoom part of the plot @zoom_x: the zoom start x point, 0 for no zoom. @compact_x: map the x to continuous integers, e.g. 1,2,3,4,.... 
@oformat: output format, eps or png """ collection_name = self.collection_name cs = CollectionStats(self.collection_path) doc_details = GenDocDetails(self.collection_path) output_root = os.path.join('collection_figures', str(query_length)) if not os.path.exists(os.path.join(self.all_results_root, output_root)): os.makedirs(os.path.join(self.all_results_root, output_root)) if query_length == 0: queries = Query(self.collection_path).get_queries() else: queries = Query(self.collection_path).get_queries_of_length(query_length) queries = {ele['num']:ele['title'] for ele in queries} #print qids rel_docs = Judgment(self.collection_path).get_relevant_docs_of_some_queries(queries.keys(), 1, 'dict') #print np.mean([len(rel_docs[qid]) for qid in rel_docs]) eval_class = Evaluation(self.collection_path) print _method p = eval_class.get_all_performance_of_some_queries( method=_method, qids=queries.keys(), return_all_metrics=False, metrics=['map'] ) collection_x_dict = {} collection_level_maxX = 0.0 num_cols = min(4, len(queries)) num_rows = int(math.ceil(len(rel_docs)*1.0/num_cols)) fig, axs = plt.subplots(nrows=num_rows, ncols=num_cols, sharex=False, sharey=False, figsize=(2*num_cols, 2*num_rows)) font = {'size' : 5} plt.rc('font', **font) row_idx = 0 col_idx = 0 #idfs = [(qid, math.log(cs.get_term_IDF1(queries[qid]))) for qid in rel_docs] #idfs.sort(key=itemgetter(1)) all_expected_maps = [] if curve_fitting: all_fitting_results = [{'sr': [], 'ap':[], 'ap_diff':[]} for i in range(FittingModels().size())] all_fitting_performances = {} for qid in sorted(queries): if num_rows > 1: ax = axs[row_idx][col_idx] else: if num_cols > 1: ax = axs[col_idx] else: ax = axs col_idx += 1 if col_idx >= num_cols: row_idx += 1 col_idx = 0 query_term = queries[qid] maxTF = cs.get_term_maxTF(query_term) #idf = math.log(cs.get_term_IDF1(query_term)) #legend = 'idf:%.2f'%idf if performance_as_legend: legend = '\nAP:%.4f' % (p[qid]['map'] if p[qid] else 0) x_dict = {} qid_docs_len = 0 #for row in cs.get_qid_details(qid): for row in doc_details.get_qid_details(qid): qid_docs_len += 1 x = x_func(cs, row) if x > collection_level_maxX: collection_level_maxX = x rel = (int(row['rel_score'])>=1) if x not in x_dict: x_dict[x] = [0, 0] # [rel_docs, total_docs] if rel: x_dict[x][0] += 1 x_dict[x][1] += 1 if x not in collection_x_dict: collection_x_dict[x] = [0, 0] # [rel_docs, total_docs] if rel: collection_x_dict[x][0] += 1 collection_x_dict[x][1] += 1 xaxis = x_dict.keys() xaxis.sort() if plot_ratio: yaxis = [x_dict[x][0]*1./x_dict[x][1] for x in xaxis] else: yaxis = [(x_dict[x][0]) if plot_rel_or_all else (x_dict[x][1]) for x in xaxis] ranking_list = [(x_dict[x][0], x_dict[x][1]) for x in xaxis] all_expected_maps.append(EMAP().cal_expected_map(ranking_list)) if draw_individual: if np.sum(xaxis) == 0 or np.sum(yaxis) == 0: continue raw_xaxis = copy.deepcopy(xaxis) xaxis = np.array(xaxis, dtype=np.float32) yaxis = np.array(yaxis, dtype=np.float32) if compact_x: xaxis = range(1, len(xaxis)+1) if curve_fitting and not plot_ratio: sum_yaxis = np.sum(yaxis) yaxis /= sum_yaxis query_stat = cs.get_term_stats(query_term) zoom_xaxis = xaxis[zoom_x:] zoom_yaxis = yaxis[zoom_x:] ax, zoom_ax = self.plot_figure(ax, xaxis, yaxis, qid+'-'+query_term, legend, drawline=drawline, xlimit=xlimit, ylimit=ylimit, zoom=zoom_x > 0, zoom_xaxis=zoom_xaxis, zoom_yaxis=zoom_yaxis, legend_markscale=0.5) if curve_fitting: all_fittings = [] fitting_xaxis = [] fitting_yaxis = [] for i, ele in enumerate(yaxis): #if ele != 0: fitting_xaxis.append(xaxis[i]) 
fitting_yaxis.append(ele) for j in range(1, FittingModels().size()+1): fitting = FittingModels().cal_curve_fit(fitting_xaxis, fitting_yaxis, j) if not fitting is None: fitting_func_name = fitting[1] all_fitting_results[j-1]['name'] = fitting_func_name all_fitting_results[j-1]['sr'].append(fitting[4]) # sum of squared error if re.search(r'^tf\d+$', _method): estimated_map = CalEstMAP().cal_map( rel_docs = np.rint(fitting[3]*sum_yaxis).astype(int), all_docs = [x_dict[x][1] for x in raw_xaxis], mode=1 ) else: estimated_map = CalEstMAP().cal_map( rel_docs = np.rint(fitting[3]*sum_yaxis).astype(int), all_docs = [x_dict[x][1] for x in raw_xaxis], mode=1 ) all_fitting_results[j-1]['ap'].append(estimated_map) # average precision actual_map = p[qid]['map'] if p[qid] else 0 all_fitting_results[j-1]['ap_diff'].append(math.fabs(estimated_map-actual_map)) fitting.append(estimated_map) fitting.append(math.fabs(estimated_map-actual_map)) all_fittings.append(fitting) if fitting_func_name not in all_fitting_performances: all_fitting_performances[fitting_func_name] = {} all_fitting_performances[fitting_func_name][qid] = estimated_map #print fitting[0], fitting[1], fitting[3] else: #print j, 'None' pass all_fittings.sort(key=itemgetter(4)) try: print qid, query_term, all_fittings[0][0], all_fittings[0][1], all_fittings[0][2], all_fittings[0][4] except: continue fitted_y = [0 for i in range(len(xaxis))] for x in xaxis: if x in fitting_xaxis: idx = fitting_xaxis.index(x) fitted_y[idx] = all_fittings[0][3][idx] best_fit_func_name = all_fittings[0][1] all_fittings.sort(key=itemgetter(-1)) zoom_yaxis_fitting = fitted_y[zoom_x:] self.plot_figure(ax, xaxis, fitted_y, qid+'-'+query_term, '%s\n%s(%.4f)' % (best_fit_func_name, all_fittings[0][1], all_fittings[0][-2]), drawline=True, linestyle='--', zoom=zoom_x > 0, zoom_ax = zoom_ax, zoom_xaxis=zoom_xaxis, zoom_yaxis=zoom_yaxis_fitting, legend_pos='best', xlimit=xlimit, ylimit=ylimit, legend_markscale=0.5) if draw_individual: output_fn = os.path.join(self.all_results_root, output_root, '%s-%s-%s-%s-%s-%s-%d-%.1f-%.1f-zoom%d-%s-%s-individual.%s' % ( collection_name, _method, 'ratio' if plot_ratio else 'abscnt', 'total' if plot_total_or_avg else 'avg', 'rel' if plot_rel_or_all else 'all', 'line' if drawline else 'dots', numbins, xlimit, ylimit, zoom_x, 'compact' if compact_x else 'raw', 'fit' if curve_fitting else 'plain', oformat) ) plt.savefig(output_fn, format=oformat, bbox_inches='tight', dpi=400) if curve_fitting: # plot the goodness of fit all_fitting_results = [ele for ele in all_fitting_results if 'name' in ele and ele['name'] not in ['AD']] goodness_fit_data = [ele['sr'] for ele in all_fitting_results if 'sr' in ele] labels = [ele['name'] for ele in all_fitting_results if 'sr' in ele and 'name' in ele] fig, ax = plt.subplots(nrows=1, ncols=1, sharex=False, sharey=False, figsize=(6, 3.*1)) font = {'size' : 8} plt.rc('font', **font) ax.boxplot(goodness_fit_data, labels=labels) output_fn = os.path.join(self.all_results_root, output_root, '%s-%s-fitting.%s' % (collection_name, _method, oformat) ) plt.savefig(output_fn, format=oformat, bbox_inches='tight', dpi=400) # plot the AP diff ap_diff_data = [ele['ap_diff'] for ele in all_fitting_results if 'ap_diff' in ele] labels = [ele['name'] for ele in all_fitting_results if 'ap_diff' in ele and 'name' in ele] fig, ax = plt.subplots(nrows=1, ncols=1, sharex=False, sharey=False, figsize=(6, 3.*1)) font = {'size' : 8} plt.rc('font', **font) ax.boxplot(ap_diff_data, labels=labels) output_fn = 
os.path.join(self.all_results_root, output_root, '%s-%s-apdiff.%s' % (collection_name, _method, oformat) ) plt.savefig(output_fn, format=oformat, bbox_inches='tight', dpi=400) # draw the figure for the whole collection collection_vocablulary_stat = cs.get_vocabulary_stats() collection_vocablulary_stat_str = '' idx = 1 for k,v in collection_vocablulary_stat.items(): collection_vocablulary_stat_str += k+'='+'%.2f'%v+' ' if idx == 3: collection_vocablulary_stat_str += '\n' idx = 1 idx += 1 fig, axs = plt.subplots(nrows=1, ncols=1, sharex=False, sharey=False, figsize=(6, 3.*1)) font = {'size' : 8} plt.rc('font', **font) xaxis = collection_x_dict.keys() xaxis.sort() if plot_ratio: yaxis = [collection_x_dict[x][0]*1./collection_x_dict[x][1] for x in xaxis] else: if plot_total_or_avg: yaxis = [(collection_x_dict[x][0]) if plot_rel_or_all else (collection_x_dict[x][1]) for x in xaxis] else: yaxis = [(collection_x_dict[x][0]/len(queries)) if plot_rel_or_all else (collection_x_dict[x][1]/len(queries)) for x in xaxis] #print np.sum(yaxis[20:]), np.sum(yaxis[20:]) if numbins > 0: interval = collection_level_maxX*1.0/numbins newxaxis = [i for i in np.arange(0, collection_level_maxX+1e-10, interval)] newyaxis = [[0.0, 0.0] for x in newxaxis] for x in xaxis: newx = int(x / interval) newyaxis[newx][0] += collection_x_dict[x][0] newyaxis[newx][1] += collection_x_dict[x][1] xaxis = newxaxis if plot_ratio: yaxis = [ele[0]/ele[1] if ele[1] != 0 else 0.0 for ele in newyaxis] else: if plot_total_or_avg: yaxis = [(ele[0]) if plot_rel_or_all else (ele[1]) for ele in newyaxis] else: yaxis = [(ele[0]/len(queries)) if plot_rel_or_all else (ele[1]/len(queries)) for ele in newyaxis] # we do not care about the actual values of x # so we just map the actual values to integer values return_data = copy.deepcopy(collection_x_dict) if curve_fitting: #### calculate the stats for fitting_func_name in all_fitting_performances: actual_maps = [p[qid]['map'] if p[qid] else 0 for qid in queries] estimated_maps = [all_fitting_performances[fitting_func_name][qid] if qid in all_fitting_performances[fitting_func_name] else 0 for qid in queries] print fitting_func_name, print scipy.stats.pearsonr(actual_maps, estimated_maps), print scipy.stats.kendalltau(actual_maps, estimated_maps) print '-'*30 if draw_all: if compact_x: xaxis = range(1, len(xaxis)+1) xaxis = np.array(xaxis, dtype=np.float32) yaxis = np.array(yaxis, dtype=np.float32) if curve_fitting and not plot_ratio: yaxis /= np.sum(yaxis) collection_legend = '' if performance_as_legend: collection_legend = '$MAP:%.4f$' % (np.mean([p[qid]['map'] if p[qid] else 0 for qid in queries])) #collection_legend += '\n$MAP_E:%.4f$' % (np.mean(all_expected_maps)) zoom_xaxis = xaxis[zoom_x:] zoom_yaxis = yaxis[zoom_x:] axs, zoom_axs = self.plot_figure(axs, xaxis, yaxis, collection_name, collection_legend, drawline=drawline, xlimit=xlimit, ylimit=ylimit, zoom=zoom_x > 0, zoom_xaxis=zoom_xaxis, zoom_yaxis=zoom_yaxis) if curve_fitting: all_fittings = [] fitting_xaxis = [] fitting_yaxis = [] for i, ele in enumerate(yaxis): if ele != 0: fitting_xaxis.append(xaxis[i]) fitting_yaxis.append(ele) for j in range(1, FittingModels().size()+1): fitting = FittingModels().cal_curve_fit(fitting_xaxis, fitting_yaxis, j) if not fitting is None: all_fittings.append(fitting) #print fitting[0], fitting[1], fitting[3] else: #print j, 'None' pass all_fittings.sort(key=itemgetter(4)) if all_fittings: print all_fittings[0][0], all_fittings[0][1], all_fittings[0][2], all_fittings[0][4] fitted_y = [0 for i in 
range(len(xaxis))] for x in xaxis: if x in fitting_xaxis: idx = fitting_xaxis.index(x) fitted_y[idx] = all_fittings[0][3][idx] zoom_yaxis_fitting = fitted_y[zoom_x:] self.plot_figure(axs, xaxis, fitted_y, collection_name, all_fittings[0][1], drawline=True, linestyle='--', zoom=zoom_x > 0, zoom_ax = zoom_axs, zoom_xaxis=zoom_xaxis, zoom_yaxis=zoom_yaxis_fitting, legend_pos='best', xlimit=xlimit, ylimit=ylimit) output_fn = os.path.join(self.all_results_root, output_root, '%s-%s-%s-%s-%s-%s-%d-%.1f-%.1f-zoom%d-%s-%s-all.%s' % ( collection_name, _method, 'ratio' if plot_ratio else 'abscnt', 'total' if plot_total_or_avg else 'avg', 'rel' if plot_rel_or_all else 'all', 'line' if drawline else 'dots', numbins, xlimit, ylimit, zoom_x, 'compact' if compact_x else 'raw', 'fit' if curve_fitting else 'plain', oformat) ) plt.savefig(output_fn, format=oformat, bbox_inches='tight', dpi=400) return collection_name, return_data
def main():
    path = "/mnt/nas/GrimaRepo/datasets/mscoco/coco2014/crops/cropsFeats"
    # DEBUGGING
    # path = "./features"
    ev = Evaluation(path)
    ev.run()
for pkg in user.pkg_profile: item_score[pkg] = user.item_score[pkg] sample = {} sample_size = int(profile_len * 0.9) for i in range(sample_size): key = random.choice(item_score.keys()) sample[key] = item_score.pop(key) iteration_user = User(item_score) recommendation = rec.get_recommendation( iteration_user, repo_size) if hasattr(recommendation, "ranking"): ranking = recommendation.ranking real = RecommendationResult(sample) predicted_10 = RecommendationResult( dict.fromkeys(ranking[:10], 1)) evaluation = Evaluation(predicted_10, real, repo_size) p_10.append(evaluation.run(Precision())) predicted_100 = RecommendationResult( dict.fromkeys(ranking[:100], 1)) evaluation = Evaluation(predicted_100, real, repo_size) f05_100.append(evaluation.run(F_score(0.5))) c_10[k][size] = c_10[k][size].union( recommendation.ranking[:10]) c_100[k][size] = c_100[k][size].union( recommendation.ranking[:100]) # save summary if p_10: p_10_summary[k][size].append(numpy.mean(p_10)) if f05_100: f05_100_summary[k][size].append(numpy.mean(f05_100))
def classify(self) : t1 = time.time() # Schedule a crawl job with the query try : crawler = Search(self.search_query) crawler.googleSearch() except Exception as e : print "Error in initializing Google search" t2 = time.time() print "Google search done in " + str(t2-t1) + " secs" # Extract data crawled try : crawler.get_crawled_urls() except Exception as e : print "Error in extracting crawl data" t3 = time.time() print "Test data extraction done in " + str(t3-t2) + " secs" # Preprocess test data try : preproc_test = Preprocessor(crawler.all_urls) preproc_test.preprocessor_main() except Exception as e : print e print "Error in preprocessing crawl data" t4 = time.time() print "Test data preprocessing done in " + str(t4-t3) + " secs" # Send a search request to Dig server with the query dig_search = Dig_Search(self.search_query) dig_search.search_request() t5 = time.time() print "Dig Search done in " + str(t5-t4) + " secs" # Extract results returned by search query dig_search.dig_extraction() t6 = time.time() print "Dig extraction done in " + str(t6-t5) + " secs" # Preprocess the search results try : preproc_train = Preprocessor(dig_search.urls_dig) preproc_train.preprocessor_main() dig_search.filter_dig_result(preproc_train.data) except Exception as e : print e print "Error in preprocessing training data" t7 = time.time() print "Training data preprocessing done in " + str(t7-t6) + " secs" # Compute tfidf vectors of data try : tfidf_train = Tfidf_Vectorize(dig_search.urls_dig) tfidf_train.tfidf_vectorize_train() tfidf_train.tfidf_vectorize_test(preproc_test.data) except Exception as e : print e print "Error in computing tfidf vectorization" t9 = time.time() print "Tfidf computation done in " + str(t9-t7) + " secs" # Compute similarity of training data with its centroid vector try : sim_train = Similarity(tfidf_train.tfidf_centroid_train, tfidf_train.features_train, tfidf_train.tfidf_train) similarity_train = sim_train.similarity_main() except Exception as e : print e print "Error in computing cosine similarity" t10 = time.time() print "Training data similarity computation done in " + str(t10-t9) + " secs" # Compute similarity of test data with training data try : sim_test = Similarity(tfidf_train.tfidf_centroid_train, tfidf_train.features_train, tfidf_train.tfidf_test) similarity_test = sim_test.similarity_main() except Exception as e : print e print "Error in computing cosine similarity" t11 = time.time() print "Similarity computation done in " + str(t11-t10) + " secs" print "Total time = " + str(t11-t1) evaluator = Evaluation(similarity_train, similarity_test) similarity_count = evaluator.compare_similarity(preproc_test) avg_train_similarity = numpy.mean(similarity_train) epsilon = 0.4 * avg_train_similarity classifier_output = open("output/" + self.search_query.replace(' ','_') + "2.html","w") urls_classified = [] tfidf_tr = tfidf_train.tfidf_centroid_train tfidf_tr = sorted(tfidf_tr, key= lambda tfidf : tfidf[1], reverse=True) for sim in similarity_count : url_desc = {} url_desc['Test_url'] = "<a href='"+preproc_test.data[sim[0]]['url']+"''>"+preproc_test.data[sim[0]]['url']+"</a>" if sim[1] >= (avg_train_similarity-epsilon) : url_desc['Classifier Output'] = True else : url_desc['Classifier Output'] = False url_desc['Similarity Score'] = sim[1] url_desc['Average Training Similarity'] = avg_train_similarity tfidf_url = tfidf_train.tfidf_test[sim[0]] tfidf_url = sorted(tfidf_url, key= lambda tfidf : tfidf[1], reverse=True) url_desc['Top Test Keywords'] = ", ".join([tfidf[0] for tfidf in 
tfidf_url[0:20]]) urls_classified.append(url_desc) _json2conv = {"" : urls_classified} classifier_output.write("<html><h2 align='center' style='text-decoration:underline'>Classifier Output</h3><h2 align='center'>Query : "+self.search_query+"</h2><h2 align='center'>Top Train Keywords : "+", ".join([tfidf[0] for tfidf in tfidf_tr[0:20]])+"</h2><body>"+ json2html.convert(json=_json2conv, table_attributes="border=2, cellspacing=0, cellpadding=5, text-align='center'") + "</body></html>") classifier_output.close()
def setUp(self):
    super(TextLineDataTest, self).setUp()
    self.card_id = ReportCard.create('Text Line Data Test Card').id()
    self.evaluation_id = Evaluation.create('Text Line Data Test Evaluation', self.card_id).id()
    self.text_line_id = TextLine.create('Text Line Data Test Text Line', self.card_id).id()
def setUp(self):
    super(CommentDataTest, self).setUp()
    self.card_id = ReportCard.create('Comment Data Test Card').id()
    self.evaluation_id = Evaluation.create('Comment Data Test Evaluation', self.card_id).id()
def test_create(self):
    eval_id = Evaluation.create("Evaluation Create Test", self.card_id).id()
    evaluation = Evaluation.find_by_id(eval_id)
    self.assertEqual("Evaluation Create Test", evaluation.name)
    self.assertEqual(self.card_id, evaluation.card.key().id())
def iterate(self, params, rep, n): if params['name'].startswith("content"): item_score = dict.fromkeys(self.user.pkg_profile, 1) # Prepare partition sample = {} for i in range(self.sample_size): key = random.choice(item_score.keys()) sample[key] = item_score.pop(key) # Get full recommendation user = User(item_score) recommendation = self.rec.get_recommendation(user, self.repo_size) # Write recall log recall_file = "results/content/recall/%s-%s-%.2f-%d" % \ (params['strategy'], params[ 'weight'], params['sample'], n) output = open(recall_file, 'w') output.write("# weight=%s\n" % params['weight']) output.write("# strategy=%s\n" % params['strategy']) output.write("# sample=%f\n" % params['sample']) output.write("\n%d %d %d\n" % (self.repo_size, len(item_score), self.sample_size)) notfound = [] ranks = [] for pkg in sample.keys(): if pkg in recommendation.ranking: ranks.append(recommendation.ranking.index(pkg)) else: notfound.append(pkg) for r in sorted(ranks): output.write(str(r) + "\n") if notfound: output.write("Out of recommendation:\n") for pkg in notfound: output.write(pkg + "\n") output.close() # Plot metrics summary accuracy = [] precision = [] recall = [] f1 = [] g = Gnuplot.Gnuplot() g('set style data lines') g.xlabel('Recommendation size') for size in range(1, len(recommendation.ranking) + 1, 100): predicted = RecommendationResult( dict.fromkeys(recommendation.ranking[:size], 1)) real = RecommendationResult(sample) evaluation = Evaluation(predicted, real, self.repo_size) accuracy.append([size, evaluation.run(Accuracy())]) precision.append([size, evaluation.run(Precision())]) recall.append([size, evaluation.run(Recall())]) f1.append([size, evaluation.run(F1())]) g.plot(Gnuplot.Data(accuracy, title="Accuracy"), Gnuplot.Data(precision, title="Precision"), Gnuplot.Data(recall, title="Recall"), Gnuplot.Data(f1, title="F1")) g.hardcopy(recall_file + "-plot.ps", enhanced=1, color=1) # Iteration log result = {'iteration': n, 'weight': params['weight'], 'strategy': params['strategy'], 'accuracy': accuracy[20], 'precision': precision[20], 'recall:': recall[20], 'f1': f1[20]} return result