# FeatMat and Trainer are project-local modules.
import os


def main():
    # Go to script's directory
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Train on Michael dataset, evaluate with Wookie dataset
    featMat = FeatMat()
    featMat.addFolder('../datasets/Michael')

    trainer = Trainer()
    trainer.train(featMat)
# Revised main(): additionally trains the GIST-based model.
import os


def main():
    # Go to script's directory
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Train on Michael dataset, evaluate with Wookie dataset
    featMat = FeatMat()
    featMat.addFolder('../datasets/Michael')

    trainer = Trainer()
    trainer.train(featMat)
    trainer.train_gist(featMat)
def predictImage(self, imgf):
    # Extract the feature vector for a single image file and classify it
    feats = FeatMat().getFeature(imgf)
    return self.predictFeats(feats)
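# Minimal usage sketch for the method above (assumed wiring; the dataset
# folder and image path are hypothetical, while FeatMat, Trainer and
# Classifier are the project classes used elsewhere in this code):
featMat = FeatMat()
featMat.addFolder('../datasets/Michael')
trainer = Trainer()
trainer.train(featMat)
clf = Classifier(trainer)
score = clf.predictImage('../datasets/Michael/face_001.jpg')  # hypothetical path
print('class: %d (score %.2f)' % (1 if score > 0.5 else 0, score))  # 0.5 threshold as in evaluate()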
# Third-party imports; FeatMat, Trainer, Classifier and generate_graph are
# project-local, and TrainingData, Models and logf are module-level globals
# (a FeatMat cache, a trained-model cache and an open log file).
from itertools import combinations

import numpy as np
from numpy.lib.recfunctions import append_fields
from scipy.stats import pearsonr
from sklearn.metrics import average_precision_score, precision_recall_curve


class Evaluator:
    name = ""
    train_data = None
    eval_data = None   # added: evaluate_on() sets this, so reset it too
    eval_feats = None
    eval_annos = None
    trainer = None
    classifier = None

    def __init__(self, name):
        self.name = name
        self.init()

    def init(self):
        # The original assigned bare locals (eval_feats = None), which had
        # no effect; these must be instance attributes.
        self.train_data = None
        self.eval_data = None
        self.eval_feats = None
        self.eval_annos = None
        self.trainer = Trainer()
        return self

    def log(self, message, section=None):
        if section:
            msg = '[%s|%s] ' % (self.name, section)
        elif self.name:
            msg = '[%s] ' % self.name
        else:
            msg = ''
        msg += message
        print(msg)
        global logf
        logf.write(msg + '\n')

    def train_on(self, feats, annotations):
        global TrainingData
        name = annotations.name
        if name in TrainingData:
            self.train_data = TrainingData[name]
        else:
            self.train_data = FeatMat()
            self.train_data.add(feats, annotations)
            TrainingData[name] = self.train_data
        return self

    def evaluate_on(self, feats, annotations):
        global TrainingData
        name = annotations.name
        if name in TrainingData:
            self.eval_data = TrainingData[name]
        else:
            self.eval_data = FeatMat()
            self.eval_data.add(feats, annotations)
            TrainingData[name] = self.eval_data
        self.eval_feats = feats
        self.eval_annos = annotations
        return self

    def train(self):
        global Models
        # Cache trained models by training-set name (the original keyed the
        # cache on eval_data, which looks like a bug: the model depends only
        # on the training data).
        name = self.train_data.name
        if name in Models:
            self.trainer = Models[name]
        else:
            Models[name] = self.trainer.train(self.train_data, persist=False)
        self.classifier = Classifier(self.trainer)
        return self

    def evaluate(self):
        def log(msg):
            self.log(msg, 'eval')

        print('')
        errs = []
        C = 0    # Correctly estimated
        I = 0    # Inconsistent annotations
        A = 0    # in Agreement (annotations)
        C_A = 0  # Correctly estimated where annotations in agreement
        T = len(self.eval_annos)  # Total

        for fname, fclassifs in self.eval_annos.data.items():
            # Ground truth: median vote over all annotators
            tru = np.median(fclassifs)
            tru_cls = 1 if tru > 0.5 else 0

            # Prediction
            est = self.classifier.predictFeats(self.eval_feats[fname])
            est_cls = 1 if est > 0.5 else 0

            if tru_cls == est_cls:
                C += 1

            # Do any annotators disagree on this file?
            if len(np.unique(fclassifs)) > 1:
                I += 1
            else:
                A += 1
                if tru_cls == est_cls:
                    C_A += 1

            # Add errors
            errs.append(abs(tru - est))

        A = float(A)
        C_A = float(C_A)
        T = float(T)

        global logf
        logf.write('\n')
        if self.eval_annos.n > 1:
            log('%d/%d (%.2f%%) annotations in agreement' % (A, T, A / T * 100))
            log('%d/%d (%.2f%%) incorrect for annotations in agreement'
                % (A - C_A, A, (A - C_A) / A * 100))
        log('%d/%d (%.2f%%) incorrect' % (T - C, T, (T - C) / T * 100))

        l1err = float(np.linalg.norm(errs, 1))  # L1-error norm
        l2err = float(np.linalg.norm(errs, 2))  # L2-error norm
        log('L1-error: %.3f' % (l1err / len(errs)))
        log('L2-error: %.3f' % (l2err / len(errs)))
        return (T - C) / T * 100

    def print_correlations(self, annotations):
        pairs = combinations(annotations, 2)
        Rs = []
        for anno1, anno2 in pairs:
            # Sort both annotators' labels by filename so they line up
            _, cls1 = zip(*sorted(anno1.data.items()))
            _, cls2 = zip(*sorted(anno2.data.items()))
            cls1 = [x[0] for x in cls1]
            cls2 = [x[0] for x in cls2]
            R, p = pearsonr(cls1, cls2)
            self.log('%s <-> %s: %f %g' % (anno1.name, anno2.name, R, p))
            Rs.append(R)
        self.log('Mean R: %f\n' % np.mean(Rs))

    def plot_PR(self, ofpath, label):
        y_true = self.eval_data.y
        y_score = self.classifier.clf.decision_function(self.eval_data.X)

        # Scale random values to span same range as y_score
        y_score_maxp = np.max(y_score)
        y_score_maxn = -np.min(y_score)
        y_score_span = y_score_maxp + y_score_maxn
        y_rand = np.random.rand(y_true.shape[0], 1) * y_score_span - y_score_maxn

        # Calculate PR (model), resampled onto a fixed recall axis
        precision, recall, _ = precision_recall_curve(y_true, y_score)
        area = average_precision_score(y_true, y_score)
        x = np.linspace(0, 1, 200)
        y = np.interp(x, np.flipud(recall), np.flipud(precision))

        # Calculate PR (random)
        precision, recall, _ = precision_recall_curve(y_true, y_rand)
        area_rand = average_precision_score(y_true, y_rand)
        y_rand = np.interp(x, np.flipud(recall), np.flipud(precision))

        txtpath = 'PR.out'
        try:
            dat = np.genfromtxt(txtpath, delimiter='\t', names=True,
                                deletechars='', replace_space=False)
        except IOError:
            # First run: start a fresh table holding just the recall axis
            dat = np.array(x, dtype=[('Recall', float)])

        # Add model PR
        dat = append_fields(dat, '%s (area: %.2f)' % (label, area), y)
        # Add random PR
        dat = append_fields(dat, 'Random classifier (area: %.2f)' % area_rand,
                            y_rand)
        np.savetxt(txtpath, dat, delimiter='\t',
                   header='\t'.join(dat.dtype.names), comments='')

        if ofpath:
            title = 'Precision-Recall curve'
            ylabel = 'Precision'
            generate_graph(txtpath, ofpath, title, ylabel)
        return self

    def cleanup(self):
        return self.init()
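# Minimal sketch of the intended call chain (assumed; `Features` and
# `Annotations` stand in for whatever project types provide per-file
# features and per-file annotator labels -- only the Evaluator calls
# themselves come from the class above):
feats = Features('../datasets/Michael')      # hypothetical constructor
annos = Annotations('../datasets/Michael')   # hypothetical constructor
pct_incorrect = (Evaluator('michael')
                 .train_on(feats, annos)
                 .evaluate_on(feats, annos)
                 .train()
                 .evaluate())  # returns % incorrect over the eval set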
# Second revision of the Evaluator: adds a GIST-only classifier and a random
# baseline to the evaluation. Imports as in the version above, plus:
from random import randint


class Evaluator:
    name = ""
    train_data = None
    eval_data = None   # added: evaluate_on() sets this, so reset it too
    eval_feats = None
    eval_annos = None
    trainer = None
    classifier = None

    def __init__(self, name):
        self.name = name
        self.init()

    def init(self):
        # The original assigned bare locals (eval_feats = None), which had
        # no effect; these must be instance attributes.
        self.train_data = None
        self.eval_data = None
        self.eval_feats = None
        self.eval_annos = None
        self.trainer = Trainer()
        return self

    def log(self, message, section=None):
        if section:
            msg = '[%s|%s] ' % (self.name, section)
        elif self.name:
            msg = '[%s] ' % self.name
        else:
            msg = ''
        msg += message
        print(msg)
        global logf
        logf.write(msg + '\n')

    def train_on(self, feats, annotations):
        global TrainingData
        name = annotations.name
        if name in TrainingData:
            self.train_data = TrainingData[name]
        else:
            self.train_data = FeatMat()
            self.train_data.add(feats, annotations)
            TrainingData[name] = self.train_data
        return self

    def evaluate_on(self, feats, annotations):
        global TrainingData
        name = annotations.name
        if name in TrainingData:
            self.eval_data = TrainingData[name]
        else:
            self.eval_data = FeatMat()
            self.eval_data.add(feats, annotations)
            TrainingData[name] = self.eval_data
        self.eval_feats = feats
        self.eval_annos = annotations
        return self

    def train(self):
        global Models
        # Cache trained models by training-set name (the original keyed the
        # cache on eval_data, which looks like a bug: the model depends only
        # on the training data).
        name = self.train_data.name
        if name in Models:
            self.trainer = Models[name]
        else:
            Models[name] = self.trainer
            self.trainer.train(self.train_data, persist=False)
            self.trainer.train_gist(self.train_data, persist=False)
        self.classifier = Classifier(self.trainer)
        return self

    def evaluate(self):
        def log(msg):
            self.log(msg, 'eval')

        print('')
        errs = []
        C = 0      # Correctly estimated
        C_g = 0    # - GIST + SVM
        C_r = 0    # - random
        I = 0      # Inconsistent annotations
        A = 0      # in Agreement (annotations)
        C_A = 0    # Correctly estimated where annotations in agreement
        C_A_g = 0  # - GIST + SVM
        C_A_r = 0  # - random
        T = len(self.eval_annos)  # Total

        for fname, fclassifs in self.eval_annos.data.items():
            # Ground truth: median vote over all annotators
            tru = np.median(fclassifs)
            tru_cls = 1 if tru > 0.5 else 0

            # Predictions: full model, GIST-only model, random baseline
            est = self.classifier.predictFeats(self.eval_feats[fname])
            est_gist = self.classifier.predictFeats_gist(self.eval_feats.gist(fname))
            est_rand = randint(0, 1)
            est_cls = 1 if est > 0.5 else 0

            if tru_cls == est_cls:
                C += 1
            if tru_cls == est_gist:
                C_g += 1
            if tru_cls == est_rand:
                C_r += 1

            # Do any annotators disagree on this file?
            if len(np.unique(fclassifs)) > 1:
                I += 1
            else:
                A += 1
                if tru_cls == est_cls:
                    C_A += 1
                if tru_cls == est_gist:
                    C_A_g += 1
                if tru_cls == est_rand:
                    C_A_r += 1

            # Add errors
            errs.append(abs(tru - est))

        A = float(A)
        C_A = float(C_A)
        T = float(T)

        global logf
        logf.write('\n')
        if self.eval_annos.n > 1:
            log('%d/%d (%.2f%%) annotations in agreement' % (A, T, A / T * 100))
            log('%d/%d (%.2f%%) incorrect for annotations in agreement'
                % (A - C_A, A, (A - C_A) / A * 100))
            log('%d/%d (%.2f%%) incorrect for annotations in agreement (GIST)'
                % (A - C_A_g, A, (A - C_A_g) / A * 100))
            log('%d/%d (%.2f%%) incorrect for annotations in agreement (RAND)'
                % (A - C_A_r, A, (A - C_A_r) / A * 100))
        log('%d/%d (%.2f%%) incorrect' % (T - C, T, (T - C) / T * 100))

        l1err = float(np.linalg.norm(errs, 1))  # L1-error norm
        l2err = float(np.linalg.norm(errs, 2))  # L2-error norm
        log('L1-error: %.3f' % (l1err / len(errs)))
        log('L2-error: %.3f' % (l2err / len(errs)))
        return (T - C) / T * 100

    def print_correlations(self, annotations):
        pairs = combinations(annotations, 2)
        Rs = []
        for anno1, anno2 in pairs:
            # Sort both annotators' labels by filename so they line up
            _, cls1 = zip(*sorted(anno1.data.items()))
            _, cls2 = zip(*sorted(anno2.data.items()))
            cls1 = [x[0] for x in cls1]
            cls2 = [x[0] for x in cls2]
            R, p = pearsonr(cls1, cls2)
            self.log('%s <-> %s: %f %g' % (anno1.name, anno2.name, R, p))
            Rs.append(R)
        self.log('Mean R: %f\n' % np.mean(Rs))

    def plot_PR(self, ofpath, label):
        y_true = self.eval_data.y
        y_score = self.classifier.clf.decision_function(self.eval_data.X)

        # Scale random values to span same range as y_score
        y_score_maxp = np.max(y_score)
        y_score_maxn = -np.min(y_score)
        y_score_span = y_score_maxp + y_score_maxn
        y_rand = np.random.rand(y_true.shape[0], 1) * y_score_span - y_score_maxn

        # Calculate PR (model), resampled onto a fixed recall axis
        precision, recall, _ = precision_recall_curve(y_true, y_score)
        area = average_precision_score(y_true, y_score)
        x = np.linspace(0, 1, 200)
        y = np.interp(x, np.flipud(recall), np.flipud(precision))

        # Calculate PR (random)
        precision, recall, _ = precision_recall_curve(y_true, y_rand)
        area_rand = average_precision_score(y_true, y_rand)
        y_rand = np.interp(x, np.flipud(recall), np.flipud(precision))

        txtpath = 'PR.out'
        try:
            dat = np.genfromtxt(txtpath, delimiter='\t', names=True,
                                deletechars='', replace_space=False)
        except IOError:
            # First run: start a fresh table holding just the recall axis
            dat = np.array(x, dtype=[('Recall', float)])

        # Add model PR
        dat = append_fields(dat, '%s (area: %.2f)' % (label, area), y)
        # Add random PR
        dat = append_fields(dat, 'Random classifier (area: %.2f)' % area_rand,
                            y_rand)
        np.savetxt(txtpath, dat, delimiter='\t',
                   header='\t'.join(dat.dtype.names), comments='')

        if ofpath:
            title = 'Precision-Recall curve'
            ylabel = 'Precision'
            generate_graph(txtpath, ofpath, title, ylabel)
        return self

    def cleanup(self):
        return self.init()
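# plot_PR() appends two columns per call (the model's PR curve and its random
# baseline) to the shared PR.out table, so repeated calls accumulate one
# combined comparison plot. A sketch, assuming `ev` is a trained Evaluator
# and the labels/output path are hypothetical:
ev.plot_PR(None, 'SVM, trained on Michael')     # append columns, no graph yet
# ... switch training set, retrain ...
ev.plot_PR('PR.pdf', 'SVM, trained on Wookie')  # append and render the graph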