def create_feature_object_set(database, phase): feature_objs = defaultdict(dict) feature_vect = [] norm_phase = "MOVE_ARM_START_POSITION" # For each object in the database, extract the phase and sensor # data for #temp = [g for g in utilities.iterator_over_object_groups(database)] for group in utilities.iterator_over_object_groups(database): #for group in temp[0:2]: # Pull data from h5 database data_dict = utilities.dict_from_h5_group(group, [phase]) norm_dict = utilities.dict_from_h5_group(group, [norm_phase]) data = data_dict["data"][phase] norm_data = norm_dict["data"][norm_phase] object_name = data_dict["name"] name = object_name.split('_') print "Loading object ", object_name # Extract features static_feature_phase, feats = extract_static_features(data, norm_data) # Store information about object static_feature_phase.labels = data_dict["adjectives"] static_feature_phase.name = object_name static_feature_phase.detailed_state = phase static_feature_phase.object_id = int(name[-2]) static_feature_phase.run_num = int(name[-1]) feature_objs[object_name] = static_feature_phase feature_vect.append(feats) return feature_objs, np.array(feature_vect)
def create_features_set(self, database, store = False, verbose = False): """ For each object in the database, run classifier.extract_features. All the features are then collected in a matrix. If the classifier's adjective is among the objects' then the feature is labeled with 1, otherwise 0. Parameters: database: either a string or an open pytables file. Returns the features and the labels as two 2-dimensional matrices. """ labels = [] features = [] for group in utilities.iterator_over_object_groups(database): data_dict = utilities.dict_from_h5_group(group) if verbose: print "Loading object ", data_dict["name"] data = data_dict["data"] features.append(self.extract_features(data)) if self.adjective in data_dict["adjectives"]: labels.append(1) else: labels.append(0) features = np.array(features).squeeze() labels = np.array(labels).flatten() if store: self.features = features self.labels = labels return features, labels
def create_features_set(self, database, store=False, verbose=False): """ For each object in the database, run classifier.extract_features. All the features are then collected in a matrix. If the classifier's adjective is among the objects' then the feature is labeled with 1, otherwise 0. Parameters: database: either a string or an open pytables file. Returns the features and the labels as two 2-dimensional matrices. """ labels = [] features = [] for group in utilities.iterator_over_object_groups(database): data_dict = utilities.dict_from_h5_group(group) if verbose: print "Loading object ", data_dict["name"] data = data_dict["data"] features.append(self.extract_features(data)) if self.adjective in data_dict["adjectives"]: labels.append(1) else: labels.append(0) features = np.array(features).squeeze() labels = np.array(labels).flatten() if store: self.features = features self.labels = labels return features, labels
def test_object(obj, classifiers): assert isinstance(obj, tables.Group) data_dict = utilities.dict_from_h5_group(obj) true_adjectives = [a for a in sorted(data_dict["adjectives"]) if a not in to_remove] if len(true_adjectives) == 0: print "Object in database has no adjectives!" test_classifier = False else: test_classifier = True print "Object %s has adjectives %s" %(data_dict["name"], " ".join(true_adjectives) ) print "Positive classifiers:" positives = [] for clf in classifiers: if clf.adjective in to_remove: continue assert isinstance(clf, FeaturesAdjectiveClassifier) features = clf.extract_features(data_dict["data"]) output = clf.predict(features) if output[0] == 1: positives.append(clf.adjective) if not test_classifier: print "Results can't be shown" raise ValueError() if len(positives) == 0: "No classifiers output!" return (0.0, 0.0, 0.0) positives = sorted(positives) print "\t" + " ".join(positives) cls_set = set(positives) true_set = set(true_adjectives) intersection = cls_set & true_set difference = true_set - cls_set true_length = float(len(true_set)) clf_length = float(len(cls_set)) true_positives = len(intersection) / clf_length false_negatives = (true_length - len(intersection)) / true_length false_positives = len(cls_set - true_set) / clf_length print "True posititives %f, False positivies %f, False negatives %f" %( true_positives, false_positives, false_negatives) precision = true_positives / (true_positives + false_positives) recall = true_positives / (true_positives + false_negatives) try: f1 = 2.0 * precision*recall / (precision + recall) except ZeroDivisionError: f1 = 0 print "Precision: %f, Recall: %f, F1: %f" % (precision, recall, f1) return (precision, recall, f1)
def test_object(obj, classifiers): assert isinstance(obj, tables.Group) data_dict = utilities.dict_from_h5_group(obj) true_adjectives = [ a for a in sorted(data_dict["adjectives"]) if a not in to_remove ] if len(true_adjectives) == 0: print "Object in database has no adjectives!" test_classifier = False else: test_classifier = True print "Object %s has adjectives %s" % (data_dict["name"], " ".join(true_adjectives)) print "Positive classifiers:" positives = [] for clf in classifiers: if clf.adjective in to_remove: continue assert isinstance(clf, FeaturesAdjectiveClassifier) features = clf.extract_features(data_dict["data"]) output = clf.predict(features) if output[0] == 1: positives.append(clf.adjective) if not test_classifier: print "Results can't be shown" raise ValueError() if len(positives) == 0: "No classifiers output!" return (0.0, 0.0, 0.0) positives = sorted(positives) print "\t" + " ".join(positives) cls_set = set(positives) true_set = set(true_adjectives) intersection = cls_set & true_set difference = true_set - cls_set true_length = float(len(true_set)) clf_length = float(len(cls_set)) true_positives = len(intersection) / clf_length false_negatives = (true_length - len(intersection)) / true_length false_positives = len(cls_set - true_set) / clf_length print "True posititives %f, False positivies %f, False negatives %f" % ( true_positives, false_positives, false_negatives) precision = true_positives / (true_positives + false_positives) recall = true_positives / (true_positives + false_negatives) try: f1 = 2.0 * precision * recall / (precision + recall) except ZeroDivisionError: f1 = 0 print "Precision: %f, Recall: %f, F1: %f" % (precision, recall, f1) return (precision, recall, f1)
def predict(self, X):
    """
    Predict with the wrapped classifier (self.svc).

    X: either a tables.Group, in which case features are first extracted
       from its "data" entry via self.extract_features, or anything else,
       which is handed to self.svc.predict unchanged.

    Returns whatever self.svc.predict returns.
    """
    if not isinstance(X, tables.Group):
        # Already a feature representation: pass it straight through
        return self.svc.predict(X)

    group_data = utilities.dict_from_h5_group(X)["data"]
    return self.svc.predict(self.extract_features(group_data))
def create_feature_set(database, feature_dict, object_set, adjective): """ For each object in the database, run classifier.extract_features. All the features are then collected in a matrix. If the classifier's adjective is among the objects' then the feature is labeled with 1, otherwise 0. Parameters: database: either a string or an open pytables file. Returns the features and the labels as two 2-dimensional matrices. """ labels = [] features = [] object_names = [] object_ids = [] print "Building adjective %s" % adjective # For each object in the database, extract the phase and sensor # data for for group in utilities.iterator_over_object_groups(database): # Pull data from h5 database data_dict = utilities.dict_from_h5_group(group) object_name = data_dict["name"] name = object_name.split('_') # Skip over object if it is in the set # Training set will skip over test objects # and vice versa if object_name not in object_set: continue # print "Loading object ", object_name # Store object name object_names.append(object_name) object_ids.append(int(name[-2])) # Extract features feature_obj = feature_dict[object_name] feature_vector = createFeatureVector(feature_obj, static_features) features.append(feature_vector) # Store off the labels here if adjective in data_dict["adjectives"]: labels.append(1) else: labels.append(0) set_dict = defaultdict(dict) set_dict['features'] = np.array(features).squeeze() set_dict['labels'] = np.array(labels).flatten() set_dict['object_names'] = np.array(object_names).flatten() set_dict['object_ids'] = np.array(object_ids).flatten() return set_dict
def load_dataset(database, adjective, phase, sensor):
    """Loads the train and test data stored for an adjective, restricted
    to one phase and one sensor.

    Raises ValueError if adjective, phase or sensor is not among the known
    adjectives, phases or sensors.

    Returns (train_set, test_set), two lists with one data entry per child
    of the adjective's train / test group.
    """
    if adjective not in adjectives:
        raise ValueError("%s is not a known adjective" % adjective)
    if phase not in phases:
        raise ValueError("%s is not a known phase" % phase)
    if sensor not in sensors:
        raise ValueError("%s is not a known sensor" % sensor)

    def read_children(split_group):
        # One entry per child group, narrowed to the phase/sensor asked for
        entries = []
        for child in split_group._v_children.values():
            record = utilities.dict_from_h5_group(child, [phase], [sensor])
            entries.append(record["data"][phase][sensor])
        return entries

    train_set = read_children(
        database.getNode("/train_test_sets", adjective).train)
    test_set = read_children(
        database.getNode("/train_test_sets", adjective).test)

    return train_set, test_set
def create_feature_object_set(database, phase, debug=False):
    """
    Gather the 'electrodes' data of every object, grouped by phase.

    NOTE: the per-object static feature extraction that this function is
    named after is disabled here (it only existed as a dead string block),
    so the returned containers are always empty. What remains is the
    collection of electrode data for every phase in `phases`, which is only
    useful for interactive inspection.

    Parameters:
    database: forwarded to utilities.iterator_over_object_groups.
    phase: currently unused; kept so existing callers keep working.
    debug: when true, drop into pdb once the data has been gathered and
           build the stacked per-phase arrays so they can be inspected.
           Defaults to False so that non-interactive runs never stop at a
           debugger prompt.

    Returns (feature_objs, np.array(feature_vect)): an empty defaultdict
    and an empty array.
    """
    feature_objs = defaultdict(dict)
    feature_vect = []

    # The loop variable is deliberately not named `phase`, so the parameter
    # is no longer overwritten.
    all_values = dict((phase_name, []) for phase_name in phases)

    for group in utilities.iterator_over_object_groups(database):
        data_dict = utilities.dict_from_h5_group(group)
        for phase_name in phases:
            all_values[phase_name].append(
                data_dict['data'][phase_name]['electrodes'])

    if debug:
        import pdb
        pdb.set_trace()
        # Stacked electrode data for selected phases; these names exist
        # only so they can be looked at from the debugger.
        slide = np.concatenate(all_values['SLIDE_5CM'])
        squeeze = np.concatenate(all_values['SQUEEZE_SET_PRESSURE_SLOW'])
        hold = np.concatenate(all_values['HOLD_FOR_10_SECONDS'])
        fast_slide = np.concatenate(all_values['MOVE_DOWN_5CM'])

    return feature_objs, np.array(feature_vect)
def create_feature_object_set(database, phase, debug=False):
    """
    Gather the 'electrodes' data of every object, grouped by phase.

    NOTE: the per-object static feature extraction that this function is
    named after is disabled here (it only existed as a dead string block),
    so the returned containers are always empty. What remains is the
    collection of electrode data for every phase in `phases`, which is only
    useful for interactive inspection.

    Parameters:
    database: forwarded to utilities.iterator_over_object_groups.
    phase: currently unused; kept so existing callers keep working.
    debug: when true, drop into pdb once the data has been gathered and
           build the stacked per-phase arrays so they can be inspected.
           Defaults to False so that non-interactive runs never stop at a
           debugger prompt.

    Returns (feature_objs, np.array(feature_vect)): an empty defaultdict
    and an empty array.
    """
    feature_objs = defaultdict(dict)
    feature_vect = []

    # The loop variable is deliberately not named `phase`, so the parameter
    # is no longer overwritten.
    all_values = dict((phase_name, []) for phase_name in phases)

    for group in utilities.iterator_over_object_groups(database):
        data_dict = utilities.dict_from_h5_group(group)
        for phase_name in phases:
            all_values[phase_name].append(
                data_dict['data'][phase_name]['electrodes'])

    if debug:
        import pdb
        pdb.set_trace()
        # Stacked electrode data for selected phases; these names exist
        # only so they can be looked at from the debugger.
        slide = np.concatenate(all_values['SLIDE_5CM'])
        squeeze = np.concatenate(all_values['SQUEEZE_SET_PRESSURE_SLOW'])
        hold = np.concatenate(all_values['HOLD_FOR_10_SECONDS'])
        fast_slide = np.concatenate(all_values['MOVE_DOWN_5CM'])

    return feature_objs, np.array(feature_vect)
def classification_labels(self, X): """ X: list of dictionaries d, each with the structure: d[phase][sensor] = data """ if isinstance(X, tables.Group): X = utilities.dict_from_h5_group(X)['data'] if type(X) is not list: X = [X] ret = [] for x in X: scores = [] for phase, v in x.iteritems(): for sensor, data in v.iteritems(): try: chain = self.chains[phase][sensor] scores.append(chain.predict(data)[0]) except KeyError: print "No key for %s %s" %(phase, sensor) ret.append(scores) return ret
def extract_features(self, X):
    """
    Score every sample in X with the chains of this classifier.

    X: a tables.Group (its 'data' entry is used), a single dictionary, or
       a list of dictionaries d, each with the structure:
       d[phase][sensor] = data

    Returns a list with one entry per sample; each entry is the list of
    chain scores for the phase/sensor pairs that have a chain in
    self.chains. Pairs that raise KeyError are silently skipped.
    """
    if isinstance(X, tables.Group):
        # dict_from_h5_group returns the whole object record (name,
        # adjectives, data); only the 'data' entry has the
        # d[phase][sensor] layout iterated below. Without this lookup the
        # loop would walk over the record's non-dictionary entries.
        X = utilities.dict_from_h5_group(X)['data']
    if type(X) is not list:
        X = [X]

    ret = []
    for x in X:
        scores = []
        for phase, v in x.iteritems():
            for sensor, data in v.iteritems():
                try:
                    chain = self.chains[phase][sensor]
                    scores.append(chain.score(data))
                except KeyError:
                    # Best effort: no chain for this phase/sensor pair
                    pass
        ret.append(scores)

    return ret
def load_dataset(database, adjective, phase, sensor):
    """Loads labeled train and test data for an adjective, restricted to
    one phase and one sensor.

    Children of the adjective's own train / test groups are loaded first
    and labeled 1. Children of every other adjective's train / test groups
    are then added with label 0. A child name is only ever loaded once,
    the first time it is seen.

    Raises ValueError if adjective, phase or sensor is not among the known
    adjectives, phases or sensors.

    Returns ((train_set, train_label), (test_set, test_label)).
    """
    if adjective not in adjectives:
        raise ValueError("%s is not a known adjective" % adjective)
    if phase not in phases:
        raise ValueError("%s is not a known phase" % phase)
    if sensor not in sensors:
        raise ValueError("%s is not a known sensor" % sensor)

    seen_names = set()
    train_set = []
    test_set = []

    def absorb(split_group, destination):
        # Append the data of every not-yet-seen child to `destination`
        for child_name, child in split_group._v_children.iteritems():
            if child_name in seen_names:
                continue
            record = utilities.dict_from_h5_group(child, [phase], [sensor])
            destination.append(record["data"][phase][sensor])
            seen_names.add(child_name)

    # Positive class: the adjective's own sets
    absorb(database.getNode("/train_test_sets", adjective).train, train_set)
    absorb(database.getNode("/train_test_sets", adjective).test, test_set)

    train_label = [1] * len(train_set)
    test_label = [1] * len(test_set)

    # Negative class: whatever the other adjectives contribute
    for other_adj in database.getNode("/train_test_sets"):
        if other_adj._v_name == adjective:
            continue
        absorb(other_adj.train, train_set)
        absorb(other_adj.test, test_set)

    # Everything added after the positives gets label 0
    train_label += [0] * (len(train_set) - len(train_label))
    test_label += [0] * (len(test_set) - len(test_label))

    return (train_set, train_label), (test_set, test_label)
def create_hmm_feature_set(database, object_set, adj_obj, phase_list): """ For each object in the database, run classifier.extract_features. All the features are then collected in a matrix. If the classifier's adjective is among the objects' then the feature is labeled with 1, otherwise 0. Parameters: database: either a string or an open pytables file. Returns the features and the labels as two 2-dimensional matrices. """ print "Building adjective %s" % adj_obj.adjective # For each object in the database, extract the phase and sensor # data for for group in utilities.iterator_over_object_groups(database): # Pull data from h5 database data_dict = utilities.dict_from_h5_group(group) object_name = data_dict["name"] name = object_name.split('_') labels = [] # Skip over object if it is in the set # Training set will skip over test objects # and vice versa if object_name in object_set: # Extract features feature_data = data_dict["data"] for i, phase_obj in enumerate(phase_list): scores = [] set_dict = defaultdict(dict) if phase_obj.build == False: continue for sensor, data in feature_data[phase_obj.phase].iteritems(): try: chain = adj_obj.chains[phase_obj.phase][sensor] scores.append(chain.score(data)) except KeyError: pass #import pdb; pdb.set_trace() phase_obj.features.append(scores) # Sort out the objec's label if adj_obj.adjective in data_dict["adjectives"]: phase_obj.labels.append(1) else: phase_obj.labels.append(0) phase_obj.object_names.append(object_name) phase_obj.object_ids.append(int(name[-2])) #Iterate over all phases, convert to dictionaries and sqeeze #place all phases in a list set_dict_list = [] for phase_obj in phase_list: set_dict = defaultdict(dict) if phase_obj.build == True: set_dict['features'] = np.array(phase_obj.features).squeeze() set_dict['labels'] = np.array(phase_obj.labels).flatten() set_dict['object_names'] = np.array( phase_obj.object_names).flatten() set_dict['object_ids'] = np.array(phase_obj.object_ids).flatten() phase_obj.wipe_data() 
#import pdb; pdb.set_trace() set_dict_list.append(set_dict) return set_dict_list
def test_adjective(classifier, database, test_object_names, adjective_report): true_positives = 0.0 true_negatives = 0.0 false_positives = 0.0 false_negatives = 0.0 false_positive_list = [] false_negative_list = [] true_positive_list = [] true_negative_list = [] print '\n \nTesting Adjective: %s' % classifier.adjective for group in utilities.iterator_over_object_groups(database): assert isinstance(group, tables.Group) data_dict = utilities.dict_from_h5_group(group) if data_dict['name'] not in test_object_names: continue features = classifier.extract_features(data_dict["data"]) output = classifier.predict(features) # For this object - find out if the adjective applies # True label is 0 if adjective is false for this adjective true_labels = data_dict['adjectives'] if classifier.adjective in true_labels: true_label = 1 else: true_label = 0 # Determine if the true label and classifier prediction match if true_label == 1: if output[0] == 1: true_positives += 1.0 true_positive_list.append(data_dict['name']) else: false_negatives += 1.0 false_negative_list.append(data_dict['name']) else: # label is 0 if output[0] == 1: false_positives += 1.0 false_positive_list.append(data_dict['name']) else: true_negatives += 1.0 true_negative_list.append(data_dict['name']) # Compute statistics for the adjective precision = true_positives / (true_positives + false_positives) recall = true_positives / (true_positives + false_negatives) try: f1 = 2.0 * precision * recall / (precision + recall) except ZeroDivisionError: f1 = 0 print "Precision: %f, Recall: %f, F1: %f \n" % (precision, recall, f1) adjective_report.write("%s, %f, %f, %f\n" % (classifier.adjective, precision, recall, f1)) print "%d False Positive Objects are: %s \n" % ( false_positives, sorted(false_positive_list)) print "%d False Negative Objects are: %s \n" % ( false_negatives, sorted(false_negative_list)) print "%d True Positive Objects are: %s\n" % (true_positives, sorted(true_positive_list)) print "%d True Negative 
Objects are: %s\n" % (true_negatives, sorted(true_negative_list)) return (precision, recall, f1)
def load_dataset(database, adjective, phase, sensor):
    """Loads labeled train and test data for an adjective, restricted to
    one phase and one sensor.

    Children of the adjective's own train / test groups are loaded first
    and labeled 1. Children of every other adjective's train / test groups
    are then added with label 0. A child name is only ever loaded once,
    the first time it is seen.

    Raises ValueError if adjective, phase or sensor is not among the known
    adjectives, phases or sensors.

    Returns ((train_set, train_label), (test_set, test_label)).
    """
    if adjective not in adjectives:
        raise ValueError("%s is not a known adjective" % adjective)
    if phase not in phases:
        raise ValueError("%s is not a known phase" % phase)
    if sensor not in sensors:
        raise ValueError("%s is not a known sensor" % sensor)

    seen_names = set()
    train_set = []
    test_set = []

    def absorb(split_group, destination):
        # Append the data of every not-yet-seen child to `destination`
        for child_name, child in split_group._v_children.iteritems():
            if child_name in seen_names:
                continue
            record = utilities.dict_from_h5_group(child, [phase], [sensor])
            destination.append(record["data"][phase][sensor])
            seen_names.add(child_name)

    # Positive class: the adjective's own sets
    absorb(database.getNode("/train_test_sets", adjective).train, train_set)
    absorb(database.getNode("/train_test_sets", adjective).test, test_set)

    train_label = [1] * len(train_set)
    test_label = [1] * len(test_set)

    # Negative class: whatever the other adjectives contribute
    for other_adj in database.getNode("/train_test_sets"):
        if other_adj._v_name == adjective:
            continue
        absorb(other_adj.train, train_set)
        absorb(other_adj.test, test_set)

    # Everything added after the positives gets label 0
    train_label += [0] * (len(train_set) - len(train_label))
    test_label += [0] * (len(test_set) - len(test_label))

    return (train_set, train_label), (test_set, test_label)
def create_hmm_feature_set(database, object_set, adj_obj, phase_list): """ For each object in the database, run classifier.extract_features. All the features are then collected in a matrix. If the classifier's adjective is among the objects' then the feature is labeled with 1, otherwise 0. Parameters: database: either a string or an open pytables file. Returns the features and the labels as two 2-dimensional matrices. """ print "Building adjective %s" % adj_obj.adjective # For each object in the database, extract the phase and sensor # data for for group in utilities.iterator_over_object_groups(database): # Pull data from h5 database data_dict = utilities.dict_from_h5_group(group) object_name = data_dict["name"] name = object_name.split('_') labels = [] # Skip over object if it is in the set # Training set will skip over test objects # and vice versa if object_name in object_set: # Extract features feature_data = data_dict["data"] for i, phase_obj in enumerate(phase_list): scores = [] set_dict = defaultdict(dict) if phase_obj.build == False: continue for sensor, data in feature_data[phase_obj.phase].iteritems(): try: chain = adj_obj.chains[phase_obj.phase][sensor] scores.append(chain.score(data)) except KeyError: pass #import pdb; pdb.set_trace() phase_obj.features.append(scores) # Sort out the objec's label if adj_obj.adjective in data_dict["adjectives"]: phase_obj.labels.append(1) else: phase_obj.labels.append(0) phase_obj.object_names.append(object_name) phase_obj.object_ids.append(int(name[-2])) #Iterate over all phases, convert to dictionaries and sqeeze #place all phases in a list set_dict_list = [] for phase_obj in phase_list: set_dict = defaultdict(dict) if phase_obj.build == True: set_dict['features'] = np.array(phase_obj.features).squeeze() set_dict['labels'] = np.array(phase_obj.labels).flatten() set_dict['object_names'] = np.array(phase_obj.object_names).flatten() set_dict['object_ids'] = np.array(phase_obj.object_ids).flatten() phase_obj.wipe_data() 
#import pdb; pdb.set_trace() set_dict_list.append(set_dict) return set_dict_list
def test_adjective(classifier, database, test_object_names, adjective_report): true_positives = 0.0 true_negatives = 0.0 false_positives = 0.0 false_negatives = 0.0 false_positive_list = [] false_negative_list = [] true_positive_list = [] true_negative_list = [] print '\n \nTesting Adjective: %s' % classifier.adjective for group in utilities.iterator_over_object_groups(database): assert isinstance(group, tables.Group) data_dict = utilities.dict_from_h5_group(group) if data_dict['name'] not in test_object_names: continue features = classifier.extract_features(data_dict["data"]) output = classifier.predict(features) # For this object - find out if the adjective applies # True label is 0 if adjective is false for this adjective true_labels = data_dict['adjectives'] if classifier.adjective in true_labels: true_label = 1 else: true_label = 0 # Determine if the true label and classifier prediction match if true_label == 1: if output[0] == 1: true_positives += 1.0 true_positive_list.append(data_dict['name']) else: false_negatives += 1.0 false_negative_list.append(data_dict['name']) else: # label is 0 if output[0] == 1: false_positives += 1.0 false_positive_list.append(data_dict['name']) else: true_negatives += 1.0 true_negative_list.append(data_dict['name']) # Compute statistics for the adjective precision = true_positives / (true_positives + false_positives) recall = true_positives / (true_positives + false_negatives) try: f1 = 2.0 * precision*recall / (precision + recall) except ZeroDivisionError: f1 = 0 print "Precision: %f, Recall: %f, F1: %f \n" % (precision, recall, f1) adjective_report.write("%s, %f, %f, %f\n" % (classifier.adjective, precision, recall, f1)) print "%d False Positive Objects are: %s \n" % (false_positives, sorted(false_positive_list)) print "%d False Negative Objects are: %s \n" % (false_negatives, sorted(false_negative_list)) print "%d True Positive Objects are: %s\n" % (true_positives, sorted(true_positive_list)) print "%d True Negative Objects 
are: %s\n" % (true_negatives, sorted(true_negative_list)) return (precision, recall, f1)