def save(self):
    """Write this object's data, and its id mapping if any, to disk.

    Nothing is written when ``self.data`` is empty or unset.  Otherwise:

    1. ``self.root_dir`` is created if it does not exist yet.
    2. If ``self.data_mapping`` is set, the (object id, mapped data id)
       pairs are written to the mapping file; if it is not set but
       ``self.get_mapping_file()`` returns a path, an identity mapping
       (object id, object id) is written instead.
    3. ``self.write_func`` is called with the data path and the items to
       store.  With a ``data_mapping`` the items are
       (mapped data id, data object) pairs, otherwise ``self.data`` itself.

    Each item of ``self.data`` is indexed as ``item[0]`` (the object id)
    and ``item[1]`` (the data object).

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed source; every step after the ``root_dir`` check is
    assumed to sit inside the ``if self.data`` guard -- confirm.

    Raises:
        AssertionError: if ``self.root_dir`` is not set.
    """
    # Raised explicitly instead of via ``assert`` so the check also runs
    # under ``python -O``; the exception type callers see is unchanged.
    if not self.root_dir:
        raise AssertionError('root_dir must be set before saving')

    if not self.data:
        return

    mapping = self.data_mapping
    if mapping:
        mapped_ids = [mapping[item[0]] for item in self.data]
        # Materialised as a list: on Python 3 a bare zip() is a one-shot
        # iterator, which breaks writers that need len() or a second pass.
        out_data = list(zip(mapped_ids, [item[1] for item in self.data]))
    else:
        out_data = self.data

    # create root dir, if it is not there yet
    if not os.path.exists(self.root_dir):
        os.makedirs(self.root_dir)

    # NOTE(review): the check uses the raw ``self.data_path`` while the
    # directory that gets created is ``self.get_data_path()``; kept exactly
    # as in the original -- confirm this is intended.
    if os.path.isdir(self.data_path):
        data_dir = self.get_data_path()
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)

    if mapping:
        # write the object id -> data id mapping file
        id_pairs = [(item[0], mapped_id)
                    for item, mapped_id in zip(self.data, mapped_ids)]
        file_io.write_tuple_list(self.get_mapping_file(), id_pairs)
    elif self.get_mapping_file():
        # no explicit mapping: every object id maps to itself
        id_pairs = [(item[0], item[0]) for item in self.data]
        file_io.write_tuple_list(self.get_mapping_file(), id_pairs)

    # store the data
    self.write_func(self.get_data_path(), out_data)
def save(self):
    """Write this object's data, and its id mapping if any, to disk.

    Nothing is written when ``self.data`` is empty or unset.  Otherwise:

    1. ``self.root_dir`` is created if it does not exist yet.
    2. If ``self.data_mapping`` is set, the (object id, mapped data id)
       pairs are written to the mapping file; if it is not set but
       ``self.get_mapping_file()`` returns a path, an identity mapping
       (object id, object id) is written instead.
    3. ``self.write_func`` is called with the data path and the items to
       store.  With a ``data_mapping`` the items are
       (mapped data id, data object) pairs, otherwise ``self.data`` itself.

    Each item of ``self.data`` is indexed as ``item[0]`` (the object id)
    and ``item[1]`` (the data object).

    NOTE(review): the block nesting was reconstructed from a
    whitespace-collapsed source; every step after the ``root_dir`` check is
    assumed to sit inside the ``if self.data`` guard -- confirm.

    Raises:
        AssertionError: if ``self.root_dir`` is not set.
    """
    # Raised explicitly instead of via ``assert`` so the check also runs
    # under ``python -O``; the exception type callers see is unchanged.
    if not self.root_dir:
        raise AssertionError('root_dir must be set before saving')

    if not self.data:
        return

    mapping = self.data_mapping
    if mapping:
        mapped_ids = [mapping[item[0]] for item in self.data]
        # Materialised as a list: on Python 3 a bare zip() is a one-shot
        # iterator, which breaks writers that need len() or a second pass.
        out_data = list(zip(mapped_ids, [item[1] for item in self.data]))
    else:
        out_data = self.data

    # create root dir, if it is not there yet
    if not os.path.exists(self.root_dir):
        os.makedirs(self.root_dir)

    # NOTE(review): the check uses the raw ``self.data_path`` while the
    # directory that gets created is ``self.get_data_path()``; kept exactly
    # as in the original -- confirm this is intended.
    if os.path.isdir(self.data_path):
        data_dir = self.get_data_path()
        if not os.path.exists(data_dir):
            os.makedirs(data_dir)

    if mapping:
        # write the object id -> data id mapping file
        id_pairs = [(item[0], mapped_id)
                    for item, mapped_id in zip(self.data, mapped_ids)]
        file_io.write_tuple_list(self.get_mapping_file(), id_pairs)
    elif self.get_mapping_file():
        # no explicit mapping: every object id maps to itself
        id_pairs = [(item[0], item[0]) for item in self.data]
        file_io.write_tuple_list(self.get_mapping_file(), id_pairs)

    # store the data
    self.write_func(self.get_data_path(), out_data)
def classify(fm_dir, cl_dir):
    """Classify the objects of a stored feature matrix with a saved classifier.

    Loads the feature matrix from ``fm_dir`` and the classifier plus its
    settings from ``cl_dir``, runs ``classification.classify`` on the
    standardized slice of the features named in the settings, and writes
    (object id, prediction) and (object id, probability) tuples to
    ``<cl_dir>/class_output/<prefix>_pred.txt`` and ``..._proba.txt``.

    ``<prefix>`` is the base name of the directory two levels above
    ``fm_dir``.

    PRE: the features listed under 'feature_names' in the classifier's
    settings file must be present in the feature matrix.
    NOTE(review): the original docstring referred to a directory ``fe_dir``,
    which is not a parameter here -- presumably ``fm_dir`` was meant.
    """
    # output files are prefixed with the name of fm_dir's grandparent dir
    grandparent_dir = os.path.dirname(os.path.dirname(fm_dir))
    prefix = os.path.basename(grandparent_dir)

    # make sure the classification output directory exists
    output_dir = os.path.join(cl_dir, 'class_output')
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # feature ids the classifier was trained with
    settings_path = os.path.join(cl_dir, 'settings.txt')
    settings = file_io.read_settings_dict(settings_path)
    trained_feature_ids = settings['feature_names']

    # standardized data for those features, over all objects
    matrix = featmat.FeatureMatrix.load_from_dir(fm_dir)
    feature_indices = matrix.feature_indices(trained_feature_ids)
    object_indices = range(len(matrix.object_ids))
    samples = matrix.standardized_slice(feature_indices, object_indices)

    # restore the trained classifier and run it
    classifier_path = os.path.join(cl_dir, 'classifier.joblib.pkl')
    trained_classifier = joblib.load(classifier_path)
    predictions, probabilities = classification.classify(
        samples, trained_classifier)

    # write one (object id, value) tuple list per result type
    predictions_path = os.path.join(output_dir, '%s_pred.txt' % (prefix))
    probabilities_path = os.path.join(output_dir, '%s_proba.txt' % (prefix))
    file_io.write_tuple_list(
        predictions_path, zip(matrix.object_ids, predictions))
    file_io.write_tuple_list(
        probabilities_path, zip(matrix.object_ids, probabilities))