Example #1
    def save(self):

        # the root dir must be set before anything can be written
        assert self.root_dir

        if self.data:

            if self.data_mapping:
                # remap object ids to data file ids before writing
                data_ids = [self.data_mapping[d[0]] for d in self.data]
                data_objects = [d[1] for d in self.data]
                out_data = zip(data_ids, data_objects)
            else:
                out_data = self.data

            # create root dir, if it is not there yet
            if not os.path.exists(self.root_dir):
                os.makedirs(self.root_dir)

            if os.path.isdir(self.data_path):

                # create data dir, if it is not there yet
                if not os.path.exists(self.get_data_path()):
                    os.makedirs(self.get_data_path())

            if self.data_mapping:
                # write the object id to data file id mapping file
                tuples = [(d[0], self.data_mapping[d[0]]) for d in self.data]
                file_io.write_tuple_list(self.get_mapping_file(), tuples)
            elif self.get_mapping_file():
                # no mapping available: map each object id to itself
                tuples = [(d[0], d[0]) for d in self.data]
                file_io.write_tuple_list(self.get_mapping_file(), tuples)

            # store the data
            self.write_func(self.get_data_path(), out_data)
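The method above assumes that self.data is a list of (object id, data object) tuples and that self.data_mapping, when set, maps object ids to data file ids. A minimal standalone sketch of that remapping step, with made-up ids and values:

# made-up (object id, data object) tuples and id -> file id mapping
data = [('prot1', 'MKV'), ('prot2', 'MST')]
data_mapping = {'prot1': 'd0000', 'prot2': 'd0001'}

data_ids = [data_mapping[d[0]] for d in data]
data_objects = [d[1] for d in data]

# zip() returns an iterator on Python 3; wrap it in list() if the write
# function iterates over the data more than once
out_data = list(zip(data_ids, data_objects))
print(out_data)  # [('d0000', 'MKV'), ('d0001', 'MST')]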
Example #2
def classify(fm_dir, cl_dir):
    '''
    PRE: required features are available in fm_dir!
    '''

    # prefix for the output files, derived from the feature matrix location
    f_pre = os.path.basename(os.path.dirname(os.path.dirname(fm_dir)))

    # create dir to store the classification output and feature calc...
    out_dir = os.path.join(cl_dir, 'class_output')
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # read feature ids that were used to train the classifier
    cl_settings_f = os.path.join(cl_dir, 'settings.txt')
    settings_dict = file_io.read_settings_dict(cl_settings_f)
    feature_ids = settings_dict['feature_names']

    # obtain the standardized data slice of the feature matrix
    fm = featmat.FeatureMatrix.load_from_dir(fm_dir)
    feat_is = fm.feature_indices(feature_ids)
    object_is = range(len(fm.object_ids))
    data = fm.standardized_slice(feat_is, object_is)

    # load trained classifier
    cl_f = os.path.join(cl_dir, 'classifier.joblib.pkl')
    classifier = joblib.load(cl_f)

    # run classify method
    preds, probas = classification.classify(data, classifier)

    # write the predictions and class probabilities per object id
    pred_f = os.path.join(out_dir, '%s_pred.txt' % f_pre)
    proba_f = os.path.join(out_dir, '%s_proba.txt' % f_pre)

    file_io.write_tuple_list(pred_f, zip(fm.object_ids, preds))
    file_io.write_tuple_list(proba_f, zip(fm.object_ids, probas))
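A minimal usage sketch for the function above; the directory paths are placeholders, and both directories are assumed to already contain what classify() reads (settings.txt and classifier.joblib.pkl in cl_dir, a stored feature matrix in fm_dir):

# hypothetical paths; replace with an existing feature matrix dir and
# trained classifier dir
fm_dir = 'project/jobs/run01/feat_matrix'
cl_dir = 'project/classifiers/lin_svm'

classify(fm_dir, cl_dir)
# writes class_output/<prefix>_pred.txt and class_output/<prefix>_proba.txt
# inside cl_dir, where <prefix> is derived from the fm_dir path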