Exemple #1
0
def apply_classifier(exp,
                     block,
                     train_es,
                     test_es,
                     classifier_name,
                     classifier_options=None,
                     fit_options=None,
                     base_folder="/tmp",
                     base_filename="cl"):
    """
        Fit the classifier registered under `classifier_name` on `train_es`,
        predict on `test_es` and wrap the outcome in a ClassifierResult.

        `exp` and `block` are accepted for interface compatibility but are
        not used by this function.

        @type train_es: ExpressionSet
        @type test_es: ExpressionSet
        @param classifier_name: key looked up in `classifiers_map`
        @param classifier_options: keyword arguments for the classifier
            factory; a falsy value is treated as "no options"
        @param fit_options: extra keyword arguments forwarded to `fit`;
            a falsy value is treated as "no options"
        @param base_folder: forwarded to ClassifierResult
        @param base_filename: forwarded to ClassifierResult
        @return: tuple `([ClassifierResult], {})`
        @raise NotImplementedError: when the `classifiers_map` entry carries
            a custom apply function (only the plain fit/predict path exists)
    """
    classifier_options = classifier_options or {}
    fit_options = fit_options or {}

    # The name of the phenotype column holding the class labels is taken
    # from the training set and reused for the test set.
    target_class_column = train_es.pheno_metadata["user_class_title"]

    # Unpack data.
    # `.values` is used instead of `.as_matrix()`: the latter was deprecated
    # in pandas 0.23 and removed in pandas 1.0, while `.values` yields the
    # same ndarray on every pandas version.
    # The assay matrix is transposed before being handed to the classifier
    # (presumably stored features x samples -- TODO confirm).
    x_train = train_es.get_assay_data_frame().values.transpose()
    y_train = train_es.get_pheno_data_frame()[target_class_column].values

    x_test = test_es.get_assay_data_frame().values.transpose()
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    #   so labels are mapped to integers. The encoder is fitted on the
    #   training labels only and then applied to both partitions.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)

    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    log.debug("Classifier options: %s", classifier_options)
    if apply_func is not None:
        # Custom apply functions are not supported yet.
        raise NotImplementedError()

    cl = fabric(**classifier_options)
    cl.fit(x_train, y_train_fixed, **fit_options)

    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)

    # Store target class labels so encoded values can be mapped back
    cr.labels_encode_vector = le.classes_

    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted

    cr.classifier = classifier_name
    cr.store_model(cl)
    return [cr], {}
Exemple #2
0
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
        Train the classifier selected by `classifier_name` on `train_es`
        and evaluate it on `test_es`.

        `exp` and `block` are part of the signature but are not read here.

        @type train_es: ExpressionSet
        @type test_es: ExpressionSet
        @param classifier_name: key looked up in `classifiers_map`
        @param classifier_options: keyword arguments passed to the
            classifier factory (falsy -> empty dict)
        @param fit_options: keyword arguments passed on to `fit`
            (falsy -> empty dict)
        @param base_folder: handed to the ClassifierResult constructor
        @param base_filename: handed to the ClassifierResult constructor
        @return: `([ClassifierResult], {})`
        @raise NotImplementedError: if the `classifiers_map` entry defines
            a custom apply function
    """
    if not classifier_options:
        classifier_options = {}
    if not fit_options:
        fit_options = {}

    # Column of the phenotype table that holds the class label; the title
    # configured on the training set is used for both sets.
    target_class_column = train_es.pheno_metadata["user_class_title"]

    train_pheno = train_es.get_pheno_data_frame()
    test_pheno = test_es.get_pheno_data_frame()

    # Unpack data.
    # NOTE: `.as_matrix()` no longer exists in pandas >= 1.0 (deprecated
    # since 0.23); `.values` is the drop-in equivalent available on all
    # pandas versions.
    # The assay frame is transposed before use (presumably it is laid out
    # features x samples -- TODO confirm against ExpressionSet).
    x_train = train_es.get_assay_data_frame().values.transpose()
    y_train = train_pheno[target_class_column].values

    x_test = test_es.get_assay_data_frame().values.transpose()
    y_test = test_pheno[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    #   so we need to preprocess labels. The encoder only learns the labels
    #   present in the training partition.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)

    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    log.debug("Classifier options: %s", classifier_options)
    if apply_func is None:
        cl = fabric(**classifier_options)
        cl.fit(x_train, y_train_fixed, **fit_options)
    else:
        # Only the default fit/predict flow is implemented.
        raise NotImplementedError()

    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)
    cr.labels_encode_vector = le.classes_  # Store target class labels
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted
    cr.classifier = classifier_name
    cr.store_model(cl)
    return [cr], {}
Exemple #3
0
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
        Fit the classifier registered under `classifier_name` on `train_es`
        and predict the classes of `test_es`.

        The test assay frame is re-indexed to the training frame's columns
        so both matrices share the same column order. `exp` is not used
        here; `block` is only handed to `get_classifier`.

        @type train_es: ExpressionSet
        @type test_es: ExpressionSet
        @type exp: Experiment
        @type block: GenericBlock
        @param classifier_name: key looked up in `classifiers_map`
        @param classifier_options: options handed to `get_classifier`
            (falsy -> empty dict)
        @param fit_options: extra keyword arguments forwarded to `fit`
            (falsy -> empty dict)
        @param base_folder: forwarded to ClassifierResult
        @param base_filename: forwarded to ClassifierResult
        @return: `([ClassifierResult], {})`; the fitted model itself is not
            stored in the result
        @raise NotImplementedError: if the `classifiers_map` entry defines
            a custom apply function
    """
    if settings.CELERY_DEBUG:
        # Developer-only hook: attach to a remote PyCharm debugger.
        import sys
        debug_egg = '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        # Append only once; otherwise sys.path grows on every call.
        if debug_egg not in sys.path:
            sys.path.append(debug_egg)
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    if not classifier_options:
        classifier_options = {}
    if not fit_options:
        fit_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]
    tr_es = train_es.get_assay_data_frame()
    cols = tr_es.columns

    # Select the test columns in the training column order so that the
    # two matrices are aligned column by column.
    te_es = test_es.get_assay_data_frame()[list(cols)]

    # Unpack data.
    # `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23
    # and removed in pandas 1.0; the resulting ndarray is the same.
    x_train = tr_es.values
    y_train = train_es.get_pheno_data_frame()[target_class_column].values

    x_test = te_es.values
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    #   so we need to preprocess labels (encoder fitted on training labels).
    le = preprocessing.LabelEncoder()
    le.fit(y_train)

    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    if apply_func is None:
        cl = get_classifier(fabric, classifier_options, classifier_name, block)
        log.debug("Fitting classifier.")
        # Forward the caller's fit options; previously they were accepted
        # by the signature but silently dropped.
        cl.fit(x_train, y_train_fixed, **fit_options)
        log.debug("Finished fitting classifier.")
    else:
        raise NotImplementedError()

    log.debug("Applying on test.")
    # Applying on test partition
    y_test_predicted = cl.predict(x_test)
    log.debug("Building result.")
    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)

    log.debug("Storing labels.")
    cr.labels_encode_vector = le.classes_  # Store target class labels

    log.debug("Storing y.")
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted

    cr.classifier = classifier_name
    log.debug("Storing model.")
    # TODO Why to store model?
    # Model persistence is intentionally disabled for now:
    # cr.store_model(cl)
    log.debug("Finished apply_classifier.")
    return [cr], {}
Exemple #4
0
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
        Fit the classifier registered under `classifier_name` on `train_es`
        and predict the classes of `test_es`.

        The test assay frame is restricted to / ordered by the training
        frame's columns before prediction. The training matrix is written
        to the debug log before fitting and again if fitting fails with a
        ValueError, which is then re-raised. `exp` is not used here;
        `block` is only handed to `get_classifier`.

        @type train_es: ExpressionSet
        @type test_es: ExpressionSet
        @type exp: Experiment
        @type block: GenericBlock
        @param classifier_name: key looked up in `classifiers_map`
        @param classifier_options: options handed to `get_classifier`
            (falsy -> empty dict)
        @param fit_options: extra keyword arguments forwarded to `fit`
            (falsy -> empty dict)
        @param base_folder: forwarded to ClassifierResult
        @param base_filename: forwarded to ClassifierResult
        @return: `([ClassifierResult], {})`; the fitted model itself is not
            stored in the result
        @raise NotImplementedError: if the `classifiers_map` entry defines
            a custom apply function
        @raise ValueError: propagated unchanged from `fit`
    """
    if settings.CELERY_DEBUG:
        # Developer-only hook: attach to a remote PyCharm debugger.
        import sys
        debug_egg = '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        # Append only once; otherwise sys.path grows on every call.
        if debug_egg not in sys.path:
            sys.path.append(debug_egg)
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    if not classifier_options:
        classifier_options = {}
    if not fit_options:
        fit_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]
    tr_es = train_es.get_assay_data_frame()
    cols = tr_es.columns

    # Select the test columns in the training column order so that the
    # two matrices are aligned column by column.
    te_es = test_es.get_assay_data_frame()[list(cols)]

    # Unpack data.
    # `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23
    # and removed in pandas 1.0; the resulting ndarray is the same.
    x_train = tr_es.values
    y_train = train_es.get_pheno_data_frame()[target_class_column].values

    x_test = te_es.values
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    #   so we need to preprocess labels (encoder fitted on training labels).
    le = preprocessing.LabelEncoder()
    le.fit(y_train)

    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization
    fabric, apply_func = classifiers_map[classifier_name]
    if apply_func is None:
        cl = get_classifier(fabric, classifier_options, classifier_name, block)
        log.debug("Fitting classifier.")
        # Lazy %-formatting: the matrix is only rendered to text when the
        # debug level is actually enabled.
        log.debug("%s", x_train)
        try:
            # Forward the caller's fit options; previously they were
            # accepted by the signature but silently dropped.
            cl.fit(x_train, y_train_fixed, **fit_options)
        except ValueError:
            # Dump the offending training matrix next to the failure,
            # then let the error propagate to the caller.
            log.debug("%s", x_train)
            raise
        log.debug("Finished fitting classifier.")
    else:
        raise NotImplementedError()

    log.debug("Applying on test.")
    # Applying on test partition
    y_test_predicted = cl.predict(x_test)
    log.debug("Building result.")
    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)

    log.debug("Storing labels.")
    cr.labels_encode_vector = le.classes_  # Store target class labels

    log.debug("Storing y.")
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted

    cr.classifier = classifier_name
    log.debug("Storing model.")
    # TODO Why to store model?
    # Model persistence is intentionally disabled for now:
    # cr.store_model(cl)
    log.debug("Finished apply_classifier.")
    return [cr], {}