def apply_classifier(exp, block, train_es, test_es, classifier_name,
                     classifier_options=None, fit_options=None,
                     base_folder="/tmp", base_filename="cl"):
    """
    Fit the classifier registered under `classifier_name` on the training
    set, predict the test set and wrap the outcome in a ClassifierResult.

    The class labels are taken from the pheno data column whose name is
    stored in train_es.pheno_metadata["user_class_title"]; the same column
    is read from both sets.

        @type train_es: ExpressionSet
        @type test_es: ExpressionSet

        @param exp: not used by this function.
        @param block: not used by this function.
        @param classifier_name: key looked up in `classifiers_map`; also
            stored on the result.
        @param classifier_options: keyword arguments for the classifier
            factory; any falsy value means "no options".
        @param fit_options: extra keyword arguments for the classifier's
            `fit` call; any falsy value means "no options".
        @param base_folder: forwarded to ClassifierResult.
        @param base_filename: forwarded to ClassifierResult.

        @return: tuple `([ClassifierResult], {})`.
        @raise NotImplementedError: when the registry entry carries a
            custom apply function (second tuple item is not None).
    """
    classifier_options = classifier_options or {}
    fit_options = fit_options or {}

    target_class_column = train_es.pheno_metadata["user_class_title"]

    # Unpack data.
    # `.values` replaces DataFrame/Series.as_matrix(), which was deprecated
    # in pandas 0.23 and removed in 1.0; the returned ndarray is the same.
    # The assay frame is transposed before use.
    # NOTE(review): presumably it is stored features x samples - confirm.
    x_train = train_es.get_assay_data_frame().values.transpose()
    y_train = train_es.get_pheno_data_frame()[target_class_column].values
    x_test = test_es.get_assay_data_frame().values.transpose()
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    # so the labels are encoded first. The encoder is fitted on the training
    # labels only, hence a test label that never occurs in the training set
    # makes `transform` raise ValueError.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)
    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization: the registry maps a name to a
    # (factory, apply function) pair.
    fabric, apply_func = classifiers_map[classifier_name]
    log.debug("Classifier options: %s", classifier_options)
    if apply_func is not None:
        # Custom apply functions are not supported yet.
        raise NotImplementedError(
            "Custom apply function for classifier %r is not supported"
            % (classifier_name,)
        )
    cl = fabric(**classifier_options)
    cl.fit(x_train, y_train_fixed, **fit_options)

    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)
    cr.labels_encode_vector = le.classes_  # Store target class labels

    # Both vectors hold encoded labels; `labels_encode_vector` maps them
    # back to the original class labels.
    cr.y_true = y_test_fixed
    cr.y_predicted = y_test_predicted
    cr.classifier = classifier_name
    cr.store_model(cl)
    return [cr], {}
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
    Train the classifier named `classifier_name` on `train_es`, apply it to
    `test_es` and return the outcome as a ClassifierResult.

    Labels come from the pheno data column named by
    train_es.pheno_metadata["user_class_title"] (read from both sets).

        @type train_es: ExpressionSet
        @type test_es: ExpressionSet

        @param exp: not used by this function.
        @param block: not used by this function.
        @param classifier_name: key looked up in `classifiers_map`; also
            recorded on the result.
        @param classifier_options: keyword arguments passed to the
            classifier factory; a falsy value is treated as no options.
        @param fit_options: additional keyword arguments passed to `fit`;
            a falsy value is treated as no options.
        @param base_folder: forwarded to ClassifierResult.
        @param base_filename: forwarded to ClassifierResult.

        @return: tuple `([ClassifierResult], {})`.
        @raise NotImplementedError: when the registry entry defines a
            custom apply function (its second item is not None).
    """
    if not classifier_options:
        classifier_options = {}
    if not fit_options:
        fit_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]

    # Unpack data.
    # DataFrame/Series.as_matrix() was deprecated in pandas 0.23 and removed
    # in 1.0; `.values` yields the same ndarray on every pandas version.
    train_pheno = train_es.get_pheno_data_frame()
    test_pheno = test_es.get_pheno_data_frame()
    # The assay frames are transposed before being handed to the classifier.
    # NOTE(review): presumably they are stored features x samples - confirm.
    x_train = train_es.get_assay_data_frame().values.transpose()
    y_train = train_pheno[target_class_column].values
    x_test = test_es.get_assay_data_frame().values.transpose()
    y_test = test_pheno[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    # so we need to preprocess labels. Only the training labels are used to
    # fit the encoder: an unseen label in the test set makes `transform`
    # raise ValueError.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)
    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization: each registry entry is a
    # (factory, apply function) pair.
    fabric, apply_func = classifiers_map[classifier_name]
    log.debug("Classifier options: %s", classifier_options)
    if apply_func is None:
        cl = fabric(**classifier_options)
        cl.fit(x_train, y_train_fixed, **fit_options)
    else:
        # Entries with a custom apply function are not handled yet.
        raise NotImplementedError(
            "Custom apply function for classifier %r is not supported"
            % (classifier_name,)
        )

    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)
    cr.labels_encode_vector = le.classes_  # Store target class labels
    cr.y_true = y_test_fixed  # encoded labels, not the original ones
    cr.y_predicted = y_test_predicted
    cr.classifier = classifier_name
    cr.store_model(cl)
    return [cr], {}
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
    Train the classifier named `classifier_name` on `train_es`, apply it to
    `test_es` and return the outcome as a ClassifierResult.

    Labels come from the pheno data column named by
    train_es.pheno_metadata["user_class_title"] (read from both sets). The
    test assay frame is re-indexed to the columns of the training frame, so
    both matrices share the same column order.

        @type train_es: ExpressionSet
        @type test_es: ExpressionSet
        @type exp: Experiment
        @type block: GenericBlock

        @param exp: not used by this function.
        @param block: forwarded to `get_classifier`.
        @param classifier_name: key looked up in `classifiers_map`; also
            forwarded to `get_classifier` and recorded on the result.
        @param classifier_options: forwarded to `get_classifier`; a falsy
            value is replaced by an empty dict.
        @param fit_options: accepted for interface compatibility only; it is
            NOT forwarded to `fit`.
            NOTE(review): confirm that dropping it is intentional.
        @param base_folder: forwarded to ClassifierResult.
        @param base_filename: forwarded to ClassifierResult.

        @return: tuple `([ClassifierResult], {})`.
        @raise NotImplementedError: when the registry entry defines a
            custom apply function (its second item is not None).
    """
    if settings.CELERY_DEBUG:
        # Developer-only hook: attach to a PyCharm remote debugger.
        import sys
        debug_egg = '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        # Register the egg only once; appending on every call made sys.path
        # grow for the whole lifetime of the worker process.
        if debug_egg not in sys.path:
            sys.path.append(debug_egg)
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    if not classifier_options:
        classifier_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]

    # Select the test columns by the training columns so that both matrices
    # are laid out identically; a column missing from the test frame makes
    # this lookup fail.
    tr_es = train_es.get_assay_data_frame()
    cols = tr_es.columns
    te_es = test_es.get_assay_data_frame()[list(cols)]

    # Unpack data.
    # DataFrame/Series.as_matrix() was deprecated in pandas 0.23 and removed
    # in 1.0; `.values` yields the same ndarray on every pandas version.
    # NOTE(review): the matrices are used without transposing, so rows are
    # presumably samples and columns features - confirm against
    # get_assay_data_frame().
    x_train = tr_es.values
    y_train = train_es.get_pheno_data_frame()[target_class_column].values
    x_test = te_es.values
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    # so we need to preprocess labels. Only the training labels are used to
    # fit the encoder: an unseen label in the test set makes `transform`
    # raise ValueError.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)
    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization: each registry entry is a
    # (factory, apply function) pair.
    fabric, apply_func = classifiers_map[classifier_name]
    if apply_func is None:
        cl = get_classifier(fabric, classifier_options, classifier_name, block)
        log.debug("Fitting classifier.")
        cl.fit(x_train, y_train_fixed)
        log.debug("Finished fitting classifier.")
    else:
        # Entries with a custom apply function are not handled yet.
        raise NotImplementedError()

    log.debug("Applying on test.")
    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    log.debug("Building result.")
    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)
    log.debug("Storing labels.")
    cr.labels_encode_vector = le.classes_  # Store target class labels
    log.debug("Storing y.")
    cr.y_true = y_test_fixed  # encoded labels, not the original ones
    cr.y_predicted = y_test_predicted
    cr.classifier = classifier_name
    log.debug("Storing model.")
    # TODO Why to store model? Persisting is currently disabled, so the
    # message above is logged without anything being stored:
    # cr.store_model(cl)
    log.debug("Finished apply_classifier.")
    return [cr], {}
def apply_classifier(
    exp, block,
    train_es, test_es,
    classifier_name, classifier_options=None, fit_options=None,
    base_folder="/tmp", base_filename="cl"
):
    """
    Train the classifier named `classifier_name` on `train_es`, apply it to
    `test_es` and return the outcome as a ClassifierResult.

    Labels come from the pheno data column named by
    train_es.pheno_metadata["user_class_title"] (read from both sets). The
    test assay frame is re-indexed to the columns of the training frame, so
    both matrices share the same column order. The training matrix is
    written to the debug log before fitting and once more if fitting fails
    with ValueError.

        @type train_es: ExpressionSet
        @type test_es: ExpressionSet
        @type exp: Experiment
        @type block: GenericBlock

        @param exp: not used by this function.
        @param block: forwarded to `get_classifier`.
        @param classifier_name: key looked up in `classifiers_map`; also
            forwarded to `get_classifier` and recorded on the result.
        @param classifier_options: forwarded to `get_classifier`; a falsy
            value is replaced by an empty dict.
        @param fit_options: accepted for interface compatibility only; it is
            NOT forwarded to `fit`.
            NOTE(review): confirm that dropping it is intentional.
        @param base_folder: forwarded to ClassifierResult.
        @param base_filename: forwarded to ClassifierResult.

        @return: tuple `([ClassifierResult], {})`.
        @raise NotImplementedError: when the registry entry defines a
            custom apply function (its second item is not None).
        @raise ValueError: re-raised unchanged when `fit` rejects the
            training data.
    """
    if settings.CELERY_DEBUG:
        # Developer-only hook: attach to a PyCharm remote debugger.
        import sys
        debug_egg = '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
        # Register the egg only once; appending on every call made sys.path
        # grow for the whole lifetime of the worker process.
        if debug_egg not in sys.path:
            sys.path.append(debug_egg)
        import pydevd
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    if not classifier_options:
        classifier_options = {}

    target_class_column = train_es.pheno_metadata["user_class_title"]

    # Select the test columns by the training columns so that both matrices
    # are laid out identically; a column missing from the test frame makes
    # this lookup fail.
    tr_es = train_es.get_assay_data_frame()
    cols = tr_es.columns
    te_es = test_es.get_assay_data_frame()[list(cols)]

    # Unpack data.
    # DataFrame/Series.as_matrix() was deprecated in pandas 0.23 and removed
    # in 1.0; `.values` yields the same ndarray on every pandas version.
    # NOTE(review): the matrices are used without transposing, so rows are
    # presumably samples and columns features - confirm against
    # get_assay_data_frame().
    x_train = tr_es.values
    y_train = train_es.get_pheno_data_frame()[target_class_column].values
    x_test = te_es.values
    y_test = test_es.get_pheno_data_frame()[target_class_column].values

    # Unfortunately svm can't operate with string labels as target classes,
    # so we need to preprocess labels. Only the training labels are used to
    # fit the encoder: an unseen label in the test set makes `transform`
    # raise ValueError.
    le = preprocessing.LabelEncoder()
    le.fit(y_train)
    y_train_fixed = le.transform(y_train)
    y_test_fixed = le.transform(y_test)

    # Classifier initialization: each registry entry is a
    # (factory, apply function) pair.
    fabric, apply_func = classifiers_map[classifier_name]
    if apply_func is None:
        cl = get_classifier(fabric, classifier_options, classifier_name, block)
        log.debug("Fitting classifier.")
        # Lazy %-formatting: the (possibly large) matrix is only rendered
        # to text when debug logging is actually enabled.
        log.debug("%s", x_train)
        try:
            cl.fit(x_train, y_train_fixed)
        except ValueError:
            # Dump the offending training matrix next to the failure, then
            # let the original error propagate to the caller.
            log.debug("%s", x_train)
            raise
        log.debug("Finished fitting classifier.")
    else:
        # Entries with a custom apply function are not handled yet.
        raise NotImplementedError()

    log.debug("Applying on test.")
    # Applying on test partition
    y_test_predicted = cl.predict(x_test)

    log.debug("Building result.")
    # Here we build result object
    cr = ClassifierResult(base_folder, base_filename)
    log.debug("Storing labels.")
    cr.labels_encode_vector = le.classes_  # Store target class labels
    log.debug("Storing y.")
    cr.y_true = y_test_fixed  # encoded labels, not the original ones
    cr.y_predicted = y_test_predicted
    cr.classifier = classifier_name
    log.debug("Storing model.")
    # TODO Why to store model? Persisting is currently disabled, so the
    # message above is logged without anything being stored:
    # cr.store_model(cl)
    log.debug("Finished apply_classifier.")
    return [cr], {}