Beispiel #1
0
class LinearSVM(GenericClassifier):
    """Classifier block bound to the ``linear_svm`` classifier name.

    Declares three user-editable parameters (``C``, ``tol`` and ``loss``)
    and hands each of them to ``collect_option_safe`` when options are
    collected.
    """

    block_base_name = "LIN_SVM"
    name = "Linear SVM Classifier"

    classifier_name = "linear_svm"

    # Penalty term: float-typed text input, defaults to 1.0.
    C = ParamField(
        name="C",
        title="Penalty",
        order_num=10,
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=1.0,
    )

    # Stopping tolerance: float-typed text input, defaults to 0.0001.
    tol = ParamField(
        name="tol",
        title="Tolerance for stopping criteria",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.0001,
    )

    # Loss function selector; the choices are supplied inline.
    loss = ParamField(
        name="loss",
        title="The loss function",
        order_num=30,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["l1", "Hinge loss"],
                ["l2", "Squared hinge loss"],
            ],
        },
    )

    def collect_options(self):
        """Pass ``C``, ``tol`` and ``loss`` (in that order) to ``collect_option_safe``."""
        option_casters = (
            ("C", float),
            ("tol", float),
            ("loss", str),
        )
        for option_name, caster in option_casters:
            self.collect_option_safe(option_name, caster)
Beispiel #2
0
class UploadGeneSets(GenericBlock):
    """Input block that converts an uploaded ``.gmt`` file into a ``GeneSets`` output.

    Once the parameters validate, the uploaded file is parsed with
    ``GmtStorage.read_inp`` (tab separated) and the result is stored in a
    ``GeneSets`` object published as the ``gene_sets`` output variable.
    """

    block_base_name = "GENE_SETS_UPLOAD"
    block_group = GroupType.INPUT_DATA
    name = "Upload gene sets"

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "done"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    upload_gs = ParamField("upload_gs",
                           title="Gene sets in .gmt format",
                           order_num=10,
                           input_type=InputType.FILE_INPUT,
                           field_type=FieldType.CUSTOM)

    # NOTE(review): set_units and gen_units are declared here but are never
    # read anywhere in this class - confirm whether they should be attached
    # to the stored GeneSets object.
    set_units = ParamField("set_units",
                           title="Set units",
                           order_num=11,
                           input_type=InputType.TEXT,
                           field_type=FieldType.STR,
                           required=False)
    gen_units = ParamField("gen_units",
                           title="Gene units",
                           order_num=12,
                           input_type=InputType.TEXT,
                           field_type=FieldType.STR,
                           required=False)

    _gene_sets = OutputBlockField(name="gene_sets",
                                  provided_data_type="GeneSets")

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Parse the uploaded file and publish it as the ``gene_sets`` output.

        Any failure while reading or storing the gene sets is logged (with
        its traceback) and otherwise ignored; the block is stored in ``exp``
        in either case.

        :param exp: experiment object providing ``get_data_folder`` and
            ``store_block``.
        """
        try:
            gmt_file = self.upload_gs.get_file()
            gs = GmtStorage.read_inp(gmt_file, "\t")
            gene_sets = GeneSets(exp.get_data_folder(), str(self.uuid))
            gene_sets.store_gs(gs)

            self.set_out_var("gene_sets", gene_sets)

        except Exception as e:
            # Deliberately best effort: keep the block usable, but record the
            # full traceback rather than only the exception message.
            log.exception(e)

        exp.store_block(self)
Beispiel #3
0
class UploadInteraction(GenericBlock):
    """Input block that publishes an uploaded matrix as a ``BinaryInteraction``.

    The uploaded file is read as a data frame, stored through
    ``BinaryInteraction.store_matrix`` and exposed as the ``interaction``
    output variable together with the row/column unit parameters.
    """

    block_base_name = "GENE_INTERACTION"
    block_group = GroupType.INPUT_DATA
    name = "Upload gene interaction"

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "done"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    # Uploaded interaction matrix file.
    upload_interaction = ParamField(
        "upload_interaction",
        title="Interaction matrix",
        order_num=10,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
    )
    # Optional unit labels copied onto the produced BinaryInteraction.
    row_units = ParamField(
        "row_units",
        title="Row units",
        order_num=11,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )
    col_units = ParamField(
        "col_units",
        title="Column units",
        order_num=12,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )

    _interaction = OutputBlockField(
        name="interaction",
        provided_data_type="BinaryInteraction",
    )

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Build the ``BinaryInteraction`` output from the uploaded matrix.

        :param exp: experiment object providing ``get_data_folder`` and
            ``store_block``.
        """
        matrix_df = self.upload_interaction.get_as_data_frame()

        binary_interaction = BinaryInteraction(
            exp.get_data_folder(), str(self.uuid))
        binary_interaction.store_matrix(matrix_df)

        # Carry the user supplied unit labels over to the stored object.
        binary_interaction.row_units = self.row_units
        binary_interaction.col_units = self.col_units

        self.set_out_var("interaction", binary_interaction)
        exp.store_block(self)
Beispiel #4
0
class EnrichmentNoTBlock(GenericBlock):
    """Testing block that schedules ``enrichment_no_t_task`` on two gene-set inputs.

    Inputs ``gs`` and ``patterns`` (both ``GeneSets``) and the threshold
    parameter ``T`` are passed to the task; the task result is published as
    the ``dictionary_set`` output.
    """

    block_base_name = "ENRICHMENT_COM"
    name = "Comodule Enrichment"

    is_abstract = False
    block_group = GroupType.TESTING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _cs_1 = InputBlockField(name="gs",
                            order_num=10,
                            required_data_type="GeneSets",
                            required=True)
    H = InputBlockField(name="patterns",
                        order_num=11,
                        required_data_type="GeneSets",
                        required=True)
    # The default is a real float, matching the FLOAT field type and the
    # numeric defaults of the other FLOAT parameters in this code base
    # (it used to be the string "0.05").
    _t = ParamField(name="T",
                    order_num=12,
                    title="Enrichment threshold",
                    input_type=InputType.TEXT,
                    field_type=FieldType.FLOAT,
                    init_val=0.05)

    # NOTE(review): this attribute shadows the builtin ``dict`` inside the
    # class body; it is kept as-is because renaming a field attribute could
    # affect how the block framework discovers it - confirm before renaming.
    dict = OutputBlockField(name="dictionary_set",
                            provided_data_type="DictionarySet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no celery task scheduled yet."""
        super(EnrichmentNoTBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear previous errors and schedule the enrichment task asynchronously.

        :param exp: experiment object; the block is stored in it before the
            task is dispatched.
        """
        self.clean_errors()
        gs = self.get_input_var("gs")
        cs = self.get_input_var("patterns")
        self.celery_task = wrapper_task.s(enrichment_no_t_task,
                                          exp,
                                          self,
                                          T=self.T,
                                          gs=gs,
                                          patterns=cs,
                                          base_filename="%s_%s_enrich" %
                                          (self.uuid, 'enrichment_cont'))
        # Persist the block (including the task signature) before dispatching.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the task result as ``dictionary_set`` and store the block.

        :param exp: experiment object used to store the block.
        :param flt_es: result object produced by the task.
        """
        self.set_out_var("dictionary_set", flt_es)
        exp.store_block(self)
Beispiel #5
0
class KernelSvm(GenericClassifier):
    """Classifier block bound to the ``svm`` classifier name.

    Declares the parameters ``C``, ``kernel``, ``degree``, ``gamma`` and
    ``tol`` and hands each of them to ``collect_option_safe`` when options
    are collected.
    """

    block_base_name = "KERNEL_SVM"
    name = "Kernel SVM Classifier"

    classifier_name = "svm"

    # Penalty term: float-typed text input, defaults to 1.0.
    C = ParamField(
        name="C",
        title="Penalty",
        order_num=10,
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=1.0,
    )

    # Kernel selector; the choices are supplied inline, default is "rbf".
    kernel = ParamField(
        name="kernel",
        title="Kernel type",
        order_num=20,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="rbf",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["linear", "Linear"],
                ["poly", "Polynomial"],
                ["rbf", "RBF"],
                ["sigmoid", "Sigmoid"],
            ],
        },
    )

    # Integer-typed text input without an initial value.
    degree = ParamField(
        name="degree",
        title="Degree of the polynomial kernel",
        order_num=21,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
    )

    # Float-typed text input without an initial value.
    gamma = ParamField(
        name="gamma",
        title="Kernel coefficient for RBF, Polynomial and Sigmoid",
        order_num=22,
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
    )

    # Stopping tolerance: float-typed text input, defaults to 0.001.
    tol = ParamField(
        name="tol",
        title="Tolerance for stopping criteria",
        order_num=30,
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.001,
    )

    def collect_options(self):
        """Pass every declared parameter to ``collect_option_safe`` in order."""
        option_casters = (
            ("C", float),
            ("kernel", str),
            ("degree", int),
            ("gamma", float),
            ("tol", float),
        )
        for option_name, caster in option_casters:
            self.collect_option_safe(option_name, caster)
Beispiel #6
0
class SvdSubAgg(GenericBlock):
    """Abstract aggregation block that schedules ``aggregation_task``.

    Takes two ``ExpressionSet`` inputs (``mRNA_es``, ``miRNA_es``) and a
    ``BinaryInteraction`` input, and publishes the task result as the
    ``agg_es`` output. ``mode`` is an empty string here.
    """

    is_abstract = True
    block_group = GroupType.AGGREGATION

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _mRNA_es = InputBlockField(
        name="mRNA_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _miRNA_es = InputBlockField(
        name="miRNA_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )
    _interaction = InputBlockField(
        name="interaction",
        order_num=30,
        required_data_type="BinaryInteraction",
        required=True,
    )

    # Float-typed text input, defaults to 1.0.
    c = ParamField(
        name="c",
        title="Constant c",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=1.0,
    )

    agg_es = OutputBlockField(
        name="agg_es",
        provided_data_type="ExpressionSet",
    )

    # Passed to the task as ``mode`` and used in the base file name.
    mode = ""

    def __init__(self, *args, **kwargs):
        """Initialise the block with no celery task scheduled yet."""
        super(SvdSubAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear previous errors and schedule the aggregation task asynchronously.

        :param exp: experiment object; the block is stored in it before the
            task is dispatched.
        """
        self.clean_errors()

        m_rna = self.get_input_var("mRNA_es")
        mi_rna = self.get_input_var("miRNA_es")
        interaction = self.get_input_var("interaction")

        task_kwargs = {
            "mode": self.mode,
            "c": self.c,
            "m_rna_es": m_rna,
            "mi_rna_es": mi_rna,
            "interaction_matrix": interaction,
            "base_filename": "%s_%s_agg" % (self.uuid, self.mode),
        }
        self.celery_task = wrapper_task.s(
            aggregation_task, exp, self, **task_kwargs)

        # Persist the block before dispatching the task.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        """Publish the task result as ``agg_es`` and store the block.

        :param exp: experiment object used to store the block.
        :param agg_es: result object produced by the task.
        """
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
Beispiel #7
0
class ThresholdBlock(GenericBlock):
    """Block that schedules ``threshold_task`` on an ``ExpressionSet`` input.

    The ``es`` input and the ``T`` parameter are passed to the task; the
    task result is published as the ``gene_sets`` output.
    """

    block_base_name = "THRESHOLD"
    name = "Threshold"

    is_abstract = False
    block_group = GroupType.SNMNMF

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    # Float-typed text input registered under the name "T", defaults to 0.1.
    t = ParamField(
        name="T",
        title="Threshold",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1,
    )

    flt_es = OutputBlockField(name="gene_sets", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no celery task scheduled yet."""
        super(ThresholdBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear previous errors and schedule the threshold task asynchronously.

        :param exp: experiment object; the block is stored in it before the
            task is dispatched.
        """
        self.clean_errors()
        expression_set = self.get_input_var("es")

        task_kwargs = {
            "es": expression_set,
            "T": self.T,
            "base_filename": "%s_%s_thr" % (self.uuid, 'threshold'),
        }
        self.celery_task = wrapper_task.s(
            threshold_task, exp, self, **task_kwargs)

        # Persist the block before dispatching the task.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the task result as ``gene_sets`` and store the block.

        :param exp: experiment object used to store the block.
        :param flt_es: result object produced by the task.
        """
        self.set_out_var("gene_sets", flt_es)
        exp.store_block(self)
Beispiel #8
0
class RcVisualizer(GenericBlock):
    """Abstract visualisation block fed by a ``ResultsContainer`` input.

    Declares the ``results_container`` input, the ``rc`` and
    ``available_metrics`` property fields, and a ``metric`` selector whose
    options come from ``available_metrics``.
    """

    block_base_name = "RC_VIZUALIZER"
    is_block_supports_auto_execution = False
    block_group = GroupType.VISUALIZE
    is_abstract = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "input_bound"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("configure_table", ["input_bound", "ready"], "ready"),
    ])

    results_container = InputBlockField(name="results_container",
                                        required_data_type="ResultsContainer",
                                        required=True,
                                        field_type=FieldType.CUSTOM)
    _rc = BlockField(name="rc",
                     field_type=FieldType.CUSTOM,
                     is_a_property=True)
    _available_metrics = BlockField(name="available_metrics",
                                    field_type=FieldType.RAW,
                                    is_a_property=True)

    metric = ParamField(name="metric",
                        title="Metric",
                        field_type=FieldType.STR,
                        input_type=InputType.SELECT,
                        select_provider="available_metrics")

    def __init__(self, *args, **kwargs):
        """Delegate construction to the parent block class."""
        super(RcVisualizer, self).__init__(*args, **kwargs)

    @property
    @log_timing
    def available_metrics(self):
        """Return the selectable metrics as ``{"pk": ..., "str": ...}`` dicts.

        Only entries of ``metrics_dict`` whose ``produce_single_number`` is
        truthy are included. Any error while building the list is logged
        and an empty list is returned instead.
        """
        try:
            return [
                {"pk": metric_name, "str": metric.title}
                for metric_name, metric in metrics_dict.items()
                if metric.produce_single_number
            ]
        except Exception as e:
            # ``except ... as`` is the form used elsewhere in this code base
            # and the only one accepted by Python 3.
            log.exception(e)
            return []
Beispiel #9
0
class GetBroadInstituteGeneSet(GenericBlock):
    """Input block that publishes a stored ``BroadInstituteGeneSet`` as ``gs``.

    The ``msigdb_id`` selector is populated from
    ``BroadInstituteGeneSet.get_all_meta()``; once the parameters validate,
    the selected record's gene sets become the ``gs`` output.
    """

    block_base_name = "BI_GENE_SET"
    block_group = GroupType.INPUT_DATA
    name = "Get MSigDB Gene Set"

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "done",
                     reload_block_in_client=True),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    # TODO: maybe create more general solution ?
    _all_gene_sets = BlockField("all_gene_sets",
                                title="",
                                input_type=InputType.HIDDEN,
                                field_type=FieldType.RAW,
                                is_a_property=True)

    msigdb_id = ParamField(
        name="msigdb_id",
        title="MSigDB gene set",
        input_type=InputType.SELECT,
        field_type=FieldType.INT,
        init_val=0,  # TODO: fix hardcoded value
        select_provider="all_gene_sets")

    _gs = OutputBlockField(name="gs",
                           field_type=FieldType.HIDDEN,
                           provided_data_type="GeneSets")

    @property
    def all_gene_sets(self):
        """Return the metadata of all gene sets for the ``msigdb_id`` selector."""
        return BroadInstituteGeneSet.get_all_meta()

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Load the selected gene set, publish it as ``gs`` and run the parent hook.

        Extra positional/keyword arguments are accepted and forwarded to the
        parent implementation, matching the signature used by the other
        blocks' ``on_params_is_valid`` hooks.

        :param exp: experiment object forwarded to the parent hook.
        """
        # Look the record up by the primary key chosen in the selector.
        gs = BroadInstituteGeneSet.objects.get(
            pk=self.msigdb_id).get_gene_sets()
        self.set_out_var("gs", gs)

        super(GetBroadInstituteGeneSet, self).on_params_is_valid(
            exp, *args, **kwargs)
Beispiel #10
0
class CrossValidation(UniformMetaBlock):
    """Meta block that generates repeated K-fold splits and collects the results.

    ``execute`` schedules ``generate_cv_folds`` for every dynamic
    ``ExpressionSet`` input; ``build_result_collection`` turns the collected
    result sequence into a ``ResultsContainer`` published as the
    ``results_container`` output.
    """

    block_base_name = "CROSS_VALID"
    name = "Cross Validation K-fold"

    _cv_actions = ActionsList(
        [ActionRecord("become_ready", ["valid_params"], "ready")])
    elements = BlockField(name="elements",
                          field_type=FieldType.SIMPLE_LIST,
                          init_val=["cv_info.html"])

    _input_es_dyn = InputBlockField(name="es_inputs",
                                    required_data_type="ExpressionSet",
                                    required=True,
                                    multiply_extensible=True,
                                    order_num=-1)

    folds_num = ParamField(name="folds_num",
                           title="Folds number",
                           order_num=10,
                           input_type=InputType.TEXT,
                           field_type=FieldType.INT,
                           init_val=5)
    repeats_num = ParamField(name="repeats_num",
                             title="Repeats number",
                             order_num=20,
                             input_type=InputType.TEXT,
                             field_type=FieldType.INT,
                             init_val=1)

    def get_fold_labels(self):
        """Return ``fold_<repeat>_<fold>`` labels, 1-based, repeat-major.

        The repeat index varies slowest: all folds of repeat 1 come first,
        then all folds of repeat 2, and so on.
        """
        return [
            "fold_%s_%s" % (repeat + 1, num + 1)
            for repeat in range(self.repeats_num)
            for num in range(self.folds_num)
        ]

    def get_repeat_labels(self):
        """Return the 1-based ``repeat_<n>`` labels, one per repeat."""
        return [
            "repeat_%s" % (repeat + 1) for repeat in range(self.repeats_num)
        ]

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register train/test inner outputs for a newly added dynamic input.

            @type new_port: InputBlockField
        """
        new_inner_output_train = InnerOutputField(
            name="%s_train_i" % new_port.name,
            provided_data_type=new_port.required_data_type)
        new_inner_output_test = InnerOutputField(
            name="%s_test_i" % new_port.name,
            provided_data_type=new_port.required_data_type)
        # Remember which (train, test) inner outputs belong to this input.
        self.inner_output_es_names_map[new_port.name] = \
            (new_inner_output_train.name, new_inner_output_test.name)

        self.register_inner_output_variables(
            [new_inner_output_train, new_inner_output_test])

    def execute(self, exp, *args, **kwargs):
        """Clear errors, reset inner outputs and schedule fold generation.

        :param exp: experiment object; the block is stored in it before the
            task is dispatched.
        """
        self.clean_errors()

        self.inner_output_manager.reset()
        es_dict = {
            inp_name: self.get_input_var(inp_name)
            for inp_name in self.es_inputs
        }

        self.celery_task = wrapper_task.s(
            generate_cv_folds,
            exp,
            self,
            folds_num=self.folds_num,
            repeats_num=self.repeats_num,
            es_dict=es_dict,
            inner_output_es_names_map=self.inner_output_es_names_map,
            success_action="on_folds_generation_success",
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the parent hook, then trigger the ``become_ready`` action."""
        super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("become_ready", exp)

    def become_ready(self, *args, **kwargs):
        """Action handler with no body; only the state transition matters."""
        pass

    def build_result_collection(self, exp):
        """Assemble ``self.res_seq`` into the ``results_container`` output.

        Only the first field of ``res_seq.fields`` is inspected to decide
        how the results are combined:

        * ``ClassifierResult``: for every field, consecutive groups of
          ``folds_num`` sequence cells are placed into one per-repeat
          container; those are combined with ``add_dim_layer`` under the
          repeat labels, and the per-field containers are combined under
          ``collector_spec.label``.
        * ``ResultsContainer``: exactly one field is allowed; every cell's
          container is loaded and combined under the fold labels.
        * ``SequenceContainer``: nothing is added.

        :param exp: experiment object providing ``get_data_folder``.
        :raises Exception: for an unsupported data type, or for more than
            one ``ResultsContainer`` field.
        """
        if settings.CELERY_DEBUG:
            # NOTE(review): developer-only remote debugger hook with a
            # hard-coded local path; kept unchanged, but it should probably
            # come from configuration - confirm before relying on it.
            import sys
            sys.path.append(
                '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
            )
            import pydevd
            pydevd.settrace('localhost',
                            port=6901,
                            stdoutToServer=True,
                            stderrToServer=True)

        rc = ResultsContainer(base_dir=exp.get_data_folder(),
                              base_filename="%s" % self.uuid)
        res_seq = self.res_seq

        def create_new_dim_rc(local_rc, axis_meta_block,
                              axis_meta_block_labels):
            """Give ``local_rc`` a single axis with the provided labels."""
            local_rc.axis_list = [axis_meta_block]
            local_rc.labels_dict[axis_meta_block] = axis_meta_block_labels
            local_rc.init_ar()
            local_rc.update_label_index()

        # WARNING: We only support homogeneous results, so we only check first element
        res_seq_field_name, data_type = next(iter(res_seq.fields.items()))
        if data_type == "ClassifierResult":
            single_rc_list = []
            for field_name in res_seq.fields:
                loc_list = []
                for idx, res_seq_cell in enumerate(res_seq.sequence):
                    fold_idx = idx % self.folds_num
                    if fold_idx == 0:
                        # Every ``folds_num`` cells start a new repeat.
                        rc_run = ResultsContainer("", "")
                        create_new_dim_rc(rc_run, self.base_name + "_folds", [
                            "fold_%s" % fold_num
                            for fold_num in range(self.folds_num)
                        ])
                        loc_list.append(rc_run)
                    rc_run.ar[fold_idx] = res_seq_cell[field_name]
                rc_single = ResultsContainer("", "")
                rc_single.add_dim_layer(loc_list, self.base_name,
                                        self.get_repeat_labels())
                single_rc_list.append(rc_single)
            rc.add_dim_layer(single_rc_list, self.collector_spec.label,
                             list(res_seq.fields.keys()))

        elif data_type == "ResultsContainer":
            if len(res_seq.fields) > 1:
                raise Exception(
                    "Meta block only support single output of type ResultsContainer"
                )

            rc_list = []
            for cell in res_seq.sequence:
                sub_rc = cell[res_seq_field_name]
                sub_rc.load()
                rc_list.append(sub_rc)

            rc.add_dim_layer(rc_list, self.base_name,
                             self.get_fold_labels())

        elif data_type == "SequenceContainer":
            # TODO remove this check
            pass
        else:
            raise Exception("Meta blocks only support ClassifierResult "
                            "or ResultsContainer in the output collection. "
                            " Instead got: %s" % data_type)

        rc.store()
        # Drop the in-memory array after storing, before publishing the container.
        rc.ar = None
        self.set_out_var("results_container", rc)
Beispiel #11
0
class FilterBlock(GenericBlock):
    """Block that schedules ``filter_task`` on an ``ExpressionSet`` input.

    The ``es`` input, the selected ``filter_method`` and the threshold ``q``
    are passed to the task; the task result is published as ``flt_es``.
    """

    block_base_name = "FILTER"
    name = "Var/Val Filter"

    is_abstract = False
    block_group = GroupType.FILTER

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )

    # Filter selector; the choices are supplied inline, default "LOW_VAL".
    filter_method = ParamField(
        "filter_method",
        title="Filter method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="LOW_VAL",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["LOW_VAL", "Low Val Filter"],
                ["VAR", "Var Filter"],
            ],
        },
    )
    # Float-typed text input, defaults to 30.0.
    q = ParamField(
        name="q",
        title="Threshold",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=30.0,
    )

    flt_es = OutputBlockField(
        name="flt_es",
        provided_data_type="ExpressionSet",
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block with no celery task scheduled yet."""
        super(FilterBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear previous errors and schedule the filter task asynchronously.

        :param exp: experiment object; the block is stored in it before the
            task is dispatched.
        """
        self.clean_errors()
        expression_set = self.get_input_var("es")

        task_kwargs = {
            "filter_type": self.filter_method,
            "q": self.q,
            "es": expression_set,
            "base_filename": "%s_%s_flt" % (self.uuid, self.filter_method),
        }
        self.celery_task = wrapper_task.s(
            filter_task, exp, self, **task_kwargs)

        # Persist the block before dispatching the task.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the task result as ``flt_es`` and store the block.

        :param exp: experiment object used to store the block.
        :param flt_es: result object produced by the task.
        """
        self.set_out_var("flt_es", flt_es)
        exp.store_block(self)
Beispiel #12
0
class BoxPlot(RcVisualizer):
    """Visualiser that turns a results container slice into box-plot chart data.

    ``boxplot_config`` holds two per-axis selection maps
    (``agg_axis_for_scoring`` and ``compare_axis_by_boxplot``); the computed
    chart data is kept in ``chart_series`` and ``chart_categories``.
    """

    block_base_name = "BOX_PLOT"
    block_group = GroupType.VISUALIZE
    name = "Box plot"

    boxplot_config = ParamField(name="boxplot_config", title="",
                              input_type=InputType.HIDDEN,
                              field_type=FieldType.RAW)

    plot_inputs = BlockField(name="plot_inputs", field_type=FieldType.RAW, init_val=[])
    chart_series = BlockField(name="chart_series", field_type=FieldType.RAW, init_val=[])
    chart_categories = BlockField(name="chart_categories", field_type=FieldType.SIMPLE_LIST,
                                  init_val=[])

    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[
        "box_plot.html"
    ])

    def __init__(self, *args, **kwargs):
        """Initialise the block with empty axis selection maps."""
        super(BoxPlot, self).__init__(*args, **kwargs)
        self.boxplot_config = {
            "agg_axis_for_scoring": {},
            "compare_axis_by_boxplot": {},
        }

    @log_timing
    def compute_boxplot_stats(self, exp, *args, **kwargs):
        """Recompute ``chart_series`` / ``chart_categories`` from ``self.rc``.

        Nothing happens unless at least one axis is selected in
        ``compare_axis_by_boxplot`` and ``self.rc`` is truthy. The chart
        fields are only updated (and the block stored) when
        ``boxplot_stats`` returns a non-empty result.

        :param exp: experiment object used to store the block.
        """
        agg_axis_for_scoring = [
            axis for axis, is_selected in
            self.boxplot_config["agg_axis_for_scoring"].items() if is_selected
        ]
        compare_axis_by_boxplot = [
            axis for axis, is_selected in
            self.boxplot_config["compare_axis_by_boxplot"].items() if is_selected
        ]
        rc = self.rc

        if not (compare_axis_by_boxplot and rc):
            return

        rc.load()
        df = rc.get_pandas_slice_for_boxplot(
            compare_axis_by_boxplot,
            agg_axis_for_scoring,
            self.metric
        )

        # One category title per data frame row; tuple row ids are joined
        # with ":".
        categories = [
            ":".join(map(str, row_id)) if isinstance(row_id, tuple)
            else str(row_id)
            for row_id, _ in df.iterrows()
        ]

        bps = boxplot_stats(np.array(df.T, dtype=float))
        if not bps:
            return

        # Five-number summary per category, in the order
        # low whisker, q1, median, q3, high whisker.
        box_data = [
            [
                fix_nan(rec["whislo"]),
                fix_nan(rec["q1"]),
                fix_nan(rec["med"]),
                fix_nan(rec["q3"]),
                fix_nan(rec["whishi"])
            ]
            for rec in bps
        ]
        # Outliers as [category index, value] pairs.
        outliers = [
            [cat_idx, outlier]
            for cat_idx, rec in enumerate(bps)
            for outlier in rec['fliers']
        ]

        self.chart_series = [{
            "data": box_data,
        }, {
            "name": "Outliers",
            "data": outliers,
            "type": "scatter",
            "marker": {
                "fillColor": "white",
                "lineWidth": 1,
                "lineColor": "blue"
            },
            "tooltip": {
                "pointFormat": '%s: {point.y} ' % self.metric
            }
        }]
        self.chart_categories = categories
        exp.store_block(self)

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the parent hook, register unseen axes and refresh the chart data.

        Every axis of ``self.rc`` missing from either selection map is added
        with an empty-string (unselected) value before the statistics are
        recomputed. The block is stored in ``exp`` at the end in all cases.

        :param exp: experiment object used to store the block.
        """
        super(BoxPlot, self).on_params_is_valid(exp, *args, **kwargs)
        if self.rc is not None:
            for axis in self.rc.axis_list:
                if axis not in self.boxplot_config["agg_axis_for_scoring"]:
                    self.boxplot_config["agg_axis_for_scoring"][axis] = ""
                if axis not in self.boxplot_config["compare_axis_by_boxplot"]:
                    self.boxplot_config["compare_axis_by_boxplot"][axis] = ""

            self.compute_boxplot_stats(exp)
        exp.store_block(self)
Beispiel #13
0
class MassUpload(UniformMetaBlock):
    """Meta block accepting several uploaded expression sets at once.

    The form takes mRNA expression matrices, optional miRNA expression
    matrices and phenotype tables.  ``process_upload`` verifies that the file
    counts agree, pairs the files by their sorted names and queues
    ``bunch_upload_task`` through ``wrapper_task``.
    """

    block_base_name = "BunchUpload"
    name = "Mass Upload Expression Sets"

    # Actions that drive the upload-processing part of the block life cycle.
    _bu_block_actions = ActionsList([
        ActionRecord(
            "process_upload",
            ["valid_params", "processing_upload"],
            "processing_upload",
            "Process uploaded data",
        ),
        ActionRecord("error_on_processing", ["processing_upload"], "valid_params"),
        ActionRecord("processing_done", ["processing_upload"], "ready"),
    ])

    # ------------------------------------------------------------------
    # mRNA parameters
    # ------------------------------------------------------------------
    es_mRNA_matrices = ParamField(
        "es_mRNA_matrices",
        title="mRNA Expression sets",
        order_num=10,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        options={"multiple": True},
    )

    es_mRNA_matrices_ori = ParamField(
        "es_mRNA_matrices_ori",
        title="Matrices orientation",
        order_num=11,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Features"],
                ["GxS", "Features x Samples"],
            ],
        },
    )

    m_rna_platform = ParamField(
        "m_rna_platform",
        title="Platform ID",
        order_num=12,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )

    m_rna_unit = ParamField(
        "m_rna_unit",
        title="Working unit [used when platform is unknown]",
        order_num=13,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=False,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["RefSeq", "RefSeq"],
                ["Entrez", "EntrezID"],
                ["Symbol", "Symbol"],
            ],
        },
    )

    csv_sep_m_rna = ParamField(
        "csv_sep_m_rna",
        title="CSV separator symbol",
        order_num=14,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # ------------------------------------------------------------------
    # miRNA parameters
    # ------------------------------------------------------------------
    es_miRNA_matrices = ParamField(
        "es_miRNA_matrices",
        title="miRNA Expression sets",
        order_num=15,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        options={"multiple": True},
    )

    mi_rna_platform = ParamField(
        "mi_rna_platform",
        title="Platform ID",
        order_num=21,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )

    mi_rna_unit = ParamField(
        "mi_rna_unit",
        title="Working unit [used when platform is unknown]",
        order_num=22,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=False,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["RefSeq", "RefSeq"],
                ["mirbase", "miRBase ID"],
            ],
        },
    )

    es_miRNA_matrices_ori = ParamField(
        "es_miRNA_matrices_ori",
        title="Matrices orientation",
        order_num=23,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"],
            ],
        },
    )

    csv_sep_mi_rna = ParamField(
        "csv_sep_mi_rna",
        title="CSV separator symbol",
        order_num=24,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # ------------------------------------------------------------------
    # Phenotype parameters
    # ------------------------------------------------------------------
    pheno_matrices = ParamField(
        "pheno_matrices",
        title="Phenotypes",
        order_num=40,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        options={"multiple": True},
    )

    csv_sep = ParamField(
        "csv_sep",
        title="CSV separator symbol",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # cells = BlockField(name="cells", field_type=FieldType.CUSTOM, init_val=None)

    # elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[
    #     "mass_upload/data_spec.html"
    # ])

    def __init__(self, *args, **kwargs):
        """Create empty upload containers and register the inner outputs."""
        super(MassUpload, self).__init__(*args, **kwargs)

        # Per-instance upload holders replace the class-level field objects.
        self.es_mRNA_matrices = MultiUploadField()
        self.es_miRNA_matrices = MultiUploadField()
        self.pheno_matrices = MultiUploadField()

        # Filled by process_upload: phenotype name -> (mRNA name, miRNA name).
        self.pheno_by_es_names = {}

        self.labels = []
        self.seq = []

        m_rna_output = InnerOutputField(
            name="mRNA_es",
            provided_data_type="ExpressionSet",
        )
        mi_rna_output = InnerOutputField(
            name="miRNA_es",
            provided_data_type="ExpressionSet",
        )
        self.register_inner_output_variables([m_rna_output, mi_rna_output])

    @property
    def is_sub_pages_visible(self):
        """Tell whether the block is in one of the states that show sub pages."""
        return self.state in [
            'source_was_preprocessed',
            'sample_classes_assigned',
            'ready',
            'done',
        ]

    def get_fold_labels(self):
        """Return the labels collected by ``process_upload``."""
        return self.labels

    def error_on_processing(self, *args, **kwargs):
        """Handler of the ``error_on_processing`` action; does nothing."""

    def processing_done(self, exp, block):
        """Store ``block`` in ``exp`` once the upload task has finished."""
        exp.store_block(block)

    def process_upload(self, exp, *args, **kwargs):
        """Validate the uploaded files and queue their processing.

        Any exception raised along the way is logged, appended to
        ``self.errors`` and reported through the ``error_on_processing``
        action.

        @param exp: Experiment
        """
        self.clean_errors()
        try:
            pheno_count = len(self.pheno_matrices)
            if pheno_count != len(self.es_mRNA_matrices):
                raise RuntimeError("Different number of phenotypes and mRNA expression sets")
            # miRNA sets are optional; their count only matters when present.
            if self.es_miRNA_matrices and len(self.pheno_matrices) != len(self.es_miRNA_matrices):
                raise RuntimeError("Different number of phenotypes and miRNA expression sets")

            m_rna_names = sorted(self.es_mRNA_matrices)
            self.labels = m_rna_names
            mi_rna_names = sorted(self.es_miRNA_matrices)
            pheno_names = sorted(self.pheno_matrices)
            if not mi_rna_names:
                # Keep the pairing below uniform when no miRNA files were given.
                mi_rna_names = len(m_rna_names) * [None]

            # Files are matched purely by their position in the sorted name lists.
            self.pheno_by_es_names = dict(
                zip(pheno_names, zip(m_rna_names, mi_rna_names))
            )

            self.clean_errors()
            self.celery_task = wrapper_task.s(
                bunch_upload_task,
                exp,
                self,
                success_action="processing_done",
                error_action="error_on_processing",
            )
            exp.store_block(self)
            self.celery_task.apply_async()
        except Exception as e:
            exp.log(self.uuid, e, severity="CRITICAL")
            log.exception(e)
            self.errors.append(e)
            self.do_action("error_on_processing", exp, e)
            # self.celery_task_fetch.apply_async()

    def execute(self, exp, *args, **kwargs):
        """Reset the inner outputs and signal that the folds are available."""
        self.inner_output_manager.reset()
        self.do_action("on_folds_generation_success", exp, self.seq)

    def get_repeat_labels(self):
        """Return ``None``; this block defines no repeat labels."""
        return None
class FeatureSelectionByCut(GenericBlock):
    """Filter block that cuts an expression set using a ranking table.

    ``execute`` queues ``feature_selection_by_cut`` with the bound expression
    set, the bound rank table and the three cut parameters; ``success``
    publishes the resulting expression set as the ``es`` output.
    """

    block_base_name = "FS_BY_CUT"
    block_group = GroupType.FILTER
    name = "Feature Selection by Ranking"

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # --- inputs ---
    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _rank_table = InputBlockField(
        name="rank_table",
        order_num=20,
        required_data_type="TableResult",
        required=True,
    )

    # --- parameters ---
    _cut_property_options = BlockField(
        name="cut_property_options",
        field_type=FieldType.RAW,
        is_a_property=True,
    )
    # NOTE: a SELECT input fed by "cut_property_options" is currently disabled;
    # the property name is typed in as free text instead.
    cut_property = ParamField(
        name="cut_property",
        title="Ranking property to use",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        order_num=10,
    )
    threshold = ParamField(
        name="threshold",
        title="Threshold for cut",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
    )
    _cut_direction_options = BlockField(
        name="cut_direction_options",
        field_type=FieldType.RAW,
    )
    cut_direction_options = ["<", "<=", ">=", ">"]
    cut_direction = ParamField(
        name="cut_direction",
        title="Direction of cut",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        select_provider="cut_direction_options",
        order_num=30,
        options={
            "inline_select_provider": True,
            "select_options": [[op, op] for op in ["<", "<=", ">=", ">"]],
        },
    )

    # --- outputs ---
    es = OutputBlockField(name="es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(FeatureSelectionByCut, self).__init__(*args, **kwargs)
        self.celery_task = None

    @property
    def cut_property_options(self):
        """List the rank table headers as ``{"pk": ..., "str": ...}`` dicts.

        Returns ``None`` when no rank table is bound or when the bound object
        has no ``headers`` attribute.
        """
        table = self.get_input_var("rank_table")
        if not table or not hasattr(table, "headers"):
            return None
        return [{"pk": header, "str": header} for header in table.headers]

    def execute(self, exp, *args, **kwargs):
        """Queue the feature selection task for the currently bound inputs."""
        self.clean_errors()
        task_kwargs = {
            "exp": exp,
            "block": self,
            "src_es": self.get_input_var("es"),
            "rank_table": self.get_input_var("rank_table"),
            "cut_property": self.cut_property,
            "threshold": self.threshold,
            "cut_direction": self.cut_direction,
            "base_filename": "%s_feature_selection" % self.uuid,
        }
        self.celery_task = wrapper_task.s(feature_selection_by_cut, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Publish the filtered expression set and persist the block."""
        self.set_out_var("es", es)
        exp.store_block(self)
Beispiel #15
0
class GeneSetAgg(GenericBlock):
    """Processing block that aggregates an expression set by gene sets.

    ``execute`` queues ``do_gs_agg`` with the bound expression set, the bound
    gene sets and the selected aggregation method; ``success`` publishes the
    aggregated expression set as the ``agg_es`` output.
    """

    block_base_name = "GENE_SET_AGG"
    name = "Gene sets aggregation"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es",
                          order_num=10,
                          required_data_type="ExpressionSet",
                          required=True)
    _gs = InputBlockField(name="gs",
                          order_num=20,
                          required_data_type="GeneSets",
                          required=True)

    # The option value for the median is "median", matching the value used by
    # the cross-validation variant of this block (it used to be misspelled
    # "media").
    agg_method = ParamField("agg_method",
                            title="Aggregate method",
                            order_num=50,
                            input_type=InputType.SELECT,
                            field_type=FieldType.STR,
                            init_val="mean",
                            options={
                                "inline_select_provider":
                                True,
                                "select_options": [["mean", "Mean"],
                                                   ["median", "Median"]]
                            })

    agg_es = OutputBlockField(name="agg_es",
                              provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(GeneSetAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Queue the aggregation task for the currently bound inputs."""
        self.clean_errors()
        es = self.get_input_var("es")
        gs = self.get_input_var("gs")

        # Blocks saved before the option value was corrected may still hold
        # the misspelled "media"; treat it as "median".
        agg_method = self.agg_method
        if agg_method == "media":
            agg_method = "median"

        base_filename = "%s_gs_agg" % (self.uuid, )

        self.celery_task = wrapper_task.s(do_gs_agg, exp, self, es, gs,
                                          agg_method, base_filename)

        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        """Publish the aggregated expression set and persist the block."""
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
Beispiel #16
0
class NIMFASNMNMFBlock(GenericBlock):
    """Block that queues ``nimfa_snmnmf_task`` for mRNA and miRNA data.

    Inputs are two expression sets (mRNA, miRNA) and two binary interaction
    tables (gene-to-gene, miRNA-to-gene).  ``success`` publishes the three
    result matrices as the ``W``, ``H1_miRNA`` and ``H2_genes`` outputs.
    """

    block_base_name = "NIMFA_SNMNMF"
    name = "NIMFA SNMNMF"

    is_abstract = False
    block_group = GroupType.SNMNMF

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # --- inputs (the DNA methylation inputs are currently disabled) ---
    _m_rna = InputBlockField(
        name="mRNA",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _mi_rna = InputBlockField(
        name="miRNA",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )
    # _dna_methyl = InputBlockField(name="DNAmethyl", order_num=30, required_data_type="ExpressionSet", required=False)
    _gene2gene = InputBlockField(
        name="Gene2Gene",
        order_num=40,
        required_data_type="BinaryInteraction",
        required=True,
    )
    _mirna2gene = InputBlockField(
        name="miRNA2gene",
        order_num=50,
        required_data_type="BinaryInteraction",
        required=True,
    )
    # _gene2DNAmethylation =  InputBlockField(name="Gene2DNAmethyl", order_num=60, required_data_type="BinaryInteraction", required=False)

    # --- numeric parameters forwarded to the task in its ``params`` dict ---
    l1 = ParamField(
        name="l1",
        order_num=70,
        title="l1",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1,
    )
    l2 = ParamField(
        name="l2",
        order_num=80,
        title="l2",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1,
    )
    g1 = ParamField(
        name="g1",
        order_num=90,
        title="g1",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1,
    )
    g2 = ParamField(
        name="g2",
        order_num=100,
        title="g2",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1,
    )
    rank = ParamField(
        name="rank",
        order_num=110,
        title="rank",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=50,
    )

    # --- outputs ---
    w = OutputBlockField(name="W", provided_data_type="ExpressionSet")
    H1_miRNA = OutputBlockField(name="H1_miRNA", provided_data_type="ExpressionSet")
    H2_genes = OutputBlockField(name="H2_genes", provided_data_type="ExpressionSet")
    # H3_DNAmethyl = OutputBlockField(name="H3_DNAmethyl", provided_data_type="ExpressionSet")

    #H1_perf = OutputBlockField(name="H1_perf", provided_data_type="ExpressionSet")
    #H2_perf = OutputBlockField(name="H2_perf", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(NIMFASNMNMFBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Collect the bound inputs and queue the factorization task."""
        self.clean_errors()
        m_rna_es = self.get_input_var("mRNA")
        mi_rna_es = self.get_input_var("miRNA")
        gene_interactions = self.get_input_var("Gene2Gene")
        mi_rna_targets = self.get_input_var("miRNA2gene")

        task_params = {
            'l1': self.l1,
            'l2': self.l2,
            'g1': self.g1,
            'g2': self.g2,
            'rank': self.rank,
        }
        self.celery_task = wrapper_task.s(
            nimfa_snmnmf_task,
            exp,
            self,
            mRNA=m_rna_es,
            miRNA=mi_rna_es,
            gene2gene=gene_interactions,
            miRNA2gene=mi_rna_targets,
            params=task_params,
            base_filename="%s_nimfa_snmnmf" % self.uuid,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, W, H1, H2):
        """Publish the three result matrices and persist the block."""
        for out_name, matrix in (("W", W), ("H1_miRNA", H1), ("H2_genes", H2)):
            self.set_out_var(out_name, matrix)
        exp.store_block(self)
Beispiel #17
0
class PatternSearch(GenericBlock):
    """Block that queues the ``pattern_search`` task.

    Inputs are an mRNA expression set with a gene-to-gene interaction table
    (both required) and an optional miRNA expression set with a
    miRNA-to-gene interaction table.  ``success`` publishes the task result
    as the ``patterns`` output.
    """

    block_base_name = "PattSearch"
    name = "Pattern Search"
    block_group = GroupType.PATTERN_SEARCH

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _m_rna = InputBlockField(name="mRNA",
                             order_num=10,
                             required_data_type="ExpressionSet",
                             required=True)
    _mi_rna = InputBlockField(name="miRNA",
                              order_num=20,
                              required_data_type="ExpressionSet",
                              required=False)

    gene2gene = InputBlockField(name="gene2gene",
                                order_num=30,
                                required_data_type="BinaryInteraction",
                                required=True)
    miRNA2gene = InputBlockField(name="miRNA2gene",
                                 order_num=31,
                                 required_data_type="BinaryInteraction",
                                 required=False)

    genes_num = ParamField(name="genes_num",
                           title="Number of Genes",
                           order_num=10,
                           input_type=InputType.TEXT,
                           field_type=FieldType.INT,
                           init_val=100)

    # upload_gene2gene_platform = ParamField("upload_gene2gene_platform", title="PPI platform", order_num=32,
    #                                        input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM)

    # upload_mirna_platform = ParamField("upload_mirna_platform", title="miRNA platform", order_num=33,
    #                                    input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM, required=False)

    # Passed to the task as ``radius``.
    d = ParamField(name="d",
                   order_num=70,
                   title="d",
                   input_type=InputType.TEXT,
                   field_type=FieldType.INT,
                   init_val=2)
    min_imp = ParamField(name="min_imp",
                         order_num=80,
                         title="Minimal improvement",
                         input_type=InputType.TEXT,
                         field_type=FieldType.FLOAT,
                         init_val=0.06)

    _metric = ParamField(
        "metric",
        title="Metric",
        order_num=40,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="mutual_information",
        options={
            "inline_select_provider":
            True,
            "select_options":
            [["mutual_information", "Mutual Information"],
             ['normed_mutual_information', "Normed Mutual Information"],
             ['square_error', "Square Error"], ['correlation', "Correlation"],
             ['t-test', "TTest"], ['wilcoxon', "Wilcoxon"]]
        })
    patterns = OutputBlockField(name="patterns", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        super(PatternSearch, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Queue the pattern search task for the currently bound inputs."""
        self.clean_errors()
        exp.log(self.uuid, "Execute called")

        self.celery_task = wrapper_task.s(
            pattern_search,
            exp,
            self,
            m_rna_es=self.get_input_var("mRNA"),
            mi_rna_es=self.get_input_var("miRNA"),
            gene2gene=self.get_input_var("gene2gene"),
            miRNA2gene=self.get_input_var("miRNA2gene"),
            radius=self.d,
            min_imp=self.min_imp,
            number_of_genes=self.genes_num,
            # "metric" is a form parameter, not a bound input variable, so it
            # is read from the block like the other parameters.
            metric=self.metric,
            base_filename="%s_comodule_sets" % self.uuid,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Publish the found gene sets and persist the block."""
        exp.log(self.uuid, "Success")
        self.set_out_var("patterns", gs)
        exp.store_block(self)
Beispiel #18
0
class GenericRankingBlock(GenericBlock):
    """Abstract base for blocks that run a ranking over an expression set.

    ``execute`` queues ``apply_ranking`` with ``self.ranking_name`` and
    ``self.ranking_options``.  Both start out empty here; presumably concrete
    subclasses set the ranking name and override ``collect_options``.
    """

    block_base_name = ""
    block_group = GroupType.PROCESSING
    is_abstract = True

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )

    ## TODO: remove from generic ranking
    best = ParamField(
        name="best",
        title="Consider only best",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=None,
    )

    _result = OutputBlockField(
        name="result",
        field_type=FieldType.STR,
        provided_data_type="TableResult",
        init_val=None,
    )

    def __init__(self, *args, **kwargs):
        """Set up empty ranking settings and a result table for this block."""
        super(GenericRankingBlock, self).__init__(*args, **kwargs)
        self.ranking_name = None
        self.ranking_options = {}
        self.celery_task = None

        # The result table is created in the data folder of the owning experiment.
        owner_exp = Experiment.get_exp_by_id(self.exp_id)
        result_table = TableResult(
            base_dir=owner_exp.get_data_folder(),
            base_filename="%s_gt_result" % self.uuid,
        )
        self.result = result_table
        self.set_out_var("result", self.result)

    def collect_options(self):
        """Reset ``self.ranking_options`` to an empty dict."""
        self.ranking_options = {}

    def execute(self, exp, *args, **kwargs):
        """Collect the options and queue the ranking computation."""
        self.clean_errors()
        self.collect_options()

        task_kwargs = {
            "exp": exp,
            "block": self,
            "es": self.get_input_var("es"),
            "ranking_name": self.ranking_name,
            "result_table": self.result,
            "options": self.ranking_options,
        }
        self.celery_task = wrapper_task.s(apply_ranking, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

        queued_msg = "Sent ranking computation to queue"
        exp.log(self.uuid, queued_msg)
        log.debug(queued_msg)

    def success(self, exp, result, *args, **kwargs):
        """Keep the computed table, publish it and persist the block."""
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
Beispiel #19
0
class FetchGSE(GenericBlock):
    """Input block that fetches a data set from NCBI GEO.

    Workflow as wired by the actions below: save parameters, fetch the
    source (``fetch_geo_gse``), preprocess it (``preprocess_soft``) and
    finally let the user assign samples to classes.
    """

    block_base_name = "FETCH_GEO"
    name = "Fetch from NCBI GEO"
    block_group = GroupType.INPUT_DATA

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("start_fetch", ["valid_params", "done"],
                     "source_is_being_fetched", "Start fetch"),
        # A failed fetch returns to "valid_params": that is a state from
        # which both "save_params" and "start_fetch" are allowed, so the user
        # can correct the accession id or retry.  (The former target state,
        # "form_valid", is not a source state of any action.)
        ActionRecord("error_during_fetch", ["source_is_being_fetched"],
                     "valid_params",
                     reload_block_in_client=True),
        ActionRecord("successful_fetch", ["source_is_being_fetched"],
                     "source_was_fetched",
                     reload_block_in_client=True),
        ActionRecord("start_preprocess",
                     ["source_was_fetched", "source_was_preprocessed"],
                     "source_is_being_fetched", "Run preprocess"),
        ActionRecord("error_during_preprocess", ["source_is_being_fetched"],
                     "source_was_fetched",
                     reload_block_in_client=True),
        ActionRecord("successful_preprocess", ["source_is_being_fetched"],
                     "source_was_preprocessed",
                     reload_block_in_client=True),
        ActionRecord("assign_sample_classes",
                     ["source_was_preprocessed", "done"], "done"),
    ])

    source_file = BlockField("source_file", FieldType.CUSTOM, None)

    pages = BlockField("pages",
                       FieldType.RAW,
                       init_val={
                           "assign_phenotype_classes": {
                               "title": "Assign phenotype classes",
                               "resource": "assign_phenotype_classes",
                               "widget":
                               "widgets/assign_phenotype_classes.html"
                           },
                       })
    _is_sub_pages_visible = BlockField("is_sub_pages_visible",
                                       FieldType.RAW,
                                       is_a_property=True)

    ### PARAMETERS
    geo_uid = ParamField("geo_uid", "Geo accession id", InputType.TEXT,
                         FieldType.STR, "")

    _expression_set = OutputBlockField(name="expression_set",
                                       field_type=FieldType.HIDDEN,
                                       provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        #"Fetch ncbi gse",
        super(FetchGSE, self).__init__(*args, **kwargs)
        self.celery_task_fetch = None
        self.celery_task_preprocess = None

    def is_form_fields_editable(self):
        """Tell whether the block state allows editing the form fields."""
        return self.state in ['created', 'form_modified']

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return the phenotype of the fetched expression set prepared for JS.

        The header options (title prefixes, column order and hidden prefixes)
        are passed through to ``prepare_phenotype_for_js_from_es``.
        """
        headers_options = {
            "custom_title_prefix_map": [
                ("Sample_title", "Title"),
                ("Sample_description", "Description"),
                ("Sample_characteristics", "Characteristics"),
                ("Sample_organism", "Organism"),
                ("Sample_geo_accession", "GEO #"),
                ("Sample_", ""),
            ],
            "prefix_order": [
                "Sample_geo_accession",
                "Sample_title",
                "Sample_description",
                "Sample_contact",
                "Sample_characteristics",
            ],
            "prefix_hide": {
                "Sample_contact",
                "Sample_channel",
                "Sample_data_row_count",
                "Sample_data",
                "Sample_platform",
                "Sample_growth",
                "Sample_series_id",
                "Sample_status",
                "Sample_extract",
                "Sample_supplementary_file",
                "Sample_hyb",
                "Sample_label",
                "Sample_source",
                "Sample_last_update",
                "Sample_molecule",
                "Sample_organism",
                "Sample_scan",
                "Sample_taxid",
                "Sample_type",
                "Sample_submission",
            }
        }
        return prepare_phenotype_for_js_from_es(
            self.get_out_var("expression_set"), headers_options)

    @property
    def is_sub_pages_visible(self):
        """Tell whether the block state allows showing the sub pages.

        "done" is included because "assign_sample_classes" moves the block to
        "done" and may be triggered again from there, so the class assignment
        page has to stay reachable after the first assignment.
        """
        return self.state in [
            'source_was_preprocessed', 'sample_classes_assigned', 'ready',
            'done'
        ]

    def start_fetch(self, exp, *args, **kwargs):
        """Queue the task that fetches the data set given by ``geo_uid``.

        @param exp: Experiment
        """
        self.clean_errors()
        self.celery_task_fetch = wrapper_task.s(
            fetch_geo_gse,
            exp,
            self,
            geo_uid=self.geo_uid,
            success_action="successful_fetch",
            error_action="error_during_fetch",
            ignore_cache=False)
        exp.store_block(self)
        self.celery_task_fetch.apply_async()

    def error_during_fetch(self, exp, *args, **kwargs):
        """Persist the block after a failed fetch."""
        exp.store_block(self)

    def successful_fetch(self, exp, source_file, *args, **kwargs):
        """Remember the fetched source file and start preprocessing right away."""
        self.clean_errors()
        self.source_file = source_file
        self.do_action("start_preprocess", exp)
        exp.store_block(self)

    def start_preprocess(self, exp, *args, **kwargs):
        """Queue the task that preprocesses the fetched source file."""
        self.celery_task_preprocess = wrapper_task.s(
            preprocess_soft,
            exp,
            self,
            source_file=self.source_file,
            success_action="successful_preprocess",
            error_action="error_during_preprocess")
        exp.store_block(self)
        self.celery_task_preprocess.apply_async()

    def error_during_preprocess(self, exp, *args, **kwargs):
        """Persist the block after a failed preprocessing run."""
        exp.store_block(self)

    def successful_preprocess(self, exp, es, *args, **kwargs):
        """Publish the preprocessed expression set and notify the client.

            @type es: ExpressionSet
        """
        self.set_out_var("expression_set", es)
        # self.set_out_var("gpl_annotation", ann)

        self.clean_errors()
        exp.store_block(self)

        msg = BlockUpdated(self.exp_id, self.uuid, self.base_name)
        msg.comment = u"Dataset %s was preprocessed, \n please assign samples to classes" % self.geo_uid
        msg.silent = False
        msg.send()

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Store the class assignment sent by the client as a phenotype column.

        The JSON request body must carry ``user_class_title`` (name of the
        phenotype column) and ``classes`` (the values written to that
        column).
        """
        #TODO: unify code with user upload
        es = self.get_out_var("expression_set")
        pheno_df = es.get_pheno_data_frame()

        received = json.loads(request.body)
        es.pheno_metadata["user_class_title"] = received["user_class_title"]
        pheno_df[received["user_class_title"]] = received["classes"]

        es.store_pheno_data_frame(pheno_df)
        exp.store_block(self)

        self.do_action("assign_sample_classes", exp)

    def assign_sample_classes(self, exp, *args, **kwargs):
        """Handler of the ``assign_sample_classes`` action; does nothing."""
        pass
Beispiel #20
0
class GeneSetAggCV(GenericBlock):
    """Aggregate a train/test pair of expression sets using gene sets.

    Inputs are two ExpressionSet ports (``train_es``, ``test_es``) and one
    GeneSets port (``gs``). ``execute`` passes them, together with the chosen
    aggregation method, to ``agg_task_cv`` through ``wrapper_task``; the two
    results are published as ``out_train_es`` and ``out_test_es``.
    """
    block_group = GroupType.AGGREGATION
    block_base_name = "CV_GS_A"
    name = "CV Gene Sets Aggregation"
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports.
    _input_train_es = InputBlockField(
        name="train_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _input_test_es = InputBlockField(
        name="test_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )

    _input_gs = InputBlockField(
        name="gs",
        order_num=30,
        required_data_type="GeneSets",
        required=True,
    )

    # User-selectable aggregation method; defaults to "mean".
    agg_method = ParamField(
        "agg_method",
        title="Aggregate method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="mean",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mean", "Mean"],
                ["median", "Median"],
                ["pca", "PCA"],
            ],
        },
    )

    # Output ports.
    out_train_es = OutputBlockField(
        name="out_train_es", provided_data_type="ExpressionSet")
    out_test_es = OutputBlockField(
        name="out_test_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(GeneSetAggCV, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the aggregation task, store the block, then dispatch the task."""
        self.clean_errors()
        task_kwargs = {
            "train_es": self.get_input_var("train_es"),
            "test_es": self.get_input_var("test_es"),
            "gene_sets": self.get_input_var("gs"),
            "method": self.agg_method,
            # The "pca_cv" tag is fixed, whichever method is selected.
            "base_filename": "%s_%s_agg" % (self.uuid, "pca_cv"),
        }
        self.celery_task = wrapper_task.s(
            agg_task_cv, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, out_train_es, out_test_es):
        """Publish both aggregated expression sets and store the block."""
        results = (
            ("out_train_es", out_train_es),
            ("out_test_es", out_test_es),
        )
        for var_name, value in results:
            self.set_out_var(var_name, value)
        exp.store_block(self)
Beispiel #21
0
class CrossValidation(UniformMetaBlock):
    """K-fold cross-validation meta block.

    Every dynamically added ExpressionSet input gets a pair of inner outputs
    (``<port>_train_i`` / ``<port>_test_i``). ``execute`` hands the inputs to
    ``generate_cv_folds`` through ``wrapper_task``.
    """
    block_base_name = "CROSS_VALID"
    name = "Cross validation K-fold"

    _cv_actions = ActionsList([
        ActionRecord("become_ready", ["valid_params"], "ready"),
    ])
    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["cv_info.html"],
    )

    # Extensible ExpressionSet input.
    _input_es_dyn = InputBlockField(
        name="es_inputs",
        required_data_type="ExpressionSet",
        required=True,
        multiply_extensible=True,
        order_num=-1,
    )

    folds_num = ParamField(
        name="folds_num",
        title="Folds number",
        order_num=10,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=5,
    )
    repeats_num = ParamField(
        name="repeats_num",
        title="Repeats number",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=1,
    )

    def get_fold_labels(self):
        """Return ``fold_1`` .. ``fold_N`` with N = folds_num * repeats_num."""
        total_folds = self.folds_num * self.repeats_num
        labels = []
        for position in range(1, total_folds + 1):
            labels.append("fold_%s" % position)
        return labels

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register train/test inner outputs for a newly added input port.

            @type new_port: InputBlockField
        """
        port_name = new_port.name
        data_type = new_port.required_data_type

        train_field = InnerOutputField(
            name="%s_train_i" % port_name, provided_data_type=data_type)
        test_field = InnerOutputField(
            name="%s_test_i" % port_name, provided_data_type=data_type)

        # Remember which inner outputs belong to this input port.
        self.inner_output_es_names_map[port_name] = (
            train_field.name, test_field.name)

        self.register_inner_output_variables([train_field, test_field])

    def execute(self, exp, *args, **kwargs):
        """Reset inner outputs and dispatch the fold-generation task."""
        self.clean_errors()

        self.inner_output_manager.reset()
        es_dict = dict(
            (port_name, self.get_input_var(port_name))
            for port_name in self.es_inputs
        )

        task_kwargs = {
            "folds_num": self.folds_num,
            "repeats_num": self.repeats_num,
            "es_dict": es_dict,
            "inner_output_es_names_map": self.inner_output_es_names_map,
            "success_action": "on_folds_generation_success",
        }
        self.celery_task = wrapper_task.s(
            generate_cv_folds, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the parent handler, then trigger the ``become_ready`` action."""
        super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("become_ready", exp)

    def become_ready(self, *args, **kwargs):
        """Handler for the ``become_ready`` action; does nothing."""
        return None
Beispiel #22
0
class MergeComoduleSets(GenericBlock):
    """Processing block that merges two comodule sets into one.

    ``execute`` passes both ComoduleSet inputs and their user-supplied names
    to ``merge_comodules_task`` through ``wrapper_task``; the result is
    published as the ``comodule_set`` output.
    """
    block_base_name = "MERGE_COMODULE_SETS"
    name = "Merge Comodule Sets"

    is_abstract = False
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # First comodule set and its name.
    _cs_1 = InputBlockField(
        name="cs_1",
        order_num=10,
        required_data_type="ComoduleSet",
        required=True,
    )
    _cs_1_name = ParamField(
        name="cs_1_name",
        order_num=11,
        title="Comodule 1 name",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        init_val="genes",
    )

    # Second comodule set and its name.
    _cs_2 = InputBlockField(
        name="cs_2",
        order_num=20,
        required_data_type="ComoduleSet",
        required=True,
    )
    _cs_2_name = ParamField(
        name="cs_2_name",
        order_num=21,
        title="Comodule 2 name",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        init_val="genes",
    )

    # Output port; the attribute is called flt_es but it provides a ComoduleSet.
    flt_es = OutputBlockField(
        name="comodule_set", provided_data_type="ComoduleSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(MergeComoduleSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the merge task, store the block, then dispatch the task."""
        self.clean_errors()
        task_kwargs = {
            "cs_1": self.get_input_var("cs_1"),
            "cs_2": self.get_input_var("cs_2"),
            "cs_1_name": self.cs_1_name,
            "cs_2_name": self.cs_2_name,
            "base_filename": "%s_%s_thr" % (self.uuid, 'merge_cs'),
        }
        self.celery_task = wrapper_task.s(
            merge_comodules_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the merged set as ``comodule_set`` and store the block."""
        self.set_out_var("comodule_set", flt_es)
        exp.store_block(self)
Beispiel #23
0
class UserUploadComplex(GenericBlock):
    """Input block for uploading mRNA, miRNA and methylation matrices.

    Each data kind has its own file, platform ID, matrix orientation and CSV
    separator parameters; a phenotype matrix is shared. ``process_upload``
    hands the block to ``user_upload_complex_task`` through ``wrapper_task``
    and ``success`` publishes whichever expression sets were produced.
    """
    block_base_name = "UPLOAD_CMPLX"
    block_group = GroupType.INPUT_DATA
    name = "Upload mRna/miRna/methyl"

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord(
            "on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord(
            "process_upload",
            ["valid_params", "processing_upload"],
            "processing_upload",
            "Process uploaded data",
        ),
        ActionRecord(
            "success",
            ["processing_upload"],
            "done",
            reload_block_in_client=True,
        ),
        ActionRecord("error", ["processing_upload"], "valid_params"),
    ])

    # ---- mRNA parameters -------------------------------------------------
    m_rna_matrix = ParamField(
        "m_rna_matrix",
        title="mRNA expression",
        order_num=10,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
    )
    m_rna_platform = ParamField(
        "m_rna_platform",
        title="Platform ID",
        order_num=11,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )

    m_rna_unit = ParamField(
        "m_rna_unit",
        title="Working unit [used when platform is unknown]",
        order_num=12,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=False,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["RefSeq", "RefSeq"],
                ["Entrez", "EntrezID"],
                ["Symbol", "Symbol"],
            ],
        },
    )

    m_rna_matrix_ori = ParamField(
        "m_rna_matrix_ori",
        title="Matrix orientation",
        order_num=13,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"],
            ],
        },
    )
    csv_sep_m_rna = ParamField(
        "csv_sep_m_rna",
        title="CSV separator symbol",
        order_num=14,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # ---- miRNA parameters ------------------------------------------------
    mi_rna_matrix = ParamField(
        "mi_rna_matrix",
        title=u"μRNA expression",
        order_num=20,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        required=False,
    )

    mi_rna_platform = ParamField(
        "mi_rna_platform",
        title="Platform ID",
        order_num=21,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )
    mi_rna_unit = ParamField(
        "mi_rna_unit",
        title="Working unit [used when platform is unknown]",
        order_num=22,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=False,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["RefSeq", "RefSeq"],
                ["mirbase", "miRBase ID"],
            ],
        },
    )

    mi_rna_matrix_ori = ParamField(
        "mi_rna_matrix_ori",
        title="Matrix orientation",
        order_num=23,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"],
            ],
        },
    )
    csv_sep_mi_rna = ParamField(
        "csv_sep_mi_rna",
        title="CSV separator symbol",
        order_num=24,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # ---- methylation parameters ------------------------------------------
    methyl_matrix = ParamField(
        "methyl_matrix",
        title="Methylation expression",
        order_num=30,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        required=False,
    )

    methyl_platform = ParamField(
        "methyl_platform",
        title="Platform ID",
        order_num=31,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )
    # NOTE: no working-unit parameter is exposed for methylation data
    # (order_num 32 is left unused).

    methyl_matrix_ori = ParamField(
        "methyl_matrix_ori",
        title="Matrix orientation",
        order_num=33,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"],
            ],
        },
    )

    csv_sep_methyl = ParamField(
        "csv_sep_methyl",
        title="CSV separator symbol",
        order_num=34,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # ---- phenotype parameters --------------------------------------------
    pheno_matrix = ParamField(
        "pheno_matrix",
        title="Phenotype matrix",
        order_num=40,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        required=False,
    )

    csv_sep_pheno = ParamField(
        "csv_sep_pheno",
        title="CSV separator symbol",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    _is_sub_pages_visible = BlockField(
        "is_sub_pages_visible", FieldType.RAW, is_a_property=True)

    # ---- outputs ---------------------------------------------------------
    _m_rna_es = OutputBlockField(
        name="m_rna_es",
        field_type=FieldType.HIDDEN,
        provided_data_type="ExpressionSet",
    )
    _mi_rna_es = OutputBlockField(
        name="mi_rna_es",
        field_type=FieldType.HIDDEN,
        provided_data_type="ExpressionSet",
    )
    _methyl_es = OutputBlockField(
        name="methyl_es",
        field_type=FieldType.HIDDEN,
        provided_data_type="ExpressionSet",
    )

    mrna_gpl_file = BlockField("mrna_gpl_file", FieldType.CUSTOM, None)
    mirna_gpl_file = BlockField("mirna_gpl_file", FieldType.CUSTOM, None)
    methyl_gpl_file = BlockField("methyl_gpl_file", FieldType.CUSTOM, None)

    pages = BlockField(
        "pages",
        FieldType.RAW,
        init_val={
            "assign_phenotype_classes": {
                "title": "Assign phenotype classes",
                "resource": "assign_phenotype_classes",
                "widget": "widgets/assign_phenotype_classes.html",
            },
        },
    )

    @property
    def is_sub_pages_visible(self):
        """True when the block state is one in which sub pages are shown."""
        return self.state in [
            'source_was_preprocessed',
            'sample_classes_assigned',
            'ready',
            'done',
        ]

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(UserUploadComplex, self).__init__(*args, **kwargs)
        self.celery_task = None

    def process_upload(self, exp, *args, **kwargs):
        """Build the upload-processing task, store the block, dispatch it."""
        self.clean_errors()
        self.celery_task = wrapper_task.s(user_upload_complex_task, exp, self)
        exp.store_block(self)
        self.celery_task.apply_async()

    def _stored_expression_sets(self):
        """Return the mRNA, miRNA and methylation outputs, in that order.

        Each entry is whatever ``get_out_var`` returns for that output, so
        entries may be ``None``.
        """
        return [
            self.get_out_var(var_name)
            for var_name in ("m_rna_es", "mi_rna_es", "methyl_es")
        ]

    def _first_stored_es(self, expression_sets):
        """Return the first entry that is not ``None``.

        Raises ``Exception`` when every entry is ``None``.
        """
        for candidate in expression_sets:
            if candidate is not None:
                return candidate
        raise Exception("No data was stored before")

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return phenotype data for the client from the first stored set.

        Raises ``Exception`` when no expression set has been stored yet.
        """
        es = self._first_stored_es(self._stored_expression_sets())
        return prepare_phenotype_for_js_from_es(es)

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Write the client's class assignment into every stored set.

        ``request.body`` is parsed as JSON and must provide
        ``user_class_title`` and ``classes``. The phenotype frame of the first
        stored set receives the new column and is then stored into each set
        that is present. Raises ``Exception`` when no set has been stored yet.
        """
        expression_sets = self._stored_expression_sets()
        es = self._first_stored_es(expression_sets)

        pheno_df = es.get_pheno_data_frame()

        received = json.loads(request.body)
        class_title = received["user_class_title"]

        pheno_df[class_title] = received["classes"]

        # The same phenotype frame is written to every available set.
        for work_es in expression_sets:
            if work_es is not None:
                work_es.pheno_metadata["user_class_title"] = class_title
                work_es.store_pheno_data_frame(pheno_df)

        exp.store_block(self)

    def success(self, exp, m_rna_es, mi_rna_es, methyl_es):
        """Publish the produced expression sets and store the block.

        NOTE(review): the checks below use truthiness while the other methods
        compare against ``None`` -- confirm an ExpressionSet is never falsy.
        """
        produced = (
            ("m_rna_es", m_rna_es),
            ("mi_rna_es", mi_rna_es),
            ("methyl_es", methyl_es),
        )
        for var_name, value in produced:
            if value:
                self.set_out_var(var_name, value)
        exp.store_block(self)
Beispiel #24
0
class UserUpload(GenericBlock):
    """Abstract input block for a user-uploaded expression and phenotype matrix.

    ``process_upload`` reads both CSV files, builds an ExpressionSet from
    them, publishes it as the ``expression_set`` output and triggers the
    ``success`` action.
    """
    block_base_name = "UPLOAD"
    block_group = GroupType.INPUT_DATA
    is_abstract = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord(
            "on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord(
            "process_upload",
            ["valid_params", "processing_upload"],
            "processing_upload",
            "Process uploaded data",
            reload_block_in_client=True,
        ),
        ActionRecord(
            "success",
            ["processing_upload"],
            "done",
            reload_block_in_client=True,
        ),
        ActionRecord(
            "error",
            ["processing_upload"],
            "valid_params",
            reload_block_in_client=True,
        ),
    ])

    es_matrix = ParamField(
        "es_matrix",
        title="Expression set matrix",
        order_num=0,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
    )
    es_matrix_ori = ParamField(
        "es_matrix_ori",
        title="Matrix orientation",
        order_num=1,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"],
            ],
        },
    )
    pheno_matrix = ParamField(
        "pheno_matrix",
        title="Phenotype matrix",
        order_num=10,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
    )
    gpl_platform = ParamField(
        "gpl_platform",
        title="Platform ID",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )
    working_unit = ParamField(
        "working_unit",
        title="Working unit [used when platform is unknown]",
        order_num=3,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )
    _is_sub_pages_visible = BlockField(
        "is_sub_pages_visible", FieldType.RAW, is_a_property=True)

    # Outputs.
    _expression_set = OutputBlockField(
        name="expression_set",
        field_type=FieldType.HIDDEN,
        provided_data_type="ExpressionSet",
    )
    _gpl_annotation = OutputBlockField(
        name="gpl_annotation",
        field_type=FieldType.HIDDEN,
        provided_data_type="PlatformAnnotation",
    )

    # TODO: COPY PASTE from fetch_gse block
    pages = BlockField(
        "pages",
        FieldType.RAW,
        init_val={
            "assign_phenotype_classes": {
                "title": "Assign phenotype classes",
                "resource": "assign_phenotype_classes",
                "widget": "widgets/assign_phenotype_classes.html",
            },
        },
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block, passing "User upload" as first parent argument."""
        super(UserUpload, self).__init__("User upload", *args, **kwargs)

    @property
    def is_sub_pages_visible(self):
        """True when the block state is one in which sub pages are shown."""
        return self.state in [
            'source_was_preprocessed',
            'sample_classes_assigned',
            'ready',
            'done',
        ]

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return phenotype data for the client from the stored expression set."""
        return prepare_phenotype_for_js_from_es(
            self.get_out_var("expression_set"))

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Write the client's class assignment into the phenotype data.

        ``request.body`` is parsed as JSON and must provide
        ``user_class_title`` (column name) and ``classes`` (column values).
        """
        es = self.get_out_var("expression_set")
        pheno_df = es.get_pheno_data_frame()

        received = json.loads(request.body)
        class_title = received["user_class_title"]
        es.pheno_metadata["user_class_title"] = class_title
        pheno_df[class_title] = received["classes"]

        es.store_pheno_data_frame(pheno_df)
        exp.store_block(self)

    def process_upload(self, exp, *args, **kwargs):
        """Build the expression set from the uploaded files and publish it.

            @param exp: Experiment
        """
        self.clean_errors()

        # read_csv(index_col=0, parse_dates=True) is the documented
        # replacement for the deprecated DataFrame.from_csv defaults.
        assay_df = pd.read_csv(
            self.es_matrix.get_file(), index_col=0, parse_dates=True)

        es = ExpressionSet(base_dir=exp.get_data_folder(),
                           base_filename="%s_annotation" % self.uuid)

        # The first CSV column already becomes the index here. An extra
        # set_index() call whose result was discarded used to follow; it had
        # no effect and is not needed.
        pheno_df = pd.read_csv(
            self.pheno_matrix.get_file(), index_col=0, parse_dates=True)

        # Make sure the user class column exists, empty by default.
        user_class_title = es.pheno_metadata["user_class_title"]
        if user_class_title not in pheno_df.columns:
            pheno_df[user_class_title] = ""

        # Transpose when the matrix was uploaded as Genes x Samples.
        if self.es_matrix_ori == "GxS":
            assay_df = assay_df.T

        es.store_assay_data_frame(assay_df)
        es.store_pheno_data_frame(pheno_df)

        if self.working_unit:
            es.working_unit = self.working_unit

        self.set_out_var("expression_set", es)

        exp.store_block(self)

        self.do_action("success", exp)

    def success(self, exp, *args, **kwargs):
        """Handler for the ``success`` action; does nothing."""
        pass
Beispiel #25
0
class DecisionTree(GenericClassifier):
    """Decision tree classifier block.

    Declares the split criterion and three integer size limits as
    parameters; ``collect_options`` gathers them through
    ``collect_option_safe``.
    """
    block_base_name = "DT"
    name = "Decision Tree"

    classifier_name = "DT"

    criterion = ParamField(
        name="criterion",
        title="The function to measure the quality of a split",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        order_num=11,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["gini", "Gini impurity"],
                ["entropy", "Information gain"]
            ]
        }
    )

    # NOTE: max_features (mode + value) is not exposed as a parameter here;
    # order numbers 20 and 30 are left unused.

    max_depth = ParamField(
        name="max_depth",
        title="The maximum depth of the tree",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=40,
    )

    min_samples_split = ParamField(
        name="min_samples_split",
        title="The minimum number of samples to split an internal node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=50,
    )

    min_samples_leaf = ParamField(
        name="min_samples_leaf",
        title="The minimum number of samples to be at a leaf node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=60,
    )

    def collect_options(self):
        """Collect every declared parameter as a classifier option.

        ``criterion`` is collected too; it used to be declared but never
        collected, so the user's choice was ignored.
        NOTE(review): assumes ``collect_option_safe`` skips unset values, as
        its name suggests -- confirm in GenericClassifier.
        """
        self.collect_option_safe("criterion", str)
        self.collect_option_safe("max_depth", int)
        self.collect_option_safe("min_samples_split", int)
        self.collect_option_safe("min_samples_leaf", int)
class MergeExpressionSets(GenericBlock):
    """Processing block that concatenates two expression sets.

    ``execute`` passes both ExpressionSet inputs and the concatenation mode
    to ``merge_two_es`` through ``wrapper_task``; the result is published as
    the ``merged_es`` output.
    """
    block_base_name = "MergeES"
    name = "Concatenate Expression"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es_1 = InputBlockField(
        name="es_1",
        title="Set 1",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _es_2 = InputBlockField(
        name="es_2",
        title="Set 2",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )
    # Concatenation mode: "CR" (samples) or "CC" (features).
    _es_matrix_con = ParamField(
        "_es_matrix_con",
        title="Concatenation",
        order_num=30,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="CR",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["CR", "concatenate samples"],
                ["CC", "concatenate features"],
            ],
        },
    )
    merged_es = OutputBlockField(
        name="merged_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task signature attached yet."""
        super(MergeExpressionSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the merge task, store the block, then dispatch the task."""
        self.clean_errors()
        # NOTE(review): a class attribute with this name exists, so the "CR"
        # fallback may never be reached -- confirm how the framework installs
        # parameter values on instances.
        concat_mode = getattr(self, "_es_matrix_con", "CR")
        task_kwargs = {
            "es_1": self.get_input_var("es_1"),
            "es_2": self.get_input_var("es_2"),
            "con": concat_mode,
            "base_filename": "%s_merged" % self.uuid,
        }
        self.celery_task = wrapper_task.s(
            merge_two_es, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Publish the merged set as ``merged_es`` and store the block."""
        self.set_out_var("merged_es", es)
        exp.store_block(self)
Beispiel #27
0
class RenderTable(RcVisualizer):
    """Visualizer block that presents a results container as a table.

    The table is a slice of ``self.rc`` taken along the axes selected in
    ``table_config``. It is exposed as HTML through the ``table`` property
    and can be exported as CSV (``export_table``) or as the raw container
    in JSON-dict form (``export_rc``).
    """
    block_base_name = "RENDER_TABLE"
    name = "Results Container as Table"

    # Fields backed by the properties of the same name defined below.
    _table = BlockField(name="table",
                        field_type=FieldType.CUSTOM,
                        is_a_property=True)
    _export_table_url = BlockField(name="export_table_url",
                                   field_type=FieldType.STR,
                                   is_a_property=True)
    _export_raw_results_url = BlockField(name="export_raw_results_url",
                                         field_type=FieldType.STR,
                                         is_a_property=True)

    elements = BlockField(name="elements",
                          field_type=FieldType.SIMPLE_LIST,
                          init_val=["rc_table.html"])

    # Hidden raw parameter holding the slice configuration; see __init__
    # for the keys it starts with.
    table_config = ParamField(name="table_config",
                              title="",
                              input_type=InputType.HIDDEN,
                              field_type=FieldType.RAW)

    def __init__(self, *args, **kwargs):
        """Initialise the block with an empty table configuration."""
        super(RenderTable, self).__init__(*args, **kwargs)
        self.table_config = {
            # Axis used for the table header.
            "header_axis": "",
            # Maps axis -> flag; axes with a truthy flag form the row index.
            "multi_index_axis_dict": {},
        }

    @property
    def table(self):
        """Build the table object for the currently configured slice.

        Returns:
            A ``TableObj``. Its ``html`` and ``df`` attributes are assigned
            only when ``self.rc`` is truthy, a header axis and at least one
            index axis are configured, and the block has a ``metric``
            attribute. When ``self.rc`` is truthy but the slice cannot be
            built, the reason is logged and the fresh object is returned.
        """
        rc = self.rc
        to = TableObj()
        if not rc:
            return to

        rc.load()
        header_axis = self.table_config.get("header_axis")
        axis_flags = self.table_config.get("multi_index_axis_dict", {})
        # Only axes whose flag is truthy take part in the row index.
        index_axis_list = [
            axis for axis, flag in axis_flags.iteritems() if flag
        ]

        if header_axis and index_axis_list and hasattr(self, "metric"):
            df = rc.get_pandas_slice(header_axis,
                                     index_axis_list,
                                     metric_name=self.metric)
            to.html = df.to_html(float_format=pd_float_format_func)
            to.df = df
            return to

        # Slice cannot be built: report to the experiment log (when the
        # block belongs to an experiment) and to the module logger.
        message = "Can't build table slice, header axis `%s`, index axis_list `%s`"
        if self.exp_id:
            exp = Experiment.get_exp_by_id(self.exp_id)
            exp.log(self.uuid, message % (header_axis, index_axis_list))
        log.debug(message, header_axis, index_axis_list)
        return to

    @property
    def export_table_url(self):
        """URL of the "block_field_formatted" view for the CSV table export."""
        return reverse("block_field_formatted",
                       kwargs={
                           "exp_id": self.exp_id,
                           "block_uuid": self.uuid,
                           "field": "export_table",
                           "format": "csv"
                       })

    @property
    def export_raw_results_url(self):
        """URL of the "block_field_formatted" view for the raw JSON export."""
        return reverse("block_field_formatted",
                       kwargs={
                           "exp_id": self.exp_id,
                           "block_uuid": self.uuid,
                           "field": "export_rc",
                           "format": "json"
                       })

    def export_rc(self, exp, *args, **kwargs):
        """Return the results container exported as a JSON dict.

        ``exp`` and the extra arguments are accepted but not used.
        """
        return self.rc.export_to_json_dict()

    def export_table(self, exp, *args, **kwargs):
        """Return the current table serialised as CSV text.

        ``exp`` and the extra arguments are accepted but not used.
        """
        table = self.table
        out = StringIO.StringIO()
        # Passing float_format to to_csv alone did not format the values
        # (per the original author's note), so every cell is formatted
        # explicitly before the frame is written.
        tmp_df = table.df.applymap(pd_float_format_func)
        tmp_df.to_csv(out, float_format=pd_float_format_func)

        out.seek(0)
        return out.read()

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Register every axis of the results container in the configuration.

        Axes not yet present in ``multi_index_axis_dict`` are added with an
        empty flag; existing entries are left untouched. The block is then
        stored in ``exp``.
        """
        super(RenderTable, self).on_params_is_valid(exp, *args, **kwargs)
        if self.rc is not None:
            # setdefault tolerates a configuration that arrived without the
            # "multi_index_axis_dict" key, matching the defensive .get()
            # used by the `table` property.
            axis_flags = self.table_config.setdefault(
                "multi_index_axis_dict", {})
            for axis in self.rc.axis_list:
                axis_flags.setdefault(axis, "")
        exp.store_block(self)
Beispiel #28
0
class RandomForest(GenericClassifier):
    """Classifier block configured for the "random_forest" classifier.

    The parameters below are declared as block fields; ``collect_options``
    transfers them into the classifier options.
    """
    block_base_name = "RND_FOREST"
    name = "Random forest"

    classifier_name = "random_forest"

    n_estimators = ParamField(
        name="n_estimators",
        title="The number of trees in the forest",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=10,
        order_num=10,
    )

    criterion = ParamField(
        name="criterion",
        title="The function to measure the quality of a split",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        order_num=11,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["gini", "Gini impurity"],
                ["entropy", "Information gain"]
            ]
        }
    )

    # Selects how `max_features_value` is interpreted (see collect_options).
    max_features_mode = ParamField(
        name="max_features_mode",
        title="The number of features to consider when looking for the best split",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["int", "Fixed number"],
                ["float", "Ratio of the features number [0.0 .. 1.0]"],
                ["sqrt", "sqrt(number of features)"],
                ["log2", "log2(number of features)"],
            ]
        },
        order_num=20,
    )

    # Kept as a string; converted to int or float depending on the mode.
    max_features_value = ParamField(
        name="max_features_value",
        title="Value for the chosen mode",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        order_num=30,
    )

    max_depth = ParamField(
        name="max_depth",
        title="The maximum depth of the tree.",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=40,
    )

    min_samples_split = ParamField(
        name="min_samples_split",
        title="The minimum number of samples to split an internal node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=50,
    )

    min_samples_leaf = ParamField(
        name="min_samples_leaf",
        title="The minimum number of samples to be at a leaf node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=60,
    )

    def collect_options(self):
        """Transfer the block parameters into the classifier options.

        ``max_features`` is derived from the pair
        (``max_features_mode``, ``max_features_value``):

        * "sqrt" / "log2": the mode string itself is used;
        * "int": the value is collected as an int;
        * "float": the value is collected as a float;
        * any other mode: ``max_features`` is not set here.
        """
        # The field is declared as `n_estimators`; the previous name
        # "n_n_estimators" did not match any declared parameter.
        self.collect_option_safe("n_estimators", int)
        # `criterion` is offered to the user, so it has to be collected too,
        # in the same way LinearSVM collects its `loss` select.
        self.collect_option_safe("criterion", str)

        max_features_mode = self.get_option_safe("max_features_mode", str)
        if max_features_mode in ["sqrt", "log2"]:
            self.classifier_options["max_features"] = max_features_mode
        elif max_features_mode == "int":
            self.collect_option_safe("max_features_value", int, target_name="max_features")
        elif max_features_mode == "float":
            self.collect_option_safe("max_features_value", float, target_name="max_features")

        self.collect_option_safe("max_depth", int)
        self.collect_option_safe("min_samples_split", int)
        self.collect_option_safe("min_samples_leaf", int)
Beispiel #29
0
class UniformMetaBlock(GenericBlock):
    """Abstract meta block that runs its sub-scope once per sequence cell.

    For every position of ``inner_output_manager`` the sub-scope is
    executed, the variables bound in ``collector_spec`` are copied into the
    matching cell of ``res_seq``, and once the iterator is exhausted the
    result collection is built and the block moves to "done".
    """
    is_abstract = True
    block_group = GroupType.META_PLUGIN
    create_new_scope = True
    is_block_supports_auto_execution = True

    # Each ActionRecord is (action name, allowed source states, target state).
    _block_actions = ActionsList([])
    _block_actions.extend(ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord("add_collector_var", ["created", "ready", "done", "valid_params"], "validating_params"),
        ActionRecord("remove_collector_var", ["created", "ready", "done", "valid_params"], "validating_params"),

        ActionRecord("execute", ["ready"], "generating_folds", user_title="Run block"),

        ActionRecord("on_folds_generation_success", ["generating_folds"], "ready_to_run_sub_scope",
                     reload_block_in_client=True),
        ActionRecord("continue_collecting_sub_scope", ["ready_to_run_sub_scope"],
                     "sub_scope_executing"),

        ActionRecord("run_sub_scope", ["ready_to_run_sub_scope"], "sub_scope_executing"),
        ActionRecord("on_sub_scope_done", ["sub_scope_executing"], "ready_to_run_sub_scope"),

        ActionRecord("success", ["working", "ready_to_run_sub_scope"], "done",
                     propagate_auto_execution=True, reload_block_in_client=True),
        ActionRecord("error", ["*", "ready", "working", "sub_scope_executing",
                               "generating_folds", "ready_to_run_sub_scope"],
                     "execution_error", reload_block_in_client=True),

        ActionRecord("reset_execution", ["*", "done", "sub_scope_executing", "ready_to_run_sub_scope",
                                         "generating_folds", "execution_error"], "ready",
                     user_title="Reset execution"),
    ]))

    _collector_spec = ParamField(name="collector_spec", title="",
                                 field_type=FieldType.CUSTOM,
                                 input_type=InputType.HIDDEN,
                                 init_val=None, required=False
    )

    res_seq = BlockField(name="res_seq", provided_data_type="SequenceContainer",
                         field_type=FieldType.HIDDEN, init_val=None)

    _results_container = OutputBlockField(
        name="results_container",
        provided_data_type="ResultsContainer",
        field_type=FieldType.HIDDEN,
        init_val=None
    )

    def __init__(self, *args, **kwargs):
        """Initialise iteration state, the collector spec and empty outputs."""
        super(UniformMetaBlock, self).__init__(*args, **kwargs)
        # States specific to this block that count as "working" for
        # auto-execution.
        self.auto_exec_status_working.update(["sub_scope_executing", "ready_to_run_sub_scope",
                                              "generating_folds"])

        self.inner_output_manager = IteratedInnerFieldManager()
        self.collector_spec = CollectorSpecification()
        self.collector_spec.label = self.block_base_name + "_collection"

        self.inner_output_es_names_map = {}
        self.celery_task = None

        self.set_out_var("results_container", None)
        self.res_seq = SequenceContainer()

    def remap_inputs(self, mapping):
        """Apply ``mapping`` to every bound input and bound collector variable."""
        for var in self.bound_inputs.itervalues():
            var.change_block(mapping)
        for var in self.collector_spec.bound.itervalues():
            var.change_block(mapping)

    @property
    def is_sub_pages_visible(self):
        """True while the block is in 'valid_params', 'done' or 'ready'."""
        return self.state in ['valid_params', 'done', 'ready']

    @abstractmethod
    def get_fold_labels(self):
        """Abstract; concrete blocks must implement it."""
        pass

    @abstractmethod
    def get_repeat_labels(self):
        """Abstract; concrete blocks must implement it."""
        pass

    def get_inner_out_var(self, name):
        """Return the inner output variable ``name`` from the output manager."""
        return self.inner_output_manager.get_var(name)

    def run_sub_scope(self, exp, *args, **kwargs):
        """Reset the sub-blocks and execute the sub-scope for the current cell."""
        self.reset_execution_for_sub_blocks()
        # Both lookups are used for debug logging only.
        cell = self.res_seq.sequence[self.inner_output_manager.iterator]
        log.debug("Cell!!!!!!!! %s", str(cell))
        act = self.inner_output_manager.sequence[self.inner_output_manager.iterator]
        log.debug("Cell!!!!!!!! %s", str(act))

        exp.store_block(self)
        sr = ScopeRunner(exp, self.sub_scope_name)
        sr.execute()

    def on_sub_scope_done(self, exp, *args, **kwargs):
        """
            @type exp: Experiment

            This action should be called by ScopeRunner
            when all blocks in sub-scope have exec status == done

            Collects the bound collector variables into the current cell of
            `res_seq`, then either keeps collecting, moves on to the next
            cell, or finishes the block when the iterator is exhausted.
        """
        r = get_redis_instance()
        # The collection step runs under the block's global lock.
        with redis_lock.Lock(r, ExpKeys.get_block_global_lock_key(self.exp_id, self.uuid)):

            cell = self.res_seq.sequence[self.inner_output_manager.iterator]
            for name, scope_var in self.collector_spec.bound.iteritems():
                var = exp.get_scope_var_value(scope_var)
                # NOTE(review): this routine message is logged with severity
                # "CRITICAL"; confirm that is intended.
                exp.log(self.uuid, "Collected %s from %s" % (var, scope_var.title), severity="CRITICAL")
                log.debug("Collected %s from %s", var, scope_var.title)
                if var is not None:
                    # Store a private copy so later sub-scope runs cannot
                    # alter what was collected for this cell.
                    if hasattr(var, "clone"):
                        cell[name] = var.clone("%s_%s" %
                                               (self.uuid, self.inner_output_manager.iterator))
                    else:
                        cell[name] = deepcopy(var)

            self.res_seq.sequence[self.inner_output_manager.iterator] = cell

            exp.store_block(self)

        if len(cell) < len(self.res_seq.fields):
            # Not every field of the cell has been filled yet.
            self.do_action("continue_collecting_sub_scope", exp)
        else:
            # NOTE(review): do_action sits inside the try, so a StopIteration
            # escaping from it is also treated as "iteration finished".
            try:
                self.inner_output_manager.next()
                self.do_action("run_sub_scope", exp)
            except StopIteration:
                # All folds were processed without errors
                self.build_result_collection(exp)

                self.do_action("success", exp)
Beispiel #30
0
class KnnClassifier(GenericClassifier):
    """Classifier block configured for the "knn" classifier."""
    block_base_name = "KNN"
    name = "Knn classifier"

    classifier_name = "knn"

    n_neighbors = ParamField(
        name="n_neighbors",
        title="Number of neighbors",
        order_num=10,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=1,
    )

    algorithm = ParamField(
        name="algorithm",
        title="Algorithm [optional]",
        order_num=20,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["ball_tree", "BallTree"],
                ["kd_tree", "KDTree"],
                ["brute", "Brute force search"],
                ["auto", "Auto guess algorithm"],
            ],
        },
    )

    leaf_size = ParamField(
        name="leaf_size",
        title="Leaf size for BallTree or KDTree [optional]",
        order_num=30,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
    )

    # The metric choices are declared twice: as the `metric_options` list
    # named by `select_provider` below, and inline in the field's options.
    _metric_options = BlockField(name="metric_options", field_type=FieldType.RAW)
    metric_options = [
        {"pk": "euclidean", "str": "Euclidean Distance"},
        {"pk": "manhattan", "str": "Manhattan Distance"},
        {"pk": "chebyshev", "str": "Chebyshev Distance"},
    ]
    metric = ParamField(
        name="metric",
        title="The distance metric to use for the tree [optional]",
        order_num=40,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        select_provider="metric_options",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["euclidean", "Euclidean Distance"],
                ["manhattan", "Manhattan Distance"],
                ["chebyshev", "Chebyshev Distance"],
            ],
        },
    )

    def collect_options(self):
        """Collect the four KNN parameters in declaration order.

        ``n_neighbors`` and ``leaf_size`` are collected with ``int`` as the
        second argument; ``algorithm`` and ``metric`` are collected with the
        helper's defaults.
        """
        option_calls = (
            ("n_neighbors", int),
            ("algorithm",),
            ("leaf_size", int),
            ("metric",),
        )
        for call_args in option_calls:
            self.collect_option_safe(*call_args)