Esempio n. 1
0
class FilterByInteraction(GenericBlock):
    """Filter an mRNA and a miRNA expression set by a binary interaction.

    Inputs are two ExpressionSet values ("mRNA_es", "miRNA_es") and one
    BinaryInteraction ("interaction"); outputs are two ExpressionSet values
    ("m_rna_filtered_es", "mi_rna_filtered_es").  The actual work is handed
    to ``filter_by_bi`` through ``wrapper_task``.
    """

    block_base_name = "FILTER_BY_BI"
    name = "Filter ES by interaction"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    # Append the shared actions from execute_block_actions_list.
    _block_actions.extend(execute_block_actions_list)

    _mRNA_es = InputBlockField(
        name="mRNA_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _miRNA_es = InputBlockField(
        name="miRNA_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )
    _interaction = InputBlockField(
        name="interaction",
        order_num=30,
        required_data_type="BinaryInteraction",
        required=True,
    )

    m_rna_filtered_es = OutputBlockField(
        name="m_rna_filtered_es",
        provided_data_type="ExpressionSet",
    )
    mi_rna_filtered_es = OutputBlockField(
        name="mi_rna_filtered_es",
        provided_data_type="ExpressionSet",
    )

    def __init__(self, *args, **kwargs):
        super(FilterByInteraction, self).__init__(*args, **kwargs)
        # Nothing has been scheduled yet.
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Read the three inputs and dispatch ``filter_by_bi`` asynchronously.

        The block is stored on ``exp`` before the task is sent.
        """
        self.clean_errors()
        m_rna_input = self.get_input_var("mRNA_es")
        mi_rna_input = self.get_input_var("miRNA_es")
        interaction_input = self.get_input_var("interaction")
        output_basename = "%s_filtered_by_BI" % self.uuid

        self.celery_task = wrapper_task.s(
            filter_by_bi,
            exp,
            self,
            m_rna_es=m_rna_input,
            mi_rna_es=mi_rna_input,
            interaction_matrix=interaction_input,
            base_filename=output_basename,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, m_rna_filtered_es, mi_rna_filtered_es):
        """Publish both filtered expression sets and store the block."""
        outputs = (
            ("m_rna_filtered_es", m_rna_filtered_es),
            ("mi_rna_filtered_es", mi_rna_filtered_es),
        )
        for out_name, out_value in outputs:
            self.set_out_var(out_name, out_value)
        exp.store_block(self)
Esempio n. 2
0
class UploadGeneSets(GenericBlock):
    """Input-data block that turns an uploaded .gmt file into a GeneSets output.

    The only parameter is the uploaded file ("upload_gs"); the only output is
    "gene_sets" (data type "GeneSets").
    """
    block_base_name = "GENE_SETS_UPLOAD"
    block_group = GroupType.INPUT_DATA
    name = "Upload Gene Sets"

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "done"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    upload_gs = ParamField("upload_gs",
                           title="Gene sets in .gmt format",
                           order_num=10,
                           input_type=InputType.FILE_INPUT,
                           field_type=FieldType.CUSTOM)

    _gene_sets = OutputBlockField(name="gene_sets",
                                  provided_data_type="GeneSets")

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Parse the uploaded file and publish it as the "gene_sets" output.

        The file is read with ``GmtStorage.read_inp`` using a tab separator
        and stored through a ``GeneSets`` object created in the experiment's
        data folder.  Any exception is reported to the experiment log and to
        the module logger instead of being propagated; the block is stored
        on ``exp`` in either case.
        """
        try:
            gmt_file = self.upload_gs.get_file()
            gs = GmtStorage.read_inp(gmt_file, "\t")
            gene_sets = GeneSets(exp.get_data_folder(), str(self.uuid))
            gene_sets.store_gs(gs)
            self.set_out_var("gene_sets", gene_sets)
        except Exception as e:
            exp.log(self.uuid, e, severity="CRITICAL")
            # log.exception records the traceback as well as the message,
            # which log.error(e) would drop.
            log.exception(e)

        # NOTE(review): the block is stored even when parsing failed, so the
        # "gene_sets" output may be unset afterwards - confirm this is intended.
        exp.store_block(self)
Esempio n. 3
0
class EnrichmentNoTBlock(GenericBlock):
    """Testing block that runs ``enrichment_no_t_task`` on two gene-set inputs.

    Inputs "gs" and "patterns" are both of data type "GeneSets"; parameter
    "T" is the enrichment threshold; the output "dictionary_set" has data
    type "DictionarySet".
    """
    block_base_name = "ENRICHMENT_COM"
    name = "Comodule Enrichment"

    is_abstract = False
    block_group = GroupType.TESTING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _cs_1 = InputBlockField(name="gs",
                            order_num=10,
                            required_data_type="GeneSets",
                            required=True)
    H = InputBlockField(name="patterns",
                        order_num=11,
                        required_data_type="GeneSets",
                        required=True)
    # The default is a real float so that it agrees with the declared
    # FieldType.FLOAT (it used to be the string "0.05").
    # NOTE(review): presumably self.T then starts out numeric - confirm
    # against ParamField's handling of init_val.
    _t = ParamField(name="T",
                    order_num=12,
                    title="Enrichment threshold",
                    input_type=InputType.TEXT,
                    field_type=FieldType.FLOAT,
                    init_val=0.05)

    # NOTE(review): this attribute name shadows the ``dict`` builtin inside
    # the class body; it is kept because outside code may reference it.
    dict = OutputBlockField(name="dictionary_set",
                            provided_data_type="DictionarySet")

    def __init__(self, *args, **kwargs):
        super(EnrichmentNoTBlock, self).__init__(*args, **kwargs)
        # No task has been scheduled yet.
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Dispatch ``enrichment_no_t_task`` with the inputs and threshold.

        The block is stored on ``exp`` before the task is sent.
        """
        self.clean_errors()
        gs = self.get_input_var("gs")
        cs = self.get_input_var("patterns")
        self.celery_task = wrapper_task.s(enrichment_no_t_task,
                                          exp,
                                          self,
                                          T=self.T,
                                          gs=gs,
                                          patterns=cs,
                                          base_filename="%s_%s_enrich" %
                                          (self.uuid, 'enrichment_cont'))
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish ``flt_es`` as the "dictionary_set" output and store the block."""
        self.set_out_var("dictionary_set", flt_es)
        exp.store_block(self)
Esempio n. 4
0
class MergeGeneSetWithPlatformAnnotation(GenericBlock):
    """Processing block that maps a gene set onto a platform annotation.

    Inputs are "gs" (GeneSets) and "ann" (PlatformAnnotation); the result
    is published as the "gs" output (GeneSets).  The mapping is done by
    ``map_gene_sets_to_probes``, dispatched through ``wrapper_task``.
    """

    block_base_name = "MERGE_GS_GPL_ANN"
    name = "Merge gene set with platform"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    # Append the shared actions from execute_block_actions_list.
    _block_actions.extend(execute_block_actions_list)

    _input_gs = InputBlockField(
        name="gs",
        order_num=10,
        required_data_type="GeneSets",
        required=True,
    )
    _input_ann = InputBlockField(
        name="ann",
        order_num=20,
        required_data_type="PlatformAnnotation",
        required=True,
    )

    # Note that the output shares the name "gs" with the first input.
    _gs = OutputBlockField(
        name="gs",
        field_type=FieldType.HIDDEN,
        init_val=None,
        provided_data_type="GeneSets",
    )

    def __init__(self, *args, **kwargs):
        super(MergeGeneSetWithPlatformAnnotation, self).__init__(
            *args, **kwargs)
        # Nothing has been scheduled yet.
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Dispatch ``map_gene_sets_to_probes`` for the bound inputs.

        The block is stored on ``exp`` before the task is sent.
        """
        self.clean_errors()
        source_sets = self.get_input_var("gs")
        annotation = self.get_input_var("ann")

        data_dir = exp.get_data_folder()
        merged_basename = "%s_merged" % self.uuid
        annotation_sets = annotation.gene_sets

        self.celery_task = wrapper_task.s(
            map_gene_sets_to_probes,
            exp,
            self,
            base_dir=data_dir,
            base_filename=merged_basename,
            ann_gene_sets=annotation_sets,
            src_gene_sets=source_sets,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Publish the merged gene sets as the "gs" output and store the block."""
        self.set_out_var("gs", gs)
        exp.store_block(self)
Esempio n. 5
0
class SvdSubAgg(GenericBlock):
    """Abstract aggregation block built on ``aggregation_task``.

    Inputs are an mRNA ExpressionSet, a miRNA ExpressionSet and a
    BinaryInteraction; parameter "c" is a float constant; the output
    "agg_es" is an ExpressionSet.  The class-level ``mode`` string is passed
    to the task and used in the output file name.
    """

    is_abstract = True
    block_group = GroupType.AGGREGATION

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    # Append the shared actions from execute_block_actions_list.
    _block_actions.extend(execute_block_actions_list)

    _mRNA_es = InputBlockField(
        name="mRNA_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _miRNA_es = InputBlockField(
        name="miRNA_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )
    _interaction = InputBlockField(
        name="interaction",
        order_num=30,
        required_data_type="BinaryInteraction",
        required=True,
    )

    c = ParamField(
        name="c",
        title="Constant c",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=1.0,
    )

    agg_es = OutputBlockField(
        name="agg_es",
        provided_data_type="ExpressionSet",
    )

    # Empty here; presumably overridden by concrete subclasses - TODO confirm.
    mode = ""

    def __init__(self, *args, **kwargs):
        super(SvdSubAgg, self).__init__(*args, **kwargs)
        # Nothing has been scheduled yet.
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Dispatch ``aggregation_task`` with the inputs, ``mode`` and ``c``.

        The block is stored on ``exp`` before the task is sent.
        """
        self.clean_errors()
        m_rna_input = self.get_input_var("mRNA_es")
        mi_rna_input = self.get_input_var("miRNA_es")
        interaction_input = self.get_input_var("interaction")

        self.celery_task = wrapper_task.s(
            aggregation_task,
            exp,
            self,
            mode=self.mode,
            c=self.c,
            m_rna_es=m_rna_input,
            mi_rna_es=mi_rna_input,
            interaction_matrix=interaction_input,
            base_filename="%s_%s_agg" % (self.uuid, self.mode),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        """Publish the aggregated expression set and store the block."""
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
Esempio n. 6
0
class GlobalTest(GenericBlock):
    """Processing block that runs ``global_test_task`` on an ES and gene sets.

    Inputs are "es" (ExpressionSet) and "gs" (GeneSets); the output "result"
    is a TableResult that is created up front in ``__init__`` and handed to
    the task.
    """

    block_base_name = "GLOBAL_TEST"
    name = "Goeman global test"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    # Append the shared actions from execute_block_actions_list.
    _block_actions.extend(execute_block_actions_list)

    _input_es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _input_gs = InputBlockField(
        name="gs",
        order_num=20,
        required_data_type="GeneSets",
        required=True,
    )

    _result = OutputBlockField(
        name="result",
        field_type=FieldType.STR,
        provided_data_type="TableResult",
        init_val=None,
    )

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["gt_result.html"],
    )

    def __init__(self, *args, **kwargs):
        super(GlobalTest, self).__init__(*args, **kwargs)
        self.celery_task = None

        # The result table lives in the data folder of the owning experiment,
        # which is looked up by this block's exp_id.
        owner_exp = Experiment.get_exp_by_id(self.exp_id)
        self.result = TableResult(
            base_dir=owner_exp.get_data_folder(),
            base_filename="%s_gt_result" % self.uuid,
        )
        self.result.headers = [
            'p-value',
            'Statistic',
            'Expected',
            'Std.dev',
            '#Cov',
        ]

    def execute(self, exp, *args, **kwargs):
        """Dispatch ``global_test_task`` with both inputs and the result table.

        The block is stored on ``exp`` before the task is sent.
        """
        self.clean_errors()
        expression_set = self.get_input_var("es")
        gene_sets = self.get_input_var("gs")
        result_table = self.result

        self.celery_task = wrapper_task.s(
            global_test_task,
            exp,
            self,
            es=expression_set,
            gene_sets=gene_sets,
            table_result=result_table,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Keep ``result`` on the block, publish it as "result" and store."""
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
Esempio n. 7
0
class ThresholdBlock(GenericBlock):
    """Block that runs ``threshold_task`` on an expression set.

    The input is "es" (ExpressionSet), the parameter "T" is a float
    threshold, and the output "gene_sets" has data type "GeneSets".
    """

    block_base_name = "THRESHOLD"
    name = "Threshold"

    is_abstract = False
    block_group = GroupType.SNMNMF

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    # Append the shared actions from execute_block_actions_list.
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    t = ParamField(
        name="T",
        title="Threshold",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1,
    )

    flt_es = OutputBlockField(
        name="gene_sets",
        provided_data_type="GeneSets",
    )

    def __init__(self, *args, **kwargs):
        super(ThresholdBlock, self).__init__(*args, **kwargs)
        # Nothing has been scheduled yet.
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Dispatch ``threshold_task`` with the input and the threshold ``T``.

        The block is stored on ``exp`` before the task is sent.
        """
        self.clean_errors()
        expression_set = self.get_input_var("es")
        self.celery_task = wrapper_task.s(
            threshold_task,
            exp,
            self,
            es=expression_set,
            T=self.T,
            base_filename="%s_%s_thr" % (self.uuid, 'threshold'),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish ``flt_es`` as the "gene_sets" output and store the block."""
        self.set_out_var("gene_sets", flt_es)
        exp.store_block(self)
Esempio n. 8
0
class MergeExpressionSets(GenericBlock):
    """Processing block that merges two expression sets via ``merge_two_es``.

    Inputs "es_1" and "es_2" are both ExpressionSet values; the output
    "merged_es" is an ExpressionSet.
    """

    block_base_name = "MergeES"
    name = "Merge ES by concatenation"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    # Append the shared actions from execute_block_actions_list.
    _block_actions.extend(execute_block_actions_list)

    _es_1 = InputBlockField(
        name="es_1",
        title="Set 1",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _es_2 = InputBlockField(
        name="es_2",
        title="Set 2",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )

    merged_es = OutputBlockField(
        name="merged_es",
        provided_data_type="ExpressionSet",
    )

    def __init__(self, *args, **kwargs):
        super(MergeExpressionSets, self).__init__(*args, **kwargs)
        # Nothing has been scheduled yet.
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Dispatch ``merge_two_es`` for the two bound expression sets.

        The block is stored on ``exp`` before the task is sent.
        """
        self.clean_errors()
        first_set = self.get_input_var("es_1")
        second_set = self.get_input_var("es_2")
        merged_basename = "%s_merged" % self.uuid

        self.celery_task = wrapper_task.s(
            merge_two_es,
            exp,
            self,
            es_1=first_set,
            es_2=second_set,
            base_filename=merged_basename,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Publish ``es`` as the "merged_es" output and store the block."""
        self.set_out_var("merged_es", es)
        exp.store_block(self)
Esempio n. 9
0
class RcVisualizer(GenericBlock):
    """Abstract visualisation block fed by a ResultsContainer input.

    Declares the "results_container" input, the property-backed fields "rc"
    and "available_metrics", and a "metric" select parameter whose options
    come from ``available_metrics``.
    """
    block_base_name = "RC_VIZUALIZER"
    is_block_supports_auto_execution = False
    block_group = GroupType.VISUALIZE
    is_abstract = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "input_bound"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("configure_table", ["input_bound", "ready"], "ready"),
    ])

    results_container = InputBlockField(name="results_container",
                                        required_data_type="ResultsContainer",
                                        required=True,
                                        field_type=FieldType.CUSTOM)
    # NOTE(review): no ``rc`` property is defined in this class; presumably
    # subclasses provide it - confirm.
    _rc = BlockField(name="rc",
                     field_type=FieldType.CUSTOM,
                     is_a_property=True)
    _available_metrics = BlockField(name="available_metrics",
                                    field_type=FieldType.RAW,
                                    is_a_property=True)

    metric = ParamField(name="metric",
                        title="Metric",
                        field_type=FieldType.STR,
                        input_type=InputType.SELECT,
                        select_provider="available_metrics")

    def __init__(self, *args, **kwargs):
        super(RcVisualizer, self).__init__(*args, **kwargs)

    @property
    @log_timing
    def available_metrics(self):
        """Return select options for metrics that produce a single number.

        Returns:
            A list of ``{"pk": <metric name>, "str": <metric title>}`` dicts
            built from ``metrics_dict``, or an empty list if building the
            list raises (the exception is logged, not propagated).
        """
        try:
            return [{
                "pk": metric_name,
                "str": metric.title
            } for metric_name, metric in metrics_dict.iteritems()
                    if metric.produce_single_number]
        # ``except X as e`` is accepted by Python 2.6+ and Python 3, unlike
        # the comma form.
        except Exception as e:
            log.exception(e)
            return []
Esempio n. 10
0
class UploadInteraction(GenericBlock):
    """Input-data block that turns an uploaded matrix into a BinaryInteraction.

    Parameters are the uploaded file ("upload_interaction") and two optional
    strings ("row_units", "col_units"); the output "interaction" has data
    type "BinaryInteraction".
    """

    block_base_name = "GENE_INTERACTION"
    block_group = GroupType.INPUT_DATA
    name = "Upload gene interaction"

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "done"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    upload_interaction = ParamField(
        "upload_interaction",
        title="Interaction matrix",
        order_num=10,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
    )
    row_units = ParamField(
        "row_units",
        title="Row units",
        order_num=11,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )
    col_units = ParamField(
        "col_units",
        title="Column units",
        order_num=12,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )

    _interaction = OutputBlockField(
        name="interaction",
        provided_data_type="BinaryInteraction",
    )

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Store the uploaded matrix as a BinaryInteraction and publish it.

        The upload is read as a data frame, stored through a
        ``BinaryInteraction`` created in the experiment's data folder, tagged
        with the row/column units, published as the "interaction" output,
        and the block is stored on ``exp``.
        """
        matrix_frame = self.upload_interaction.get_as_data_frame()

        data_dir = exp.get_data_folder()
        binary_interaction = BinaryInteraction(data_dir, str(self.uuid))
        binary_interaction.store_matrix(matrix_frame)

        binary_interaction.row_units = self.row_units
        binary_interaction.col_units = self.col_units

        self.set_out_var("interaction", binary_interaction)
        exp.store_block(self)
Esempio n. 11
0
class ZScoreBlock(GenericBlock):
    """Normalization block that runs ``zscore_task`` on an expression set.

    The input is "es" (ExpressionSet); the output "flt_zscore_es" is an
    ExpressionSet.
    """

    block_base_name = "ZSCORE_NORM"
    name = "Z-score Normalization"

    is_abstract = False
    block_group = GroupType.NORMALIZATION

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    # Append the shared actions from execute_block_actions_list.
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )

    flt_es = OutputBlockField(
        name="flt_zscore_es",
        provided_data_type="ExpressionSet",
    )

    def __init__(self, *args, **kwargs):
        super(ZScoreBlock, self).__init__(*args, **kwargs)
        # Nothing has been scheduled yet.
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Dispatch ``zscore_task`` for the bound expression set.

        The block is stored on ``exp`` before the task is sent.
        """
        self.clean_errors()
        expression_set = self.get_input_var("es")
        output_basename = "%s_%s_flt" % (self.uuid, 'zscore')

        self.celery_task = wrapper_task.s(
            zscore_task,
            exp,
            self,
            es=expression_set,
            base_filename=output_basename,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish ``flt_es`` as the "flt_zscore_es" output and store the block."""
        self.set_out_var("flt_zscore_es", flt_es)
        exp.store_block(self)
Esempio n. 12
0
class GetBroadInstituteGeneSet(GenericBlock):
    """Input-data block that publishes a stored MSigDB gene set.

    The "msigdb_id" select parameter takes its options from
    ``all_gene_sets``; the chosen record's gene sets become the "gs" output.
    """

    block_base_name = "BI_GENE_SET"
    block_group = GroupType.INPUT_DATA
    name = "Get MSigDB Gene Set"

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord(
            "on_params_is_valid",
            ["validating_params"],
            "done",
            reload_block_in_client=True,
        ),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    # TODO: maybe create more general solution ?
    _all_gene_sets = BlockField(
        "all_gene_sets",
        title="",
        input_type=InputType.HIDDEN,
        field_type=FieldType.RAW,
        is_a_property=True,
    )

    msigdb_id = ParamField(
        name="msigdb_id",
        title="MSigDB gene set",
        input_type=InputType.SELECT,
        field_type=FieldType.INT,
        init_val=0,  # TODO: fix hardcoded value
        select_provider="all_gene_sets",
    )

    _gs = OutputBlockField(
        name="gs",
        field_type=FieldType.HIDDEN,
        provided_data_type="GeneSets",
    )

    @property
    def all_gene_sets(self):
        """Return whatever ``BroadInstituteGeneSet.get_all_meta()`` yields."""
        return BroadInstituteGeneSet.get_all_meta()

    def on_params_is_valid(self, exp):
        """Publish the selected record's gene sets, then defer to the parent.

        The record is fetched with ``pk=self.msigdb_id``; a failed lookup is
        not handled here.
        """
        selected_record = BroadInstituteGeneSet.objects.get(pk=self.msigdb_id)
        selected_gene_sets = selected_record.get_gene_sets()
        self.set_out_var("gs", selected_gene_sets)

        super(GetBroadInstituteGeneSet, self).on_params_is_valid(exp)
Esempio n. 13
0
class UserUploadComplex(GenericBlock):
    # unit_options =
    block_base_name = "UPLOAD_CMPLX"
    block_group = GroupType.INPUT_DATA
    name = "Upload mRna/miRna/methyl"

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data"),
        ActionRecord("success", ["processing_upload"],
                     "done",
                     reload_block_in_client=True),
        ActionRecord("error", ["processing_upload"], "valid_params"),
    ])

    m_rna_matrix = ParamField("m_rna_matrix",
                              title="mRNA expression",
                              order_num=10,
                              input_type=InputType.FILE_INPUT,
                              field_type=FieldType.CUSTOM)
    m_rna_platform = ParamField("m_rna_platform",
                                title="Platform ID",
                                order_num=11,
                                input_type=InputType.TEXT,
                                field_type=FieldType.STR,
                                required=False)

    m_rna_unit = ParamField(
        "m_rna_unit",
        title="Working unit [used when platform is unknown]",
        order_num=12,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=False,
        init_val="RefSeq",
        options={
            "inline_select_provider":
            True,
            "select_options": [["RefSeq", "RefSeq"], ["Entrez", "EntrezID"],
                               ["Symbol", "Symbol"]]
        })

    m_rna_matrix_ori = ParamField("m_rna_matrix_ori",
                                  title="Matrix orientation",
                                  order_num=13,
                                  input_type=InputType.SELECT,
                                  field_type=FieldType.STR,
                                  init_val="SxG",
                                  options={
                                      "inline_select_provider":
                                      True,
                                      "select_options":
                                      [["SxG", "Samples x Genes"],
                                       ["GxS", "Genes x Samples"]]
                                  })
    csv_sep_m_rna = ParamField("csv_sep_m_rna",
                               title="CSV separator symbol",
                               order_num=14,
                               input_type=InputType.SELECT,
                               field_type=FieldType.STR,
                               init_val=",",
                               options={
                                   "inline_select_provider":
                                   True,
                                   "select_options": [
                                       [" ", "space ( )"],
                                       [",", "comma  (,)"],
                                       ["\t", "tab (\\t)"],
                                       [";", "semicolon (;)"],
                                       [":", "colon (:)"],
                                   ]
                               })

    mi_rna_matrix = ParamField("mi_rna_matrix",
                               title=u"μRNA expression",
                               order_num=20,
                               input_type=InputType.FILE_INPUT,
                               field_type=FieldType.CUSTOM,
                               required=False)

    mi_rna_platform = ParamField("mi_rna_platform",
                                 title="Platform ID",
                                 order_num=21,
                                 input_type=InputType.TEXT,
                                 field_type=FieldType.STR,
                                 required=False)
    mi_rna_unit = ParamField(
        "mi_rna_unit",
        title="Working unit [used when platform is unknown]",
        order_num=22,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=False,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [["RefSeq", "RefSeq"], ["mirbase", "miRBase ID"]]
        })

    mi_rna_matrix_ori = ParamField("mi_rna_matrix_ori",
                                   title="Matrix orientation",
                                   order_num=23,
                                   input_type=InputType.SELECT,
                                   field_type=FieldType.STR,
                                   init_val="SxG",
                                   options={
                                       "inline_select_provider":
                                       True,
                                       "select_options":
                                       [["SxG", "Samples x Genes"],
                                        ["GxS", "Genes x Samples"]]
                                   })
    csv_sep_mi_rna = ParamField("csv_sep_mi_rna",
                                title="CSV separator symbol",
                                order_num=24,
                                input_type=InputType.SELECT,
                                field_type=FieldType.STR,
                                init_val=",",
                                options={
                                    "inline_select_provider":
                                    True,
                                    "select_options": [
                                        [" ", "space ( )"],
                                        [",", "comma  (,)"],
                                        ["\t", "tab (\\t)"],
                                        [";", "semicolon (;)"],
                                        [":", "colon (:)"],
                                    ]
                                })

    # --- Methylation upload parameters (order_num 30-34) ---

    # Optional file upload holding the methylation expression matrix.
    methyl_matrix = ParamField("methyl_matrix",
                               title="Methylation expression",
                               order_num=30,
                               input_type=InputType.FILE_INPUT,
                               field_type=FieldType.CUSTOM,
                               required=False)

    # Optional free-text platform identifier for the methylation data.
    methyl_platform = ParamField("methyl_platform",
                                 title="Platform ID",
                                 order_num=31,
                                 input_type=InputType.TEXT,
                                 field_type=FieldType.STR,
                                 required=False)
    # Disabled parameter (would occupy order_num=32), kept for reference:
    # methyl_unit = ParamField("methyl_unit", title="Working unit [used when platform is unknown]", init_val=None,
    #                        order_num=32, input_type=InputType.TEXT, field_type=FieldType.STR, required=False)

    # Orientation of the uploaded matrix; defaults to "SxG"
    # (Samples x Genes), the alternative being "GxS" (Genes x Samples).
    methyl_matrix_ori = ParamField("methyl_matrix_ori",
                                   title="Matrix orientation",
                                   order_num=33,
                                   input_type=InputType.SELECT,
                                   field_type=FieldType.STR,
                                   init_val="SxG",
                                   options={
                                       "inline_select_provider":
                                       True,
                                       "select_options":
                                       [["SxG", "Samples x Genes"],
                                        ["GxS", "Genes x Samples"]]
                                   })

    # Separator selection for the methylation file; defaults to a comma.
    # Each option is a [value, label] pair.
    csv_sep_methyl = ParamField("csv_sep_methyl",
                                title="CSV separator symbol",
                                order_num=34,
                                input_type=InputType.SELECT,
                                field_type=FieldType.STR,
                                init_val=",",
                                options={
                                    "inline_select_provider":
                                    True,
                                    "select_options": [
                                        [" ", "space ( )"],
                                        [",", "comma  (,)"],
                                        ["\t", "tab (\\t)"],
                                        [";", "semicolon (;)"],
                                        [":", "colon (:)"],
                                    ]
                                })

    # --- Phenotype upload parameters (order_num 40-50) ---

    # Optional file upload holding the phenotype matrix.
    pheno_matrix = ParamField("pheno_matrix",
                              title="Phenotype matrix",
                              order_num=40,
                              input_type=InputType.FILE_INPUT,
                              field_type=FieldType.CUSTOM,
                              required=False)

    # Separator selection for the phenotype file; defaults to a comma.
    # Each option is a [value, label] pair.
    csv_sep_pheno = ParamField("csv_sep_pheno",
                               title="CSV separator symbol",
                               order_num=50,
                               input_type=InputType.SELECT,
                               field_type=FieldType.STR,
                               init_val=",",
                               options={
                                   "inline_select_provider":
                                   True,
                                   "select_options": [
                                       [" ", "space ( )"],
                                       [",", "comma  (,)"],
                                       ["\t", "tab (\\t)"],
                                       [";", "semicolon (;)"],
                                       [":", "colon (:)"],
                                   ]
                               })

    # Field declared with is_a_property=True; the value comes from the
    # `is_sub_pages_visible` property defined on this class.
    _is_sub_pages_visible = BlockField("is_sub_pages_visible",
                                       FieldType.RAW,
                                       is_a_property=True)

    # Hidden ExpressionSet outputs, one per uploaded data kind. They are
    # filled by `success` and read back by the phenotype helpers below.
    _m_rna_es = OutputBlockField(name="m_rna_es",
                                 field_type=FieldType.HIDDEN,
                                 provided_data_type="ExpressionSet")
    # Disabled output, kept for reference:
    # _m_rna_annotation = OutputBlockField(name="m_rna_annotation", field_type=FieldType.HIDDEN,
    #     provided_data_type="PlatformAnnotation")
    _mi_rna_es = OutputBlockField(name="mi_rna_es",
                                  field_type=FieldType.HIDDEN,
                                  provided_data_type="ExpressionSet")
    _methyl_es = OutputBlockField(name="methyl_es",
                                  field_type=FieldType.HIDDEN,
                                  provided_data_type="ExpressionSet")

    # Custom-typed fields for the GPL files of each data kind.
    # NOTE(review): the third positional argument (None) is presumably the
    # initial value -- confirm against the BlockField signature.
    mrna_gpl_file = BlockField("mrna_gpl_file", FieldType.CUSTOM, None)
    mirna_gpl_file = BlockField("mirna_gpl_file", FieldType.CUSTOM, None)
    methyl_gpl_file = BlockField("methyl_gpl_file", FieldType.CUSTOM, None)

    # Sub-page registry: a single page for assigning phenotype classes,
    # described by its title, resource name and widget template path.
    pages = BlockField("pages",
                       FieldType.RAW,
                       init_val={
                           "assign_phenotype_classes": {
                               "title": "Assign phenotype classes",
                               "resource": "assign_phenotype_classes",
                               "widget":
                               "widgets/assign_phenotype_classes.html"
                           },
                       })

    @property
    def is_sub_pages_visible(self):
        """Return True when the block state is one that exposes sub pages."""
        visible_states = (
            'source_was_preprocessed',
            'sample_classes_assigned',
            'ready',
            'done',
        )
        return self.state in visible_states

    def __init__(self, *args, **kwargs):
        """Initialise the parent block, then start with no task attached."""
        super(UserUploadComplex, self).__init__(*args, **kwargs)
        # Set by `process_upload` once a task signature has been built.
        self.celery_task = None

    def process_upload(self, exp, *args, **kwargs):
        """Clear previous errors, then build and dispatch the upload task.

        The block is stored in ``exp`` after the task signature has been
        attached and before the task is submitted with ``apply_async``.
        """
        self.clean_errors()
        upload_signature = wrapper_task.s(user_upload_complex_task, exp, self)
        self.celery_task = upload_signature
        exp.store_block(self)
        self.celery_task.apply_async()

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return the JS-ready phenotype built from the first stored ES.

        All three outputs are fetched; the first one that is not None, in
        the order mRNA, miRNA, methylation, is passed to
        ``prepare_phenotype_for_js_from_es``.

        Raises:
            Exception: if none of the three outputs has been stored.
        """
        output_names = ("m_rna_es", "mi_rna_es", "methyl_es")
        candidates = [self.get_out_var(out_name) for out_name in output_names]

        for candidate in candidates:
            if candidate is not None:
                return prepare_phenotype_for_js_from_es(candidate)

        raise Exception("No data was stored before")

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Write a user-defined class column into every stored ES.

        The phenotype frame of the first available expression set (mRNA,
        then miRNA, then methylation) receives a new column named by the
        request's ``user_class_title`` and holding its ``classes``. That
        same frame is then stored into each available expression set, and
        the block is persisted through ``exp``.

        Raises:
            Exception: if none of the three outputs has been stored.
        """
        stored_sets = [
            self.get_out_var(out_name)
            for out_name in ("m_rna_es", "mi_rna_es", "methyl_es")
        ]
        available = [item for item in stored_sets if item is not None]
        if not available:
            raise Exception("No data was stored before")

        pheno_df = available[0].get_pheno_data_frame()

        payload = json.loads(request.body)
        # Read "classes" first to keep the original lookup order.
        class_values = payload["classes"]
        class_title = payload["user_class_title"]
        pheno_df[class_title] = class_values

        for work_es in available:
            work_es.pheno_metadata["user_class_title"] = class_title
            work_es.store_pheno_data_frame(pheno_df)

        exp.store_block(self)

    def success(self, exp, m_rna_es, mi_rna_es, methyl_es):
        """Publish each truthy expression set as an output and store the block."""
        produced = (
            ("m_rna_es", m_rna_es),
            ("mi_rna_es", mi_rna_es),
            ("methyl_es", methyl_es),
        )
        for out_name, expression_set in produced:
            # Falsy results (e.g. None) leave the output untouched.
            if expression_set:
                self.set_out_var(out_name, expression_set)
        exp.store_block(self)
Esempio n. 14
0
        if isinstance(new_errors, collections.Iterable):
            self.errors.extend(new_errors)
        elif new_errors:
            self.errors.append(new_errors)

        exp.store_block(self)

    def reset_execution(self, exp, *args, **kwargs):
        """Drop the recorded errors and persist the block through ``exp``."""
        self.clean_errors()
        exp.store_block(self)


# Reusable transitions for saving and validating block parameters.
# NOTE(review): each ActionRecord appears to be
# (action name, allowed source states, resulting state) -- confirm against
# the ActionRecord definition.
save_params_actions_list = ActionsList([
    ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                 "validating_params",
                 user_title="Save parameters"),
    ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
    ActionRecord("on_params_not_valid", ["validating_params"], "created"),
])

# Reusable transitions for running a block: execute, success, error and
# reset. Blocks append these to their own `_block_actions`.
# NOTE(review): "*" is presumably a wildcard matching any state, which would
# make the other listed source states redundant -- confirm.
execute_block_actions_list = ActionsList([
    ActionRecord("execute", ["ready"], "working", user_title="Run block"),
    ActionRecord("success", ["working"], "done",
                 propagate_auto_execution=True),
    ActionRecord("error", ["*", "ready", "working"], "execution_error"),
    ActionRecord("reset_execution",
                 ["*", "done", "execution_error", "ready", "working"],
                 "ready",
                 user_title="Reset execution")
])
Esempio n. 15
0
class MergeComoduleSets(GenericBlock):
    """Processing block that hands two comodule sets to a merge task.

    The task result is published as the ``comodule_set`` output.
    """
    block_base_name = "MERGE_COMODULE_SETS"
    name = "Merge Comodule Sets"

    is_abstract = False
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, then the shared execution ones.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # First comodule set and its display name.
    _cs_1 = InputBlockField(
        name="cs_1",
        order_num=10,
        required_data_type="ComoduleSet",
        required=True,
    )
    _cs_1_name = ParamField(
        name="cs_1_name",
        order_num=11,
        title="Comodule 1 name",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        init_val="genes",
    )

    # Second comodule set and its display name.
    _cs_2 = InputBlockField(
        name="cs_2",
        order_num=20,
        required_data_type="ComoduleSet",
        required=True,
    )
    _cs_2_name = ParamField(
        name="cs_2_name",
        order_num=21,
        title="Comodule 2 name",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        init_val="genes",
    )

    flt_es = OutputBlockField(
        name="comodule_set",
        provided_data_type="ComoduleSet",
    )

    def __init__(self, *args, **kwargs):
        """Initialise the parent block, then start with no task attached."""
        super(MergeComoduleSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the merge task from the bound inputs and dispatch it."""
        self.clean_errors()
        first_set = self.get_input_var("cs_1")
        second_set = self.get_input_var("cs_2")

        task_kwargs = {
            "cs_1": first_set,
            "cs_2": second_set,
            "cs_1_name": self.cs_1_name,
            "cs_2_name": self.cs_2_name,
            "base_filename": "%s_%s_thr" % (self.uuid, 'merge_cs'),
        }
        self.celery_task = wrapper_task.s(
            merge_comodules_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the merged set as ``comodule_set`` and store the block."""
        self.set_out_var("comodule_set", flt_es)
        exp.store_block(self)
Esempio n. 16
0
class NIMFASNMNMFBlock(GenericBlock):
    """Block that schedules the NIMFA SNMNMF task on mRNA and miRNA data.

    It takes two expression sets and two binary interactions as inputs and
    publishes the task results as the ``W``, ``H1_miRNA`` and ``H2_genes``
    outputs. DNA-methylation inputs and outputs are not wired in.
    """
    block_base_name = "NIMFA_SNMNMF"
    name = "NIMFA SNMNMF"

    is_abstract = False
    block_group = GroupType.SNMNMF

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, then the shared execution ones.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports. order_num 30 and 60 are left free; the original file kept
    # the DNA-methylation ports for those slots commented out.
    _m_rna = InputBlockField(
        name="mRNA",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _mi_rna = InputBlockField(
        name="miRNA",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )
    _gene2gene = InputBlockField(
        name="Gene2Gene",
        order_num=40,
        required_data_type="BinaryInteraction",
        required=True,
    )
    _mirna2gene = InputBlockField(
        name="miRNA2gene",
        order_num=50,
        required_data_type="BinaryInteraction",
        required=True,
    )

    # Numeric parameters passed to the task inside its ``params`` dict.
    l1 = ParamField(
        name="l1",
        order_num=70,
        title="l1",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1,
    )
    l2 = ParamField(
        name="l2",
        order_num=80,
        title="l2",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1,
    )
    g1 = ParamField(
        name="g1",
        order_num=90,
        title="g1",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1,
    )
    g2 = ParamField(
        name="g2",
        order_num=100,
        title="g2",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1,
    )
    rank = ParamField(
        name="rank",
        order_num=110,
        title="rank",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=50,
    )

    # Outputs filled in by `success`.
    w = OutputBlockField(name="W", provided_data_type="ExpressionSet")
    H1_miRNA = OutputBlockField(
        name="H1_miRNA", provided_data_type="ExpressionSet")
    H2_genes = OutputBlockField(
        name="H2_genes", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the parent block, then start with no task attached."""
        super(NIMFASNMNMFBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Collect inputs and parameters, then dispatch the SNMNMF task."""
        self.clean_errors()
        m_rna_input = self.get_input_var("mRNA")
        mi_rna_input = self.get_input_var("miRNA")
        gene_to_gene = self.get_input_var("Gene2Gene")
        mi_rna_to_gene = self.get_input_var("miRNA2gene")

        factorization_params = {
            'l1': self.l1,
            'l2': self.l2,
            'g1': self.g1,
            'g2': self.g2,
            'rank': self.rank,
        }
        output_base = "%s_nimfa_snmnmf" % self.uuid

        self.celery_task = wrapper_task.s(
            nimfa_snmnmf_task,
            exp,
            self,
            mRNA=m_rna_input,
            miRNA=mi_rna_input,
            gene2gene=gene_to_gene,
            miRNA2gene=mi_rna_to_gene,
            params=factorization_params,
            base_filename=output_base
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, W, H1, H2):
        """Publish the three result matrices and store the block."""
        for out_name, matrix in (("W", W), ("H1_miRNA", H1), ("H2_genes", H2)):
            self.set_out_var(out_name, matrix)
        exp.store_block(self)
Esempio n. 17
0
class GeneSetAgg(GenericBlock):
    """Processing block that aggregates an expression set by gene sets.

    The ``es`` and ``gs`` inputs plus the chosen ``agg_method`` are handed
    to the ``do_gs_agg`` task; its result is published as ``agg_es``.
    """
    block_base_name = "GENE_SET_AGG"
    name = "Gene sets aggregation"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, then the shared execution ones.
    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es",
                          order_num=10,
                          required_data_type="ExpressionSet",
                          required=True)
    _gs = InputBlockField(name="gs",
                          order_num=20,
                          required_data_type="GeneSets",
                          required=True)

    # Aggregation method selector; each option is a [value, label] pair.
    # The "Median" option used to carry the value "media", which looks like
    # a typo; it now matches the "median" value used by the CV variant of
    # this block.
    # NOTE(review): confirm that do_gs_agg dispatches on "median".
    agg_method = ParamField("agg_method",
                            title="Aggregate method",
                            order_num=50,
                            input_type=InputType.SELECT,
                            field_type=FieldType.STR,
                            init_val="mean",
                            options={
                                "inline_select_provider":
                                True,
                                "select_options": [["mean", "Mean"],
                                                   ["median", "Median"]]
                            })

    agg_es = OutputBlockField(name="agg_es",
                              provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the parent block, then start with no task attached."""
        super(GeneSetAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the aggregation task from the bound inputs and dispatch it.

        The block is stored in ``exp`` after the task signature has been
        attached and before the task is submitted with ``apply_async``.
        """
        self.clean_errors()
        es = self.get_input_var("es")
        gs = self.get_input_var("gs")

        base_filename = "%s_gs_agg" % (self.uuid, )

        self.celery_task = wrapper_task.s(do_gs_agg, exp, self, es, gs,
                                          self.agg_method, base_filename)

        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        """Publish the aggregated set as ``agg_es`` and store the block."""
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
Esempio n. 18
0
class GenericClassifier(GenericBlock):
    """Abstract base for blocks that train and apply a classifier.

    Subclasses set ``classifier_name`` and implement ``collect_options``.
    ``execute`` then dispatches ``apply_classifier`` with the train/test
    expression sets and the collected options, and ``success`` publishes
    the outcome as the ``result`` output.
    """
    block_group = GroupType.CLASSIFIER
    is_abstract = True

    is_block_supports_auto_execution = True
    classifier_name = ""

    # Block behavior: parameter saving followed by execution transitions.
    _block_actions = ActionsList([])
    _block_actions.extend(save_params_actions_list)
    _block_actions.extend(execute_block_actions_list)

    # Input ports definition
    _train_es = InputBlockField(name="train_es", order_num=10,
                                required_data_type="ExpressionSet",
                                required=True)
    _test_es = InputBlockField(name="test_es", order_num=20,
                               required_data_type="ExpressionSet",
                               required=True)

    # Provided outputs
    _result = OutputBlockField(name="result", field_type=FieldType.CUSTOM,
                               provided_data_type="ClassifierResult", init_val=None)

    def __init__(self, *args, **kwargs):
        """Initialise the parent block with empty option dictionaries."""
        super(GenericClassifier, self).__init__(*args, **kwargs)

        self.celery_task = None
        self.classifier_options = {}
        self.fit_options = {}

    @abstractmethod
    def collect_options(self):
        """
            Should populate `self.classifier_options` and `self.fit_options`
            from block parameters.
        """
        pass

    def get_option_safe(self, name, target_type=None):
        """Return attribute ``name``, optionally converted, or None.

        None is returned when the attribute is missing, when its value is
        falsy (so 0, False and "" count as unset), or when ``target_type``
        is given and the conversion raises.

        Args:
            name: attribute name to read from this block.
            target_type: optional callable used to convert the raw value.
        """
        if not hasattr(self, name):
            return None
        raw = getattr(self, name)
        if not raw:
            return None
        if not target_type:
            return raw
        try:
            return target_type(raw)
        except Exception:
            # Best-effort conversion: a value that cannot be converted is
            # treated as unset. `Exception` rather than a bare `except`, so
            # KeyboardInterrupt and SystemExit still propagate.
            return None

    def collect_option_safe(self, name, target_type=None, target_name=None):
        """Copy a truthy option into ``self.classifier_options``.

        The value is stored under ``target_name`` when that is given and
        under ``name`` otherwise. Falsy values are not stored.

        Returns:
            The value obtained from ``get_option_safe`` (possibly None).
        """
        value = self.get_option_safe(name, target_type)
        if value:
            if target_name:
                self.classifier_options[target_name] = value
            else:
                self.classifier_options[name] = value
        return value

    def execute(self, exp,  *args, **kwargs):
        """Reset the result, collect options and dispatch the classifier task."""
        self.set_out_var("result", None)
        self.collect_options()

        train_es = self.get_input_var("train_es")
        test_es = self.get_input_var("test_es")

        self.celery_task = wrapper_task.s(
            apply_classifier,
            exp=exp, block=self,

            train_es=train_es, test_es=test_es,

            classifier_name=self.classifier_name,
            classifier_options=self.classifier_options,
            fit_options=self.fit_options,

            base_folder=exp.get_data_folder(),
            base_filename="%s_%s" % (self.uuid, self.classifier_name),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Store the obtained result as the ``result`` output variable."""
        self.set_out_var("result", result)
        exp.store_block(self)

    def reset_execution(self, exp, *args, **kwargs):
        """Clear errors and the previous result, then store the block."""
        self.clean_errors()
        self.set_out_var("result", None)
        exp.store_block(self)
Esempio n. 19
0
class FeatureSelectionByCut(GenericBlock):
    """Filter block that passes a ranking cut to a feature-selection task.

    The expression set, the rank table and the three cut parameters are
    handed to ``feature_selection_by_cut``; its result is published as the
    ``es`` output.
    """
    block_base_name = "FS_BY_CUT"
    block_group = GroupType.FILTER
    name = "Feature Selection by Ranking"

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, then the shared execution ones.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )

    _rank_table = InputBlockField(
        name="rank_table",
        order_num=20,
        required_data_type="TableResult",
        required=True,
    )

    # Exposes the `cut_property_options` property defined further down.
    _cut_property_options = BlockField(
        name="cut_property_options",
        field_type=FieldType.RAW,
        is_a_property=True,
    )
    # Plain text input; the original file kept a SELECT variant fed by
    # "cut_property_options" commented out here.
    cut_property = ParamField(
        name="cut_property",
        title="Ranking property to use",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        order_num=10,
    )
    threshold = ParamField(
        name="threshold",
        title="Threshold for cut",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
    )
    _cut_direction_options = BlockField(
        name="cut_direction_options",
        field_type=FieldType.RAW,
    )
    cut_direction_options = ["<", "<=", ">=", ">"]
    cut_direction = ParamField(
        name="cut_direction",
        title="Direction of cut",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        select_provider="cut_direction_options",
        order_num=30,
        options={
            "inline_select_provider": True,
            "select_options": [[op, op] for op in ["<", "<=", ">=", ">"]],
        },
    )

    es = OutputBlockField(name="es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the parent block, then start with no task attached."""
        super(FeatureSelectionByCut, self).__init__(*args, **kwargs)
        self.celery_task = None

    @property
    def cut_property_options(self):
        """Return one ``{"pk", "str"}`` dict per rank-table header.

        None is returned when no rank table is bound or when the bound
        object has no ``headers`` attribute.
        """
        rank_table = self.get_input_var("rank_table")
        if not rank_table or not hasattr(rank_table, "headers"):
            return None
        return [
            {"pk": header, "str": header}
            for header in rank_table.headers
        ]

    def execute(self, exp, *args, **kwargs):
        """Build the feature-selection task from inputs and dispatch it."""
        self.clean_errors()
        task_kwargs = {
            "exp": exp,
            "block": self,
            "src_es": self.get_input_var("es"),
            "rank_table": self.get_input_var("rank_table"),
            "cut_property": self.cut_property,
            "threshold": self.threshold,
            "cut_direction": self.cut_direction,
            "base_filename": "%s_feature_selection" % self.uuid,
        }
        self.celery_task = wrapper_task.s(
            feature_selection_by_cut, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Publish the filtered set as ``es`` and store the block."""
        self.set_out_var("es", es)
        exp.store_block(self)
Esempio n. 20
0
class GeneSetAggCV(GenericBlock):
    """Aggregation block working on a train/test pair of expression sets.

    Both sets, the gene sets and the chosen method are handed to
    ``agg_task_cv``; the results are published as ``out_train_es`` and
    ``out_test_es``.
    """
    block_group = GroupType.AGGREGATION
    block_base_name = "CV_GS_A"
    name = "CV Gene Sets Aggregation"
    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, then the shared execution ones.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_train_es = InputBlockField(
        name="train_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _input_test_es = InputBlockField(
        name="test_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )

    _input_gs = InputBlockField(
        name="gs",
        order_num=30,
        required_data_type="GeneSets",
        required=True,
    )

    # Aggregation method selector; each option is a [value, label] pair.
    agg_method = ParamField(
        "agg_method",
        title="Aggregate method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="mean",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mean", "Mean"],
                ["median", "Median"],
                ["pca", "PCA"],
            ],
        },
    )

    out_train_es = OutputBlockField(
        name="out_train_es", provided_data_type="ExpressionSet")
    out_test_es = OutputBlockField(
        name="out_test_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the parent block, then start with no task attached."""
        super(GeneSetAggCV, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the CV aggregation task from the inputs and dispatch it."""
        self.clean_errors()
        training_set = self.get_input_var("train_es")
        testing_set = self.get_input_var("test_es")
        bound_gene_sets = self.get_input_var("gs")

        task_kwargs = {
            "train_es": training_set,
            "test_es": testing_set,
            "gene_sets": bound_gene_sets,
            "method": self.agg_method,
            "base_filename": "%s_%s_agg" % (self.uuid, "pca_cv"),
        }
        self.celery_task = wrapper_task.s(
            agg_task_cv, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, out_train_es, out_test_es):
        """Publish both aggregated sets and store the block."""
        self.set_out_var("out_train_es", out_train_es)
        self.set_out_var("out_test_es", out_test_es)
        exp.store_block(self)
Esempio n. 21
0
class EnrichmentVisualize(GenericBlock):
    """Visualisation block that renders a DictionarySet as a table.

    Besides the table payload it exposes two export URLs and the
    ``export_json`` / ``export_csv`` handlers that serve them.
    """
    block_base_name = "EV_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Enrichment Visualize"

    is_block_supports_auto_execution = False

    # Only parameter saving/validation; there is no execution transition.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params")
    ])

    _input_dictionary_set = InputBlockField(
        name="ds",
        order_num=10,
        required_data_type="DictionarySet",
        required=True,
    )

    # Fields declared with is_a_property=True; they expose the properties
    # of the same name defined below.
    _table_for_js = BlockField(
        name="table_js", field_type=FieldType.RAW, is_a_property=True)

    _export_raw_results_url = BlockField(
        name="export_raw_results_url",
        field_type=FieldType.STR,
        is_a_property=True,
    )

    _export_results_csv_url = BlockField(
        name="export_results_csv_url",
        field_type=FieldType.STR,
        is_a_property=True,
    )

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["enrichment_view.html"],
    )

    def map_to_symbols(self, gene_set):
        """Return names of GeneIdentifier rows whose refseq is in ``gene_set``."""
        matching = GeneIdentifier.objects.filter(refseq__refseq__in=gene_set)
        return [gi.name for gi in matching]

    @property
    def export_results_csv_url(self):
        """URL of the ``export_csv`` field in ``csv`` format."""
        url_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_csv",
            "format": "csv"
        }
        return reverse("block_field_formatted", kwargs=url_kwargs)

    @property
    def export_raw_results_url(self):
        """URL of the ``export_json`` field in ``json`` format."""
        url_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_json",
            "format": "json"
        }
        return reverse("block_field_formatted", kwargs=url_kwargs)

    @property
    def table_js(self):
        """Return the ``{"columns", "rows"}`` table payload, or None.

        None is returned while the ``ds`` input is not bound (falsy).
        """
        # Bound input; a DictionarySet according to the original type hint.
        cs = self.get_input_var("ds")
        if not cs:
            return None

        table = cs.load_dict()
        table_headers = ['comodule', '(term, p-val)', 'genes']

        # Each column is keyed by "_" plus the first 8 hex digits of the
        # MD5 of its title.
        column_title_to_code_name = {}
        for title in table_headers:
            column_title_to_code_name[title] = (
                "_" + hashlib.md5(title).hexdigest()[:8])
        field_codes = [column_title_to_code_name[title]
                       for title in table_headers]

        columns = []
        for title in table_headers:
            columns.append({
                "title": title,
                "field": column_title_to_code_name[title],
                "visible": True
            })

        # One record per entry: (key, value[1], symbols mapped from value[0]).
        records = [
            (key, value[1], set(self.map_to_symbols(value[0])))
            for key, value in table.iteritems()
        ]
        rows = [dict(zip(field_codes, record)) for record in records]

        return {"columns": columns, "rows": rows}

    def export_json(self, exp, *args, **kwargs):
        """Return the dictionary loaded from the bound ``ds`` input."""
        return self.get_input_var("ds").load_dict()

    def export_csv(self, exp, *args, **kwargs):
        """Return the loaded dictionary as CSV text, one row per item."""
        import csv
        import StringIO
        loaded = self.get_input_var("ds").load_dict()
        buf = StringIO.StringIO()
        csv.writer(buf).writerows(loaded.items())
        return buf.getvalue()
Esempio n. 22
0
class GeneSetsView(GenericBlock):
    """Visualisation block that renders a GeneSets input as a table.

    It also exposes a JSON export URL and the ``export_json`` handler that
    serves it.
    """
    block_base_name = "GS_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Gene Sets view"

    is_block_supports_auto_execution = False

    # Only parameter saving/validation; there is no execution transition.
    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready",
                      "input_bound"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
    ])

    _input_dictionary_set = InputBlockField(
        name="gs",
        order_num=10,
        required_data_type="GeneSets",
        required=True,
    )

    # Fields declared with is_a_property=True; they expose the properties
    # of the same name defined below.
    _table_for_js = BlockField(
        name="table_js",
        field_type=FieldType.RAW,
        is_a_property=True,
    )

    _export_raw_results_url = BlockField(
        name="export_raw_results_url",
        field_type=FieldType.STR,
        is_a_property=True,
    )

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["dictionary_set_view.html"],
    )

    @property
    def export_raw_results_url(self):
        """URL of the ``export_json`` field in ``json`` format."""
        url_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_json",
            "format": "json"
        }
        return reverse("block_field_formatted", kwargs=url_kwargs)

    @property
    def table_js(self):
        """Return the ``{"columns", "rows"}`` table payload, or None.

        None is returned while the ``gs`` input is not bound (falsy).
        """
        # Bound input; a GeneSets object according to the original type hint.
        cs = self.get_input_var("gs")
        if not cs:
            return None

        table = cs.get_gs(conv=False).genes
        table_headers = ['key', 'value']

        # Each column is keyed by "_" plus the first 8 hex digits of the
        # MD5 of its title.
        column_title_to_code_name = {}
        for title in table_headers:
            column_title_to_code_name[title] = (
                "_" + hashlib.md5(title).hexdigest()[:8])
        field_codes = [column_title_to_code_name[title]
                       for title in table_headers]

        columns = []
        for title in table_headers:
            columns.append({
                "title": title,
                "field": column_title_to_code_name[title],
                "visible": True
            })

        # One record per gene set: (key, list of its members).
        records = [(key, list(members)) for key, members in table.iteritems()]
        rows = [dict(zip(field_codes, record)) for record in records]

        return {"columns": columns, "rows": rows}

    def export_json(self, exp, *args, **kwargs):
        """Return the ``genes`` mapping of the bound ``gs`` input."""
        return self.get_input_var("gs").get_gs().genes
Esempio n. 23
0
class MultiFeature(UniformMetaBlock):
    """Meta block "Multi Feature Validation".

    Keeps a list of selected features (``self.features``) and, on execution,
    schedules ``prepare_folds`` through ``wrapper_task`` with the expression
    sets bound to the dynamic "es_inputs" ports.
    """
    block_base_name = "MULTI_FEATURE"
    name = "Multi Feature Validation"

    _mf_block_actions = ActionsList([
        ActionRecord(
            "on_feature_selection_updated",
            ["valid_params", "ready", "done"],
            "ready",
        ),
    ])

    _input_es_dyn = InputBlockField(
        name="es_inputs",
        order_num=-10,
        required_data_type="ExpressionSet",
        required=True,
        multiply_extensible=True,
    )

    _is_sub_pages_visible = BlockField(
        "is_sub_pages_visible",
        FieldType.RAW,
        init_val=False,
        is_a_property=True,
    )

    pages = BlockField(
        "pages",
        FieldType.RAW,
        init_val={
            "select_feature": {
                "title": "Select features to examine",
                "resource": "select_feature",
                "widget": "widgets/select_feature.html"
            },
        },
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block with an empty feature selection."""
        super(MultiFeature, self).__init__(*args, **kwargs)
        self.features = []

    @property
    def is_sub_pages_visible(self):
        """True while the block state is 'valid_params', 'done' or 'ready'."""
        return self.state in ['valid_params', 'done', 'ready']

    def get_fold_labels(self):
        """Return the currently selected features as fold labels."""
        return self.features

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register an inner output that mirrors a newly added dynamic input.

            @type new_port: InputBlockField
        """
        mirrored_output = InnerOutputField(
            name="%s_i" % new_port.name,
            provided_data_type=new_port.required_data_type,
        )
        self.inner_output_es_names_map[new_port.name] = mirrored_output.name
        self.register_inner_output_variables([mirrored_output])

    def execute(self, exp, *args, **kwargs):
        """Reset inner outputs and schedule fold preparation as a task."""
        self.inner_output_manager.reset()

        # Collect the value bound to every dynamic expression-set input.
        es_dict = {}
        for inp_name in self.es_inputs:
            es_dict[inp_name] = self.get_input_var(inp_name)

        self.celery_task = wrapper_task.s(
            prepare_folds,
            exp,
            self,
            features=self.features,
            es_dict=es_dict,
            inner_output_es_names_map=self.inner_output_es_names_map,
            success_action="on_folds_generation_success",
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return phenotype data of the first bound input plus the features.

        The first input whose value is not None is used; when there is none,
        None is handed to ``prepare_phenotype_for_js_from_es``.
        """
        bound_values = (
            self.get_input_var(input_name) for input_name in self.es_inputs
        )
        first_es = next(
            (value for value in bound_values if value is not None), None)

        phenotype = prepare_phenotype_for_js_from_es(first_es)
        phenotype["features"] = self.features
        return phenotype

    def update_feature_selection(self, exp, request, *args, **kwargs):
        """Store the features sent in the request body.

        The "on_feature_selection_updated" action is fired only when the
        received selection is non-empty.
        """
        payload = json.loads(request.body)
        self.features = payload["features"]
        if not self.features:
            return
        self.do_action("on_feature_selection_updated", exp)

    def on_feature_selection_updated(self, *args, **kwargs):
        """Action handler; intentionally does nothing."""
        pass
Esempio n. 24
0
class UserUpload(GenericBlock):
    """Abstract input block that builds an ExpressionSet from uploaded CSVs.

    The user supplies an expression matrix and a phenotype matrix; the
    "process_upload" action parses both and publishes the resulting
    ExpressionSet through the "expression_set" output.
    """
    block_base_name = "UPLOAD"
    block_group = GroupType.INPUT_DATA
    is_abstract = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data", reload_block_in_client=True),
        ActionRecord("success", ["processing_upload"], "done", reload_block_in_client=True),
        ActionRecord("error", ["processing_upload"], "valid_params", reload_block_in_client=True),
    ])

    es_matrix = ParamField(
        "es_matrix", title="Expression set matrix", order_num=0,
        input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM)
    es_matrix_ori = ParamField(
        "es_matrix_ori", title="Matrix orientation", order_num=1,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"]
            ]
        }
    )
    pheno_matrix = ParamField(
        "pheno_matrix", title="Phenotype matrix", order_num=10,
        input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM)
    gpl_platform = ParamField(
        "gpl_platform", title="Platform ID", order_num=20,
        input_type=InputType.TEXT, field_type=FieldType.STR, required=False)
    working_unit = ParamField(
        "working_unit", title="Working unit [used when platform is unknown]",
        order_num=3, input_type=InputType.TEXT, field_type=FieldType.STR, required=False)

    _is_sub_pages_visible = BlockField("is_sub_pages_visible", FieldType.RAW, is_a_property=True)

    ### PARAMETERS
    _expression_set = OutputBlockField(name="expression_set", field_type=FieldType.HIDDEN,
                                       provided_data_type="ExpressionSet")
    _gpl_annotation = OutputBlockField(name="gpl_annotation", field_type=FieldType.HIDDEN,
                                       provided_data_type="PlatformAnnotation")

    # TODO: COPY PASTE from fetch_gse block
    pages = BlockField("pages", FieldType.RAW, init_val={
        "assign_phenotype_classes": {
            "title": "Assign phenotype classes",
            "resource": "assign_phenotype_classes",
            "widget": "widgets/assign_phenotype_classes.html"
        },
    })

    def __init__(self, *args, **kwargs):
        """Initialise the block, passing "User upload" as first base argument."""
        super(UserUpload, self).__init__("User upload", *args, **kwargs)

    @property
    def is_sub_pages_visible(self):
        """True when the block state is one of the states listed below."""
        return self.state in ['source_was_preprocessed', 'sample_classes_assigned', 'ready', 'done']

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return the phenotype data of the "expression_set" output for the client."""
        return prepare_phenotype_for_js_from_es(self.get_out_var("expression_set"))

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Apply a user class assignment received as JSON in ``request.body``.

        The payload must contain "user_class_title" and "classes"; the title
        is recorded in the expression set's phenotype metadata and the
        classes are written into the phenotype column of that name.
        """
        es = self.get_out_var("expression_set")
        pheno_df = es.get_pheno_data_frame()

        received = json.loads(request.body)
        class_title = received["user_class_title"]
        es.pheno_metadata["user_class_title"] = class_title
        pheno_df[class_title] = received["classes"]

        es.store_pheno_data_frame(pheno_df)
        exp.store_block(self)

    def process_upload(self, exp, *args, **kwargs):
        """Parse the uploaded matrices and store them as an ExpressionSet.

            @param exp: Experiment
        """
        self.clean_errors()

        # pd.read_csv(..., index_col=0, parse_dates=True) is the documented
        # replacement for the deprecated DataFrame.from_csv (removed in
        # pandas 1.0) and parses the file the same way.
        assay_df = pd.read_csv(self.es_matrix.get_file(),
                               index_col=0, parse_dates=True)

        es = ExpressionSet(base_dir=exp.get_data_folder(),
                           base_filename="%s_annotation" % self.uuid)

        pheno_df = pd.read_csv(self.pheno_matrix.get_file(),
                               index_col=0, parse_dates=True)
        # NOTE: the previous code called pheno_df.set_index(...) and threw
        # the returned frame away, so it never changed pheno_df. The first
        # CSV column is already used as the index by the parser.

        # Make sure the column used for user class assignment exists.
        user_class_title = es.pheno_metadata["user_class_title"]
        if user_class_title not in pheno_df.columns:
            pheno_df[user_class_title] = ""

        # if matrix is bad oriented, then do transposition
        if self.es_matrix_ori == "GxS":
            assay_df = assay_df.T

        es.store_assay_data_frame(assay_df)
        es.store_pheno_data_frame(pheno_df)

        if self.working_unit:
            es.working_unit = self.working_unit

        self.set_out_var("expression_set", es)

        exp.store_block(self)

        self.do_action("success", exp)

    def success(self, exp, *args, **kwargs):
        """Action handler; intentionally does nothing."""
        pass
Esempio n. 25
0
class MassUpload(UniformMetaBlock):
    """Meta block "Mass Upload Expression Sets".

    Accepts several mRNA / miRNA expression matrices together with phenotype
    files, pairs them up by sorted name and hands the actual processing to
    ``bunch_upload_task`` through ``wrapper_task``.
    """
    block_base_name = "BunchUpload"
    name = "Mass Upload Expression Sets"

    _bu_block_actions = ActionsList([
        ActionRecord(
            "process_upload",
            ["valid_params", "processing_upload"],
            "processing_upload",
            "Process uploaded data",
        ),
        ActionRecord("error_on_processing", ["processing_upload"], "valid_params"),
        ActionRecord("processing_done", ["processing_upload"], "ready"),
    ])

    # --- mRNA inputs -------------------------------------------------------
    es_mRNA_matrices = ParamField(
        "es_mRNA_matrices",
        title="mRNA Expression sets",
        order_num=10,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        options={"multiple": True},
    )

    es_mRNA_matrices_ori = ParamField(
        "es_mRNA_matrices_ori",
        title="Matrices orientation",
        order_num=11,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Features"],
                ["GxS", "Features x Samples"],
            ],
        },
    )

    m_rna_platform = ParamField(
        "m_rna_platform",
        title="Platform ID",
        order_num=12,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )

    m_rna_unit = ParamField(
        "m_rna_unit",
        title="Working unit [used when platform is unknown]",
        order_num=13,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=False,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["RefSeq", "RefSeq"],
                ["Entrez", "EntrezID"],
                ["Symbol", "Symbol"],
            ],
        },
    )

    csv_sep_m_rna = ParamField(
        "csv_sep_m_rna",
        title="CSV separator symbol",
        order_num=14,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # --- miRNA inputs ------------------------------------------------------
    es_miRNA_matrices = ParamField(
        "es_miRNA_matrices",
        title="miRNA Expression sets",
        order_num=15,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        options={"multiple": True},
    )

    mi_rna_platform = ParamField(
        "mi_rna_platform",
        title="Platform ID",
        order_num=21,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )

    mi_rna_unit = ParamField(
        "mi_rna_unit",
        title="Working unit [used when platform is unknown]",
        order_num=22,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=False,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["RefSeq", "RefSeq"],
                ["mirbase", "miRBase ID"],
            ],
        },
    )

    es_miRNA_matrices_ori = ParamField(
        "es_miRNA_matrices_ori",
        title="Matrices orientation",
        order_num=23,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"],
            ],
        },
    )

    csv_sep_mi_rna = ParamField(
        "csv_sep_mi_rna",
        title="CSV separator symbol",
        order_num=24,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # --- phenotype inputs --------------------------------------------------
    pheno_matrices = ParamField(
        "pheno_matrices",
        title="Phenotypes",
        order_num=40,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        options={"multiple": True},
    )

    csv_sep = ParamField(
        "csv_sep",
        title="CSV separator symbol",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    def __init__(self, *args, **kwargs):
        """Create empty upload holders and register the two inner outputs."""
        super(MassUpload, self).__init__(*args, **kwargs)
        self.es_mRNA_matrices = MultiUploadField()
        self.es_miRNA_matrices = MultiUploadField()

        self.pheno_matrices = MultiUploadField()

        self.pheno_by_es_names = {}

        self.labels = []
        self.seq = []

        inner_outputs = [
            InnerOutputField(name="mRNA_es",
                             provided_data_type="ExpressionSet"),
            InnerOutputField(name="miRNA_es",
                             provided_data_type="ExpressionSet"),
        ]
        self.register_inner_output_variables(inner_outputs)

    @property
    def is_sub_pages_visible(self):
        """True when the block state is one of the states listed below."""
        return self.state in ['source_was_preprocessed', 'sample_classes_assigned', 'ready', 'done']

    def get_fold_labels(self):
        """Return the labels stored by ``process_upload``."""
        return self.labels

    def error_on_processing(self, *args, **kwargs):
        """Action handler; intentionally does nothing."""
        pass

    def processing_done(self, exp, block):
        """Persist the given block in the experiment."""
        exp.store_block(block)

    def process_upload(self, exp, *args, **kwargs):
        """Validate the uploads, pair them by sorted name and start the task.

        Any exception raised on the way is logged, appended to
        ``self.errors`` and reported through the "error_on_processing"
        action.

            @param exp: Experiment
        """
        self.clean_errors()
        try:
            pheno_count = len(self.pheno_matrices)
            if pheno_count != len(self.es_mRNA_matrices):
                raise RuntimeError("Different number of phenotypes and mRNA expression sets")
            # miRNA sets are optional, but when given they must match too.
            if self.es_miRNA_matrices and pheno_count != len(self.es_miRNA_matrices):
                raise RuntimeError("Different number of phenotypes and miRNA expression sets")

            m_rna_names = sorted(self.es_mRNA_matrices)
            self.labels = m_rna_names
            mi_rna_names = sorted(self.es_miRNA_matrices)
            pheno_names = sorted(self.pheno_matrices)
            if not mi_rna_names:
                # No miRNA uploads: pair every mRNA set with None.
                mi_rna_names = [None] * len(m_rna_names)

            # Phenotype name -> (mRNA name, miRNA name), matched by position
            # in the sorted name lists.
            es_name_pairs = zip(m_rna_names, mi_rna_names)
            self.pheno_by_es_names = dict(zip(pheno_names, es_name_pairs))

            self.clean_errors()
            self.celery_task = wrapper_task.s(
                bunch_upload_task,
                exp,
                self,
                success_action="processing_done",
                error_action="error_on_processing",
            )
            exp.store_block(self)
            self.celery_task.apply_async()
        except Exception as e:
            exp.log(self.uuid, e, severity="CRITICAL")
            log.exception(e)
            self.errors.append(e)
            self.do_action("error_on_processing", exp, e)

    def execute(self, exp, *args, **kwargs):
        """Reset inner outputs and report fold generation with ``self.seq``."""
        self.inner_output_manager.reset()
        self.do_action("on_folds_generation_success", exp, self.seq)

    def get_repeat_labels(self):
        """No repeat labels are provided; returns None."""
        pass
Esempio n. 26
0
class PcaVisualize(GenericBlock):
    """Visualisation block "2D PCA Plot".

    Projects the assay data of the bound expression set onto two principal
    components and stores the points, grouped by the user class column of
    the phenotype table, in ``chart_series``.
    """
    block_base_name = "PCA_VISUALIZE"
    name = "2D PCA Plot"
    block_group = GroupType.VISUALIZE

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
        ActionRecord("compute_pca", ["valid_params"],
                     "computing_pca",
                     user_title="Compute PCA"),
        ActionRecord(
            "pca_done",
            ["computing_pca"],
            "done",
        ),
        ActionRecord("reset_execution",
                     ["*", "done", "execution_error", "ready", "working"],
                     "ready",
                     user_title="Reset execution")
    ])

    input_es = InputBlockField(name="es",
                               order_num=10,
                               required_data_type="ExpressionSet",
                               required=True)

    chart_series = BlockField(name="chart_series",
                              field_type=FieldType.RAW,
                              init_val=[])
    chart_categories = BlockField(name="chart_categories",
                                  field_type=FieldType.SIMPLE_LIST,
                                  init_val=[])

    elements = BlockField(name="elements",
                          field_type=FieldType.SIMPLE_LIST,
                          init_val=["pca.html"])

    def __init__(self, *args, **kwargs):
        """Initialise the block, passing "PCA visualise" as first base argument."""
        super(PcaVisualize, self).__init__("PCA visualise", *args, **kwargs)

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the base handler, then immediately trigger "compute_pca"."""
        super(PcaVisualize, self).on_params_is_valid(exp, *args, **kwargs)

        self.do_action("compute_pca", exp)

    def compute_pca(self, exp, *args, **kwargs):
        """Compute the 2-component PCA and fill ``chart_series``.

        Each entry of ``chart_series`` is a dict with the class "name" and
        the list of projected "data" points belonging to that class. Fires
        the "pca_done" action when finished.
        """
        log.info("compute pca invoked")

        es = self.get_input_var("es")
        # :type es: ExpressionSet
        df = es.get_assay_data_frame()
        pheno_df = es.get_pheno_data_frame()
        target_column = es.pheno_metadata['user_class_title']

        # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
        # same ndarray and exists in old and new pandas alike.
        X = df.values.transpose()

        pca_model = decomposition.PCA(n_components=2)
        pca_model.fit(X)
        projected = pca_model.transform(X).tolist()

        # Class label of every phenotype row, whitespace-trimmed.
        names = [x.strip() for x in pheno_df[target_column].tolist()]

        # Points and labels are paired purely by position.
        series_by_names = defaultdict(list)
        for point, name in zip(projected, names):
            series_by_names[name].append(point)

        # .items() works on both Python 2 and Python 3 dictionaries.
        self.chart_series = [{
            "name": name,
            "data": points
        } for name, points in series_by_names.items()]
        self.do_action("pca_done", exp)

    def pca_done(self, exp, *args, **kwargs):
        """Action handler; only logs that the computation finished."""
        log.info("pca done")
Esempio n. 27
0
class FilterBlock(GenericBlock):
    """Filter block "Var/Val Filter".

    Schedules ``filter_task`` for the bound expression set with the chosen
    filter method and threshold, and publishes the result as "flt_es".
    """
    block_base_name = "FILTER"
    name = "Var/Val Filter"

    is_abstract = False
    block_group = GroupType.FILTER

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )

    filter_method = ParamField(
        "filter_method",
        title="Filter method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="LOW_VAL",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["LOW_VAL", "Low Val Filter"],
                ["VAR", "Var Filter"],
            ],
        },
    )
    q = ParamField(
        name="q",
        title="Threshold",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=30.0,
    )

    flt_es = OutputBlockField(
        name="flt_es",
        provided_data_type="ExpressionSet",
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task scheduled yet."""
        super(FilterBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear errors, build the filter task, store the block and start it."""
        self.clean_errors()
        source_es = self.get_input_var("es")

        # Output files are named after the block uuid and the filter method.
        output_basename = "%s_%s_flt" % (self.uuid, self.filter_method)
        self.celery_task = wrapper_task.s(
            filter_task,
            exp,
            self,
            filter_type=self.filter_method,
            q=self.q,
            es=source_es,
            base_filename=output_basename,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the filtered expression set and persist the block."""
        self.set_out_var("flt_es", flt_es)
        exp.store_block(self)
Esempio n. 28
0
class CrossValidation(UniformMetaBlock):
    """Meta block "Cross Validation K-fold".

    Schedules ``generate_cv_folds`` for the bound expression sets and later
    assembles the collected per-fold results into a ResultsContainer.
    """
    block_base_name = "CROSS_VALID"
    name = "Cross Validation K-fold"

    _cv_actions = ActionsList([
        ActionRecord("become_ready", ["valid_params"], "ready"),
    ])
    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["cv_info.html"],
    )

    _input_es_dyn = InputBlockField(
        name="es_inputs",
        required_data_type="ExpressionSet",
        required=True,
        multiply_extensible=True,
        order_num=-1,
    )

    folds_num = ParamField(
        name="folds_num",
        title="Folds number",
        order_num=10,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=5,
    )
    repeats_num = ParamField(
        name="repeats_num",
        title="Repeats number",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=1,
    )

    def get_fold_labels(self):
        """Return "fold_<repeat>_<fold>" labels, repeat-major, both 1-based."""
        return [
            "fold_%s_%s" % (repeat + 1, num + 1)
            for repeat in range(self.repeats_num)
            for num in range(self.folds_num)
        ]

    def get_repeat_labels(self):
        """Return "repeat_<n>" labels, 1-based, one per repeat."""
        labels = []
        for repeat in range(self.repeats_num):
            labels.append("repeat_%s" % (repeat + 1))
        return labels

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register train/test inner outputs for a newly added dynamic input.

            @type new_port: InputBlockField
        """
        data_type = new_port.required_data_type
        train_output = InnerOutputField(
            name="%s_train_i" % new_port.name,
            provided_data_type=data_type,
        )
        test_output = InnerOutputField(
            name="%s_test_i" % new_port.name,
            provided_data_type=data_type,
        )
        self.inner_output_es_names_map[new_port.name] = (
            train_output.name,
            test_output.name,
        )

        self.register_inner_output_variables([train_output, test_output])

    def execute(self, exp, *args, **kwargs):
        """Clear errors, reset inner outputs and schedule fold generation."""
        self.clean_errors()

        self.inner_output_manager.reset()

        # Collect the value bound to every dynamic expression-set input.
        es_dict = {}
        for inp_name in self.es_inputs:
            es_dict[inp_name] = self.get_input_var(inp_name)

        self.celery_task = wrapper_task.s(
            generate_cv_folds,
            exp,
            self,
            folds_num=self.folds_num,
            repeats_num=self.repeats_num,
            es_dict=es_dict,
            inner_output_es_names_map=self.inner_output_es_names_map,
            success_action="on_folds_generation_success",
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the base handler, then trigger the "become_ready" action."""
        super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("become_ready", exp)

    def become_ready(self, *args, **kwargs):
        """Action handler; intentionally does nothing."""
        pass

    def build_result_collection(self, exp):
        """Assemble ``self.res_seq`` into the "results_container" output.

        Only the data type of the first field of the result sequence is
        inspected:
          * "ClassifierResult"  - per field, cells are grouped into runs of
            ``folds_num`` folds and stacked along the repeat axis;
          * "ResultsContainer"  - the single field's containers are loaded
            and stacked along the fold axis;
          * "SequenceContainer" - nothing is added.
        Any other type, or more than one ResultsContainer field, raises
        Exception. The container is stored before being published.
        """
        if settings.CELERY_DEBUG:
            # NOTE(review): developer-only remote debugger hook with a
            # hard-coded local path.
            import sys
            sys.path.append(
                '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
            )
            import pydevd
            pydevd.settrace(
                'localhost',
                port=6901,
                stdoutToServer=True,
                stderrToServer=True,
            )

        collection = ResultsContainer(
            base_dir=exp.get_data_folder(),
            base_filename="%s" % self.uuid,
        )
        res_seq = self.res_seq

        def init_single_axis(target_rc, axis_name, axis_labels):
            # Give the container exactly one labelled axis and allocate it.
            target_rc.axis_list = [axis_name]
            target_rc.labels_dict[axis_name] = axis_labels
            target_rc.init_ar()
            target_rc.update_label_index()

        # WARNING: We only support homogeneous results, so we only check first element
        first_field_name, data_type = next(res_seq.fields.iteritems())

        if data_type == "ClassifierResult":
            # Result is unused in this branch; the call is kept from the
            # original flow.
            self.get_fold_labels()
            per_field_containers = []
            for field_name in res_seq.fields:
                per_repeat_containers = []
                for cell_idx, cell in enumerate(res_seq.sequence):
                    fold_idx = cell_idx % self.folds_num
                    if fold_idx == 0:
                        # First fold of a run: open a fresh per-run container.
                        current_run = ResultsContainer("", "")
                        init_single_axis(
                            current_run,
                            self.base_name + "_folds",
                            [
                                "fold_%s" % fold_num
                                for fold_num in range(self.folds_num)
                            ],
                        )
                        per_repeat_containers.append(current_run)
                    current_run.ar[fold_idx] = cell[field_name]

                field_container = ResultsContainer("", "")
                field_container.add_dim_layer(
                    per_repeat_containers,
                    self.base_name,
                    self.get_repeat_labels(),
                )
                per_field_containers.append(field_container)

            collection.add_dim_layer(
                per_field_containers,
                self.collector_spec.label,
                res_seq.fields.keys(),
            )

        elif data_type == "ResultsContainer":
            if len(res_seq.fields) > 1:
                raise Exception(
                    "Meta block only support single output of type ResultsContainer"
                )

            loaded_containers = []
            for cell in res_seq.sequence:
                nested = cell[first_field_name]
                nested.load()
                loaded_containers.append(nested)

            collection.add_dim_layer(
                loaded_containers,
                self.base_name,
                self.get_fold_labels(),
            )

        elif data_type == "SequenceContainer":
            # TODO remove this check
            pass
        else:
            raise Exception("Meta blocks only support ClassifierResult "
                            "or ResultsContainer in the output collection. "
                            " Instead got: %s" % data_type)

        collection.store()
        # Drop the in-memory array reference after storing.
        collection.ar = None
        self.set_out_var("results_container", collection)
Esempio n. 29
0
class UniformMetaBlock(GenericBlock):
    """
        Abstract base for meta blocks that own a sub-scope and run it
        repeatedly.

        The position of the current run is kept in
        ``inner_output_manager.iterator``; it indexes ``res_seq.sequence``,
        whose cells receive the variables listed in ``collector_spec.bound``
        after every sub-scope run (see on_sub_scope_done).

        Subclasses must implement get_fold_labels() and get_repeat_labels().
    """
    is_abstract = True
    block_group = GroupType.META_PLUGIN
    create_new_scope = True
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([])
    _block_actions.extend(ActionsList([
        # Parameter validation cycle
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        # Changing the set of collected variables re-triggers validation
        ActionRecord("add_collector_var", ["created", "ready", "done", "valid_params"], "validating_params"),
        ActionRecord("remove_collector_var", ["created", "ready", "done", "valid_params"], "validating_params"),

        ActionRecord("execute", ["ready"], "generating_folds", user_title="Run block"),

        ActionRecord("on_folds_generation_success", ["generating_folds"], "ready_to_run_sub_scope",
                     reload_block_in_client=True),
        ActionRecord("continue_collecting_sub_scope", ["ready_to_run_sub_scope"],
                     "sub_scope_executing"),

        # Loop: ready_to_run_sub_scope <-> sub_scope_executing
        ActionRecord("run_sub_scope", ["ready_to_run_sub_scope"], "sub_scope_executing"),
        ActionRecord("on_sub_scope_done", ["sub_scope_executing"], "ready_to_run_sub_scope"),

        ActionRecord("success", ["working", "ready_to_run_sub_scope"], "done",
                     propagate_auto_execution=True, reload_block_in_client=True),
        ActionRecord("error", ["*", "ready", "working", "sub_scope_executing",
                               "generating_folds", "ready_to_run_sub_scope"],
                     "execution_error", reload_block_in_client=True),

        ActionRecord("reset_execution", ["*", "done", "sub_scope_executing", "ready_to_run_sub_scope",
                                         "generating_folds", "execution_error"], "ready",
                     user_title="Reset execution"),
    ]))

    _collector_spec = ParamField(name="collector_spec", title="",
                                 field_type=FieldType.CUSTOM,
                                 input_type=InputType.HIDDEN,
                                 init_val=None, required=False
    )

    res_seq = BlockField(name="res_seq", provided_data_type="SequenceContainer",
                         field_type=FieldType.HIDDEN, init_val=None)

    _results_container = OutputBlockField(
        name="results_container",
        provided_data_type="ResultsContainer",
        field_type=FieldType.HIDDEN,
        init_val=None
    )

    def __init__(self, *args, **kwargs):
        """
            Set up the per-instance iteration and collection state.
        """
        super(UniformMetaBlock, self).__init__(*args, **kwargs)
        # The states of the sub-scope loop count as "working" for
        # auto execution purposes.
        self.auto_exec_status_working.update(["sub_scope_executing", "ready_to_run_sub_scope",
                                              "generating_folds"])

        self.inner_output_manager = IteratedInnerFieldManager()
        self.collector_spec = CollectorSpecification()
        self.collector_spec.label = self.block_base_name + "_collection"

        self.inner_output_es_names_map = {}
        self.celery_task = None

        self.set_out_var("results_container", None)
        self.res_seq = SequenceContainer()

    def remap_inputs(self, mapping):
        """
            Call change_block(mapping) on every bound input variable and on
            every variable bound in the collector specification.
        """
        for var in self.bound_inputs.itervalues():
            var.change_block(mapping)
        for var in self.collector_spec.bound.itervalues():
            var.change_block(mapping)

    @property
    def is_sub_pages_visible(self):
        """
            True while the block state is 'valid_params', 'done' or 'ready'.
        """
        return self.state in ['valid_params', 'done', 'ready']

    @abstractmethod
    def get_fold_labels(self):
        """
            To be implemented by subclasses.
        """
        pass

    @abstractmethod
    def get_repeat_labels(self):
        """
            To be implemented by subclasses.
        """
        pass

    def get_inner_out_var(self, name):
        """
            Return the variable `name` from the inner output manager.
        """
        return self.inner_output_manager.get_var(name)

    def run_sub_scope(self, exp, *args, **kwargs):
        """
            @type exp: Experiment

            Reset the execution state of the sub-blocks, persist this block
            and run the sub-scope through a ScopeRunner.
        """
        self.reset_execution_for_sub_blocks()
        # Both lookups below are used only for the debug output.
        cell = self.res_seq.sequence[self.inner_output_manager.iterator]
        log.debug("Cell!!!!!!!! %s", str(cell))
        act = self.inner_output_manager.sequence[self.inner_output_manager.iterator]
        log.debug("Cell!!!!!!!! %s", str(act))

        exp.store_block(self)
        sr = ScopeRunner(exp, self.sub_scope_name)
        sr.execute()

    def on_sub_scope_done(self, exp, *args, **kwargs):
        """
            @type exp: Experiment

            This action should be called by ScopeRunner
            when all blocks in sub-scope have exec status == done

            Copies every available collector variable into the current cell
            of res_seq, then either keeps collecting, starts the next
            sub-scope run, or finishes the block when the iterator is
            exhausted.
        """
        r = get_redis_instance()
        with redis_lock.Lock(r, ExpKeys.get_block_global_lock_key(self.exp_id, self.uuid)):

            cell = self.res_seq.sequence[self.inner_output_manager.iterator]
            for name, scope_var in self.collector_spec.bound.iteritems():
                var = exp.get_scope_var_value(scope_var)
                # NOTE(review): severity "CRITICAL" for a routine message --
                # confirm this is intended.
                exp.log(self.uuid, "Collected %s from %s" % (var, scope_var.title), severity="CRITICAL")
                log.debug("Collected %s from %s", var, scope_var.title)
                if var is not None:
                    # Store a private copy so later runs of the sub-scope
                    # do not share the object with the collected cell.
                    if hasattr(var, "clone"):
                        cell[name] = var.clone("%s_%s" %
                                               (self.uuid, self.inner_output_manager.iterator))
                    else:
                        cell[name] = deepcopy(var)

            self.res_seq.sequence[self.inner_output_manager.iterator] = cell

            exp.store_block(self)

        if len(cell) < len(self.res_seq.fields):
            # Not every expected field has been collected yet.
            self.do_action("continue_collecting_sub_scope", exp)
            return

        # Only the iterator advance is guarded: a StopIteration escaping
        # from the sub-scope run itself must not be taken for "all done".
        try:
            self.inner_output_manager.next()
        except StopIteration:
            # All folds were processed without errors
            self.build_result_collection(exp)

            self.do_action("success", exp)
        else:
            self.do_action("run_sub_scope", exp)
Esempio n. 30
0
class CustomIterator(UniformMetaBlock):
    """
        Meta block whose iteration cells are assembled by hand.

        A cells prototype lists the fields every cell has; each added cell
        gets one input port per prototype field. On execution the values
        bound to those ports are grouped into one dict per cell and passed
        on with the "on_folds_generation_success" action.
    """
    block_base_name = "CUSTOM_ITERATOR"
    name = "Custom Iterator"
    has_custom_layout = True

    _ci_block_actions = ActionsList([
        ActionRecord("become_ready", ["valid_params"], "ready"),
        ActionRecord("reset_settings", ["*", "done", "sub_scope_executing", "ready_to_run_sub_scope",
                                        "generating_folds", "execution_error"], "ready",
                     user_title="Reset to initial state", reload_block_in_client=True)
    ])

    cells_prototype = BlockField(name="cells_prototype", field_type=FieldType.CUSTOM, init_val=None)
    cells = BlockField(name="cells", field_type=FieldType.CUSTOM, init_val=None)
    is_cells_prototype_defined = BlockField(name="is_cells_prototype_defined",
                                            field_type=FieldType.BOOLEAN, init_val=False)

    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[
        "custom_iterator/cell_prototype_definition.html",
        "custom_iterator/cell_dyn_inputs.html"
    ])

    def __init__(self, *args, **kwargs):
        """
            Start with an empty cells prototype and an empty cell list.
        """
        super(CustomIterator, self).__init__(*args, **kwargs)
        self.cells_prototype = CellsPrototype()
        self.cells = CellInfoList()

    def add_cell_prototype_field(self, exp, received_block, *args, **kwargs):
        """
            Add one field to the cells prototype.

            The field description is read from
            received_block["cells_prototype"]["new_cell_field"]; nothing
            happens when it is missing or empty.
        """
        new_field_dict = received_block.get("cells_prototype", {}).get("new_cell_field")
        if new_field_dict:
            cf = CellField(**new_field_dict)
            cf.update_name_from_label()
            self.cells_prototype.add_cell(cf)
            exp.store_block(self)

    def finish_cells_prototype_definition(self, exp, *args, **kwargs):
        """
            Mark the prototype as defined and register one inner output
            variable per prototype field.
        """
        self.is_cells_prototype_defined = True

        for field_prototype in self.cells_prototype.cells_list:
            new_inner_output = InnerOutputField(
                name=field_prototype.name,
                provided_data_type=field_prototype.data_type
            )
            self.register_inner_output_variables([new_inner_output])

        exp.store_block(self)

    def add_cell(self, exp, received_block, *args, **kwargs):
        """
            Append a new cell, described by received_block["cells"]["new"],
            and add one required input port per prototype field.

            Port names have the form "<field name>_<cell index>".
        """
        new_cell_dict = received_block.get("cells", {}).get("new")
        if new_cell_dict:
            cell = CellInfo(new_cell_dict["label"])
            # NOTE(review): the suffix is the current number of cells, so a
            # cell added after remove_cell() can get the same port names as
            # an existing cell -- confirm how add_input_port treats that.
            cell_index = len(self.cells.cells)
            for field_prototype in self.cells_prototype.cells_list:
                new_name = "%s_%s" % (field_prototype.name, cell_index)
                cell.inputs_list.append((field_prototype.name, new_name))
                # TODO: add input port to block
                new_port = InputBlockField(
                    name=new_name,
                    required_data_type=field_prototype.data_type,
                    required=True
                )
                self.add_input_port(new_port)

            self.cells.cells.append(cell)
            exp.store_block(self)

    def remove_cell(self, exp, cell_json, *args, **kwargs):
        """
            Best-effort removal of the cell whose label is given in
            cell_json (a JSON object with a "label" key).

            Failures do not propagate to the caller; they are logged with
            a traceback.
        """
        # NOTE(review): the input ports created in add_cell() are not
        # touched here -- confirm whether they should be removed as well.
        try:
            cell = json.loads(cell_json)
            self.cells.remove_by_label(cell["label"])
            exp.store_block(self)
        except Exception:
            # Keep the original best-effort contract, but leave a trace and
            # let KeyboardInterrupt / SystemExit through.
            import logging
            logging.getLogger(__name__).exception(
                "Failed to remove cell described by %r", cell_json)

    def become_ready(self, *args, **kwargs):
        """
            Handler of the "become_ready" action; no work besides the
            state transition.
        """
        pass

    def on_params_is_valid(self, exp, *args, **kwargs):
        """
            Run the parent handler, then fire the "become_ready" action.
        """
        super(CustomIterator, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("become_ready", exp, *args, **kwargs)

    def get_fold_labels(self):
        """
            Return the labels of all cells, in cell order.
        """
        return [cell.label for cell in self.cells.cells]

    def execute(self, exp, *args, **kwargs):
        """
            Build one dict per cell, mapping each prototype field name to
            the value of the corresponding input variable, and pass the
            list on with the "on_folds_generation_success" action.
        """
        self.inner_output_manager.reset()
        seq = []
        for cell_def in self.cells.cells:
            cell = {}
            for name, input_var_name in cell_def.inputs_list:
                # TODO: hmm maybe we should create deepcopy?
                cell[name] = self.get_input_var(input_var_name)
            seq.append(cell)
        exp.store_block(self)
        self.do_action("on_folds_generation_success", exp, seq)

    def reset_settings(self, exp, *args, **kwargs):
        """
            Drop the cells prototype and all cells, mark the prototype as
            undefined and persist the block.
        """
        self.cells_prototype = CellsPrototype()
        self.cells = CellInfoList()
        self.is_cells_prototype_defined = False
        exp.store_block(self)