Ejemplo n.º 1
0
class FilterByInteraction(GenericBlock):
    """Processing block that submits ``filter_by_bi`` as a background task.

    Inputs: two expression sets (``mRNA_es``, ``miRNA_es``) and one
    ``BinaryInteraction`` (``interaction``).
    Outputs: ``m_rna_filtered_es`` and ``mi_rna_filtered_es``.
    """

    block_base_name = "FILTER_BY_BI"
    name = "Filter ES by interaction"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    # Parameter-validation transitions; the shared execution actions are
    # appended right after the list is built.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # --- inputs ---
    _mRNA_es = InputBlockField(
        name="mRNA_es", order_num=10,
        required_data_type="ExpressionSet", required=True)
    _miRNA_es = InputBlockField(
        name="miRNA_es", order_num=20,
        required_data_type="ExpressionSet", required=True)
    _interaction = InputBlockField(
        name="interaction", order_num=30,
        required_data_type="BinaryInteraction", required=True)

    # --- outputs ---
    m_rna_filtered_es = OutputBlockField(
        name="m_rna_filtered_es", provided_data_type="ExpressionSet")
    mi_rna_filtered_es = OutputBlockField(
        name="mi_rna_filtered_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task signature exists until ``execute``."""
        super(FilterByInteraction, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the ``filter_by_bi`` task signature, store the block, dispatch.

        :param exp: experiment object; passed to the task and used to
            persist this block.
        """
        self.clean_errors()
        task_kwargs = {
            "m_rna_es": self.get_input_var("mRNA_es"),
            "mi_rna_es": self.get_input_var("miRNA_es"),
            "interaction_matrix": self.get_input_var("interaction"),
            "base_filename": "%s_filtered_by_BI" % self.uuid,
        }
        self.celery_task = wrapper_task.s(
            filter_by_bi, exp, self, **task_kwargs)
        # The block is stored before the task is sent off.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, m_rna_filtered_es, mi_rna_filtered_es):
        """Publish both filtered expression sets as outputs and store the block."""
        for out_name, value in (
                ("m_rna_filtered_es", m_rna_filtered_es),
                ("mi_rna_filtered_es", mi_rna_filtered_es)):
            self.set_out_var(out_name, value)
        exp.store_block(self)
Ejemplo n.º 2
0
class UploadGeneSets(GenericBlock):
    """Input block that turns an uploaded ``.gmt`` file into a ``GeneSets`` output."""

    block_base_name = "GENE_SETS_UPLOAD"
    block_group = GroupType.INPUT_DATA
    name = "Upload Gene Sets"

    # Valid parameters move the block straight to "done"; there is no
    # separate execution step in this action list.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "done"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    upload_gs = ParamField(
        "upload_gs",
        title="Gene sets in .gmt format",
        order_num=10,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
    )

    _gene_sets = OutputBlockField(
        name="gene_sets", provided_data_type="GeneSets")

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Parse the uploaded file, store it as ``GeneSets`` and expose it.

        Any exception raised while reading or storing is logged (to the
        experiment log with CRITICAL severity and to the module logger)
        and not re-raised; the block is stored in either case.
        """
        try:
            parsed = GmtStorage.read_inp(self.upload_gs.get_file(), "\t")
            storage = GeneSets(exp.get_data_folder(), str(self.uuid))
            storage.store_gs(parsed)
            self.set_out_var("gene_sets", storage)
        except Exception as err:
            exp.log(self.uuid, err, severity="CRITICAL")
            log.error(err)

        exp.store_block(self)
Ejemplo n.º 3
0
class EnrichmentNoTBlock(GenericBlock):
    """Testing block that submits ``enrichment_no_t_task`` as a background task.

    Inputs: ``gs`` and ``patterns`` (both ``GeneSets``) plus the float
    parameter ``T``.  Output: ``dictionary_set`` (``DictionarySet``).
    """

    block_base_name = "ENRICHMENT_COM"
    name = "Comodule Enrichment"

    is_abstract = False
    block_group = GroupType.TESTING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _cs_1 = InputBlockField(
        name="gs", order_num=10,
        required_data_type="GeneSets", required=True)
    H = InputBlockField(
        name="patterns", order_num=11,
        required_data_type="GeneSets", required=True)
    # Declared under the field name "T"; execute() reads it as ``self.T``.
    _t = ParamField(
        name="T", order_num=12,
        title="Enrichment threshold",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val="0.05")

    # NOTE(review): this attribute name shadows the builtin ``dict`` inside
    # the class body; it is kept because it is part of the class interface.
    dict = OutputBlockField(
        name="dictionary_set", provided_data_type="DictionarySet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task signature exists until ``execute``."""
        super(EnrichmentNoTBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the enrichment task signature, store the block, dispatch."""
        self.clean_errors()
        gene_sets = self.get_input_var("gs")
        pattern_sets = self.get_input_var("patterns")
        task_kwargs = {
            "T": self.T,
            "gs": gene_sets,
            "patterns": pattern_sets,
            "base_filename": "%s_%s_enrich" % (self.uuid, 'enrichment_cont'),
        }
        self.celery_task = wrapper_task.s(
            enrichment_no_t_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the task result as ``dictionary_set`` and store the block."""
        self.set_out_var("dictionary_set", flt_es)
        exp.store_block(self)
Ejemplo n.º 4
0
class MergeGeneSetWithPlatformAnnotation(GenericBlock):
    """Processing block that submits ``map_gene_sets_to_probes`` as a task.

    Inputs: ``gs`` (``GeneSets``) and ``ann`` (``PlatformAnnotation``).
    Output: ``gs`` (``GeneSets``).
    """

    block_base_name = "MERGE_GS_GPL_ANN"
    name = "Merge gene set with platform"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_gs = InputBlockField(
        name="gs", order_num=10,
        required_data_type="GeneSets", required=True)
    _input_ann = InputBlockField(
        name="ann", order_num=20,
        required_data_type="PlatformAnnotation", required=True)

    _gs = OutputBlockField(
        name="gs",
        field_type=FieldType.HIDDEN,
        init_val=None,
        provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task signature exists until ``execute``."""
        super(MergeGeneSetWithPlatformAnnotation, self).__init__(
            *args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the mapping task signature, store the block, dispatch."""
        self.clean_errors()
        source_sets = self.get_input_var("gs")
        annotation = self.get_input_var("ann")
        task_kwargs = {
            "base_dir": exp.get_data_folder(),
            "base_filename": "%s_merged" % self.uuid,
            # Only the annotation's gene sets are handed to the task.
            "ann_gene_sets": annotation.gene_sets,
            "src_gene_sets": source_sets,
        }
        self.celery_task = wrapper_task.s(
            map_gene_sets_to_probes, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Publish the task result as the ``gs`` output and store the block."""
        self.set_out_var("gs", gs)
        exp.store_block(self)
Ejemplo n.º 5
0
class SvdSubAgg(GenericBlock):
    """Abstract aggregation block that submits ``aggregation_task``.

    The class-level ``mode`` string (empty here) is passed to the task and
    also embedded in the output base filename.
    """

    is_abstract = True
    block_group = GroupType.AGGREGATION

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _mRNA_es = InputBlockField(
        name="mRNA_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _miRNA_es = InputBlockField(
        name="miRNA_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )
    _interaction = InputBlockField(
        name="interaction",
        order_num=30,
        required_data_type="BinaryInteraction",
        required=True,
    )

    c = ParamField(
        name="c",
        title="Constant c",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=1.0,
    )

    agg_es = OutputBlockField(
        name="agg_es",
        provided_data_type="ExpressionSet",
    )

    mode = ""

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task signature exists until ``execute``."""
        super(SvdSubAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the aggregation task signature, store the block, dispatch."""
        self.clean_errors()
        m_rna = self.get_input_var("mRNA_es")
        mi_rna = self.get_input_var("miRNA_es")
        interaction = self.get_input_var("interaction")

        task_kwargs = {
            "mode": self.mode,
            "c": self.c,
            "m_rna_es": m_rna,
            "mi_rna_es": mi_rna,
            "interaction_matrix": interaction,
            "base_filename": "%s_%s_agg" % (self.uuid, self.mode),
        }
        self.celery_task = wrapper_task.s(
            aggregation_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        """Publish the aggregated expression set and store the block."""
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
Ejemplo n.º 6
0
class GlobalTest(GenericBlock):
    """Processing block that submits ``global_test_task`` as a background task.

    Inputs: ``es`` (``ExpressionSet``) and ``gs`` (``GeneSets``).
    Output: ``result`` (``TableResult``).
    """

    block_base_name = "GLOBAL_TEST"
    name = "Goeman global test"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _input_gs = InputBlockField(
        name="gs",
        order_num=20,
        required_data_type="GeneSets",
        required=True,
    )

    _result = OutputBlockField(
        name="result",
        field_type=FieldType.STR,
        provided_data_type="TableResult",
        init_val=None,
    )

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["gt_result.html"],
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block and pre-create its ``TableResult`` holder.

        The experiment is looked up through ``self.exp_id`` so the table
        can be rooted in that experiment's data folder.
        """
        super(GlobalTest, self).__init__(*args, **kwargs)
        self.celery_task = None

        experiment = Experiment.get_exp_by_id(self.exp_id)
        self.result = TableResult(
            base_dir=experiment.get_data_folder(),
            base_filename="%s_gt_result" % self.uuid)
        self.result.headers = [
            "p-value", "Statistic", "Expected", "Std.dev", "#Cov",
        ]

    def execute(self, exp, *args, **kwargs):
        """Build the global-test task signature, store the block, dispatch."""
        self.clean_errors()
        task_kwargs = {
            "es": self.get_input_var("es"),
            "gene_sets": self.get_input_var("gs"),
            # The pre-created table is handed to the task.
            "table_result": self.result,
        }
        self.celery_task = wrapper_task.s(
            global_test_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Keep the returned table, publish it as ``result``, store the block."""
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
Ejemplo n.º 7
0
class MergeExpressionSets(GenericBlock):
    """Processing block that submits ``merge_two_es`` as a background task.

    Inputs: ``es_1`` and ``es_2`` (both ``ExpressionSet``).
    Output: ``merged_es`` (``ExpressionSet``).
    """

    block_base_name = "MergeES"
    name = "Merge ES by concatenation"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es_1 = InputBlockField(
        name="es_1", title="Set 1", order_num=10,
        required_data_type="ExpressionSet", required=True)
    _es_2 = InputBlockField(
        name="es_2", title="Set 2", order_num=20,
        required_data_type="ExpressionSet", required=True)

    merged_es = OutputBlockField(
        name="merged_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task signature exists until ``execute``."""
        super(MergeExpressionSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the merge task signature, store the block, dispatch."""
        self.clean_errors()
        task_kwargs = {
            "es_1": self.get_input_var("es_1"),
            "es_2": self.get_input_var("es_2"),
            "base_filename": "%s_merged" % self.uuid,
        }
        self.celery_task = wrapper_task.s(
            merge_two_es, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Publish the merged expression set and store the block."""
        self.set_out_var("merged_es", es)
        exp.store_block(self)
Ejemplo n.º 8
0
class ThresholdBlock(GenericBlock):
    """Block that submits ``threshold_task`` as a background task.

    Input: ``es`` (``ExpressionSet``) plus the float parameter ``T``.
    Output: ``gene_sets`` (``GeneSets``).
    """

    block_base_name = "THRESHOLD"
    name = "Threshold"

    is_abstract = False
    block_group = GroupType.SNMNMF

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es", order_num=10,
        required_data_type="ExpressionSet", required=True)
    # Declared under the field name "T"; execute() reads it as ``self.T``.
    t = ParamField(
        name="T", title="Threshold",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=0.1)

    flt_es = OutputBlockField(name="gene_sets", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task signature exists until ``execute``."""
        super(ThresholdBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the threshold task signature, store the block, dispatch."""
        self.clean_errors()
        expression_set = self.get_input_var("es")
        # T is a parameter: it is taken from the attribute, not from
        # get_input_var().
        task_kwargs = {
            "es": expression_set,
            "T": self.T,
            "base_filename": "%s_%s_thr" % (self.uuid, 'threshold'),
        }
        self.celery_task = wrapper_task.s(
            threshold_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the task result as ``gene_sets`` and store the block."""
        self.set_out_var("gene_sets", flt_es)
        exp.store_block(self)
Ejemplo n.º 9
0
class UploadInteraction(GenericBlock):
    """Input block that turns an uploaded matrix into a ``BinaryInteraction``."""

    block_base_name = "GENE_INTERACTION"
    block_group = GroupType.INPUT_DATA
    name = "Upload gene interaction"

    # Valid parameters move the block straight to "done"; there is no
    # separate execution step in this action list.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "done"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    upload_interaction = ParamField(
        "upload_interaction",
        title="Interaction matrix",
        order_num=10,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
    )
    row_units = ParamField(
        "row_units",
        title="Row units",
        order_num=11,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )
    col_units = ParamField(
        "col_units",
        title="Column units",
        order_num=12,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )

    _interaction = OutputBlockField(
        name="interaction", provided_data_type="BinaryInteraction")

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Store the uploaded matrix as a ``BinaryInteraction`` output.

        The upload is read as a data frame, written into a new
        ``BinaryInteraction`` created from the experiment's data folder
        and this block's uuid, tagged with the row/column unit
        parameters, and published as ``interaction``.
        """
        matrix_df = self.upload_interaction.get_as_data_frame()

        bi = BinaryInteraction(exp.get_data_folder(), str(self.uuid))
        bi.store_matrix(matrix_df)

        bi.row_units = self.row_units
        bi.col_units = self.col_units

        self.set_out_var("interaction", bi)
        exp.store_block(self)
Ejemplo n.º 10
0
class ZScoreBlock(GenericBlock):
    """Normalization block that submits ``zscore_task`` as a background task.

    Input: ``es`` (``ExpressionSet``).
    Output: ``flt_zscore_es`` (``ExpressionSet``).
    """

    block_base_name = "ZSCORE_NORM"
    name = "Z-score Normalization"

    is_abstract = False
    block_group = GroupType.NORMALIZATION

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es", order_num=10,
        required_data_type="ExpressionSet", required=True)

    flt_es = OutputBlockField(
        name="flt_zscore_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task signature exists until ``execute``."""
        super(ZScoreBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the z-score task signature, store the block, dispatch."""
        self.clean_errors()
        expression_set = self.get_input_var("es")

        task_kwargs = {
            "es": expression_set,
            "base_filename": "%s_%s_flt" % (self.uuid, 'zscore'),
        }
        self.celery_task = wrapper_task.s(
            zscore_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the task result as ``flt_zscore_es`` and store the block."""
        self.set_out_var("flt_zscore_es", flt_es)
        exp.store_block(self)
Ejemplo n.º 11
0
class GetBroadInstituteGeneSet(GenericBlock):
    """Input block that exposes a selected ``BroadInstituteGeneSet`` as ``gs``.

    The selectable entries come from ``BroadInstituteGeneSet.get_all_meta()``
    through the ``all_gene_sets`` property, which the ``msigdb_id`` select
    field names as its provider.
    """

    block_base_name = "BI_GENE_SET"
    block_group = GroupType.INPUT_DATA
    name = "Get MSigDB Gene Set"

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "done",
                     reload_block_in_client=True),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    # TODO: maybe create more general solution ?
    _all_gene_sets = BlockField("all_gene_sets",
                                title="",
                                input_type=InputType.HIDDEN,
                                field_type=FieldType.RAW,
                                is_a_property=True)

    msigdb_id = ParamField(
        name="msigdb_id",
        title="MSigDB gene set",
        input_type=InputType.SELECT,
        field_type=FieldType.INT,
        init_val=0,  # TODO: fix hardcoded value
        select_provider="all_gene_sets")

    _gs = OutputBlockField(name="gs",
                           field_type=FieldType.HIDDEN,
                           provided_data_type="GeneSets")

    @property
    def all_gene_sets(self):
        """Return the metadata of every available gene set."""
        return BroadInstituteGeneSet.get_all_meta()

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Publish the selected gene set as ``gs`` and run the parent handler.

        The signature accepts ``*args``/``**kwargs`` like the other
        ``on_params_is_valid`` overrides in this file; extras are forwarded
        to the parent unchanged, so a call with only ``exp`` behaves
        exactly as before.

        :param exp: experiment object passed on to the parent handler.
        """
        # NOTE(review): ``objects.get(pk=...)`` looks like a Django ORM
        # lookup, which would raise if no row matches ``msigdb_id`` --
        # confirm how the caller handles that.
        selected = BroadInstituteGeneSet.objects.get(pk=self.msigdb_id)
        self.set_out_var("gs", selected.get_gene_sets())

        super(GetBroadInstituteGeneSet, self).on_params_is_valid(
            exp, *args, **kwargs)
Ejemplo n.º 12
0
class PatternSearch(GenericBlock):
    """Block that submits ``pattern_search`` as a background task.

    Inputs: ``mRNA`` / ``miRNA`` expression sets and ``gene2gene`` /
    ``miRNA2gene`` binary interactions (the miRNA ones are optional).
    Parameters: ``genes_num``, ``d``, ``min_imp`` and ``metric``.
    Output: ``patterns`` (``GeneSets``).
    """

    block_base_name = "PattSearch"
    name = "Pattern Search"
    block_group = GroupType.PATTERN_SEARCH

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _m_rna = InputBlockField(name="mRNA",
                             order_num=10,
                             required_data_type="ExpressionSet",
                             required=True)
    _mi_rna = InputBlockField(name="miRNA",
                              order_num=20,
                              required_data_type="ExpressionSet",
                              required=False)

    gene2gene = InputBlockField(name="gene2gene",
                                order_num=30,
                                required_data_type="BinaryInteraction",
                                required=True)
    miRNA2gene = InputBlockField(name="miRNA2gene",
                                 order_num=31,
                                 required_data_type="BinaryInteraction",
                                 required=False)

    genes_num = ParamField(name="genes_num",
                           title="Number of Genes",
                           order_num=10,
                           input_type=InputType.TEXT,
                           field_type=FieldType.INT,
                           init_val=100)

    # upload_gene2gene_platform = ParamField("upload_gene2gene_platform", title="PPI platform", order_num=32,
    #                                        input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM)

    # upload_mirna_platform = ParamField("upload_mirna_platform", title="miRNA platform", order_num=33,
    #                                    input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM, required=False)

    d = ParamField(name="d",
                   order_num=70,
                   title="d",
                   input_type=InputType.TEXT,
                   field_type=FieldType.INT,
                   init_val=2)
    min_imp = ParamField(name="min_imp",
                         order_num=80,
                         title="Minimal improvement",
                         input_type=InputType.TEXT,
                         field_type=FieldType.FLOAT,
                         init_val=0.06)

    # Declared under the field name "metric"; execute() reads it as
    # ``self.metric``.
    _metric = ParamField(
        "metric",
        title="Metric",
        order_num=40,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="mutual_information",
        options={
            "inline_select_provider":
            True,
            "select_options":
            [["mutual_information", "Mutual Information"],
             ['normed_mutual_information', "Normed Mutual Information"],
             ['square_error', "Square Error"], ['correlation', "Correlation"],
             ['t-test', "TTest"], ['wilcoxon', "Wilcoxon"]]
        })
    patterns = OutputBlockField(name="patterns", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task signature exists until ``execute``."""
        super(PatternSearch, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the ``pattern_search`` task signature, store the block, dispatch.

        :param exp: experiment object; used for logging, passed to the
            task and used to persist this block.
        """
        self.clean_errors()
        exp.log(self.uuid, "Execute called")

        self.celery_task = wrapper_task.s(
            pattern_search,
            exp,
            self,
            m_rna_es=self.get_input_var("mRNA"),
            mi_rna_es=self.get_input_var("miRNA"),
            gene2gene=self.get_input_var("gene2gene"),
            miRNA2gene=self.get_input_var("miRNA2gene"),
            radius=self.d,
            min_imp=self.min_imp,
            number_of_genes=self.genes_num,
            # "metric" is a ParamField, not an input port, so it is read
            # as an attribute like the other parameters above rather
            # than through get_input_var().
            metric=self.metric,
            base_filename="%s_comodule_sets" % self.uuid,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Log success, publish the result as ``patterns``, store the block."""
        exp.log(self.uuid, "Success")
        self.set_out_var("patterns", gs)
        exp.store_block(self)
Ejemplo n.º 13
0
class UserUploadComplex(GenericBlock):
    """Input block for uploading mRNA, miRNA and methylation matrices.

    Every matrix comes with its own platform id, orientation and CSV
    separator parameters; a phenotype matrix can be supplied as well.
    The upload is handed to a Celery task and the expression sets it
    reports back are kept in hidden output fields.
    """
    block_base_name = "UPLOAD_CMPLX"
    block_group = GroupType.INPUT_DATA
    name = "Upload mRna/miRna/methyl"

    # State transitions: parameters are validated first, then the upload
    # is processed; an error sends the block back to "valid_params".
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord(
            "on_params_is_valid", ["validating_params"], "valid_params"
        ),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord(
            "process_upload",
            ["valid_params", "processing_upload"],
            "processing_upload",
            "Process uploaded data",
        ),
        ActionRecord(
            "success",
            ["processing_upload"],
            "done",
            reload_block_in_client=True,
        ),
        ActionRecord("error", ["processing_upload"], "valid_params"),
    ])

    # ---- mRNA parameters (order_num 10-14) ----
    m_rna_matrix = ParamField(
        "m_rna_matrix",
        title="mRNA expression",
        order_num=10,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
    )
    m_rna_platform = ParamField(
        "m_rna_platform",
        title="Platform ID",
        order_num=11,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )
    m_rna_unit = ParamField(
        "m_rna_unit",
        title="Working unit [used when platform is unknown]",
        order_num=12,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=False,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["RefSeq", "RefSeq"],
                ["Entrez", "EntrezID"],
                ["Symbol", "Symbol"],
            ],
        },
    )
    m_rna_matrix_ori = ParamField(
        "m_rna_matrix_ori",
        title="Matrix orientation",
        order_num=13,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"],
            ],
        },
    )
    csv_sep_m_rna = ParamField(
        "csv_sep_m_rna",
        title="CSV separator symbol",
        order_num=14,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # ---- miRNA parameters (order_num 20-24) ----
    mi_rna_matrix = ParamField(
        "mi_rna_matrix",
        title=u"μRNA expression",
        order_num=20,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        required=False,
    )
    mi_rna_platform = ParamField(
        "mi_rna_platform",
        title="Platform ID",
        order_num=21,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )
    mi_rna_unit = ParamField(
        "mi_rna_unit",
        title="Working unit [used when platform is unknown]",
        order_num=22,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=False,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["RefSeq", "RefSeq"],
                ["mirbase", "miRBase ID"],
            ],
        },
    )
    mi_rna_matrix_ori = ParamField(
        "mi_rna_matrix_ori",
        title="Matrix orientation",
        order_num=23,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"],
            ],
        },
    )
    csv_sep_mi_rna = ParamField(
        "csv_sep_mi_rna",
        title="CSV separator symbol",
        order_num=24,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # ---- methylation parameters (order_num 30-34) ----
    methyl_matrix = ParamField(
        "methyl_matrix",
        title="Methylation expression",
        order_num=30,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        required=False,
    )
    methyl_platform = ParamField(
        "methyl_platform",
        title="Platform ID",
        order_num=31,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False,
    )
    # order_num 32 is unused: a "methyl_unit" parameter was declared here
    # once and is currently disabled.
    methyl_matrix_ori = ParamField(
        "methyl_matrix_ori",
        title="Matrix orientation",
        order_num=33,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"],
            ],
        },
    )
    csv_sep_methyl = ParamField(
        "csv_sep_methyl",
        title="CSV separator symbol",
        order_num=34,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # ---- phenotype parameters (order_num 40-50) ----
    pheno_matrix = ParamField(
        "pheno_matrix",
        title="Phenotype matrix",
        order_num=40,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM,
        required=False,
    )
    csv_sep_pheno = ParamField(
        "csv_sep_pheno",
        title="CSV separator symbol",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ],
        },
    )

    # Field backed by the `is_sub_pages_visible` property defined below.
    _is_sub_pages_visible = BlockField(
        "is_sub_pages_visible", FieldType.RAW, is_a_property=True
    )

    # Hidden outputs, one expression set per uploaded matrix.
    _m_rna_es = OutputBlockField(
        name="m_rna_es",
        field_type=FieldType.HIDDEN,
        provided_data_type="ExpressionSet",
    )
    _mi_rna_es = OutputBlockField(
        name="mi_rna_es",
        field_type=FieldType.HIDDEN,
        provided_data_type="ExpressionSet",
    )
    _methyl_es = OutputBlockField(
        name="methyl_es",
        field_type=FieldType.HIDDEN,
        provided_data_type="ExpressionSet",
    )

    mrna_gpl_file = BlockField("mrna_gpl_file", FieldType.CUSTOM, None)
    mirna_gpl_file = BlockField("mirna_gpl_file", FieldType.CUSTOM, None)
    methyl_gpl_file = BlockField("methyl_gpl_file", FieldType.CUSTOM, None)

    # Description of the sub-page offered by this block.
    pages = BlockField(
        "pages",
        FieldType.RAW,
        init_val={
            "assign_phenotype_classes": {
                "title": "Assign phenotype classes",
                "resource": "assign_phenotype_classes",
                "widget": "widgets/assign_phenotype_classes.html",
            },
        },
    )

    @property
    def is_sub_pages_visible(self):
        """Return True when the block state is one that exposes sub-pages."""
        # NOTE(review): only "done" is a target state of the actions
        # declared on this class; the other names may come from elsewhere.
        return self.state in (
            "source_was_preprocessed",
            "sample_classes_assigned",
            "ready",
            "done",
        )

    def __init__(self, *args, **kwargs):
        """Initialise the block; no Celery task is attached yet."""
        super(UserUploadComplex, self).__init__(*args, **kwargs)
        self.celery_task = None

    def process_upload(self, exp, *args, **kwargs):
        """Clear old errors, store the block and queue the upload task."""
        self.clean_errors()
        self.celery_task = wrapper_task.s(user_upload_complex_task, exp, self)
        exp.store_block(self)
        self.celery_task.apply_async()

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return the phenotype of the first stored expression set.

        The outputs are tried in the order mRNA, miRNA, methylation.

        Raises:
            Exception: if none of the three outputs holds a value.
        """
        stored = [
            self.get_out_var(key)
            for key in ("m_rna_es", "mi_rna_es", "methyl_es")
        ]
        es = next((item for item in stored if item is not None), None)
        if es is None:
            raise Exception("No data was stored before")

        return prepare_phenotype_for_js_from_es(es)

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Write a user-defined class column into every stored expression set.

        ``request.body`` is parsed as JSON and must provide the keys
        "user_class_title" and "classes". The phenotype frame of the first
        available expression set (mRNA, miRNA, methylation) receives the
        new column and is then stored into each expression set present.

        Raises:
            Exception: if none of the three outputs holds a value.
        """
        stored = [
            self.get_out_var(key)
            for key in ("m_rna_es", "mi_rna_es", "methyl_es")
        ]
        es = next((item for item in stored if item is not None), None)
        if es is None:
            raise Exception("No data was stored before")

        pheno_df = es.get_pheno_data_frame()
        received = json.loads(request.body)

        classes = received["classes"]
        pheno_df[received["user_class_title"]] = classes

        for work_es in stored:
            if work_es is None:
                continue
            work_es.pheno_metadata["user_class_title"] = received[
                "user_class_title"]
            work_es.store_pheno_data_frame(pheno_df)

        exp.store_block(self)

    def success(self, exp, m_rna_es, mi_rna_es, methyl_es):
        """Store each truthy result under its output name, then save the block."""
        produced = (
            ("m_rna_es", m_rna_es),
            ("mi_rna_es", mi_rna_es),
            ("methyl_es", methyl_es),
        )
        for out_name, value in produced:
            if value:
                self.set_out_var(out_name, value)
        exp.store_block(self)
Ejemplo n.º 14
0
class PatternFilter(GenericBlock):
    """Filter block that passes gene sets and expression data to a filter task.

    The selected metric and the "# of best" value are forwarded to
    ``pattern_filter_task``; its result is exposed as the "patterns" output.
    """
    block_base_name = "PattFilter"
    name = "Pattern Filter"
    block_group = GroupType.FILTER

    is_block_supports_auto_execution = True

    # Parameter validation transitions, followed by the shared execute ones.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports; the miRNA expression set is the only optional one.
    _m_rna = InputBlockField(
        name="mRNA",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _mi_rna = InputBlockField(
        name="miRNA",
        order_num=20,
        required_data_type="ExpressionSet",
        required=False,
    )
    _gs = InputBlockField(
        name="gs",
        order_num=30,
        required_data_type="GeneSets",
        required=True,
    )

    metric = ParamField(
        "metric",
        title="Metric",
        order_num=40,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="mutual_information",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mutual_information", "Mutual Information"],
                ['normed_mutual_information', "Normed Mutual Information"],
                ['square_error', "Square Error"],
                ['correlation', "Correlation"],
                ['t-test', "TTest"],
                ['wilcoxon', "Wilcoxon"],
            ],
        },
    )

    n_best = ParamField(
        name="n_best",
        order_num=50,
        title="# of best",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=10,
    )

    patterns = OutputBlockField(name="patterns", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no Celery task is attached yet."""
        super(PatternFilter, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear old errors, build the filter task signature and queue it."""
        self.clean_errors()

        task_kwargs = dict(
            m_rna_es=self.get_input_var("mRNA"),
            mi_rna_es=self.get_input_var("miRNA"),
            gene_sets=self.get_input_var("gs"),
            metric=self.metric,
            n_best=self.n_best,
            base_filename="%s_comodule_sets" % self.uuid,
        )
        self.celery_task = wrapper_task.s(pattern_filter_task, exp, self,
                                          **task_kwargs)
        # Persist the block before the task is dispatched.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Expose the task result as the "patterns" output and store the block."""
        self.set_out_var("patterns", gs)
        exp.store_block(self)
Ejemplo n.º 15
0
class GenericClassifier(GenericBlock):
    """Abstract base for classifier blocks working on train/test expression sets.

    Subclasses provide ``classifier_name`` and implement ``collect_options``;
    ``execute`` then passes the collected options together with both
    expression sets to the ``apply_classifier`` task.
    """
    block_group = GroupType.CLASSIFIER
    is_abstract = True

    is_block_supports_auto_execution = True
    classifier_name = ""
    # Block behavior
    _block_actions = ActionsList([])
    _block_actions.extend(save_params_actions_list)
    _block_actions.extend(execute_block_actions_list)

    # User defined parameters
    # Input ports definition
    _train_es = InputBlockField(name="train_es", order_num=10,
                                required_data_type="ExpressionSet",
                                required=True)
    _test_es = InputBlockField(name="test_es", order_num=20,
                               required_data_type="ExpressionSet",
                               required=True)

    # Provided outputs
    _result = OutputBlockField(name="result", field_type=FieldType.CUSTOM,
                               provided_data_type="ClassifierResult", init_val=None)

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task and empty option dictionaries."""
        super(GenericClassifier, self).__init__(*args, **kwargs)

        self.celery_task = None
        self.classifier_options = {}
        self.fit_options = {}

    @abstractmethod
    def collect_options(self):
        """
            Should populate `self.classifier_options` and `self.fit_options`
            from block parameters.
        """
        pass

    def get_option_safe(self, name, target_type=None):
        """Read attribute `name`, optionally converting it with `target_type`.

        Args:
            name: attribute name to look up on this block.
            target_type: optional callable applied to the raw value.

        Returns:
            The (converted) value, or None when the attribute is missing,
            is falsy (so 0, False and "" count as "not set"), or cannot be
            converted.
        """
        if not hasattr(self, name):
            return None

        raw = getattr(self, name)
        if not raw:
            return None
        if not target_type:
            return raw

        try:
            return target_type(raw)
        except Exception:
            # Best-effort conversion: an unparsable value counts as "not
            # set". Unlike a bare `except:`, this lets KeyboardInterrupt
            # and SystemExit propagate.
            return None

    def collect_option_safe(self, name, target_type=None, target_name=None):
        """Copy a block attribute into `self.classifier_options` when it is set.

        Args:
            name: attribute name to read via `get_option_safe`.
            target_type: optional converter forwarded to `get_option_safe`.
            target_name: key to store under; falls back to `name`.

        Returns:
            The value obtained from `get_option_safe`; falsy values are
            returned but not stored.
        """
        value = self.get_option_safe(name, target_type)
        if value:
            key = target_name if target_name else name
            self.classifier_options[key] = value
        return value

    def execute(self, exp,  *args, **kwargs):
        """Reset the result, collect options and queue the classifier task."""
        self.set_out_var("result", None)
        self.collect_options()

        train_es = self.get_input_var("train_es")
        test_es = self.get_input_var("test_es")

        self.celery_task = wrapper_task.s(
            apply_classifier,
            exp=exp, block=self,

            train_es=train_es, test_es=test_es,

            classifier_name=self.classifier_name,
            classifier_options=self.classifier_options,
            fit_options=self.fit_options,

            base_folder=exp.get_data_folder(),
            base_filename="%s_%s" % (self.uuid, self.classifier_name),
        )
        # Persist the block before the task is dispatched.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Store the task result as the "result" output variable."""
        # We store obtained result as an output variable
        self.set_out_var("result", result)
        exp.store_block(self)

    def reset_execution(self, exp, *args, **kwargs):
        """Clear errors and the previous result, then store the block."""
        self.clean_errors()
        self.set_out_var("result", None)
        exp.store_block(self)
Ejemplo n.º 16
0
class GeneSetAgg(GenericBlock):
    """Processing block that aggregates an expression set over gene sets.

    The expression set, the gene sets and the chosen aggregation method
    are passed to the ``do_gs_agg`` task; its result becomes the "agg_es"
    output.
    """
    block_base_name = "GENE_SET_AGG"
    name = "Gene sets aggregation"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    # Parameter validation transitions, followed by the shared execute ones.
    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es",
                          order_num=10,
                          required_data_type="ExpressionSet",
                          required=True)
    _gs = InputBlockField(name="gs",
                          order_num=20,
                          required_data_type="GeneSets",
                          required=True)

    # The median option value used to be misspelled "media"; it now matches
    # the "median" key used by the CV aggregation block in this file.
    # NOTE(review): confirm that do_gs_agg dispatches on "median".
    agg_method = ParamField("agg_method",
                            title="Aggregate method",
                            order_num=50,
                            input_type=InputType.SELECT,
                            field_type=FieldType.STR,
                            init_val="mean",
                            options={
                                "inline_select_provider":
                                True,
                                "select_options": [["mean", "Mean"],
                                                   ["median", "Median"]]
                            })

    agg_es = OutputBlockField(name="agg_es",
                              provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no Celery task is attached yet."""
        super(GeneSetAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear old errors, build the aggregation task signature and queue it."""
        self.clean_errors()
        es = self.get_input_var("es")
        gs = self.get_input_var("gs")

        # Blocks saved while the option was misspelled may still carry the
        # old value; translate it so they keep selecting the median.
        method = self.agg_method
        if method == "media":
            method = "median"

        base_filename = "%s_gs_agg" % (self.uuid, )

        self.celery_task = wrapper_task.s(do_gs_agg, exp, self, es, gs,
                                          method, base_filename)

        # Persist the block before the task is dispatched.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        """Expose the task result as the "agg_es" output and store the block."""
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
Ejemplo n.º 17
0
class GeneSetAggCV(GenericBlock):
    """Aggregation block that processes a train and a test expression set.

    Both expression sets, the gene sets and the chosen method are passed
    to the ``agg_task_cv`` task, which reports back one aggregated
    expression set for each of them.
    """
    block_group = GroupType.AGGREGATION
    block_base_name = "CV_GS_A"
    name = "CV Gene Sets Aggregation"
    is_block_supports_auto_execution = True

    # Parameter validation transitions, followed by the shared execute ones.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_train_es = InputBlockField(
        name="train_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _input_test_es = InputBlockField(
        name="test_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )
    _input_gs = InputBlockField(
        name="gs",
        order_num=30,
        required_data_type="GeneSets",
        required=True,
    )

    agg_method = ParamField(
        "agg_method",
        title="Aggregate method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="mean",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mean", "Mean"],
                ["median", "Median"],
                ["pca", "PCA"],
            ],
        },
    )

    out_train_es = OutputBlockField(
        name="out_train_es", provided_data_type="ExpressionSet"
    )
    out_test_es = OutputBlockField(
        name="out_test_es", provided_data_type="ExpressionSet"
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block; no Celery task is attached yet."""
        super(GeneSetAggCV, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear old errors, build the CV aggregation task signature and queue it."""
        self.clean_errors()

        task_kwargs = dict(
            train_es=self.get_input_var("train_es"),
            test_es=self.get_input_var("test_es"),
            gene_sets=self.get_input_var("gs"),
            method=self.agg_method,
            # The file name carries "pca_cv" whichever method is selected.
            base_filename="%s_pca_cv_agg" % (self.uuid, ),
        )
        self.celery_task = wrapper_task.s(agg_task_cv, exp, self,
                                          **task_kwargs)
        # Persist the block before the task is dispatched.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, out_train_es, out_test_es):
        """Expose both aggregated expression sets as outputs and store the block."""
        results = (
            ("out_train_es", out_train_es),
            ("out_test_es", out_test_es),
        )
        for out_name, value in results:
            self.set_out_var(out_name, value)
        exp.store_block(self)
Ejemplo n.º 18
0
class FeatureSelectionByCut(GenericBlock):
    """Filter block that cuts an expression set using a ranking table.

    The ranking property, threshold and cut direction chosen by the user
    are forwarded to the ``feature_selection_by_cut`` task together with
    the expression set and the rank table.
    """
    block_base_name = "FS_BY_CUT"
    block_group = GroupType.FILTER
    name = "Feature Selection by Ranking"

    is_block_supports_auto_execution = True

    # Parameter validation transitions, followed by the shared execute ones.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _rank_table = InputBlockField(
        name="rank_table",
        order_num=20,
        required_data_type="TableResult",
        required=True,
    )

    # Field backed by the `cut_property_options` property defined below.
    _cut_property_options = BlockField(
        name="cut_property_options",
        field_type=FieldType.RAW,
        is_a_property=True,
    )
    # Plain text input; the select-based variant fed by
    # "cut_property_options" is currently switched off.
    cut_property = ParamField(
        name="cut_property",
        title="Ranking property to use",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        order_num=10,
    )
    threshold = ParamField(
        name="threshold",
        title="Threshold for cut",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
    )
    _cut_direction_options = BlockField(
        name="cut_direction_options", field_type=FieldType.RAW
    )
    cut_direction_options = ["<", "<=", ">=", ">"]
    cut_direction = ParamField(
        name="cut_direction",
        title="Direction of cut",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        select_provider="cut_direction_options",
        order_num=30,
        options={
            "inline_select_provider": True,
            "select_options": [[op, op] for op in ["<", "<=", ">=", ">"]],
        },
    )

    es = OutputBlockField(name="es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no Celery task is attached yet."""
        super(FeatureSelectionByCut, self).__init__(*args, **kwargs)
        self.celery_task = None

    @property
    def cut_property_options(self):
        """List the rank table headers as ``{"pk": h, "str": h}`` entries.

        Returns None when the rank table input is falsy or has no
        ``headers`` attribute.
        """
        table = self.get_input_var("rank_table")
        if not table or not hasattr(table, "headers"):
            return None

        choices = []
        for column in table.headers:
            choices.append({"pk": column, "str": column})
        return choices

    def execute(self, exp, *args, **kwargs):
        """Clear old errors, build the feature-selection task signature and queue it."""
        self.clean_errors()

        task_kwargs = dict(
            exp=exp,
            block=self,
            src_es=self.get_input_var("es"),
            rank_table=self.get_input_var("rank_table"),
            cut_property=self.cut_property,
            threshold=self.threshold,
            cut_direction=self.cut_direction,
            base_filename="%s_feature_selection" % self.uuid,
        )
        self.celery_task = wrapper_task.s(feature_selection_by_cut,
                                          **task_kwargs)
        # Persist the block before the task is dispatched.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Expose the filtered expression set as the "es" output and store the block."""
        self.set_out_var("es", es)
        exp.store_block(self)
Ejemplo n.º 19
0
class NCF(GenericBlock):
    """Classifier block for the "Network-Constrained Forest" (``ncf``).

    ``execute`` gathers the classifier options from the block's parameters
    and inputs and dispatches ``apply_ncf_classifier`` through
    ``wrapper_task``; ``success`` publishes the outcome on the "result"
    output.
    """
    block_group = GroupType.CLASSIFIER

    block_base_name = "NCF"
    name = "Network-Constrained Forest"

    classifier_name = "ncf"

    is_abstract = False

    is_block_supports_auto_execution = True

    # Block behavior
    _block_actions = ActionsList([])
    _block_actions.extend(save_params_actions_list)
    _block_actions.extend(execute_block_actions_list)

    # Interaction inputs
    gene2gene = InputBlockField(name="gene2gene",
                                order_num=30,
                                required_data_type="BinaryInteraction",
                                required=True)
    miRNA2gene = InputBlockField(name="miRNA2gene",
                                 order_num=31,
                                 required_data_type="BinaryInteraction",
                                 required=True)

    # Expression-set inputs (train/test for mRNA and miRNA)
    _m_train_es = InputBlockField(name="mRNA_train_es",
                                  order_num=10,
                                  required_data_type="ExpressionSet",
                                  required=True)
    _m_test_es = InputBlockField(name="mRNA_test_es",
                                 order_num=20,
                                 required_data_type="ExpressionSet",
                                 required=True)
    _mi_train_es = InputBlockField(name="miRNA_train_es",
                                   order_num=21,
                                   required_data_type="ExpressionSet",
                                   required=True)
    _mi_test_es = InputBlockField(name="miRNA_test_es",
                                  order_num=22,
                                  required_data_type="ExpressionSet",
                                  required=True)

    # Provided outputs
    _result = OutputBlockField(name="result",
                               field_type=FieldType.CUSTOM,
                               provided_data_type="ClassifierResult",
                               init_val=None)

    # User defined parameters
    n_estimators = ParamField(name="n_estimators",
                              title="The number of trees in the forest",
                              input_type=InputType.TEXT,
                              field_type=FieldType.INT,
                              init_val="1000",
                              order_num=41)

    walk_max_length = ParamField(name="walk_max_length",
                                 title="Walk max length",
                                 input_type=InputType.TEXT,
                                 field_type=FieldType.INT,
                                 init_val="10",
                                 order_num=50)

    criterion = ParamField(
        name="criterion",
        title="The function to measure the quality of a split",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        order_num=60,
        options={
            "inline_select_provider":
            True,
            "select_options": [["gini", "Gini impurity"],
                               ["entropy", "Information gain"]]
        })

    eps = ParamField(name="eps",
                     title="Eps",
                     input_type=InputType.TEXT,
                     field_type=FieldType.FLOAT,
                     init_val="0.01",
                     order_num=70)

    max_depth = ParamField(name="max_depth",
                           title="The maximum depth of the tree",
                           input_type=InputType.TEXT,
                           field_type=FieldType.INT,
                           init_val="2",
                           order_num=80)

    min_samples_split = ParamField(
        name="min_samples_split",
        title="The minimum number of samples to split an internal node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val="2",
        order_num=90,
    )

    min_samples_leaf = ParamField(
        name="min_samples_leaf",
        title="The minimum number of samples to be at a leaf node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val="2",
        order_num=100)

    bootstrap = ParamField(name="bootstrap",
                           title="bootstrap",
                           input_type=InputType.CHECKBOX,
                           field_type=FieldType.BOOLEAN,
                           required=False,
                           order_num=110)

    def __init__(self, *args, **kwargs):
        """Initialise the block with no task and empty option dicts."""
        super(NCF, self).__init__(*args, **kwargs)

        self.celery_task = None
        self.classifier_options = {}
        self.fit_options = {}

    def execute(self, exp, *args, **kwargs):
        """Reset the result, collect options and dispatch the NCF task.

        The block is stored in ``exp`` before the task is sent with
        ``apply_async``.
        """
        self.set_out_var("result", None)
        self.collect_options()

        mRNA_train_es = self.get_input_var("mRNA_train_es")
        mRNA_test_es = self.get_input_var("mRNA_test_es")

        miRNA_train_es = self.get_input_var("miRNA_train_es")
        miRNA_test_es = self.get_input_var("miRNA_test_es")

        self.celery_task = wrapper_task.s(
            apply_ncf_classifier,
            exp=exp,
            block=self,
            mRNA_train_es=mRNA_train_es,
            mRNA_test_es=mRNA_test_es,
            miRNA_train_es=miRNA_train_es,
            miRNA_test_es=miRNA_test_es,
            classifier_name=self.classifier_name,
            classifier_options=self.classifier_options,
            fit_options=self.fit_options,
            base_folder=exp.get_data_folder(),
            base_filename="%s_%s" % (self.uuid, self.classifier_name),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Publish ``result`` on the "result" output and store the block."""
        self.set_out_var("result", result)
        exp.store_block(self)

    def reset_execution(self, exp, *args, **kwargs):
        """Clear errors and the "result" output, then store the block."""
        self.clean_errors()
        self.set_out_var("result", None)
        exp.store_block(self)

    def get_option_safe(self, name, target_type=None):
        """Return attribute ``name``, optionally converted, or ``None``.

        ``None`` is returned when the attribute is missing, when its value is
        falsy, or when ``target_type(value)`` cannot convert it.

        @param name: attribute to read from the block
        @param target_type: optional callable used to convert the raw value
        """
        if not hasattr(self, name):
            return None
        raw = getattr(self, name)
        if not raw:
            return None
        if not target_type:
            return raw
        try:
            return target_type(raw)
        except (TypeError, ValueError, OverflowError):
            # Best effort: an unconvertible value is treated as "not set".
            # Only conversion errors are swallowed, so KeyboardInterrupt and
            # SystemExit are no longer hidden.
            return None

    def collect_option_safe(self, name, target_type=None, target_name=None):
        """Copy option ``name`` into ``classifier_options`` when it is truthy.

        The value is stored under ``target_name`` if given, otherwise under
        ``name``. Falsy values (including ``0``) are not stored.

        @return: the value obtained from ``get_option_safe``
        """
        value = self.get_option_safe(name, target_type)
        if value:
            self.classifier_options[target_name or name] = value
        return value

    def collect_options(self):
        """Fill ``classifier_options`` from the inputs and parameters."""
        self.classifier_options["gene2gene"] = self.get_input_var("gene2gene")
        self.classifier_options["miRNA2gene"] = self.get_input_var(
            "miRNA2gene")
        # range() excludes its upper bound, so walk_max_length itself is not
        # among the walk lengths. NOTE(review): confirm this is intended.
        self.classifier_options['walk_lengths'] = range(
            1, int(self.walk_max_length))
        self.collect_option_safe("eps")
        self.collect_option_safe("n_estimators", int)
        # self.collect_option_safe("max_features")
        self.collect_option_safe("max_depth", int)
        self.collect_option_safe("min_samples_leaf", int)
        self.collect_option_safe("min_samples_split", int)
        # NOTE(review): the ``criterion`` parameter is declared on the class
        # but never copied into classifier_options; confirm whether that is
        # intended.
        self.classifier_options["bootstrap"] = self.bootstrap
Ejemplo n.º 20
0
class UserUpload(GenericBlock):
    """Abstract input block that builds an expression set from uploaded CSVs.

    ``process_upload`` reads the expression and phenotype matrices, stores
    them in a new ``ExpressionSet`` and publishes it on the "expression_set"
    output.
    """
    block_base_name = "UPLOAD"
    block_group = GroupType.INPUT_DATA
    is_abstract = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data", reload_block_in_client=True),
        ActionRecord("success", ["processing_upload"], "done", reload_block_in_client=True),
        ActionRecord("error", ["processing_upload"], "valid_params", reload_block_in_client=True),
    ])

    # User defined parameters
    es_matrix = ParamField("es_matrix", title="Expression set matrix", order_num=0,
        input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM)
    es_matrix_ori = ParamField(
        "es_matrix_ori", title="Matrix orientation", order_num=1,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"]
            ]
        }
    )
    pheno_matrix = ParamField("pheno_matrix", title="Phenotype matrix", order_num=10,
        input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM)
    gpl_platform = ParamField("gpl_platform", title="Platform ID", order_num=20,
        input_type=InputType.TEXT, field_type=FieldType.STR, required=False)
    working_unit = ParamField("working_unit", title="Working unit [used when platform is unknown]",
        order_num=3, input_type=InputType.TEXT, field_type=FieldType.STR, required=False)

    # Exposes the ``is_sub_pages_visible`` property below as a block field.
    _is_sub_pages_visible = BlockField("is_sub_pages_visible", FieldType.RAW, is_a_property=True)

    # Provided outputs
    _expression_set = OutputBlockField(name="expression_set", field_type=FieldType.HIDDEN,
                                       provided_data_type="ExpressionSet")
    _gpl_annotation = OutputBlockField(name="gpl_annotation", field_type=FieldType.HIDDEN,
                                       provided_data_type="PlatformAnnotation")

    # TODO: COPY PASTE from fetch_gse block
    pages = BlockField("pages", FieldType.RAW, init_val={
        "assign_phenotype_classes": {
            "title": "Assign phenotype classes",
            "resource": "assign_phenotype_classes",
            "widget": "widgets/assign_phenotype_classes.html"
        },
    })

    def __init__(self, *args, **kwargs):
        """Initialise the block, passing "User upload" as the first positional argument."""
        super(UserUpload, self).__init__("User upload", *args, **kwargs)

    @property
    def is_sub_pages_visible(self):
        """``True`` while the block is in a state that shows its sub pages."""
        return self.state in ['source_was_preprocessed', 'sample_classes_assigned', 'ready', 'done']

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return the phenotype of the stored expression set prepared for JS."""
        return prepare_phenotype_for_js_from_es(self.get_out_var("expression_set"))

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Store the class assignment received in the JSON body of ``request``.

        The body must provide "user_class_title" and "classes"; the classes
        are written to the phenotype column named by the title.
        """
        es = self.get_out_var("expression_set")
        pheno_df = es.get_pheno_data_frame()

        received = json.loads(request.body)
        es.pheno_metadata["user_class_title"] = received["user_class_title"]
        pheno_df[received["user_class_title"]] = received["classes"]

        es.store_pheno_data_frame(pheno_df)
        exp.store_block(self)

    def process_upload(self, exp, *args, **kwargs):
        """Build the expression set from the uploaded files.

        Reads the expression and phenotype matrices as CSV with the first
        column as index, makes sure the phenotype table has the user-class
        column, stores both frames in a new ``ExpressionSet``, publishes it
        on the "expression_set" output and fires the "success" action.

        @param exp: Experiment
        """
        self.clean_errors()

        # ``DataFrame.from_csv`` is deprecated and was removed in pandas 1.0;
        # ``read_csv(..., index_col=0, parse_dates=True)`` is its documented
        # equivalent.
        assay_df = pd.read_csv(self.es_matrix.get_file(),
                               index_col=0, parse_dates=True)

        es = ExpressionSet(base_dir=exp.get_data_folder(),
                           base_filename="%s_annotation" % self.uuid)

        # The first CSV column is already the index (index_col=0), so no
        # additional set_index call is needed.
        pheno_df = pd.read_csv(self.pheno_matrix.get_file(),
                               index_col=0, parse_dates=True)

        # Make sure the column used for user class assignment exists.
        user_class_title = es.pheno_metadata["user_class_title"]
        if user_class_title not in pheno_df.columns:
            pheno_df[user_class_title] = ""

        # if matrix is bad oriented, then do transposition
        if self.es_matrix_ori == "GxS":
            assay_df = assay_df.T

        es.store_assay_data_frame(assay_df)
        es.store_pheno_data_frame(pheno_df)

        if self.working_unit:
            es.working_unit = self.working_unit

        self.set_out_var("expression_set", es)

        exp.store_block(self)

        self.do_action("success", exp)

    def success(self, exp, *args, **kwargs):
        """Handler of the "success" action; intentionally does nothing."""
        pass
Ejemplo n.º 21
0
class UploadInteraction(GenericBlock):
    """Input block that uploads a gene interaction file.

    ``process_upload`` dispatches ``upload_interaction_task`` through
    ``wrapper_task``; ``success`` publishes the outcome on the "interaction"
    output.
    """
    block_base_name = "GENE_INTERACTION"
    block_group = GroupType.INPUT_DATA
    name = "Upload Gene Interaction"

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"),
        ActionRecord(
            "on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord(
            "on_params_not_valid", ["validating_params"], "created"),
        ActionRecord(
            "process_upload", ["valid_params", "processing_upload"],
            "processing_upload", "Process uploaded data"),
        ActionRecord(
            "success", ["processing_upload"], "done",
            reload_block_in_client=True),
        ActionRecord(
            "error", ["processing_upload"], "valid_params"),
    ])

    # Uploaded file holding the interaction data.
    upload_interaction = ParamField(
        "upload_interaction",
        title="Interaction file",
        order_num=10,
        input_type=InputType.FILE_INPUT,
        field_type=FieldType.CUSTOM)

    interaction_type = ParamField(
        "interaction_type",
        title="Interaction type",
        order_num=11,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=True,
        init_val="PPI",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["PPI", "PPI"],
                ["miRNA", "miRNA Target"],
            ]
        })

    # Identifier unit of the first element of each (x1, x2) pair.
    x1_unit = ParamField(
        "x1_unit",
        title="(x1, x2) - x1 unit",
        order_num=12,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=True,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["RefSeq", "RefSeq"],
                ["Entrez", "EntrezID"],
                ["Symbol", "Symbol"],
                ["mirbase", "miRBase ID"],
            ]
        })

    # Identifier unit of the second element of each (x1, x2) pair.
    x2_unit = ParamField(
        "x2_unit",
        title="(x1, x2) - x2 unit",
        order_num=13,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        required=True,
        init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["RefSeq", "RefSeq"],
                ["Entrez", "EntrezID"],
                ["Symbol", "Symbol"],
                ["mirbase", "miRBase ID"],
            ]
        })

    header = ParamField(
        "header",
        title="Header",
        order_num=23,
        input_type=InputType.CHECKBOX,
        field_type=FieldType.BOOLEAN,
        required=False)

    # NOTE(review): init_val is "matrix", but the "matrix" option below is
    # commented out, so the default is not among the selectable values.
    # Confirm which default is intended.
    bi_data_type = ParamField(
        "bi_data_type",
        title="Data type",
        order_num=40,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="matrix",
        options={
            "inline_select_provider": True,
            "select_options": [
                # ["matrix", "Matrix"],
                ["pairs", "Pairs"],
                ["pairs_diff", "Pairs - different units in interaction"],
                ["triples", "Triples with values"],
                ["triples_diff",
                 "Triples with values - different units in interaction"],
            ]
        })

    csv_sep = ParamField(
        "csv_sep",
        title="CSV separator symbol",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma  (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    # Provided outputs
    _interaction = OutputBlockField(
        name="interaction", provided_data_type="BinaryInteraction")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no celery task exists until an upload runs."""
        super(UploadInteraction, self).__init__(*args, **kwargs)
        self.celery_task = None

    def move_to_exp(self, exp_id):
        """Look up the "interaction" output; the value is not used.

        NOTE(review): ``exp_id`` is ignored and nothing is returned, so this
        looks unfinished. Confirm the intended behaviour.
        """
        self.get_out_var("interaction")

    def process_upload(self, exp, *args, **kwargs):
        """Clear errors, store the block and dispatch the upload task."""
        self.clean_errors()
        task = wrapper_task.s(upload_interaction_task, exp, self)
        self.celery_task = task
        # Store the block before dispatching the task.
        exp.store_block(self)
        task.apply_async()

    def success(self, exp, interaction):
        """Publish ``interaction`` on the "interaction" output and store the block."""
        self.set_out_var("interaction", interaction)
        exp.store_block(self)
Ejemplo n.º 22
0
class UserUploadComplex(GenericBlock):
    """Input block that uploads mRNA, miRNA and methylation matrices.

    Each provided matrix becomes its own ``ExpressionSet`` output
    ("m_rna_es", "mi_rna_es", "methyl_es"); all of them receive the same
    phenotype data frame.
    """
    block_base_name = "UPLOAD_CMPLX"
    block_group = GroupType.INPUT_DATA
    name = "Upload mRna/miRna/methyl dataset"

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data"),
        ActionRecord("success", ["processing_upload"],
                     "done",
                     reload_block_in_client=True),
        ActionRecord("error", ["processing_upload"], "valid_params"),
    ])

    # User defined parameters
    m_rna_matrix = ParamField("m_rna_matrix",
                              title="mRNA expression",
                              order_num=10,
                              input_type=InputType.FILE_INPUT,
                              field_type=FieldType.CUSTOM)
    m_rna_platform = ParamField("m_rna_platform",
                                title="Platform ID",
                                order_num=11,
                                input_type=InputType.TEXT,
                                field_type=FieldType.STR,
                                required=False)
    m_rna_unit = ParamField(
        "m_rna_unit",
        title="Working unit [used when platform is unknown]",
        init_val=None,
        order_num=12,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        required=False)

    mi_rna_matrix = ParamField("mi_rna_matrix",
                               title=u"μRNA expression",
                               order_num=20,
                               input_type=InputType.FILE_INPUT,
                               field_type=FieldType.CUSTOM,
                               required=False)

    methyl_matrix = ParamField("methyl_matrix",
                               title="Methylation expression",
                               order_num=30,
                               input_type=InputType.FILE_INPUT,
                               field_type=FieldType.CUSTOM,
                               required=False)

    pheno_matrix = ParamField("pheno_matrix",
                              title="Phenotype matrix",
                              order_num=40,
                              input_type=InputType.FILE_INPUT,
                              field_type=FieldType.CUSTOM,
                              required=False)

    csv_sep = ParamField("csv_sep",
                         title="CSV separator symbol",
                         order_num=50,
                         input_type=InputType.SELECT,
                         field_type=FieldType.STR,
                         init_val=",",
                         options={
                             "inline_select_provider":
                             True,
                             "select_options": [
                                 [" ", "space ( )"],
                                 [",", "comma  (,)"],
                                 ["\t", "tab (\\t)"],
                                 [";", "semicolon (;)"],
                                 [":", "colon (:)"],
                             ]
                         })

    # Exposes the ``is_sub_pages_visible`` property below as a block field.
    _is_sub_pages_visible = BlockField("is_sub_pages_visible",
                                       FieldType.RAW,
                                       is_a_property=True)

    # Provided outputs
    _m_rna_es = OutputBlockField(name="m_rna_es",
                                 field_type=FieldType.HIDDEN,
                                 provided_data_type="ExpressionSet")
    _m_rna_annotation = OutputBlockField(
        name="m_rna_annotation",
        field_type=FieldType.HIDDEN,
        provided_data_type="PlatformAnnotation")
    _mi_rna_es = OutputBlockField(name="mi_rna_es",
                                  field_type=FieldType.HIDDEN,
                                  provided_data_type="ExpressionSet")
    _methyl_es = OutputBlockField(name="methyl_es",
                                  field_type=FieldType.HIDDEN,
                                  provided_data_type="ExpressionSet")

    pages = BlockField("pages",
                       FieldType.RAW,
                       init_val={
                           "assign_phenotype_classes": {
                               "title": "Assign phenotype classes",
                               "resource": "assign_phenotype_classes",
                               "widget":
                               "widgets/assign_phenotype_classes.html"
                           },
                       })

    @property
    def is_sub_pages_visible(self):
        """``True`` while the block is in a state that shows its sub pages."""
        return self.state in [
            'source_was_preprocessed', 'sample_classes_assigned', 'ready',
            'done'
        ]

    def _build_expression_set(self, exp, matrix, sep, pheno_df, suffix):
        """Create the ExpressionSet ``<uuid>_<suffix>`` from ``matrix`` and ``pheno_df``."""
        assay_df = matrix.get_as_data_frame(sep)
        es = ExpressionSet(base_dir=exp.get_data_folder(),
                           base_filename="%s_%s" % (self.uuid, suffix))
        es.store_assay_data_frame(assay_df)
        es.store_pheno_data_frame(pheno_df)
        return es

    def _stored_expression_sets(self):
        """Return the m_rna, mi_rna and methyl outputs, in that order."""
        return [
            self.get_out_var(out_name)
            for out_name in ("m_rna_es", "mi_rna_es", "methyl_es")
        ]

    def _first_available(self, candidates):
        """Return the first non-``None`` entry of ``candidates`` or raise."""
        for es in candidates:
            if es is not None:
                return es
        raise Exception("No data was stored before")

    def process_upload(self, exp, *args, **kwargs):
        """Build one expression set per uploaded matrix.

        Fires the "success" action when everything was stored; on any
        exception the traceback is printed and the "error" action is fired
        with the exception.

        @param exp: Experiment
        """
        # TODO: move to celery
        self.clean_errors()
        sep = getattr(self, "csv_sep", " ")

        try:
            if not self.pheno_matrix:
                self.warnings.append(Exception("Phenotype is undefined"))
                pheno_df = None
            else:
                pheno_df = self.pheno_matrix.get_as_data_frame(sep)
                # NOTE(review): set_index returns a new frame by default and
                # the result is discarded here, so the index is left
                # unchanged. Confirm whether re-indexing was intended.
                pheno_df.set_index(pheno_df.columns[0])

                # TODO: solve somehow better: Here we add empty column with user class assignment
                pheno_df[ExpressionSet(
                    None, None).pheno_metadata["user_class_title"]] = ""

            if self.m_rna_matrix is not None:
                m_rna_es = self._build_expression_set(
                    exp, self.m_rna_matrix, sep, pheno_df, "m_rna_es")
                m_rna_es.working_unit = self.m_rna_unit
                self.set_out_var("m_rna_es", m_rna_es)

                # TODO: fetch GPL annotation if GPL id was provided

            if self.mi_rna_matrix is not None:
                mi_rna_es = self._build_expression_set(
                    exp, self.mi_rna_matrix, sep, pheno_df, "mi_rna_es")
                self.set_out_var("mi_rna_es", mi_rna_es)

            if self.methyl_matrix is not None:
                methyl_es = self._build_expression_set(
                    exp, self.methyl_matrix, sep, pheno_df, "methyl_es")
                self.set_out_var("methyl_es", methyl_es)

            self.do_action("success", exp)
        except Exception as e:
            # Print the traceback, then report the failure through the
            # block's "error" action instead of propagating it.
            traceback.print_tb(sys.exc_info()[2])
            self.do_action("error", exp, e)

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return the phenotype of the first stored expression set for JS.

        Raises ``Exception`` when none of the three outputs is set.
        """
        es = self._first_available(self._stored_expression_sets())
        return prepare_phenotype_for_js_from_es(es)

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Apply the class assignment from ``request`` to every stored set.

        The phenotype frame of the first stored expression set receives the
        "classes" column named by "user_class_title" and is then written to
        all stored expression sets. Raises ``Exception`` when none of the
        three outputs is set.
        """
        stored = self._stored_expression_sets()
        es = self._first_available(stored)

        pheno_df = es.get_pheno_data_frame()

        received = json.loads(request.body)
        title = received["user_class_title"]
        pheno_df[title] = received["classes"]

        for work_es in stored:
            if work_es is not None:
                work_es.pheno_metadata["user_class_title"] = title
                work_es.store_pheno_data_frame(pheno_df)

        exp.store_block(self)

    def success(self, exp, *args, **kwargs):
        """Handler of the "success" action; intentionally does nothing."""
        pass
Ejemplo n.º 23
0
class FetchGSE(GenericBlock):
    """Input block that fetches a dataset from NCBI GEO and preprocesses it.

    ``start_fetch`` dispatches ``fetch_geo_gse``; a successful fetch triggers
    ``start_preprocess``, which dispatches ``preprocess_soft``. The resulting
    expression set is published on the "expression_set" output.
    """
    block_base_name = "FETCH_GEO"
    name = "Fetch from NCBI GEO"
    block_group = GroupType.INPUT_DATA

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("start_fetch", ["valid_params", "done"],
                     "source_is_being_fetched", "Start fetch"),
        # A failed fetch returns to "valid_params" so the fetch can be
        # retried; no action in this list starts from "form_valid".
        ActionRecord("error_during_fetch", ["source_is_being_fetched"],
                     "valid_params",
                     reload_block_in_client=True),
        ActionRecord("successful_fetch", ["source_is_being_fetched"],
                     "source_was_fetched",
                     reload_block_in_client=True),
        ActionRecord("start_preprocess",
                     ["source_was_fetched", "source_was_preprocessed"],
                     "source_is_being_fetched", "Run preprocess"),
        ActionRecord("error_during_preprocess", ["source_is_being_fetched"],
                     "source_was_fetched",
                     reload_block_in_client=True),
        ActionRecord("successful_preprocess", ["source_is_being_fetched"],
                     "source_was_preprocessed",
                     reload_block_in_client=True),
        ActionRecord("assign_sample_classes",
                     ["source_was_preprocessed", "done"], "done"),
    ])

    # Set by ``successful_fetch`` and passed on to the preprocess task.
    source_file = BlockField("source_file", FieldType.CUSTOM, None)

    pages = BlockField("pages",
                       FieldType.RAW,
                       init_val={
                           "assign_phenotype_classes": {
                               "title": "Assign phenotype classes",
                               "resource": "assign_phenotype_classes",
                               "widget":
                               "widgets/assign_phenotype_classes.html"
                           },
                       })
    # Exposes the ``is_sub_pages_visible`` property below as a block field.
    _is_sub_pages_visible = BlockField("is_sub_pages_visible",
                                       FieldType.RAW,
                                       is_a_property=True)

    # User defined parameters
    geo_uid = ParamField("geo_uid", "Geo accession id", InputType.TEXT,
                         FieldType.STR, "")

    # Provided outputs
    _expression_set = OutputBlockField(name="expression_set",
                                       field_type=FieldType.HIDDEN,
                                       provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; neither celery task exists yet."""
        super(FetchGSE, self).__init__(*args, **kwargs)
        self.celery_task_fetch = None
        self.celery_task_preprocess = None

    def is_form_fields_editable(self):
        """``True`` while the block state is 'created' or 'form_modified'.

        NOTE(review): no action in ``_block_actions`` leads to
        'form_modified'; confirm whether this state name is still in use.
        """
        return self.state in ['created', 'form_modified']

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return the phenotype of the stored expression set prepared for JS.

        GEO specific header options (title prefixes, prefix order and hidden
        prefixes) are passed along to ``prepare_phenotype_for_js_from_es``.
        """
        headers_options = {
            "custom_title_prefix_map": [
                ("Sample_title", "Title"),
                ("Sample_description", "Description"),
                ("Sample_characteristics", "Characteristics"),
                ("Sample_organism", "Organism"),
                ("Sample_geo_accession", "GEO #"),
                ("Sample_", ""),
            ],
            "prefix_order": [
                "Sample_geo_accession",
                "Sample_title",
                "Sample_description",
                "Sample_contact",
                "Sample_characteristics",
            ],
            "prefix_hide": {
                "Sample_contact",
                "Sample_channel",
                "Sample_data_row_count",
                "Sample_data",
                "Sample_platform",
                "Sample_growth",
                "Sample_series_id",
                "Sample_status",
                "Sample_extract",
                "Sample_supplementary_file",
                "Sample_hyb",
                "Sample_label",
                "Sample_source",
                "Sample_last_update",
                "Sample_molecule",
                "Sample_organism",
                "Sample_scan",
                "Sample_taxid",
                "Sample_type",
                "Sample_submission",
            }
        }
        return prepare_phenotype_for_js_from_es(
            self.get_out_var("expression_set"), headers_options)

    @property
    def is_sub_pages_visible(self):
        """``True`` while the block is in a state that shows its sub pages.

        'done' is included because ``assign_sample_classes`` can be repeated
        from that state, matching the upload blocks in this file.
        """
        return self.state in [
            'source_was_preprocessed', 'sample_classes_assigned', 'ready',
            'done'
        ]

    def start_fetch(self, exp, *args, **kwargs):
        """Clear errors, store the block and dispatch the GEO fetch task.

        @param exp: Experiment
        """
        self.clean_errors()
        self.celery_task_fetch = wrapper_task.s(
            fetch_geo_gse,
            exp,
            self,
            geo_uid=self.geo_uid,
            success_action="successful_fetch",
            error_action="error_during_fetch",
            ignore_cache=False)
        exp.store_block(self)
        self.celery_task_fetch.apply_async()

    def error_during_fetch(self, exp, *args, **kwargs):
        """Store the block after a failed fetch."""
        exp.store_block(self)

    def successful_fetch(self, exp, source_file, *args, **kwargs):
        """Remember ``source_file`` and start preprocessing right away."""
        self.clean_errors()
        self.source_file = source_file
        self.do_action("start_preprocess", exp)
        exp.store_block(self)

    def start_preprocess(self, exp, *args, **kwargs):
        """Store the block and dispatch the preprocess task for ``source_file``."""
        self.celery_task_preprocess = wrapper_task.s(
            preprocess_soft,
            exp,
            self,
            source_file=self.source_file,
            success_action="successful_preprocess",
            error_action="error_during_preprocess")
        exp.store_block(self)
        self.celery_task_preprocess.apply_async()

    def error_during_preprocess(self, exp, *args, **kwargs):
        """Store the block after a failed preprocess."""
        exp.store_block(self)

    def successful_preprocess(self, exp, es, *args, **kwargs):
        """Publish the preprocessed expression set and notify the client.

        @type es: ExpressionSet
        """
        self.set_out_var("expression_set", es)
        # self.set_out_var("gpl_annotation", ann)

        self.clean_errors()
        exp.store_block(self)

        msg = BlockUpdated(self.exp_id, self.uuid, self.base_name)
        msg.comment = u"Dataset %s was preprocessed, \n please assign samples to classes" % self.geo_uid
        msg.silent = False
        msg.send()

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Store the class assignment received in the JSON body of ``request``.

        The body must provide "user_class_title" and "classes". After the
        phenotype is stored, the "assign_sample_classes" action is fired.
        """
        # TODO: unify code with user upload
        es = self.get_out_var("expression_set")
        pheno_df = es.get_pheno_data_frame()

        received = json.loads(request.body)
        es.pheno_metadata["user_class_title"] = received["user_class_title"]
        pheno_df[received["user_class_title"]] = received["classes"]

        es.store_pheno_data_frame(pheno_df)
        exp.store_block(self)

        self.do_action("assign_sample_classes", exp)

    def assign_sample_classes(self, exp, *args, **kwargs):
        """Handler of the "assign_sample_classes" action; intentionally does nothing."""
        pass
Ejemplo n.º 24
0
class NIMFASNMNMFBlock(GenericBlock):
    """
        Block that hands nimfa_snmnmf_task to the task queue.

        Inputs: two expression sets ("mRNA", "miRNA") and two binary
        interactions ("Gene2Gene", "miRNA2gene"). Parameters l1, l2, g1, g2
        and rank are forwarded to the task unchanged. success() stores the
        three results as the outputs "W", "H1_miRNA" and "H2_genes".
    """
    block_base_name = "NIMFA_SNMNMF"
    name = "NIMFA SNMNMF"

    is_abstract = False
    block_group = GroupType.SNMNMF

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, then the shared execute actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # --- inputs ---
    _m_rna = InputBlockField(
        name="mRNA",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True
    )
    _mi_rna = InputBlockField(
        name="miRNA",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True
    )
    # NOTE: an optional "DNAmethyl" ExpressionSet input (order_num=30) was
    # sketched here and is currently disabled.
    _gene2gene = InputBlockField(
        name="Gene2Gene",
        order_num=40,
        required_data_type="BinaryInteraction",
        required=True
    )
    _mirna2gene = InputBlockField(
        name="miRNA2gene",
        order_num=50,
        required_data_type="BinaryInteraction",
        required=True
    )
    # NOTE: an optional "Gene2DNAmethyl" BinaryInteraction input
    # (order_num=60) was sketched here and is currently disabled.

    # --- parameters ---
    l1 = ParamField(
        name="l1", order_num=70, title="l1",
        input_type=InputType.TEXT, field_type=FieldType.FLOAT,
        init_val=0.1
    )
    l2 = ParamField(
        name="l2", order_num=80, title="l2",
        input_type=InputType.TEXT, field_type=FieldType.FLOAT,
        init_val=0.1
    )
    g1 = ParamField(
        name="g1", order_num=90, title="g1",
        input_type=InputType.TEXT, field_type=FieldType.FLOAT,
        init_val=0.1
    )
    g2 = ParamField(
        name="g2", order_num=100, title="g2",
        input_type=InputType.TEXT, field_type=FieldType.FLOAT,
        init_val=0.1
    )
    rank = ParamField(
        name="rank", order_num=110, title="rank",
        input_type=InputType.TEXT, field_type=FieldType.INT,
        init_val=50
    )

    # --- outputs ---
    w = OutputBlockField(name="W", provided_data_type="ExpressionSet")
    H1_miRNA = OutputBlockField(name="H1_miRNA", provided_data_type="ExpressionSet")
    H2_genes = OutputBlockField(name="H2_genes", provided_data_type="ExpressionSet")
    # NOTE: "H3_DNAmethyl", "H1_perf" and "H2_perf" ExpressionSet outputs were
    # sketched here and are currently disabled.

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task has been created yet."""
        super(NIMFASNMNMFBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """
            Build the nimfa_snmnmf_task signature, store the block and
            submit the task.

            @param exp: handed to the task and used to store the block
        """
        self.clean_errors()
        m_rna_es = self.get_input_var("mRNA")
        mi_rna_es = self.get_input_var("miRNA")
        gene_interactions = self.get_input_var("Gene2Gene")
        mirna_targets = self.get_input_var("miRNA2gene")

        factorization_params = {
            'l1': self.l1,
            'l2': self.l2,
            'g1': self.g1,
            'g2': self.g2,
            'rank': self.rank,
        }
        self.celery_task = wrapper_task.s(
            nimfa_snmnmf_task, exp, self,
            mRNA=m_rna_es,
            miRNA=mi_rna_es,
            gene2gene=gene_interactions,
            miRNA2gene=mirna_targets,
            params=factorization_params,
            base_filename="%s_nimfa_snmnmf" % self.uuid
        )
        # Store before submitting, same order as the other blocks.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, W, H1, H2):
        """
            Store the task results as output variables and persist the block.

            @param W: stored as output "W"
            @param H1: stored as output "H1_miRNA"
            @param H2: stored as output "H2_genes"
        """
        for out_name, value in (("W", W), ("H1_miRNA", H1), ("H2_genes", H2)):
            self.set_out_var(out_name, value)
        exp.store_block(self)
Ejemplo n.º 25
0
class GenericRankingBlock(GenericBlock):
    """
        Abstract base for blocks that run apply_ranking on one expression
        set and expose the outcome as a TableResult output named "result".

        ranking_name starts as None and ranking_options as an empty dict;
        collect_options() is called at the start of execute() and resets
        ranking_options here (presumably overridden by concrete rankings
        -- confirm).
    """
    block_base_name = ""
    block_group = GroupType.PROCESSING
    is_abstract = True

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, then the shared execute actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es",
                          order_num=10,
                          required_data_type="ExpressionSet",
                          required=True)

    # TODO: remove from generic ranking
    best = ParamField(name="best",
                      title="Consider only best",
                      input_type=InputType.TEXT,
                      field_type=FieldType.INT,
                      init_val=None)

    _result = OutputBlockField(name="result",
                               field_type=FieldType.STR,
                               provided_data_type="TableResult",
                               init_val=None)

    def __init__(self, *args, **kwargs):
        """
            Initialise ranking state and pre-create the result table.

            The TableResult is placed in the experiment's data folder under
            the base filename "<uuid>_gt_result" and registered as the
            "result" output right away.
        """
        super(GenericRankingBlock, self).__init__(*args, **kwargs)
        self.ranking_name = None
        self.ranking_options = {}
        self.celery_task = None

        owning_exp = Experiment.get_exp_by_id(self.exp_id)
        self.result = TableResult(base_dir=owning_exp.get_data_folder(),
                                  base_filename="%s_gt_result" % self.uuid)
        self.set_out_var("result", self.result)

    def collect_options(self):
        """Reset ranking_options to an empty dict."""
        self.ranking_options = {}

    def execute(self, exp, *args, **kwargs):
        """
            Build the apply_ranking task, store the block, submit the task
            and log that it was queued.

            @param exp: handed to the task, used to store the block and to log
        """
        self.clean_errors()
        self.collect_options()

        self.celery_task = wrapper_task.s(
            apply_ranking,
            exp=exp,
            block=self,
            es=self.get_input_var("es"),
            ranking_name=self.ranking_name,
            result_table=self.result,
            options=self.ranking_options
        )
        exp.store_block(self)
        self.celery_task.apply_async()

        # Same text goes to the experiment log and to the module logger.
        queued_note = "Sent ranking computation to queue"
        exp.log(self.uuid, queued_note)
        log.debug(queued_note)

    def success(self, exp, result, *args, **kwargs):
        """
            Replace the stored result table with the one returned by the
            task, re-register it as the "result" output and persist the block.
        """
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
Ejemplo n.º 26
0
class FilterBlock(GenericBlock):
    """
        Block that hands filter_task to the task queue for one expression
        set.

        The selected filter_method ("LOW_VAL" or "VAR") and the threshold q
        are forwarded to the task; success() stores the returned expression
        set as the "flt_es" output.
    """
    block_base_name = "FILTER"
    name = "Var/Val Filter"

    is_abstract = False
    block_group = GroupType.FILTER

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, then the shared execute actions.
    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es", order_num=10,
        required_data_type="ExpressionSet", required=True
    )

    filter_method = ParamField(
        "filter_method",
        title="Filter method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="LOW_VAL",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["LOW_VAL", "Low Val Filter"],
                ["VAR", "Var Filter"],
            ],
        }
    )
    q = ParamField(
        name="q", title="Threshold",
        input_type=InputType.TEXT, field_type=FieldType.FLOAT,
        init_val=30.0
    )

    flt_es = OutputBlockField(name="flt_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task has been created yet."""
        super(FilterBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """
            Build the filter_task signature, store the block and submit the
            task.

            The task's base filename is "<uuid>_<filter_method>_flt".

            @param exp: handed to the task and used to store the block
        """
        self.clean_errors()
        source_es = self.get_input_var("es")

        self.celery_task = wrapper_task.s(
            filter_task, exp, self,
            filter_type=self.filter_method,
            q=self.q,
            es=source_es,
            base_filename="%s_%s_flt" % (self.uuid, self.filter_method)
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Store the filtered expression set as "flt_es" and persist the block."""
        self.set_out_var("flt_es", flt_es)
        exp.store_block(self)
Ejemplo n.º 27
0
class MergeComoduleSets(GenericBlock):
    """
        Block that hands merge_comodules_task to the task queue for two
        ComoduleSet inputs.

        Each input has a name parameter (cs_1_name / cs_2_name, both
        defaulting to "genes") that is forwarded to the task. success()
        stores the returned set as the "comodule_set" output.
    """
    block_base_name = "MERGE_COMODULE_SETS"
    name = "Merge Comodule Sets"

    is_abstract = False
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, then the shared execute actions.
    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # First comodule set and its name.
    _cs_1 = InputBlockField(
        name="cs_1", order_num=10,
        required_data_type="ComoduleSet", required=True
    )
    _cs_1_name = ParamField(
        name="cs_1_name", order_num=11,
        title="Comodule 1 name",
        input_type=InputType.TEXT, field_type=FieldType.STR,
        init_val="genes"
    )

    # Second comodule set and its name.
    _cs_2 = InputBlockField(
        name="cs_2", order_num=20,
        required_data_type="ComoduleSet", required=True
    )
    _cs_2_name = ParamField(
        name="cs_2_name", order_num=21,
        title="Comodule 2 name",
        input_type=InputType.TEXT, field_type=FieldType.STR,
        init_val="genes"
    )

    # NOTE(review): the attribute and the success() parameter are called
    # flt_es although the output is the "comodule_set" ComoduleSet; the names
    # are kept as they are for compatibility.
    flt_es = OutputBlockField(name="comodule_set", provided_data_type="ComoduleSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task has been created yet."""
        super(MergeComoduleSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """
            Build the merge_comodules_task signature, store the block and
            submit the task.

            The task's base filename is "<uuid>_merge_cs_thr".

            @param exp: handed to the task and used to store the block
        """
        self.clean_errors()
        first_set = self.get_input_var("cs_1")
        second_set = self.get_input_var("cs_2")

        self.celery_task = wrapper_task.s(
            merge_comodules_task, exp, self,
            cs_1=first_set,
            cs_2=second_set,
            cs_1_name=self.cs_1_name,
            cs_2_name=self.cs_2_name,
            base_filename="%s_%s_thr" % (self.uuid, 'merge_cs')
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Store the merged set as "comodule_set" and persist the block."""
        self.set_out_var("comodule_set", flt_es)
        exp.store_block(self)
Ejemplo n.º 28
0
class UniformMetaBlock(GenericBlock):
    """
        Abstract meta block that repeatedly runs its sub-scope and collects
        the variables bound in collector_spec after every run.

        The current position is inner_output_manager.iterator; the values
        collected for that position are kept in res_seq.sequence. When the
        inner output manager is exhausted, build_result_collection() is
        called and the block moves to "done".

        get_fold_labels() and get_repeat_labels() are declared abstract.
        NOTE(review): build_result_collection(), sub_scope_name and
        reset_execution_for_sub_blocks() are used below but not defined in
        this part of the class -- presumably provided elsewhere; confirm.
    """
    is_abstract = True
    block_group = GroupType.META_PLUGIN
    create_new_scope = True
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([])
    _block_actions.extend(ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord("add_collector_var", ["created", "ready", "done", "valid_params"], "validating_params"),
        ActionRecord("remove_collector_var", ["created", "ready", "done", "valid_params"], "validating_params"),

        ActionRecord("execute", ["ready"], "generating_folds", user_title="Run block"),

        ActionRecord("on_folds_generation_success", ["generating_folds"], "ready_to_run_sub_scope",
                     reload_block_in_client=True),
        ActionRecord("continue_collecting_sub_scope", ["ready_to_run_sub_scope"],
                     "sub_scope_executing"),

        ActionRecord("run_sub_scope", ["ready_to_run_sub_scope"], "sub_scope_executing"),
        ActionRecord("on_sub_scope_done", ["sub_scope_executing"], "ready_to_run_sub_scope"),

        ActionRecord("success", ["working", "ready_to_run_sub_scope"], "done",
                     propagate_auto_execution=True, reload_block_in_client=True),
        ActionRecord("error", ["*", "ready", "working", "sub_scope_executing",
                               "generating_folds", "ready_to_run_sub_scope"],
                     "execution_error", reload_block_in_client=True),

        ActionRecord("reset_execution", ["*", "done", "sub_scope_executing", "ready_to_run_sub_scope",
                                         "generating_folds", "execution_error"], "ready",
                     user_title="Reset execution"),
    ]))

    _collector_spec = ParamField(name="collector_spec", title="",
                                 field_type=FieldType.CUSTOM,
                                 input_type=InputType.HIDDEN,
                                 init_val=None, required=False
    )

    res_seq = BlockField(name="res_seq", provided_data_type="SequenceContainer",
                         field_type=FieldType.HIDDEN, init_val=None)

    _results_container = OutputBlockField(
        name="results_container",
        provided_data_type="ResultsContainer",
        field_type=FieldType.HIDDEN,
        init_val=None
    )

    def __init__(self, *args, **kwargs):
        """
            Initialise the collector specification, the inner output manager
            and an empty result sequence.

            The three sub-scope related states are added to
            auto_exec_status_working.
        """
        super(UniformMetaBlock, self).__init__(*args, **kwargs)
        self.auto_exec_status_working.update(["sub_scope_executing", "ready_to_run_sub_scope",
                                              "generating_folds"])

        self.inner_output_manager = IteratedInnerFieldManager()
        self.collector_spec = CollectorSpecification()
        self.collector_spec.label = self.block_base_name + "_collection"

        self.inner_output_es_names_map = {}
        self.celery_task = None

        self.set_out_var("results_container", None)
        self.res_seq = SequenceContainer()

    def remap_inputs(self, mapping):
        """
            Apply change_block(mapping) to every bound input variable and to
            every variable bound in the collector specification.
        """
        for var in self.bound_inputs.itervalues():
            var.change_block(mapping)
        for var in self.collector_spec.bound.itervalues():
            var.change_block(mapping)

    @property
    def is_sub_pages_visible(self):
        """True while the block state is 'valid_params', 'done' or 'ready'."""
        return self.state in ['valid_params', 'done', 'ready']

    @abstractmethod
    def get_fold_labels(self):
        pass

    @abstractmethod
    def get_repeat_labels(self):
        pass

    def get_inner_out_var(self, name):
        """Return the variable `name` from the inner output manager."""
        return self.inner_output_manager.get_var(name)

    def run_sub_scope(self, exp, *args, **kwargs):
        """
            Reset the sub-blocks, store this block and execute the sub-scope
            through a ScopeRunner.

            The current result cell and inner-output entry are only looked
            up for debug logging.
        """
        self.reset_execution_for_sub_blocks()
        cell = self.res_seq.sequence[self.inner_output_manager.iterator]
        log.debug("Cell!!!!!!!! %s", str(cell))
        act = self.inner_output_manager.sequence[self.inner_output_manager.iterator]
        log.debug("Cell!!!!!!!! %s", str(act))

        exp.store_block(self)
        sr = ScopeRunner(exp, self.sub_scope_name)
        sr.execute()

    def on_sub_scope_done(self, exp, *args, **kwargs):
        """
            @type exp: Experiment

            This action should be called by ScopeRunner
            when all blocks in sub-scope have exec status == done

            Collects every non-None bound collector variable into the result
            cell of the current iteration (cloned when the value has a
            clone() method, deep-copied otherwise) while holding the block's
            global redis lock. Afterwards:
              - if the cell holds fewer entries than res_seq.fields,
                "continue_collecting_sub_scope" is triggered;
              - otherwise the inner output manager is advanced and
                "run_sub_scope" is triggered, or, once the manager is
                exhausted, the result collection is built and "success"
                is triggered.
        """
        r = get_redis_instance()
        with redis_lock.Lock(r, ExpKeys.get_block_global_lock_key(self.exp_id, self.uuid)):

            cell = self.res_seq.sequence[self.inner_output_manager.iterator]
            for name, scope_var in self.collector_spec.bound.iteritems():
                var = exp.get_scope_var_value(scope_var)
                exp.log(self.uuid, "Collected %s from %s" % (var, scope_var.title), severity="CRITICAL")
                log.debug("Collected %s from %s", var, scope_var.title)
                if var is not None:
                    if hasattr(var, "clone"):
                        cell[name] = var.clone("%s_%s" %
                                               (self.uuid, self.inner_output_manager.iterator))
                    else:
                        cell[name] = deepcopy(var)

            self.res_seq.sequence[self.inner_output_manager.iterator] = cell

            exp.store_block(self)

        if len(cell) < len(self.res_seq.fields):
            self.do_action("continue_collecting_sub_scope", exp)
        else:
            # Only the iterator advance is guarded: a StopIteration escaping
            # from the sub-scope run itself must not be mistaken for
            # "all folds processed".
            try:
                self.inner_output_manager.next()
            except StopIteration:
                # All folds were processed without errors
                self.build_result_collection(exp)

                self.do_action("success", exp)
            else:
                self.do_action("run_sub_scope", exp)
Ejemplo n.º 29
0
class PatternEdges(GenericBlock):
    """
        Block that hands compute_edges to the task queue.

        Inputs: an expression set ("es"), a gene set collection ("cs") and a
        gene-to-gene binary interaction ("gene2gene"). success() stores the
        task results as the "edges" and "diff_expr" outputs.
    """
    block_base_name = "PA_EDGES"
    block_group = GroupType.PATTERN_SEARCH
    name = "Patterns Edges"

    is_block_supports_auto_execution = True

    # Parameter-saving transitions first, then the shared execute actions.
    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params",
                     user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_es = InputBlockField(
        name="es", order_num=10,
        required_data_type="ExpressionSet", required=True
    )

    # NOTE: an "upload_gene2gene_platform" file parameter ("PPI platform",
    # order_num=12) was sketched here and is currently disabled.

    _input_comodule_set = InputBlockField(
        name="cs", order_num=20,
        required_data_type="GeneSets", required=True
    )

    _gene2gene = InputBlockField(
        name="gene2gene", order_num=30,
        required_data_type="BinaryInteraction", required=True
    )

    edges = OutputBlockField(name="edges", provided_data_type="Edges")

    diff_expr = OutputBlockField(name="diff_expr", provided_data_type="DiffExpr")

    def execute(self, exp, *args, **kwargs):
        """
            Build the compute_edges task signature, store the block and
            submit the task.

            @param exp: handed to the task and used to store the block
        """
        self.clean_errors()
        # Original hint: ":type: ComoduleSet" (the input field itself
        # declares "GeneSets").
        gene_sets = self.get_input_var("cs")
        # :type: ExpressionSet
        expression_set = self.get_input_var("es")
        # :type: BinaryInteraction
        interactions = self.get_input_var("gene2gene")
        # NOTE: reading a gene platform from an uploaded file is disabled.

        self.celery_task = wrapper_task.s(
            compute_edges, exp, self,
            m_rna_es=expression_set,
            comodule_set=gene_sets,
            gene2gene=interactions,
            base_filename="%s_pattern_edges" % self.uuid
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def process_upload(self, exp, *args, **kwargs):
        """
            @param exp: Experiment

            Trigger the "success" action; on any exception print its
            traceback and trigger the "error" action with the exception.
        """
        try:
            self.do_action("success", exp)
        except Exception as err:
            trace = sys.exc_info()[2]
            traceback.print_tb(trace)
            self.do_action("error", exp, err)

    def success(self, exp, edges, diff_expr):
        """Store "edges" and "diff_expr" as outputs and persist the block."""
        for out_name, value in (("edges", edges), ("diff_expr", diff_expr)):
            self.set_out_var(out_name, value)
        exp.store_block(self)