class FilterByInteraction(GenericBlock):
    """Processing block that filters two expression sets by an interaction.

    Reads the ``mRNA_es``, ``miRNA_es`` and ``interaction`` inputs, submits
    ``filter_by_bi`` through ``wrapper_task`` and, on success, publishes the
    two resulting expression sets as output variables.
    """

    block_base_name = "FILTER_BY_BI"
    name = "Filter ES by interaction"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports.
    _mRNA_es = InputBlockField(
        name="mRNA_es", order_num=10,
        required_data_type="ExpressionSet", required=True
    )
    _miRNA_es = InputBlockField(
        name="miRNA_es", order_num=20,
        required_data_type="ExpressionSet", required=True
    )
    _interaction = InputBlockField(
        name="interaction", order_num=30,
        required_data_type="BinaryInteraction", required=True
    )

    # Output ports.
    m_rna_filtered_es = OutputBlockField(
        name="m_rna_filtered_es", provided_data_type="ExpressionSet"
    )
    mi_rna_filtered_es = OutputBlockField(
        name="mi_rna_filtered_es", provided_data_type="ExpressionSet"
    )

    def __init__(self, *args, **kwargs):
        super(FilterByInteraction, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the filtering task, persist the block and submit the task."""
        self.clean_errors()
        task_kwargs = {
            "m_rna_es": self.get_input_var("mRNA_es"),
            "mi_rna_es": self.get_input_var("miRNA_es"),
            "interaction_matrix": self.get_input_var("interaction"),
            "base_filename": "%s_filtered_by_BI" % self.uuid,
        }
        self.celery_task = wrapper_task.s(filter_by_bi, exp, self, **task_kwargs)
        # The block is stored before the task is dispatched.
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, m_rna_filtered_es, mi_rna_filtered_es):
        """Publish both filtered expression sets and persist the block."""
        for out_name, value in (
            ("m_rna_filtered_es", m_rna_filtered_es),
            ("mi_rna_filtered_es", mi_rna_filtered_es),
        ):
            self.set_out_var(out_name, value)
        exp.store_block(self)
class EnrichmentNoTBlock(GenericBlock):
    """Testing block that runs ``enrichment_no_t_task`` on two gene-set inputs.

    The ``gs`` and ``patterns`` inputs plus the ``T`` threshold parameter are
    forwarded to the task; its result is published as ``dictionary_set``.
    """

    block_base_name = "ENRICHMENT_COM"
    name = "Comodule Enrichment"
    is_abstract = False
    block_group = GroupType.TESTING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _cs_1 = InputBlockField(
        name="gs", order_num=10,
        required_data_type="GeneSets", required=True
    )
    H = InputBlockField(
        name="patterns", order_num=11,
        required_data_type="GeneSets", required=True
    )
    _t = ParamField(
        name="T", order_num=12, title="Enrichment threshold",
        input_type=InputType.TEXT, field_type=FieldType.FLOAT,
        init_val="0.05"
    )
    # NOTE(review): this attribute name shadows the builtin ``dict`` inside the
    # class body; it is kept because the framework may rely on attribute names.
    dict = OutputBlockField(
        name="dictionary_set", provided_data_type="DictionarySet"
    )

    def __init__(self, *args, **kwargs):
        super(EnrichmentNoTBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the enrichment task, persist the block and submit the task."""
        self.clean_errors()
        gene_sets = self.get_input_var("gs")
        pattern_sets = self.get_input_var("patterns")
        out_filename = "%s_%s_enrich" % (self.uuid, 'enrichment_cont')
        self.celery_task = wrapper_task.s(
            enrichment_no_t_task, exp, self,
            T=self.T,
            gs=gene_sets,
            patterns=pattern_sets,
            base_filename=out_filename
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the task result as ``dictionary_set`` and persist the block."""
        self.set_out_var("dictionary_set", flt_es)
        exp.store_block(self)
class MergeGeneSetWithPlatformAnnotation(GenericBlock):
    """Processing block that maps gene sets onto a platform annotation.

    Submits ``map_gene_sets_to_probes`` with the ``gs`` input and the
    ``gene_sets`` attribute of the ``ann`` input; the result is published
    as the ``gs`` output.
    """

    block_base_name = "MERGE_GS_GPL_ANN"
    name = "Merge gene set with platform"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_gs = InputBlockField(
        name="gs", order_num=10,
        required_data_type="GeneSets", required=True
    )
    _input_ann = InputBlockField(
        name="ann", order_num=20,
        required_data_type="PlatformAnnotation", required=True
    )
    _gs = OutputBlockField(
        name="gs", field_type=FieldType.HIDDEN, init_val=None,
        provided_data_type="GeneSets"
    )

    def __init__(self, *args, **kwargs):
        super(MergeGeneSetWithPlatformAnnotation, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the mapping task, persist the block and submit the task."""
        self.clean_errors()
        source_sets = self.get_input_var("gs")
        annotation = self.get_input_var("ann")
        self.celery_task = wrapper_task.s(
            map_gene_sets_to_probes, exp, self,
            base_dir=exp.get_data_folder(),
            base_filename="%s_merged" % self.uuid,
            ann_gene_sets=annotation.gene_sets,
            src_gene_sets=source_sets
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Publish the merged gene sets as ``gs`` and persist the block."""
        self.set_out_var("gs", gs)
        exp.store_block(self)
class SvdSubAgg(GenericBlock):
    """Abstract aggregation block that submits ``aggregation_task``.

    The class-level ``mode`` string (empty here) is passed to the task and is
    also embedded in the output base filename.
    """

    is_abstract = True
    block_group = GroupType.AGGREGATION
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _mRNA_es = InputBlockField(
        name="mRNA_es", order_num=10,
        required_data_type="ExpressionSet", required=True
    )
    _miRNA_es = InputBlockField(
        name="miRNA_es", order_num=20,
        required_data_type="ExpressionSet", required=True
    )
    _interaction = InputBlockField(
        name="interaction", order_num=30,
        required_data_type="BinaryInteraction", required=True
    )

    c = ParamField(
        name="c", title="Constant c",
        input_type=InputType.TEXT, field_type=FieldType.FLOAT,
        init_val=1.0
    )

    agg_es = OutputBlockField(name="agg_es", provided_data_type="ExpressionSet")

    mode = ""

    def __init__(self, *args, **kwargs):
        super(SvdSubAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the aggregation task, persist the block and submit the task."""
        self.clean_errors()
        m_rna = self.get_input_var("mRNA_es")
        mi_rna = self.get_input_var("miRNA_es")
        interaction = self.get_input_var("interaction")
        task_kwargs = {
            "mode": self.mode,
            "c": self.c,
            "m_rna_es": m_rna,
            "mi_rna_es": mi_rna,
            "interaction_matrix": interaction,
            "base_filename": "%s_%s_agg" % (self.uuid, self.mode),
        }
        self.celery_task = wrapper_task.s(aggregation_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        """Publish the aggregated expression set and persist the block."""
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
class GlobalTest(GenericBlock):
    """Processing block that submits ``global_test_task`` for an ES and gene sets.

    A ``TableResult`` with fixed headers is created at construction time and
    handed to the task; the task's result replaces it on success.
    """

    block_base_name = "GLOBAL_TEST"
    name = "Goeman global test"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_es = InputBlockField(
        name="es", order_num=10,
        required_data_type="ExpressionSet", required=True
    )
    _input_gs = InputBlockField(
        name="gs", order_num=20,
        required_data_type="GeneSets", required=True
    )

    _result = OutputBlockField(
        name="result", field_type=FieldType.STR,
        provided_data_type="TableResult", init_val=None
    )

    elements = BlockField(
        name="elements", field_type=FieldType.SIMPLE_LIST,
        init_val=["gt_result.html"]
    )

    def __init__(self, *args, **kwargs):
        super(GlobalTest, self).__init__(*args, **kwargs)
        self.celery_task = None

        # The result table lives in the owning experiment's data folder.
        owner_exp = Experiment.get_exp_by_id(self.exp_id)
        table = TableResult(
            base_dir=owner_exp.get_data_folder(),
            base_filename="%s_gt_result" % self.uuid
        )
        self.result = table
        self.result.headers = ['p-value', 'Statistic', 'Expected', 'Std.dev', '#Cov']

    def execute(self, exp, *args, **kwargs):
        """Build the global-test task, persist the block and submit the task."""
        self.clean_errors()
        task_kwargs = {
            "es": self.get_input_var("es"),
            "gene_sets": self.get_input_var("gs"),
            "table_result": self.result,
        }
        self.celery_task = wrapper_task.s(global_test_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Keep the returned table, publish it as ``result`` and persist."""
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
class MergeExpressionSets(GenericBlock):
    """Processing block that submits ``merge_two_es`` for two expression sets.

    The task result is published as the ``merged_es`` output.
    """

    block_base_name = "MergeES"
    name = "Merge ES by concatenation"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es_1 = InputBlockField(
        name="es_1", title="Set 1", order_num=10,
        required_data_type="ExpressionSet", required=True
    )
    _es_2 = InputBlockField(
        name="es_2", title="Set 2", order_num=20,
        required_data_type="ExpressionSet", required=True
    )

    merged_es = OutputBlockField(
        name="merged_es", provided_data_type="ExpressionSet"
    )

    def __init__(self, *args, **kwargs):
        super(MergeExpressionSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the merge task, persist the block and submit the task."""
        self.clean_errors()
        first_es = self.get_input_var("es_1")
        second_es = self.get_input_var("es_2")
        self.celery_task = wrapper_task.s(
            merge_two_es, exp, self,
            es_1=first_es,
            es_2=second_es,
            base_filename="%s_merged" % self.uuid
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Publish the merged expression set and persist the block."""
        self.set_out_var("merged_es", es)
        exp.store_block(self)
def add_dyn_input(self, exp, received_block, *args, **kwargs):
    """Attach a new input port derived from an existing dynamic input.

    ``received_block`` is expected to carry an ``"_add_dyn_port"`` mapping
    with the keys ``"new_port"`` (name of the port to create) and ``"input"``
    (name of the dynamic input it extends).  The request is silently ignored
    when the mapping is absent, when either key is missing or empty, or when
    the named input is not known to the block serializer.

    On success the new ``InputBlockField`` is registered, its name is appended
    to the list stored under the dynamic input's attribute, the
    ``add_dyn_input_hook`` is invoked and the block is persisted via ``exp``.
    """
    spec = received_block.get("_add_dyn_port")
    if not spec:
        return

    # The payload comes from the client: a missing key is treated like an
    # empty value instead of surfacing as a KeyError.
    new_port_name = spec.get("new_port")
    dyn_port_name = spec.get("input")
    if not new_port_name or not dyn_port_name:
        return

    dyn_port = self._block_serializer.inputs.get(dyn_port_name)
    if not dyn_port:
        return

    # Ports spawned from the same dynamic input are ordered after all static
    # ports, in creation order.
    order_num = 1000 + abs(dyn_port.order_num) * 10
    bound_names = getattr(self, dyn_port_name)
    if bound_names:
        order_num += len(bound_names)

    new_port = InputBlockField(
        name=new_port_name,
        required_data_type=dyn_port.required_data_type,
        order_num=order_num
    )
    self.add_input_port(new_port)
    # Re-read the attribute after registration, as the original code did.
    getattr(self, dyn_port_name).append(new_port_name)

    self.add_dyn_input_hook(exp, dyn_port, new_port)
    exp.store_block(self)
class ThresholdBlock(GenericBlock):
    """Block that submits ``threshold_task`` for an expression set.

    The ``es`` input and the ``T`` parameter are forwarded to the task; its
    result is published as the ``gene_sets`` output.
    """

    block_base_name = "THRESHOLD"
    name = "Threshold"
    is_abstract = False
    block_group = GroupType.SNMNMF
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es", order_num=10,
        required_data_type="ExpressionSet", required=True
    )

    t = ParamField(
        name="T", title="Threshold",
        input_type=InputType.TEXT, field_type=FieldType.FLOAT,
        init_val=0.1
    )

    flt_es = OutputBlockField(name="gene_sets", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        super(ThresholdBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the threshold task, persist the block and submit the task."""
        self.clean_errors()
        expression_set = self.get_input_var("es")
        out_filename = "%s_%s_thr" % (self.uuid, 'threshold')
        self.celery_task = wrapper_task.s(
            threshold_task, exp, self,
            es=expression_set,
            T=self.T,
            base_filename=out_filename
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the task result as ``gene_sets`` and persist the block."""
        self.set_out_var("gene_sets", flt_es)
        exp.store_block(self)
class RcVisualizer(GenericBlock):
    """Abstract visualisation block bound to a ``ResultsContainer`` input.

    Exposes a ``metric`` select parameter whose options are supplied by the
    ``available_metrics`` property.
    """

    block_base_name = "RC_VIZUALIZER"
    is_block_supports_auto_execution = False
    block_group = GroupType.VISUALIZE
    is_abstract = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "input_bound"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("configure_table", ["input_bound", "ready"], "ready"),
    ])

    results_container = InputBlockField(
        name="results_container",
        required_data_type="ResultsContainer",
        required=True,
        field_type=FieldType.CUSTOM
    )
    _rc = BlockField(name="rc", field_type=FieldType.CUSTOM, is_a_property=True)
    _available_metrics = BlockField(
        name="available_metrics", field_type=FieldType.RAW, is_a_property=True
    )

    metric = ParamField(
        name="metric", title="Metric",
        field_type=FieldType.STR, input_type=InputType.SELECT,
        select_provider="available_metrics"
    )

    def __init__(self, *args, **kwargs):
        super(RcVisualizer, self).__init__(*args, **kwargs)

    @property
    @log_timing
    def available_metrics(self):
        """Return select options for every metric producing a single number.

        Each option is a dict with ``"pk"`` (the key in ``metrics_dict``) and
        ``"str"`` (the metric's ``title``).  Any failure is logged and an
        empty list is returned instead, so the property never raises.
        """
        try:
            return [
                {"pk": metric_name, "str": metric.title}
                for metric_name, metric in metrics_dict.iteritems()
                if metric.produce_single_number
            ]
        # ``as`` form: valid on Python 2.6+ and 3, unlike ``except X, e``.
        except Exception as e:
            log.exception(e)
            return []
class ZScoreBlock(GenericBlock):
    """Normalization block that submits ``zscore_task`` for an expression set.

    The task result is published as the ``flt_zscore_es`` output.
    """

    block_base_name = "ZSCORE_NORM"
    name = "Z-score Normalization"
    is_abstract = False
    block_group = GroupType.NORMALIZATION
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es", order_num=10,
        required_data_type="ExpressionSet", required=True
    )

    flt_es = OutputBlockField(
        name="flt_zscore_es", provided_data_type="ExpressionSet"
    )

    def __init__(self, *args, **kwargs):
        super(ZScoreBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the z-score task, persist the block and submit the task."""
        self.clean_errors()
        expression_set = self.get_input_var("es")
        out_filename = "%s_%s_flt" % (self.uuid, 'zscore')
        self.celery_task = wrapper_task.s(
            zscore_task, exp, self,
            es=expression_set,
            base_filename=out_filename
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the task result as ``flt_zscore_es`` and persist the block."""
        self.set_out_var("flt_zscore_es", flt_es)
        exp.store_block(self)
def add_cell(self, exp, received_block, *args, **kwargs):
    """Create a new cell from ``received_block["cells"]["new"]``.

    Nothing happens when that entry is missing or empty.  Otherwise a
    ``CellInfo`` labelled with the entry's ``"label"`` is built; for every
    field in ``self.cells_prototype.cells_list`` a required input port named
    ``<prototype name>_<current number of cells>`` is registered and recorded
    on the cell.  The cell is then appended to ``self.cells.cells`` and the
    block is persisted via ``exp``.
    """
    cell_request = received_block.get("cells", {}).get("new")
    if not cell_request:
        return

    new_cell = CellInfo(cell_request["label"])
    for prototype in self.cells_prototype.cells_list:
        # The suffix is the index the new cell will receive once appended.
        port_name = "%s_%s" % (prototype.name, len(self.cells.cells))
        new_cell.inputs_list.append((prototype.name, port_name))
        # TODO: add input port to block
        self.add_input_port(
            InputBlockField(
                name=port_name,
                required_data_type=prototype.data_type,
                required=True
            )
        )

    self.cells.cells.append(new_cell)
    exp.store_block(self)
class CrossValidation(UniformMetaBlock):
    """Meta block that produces repeated K-fold splits of its ES inputs.

    ``execute`` submits ``generate_cv_folds`` for every dynamically added
    expression-set input; ``build_result_collection`` assembles the collected
    per-fold results into a ``ResultsContainer`` published as
    ``results_container``.
    """

    block_base_name = "CROSS_VALID"
    name = "Cross Validation K-fold"

    _cv_actions = ActionsList(
        [ActionRecord("become_ready", ["valid_params"], "ready")])

    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST,
                          init_val=["cv_info.html"])

    _input_es_dyn = InputBlockField(name="es_inputs",
                                    required_data_type="ExpressionSet",
                                    required=True,
                                    multiply_extensible=True,
                                    order_num=-1)

    folds_num = ParamField(name="folds_num", title="Folds number", order_num=10,
                           input_type=InputType.TEXT, field_type=FieldType.INT,
                           init_val=5)
    repeats_num = ParamField(name="repeats_num", title="Repeats number",
                             order_num=20, input_type=InputType.TEXT,
                             field_type=FieldType.INT, init_val=1)

    def get_fold_labels(self):
        """Return ``"fold_<repeat>_<fold>"`` labels, repeats varying slowest.

        Both indices are 1-based.
        """
        return [
            "fold_%s_%s" % (repeat + 1, num + 1)
            for repeat in range(self.repeats_num)
            for num in range(self.folds_num)
        ]

    def get_repeat_labels(self):
        """Return 1-based ``"repeat_<n>"`` labels, one per repeat."""
        return [
            "repeat_%s" % (repeat + 1)
            for repeat in range(self.repeats_num)
        ]

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register train/test inner outputs for a newly added input port.

        @type new_port: InputBlockField
        """
        new_inner_output_train = InnerOutputField(
            name="%s_train_i" % new_port.name,
            provided_data_type=new_port.required_data_type)
        new_inner_output_test = InnerOutputField(
            name="%s_test_i" % new_port.name,
            provided_data_type=new_port.required_data_type)
        # Remember which inner outputs belong to which input port.
        self.inner_output_es_names_map[new_port.name] = \
            (new_inner_output_train.name, new_inner_output_test.name)
        self.register_inner_output_variables(
            [new_inner_output_train, new_inner_output_test])

    def execute(self, exp, *args, **kwargs):
        """Submit fold generation for all bound expression-set inputs."""
        self.clean_errors()
        self.inner_output_manager.reset()
        es_dict = {
            inp_name: self.get_input_var(inp_name)
            for inp_name in self.es_inputs
        }
        self.celery_task = wrapper_task.s(
            generate_cv_folds,
            exp, self,
            folds_num=self.folds_num,
            repeats_num=self.repeats_num,
            es_dict=es_dict,
            inner_output_es_names_map=self.inner_output_es_names_map,
            success_action="on_folds_generation_success"
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the base handler, then trigger ``become_ready``."""
        super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("become_ready", exp)

    def become_ready(self, *args, **kwargs):
        """Action handler with no work of its own."""
        pass

    def build_result_collection(self, exp):
        """Assemble ``self.res_seq`` into a stored ``ResultsContainer``.

        Only homogeneous results are supported, so the data type of the first
        result field decides the layout:

        * ``ClassifierResult`` - per field, one container per repeat holding
          ``folds_num`` cells, stacked by repeat and then by field;
        * ``ResultsContainer`` - exactly one field; every cell's container is
          loaded and stacked along the fold labels;
        * ``SequenceContainer`` - nothing is added.

        Raises ``Exception`` for an empty result collection, for more than
        one ``ResultsContainer`` field, or for any other data type.
        """
        # NOTE(review): developer-only remote-debug hook with a hard-coded
        # local path; it only runs when settings.CELERY_DEBUG is truthy.
        if settings.CELERY_DEBUG:
            import sys
            sys.path.append(
                '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
            )
            import pydevd
            pydevd.settrace('localhost', port=6901,
                            stdoutToServer=True, stderrToServer=True)

        rc = ResultsContainer(base_dir=exp.get_data_folder(),
                              base_filename="%s" % self.uuid)
        res_seq = self.res_seq

        def create_new_dim_rc(local_rc, axis_meta_block, axis_meta_block_labels):
            # Initialise an empty single-axis container.
            local_rc.axis_list = [axis_meta_block]
            local_rc.labels_dict[axis_meta_block] = axis_meta_block_labels
            local_rc.init_ar()
            local_rc.update_label_index()

        if not res_seq.fields:
            # Fail with a readable error rather than a bare StopIteration.
            raise Exception("Meta block got an empty result collection")

        # WARNING: We only support homogeneous results,
        # so we only check first element
        res_seq_field_name, data_type = next(res_seq.fields.iteritems())

        if data_type == "ClassifierResult":
            single_rc_list = []
            for field_name in res_seq.fields:
                loc_list = []
                for idx, res_seq_cell in enumerate(res_seq.sequence):
                    fold_idx = idx % self.folds_num
                    if fold_idx == 0:
                        # First fold of a repeat: open a new per-repeat container.
                        rc_run = ResultsContainer("", "")
                        create_new_dim_rc(rc_run, self.base_name + "_folds", [
                            "fold_%s" % fold_num
                            for fold_num in range(self.folds_num)
                        ])
                        loc_list.append(rc_run)
                    rc_run.ar[fold_idx] = res_seq_cell[field_name]

                rc_single = ResultsContainer("", "")
                rc_single.add_dim_layer(loc_list, self.base_name,
                                        self.get_repeat_labels())
                single_rc_list.append(rc_single)

            rc.add_dim_layer(single_rc_list, self.collector_spec.label,
                             res_seq.fields.keys())

        elif data_type == "ResultsContainer":
            if len(res_seq.fields) > 1:
                raise Exception(
                    "Meta block only support single output of type ResultsContainer"
                )
            rc_list = []
            for cell in res_seq.sequence:
                sub_rc = cell[res_seq_field_name]
                sub_rc.load()
                rc_list.append(sub_rc)
            rc.add_dim_layer(rc_list, self.base_name, self.get_fold_labels())

        elif data_type == "SequenceContainer":
            # TODO remove this check
            pass
        else:
            raise Exception("Meta blocks only support ClassifierResult "
                            "or ResultsContainer in the output collection. "
                            " Instead got: %s" % data_type)

        rc.store()
        # The array is dropped after storing, before the container is published.
        rc.ar = None
        self.set_out_var("results_container", rc)
class PatternSearch(GenericBlock):
    """Block that submits the ``pattern_search`` task.

    Inputs are an mRNA expression set and a gene-to-gene interaction
    (required) plus a miRNA expression set and a miRNA-to-gene interaction
    (optional).  The task result is published as the ``patterns`` output.
    """

    block_base_name = "PattSearch"
    name = "Pattern Search"
    block_group = GroupType.PATTERN_SEARCH
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _m_rna = InputBlockField(name="mRNA", order_num=10,
                             required_data_type="ExpressionSet", required=True)
    _mi_rna = InputBlockField(name="miRNA", order_num=20,
                              required_data_type="ExpressionSet", required=False)

    gene2gene = InputBlockField(name="gene2gene", order_num=30,
                                required_data_type="BinaryInteraction",
                                required=True)
    miRNA2gene = InputBlockField(name="miRNA2gene", order_num=31,
                                 required_data_type="BinaryInteraction",
                                 required=False)

    genes_num = ParamField(name="genes_num", title="Number of Genes",
                           order_num=10, input_type=InputType.TEXT,
                           field_type=FieldType.INT, init_val=100)

    d = ParamField(name="d", order_num=70, title="d",
                   input_type=InputType.TEXT, field_type=FieldType.INT,
                   init_val=2)
    min_imp = ParamField(name="min_imp", order_num=80,
                         title="Minimal improvement",
                         input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                         init_val=0.06)

    _metric = ParamField(
        "metric", title="Metric", order_num=40,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="mutual_information",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mutual_information", "Mutual Information"],
                ['normed_mutual_information', "Normed Mutual Information"],
                ['square_error', "Square Error"],
                ['correlation', "Correlation"],
                ['t-test', "TTest"],
                ['wilcoxon', "Wilcoxon"]
            ]
        }
    )

    patterns = OutputBlockField(name="patterns", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        super(PatternSearch, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the pattern-search task, persist the block and submit it."""
        self.clean_errors()
        exp.log(self.uuid, "Execute called")
        self.celery_task = wrapper_task.s(
            pattern_search,
            exp, self,
            m_rna_es=self.get_input_var("mRNA"),
            mi_rna_es=self.get_input_var("miRNA"),
            gene2gene=self.get_input_var("gene2gene"),
            miRNA2gene=self.get_input_var("miRNA2gene"),
            radius=self.d,
            min_imp=self.min_imp,
            number_of_genes=self.genes_num,
            # "metric" is a parameter, not an input port, so it is read as an
            # attribute like the other parameters (d, min_imp, genes_num).
            metric=self.metric,
            base_filename="%s_comodule_sets" % self.uuid
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Log success, publish the result as ``patterns`` and persist."""
        exp.log(self.uuid, "Success")
        self.set_out_var("patterns", gs)
        exp.store_block(self)
class NIMFASNMNMFBlock(GenericBlock):
    """Block that submits ``nimfa_snmnmf_task``.

    Takes mRNA and miRNA expression sets plus gene-to-gene and miRNA-to-gene
    interactions, forwards the ``l1``, ``l2``, ``g1``, ``g2`` and ``rank``
    parameters as a ``params`` dict, and publishes the three task results as
    ``W``, ``H1_miRNA`` and ``H2_genes``.
    """

    block_base_name = "NIMFA_SNMNMF"
    name = "NIMFA SNMNMF"
    is_abstract = False
    block_group = GroupType.SNMNMF
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Input ports.
    _m_rna = InputBlockField(name="mRNA", order_num=10,
                             required_data_type="ExpressionSet", required=True)
    _mi_rna = InputBlockField(name="miRNA", order_num=20,
                              required_data_type="ExpressionSet", required=True)
    _gene2gene = InputBlockField(name="Gene2Gene", order_num=40,
                                 required_data_type="BinaryInteraction",
                                 required=True)
    _mirna2gene = InputBlockField(name="miRNA2gene", order_num=50,
                                  required_data_type="BinaryInteraction",
                                  required=True)

    # Task parameters.
    l1 = ParamField(name="l1", order_num=70, title="l1",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                    init_val=0.1)
    l2 = ParamField(name="l2", order_num=80, title="l2",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                    init_val=0.1)
    g1 = ParamField(name="g1", order_num=90, title="g1",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                    init_val=0.1)
    g2 = ParamField(name="g2", order_num=100, title="g2",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                    init_val=0.1)
    rank = ParamField(name="rank", order_num=110, title="rank",
                      input_type=InputType.TEXT, field_type=FieldType.INT,
                      init_val=50)

    # Output ports.
    w = OutputBlockField(name="W", provided_data_type="ExpressionSet")
    H1_miRNA = OutputBlockField(name="H1_miRNA",
                                provided_data_type="ExpressionSet")
    H2_genes = OutputBlockField(name="H2_genes",
                                provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(NIMFASNMNMFBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the factorisation task, persist the block and submit it."""
        self.clean_errors()
        m_rna_es = self.get_input_var("mRNA")
        mi_rna_es = self.get_input_var("miRNA")
        gene_interaction = self.get_input_var("Gene2Gene")
        mirna_interaction = self.get_input_var("miRNA2gene")

        task_params = {
            'l1': self.l1,
            'l2': self.l2,
            'g1': self.g1,
            'g2': self.g2,
            'rank': self.rank,
        }
        self.celery_task = wrapper_task.s(
            nimfa_snmnmf_task,
            exp, self,
            mRNA=m_rna_es,
            miRNA=mi_rna_es,
            gene2gene=gene_interaction,
            miRNA2gene=mirna_interaction,
            params=task_params,
            base_filename="%s_nimfa_snmnmf" % self.uuid
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, W, H1, H2):
        """Publish the three result matrices and persist the block."""
        for out_name, value in (("W", W), ("H1_miRNA", H1), ("H2_genes", H2)):
            self.set_out_var(out_name, value)
        exp.store_block(self)
class ComoduleSetView(GenericBlock):
    """Visualisation block presenting a ``ComoduleSet`` input as a table.

    Provides the table structure for the front end (``table_js``) and JSON /
    CSV exports of the loaded set, together with the URLs for those exports.
    """

    block_base_name = "CS_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Comodule Set View"
    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
    ])

    input_comodule_set = InputBlockField(name="cs", order_num=10,
                                         required_data_type="ComoduleSet",
                                         required=True)

    _table_for_js = BlockField(name="table_js", field_type=FieldType.RAW,
                               is_a_property=True)
    _export_raw_results_url = BlockField(name="export_raw_results_url",
                                         field_type=FieldType.STR,
                                         is_a_property=True)
    _export_results_csv_url = BlockField(name="export_results_csv_url",
                                         field_type=FieldType.STR,
                                         is_a_property=True)

    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST,
                          init_val=["comodule_set_view.html"])

    @property
    def export_results_csv_url(self):
        """URL of the ``export_csv`` field rendered in ``csv`` format."""
        url_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_csv",
            "format": "csv"
        }
        return reverse("block_field_formatted", kwargs=url_kwargs)

    @property
    def export_raw_results_url(self):
        """URL of the ``export_json`` field rendered in ``json`` format."""
        url_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_json",
            "format": "json"
        }
        return reverse("block_field_formatted", kwargs=url_kwargs)

    @property
    def table_js(self):
        """Return ``{"columns": [...], "rows": [...]}`` for the bound set.

        Returns ``None`` when the ``cs`` input is not available.  Column
        field codes are ``"_"`` plus the first 8 hex digits of the title's
        MD5 digest.
        """
        comodule_set = self.get_input_var("cs")  # type: ComoduleSet
        if not comodule_set:
            return None

        table = comodule_set.load_set()  # type: dict
        # The entry stored under key 0 decides how many value columns exist.
        if isinstance(table[0], set):
            value_columns = ["values"]
        else:
            value_columns = ["values", "values"]
        table_headers = ["#"] + value_columns

        column_title_to_code_name = {}
        for title in table_headers:
            column_title_to_code_name[title] = \
                "_" + hashlib.md5(title).hexdigest()[:8]
        fields_list = [column_title_to_code_name[title]
                       for title in table_headers]

        column_specs = [
            {
                "title": title,
                "field": column_title_to_code_name[title],
                "visible": True
            }
            for title in table_headers
        ]
        row_dicts = [
            dict(zip(fields_list, [idx, value]))
            for idx, value in table.iteritems()
        ]
        return {"columns": column_specs, "rows": row_dicts}

    def export_json(self, exp, *args, **kwargs):
        """Return the loaded set as a list of ``(key, list(value))`` pairs."""
        comodule_set = self.get_input_var("cs")
        loaded = comodule_set.load_set()
        return [(idx, list(value)) for idx, value in loaded.iteritems()]

    def export_csv(self, exp, *args, **kwargs):
        """Return the loaded set's items as CSV text, one item per row."""
        import csv
        import StringIO

        comodule_set = self.get_input_var("cs")
        loaded = comodule_set.load_set()
        buf = StringIO.StringIO()
        csv.writer(buf).writerows(loaded.items())
        # Rewind so the whole buffer is read back.
        buf.seek(0)
        return buf.read()
class GeneSetsView(GenericBlock):
    """Visualisation block presenting a ``GeneSets`` input as a key/value table.

    Provides the table structure for the front end (``table_js``) and a JSON
    export of the gene dictionary, together with the export URL.
    """

    block_base_name = "GS_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Gene Sets view"
    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
    ])

    _input_dictionary_set = InputBlockField(name="gs", order_num=10,
                                            required_data_type="GeneSets",
                                            required=True)

    _table_for_js = BlockField(name="table_js", field_type=FieldType.RAW,
                               is_a_property=True)
    _export_raw_results_url = BlockField(name="export_raw_results_url",
                                         field_type=FieldType.STR,
                                         is_a_property=True)

    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST,
                          init_val=["dictionary_set_view.html"])

    @property
    def export_raw_results_url(self):
        """URL of the ``export_json`` field rendered in ``json`` format."""
        url_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_json",
            "format": "json"
        }
        return reverse("block_field_formatted", kwargs=url_kwargs)

    @property
    def table_js(self):
        """Return ``{"columns": [...], "rows": [...]}`` for the bound gene sets.

        Returns ``None`` when the ``gs`` input is not available.  Rows pair
        each key of the gene dictionary with its values converted to a list.
        Column field codes are ``"_"`` plus the first 8 hex digits of the
        title's MD5 digest.
        """
        gene_sets = self.get_input_var("gs")  # type: GeneSets
        if not gene_sets:
            return None

        table = gene_sets.get_gs(conv=False).genes
        table_headers = ['key', 'value']

        column_title_to_code_name = {}
        for title in table_headers:
            column_title_to_code_name[title] = \
                "_" + hashlib.md5(title).hexdigest()[:8]
        fields_list = [column_title_to_code_name[title]
                       for title in table_headers]

        column_specs = [
            {
                "title": title,
                "field": column_title_to_code_name[title],
                "visible": True
            }
            for title in table_headers
        ]
        row_dicts = [
            dict(zip(fields_list, (key, list(values))))
            for key, values in table.iteritems()
        ]
        return {"columns": column_specs, "rows": row_dicts}

    def export_json(self, exp, *args, **kwargs):
        """Return the ``genes`` mapping of the bound gene sets."""
        gene_sets = self.get_input_var("gs")
        return gene_sets.get_gs().genes
class GeneSetAggCV(GenericBlock):
    """Aggregation block that submits ``agg_task_cv`` for train/test sets.

    The train and test expression sets, the gene sets and the selected
    ``agg_method`` are forwarded to the task; the two results are published
    as ``out_train_es`` and ``out_test_es``.
    """

    block_group = GroupType.AGGREGATION
    block_base_name = "CV_GS_A"
    name = "CV Gene Sets Aggregation"
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_train_es = InputBlockField(name="train_es", order_num=10,
                                      required_data_type="ExpressionSet",
                                      required=True)
    _input_test_es = InputBlockField(name="test_es", order_num=20,
                                     required_data_type="ExpressionSet",
                                     required=True)
    _input_gs = InputBlockField(name="gs", order_num=30,
                                required_data_type="GeneSets", required=True)

    agg_method = ParamField(
        "agg_method", title="Aggregate method", order_num=50,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="mean",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mean", "Mean"],
                ["median", "Median"],
                ["pca", "PCA"]
            ]
        }
    )

    out_train_es = OutputBlockField(name="out_train_es",
                                    provided_data_type="ExpressionSet")
    out_test_es = OutputBlockField(name="out_test_es",
                                   provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(GeneSetAggCV, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the aggregation task, persist the block and submit it."""
        self.clean_errors()
        task_kwargs = {
            "train_es": self.get_input_var("train_es"),
            "test_es": self.get_input_var("test_es"),
            "gene_sets": self.get_input_var("gs"),
            "method": self.agg_method,
            "base_filename": "%s_%s_agg" % (self.uuid, "pca_cv"),
        }
        self.celery_task = wrapper_task.s(agg_task_cv, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, out_train_es, out_test_es):
        """Publish both aggregated expression sets and persist the block."""
        for out_name, value in (
            ("out_train_es", out_train_es),
            ("out_test_es", out_test_es),
        ):
            self.set_out_var(out_name, value)
        exp.store_block(self)
class MergeComoduleSets(GenericBlock):
    """Processing block that submits ``merge_comodules_task`` for two sets.

    Both comodule sets and their user-supplied names are forwarded to the
    task; its result is published as the ``comodule_set`` output.
    """

    block_base_name = "MERGE_COMODULE_SETS"
    name = "Merge Comodule Sets"
    is_abstract = False
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters"
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _cs_1 = InputBlockField(name="cs_1", order_num=10,
                            required_data_type="ComoduleSet", required=True)
    _cs_1_name = ParamField(name="cs_1_name", order_num=11,
                            title="Comodule 1 name",
                            input_type=InputType.TEXT,
                            field_type=FieldType.STR, init_val="genes")

    _cs_2 = InputBlockField(name="cs_2", order_num=20,
                            required_data_type="ComoduleSet", required=True)
    _cs_2_name = ParamField(name="cs_2_name", order_num=21,
                            title="Comodule 2 name",
                            input_type=InputType.TEXT,
                            field_type=FieldType.STR, init_val="genes")

    flt_es = OutputBlockField(name="comodule_set",
                              provided_data_type="ComoduleSet")

    def __init__(self, *args, **kwargs):
        super(MergeComoduleSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Build the merge task, persist the block and submit the task."""
        self.clean_errors()
        first_set = self.get_input_var("cs_1")
        second_set = self.get_input_var("cs_2")
        task_kwargs = {
            "cs_1": first_set,
            "cs_2": second_set,
            "cs_1_name": self.cs_1_name,
            "cs_2_name": self.cs_2_name,
            "base_filename": "%s_%s_thr" % (self.uuid, 'merge_cs'),
        }
        self.celery_task = wrapper_task.s(
            merge_comodules_task, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the task result as ``comodule_set`` and persist the block."""
        self.set_out_var("comodule_set", flt_es)
        exp.store_block(self)
class PcaVisualize(GenericBlock):
    """Visualisation block that projects an expression set onto two PCA axes.

    Once parameters are valid the block immediately triggers ``compute_pca``,
    which fills ``chart_series`` with one series per phenotype class.
    """

    block_base_name = "PCA_VISUALIZE"
    name = "2D PCA Plot"
    block_group = GroupType.VISUALIZE

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord(
            "on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord(
            "compute_pca",
            ["valid_params"],
            "computing_pca",
            user_title="Compute PCA",
        ),
        ActionRecord("pca_done", ["computing_pca"], "done"),
        ActionRecord(
            "reset_execution",
            ["*", "done", "execution_error", "ready", "working"],
            "ready",
            user_title="Reset execution",
        ),
    ])

    input_es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )

    chart_series = BlockField(
        name="chart_series", field_type=FieldType.RAW, init_val=[])
    chart_categories = BlockField(
        name="chart_categories",
        field_type=FieldType.SIMPLE_LIST,
        init_val=[],
    )
    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["pca.html"],
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block, passing "PCA visualise" to the base class."""
        super(PcaVisualize, self).__init__("PCA visualise", *args, **kwargs)

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the base handler, then start the PCA computation right away."""
        super(PcaVisualize, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("compute_pca", exp)

    def compute_pca(self, exp, *args, **kwargs):
        """Fit a 2-component PCA on the ``es`` input and build chart series.

        The projected points are grouped by the (stripped) value of the
        phenotype column named by ``pheno_metadata['user_class_title']`` and
        stored in ``self.chart_series`` as ``{"name": ..., "data": [...]}``
        entries. Finishes by firing the ``pca_done`` action.
        """
        log.info("compute pca invoked")

        es = self.get_input_var("es")  # :type es: ExpressionSet
        assay_df = es.get_assay_data_frame()
        pheno_df = es.get_pheno_data_frame()
        target_column = es.pheno_metadata['user_class_title']

        # ``.values`` yields the same ndarray as the deprecated (and, in
        # pandas >= 1.0, removed) ``as_matrix()`` and exists in old releases.
        # The matrix is transposed before fitting; presumably the assay frame
        # is features x samples -- TODO confirm.
        samples = assay_df.values.transpose()

        pca_model = decomposition.PCA(n_components=2)
        pca_model.fit(samples)
        projected = pca_model.transform(samples).tolist()

        names = [x.strip() for x in pheno_df[target_column].tolist()]

        series_by_names = defaultdict(list)
        for point, name in zip(projected, names):
            series_by_names[name].append(point)

        self.chart_series = [
            {"name": name, "data": points}
            for name, points in series_by_names.iteritems()
        ]
        self.do_action("pca_done", exp)

    def pca_done(self, exp, *args, **kwargs):
        """Action handler for ``pca_done``; only logs completion."""
        log.info("pca done")
class CrossValidation(UniformMetaBlock):
    """Meta block producing K-fold cross-validation splits.

    For every dynamically added expression-set input a pair of inner outputs
    (``<name>_train_i`` / ``<name>_test_i``) is registered; ``execute`` hands
    the inputs to ``generate_cv_folds``.
    """

    block_base_name = "CROSS_VALID"
    name = "Cross validation K-fold"

    _cv_actions = ActionsList([
        ActionRecord("become_ready", ["valid_params"], "ready"),
    ])

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["cv_info.html"],
    )

    _input_es_dyn = InputBlockField(
        name="es_inputs",
        required_data_type="ExpressionSet",
        required=True,
        multiply_extensible=True,
        order_num=-1,
    )

    folds_num = ParamField(
        name="folds_num",
        title="Folds number",
        order_num=10,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=5,
    )
    repeats_num = ParamField(
        name="repeats_num",
        title="Repeats number",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=1,
    )

    def get_fold_labels(self):
        """Return ``fold_1`` .. ``fold_N`` with N = folds_num * repeats_num."""
        total = self.folds_num * self.repeats_num
        labels = []
        for position in range(total):
            labels.append("fold_%s" % (position + 1, ))
        return labels

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register train/test inner outputs for a newly added input port.

        @type new_port: InputBlockField
        """
        data_type = new_port.required_data_type
        train_field = InnerOutputField(
            name="%s_train_i" % new_port.name,
            provided_data_type=data_type,
        )
        test_field = InnerOutputField(
            name="%s_test_i" % new_port.name,
            provided_data_type=data_type,
        )

        # Remember which inner outputs belong to this input port.
        self.inner_output_es_names_map[new_port.name] = (
            train_field.name,
            test_field.name,
        )
        self.register_inner_output_variables([train_field, test_field])

    def execute(self, exp, *args, **kwargs):
        """Reset inner outputs and dispatch the fold-generation task."""
        self.clean_errors()
        self.inner_output_manager.reset()

        es_dict = dict(
            (inp_name, self.get_input_var(inp_name))
            for inp_name in self.es_inputs
        )

        self.celery_task = wrapper_task.s(
            generate_cv_folds,
            exp,
            self,
            folds_num=self.folds_num,
            repeats_num=self.repeats_num,
            es_dict=es_dict,
            inner_output_es_names_map=self.inner_output_es_names_map,
            success_action="on_folds_generation_success",
        )

        exp.store_block(self)
        self.celery_task.apply_async()

    def on_params_is_valid(self, exp, *args, **kwargs):
        """Run the base handler, then fire the ``become_ready`` action."""
        super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("become_ready", exp)

    def become_ready(self, *args, **kwargs):
        """Action handler for ``become_ready``; intentionally does nothing."""
        pass
class GenericRankingBlock(GenericBlock):
    """Abstract base for blocks that rank an expression set.

    Concrete subclasses are expected to set ``ranking_name`` and may override
    ``collect_options``; ``execute`` forwards both to ``apply_ranking``.
    """

    block_base_name = ""
    block_group = GroupType.PROCESSING
    is_abstract = True

    is_block_supports_auto_execution = True

    # Parameter-validation transitions plus the execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )

    # TODO: remove from generic ranking
    best = ParamField(
        name="best",
        title="Consider only best",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=None,
    )

    _result = OutputBlockField(
        name="result",
        field_type=FieldType.STR,
        provided_data_type="TableResult",
        init_val=None,
    )

    def __init__(self, *args, **kwargs):
        """Initialise state and pre-create the ``result`` table.

        The ``TableResult`` is placed in the experiment's data folder and
        published as the ``result`` output straight away.
        """
        super(GenericRankingBlock, self).__init__(*args, **kwargs)

        self.ranking_name = None
        self.ranking_options = {}
        self.celery_task = None

        owner_exp = Experiment.get_exp_by_id(self.exp_id)
        self.result = TableResult(
            base_dir=owner_exp.get_data_folder(),
            base_filename="%s_gt_result" % self.uuid,
        )
        self.set_out_var("result", self.result)

    def collect_options(self):
        """Reset ``ranking_options`` to an empty dict."""
        self.ranking_options = {}

    def execute(self, exp, *args, **kwargs):
        """Collect options, dispatch ``apply_ranking`` and log the hand-off."""
        self.clean_errors()
        self.collect_options()

        self.celery_task = wrapper_task.s(
            apply_ranking,
            exp=exp,
            block=self,
            es=self.get_input_var("es"),
            ranking_name=self.ranking_name,
            result_table=self.result,
            options=self.ranking_options,
        )

        exp.store_block(self)
        self.celery_task.apply_async()

        exp.log(self.uuid, "Sent ranking computation to queue")
        log.debug("Sent ranking computation to queue")

    def success(self, exp, result, *args, **kwargs):
        """Keep the computed table, publish it and persist the block."""
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
class NCF(GenericBlock):
    """Classifier block named "Network-Constrained Forest".

    Takes mRNA and miRNA train/test expression sets plus two
    ``BinaryInteraction`` inputs, gathers the user parameters into
    ``classifier_options`` and dispatches ``apply_ncf_classifier``.
    """

    block_group = GroupType.CLASSIFIER
    block_base_name = "NCF"
    name = "Network-Constrained Forest"

    classifier_name = "ncf"
    is_abstract = False
    is_block_supports_auto_execution = True

    # Block behavior
    _block_actions = ActionsList([])
    _block_actions.extend(save_params_actions_list)
    _block_actions.extend(execute_block_actions_list)

    # Interaction inputs
    gene2gene = InputBlockField(
        name="gene2gene",
        order_num=30,
        required_data_type="BinaryInteraction",
        required=True,
    )
    miRNA2gene = InputBlockField(
        name="miRNA2gene",
        order_num=31,
        required_data_type="BinaryInteraction",
        required=True,
    )

    # Expression-set inputs
    _m_train_es = InputBlockField(
        name="mRNA_train_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _m_test_es = InputBlockField(
        name="mRNA_test_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )
    _mi_train_es = InputBlockField(
        name="miRNA_train_es",
        order_num=21,
        required_data_type="ExpressionSet",
        required=True,
    )
    _mi_test_es = InputBlockField(
        name="miRNA_test_es",
        order_num=22,
        required_data_type="ExpressionSet",
        required=True,
    )

    # Provided outputs
    _result = OutputBlockField(
        name="result",
        field_type=FieldType.CUSTOM,
        provided_data_type="ClassifierResult",
        init_val=None,
    )

    # User defined parameters
    n_estimators = ParamField(
        name="n_estimators",
        title="The number of trees in the forest",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val="1000",
        order_num=41,
    )
    walk_max_length = ParamField(
        name="walk_max_length",
        title="Walk max length",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val="10",
        order_num=50,
    )
    criterion = ParamField(
        name="criterion",
        title="The function to measure the quality of a split",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        order_num=60,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["gini", "Gini impurity"],
                ["entropy", "Information gain"],
            ],
        },
    )
    eps = ParamField(
        name="eps",
        title="Eps",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val="0.01",
        order_num=70,
    )
    max_depth = ParamField(
        name="max_depth",
        title="The maximum depth of the tree",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val="2",
        order_num=80,
    )
    min_samples_split = ParamField(
        name="min_samples_split",
        title="The minimum number of samples to split an internal node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val="2",
        order_num=90,
    )
    min_samples_leaf = ParamField(
        name="min_samples_leaf",
        title="The minimum number of samples to be at a leaf node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val="2",
        order_num=100,
    )
    bootstrap = ParamField(
        name="bootstrap",
        title="bootstrap",
        input_type=InputType.CHECKBOX,
        field_type=FieldType.BOOLEAN,
        required=False,
        order_num=110,
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block with empty option dicts and no task."""
        super(NCF, self).__init__(*args, **kwargs)
        self.celery_task = None
        self.classifier_options = {}
        self.fit_options = {}

    def execute(self, exp, *args, **kwargs):
        """Clear the previous result, collect options and dispatch the task."""
        self.set_out_var("result", None)
        self.collect_options()

        mRNA_train_es = self.get_input_var("mRNA_train_es")
        mRNA_test_es = self.get_input_var("mRNA_test_es")
        miRNA_train_es = self.get_input_var("miRNA_train_es")
        miRNA_test_es = self.get_input_var("miRNA_test_es")

        self.celery_task = wrapper_task.s(
            apply_ncf_classifier,
            exp=exp,
            block=self,
            mRNA_train_es=mRNA_train_es,
            mRNA_test_es=mRNA_test_es,
            miRNA_train_es=miRNA_train_es,
            miRNA_test_es=miRNA_test_es,
            classifier_name=self.classifier_name,
            classifier_options=self.classifier_options,
            fit_options=self.fit_options,
            base_folder=exp.get_data_folder(),
            base_filename="%s_%s" % (self.uuid, self.classifier_name),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Store the obtained result as the ``result`` output variable."""
        self.set_out_var("result", result)
        exp.store_block(self)

    def reset_execution(self, exp, *args, **kwargs):
        """Clear errors and the ``result`` output, then persist the block."""
        self.clean_errors()
        # self.get_scope().remove_temp_vars()
        self.set_out_var("result", None)
        exp.store_block(self)

    def get_option_safe(self, name, target_type=None):
        """Return attribute ``name``, optionally converted, or ``None``.

        ``None`` is returned when the attribute is missing, when its value is
        falsy, or when ``target_type(value)`` fails with ``TypeError`` or
        ``ValueError``.
        """
        if not hasattr(self, name):
            return None
        raw = getattr(self, name)
        if not raw:
            return None
        if not target_type:
            return raw
        try:
            return target_type(raw)
        except (TypeError, ValueError):
            # Unconvertible user input is treated as "option not set".
            # Unlike a bare ``except`` this lets KeyboardInterrupt and
            # SystemExit propagate.
            return None

    def collect_option_safe(self, name, target_type=None, target_name=None):
        """Copy a truthy option into ``classifier_options`` and return it.

        The value is stored under ``target_name`` when given, otherwise under
        ``name``. Falsy values are not stored.
        """
        value = self.get_option_safe(name, target_type)
        if value:
            self.classifier_options[target_name or name] = value
        return value

    def collect_options(self):
        """Rebuild ``classifier_options`` from the inputs and parameters."""
        # Start from a clean dict so that an option cleared by the user does
        # not survive from a previous run.
        self.classifier_options = {}

        self.classifier_options["gene2gene"] = self.get_input_var("gene2gene")
        self.classifier_options["miRNA2gene"] = self.get_input_var(
            "miRNA2gene")
        # NOTE(review): range() excludes its upper bound, so walk_max_length
        # itself is not among the lengths -- confirm this is intended.
        self.classifier_options['walk_lengths'] = range(
            1, int(self.walk_max_length))

        # NOTE(review): the "criterion" parameter declared on this class is
        # not copied into classifier_options -- confirm whether it should be.
        self.collect_option_safe("eps")
        self.collect_option_safe("n_estimators", int)
        # self.collect_option_safe("max_features")
        self.collect_option_safe("max_depth", int)
        self.collect_option_safe("min_samples_leaf", int)
        self.collect_option_safe("min_samples_split", int)
        self.classifier_options["bootstrap"] = self.bootstrap
class EnrichmentVisualize(GenericBlock):
    """Visualisation block rendering a ``DictionarySet`` as a table.

    Exposes the table for the front end (``table_js``) and two export
    endpoints (``export_json`` and ``export_csv``).
    """

    block_base_name = "EV_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Enrichment Visualize"

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord(
            "on_params_is_valid", ["validating_params"], "valid_params"),
    ])

    _input_dictionary_set = InputBlockField(
        name="ds",
        order_num=10,
        required_data_type="DictionarySet",
        required=True,
    )

    # Fields backed by the properties defined below.
    _table_for_js = BlockField(
        name="table_js", field_type=FieldType.RAW, is_a_property=True)
    _export_raw_results_url = BlockField(
        name="export_raw_results_url",
        field_type=FieldType.STR,
        is_a_property=True,
    )
    _export_results_csv_url = BlockField(
        name="export_results_csv_url",
        field_type=FieldType.STR,
        is_a_property=True,
    )

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=[
            "enrichment_view.html"
        ],
    )

    def map_to_symbols(self, gene_set):
        """Return the names of ``GeneIdentifier`` rows matching ``gene_set``.

        Rows are selected with the ``refseq__refseq__in`` filter.
        """
        matches = GeneIdentifier.objects.filter(refseq__refseq__in=gene_set)
        return [gi.name for gi in matches]

    @property
    def export_results_csv_url(self):
        """URL of the ``export_csv`` field in ``csv`` format."""
        url_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_csv",
            "format": "csv",
        }
        return reverse("block_field_formatted", kwargs=url_kwargs)

    @property
    def export_raw_results_url(self):
        """URL of the ``export_json`` field in ``json`` format."""
        url_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_json",
            "format": "json",
        }
        return reverse("block_field_formatted", kwargs=url_kwargs)

    @property
    def table_js(self):
        """Return ``{"columns": [...], "rows": [...]}`` or ``None``.

        ``None`` is returned when the ``ds`` input is falsy. Each row holds
        the dictionary key, element 1 of its value, and the set of symbols
        mapped from element 0 of its value.
        """
        cs = self.get_input_var("ds")  # :type cs: DictionarySet
        if not cs:
            return None

        table = cs.load_dict()
        table_headers = ['comodule', '(term, p-val)', 'genes']

        # Each title gets a short field code derived from its md5 digest.
        column_title_to_code_name = dict(
            (title, "_" + hashlib.md5(title).hexdigest()[:8])
            for title in table_headers
        )
        fields_list = [
            column_title_to_code_name[title] for title in table_headers
        ]

        columns = []
        for title in table_headers:
            columns.append({
                "title": title,
                "field": column_title_to_code_name[title],
                "visible": True,
            })

        rows = []
        for key, value in table.iteritems():
            record = (key, value[1], set(self.map_to_symbols(value[0])))
            rows.append(dict(zip(fields_list, record)))

        return {"columns": columns, "rows": rows}

    def export_json(self, exp, *args, **kwargs):
        """Return the dictionary loaded from the ``ds`` input."""
        return self.get_input_var("ds").load_dict()

    def export_csv(self, exp, *args, **kwargs):
        """Return the ``ds`` dictionary items as CSV text, one per row."""
        import csv
        import StringIO

        ds = self.get_input_var("ds")
        dic = ds.load_dict()

        buf = StringIO.StringIO()
        csv.writer(buf).writerows(dic.items())
        return buf.getvalue()
class GeneSetAgg(GenericBlock):
    """Aggregate an expression set by gene sets via ``do_gs_agg``.

    Reads the ``es`` and ``gs`` inputs and publishes the result as
    ``agg_es``.
    """

    block_base_name = "GENE_SET_AGG"
    name = "Gene sets aggregation"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    # Parameter-validation transitions plus the execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _gs = InputBlockField(
        name="gs",
        order_num=20,
        required_data_type="GeneSets",
        required=True,
    )

    # NOTE(review): the stored value for the "Median" choice is "media"
    # (GeneSetAggCV uses "median") -- verify which spelling do_gs_agg expects.
    agg_method = ParamField(
        "agg_method",
        title="Aggregate method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="mean",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mean", "Mean"],
                ["media", "Median"],
            ],
        },
    )

    agg_es = OutputBlockField(
        name="agg_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task has been scheduled yet."""
        super(GeneSetAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear errors, build the ``do_gs_agg`` task, store and dispatch."""
        self.clean_errors()

        expression_set = self.get_input_var("es")
        gene_sets = self.get_input_var("gs")
        target_filename = "%s_gs_agg" % (self.uuid, )

        # All task arguments are passed positionally.
        self.celery_task = wrapper_task.s(
            do_gs_agg,
            exp,
            self,
            expression_set,
            gene_sets,
            self.agg_method,
            target_filename,
        )

        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        """Publish the aggregated expression set and persist the block."""
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
class MultiFeature(UniformMetaBlock):
    """Meta block that runs its inner workflow once per selected feature.

    The selected features double as fold labels; ``execute`` hands them and
    the bound expression sets to ``prepare_folds``.
    """

    block_base_name = "MULTI_FEATURE"
    name = "Multi Feature Validation"

    _mf_block_actions = ActionsList([
        ActionRecord(
            "on_feature_selection_updated",
            ["valid_params", "ready", "done"],
            "ready",
        ),
    ])

    _input_es_dyn = InputBlockField(
        name="es_inputs",
        order_num=-10,
        required_data_type="ExpressionSet",
        required=True,
        multiply_extensible=True,
    )

    _is_sub_pages_visible = BlockField(
        "is_sub_pages_visible",
        FieldType.RAW,
        init_val=False,
        is_a_property=True,
    )

    pages = BlockField("pages", FieldType.RAW, init_val={
        "select_feature": {
            "title": "Select features to examine",
            "resource": "select_feature",
            "widget": "widgets/select_feature.html",
        },
    })

    def __init__(self, *args, **kwargs):
        """Initialise the block with an empty feature selection."""
        super(MultiFeature, self).__init__(*args, **kwargs)
        self.features = []

    @property
    def is_sub_pages_visible(self):
        """True while the block is in ``valid_params``/``done``/``ready``."""
        return self.state in ['valid_params', 'done', 'ready']

    def get_fold_labels(self):
        """Return the selected features, which serve as fold labels."""
        return self.features

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register one inner output (``<name>_i``) for a new input port.

        @type new_port: InputBlockField
        """
        inner_field = InnerOutputField(
            name="%s_i" % new_port.name,
            provided_data_type=new_port.required_data_type,
        )
        self.inner_output_es_names_map[new_port.name] = inner_field.name
        self.register_inner_output_variables([inner_field])

    def execute(self, exp, *args, **kwargs):
        """Reset inner outputs and dispatch the ``prepare_folds`` task."""
        self.inner_output_manager.reset()

        es_dict = dict(
            (inp_name, self.get_input_var(inp_name))
            for inp_name in self.es_inputs
        )

        self.celery_task = wrapper_task.s(
            prepare_folds,
            exp,
            self,
            features=self.features,
            es_dict=es_dict,
            inner_output_es_names_map=self.inner_output_es_names_map,
            success_action="on_folds_generation_success",
        )

        exp.store_block(self)
        self.celery_task.apply_async()

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Return phenotype data of the first bound input plus ``features``.

        Inputs are probed in ``es_inputs`` order and probing stops at the
        first one that is not ``None``.
        """
        bound = (self.get_input_var(input_name)
                 for input_name in self.es_inputs)
        es = next((item for item in bound if item is not None), None)

        res = prepare_phenotype_for_js_from_es(es)
        res["features"] = self.features
        return res

    def update_feature_selection(self, exp, request, *args, **kwargs):
        """Store the ``features`` list from the JSON request body.

        Fires ``on_feature_selection_updated`` only when the new selection is
        non-empty.
        """
        payload = json.loads(request.body)
        self.features = payload["features"]
        if not self.features:
            return
        self.do_action("on_feature_selection_updated", exp)

    def on_feature_selection_updated(self, *args, **kwargs):
        """Action handler; intentionally does nothing."""
        pass
class PatternView(GenericBlock):
    """Visualisation block drawing gene patterns as a node/edge graph.

    ``graph_js`` combines the ``patterns``, ``edges`` and ``diff_expr``
    inputs into ``{"nodes": [...], "edges": [...]}``.
    """

    block_base_name = "PA_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Patterns Visualizer"

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord(
            "on_params_is_valid", ["validating_params"], "valid_params"),
    ])

    _input_patterns = InputBlockField(
        name="patterns",
        order_num=10,
        required_data_type="GeneSets",
        required=True,
    )
    _input_edges = InputBlockField(
        name="edges",
        order_num=20,
        required_data_type="Edges",
        required=True,
    )
    _diff_expr = InputBlockField(
        name="diff_expr",
        order_num=30,
        required_data_type="DiffExpr",
        required=True,
    )

    _graph_for_js = BlockField(
        name="graph_js", field_type=FieldType.RAW, is_a_property=True)
    _edges_for_js = BlockField(
        name="edges", field_type=FieldType.RAW, is_a_property=False)
    _export_raw_results_url = BlockField(
        name="export_raw_results_url",
        field_type=FieldType.STR,
        is_a_property=True,
    )

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=[
            "pattern_view.html"
        ],
    )

    @property
    def export_raw_results_url(self):
        """URL of the ``export_json`` field in ``json`` format."""
        url_kwargs = {
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_json",
            "format": "json",
        }
        return reverse("block_field_formatted", kwargs=url_kwargs)

    @property
    def graph_js(self):
        """Return the graph dict, or ``None`` if any input is falsy.

        Genes of pattern ``j`` are placed on a unit circle whose centre is
        offset on a grid of side ``floor(sqrt(number of patterns))``. Node
        colour and size are derived from the gene's ``diff_expr`` value.
        """
        diff_expr = self.get_input_var("diff_expr")
        edges = self.get_input_var("edges")
        cs = self.get_input_var("patterns")
        if not (cs and edges and diff_expr):
            return None

        pattern_set = cs.get_gs(conv=False).genes
        edges = edges.load_edges()
        diff_expr = diff_expr.load_expr()

        import math

        # Side of the grid on which the per-pattern circles are laid out.
        grid_side = math.floor(math.sqrt(len(pattern_set)))
        grid_cols = int(grid_side)

        nodes = []
        for j, com in enumerate(pattern_set):
            for i, gene in enumerate(com):
                angle = 2 * i * math.pi / len(com)
                value = diff_expr[gene]
                level = ((value + 1) * 128) - 1
                red = abs(int(math.floor(level)))
                green = abs(int(math.floor(255 - level)))
                nodes.append({
                    "id": "%s_%s" % (j, gene),
                    "label": gene,
                    "x": math.cos(angle) + grid_side * (j % grid_cols),
                    "y": math.sin(angle) + grid_side * (j / grid_cols),
                    "color": "rgb(%s, %s, %s)" % (red, green, 0),
                    "size": 2 + abs(value) * 2,
                })

        links = []
        for k, graph_edges in enumerate(edges):
            for i, j in graph_edges:
                links.append({
                    "id": "%s_%s_%s" % (k, i, j),
                    "source": "%s_%s" % (k, i),
                    "target": "%s_%s" % (k, j),
                })

        return {"nodes": nodes, "edges": links}

    def export_json(self, exp, *args, **kwargs):
        """Return ``load_set()`` of the input variable named ``es``."""
        # NOTE(review): this block declares no input named "es" (its inputs
        # are patterns / edges / diff_expr) -- confirm which input was meant.
        source = self.get_input_var("es")
        return source.load_set()

    def process_upload(self, exp, *args, **kwargs):
        """No-op."""
        pass

    def success(self, exp, *args, **kwargs):
        """No-op."""
        pass
class FeatureSelectionByCut(GenericBlock):
    """Filter an expression set by cutting a ranking table at a threshold.

    The ``es`` and ``rank_table`` inputs plus the cut parameters are passed
    to ``feature_selection_by_cut``; the result is published as ``es``.
    """

    block_base_name = "FS_BY_CUT"
    block_group = GroupType.FILTER
    name = "Feature Selection by Ranking"

    is_block_supports_auto_execution = True

    # Parameter-validation transitions plus the execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _rank_table = InputBlockField(
        name="rank_table",
        order_num=20,
        required_data_type="TableResult",
        required=True,
    )

    _cut_property_options = BlockField(
        name="cut_property_options",
        field_type=FieldType.RAW,
        is_a_property=True,
    )
    # Entered as free text; the select variant
    # (select_provider="cut_property_options") is not enabled.
    cut_property = ParamField(
        name="cut_property",
        title="Ranking property to use",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        order_num=10,
    )
    threshold = ParamField(
        name="threshold",
        title="Threshold for cut",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
    )

    _cut_direction_options = BlockField(
        name="cut_direction_options", field_type=FieldType.RAW)
    cut_direction_options = ["<", "<=", ">=", ">"]
    cut_direction = ParamField(
        name="cut_direction",
        title="Direction of cut",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        select_provider="cut_direction_options",
        order_num=30,
        options={
            "inline_select_provider": True,
            "select_options": [[op, op] for op in cut_direction_options],
        },
    )

    es = OutputBlockField(name="es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task has been scheduled yet."""
        super(FeatureSelectionByCut, self).__init__(*args, **kwargs)
        self.celery_task = None

    @property
    def cut_property_options(self):
        """Return ``{"pk": h, "str": h}`` per rank-table header.

        ``None`` is returned when no rank table is bound or it has no
        ``headers`` attribute.
        """
        rank_table = self.get_input_var("rank_table")
        if not rank_table or not hasattr(rank_table, "headers"):
            return None

        choices = []
        for header in rank_table.headers:
            choices.append({"pk": header, "str": header})
        return choices

    def execute(self, exp, *args, **kwargs):
        """Clear errors, build the cut task, store the block and dispatch."""
        self.clean_errors()

        self.celery_task = wrapper_task.s(
            feature_selection_by_cut,
            exp=exp,
            block=self,
            src_es=self.get_input_var("es"),
            rank_table=self.get_input_var("rank_table"),
            cut_property=self.cut_property,
            threshold=self.threshold,
            cut_direction=self.cut_direction,
            base_filename="%s_feature_selection" % self.uuid,
        )

        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Publish the filtered expression set and persist the block."""
        self.set_out_var("es", es)
        exp.store_block(self)
class TableResultView(GenericBlock):
    """Visualisation block that shows a ``TableResult`` as a table.

    Provides the table for the front end (``table_js``) and two exports:
    ``export_rc`` (same structure as ``table_js``) and ``export_table`` (CSV).
    """

    block_base_name = "TR_VIEW"
    block_group = GroupType.VISUALIZE
    name = "Table Result view"

    is_block_supports_auto_execution = False

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord(
            "on_params_is_valid", ["validating_params"], "valid_params"),
    ])

    input_table_result = InputBlockField(
        name="tr",
        order_num=10,
        required_data_type="TableResult",
        required=True,
    )

    # Fields backed by the properties defined below.
    _table_for_js = BlockField(
        name="table_js", field_type=FieldType.RAW, is_a_property=True)
    _export_table_url = BlockField(
        name="export_table_url",
        field_type=FieldType.STR,
        is_a_property=True,
    )
    _export_raw_results_url = BlockField(
        name="export_raw_results_url",
        field_type=FieldType.STR,
        is_a_property=True,
    )

    elements = BlockField(
        name="elements",
        field_type=FieldType.SIMPLE_LIST,
        init_val=["table_result_view.html"],
    )

    @property
    def export_table_url(self):
        """URL of the ``export_table`` field in ``csv`` format."""
        return reverse("block_field_formatted", kwargs={
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_table",
            "format": "csv",
        })

    @property
    def export_raw_results_url(self):
        """URL of the ``export_rc`` field in ``json`` format."""
        return reverse("block_field_formatted", kwargs={
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_rc",
            "format": "json",
        })

    @property
    def table_js(self):
        """Return ``{"columns": [...], "rows": [...]}`` or ``None``.

        ``None`` is returned when the ``tr`` input is falsy. A leading "#"
        column is added in front of the table's own columns to hold the
        first element of each record.
        """
        tr = self.get_input_var("tr")  # :type tr: TableResult
        if not tr:
            return None

        table = tr.get_table()
        table_headers = ["#"] + table.columns.tolist()

        # Each title gets a short field code derived from its md5 digest.
        column_title_to_code_name = {
            title: "_" + hashlib.md5(title).hexdigest()[:8]
            for title in table_headers
        }
        fields_list = [
            column_title_to_code_name[title] for title in table_headers
        ]

        return {
            "columns": [
                {
                    "title": title,
                    "field": column_title_to_code_name[title],
                    "visible": True,
                }
                for title in table_headers
            ],
            "rows": [
                dict(zip(fields_list, row))
                for row in table.to_records().tolist()
            ],
        }

    def export_rc(self, exp, *args, **kwargs):
        """Return the same structure as ``table_js``."""
        return self.table_js

    def export_table(self, exp, *args, **kwargs):
        """Return the ``tr`` table as CSV text, floats with four decimals.

        The previous code wrote ``tmp_df`` to CSV although its assignment was
        commented out, which raises ``NameError``. It also passed a callable
        as ``float_format``, while ``to_csv`` documents a format string.
        Passing "%1.4f" directly formats float cells and leaves other cells
        untouched.
        """
        tr = self.get_input_var("tr")  # :type tr: TableResult
        table = tr.get_table()

        out = StringIO.StringIO()
        table.to_csv(out, float_format="%1.4f")
        return out.getvalue()
class FilterBlock(GenericBlock):
    """Filter an expression set with the selected method and threshold.

    The ``es`` input, ``filter_method`` and ``q`` are passed to
    ``filter_task``; the result is published as ``flt_es``.
    """

    block_base_name = "FILTER"
    name = "Var/Val Filter"

    is_abstract = False
    block_group = GroupType.FILTER

    is_block_supports_auto_execution = True

    # Parameter-validation transitions plus the execution actions.
    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready"],
            "validating_params",
            user_title="Save parameters",
        ),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )

    filter_method = ParamField(
        "filter_method",
        title="Filter method",
        order_num=50,
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        init_val="LOW_VAL",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["LOW_VAL", "Low Val Filter"],
                ["VAR", "Var Filter"],
            ],
        },
    )
    q = ParamField(
        name="q",
        title="Threshold",
        input_type=InputType.TEXT,
        field_type=FieldType.FLOAT,
        init_val=30.0,
    )

    flt_es = OutputBlockField(
        name="flt_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        """Initialise the block; no task has been scheduled yet."""
        super(FilterBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Clear errors, build the filter task, store the block, dispatch."""
        self.clean_errors()

        source_es = self.get_input_var("es")

        self.celery_task = wrapper_task.s(
            filter_task,
            exp,
            self,
            filter_type=self.filter_method,
            q=self.q,
            es=source_es,
            # The chosen method is part of the generated file name.
            base_filename="%s_%s_flt" % (self.uuid, self.filter_method),
        )

        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the filtered expression set and persist the block."""
        self.set_out_var("flt_es", flt_es)
        exp.store_block(self)
class GenericClassifier(GenericBlock):
    """Abstract base for classifier blocks.

    Subclasses set ``classifier_name`` and implement ``collect_options``;
    ``execute`` then dispatches ``apply_classifier`` with the ``train_es`` and
    ``test_es`` inputs and the collected options.
    """

    block_group = GroupType.CLASSIFIER
    is_abstract = True

    is_block_supports_auto_execution = True

    classifier_name = ""

    # Block behavior
    _block_actions = ActionsList([])
    _block_actions.extend(save_params_actions_list)
    _block_actions.extend(execute_block_actions_list)

    # Input ports definition
    _train_es = InputBlockField(
        name="train_es",
        order_num=10,
        required_data_type="ExpressionSet",
        required=True,
    )
    _test_es = InputBlockField(
        name="test_es",
        order_num=20,
        required_data_type="ExpressionSet",
        required=True,
    )

    # Provided outputs
    _result = OutputBlockField(
        name="result",
        field_type=FieldType.CUSTOM,
        provided_data_type="ClassifierResult",
        init_val=None,
    )

    def __init__(self, *args, **kwargs):
        """Initialise the block with empty option dicts and no task."""
        super(GenericClassifier, self).__init__(*args, **kwargs)

        self.celery_task = None
        self.classifier_options = {}
        self.fit_options = {}

    @abstractmethod
    def collect_options(self):
        """
        Should populate `self.classifier_options` and `self.fit_options`
        from block parameters.
        """
        pass

    def get_option_safe(self, name, target_type=None):
        """Return attribute ``name``, optionally converted, or ``None``.

        ``None`` is returned when the attribute is missing, when its value is
        falsy, or when ``target_type(value)`` fails with ``TypeError`` or
        ``ValueError``.
        """
        if not hasattr(self, name):
            return None
        raw = getattr(self, name)
        if not raw:
            return None
        if not target_type:
            return raw
        try:
            return target_type(raw)
        except (TypeError, ValueError):
            # Unconvertible user input is treated as "option not set".
            # Unlike a bare ``except`` this lets KeyboardInterrupt and
            # SystemExit propagate.
            return None

    def collect_option_safe(self, name, target_type=None, target_name=None):
        """Copy a truthy option into ``classifier_options`` and return it.

        The value is stored under ``target_name`` when given, otherwise under
        ``name``. Falsy values are not stored.
        """
        value = self.get_option_safe(name, target_type)
        if value:
            self.classifier_options[target_name or name] = value
        return value

    def execute(self, exp, *args, **kwargs):
        """Clear the previous result, collect options and dispatch the task."""
        self.set_out_var("result", None)
        self.collect_options()

        train_es = self.get_input_var("train_es")
        test_es = self.get_input_var("test_es")

        self.celery_task = wrapper_task.s(
            apply_classifier,
            exp=exp,
            block=self,
            train_es=train_es,
            test_es=test_es,
            classifier_name=self.classifier_name,
            classifier_options=self.classifier_options,
            fit_options=self.fit_options,
            base_folder=exp.get_data_folder(),
            base_filename="%s_%s" % (self.uuid, self.classifier_name),
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, result, *args, **kwargs):
        """Store the obtained result as the ``result`` output variable."""
        self.set_out_var("result", result)
        exp.store_block(self)

    def reset_execution(self, exp, *args, **kwargs):
        """Clear errors and the ``result`` output, then persist the block."""
        self.clean_errors()
        # self.get_scope().remove_temp_vars()
        self.set_out_var("result", None)
        exp.store_block(self)