class LinearSVM(GenericClassifier):
    """Classifier block exposing a linear SVM (``classifier_name = "linear_svm"``)."""
    block_base_name = "LIN_SVM"
    name = "Linear SVM Classifier"
    classifier_name = "linear_svm"

    # Hyper-parameters shown to the user in the block UI.
    C = ParamField(name="C", title="Penalty", order_num=10,
                   input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                   init_val=1.0)
    tol = ParamField(name="tol", order_num=20,
                     title="Tolerance for stopping criteria",
                     input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                     init_val=0.0001)
    loss = ParamField(
        name="loss", order_num=30, title="The loss function",
        input_type=InputType.SELECT, field_type=FieldType.STR,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["l1", "Hinge loss"],
                ["l2", "Squared hinge loss"],
            ]
        }
    )

    def collect_options(self):
        # Cast each raw UI value to its concrete type, one option at a time.
        for opt_name, caster in (("C", float), ("tol", float), ("loss", str)):
            self.collect_option_safe(opt_name, caster)
class UploadGeneSets(GenericBlock):
    """Input block: the user uploads gene sets as a ``.gmt`` file."""
    block_base_name = "GENE_SETS_UPLOAD"
    block_group = GroupType.INPUT_DATA
    name = "Upload gene sets"

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "done"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    upload_gs = ParamField("upload_gs", title="Gene sets in .gmt format",
                           order_num=10, input_type=InputType.FILE_INPUT,
                           field_type=FieldType.CUSTOM)
    set_units = ParamField("set_units", title="Set units", order_num=11,
                           input_type=InputType.TEXT, field_type=FieldType.STR,
                           required=False)
    gen_units = ParamField("gen_units", title="Gene units", order_num=12,
                           input_type=InputType.TEXT, field_type=FieldType.STR,
                           required=False)

    _gene_sets = OutputBlockField(name="gene_sets",
                                  provided_data_type="GeneSets")

    def on_params_is_valid(self, exp, *args, **kwargs):
        # Best effort: a parse failure is only logged; the block is stored
        # either way so the experiment state stays consistent.
        try:
            source_file = self.upload_gs.get_file()
            parsed = GmtStorage.read_inp(source_file, "\t")
            out = GeneSets(exp.get_data_folder(), str(self.uuid))
            out.store_gs(parsed)
            self.set_out_var("gene_sets", out)
        except Exception as e:
            log.error(e)
        exp.store_block(self)
class UploadInteraction(GenericBlock):
    """Input block: the user uploads a gene interaction matrix."""
    block_base_name = "GENE_INTERACTION"
    block_group = GroupType.INPUT_DATA
    name = "Upload gene interaction"

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "done"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    upload_interaction = ParamField("upload_interaction",
                                    title="Interaction matrix", order_num=10,
                                    input_type=InputType.FILE_INPUT,
                                    field_type=FieldType.CUSTOM)
    row_units = ParamField("row_units", title="Row units", order_num=11,
                           input_type=InputType.TEXT, field_type=FieldType.STR,
                           required=False)
    col_units = ParamField("col_units", title="Column units", order_num=12,
                           input_type=InputType.TEXT, field_type=FieldType.STR,
                           required=False)

    _interaction = OutputBlockField(name="interaction",
                                    provided_data_type="BinaryInteraction")

    def on_params_is_valid(self, exp, *args, **kwargs):
        # Convert the uploaded file into a BinaryInteraction and publish it.
        frame = self.upload_interaction.get_as_data_frame()
        result = BinaryInteraction(exp.get_data_folder(), str(self.uuid))
        result.store_matrix(frame)
        result.row_units = self.row_units
        result.col_units = self.col_units
        self.set_out_var("interaction", result)
        exp.store_block(self)
class EnrichmentNoTBlock(GenericBlock):
    """Comodule enrichment testing block.

    Binds two GeneSets inputs ("gs" and "patterns") and schedules
    ``enrichment_no_t_task`` as a celery task; the task result is published
    as the "dictionary_set" output via :meth:`success`.
    """
    block_base_name = "ENRICHMENT_COM"
    name = "Comodule Enrichment"
    is_abstract = False
    block_group = GroupType.TESTING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _cs_1 = InputBlockField(name="gs", order_num=10,
                            required_data_type="GeneSets", required=True)
    H = InputBlockField(name="patterns", order_num=11,
                        required_data_type="GeneSets", required=True)
    _t = ParamField(name="T", order_num=12, title="Enrichment threshold",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                    # BUGFIX: init_val was the string "0.05" although the field
                    # is declared FLOAT; every other FLOAT ParamField in this
                    # file uses a float literal.
                    init_val=0.05)

    # NOTE(review): this attribute shadows the builtin `dict` inside the class
    # body only; kept unchanged in case it is referenced externally.
    dict = OutputBlockField(name="dictionary_set",
                            provided_data_type="DictionarySet")

    def __init__(self, *args, **kwargs):
        super(EnrichmentNoTBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the enrichment celery task on the bound inputs."""
        self.clean_errors()
        gs = self.get_input_var("gs")
        cs = self.get_input_var("patterns")
        # self.T — presumably resolved by the framework from the ParamField
        # declared with name="T" (same pattern as ThresholdBlock) — TODO confirm.
        self.celery_task = wrapper_task.s(
            enrichment_no_t_task,
            exp, self,
            T=self.T,
            gs=gs,
            patterns=cs,
            base_filename="%s_%s_enrich" % (self.uuid, 'enrichment_cont'))
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        # Framework callback invoked when the celery task completes.
        self.set_out_var("dictionary_set", flt_es)
        exp.store_block(self)
class KernelSvm(GenericClassifier):
    """Classifier block exposing a kernelized SVM (``classifier_name = "svm"``)."""
    block_base_name = "KERNEL_SVM"
    name = "Kernel SVM Classifier"
    classifier_name = "svm"

    C = ParamField(name="C", title="Penalty", order_num=10,
                   input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                   init_val=1.0)
    kernel = ParamField(
        name="kernel", order_num=20, title="Kernel type",
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="rbf",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["linear", "Linear"],
                ["poly", "Polynomial"],
                ["rbf", "RBF"],
                ["sigmoid", "Sigmoid"],
            ]
        }
    )
    degree = ParamField(name="degree", order_num=21,
                        title="Degree of the polynomial kernel",
                        input_type=InputType.TEXT, field_type=FieldType.INT)
    gamma = ParamField(name="gamma", order_num=22,
                       title="Kernel coefficient for RBF, Polynomial and Sigmoid",
                       input_type=InputType.TEXT, field_type=FieldType.FLOAT)
    tol = ParamField(name="tol", order_num=30,
                     title="Tolerance for stopping criteria",
                     input_type=InputType.TEXT, field_type=FieldType.FLOAT,
                     init_val=0.001)

    def collect_options(self):
        # Cast each raw UI value to its concrete type, in declaration order.
        for opt_name, caster in (("C", float),
                                 ("kernel", str),
                                 ("degree", int),
                                 ("gamma", float),
                                 ("tol", float)):
            self.collect_option_safe(opt_name, caster)
class SvdSubAgg(GenericBlock):
    """Abstract aggregation block: combines mRNA and miRNA expression sets
    through an interaction matrix via ``aggregation_task``.

    Concrete subclasses are expected to set ``mode``.
    """
    is_abstract = True
    block_group = GroupType.AGGREGATION
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _mRNA_es = InputBlockField(name="mRNA_es", order_num=10,
                               required_data_type="ExpressionSet",
                               required=True)
    _miRNA_es = InputBlockField(name="miRNA_es", order_num=20,
                                required_data_type="ExpressionSet",
                                required=True)
    _interaction = InputBlockField(name="interaction", order_num=30,
                                   required_data_type="BinaryInteraction",
                                   required=True)

    c = ParamField(name="c", title="Constant c", input_type=InputType.TEXT,
                   field_type=FieldType.FLOAT, init_val=1.0)

    agg_es = OutputBlockField(name="agg_es",
                              provided_data_type="ExpressionSet")

    mode = ""  # overridden by concrete subclasses; also used in the filename

    def __init__(self, *args, **kwargs):
        super(SvdSubAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the aggregation celery task on the three bound inputs."""
        self.clean_errors()
        m_rna = self.get_input_var("mRNA_es")
        mi_rna = self.get_input_var("miRNA_es")
        inter = self.get_input_var("interaction")
        self.celery_task = wrapper_task.s(
            aggregation_task,
            exp, self,
            mode=self.mode,
            c=self.c,
            m_rna_es=m_rna,
            mi_rna_es=mi_rna,
            interaction_matrix=inter,
            base_filename="%s_%s_agg" % (self.uuid, self.mode)
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        # Framework callback: publish the aggregated expression set.
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
class ThresholdBlock(GenericBlock):
    """Thresholding block: derives gene sets from an expression set by
    scheduling ``threshold_task``."""
    block_base_name = "THRESHOLD"
    name = "Threshold"
    is_abstract = False
    block_group = GroupType.SNMNMF
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es", order_num=10,
                          required_data_type="ExpressionSet", required=True)
    # Declared under attribute `t` but with name="T"; execute() reads self.T —
    # presumably the framework resolves attributes by the declared name.
    t = ParamField(name="T", title="Threshold", input_type=InputType.TEXT,
                   field_type=FieldType.FLOAT, init_val=0.1)

    flt_es = OutputBlockField(name="gene_sets", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        super(ThresholdBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the threshold celery task on the bound expression set."""
        self.clean_errors()
        source_es = self.get_input_var("es")
        self.celery_task = wrapper_task.s(
            threshold_task,
            exp, self,
            es=source_es,
            T=self.T,
            base_filename="%s_%s_thr" % (self.uuid, 'threshold'))
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        # Framework callback: publish the resulting gene sets.
        self.set_out_var("gene_sets", flt_es)
        exp.store_block(self)
class RcVisualizer(GenericBlock):
    """Abstract base for blocks visualising a ResultsContainer input."""
    block_base_name = "RC_VIZUALIZER"
    is_block_supports_auto_execution = False
    block_group = GroupType.VISUALIZE
    is_abstract = True

    _block_actions = ActionsList([
        ActionRecord(
            "save_params",
            ["created", "valid_params", "done", "ready", "input_bound"],
            "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"],
                     "input_bound"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("configure_table", ["input_bound", "ready"], "ready"),
    ])

    results_container = InputBlockField(name="results_container",
                                        required_data_type="ResultsContainer",
                                        required=True,
                                        field_type=FieldType.CUSTOM)
    _rc = BlockField(name="rc", field_type=FieldType.CUSTOM,
                     is_a_property=True)
    _available_metrics = BlockField(name="available_metrics",
                                    field_type=FieldType.RAW,
                                    is_a_property=True)

    metric = ParamField(name="metric", title="Metric",
                        field_type=FieldType.STR,
                        input_type=InputType.SELECT,
                        select_provider="available_metrics")

    def __init__(self, *args, **kwargs):
        super(RcVisualizer, self).__init__(*args, **kwargs)

    @property
    @log_timing
    def available_metrics(self):
        """Metrics selectable in the UI: only those that produce a single number."""
        try:
            found = []
            for metric_name, metric in metrics_dict.iteritems():
                if metric.produce_single_number:
                    found.append({"pk": metric_name, "str": metric.title})
            return found
        except Exception as e:
            log.exception(e)
            return []
class GetBroadInstituteGeneSet(GenericBlock):
    """Input block: select one of the stored MSigDB (Broad Institute) gene sets."""
    block_base_name = "BI_GENE_SET"
    block_group = GroupType.INPUT_DATA
    name = "Get MSigDB Gene Set"

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "done",
                     reload_block_in_client=True),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])

    # TODO: maybe create more general solution ?
    _all_gene_sets = BlockField("all_gene_sets", title="",
                                input_type=InputType.HIDDEN,
                                field_type=FieldType.RAW, is_a_property=True)
    msigdb_id = ParamField(
        name="msigdb_id",
        title="MSigDB gene set",
        input_type=InputType.SELECT,
        field_type=FieldType.INT,
        init_val=0,  # TODO: fix hardcoded value
        select_provider="all_gene_sets")

    _gs = OutputBlockField(name="gs", field_type=FieldType.HIDDEN,
                           provided_data_type="GeneSets")

    @property
    def all_gene_sets(self):
        # Metadata for the UI select widget.
        return BroadInstituteGeneSet.get_all_meta()

    def on_params_is_valid(self, exp):
        # Resolve the chosen database record and publish its gene sets.
        record = BroadInstituteGeneSet.objects.get(pk=self.msigdb_id)
        self.set_out_var("gs", record.get_gene_sets())
        super(GetBroadInstituteGeneSet, self).on_params_is_valid(exp)
class CrossValidation(UniformMetaBlock):
    """Meta block running inner blocks over repeated K-fold splits.

    Dynamically-added ExpressionSet inputs are mirrored into paired
    train/test inner outputs; results are assembled into a
    ResultsContainer in build_result_collection().
    """
    block_base_name = "CROSS_VALID"
    name = "Cross Validation K-fold"

    _cv_actions = ActionsList(
        [ActionRecord("become_ready", ["valid_params"], "ready")])
    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST,
                          init_val=["cv_info.html"])

    # Extensible input: the user can attach several expression sets.
    _input_es_dyn = InputBlockField(name="es_inputs",
                                    required_data_type="ExpressionSet",
                                    required=True, multiply_extensible=True,
                                    order_num=-1)

    folds_num = ParamField(name="folds_num", title="Folds number",
                           order_num=10, input_type=InputType.TEXT,
                           field_type=FieldType.INT, init_val=5)
    repeats_num = ParamField(name="repeats_num", title="Repeats number",
                             order_num=20, input_type=InputType.TEXT,
                             field_type=FieldType.INT, init_val=1)

    def get_fold_labels(self):
        # One label per (repeat, fold) pair, repeat-major order.
        out = []
        for repeat in range(self.repeats_num):
            for num in range(self.folds_num):
                out.append("fold_%s_%s" % (repeat + 1, num + 1))
        return out
        # ["fold_%s_%s" % (repeat + 1, num + 1) for num in range(self.folds_num) for repeat in range(self.repeats_num)]

    def get_repeat_labels(self):
        return [
            "repeat_%s" % (repeat + 1)
            for repeat in range(self.repeats_num)
        ]

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """Register paired <name>_train_i / <name>_test_i inner outputs
        for each dynamically attached input port.

        @type new_port: InputBlockField
        """
        new_inner_output_train = InnerOutputField(
            name="%s_train_i" % new_port.name,
            provided_data_type=new_port.required_data_type)
        new_inner_output_test = InnerOutputField(
            name="%s_test_i" % new_port.name,
            provided_data_type=new_port.required_data_type)
        self.inner_output_es_names_map[new_port.name] = \
            (new_inner_output_train.name, new_inner_output_test.name)
        self.register_inner_output_variables(
            [new_inner_output_train, new_inner_output_test])

    def execute(self, exp, *args, **kwargs):
        """Schedule fold generation as a celery task."""
        self.clean_errors()
        self.inner_output_manager.reset()
        es_dict = {
            inp_name: self.get_input_var(inp_name)
            for inp_name in self.es_inputs
        }
        self.celery_task = wrapper_task.s(
            generate_cv_folds,
            exp, self,
            folds_num=self.folds_num,
            repeats_num=self.repeats_num,
            es_dict=es_dict,
            inner_output_es_names_map=self.inner_output_es_names_map,
            success_action="on_folds_generation_success",
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def on_params_is_valid(self, exp, *args, **kwargs):
        super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("become_ready", exp)

    def become_ready(self, *args, **kwargs):
        # State-transition hook only; the ActionRecord does the work.
        pass

    def build_result_collection(self, exp):
        """Assemble per-fold results into the "results_container" output.

        Supports homogeneous sequences of ClassifierResult (nested
        repeat/fold axes) or a single ResultsContainer field (flat axis).
        """
        # NOTE(review): leftover remote-debugging scaffold with a hardcoded
        # personal path; only triggers when settings.CELERY_DEBUG is set.
        if settings.CELERY_DEBUG:
            import sys
            sys.path.append(
                '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg'
            )
            import pydevd
            pydevd.settrace('localhost', port=6901, stdoutToServer=True,
                            stderrToServer=True)

        rc = ResultsContainer(base_dir=exp.get_data_folder(),
                              base_filename="%s" % self.uuid)
        res_seq = self.res_seq

        def create_new_dim_rc(local_rc, axis_meta_block, axis_meta_block_labels):
            # Initialise a fresh single-axis container in place.
            local_rc.axis_list = [axis_meta_block]
            local_rc.labels_dict[axis_meta_block] = axis_meta_block_labels
            local_rc.init_ar()
            local_rc.update_label_index()

        # WARNING: We only support homogeneous results, so we only check first element
        res_seq_field_name, data_type = res_seq.fields.iteritems().next()
        if data_type == "ClassifierResult":
            fold_labels = self.get_fold_labels()  # NOTE(review): unused here
            single_rc_list = []
            for field_name in res_seq.fields:
                run_num = 0
                loc_list = []
                for idx, res_seq_cell in enumerate(res_seq.sequence):
                    # A new per-repeat container starts every folds_num cells.
                    if (idx % self.folds_num) == 0:
                        rc_run = ResultsContainer("", "")
                        create_new_dim_rc(rc_run, self.base_name + "_folds", [
                            "fold_%s" % fold_num
                            for fold_num in range(self.folds_num)
                        ])
                        loc_list.append(rc_run)
                        run_num += 1
                    rc_run.ar[idx % self.folds_num] = res_seq_cell[field_name]

                rc_single = ResultsContainer("", "")
                rc_single.add_dim_layer(loc_list, self.base_name,
                                        self.get_repeat_labels())
                single_rc_list.append(rc_single)

            rc.add_dim_layer(single_rc_list, self.collector_spec.label,
                             res_seq.fields.keys())
        elif data_type == "ResultsContainer":
            if len(res_seq.fields) > 1:
                raise Exception(
                    "Meta block only support single output of type ResultsContainer"
                )
            else:
                rc_list = []
                for cell in res_seq.sequence:
                    sub_rc = cell[res_seq_field_name]
                    sub_rc.load()
                    rc_list.append(sub_rc)
                rc.add_dim_layer(rc_list, self.base_name,
                                 self.get_fold_labels())
        elif data_type == "SequenceContainer":
            # TODO remove this check
            pass
        else:
            raise Exception("Meta blocks only support ClassifierResult "
                            "or ResultsContainer in the output collection. "
                            " Instead got: %s" % data_type)
        rc.store()
        # Drop the in-memory array after persisting to keep the block light.
        rc.ar = None
        self.set_out_var("results_container", rc)
class FilterBlock(GenericBlock):
    """Filtering block: applies a low-value or variance filter to an
    expression set by scheduling ``filter_task``."""
    block_base_name = "FILTER"
    name = "Var/Val Filter"
    is_abstract = False
    block_group = GroupType.FILTER
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es", order_num=10,
                          required_data_type="ExpressionSet", required=True)

    filter_method = ParamField("filter_method", title="Filter method",
                               order_num=50, input_type=InputType.SELECT,
                               field_type=FieldType.STR, init_val="LOW_VAL",
                               options={
                                   "inline_select_provider": True,
                                   "select_options": [
                                       ["LOW_VAL", "Low Val Filter"],
                                       ["VAR", "Var Filter"]
                                   ]
                               })
    q = ParamField(name="q", title="Threshold", input_type=InputType.TEXT,
                   field_type=FieldType.FLOAT, init_val=30.0)

    flt_es = OutputBlockField(name="flt_es",
                              provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(FilterBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the filter celery task with the configured method."""
        self.clean_errors()
        source_es = self.get_input_var("es")
        self.celery_task = wrapper_task.s(
            filter_task,
            exp, self,
            filter_type=self.filter_method,
            q=self.q,
            es=source_es,
            base_filename="%s_%s_flt" % (self.uuid, self.filter_method))
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        # Framework callback: publish the filtered expression set.
        self.set_out_var("flt_es", flt_es)
        exp.store_block(self)
class BoxPlot(RcVisualizer):
    """Box-plot visualisation of a metric taken from a ResultsContainer.

    The user picks axes to compare (one box per category) and axes to
    aggregate over for scoring; chart data is produced in Highcharts-like
    series/categories form.
    """
    block_base_name = "BOX_PLOT"
    block_group = GroupType.VISUALIZE
    name = "Box plot"

    boxplot_config = ParamField(name="boxplot_config", title="",
                                input_type=InputType.HIDDEN,
                                field_type=FieldType.RAW)
    plot_inputs = BlockField(name="plot_inputs", field_type=FieldType.RAW,
                             init_val=[])
    chart_series = BlockField(name="chart_series", field_type=FieldType.RAW,
                              init_val=[])
    chart_categories = BlockField(name="chart_categories",
                                  field_type=FieldType.SIMPLE_LIST,
                                  init_val=[])
    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST,
                          init_val=[
                              "box_plot.html"
                          ])

    def __init__(self, *args, **kwargs):
        super(BoxPlot, self).__init__(*args, **kwargs)
        # Per-axis selection flags, filled lazily in on_params_is_valid().
        self.boxplot_config = {
            "agg_axis_for_scoring": {},
            "compare_axis_by_boxplot": {},
        }

    @log_timing
    def compute_boxplot_stats(self, exp, *args, **kwargs):
        """Recompute chart_series and chart_categories from the current config."""
        agg_axis_for_scoring = [
            axis for axis, is_selected in
            self.boxplot_config["agg_axis_for_scoring"].items()
            if is_selected
        ]
        compare_axis_by_boxplot = [
            axis for axis, is_selected in
            self.boxplot_config["compare_axis_by_boxplot"].items()
            if is_selected
        ]
        rc = self.rc
        if compare_axis_by_boxplot and rc:
            rc.load()

            df = rc.get_pandas_slice_for_boxplot(
                compare_axis_by_boxplot,
                agg_axis_for_scoring or [],
                self.metric
            )

            categories = []
            for row_id, _ in df.iterrows():
                # Multi-axis slices yield tuple row ids; join them for display.
                # BUGFIX: use isinstance instead of `type(row_id) == tuple`.
                if isinstance(row_id, tuple):
                    title = ":".join(map(str, row_id))
                else:
                    title = str(row_id)
                categories.append(title)

            bps = boxplot_stats(np.array(df.T, dtype=float))
            if bps:
                # Series 0: the box data; series 1: outlier scatter points.
                self.chart_series = [{
                    "data": [],
                }, {
                    "name": "Outliers",
                    "data": [],
                    "type": "scatter",
                    "marker": {
                        "fillColor": "white",
                        "lineWidth": 1,
                        "lineColor": "blue"
                    },
                    "tooltip": {
                        "pointFormat": '%s: {point.y} ' % self.metric
                    }
                }]
                self.chart_series[0]["data"] = [
                    [
                        fix_nan(rec["whislo"]),
                        fix_nan(rec["q1"]),
                        fix_nan(rec["med"]),
                        fix_nan(rec["q3"]),
                        fix_nan(rec["whishi"])
                    ]
                    for rec in bps
                ]
                for cat_idx, rec in enumerate(bps):
                    for outlier in rec['fliers']:
                        self.chart_series[1]["data"].append([cat_idx, outlier])

                self.chart_categories = categories
                exp.store_block(self)

    def on_params_is_valid(self, exp, *args, **kwargs):
        super(BoxPlot, self).on_params_is_valid(exp, *args, **kwargs)
        if self.rc is not None:
            # Ensure every axis of the container has a config entry
            # (empty string == not selected).
            for axis in self.rc.axis_list:
                if axis not in self.boxplot_config["agg_axis_for_scoring"]:
                    self.boxplot_config["agg_axis_for_scoring"][axis] = ""
                if axis not in self.boxplot_config["compare_axis_by_boxplot"]:
                    self.boxplot_config["compare_axis_by_boxplot"][axis] = ""
            self.compute_boxplot_stats(exp)
        exp.store_block(self)
class MassUpload(UniformMetaBlock):
    """Meta block: batch-upload paired mRNA/miRNA expression sets and
    phenotype files, then process them through ``bunch_upload_task``.
    """
    block_base_name = "BunchUpload"
    name = "Mass Upload Expression Sets"

    _bu_block_actions = ActionsList([
        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data"),
        ActionRecord("error_on_processing", ["processing_upload"],
                     "valid_params"),
        ActionRecord("processing_done", ["processing_upload"], "ready")
        # ActionRecord("success", ["processing_upload"], "done", reload_block_in_client=True),
        # ActionRecord("error", ["processing_upload"], "valid_params"),
    ])

    es_mRNA_matrices = ParamField(
        "es_mRNA_matrices", title="mRNA Expression sets", order_num=10,
        input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM,
        options={"multiple": True},
    )
    es_mRNA_matrices_ori = ParamField(
        "es_mRNA_matrices_ori", title="Matrices orientation", order_num=11,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Features"],
                ["GxS", "Features x Samples"]
            ]
        }
    )
    m_rna_platform = ParamField("m_rna_platform", title="Platform ID",
                                order_num=12, input_type=InputType.TEXT,
                                field_type=FieldType.STR, required=False)
    m_rna_unit = ParamField("m_rna_unit",
                            title="Working unit [used when platform is unknown]",
                            order_num=13, input_type=InputType.SELECT,
                            field_type=FieldType.STR, required=False,
                            init_val="RefSeq",
                            options={
                                "inline_select_provider": True,
                                "select_options": [
                                    ["RefSeq", "RefSeq"],
                                    ["Entrez", "EntrezID"],
                                    ["Symbol", "Symbol"]
                                ]
                            })
    csv_sep_m_rna = ParamField(
        "csv_sep_m_rna", title="CSV separator symbol", order_num=14,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        }
    )
    es_miRNA_matrices = ParamField(
        "es_miRNA_matrices", title="miRNA Expression sets", order_num=15,
        input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM,
        options={"multiple": True},
    )
    mi_rna_platform = ParamField("mi_rna_platform", title="Platform ID",
                                 order_num=21, input_type=InputType.TEXT,
                                 field_type=FieldType.STR, required=False)
    mi_rna_unit = ParamField("mi_rna_unit",
                             title="Working unit [used when platform is unknown]",
                             order_num=22, input_type=InputType.SELECT,
                             field_type=FieldType.STR, required=False,
                             init_val="RefSeq",
                             options={
                                 "inline_select_provider": True,
                                 "select_options": [
                                     ["RefSeq", "RefSeq"],
                                     ["mirbase", "miRBase ID"]
                                 ]
                             })
    es_miRNA_matrices_ori = ParamField(
        "es_miRNA_matrices_ori", title="Matrices orientation", order_num=23,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"]
            ]
        }
    )
    csv_sep_mi_rna = ParamField(
        "csv_sep_mi_rna", title="CSV separator symbol", order_num=24,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        }
    )
    pheno_matrices = ParamField(
        "pheno_matrices", title="Phenotypes", order_num=40,
        input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM,
        options={"multiple": True},
    )
    csv_sep = ParamField(
        "csv_sep", title="CSV separator symbol", order_num=50,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        }
    )

    # cells = BlockField(name="cells", field_type=FieldType.CUSTOM, init_val=None)
    # elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[
    #     "mass_upload/data_spec.html"
    # ])

    def __init__(self, *args, **kwargs):
        super(MassUpload, self).__init__(*args, **kwargs)
        # Instance-level containers shadow the class-level ParamFields with
        # actual multi-upload holders.
        self.es_mRNA_matrices = MultiUploadField()
        self.es_miRNA_matrices = MultiUploadField()
        self.pheno_matrices = MultiUploadField()
        self.pheno_by_es_names = {}
        self.labels = []
        self.seq = []
        self.register_inner_output_variables([InnerOutputField(
            name="mRNA_es",
            provided_data_type="ExpressionSet"
        ), InnerOutputField(
            name="miRNA_es",
            provided_data_type="ExpressionSet"
        )])

    @property
    def is_sub_pages_visible(self):
        # Sub pages become visible once data has been processed/assigned.
        if self.state in ['source_was_preprocessed',
                          'sample_classes_assigned', 'ready', 'done']:
            return True
        return False

    def get_fold_labels(self):
        # One "fold" per uploaded mRNA matrix (sorted file names).
        return self.labels

    def error_on_processing(self, *args, **kwargs):
        # State-transition hook only; the ActionRecord does the work.
        pass

    def processing_done(self, exp, block):
        exp.store_block(block)

    def process_upload(self, exp, *args, **kwargs):
        """Validate the uploaded file triples and schedule bunch_upload_task.

        @param exp: Experiment
        """
        self.clean_errors()
        try:
            if len(self.pheno_matrices) != len(self.es_mRNA_matrices):
                raise RuntimeError("Different number of phenotypes and mRNA expression sets")
            if self.es_miRNA_matrices:
                if len(self.pheno_matrices) != len(self.es_miRNA_matrices):
                    raise RuntimeError("Different number of phenotypes and miRNA expression sets")
            self.labels = es_mRNA_matrix_names = sorted(self.es_mRNA_matrices)
            es_miRNA_matrix_names = sorted(self.es_miRNA_matrices)
            pheno_matrix_names = sorted(self.pheno_matrices)
            # Pad with None when no miRNA matrices were uploaded.
            if len(es_miRNA_matrix_names) == 0:
                es_miRNA_matrix_names = len(es_mRNA_matrix_names) * [None]
            # NOTE(review): despite the name, this maps
            # pheno_name -> (mRNA_name, miRNA_name); confirm against the
            # consumer (bunch_upload_task) before "fixing" the direction.
            self.pheno_by_es_names = {
                pheno_name: es_name for es_name, pheno_name in
                zip(zip(es_mRNA_matrix_names, es_miRNA_matrix_names),
                    pheno_matrix_names)
            }
            self.clean_errors()
            self.celery_task = wrapper_task.s(
                bunch_upload_task,
                exp, self,
                success_action="processing_done",
                error_action="error_on_processing"
            )
            exp.store_block(self)
            self.celery_task.apply_async()
        except Exception as e:
            exp.log(self.uuid, e, severity="CRITICAL")
            log.exception(e)
            self.errors.append(e)
            self.do_action("error_on_processing", exp, e)
        # self.celery_task_fetch.apply_async()

    def execute(self, exp, *args, **kwargs):
        # Uploads were already processed; just hand the prepared sequence on.
        self.inner_output_manager.reset()
        self.do_action("on_folds_generation_success", exp, self.seq)

    def get_repeat_labels(self):
        # Not applicable for this meta block.
        pass
class FeatureSelectionByCut(GenericBlock):
    """Feature selection block: cut an expression set by comparing a ranking
    table column against a threshold, via ``feature_selection_by_cut``."""
    block_base_name = "FS_BY_CUT"
    block_group = GroupType.FILTER
    name = "Feature Selection by Ranking"
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es", order_num=10,
                          required_data_type="ExpressionSet", required=True)
    _rank_table = InputBlockField(name="rank_table", order_num=20,
                                  required_data_type="TableResult",
                                  required=True)

    _cut_property_options = BlockField(name="cut_property_options",
                                       field_type=FieldType.RAW,
                                       is_a_property=True)
    cut_property = ParamField(
        name="cut_property",
        title="Ranking property to use",
        # input_type=InputType.SELECT,
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        #select_provider="cut_property_options",
        order_num=10,
    )
    threshold = ParamField(
        name="threshold",
        title="Threshold for cut",
        order_num=20,
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
    )
    _cut_direction_options = BlockField(name="cut_direction_options",
                                        field_type=FieldType.RAW)
    cut_direction_options = ["<", "<=", ">=", ">"]
    cut_direction = ParamField(name="cut_direction",
                               title="Direction of cut",
                               input_type=InputType.SELECT,
                               field_type=FieldType.STR,
                               select_provider="cut_direction_options",
                               order_num=30,
                               options={
                                   "inline_select_provider": True,
                                   "select_options": [
                                       [op, op]
                                       for op in ["<", "<=", ">=", ">"]
                                   ]
                               })

    es = OutputBlockField(name="es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(FeatureSelectionByCut, self).__init__(*args, **kwargs)
        self.celery_task = None

    @property
    def cut_property_options(self):
        # Offer the rank table's headers once the input is bound;
        # implicitly returns None while it is not.
        table = self.get_input_var("rank_table")
        if table and hasattr(table, "headers"):
            return [{"pk": header, "str": header}
                    for header in table.headers]

    def execute(self, exp, *args, **kwargs):
        """Schedule the feature-selection celery task."""
        self.clean_errors()
        self.celery_task = wrapper_task.s(
            feature_selection_by_cut,
            exp=exp, block=self,
            src_es=self.get_input_var("es"),
            rank_table=self.get_input_var("rank_table"),
            cut_property=self.cut_property,
            threshold=self.threshold,
            cut_direction=self.cut_direction,
            base_filename="%s_feature_selection" % self.uuid,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        # Framework callback: publish the reduced expression set.
        self.set_out_var("es", es)
        exp.store_block(self)
class GeneSetAgg(GenericBlock):
    """Aggregate an expression set over gene sets (mean or median) via
    ``do_gs_agg``."""
    block_base_name = "GENE_SET_AGG"
    name = "Gene sets aggregation"
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params",
                     ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(name="es", order_num=10,
                          required_data_type="ExpressionSet", required=True)
    _gs = InputBlockField(name="gs", order_num=20,
                          required_data_type="GeneSets", required=True)

    # NOTE(review): the "media" option key looks like a typo for "median",
    # but do_gs_agg may depend on the current literal — confirm before renaming.
    agg_method = ParamField("agg_method", title="Aggregate method",
                            order_num=50, input_type=InputType.SELECT,
                            field_type=FieldType.STR, init_val="mean",
                            options={
                                "inline_select_provider": True,
                                "select_options": [
                                    ["mean", "Mean"],
                                    ["media", "Median"]
                                ]
                            })

    agg_es = OutputBlockField(name="agg_es",
                              provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(GeneSetAgg, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the gene-set aggregation celery task."""
        self.clean_errors()
        src_es = self.get_input_var("es")
        src_gs = self.get_input_var("gs")
        out_name = "%s_gs_agg" % (self.uuid, )
        self.celery_task = wrapper_task.s(do_gs_agg, exp, self,
                                          src_es, src_gs,
                                          self.agg_method, out_name)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, agg_es):
        # Framework callback: publish the aggregated expression set.
        self.set_out_var("agg_es", agg_es)
        exp.store_block(self)
class NIMFASNMNMFBlock(GenericBlock):
    """Run the NIMFA SNMNMF factorization on paired mRNA/miRNA expression data."""
    block_base_name = "NIMFA_SNMNMF"
    name = "NIMFA SNMNMF"
    is_abstract = False
    block_group = GroupType.SNMNMF
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # Expression inputs.
    _m_rna = InputBlockField(name="mRNA", order_num=10,
                             required_data_type="ExpressionSet", required=True)
    _mi_rna = InputBlockField(name="miRNA", order_num=20,
                              required_data_type="ExpressionSet", required=True)
    # Interaction inputs.
    _gene2gene = InputBlockField(name="Gene2Gene", order_num=40,
                                 required_data_type="BinaryInteraction", required=True)
    _mirna2gene = InputBlockField(name="miRNA2gene", order_num=50,
                                  required_data_type="BinaryInteraction", required=True)

    # Regularization weights and factorization rank.
    l1 = ParamField(name="l1", order_num=70, title="l1",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT, init_val=0.1)
    l2 = ParamField(name="l2", order_num=80, title="l2",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT, init_val=0.1)
    g1 = ParamField(name="g1", order_num=90, title="g1",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT, init_val=0.1)
    g2 = ParamField(name="g2", order_num=100, title="g2",
                    input_type=InputType.TEXT, field_type=FieldType.FLOAT, init_val=0.1)
    rank = ParamField(name="rank", order_num=110, title="rank",
                      input_type=InputType.TEXT, field_type=FieldType.INT, init_val=50)

    # Factor matrices exposed as outputs.
    w = OutputBlockField(name="W", provided_data_type="ExpressionSet")
    H1_miRNA = OutputBlockField(name="H1_miRNA", provided_data_type="ExpressionSet")
    H2_genes = OutputBlockField(name="H2_genes", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(NIMFASNMNMFBlock, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Send the factorization job to the celery queue."""
        self.clean_errors()
        hyper_params = {
            'l1': self.l1, 'l2': self.l2,
            'g1': self.g1, 'g2': self.g2,
            'rank': self.rank,
        }
        self.celery_task = wrapper_task.s(
            nimfa_snmnmf_task, exp, self,
            mRNA=self.get_input_var("mRNA"),
            miRNA=self.get_input_var("miRNA"),
            gene2gene=self.get_input_var("Gene2Gene"),
            miRNA2gene=self.get_input_var("miRNA2gene"),
            params=hyper_params,
            base_filename="%s_nimfa_snmnmf" % self.uuid
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, W, H1, H2):
        """Publish the three factor matrices as block outputs."""
        for var_name, matrix in (("W", W), ("H1_miRNA", H1), ("H2_genes", H2)):
            self.set_out_var(var_name, matrix)
        exp.store_block(self)
class PatternSearch(GenericBlock):
    """Search mRNA/miRNA data for co-expressed network patterns (comodules)."""
    block_base_name = "PattSearch"
    name = "Pattern Search"
    block_group = GroupType.PATTERN_SEARCH
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _m_rna = InputBlockField(name="mRNA", order_num=10,
                             required_data_type="ExpressionSet", required=True)
    _mi_rna = InputBlockField(name="miRNA", order_num=20,
                              required_data_type="ExpressionSet", required=False)
    gene2gene = InputBlockField(name="gene2gene", order_num=30,
                                required_data_type="BinaryInteraction", required=True)
    miRNA2gene = InputBlockField(name="miRNA2gene", order_num=31,
                                 required_data_type="BinaryInteraction", required=False)

    genes_num = ParamField(name="genes_num", title="Number of Genes", order_num=10,
                           input_type=InputType.TEXT, field_type=FieldType.INT, init_val=100)
    # Search radius in the interaction graph.
    d = ParamField(name="d", order_num=70, title="d",
                   input_type=InputType.TEXT, field_type=FieldType.INT, init_val=2)
    min_imp = ParamField(name="min_imp", order_num=80, title="Minimal improvement",
                         input_type=InputType.TEXT, field_type=FieldType.FLOAT, init_val=0.06)
    _metric = ParamField(
        "metric", title="Metric", order_num=40,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="mutual_information",
        options={
            "inline_select_provider": True,
            "select_options": [["mutual_information", "Mutual Information"],
                               ['normed_mutual_information', "Normed Mutual Information"],
                               ['square_error', "Square Error"],
                               ['correlation', "Correlation"],
                               ['t-test', "TTest"],
                               ['wilcoxon', "Wilcoxon"]]
        })

    patterns = OutputBlockField(name="patterns", provided_data_type="GeneSets")

    def __init__(self, *args, **kwargs):
        super(PatternSearch, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Queue the pattern-search celery task."""
        self.clean_errors()
        exp.log(self.uuid, "Execute called")
        self.celery_task = wrapper_task.s(
            pattern_search, exp, self,
            m_rna_es=self.get_input_var("mRNA"),
            mi_rna_es=self.get_input_var("miRNA"),
            gene2gene=self.get_input_var("gene2gene"),
            miRNA2gene=self.get_input_var("miRNA2gene"),
            radius=self.d,
            min_imp=self.min_imp,
            number_of_genes=self.genes_num,
            # BUGFIX: "metric" is a ParamField, not an input port; it was
            # fetched via get_input_var (always missing). Read the param
            # attribute like the other params (self.d, self.min_imp, ...).
            metric=self.metric,
            base_filename="%s_comodule_sets" % self.uuid,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, gs):
        """Store the found patterns as the block output."""
        exp.log(self.uuid, "Success")
        self.set_out_var("patterns", gs)
        exp.store_block(self)
class GenericRankingBlock(GenericBlock):
    """Abstract base for blocks that rank features of an expression set.

    Subclasses set ``self.ranking_name`` and fill ``self.ranking_options``
    in ``collect_options``.
    """
    block_base_name = ""
    block_group = GroupType.PROCESSING
    is_abstract = True
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _es = InputBlockField(
        name="es", order_num=10,
        required_data_type="ExpressionSet", required=True
    )

    ## TODO: remove from generic ranking
    best = ParamField(
        name="best", title="Consider only best",
        input_type=InputType.TEXT, field_type=FieldType.INT, init_val=None
    )

    _result = OutputBlockField(name="result", field_type=FieldType.STR,
                               provided_data_type="TableResult", init_val=None)

    def __init__(self, *args, **kwargs):
        super(GenericRankingBlock, self).__init__(*args, **kwargs)
        self.ranking_name = None
        self.ranking_options = {}
        self.celery_task = None
        # Allocate the result table inside the experiment's data folder
        # and expose it immediately as the block output.
        owner_exp = Experiment.get_exp_by_id(self.exp_id)
        self.result = TableResult(
            base_dir=owner_exp.get_data_folder(),
            base_filename="%s_gt_result" % self.uuid,
        )
        self.set_out_var("result", self.result)

    def collect_options(self):
        """Reset options; subclasses override to gather their own params."""
        self.ranking_options = {}

    def execute(self, exp, *args, **kwargs):
        """Queue the ranking computation for the connected expression set."""
        self.clean_errors()
        self.collect_options()
        task_signature = wrapper_task.s(
            apply_ranking,
            exp=exp,
            block=self,
            es=self.get_input_var("es"),
            ranking_name=self.ranking_name,
            result_table=self.result,
            options=self.ranking_options
        )
        self.celery_task = task_signature
        exp.store_block(self)
        task_signature.apply_async()
        exp.log(self.uuid, "Sent ranking computation to queue")
        log.debug("Sent ranking computation to queue")

    def success(self, exp, result, *args, **kwargs):
        """Record the computed ranking table and republish the output."""
        self.result = result
        self.set_out_var("result", self.result)
        exp.store_block(self)
class FetchGSE(GenericBlock):
    # Input block: downloads a GEO series (GSE) from NCBI by accession id,
    # preprocesses the SOFT file into an ExpressionSet and then lets the
    # user assign phenotype classes to samples.
    block_base_name = "FETCH_GEO"
    name = "Fetch from NCBI GEO"
    block_group = GroupType.INPUT_DATA

    # State machine: params -> fetch -> preprocess -> class assignment.
    # Action names are referenced by string from the celery callbacks below.
    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord("start_fetch", ["valid_params", "done"],
                     "source_is_being_fetched", "Start fetch"),
        ActionRecord("error_during_fetch", ["source_is_being_fetched"],
                     "form_valid", reload_block_in_client=True),
        ActionRecord("successful_fetch", ["source_is_being_fetched"],
                     "source_was_fetched", reload_block_in_client=True),

        ActionRecord("start_preprocess", ["source_was_fetched", "source_was_preprocessed"],
                     "source_is_being_fetched", "Run preprocess"),
        ActionRecord("error_during_preprocess", ["source_is_being_fetched"],
                     "source_was_fetched", reload_block_in_client=True),
        ActionRecord("successful_preprocess", ["source_is_being_fetched"],
                     "source_was_preprocessed", reload_block_in_client=True),

        ActionRecord("assign_sample_classes", ["source_was_preprocessed", "done"], "done"),
    ])

    # Downloaded raw source file (set by successful_fetch).
    source_file = BlockField("source_file", FieldType.CUSTOM, None)

    # Sub-pages rendered by the client for this block.
    pages = BlockField("pages", FieldType.RAW, init_val={
        "assign_phenotype_classes": {
            "title": "Assign phenotype classes",
            "resource": "assign_phenotype_classes",
            "widget": "widgets/assign_phenotype_classes.html"
        },
    })
    _is_sub_pages_visible = BlockField("is_sub_pages_visible", FieldType.RAW,
                                       is_a_property=True)

    ### PARAMETERS
    geo_uid = ParamField("geo_uid", "Geo accession id", InputType.TEXT, FieldType.STR, "")

    _expression_set = OutputBlockField(name="expression_set",
                                       field_type=FieldType.HIDDEN,
                                       provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        #"Fetch ncbi gse",
        super(FetchGSE, self).__init__(*args, **kwargs)
        # Handles to the in-flight celery tasks (None while idle).
        self.celery_task_fetch = None
        self.celery_task_preprocess = None

    def is_form_fields_editable(self):
        """Form is editable only before any fetch has been started."""
        if self.state in ['created', 'form_modified']:
            return True
        return False

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Prepare the phenotype table of the fetched ExpressionSet for the JS client.

        The options below rename verbose GEO "Sample_*" headers, impose a
        display order and hide technical columns.
        """
        headers_options = {
            "custom_title_prefix_map": [
                ("Sample_title", "Title"),
                ("Sample_description", "Description"),
                ("Sample_characteristics", "Characteristics"),
                ("Sample_organism", "Organism"),
                ("Sample_geo_accession", "GEO #"),
                ("Sample_", ""),
            ],
            "prefix_order": [
                "Sample_geo_accession",
                "Sample_title",
                "Sample_description",
                "Sample_contact",
                "Sample_characteristics",
            ],
            "prefix_hide": {
                "Sample_contact",
                "Sample_channel",
                "Sample_data_row_count",
                "Sample_data",
                "Sample_platform",
                "Sample_growth",
                "Sample_series_id",
                "Sample_status",
                "Sample_extract",
                "Sample_supplementary_file",
                "Sample_hyb",
                "Sample_label",
                "Sample_source",
                "Sample_last_update",
                "Sample_molecule",
                "Sample_organism",
                "Sample_scan",
                "Sample_taxid",
                "Sample_type",
                "Sample_submission",
            }
        }
        return prepare_phenotype_for_js_from_es(
            self.get_out_var("expression_set"), headers_options)

    @property
    def is_sub_pages_visible(self):
        # Class-assignment sub-page only makes sense once data is preprocessed.
        if self.state in [
            'source_was_preprocessed', 'sample_classes_assigned', 'ready'
        ]:
            return True
        return False

    def start_fetch(self, exp, *args, **kwargs):
        """
        @param exp: Experiment
        """
        self.clean_errors()
        # The task reports back via do_action on the named success/error actions.
        self.celery_task_fetch = wrapper_task.s(
            fetch_geo_gse, exp, self,
            geo_uid=self.geo_uid,
            success_action="successful_fetch",
            error_action="error_during_fetch",
            ignore_cache=False)
        exp.store_block(self)
        self.celery_task_fetch.apply_async()

    def error_during_fetch(self, exp, *args, **kwargs):
        # State transition already happened; just persist the block.
        exp.store_block(self)

    def successful_fetch(self, exp, source_file, *args, **kwargs):
        """Record the downloaded file and immediately chain into preprocessing."""
        self.clean_errors()
        self.source_file = source_file
        self.do_action("start_preprocess", exp)
        exp.store_block(self)

    def start_preprocess(self, exp, *args, **kwargs):
        """Queue parsing of the fetched SOFT file into an ExpressionSet."""
        self.celery_task_preprocess = wrapper_task.s(
            preprocess_soft, exp, self,
            source_file=self.source_file,
            success_action="successful_preprocess",
            error_action="error_during_preprocess")
        exp.store_block(self)
        self.celery_task_preprocess.apply_async()

    def error_during_preprocess(self, exp, *args, **kwargs):
        exp.store_block(self)

    def successful_preprocess(self, exp, es, *args, **kwargs):
        """
        @type es: ExpressionSet
        @type ann: PlatformAnnotation
        """
        self.set_out_var("expression_set", es)
        # self.set_out_var("gpl_annotation", ann)
        self.clean_errors()
        exp.store_block(self)
        # Notify the client that the dataset is ready for class assignment.
        msg = BlockUpdated(self.exp_id, self.uuid, self.base_name)
        msg.comment = u"Dataset %s was preprocessed, \n please assign samples to classes" % self.geo_uid
        msg.silent = False
        msg.send()

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        #TODO: unify code with user upload
        # Persist the user's sample-to-class mapping into the pheno table.
        es = self.get_out_var("expression_set")
        pheno_df = es.get_pheno_data_frame()
        received = json.loads(request.body)
        es.pheno_metadata["user_class_title"] = received["user_class_title"]
        pheno_df[received["user_class_title"]] = received["classes"]
        es.store_pheno_data_frame(pheno_df)
        exp.store_block(self)
        self.do_action("assign_sample_classes", exp)

    def assign_sample_classes(self, exp, *args, **kwargs):
        # Pure state transition; no extra work needed.
        pass
class GeneSetAggCV(GenericBlock):
    """Aggregate paired train/test expression sets over gene sets (CV variant)."""
    block_group = GroupType.AGGREGATION
    block_base_name = "CV_GS_A"
    name = "CV Gene Sets Aggregation"
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _input_train_es = InputBlockField(name="train_es", order_num=10,
                                      required_data_type="ExpressionSet", required=True)
    _input_test_es = InputBlockField(name="test_es", order_num=20,
                                     required_data_type="ExpressionSet", required=True)
    _input_gs = InputBlockField(name="gs", order_num=30,
                                required_data_type="GeneSets", required=True)

    agg_method = ParamField(
        "agg_method", title="Aggregate method", order_num=50,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="mean",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["mean", "Mean"],
                ["median", "Median"],
                ["pca", "PCA"]
            ]
        }
    )

    out_train_es = OutputBlockField(name="out_train_es", provided_data_type="ExpressionSet")
    out_test_es = OutputBlockField(name="out_test_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(GeneSetAggCV, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Queue the train/test aggregation celery task."""
        self.clean_errors()
        task_kwargs = {
            "train_es": self.get_input_var("train_es"),
            "test_es": self.get_input_var("test_es"),
            "gene_sets": self.get_input_var("gs"),
            "method": self.agg_method,
            "base_filename": "%s_%s_agg" % (self.uuid, "pca_cv"),
        }
        self.celery_task = wrapper_task.s(agg_task_cv, exp, self, **task_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, out_train_es, out_test_es):
        """Publish the aggregated train and test expression sets."""
        self.set_out_var("out_train_es", out_train_es)
        self.set_out_var("out_test_es", out_test_es)
        exp.store_block(self)
class CrossValidation(UniformMetaBlock):
    """Meta-block that drives repeated K-fold cross validation over its inputs."""
    block_base_name = "CROSS_VALID"
    name = "Cross validation K-fold"

    _cv_actions = ActionsList(
        [ActionRecord("become_ready", ["valid_params"], "ready")])
    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST,
                          init_val=["cv_info.html"])

    _input_es_dyn = InputBlockField(name="es_inputs",
                                    required_data_type="ExpressionSet",
                                    required=True, multiply_extensible=True,
                                    order_num=-1)

    folds_num = ParamField(name="folds_num", title="Folds number", order_num=10,
                           input_type=InputType.TEXT, field_type=FieldType.INT, init_val=5)
    repeats_num = ParamField(name="repeats_num", title="Repeats number", order_num=20,
                             input_type=InputType.TEXT, field_type=FieldType.INT, init_val=1)

    def get_fold_labels(self):
        """Return labels "fold_1" .. "fold_<folds*repeats>" for every generated fold."""
        total_folds = self.folds_num * self.repeats_num
        return ["fold_%s" % (idx + 1, ) for idx in range(total_folds)]

    def add_dyn_input_hook(self, exp, dyn_port, new_port):
        """
        @type new_port: InputBlockField
        """
        # Every dynamically added input gets a paired train/test inner output.
        data_type = new_port.required_data_type
        train_output = InnerOutputField(
            name="%s_train_i" % new_port.name,
            provided_data_type=data_type)
        test_output = InnerOutputField(
            name="%s_test_i" % new_port.name,
            provided_data_type=data_type)
        self.inner_output_es_names_map[new_port.name] = \
            (train_output.name, test_output.name)
        self.register_inner_output_variables([train_output, test_output])

    def execute(self, exp, *args, **kwargs):
        """Queue fold generation for all connected expression sets."""
        self.clean_errors()
        self.inner_output_manager.reset()
        es_dict = {}
        for inp_name in self.es_inputs:
            es_dict[inp_name] = self.get_input_var(inp_name)
        self.celery_task = wrapper_task.s(
            generate_cv_folds, exp, self,
            folds_num=self.folds_num,
            repeats_num=self.repeats_num,
            es_dict=es_dict,
            inner_output_es_names_map=self.inner_output_es_names_map,
            success_action="on_folds_generation_success",
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def on_params_is_valid(self, exp, *args, **kwargs):
        super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs)
        self.do_action("become_ready", exp)

    def become_ready(self, *args, **kwargs):
        # Pure state transition.
        pass
class MergeComoduleSets(GenericBlock):
    """Merge two comodule sets into a single combined comodule set."""
    block_base_name = "MERGE_COMODULE_SETS"
    name = "Merge Comodule Sets"
    is_abstract = False
    block_group = GroupType.PROCESSING
    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    _cs_1 = InputBlockField(name="cs_1", order_num=10,
                            required_data_type="ComoduleSet", required=True)
    _cs_1_name = ParamField(name="cs_1_name", order_num=11, title="Comodule 1 name",
                            input_type=InputType.TEXT, field_type=FieldType.STR,
                            init_val="genes")
    _cs_2 = InputBlockField(name="cs_2", order_num=20,
                            required_data_type="ComoduleSet", required=True)
    _cs_2_name = ParamField(name="cs_2_name", order_num=21, title="Comodule 2 name",
                            input_type=InputType.TEXT, field_type=FieldType.STR,
                            init_val="genes")

    flt_es = OutputBlockField(name="comodule_set", provided_data_type="ComoduleSet")

    def __init__(self, *args, **kwargs):
        super(MergeComoduleSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Queue the merge task for the two connected comodule sets."""
        self.clean_errors()
        merge_kwargs = {
            "cs_1": self.get_input_var("cs_1"),
            "cs_2": self.get_input_var("cs_2"),
            "cs_1_name": self.cs_1_name,
            "cs_2_name": self.cs_2_name,
            "base_filename": "%s_%s_thr" % (self.uuid, 'merge_cs'),
        }
        self.celery_task = wrapper_task.s(merge_comodules_task, exp, self,
                                          **merge_kwargs)
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, flt_es):
        """Publish the merged comodule set."""
        self.set_out_var("comodule_set", flt_es)
        exp.store_block(self)
class UserUploadComplex(GenericBlock):
    """Input block: upload mRNA / miRNA / methylation matrices plus a phenotype table.

    Each omics layer is optional except mRNA; after ``process_upload`` the
    parsed matrices are exposed as separate ExpressionSet outputs.
    """
    block_base_name = "UPLOAD_CMPLX"
    block_group = GroupType.INPUT_DATA
    name = "Upload mRna/miRna/methyl"

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data"),
        ActionRecord("success", ["processing_upload"], "done",
                     reload_block_in_client=True),
        ActionRecord("error", ["processing_upload"], "valid_params"),
    ])

    # --- mRNA layer -------------------------------------------------------
    m_rna_matrix = ParamField("m_rna_matrix", title="mRNA expression", order_num=10,
                              input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM)
    m_rna_platform = ParamField("m_rna_platform", title="Platform ID", order_num=11,
                                input_type=InputType.TEXT, field_type=FieldType.STR,
                                required=False)
    m_rna_unit = ParamField(
        "m_rna_unit", title="Working unit [used when platform is unknown]",
        order_num=12, input_type=InputType.SELECT, field_type=FieldType.STR,
        required=False, init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [["RefSeq", "RefSeq"],
                               ["Entrez", "EntrezID"],
                               ["Symbol", "Symbol"]]
        })
    m_rna_matrix_ori = ParamField(
        "m_rna_matrix_ori", title="Matrix orientation", order_num=13,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [["SxG", "Samples x Genes"],
                               ["GxS", "Genes x Samples"]]
        })
    csv_sep_m_rna = ParamField(
        "csv_sep_m_rna", title="CSV separator symbol", order_num=14,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    # --- miRNA layer ------------------------------------------------------
    mi_rna_matrix = ParamField("mi_rna_matrix", title=u"μRNA expression", order_num=20,
                               input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM,
                               required=False)
    mi_rna_platform = ParamField("mi_rna_platform", title="Platform ID", order_num=21,
                                 input_type=InputType.TEXT, field_type=FieldType.STR,
                                 required=False)
    mi_rna_unit = ParamField(
        "mi_rna_unit", title="Working unit [used when platform is unknown]",
        order_num=22, input_type=InputType.SELECT, field_type=FieldType.STR,
        required=False, init_val="RefSeq",
        options={
            "inline_select_provider": True,
            "select_options": [["RefSeq", "RefSeq"],
                               ["mirbase", "miRBase ID"]]
        })
    mi_rna_matrix_ori = ParamField(
        "mi_rna_matrix_ori", title="Matrix orientation", order_num=23,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [["SxG", "Samples x Genes"],
                               ["GxS", "Genes x Samples"]]
        })
    csv_sep_mi_rna = ParamField(
        "csv_sep_mi_rna", title="CSV separator symbol", order_num=24,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    # --- methylation layer ------------------------------------------------
    methyl_matrix = ParamField("methyl_matrix", title="Methylation expression",
                               order_num=30, input_type=InputType.FILE_INPUT,
                               field_type=FieldType.CUSTOM, required=False)
    methyl_platform = ParamField("methyl_platform", title="Platform ID", order_num=31,
                                 input_type=InputType.TEXT, field_type=FieldType.STR,
                                 required=False)
    methyl_matrix_ori = ParamField(
        "methyl_matrix_ori", title="Matrix orientation", order_num=33,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [["SxG", "Samples x Genes"],
                               ["GxS", "Genes x Samples"]]
        })
    csv_sep_methyl = ParamField(
        "csv_sep_methyl", title="CSV separator symbol", order_num=34,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    # --- phenotype --------------------------------------------------------
    pheno_matrix = ParamField("pheno_matrix", title="Phenotype matrix", order_num=40,
                              input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM,
                              required=False)
    csv_sep_pheno = ParamField(
        "csv_sep_pheno", title="CSV separator symbol", order_num=50,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",",
        options={
            "inline_select_provider": True,
            "select_options": [
                [" ", "space ( )"],
                [",", "comma (,)"],
                ["\t", "tab (\\t)"],
                [";", "semicolon (;)"],
                [":", "colon (:)"],
            ]
        })

    _is_sub_pages_visible = BlockField("is_sub_pages_visible", FieldType.RAW,
                                       is_a_property=True)

    _m_rna_es = OutputBlockField(name="m_rna_es", field_type=FieldType.HIDDEN,
                                 provided_data_type="ExpressionSet")
    _mi_rna_es = OutputBlockField(name="mi_rna_es", field_type=FieldType.HIDDEN,
                                  provided_data_type="ExpressionSet")
    _methyl_es = OutputBlockField(name="methyl_es", field_type=FieldType.HIDDEN,
                                  provided_data_type="ExpressionSet")

    # Optional platform annotation files attached by the upload task.
    mrna_gpl_file = BlockField("mrna_gpl_file", FieldType.CUSTOM, None)
    mirna_gpl_file = BlockField("mirna_gpl_file", FieldType.CUSTOM, None)
    methyl_gpl_file = BlockField("methyl_gpl_file", FieldType.CUSTOM, None)

    pages = BlockField("pages", FieldType.RAW, init_val={
        "assign_phenotype_classes": {
            "title": "Assign phenotype classes",
            "resource": "assign_phenotype_classes",
            "widget": "widgets/assign_phenotype_classes.html"
        },
    })

    @property
    def is_sub_pages_visible(self):
        # Class-assignment page appears only once data has been processed.
        if self.state in [
            'source_was_preprocessed', 'sample_classes_assigned', 'ready', 'done'
        ]:
            return True
        return False

    def __init__(self, *args, **kwargs):
        super(UserUploadComplex, self).__init__(*args, **kwargs)
        self.celery_task = None

    def process_upload(self, exp, *args, **kwargs):
        """Queue parsing of the uploaded matrices into expression sets."""
        self.clean_errors()
        self.celery_task = wrapper_task.s(user_upload_complex_task, exp, self)
        exp.store_block(self)
        self.celery_task.apply_async()

    def _get_first_stored_es(self):
        """Return the first stored expression set (mRNA, then miRNA, then methyl),
        or None when no layer has been stored yet.

        REFACTOR: this cascade was duplicated in phenotype_for_js and
        update_user_classes_assignment.
        """
        for var_name in ("m_rna_es", "mi_rna_es", "methyl_es"):
            es = self.get_out_var(var_name)
            if es is not None:
                return es
        return None

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Prepare the phenotype table of any stored layer for the JS client."""
        es = self._get_first_stored_es()
        if es is None:
            raise Exception("No data was stored before")
        return prepare_phenotype_for_js_from_es(es)

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Apply the user's class assignment to the pheno table of every stored layer."""
        m_rna_es = self.get_out_var("m_rna_es")
        mi_rna_es = self.get_out_var("mi_rna_es")
        methyl_es = self.get_out_var("methyl_es")

        es = self._get_first_stored_es()
        if es is None:
            raise Exception("No data was stored before")

        pheno_df = es.get_pheno_data_frame()
        received = json.loads(request.body)
        pheno_df[received["user_class_title"]] = received["classes"]

        # All layers share the same samples, so write the same pheno to each.
        for work_es in [m_rna_es, mi_rna_es, methyl_es]:
            if work_es is not None:
                work_es.pheno_metadata["user_class_title"] = received[
                    "user_class_title"]
                work_es.store_pheno_data_frame(pheno_df)

        exp.store_block(self)

    def success(self, exp, m_rna_es, mi_rna_es, methyl_es):
        """Publish whichever expression sets the upload task produced."""
        if m_rna_es:
            self.set_out_var("m_rna_es", m_rna_es)
        if mi_rna_es:
            self.set_out_var("mi_rna_es", mi_rna_es)
        if methyl_es:
            self.set_out_var("methyl_es", methyl_es)
        exp.store_block(self)
class UserUpload(GenericBlock):
    """Input block: upload a single expression matrix plus a phenotype matrix."""
    block_base_name = "UPLOAD"
    block_group = GroupType.INPUT_DATA
    is_abstract = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),

        ActionRecord("process_upload", ["valid_params", "processing_upload"],
                     "processing_upload", "Process uploaded data",
                     reload_block_in_client=True),
        ActionRecord("success", ["processing_upload"], "done",
                     reload_block_in_client=True),
        ActionRecord("error", ["processing_upload"], "valid_params",
                     reload_block_in_client=True),
    ])

    es_matrix = ParamField("es_matrix", title="Expression set matrix", order_num=0,
                           input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM)
    es_matrix_ori = ParamField(
        "es_matrix_ori", title="Matrix orientation", order_num=1,
        input_type=InputType.SELECT, field_type=FieldType.STR,
        init_val="SxG",
        options={
            "inline_select_provider": True,
            "select_options": [
                ["SxG", "Samples x Genes"],
                ["GxS", "Genes x Samples"]
            ]
        }
    )
    pheno_matrix = ParamField("pheno_matrix", title="Phenotype matrix", order_num=10,
                              input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM)
    gpl_platform = ParamField("gpl_platform", title="Platform ID", order_num=20,
                              input_type=InputType.TEXT, field_type=FieldType.STR,
                              required=False)
    working_unit = ParamField("working_unit",
                              title="Working unit [used when platform is unknown]",
                              order_num=3, input_type=InputType.TEXT,
                              field_type=FieldType.STR, required=False)

    _is_sub_pages_visible = BlockField("is_sub_pages_visible", FieldType.RAW,
                                       is_a_property=True)

    ### PARAMETERS
    _expression_set = OutputBlockField(name="expression_set",
                                       field_type=FieldType.HIDDEN,
                                       provided_data_type="ExpressionSet")
    _gpl_annotation = OutputBlockField(name="gpl_annotation",
                                       field_type=FieldType.HIDDEN,
                                       provided_data_type="PlatformAnnotation")

    # TODO: COPY PASTE from fetch_gse block
    pages = BlockField("pages", FieldType.RAW, init_val={
        "assign_phenotype_classes": {
            "title": "Assign phenotype classes",
            "resource": "assign_phenotype_classes",
            "widget": "widgets/assign_phenotype_classes.html"
        },
    })

    def __init__(self, *args, **kwargs):
        super(UserUpload, self).__init__("User upload", *args, **kwargs)

    @property
    def is_sub_pages_visible(self):
        # Class-assignment page appears only once data has been processed.
        if self.state in ['source_was_preprocessed', 'sample_classes_assigned',
                          'ready', 'done']:
            return True
        return False

    def phenotype_for_js(self, exp, *args, **kwargs):
        """Prepare the phenotype table of the stored ExpressionSet for the JS client."""
        return prepare_phenotype_for_js_from_es(self.get_out_var("expression_set"))

    def update_user_classes_assignment(self, exp, request, *args, **kwargs):
        """Persist the user's sample-to-class mapping into the pheno table."""
        es = self.get_out_var("expression_set")
        pheno_df = es.get_pheno_data_frame()
        received = json.loads(request.body)
        es.pheno_metadata["user_class_title"] = received["user_class_title"]
        pheno_df[received["user_class_title"]] = received["classes"]
        es.store_pheno_data_frame(pheno_df)
        exp.store_block(self)

    def process_upload(self, exp, *args, **kwargs):
        """
        Parse the uploaded matrices into an ExpressionSet and finish the block.

        @param exp: Experiment
        """
        self.clean_errors()
        # from_csv already uses the first CSV column as the index.
        assay_df = pd.DataFrame.from_csv(self.es_matrix.get_file())

        es = ExpressionSet(base_dir=exp.get_data_folder(),
                           base_filename="%s_annotation" % self.uuid)

        pheno_df = pd.DataFrame.from_csv(self.pheno_matrix.get_file())
        # BUGFIX: removed a no-op `pheno_df.set_index(pheno_df.columns[0])` —
        # set_index is not in-place and its result was discarded, so the
        # statement had no effect (from_csv already indexed by column 0).

        user_class_title = es.pheno_metadata["user_class_title"]
        if user_class_title not in pheno_df.columns:
            # Create an empty class column so assignment can fill it later.
            pheno_df[user_class_title] = ""

        # If the matrix is badly oriented, transpose it to Samples x Genes.
        if self.es_matrix_ori == "GxS":
            assay_df = assay_df.T

        es.store_assay_data_frame(assay_df)
        es.store_pheno_data_frame(pheno_df)

        if self.working_unit:
            es.working_unit = self.working_unit

        self.set_out_var("expression_set", es)

        exp.store_block(self)
        self.do_action("success", exp)

    def success(self, exp, *args, **kwargs):
        # Pure state transition.
        pass
class DecisionTree(GenericClassifier):
    """Decision tree classifier block.

    Exposes the main decision-tree hyper-parameters as block parameters and
    forwards them to the classifier via ``collect_options``.
    """
    block_base_name = "DT"
    name = "Decision Tree"
    classifier_name = "DT"

    criterion = ParamField(
        name="criterion",
        title="The function to measure the quality of a split",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        order_num=11,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["gini", "Gini impurity"],
                ["entropy", "Information gain"]
            ]
        }
    )

    # NOTE: a max_features mode/value parameter pair used to live here
    # (mirroring RandomForest) but was disabled; re-enable by copying the
    # RandomForest definitions if needed.

    max_depth = ParamField(
        name="max_depth",
        title="The maximum depth of the tree",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=40,
    )
    min_samples_split = ParamField(
        name="min_samples_split",
        title="The minimum number of samples to split an internal node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=50,
    )
    min_samples_leaf = ParamField(
        name="min_samples_leaf",
        title="The minimum number of samples to be at a leaf node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=60,
    )

    def collect_options(self):
        """Gather user-set parameters into the classifier options dict."""
        # BUG FIX: `criterion` was defined as a parameter but never collected,
        # so the user's selection was silently ignored (cf. LinearSVM, which
        # collects its `loss` select parameter).
        self.collect_option_safe("criterion", str)
        self.collect_option_safe("max_depth", int)
        self.collect_option_safe("min_samples_split", int)
        self.collect_option_safe("min_samples_leaf", int)
class MergeExpressionSets(GenericBlock):
    """Concatenate two expression sets, by samples or by features."""
    block_base_name = "MergeES"
    name = "Concatenate Expression"
    block_group = GroupType.PROCESSING

    is_block_supports_auto_execution = True

    _block_actions = ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "ready"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
    ])
    _block_actions.extend(execute_block_actions_list)

    # The two expression sets to be merged.
    _es_1 = InputBlockField(name="es_1", title="Set 1", order_num=10,
                            required_data_type="ExpressionSet", required=True)
    _es_2 = InputBlockField(name="es_2", title="Set 2", order_num=20,
                            required_data_type="ExpressionSet", required=True)

    # Concatenation direction: "CR" stacks samples, "CC" stacks features.
    _es_matrix_con = ParamField(
        "_es_matrix_con", title="Concatenation", order_num=30,
        input_type=InputType.SELECT, field_type=FieldType.STR, init_val="CR",
        options={
            "inline_select_provider": True,
            "select_options": [["CR", "concatenate samples"],
                               ["CC", "concatenate features"]]
        })

    merged_es = OutputBlockField(name="merged_es", provided_data_type="ExpressionSet")

    def __init__(self, *args, **kwargs):
        super(MergeExpressionSets, self).__init__(*args, **kwargs)
        self.celery_task = None

    def execute(self, exp, *args, **kwargs):
        """Schedule the merge as an asynchronous celery task."""
        self.clean_errors()
        concat_mode = getattr(self, "_es_matrix_con", "CR")
        self.celery_task = wrapper_task.s(
            merge_two_es,
            exp, self,
            es_1=self.get_input_var("es_1"),
            es_2=self.get_input_var("es_2"),
            con=concat_mode,
            base_filename="%s_merged" % self.uuid,
        )
        exp.store_block(self)
        self.celery_task.apply_async()

    def success(self, exp, es):
        """Publish the merged expression set once the task completes."""
        self.set_out_var("merged_es", es)
        exp.store_block(self)
class RenderTable(RcVisualizer):
    """Visualizes a ResultsContainer as an HTML table with CSV/JSON export."""
    block_base_name = "RENDER_TABLE"
    name = "Results Container as Table"

    _table = BlockField(name="table", field_type=FieldType.CUSTOM, is_a_property=True)
    _export_table_url = BlockField(name="export_table_url", field_type=FieldType.STR,
                                   is_a_property=True)
    _export_raw_results_url = BlockField(name="export_raw_results_url",
                                         field_type=FieldType.STR, is_a_property=True)
    elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST,
                          init_val=["rc_table.html"])
    # Client-maintained slicing configuration for the results cube.
    table_config = ParamField(name="table_config", title="",
                              input_type=InputType.HIDDEN, field_type=FieldType.RAW)

    def __init__(self, *args, **kwargs):
        super(RenderTable, self).__init__(*args, **kwargs)
        self.table_config = {
            "header_axis": "",
            "multi_index_axis_dict": {},
        }

    @property
    def table(self):
        """Build a TableObj (html + data frame) from the current slice config.

        Returns an empty TableObj when the results container is missing or the
        slice configuration is incomplete.
        """
        rc = self.rc
        to = TableObj()
        if rc:
            rc.load()
            header_axis = self.table_config.get("header_axis")
            index_axis_list = []
            for axis, flag in self.table_config.get("multi_index_axis_dict", {}).iteritems():
                if flag:
                    index_axis_list.append(axis)
            if header_axis and index_axis_list and hasattr(self, "metric"):
                df = rc.get_pandas_slice(header_axis, index_axis_list,
                                         metric_name=self.metric)
                to.html = df.to_html(float_format=pd_float_format_func)
                to.df = df
            else:
                # Incomplete config: log what is missing instead of failing.
                if self.exp_id:
                    exp = Experiment.get_exp_by_id(self.exp_id)
                    exp.log(self.uuid, "Can't build table slice, header axis `%s`, index axis_list `%s`"
                                       % (header_axis, index_axis_list))
                log.debug("Can't build table slice, header axis `%s`, index axis_list `%s`",
                          header_axis, index_axis_list)
        return to

    @property
    def export_table_url(self):
        """URL for downloading the current table slice as CSV."""
        return reverse("block_field_formatted", kwargs={
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_table",
            "format": "csv"
        })

    @property
    def export_raw_results_url(self):
        """URL for downloading the raw results container as JSON."""
        return reverse("block_field_formatted", kwargs={
            "exp_id": self.exp_id,
            "block_uuid": self.uuid,
            "field": "export_rc",
            "format": "json"
        })

    def export_rc(self, exp, *args, **kwargs):
        return self.rc.export_to_json_dict()

    def export_table(self, exp, *args, **kwargs):
        """Serialize the current table slice to CSV text."""
        table = self.table
        out = StringIO.StringIO()
        # BUG FIX: `tmp_df` was only defined in a commented-out line, so this
        # method raised NameError on every export. Since `float_format` in
        # pandas `to_csv` doesn't behave as needed here, the frame is
        # pre-formatted cell-by-cell with `applymap` before serialization.
        tmp_df = table.df.applymap(pd_float_format_func)
        tmp_df.to_csv(out, float_format=pd_float_format_func)
        out.seek(0)
        return out.read()

    def on_params_is_valid(self, exp, *args, **kwargs):
        super(RenderTable, self).on_params_is_valid(exp, *args, **kwargs)
        if self.rc is not None:
            # Make sure every axis of the results cube has an entry in the
            # multi-index dict so the client widget can render checkboxes.
            for axis in self.rc.axis_list:
                if axis not in self.table_config["multi_index_axis_dict"]:
                    self.table_config["multi_index_axis_dict"][axis] = ""
            exp.store_block(self)
class RandomForest(GenericClassifier):
    """Random forest classifier block.

    Exposes the main random-forest hyper-parameters as block parameters and
    forwards them to the classifier via ``collect_options``.
    """
    block_base_name = "RND_FOREST"
    name = "Random forest"
    classifier_name = "random_forest"

    n_estimators = ParamField(
        name="n_estimators",
        title="The number of trees in the forest",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=10,
        order_num=10,
    )
    criterion = ParamField(
        name="criterion",
        title="The function to measure the quality of a split",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        order_num=11,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["gini", "Gini impurity"],
                ["entropy", "Information gain"]
            ]
        }
    )
    # How to interpret `max_features_value`: a fixed count, a ratio, or a
    # function of the feature count.
    max_features_mode = ParamField(
        name="max_features_mode",
        title="The number of features to consider when looking for the best split",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["int", "Fixed number"],
                ["float", "Ratio of the features number [0.0 .. 1.0]"],
                ["sqrt", "sqrt(number of features)"],
                ["log2", "log2(number of features)"],
            ]
        },
        order_num=20,
    )
    max_features_value = ParamField(
        name="max_features_value",
        title="Value for the chosen mode",
        input_type=InputType.TEXT,
        field_type=FieldType.STR,
        order_num=30,
    )
    max_depth = ParamField(
        name="max_depth",
        title="The maximum depth of the tree.",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=40,
    )
    min_samples_split = ParamField(
        name="min_samples_split",
        title="The minimum number of samples to split an internal node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=50,
    )
    min_samples_leaf = ParamField(
        name="min_samples_leaf",
        title="The minimum number of samples to be at a leaf node",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=60,
    )

    def collect_options(self):
        """Gather user-set parameters into the classifier options dict."""
        # BUG FIX: the option name was misspelled as "n_n_estimators", so the
        # user-provided number of trees never reached the classifier.
        self.collect_option_safe("n_estimators", int)
        # BUG FIX: `criterion` was defined as a parameter but never collected,
        # so the user's selection was silently ignored.
        self.collect_option_safe("criterion", str)
        # `max_features` is either a mode keyword or a typed value depending
        # on the chosen mode.
        max_features_mode = self.get_option_safe("max_features_mode", str)
        if max_features_mode in ["sqrt", "log2"]:
            self.classifier_options["max_features"] = max_features_mode
        elif max_features_mode == "int":
            self.collect_option_safe("max_features_value", int, target_name="max_features")
        elif max_features_mode == "float":
            self.collect_option_safe("max_features_value", float, target_name="max_features")
        self.collect_option_safe("max_depth", int)
        self.collect_option_safe("min_samples_split", int)
        self.collect_option_safe("min_samples_leaf", int)
class UniformMetaBlock(GenericBlock):
    """Abstract base for meta-blocks that run a sub-scope once per fold.

    The block generates folds, iterates over them via an
    IteratedInnerFieldManager, executes the inner sub-scope for each fold,
    collects declared output variables into a SequenceContainer and finally
    builds a results container. Concrete subclasses provide the fold/repeat
    labels.
    """
    is_abstract = True
    block_group = GroupType.META_PLUGIN
    create_new_scope = True
    is_block_supports_auto_execution = True

    # State machine: params validation -> fold generation -> repeated
    # (run sub-scope -> collect outputs) -> done / execution_error.
    _block_actions = ActionsList([])
    _block_actions.extend(ActionsList([
        ActionRecord("save_params", ["created", "valid_params", "done", "ready"],
                     "validating_params", user_title="Save parameters"),
        ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"),
        ActionRecord("on_params_not_valid", ["validating_params"], "created"),
        ActionRecord("add_collector_var", ["created", "ready", "done", "valid_params"],
                     "validating_params"),
        ActionRecord("remove_collector_var", ["created", "ready", "done", "valid_params"],
                     "validating_params"),

        ActionRecord("execute", ["ready"], "generating_folds", user_title="Run block"),

        ActionRecord("on_folds_generation_success", ["generating_folds"],
                     "ready_to_run_sub_scope", reload_block_in_client=True),
        ActionRecord("continue_collecting_sub_scope", ["ready_to_run_sub_scope"],
                     "sub_scope_executing"),
        ActionRecord("run_sub_scope", ["ready_to_run_sub_scope"], "sub_scope_executing"),
        ActionRecord("on_sub_scope_done", ["sub_scope_executing"], "ready_to_run_sub_scope"),

        ActionRecord("success", ["working", "ready_to_run_sub_scope"], "done",
                     propagate_auto_execution=True, reload_block_in_client=True),
        ActionRecord("error", ["*", "ready", "working", "sub_scope_executing",
                               "generating_folds", "ready_to_run_sub_scope"],
                     "execution_error", reload_block_in_client=True),

        ActionRecord("reset_execution", ["*", "done", "sub_scope_executing",
                                         "ready_to_run_sub_scope", "generating_folds",
                                         "execution_error"], "ready",
                     user_title="Reset execution"),
    ]))

    # Specification of which inner-scope variables to collect per fold.
    _collector_spec = ParamField(name="collector_spec", title="",
                                 field_type=FieldType.CUSTOM,
                                 input_type=InputType.HIDDEN,
                                 init_val=None, required=False)

    # Per-fold collected results, one cell per fold.
    res_seq = BlockField(name="res_seq", provided_data_type="SequenceContainer",
                         field_type=FieldType.HIDDEN, init_val=None)

    _results_container = OutputBlockField(
        name="results_container",
        provided_data_type="ResultsContainer",
        field_type=FieldType.HIDDEN,
        init_val=None
    )

    def __init__(self, *args, **kwargs):
        super(UniformMetaBlock, self).__init__(*args, **kwargs)
        # These states count as "working" for auto-execution bookkeeping.
        self.auto_exec_status_working.update(["sub_scope_executing",
                                              "ready_to_run_sub_scope",
                                              "generating_folds"])

        self.inner_output_manager = IteratedInnerFieldManager()
        self.collector_spec = CollectorSpecification()
        self.collector_spec.label = self.block_base_name + "_collection"

        self.inner_output_es_names_map = {}
        self.celery_task = None

        self.set_out_var("results_container", None)
        self.res_seq = SequenceContainer()

    def remap_inputs(self, mapping):
        # Rebind input and collector variables after blocks were re-keyed
        # (e.g. experiment duplication).
        for var in self.bound_inputs.itervalues():
            var.change_block(mapping)
        for var in self.collector_spec.bound.itervalues():
            var.change_block(mapping)

    @property
    def is_sub_pages_visible(self):
        if self.state in ['valid_params', 'done', 'ready']:
            return True
        return False

    @abstractmethod
    def get_fold_labels(self):
        pass

    @abstractmethod
    def get_repeat_labels(self):
        pass

    def get_inner_out_var(self, name):
        return self.inner_output_manager.get_var(name)

    def run_sub_scope(self, exp, *args, **kwargs):
        # Reset inner blocks, then execute the sub-scope for the current fold.
        self.reset_execution_for_sub_blocks()

        cell = self.res_seq.sequence[self.inner_output_manager.iterator]
        log.debug("Cell!!!!!!!! %s", str(cell))
        act = self.inner_output_manager.sequence[self.inner_output_manager.iterator]
        log.debug("Cell!!!!!!!! %s", str(act))
        exp.store_block(self)
        sr = ScopeRunner(exp, self.sub_scope_name)
        sr.execute()

    def on_sub_scope_done(self, exp, *args, **kwargs):
        """Collect inner-scope outputs for the finished fold.

        @type exp: Experiment

        This action should be called by ScopeRunner
        when all blocks in sub-scope have exec status == done.
        Runs under a per-block redis lock because multiple inner blocks may
        finish concurrently.
        """
        r = get_redis_instance()
        with redis_lock.Lock(r, ExpKeys.get_block_global_lock_key(self.exp_id, self.uuid)):

            cell = self.res_seq.sequence[self.inner_output_manager.iterator]
            for name, scope_var in self.collector_spec.bound.iteritems():
                var = exp.get_scope_var_value(scope_var)
                exp.log(self.uuid, "Collected %s from %s" % (var, scope_var.title),
                        severity="CRITICAL")
                log.debug("Collected %s from %s", var, scope_var.title)
                if var is not None:
                    # Clone when supported so the stored copy is decoupled from
                    # the inner scope's live object.
                    if hasattr(var, "clone"):
                        cell[name] = var.clone("%s_%s" % (
                            self.uuid, self.inner_output_manager.iterator))
                    else:
                        cell[name] = deepcopy(var)

            self.res_seq.sequence[self.inner_output_manager.iterator] = cell
            exp.store_block(self)

        if len(cell) < len(self.res_seq.fields):
            # Not every declared collector variable was produced yet.
            self.do_action("continue_collecting_sub_scope", exp)
        else:
            try:
                # Advance to the next fold and run the sub-scope again.
                self.inner_output_manager.next()
                self.do_action("run_sub_scope", exp)
            except StopIteration, e:
                # All folds were processed without errors
                self.build_result_collection(exp)
                self.do_action("success", exp)
class KnnClassifier(GenericClassifier):
    """K-nearest-neighbors classifier block.

    Exposes the main KNN hyper-parameters as block parameters and forwards
    them to the classifier via ``collect_options``.
    """
    block_base_name = "KNN"
    name = "Knn classifier"
    classifier_name = "knn"

    n_neighbors = ParamField(
        name="n_neighbors",
        title="Number of neighbors",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        init_val=1,
        order_num=10,
    )
    algorithm = ParamField(
        name="algorithm",
        title="Algorithm [optional]",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        order_num=20,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["ball_tree", "BallTree"],
                ["kd_tree", "KDTree"],
                ["brute", "Brute force search"],
                ["auto", "Auto guess algorithm"],
            ]
        }
    )
    leaf_size = ParamField(
        name="leaf_size",
        title="Leaf size for BallTree or KDTree [optional]",
        input_type=InputType.TEXT,
        field_type=FieldType.INT,
        order_num=30,
    )

    # NOTE(review): `metric_options` duplicates the inline select options and
    # `select_provider` points at it as well — likely leftover from an older
    # select mechanism; kept for backward compatibility.
    _metric_options = BlockField(name="metric_options", field_type=FieldType.RAW)
    metric_options = [
        {"pk": "euclidean", "str": "Euclidean Distance"},
        {"pk": "manhattan", "str": "Manhattan Distance"},
        {"pk": "chebyshev", "str": "Chebyshev Distance"},
    ]
    metric = ParamField(
        name="metric",
        title="The distance metric to use for the tree [optional]",
        input_type=InputType.SELECT,
        field_type=FieldType.STR,
        select_provider="metric_options",
        order_num=40,
        options={
            "inline_select_provider": True,
            "select_options": [
                ["euclidean", "Euclidean Distance"],
                ["manhattan", "Manhattan Distance"],
                ["chebyshev", "Chebyshev Distance"],
            ]
        }
    )

    def collect_options(self):
        """Gather user-set parameters into the classifier options dict."""
        self.collect_option_safe("n_neighbors", int)
        # CONSISTENCY FIX: pass the explicit `str` converter for the string
        # select parameters, as every sibling classifier block does
        # (cf. LinearSVM.collect_options).
        self.collect_option_safe("algorithm", str)
        self.collect_option_safe("leaf_size", int)
        self.collect_option_safe("metric", str)