class RcVisualizer(GenericBlock): block_base_name = "RC_VIZUALIZER" is_block_supports_auto_execution = False block_group = GroupType.VISUALIZE is_abstract = True _block_actions = ActionsList([ ActionRecord( "save_params", ["created", "valid_params", "done", "ready", "input_bound"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_is_valid", ["validating_params"], "input_bound"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("configure_table", ["input_bound", "ready"], "ready"), ]) results_container = InputBlockField(name="results_container", required_data_type="ResultsContainer", required=True, field_type=FieldType.CUSTOM) _rc = BlockField(name="rc", field_type=FieldType.CUSTOM, is_a_property=True) _available_metrics = BlockField(name="available_metrics", field_type=FieldType.RAW, is_a_property=True) metric = ParamField(name="metric", title="Metric", field_type=FieldType.STR, input_type=InputType.SELECT, select_provider="available_metrics") def __init__(self, *args, **kwargs): super(RcVisualizer, self).__init__(*args, **kwargs) @property @log_timing def available_metrics(self): try: return [{ "pk": metric_name, "str": metric.title } for metric_name, metric in metrics_dict.iteritems() if metric.produce_single_number] except Exception, e: log.exception(e) return []
class GlobalTest(GenericBlock): block_base_name = "GLOBAL_TEST" name = "Goeman global test" block_group = GroupType.PROCESSING is_block_supports_auto_execution = True _block_actions = ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_is_valid", ["validating_params"], "ready"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ]) _block_actions.extend(execute_block_actions_list) _input_es = InputBlockField(name="es", order_num=10, required_data_type="ExpressionSet", required=True) _input_gs = InputBlockField(name="gs", order_num=20, required_data_type="GeneSets", required=True) _result = OutputBlockField(name="result", field_type=FieldType.STR, provided_data_type="TableResult", init_val=None) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[ "gt_result.html" ]) def __init__(self, *args, **kwargs): super(GlobalTest, self).__init__(*args, **kwargs) self.celery_task = None exp = Experiment.get_exp_by_id(self.exp_id) self.result = TableResult( base_dir=exp.get_data_folder(), base_filename="%s_gt_result" % self.uuid, ) self.result.headers = ['p-value', 'Statistic', 'Expected', 'Std.dev', '#Cov'] def execute(self, exp, *args, **kwargs): self.clean_errors() self.celery_task = wrapper_task.s( global_test_task, exp, self, es=self.get_input_var("es"), gene_sets=self.get_input_var("gs"), table_result=self.result ) exp.store_block(self) self.celery_task.apply_async() def success(self, exp, result, *args, **kwargs): self.result = result self.set_out_var("result", self.result) exp.store_block(self)
class GetBroadInstituteGeneSet(GenericBlock): block_base_name = "BI_GENE_SET" block_group = GroupType.INPUT_DATA name = "Get MSigDB Gene Set" _block_actions = ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_is_valid", ["validating_params"], "done", reload_block_in_client=True), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ]) # TODO: maybe create more general solution ? _all_gene_sets = BlockField("all_gene_sets", title="", input_type=InputType.HIDDEN, field_type=FieldType.RAW, is_a_property=True) msigdb_id = ParamField( name="msigdb_id", title="MSigDB gene set", input_type=InputType.SELECT, field_type=FieldType.INT, init_val=0, # TODO: fix hardcoded value select_provider="all_gene_sets") _gs = OutputBlockField(name="gs", field_type=FieldType.HIDDEN, provided_data_type="GeneSets") @property def all_gene_sets(self): return BroadInstituteGeneSet.get_all_meta() def on_params_is_valid(self, exp): gs = BroadInstituteGeneSet.objects.get( pk=self.msigdb_id).get_gene_sets() self.set_out_var("gs", gs) super(GetBroadInstituteGeneSet, self).on_params_is_valid(exp)
class GenericBlock(BaseBlock): # block fields is_abstract = True _uuid = BlockField("uuid", FieldType.STR, None, is_immutable=True) _name = BlockField("name", FieldType.STR, None) name = "Generic block" _base_name = BlockField("base_name", FieldType.STR, "", is_immutable=True) _block_group = BlockField("block_group", FieldType.STR, "", is_immutable=True) block_group = None _exp_id = BlockField("exp_id", FieldType.STR, None, is_immutable=True) _scope_name = BlockField("scope_name", FieldType.STR, "root", is_immutable=True) _sub_scope_name = BlockField("sub_scope_name", FieldType.STR, None, is_immutable=True) _visible_scopes_list = BlockField("visible_scopes_list", FieldType.SIMPLE_LIST, is_immutable=True) _state = BlockField("state", FieldType.STR, "created") _ui_folded = BlockField("ui_folded", FieldType.BOOLEAN, init_val=False) _ui_internal_folded = BlockField("ui_internal_folded", FieldType.BOOLEAN, init_val=False) _show_collector_editor = BlockField("show_collector_editor", FieldType.BOOLEAN, init_val=False) _has_custom_layout = BlockField("has_custom_layout", FieldType.BOOLEAN) _custom_layout_name = BlockField("custom_layout_name", FieldType.STR) _create_new_scope = BlockField("create_new_scope", FieldType.BOOLEAN) create_new_scope = False is_block_supports_auto_execution = False _errors = BlockField("errors", FieldType.SIMPLE_LIST, list()) _warnings = BlockField("warnings", FieldType.SIMPLE_LIST, list()) _bound_inputs = BlockField("bound_inputs", FieldType.SIMPLE_DICT, defaultdict()) def move_to_exp(self, exp_id): pass def duplicate(self, exp_id, mapping): import copy old_uuid = self.uuid new_obj = copy.deepcopy(self) new_obj.uuid = "B" + uuid1().hex[:8] new_obj.exp_id = exp_id if new_obj.scope_name: # little hack, it uses the fact that a scope name has a structure root_uuid1_uuid2.... parent_uuids = new_obj.scope_name.split('_') for parent_uuid in parent_uuids: new_obj.scope_name = new_obj.scope_name.replace( parent_uuid, mapping[parent_uuid]) scope = new_obj.get_scope() scope.load() for f_name, f in new_obj._block_serializer.outputs.iteritems(): scope.register_variable( ScopeVar(new_obj.uuid, f_name, f.provided_data_type)) # log.debug("Registering normal outputs: %s", f_name) # new_obj.register_provided_objects(scope, ScopeVar(new_obj.uuid, f_name, f.provided_data_type)) scope.store() return new_obj def remap_inputs(self, mapping): for var in self.bound_inputs.itervalues(): var.change_block(mapping) def __init__(self, exp_id=None, scope_name=None): """ Building block for workflow """ # TODO: due to dynamic inputs, find better solution self._block_serializer = BlockSerializer.clone( self.__class__._block_serializer) self.state = "created" self.uuid = "B" + uuid1().hex[:8] self.exp_id = exp_id exp = None if exp_id: exp = Experiment.get_exp_by_id(exp_id) self.scope_name = scope_name self.base_name = "" # Used only be meta-blocks self.children_blocks = [] # End self._out_data = dict() self.out_manager = OutManager() self.input_manager = InputManager() # Automatic execution status map self.auto_exec_status_ready = set(["ready"]) self.auto_exec_status_done = set(["done"]) self.auto_exec_status_working = set(["working"]) self.auto_exec_status_error = set(["execution_error"]) # Init block fields for f_name, f in itertools.chain( self._block_serializer.fields.iteritems(), self._block_serializer.params.iteritems()): #if f_name not in self.__dict__ and not f.is_a_property: if not f.is_a_property and not hasattr(self, f_name): try: setattr(self, f_name, f.init_val) except: import ipdb ipdb.set_trace() for f_name, f in self._block_serializer.inputs.iteritems(): if f.multiply_extensible: setattr(self, f_name, []) # Names of dynamically added ports # TODO: Hmm maybe more metaclass magic can be applied here scope = self.get_scope() scope.load() for f_name, f in self._block_serializer.outputs.iteritems(): if exp: exp.log(self.uuid, "Registering normal outputs: %s" % f_name) log.debug("Registering normal outputs: %s", f_name) self.register_provided_objects( scope, ScopeVar(self.uuid, f_name, f.provided_data_type)) # TODO: Use factories for init values #if f.init_val is not None: # setattr(self, f.name, f.init_val) scope.store() for f_name, f in self._block_serializer.fields.items(): if f.init_val is not None: #setattr(self, f.name, f.init_val) pass for f_name, f in self._block_serializer.inputs.iteritems(): self.input_manager.register(f) def on_remove(self, *args, **kwargs): """ Cleanup all created files TODO: github:#61 """ pass def get_exec_status(self): if self.state in self.auto_exec_status_done: return "done" if self.state in self.auto_exec_status_error: return "error" if self.state in self.auto_exec_status_ready: return "ready" return "not_ready" def bind_input_var(self, input_name, bound_var): if self.exp_id: exp = Experiment.get_exp_by_id(self.exp_id) exp.log( self.uuid, "bound input %s to %s in block: %s, exp: %s" % (input_name, bound_var, self.base_name, self.exp_id)) log.debug("bound input %s to %s in block: %s, exp: %s", input_name, bound_var, self.base_name, self.exp_id) self.bound_inputs[input_name] = bound_var def get_input_var(self, name): try: exp = Experiment.get_exp_by_id(self.exp_id) scope_var = self.bound_inputs[name] return exp.get_scope_var_value(scope_var) except: return None def get_out_var(self, name): if self.out_manager.contains(name): return self._out_data.get(name) elif self.create_new_scope: return self.get_inner_out_var(name) else: return None # def get_inner_out_var(self, name): # raise NotImplementedError("Not implemented in the base class") # def set_inner_out_var(self, name, value): # raise NotImplementedError("Not implemented in the base class") def set_out_var(self, name, value): self._out_data[name] = value def get_scope(self): exp = Experiment.get_exp_by_id(self.exp_id) return Scope(exp, self.scope_name) @property def sub_scope_name(self): if hasattr(self, "create_new_scope") and self.create_new_scope: return "%s_%s" % (self.scope_name, self.uuid) else: return "" @property def visible_scopes_list(self): scope = self.get_scope() scope_names_list = scope.get_parent_scope_list() scope_names_list.append(self.scope_name) return scope_names_list def get_sub_scope(self): exp = Experiment.get_exp_by_id(self.exp_id) return Scope(exp, self.sub_scope_name) def reset_execution_for_sub_blocks(self): exp = Experiment.get_exp_by_id(self.exp_id) for block_uuid, block in exp.get_blocks(self.children_blocks): block.do_action("reset_execution", exp) def get_input_blocks(self): required_blocks = [] for f in self.input_manager.input_fields: if f.multiply_extensible: continue if self.bound_inputs.get(f.name) is None and f.required: raise RuntimeError("Not all required inputs are bound") elif self.bound_inputs.get(f.name): required_blocks.append(self.bound_inputs[f.name].block_uuid) return required_blocks def get_user_actions(self): """ @rtype: list of workflow.blocks.fields.ActionRecord """ return self._trans.user_visible(self.state) @log_timing def to_dict(self): result = self._block_serializer.to_dict(self) # import ipdb; ipdb.set_trace() return result def register_provided_objects(self, scope, scope_var): self.out_manager.register(scope_var.var_name, scope_var.data_type) scope.register_variable(scope_var) @log_timing def apply_action_from_js(self, action_name, *args, **kwargs): if self._trans.is_action_available(self.state, action_name): self.do_action(action_name, *args, **kwargs) elif hasattr(self, action_name) and hasattr(getattr(self, action_name), "__call__"): return getattr(self, action_name)(*args, **kwargs) else: raise RuntimeError("Block %s doesn't have action: %s" % (self.name, action_name)) def do_action(self, action_name, exp, *args, **kwargs): # if action_name == "success" and self.block_base_name == "CROSS_VALID": # from celery.contrib import rdb; rdb.set_trace() ar = self._trans.action_records_by_name[action_name] old_exec_state = self.get_exec_status() next_state = self._trans.next_state(self.state, action_name) if next_state is not None: log.debug("Do action: %s in block %s from state %s -> %s", action_name, self.base_name, self.state, next_state) exp.log( self.uuid, "Do action: %s in block %s from state %s -> %s" % (action_name, self.base_name, self.state, next_state)) self.state = next_state if old_exec_state != "done" and self.get_exec_status() == "done": if self.is_block_supports_auto_execution: BlockUpdated(self.exp_id, block_uuid=self.uuid, block_alias=self.base_name, silent=True).send() exp.store_block(self) getattr(self, action_name)(exp, *args, **kwargs) if ar.reload_block_in_client: BlockUpdated(self.exp_id, self.uuid, self.base_name).send() # TODO: Check if self.scope_name is actually set to auto execution # if old_exec_state != "done" and self.get_exec_status() == "done" \ and ar.propagate_auto_execution \ and self.is_block_supports_auto_execution: exp.log(self.uuid, "Propagate execution: %s " % self.base_name) log.debug("Propagate execution: %s ", self.base_name) auto_exec_task.s(exp, self.scope_name).apply_async() elif self.state in self.auto_exec_status_error \ and self.is_block_supports_auto_execution: exp.log(self.uuid, "Detected error during automated workflow execution") log.debug("Detected error during automated workflow execution") halt_execution_task.s(exp, self.scope_name).apply_async() else: raise RuntimeError( "Action %s isn't available for block %s in state %s" % (action_name, self.base_name, self.state)) def change_base_name(self, exp, received_block, *args, **kwargs): # TODO: check if the name is correct new_name = received_block.get("base_name") if new_name: exp.change_block_alias(self, new_name) def toggle_ui_folded(self, exp, received_block, *args, **kwargs): self.ui_folded = received_block["ui_folded"] exp.store_block(self) def save_params(self, exp, received_block=None, *args, **kwargs): self._block_serializer.save_params(self, received_block) exp.store_block(self) self.validate_params(exp) def save_file_input(self, exp, field_name, file_obj, multiple=False, upload_meta=None): if upload_meta is None: upload_meta = {} if not hasattr(self, field_name): raise Exception("Block doesn't have field: %s" % field_name) orig_name = file_obj.name local_filename = "%s_%s_%s" % (self.uuid[:8], field_name, file_obj.name) if not multiple: exp.log(self.uuid, "Storing single upload to field: %s" % field_name) log.debug("Storing single upload to field: %s", field_name) ud, is_created = UploadedData.objects.get_or_create( exp=exp, block_uuid=self.uuid, var_name=field_name) file_obj.name = local_filename ud.data = file_obj ud.save() ufw = UploadedFileWrapper(ud.pk) ufw.orig_name = orig_name setattr(self, field_name, ufw) exp.store_block(self) else: exp.log(self.uuid, "Adding upload to field: %s" % field_name) log.debug("Adding upload to field: %s", field_name) ud, is_created = UploadedData.objects.get_or_create( exp=exp, block_uuid=self.uuid, var_name=field_name, filename=orig_name) file_obj.name = local_filename ud.data = file_obj ud.filename = orig_name ud.save() ufw = UploadedFileWrapper(ud.pk) ufw.orig_name = orig_name r = get_redis_instance() with redis_lock.Lock( r, ExpKeys.get_block_global_lock_key(self.exp_id, self.uuid)): exp.log(self.uuid, "Enter lock, file: %s" % orig_name) log.debug("Enter lock, file: %s", orig_name) block = exp.get_block(self.uuid) attr = getattr(block, field_name) attr[orig_name] = ufw exp.log( self.uuid, "Added upload `%s` to collection: %s" % (orig_name, attr.keys())) log.debug("Added upload `%s` to collection: %s", orig_name, attr.keys()) exp.store_block(block) exp.log(self.uuid, "Exit lock, file: %s" % orig_name) log.debug("Exit lock, file: %s", orig_name) def erase_file_input(self, exp, data): field_name = json.loads(data)["field_name"] field = self._block_serializer.params.get(field_name) if not field.options.get("multiple", False): # single stored value ufw = getattr(self, field_name) ud = ufw.ud ud.delete() setattr(self, field_name, None) else: # multiple ufw_dict = getattr(self, field_name) for name, ufw in ufw_dict.items(): ufw.ud.delete() setattr(self, field_name, MultiUploadField()) exp.store_block(self) def add_dyn_input_hook(self, exp, dyn_port, new_port): """ to override later """ pass def add_input_port(self, new_port): self._block_serializer.register(new_port) self.input_manager.register(new_port) def add_dyn_input(self, exp, received_block, *args, **kwargs): spec = received_block.get("_add_dyn_port") if not spec: return if not spec['new_port'] or not spec['input']: return dyn_port_name = spec['input'] dyn_port = self._block_serializer.inputs.get(dyn_port_name) if not dyn_port: return order_num = 1000 + abs(dyn_port.order_num) * 10 dp = getattr(self, dyn_port_name) if dp: order_num += len(dp) new_port = InputBlockField( name=spec['new_port'], required_data_type=dyn_port.required_data_type, order_num=order_num) self.add_input_port(new_port) getattr(self, dyn_port_name).append(spec["new_port"]) self.add_dyn_input_hook(exp, dyn_port, new_port) exp.store_block(self) def validate_params_hook(self, exp, *args, **kwargs): return True def validate_params(self, exp, *args, **kwargs): is_valid = True # check required inputs if not self.input_manager.validate_inputs(self, self.bound_inputs, self.errors, self.warnings): is_valid = False # check user provided values if not self._block_serializer.validate_params(self, exp): is_valid = False if not self.validate_params_hook(exp, *args, **kwargs): is_valid = False if is_valid: self.errors = [] self.do_action("on_params_is_valid", exp) else: self.do_action("on_params_not_valid", exp) def on_params_is_valid(self, exp, *args, **kwargs): self.errors = [] exp.store_block(self) def on_params_not_valid(self, exp, *args, **kwargs): pass def clean_errors(self): self.errors = [] def error(self, exp, new_errors=None): if isinstance(new_errors, collections.Iterable): self.errors.extend(new_errors) elif new_errors: self.errors.append(new_errors) exp.store_block(self) def reset_execution(self, exp, *args, **kwargs): self.clean_errors() exp.store_block(self)
class UserUploadComplex(GenericBlock): block_base_name = "UPLOAD_CMPLX" block_group = GroupType.INPUT_DATA name = "Upload mRna/miRna/methyl dataset" _block_actions = ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("process_upload", ["valid_params", "processing_upload"], "processing_upload", "Process uploaded data"), ActionRecord("success", ["processing_upload"], "done", reload_block_in_client=True), ActionRecord("error", ["processing_upload"], "valid_params"), ]) m_rna_matrix = ParamField("m_rna_matrix", title="mRNA expression", order_num=10, input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM) m_rna_platform = ParamField("m_rna_platform", title="Platform ID", order_num=11, input_type=InputType.TEXT, field_type=FieldType.STR, required=False) m_rna_unit = ParamField( "m_rna_unit", title="Working unit [used when platform is unknown]", init_val=None, order_num=12, input_type=InputType.TEXT, field_type=FieldType.STR, required=False) mi_rna_matrix = ParamField("mi_rna_matrix", title=u"μRNA expression", order_num=20, input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM, required=False) methyl_matrix = ParamField("methyl_matrix", title="Methylation expression", order_num=30, input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM, required=False) pheno_matrix = ParamField("pheno_matrix", title="Phenotype matrix", order_num=40, input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM, required=False) csv_sep = ParamField("csv_sep", title="CSV separator symbol", order_num=50, input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",", options={ "inline_select_provider": True, "select_options": [ [" ", "space ( )"], [",", "comma (,)"], ["\t", "tab (\\t)"], [";", "semicolon (;)"], [":", "colon (:)"], ] }) _is_sub_pages_visible = BlockField("is_sub_pages_visible", FieldType.RAW, is_a_property=True) _m_rna_es = OutputBlockField(name="m_rna_es", field_type=FieldType.HIDDEN, provided_data_type="ExpressionSet") _m_rna_annotation = OutputBlockField( name="m_rna_annotation", field_type=FieldType.HIDDEN, provided_data_type="PlatformAnnotation") _mi_rna_es = OutputBlockField(name="mi_rna_es", field_type=FieldType.HIDDEN, provided_data_type="ExpressionSet") _methyl_es = OutputBlockField(name="methyl_es", field_type=FieldType.HIDDEN, provided_data_type="ExpressionSet") pages = BlockField("pages", FieldType.RAW, init_val={ "assign_phenotype_classes": { "title": "Assign phenotype classes", "resource": "assign_phenotype_classes", "widget": "widgets/assign_phenotype_classes.html" }, }) @property def is_sub_pages_visible(self): if self.state in [ 'source_was_preprocessed', 'sample_classes_assigned', 'ready', 'done' ]: return True return False def process_upload(self, exp, *args, **kwargs): """ @param exp: Experiment """ # TODO: move to celery self.clean_errors() sep = getattr(self, "csv_sep", " ") try: if not self.pheno_matrix: self.warnings.append(Exception("Phenotype is undefined")) pheno_df = None else: pheno_df = self.pheno_matrix.get_as_data_frame(sep) pheno_df.set_index(pheno_df.columns[0]) # TODO: solve somehow better: Here we add empty column with user class assignment pheno_df[ExpressionSet( None, None).pheno_metadata["user_class_title"]] = "" if self.m_rna_matrix is not None: m_rna_assay_df = self.m_rna_matrix.get_as_data_frame(sep) m_rna_es = ExpressionSet(base_dir=exp.get_data_folder(), base_filename="%s_m_rna_es" % self.uuid) m_rna_es.store_assay_data_frame(m_rna_assay_df) m_rna_es.store_pheno_data_frame(pheno_df) m_rna_es.working_unit = self.m_rna_unit self.set_out_var("m_rna_es", m_rna_es) # TODO: fetch GPL annotation if GPL id was provided if self.mi_rna_matrix is not None: mi_rna_assay_df = self.mi_rna_matrix.get_as_data_frame(sep) mi_rna_es = ExpressionSet(base_dir=exp.get_data_folder(), base_filename="%s_mi_rna_es" % self.uuid) mi_rna_es.store_assay_data_frame(mi_rna_assay_df) mi_rna_es.store_pheno_data_frame(pheno_df) self.set_out_var("mi_rna_es", mi_rna_es) if self.methyl_matrix is not None: methyl_assay_df = self.methyl_matrix.get_as_data_frame(sep) methyl_es = ExpressionSet(base_dir=exp.get_data_folder(), base_filename="%s_methyl_es" % self.uuid) methyl_es.store_assay_data_frame(methyl_assay_df) methyl_es.store_pheno_data_frame(pheno_df) self.set_out_var("methyl_es", methyl_es) self.do_action("success", exp) except Exception as e: ex_type, ex, tb = sys.exc_info() traceback.print_tb(tb) self.do_action("error", exp, e) # self.celery_task_fetch.apply_async() def phenotype_for_js(self, exp, *args, **kwargs): m_rna_es = self.get_out_var("m_rna_es") mi_rna_es = self.get_out_var("mi_rna_es") methyl_es = self.get_out_var("methyl_es") es = None if m_rna_es is not None: es = m_rna_es elif mi_rna_es is not None: es = mi_rna_es elif methyl_es is not None: es = methyl_es if es is None: raise Exception("No data was stored before") return prepare_phenotype_for_js_from_es(es) def update_user_classes_assignment(self, exp, request, *args, **kwargs): m_rna_es = self.get_out_var("m_rna_es") mi_rna_es = self.get_out_var("mi_rna_es") methyl_es = self.get_out_var("methyl_es") es = None if m_rna_es is not None: es = m_rna_es elif mi_rna_es is not None: es = mi_rna_es elif methyl_es is not None: es = methyl_es if es is None: raise Exception("No data was stored before") pheno_df = es.get_pheno_data_frame() received = json.loads(request.body) pheno_df[received["user_class_title"]] = received["classes"] for work_es in [m_rna_es, mi_rna_es, methyl_es]: if work_es is not None: work_es.pheno_metadata["user_class_title"] = received[ "user_class_title"] work_es.store_pheno_data_frame(pheno_df) # import ipdb; ipdb.set_trace() exp.store_block(self) def success(self, exp, *args, **kwargs): pass
class UserUpload(GenericBlock): block_base_name = "UPLOAD" block_group = GroupType.INPUT_DATA is_abstract = True _block_actions = ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("process_upload", ["valid_params", "processing_upload"], "processing_upload", "Process uploaded data", reload_block_in_client=True), ActionRecord("success", ["processing_upload"], "done", reload_block_in_client=True), ActionRecord("error", ["processing_upload"], "valid_params", reload_block_in_client=True), ]) es_matrix = ParamField("es_matrix", title="Expression set matrix", order_num=0, input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM) es_matrix_ori = ParamField( "es_matrix_ori", title="Matrix orientation", order_num=1, input_type=InputType.SELECT, field_type=FieldType.STR, init_val="SxG", options={ "inline_select_provider": True, "select_options": [ ["SxG", "Samples x Genes"], ["GxS", "Genes x Samples"] ] } ) pheno_matrix = ParamField("pheno_matrix", title="Phenotype matrix", order_num=10, input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM) gpl_platform = ParamField("gpl_platform", title="Platform ID", order_num=20, input_type=InputType.TEXT, field_type=FieldType.STR, required=False) working_unit = ParamField("working_unit", title="Working unit [used when platform is unknown]", order_num=3, input_type=InputType.TEXT, field_type=FieldType.STR, required=False) # TODO: add sub page field # pages = BlockField("pages", FieldType.RAW, init_val={ # "assign_sample_classes": { # "title": "Assign sample classes", # "resource": "assign_sample_classes", # "widget": "widgets/fetch_gse/assign_sample_classes.html" # }, # }) _is_sub_pages_visible = BlockField("is_sub_pages_visible", FieldType.RAW, is_a_property=True) ### PARAMETERS _expression_set = OutputBlockField(name="expression_set", field_type=FieldType.HIDDEN, provided_data_type="ExpressionSet") _gpl_annotation = OutputBlockField(name="gpl_annotation", field_type=FieldType.HIDDEN, provided_data_type="PlatformAnnotation") # TODO: COPY PASTE from fetch_gse block pages = BlockField("pages", FieldType.RAW, init_val={ "assign_phenotype_classes": { "title": "Assign phenotype classes", "resource": "assign_phenotype_classes", "widget": "widgets/assign_phenotype_classes.html" }, }) def __init__(self, *args, **kwargs): super(UserUpload, self).__init__("User upload", *args, **kwargs) @property def is_sub_pages_visible(self): if self.state in ['source_was_preprocessed', 'sample_classes_assigned', 'ready', 'done']: return True return False def phenotype_for_js(self, exp, *args, **kwargs): return prepare_phenotype_for_js_from_es(self.get_out_var("expression_set")) def update_user_classes_assignment(self, exp, request, *args, **kwargs): es = self.get_out_var("expression_set") pheno_df = es.get_pheno_data_frame() received = json.loads(request.body) es.pheno_metadata["user_class_title"] = received["user_class_title"] pheno_df[received["user_class_title"]] = received["classes"] es.store_pheno_data_frame(pheno_df) exp.store_block(self) def process_upload(self, exp, *args, **kwargs): """ @param exp: Experiment """ self.clean_errors() assay_df = pd.DataFrame.from_csv(self.es_matrix.get_file()) es = ExpressionSet(base_dir=exp.get_data_folder(), base_filename="%s_annotation" % self.uuid) pheno_df = pd.DataFrame.from_csv(self.pheno_matrix.get_file()) pheno_df.set_index(pheno_df.columns[0]) user_class_title = es.pheno_metadata["user_class_title"] if user_class_title not in pheno_df.columns: pheno_df[es.pheno_metadata["user_class_title"]] = "" # if matrix is bad oriented, then do transposition if self.es_matrix_ori == "GxS": assay_df = assay_df.T es.store_assay_data_frame(assay_df) es.store_pheno_data_frame(pheno_df) if self.working_unit: es.working_unit = self.working_unit self.set_out_var("expression_set", es) exp.store_block(self) self.do_action("success", exp) # self.celery_task_fetch.apply_async() def success(self, exp, *args, **kwargs): pass
class GeneSetsView(GenericBlock): block_base_name = "GS_VIEW" block_group = GroupType.VISUALIZE name = "Gene Sets view" is_block_supports_auto_execution = False _block_actions = ActionsList([ ActionRecord( "save_params", ["created", "valid_params", "done", "ready", "input_bound"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"), ]) _input_dictionary_set = InputBlockField(name="gs", order_num=10, required_data_type="GeneSets", required=True) _table_for_js = BlockField(name="table_js", field_type=FieldType.RAW, is_a_property=True) _export_raw_results_url = BlockField(name="export_raw_results_url", field_type=FieldType.STR, is_a_property=True) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=["dictionary_set_view.html"]) @property def export_raw_results_url(self): return reverse("block_field_formatted", kwargs={ "exp_id": self.exp_id, "block_uuid": self.uuid, "field": "export_json", "format": "json" }) @property def table_js(self): cs = self.get_input_var("gs") """:type :GeneSets""" if cs: table = cs.get_gs(conv=False).genes table_headers = ['key', 'value'] column_title_to_code_name = { title: "_" + hashlib.md5(title).hexdigest()[:8] for title in table_headers } fields_list = [ column_title_to_code_name[title] for title in table_headers ] return { "columns": [{ "title": title, "field": column_title_to_code_name[title], "visible": True } for title in table_headers], "rows": [ dict(zip(fields_list, row)) for row in [(k, list(v)) for k, v in table.iteritems()] # table.to_records().tolist() #[:100] ] } else: return None def export_json(self, exp, *args, **kwargs): ds = self.get_input_var("gs") dic = ds.get_gs().genes return dic
class CrossValidation(UniformMetaBlock): block_base_name = "CROSS_VALID" name = "Cross validation K-fold" _cv_actions = ActionsList( [ActionRecord("become_ready", ["valid_params"], "ready")]) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=["cv_info.html"]) _input_es_dyn = InputBlockField(name="es_inputs", required_data_type="ExpressionSet", required=True, multiply_extensible=True, order_num=-1) folds_num = ParamField(name="folds_num", title="Folds number", order_num=10, input_type=InputType.TEXT, field_type=FieldType.INT, init_val=5) repeats_num = ParamField(name="repeats_num", title="Repeats number", order_num=20, input_type=InputType.TEXT, field_type=FieldType.INT, init_val=1) def get_fold_labels(self): return [ "fold_%s" % (num + 1, ) for num in range(self.folds_num * self.repeats_num) ] def add_dyn_input_hook(self, exp, dyn_port, new_port): """ @type new_port: InputBlockField """ new_inner_output_train = InnerOutputField( name="%s_train_i" % new_port.name, provided_data_type=new_port.required_data_type) new_inner_output_test = InnerOutputField( name="%s_test_i" % new_port.name, provided_data_type=new_port.required_data_type) self.inner_output_es_names_map[new_port.name] = \ (new_inner_output_train.name, new_inner_output_test.name) self.register_inner_output_variables( [new_inner_output_train, new_inner_output_test]) def execute(self, exp, *args, **kwargs): self.clean_errors() self.inner_output_manager.reset() es_dict = { inp_name: self.get_input_var(inp_name) for inp_name in self.es_inputs } self.celery_task = wrapper_task.s( generate_cv_folds, exp, self, folds_num=self.folds_num, repeats_num=self.repeats_num, es_dict=es_dict, inner_output_es_names_map=self.inner_output_es_names_map, success_action="on_folds_generation_success", ) exp.store_block(self) self.celery_task.apply_async() def on_params_is_valid(self, exp, *args, **kwargs): super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs) self.do_action("become_ready", exp) def become_ready(self, *args, **kwargs): pass
class PcaVisualize(GenericBlock): block_base_name = "PCA_VISUALIZE" name = "2D PCA Plot" block_group = GroupType.VISUALIZE is_block_supports_auto_execution = False _block_actions = ActionsList([ ActionRecord( "save_params", ["created", "valid_params", "done", "ready", "input_bound"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"), ActionRecord("compute_pca", ["valid_params"], "computing_pca", user_title="Compute PCA"), ActionRecord( "pca_done", ["computing_pca"], "done", ), ActionRecord("reset_execution", ["*", "done", "execution_error", "ready", "working"], "ready", user_title="Reset execution") #ActionRecord("update", ["input_bound", "ready"], "ready"), ]) input_es = InputBlockField(name="es", order_num=10, required_data_type="ExpressionSet", required=True) chart_series = BlockField(name="chart_series", field_type=FieldType.RAW, init_val=[]) chart_categories = BlockField(name="chart_categories", field_type=FieldType.SIMPLE_LIST, init_val=[]) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=["pca.html"]) def __init__(self, *args, **kwargs): super(PcaVisualize, self).__init__("PCA visualise", *args, **kwargs) def on_params_is_valid(self, exp, *args, **kwargs): super(PcaVisualize, self).on_params_is_valid(exp, *args, **kwargs) self.do_action("compute_pca", exp) def compute_pca(self, exp, *args, **kwargs): log.info("compute pca invoked") es = self.get_input_var("es") """:type :ExpressionSet""" df = es.get_assay_data_frame() pheno_df = es.get_pheno_data_frame() target_column = es.pheno_metadata['user_class_title'] X = df.as_matrix().transpose() pca_model = decomposition.PCA(n_components=2) pca_model.fit(X) Xp = pca_model.transform(X).tolist() names = [x.strip() for x in pheno_df[target_column].tolist()] series_by_names = defaultdict(list) for x, name in zip(Xp, names): series_by_names[name].append(x) self.chart_series = [{ "name": name, "data": points } for name, points in series_by_names.iteritems()] self.do_action("pca_done", exp) def pca_done(self, exp, *args, **kwargs): log.info("pca done")
class FetchGSE(GenericBlock): block_base_name = "FETCH_GEO" name = "Fetch from NCBI GEO" block_group = GroupType.INPUT_DATA _block_actions = ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("start_fetch", ["valid_params", "done"], "source_is_being_fetched", "Start fetch"), ActionRecord("error_during_fetch", ["source_is_being_fetched"], "form_valid", reload_block_in_client=True), ActionRecord("successful_fetch", ["source_is_being_fetched"], "source_was_fetched", reload_block_in_client=True), ActionRecord("start_preprocess", ["source_was_fetched", "source_was_preprocessed"], "source_is_being_fetched", "Run preprocess"), ActionRecord("error_during_preprocess", ["source_is_being_fetched"], "source_was_fetched", reload_block_in_client=True), ActionRecord("successful_preprocess", ["source_is_being_fetched"], "source_was_preprocessed", reload_block_in_client=True), ActionRecord("assign_sample_classes", ["source_was_preprocessed", "done"], "done"), ]) source_file = BlockField("source_file", FieldType.CUSTOM, None) pages = BlockField("pages", FieldType.RAW, init_val={ "assign_phenotype_classes": { "title": "Assign phenotype classes", "resource": "assign_phenotype_classes", "widget": "widgets/assign_phenotype_classes.html" }, }) _is_sub_pages_visible = BlockField("is_sub_pages_visible", FieldType.RAW, is_a_property=True) ### PARAMETERS geo_uid = ParamField("geo_uid", "Geo accession id", InputType.TEXT, FieldType.STR, "") _expression_set = OutputBlockField(name="expression_set", field_type=FieldType.HIDDEN, provided_data_type="ExpressionSet") def __init__(self, *args, **kwargs): #"Fetch ncbi gse", super(FetchGSE, self).__init__(*args, **kwargs) self.celery_task_fetch = None self.celery_task_preprocess = None def is_form_fields_editable(self): if self.state in ['created', 'form_modified']: return True return False def phenotype_for_js(self, exp, *args, **kwargs): headers_options = { "custom_title_prefix_map": [ ("Sample_title", "Title"), ("Sample_description", "Description"), ("Sample_characteristics", "Characteristics"), ("Sample_organism", "Organism"), ("Sample_geo_accession", "GEO #"), ("Sample_", ""), ], "prefix_order": [ "Sample_geo_accession", "Sample_title", "Sample_description", "Sample_contact", "Sample_characteristics", ], "prefix_hide": { "Sample_contact", "Sample_channel", "Sample_data_row_count", "Sample_data", "Sample_platform", "Sample_growth", "Sample_series_id", "Sample_status", "Sample_extract", "Sample_supplementary_file", "Sample_hyb", "Sample_label", "Sample_source", "Sample_last_update", "Sample_molecule", "Sample_organism", "Sample_scan", "Sample_taxid", "Sample_type", "Sample_submission", } } return prepare_phenotype_for_js_from_es( self.get_out_var("expression_set"), headers_options) @property def is_sub_pages_visible(self): if self.state in [ 'source_was_preprocessed', 'sample_classes_assigned', 'ready' ]: return True return False def start_fetch(self, exp, *args, **kwargs): """ @param exp: Experiment """ self.clean_errors() self.celery_task_fetch = wrapper_task.s( fetch_geo_gse, exp, self, geo_uid=self.geo_uid, success_action="successful_fetch", error_action="error_during_fetch", ignore_cache=False) exp.store_block(self) self.celery_task_fetch.apply_async() def error_during_fetch(self, exp, *args, **kwargs): exp.store_block(self) def successful_fetch(self, exp, source_file, *args, **kwargs): self.clean_errors() self.source_file = source_file self.do_action("start_preprocess", exp) exp.store_block(self) def start_preprocess(self, exp, *args, **kwargs): self.celery_task_preprocess = wrapper_task.s( preprocess_soft, exp, self, source_file=self.source_file, success_action="successful_preprocess", error_action="error_during_preprocess") exp.store_block(self) self.celery_task_preprocess.apply_async() def error_during_preprocess(self, exp, *args, **kwargs): exp.store_block(self) def successful_preprocess(self, exp, es, *args, **kwargs): """ @type es: ExpressionSet @type ann: PlatformAnnotation """ self.set_out_var("expression_set", es) # self.set_out_var("gpl_annotation", ann) self.clean_errors() exp.store_block(self) msg = BlockUpdated(self.exp_id, self.uuid, self.base_name) msg.comment = u"Dataset %s was preprocessed, \n please assign samples to classes" % self.geo_uid msg.silent = False msg.send() def update_user_classes_assignment(self, exp, request, *args, **kwargs): #TODO: unify code with user upload es = self.get_out_var("expression_set") pheno_df = es.get_pheno_data_frame() received = json.loads(request.body) es.pheno_metadata["user_class_title"] = received["user_class_title"] pheno_df[received["user_class_title"]] = received["classes"] es.store_pheno_data_frame(pheno_df) exp.store_block(self) self.do_action("assign_sample_classes", exp) def assign_sample_classes(self, exp, *args, **kwargs): pass
class TableResultView(GenericBlock): block_base_name = "TR_VIEW" block_group = GroupType.VISUALIZE name = "Table Result view" is_block_supports_auto_execution = False _block_actions = ActionsList([ ActionRecord( "save_params", ["created", "valid_params", "done", "ready", "input_bound"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"), ]) input_table_result = InputBlockField(name="tr", order_num=10, required_data_type="TableResult", required=True) _table_for_js = BlockField(name="table_js", field_type=FieldType.RAW, is_a_property=True) _export_table_url = BlockField(name="export_table_url", field_type=FieldType.STR, is_a_property=True) _export_raw_results_url = BlockField(name="export_raw_results_url", field_type=FieldType.STR, is_a_property=True) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=["table_result_view.html"]) @property def export_table_url(self): return reverse("block_field_formatted", kwargs={ "exp_id": self.exp_id, "block_uuid": self.uuid, "field": "export_table", "format": "csv" }) @property def export_raw_results_url(self): return reverse("block_field_formatted", kwargs={ "exp_id": self.exp_id, "block_uuid": self.uuid, "field": "export_rc", "format": "json" }) @property def table_js(self): tr = self.get_input_var("tr") """:type :TableResult""" if tr: table = tr.get_table() table_headers = ["#"] + table.columns.tolist() column_title_to_code_name = { title: "_" + hashlib.md5(title).hexdigest()[:8] for title in table_headers } fields_list = [ column_title_to_code_name[title] for title in table_headers ] return { "columns": [{ "title": title, "field": column_title_to_code_name[title], "visible": True } for title in table_headers], "rows": [ dict(zip(fields_list, row)) for row in table.to_records().tolist() #[:100] ] } else: return None def export_rc(self, exp, *args, **kwargs): return self.table_js def export_table(self, exp, *args, **kwargs): pd_float_format_func = lambda x: "%1.4f" % x tr = self.get_input_var("tr") """:type :TableResult""" table = tr.get_table() out = StringIO.StringIO() # Float format in fact doesn't work in pandas # table.df.to_csv(out, float_format=pd_float_format_func) # tmp_df = table.applymap(pd_float_format_func) tmp_df.to_csv(out, float_format=pd_float_format_func) out.seek(0) return out.read()
class PatternView(GenericBlock): block_base_name = "PA_VIEW" block_group = GroupType.VISUALIZE name = "Patterns Visualizer" is_block_supports_auto_execution = False _block_actions = ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done", "ready", "input_bound"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params") ]) _input_patterns = InputBlockField(name="patterns", order_num=10, required_data_type="GeneSets", required=True) _input_edges = InputBlockField(name="edges", order_num=20, required_data_type="Edges", required=True) _diff_expr = InputBlockField(name="diff_expr", order_num=30, required_data_type="DiffExpr", required=True) _graph_for_js = BlockField(name="graph_js", field_type=FieldType.RAW, is_a_property=True) _edges_for_js = BlockField(name="edges", field_type=FieldType.RAW, is_a_property=False) _export_raw_results_url = BlockField(name="export_raw_results_url", field_type=FieldType.STR, is_a_property=True) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[ "pattern_view.html" ]) @property def export_raw_results_url(self): return reverse("block_field_formatted", kwargs={ "exp_id": self.exp_id, "block_uuid": self.uuid, "field": "export_json", "format": "json" }) @property def graph_js(self): # return None diff_expr = self.get_input_var("diff_expr") edges = self.get_input_var("edges") cs = self.get_input_var("patterns") if cs and edges and diff_expr: pattern_set = cs.get_gs(conv=False).genes edges = edges.load_edges() diff_expr = diff_expr.load_expr() import math # "x": math.cos(2*i*math.pi/len(com)) + 5*math.cos(2*j*math.pi/len(pattern_set)), # "y": math.sin(2*i*math.pi/len(com)) + 5*math.sin(2*j*math.pi/len(pattern_set)), res = { "nodes": [ {"id": "%s_%s" % (j, gene), "label": gene, "x": math.cos(2*i*math.pi/len(com)) + math.floor(math.sqrt(len(pattern_set))) * (j % int(math.floor(math.sqrt(len(pattern_set))))), "y": math.sin(2*i*math.pi/len(com)) + math.floor(math.sqrt(len(pattern_set))) * (j / int(math.floor(math.sqrt(len(pattern_set))))), "color": "rgb(%s, %s, %s)" % (abs(int(math.floor(((diff_expr[gene] + 1) * 128) - 1))), abs(int(math.floor(255-(((diff_expr[gene] + 1) * 128) - 1)))), 0), "size": 2 + abs(diff_expr[gene]) * 2 } for j, com in enumerate(pattern_set) for i, gene in enumerate(com) ], "edges": [ {"id": "%s_%s_%s" % (k, i, j), "source": "%s_%s" % (k, i), "target": "%s_%s" % (k, j)} for k, graph_edges in enumerate(edges) for i, j in graph_edges ] } return res else: return None def export_json(self, exp, *args, **kwargs): ds = self.get_input_var("es") dic = ds.load_set() return dic def process_upload(self, exp, *args, **kwargs): pass def success(self, exp, *args, **kwargs): pass
class BoxPlot(RcVisualizer): block_base_name = "BOX_PLOT" block_group = GroupType.VISUALIZE name = "Box plot" boxplot_config = ParamField(name="boxplot_config", title="", input_type=InputType.HIDDEN, field_type=FieldType.RAW) plot_inputs = BlockField(name="plot_inputs", field_type=FieldType.RAW, init_val=[]) chart_series = BlockField(name="chart_series", field_type=FieldType.RAW, init_val=[]) chart_categories = BlockField(name="chart_categories", field_type=FieldType.SIMPLE_LIST, init_val=[]) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[ "box_plot.html" ]) def __init__(self, *args, **kwargs): super(BoxPlot, self).__init__(*args, **kwargs) self.boxplot_config = { "agg_axis_for_scoring": {}, "compare_axis_by_boxplot": {}, } @log_timing def compute_boxplot_stats(self, exp, *args, **kwargs): agg_axis_for_scoring = [ axis for axis, is_selected in self.boxplot_config["agg_axis_for_scoring"].items() if is_selected ] compare_axis_by_boxplot = [ axis for axis, is_selected in self.boxplot_config["compare_axis_by_boxplot"].items() if is_selected ] rc = self.rc if compare_axis_by_boxplot and rc: rc.load() df = rc.get_pandas_slice_for_boxplot( compare_axis_by_boxplot, agg_axis_for_scoring or [], self.metric ) categories = [] for row_id, _ in df.iterrows(): if type(row_id) == tuple: title = ":".join(map(str, row_id)) else: title = str(row_id) categories.append(title) # import ipdb; ipdb.set_trace() bps = boxplot_stats(np.array(df.T, dtype=float)) if bps: self.chart_series = [{ "data": [], }, { "name": "Outliers", "data": [], "type": "scatter", "marker": { "fillColor": "white", "lineWidth": 1, "lineColor": "blue" }, "tooltip": { "pointFormat": '%s: {point.y} ' % self.metric } }] self.chart_series[0]["data"] = [ [ fix_nan(rec["whislo"]), fix_nan(rec["q1"]), fix_nan(rec["med"]), fix_nan(rec["q3"]), fix_nan(rec["whishi"]) ] for rec in bps ] for cat_idx, rec in enumerate(bps): for outlier in rec['fliers']: self.chart_series[1]["data"].append([cat_idx, outlier]) self.chart_categories = categories exp.store_block(self) def on_params_is_valid(self, exp, *args, **kwargs): super(BoxPlot, self).on_params_is_valid(exp, *args, **kwargs) if self.rc is not None: for axis in self.rc.axis_list: if axis not in self.boxplot_config["agg_axis_for_scoring"]: self.boxplot_config["agg_axis_for_scoring"][axis] = "" if axis not in self.boxplot_config["compare_axis_by_boxplot"]: self.boxplot_config["compare_axis_by_boxplot"][axis] = "" self.compute_boxplot_stats(exp) exp.store_block(self)
class ComoduleSetView(GenericBlock): block_base_name = "CS_VIEW" block_group = GroupType.VISUALIZE name = "Comodule Set View" is_block_supports_auto_execution = False _block_actions = ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done", "ready", "input_bound"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"), ]) input_comodule_set = InputBlockField(name="cs", order_num=10, required_data_type="ComoduleSet", required=True) _table_for_js = BlockField(name="table_js", field_type=FieldType.RAW, is_a_property=True) _export_raw_results_url = BlockField(name="export_raw_results_url", field_type=FieldType.STR, is_a_property=True) _export_results_csv_url = BlockField(name="export_results_csv_url", field_type=FieldType.STR, is_a_property=True) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[ "comodule_set_view.html" ]) @property def export_results_csv_url(self): return reverse("block_field_formatted", kwargs={ "exp_id": self.exp_id, "block_uuid": self.uuid, "field": "export_csv", "format": "csv" }) @property def export_raw_results_url(self): return reverse("block_field_formatted", kwargs={ "exp_id": self.exp_id, "block_uuid": self.uuid, "field": "export_json", "format": "json" }) @property def table_js(self): cs = self.get_input_var("cs") """:type :ComoduleSet""" if cs: table = cs.load_set() """:type :dict""" if isinstance(table[0], set): columns = ["values"] else: columns = ["values", "values"] # table_headers = ["#"] + table.columns.tolist() table_headers = ["#"] + columns column_title_to_code_name = { title: "_" + hashlib.md5(title).hexdigest()[:8] for title in table_headers } fields_list = [column_title_to_code_name[title] for title in table_headers] return { "columns": [ { "title": title, "field": column_title_to_code_name[title], "visible": True } for title in table_headers ], "rows": [ dict(zip(fields_list, [idx, value])) for idx, value in table.iteritems() # [:100] ] } else: return None def export_json(self, exp, *args, **kwargs): ds = self.get_input_var("cs") table = ds.load_set() return [(idx, list(value)) for idx, value in table.iteritems()] def export_csv(self, exp, *args, **kwargs): import csv import StringIO ds = self.get_input_var("cs") tab = ds.load_set() out = StringIO.StringIO() w = csv.writer(out) w.writerows(tab.items()) out.seek(0) return out.read()
class UserUploadComplex(GenericBlock): # unit_options = block_base_name = "UPLOAD_CMPLX" block_group = GroupType.INPUT_DATA name = "Upload mRna/miRna/methyl" _block_actions = ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("process_upload", ["valid_params", "processing_upload"], "processing_upload", "Process uploaded data"), ActionRecord("success", ["processing_upload"], "done", reload_block_in_client=True), ActionRecord("error", ["processing_upload"], "valid_params"), ]) m_rna_matrix = ParamField("m_rna_matrix", title="mRNA expression", order_num=10, input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM) m_rna_platform = ParamField("m_rna_platform", title="Platform ID", order_num=11, input_type=InputType.TEXT, field_type=FieldType.STR, required=False) m_rna_unit = ParamField( "m_rna_unit", title="Working unit [used when platform is unknown]", order_num=12, input_type=InputType.SELECT, field_type=FieldType.STR, required=False, init_val="RefSeq", options={ "inline_select_provider": True, "select_options": [["RefSeq", "RefSeq"], ["Entrez", "EntrezID"], ["Symbol", "Symbol"]] }) m_rna_matrix_ori = ParamField("m_rna_matrix_ori", title="Matrix orientation", order_num=13, input_type=InputType.SELECT, field_type=FieldType.STR, init_val="SxG", options={ "inline_select_provider": True, "select_options": [["SxG", "Samples x Genes"], ["GxS", "Genes x Samples"]] }) csv_sep_m_rna = ParamField("csv_sep_m_rna", title="CSV separator symbol", order_num=14, input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",", options={ "inline_select_provider": True, "select_options": [ [" ", "space ( )"], [",", "comma (,)"], ["\t", "tab (\\t)"], [";", "semicolon (;)"], [":", "colon (:)"], ] }) mi_rna_matrix = ParamField("mi_rna_matrix", title=u"μRNA expression", order_num=20, input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM, required=False) mi_rna_platform = ParamField("mi_rna_platform", title="Platform ID", order_num=21, input_type=InputType.TEXT, field_type=FieldType.STR, required=False) mi_rna_unit = ParamField( "mi_rna_unit", title="Working unit [used when platform is unknown]", order_num=22, input_type=InputType.SELECT, field_type=FieldType.STR, required=False, init_val="RefSeq", options={ "inline_select_provider": True, "select_options": [["RefSeq", "RefSeq"], ["mirbase", "miRBase ID"]] }) mi_rna_matrix_ori = ParamField("mi_rna_matrix_ori", title="Matrix orientation", order_num=23, input_type=InputType.SELECT, field_type=FieldType.STR, init_val="SxG", options={ "inline_select_provider": True, "select_options": [["SxG", "Samples x Genes"], ["GxS", "Genes x Samples"]] }) csv_sep_mi_rna = ParamField("csv_sep_mi_rna", title="CSV separator symbol", order_num=24, input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",", options={ "inline_select_provider": True, "select_options": [ [" ", "space ( )"], [",", "comma (,)"], ["\t", "tab (\\t)"], [";", "semicolon (;)"], [":", "colon (:)"], ] }) methyl_matrix = ParamField("methyl_matrix", title="Methylation expression", order_num=30, input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM, required=False) methyl_platform = ParamField("methyl_platform", title="Platform ID", order_num=31, input_type=InputType.TEXT, field_type=FieldType.STR, required=False) # methyl_unit = ParamField("methyl_unit", title="Working unit [used when platform is unknown]", init_val=None, # order_num=32, input_type=InputType.TEXT, field_type=FieldType.STR, required=False) methyl_matrix_ori = ParamField("methyl_matrix_ori", title="Matrix orientation", order_num=33, input_type=InputType.SELECT, field_type=FieldType.STR, init_val="SxG", options={ "inline_select_provider": True, "select_options": [["SxG", "Samples x Genes"], ["GxS", "Genes x Samples"]] }) csv_sep_methyl = ParamField("csv_sep_methyl", title="CSV separator symbol", order_num=34, input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",", options={ "inline_select_provider": True, "select_options": [ [" ", "space ( )"], [",", "comma (,)"], ["\t", "tab (\\t)"], [";", "semicolon (;)"], [":", "colon (:)"], ] }) pheno_matrix = ParamField("pheno_matrix", title="Phenotype matrix", order_num=40, input_type=InputType.FILE_INPUT, field_type=FieldType.CUSTOM, required=False) csv_sep_pheno = ParamField("csv_sep_pheno", title="CSV separator symbol", order_num=50, input_type=InputType.SELECT, field_type=FieldType.STR, init_val=",", options={ "inline_select_provider": True, "select_options": [ [" ", "space ( )"], [",", "comma (,)"], ["\t", "tab (\\t)"], [";", "semicolon (;)"], [":", "colon (:)"], ] }) _is_sub_pages_visible = BlockField("is_sub_pages_visible", FieldType.RAW, is_a_property=True) _m_rna_es = OutputBlockField(name="m_rna_es", field_type=FieldType.HIDDEN, provided_data_type="ExpressionSet") # _m_rna_annotation = OutputBlockField(name="m_rna_annotation", field_type=FieldType.HIDDEN, # provided_data_type="PlatformAnnotation") _mi_rna_es = OutputBlockField(name="mi_rna_es", field_type=FieldType.HIDDEN, provided_data_type="ExpressionSet") _methyl_es = OutputBlockField(name="methyl_es", field_type=FieldType.HIDDEN, provided_data_type="ExpressionSet") mrna_gpl_file = BlockField("mrna_gpl_file", FieldType.CUSTOM, None) mirna_gpl_file = BlockField("mirna_gpl_file", FieldType.CUSTOM, None) methyl_gpl_file = BlockField("methyl_gpl_file", FieldType.CUSTOM, None) pages = BlockField("pages", FieldType.RAW, init_val={ "assign_phenotype_classes": { "title": "Assign phenotype classes", "resource": "assign_phenotype_classes", "widget": "widgets/assign_phenotype_classes.html" }, }) @property def is_sub_pages_visible(self): if self.state in [ 'source_was_preprocessed', 'sample_classes_assigned', 'ready', 'done' ]: return True return False def __init__(self, *args, **kwargs): super(UserUploadComplex, self).__init__(*args, **kwargs) self.celery_task = None def process_upload(self, exp, *args, **kwargs): self.clean_errors() self.celery_task = wrapper_task.s(user_upload_complex_task, exp, self) exp.store_block(self) self.celery_task.apply_async() def phenotype_for_js(self, exp, *args, **kwargs): m_rna_es = self.get_out_var("m_rna_es") mi_rna_es = self.get_out_var("mi_rna_es") methyl_es = self.get_out_var("methyl_es") es = None if m_rna_es is not None: es = m_rna_es elif mi_rna_es is not None: es = mi_rna_es elif methyl_es is not None: es = methyl_es if es is None: raise Exception("No data was stored before") return prepare_phenotype_for_js_from_es(es) def update_user_classes_assignment(self, exp, request, *args, **kwargs): m_rna_es = self.get_out_var("m_rna_es") mi_rna_es = self.get_out_var("mi_rna_es") methyl_es = self.get_out_var("methyl_es") es = None if m_rna_es is not None: es = m_rna_es elif mi_rna_es is not None: es = mi_rna_es elif methyl_es is not None: es = methyl_es if es is None: raise Exception("No data was stored before") pheno_df = es.get_pheno_data_frame() received = json.loads(request.body) pheno_df[received["user_class_title"]] = received["classes"] for work_es in [m_rna_es, mi_rna_es, methyl_es]: if work_es is not None: work_es.pheno_metadata["user_class_title"] = received[ "user_class_title"] work_es.store_pheno_data_frame(pheno_df) # import ipdb; ipdb.set_trace() exp.store_block(self) def success(self, exp, m_rna_es, mi_rna_es, methyl_es): if m_rna_es: self.set_out_var("m_rna_es", m_rna_es) if mi_rna_es: self.set_out_var("mi_rna_es", mi_rna_es) if methyl_es: self.set_out_var("methyl_es", methyl_es) exp.store_block(self)
class KnnClassifier(GenericClassifier): block_base_name = "KNN" name = "Knn classifier" classifier_name = "knn" n_neighbors = ParamField( name="n_neighbors", title="Number of neighbors", input_type=InputType.TEXT, field_type=FieldType.INT, init_val=1, order_num=10, ) algorithm = ParamField( name="algorithm", title="Algorithm [optional]", input_type=InputType.SELECT, field_type=FieldType.STR, order_num=20, options={ "inline_select_provider": True, "select_options": [ ["ball_tree", "BallTree"], ["kd_tree", "KDTree"], ["brute", "Brute force search"], ["auto", "Auto guess algorithm"], ] } ) leaf_size = ParamField( name="leaf_size", title="Leaf size for BallTree or KDTree [optional]", input_type=InputType.TEXT, field_type=FieldType.INT, order_num=30, ) _metric_options = BlockField(name="metric_options", field_type=FieldType.RAW) metric_options = [ {"pk": "euclidean", "str": "Euclidean Distance"}, {"pk": "manhattan", "str": "Manhattan Distance"}, {"pk": "chebyshev", "str": "Chebyshev Distance"}, ] metric = ParamField( name="metric", title="The distance metric to use for the tree [optional]", input_type=InputType.SELECT, field_type=FieldType.STR, select_provider="metric_options", order_num=40, options={ "inline_select_provider": True, "select_options": [ ["euclidean", "Euclidean Distance"], ["manhattan", "Manhattan Distance"], ["chebyshev", "Chebyshev Distance"], ] } ) def collect_options(self): self.collect_option_safe("n_neighbors", int) self.collect_option_safe("algorithm") self.collect_option_safe("leaf_size", int) self.collect_option_safe("metric")
class EnrichmentVisualize(GenericBlock): block_base_name = "EV_VIEW" block_group = GroupType.VISUALIZE name = "Enrichment Visualize" is_block_supports_auto_execution = False _block_actions = ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done", "ready", "input_bound"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params") ]) _input_dictionary_set = InputBlockField(name="ds", order_num=10, required_data_type="DictionarySet", required=True) _table_for_js = BlockField(name="table_js", field_type=FieldType.RAW, is_a_property=True) _export_raw_results_url = BlockField(name="export_raw_results_url", field_type=FieldType.STR, is_a_property=True) _export_results_csv_url = BlockField(name="export_results_csv_url", field_type=FieldType.STR, is_a_property=True) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[ "enrichment_view.html" ]) def map_to_symbols(self, gene_set): genes = [gi.name for gi in GeneIdentifier.objects.filter(refseq__refseq__in=gene_set)] return genes @property def export_results_csv_url(self): return reverse("block_field_formatted", kwargs={ "exp_id": self.exp_id, "block_uuid": self.uuid, "field": "export_csv", "format": "csv" }) @property def export_raw_results_url(self): return reverse("block_field_formatted", kwargs={ "exp_id": self.exp_id, "block_uuid": self.uuid, "field": "export_json", "format": "json" }) @property def table_js(self): cs = self.get_input_var("ds") """:type :DictionarySet""" if cs: table = cs.load_dict() table_headers = ['comodule','(term, p-val)','genes'] column_title_to_code_name = { title: "_" + hashlib.md5(title).hexdigest()[:8] for title in table_headers } fields_list = [column_title_to_code_name[title] for title in table_headers] return { "columns": [ { "title": title, "field": column_title_to_code_name[title], "visible": True } for title in table_headers ], "rows": [ dict(zip(fields_list, row)) for row in [(k, v[1], set(self.map_to_symbols(v[0]))) for k, v in table.iteritems()] #table.to_records().tolist() #[:100] ] } else: return None def export_json(self, exp, *args, **kwargs): ds = self.get_input_var("ds") dic = ds.load_dict() return dic def export_csv(self, exp, *args, **kwargs): import csv import StringIO ds = self.get_input_var("ds") dic = ds.load_dict() out = StringIO.StringIO() w = csv.writer(out) w.writerows(dic.items()) out.seek(0) return out.read()
class FeatureSelectionByCut(GenericBlock): block_base_name = "FS_BY_CUT" block_group = GroupType.FILTER name = "Feature Selection by Ranking" is_block_supports_auto_execution = True _block_actions = ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_is_valid", ["validating_params"], "ready"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ]) _block_actions.extend(execute_block_actions_list) _es = InputBlockField(name="es", order_num=10, required_data_type="ExpressionSet", required=True) _rank_table = InputBlockField(name="rank_table", order_num=20, required_data_type="TableResult", required=True) _cut_property_options = BlockField(name="cut_property_options", field_type=FieldType.RAW, is_a_property=True) cut_property = ParamField( name="cut_property", title="Ranking property to use", # input_type=InputType.SELECT, input_type=InputType.TEXT, field_type=FieldType.STR, #select_provider="cut_property_options", order_num=10, ) threshold = ParamField( name="threshold", title="Threshold for cut", order_num=20, input_type=InputType.TEXT, field_type=FieldType.INT, ) _cut_direction_options = BlockField(name="cut_direction_options", field_type=FieldType.RAW) cut_direction_options = ["<", "<=", ">=", ">"] cut_direction = ParamField(name="cut_direction", title="Direction of cut", input_type=InputType.SELECT, field_type=FieldType.STR, select_provider="cut_direction_options", order_num=30, options={ "inline_select_provider": True, "select_options": [[op, op] for op in ["<", "<=", ">=", ">"]] }) es = OutputBlockField(name="es", provided_data_type="ExpressionSet") def __init__(self, *args, **kwargs): super(FeatureSelectionByCut, self).__init__(*args, **kwargs) self.celery_task = None @property def cut_property_options(self): # import ipdb; ipdb.set_trace() rank_table = self.get_input_var("rank_table") if rank_table and hasattr(rank_table, "headers"): return [{ "pk": header, "str": header } for header in rank_table.headers] def execute(self, exp, *args, **kwargs): self.clean_errors() self.celery_task = wrapper_task.s( feature_selection_by_cut, exp=exp, block=self, src_es=self.get_input_var("es"), rank_table=self.get_input_var("rank_table"), cut_property=self.cut_property, threshold=self.threshold, cut_direction=self.cut_direction, base_filename="%s_feature_selection" % self.uuid, ) exp.store_block(self) self.celery_task.apply_async() def success(self, exp, es): self.set_out_var("es", es) exp.store_block(self)
class MultiFeature(UniformMetaBlock): block_base_name = "MULTI_FEATURE" name = "Multi Feature Validation" _mf_block_actions = ActionsList([ ActionRecord("on_feature_selection_updated", ["valid_params", "ready", "done"], "ready"), ]) _input_es_dyn = InputBlockField(name="es_inputs", order_num=-10, required_data_type="ExpressionSet", required=True, multiply_extensible=True) _is_sub_pages_visible = BlockField("is_sub_pages_visible", FieldType.RAW, init_val=False, is_a_property=True) pages = BlockField("pages", FieldType.RAW, init_val={ "select_feature": { "title": "Select features to examine", "resource": "select_feature", "widget": "widgets/select_feature.html" }, }) def __init__(self, *args, **kwargs): super(MultiFeature, self).__init__(*args, **kwargs) self.features = [] @property def is_sub_pages_visible(self): if self.state in ['valid_params', 'done', 'ready']: return True return False def get_fold_labels(self): return self.features def add_dyn_input_hook(self, exp, dyn_port, new_port): """ @type new_port: InputBlockField """ new_inner_output = InnerOutputField( name="%s_i" % new_port.name, provided_data_type=new_port.required_data_type) self.inner_output_es_names_map[new_port.name] = new_inner_output.name self.register_inner_output_variables([new_inner_output]) def execute(self, exp, *args, **kwargs): # self.celery_task = wrapper_task.s( # # ) self.inner_output_manager.reset() es_dict = { inp_name: self.get_input_var(inp_name) for inp_name in self.es_inputs } self.celery_task = wrapper_task.s( prepare_folds, exp, self, features=self.features, es_dict=es_dict, inner_output_es_names_map=self.inner_output_es_names_map, success_action="on_folds_generation_success") exp.store_block(self) self.celery_task.apply_async() def phenotype_for_js(self, exp, *args, **kwargs): es = None for input_name in self.es_inputs: es = self.get_input_var(input_name) if es is not None: break res = prepare_phenotype_for_js_from_es(es) res["features"] = self.features return res def update_feature_selection(self, exp, request, *args, **kwargs): req = json.loads(request.body) self.features = req["features"] if self.features: self.do_action("on_feature_selection_updated", exp) def on_feature_selection_updated(self, *args, **kwargs): pass
class CrossValidation(UniformMetaBlock): block_base_name = "CROSS_VALID" name = "Cross Validation K-fold" _cv_actions = ActionsList( [ActionRecord("become_ready", ["valid_params"], "ready")]) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=["cv_info.html"]) _input_es_dyn = InputBlockField(name="es_inputs", required_data_type="ExpressionSet", required=True, multiply_extensible=True, order_num=-1) folds_num = ParamField(name="folds_num", title="Folds number", order_num=10, input_type=InputType.TEXT, field_type=FieldType.INT, init_val=5) repeats_num = ParamField(name="repeats_num", title="Repeats number", order_num=20, input_type=InputType.TEXT, field_type=FieldType.INT, init_val=1) def get_fold_labels(self): out = [] for repeat in range(self.repeats_num): for num in range(self.folds_num): out.append("fold_%s_%s" % (repeat + 1, num + 1)) return out # ["fold_%s_%s" % (repeat + 1, num + 1) for num in range(self.folds_num) for repeat in range(self.repeats_num)] def get_repeat_labels(self): return [ "repeat_%s" % (repeat + 1) for repeat in range(self.repeats_num) ] def add_dyn_input_hook(self, exp, dyn_port, new_port): """ @type new_port: InputBlockField """ new_inner_output_train = InnerOutputField( name="%s_train_i" % new_port.name, provided_data_type=new_port.required_data_type) new_inner_output_test = InnerOutputField( name="%s_test_i" % new_port.name, provided_data_type=new_port.required_data_type) self.inner_output_es_names_map[new_port.name] = \ (new_inner_output_train.name, new_inner_output_test.name) self.register_inner_output_variables( [new_inner_output_train, new_inner_output_test]) def execute(self, exp, *args, **kwargs): self.clean_errors() self.inner_output_manager.reset() es_dict = { inp_name: self.get_input_var(inp_name) for inp_name in self.es_inputs } self.celery_task = wrapper_task.s( generate_cv_folds, exp, self, folds_num=self.folds_num, repeats_num=self.repeats_num, es_dict=es_dict, inner_output_es_names_map=self.inner_output_es_names_map, success_action="on_folds_generation_success", ) exp.store_block(self) self.celery_task.apply_async() def on_params_is_valid(self, exp, *args, **kwargs): super(CrossValidation, self).on_params_is_valid(exp, *args, **kwargs) self.do_action("become_ready", exp) def become_ready(self, *args, **kwargs): pass def build_result_collection(self, exp): if settings.CELERY_DEBUG: import sys sys.path.append( '/Migration/skola/phd/projects/miXGENE/mixgene_project/wrappers/pycharm-debug.egg' ) import pydevd pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True) rc = ResultsContainer(base_dir=exp.get_data_folder(), base_filename="%s" % self.uuid) res_seq = self.res_seq def create_new_dim_rc(local_rc, axis_meta_block, axis_meta_block_labels): local_rc.axis_list = [axis_meta_block] local_rc.labels_dict[axis_meta_block] = axis_meta_block_labels local_rc.init_ar() local_rc.update_label_index() # WARNING: We only support homogeneous results, so we only check first element res_seq_field_name, data_type = res_seq.fields.iteritems().next() if data_type == "ClassifierResult": fold_labels = self.get_fold_labels() single_rc_list = [] for field_name in res_seq.fields: run_num = 0 loc_list = [] for idx, res_seq_cell in enumerate(res_seq.sequence): if (idx % self.folds_num) == 0: rc_run = ResultsContainer("", "") create_new_dim_rc(rc_run, self.base_name + "_folds", [ "fold_%s" % fold_num for fold_num in range(self.folds_num) ]) loc_list.append(rc_run) run_num += 1 rc_run.ar[idx % self.folds_num] = res_seq_cell[field_name] rc_single = ResultsContainer("", "") rc_single.add_dim_layer(loc_list, self.base_name, self.get_repeat_labels()) single_rc_list.append(rc_single) rc.add_dim_layer(single_rc_list, self.collector_spec.label, res_seq.fields.keys()) elif data_type == "ResultsContainer": if len(res_seq.fields) > 1: raise Exception( "Meta block only support single output of type ResultsContainer" ) else: rc_list = [] for cell in res_seq.sequence: sub_rc = cell[res_seq_field_name] sub_rc.load() rc_list.append(sub_rc) rc.add_dim_layer(rc_list, self.base_name, self.get_fold_labels()) elif data_type == "SequenceContainer": # TODO remove this check pass else: raise Exception("Meta blocks only support ClassifierResult " "or ResultsContainer in the output collection. " " Instead got: %s" % data_type) rc.store() rc.ar = None self.set_out_var("results_container", rc)
class RenderTable(RcVisualizer): block_base_name = "RENDER_TABLE" name = "Results Container as Table" _table = BlockField(name="table", field_type=FieldType.CUSTOM, is_a_property=True) _export_table_url = BlockField(name="export_table_url", field_type=FieldType.STR, is_a_property=True) _export_raw_results_url = BlockField(name="export_raw_results_url", field_type=FieldType.STR, is_a_property=True) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=["rc_table.html"]) table_config = ParamField(name="table_config", title="", input_type=InputType.HIDDEN, field_type=FieldType.RAW) def __init__(self, *args, **kwargs): super(RenderTable, self).__init__(*args, **kwargs) self.table_config = { "header_axis": "", "multi_index_axis_dict": {}, } @property def table(self): rc = self.rc to = TableObj() if rc: rc.load() header_axis = self.table_config.get("header_axis") index_axis_list = [] for axis, flag in self.table_config.get("multi_index_axis_dict", {}).iteritems(): if flag: index_axis_list.append(axis) if header_axis and index_axis_list and hasattr(self, "metric"): # log.debug("Can build table slice") df = rc.get_pandas_slice(header_axis, index_axis_list, metric_name=self.metric) # log.debug(df) to.html = df.to_html(float_format=pd_float_format_func) to.df = df else: if self.exp_id: exp = Experiment.get_exp_by_id(self.exp_id) exp.log( self.uuid, "Can't build table slice, header axis `%s`, index axis_list `%s`" % (header_axis, index_axis_list)) log.debug( "Can't build table slice, header axis `%s`, index axis_list `%s`", header_axis, index_axis_list) # log.debug("Table: %s", to.to_dict()) return to @property def export_table_url(self): return reverse("block_field_formatted", kwargs={ "exp_id": self.exp_id, "block_uuid": self.uuid, "field": "export_table", "format": "csv" }) @property def export_raw_results_url(self): return reverse("block_field_formatted", kwargs={ "exp_id": self.exp_id, "block_uuid": self.uuid, "field": "export_rc", "format": "json" }) # import ipdb; ipdb.set_trace() # return def export_rc(self, exp, *args, **kwargs): return self.rc.export_to_json_dict() def export_table(self, exp, *args, **kwargs): table = self.table out = StringIO.StringIO() # Float format in fact doesn't work in pandas # table.df.to_csv(out, float_format=pd_float_format_func) # tmp_df = table.df.applymap(pd_float_format_func) tmp_df.to_csv(out, float_format=pd_float_format_func) out.seek(0) return out.read() def on_params_is_valid(self, exp, *args, **kwargs): super(RenderTable, self).on_params_is_valid(exp, *args, **kwargs) if self.rc is not None: for axis in self.rc.axis_list: if axis not in self.table_config["multi_index_axis_dict"]: self.table_config["multi_index_axis_dict"][axis] = "" exp.store_block(self)
class CustomIterator(UniformMetaBlock): block_base_name = "CUSTOM_ITERATOR" name = "Custom Iterator" has_custom_layout = True _ci_block_actions = ActionsList([ ActionRecord("become_ready", ["valid_params"], "ready"), ActionRecord("reset_settings", ["*", "done", "sub_scope_executing", "ready_to_run_sub_scope", "generating_folds", "execution_error"], "ready", user_title="Reset to initial state", reload_block_in_client=True) ]) cells_prototype = BlockField(name="cells_prototype", field_type=FieldType.CUSTOM, init_val=None) cells = BlockField(name="cells", field_type=FieldType.CUSTOM, init_val=None) is_cells_prototype_defined = BlockField(name="is_cells_prototype_defined", field_type=FieldType.BOOLEAN, init_val=False) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=[ "custom_iterator/cell_prototype_definition.html", "custom_iterator/cell_dyn_inputs.html" ]) def __init__(self, *args, **kwargs): super(CustomIterator, self).__init__(*args, **kwargs) self.cells_prototype = CellsPrototype() self.cells = CellInfoList() def add_cell_prototype_field(self, exp, received_block, *args, **kwargs): new_field_dict = received_block.get("cells_prototype", {}).get("new_cell_field") if new_field_dict: cf = CellField(**new_field_dict) cf.update_name_from_label() self.cells_prototype.add_cell(cf) exp.store_block(self) def finish_cells_prototype_definition(self, exp, *args, **kwargs): self.is_cells_prototype_defined = True for field_prototype in self.cells_prototype.cells_list: new_inner_output = InnerOutputField( name=field_prototype.name, provided_data_type=field_prototype.data_type ) self.register_inner_output_variables([new_inner_output]) exp.store_block(self) def add_cell(self, exp, received_block, *args, **kwargs): new_cell_dict = received_block.get("cells", {}).get("new") if new_cell_dict: cell = CellInfo(new_cell_dict["label"]) for field_prototype in self.cells_prototype.cells_list: new_name = "%s_%s" % (field_prototype.name, len(self.cells.cells)) cell.inputs_list.append((field_prototype.name, new_name)) # TODO: add input port to block new_port = InputBlockField( name=new_name, required_data_type=field_prototype.data_type, required=True ) self.add_input_port(new_port) self.cells.cells.append(cell) exp.store_block(self) def remove_cell(self, exp, cell_json, *args, **kwargs): try: cell = json.loads(cell_json) self.cells.remove_by_label(cell["label"]) exp.store_block(self) except: pass def become_ready(self, *args, **kwargs): pass def on_params_is_valid(self, exp, *args, **kwargs): super(CustomIterator, self).on_params_is_valid(exp, *args, **kwargs) self.do_action("become_ready", exp, *args, **kwargs) def get_fold_labels(self): return [cell.label for cell in self.cells.cells] def execute(self, exp, *args, **kwargs): self.inner_output_manager.reset() seq = [] for cell_def in self.cells.cells: cell = {} for name, input_var_name in cell_def.inputs_list: # TODO: hmm maybe we should create deepcopy? cell[name] = self.get_input_var(input_var_name) seq.append(cell) exp.store_block(self) self.do_action("on_folds_generation_success", exp, seq) def reset_settings(self, exp, *args, **kwargs): self.cells_prototype = CellsPrototype() self.cells = CellInfoList() self.is_cells_prototype_defined = False exp.store_block(self)
class UniformMetaBlock(GenericBlock): is_abstract = True block_group = GroupType.META_PLUGIN create_new_scope = True is_block_supports_auto_execution = True _block_actions = ActionsList([]) _block_actions.extend(ActionsList([ ActionRecord("save_params", ["created", "valid_params", "done", "ready"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("add_collector_var", ["created", "ready", "done", "valid_params"], "validating_params"), ActionRecord("remove_collector_var", ["created", "ready", "done", "valid_params"], "validating_params"), ActionRecord("execute", ["ready"], "generating_folds", user_title="Run block"), ActionRecord("on_folds_generation_success", ["generating_folds"], "ready_to_run_sub_scope", reload_block_in_client=True), ActionRecord("continue_collecting_sub_scope", ["ready_to_run_sub_scope"], "sub_scope_executing"), ActionRecord("run_sub_scope", ["ready_to_run_sub_scope"], "sub_scope_executing"), ActionRecord("on_sub_scope_done", ["sub_scope_executing"], "ready_to_run_sub_scope"), ActionRecord("success", ["working", "ready_to_run_sub_scope"], "done", propagate_auto_execution=True, reload_block_in_client=True), ActionRecord("error", ["*", "ready", "working", "sub_scope_executing", "generating_folds", "ready_to_run_sub_scope"], "execution_error", reload_block_in_client=True), ActionRecord("reset_execution", ["*", "done", "sub_scope_executing", "ready_to_run_sub_scope", "generating_folds", "execution_error"], "ready", user_title="Reset execution"), ])) _collector_spec = ParamField(name="collector_spec", title="", field_type=FieldType.CUSTOM, input_type=InputType.HIDDEN, init_val=None, required=False ) res_seq = BlockField(name="res_seq", provided_data_type="SequenceContainer", field_type=FieldType.HIDDEN, init_val=None) _results_container = OutputBlockField( name="results_container", provided_data_type="ResultsContainer", field_type=FieldType.HIDDEN, init_val=None ) def __init__(self, *args, **kwargs): super(UniformMetaBlock, self).__init__(*args, **kwargs) self.auto_exec_status_working.update(["sub_scope_executing", "ready_to_run_sub_scope", "generating_folds"]) self.inner_output_manager = IteratedInnerFieldManager() self.collector_spec = CollectorSpecification() self.collector_spec.label = self.block_base_name + "_collection" self.inner_output_es_names_map = {} self.celery_task = None self.set_out_var("results_container", None) self.res_seq = SequenceContainer() def remap_inputs(self, mapping): for var in self.bound_inputs.itervalues(): var.change_block(mapping) for var in self.collector_spec.bound.itervalues(): var.change_block(mapping) @property def is_sub_pages_visible(self): if self.state in ['valid_params', 'done', 'ready']: return True return False @abstractmethod def get_fold_labels(self): pass @abstractmethod def get_repeat_labels(self): pass def get_inner_out_var(self, name): return self.inner_output_manager.get_var(name) def run_sub_scope(self, exp, *args, **kwargs): self.reset_execution_for_sub_blocks() cell = self.res_seq.sequence[self.inner_output_manager.iterator] log.debug("Cell!!!!!!!! %s", str(cell)) act = self.inner_output_manager.sequence[self.inner_output_manager.iterator] log.debug("Cell!!!!!!!! %s", str(act)) exp.store_block(self) sr = ScopeRunner(exp, self.sub_scope_name) sr.execute() def on_sub_scope_done(self, exp, *args, **kwargs): """ @type exp: Experiment This action should be called by ScopeRunner when all blocks in sub-scope have exec status == done """ r = get_redis_instance() with redis_lock.Lock(r, ExpKeys.get_block_global_lock_key(self.exp_id, self.uuid)): cell = self.res_seq.sequence[self.inner_output_manager.iterator] for name, scope_var in self.collector_spec.bound.iteritems(): var = exp.get_scope_var_value(scope_var) exp.log(self.uuid, "Collected %s from %s" % (var, scope_var.title), severity="CRITICAL") log.debug("Collected %s from %s", var, scope_var.title) if var is not None: if hasattr(var, "clone"): cell[name] = var.clone("%s_%s" % (self.uuid, self.inner_output_manager.iterator)) else: cell[name] = deepcopy(var) self.res_seq.sequence[self.inner_output_manager.iterator] = cell exp.store_block(self) if len(cell) < len(self.res_seq.fields): self.do_action("continue_collecting_sub_scope", exp) else: try: self.inner_output_manager.next() self.do_action("run_sub_scope", exp) except StopIteration, e: # All folds were processed without errors self.build_result_collection(exp) self.do_action("success", exp)
class TableResultView(GenericBlock): block_base_name = "TR_VIEW" block_group = GroupType.VISUALIZE name = "Table Result view" is_block_supports_auto_execution = False _block_actions = ActionsList([ ActionRecord( "save_params", ["created", "valid_params", "done", "ready", "input_bound"], "validating_params", user_title="Save parameters"), ActionRecord("on_params_not_valid", ["validating_params"], "created"), ActionRecord("on_params_is_valid", ["validating_params"], "valid_params"), #ActionRecord("compute_pca", ["valid_params"], "computing_pca", user_title="Compute PCA"), #ActionRecord("pca_done", ["computing_pca"], "done",), #ActionRecord("reset_execution", ["*", "done", "execution_error", "ready", "working"], "ready", # user_title="Reset execution") #ActionRecord("update", ["input_bound", "ready"], "ready"), ]) input_table_result = InputBlockField(name="tr", order_num=10, required_data_type="TableResult", required=True) _table_for_js = BlockField(name="table_js", field_type=FieldType.RAW, is_a_property=True) #chart_series = BlockField(name="chart_series", field_type=FieldType.RAW, init_val=[]) #chart_categories = BlockField(name="chart_categories", field_type=FieldType.SIMPLE_LIST, # init_val=[]) elements = BlockField(name="elements", field_type=FieldType.SIMPLE_LIST, init_val=["table_result_view.html"]) @property def table_js(self): tr = self.get_input_var("tr") """:type :TableResult""" if tr: table = tr.get_table() table_headers = ["#"] + table.columns.tolist() column_title_to_code_name = { title: "_" + hashlib.md5(title).hexdigest()[:8] for title in table_headers } fields_list = [ column_title_to_code_name[title] for title in table_headers ] return { "columns": [{ "title": title, "field": column_title_to_code_name[title], "visible": True } for title in table_headers], "rows": [ dict(zip(fields_list, row)) for row in table.to_records().tolist() #[:100] ] } else: None