def test_ModelDescription():
    """Check the per-input helpers that ModelInfoExtractor derives.

    The model and dataloader yaml templates are rendered once with and once
    without the ``supports_simple_rc_str`` snippet; in both cases the
    extractor must report the matching ``use_seq_only_rc`` value and the
    expected mutator, metadata and array-transformation objects for the
    inputs ``seq_a``, ``seq_b`` and ``seq_c``.
    """
    onehot_inputs = ["seq_a", "seq_c"]
    string_inputs = ["seq_b"]

    for rc_support in (True, False):
        seq_string_shape = ""
        ssrs = supports_simple_rc_str if rc_support else ""

        model = ModelDescription.from_config(
            from_yaml(model_yaml % (seq_string_shape, ssrs)))
        dataloader = DataLoaderDescription.from_config(
            from_yaml(dataloader_yaml % seq_string_shape))
        mi = ModelInfoExtractor(model, dataloader)

        assert mi.use_seq_only_rc == rc_support

        # mutator classes
        assert all(
            isinstance(mi.seq_input_mutator[name], OneHotSequenceMutator)
            for name in onehot_inputs)
        assert all(
            isinstance(mi.seq_input_mutator[name], DNAStringSequenceMutator)
            for name in string_inputs)

        # metadata field associated with each sequence input
        assert all(
            mi.seq_input_metadata[name] == "ranges"
            for name in ["seq_a", "seq_b"])
        assert all(
            mi.seq_input_metadata[name] == "ranges_b" for name in ["seq_c"])

        # array transformation classes
        assert all(
            isinstance(mi.seq_input_array_trafo[name], ReshapeDna)
            for name in onehot_inputs)
        assert all(
            isinstance(mi.seq_input_array_trafo[name], ReshapeDnaString)
            for name in string_inputs)
def configure_domain_objects(self, arguments):
    """Populate the domain objects either from samples or from YAML files.

    When ``arguments.action`` equals ``'sample'`` the objects are produced by
    ``self._create_samples()``. Otherwise ``arguments.project`` and
    ``arguments.document`` are opened as files and deserialized with
    ``related.from_yaml`` into ``self.project`` / ``self.document``; the
    document is then configured from the project.

    The decision is recorded in ``self.domain_loaded_from_samples``.
    """
    from_samples = arguments.action == 'sample'
    self.domain_loaded_from_samples = from_samples

    if from_samples:
        self._create_samples()
        return

    with open(arguments.project, 'r') as project_file:
        self.project = related.from_yaml(project_file, Project)
    print(self.project)

    with open(arguments.document, 'r') as document_file:
        self.document = related.from_yaml(document_file, Document)
    self.document.configure_from_project(self.project)
def test_ret():
    """Smoke test: get_avail_scoring_methods returns exactly four values.

    The postprocessing description is rendered from ``postproc_yaml`` with an
    empty first placeholder and ``args_w_default`` as the second.
    """
    config = from_yaml(postproc_yaml % ('', args_w_default))
    pps = PostProcModelStruct.from_config(config)

    model = dummy_container()
    model.postprocessing = pps

    # Unpacking into four names fails if the return arity ever changes.
    fns, fn_def_args, fn_names, default_fns = get_avail_scoring_methods(model)
def test_gradplotter():
    """Exercise GradPlotter on gradients written for the ``rbp`` example.

    Gradients are written to ``tests/models/rbp/grad_outputs.hdf5``, loaded
    back through ``GradPlotter.from_hdf5`` and then used to check
    ``get_num_samples`` and the dataset-extractor selection for each schema in
    ``MODEL_SCHEMA_EXAMPLES``.

    The output file is removed before the run and again afterwards, even if an
    assertion fails. Only ``OSError`` (e.g. file missing) is ignored during
    removal, so interrupts and unrelated errors are no longer swallowed.
    """
    from kipoi.postprocessing.gradient_vis.vis import GradPlotter, get_selector

    example = "rbp"
    if example in {"rbp", "non_bedinput_model", "iris_model_template"} and sys.version_info[0] == 2:
        pytest.skip("rbp example not supported on python 2 ")

    example_dir = "tests/models/{0}".format(example)
    output = os.path.realpath(example_dir + "/grad_outputs.hdf5")

    # Best-effort removal of a stale file left behind by an earlier run.
    try:
        os.unlink(output)
    except OSError:
        pass

    try:
        writer = writers.HDF5BatchWriter(file_path=output)
        get_example_data(example, predict_activation_layers[example], writer=writer)
        gp = GradPlotter.from_hdf5(output, example_dir, source="dir")

        # test get_num_samples
        assert gp.data['inputs']['seq'].shape[0] == gp.get_num_samples("seq")

        # once we have a gp instance:
        exp_ret_fns = [gp._select_ds_dict, gp._select_ds_list, gp._select_ds_ndarray]

        for model_schema_yaml, exp_ret_fn in zip(MODEL_SCHEMA_EXAMPLES, exp_ret_fns):
            schema = ModelSchema.from_config(from_yaml(model_schema_yaml))
            ret_fn, ret_labels = gp._get_ds_extractor(schema.inputs)
            assert ret_fn == exp_ret_fn
            assert ret_labels == ['seq']
    finally:
        # Best-effort cleanup of the generated file.
        try:
            os.unlink(output)
        except OSError:
            pass
def format_kipoi_dataloader(content):
    """Render a dataloader documentation snippet.

    ``content`` is split at the single "```\\n" fence into a leading code part
    and a trailing YAML description. The YAML part (with dummy ``type`` and
    ``defined_as`` entries appended) is parsed into a
    ``DataLoaderDescription``; the result is the code part followed by the
    description's doc text, an "Arguments" section and an "Output schema"
    section.

    Raises ValueError from the tuple unpacking if ``content`` does not contain
    exactly one fence.
    """
    fence = "```\n"
    code, descr_str = content.split(fence)

    # parse the other into Yaml
    from kipoi.data import DataLoaderDescription
    import related

    descr_yaml = descr_str + "type: dummy\ndefined_as: dummy"
    descr = DataLoaderDescription.from_config(related.from_yaml(descr_yaml))

    header = code + fence + "\n" + descr.info.doc + "\n"

    arguments_md = section("Arguments", ul_dict(descr.args, format_arg))
    schema_parts = OrderedDict([
        ("inputs", descr.output_schema.inputs),
        ("targets", descr.output_schema.targets),
        ("metadata", descr.output_schema.metadata),
    ])
    schema_md = section("Output schema",
                        ul_dict_nested(schema_parts, format_array_schema))

    # The two sections are separated by a blank line.
    return header + arguments_md + "\n\n" + schema_md + "\n"
def test_use_rc():
    """VarEffectModelArgs parsed from ``yaml_in_simple_rc`` enables use_rc."""
    parsed = from_yaml(yaml_in_simple_rc)
    pps = VarEffectModelArgs.from_config(parsed)

    assert pps is not None
    # should always be there and is always a list of strings
    assert pps.seq_input == ["seq"]
    assert pps.use_rc
def test_use_rc():
    """PostProcModelStruct parsed from ``yaml_in_simple_rc`` enables use_rc."""
    parsed = from_yaml(yaml_in_simple_rc)
    pps = PostProcModelStruct.from_config(parsed)

    variant_effects = pps.variant_effects
    assert variant_effects is not None
    # should always be there and is always a list of strings
    assert pps.variant_effects.seq_input == ["seq"]
    assert pps.variant_effects.use_rc
def loads(cls, content, file_path=None):
    """Build a Config object from text parsed with ``related.from_yaml``.

    The root mapping (minus its ``profiles`` and ``file_path`` entries) is the
    base for every profile: each profile is a deep copy of the root values,
    updated with the profile's own entries via ``utils.nested_update``. Both
    the profiles and the root mapping are passed through ``eval_update_ns``
    with a namespace exposing ``RIGOR_``-prefixed environment variables (the
    prefix is stripped from the key).

    The ``file_path`` key must be present in the parsed mapping (KeyError
    otherwise) -- presumably injected by ``related.from_yaml`` from the
    ``file_path`` keyword; verify against the library.
    """
    vals = related.from_yaml(content, file_path=file_path,
                             object_pairs_hook=dict)

    # environment namespace (RIGOR_)
    env_values = {}
    for key, value in os.environ.items():
        if key.startswith("RIGOR_"):
            env_values[key[6:]] = value
    env_ns = Namespace(env=env_values)

    # pop profiles and file_path from root config
    profiles = vals.pop("profiles", {})
    file_path = vals.pop("file_path")

    # iterate and construct profile sub-dictionaries with root info
    for name, overrides in profiles.items():
        merged = utils.nested_update(copy.deepcopy(vals), overrides)
        eval_update_ns(merged, env_ns)
        profiles[name] = merged

    # construct root config profile
    vals["name"] = "__root__"
    vals["file_path"] = file_path
    vals["profiles"] = profiles
    eval_update_ns(vals, env_ns)

    return related.to_model(cls, vals)
def test_correct_shape():
    """Each entry of GOOD_EXAMPLES parses to the shape listed at its index."""
    expected_shapes = [
        (100, ),
        (None, 100),
        (None, ),
        (100, ),
        (100, ),
        (100, ),
    ]
    for position, yaml_text in enumerate(GOOD_EXAMPLES):
        expected = expected_shapes[position]
        parsed = CLS.from_config(from_yaml(yaml_text))
        assert parsed.shape == expected
def test_yaml_roundtrip_with_empty_values():
    """Empty values are kept in the output when suppression is disabled.

    The compose file at ``YML_FILE`` is loaded into a ``Compose`` object and
    serialized again with ``suppress_empty_values=False``; the generated YAML
    must then contain an explicit empty ``ports`` list.
    """
    # Context manager closes the handle deterministically instead of leaking
    # it until garbage collection.
    with open(YML_FILE) as yml_file:
        original_yaml = yml_file.read().strip()

    yml_dict = from_yaml(original_yaml)
    compose = Compose(**yml_dict)
    generated_yaml = to_yaml(compose,
                             suppress_map_key_values=True,
                             suppress_empty_values=False).strip()
    assert "ports: []" in generated_yaml
def test_parse_correct_info(info_str):
    """A parsed description survives a get_config / from_config round trip."""
    # loading works
    original = CLS.from_config(from_yaml(info_str))

    # cfg works
    rebuilt = CLS.from_config(original.get_config())
    assert str(original) == str(rebuilt)
def test_auto_default():
    """Scoring-function names are the defaults followed by the built-ins.

    With ``postproc_autodefault_yaml`` the available names must match
    ``default_scoring_fns + ["logit_ref", "diff", "ref", "alt"]`` as judged by
    ``assert_groupwise_identity``.
    """
    model = None
    pps = PostProcModelStruct.from_config(from_yaml(postproc_autodefault_yaml))
    model = dummy_container()
    model.postprocessing = pps

    (avail_scoring_fns,
     avail_scoring_fn_def_args,
     avail_scoring_fn_names,
     default_scoring_fns) = get_avail_scoring_methods(model)

    builtin_names = ["logit_ref", "diff", "ref", "alt"]
    assert_groupwise_identity([avail_scoring_fn_names],
                              [default_scoring_fns + builtin_names])
def test_parse_correct_info(info_str):
    """A description with the ``inp_targ`` header round-trips via its config."""
    # add the input: targets headers
    full_yaml = inp_targ + info_str

    # loading works
    original = CLS.from_config(from_yaml(full_yaml))

    # cfg works
    rebuilt = CLS.from_config(original.get_config())
    assert str(original) == str(rebuilt)
def from_string(cls, string):
    """Load an instance of ``cls`` from a YAML string.

    Args:
        string: YAML text (not a file path) describing the object.

    Returns:
        Whatever ``cls.from_config`` returns for the parsed mapping.

    Raises:
        Exception: if ``cls.from_config`` fails; the message contains the
            offending YAML text, the target class and the original error.
            Errors raised while parsing the YAML itself propagate unchanged.
    """
    parsed_dict = related.from_yaml(string)
    try:
        return cls.from_config(parsed_dict)
    except Exception as e:
        # Report the YAML text itself: it is content, not a path, so running
        # it through os.path.abspath only produced a misleading pseudo-path.
        raise Exception(
            "Unable to load yaml string: {0} into class {1}.\nError: \n{2}"
            .format(string, cls, str(e)))
def load_exam(path: str) -> Exam:
    """Deserialize an Exam from the YAML file at ``path``.

    Any failure (opening the file or deserializing it) is re-raised as a
    ``click.ClickException`` naming the file and the underlying error, with
    the original exception chained as its cause.
    """
    try:
        with open(path, 'r') as handle:
            return related.from_yaml(handle, Exam)
    except Exception as ex:
        message = f"Invalid exam file `{path}` ({ex})"
        raise click.ClickException(message) from ex
def test_rename_custom():
    """A renamed custom scoring function is listed and becomes the default.

    Uses ``rename_custom_yaml``; the available names must match the expected
    list (per ``assert_groupwise_identity``) and the only default must be
    ``custom_logit``.
    """
    pps = PostProcModelStruct.from_config(from_yaml(rename_custom_yaml))
    model = dummy_container()
    model.postprocessing = pps

    (avail_scoring_fns,
     avail_scoring_fn_def_args,
     avail_scoring_fn_names,
     default_scoring_fns) = get_avail_scoring_fns(model)

    expected_names = [
        "custom_logit", "diff", "ref", "alt",
        "logit_ref", "logit", "deepsea_effect",
    ]
    assert_groupwise_identity([avail_scoring_fn_names], [expected_names])
    assert default_scoring_fns == ["custom_logit"]
def test_address_yaml_roundtrip():
    """An Address serialized with to_yaml deserializes to an equal object."""
    address = Address(street="123 Main Street",
                      city="Springfield",
                      zipcode="12345")

    expected_repr = ("Address(street='123 Main Street', "
                     "city='Springfield', zipcode='12345', "
                     "street_two=None)")
    assert repr(address) == expected_repr

    serialized = to_yaml(address)
    restored = from_yaml(serialized, Address)
    assert restored == address
def test_compose_from_yml():
    """The compose file round-trips through the model without changes.

    The YAML at ``YML_FILE`` is parsed into a ``Compose`` model, a few fields
    are spot-checked, and the model is serialized again with empty values and
    map keys suppressed; the output must equal the original text. Parsing the
    generated YAML again must yield an equal model.
    """
    # Context manager closes the handle deterministically instead of leaking
    # it until garbage collection.
    with open(YML_FILE) as yml_file:
        original_yaml = yml_file.read().strip()

    yml_dict = from_yaml(original_yaml)
    compose = to_model(Compose, yml_dict)

    assert compose.version == '2'
    assert compose.services['web'].ports == ["5000:5000"]
    assert compose.services['redis'].image == "redis"

    generated_yaml = to_yaml(compose,
                             suppress_empty_values=True,
                             suppress_map_key_values=True).strip()
    assert original_yaml == generated_yaml

    yml_dict2 = from_yaml(generated_yaml)
    compose2 = Compose(**yml_dict2)
    assert compose == compose2
def test_person_with_education_to_yaml_and_back():
    """A Person with a list of Education entries survives a YAML round trip."""
    schools = [
        Education(school="School 2", degree=Degree.MASTERS),
        Education(school="School 1", degree=Degree.BACHELORS),
    ]
    person = Person(name="Brainy", education=schools)

    serialized = to_yaml(person)
    restored = from_yaml(serialized, Person)

    assert restored.education == person.education
    assert restored == person
def test_default_diff():
    """Without custom function definitions only the built-ins are offered.

    Uses ``postproc_yaml_nofndef``; names, functions and default arguments
    must match diff/ref/alt (per ``assert_groupwise_identity``) and ``diff``
    must be the only default.
    """
    pps = PostProcModelStruct.from_config(from_yaml(postproc_yaml_nofndef))
    model = dummy_container()
    model.postprocessing = pps

    (avail_scoring_fns,
     avail_scoring_fn_def_args,
     avail_scoring_fn_names,
     default_scoring_fns) = get_avail_scoring_fns(model)

    output = [
        avail_scoring_fn_names,
        avail_scoring_fns,
        avail_scoring_fn_def_args,
    ]
    expected = [
        ["diff", "ref", "alt"],
        [ve.Diff, ve.Ref, ve.Alt],
        [builtin_default_kwargs] * 3,
    ]
    assert_groupwise_identity(output, expected)
    assert default_scoring_fns == ["diff"]
def test_custom_fns():
    """Custom scoring functions combine correctly with the built-ins.

    ``postproc_yaml`` is rendered for every combination of
    (no diff entry | ``diff_str``) x (no args | ``args_w_default`` |
    ``optional_args``). Without argument definitions
    ``get_avail_scoring_methods`` must raise ValueError; otherwise names,
    functions and default arguments are compared against the expectations
    and ``deepsea_effect`` must be the only default.
    """
    template_fns = [ve.Logit, ve.DeepSEA_effect, ve.LogitAlt]
    template_labels = ["logit", "deepsea_effect", "mydiff"]
    trailing_fns = [ve.Ref, ve.Alt, ve.LogitRef]
    trailing_labels = ["ref", "alt", "logit_ref"]

    # Index 0: diff not listed explicitly; index 1: diff listed first.
    exp_avail_scoring_fns = [
        template_fns + [ve.Diff] + trailing_fns,
        [ve.Diff] + template_fns + trailing_fns,
    ]
    exp_avail_scoring_fn_labels = [
        template_labels + ["diff"] + trailing_labels,
        ["diff"] + template_labels + trailing_labels,
    ]

    for i, diff_str_here in enumerate(["", diff_str]):
        # Number of default-kwargs entries preceding the custom function's.
        n_leading = 2 if diff_str_here == "" else 3
        exp_avail_scoring_fn_def_args = [
            None,
            [builtin_default_kwargs] * n_leading
            + [{"rc_merging": "max"}]
            + [builtin_default_kwargs] * 4,
            [builtin_default_kwargs] * n_leading
            + [{}]
            + [builtin_default_kwargs] * 5,
        ]

        for i2, mydiff_args in enumerate(["", args_w_default, optional_args]):
            pps = PostProcModelStruct.from_config(
                from_yaml(postproc_yaml % (diff_str_here, mydiff_args)))
            model = dummy_container()
            model.postprocessing = pps

            if i2 == 0:
                # mydiff has one argument but none are defined.
                with pytest.raises(ValueError):
                    get_avail_scoring_methods(model)
                continue

            (avail_scoring_fns,
             avail_scoring_fn_def_args,
             avail_scoring_fn_names,
             default_scoring_fns) = get_avail_scoring_methods(model)

            output = [
                avail_scoring_fn_names,
                avail_scoring_fns,
                avail_scoring_fn_def_args,
            ]
            expected = [
                exp_avail_scoring_fn_labels[i],
                exp_avail_scoring_fns[i],
                exp_avail_scoring_fn_def_args[i2],
            ]
            assert_groupwise_identity(output, expected)
            assert default_scoring_fns == ["deepsea_effect"]
def test_parse_correct_info(info_str):
    """Parsed args have the expected types and the config round-trips."""
    # add the input: targets headers
    full_yaml = inp_targ + info_str

    # loading works
    original = CLS.from_config(from_yaml(full_yaml))
    assert isinstance(original.args['arch'], str)
    assert isinstance(original.args['weights'], RemoteFile)

    # cfg works
    rebuilt = CLS.from_config(original.get_config())
    assert str(original) == str(rebuilt)
def test_output_reshaper():
    """OutputReshaper flattens matching results and rejects mismatched ones.

    Every key of ``RES`` is paired with every key of ``YAMLS``. Keys are
    treated as compatible when they are equal or differ only by ``Lab`` vs
    ``NoLab``; for compatible pairs the flattened width and labels are
    checked, for all other pairs building the reshaper or flattening must
    raise.
    """
    for k1 in RES:
        for k2 in YAMLS:
            compatible = (
                k1 == k2
                or k1.replace("Lab", "NoLab") == k2
                or k1 == k2.replace("Lab", "NoLab")
            )
            if not compatible:
                with pytest.raises(Exception):
                    o = OutputReshaper(
                        ModelSchema.from_config(from_yaml(YAMLS[k2])).targets)
                    fl, fll = o.flatten(RES[k1])
                continue

            schema = ModelSchema.from_config(from_yaml(YAMLS[k2]))
            o = OutputReshaper(schema.targets)
            fl, fll = o.flatten(RES[k1])
            assert (fl.shape[1] == RES_OUT_SHAPES[k1])
            assert (RES_OUT_LABELS[k2] == fll.tolist())
def load(cls, path, append_path=True):
    """Load an instance of ``cls`` from a YAML file.

    Args:
        path: location of the YAML file.
        append_path: when true and the parsed mapping has no ``path`` key,
            ``path`` is stored under that key before building the object.

    Returns:
        Whatever ``cls.from_config`` returns for the parsed mapping.

    Raises:
        Exception: if ``cls.from_config`` fails; the message names the
            absolute file path, the target class and the original error.
            Errors from reading or parsing the file propagate unchanged.
    """
    # Context manager closes the handle deterministically instead of leaking
    # it until garbage collection.
    with open(path) as yaml_file:
        original_yaml = yaml_file.read().strip()

    parsed_dict = related.from_yaml(original_yaml)
    if append_path and "path" not in parsed_dict:
        parsed_dict["path"] = path

    try:
        return cls.from_config(parsed_dict)
    except Exception as e:
        raise Exception("Unable to load file {0} into class {1}.\nError: \n{2}".
                        format(os.path.abspath(path), cls, str(e)))
def wrap(cls):
    """Attach a DataLoaderDescription parsed from the class docstring.

    The docstring of ``cls`` is dedented and parsed as YAML. ``defined_as``
    and ``type`` lines are prepended when the text does not already start
    with them. Entries from the enclosing ``override`` mapping are applied
    with ``rsetattr``, the described argument names are checked against the
    implemented ones, and each argument's ``optional`` flag is set from
    whether its position falls among the trailing arguments that have
    defaults.

    Raises:
        ValueError: if ``cls`` is a plain function, does not inherit from
            one of ``AVAILABLE_DATALOADERS``, or if the described and
            implemented argument names differ.
    """
    if inspect.isfunction(cls):
        raise ValueError(
            "Function-based dataloader are currently not supported with kipoi_dataloader decorator"
        )

    # figure out the right dataloader type
    dl_type_inferred = infer_parent_class(cls, AVAILABLE_DATALOADERS)
    if dl_type_inferred is None:
        raise ValueError(
            "Dataloader needs to inherit from one of the available dataloaders {}"
            .format(list(AVAILABLE_DATALOADERS)))

    # de-indent the docstring before treating it as yaml
    doc = textwrap.dedent(cls.__doc__)

    if re.match("^defined_as: ", doc) is None:
        doc = "defined_as: {}\n".format(cls.__name__) + doc
    if re.match("^type: ", doc) is None:
        doc = "type: {}\n".format(dl_type_inferred) + doc

    # parse the yaml
    dl_descr = DataLoaderDescription.from_config(related.from_yaml(doc))

    # override parameters
    for attr_path, value in six.iteritems(override):
        rsetattr(dl_descr, attr_path, value)

    # setup optional parameters
    arg_names, default_values = _get_arg_name_values(cls)
    if set(dl_descr.args) != set(arg_names):
        raise ValueError(
            "Described args don't exactly match the implemented arguments"
            "docstring: {}, actual: {}".format(list(dl_descr.args),
                                               list(arg_names)))

    # properly set optional / non-optional argument values
    first_optional = len(arg_names) - len(default_values)
    for position, arg in enumerate(dl_descr.args):
        has_default = position >= first_optional
        arg_descr = dl_descr.args[arg]
        if arg_descr.optional and not has_default:
            logger.warning(
                "Parameter {} was specified as optional. However, there "
                "are no defaults for it. Specifying it as not optinal".
                format(arg))
        arg_descr.optional = has_default

    dl_descr.info.name = cls.__name__

    # enrich the class with dataloader description
    return cls._add_description_factory(dl_descr)
def up():
    """Start every service described in ``app-compose.yml``.

    The file is deserialized into ``Services``; one ``create_service`` result
    is built per configured service (each with the next colour from a
    ``Palette``) and the event loop runs until all of them, gathered
    together, complete.
    """
    with open('app-compose.yml') as compose_file:
        config = related.from_yaml(compose_file, Services)

    loop = asyncio.get_event_loop()
    palette = Palette()

    tasks = [
        create_service(loop, service, palette.next_nice())
        for _name, service in config.services.items()
    ]
    loop.run_until_complete(asyncio.gather(*tasks))
def loads(cls, content, file_path=None):
    """Build a Case from YAML text, falling back to an invalid Case on error.

    The ``scenarios`` entry of the parsed mapping (default: empty list) is
    passed through ``cls.prep_scenarios`` together with the directory of
    ``file_path`` before the mapping is converted into a ``Case``.

    Any exception is logged via ``get_logger().error`` and an empty Case
    flagged ``is_valid=False`` is returned instead of raising.
    """
    try:
        as_dict = related.from_yaml(content,
                                    file_path=file_path,
                                    object_pairs_hook=dict)
        raw_scenarios = as_dict.get("scenarios", [])
        base_dir = os.path.dirname(file_path)
        as_dict['scenarios'] = cls.prep_scenarios(raw_scenarios, base_dir)
        case = related.to_model(Case, as_dict)
    except Exception as e:
        get_logger().error("Failed to Load Case",
                           file_path=file_path,
                           error=str(e))
        case = Case(file_path=file_path, is_valid=False, scenarios=[])
    return case
def test_parse_correct_info(info_str, tmpdir):
    """Example kwargs are resolved into ``tmpdir`` and the config round-trips."""
    # add the input: targets headers
    full_yaml = inp_targ + info_str
    target_dir = str(tmpdir)

    # loading works
    info = CLS.from_config(from_yaml(full_yaml))
    info.path = target_dir

    outfiles = example_kwargs(info.args, target_dir)
    assert os.path.exists(outfiles['intervals_file'])

    assert isinstance(info.get_example_kwargs(), dict)
    assert isinstance(example_kwargs(info.args), dict)

    assert isinstance(info.args["intervals_file"].example, RemoteFile)
    assert isinstance(info.args["fasta_file"].example, str)

    # cfg works
    rebuilt = CLS.from_config(info.get_config())
    assert str(info) == str(rebuilt)
def __on_startup(self, serv, cancelled):
    """Handle operator startup unless the STARTUP event reports cancellation.

    ``OperatorEvents.STARTUP`` is thrown on the router's scheduler; a truthy
    result stops the startup. Otherwise, if a configuration file is set, the
    agent descriptors in it are deserialized as a ``DescriptorSet`` and
    registered; finally the server thread is started and ``self.started`` is
    set.

    ``serv`` and ``cancelled`` are not used by this method.
    """
    cancel = self.router.scheduler.throw_event(OperatorEvents.STARTUP, self)
    if cancel:
        return

    if self.configuration_file:
        with open(self.configuration_file) as file:
            print("Loading agents descriptors")
            yaml_text = file.read().strip()
            agents = set(related.from_yaml(yaml_text, DescriptorSet))
            registered = self.register.register_all(agents)
            print(f"Loaded {len(registered)} agents from configuration")

    print("Starting up master agent operator")
    self.server_thread.start()
    self.started = True
def test_load_document_from_yaml():
    """A Document is deserialized from YAML text with its date parsed."""
    # --Given--
    txt = '''
    title: My Title
    version: 0.0.1
    date: "2019-02-20"
    reference: RCF-250-ER
    custodian: ["foo","bar"]
    preparations: []
    title_template: Document about {name}'''

    # --When--
    document = from_yaml(txt, Document)

    # --Then--
    assert isinstance(document, Document)
    assert document.date == date(2019, 2, 20)
    print(document)