def get_keys_config_file(self):
    """Return the top-level keys of the downloaded data-pipeline template schema."""
    logging.info("List of keys config file")
    schema_file = self.download_template_schema()
    schema_reader = YAMLReader(schema_file)
    schema_config = schema_reader.read_yaml(True)
    return schema_config.keys()
def create_config_file(self, list_files, prefix_file=None):
    """Create the data-pipeline YAML config file from the template schema.

    Args:
        list_files: mapping of filename -> metadata dict; each value must
            contain a 'resource' key naming the template entry the file
            belongs to.
        prefix_file: optional prefix forwarded to ``open_config_file``.
    """
    logging.info("Creating data pipeline config file.")
    data_pipeline_schema = self.download_template_schema()
    data_pipeline_yaml = YAMLReader(data_pipeline_schema)
    data_pipeline_config = data_pipeline_yaml.read_yaml(True)
    # Group the downloaded filenames by the resource they belong to.
    list_resources = {}
    for filename, info in list_files.items():
        list_resources.setdefault(info['resource'], []).append(filename)
    for resource, filenames in list_resources.items():
        if resource in data_pipeline_config:
            # The template file is the reference for creating a list or a
            # single entry: keep a scalar when the template holds a string
            # and exactly one file was collected, otherwise store the list.
            # Fixed: Python 2's `basestring` does not exist on Python 3 and
            # raised NameError here — use `str` instead.
            if len(filenames) == 1 and isinstance(data_pipeline_config[resource], str):
                data_pipeline_config[resource] = filenames[0]
            else:
                data_pipeline_config[resource] = filenames
        else:
            logging.error("The key %s does not exist", resource)
    with self.open_config_file(prefix_file) as outfile:
        yaml.safe_dump(data_pipeline_config, outfile,
                       default_flow_style=False, allow_unicode=True)
    logging.info("Data Pipeline YAML file created.")
def main():
    """Entry point: parse CLI args, load the YAML config and run the resource retrieval."""
    cfg.setup_parser()
    args = cfg.get_args()
    reader = YAMLReader(args.config)
    config = reader.read_yaml()
    print_list_steps(reader.get_list_keys())
    cfg.set_up_logging(args)
    RetrieveResource(args, config).run()
def main():
    """Entry point: parse CLI args, load config, validate Google params and run."""
    cfg.setup_parser()
    args = cfg.get_args()
    # Fixed: YAMLReader was constructed with no arguments here, so the
    # user-supplied config file was never loaded. Every other caller in this
    # module passes args.config — TODO confirm against YAMLReader's default.
    yaml = YAMLReader(args.config)
    yaml_dict = yaml.read_yaml()
    get_list_steps_on_request(args.list_steps, yaml.get_list_keys())
    cfg.set_up_logging(args)
    # --gkey and --google_bucket must be supplied together (or neither).
    # The return value was previously bound to an unused local; the call is
    # kept for its validation side effect.
    GoogleBucketResource.has_google_parameters(
        args.google_credential_key, args.google_bucket)
    resources = RetrieveResource(args, yaml_dict, yaml_dict.data_pipeline_schema)
    resources.run()
def main():
    """Entry point: parse CLI args, validate Google Storage options and run the pipeline."""
    cfg.setup_parser()
    args = cfg.get_args()
    reader = YAMLReader(args.config)
    config = reader.read_yaml()
    get_list_steps_on_request(args.list_steps, reader.get_list_keys())
    cfg.set_up_logging(args)
    # --gkey and --google_bucket are mandatory for the google storage access.
    # Both keys must be parameters or none.
    GoogleBucketResource.has_google_parameters(args.google_credential_key,
                                               args.google_bucket)
    RetrieveResource(args, config, config.data_pipeline_schema).run()
class TestYamlReader(unittest.TestCase):
    """
    YamlReader reads the 'config.yaml' file in the base directory and returns
    a dictionary representation.

    This test module includes basic validation tests (eg. 'test_config_parses').
    Additional 'key specific' tests should be included here too as a validation
    mechanism (eg. chembl_indexes_have_fields).
    """

    def setUp(self):
        default_conf_file = ROOT_DIR + '/' + 'config.yaml'
        print(default_conf_file)
        self.yaml_reader = YAMLReader(default_conf_file)
        self.yaml_dict = self.yaml_reader.read_yaml()

    def test_config_parses(self):
        """
        Basic test to see if there are yaml parse errors: if the config file
        has a syntax problem an empty dictionary is returned.
        """
        self.assertGreater(len(self.yaml_dict), 0,
                           "Yaml dict should not be empty.")

    def test_chembl_indexes_have_fields(self):
        """
        Checks that all listed ChEMBL elasticsearch indexes in config select
        at least one query field.
        """
        indexes = self.yaml_dict.ChEMBL.datasources.indices
        for index in indexes.values():
            for key, value in index.items():
                print(key)
                # Fixed: `key is 'fields'` compared object identity with a
                # string literal, which is implementation-dependent and emits
                # a SyntaxWarning on modern Python — use equality instead.
                if key == 'fields':
                    self.assertGreater(
                        len(value), 0,
                        'No fields provided on index {}'.format(key))
class TestDrugStep(unittest.TestCase):
    """
    YamlReader reads the 'config.yaml' file in the base directory and
    returning a dictionary representation.
    """

    def setUp(self):
        config_path = ROOT_DIR + '/' + 'config.yaml'
        self.yaml_reader = YAMLReader(config_path)
        self.config = self.yaml_reader.read_yaml()

    @patch('modules.Drug.Drug._download_elasticsearch_data')
    def test_output_has_fields_to_write_to_GCP(self, mock1):
        """
        Step should return a dictionary with GCP target directory so that if
        `RetrieveResource` is configured to upload results it can save the
        files somewhere valid.
        """
        # Given: the file names 'saved' by the step.
        saved_files = ['f1', 'f2', 'f3']
        mock1.return_value = saved_files

        # When: the step runs against the drug configuration.
        drug_step = Drug.Drug(self.config['drug'])
        results = drug_step.get_all()

        # Then: each saved file appears in the returned dictionary, and each
        # entry carries the 'resource' and 'gs_output_dir' fields.
        self.assertEqual(len(results), len(saved_files))
        for name in saved_files:
            self.assertTrue(name in results)
            self.assertTrue('resource' in results[name])
            self.assertTrue('gs_output_dir' in results[name])
class TestDrugStep(unittest.TestCase):
    """
    YamlReader reads the 'config.yaml' file in the base directory and
    returning a dictionary representation.
    """

    def setUp(self):
        config_path = ROOT_DIR + '/' + 'config.yaml'
        self.yaml_reader = YAMLReader(config_path)
        self.config = self.yaml_reader.read_yaml()
        self.output = self.create_output_dir_test()

    def create_output_dir_test(self):
        # Fake output-directory fixture consumed by download_indices.
        dirs = Dict()
        dirs.prod_dir = "prod"
        dirs.staging_dir = "staging"
        return dirs

    @patch('plugins.Drug.Drug._download_elasticsearch_data')
    def test_output_has_fields_to_write_to_GCP(self, mock1):
        """
        Step should return a dictionary with GCP target directory so that if
        `RetrieveResource` is configured to upload results it can save the
        files somewhere valid.
        """
        # Given: the file names 'saved' by the step.
        saved_files = ['f1', 'f2', 'f3']
        mock1.return_value = saved_files
        # We only want to test the results of ES configuration at this point.
        es_config = self.config['drug']
        es_config.datasources.pop('downloads', None)

        # When: the indices are downloaded using the fixture output dirs.
        drug_step = Drug.Drug()
        results = drug_step.download_indices(self.config.drug, self.output)

        # Then: each saved file appears in the returned dictionary.
        self.assertEqual(len(results), len(saved_files))
        for name in saved_files:
            self.assertTrue(name in results)
def setUp(self):
    """Load the default config.yaml from the project root before each test."""
    config_path = ROOT_DIR + '/' + 'config.yaml'
    print(config_path)
    self.yaml_reader = YAMLReader(config_path)
    self.yaml_dict = self.yaml_reader.read_yaml()
def setUp(self):
    """Load config.yaml and prepare the fake output-directory fixture."""
    config_path = ROOT_DIR + '/' + 'config.yaml'
    self.yaml_reader = YAMLReader(config_path)
    self.config = self.yaml_reader.read_yaml()
    self.output = self.create_output_dir_test()