def main():
    """Entry point for the ``mrun`` script: load builders from a file and run them.

    Parses CLI args, configures root logging (verbosity capped at DEBUG),
    loads the builder file with ``loadfn``, wraps the result in a ``Runner``
    and executes it unless ``--dry-run`` was given.
    """
    parser = argparse.ArgumentParser(
        description="mrun is a script to run builders written using the Maggma framework.")
    parser.add_argument(
        "builder",
        help="Builder file in either json or yaml format. Can contain a list of builders or a predefined Runner")
    parser.add_argument(
        "-n", "--num_workers", type=int, default=0,
        help="Number of worker processes. Defaults to use as many as available.")
    parser.add_argument('-v', '--verbose', action='count', default=0,
                        help="Controls logging level per number of v's")
    parser.add_argument(
        "--dry-run", action="store_true", default=False,
        help="Dry run loading the builder file. Does not run the builders")
    parser.add_argument("--mpi", action="store_true", default=False,
                        help="Running under MPI")
    args = parser.parse_args()

    # Set Logging: no -v -> WARNING, -v -> INFO, -vv (or more) -> DEBUG
    levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    level = levels[min(len(levels) - 1, args.verbose)]  # capped to number of levels
    root = logging.getLogger()
    root.setLevel(level)
    ch = TqdmLoggingHandler()
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    ch.setFormatter(formatter)
    root.addHandler(ch)

    objects = loadfn(args.builder)

    if isinstance(objects, list):
        # If this is a list of builders
        runner = Runner(objects, num_workers=args.num_workers)
    elif isinstance(objects, Runner):
        # This is a runner: rebuild it so the CLI worker count wins
        root.info("Changing number of workers from default in input file")
        runner = Runner(objects.builders, args.num_workers)
    else:
        # BUG FIX: previously fell through with `runner` unbound, so the
        # run below raised UnboundLocalError after logging; bail out instead.
        root.error("Couldn't properly read the builder file.")
        return

    if not args.dry_run:
        runner.run(mpi=args.mpi)
def test_database_write(self):
    """Run a full CorrelationBuilder pipeline and validate every document
    written to the correlation store.

    Checks the total document count, the per-document key schema and point
    count, and spot-checks the correlation values for the
    (vickers_hardness, bulk_modulus) pair in both orderings against the
    precomputed fixture dicts on ``self``.
    """
    builder = CorrelationBuilder(self.propstore,
                                 self.materials,
                                 self.correlation,
                                 props=self.propnet_props + self.mp_props,
                                 funcs='all')
    runner = Runner([builder])
    runner.run()
    data = list(self.correlation.query(criteria={}))
    # count = n_props**2 * n_funcs
    # n_props = 4, n_funcs = 5
    self.assertEqual(len(data), 80)
    for d in data:
        self.assertIsInstance(d, dict)
        # Every built doc must carry exactly this key set
        self.assertEqual(
            set(d.keys()), {
                'property_x', 'property_y', 'correlation',
                'correlation_func', 'n_points', 'shortest_path_length',
                'id', '_id', 'last_updated'
            })
        self.assertEqual(d['n_points'], 200)
        # Spot-check both orderings of the hardness/modulus pair against
        # the expected values keyed by correlation function
        if d['property_x'] == 'vickers_hardness' and \
                d['property_y'] == 'bulk_modulus':
            self.assertAlmostEqual(
                d['correlation'],
                self.correlation_values_vickers_bulk[
                    d['correlation_func']])
        elif d['property_x'] == 'bulk_modulus' and \
                d['property_y'] == 'vickers_hardness':
            self.assertAlmostEqual(
                d['correlation'],
                self.correlation_values_bulk_vickers[
                    d['correlation_func']])
def test_build(self):
    """Run the bond-valence builder and verify the species predicted for mp-779001."""
    runner = Runner([BondValenceBuilder(self.materials, self.bond_valence)])
    runner.run()
    results = list(self.bond_valence.query(criteria={'task_id': 'mp-779001'}))
    doc = results[0]
    self.assertSetEqual(set(doc['possible_species']),
                        {'Hf4+', 'Sr2+', 'O2-'})
def test_runner(self):
    """After a run, updated docs read 'new' in the target while untouched ones stay 'old'."""
    self.source.collection.insert_many(self.old_docs)
    self.source.update(self.new_docs, update_lu=False)
    self.target.collection.insert_many(self.old_docs)

    Runner([self.builder]).run()

    # k=0 should have been overwritten by the update; k=10 should not
    updated = self.target.query_one(criteria={"k": 0})
    untouched = self.target.query_one(criteria={"k": 10})
    self.assertEqual(updated["v"], "new")
    self.assertEqual(untouched["v"], "old")
def test_simple_runner(self):
    """Serialize a runner to disk and execute it through the mrun CLI subprocess."""
    runner = Runner([CopyBuilder(self.source, self.target)])
    dumpfn(runner, self.runner_filename)
    cmd = "python -m maggma.cli.mrun {}".format(self.runner_filename)
    proc = subprocess.run(cmd.split(), timeout=15)
    self.assertEqual(proc.returncode, 0)
def test_runner_pipeline(self):
    """Running the pipeline twice must not create duplicate workflows."""
    runner = Runner([self.nofilter])
    # Second pass must be a no-op: the workflow count stays at 3
    for expected_count in (3, 3):
        runner.run()
        self.assertEqual(self.lpad.workflows.count(), expected_count)
def test_query(self):
    """Only documents matching the builder's query (k > 5) are copied to the target."""
    self.builder.query = {"k": {"$gt": 5}}
    self.source.collection.insert_many(self.old_docs)
    self.source.update(self.new_docs, update_lu=False)
    runner = Runner([self.builder])
    runner.run()
    all_docs = list(self.target.query(criteria={}))
    # k > 5 over keys 0..19 leaves 14 docs, the smallest being k = 6
    self.assertEqual(len(all_docs), 14)
    # BUG FIX: assertTrue(min(...), 6) treated 6 as the failure *message*,
    # so the assertion was vacuous whenever min(...) was truthy.
    self.assertEqual(min(d['k'] for d in all_docs), 6)
def test_serial_runner_quantity_db(self):
    """Smoke test: correlation build from the quantity-indexed store completes."""
    # This only runs over the 4 properties in the database because
    # the mongomock db cannot be indexed and is therefore very slow
    correlation_builder = CorrelationBuilder(
        self.quantity_store,
        self.correlation,
        props=self.propnet_props,
        from_quantity_db=True)
    Runner([correlation_builder]).run()
def test_build(self):
    """Build magnetism docs and check ordering and magnetization for mp-1034331."""
    Runner([MagneticBuilder(self.materials, self.magnetism)]).run()
    hits = list(self.magnetism.query(criteria={'task_id': 'mp-1034331'}))
    magnetism_doc = hits[0]['magnetism']
    self.assertEqual(magnetism_doc['ordering'], 'FM')
    self.assertAlmostEqual(
        magnetism_doc['total_magnetization_normalized_formula_units'],
        4.8031771)
def test_1(self):
    """Builder 1 reads store 4, which builder 0 writes, so 1 depends on 0."""
    first, second = MagicMock(), MagicMock()
    first.configure_mock(sources=[1, 2, 3], targets=[4])
    second.configure_mock(sources=[3, 4, 5], targets=[6])
    self.builders = [first, second]
    runner = Runner(self.builders)
    self.assertEqual(runner.dependency_graph, {1: [0]})
def test_build(self):
    """Build bonding docs and verify the structure graph and summary for mp-779001."""
    Runner([BondBuilder(self.materials, self.bonding)]).run()
    doc = list(self.bonding.query(criteria={'task_id': 'mp-779001'}))[0]
    graph = StructureGraph.from_dict(doc['graph'])
    self.assertIsInstance(graph, StructureGraph)
    self.assertIn('Hf-O(6)', doc['summary']['coordination_envs'])
def test_delete_orphans(self):
    """Docs removed from the source are purged from the target when delete_orphans is set."""
    self.builder = CopyBuilder(self.source, self.target, delete_orphans=True)
    self.source.collection.insert_many(self.old_docs)
    self.source.update(self.new_docs, update_lu=False)
    self.target.collection.insert_many(self.old_docs)

    # Drop k = 0..4 from the source; these become orphans in the target
    orphan_filter = {"k": {"$in": list(range(5))}}
    self.source.collection.delete_many(orphan_filter)

    Runner([self.builder]).run()

    self.assertEqual(
        self.target.collection.count_documents(orphan_filter), 0)
    self.assertEqual(self.target.query_one(criteria={"k": 5})["v"], "new")
    self.assertEqual(self.target.query_one(criteria={"k": 10})["v"], "old")
def test_incremental_false(self):
    """With incremental=False the builder recopies every doc matching the query,
    even though the target already holds a newer last-updated stamp."""
    tic = datetime.now()
    toc = tic + timedelta(seconds=1)
    keys = list(range(20))
    earlier = [{"lu": tic, "k": k, "v": "val"} for k in keys]
    later = [{"lu": toc, "k": k, "v": "val"} for k in keys]
    self.source.collection.insert_many(earlier)
    self.target.collection.insert_many(later)
    query = {"k": {"$gt": 5}}
    self.builder = CopyBuilder(self.source, self.target,
                               incremental=False,
                               query=query)
    Runner([self.builder]).run()
    docs = sorted(self.target.query(), key=lambda d: d["k"])
    # BUG FIX: assertTrue was handed a bare *generator* (always truthy), so
    # these checks never ran; the genexp now sits inside all() as intended.
    # Also, k > 5 matches k = 6..19 (14 docs, matching test_query), so the
    # recopied slice is docs[6:], not docs[5:].
    self.assertTrue(all(d["lu"] == tic for d in docs[6:]))
    self.assertTrue(all(d["lu"] == toc for d in docs[:6]))
def test_builder(self):
    """Every task yields a processable item, and the NaN3 elasticity doc has no warnings."""
    ec_builder = ElasticBuilder(self.test_tasks, self.test_elasticity,
                                self.test_materials,
                                incremental=False)
    ec_builder.connect()
    for item in ec_builder.get_items():
        self.assertTrue(bool(ec_builder.process_item(item)))
    Runner([ec_builder]).run()
    # Test warnings
    doc = ec_builder.elasticity.query_one(
        criteria={"pretty_formula": "NaN3"})
    self.assertEqual(doc['elasticity']['warnings'], None)
def test_builder(self):
    """Each item from the elastic builder should process to a truthy result,
    and the full pipeline should run cleanly end to end."""
    ec_builder = ElasticBuilder(self.test_tasks, self.test_elasticity,
                                self.test_materials,
                                incremental=False)
    ec_builder.connect()
    for t in ec_builder.get_items():
        processed = ec_builder.process_item(t)
        # BUG FIX: this previously dropped into nose.tools.set_trace() (an
        # interactive debugger) when an item failed, which hangs automated
        # runs; assert instead, matching the sibling elastic-builder tests.
        self.assertTrue(bool(processed))
    runner = Runner([ec_builder])
    runner.run()
def test_builder(self):
    """All tasks process successfully; NaN3's doc has the expected compliance
    tensor entry and no warnings."""
    ec_builder = ElasticAnalysisBuilder(self.test_tasks,
                                        self.test_elasticity,
                                        incremental=False)
    ec_builder.connect()
    for item in ec_builder.get_items():
        self.assertTrue(bool(ec_builder.process_item(item)))
    Runner([ec_builder]).run()
    # Test warnings
    doc = ec_builder.elasticity.query_one(
        criteria={"pretty_formula": "NaN3"})
    self.assertEqual(doc['warnings'], None)
    self.assertAlmostEqual(doc['compliance_tensor'][0][0], 41.576072, 6)
def test_build(self):
    """Test building the robocrys database."""
    Runner([RobocrysBuilder(self.materials, self.robocrys)]).run()
    doc = list(self.robocrys.query(criteria={'task_id': 'mp-66'}))[0]
    condensed = doc['condensed_structure']
    self.assertEqual(condensed['formula'], 'C')
    self.assertEqual(condensed['spg_symbol'], 'Fd-3m')
    self.assertEqual(condensed['mineral']['type'], 'diamond')
    self.assertEqual(condensed['dimensionality'], '3')
    self.assertIn("C is diamond structured", doc['description'])
    self.assertIn("bond lengths are 1.55", doc['description'])
def _get_correlation_values():
    """Build correlations from the full propnet data fixture and return the
    finished builder for inspection by callers."""
    full_propstore = MemoryStore()
    data_path = os.path.join(CORR_TEST_DIR, "correlation_propnet_data.json")
    with open(data_path, 'r') as f:
        data = json.load(f)
    full_propstore.connect()
    full_propstore.update(jsanitize(data, strict=True, allow_bson=True))
    correlation_store = MemoryStore()
    corr_builder = CorrelationBuilder(full_propstore,
                                      correlation_store,
                                      props=PROPNET_PROPS,
                                      funcs='all',
                                      from_quantity_db=False)
    Runner([corr_builder]).run()
    return corr_builder
def setUp(self):
    """Connect the materials/thermo stores and prebuild the materials collection."""
    self.materials = MongoStore("emmet_test", "materials")
    self.thermo = MongoStore("emmet_test", "thermo")
    for store in (self.materials, self.thermo):
        store.connect()
    self.mbuilder = MaterialsBuilder(self.tasks,
                                     self.materials,
                                     mat_prefix="",
                                     chunk_size=1)
    self.tbuilder = ThermoBuilder(self.materials, self.thermo, chunk_size=1)
    # Populate materials now; the thermo builder is run by the tests themselves
    Runner([self.mbuilder]).run()
def create_correlation_quantity_indexed_docs():
    """
    Outputs JSON file containing the same data from create_correlation_test_docs()
    but as individual quantities. This mimics the quantity-indexed store.

    Must run create_correlation_test_docs() first and have the JSON file in
    the test directory.
    """
    pn_store, q_store, m_store = MemoryStore(), MemoryStore(), MemoryStore()
    source_path = os.path.join(CORR_TEST_DIR, "correlation_propnet_data.json")
    with open(source_path, 'r') as f:
        data = json.load(f)
    pn_store.connect()
    pn_store.update(jsanitize(data, strict=True, allow_bson=True))
    Runner([SeparationBuilder(pn_store, q_store, m_store)]).run()
    q_data = list(q_store.query(criteria={}, properties={'_id': False}))
    out_path = os.path.join(CORR_TEST_DIR,
                            "correlation_propnet_quantity_data.json")
    dumpfn(q_data, out_path)
def test_sample_size_limit(self):
    """sample_size caps the points used per correlation; too-small values raise."""
    # (requested sample size, expected n_points in the built docs)
    for requested, expected in zip([50, 300], [50, 200]):
        correlation_store = MemoryStore()
        builder = CorrelationBuilder(
            self.propstore,
            correlation_store,
            props=['bulk_modulus', 'vickers_hardness'],
            funcs='linlsq',
            sample_size=requested,
            from_quantity_db=False)
        Runner([builder]).run()
        for doc in list(correlation_store.query(criteria={})):
            self.assertEqual(doc['n_points'], expected)
    # A sample size of 1 is rejected outright
    with self.assertRaises(ValueError):
        _ = CorrelationBuilder(self.propstore, self.correlation,
                               sample_size=1)
def test_1(self):
    """The dependency graph should mark builder 1 as depending on builder 0."""
    runner = Runner(self.builders)
    expected = {1: [0]}
    self.assertDictEqual(runner.dependency_graph, expected)
self.logger.info("processing item: {}".format(item)) # time.sleep(random.randint(0,3)) return {item: "processed"} def update_targets(self, items): self.logger.info("Updating targets ...") self.logger.info("Received {} processed items".format(len(items))) self.logger.info("Updated items: {}".format(list(items))) def finalize(self, cursor=None): self.logger.info("Finalizing ...") self.logger.info("DONE!") if __name__ == '__main__': N = 10 chunk_size = 3 stores = [MemoryStore(str(i)) for i in range(7)] sources = [stores[0], stores[1], stores[3]] targets = [stores[3], stores[6]] mdb = MyDumbBuilder(N, sources, targets, chunk_size=chunk_size) builders = [mdb] runner = Runner(builders) logstreamhandle(runner) runner.run()
def test_aggregation(self):
    """The aggregation run should populate at least one aggregated doc."""
    Runner([self.builder]).run()
    aggregated = list(self.test_elasticity_agg.query())
    self.assertTrue(bool(aggregated))
def test_multiproc_runner(self):
    """Smoke test: the propnet builder runs to completion under the runner."""
    propnet_builder = PropnetBuilder(self.materials, self.propstore)
    Runner([propnet_builder]).run()
def test_multiproc_runner(self):
    """Smoke test: the correlation builder completes with four workers."""
    corr_builder = CorrelationBuilder(self.propstore,
                                      self.correlation,
                                      from_quantity_db=False)
    Runner([corr_builder], max_workers=4).run()
def test_serial_runner(self):
    """Smoke test: the correlation builder completes in serial mode."""
    corr_builder = CorrelationBuilder(self.propstore,
                                      self.correlation,
                                      from_quantity_db=False)
    Runner([corr_builder]).run()
def test_database_and_file_write(self):
    """Run the correlation builder with an out_file and validate both outputs.

    First checks the database store: document count, key schema, point count,
    and spot-checks of the (vickers_hardness, bulk_modulus) pair in both
    orderings. Then compares the JSON file written by the builder against the
    expected fixture, aligning matrix entries by property index since the two
    files may order properties differently.
    """
    builder = CorrelationBuilder(self.propstore,
                                 self.correlation,
                                 props=self.propnet_props,
                                 funcs='all',
                                 out_file=os.path.join(
                                     TEST_DATA_DIR, "test_output.json"),
                                 from_quantity_db=False)
    runner = Runner([builder])
    runner.run()
    # Test database output
    data = list(self.correlation.query(criteria={}))
    # count = n_props**2 * n_funcs
    # n_props = 4, n_funcs = 6
    self.assertEqual(len(data), 96,
                     msg="Are there new built-in funcs in the builder?")
    for d in data:
        self.assertIsInstance(d, dict)
        # Every built doc must carry exactly this key set
        self.assertEqual(
            set(d.keys()), {
                'property_x', 'property_y', 'correlation',
                'correlation_func', 'n_points', 'shortest_path_length',
                'id', '_id', 'last_updated'
            })
        self.assertEqual(d['n_points'], 200)
        # Spot-check both orderings of the hardness/modulus pair against
        # the expected values keyed by correlation function
        if d['property_x'] == 'vickers_hardness' and \
                d['property_y'] == 'bulk_modulus':
            # print("{}: {}".format(d['correlation_func'], d['correlation']))
            self.assertAlmostEqual(
                d['correlation'],
                self.correlation_values_vickers_bulk[
                    d['correlation_func']])
        elif d['property_x'] == 'bulk_modulus' and \
                d['property_y'] == 'vickers_hardness':
            # print("{}: {}".format(d['correlation_func'], d['correlation']))
            self.assertAlmostEqual(
                d['correlation'],
                self.correlation_values_bulk_vickers[
                    d['correlation_func']])
    # Test file output
    expected_file_data = loadfn(
        os.path.join(TEST_DATA_DIR, 'correlation_outfile.json'))
    actual_file_data = loadfn(
        os.path.join(TEST_DATA_DIR, 'test_output.json'))
    self.assertIsInstance(actual_file_data, dict)
    self.assertEqual(actual_file_data.keys(), expected_file_data.keys())
    self.assertEqual(set(actual_file_data['properties']),
                     set(expected_file_data['properties']))
    expected_props = expected_file_data['properties']
    actual_props = actual_file_data['properties']
    # Property order may differ between files; translate each (x, y) pair
    # into per-file matrix indices before comparing entries
    for prop_x, prop_y in product(expected_props, repeat=2):
        iex, iey = expected_props.index(prop_x), expected_props.index(
            prop_y)
        iax, iay = actual_props.index(prop_x), actual_props.index(prop_y)
        self.assertEqual(actual_file_data['n_points'][iax][iay],
                         expected_file_data['n_points'][iex][iey])
        self.assertEqual(
            actual_file_data['shortest_path_length'][iax][iay],
            expected_file_data['shortest_path_length'][iex][iey])
        # Compare every correlation function's matrix entry
        for f in builder._funcs.keys():
            self.assertAlmostEqual(
                actual_file_data['correlation'][f][iax][iay],
                expected_file_data['correlation'][f][iex][iey])
def test_serial_runner(self):
    """Smoke test: serial correlation build completes without error."""
    corr_builder = CorrelationBuilder(self.propstore,
                                      self.materials,
                                      self.correlation)
    Runner([corr_builder]).run()
def test_multiproc_runner(self):
    """Smoke test: correlation build completes with two workers."""
    corr_builder = CorrelationBuilder(self.propstore,
                                      self.materials,
                                      self.correlation)
    Runner([corr_builder], max_workers=2).run()