def source2():
    """Return a connected MemoryStore "source2" preloaded with 15 docs keyed by "k"."""
    store = MemoryStore("source2", key="k", last_updated_field="lu")
    store.connect()
    # Index both the key field and the last-updated field.
    for field in ("k", "lu"):
        store.ensure_index(field)
    docs = [{"k": n, "c": "c", "d": "d"} for n in range(15)]
    store.update(docs)
    return store
class MPWorksCompatibilityBuilderTest(unittest.TestCase):
    def setUp(self):
        """Connect the JSON-backed task fixture and an in-memory target store."""
        self.test_tasks = JSONStore([test_tasks])
        self.elasticity = MemoryStore("atomate_tasks")
        self.test_tasks.connect()
        self.elasticity.connect()

    def test_builder(self):
        """Exercise the full get/process/update cycle of the builder."""
        builder = MPWorksCompatibilityBuilder(self.test_tasks,
                                              self.elasticity,
                                              incremental=False)
        results = [builder.process_item(item) for item in builder.get_items()]
        builder.update_targets(results)

    def test_convert_mpworks_to_atomate(self):
        """Converted docs should carry 'hubbards' in their input section."""
        for pattern in ("deformed", "(2x)"):
            source_doc = self.test_tasks.collection.find_one(
                {"task_type": {"$regex": pattern}})
            converted = convert_mpworks_to_atomate(source_doc)
            self.assertTrue('hubbards' in converted['input'])

    def test_update_mpworks_schema(self):
        """Schema-updated docs should still convert to atomate format."""
        doc = self.test_tasks.query(criteria={"task_id": "mp-612"})[0]
        doc = update_mpworks_schema(doc)
        atomate_doc = convert_mpworks_to_atomate(doc)
class TestRobocrysBuilder(unittest.TestCase):
    def setUp(self):
        """Set up a populated materials store and an empty robocrys target."""
        self.materials = MemoryStore("materials")
        self.materials.connect()
        self.materials.update(loadfn(test_mats, cls=None))
        self.robocrys = MemoryStore("robocrys")

    def test_build(self):
        """Run the builder and spot-check the diamond (mp-66) description."""
        runner = Runner([RobocrysBuilder(self.materials, self.robocrys)])
        runner.run()

        doc = list(self.robocrys.query(criteria={'task_id': 'mp-66'}))[0]
        condensed = doc['condensed_structure']
        self.assertEqual(condensed['formula'], 'C')
        self.assertEqual(condensed['spg_symbol'], 'Fd-3m')
        self.assertEqual(condensed['mineral']['type'], 'diamond')
        self.assertEqual(condensed['dimensionality'], '3')
        self.assertTrue("C is diamond structured" in doc['description'])
        self.assertTrue("bond lengths are 1.55" in doc['description'])
def source1():
    """Return a connected MemoryStore "source1" preloaded with 10 docs keyed by "k"."""
    store = MemoryStore("source1", key="k", last_updated_field="lu")
    store.connect()
    # Index both the key field and the last-updated field.
    for field in ("k", "lu"):
        store.ensure_index(field)
    docs = [{"k": n, "a": "a", "b": "b"} for n in range(10)]
    store.update(docs)
    return store
def source(docs):
    """Return a connected MemoryStore indexed on "k"/"lu" and loaded with *docs*."""
    store = MemoryStore("source", key="k", last_updated_field="lu")
    store.connect()
    for field in ("k", "lu"):
        store.ensure_index(field)
    store.update(docs)
    return store
class StructureSimilarityBuilderTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Build the shared, read-only site-descriptor fixture store."""
        # FIX: parameter renamed self -> cls (this is a classmethod).
        cls.test_site_descriptors = MemoryStore("site_descr")
        cls.test_site_descriptors.connect()
        site_fp_docs = loadfn(test_site_fp_stats, cls=None)
        cls.test_site_descriptors.update(site_fp_docs)

    def test_get_items(self):
        """Builder yields 3 pairs of docs, each with statistics and a task_id."""
        test_structure_similarity = MemoryStore("struct_sim")
        test_structure_similarity.connect()
        sim_builder = StructureSimilarityBuilder(self.test_site_descriptors,
                                                 test_structure_similarity,
                                                 fp_type='opsf')
        items = list(sim_builder.get_items())
        self.assertEqual(len(items), 3)
        for i in items:
            d1 = i[0]
            d2 = i[1]
            self.assertIn("statistics", d1)
            self.assertIn("statistics", d2)
            self.assertIn("task_id", d1)
            self.assertIn("task_id", d2)
            processed = sim_builder.process_item(i)
            # FIX: a nose.tools.set_trace() debugger breakpoint used to fire
            # here on falsy results, hanging automated runs; fail loudly instead.
            self.assertTrue(processed,
                            msg="process_item returned a falsy result")

    def test_get_all_site_descriptors(self):
        """Self-similarity is exact; cross-structure values match references."""
        test_structure_similarity = MemoryStore("struct_sim")
        test_structure_similarity.connect()
        sim_builder = StructureSimilarityBuilder(self.test_site_descriptors,
                                                 test_structure_similarity,
                                                 fp_type='opsf')
        # Any doc compared to itself: cosine similarity 1, distance 0.
        for d in self.test_site_descriptors.query():
            dsim = sim_builder.get_similarities(d, d)
            self.assertAlmostEqual(dsim['cos'], 1)
            self.assertAlmostEqual(dsim['dist'], 0)
        C = self.test_site_descriptors.query_one(criteria={"task_id": "mp-66"})
        NaCl = self.test_site_descriptors.query_one(
            criteria={"task_id": "mp-22862"})
        Fe = self.test_site_descriptors.query_one(criteria={"task_id": "mp-13"})
        # Reference values for the three cross-structure comparisons.
        d = sim_builder.get_similarities(C, NaCl)
        self.assertAlmostEqual(d['cos'], 0.0013649)
        self.assertAlmostEqual(d['dist'], 2.6866749)
        d = sim_builder.get_similarities(C, Fe)
        self.assertAlmostEqual(d['cos'], 0.0013069)
        self.assertAlmostEqual(d['dist'], 2.6293889)
        d = sim_builder.get_similarities(NaCl, Fe)
        self.assertAlmostEqual(d['cos'], 0.0012729)
        self.assertAlmostEqual(d['dist'], 2.7235044)
class TaskTaggerTest(unittest.TestCase):
    def setUp(self):
        """Build one fake task doc per MP input set and load them into stores."""
        coords = [[0, 0, 0], [0.75, 0.5, 0.75]]
        lattice = [
            [3.8401979337, 0.00, 0.00],
            [1.9200989668, 3.3257101909, 0.00],
            [0.00, -2.2171384943, 3.1355090603],
        ]
        structure = Structure(lattice, ["Si", "Si"], coords)

        input_sets = {
            "GGA Structure Optimization": MPRelaxSet(structure),
            "GGA Static": MPStaticSet(structure),
            "GGA NSCF Line": MPNonSCFSet(structure, mode="line"),
            "GGA NSCF Uniform": MPNonSCFSet(structure, mode="uniform"),
        }

        # Each task records its true type so the tagger's output can be checked.
        tasks = [
            {
                "true_task_type": task_type,
                "last_updated": datetime.now(),
                "task_id": n,
                "state": "successful",
                "orig_inputs": {
                    "incar": input_set.incar.as_dict(),
                    "kpoints": input_set.kpoints.as_dict(),
                },
                "output": {"structure": structure.as_dict()},
            }
            for n, (task_type, input_set) in enumerate(input_sets.items(),
                                                       start=1)
        ]

        self.test_tasks = MemoryStore("tasks")
        self.task_types = MemoryStore("task_types")
        self.test_tasks.connect()
        self.task_types.connect()
        self.test_tasks.update(tasks)

    def test_mp_defs(self):
        """Each task should be tagged with its recorded true task type."""
        task_tagger = TaskTagger(tasks=self.test_tasks,
                                 task_types=self.task_types)
        for item in task_tagger.get_items():
            result = task_tagger.calc(item)
            expected = self.test_tasks.query_one(
                criteria={"task_id": item["task_id"]},
                properties=["true_task_type"])["true_task_type"]
            self.assertEqual(result["task_type"], expected)
def alias_store():
    """Return an AliasingStore over a fresh in-memory store with test aliases."""
    backing = MemoryStore("test")
    backing.connect()
    # Aliases cover plain, dotted-source, and dotted-target field names.
    aliases = {"a": "b", "c.d": "e", "f": "g.h"}
    return AliasingStore(backing, aliases)
def setUpClass(cls) -> None:
    """Reset symbol registries and build both AFLOW adapters (web + store-backed)."""
    Registry.clear_all_registries()
    add_builtin_symbols_to_registry()
    cls.afa_web = AflowAdapter()
    data = loadfn(os.path.join(TEST_DATA_DIR, 'aflow_store.json'))
    backing = MemoryStore()
    backing.connect()
    backing.update(data, key='auid')
    cls.afa_store = AflowAdapter(backing)
class BuilderTest(unittest.TestCase):
    def setUp(self):
        """Load sanitized test materials into a source store; keep an empty target."""
        self.materials = MemoryStore()
        self.materials.connect()
        docs = loadfn(os.path.join(TEST_DIR, "test_materials.json"))
        self.materials.update(jsanitize(docs, strict=True, allow_bson=True))
        self.propstore = MemoryStore()
        self.propstore.connect()

    def test_serial_runner(self):
        """The builder completes a serial run."""
        runner = Runner([PropnetBuilder(self.materials, self.propstore)])
        runner.run()

    def test_multiproc_runner(self):
        """The builder completes a run through the multiprocess-capable Runner."""
        runner = Runner([PropnetBuilder(self.materials, self.propstore)])
        runner.run()

    def test_process_item(self):
        """Processing the Cs material yields a doc containing vickers_hardness."""
        item = self.materials.query_one(criteria={"pretty_formula": "Cs"})
        builder = PropnetBuilder(self.materials, self.propstore)
        processed = builder.process_item(item)
        self.assertIsNotNone(processed)
        # Ensure vickers hardness gets populated
        self.assertIn("vickers_hardness", processed)

    # @unittest.skipIf(not os.path.isfile("runner.json"), "No runner file")
    # def test_runner_pipeline(self):
    #     from monty.serialization import loadfn
    #     runner = loadfn("runner.json")
    #     runner.builders[0].connect()
    #     items = list(runner.builders[0].get_items())
    #     processed = runner.builders[0].process_item(items[0])
    #     runner.run()

    # Just here for reference, in case anyone wants to create a new set
    # of test materials -jhm
    @unittest.skipIf(True, "Skipping test materials creation")
    def create_test_docs(self):
        """Dump a fresh test_materials.json from the production store (manual use)."""
        formulas = ["BaNiO3", "Si", "Fe2O3", "Cs"]
        from maggma.advanced_stores import MongograntStore
        from monty.serialization import dumpfn
        mgstore = MongograntStore("ro:matgen2.lbl.gov/mp_prod", "materials")
        builder = PropnetBuilder(mgstore,
                                 self.propstore,
                                 criteria={
                                     "pretty_formula": {"$in": formulas},
                                     "e_above_hull": 0,
                                 })
        builder.connect()
        dumpfn(list(builder.get_items()), "test_materials.json")
def setUpClass(cls):
    """Load four test structures into materials; elasticity gets only the first."""
    materials = MemoryStore("materials")
    materials.connect()
    docs = [
        {"task_id": n,
         "structure": PymatgenTest.get_structure(name).as_dict()}
        for n, name in enumerate(["Si", "Sn", "TiO2", "VO2"])
    ]
    materials.update(docs, key='task_id')

    elasticity = MemoryStore("elasticity")
    elasticity.connect()
    elasticity.update(docs[:1], key="task_id")

    cls.materials = materials
    cls.elasticity = elasticity
def test_memory_store_connect():
    """connect() creates a mongomock collection; an ssh_tunnel argument only warns."""
    store = MemoryStore()
    assert store._collection is None
    store.connect()
    assert isinstance(store._collection, mongomock.collection.Collection)

    # MemoryStore has no network connection, so passing a tunnel is a no-op
    # that must emit a UserWarning.
    with pytest.warns(UserWarning, match="SSH Tunnel not needed for MemoryStore"):

        class FakePipe:
            remote_bind_address = ("localhost", 27017)
            local_bind_address = ("localhost", 37017)

        store.connect(ssh_tunnel=FakePipe())
def _get_correlation_values():
    """Run a CorrelationBuilder over the canned propnet data and return it."""
    source = MemoryStore()
    data_path = os.path.join(CORR_TEST_DIR, "correlation_propnet_data.json")
    with open(data_path, 'r') as f:
        raw = json.load(f)
    source.connect()
    source.update(jsanitize(raw, strict=True, allow_bson=True))

    builder = CorrelationBuilder(source,
                                 MemoryStore(),
                                 props=PROPNET_PROPS,
                                 funcs='all',
                                 from_quantity_db=False)
    Runner([builder]).run()
    return builder
class TaskTaggerTest(unittest.TestCase):
    def setUp(self):
        """Connect the JSON task fixture and an empty task_types target."""
        self.test_tasks = JSONStore(test_tasks)
        self.task_types = MemoryStore("task_types")
        for store in (self.test_tasks, self.task_types):
            store.connect()

    def test_mp_defs(self):
        """Processed items must match the fixture's recorded true task type."""
        tagger = TaskTagger(tasks=self.test_tasks, task_types=self.task_types)
        for item in tagger.get_items():
            result = tagger.process_item(item)
            if result:
                self.assertEqual(result["task_type"], item["true_task_type"])
class TaskTaggerTest(unittest.TestCase):
    def setUp(self):
        """Connect the JSON task fixture and an empty task_types target."""
        self.test_tasks = JSONStore(test_tasks)
        self.task_types = MemoryStore("task_types")
        for store in (self.test_tasks, self.task_types):
            store.connect()

    def test_mp_defs(self):
        """Every processed item's task_type matches the stored true type."""
        tagger = TaskTagger(tasks=self.test_tasks, task_types=self.task_types)
        for item in tagger.get_items():
            result = tagger.process_item(item)
            expected = self.test_tasks.query_one(
                criteria={"task_id": item["task_id"]},
                properties=["true_task_type"],
            )["true_task_type"]
            self.assertEqual(result["task_type"], expected)
def search_helper(payload, base: str = "/?", debug=True) -> Response:
    """
    Helper function to directly query search endpoints.

    Args:
        payload: query in dictionary format
        base: base of the query URL, defaults to "/?"
        debug: if True, print the assembled URL before issuing the request

    Returns:
        Tuple of the requests.Response object and the "data" list pulled
        from its JSON body (empty list if the key is absent).
    """
    # FIX: docstring previously documented nonexistent `store`/`client` args;
    # also removed a stray debug print of NumericQuery's signature.
    store = MemoryStore("owners", key="name")
    store.connect()
    store.update([d.dict() for d in owners])

    endpoint = ReadOnlyResource(
        store,
        Owner,
        query_operators=[
            StringQueryOperator(model=Owner),
            NumericQuery(model=Owner),
            SparseFieldsQuery(model=Owner),
        ],
        disable_validation=True,
    )

    app = FastAPI()
    app.include_router(endpoint.router)
    client = TestClient(app)

    url = base + urlencode(payload)
    if debug:
        print(url)
    res = client.get(url)
    # FIX: renamed local `json` -> `body` so it no longer shadows the module.
    body = res.json()
    return res, body.get("data", [])  # type: ignore
def test_get_items(self):
    """Builder yields 3 doc pairs, each carrying opsf statistics and task ids."""
    test_structure_similarity = MemoryStore("struct_sim_struct_sim")
    test_structure_similarity.connect()
    sim_builder = StructureSimilarityBuilder(self.test_site_descriptors,
                                             test_structure_similarity)
    items = list(sim_builder.get_items())
    self.assertEqual(len(items), 3)
    for i in items:
        d1 = i[0]
        d2 = i[1]
        self.assertIn("opsf_statistics", d1)
        self.assertIn("opsf_statistics", d2)
        self.assertIn("task_id", d1)
        self.assertIn("task_id", d2)
        processed = sim_builder.process_item(i)
        # FIX: a nose.tools.set_trace() debugger breakpoint used to fire here
        # on falsy results, hanging automated runs; fail the test instead.
        self.assertTrue(processed, msg="process_item returned a falsy result")
def create_correlation_quantity_indexed_docs():
    """
    Output a JSON file containing the same data as
    create_correlation_test_docs() but split into individual quantities,
    mimicking the quantity-indexed store.

    create_correlation_test_docs() must have been run first so its JSON
    file exists in the test directory.
    """
    pn_store, q_store, m_store = MemoryStore(), MemoryStore(), MemoryStore()
    src = os.path.join(CORR_TEST_DIR, "correlation_propnet_data.json")
    with open(src, 'r') as f:
        data = json.load(f)
    pn_store.connect()
    pn_store.update(jsanitize(data, strict=True, allow_bson=True))

    # Separate the material docs into individual quantity docs.
    Runner([SeparationBuilder(pn_store, q_store, m_store)]).run()

    q_data = list(q_store.query(criteria={}, properties={'_id': False}))
    dumpfn(
        q_data,
        os.path.join(CORR_TEST_DIR, "correlation_propnet_quantity_data.json"))
def test_get_all_site_descriptors(self):
    """Self-similarity is exact; cross-structure values match the references."""
    target = MemoryStore("struct_sim")
    target.connect()
    sim_builder = StructureSimilarityBuilder(self.test_site_descriptors,
                                             target,
                                             fp_type='opsf')

    # Every doc compared with itself: cosine 1, distance 0.
    for doc in self.test_site_descriptors.query():
        self_sim = sim_builder.get_similarities(doc, doc)
        self.assertAlmostEqual(self_sim['cos'], 1)
        self.assertAlmostEqual(self_sim['dist'], 0)

    def fetch(task_id):
        return self.test_site_descriptors.query_one(
            criteria={"task_id": task_id})

    C, NaCl, Fe = fetch("mp-66"), fetch("mp-22862"), fetch("mp-13")

    # (doc_a, doc_b, expected cos, expected dist) reference triples.
    references = [
        (C, NaCl, 0.0013649, 2.6866749),
        (C, Fe, 0.0013069, 2.6293889),
        (NaCl, Fe, 0.0012729, 2.7235044),
    ]
    for a, b, cos_ref, dist_ref in references:
        sim = sim_builder.get_similarities(a, b)
        self.assertAlmostEqual(sim['cos'], cos_ref)
        self.assertAlmostEqual(sim['dist'], dist_ref)
class TestBoltztrap4DosBuilder(unittest.TestCase):
    def setUp(self):
        """Connect JSON fixtures (materials, bandstructure, reference DOS)
        and an empty in-memory DOS target."""
        self.materials = JSONStore(boltztrap4dos_mat)
        self.bandstructure = JSONStore(boltztrap4dos_bs)
        self.dos_ref = JSONStore(boltztrap4dos_dos)
        for store in (self.materials, self.bandstructure, self.dos_ref):
            store.connect()
        self.dos = MemoryStore("dos")
        self.dos.connect()

    @unittest.skipIf("TRAVIS" in os.environ and os.environ["TRAVIS"] == "true",
                     "Skipping this test on Travis CI.")
    def test_process_items(self):
        """process_item should produce a DOS with the expected density value."""
        dosbuilder = Boltztrap4DosBuilder(self.materials,
                                          self.bandstructure,
                                          self.dos,
                                          avoid_projections=True)
        item = self.materials.query_one()
        item["bandstructure_uniform"] = self.bandstructure.query_one()
        dos = dosbuilder.process_item(item)
        self.assertAlmostEqual(dos['densities']['1'][3900],
                               5.446126162946311, 5)

    def test_update_targets(self):
        """update_targets should write the reference DOS under its task_id."""
        dosbuilder = Boltztrap4DosBuilder(self.materials, self.bandstructure,
                                          self.dos)
        dosbuilder.update_targets([self.dos_ref.query_one()])
        self.assertListEqual(self.dos.distinct("task_id"), ['mp-663338'])
def target():
    """Return a connected, empty MemoryStore keyed by "ks"."""
    store = MemoryStore("target", key="ks", last_updated_field="lu")
    store.connect()
    for field in ("ks", "lu"):
        store.ensure_index(field)
    return store
mpr = MPRester() try: store = loadfn(environ["PROPNET_CORRELATION_STORE_FILE"]) store.connect() except (ServerSelectionTimeoutError, KeyError, FileNotFoundError) as ex: if isinstance(ex, ServerSelectionTimeoutError): logger.warning("Unable to connect to propnet correlation db!") if isinstance(ex, KeyError): logger.warning("PROPNET_CORRELATION_STORE_FILE var not set!") if isinstance(ex, FileNotFoundError): logger.warning( "File specified in PROPNET_CORRELATION_STORE_FILE not found!") from maggma.stores import MemoryStore store = MemoryStore() store.connect() # layout won't work if database is down, but at least web app will stay up correlation_funcs = list(store.query().distinct("correlation_func")) correlation_func_info = { "mic": { "name": "Maximal information coefficient", "bounds": lambda x: 0 <= round(x) <= 1 }, "linlsq": { "name": "Linear least squares, R-squared", "bounds": lambda x: 0 <= round(x) <= 1 }, "theilsen": { "name": "Theil-Sen regression, R-squared",
class SiteDescriptorsBuilderTest(unittest.TestCase):
    @classmethod
    def setUpClass(cls):
        """Build the shared, read-only materials fixture store."""
        # FIX: parameter renamed self -> cls (this is a classmethod).
        cls.test_materials = MemoryStore("mat_site_fingerprint")
        cls.test_materials.connect()
        struct_docs = loadfn(test_structs, cls=None)
        cls.test_materials.update(struct_docs)

    def test_builder(self):
        """Full build consumes all items; unsetting a field re-queues one doc."""
        test_site_descriptors = MemoryStore("test_site_descriptors")
        sd_builder = SiteDescriptorsBuilder(self.test_materials,
                                            test_site_descriptors)
        sd_builder.connect()
        for t in sd_builder.get_items():
            processed = sd_builder.process_item(t)
            # FIX: a nose.tools.set_trace() debugger breakpoint used to fire
            # here on falsy results, hanging automated runs; fail instead.
            self.assertTrue(processed,
                            msg="process_item returned a falsy result")
            sd_builder.update_targets([processed])
        # After a full build there is nothing left to process.
        self.assertEqual(len([t for t in sd_builder.get_items()]), 0)

        # Remove one data piece in diamond entry and test partial update.
        test_site_descriptors.collection.find_one_and_update(
            {'task_id': 'mp-66'}, {'$unset': {'site_descriptors': 1}})
        items = [e for e in list(sd_builder.get_items())]
        self.assertEqual(len(items), 1)

    def test_get_all_site_descriptors(self):
        """Check per-site descriptors and their statistics for C, NaCl, Fe."""
        test_site_descriptors = MemoryStore("test_site_descriptors")
        sd_builder = SiteDescriptorsBuilder(self.test_materials,
                                            test_site_descriptors)
        C = self.test_materials.query_one(criteria={"task_id": "mp-66"})
        NaCl = self.test_materials.query_one(criteria={"task_id": "mp-22862"})
        Fe = self.test_materials.query_one(criteria={"task_id": "mp-13"})

        def get_index(li, optype):
            """Return the index of the statistics entry named *optype*."""
            for i, di in enumerate(li):
                if di['name'] == optype:
                    return i
            raise RuntimeError('did not find optype {}'.format(optype))

        # Diamond.
        d = sd_builder.get_site_descriptors_from_struct(
            Structure.from_dict(C["structure"]))
        for di in d.values():
            self.assertEqual(len(di), 2)
        self.assertEqual(d['cn_VoronoiNN'][0]['CN_VoronoiNN'], 20)
        self.assertAlmostEqual(d['cn_wt_VoronoiNN'][0]['CN_VoronoiNN'],
                               4.5381162)
        self.assertEqual(d['cn_JMolNN'][0]['CN_JMolNN'], 4)
        self.assertAlmostEqual(d['cn_wt_JMolNN'][0]['CN_JMolNN'], 4.9617398)
        self.assertEqual(d['cn_MinimumDistanceNN'][0]['CN_MinimumDistanceNN'],
                         4)
        self.assertAlmostEqual(
            d['cn_wt_MinimumDistanceNN'][0]['CN_MinimumDistanceNN'], 4)
        self.assertEqual(d['cn_MinimumOKeeffeNN'][0]['CN_MinimumOKeeffeNN'], 4)
        self.assertAlmostEqual(
            d['cn_wt_MinimumOKeeffeNN'][0]['CN_MinimumOKeeffeNN'], 4)
        self.assertEqual(d['cn_MinimumVIRENN'][0]['CN_MinimumVIRENN'], 4)
        self.assertAlmostEqual(d['cn_wt_MinimumVIRENN'][0]['CN_MinimumVIRENN'],
                               4)
        self.assertEqual(
            d['cn_BrunnerNN_reciprocal'][0]['CN_BrunnerNN_reciprocal'], 4)
        self.assertAlmostEqual(
            d['cn_wt_BrunnerNN_reciprocal'][0]['CN_BrunnerNN_reciprocal'], 4)
        self.assertAlmostEqual(d['opsf'][0]['tetrahedral CN_4'], 0.9995)
        #self.assertAlmostEqual(d['csf'][0]['tetrahedral CN_4'], 0.9886777)
        ds = sd_builder.get_statistics(d)
        self.assertTrue('opsf' in list(ds.keys()))
        self.assertTrue('csf' in list(ds.keys()))
        # Each statistics entry carries five fields.
        for k, dsk in ds.items():
            for di in dsk:
                self.assertEqual(len(list(di.keys())), 5)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'], 'tetrahedral CN_4')]['max'],
            0.9995)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'], 'tetrahedral CN_4')]['min'],
            0.9995)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'], 'tetrahedral CN_4')]['mean'],
            0.9995)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'], 'tetrahedral CN_4')]['std'], 0)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'], 'octahedral CN_6')]['mean'],
            0.0005)

        # NaCl.
        d = sd_builder.get_site_descriptors_from_struct(
            Structure.from_dict(NaCl["structure"]))
        self.assertAlmostEqual(d['opsf'][0]['octahedral CN_6'], 0.9995)
        #self.assertAlmostEqual(d['csf'][0]['octahedral CN_6'], 1)
        ds = sd_builder.get_statistics(d)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'], 'octahedral CN_6')]['max'],
            0.9995)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'], 'octahedral CN_6')]['min'],
            0.9995)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'], 'octahedral CN_6')]['mean'],
            0.9995)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'], 'octahedral CN_6')]['std'], 0)

        # Iron.
        d = sd_builder.get_site_descriptors_from_struct(
            Structure.from_dict(Fe["structure"]))
        self.assertAlmostEqual(d['opsf'][0]['body-centered cubic CN_8'],
                               0.9995)
        #self.assertAlmostEqual(d['csf'][0]['body-centered cubic CN_8'], 0.755096)
        ds = sd_builder.get_statistics(d)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'],
                                 'body-centered cubic CN_8')]['max'], 0.9995)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'],
                                 'body-centered cubic CN_8')]['min'], 0.9995)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'],
                                 'body-centered cubic CN_8')]['mean'], 0.9995)
        self.assertAlmostEqual(
            ds['opsf'][get_index(ds['opsf'],
                                 'body-centered cubic CN_8')]['std'], 0)
def pet_store(pets):
    """Return a connected "pets" MemoryStore (keyed by name) loaded with *pets*."""
    store = MemoryStore("pets", key="name")
    store.connect()
    encoded = [jsonable_encoder(p) for p in pets]
    store.update(encoded)
    return store
def owner_store(owners):
    """Return a connected "owners" MemoryStore (keyed by name) loaded with *owners*."""
    store = MemoryStore("owners", key="name")
    store.connect()
    encoded = [jsonable_encoder(o) for o in owners]
    store.update(encoded)
    return store
def test_memory_store_connect():
    """connect() should swap the None collection for a mongomock collection."""
    store = MemoryStore()
    assert store._collection is None
    store.connect()
    assert isinstance(store._collection, mongomock.collection.Collection)
def memory_store():
    """Return a freshly connected maggma MemoryStore."""
    from maggma.stores import MemoryStore

    ms = MemoryStore()
    ms.connect()
    return ms
def memorystore():
    """Provide a MemoryStore that is already connected."""
    ms = MemoryStore()
    ms.connect()
    return ms
def owner_store():
    """Return a connected "owners" store preloaded with the owner fixtures."""
    docs = [owner.dict() for owner in owners]
    store = MemoryStore("owners", key="name")
    store.connect()
    store.update(docs)
    return store
class CorrelationTest(unittest.TestCase):
    """Tests for CorrelationBuilder against canned propnet/quantity data."""

    @classmethod
    def setUpClass(cls):
        # Class-wide fixtures: a material-indexed store (propstore) and a
        # quantity-indexed store (quantity_store), each populated from a JSON
        # file in TEST_DATA_DIR if that file exists.
        add_builtin_models_to_registry()
        cls.propnet_props = PROPNET_PROPS
        cls.propstore = MemoryStore()
        cls.propstore.connect()
        materials_file = os.path.join(TEST_DATA_DIR,
                                      "correlation_propnet_data.json")
        if os.path.exists(materials_file):
            with open(materials_file, 'r') as f:
                materials = json.load(f)
            materials = jsanitize(materials, strict=True, allow_bson=True)
            cls.propstore.update(materials)
        cls.quantity_store = MemoryStore()
        cls.quantity_store.connect()
        quantities_file = os.path.join(
            TEST_DATA_DIR, "correlation_propnet_quantity_data.json")
        if os.path.exists(quantities_file):
            with open(quantities_file, 'r') as f:
                quantities = json.load(f)
            quantities = jsanitize(quantities, strict=True, allow_bson=True)
            cls.quantity_store.update(quantities, key='internal_id')
        # Per-test target store; (re)created in setUp.
        cls.correlation = None
        # Expected correlation values per built-in function, for both
        # orderings of the (vickers hardness, bulk modulus) property pair.
        # vickers hardness (x-axis) vs. bulk modulus (y-axis)
        cls.correlation_values_vickers_bulk = {
            'linlsq': 0.49302857395714916,
            'pearson': 0.7021599347421844,
            'spearman': 0.7658801470036752,
            'mic': 0.7069707677575771,
            'theilsen': 0.4498696692135892,
            'ransac': 0.412850153689996
        }
        cls.correlation_values_bulk_vickers = {
            'linlsq': 0.49302857395714916,
            'pearson': 0.7021599347421844,
            'spearman': 0.7658801470036752,
            'mic': 0.7069707677575771,
            'theilsen': 0.46461142885951223,
            'ransac': 0.3699261228882036
        }

    @classmethod
    def tearDownClass(cls):
        # Remove the file written by test_database_and_file_write, if any.
        if os.path.exists(os.path.join(TEST_DATA_DIR, "test_output.json")):
            os.remove(os.path.join(TEST_DATA_DIR, "test_output.json"))

    def setUp(self):
        # Fresh, empty correlation target store for each test.
        self.correlation = MemoryStore()
        self.correlation.connect()

    def test_serial_runner(self):
        # Builder completes a serial run over the material-indexed store.
        builder = CorrelationBuilder(self.propstore,
                                     self.correlation,
                                     from_quantity_db=False)
        runner = Runner([builder])
        runner.run()

    def test_serial_runner_quantity_db(self):
        # This only runs over the 4 properties in the database because
        # the mongomock db cannot be indexed and is therefore very slow
        builder = CorrelationBuilder(self.quantity_store,
                                     self.correlation,
                                     props=self.propnet_props,
                                     from_quantity_db=True)
        runner = Runner([builder])
        runner.run()

    def test_multiproc_runner(self):
        # Same as the serial run but with 4 worker processes.
        builder = CorrelationBuilder(self.propstore,
                                     self.correlation,
                                     from_quantity_db=False)
        runner = Runner([builder], max_workers=4)
        runner.run()

    def test_multiproc_runner_quantity_db(self):
        # This only runs over the 4 properties in the database because
        # the mongomock db cannot be indexed and is therefore very slow
        builder = CorrelationBuilder(self.quantity_store,
                                     self.correlation,
                                     props=self.propnet_props,
                                     from_quantity_db=True)
        runner = Runner([builder], max_workers=4)
        runner.run()

    def test_process_item(self):
        # For each source store (material- and quantity-indexed), process one
        # specific property pair and check the resulting 6-tuple:
        # (x_name, y_name, correlation, func_name, n_points, path_length).
        test_props = [['band_gap_pbe', 'atomic_density'],
                      ['bulk_modulus', 'vickers_hardness']]
        linlsq_correlation_values = [0.011294932700383722, 0.49302857395714916]
        path_lengths = [None, 2]
        for source_db, is_quantity_db in zip(
                (self.propstore, self.quantity_store), (False, True)):
            for props, expected_correlation_val, expected_path_length in \
                    zip(test_props, linlsq_correlation_values, path_lengths):
                builder = CorrelationBuilder(source_db,
                                             self.correlation,
                                             props=props,
                                             from_quantity_db=is_quantity_db)
                processed = None
                prop_x, prop_y = props
                # Find the item for exactly this (x, y) property ordering.
                for item in builder.get_items():
                    if item['x_name'] == prop_x and \
                            item['y_name'] == prop_y:
                        processed = builder.process_item(item)
                        break
                # print(processed)
                self.assertIsNotNone(processed)
                self.assertIsInstance(processed, tuple)
                px, py, correlation, func_name, n_points, path_length = processed
                self.assertEqual(px, prop_x)
                self.assertEqual(py, prop_y)
                self.assertAlmostEqual(correlation, expected_correlation_val)
                self.assertEqual(func_name, 'linlsq')
                self.assertEqual(n_points, 200)
                self.assertEqual(path_length, expected_path_length)

    def test_correlation_funcs(self):
        # A user-supplied correlation function should be registered alongside
        # the built-ins ('all') and produce its constant value.
        def custom_correlation_func(x, y):
            return 0.5

        correlation_values = {
            k: v
            for k, v in self.correlation_values_bulk_vickers.items()
        }
        correlation_values['test_correlation.custom_correlation_func'] = 0.5

        builder = CorrelationBuilder(
            self.propstore,
            self.correlation,
            props=['vickers_hardness', 'bulk_modulus'],
            funcs=['all', custom_correlation_func],
            from_quantity_db=False)

        self.assertEqual(
            set(builder._funcs.keys()),
            set(correlation_values.keys()),
            msg="Are there new built-in functions in the correlation builder?")

        for item in builder.get_items():
            if item['x_name'] == 'bulk_modulus' and \
                    item['y_name'] == 'vickers_hardness':
                processed = builder.process_item(item)
                self.assertIsInstance(processed, tuple)
                prop_x, prop_y, correlation, func_name, n_points, path_length = processed
                self.assertEqual(prop_x, 'bulk_modulus')
                self.assertEqual(prop_y, 'vickers_hardness')
                self.assertIn(func_name, correlation_values.keys())
                self.assertAlmostEqual(correlation,
                                       correlation_values[func_name])
                self.assertEqual(n_points, 200)
                self.assertEqual(path_length, 2)

    def test_database_and_file_write(self):
        # Run a full build writing both to the correlation store and to a
        # JSON out_file, then validate both outputs.
        builder = CorrelationBuilder(self.propstore,
                                     self.correlation,
                                     props=self.propnet_props,
                                     funcs='all',
                                     out_file=os.path.join(
                                         TEST_DATA_DIR, "test_output.json"),
                                     from_quantity_db=False)
        runner = Runner([builder])
        runner.run()

        # Test database output
        data = list(self.correlation.query(criteria={}))
        # count = n_props**2 * n_funcs
        # n_props = 4, n_funcs = 6
        self.assertEqual(len(data),
                         96,
                         msg="Are there new built-in funcs in the builder?")
        for d in data:
            self.assertIsInstance(d, dict)
            self.assertEqual(
                set(d.keys()), {
                    'property_x', 'property_y', 'correlation',
                    'correlation_func', 'n_points', 'shortest_path_length',
                    'id', '_id', 'last_updated'
                })
            self.assertEqual(d['n_points'], 200)
            # Check the two orderings of the hardness/bulk-modulus pair
            # against their respective reference values.
            if d['property_x'] == 'vickers_hardness' and \
                    d['property_y'] == 'bulk_modulus':
                # print("{}: {}".format(d['correlation_func'], d['correlation']))
                self.assertAlmostEqual(
                    d['correlation'], self.correlation_values_vickers_bulk[
                        d['correlation_func']])
            elif d['property_x'] == 'bulk_modulus' and \
                    d['property_y'] == 'vickers_hardness':
                # print("{}: {}".format(d['correlation_func'], d['correlation']))
                self.assertAlmostEqual(
                    d['correlation'], self.correlation_values_bulk_vickers[
                        d['correlation_func']])

        # Test file output
        expected_file_data = loadfn(
            os.path.join(TEST_DATA_DIR, 'correlation_outfile.json'))
        actual_file_data = loadfn(
            os.path.join(TEST_DATA_DIR, 'test_output.json'))

        self.assertIsInstance(actual_file_data, dict)
        self.assertEqual(actual_file_data.keys(), expected_file_data.keys())
        self.assertEqual(set(actual_file_data['properties']),
                         set(expected_file_data['properties']))

        # Matrix entries are indexed by property position, which may differ
        # between files, so map through each file's own property order.
        expected_props = expected_file_data['properties']
        actual_props = actual_file_data['properties']

        for prop_x, prop_y in product(expected_props, repeat=2):
            iex, iey = expected_props.index(prop_x), expected_props.index(
                prop_y)
            iax, iay = actual_props.index(prop_x), actual_props.index(prop_y)

            self.assertEqual(actual_file_data['n_points'][iax][iay],
                             expected_file_data['n_points'][iex][iey])
            self.assertEqual(
                actual_file_data['shortest_path_length'][iax][iay],
                expected_file_data['shortest_path_length'][iex][iey])

            for f in builder._funcs.keys():
                self.assertAlmostEqual(
                    actual_file_data['correlation'][f][iax][iay],
                    expected_file_data['correlation'][f][iex][iey])

    def test_sample_size_limit(self):
        # sample_size caps n_points; a too-small value raises ValueError.
        sample_sizes = [50, 300]
        expected_n_points = [50, 200]
        for sample_size, n_points in zip(sample_sizes, expected_n_points):
            correlation_store = MemoryStore()
            builder = CorrelationBuilder(
                self.propstore,
                correlation_store,
                props=['bulk_modulus', 'vickers_hardness'],
                funcs='linlsq',
                sample_size=sample_size,
                from_quantity_db=False)
            runner = Runner([builder])
            runner.run()
            data = list(correlation_store.query(criteria={}))
            for d in data:
                self.assertEqual(d['n_points'], n_points)

        with self.assertRaises(ValueError):
            _ = CorrelationBuilder(self.propstore,
                                   self.correlation,
                                   sample_size=1)