class TestRobocrysBuilder(unittest.TestCase):
    def setUp(self):
        """Create an in-memory materials store and an empty robocrys target."""
        source_docs = loadfn(test_mats, cls=None)
        self.materials = MemoryStore("materials")
        self.materials.connect()
        self.materials.update(source_docs)
        self.robocrys = MemoryStore("robocrys")

    def test_build(self):
        """Run the builder and spot-check the document produced for mp-66."""
        runner = Runner([RobocrysBuilder(self.materials, self.robocrys)])
        runner.run()

        doc = list(self.robocrys.query(criteria={'task_id': 'mp-66'}))[0]
        condensed = doc['condensed_structure']
        self.assertEqual(condensed['formula'], 'C')
        self.assertEqual(condensed['spg_symbol'], 'Fd-3m')
        self.assertEqual(condensed['mineral']['type'], 'diamond')
        self.assertEqual(condensed['dimensionality'], '3')
        self.assertTrue("C is diamond structured" in doc['description'])
        self.assertTrue("bond lengths are 1.55" in doc['description'])
class StructureSimilarityBuilderTest(unittest.TestCase):
    """Tests for StructureSimilarityBuilder using site-fingerprint fixtures."""

    @classmethod
    def setUpClass(cls):
        """Load site-descriptor fixture docs into a shared in-memory store.

        Fix: the parameter was previously named ``self``; unittest invokes
        ``setUpClass`` on the class object, so ``cls`` is the correct name.
        """
        cls.test_site_descriptors = MemoryStore("site_descr")
        cls.test_site_descriptors.connect()
        site_fp_docs = loadfn(test_site_fp_stats, cls=None)
        cls.test_site_descriptors.update(site_fp_docs)

    def test_get_items(self):
        """get_items yields descriptor-doc pairs that process cleanly."""
        test_structure_similarity = MemoryStore("struct_sim")
        test_structure_similarity.connect()
        sim_builder = StructureSimilarityBuilder(self.test_site_descriptors,
                                                 test_structure_similarity,
                                                 fp_type='opsf')
        items = list(sim_builder.get_items())
        self.assertEqual(len(items), 3)
        for i in items:
            d1 = i[0]
            d2 = i[1]
            self.assertIn("statistics", d1)
            self.assertIn("statistics", d2)
            self.assertIn("task_id", d1)
            self.assertIn("task_id", d2)
            # Fix: this previously dropped into a nose debugger
            # (nose.tools.set_trace) when processing failed; a falsy
            # processed item is a test failure, so assert it instead.
            processed = sim_builder.process_item(i)
            self.assertTrue(processed)

    def test_get_all_site_descriptors(self):
        """get_similarities is reflexive (cos=1, dist=0) and matches
        known pairwise values for C, NaCl and Fe fixtures."""
        test_structure_similarity = MemoryStore("struct_sim")
        test_structure_similarity.connect()
        sim_builder = StructureSimilarityBuilder(self.test_site_descriptors,
                                                 test_structure_similarity,
                                                 fp_type='opsf')
        # Any doc compared with itself must be maximally similar.
        for d in self.test_site_descriptors.query():
            dsim = sim_builder.get_similarities(d, d)
            self.assertAlmostEqual(dsim['cos'], 1)
            self.assertAlmostEqual(dsim['dist'], 0)
        C = self.test_site_descriptors.query_one(
            criteria={"task_id": "mp-66"})
        NaCl = self.test_site_descriptors.query_one(
            criteria={"task_id": "mp-22862"})
        Fe = self.test_site_descriptors.query_one(
            criteria={"task_id": "mp-13"})
        d = sim_builder.get_similarities(C, NaCl)
        self.assertAlmostEqual(d['cos'], 0.0013649)
        self.assertAlmostEqual(d['dist'], 2.6866749)
        d = sim_builder.get_similarities(C, Fe)
        self.assertAlmostEqual(d['cos'], 0.0013069)
        self.assertAlmostEqual(d['dist'], 2.6293889)
        d = sim_builder.get_similarities(NaCl, Fe)
        self.assertAlmostEqual(d['cos'], 0.0012729)
        self.assertAlmostEqual(d['dist'], 2.7235044)
def test_update(self):
    """Ingest one AFLOW record into data and auid targets, then re-ingest
    into the same data target to confirm update_targets upserts instead of
    duplicating documents."""
    dt = MemoryStore()
    at = MemoryStore()
    qc = [{
        'catalog': 'icsd',
        'k': 100,
        'exclude': ['compound'],
        'filter': [('auid', '__eq__', 'aflow:0132ab6b9cddd429')],
        'select': [],
        'targets': ['data', 'auid']
    }]
    kw = ['auid', 'aurl', 'ael_elastic_anisotropy', 'files']
    afi = AflowIngester(data_target=dt,
                        auid_target=at,
                        keywords=kw,
                        query_configs=qc)
    afi.connect()
    # Fix: use the builtin next() rather than calling __next__() directly.
    item = next(afi.get_items())
    processed = afi.process_item(item)
    afi.update_targets([processed])
    dt_data = list(dt.query())
    at_data = list(at.query())
    self.assertEqual(len(dt_data), 1)
    self.assertEqual(len(at_data), 1)

    # Omit auid target and use same data target to ensure upsert
    afi = AflowIngester(data_target=dt, keywords=kw, query_configs=qc)
    afi.connect()
    item = next(afi.get_items())
    processed = afi.process_item(item)
    afi.update_targets([processed])
    dt_data = list(dt.query())
    self.assertEqual(len(dt_data), 1)
class TestBondValence(unittest.TestCase):
    def setUp(self):
        """Point at the JSON materials fixture; create an empty target store."""
        self.materials = JSONStore(test_mats, lu_type='isoformat')
        self.bond_valence = MemoryStore("bond_valence")

    def test_build(self):
        """Build bond-valence docs and check the species found for mp-779001."""
        runner = Runner([BondValenceBuilder(self.materials, self.bond_valence)])
        runner.run()

        results = self.bond_valence.query(criteria={'task_id': 'mp-779001'})
        doc = next(iter(results))
        self.assertSetEqual(set(doc['possible_species']),
                            {'Hf4+', 'Sr2+', 'O2-'})
class TestBondBuilder(unittest.TestCase):
    def setUp(self):
        """Source store backed by the test-materials JSON; empty bonding target."""
        self.materials = JSONStore(test_mats, lu_type='isoformat')
        self.bonding = MemoryStore("bonding")

    def test_build(self):
        """Run the bond builder; validate graph round-trip and summary for mp-779001."""
        builder = BondBuilder(self.materials, self.bonding)
        Runner([builder]).run()

        doc = next(iter(self.bonding.query(criteria={'task_id': 'mp-779001'})))
        graph = StructureGraph.from_dict(doc['graph'])
        self.assertIsInstance(graph, StructureGraph)
        self.assertIn('Hf-O(6)', doc['summary']['coordination_envs'])
class TestMagneticBuilder(unittest.TestCase):
    def setUp(self):
        """Source store backed by the test-materials JSON; empty magnetism target."""
        self.materials = JSONStore(test_mats, lu_type='isoformat')
        self.magnetism = MemoryStore("magnetism")

    def test_build(self):
        """Run the magnetism builder and check ordering/moment for mp-1034331."""
        builder = MagneticBuilder(self.materials, self.magnetism)
        Runner([builder]).run()

        doc = next(iter(self.magnetism.query(criteria={'task_id': 'mp-1034331'})))
        magnetism = doc['magnetism']
        self.assertEqual(magnetism['ordering'], 'FM')
        self.assertAlmostEqual(
            magnetism['total_magnetization_normalized_formula_units'],
            4.8031771)
def create_correlation_quantity_indexed_docs():
    """
    Outputs JSON file containing the same data from
    create_correlation_test_docs() but as individual quantities, mimicking
    the quantity-indexed store.

    Must run create_correlation_test_docs() first so that the propnet data
    JSON file exists in the test directory.
    """
    pn_store = MemoryStore()
    q_store = MemoryStore()
    m_store = MemoryStore()

    data_path = os.path.join(CORR_TEST_DIR, "correlation_propnet_data.json")
    with open(data_path, 'r') as f:
        data = json.load(f)

    pn_store.connect()
    pn_store.update(jsanitize(data, strict=True, allow_bson=True))

    # Separate material-indexed docs into individual quantity docs.
    separator = SeparationBuilder(pn_store, q_store, m_store)
    Runner([separator]).run()

    quantity_docs = list(q_store.query(criteria={}, properties={'_id': False}))
    dumpfn(
        quantity_docs,
        os.path.join(CORR_TEST_DIR, "correlation_propnet_quantity_data.json"))
def test_sample_size_limit(self):
    """sample_size caps n_points at the requested size (or the dataset size),
    and values below the minimum are rejected."""
    for sample_size, n_points in zip([50, 300], [50, 200]):
        correlation_store = MemoryStore()
        builder = CorrelationBuilder(
            self.propstore,
            correlation_store,
            props=['bulk_modulus', 'vickers_hardness'],
            funcs='linlsq',
            sample_size=sample_size,
            from_quantity_db=False)
        Runner([builder]).run()

        for d in correlation_store.query(criteria={}):
            self.assertEqual(d['n_points'], n_points)

    with self.assertRaises(ValueError):
        _ = CorrelationBuilder(self.propstore, self.correlation,
                               sample_size=1)
store = loadfn(environ["PROPNET_CORRELATION_STORE_FILE"]) store.connect() except (ServerSelectionTimeoutError, KeyError, FileNotFoundError) as ex: if isinstance(ex, ServerSelectionTimeoutError): logger.warning("Unable to connect to propnet correlation db!") if isinstance(ex, KeyError): logger.warning("PROPNET_CORRELATION_STORE_FILE var not set!") if isinstance(ex, FileNotFoundError): logger.warning( "File specified in PROPNET_CORRELATION_STORE_FILE not found!") from maggma.stores import MemoryStore store = MemoryStore() store.connect() # layout won't work if database is down, but at least web app will stay up correlation_funcs = list(store.query().distinct("correlation_func")) correlation_func_info = { "mic": { "name": "Maximal information coefficient", "bounds": lambda x: 0 <= round(x) <= 1 }, "linlsq": { "name": "Linear least squares, R-squared", "bounds": lambda x: 0 <= round(x) <= 1 }, "theilsen": { "name": "Theil-Sen regression, R-squared", "bounds": lambda x: -10 <= round(x) <= 1 }, # Arbitrary lower bound to filter nonsense data "ransac": {
class CorrelationTest(unittest.TestCase):
    """Integration tests for CorrelationBuilder against both a
    material-indexed store (propstore) and a quantity-indexed store."""

    @classmethod
    def setUpClass(cls):
        """Load shared fixtures once: the two source stores and the
        expected correlation values for the bulk-modulus/Vickers pair."""
        add_builtin_models_to_registry()
        cls.propnet_props = PROPNET_PROPS
        cls.propstore = MemoryStore()
        cls.propstore.connect()
        materials_file = os.path.join(TEST_DATA_DIR,
                                      "correlation_propnet_data.json")
        # Fixture files are generated separately and may be absent;
        # load only when present rather than failing class setup.
        if os.path.exists(materials_file):
            with open(materials_file, 'r') as f:
                materials = json.load(f)
            materials = jsanitize(materials, strict=True, allow_bson=True)
            cls.propstore.update(materials)
        cls.quantity_store = MemoryStore()
        cls.quantity_store.connect()
        quantities_file = os.path.join(
            TEST_DATA_DIR, "correlation_propnet_quantity_data.json")
        if os.path.exists(quantities_file):
            with open(quantities_file, 'r') as f:
                quantities = json.load(f)
            quantities = jsanitize(quantities, strict=True, allow_bson=True)
            cls.quantity_store.update(quantities, key='internal_id')
        # Per-test target store; a fresh one is created in setUp().
        cls.correlation = None
        # vickers hardness (x-axis) vs. bulk modulus (y-axis)
        cls.correlation_values_vickers_bulk = {
            'linlsq': 0.49302857395714916,
            'pearson': 0.7021599347421844,
            'spearman': 0.7658801470036752,
            'mic': 0.7069707677575771,
            'theilsen': 0.4498696692135892,
            'ransac': 0.412850153689996
        }
        cls.correlation_values_bulk_vickers = {
            'linlsq': 0.49302857395714916,
            'pearson': 0.7021599347421844,
            'spearman': 0.7658801470036752,
            'mic': 0.7069707677575771,
            'theilsen': 0.46461142885951223,
            'ransac': 0.3699261228882036
        }

    @classmethod
    def tearDownClass(cls):
        """Remove the file emitted by test_database_and_file_write, if any."""
        if os.path.exists(os.path.join(TEST_DATA_DIR, "test_output.json")):
            os.remove(os.path.join(TEST_DATA_DIR, "test_output.json"))

    def setUp(self):
        """Give each test its own empty correlation target store."""
        self.correlation = MemoryStore()
        self.correlation.connect()

    def test_serial_runner(self):
        """Full single-process run over the material-indexed store."""
        builder = CorrelationBuilder(self.propstore, self.correlation,
                                     from_quantity_db=False)
        runner = Runner([builder])
        runner.run()

    def test_serial_runner_quantity_db(self):
        """Full single-process run over the quantity-indexed store."""
        # This only runs over the 4 properties in the database because
        # the mongomock db cannot be indexed and is therefore very slow
        builder = CorrelationBuilder(self.quantity_store,
                                     self.correlation,
                                     props=self.propnet_props,
                                     from_quantity_db=True)
        runner = Runner([builder])
        runner.run()

    def test_multiproc_runner(self):
        """Full run with four worker processes."""
        builder = CorrelationBuilder(self.propstore, self.correlation,
                                     from_quantity_db=False)
        runner = Runner([builder], max_workers=4)
        runner.run()

    def test_multiproc_runner_quantity_db(self):
        """Multiprocess run over the quantity-indexed store."""
        # This only runs over the 4 properties in the database because
        # the mongomock db cannot be indexed and is therefore very slow
        builder = CorrelationBuilder(self.quantity_store,
                                     self.correlation,
                                     props=self.propnet_props,
                                     from_quantity_db=True)
        runner = Runner([builder], max_workers=4)
        runner.run()

    def test_process_item(self):
        """process_item returns the expected
        (prop_x, prop_y, correlation, func_name, n_points, path_length)
        tuple for known property pairs, from both source stores."""
        test_props = [['band_gap_pbe', 'atomic_density'],
                      ['bulk_modulus', 'vickers_hardness']]
        linlsq_correlation_values = [0.011294932700383722,
                                     0.49302857395714916]
        path_lengths = [None, 2]
        # Exercise both the material-indexed and quantity-indexed paths.
        for source_db, is_quantity_db in zip(
                (self.propstore, self.quantity_store), (False, True)):
            for props, expected_correlation_val, expected_path_length in \
                    zip(test_props, linlsq_correlation_values, path_lengths):
                builder = CorrelationBuilder(source_db,
                                             self.correlation,
                                             props=props,
                                             from_quantity_db=is_quantity_db)
                processed = None
                prop_x, prop_y = props
                for item in builder.get_items():
                    if item['x_name'] == prop_x and \
                            item['y_name'] == prop_y:
                        processed = builder.process_item(item)
                        break
                self.assertIsNotNone(processed)
                self.assertIsInstance(processed, tuple)
                px, py, correlation, func_name, n_points, path_length = processed
                self.assertEqual(px, prop_x)
                self.assertEqual(py, prop_y)
                self.assertAlmostEqual(correlation, expected_correlation_val)
                self.assertEqual(func_name, 'linlsq')
                self.assertEqual(n_points, 200)
                self.assertEqual(path_length, expected_path_length)

    def test_correlation_funcs(self):
        """Custom correlation functions register alongside the built-ins
        and produce their own entries in the processed output."""
        def custom_correlation_func(x, y):
            return 0.5

        correlation_values = {
            k: v
            for k, v in self.correlation_values_bulk_vickers.items()
        }
        # Custom funcs are keyed by "<module>.<qualname>".
        correlation_values['test_correlation.custom_correlation_func'] = 0.5
        builder = CorrelationBuilder(
            self.propstore,
            self.correlation,
            props=['vickers_hardness', 'bulk_modulus'],
            funcs=['all', custom_correlation_func],
            from_quantity_db=False)
        self.assertEqual(
            set(builder._funcs.keys()),
            set(correlation_values.keys()),
            msg="Are there new built-in functions in the correlation builder?")
        for item in builder.get_items():
            if item['x_name'] == 'bulk_modulus' and \
                    item['y_name'] == 'vickers_hardness':
                processed = builder.process_item(item)
                self.assertIsInstance(processed, tuple)
                prop_x, prop_y, correlation, func_name, n_points, path_length = processed
                self.assertEqual(prop_x, 'bulk_modulus')
                self.assertEqual(prop_y, 'vickers_hardness')
                self.assertIn(func_name, correlation_values.keys())
                self.assertAlmostEqual(correlation,
                                       correlation_values[func_name])
                self.assertEqual(n_points, 200)
                self.assertEqual(path_length, 2)

    def test_database_and_file_write(self):
        """A full run writes one doc per (prop_x, prop_y, func) combination
        to the store, and the out_file matches the reference JSON."""
        builder = CorrelationBuilder(self.propstore,
                                     self.correlation,
                                     props=self.propnet_props,
                                     funcs='all',
                                     out_file=os.path.join(
                                         TEST_DATA_DIR, "test_output.json"),
                                     from_quantity_db=False)
        runner = Runner([builder])
        runner.run()
        # Test database output
        data = list(self.correlation.query(criteria={}))
        # count = n_props**2 * n_funcs
        # n_props = 4, n_funcs = 6
        self.assertEqual(len(data), 96,
                         msg="Are there new built-in funcs in the builder?")
        for d in data:
            self.assertIsInstance(d, dict)
            self.assertEqual(
                set(d.keys()), {
                    'property_x', 'property_y', 'correlation',
                    'correlation_func', 'n_points', 'shortest_path_length',
                    'id', '_id', 'last_updated'
                })
            self.assertEqual(d['n_points'], 200)
            if d['property_x'] == 'vickers_hardness' and \
                    d['property_y'] == 'bulk_modulus':
                self.assertAlmostEqual(
                    d['correlation'],
                    self.correlation_values_vickers_bulk[
                        d['correlation_func']])
            elif d['property_x'] == 'bulk_modulus' and \
                    d['property_y'] == 'vickers_hardness':
                self.assertAlmostEqual(
                    d['correlation'],
                    self.correlation_values_bulk_vickers[
                        d['correlation_func']])
        # Test file output
        expected_file_data = loadfn(
            os.path.join(TEST_DATA_DIR, 'correlation_outfile.json'))
        actual_file_data = loadfn(
            os.path.join(TEST_DATA_DIR, 'test_output.json'))
        self.assertIsInstance(actual_file_data, dict)
        self.assertEqual(actual_file_data.keys(), expected_file_data.keys())
        self.assertEqual(set(actual_file_data['properties']),
                         set(expected_file_data['properties']))
        expected_props = expected_file_data['properties']
        actual_props = actual_file_data['properties']
        # Property order may differ between files; compare by index lookup.
        for prop_x, prop_y in product(expected_props, repeat=2):
            iex, iey = expected_props.index(prop_x), expected_props.index(
                prop_y)
            iax, iay = actual_props.index(prop_x), actual_props.index(prop_y)
            self.assertEqual(actual_file_data['n_points'][iax][iay],
                             expected_file_data['n_points'][iex][iey])
            self.assertEqual(
                actual_file_data['shortest_path_length'][iax][iay],
                expected_file_data['shortest_path_length'][iex][iey])
            for f in builder._funcs.keys():
                self.assertAlmostEqual(
                    actual_file_data['correlation'][f][iax][iay],
                    expected_file_data['correlation'][f][iex][iey])

    def test_sample_size_limit(self):
        """sample_size caps n_points at the requested size (or dataset size);
        sizes below the minimum raise ValueError."""
        sample_sizes = [50, 300]
        expected_n_points = [50, 200]
        for sample_size, n_points in zip(sample_sizes, expected_n_points):
            correlation_store = MemoryStore()
            builder = CorrelationBuilder(
                self.propstore,
                correlation_store,
                props=['bulk_modulus', 'vickers_hardness'],
                funcs='linlsq',
                sample_size=sample_size,
                from_quantity_db=False)
            runner = Runner([builder])
            runner.run()
            data = list(correlation_store.query(criteria={}))
            for d in data:
                self.assertEqual(d['n_points'], n_points)
        with self.assertRaises(ValueError):
            _ = CorrelationBuilder(self.propstore, self.correlation,
                                   sample_size=1)
class CorrelationTest(unittest.TestCase):
    """Tests for CorrelationBuilder driven by a propnet-derived store and
    a Materials Project-derived store."""

    def setUp(self):
        """Build fresh in-memory source/target stores from the JSON fixtures
        and record the expected correlation values."""
        self.propstore = MemoryStore()
        self.propstore.connect()
        materials = loadfn(
            os.path.join(TEST_DIR, "correlation_propnet_data.json"))
        materials = jsanitize(materials, strict=True, allow_bson=True)
        self.propstore.update(materials)
        self.materials = MemoryStore()
        self.materials.connect()
        materials = loadfn(os.path.join(TEST_DIR, "correlation_mp_data.json"))
        materials = jsanitize(materials, strict=True, allow_bson=True)
        self.materials.update(materials)
        self.correlation = MemoryStore()
        self.correlation.connect()
        self.propnet_props = [
            "band_gap_pbe", "bulk_modulus", "vickers_hardness"
        ]
        # Dotted paths used when querying the MP store; mp_props are the
        # flattened names the builder reports.
        self.mp_query_props = [
            "magnetism.total_magnetization_normalized_vol"
        ]
        self.mp_props = ["total_magnetization_normalized_vol"]
        # vickers hardness (x-axis) vs. bulk modulus (y-axis)
        self.correlation_values_vickers_bulk = {
            'linlsq': 0.4155837083845686,
            'pearson': 0.6446578227126143,
            'mic': 0.5616515521782413,
            'theilsen': 0.4047519736540858,
            'ransac': 0.3747245847179631
        }
        self.correlation_values_bulk_vickers = {
            'linlsq': 0.4155837083845686,
            'pearson': 0.6446578227126143,
            'mic': 0.5616515521782413,
            'theilsen': 0.39860109570815505,
            'ransac': 0.3119656700613579
        }

    def test_serial_runner(self):
        """Builder completes a full run with a single-process runner."""
        builder = CorrelationBuilder(self.propstore, self.materials,
                                     self.correlation)
        runner = Runner([builder])
        runner.run()

    def test_multiproc_runner(self):
        """Builder completes a full run with two worker processes."""
        builder = CorrelationBuilder(self.propstore, self.materials,
                                     self.correlation)
        runner = Runner([builder], max_workers=2)
        runner.run()

    def test_process_item(self):
        """process_item returns the expected
        (prop_x, prop_y, correlation, func_name, n_points, path_length)
        tuple for known property pairs."""
        test_props = [['band_gap_pbe', 'total_magnetization_normalized_vol'],
                      ['bulk_modulus', 'vickers_hardness']]
        linlsq_correlation_values = [0.03620401274778131, 0.4155837083845686]
        path_lengths = [None, 2]
        for props, expected_correlation_val, expected_path_length in \
                zip(test_props, linlsq_correlation_values, path_lengths):
            builder = CorrelationBuilder(self.propstore, self.materials,
                                         self.correlation, props=props)
            processed = None
            prop_x, prop_y = props
            for item in builder.get_items():
                if item['x_name'] == prop_x and \
                        item['y_name'] == prop_y:
                    processed = builder.process_item(item)
                    break
            self.assertIsNotNone(processed)
            self.assertIsInstance(processed, tuple)
            px, py, correlation, func_name, n_points, path_length = processed
            self.assertEqual(px, prop_x)
            self.assertEqual(py, prop_y)
            self.assertAlmostEqual(correlation, expected_correlation_val)
            self.assertEqual(func_name, 'linlsq')
            self.assertEqual(n_points, 200)
            self.assertEqual(path_length, expected_path_length)

    def test_correlation_funcs(self):
        """Custom correlation functions register alongside the built-ins
        and produce their own entries in the processed output."""
        def custom_correlation_func(x, y):
            return 0.5

        correlation_values = {
            k: v
            for k, v in self.correlation_values_bulk_vickers.items()
        }
        # Custom funcs are keyed by "<module>.<qualname>".
        correlation_values['test_correlation.custom_correlation_func'] = 0.5
        builder = CorrelationBuilder(
            self.propstore,
            self.materials,
            self.correlation,
            props=['vickers_hardness', 'bulk_modulus'],
            funcs=['all', custom_correlation_func])
        self.assertEqual(
            set(builder._funcs.keys()),
            set(correlation_values.keys()),
            msg="Are there new built-in functions in the correlation builder?")
        for item in builder.get_items():
            if item['x_name'] == 'bulk_modulus' and \
                    item['y_name'] == 'vickers_hardness':
                processed = builder.process_item(item)
                self.assertIsInstance(processed, tuple)
                prop_x, prop_y, correlation, func_name, n_points, path_length = processed
                self.assertEqual(prop_x, 'bulk_modulus')
                self.assertEqual(prop_y, 'vickers_hardness')
                self.assertIn(func_name, correlation_values.keys())
                self.assertAlmostEqual(correlation,
                                       correlation_values[func_name])
                self.assertEqual(n_points, 200)
                self.assertEqual(path_length, 2)

    def test_database_write(self):
        """A full run writes one doc per (prop_x, prop_y, func) combination
        with the expected schema and correlation values."""
        builder = CorrelationBuilder(self.propstore,
                                     self.materials,
                                     self.correlation,
                                     props=self.propnet_props + self.mp_props,
                                     funcs='all')
        runner = Runner([builder])
        runner.run()
        data = list(self.correlation.query(criteria={}))
        # count = n_props**2 * n_funcs
        # n_props = 4, n_funcs = 5
        self.assertEqual(len(data), 80)
        for d in data:
            self.assertIsInstance(d, dict)
            self.assertEqual(
                set(d.keys()), {
                    'property_x', 'property_y', 'correlation',
                    'correlation_func', 'n_points', 'shortest_path_length',
                    'id', '_id', 'last_updated'
                })
            self.assertEqual(d['n_points'], 200)
            if d['property_x'] == 'vickers_hardness' and \
                    d['property_y'] == 'bulk_modulus':
                self.assertAlmostEqual(
                    d['correlation'],
                    self.correlation_values_vickers_bulk[
                        d['correlation_func']])
            elif d['property_x'] == 'bulk_modulus' and \
                    d['property_y'] == 'vickers_hardness':
                self.assertAlmostEqual(
                    d['correlation'],
                    self.correlation_values_bulk_vickers[
                        d['correlation_func']])

    # Just here for reference, in case anyone wants to create a new set
    # of test materials. Requires mongogrant read access to knowhere.lbl.gov.
    @unittest.skipIf(True, "Skipping test materials creation")
    def create_test_docs(self):
        """Regenerate the JSON fixtures from the live propnet/MP databases."""
        from maggma.advanced_stores import MongograntStore
        from monty.serialization import dumpfn
        pnstore = MongograntStore("ro:knowhere.lbl.gov/mp_core", "propnet")
        pnstore.connect()
        mpstore = MongograntStore("ro:knowhere.lbl.gov/mp_core", "materials")
        mpstore.connect()
        # Materials that have every propnet property either stored directly
        # or referenced as an input symbol.
        cursor = pnstore.query(criteria={
            '$and': [{
                '$or': [{
                    p: {
                        '$exists': True
                    }
                }, {
                    'inputs.symbol_type': p
                }]
            } for p in self.propnet_props]
        },
                               properties=['task_id'])
        pn_mpids = [item['task_id'] for item in cursor]
        cursor = mpstore.query(
            criteria={p: {
                '$exists': True
            } for p in self.mp_query_props},
            properties=['task_id'])
        mp_mpids = [item['task_id'] for item in cursor]
        # Keep only materials present in both databases, capped at 200.
        mpids = list(set(pn_mpids).intersection(set(mp_mpids)))[:200]
        pn_data = pnstore.query(
            criteria={'task_id': {
                '$in': mpids
            }},
            properties=['task_id', 'inputs'] +
            [p + '.mean' for p in self.propnet_props] +
            [p + '.units' for p in self.propnet_props])
        dumpfn(list(pn_data),
               os.path.join(TEST_DIR, "correlation_propnet_data.json"))
        mp_data = mpstore.query(
            criteria={'task_id': {
                '$in': mpids
            }},
            properties=['task_id'] + self.mp_query_props)
        dumpfn(list(mp_data),
               os.path.join(TEST_DIR, "correlation_mp_data.json"))
style={'font-family': 'monospace', 'color': 'rgb(211, 84, 0)', 'text-align': 'left', 'font-size': '1.2em'}) else: cut_off = 100 # need at least this many available quantities for plot """ scalar_symbols = {k: v for k, v in Registry("symbols").items() if (v.category == 'property' and v.shape == 1 and store.query( criteria={f'{k}.mean': {'$exists': True}}).count() > cut_off)} """ scalar_symbols = { k: v for k, v in Registry("symbols").items() if (v.category == 'property' and v.shape == 1 and len(store.query(criteria={'symbol_type': k}).distinct("material_key")) > cut_off)} warning_layout = html.Div() # this is dependent on the database schema def _ensure_indices(): for property_name in scalar_symbols.keys(): store.ensure_index(property_name) def get_plot_layout(props=None): prop_x = None prop_y = None prop_z = None z_enabled = False create_plot = False