def __init__(self, host="127.0.0.1", port=27017, database="vasp",
             user=None, password=None, collection="tasks",
             aliases_config=None, default_properties=None):
    """Initialize against a real MongoDB if possible, else fall back.

    When ``has_mongo()`` is truthy, the regular ``QueryEngine.__init__`` is
    tried first; if it succeeds the constructor returns immediately. If it
    raises (or ``has_mongo()`` is falsy), the attributes are set up by hand
    on top of ``MongoClient(host, port)``.

    :param host: Server host name or address
    :param port: Server port
    :param database: Name of the database to open
    :param user: Stored as ``self._user`` in the fallback path
    :param password: Stored as ``self._password`` in the fallback path
    :param collection: Collection name, assigned through the
                       ``collection_name`` property
    :param aliases_config: Passed to ``set_aliases_and_defaults``
    :param default_properties: Passed to ``set_aliases_and_defaults``
    """
    if has_mongo():
        try:
            QueryEngine.__init__(self, host=host, port=port,
                                 database=database, user=user,
                                 password=password, collection=collection,
                                 aliases_config=aliases_config,
                                 default_properties=default_properties)
            _log.warning("Connected to real MongoDB at {}:{}".format(host, port))
            return  # actually connected! not mocked..
        except Exception:
            # Only ordinary errors mean "no usable server"; KeyboardInterrupt
            # and SystemExit must still propagate, hence no bare `except:`.
            _log.debug("Connection to real MongoDB at {}:{} failed. "
                       "This is normal; using mock."
                       .format(host, port))
    self.connection = MongoClient(host, port)
    self.db = self.connection[database]
    self._user, self._password = user, password
    self.host = host
    self.port = port
    self.database_name = database
    # collection name is now a @property. the setter will set
    # "self.collection" internally
    self.collection_name = collection
    self.set_aliases_and_defaults(aliases_config=aliases_config,
                                  default_properties=default_properties)
def test_queryresult(self):
    """Query a single task and check how the energy lists are nested.

    The ionic-step energies must be a list of lists of floats; the
    electronic-step energies must be nested one list level deeper.
    """
    ionic_key = 'calcs_reversed.output.ionic_steps.e_0_energy'
    electronic_key = (
        'calcs_reversed.output.ionic_steps.electronic_steps.e_0_energy')

    engine = QueryEngine(connection=self.conn,
                         database=self.db_name,
                         collection=self.coll_name)
    result = engine.query(criteria={'task_id': 'mp-1002133'},
                          properties=[ionic_key, electronic_key])
    self.assertTrue(isinstance(result, QueryResults))
    print(list(engine.query(criteria={'task_id': 'mp-1002133'})))
    self.assertEqual(len(result), 1)
    doc = list(result)[0]

    # Ionic steps: list -> list -> float
    self.assertIn(ionic_key, doc)
    ionic = doc[ionic_key]
    self.assertIsInstance(ionic, list)
    for per_calc in ionic:
        self.assertIsInstance(per_calc, list)
        for energy in per_calc:
            self.assertIsInstance(energy, float)

    # Electronic steps: (iterable) -> list -> list -> float
    self.assertIn(electronic_key, doc)
    electronic = doc[electronic_key]
    for per_calc in electronic:
        self.assertIsInstance(per_calc, list)
        for per_ionic_step in per_calc:
            self.assertIsInstance(per_ionic_step, list)
            for energy in per_ionic_step:
                self.assertIsInstance(energy, float)
def test_queryresult(self):
    """Check result type, size and nesting for a two-property query.

    One document is expected for task ``mp-1002133``. Its ionic-step
    energies are lists of lists of floats and its electronic-step energies
    carry one more level of list nesting.
    """
    prop_ionic = 'calcs_reversed.output.ionic_steps.e_0_energy'
    prop_elec = (
        'calcs_reversed.output.ionic_steps.electronic_steps.e_0_energy')
    engine_args = dict(connection=self.conn, database=self.db_name,
                       collection=self.coll_name)

    engine = QueryEngine(**engine_args)
    result = engine.query(
        criteria={'task_id': 'mp-1002133'},
        properties=[prop_ionic, prop_elec],
    )
    self.assertTrue(isinstance(result, QueryResults))
    everything = engine.query(criteria={'task_id': 'mp-1002133'})
    print(list(everything))
    self.assertEqual(len(result), 1)
    record = list(result)[0]

    self.assertIn(prop_ionic, record)
    ionic_values = record[prop_ionic]
    self.assertIsInstance(ionic_values, list)
    for calc_values in ionic_values:
        self.assertIsInstance(calc_values, list)
        for number in calc_values:
            self.assertIsInstance(number, float)

    self.assertIn(prop_elec, record)
    elec_values = record[prop_elec]
    for calc_values in elec_values:
        self.assertIsInstance(calc_values, list)
        for step_values in calc_values:
            self.assertIsInstance(step_values, list)
            for number in step_values:
                self.assertIsInstance(number, float)
def __init__(self, track_operation=None, track_field=None, **kwargs):
    """Constructor.

    :param track_operation: Stored as ``self._t_op``
    :param track_field: Stored as ``self._t_field``; must be truthy
                        (checked with ``assert``)
    :param kwargs: Forwarded unchanged to ``QueryEngine.__init__``
    """
    self._tracking_off = False
    # Set these first because QueryEngine.__init__ calls the overridden
    # `collection_name` setter.
    assert track_field
    self._t_op, self._t_field = track_operation, track_field
    self.collection = None
    # Now init parent
    QueryEngine.__init__(self, **kwargs)
def test_queryresult(self):
    """Sorted query with post-processing hooks returns all N results."""
    engine = QueryEngine(
        connection=self.conn,
        database=self.db_name,
        collection=self.coll_name,
        aliases={},
        query_post=[self.qtx],
        result_post=[self.rtx],
    )
    unsorted = engine.query(criteria={'e_above_hull': {'$lte': 0.0}})
    result = unsorted.sort('sbxd.e_above_hull', pymongo.ASCENDING)

    self.assertTrue(isinstance(result, QueryResults))
    self.assertEqual(len(result), self.N)
    first = result[0]
    self.assertTrue(first['e_above_hull'] < 0)
def test_queryresult(self):
    """Query with both post hooks, sort ascending, check type/size/first."""
    hooks = dict(query_post=[self.qtx], result_post=[self.rtx])
    engine = QueryEngine(connection=self.conn, database=self.db_name,
                         collection=self.coll_name, aliases={}, **hooks)

    hull_filter = {'e_above_hull': {'$lte': 0.0}}
    result = engine.query(criteria=hull_filter).sort(
        'sbxd.e_above_hull', pymongo.ASCENDING)

    self.assertTrue(isinstance(result, QueryResults))
    self.assertEqual(len(result), self.N)
    lowest = result[0]['e_above_hull']
    self.assertTrue(lowest < 0)
def test_no_post_funcs(self):
    """With empty hook lists, records come back without added fields."""
    engine = QueryEngine(
        connection=self.conn,
        database=self.db_name,
        collection=self.coll_name,
        aliases={},
        query_post=[],
        result_post=[],
    )
    results = engine.query()
    self.assertTrue(isinstance(results, QueryResults))

    seen = 0
    for record in results:
        pprint.pprint("RESULT: {}".format(record))
        # No post-processing should be done
        self.assertTrue('e_above_hull' not in record)
        self.assertTrue('add_fake_field' not in record)
        self.assertTrue('sbxd' in record)
        seen += 1

    # should find all tasks
    self.assertEqual(seen, self.N)
def setUpClass(cls):
    """Assimilate one test run and open a client and query engine.

    If a ``ConnectionFailure`` is raised along the way, both ``cls.qe``
    and ``cls.conn`` are set to ``None``.
    """
    db_name = "qetransmuter_unittest"
    run_dir = os.path.join(test_dir, 'db_test', 'success_mp_aflow')
    try:
        queen = BorgQueen(VaspToDbTaskDrone(database=db_name))
        queen.serial_assimilate(run_dir)
        cls.conn = MongoClient()
        cls.qe = QueryEngine(database=db_name)
    except ConnectionFailure:
        cls.qe = cls.conn = None
def __init__(self, host="127.0.0.1", port=27017, database="vasp",
             user=None, password=None, collection="tasks",
             aliases_config=None, default_properties=None):
    """Initialize via ``QueryEngine.__init__``; on failure, set up by hand.

    The regular parent constructor is tried first. If it raises, the same
    attributes are populated manually on top of ``MongoClient(host, port)``
    (presumably the mock client -- confirm against this module's imports).

    :param host: Server host name or address
    :param port: Server port
    :param database: Name of the database to open
    :param user: Stored as ``self._user`` in the fallback path
    :param password: Stored as ``self._password`` in the fallback path
    :param collection: Collection name, assigned through the
                       ``collection_name`` property
    :param aliases_config: Passed to ``set_aliases_and_defaults``
    :param default_properties: Passed to ``set_aliases_and_defaults``
    """
    try:
        QueryEngine.__init__(self, host=host, port=port, database=database,
                             user=user, password=password,
                             collection=collection,
                             aliases_config=aliases_config,
                             default_properties=default_properties)
        print("@@ connected to real Mongo")
        return  # actually connected! not mocked..
    except Exception:
        # Best-effort: any ordinary failure means "fall back". A bare
        # `except:` would also swallow KeyboardInterrupt / SystemExit.
        pass
    # Use the arguments, not self.host / self.port: those attributes are only
    # assigned below and may not exist if the parent constructor failed early.
    self.connection = MongoClient(host, port)
    self.db = self.connection[database]
    self._user, self._password = user, password
    self.host = host
    self.port = port
    self.database_name = database
    # collection name is now a @property. the setter will set
    # "self.collection" internally
    self.collection_name = collection
    self.set_aliases_and_defaults(aliases_config=aliases_config,
                                  default_properties=default_properties)
def __init__(self, host="127.0.0.1", port=27017, database="vasp",
             user=None, password=None, collection="tasks",
             aliases_config=None, default_properties=None):
    """Initialize via ``QueryEngine.__init__``; on failure, set up by hand.

    The regular parent constructor is tried first. If it raises, the same
    attributes are populated manually on top of ``MongoClient(host, port)``
    (presumably the mock client -- confirm against this module's imports).

    :param host: Server host name or address
    :param port: Server port
    :param database: Name of the database to open
    :param user: Stored as ``self._user`` in the fallback path
    :param password: Stored as ``self._password`` in the fallback path
    :param collection: Collection name, stored and passed to
                       ``set_collection``
    :param aliases_config: Passed to ``set_aliases_and_defaults``
    :param default_properties: Passed to ``set_aliases_and_defaults``
    """
    try:
        QueryEngine.__init__(self, host=host, port=port, database=database,
                             user=user, password=password,
                             collection=collection,
                             aliases_config=aliases_config,
                             default_properties=default_properties)
        print("@@ connected to real Mongo")
        return  # actually connected! not mocked..
    except Exception:
        # Best-effort: any ordinary failure means "fall back". A bare
        # `except:` would also swallow KeyboardInterrupt / SystemExit.
        pass
    # Use the arguments, not self.host / self.port: those attributes are only
    # assigned below and may not exist if the parent constructor failed early.
    self.connection = MongoClient(host, port)
    self.db = self.connection[database]
    self._user, self._password = user, password
    self.host = host
    self.port = port
    self.database_name = database
    self.collection_name = collection
    self.set_collection(collection=collection)
    self.set_aliases_and_defaults(aliases_config=aliases_config,
                                  default_properties=default_properties)
def test_mongo_find(self):
    """Delegate to ``self._test_find`` with an empty properties mapping."""
    engine = QueryEngine(
        connection=self.conn,
        database=self.db_name,
        collection=self.coll_name,
        aliases={},
        query_post=[self.qtx],
        result_post=[self.rtx],
    )
    hull_filter = {'e_above_hull': {'$lte': 0.0}}
    self._test_find(engine, criteria=hull_filter, properties={})
def test_with_properties(self):
    """Delegate to ``self._test_find`` with an explicit property list."""
    engine = QueryEngine(
        connection=self.conn,
        database=self.db_name,
        collection=self.coll_name,
        aliases={},
        query_post=[self.qtx],
        result_post=[self.rtx],
    )
    hull_filter = {'e_above_hull': {'$lte': 0.0}}
    wanted = ['e_above_hull', 'sbxd', 'add_fake_field']
    self._test_find(engine, criteria=hull_filter, properties=wanted)
def __init__(self, host="127.0.0.1", port=27017, database="vasp",
             user=None, password=None, collection="tasks",
             aliases_config=None, default_properties=None):
    """Initialize against a real MongoDB if possible, else fall back.

    When ``has_mongo()`` is truthy, the regular ``QueryEngine.__init__`` is
    tried first; if it succeeds the constructor returns immediately. If it
    raises (or ``has_mongo()`` is falsy), the attributes are set up by hand
    on top of ``MongoClient(host, port)``.

    :param host: Server host name or address
    :param port: Server port
    :param database: Name of the database to open
    :param user: Stored as ``self._user`` in the fallback path
    :param password: Stored as ``self._password`` in the fallback path
    :param collection: Collection name, assigned through the
                       ``collection_name`` property
    :param aliases_config: Passed to ``set_aliases_and_defaults``
    :param default_properties: Passed to ``set_aliases_and_defaults``
    """
    if has_mongo():
        try:
            QueryEngine.__init__(self, host=host, port=port,
                                 database=database, user=user,
                                 password=password, collection=collection,
                                 aliases_config=aliases_config,
                                 default_properties=default_properties)
            _log.warning("Connected to real MongoDB at {}:{}".format(
                host, port))
            return  # actually connected! not mocked..
        except Exception:
            # Only ordinary errors mean "no usable server"; KeyboardInterrupt
            # and SystemExit must still propagate, hence no bare `except:`.
            _log.debug("Connection to real MongoDB at {}:{} failed. "
                       "This is normal; using mock.".format(host, port))
    self.connection = MongoClient(host, port)
    self.db = self.connection[database]
    self._user, self._password = user, password
    self.host = host
    self.port = port
    self.database_name = database
    # collection name is now a @property. the setter will set
    # "self.collection" internally
    self.collection_name = collection
    self.set_aliases_and_defaults(aliases_config=aliases_config,
                                  default_properties=default_properties)
def test_assimilate(self):
    """Borg assimilation code.

    This takes too long for a unit test!

    Assimilates the ``db_test`` directory and checks the six resulting task
    documents. When ``VaspToDbTaskDroneTest.conn`` is set, the documents are
    read back from the ``creator_unittest`` database and the same data is
    also checked through a ``QueryEngine``; otherwise the drone runs in
    simulate mode and only the in-memory results are checked.
    """
    # No live connection -> run the drone in simulate mode.
    simulate = VaspToDbTaskDroneTest.conn is None
    drone = VaspToDbTaskDrone(database="creator_unittest",
                              simulate_mode=simulate,
                              parse_dos=True, compress_dos=1)
    queen = BorgQueen(drone)
    queen.serial_assimilate(os.path.join(test_dir, 'db_test'))
    data = queen.get_data()
    self.assertEqual(len(data), 6)
    if VaspToDbTaskDroneTest.conn:
        db = VaspToDbTaskDroneTest.conn["creator_unittest"]
        data = db.tasks.find()
        self.assertEqual(data.count(), 6)
        warnings.warn("Actual db insertion mode.")

    for d in data:
        dir_name = d['dir_name']
        if dir_name.endswith("killed_mp_aflow"):
            self.assertEqual(d['state'], "killed")
            self.assertFalse(d['is_hubbard'])
            self.assertEqual(d['pretty_formula'], "SiO2")
        elif dir_name.endswith("stopped_mp_aflow"):
            self.assertEqual(d['state'], "stopped")
            self.assertEqual(d['pretty_formula'], "ThFe5P3")
        elif dir_name.endswith("success_mp_aflow"):
            self.assertEqual(d['state'], "successful")
            self.assertEqual(d['pretty_formula'], "TbZn(BO2)5")
            self.assertAlmostEqual(d['output']['final_energy'],
                                   -526.66747274, 4)
        elif dir_name.endswith("Li2O_aflow"):
            self.assertEqual(d['state'], "successful")
            self.assertEqual(d['pretty_formula'], "Li2O")
            self.assertAlmostEqual(d['output']['final_energy'],
                                   -14.31446494, 6)
            self.assertEqual(len(d["calculations"]), 2)
            self.assertEqual(d['input']['is_lasph'], False)
            self.assertEqual(d['input']['xc_override'], None)
            self.assertEqual(d["oxide_type"], "oxide")
        elif dir_name.endswith("Li2O"):
            self.assertEqual(d['state'], "successful")
            self.assertEqual(d['pretty_formula'], "Li2O")
            self.assertAlmostEqual(d['output']['final_energy'],
                                   -14.31337758, 6)
            self.assertEqual(len(d["calculations"]), 1)
            self.assertEqual(len(d["custodian"]), 1)
            self.assertEqual(len(d["custodian"][0]["corrections"]), 1)
        elif dir_name.endswith("Li2O_aflow_lasph"):
            self.assertEqual(d['state'], "successful")
            self.assertEqual(d['pretty_formula'], "Li2O")
            self.assertAlmostEqual(d['output']['final_energy'],
                                   -13.998171, 6)
            self.assertEqual(len(d["calculations"]), 2)
            self.assertEqual(d['input']['is_lasph'], True)
            self.assertEqual(d['input']['xc_override'], "PS")

    if VaspToDbTaskDroneTest.conn:
        warnings.warn("Testing query engine mode.")
        qe = QueryEngine(database="creator_unittest")
        self.assertEqual(qe.query().count(), 6)
        # Test mappings by query engine.
        # "oxide_type" is requested so the assertion below can check the
        # record being iterated, not the variable left over from the
        # document loop above.
        for r in qe.query(criteria={"pretty_formula": "Li2O"},
                          properties=["dir_name", "energy",
                                      "calculations", "input",
                                      "oxide_type"]):
            if r["dir_name"].endswith("Li2O_aflow"):
                self.assertAlmostEqual(r['energy'], -14.31446494, 4)
                self.assertEqual(len(r["calculations"]), 2)
                self.assertEqual(r["input"]["is_lasph"], False)
                self.assertEqual(r['input']['xc_override'], None)
                self.assertEqual(r["oxide_type"], "oxide")
            elif r["dir_name"].endswith("Li2O"):
                self.assertAlmostEqual(r['energy'], -14.31337758, 4)
                self.assertEqual(len(r["calculations"]), 1)
                self.assertEqual(r["input"]["is_lasph"], False)
                self.assertEqual(r['input']['xc_override'], None)

        # Test lasph
        e = qe.get_entries({"dir_name": {"$regex": "lasph"}})
        self.assertEqual(len(e), 1)
        self.assertEqual(e[0].parameters["is_lasph"], True)
        self.assertEqual(e[0].parameters["xc_override"], "PS")

        # Test query one.
        d = qe.query_one(criteria={"pretty_formula": "TbZn(BO2)5"},
                         properties=["energy"])
        self.assertAlmostEqual(d['energy'], -526.66747274, 4)

        d = qe.get_entries_in_system(["Li", "O"])
        self.assertEqual(len(d), 3)
        self.assertIsInstance(d[0], ComputedEntry)
        self.assertEqual(d[0].data["oxide_type"], "oxide")

        s = qe.get_structure_from_id(d[0].entry_id)
        self.assertIsInstance(s, Structure)
        self.assertEqual(s.formula, "Li2 O1")

        self.assertIsInstance(qe.get_dos_from_id(d[0].entry_id),
                              CompleteDos)
# coding: utf-8 # Copyright (c) Henniggroup. # Distributed under the terms of the MIT License. from __future__ import division, print_function, unicode_literals, \ absolute_import import os from matgendb.query_engine import QueryEngine from pymatgen.core.structure import Structure # from config file db.json DB_CONFIG = os.path.join(os.path.expanduser('~'), ".mongodb/db.json") qe = QueryEngine.from_config(DB_CONFIG) # or # qe = QueryEngine(host="127.0.0.1", port=27017, # database="vasp", collection="collection_name", # user="******", password="******") results = qe.query(criteria={"normalized_formula": 'GaSb'}, properties=[ 'pretty_formula', 'author', 'spacegroup', 'output', 'analysis', 'last_updated', 'dir_name' ]) # if the documents contain the hkl field, to query based on hkl # use criteria={"hkl": [1,1,1]} for r in results: for k, v in r.items(): if k == "output":
def test_assimilate(self):
    """Borg assimilation code.

    This takes too long for a unit test!

    Six task documents are expected from the ``db_test`` directory. With a
    live connection they are re-read from the database and also checked
    through a ``QueryEngine``; without one the drone runs in simulate mode.
    """
    live_conn = VaspToDbTaskDroneTest.conn
    drone = VaspToDbTaskDrone(
        database="creator_unittest",
        simulate_mode=VaspToDbTaskDroneTest.conn is None,
        parse_dos=True,
        compress_dos=1,
    )
    queen = BorgQueen(drone)
    queen.serial_assimilate(os.path.join(test_dir, 'db_test'))
    tasks = queen.get_data()
    self.assertEqual(len(tasks), 6)
    if live_conn:
        tasks = live_conn["creator_unittest"].tasks.find()
        self.assertEqual(tasks.count(), 6)
        warnings.warn("Actual db insertion mode.")

    for task in tasks:
        run_dir = task['dir_name']
        if run_dir.endswith("killed_mp_aflow"):
            self.assertEqual(task['state'], "killed")
            self.assertFalse(task['is_hubbard'])
            self.assertEqual(task['pretty_formula'], "SiO2")
        elif run_dir.endswith("stopped_mp_aflow"):
            self.assertEqual(task['state'], "stopped")
            self.assertEqual(task['pretty_formula'], "ThFe5P3")
        elif run_dir.endswith("success_mp_aflow"):
            self.assertEqual(task['state'], "successful")
            self.assertEqual(task['pretty_formula'], "TbZn(BO2)5")
            final_energy = task['output']['final_energy']
            self.assertAlmostEqual(final_energy, -526.66747274, 4)
        elif run_dir.endswith("Li2O_aflow"):
            self.assertEqual(task['state'], "successful")
            self.assertEqual(task['pretty_formula'], "Li2O")
            final_energy = task['output']['final_energy']
            self.assertAlmostEqual(final_energy, -14.31446494, 6)
            self.assertEqual(len(task["calculations"]), 2)
            self.assertEqual(task['input']['is_lasph'], False)
            self.assertEqual(task['input']['xc_override'], None)
        elif run_dir.endswith("Li2O"):
            self.assertEqual(task['state'], "successful")
            self.assertEqual(task['pretty_formula'], "Li2O")
            final_energy = task['output']['final_energy']
            self.assertAlmostEqual(final_energy, -14.31337758, 6)
            self.assertEqual(len(task["calculations"]), 1)
            self.assertEqual(len(task["custodian"]), 1)
            self.assertEqual(len(task["custodian"][0]["corrections"]), 1)
        elif run_dir.endswith("Li2O_aflow_lasph"):
            self.assertEqual(task['state'], "successful")
            self.assertEqual(task['pretty_formula'], "Li2O")
            final_energy = task['output']['final_energy']
            self.assertAlmostEqual(final_energy, -13.998171, 6)
            self.assertEqual(len(task["calculations"]), 2)
            self.assertEqual(task['input']['is_lasph'], True)
            self.assertEqual(task['input']['xc_override'], "PS")

    if VaspToDbTaskDroneTest.conn:
        warnings.warn("Testing query engine mode.")
        engine = QueryEngine(database="creator_unittest")
        self.assertEqual(engine.query().count(), 6)

        # Test mappings by query engine.
        li2o_rows = engine.query(
            criteria={"pretty_formula": "Li2O"},
            properties=["dir_name", "energy", "calculations", "input"])
        for row in li2o_rows:
            if row["dir_name"].endswith("Li2O_aflow"):
                self.assertAlmostEqual(row['energy'], -14.31446494, 4)
                self.assertEqual(len(row["calculations"]), 2)
                self.assertEqual(row["input"]["is_lasph"], False)
                self.assertEqual(row['input']['xc_override'], None)
            elif row["dir_name"].endswith("Li2O"):
                self.assertAlmostEqual(row['energy'], -14.31337758, 4)
                self.assertEqual(len(row["calculations"]), 1)
                self.assertEqual(row["input"]["is_lasph"], False)
                self.assertEqual(row['input']['xc_override'], None)

        # Test lasph
        lasph_entries = engine.get_entries({"dir_name": {"$regex": "lasph"}})
        self.assertEqual(len(lasph_entries), 1)
        lasph_params = lasph_entries[0].parameters
        self.assertEqual(lasph_params["is_lasph"], True)
        self.assertEqual(lasph_entries[0].parameters["xc_override"], "PS")

        # Test query one.
        single = engine.query_one(criteria={"pretty_formula": "TbZn(BO2)5"},
                                  properties=["energy"])
        self.assertAlmostEqual(single['energy'], -526.66747274, 4)

        li_o_entries = engine.get_entries_in_system(["Li", "O"])
        self.assertEqual(len(li_o_entries), 3)
        self.assertIsInstance(li_o_entries[0], ComputedEntry)

        structure = engine.get_structure_from_id(li_o_entries[0].entry_id)
        self.assertIsInstance(structure, Structure)
        self.assertEqual(structure.formula, "Li2 O1")

        dos = engine.get_dos_from_id(li_o_entries[0].entry_id)
        self.assertIsInstance(dos, CompleteDos)
import datetime from django.utils.encoding import force_unicode from django.core.serializers.json import DjangoJSONEncoder qe = None mgdb_config = os.environ.get("MGDB_CONFIG", "") if mgdb_config: config = json.loads(mgdb_config) if not dbconfig.normalize_auth(config, readonly_first=True): config["user"] = config["password"] = None qe = QueryEngine(host=config["host"], port=config["port"], database=config["database"], user=config["user"], password=config["password"], collection=config["collection"], aliases_config=config.get("aliases_config", None)) def index(request, rest_query): if request.method == "GET": if qe is None: return HttpResponseBadRequest(json.dumps( {"error": "no database configured"}), mimetype="application/json") try: rest_query = rest_query.strip("/") if rest_query == "": results = list(qe.query(criteria={}, properties=["task_id"]))
def test_assimilate(self):
    """Assimilate ``db_test`` and check the five resulting task documents.

    With a live connection the documents are re-read from the database and
    also checked through a ``QueryEngine``; without one the drone runs in
    simulate mode and only the in-memory results are checked.
    """
    live_conn = VaspToDbTaskDroneTest.conn
    drone = VaspToDbTaskDrone(
        database="creator_unittest",
        simulate_mode=VaspToDbTaskDroneTest.conn is None,
    )
    queen = BorgQueen(drone)
    queen.serial_assimilate(os.path.join(test_dir, 'db_test'))
    tasks = queen.get_data()
    self.assertEqual(len(tasks), 5)
    if live_conn:
        tasks = live_conn["creator_unittest"].tasks.find()
        self.assertEqual(tasks.count(), 5)
        warnings.warn("Actual db insertion mode.")

    for task in tasks:
        run_dir = task['dir_name']
        if run_dir.endswith("killed_mp_aflow"):
            self.assertEqual(task['state'], "killed")
            self.assertFalse(task['is_hubbard'])
            self.assertEqual(task['pretty_formula'], "SiO2")
        elif run_dir.endswith("stopped_mp_aflow"):
            self.assertEqual(task['state'], "stopped")
            self.assertEqual(task['pretty_formula'], "ThFe5P3")
        elif run_dir.endswith("success_mp_aflow"):
            self.assertEqual(task['state'], "successful")
            self.assertEqual(task['pretty_formula'], "TbZn(BO2)5")
            final_energy = task['output']['final_energy']
            self.assertAlmostEqual(final_energy, -526.66747274, 4)
        elif run_dir.endswith("Li2O_aflow"):
            self.assertEqual(task['state'], "successful")
            self.assertEqual(task['pretty_formula'], "Li2O")
            final_energy = task['output']['final_energy']
            self.assertAlmostEqual(final_energy, -14.31446494, 6)
            self.assertEqual(len(task["calculations"]), 2)
        elif run_dir.endswith("Li2O"):
            self.assertEqual(task['state'], "successful")
            self.assertEqual(task['pretty_formula'], "Li2O")
            final_energy = task['output']['final_energy']
            self.assertAlmostEqual(final_energy, -14.31337758, 6)
            self.assertEqual(len(task["calculations"]), 1)
            self.assertEqual(len(task["custodian"]), 1)
            self.assertEqual(len(task["custodian"][0]["corrections"]), 1)

    if VaspToDbTaskDroneTest.conn:
        warnings.warn("Testing query engine mode.")
        engine = QueryEngine(database="creator_unittest")
        self.assertEqual(engine.query().count(), 5)

        # Test mappings by query engine.
        li2o_rows = engine.query(
            criteria={"pretty_formula": "Li2O"},
            properties=["dir_name", "energy", "calculations"])
        for row in li2o_rows:
            if row["dir_name"].endswith("Li2O_aflow"):
                self.assertAlmostEqual(row['energy'], -14.31446494, 4)
                self.assertEqual(len(row["calculations"]), 2)
            elif row["dir_name"].endswith("Li2O"):
                self.assertAlmostEqual(row['energy'], -14.31337758, 4)
                self.assertEqual(len(row["calculations"]), 1)

        # Test query one.
        single = engine.query_one(criteria={"pretty_formula": "TbZn(BO2)5"},
                                  properties=["energy"])
        self.assertAlmostEqual(single['energy'], -526.66747274, 4)

        li_o_entries = engine.get_entries_in_system(["Li", "O"])
        self.assertEqual(len(li_o_entries), 2)
        self.assertIsInstance(li_o_entries[0], ComputedEntry)

        structure = engine.get_structure_from_id(li_o_entries[0].entry_id)
        self.assertIsInstance(structure, Structure)
        self.assertEqual(structure.formula, "Li2 O1")
# coding: utf-8 # Copyright (c) Henniggroup. # Distributed under the terms of the MIT License. from __future__ import division, print_function, unicode_literals, \ absolute_import import os from matgendb.query_engine import QueryEngine from pymatgen.core.structure import Structure # from config file db.json DB_CONFIG = os.path.join(os.path.expanduser('~'), ".mongodb/db.json") qe = QueryEngine.from_config(DB_CONFIG) # or # qe = QueryEngine(host="127.0.0.1", port=27017, # database="vasp", collection="collection_name", # user="******", password="******") results = qe.query(criteria={"normalized_formula": 'GaSb'}, properties=['pretty_formula', 'author', 'spacegroup', 'output', 'analysis', 'last_updated', 'dir_name']) # if the documents contain the hkl field, to query based on hkl # use criteria={"hkl": [1,1,1]} for r in results: for k, v in r.items(): if k == "output": structure = Structure.from_dict(v["crystal"])
'/home/lorenzo/tests/project-test/tutorials/Si-BS-dataset') #%% from matgendb.creator import VaspToDbTaskDrone drone = VaspToDbTaskDrone(collection='test') for j in ds: drone.assimilate(j.path) #%% from matgendb.query_engine import QueryEngine qe = QueryEngine(collection='test') # entries = qe.get_entries({'dir_name':'localhost:/home/lorenzo/tests/project-test/tutorials/Si-BS-dataset/3-PBE-BS'}) entries = qe.get_entries({'chemsys': 'Si'}, optional_data=['calculations'], inc_structure=True) #%% e = entries[0] inputs = e.data['calculations'][0]['input'] incar = Incar(inputs['incar']) kpoints = Kpoints.from_dict(inputs['kpoints']) poscar = Poscar(e.structure) potcar = Potcar(inputs['potcar'])
def diff(self, c1, c2, only_missing=False, only_values=False,
         allow_dup=False):
    """Perform a difference between the 2 collections.
    The first collection is treated as the previous one, and the second
    is treated as the new one.

    Note: this is not 'big data'-ready; we assume all the records can
    fit in memory.

    :param c1: Collection (1) config file, or QueryEngine object
    :type c1: str or QueryEngine
    :param c2: Collection (2) config file, or QueryEngine object
    :type c2: str or QueryEngine
    :param only_missing: Only find and return self.MISSING; ignore 'new' keys
    :param only_values: Only find and return self.CHANGED; ignore new or
                        missing keys
    :param allow_dup: Allow duplicate keys, otherwise fail with ValueError
    :return: dict with keys self.MISSING, self.NEW (unless only_missing is
             True), & self.CHANGED, each a list of records with the key
             and any other fields given to the constructor 'info' argument.
             The meaning is: 'missing' are keys that are in c1 not found
             in c2, 'new' is keys found in c2 that are not found in c1,
             and 'changed' are records with the same key that have
             different 'props' values. An empty dict is returned if a
             record lacks the key field, or if properties are missing on
             every record of a collection.
    :raise: ValueError on a duplicate key (unless allow_dup) or on a
            numeric property whose value cannot be converted to float
    """
    # Connect.
    _log.info("connect.start")
    if isinstance(c1, QueryEngine):
        # Only c1 is type-checked; c2 is taken to be an engine as well.
        engines = [c1, c2]
    else:
        engines = []
        for cfg in c1, c2:
            settings = util.get_settings(cfg)
            if not normalize_auth(settings):
                _log.warning(
                    "Config file {} does not have a username/password".
                    format(cfg))
            settings["aliases_config"] = {"aliases": {}, "defaults": {}}
            engine = QueryEngine(**settings)
            engines.append(engine)
    _log.info("connect.end")

    # Query DB.
    keys = [set(), set()]
    eqprops = [{}, {}]
    numprops = [{}, {}]

    # Build query fields.
    fields = dict.fromkeys(
        self._info + self._all_props + [self._key_field], True)
    if '_id' not in fields:  # explicitly remove _id if not given
        fields['_id'] = False

    # Initialize for query loop.
    info = {}  # per-key information
    has_info, has_props = bool(self._info), bool(self._all_props)
    has_numprops, has_eqprops = bool(self._prop_deltas), bool(self._props)
    _log.info("query.start query={} fields={}".format(
        self._filter, fields))
    t0 = time.time()

    # Main query loop.
    for i, coll in enumerate(engines):
        _log.debug("collection {:d}".format(i))
        count, missing_props = 0, 0
        for rec in coll.query(criteria=self._filter, properties=fields):
            count += 1
            # Extract key from record.
            try:
                key = rec[self._key_field]
            except KeyError:
                _log.critical(
                    "Key '{}' not found in record: {}. Abort.".format(
                        self._key_field, rec))
                return {}
            if not allow_dup and key in keys[i]:
                raise ValueError("Duplicate key: {}".format(key))
            keys[i].add(key)
            # Extract numeric properties.
            if has_numprops:
                pvals = {}
                for pkey in self._prop_deltas.keys():
                    try:
                        pvals[pkey] = float(rec[pkey])
                    except KeyError:
                        # NOTE(review): this counts once per missing
                        # property, so missing_props can exceed the number
                        # of records -- confirm this is intended.
                        missing_props += 1
                        continue
                    except (TypeError, ValueError):
                        raise ValueError(
                            "Not a number: collection={c} key={k} {p}='{v}'"
                            .format(k=key, c=("old", "new")[i], p=pkey,
                                    v=rec[pkey]))
                numprops[i][key] = pvals
            # Extract properties for exact match.
            if has_eqprops:
                try:
                    propval = tuple([(p, str(rec[p])) for p in self._props])
                except KeyError:
                    missing_props += 1
                    # NOTE(review): this `continue` also skips the
                    # informational fields below for this record.
                    continue
                eqprops[i][key] = propval

            # Extract informational fields.
            if has_info:
                if key not in info:
                    info[key] = {}
                for k in self._info:
                    info[key][k] = rec[k]

        # Stop if we don't have properties on any record at all
        if 0 < count == missing_props:
            _log.critical(
                "Missing one or more properties on all {:d} records".
                format(count))
            return {}
        # ..but only issue a warning for partially missing properties.
        elif missing_props > 0:
            _log.warning(
                "Missing one or more properties for {:d}/{:d} records".
                format(missing_props, count))
    t1 = time.time()
    _log.info("query.end sec={:f}".format(t1 - t0))

    # Compute missing and new keys.
    if only_values:
        missing, new = [], []
    else:
        _log.debug("compute_difference.start")
        missing, new = keys[0] - keys[1], []
        if not only_missing:
            new = keys[1] - keys[0]
        _log.debug("compute_difference.end")

    # Compute mis-matched properties.
    if has_props:
        changed = self._changed_props(keys, eqprops, numprops, info,
                                      has_eqprops=has_eqprops,
                                      has_numprops=has_numprops)
    else:
        changed = []

    # Build result.
    _log.debug("build_result.begin")
    result = {}
    if not only_values:
        result[self.MISSING] = []
        for key in missing:
            rec = {self._key_field: key}
            if has_info:
                rec.update(info.get(key, {}))
            result[self.MISSING].append(rec)
        if not only_missing:
            result[self.NEW] = []
            for key in new:
                rec = {self._key_field: key}
                if has_info:
                    rec.update(info.get(key, {}))
                result[self.NEW].append(rec)
    result[self.CHANGED] = changed
    _log.debug("build_result.end")
    return result