def test_mol_pagination(storage_socket):
    """Check ``skip`` handling of ``get_molecules`` against four stored molecules."""
    # The test starts from the premise that no molecules are stored yet.
    assert len(storage_socket.get_molecules()['data']) == 0

    mol_names = [
        'water_dimer_minima.psimol',
        'water_dimer_stretch.psimol',
        'water_dimer_stretch2.psimol',
        'neon_tetramer.psimol',
    ]
    total = len(mol_names)
    molecules = [ptl.data.get_molecule(name) for name in mol_names]

    inserted = storage_socket.add_molecules(molecules)
    assert inserted['meta']['n_inserted'] == total

    # Skipping one entry: one fewer row comes back, n_found is still the total.
    page = storage_socket.get_molecules(skip=1)
    assert len(page['data']) == total - 1
    assert page['meta']['n_found'] == total

    # Skipping past the end: no rows come back, n_found is still the total.
    page = storage_socket.get_molecules(skip=total + 1)
    assert len(page['data']) == 0
    assert page['meta']['n_found'] == total

    # Remove everything this test inserted.
    storage_socket.del_molecules(inserted['data'])
def test_molecules_add(storage_socket):
    """Insert one molecule twice and check the second insert is reported as a duplicate."""
    water = ptl.data.get_molecule("water_dimer_minima.psimol")

    # First insertion stores the molecule.
    first = storage_socket.add_molecules([water])
    first_meta = first["meta"]
    assert first_meta["success"] is True
    assert first_meta["n_inserted"] == 1

    # Second insertion succeeds but inserts nothing and flags the duplicate.
    second = storage_socket.add_molecules([water])
    second_meta = second["meta"]
    assert second_meta["success"] is True
    assert second_meta["n_inserted"] == 0
    assert second_meta["duplicates"][0] == first["data"][0]

    # Both calls must hand back the same id.
    assert first["data"][0] == second["data"][0]

    # Fetch the stored molecule by hash and compare it with the local one.
    found = storage_socket.get_molecules(molecule_hash=water.get_hash())
    db_json = found["data"][0]
    water.compare(db_json)

    # Remove the molecule again.
    n_deleted = storage_socket.del_molecules(molecule_hash=water.get_hash())
    assert n_deleted == 1
def test_molecules_add(storage_socket):
    """Insert one molecule under two names and check the second is a duplicate."""
    water = portal.data.get_molecule("water_dimer_minima.psimol")

    # First insertion, keyed by "new_water".
    first = storage_socket.add_molecules({"new_water": water.to_json()})
    first_meta = first["meta"]
    assert first_meta["success"] is True
    assert first_meta["n_inserted"] == 1

    # Same molecule under another key: nothing new is inserted and the
    # duplicate is reported under the key that was submitted.
    second = storage_socket.add_molecules({"new_water2": water.to_json()})
    second_meta = second["meta"]
    assert second_meta["success"] is True
    assert second_meta["n_inserted"] == 0
    assert second_meta["duplicates"][0] == "new_water2"

    # Both keys must map to the same id.
    assert first["data"]["new_water"] == second["data"]["new_water2"]

    # Fetch the stored molecule by hash and compare it with the local one.
    found = storage_socket.get_molecules(water.get_hash(), index="hash")
    db_json = found["data"][0]
    water.compare(db_json)

    # Remove the molecule again.
    n_deleted = storage_socket.del_molecules(water.get_hash(), index="hash")
    assert n_deleted == 1
def test_molecule(storage_socket):
    """Query molecules through the ``MoleculeORM`` class.

    The molecules are inserted with ``storage_socket.add_molecules`` rather
    than through the ORM class; the original author notes that creating a
    ``MoleculeORM`` directly is not implemented yet.
    """
    # Original author's note: count() is preferred over len(MoleculeORM.objects),
    # which is slow.
    initial_count = MoleculeORM.objects().count()
    assert initial_count == 0

    water = ptl.data.get_molecule("water_dimer_minima.psimol")
    water2 = ptl.data.get_molecule("water_dimer_stretch.psimol")

    # Insert both molecules through the storage socket.
    ret = storage_socket.add_molecules([water, water2])
    assert ret["meta"]["success"] is True
    assert ret["meta"]["n_inserted"] == 2

    # Read one document back through the ORM class.
    first_mol = MoleculeORM.objects().first()
    assert first_mol.molecular_formula == "H4O2"
    assert first_mol.molecular_charge == 0

    # Filter by formula: both molecules match.
    by_formula = MoleculeORM.objects(molecular_formula="H4O2")
    assert len(by_formula) == 2
    assert by_formula[0].molecular_multiplicity == 1

    # Filter by charge. The original comment describes this as a field NOT
    # declared in the model that can still be queried -- TODO confirm
    # against the MoleculeORM definition.
    by_charge = MoleculeORM.objects(molecular_charge=0)
    assert len(by_charge) == 2

    # Hash plus formula together identify exactly one document.
    unique_match = MoleculeORM.objects(
        molecule_hash=first_mol.molecule_hash,
        molecular_formula=first_mol.molecular_formula,
    )
    assert len(unique_match) == 1

    # Remove both molecules.
    storage_socket.del_molecules(
        molecule_hash=[water.get_hash(), water2.get_hash()])
def test_molecules_add_many(storage_socket):
    """Insert two molecules twice, deleting them by hash and then by id."""
    water = ptl.data.get_molecule("water_dimer_minima.psimol")
    water2 = ptl.data.get_molecule("water_dimer_stretch.psimol")

    # Round one: insert, then delete by molecule hash.
    added = storage_socket.add_molecules([water, water2])
    assert added["meta"]["n_inserted"] == 2

    hashes = [water.get_hash(), water2.get_hash()]
    n_deleted = storage_socket.del_molecules(molecule_hash=hashes)
    assert n_deleted == 2

    # Round two: insert again, then delete by the returned ids.
    added = storage_socket.add_molecules([water, water2])
    assert added["meta"]["n_inserted"] == 2

    n_deleted = storage_socket.del_molecules(id=added["data"])
    assert n_deleted == 2
def molecules_H4O2(storage_socket):
    """Yield the ids of two stored water-dimer molecules, then delete them.

    This is a generator with setup before the ``yield`` and teardown after
    it; it is presumably registered as a pytest fixture, but no decorator
    is visible in this part of the file.
    """
    water = ptl.data.get_molecule("water_dimer_minima.psimol")
    water2 = ptl.data.get_molecule("water_dimer_stretch.psimol")

    added = storage_socket.add_molecules([water, water2])
    molecule_ids = list(added['data'])

    yield molecule_ids

    # Teardown: both molecules must be deleted by hash.
    hashes = [water.get_hash(), water2.get_hash()]
    n_deleted = storage_socket.del_molecules(molecule_hash=hashes)
    assert n_deleted == 2
def test_molecules_mixed_add_get(storage_socket):
    """Pass a mix of bad ids and a molecule to ``get_add_molecules_mixed``.

    The two bad ids must come back as ``None`` and be listed as missing by
    position; the molecule must come back with the matching hash.
    """
    water = ptl.data.get_molecule("water_dimer_minima.psimol")

    ret = storage_socket.get_add_molecules_mixed([bad_id1, water, bad_id2])
    entries = ret["data"]

    assert entries[0] is None
    assert entries[1].identifiers.molecule_hash == water.get_hash()
    assert entries[2] is None
    assert set(ret["meta"]["missing"]) == {0, 2}

    # Remove the one molecule that was stored.
    n_deleted = storage_socket.del_molecules(id=entries[1].id)
    assert n_deleted == 1
def test_molecules_bad_get(storage_socket):
    """Query one valid id together with two bad ids; only one must be found."""
    water = ptl.data.get_molecule("water_dimer_minima.psimol")

    # Store the molecule and keep its id.
    added = storage_socket.add_molecules([water])
    water_id = added["data"][0]

    # Only the valid id should produce a hit.
    query_ids = [water_id, bad_id1, bad_id2]
    found = storage_socket.get_molecules(id=query_ids)
    assert found["meta"]["n_found"] == 1

    # Remove the molecule again.
    n_deleted = storage_socket.del_molecules(id=water_id)
    assert n_deleted == 1
def test_molecules_add_many(storage_socket):
    """Insert two named molecules twice, deleting them by hash and then by id."""
    water = portal.data.get_molecule("water_dimer_minima.psimol")
    water2 = portal.data.get_molecule("water_dimer_stretch.psimol")

    def build_payload():
        # A fresh name -> JSON mapping is built for every insert call.
        return {"water1": water.to_json(), "water2": water2.to_json()}

    # Round one: insert, then delete by hash.
    added = storage_socket.add_molecules(build_payload())
    assert added["meta"]["n_inserted"] == 2

    hashes = [water.get_hash(), water2.get_hash()]
    n_deleted = storage_socket.del_molecules(hashes, index="hash")
    assert n_deleted == 2

    # Round two: insert again, then delete by the returned ids.
    added = storage_socket.add_molecules(build_payload())
    assert added["meta"]["n_inserted"] == 2

    molecule_ids = list(added["data"].values())
    n_deleted = storage_socket.del_molecules(molecule_ids, index="id")
    assert n_deleted == 2
def test_molecules_get(storage_socket):
    """Store one molecule, fetch it back by id and compare it to the original."""
    water = ptl.data.get_molecule("water_dimer_minima.psimol")

    # Store the molecule and keep its id.
    added = storage_socket.add_molecules([water])
    assert added["meta"]["n_inserted"] == 1
    water_id = added["data"][0]

    # Fetch it back by id and compare with the local molecule.
    fetched = storage_socket.get_molecules(id=water_id)
    water2 = fetched["data"][0]
    water2.compare(water)

    # Remove the molecule again.
    n_deleted = storage_socket.del_molecules(id=water_id)
    assert n_deleted == 1
def test_molecules_get(storage_socket):
    """Store one named molecule, fetch its JSON by id, rebuild and compare it."""
    water = portal.data.get_molecule("water_dimer_minima.psimol")

    # Store the molecule under the key "water" and keep its id.
    added = storage_socket.add_molecules({"water": water.to_json()})
    assert added["meta"]["n_inserted"] == 1
    water_id = added["data"]["water"]

    # Fetch the stored JSON by id and rebuild a Molecule from it.
    fetched = storage_socket.get_molecules(water_id, index="id")
    db_json = fetched["data"][0]
    water2 = portal.Molecule.from_json(db_json)
    water2.compare(water)

    # Remove the molecule again.
    n_deleted = storage_socket.del_molecules(water_id, index="id")
    assert n_deleted == 1
def test_molecules_bad_get(storage_socket):
    """Query one valid id plus three malformed ones and inspect the error report."""
    water = ptl.data.get_molecule("water_dimer_minima.psimol")

    # Store the molecule and keep its id.
    added = storage_socket.add_molecules([water])
    water_id = added["data"][0]

    # One valid id followed by a string, an int and a tuple.
    query_ids = [water_id, "something", 5, (3, 2)]
    found = storage_socket.get_molecules(id=query_ids)
    meta = found["meta"]

    # Exactly one error entry is expected: it is keyed "id" and lists the
    # three rejected values. The valid id is still found.
    assert len(meta["errors"]) == 1
    assert meta["errors"][0][0] == "id"
    assert len(meta["errors"][0][1]) == 3
    assert meta["n_found"] == 1

    # Remove the molecule again.
    n_deleted = storage_socket.del_molecules(id=water_id)
    assert n_deleted == 1
def test_identical_mol_insert(storage_socket):
    """Test the edge case where two identical molecules are added in one call."""
    water = ptl.data.get_molecule("water_dimer_minima.psimol")

    # Submit the same molecule twice in a single request.
    added = storage_socket.add_molecules([water, water])
    assert added["meta"]["success"] is True
    assert added["meta"]["n_inserted"] == 1

    # Both positions must resolve to the same id.
    first_id, second_id = added["data"][0], added["data"][1]
    assert first_id == second_id

    # Only one molecule may exist for this hash.
    found = storage_socket.get_molecules(molecule_hash=[water.get_hash()])
    assert found["meta"]["n_found"] == 1

    n_deleted = storage_socket.del_molecules(molecule_hash=water.get_hash())
    assert n_deleted == 1
def test_molecules_bad_get(storage_socket):
    """Query one valid id plus three malformed ones and inspect the error report."""
    water = portal.data.get_molecule("water_dimer_minima.psimol")

    # Store the molecule under the key "water" and keep its id.
    added = storage_socket.add_molecules({"water": water.to_json()})
    assert added["meta"]["n_inserted"] == 1
    water_id = added["data"]["water"]

    # One valid id followed by a string, an int and a tuple.
    query_ids = [water_id, "something", 5, (3, 2)]
    found = storage_socket.get_molecules(query_ids, index="id")
    meta = found["meta"]

    # Exactly one error entry is expected: it is labelled "Bad Ids" and
    # lists the three rejected values. The valid id is still found.
    assert len(meta["errors"]) == 1
    assert meta["errors"][0][0] == "Bad Ids"
    assert len(meta["errors"][0][1]) == 3
    assert meta["n_found"] == 1

    # Remove the molecule again.
    n_deleted = storage_socket.del_molecules(water_id, index="id")
    assert n_deleted == 1
def test_identical_mol_insert(storage_socket):
    """Test the edge case where two identical molecules are added under different tags."""
    water = portal.data.get_molecule("water_dimer_minima.psimol")

    # Submit the same molecule under the tags "w1" and "w2".
    payload = {
        "w1": water.to_json(),
        "w2": water.to_json(),
    }
    added = storage_socket.add_molecules(payload)
    assert added["meta"]["success"] is True
    assert added["meta"]["n_inserted"] == 1

    # Both tags must resolve to the same id.
    assert added["data"]["w1"] == added["data"]["w2"]

    # Only one molecule may exist for this hash.
    found = storage_socket.get_molecules([water.get_hash()], index="hash")
    assert found["meta"]["n_found"] == 1

    n_deleted = storage_socket.del_molecules(water.get_hash(), index="hash")
    assert n_deleted == 1
def test_results_pagination(storage_socket):
    """Test ``limit``/``skip`` pagination of ``get_results``.

    Inserts 1000 results for a single molecule: the first half with method
    "M1" and the second half with method "M2". Each result uses its running
    index as its ``basis``. The test then checks that

    * a query with ``limit`` and ``skip`` reports the full match count in
      ``n_found`` while returning only ``limit`` rows, starting at the
      skipped offset, and
    * a query whose ``skip`` leaves fewer than ``limit`` rows returns just
      the remaining rows.
    """
    # The test expects to start with no stored results.
    assert len(storage_socket.get_results()['data']) == 0

    water = ptl.data.get_molecule("water_dimer_minima.psimol")
    mol = storage_socket.add_molecules([water])['data'][0]

    result_template = {
        "molecule": mol,
        "method": "M1",
        "basis": "B1",
        "keywords": None,
        "program": "P1",
        "driver": "energy",
    }

    # These are counts, so integer arithmetic is used throughout.
    total_results = 1000
    first_half = total_results // 2
    limit = 100
    skip = 50
    half_page = limit // 2

    # Build all records in one pass without mutating the shared template:
    # indices below first_half get method "M1", the rest get "M2".
    results = [
        ptl.models.ResultRecord(**{
            **result_template,
            'method': 'M1' if i < first_half else 'M2',
            'basis': str(i),
        })
        for i in range(total_results)
    ]

    inserted = storage_socket.add_results(results)
    assert inserted['meta']['n_inserted'] == total_results

    ret = storage_socket.get_results(method='M2', status=None, limit=limit, skip=skip)

    # n_found counts every match, but only `limit` rows are returned.
    assert ret['meta']['n_found'] == total_results - first_half
    assert len(ret['data']) == limit
    # The first returned row is expected to be the one at the skipped offset.
    assert int(ret['data'][0]['basis']) == first_half + skip

    # Last page: skipping all but half a page of the "M1" results (no
    # explicit limit passed) must return exactly that remainder.
    ret = storage_socket.get_results(method='M1', skip=first_half - half_page, status=None)
    assert len(ret['data']) == half_page

    # Remove everything this test inserted.
    storage_socket.del_results(inserted['data'])
    storage_socket.del_molecules(mol)
def test_results_add(storage_socket):
    """Add results, re-add them with one new entry, and check duplicate reporting."""
    # Store two waters to attach the results to.
    water = portal.data.get_molecule("water_dimer_minima.psimol")
    water2 = portal.data.get_molecule("water_dimer_stretch.psimol")
    mol_insert = storage_socket.add_molecules({
        "water1": water.to_json(),
        "water2": water2.to_json()
    })
    molecule_ids = mol_insert["data"]

    def make_page(molecule, method, other_data, hash_index):
        # Basis, options, program and driver are the same for every page.
        return {
            "molecule": molecule,
            "method": method,
            "basis": "B1",
            "options": "default",
            "program": "P1",
            "driver": "energy",
            "other_data": other_data,
            "hash_index": hash_index,
        }

    page1 = make_page(molecule_ids["water1"], "M1", 5, 0)
    page2 = make_page(molecule_ids["water2"], "M1", 10, 1)
    page3 = make_page(molecule_ids["water2"], "M22", 10, 2)

    # First insert: both pages are new.
    ret = storage_socket.add_results([page1, page2])
    assert ret["meta"]["n_inserted"] == 2
    ids = list(ret['data'])

    # Second insert repeats the first two pages and adds a third: only the
    # third is inserted, the other two are reported as duplicates.
    ret = storage_socket.add_results([page1, page2, page3])
    assert ret["meta"]["n_inserted"] == 1
    assert len(ret['data']) == 3  # first 2 found are None
    assert len(ret["meta"]['duplicates']) == 2

    # Collect whichever ids were actually returned.
    ids.extend(res_id for res_id in ret['data'] if res_id is not None)

    n_deleted = storage_socket.del_results(ids)
    assert n_deleted == 3

    n_deleted = storage_socket.del_molecules(list(molecule_ids.values()), index="id")
    assert n_deleted == 2
def storage_results(storage_socket):
    """Populate the storage with six results, yield the socket, then clean up.

    This is a generator with setup before the ``yield`` and teardown after
    it; it is presumably registered as a pytest fixture, but no decorator
    is visible in this part of the file.
    """
    # Store two waters to attach the results to.
    water = ptl.data.get_molecule("water_dimer_minima.psimol")
    water2 = ptl.data.get_molecule("water_dimer_stretch.psimol")
    mol_insert = storage_socket.add_molecules([water, water2])
    mol_a, mol_b = mol_insert["data"][0], mol_insert["data"][1]

    kw1 = ptl.models.KeywordSet(**{"program": "a", "values": {}})
    kwid1 = storage_socket.add_keywords([kw1])["data"][0]

    def make_record(molecule, method, keywords, program, driver, return_result, hash_index):
        # Every record shares basis "B1" and status 'COMPLETE'.
        return ptl.models.ResultRecord(
            **{
                "molecule": molecule,
                "method": method,
                "basis": "B1",
                "keywords": keywords,
                "program": program,
                "driver": driver,
                "return_result": return_result,
                "hash_index": hash_index,
                "status": 'COMPLETE'
            })

    records = [
        make_record(mol_a, "M1", kwid1, "P1", "energy", 5, 0),
        make_record(mol_b, "M1", kwid1, "P1", "energy", 10, 1),
        make_record(mol_a, "M1", kwid1, "P2", "gradient", 15, 2),
        make_record(mol_a, "M2", kwid1, "P2", "gradient", 15, 3),
        make_record(mol_b, "M2", kwid1, "P1", "gradient", 20, 4),
        # The last record is the only one without a keyword set.
        make_record(mol_b, "M3", None, "P1", "gradient", 20, 5),
    ]

    results_insert = storage_socket.add_results(records)
    assert results_insert["meta"]["n_inserted"] == 6

    yield storage_socket

    # Teardown: delete the results, then the molecules, then queued tasks.
    result_ids = list(results_insert["data"])
    n_deleted = storage_socket.del_results(result_ids)
    assert n_deleted == results_insert["meta"]["n_inserted"]

    n_deleted = storage_socket.del_molecules(id=mol_insert["data"])
    assert n_deleted == mol_insert["meta"]["n_inserted"]

    all_tasks = storage_socket.get_queue()['data']
    task_ids = [task.id for task in all_tasks]
    storage_socket.del_tasks(id=task_ids)
def test_results_add(storage_socket):
    """Add result records, re-add them with one new entry, and check duplicates."""
    # Store two waters to attach the results to.
    water = ptl.data.get_molecule("water_dimer_minima.psimol")
    water2 = ptl.data.get_molecule("water_dimer_stretch.psimol")
    mol_insert = storage_socket.add_molecules([water, water2])
    molecule_ids = mol_insert["data"]

    kw1 = ptl.models.KeywordSet(**{"program": "a", "values": {}})
    kwid1 = storage_socket.add_keywords([kw1])["data"][0]

    def make_record(molecule, method, keywords, other_data, hash_index):
        # Basis, program and driver are the same for every record.
        return ptl.models.ResultRecord(
            **{
                "molecule": molecule,
                "method": method,
                "basis": "B1",
                "keywords": keywords,
                "program": "P1",
                "driver": "energy",
                "extras": {
                    "other_data": other_data
                },
                "hash_index": hash_index,
            })

    page1 = make_record(molecule_ids[0], "M1", kwid1, 5, 0)
    page2 = make_record(molecule_ids[1], "M1", kwid1, 10, 1)
    page3 = make_record(molecule_ids[1], "M22", None, 10, 2)

    # First insert: both records are new.
    ret = storage_socket.add_results([page1, page2])
    assert ret["meta"]["n_inserted"] == 2
    ids = list(ret['data'])

    # Second insert repeats the first two records and adds a third: only
    # the third is inserted, the other two are reported as duplicates.
    ret = storage_socket.add_results([page1, page2, page3])
    assert ret["meta"]["n_inserted"] == 1
    assert len(ret['data']) == 3  # first 2 found are None
    assert len(ret["meta"]['duplicates']) == 2

    # Collect whichever ids were actually returned.
    ids.extend(res_id for res_id in ret['data'] if res_id is not None)

    n_deleted = storage_socket.del_results(ids)
    assert n_deleted == 3

    n_deleted = storage_socket.del_molecules(id=molecule_ids)
    assert n_deleted == 2
def storage_results(storage_socket):
    """Populate the storage with five results, yield the socket, then clean up.

    This is a generator with setup before the ``yield`` and teardown after
    it; it is presumably registered as a pytest fixture, but no decorator
    is visible in this part of the file.
    """
    # Store two waters to attach the results to.
    water = portal.data.get_molecule("water_dimer_minima.psimol")
    water2 = portal.data.get_molecule("water_dimer_stretch.psimol")
    mol_insert = storage_socket.add_molecules({
        "water1": water.to_json(),
        "water2": water2.to_json()
    })
    water1_id = mol_insert["data"]["water1"]
    water2_id = mol_insert["data"]["water2"]

    def make_page(molecule, method, program, driver, return_result, hash_index):
        # Every page shares basis "B1", options "default" and status 'COMPLETE'.
        return {
            "molecule": molecule,
            "method": method,
            "basis": "B1",
            "options": "default",
            "program": program,
            "driver": driver,
            "return_result": return_result,
            "hash_index": hash_index,
            "status": 'COMPLETE'
        }

    pages = [
        make_page(water1_id, "M1", "P1", "energy", 5, 0),
        make_page(water2_id, "M1", "P1", "energy", 10, 1),
        make_page(water1_id, "M1", "P2", "gradient", 15, 2),
        make_page(water1_id, "M2", "P2", "gradient", 15, 3),
        make_page(water2_id, "M2", "P1", "gradient", 20, 4),
    ]

    results_insert = storage_socket.add_results(pages)
    assert results_insert["meta"]["n_inserted"] == 5

    yield storage_socket

    # Teardown: delete the results, then the molecules.
    result_ids = list(results_insert["data"])
    n_deleted = storage_socket.del_results(result_ids)
    assert n_deleted == results_insert["meta"]["n_inserted"]

    n_deleted = storage_socket.del_molecules(list(mol_insert["data"].values()), index="id")
    assert n_deleted == mol_insert["meta"]["n_inserted"]