Ejemplo n.º 1
0
class MPWorksCompatibilityBuilderTest(unittest.TestCase):
    """Tests for MPWorksCompatibilityBuilder and the MPWorks conversion helpers."""

    def setUp(self):
        """Create and connect the source (JSON) and target (in-memory) stores."""
        # Source documents are loaded from the test_tasks fixture; results are
        # written to an in-memory store named "atomate_tasks".
        self.test_tasks = JSONStore([test_tasks])
        self.elasticity = MemoryStore("atomate_tasks")
        self.test_tasks.connect()
        self.elasticity.connect()

    def test_builder(self):
        """Run get_items -> process_item -> update_targets on the fixture data.

        Nothing is asserted about the output; the test passes as long as the
        three builder stages run without raising.
        """
        mpw_builder = MPWorksCompatibilityBuilder(self.test_tasks,
                                                  self.elasticity,
                                                  incremental=False)
        items = mpw_builder.get_items()
        processed = [mpw_builder.process_item(item) for item in items]
        mpw_builder.update_targets(processed)

    def test_convert_mpworks_to_atomate(self):
        """Converted documents must expose 'hubbards' under their 'input' key."""
        doc = self.test_tasks.collection.find_one(
            {"task_type": {
                "$regex": "deformed"
            }})
        new_doc = convert_mpworks_to_atomate(doc)
        # assertIn reports both the member and the container on failure,
        # unlike assertTrue(... in ...), which only reports "False is not true".
        self.assertIn('hubbards', new_doc['input'])

        # NOTE(review): the parentheses are not escaped, so as a regular
        # expression this is a group that matches any task_type containing
        # "2x" -- confirm whether a literal "(2x)" was intended.
        doc = self.test_tasks.collection.find_one(
            {"task_type": {
                "$regex": "(2x)"
            }})
        new_doc = convert_mpworks_to_atomate(doc)
        self.assertIn('hubbards', new_doc['input'])

    def test_update_mpworks_schema(self):
        """Update the schema of task mp-612, then convert the updated document.

        No assertion is made on the result; the test only checks that both
        calls complete without raising.
        """
        doc = self.test_tasks.query(criteria={"task_id": "mp-612"})[0]
        doc = update_mpworks_schema(doc)
        # The converted document is not inspected, so it is not bound to a name.
        convert_mpworks_to_atomate(doc)
Ejemplo n.º 2
0
def test_json_store_load(jsonstore, test_dir):
    """Check that a JSONStore yields 20 documents from both sources.

    The first check uses the ``jsonstore`` fixture as supplied; the second
    builds a new store from ``test_set/c.json.gz`` under ``test_dir``.
    """
    expected_count = 20

    # Store handed in by the fixture.
    jsonstore.connect()
    fixture_docs = list(jsonstore.query())
    assert len(fixture_docs) == expected_count

    # Store built directly from the .json.gz file.
    gz_store = JSONStore(test_dir / "test_set" / "c.json.gz")
    gz_store.connect()
    gz_docs = list(gz_store.query())
    assert len(gz_docs) == expected_count
Ejemplo n.º 3
0
class TaskTaggerTest(unittest.TestCase):
    """Checks TaskTagger output against the labels stored on the fixture tasks."""

    def setUp(self):
        """Create and connect the task source store and the task-type target store."""
        self.test_tasks = JSONStore(test_tasks)
        self.task_types = MemoryStore("task_types")
        for store in (self.test_tasks, self.task_types):
            store.connect()

    def test_mp_defs(self):
        """Each processed item's task_type must equal the item's true_task_type."""
        tagger = TaskTagger(tasks=self.test_tasks, task_types=self.task_types)

        for task in tagger.get_items():
            tagged = tagger.process_item(task)
            # Items whose processing yields a falsy result are not checked.
            if not tagged:
                continue
            self.assertEqual(tagged["task_type"], task["true_task_type"])
Ejemplo n.º 4
0
class TaskTaggerTest(unittest.TestCase):
    """Checks TaskTagger output against labels looked up in the source store."""

    def setUp(self):
        """Create and connect the task source store and the task-type target store."""
        self.test_tasks = JSONStore(test_tasks)
        self.task_types = MemoryStore("task_types")
        for store in (self.test_tasks, self.task_types):
            store.connect()

    def test_mp_defs(self):
        """Each processed item's task_type must equal the stored true_task_type."""
        tagger = TaskTagger(tasks=self.test_tasks, task_types=self.task_types)

        for task in tagger.get_items():
            tagged = tagger.process_item(task)
            # The expected label is fetched from the source store by task_id
            # rather than read off the item returned by get_items().
            reference = self.test_tasks.query_one(
                criteria={"task_id": task["task_id"]},
                properties=["true_task_type"],
            )
            expected_type = reference["true_task_type"]
            self.assertEqual(tagged["task_type"], expected_type)
Ejemplo n.º 5
0
class TestBoltztrap4DosBuilder(unittest.TestCase):
    """Tests for Boltztrap4DosBuilder.process_item and update_targets."""

    def setUp(self):
        """Connect the three JSON fixture stores and the in-memory target store."""
        # Inputs for the builder.
        self.materials = JSONStore(boltztrap4dos_mat)
        self.materials.connect()
        self.bandstructure = JSONStore(boltztrap4dos_bs)
        self.bandstructure.connect()
        # Reference DOS document used by test_update_targets.
        self.dos_ref = JSONStore(boltztrap4dos_dos)
        self.dos_ref.connect()
        # Target store the builder writes to.
        self.dos = MemoryStore("dos")
        self.dos.connect()

    @unittest.skipIf(os.environ.get("TRAVIS") == "true",
                     "Skipping this test on Travis CI.")
    def test_process_items(self):
        """Process one material and check a single density value to 5 places."""
        builder = Boltztrap4DosBuilder(
            self.materials,
            self.bandstructure,
            self.dos,
            avoid_projections=True,
        )

        # Attach a band structure document to the material before processing.
        material = self.materials.query_one()
        material["bandstructure_uniform"] = self.bandstructure.query_one()

        result = builder.process_item(material)
        self.assertAlmostEqual(result['densities']['1'][3900],
                               5.446126162946311,
                               places=5)

    def test_update_targets(self):
        """Writing the reference DOS document must leave task_id 'mp-663338' in the target."""
        reference_doc = self.dos_ref.query_one()

        builder = Boltztrap4DosBuilder(self.materials, self.bandstructure,
                                       self.dos)
        builder.update_targets([reference_doc])

        stored_ids = self.dos.distinct("task_id")
        self.assertListEqual(stored_ids, ['mp-663338'])
Ejemplo n.º 6
0
class TestThermo(unittest.TestCase):
    """Tests for ThermoBuilder and chemsys_permutations on the materials fixture."""

    def setUp(self):
        """Connect the JSON materials store and the in-memory thermo store."""
        self.materials = JSONStore(test_mats, lu_type='isoformat')
        self.materials.connect()
        self.thermo = MemoryStore("thermo")
        self.thermo.connect()

    def test_get_entries(self):
        """get_entries must return the expected number of entries per chemical system."""
        builder = ThermoBuilder(self.materials, self.thermo)
        expected_counts = (
            ("Sr", 7),
            ("Hf", 4),
            ("O", 6),
            ("Hf-O-Sr", 44),
            ("Sr-Hf", 11),
        )
        for chemsys, n_entries in expected_counts:
            self.assertEqual(len(builder.get_entries(chemsys)), n_entries)

    def test_chemsys_permutations(self):
        """chemsys_permutations must return 1, 3 and 7 results for 1, 2 and 3 elements."""
        for chemsys, n_perms in (("Sr", 1), ("Sr-Hf", 3), ("Sr-Hf-O", 7)):
            self.assertEqual(len(chemsys_permutations(chemsys)), n_perms)

    def test_process_items(self):
        """Count the processed docs whose e_above_hull is exactly 0.0 for each system."""
        builder = ThermoBuilder(self.materials, self.thermo)

        def hull_energies(chemsys):
            # Fetch the entries for the system, process them, and collect
            # the e_above_hull value of every resulting document.
            docs = builder.process_item(builder.get_entries(chemsys))
            return [doc['thermo']['e_above_hull'] for doc in docs]

        def zero_count(energies):
            return sum(1 for energy in energies if energy == 0.0)

        # Each single-element system has exactly one doc at 0.0; Sr-O has
        # four and Hf-O has three.
        expected_zero_counts = (
            ("Sr", 1),
            ("Hf", 1),
            ("O", 1),
            ("Sr-O", 4),
            ("Hf-O", 3),
        )
        for chemsys, n_zero in expected_zero_counts:
            self.assertEqual(zero_count(hull_energies(chemsys)), n_zero)

        # The ternary system yields 44 docs in total, seven of them at 0.0.
        ternary = hull_energies("Sr-Hf-O")
        self.assertEqual(len(ternary), 44)
        self.assertEqual(zero_count(ternary), 7)

    def test_update_targets(self):
        """update_targets must store three distinct task_ids and record them as completed."""
        # Three groups of repeated docs; each group repeats one dict object.
        items = [[{"task_id": task_id}] * repeats
                 for task_id, repeats in ((1, 3), (2, 4), (3, 4))]
        builder = ThermoBuilder(self.materials, self.thermo)
        builder.update_targets(items)

        stored_ids = self.thermo.distinct("task_id")
        self.assertEqual(len(stored_ids), 3)
        self.assertEqual(builder.completed_tasks, {1, 2, 3})

    def test_get_items(self):
        """get_items must yield exactly one item for the fixture data."""
        builder = ThermoBuilder(self.materials, self.thermo)
        produced = list(builder.get_items())
        self.assertEqual(len(produced), 1)
Ejemplo n.º 7
0
class MPDispatcher(Dispatcher):
    """
    Dispatcher to use with Materials Project data. Reads from a JSON file.
    """

    def __init__(self, data_file_path):
        """
        Load the dataset and initialise the per-objective bookkeeping.

        Assumes that data_file_path contains data from
        an mpquery request in JSON format.
        """
        self._dataset = JSONStore(data_file_path)
        # The three dicts below are all keyed by objective.objective_id.
        self._objective_partitioned_data = {}
        self._objective_weights = {}
        self._objective_ids = {}
        self._dataset.connect()

    def add_objective(self, objective, weight):
        """
        Register an objective with its weight and partition the dataset for it.

        objective must be an Objective instance and weight must be a float
        (an int is rejected); otherwise an AssertionError is raised.
        """
        assert isinstance(objective, Objective)
        assert isinstance(weight, float)

        objective_id = objective.objective_id
        self._objective_weights[objective_id] = weight
        self._objective_ids[objective_id] = objective
        self._objective_partitioned_data[objective_id] = self.partition_data(
            objective)

    def partition_data(self, objective):
        """
        Split the dataset into training and candidate query results.

        Training documents have every model input (plus "task_id") and every
        model response property present. Candidate documents have every model
        input (plus "task_id") present and every response property absent.

        Returns a (training_data, candidate_data) tuple of query results.
        """
        model_inputs = objective.model_input_property_names() | {"task_id"}
        model_outputs = objective.model_response_property_names()
        all_props = model_inputs | model_outputs

        training_criteria = {}
        candidate_criteria = {}

        for prop in model_inputs:
            training_criteria[prop] = {"$exists": 1}
            candidate_criteria[prop] = {"$exists": 1}
        # Outputs are handled second so that a property listed as both an
        # input and an output ends up with the output rule.
        for prop in model_outputs:
            training_criteria[prop] = {"$exists": 1}
            candidate_criteria[prop] = {"$exists": 0}

        training_data = self._dataset.query(criteria=training_criteria,
                                            properties=list(all_props))
        candidate_data = self._dataset.query(criteria=candidate_criteria,
                                             properties=list(model_inputs))
        return (training_data, candidate_data)

    def rank_wflows(self, n):
        """
        Return the first n (workflow, score) pairs.

        Every registered objective is trained and scored, but only the result
        of the last objective iterated is returned; weights are not applied
        and the pairs are not sorted here. Returns an empty list when no
        objective has been registered.

        TODO
        Needs to be rewritten for multiple objectives
        """
        # Bound up front so that an empty objective set yields [] instead of
        # raising UnboundLocalError at the return statement.
        score_mapping = []
        for objective_id in self._objective_weights:
            partitions = self._objective_partitioned_data[objective_id]
            training_data = partitions[0]
            candidates = partitions[1]
            score_mapping = self.run_objective(objective_id, training_data,
                                               candidates)
        return score_mapping[:n]

    def run_objective(self, objective_id, training_data, candidates):
        """
        Train the objective's model and score the candidates.

        Returns a list of (workflow, score) tuples: one tuple per candidate
        per workflow class in objective.wflows, each workflow instantiated
        with the candidate's "task_id".
        """
        objective = self._objective_ids[objective_id]
        objective.train_model(training_data)
        print("done training")
        score_mapping = objective.return_scores(candidates)
        workflow_scores = []
        for pair in score_mapping:
            # Each pair is (candidate document, score).
            candidate_mp_id = pair[0]["task_id"]
            score = pair[1]
            for wflow in objective.wflows:
                wflow_instance = wflow(candidate_mp_id)
                workflow_scores.append((wflow_instance, score))
        return workflow_scores

    def update(self):
        """Do nothing; placeholder implementation."""
        pass