Example #1
0
def main():
    """Command-line entry point for ``mrun``.

    Parses the command line, configures the root logger, loads the builder
    file and builds a ``Runner`` from it. Unless ``--dry-run`` is given, the
    runner is then executed.

    Raises:
        SystemExit: with status 1 when the loaded file is neither a list nor
            a ``Runner``, since there is nothing that could be run.
    """
    parser = argparse.ArgumentParser(
        description=
        "mrun is a script to run builders written using the Maggma framework.")
    parser.add_argument(
        "builder",
        help=
        "Builder file in either json or yaml format. Can contain a list of builders or a predefined Runner"
    )
    parser.add_argument(
        "-n",
        "--num_workers",
        type=int,
        default=0,
        help="Number of worker processes. Defaults to use as many as available."
    )
    parser.add_argument('-v',
                        '--verbose',
                        action='count',
                        default=0,
                        help="Controls logging level per number of v's")
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False,
        help="Dry run loading the builder file. Does not run the builders")
    parser.add_argument("--mpi",
                        action="store_true",
                        default=False,
                        help="Running under MPI")
    args = parser.parse_args()

    # Set Logging: each -v moves one step from WARNING towards DEBUG
    levels = [logging.WARNING, logging.INFO, logging.DEBUG]
    level = levels[min(len(levels) - 1,
                       args.verbose)]  # capped to number of levels
    root = logging.getLogger()
    root.setLevel(level)
    ch = TqdmLoggingHandler()
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    ch.setFormatter(formatter)
    root.addHandler(ch)

    objects = loadfn(args.builder)

    if isinstance(objects, list):
        # If this is a list of builders
        runner = Runner(objects, num_workers=args.num_workers)
    elif isinstance(objects, Runner):
        # This is a runner: rebuild it with the worker count from the CLI
        root.info("Changing number of workers from default in input file")
        runner = Runner(objects.builders, args.num_workers)
    else:
        root.error("Couldn't properly read the builder file.")
        # No runner could be built; stop here with a failure status instead
        # of reaching runner.run() with ``runner`` unbound.
        raise SystemExit(1)

    if not args.dry_run:
        runner.run(mpi=args.mpi)
Example #2
0
    def test_database_write(self):
        """Run a CorrelationBuilder with all functions and check the stored documents."""
        all_props = self.propnet_props + self.mp_props
        correlation_builder = CorrelationBuilder(self.propstore,
                                                 self.materials,
                                                 self.correlation,
                                                 props=all_props,
                                                 funcs='all')
        Runner([correlation_builder]).run()

        docs = list(self.correlation.query(criteria={}))
        # count = n_props**2 * n_funcs
        # n_props = 4, n_funcs = 5
        self.assertEqual(len(docs), 80)

        expected_keys = {
            'property_x', 'property_y', 'correlation', 'correlation_func',
            'n_points', 'shortest_path_length', 'id', '_id', 'last_updated'
        }
        for doc in docs:
            self.assertIsInstance(doc, dict)
            self.assertEqual(set(doc.keys()), expected_keys)
            self.assertEqual(doc['n_points'], 200)

            # Only the two vickers/bulk pairings have reference values
            pair = (doc['property_x'], doc['property_y'])
            if pair == ('vickers_hardness', 'bulk_modulus'):
                reference = self.correlation_values_vickers_bulk
            elif pair == ('bulk_modulus', 'vickers_hardness'):
                reference = self.correlation_values_bulk_vickers
            else:
                continue
            self.assertAlmostEqual(doc['correlation'],
                                   reference[doc['correlation_func']])
Example #3
0
    def test_build(self):
        """Build bond-valence docs and check the possible species for mp-779001."""
        bv_builder = BondValenceBuilder(self.materials, self.bond_valence)
        Runner([bv_builder]).run()

        matches = list(self.bond_valence.query(criteria={'task_id': 'mp-779001'}))
        first_doc = matches[0]
        expected_species = {'Hf4+', 'Sr2+', 'O2-'}
        self.assertSetEqual(set(first_doc['possible_species']), expected_species)
Example #4
0
 def test_runner(self):
     """Run the builder via a Runner; k=0 must read "new" and k=10 must read "old"."""
     self.source.collection.insert_many(self.old_docs)
     self.source.update(self.new_docs, update_lu=False)
     self.target.collection.insert_many(self.old_docs)

     Runner([self.builder]).run()

     updated_doc = self.target.query_one(criteria={"k": 0})
     self.assertEqual(updated_doc["v"], "new")
     untouched_doc = self.target.query_one(criteria={"k": 10})
     self.assertEqual(untouched_doc["v"], "old")
 def test_simple_runner(self):
     """Dump a Runner to ``self.runner_filename`` and check the ``mrun`` CLI exits with 0."""
     builder = CopyBuilder(self.source, self.target)
     runner = Runner([builder])
     dumpfn(runner, self.runner_filename)
     # Pass an explicit argument list instead of splitting a formatted
     # string, so a runner path containing whitespace stays one argument.
     command = ["python", "-m", "maggma.cli.mrun", str(self.runner_filename)]
     p = subprocess.run(command, timeout=15)
     self.assertEqual(p.returncode, 0)
Example #6
0
    def test_runner_pipeline(self):
        """Run the builder twice; the workflow count must be 3 after each run."""
        pipeline = Runner([self.nofilter])

        pipeline.run()
        first_count = self.lpad.workflows.count()
        self.assertEqual(first_count, 3)

        # A second run must not add any further workflows
        pipeline.run()
        second_count = self.lpad.workflows.count()
        self.assertEqual(second_count, 3)
Example #7
0
 def test_query(self):
     """Restrict the builder to k > 5 and check which documents reach the target."""
     self.builder.query = {"k": {"$gt": 5}}
     self.source.collection.insert_many(self.old_docs)
     self.source.update(self.new_docs, update_lu=False)
     runner = Runner([self.builder])
     runner.run()
     all_docs = list(self.target.query(criteria={}))
     self.assertEqual(len(all_docs), 14)
     # assertTrue(value, 6) would treat 6 as the failure message and pass for
     # any truthy minimum; compare the smallest copied key explicitly.
     self.assertEqual(min(d['k'] for d in all_docs), 6)
Example #8
0
 def test_serial_runner_quantity_db(self):
     """Run a CorrelationBuilder that reads from the quantity store."""
     # This only runs over the 4 properties in the database because
     # the mongomock db cannot be indexed and is therefore very slow
     quantity_builder = CorrelationBuilder(
         self.quantity_store,
         self.correlation,
         props=self.propnet_props,
         from_quantity_db=True)
     Runner([quantity_builder]).run()
Example #9
0
    def test_build(self):
        """Build magnetism docs and check ordering and magnetization for mp-1034331."""
        mag_builder = MagneticBuilder(self.materials, self.magnetism)
        Runner([mag_builder]).run()

        matches = list(self.magnetism.query(criteria={'task_id': 'mp-1034331'}))
        first_doc = matches[0]

        self.assertEqual(first_doc['magnetism']['ordering'], 'FM')
        self.assertAlmostEqual(
            first_doc['magnetism']['total_magnetization_normalized_formula_units'],
            4.8031771)
Example #10
0
    def test_1(self):
        """Check the dependency graph built from two mocked builders."""
        # Store 4 is a target of the first mock and a source of the second
        mock_specs = (
            {"sources": [1, 2, 3], "targets": [4]},
            {"sources": [3, 4, 5], "targets": [6]},
        )
        mocked = []
        for spec in mock_specs:
            mock_builder = MagicMock()
            mock_builder.configure_mock(**spec)
            mocked.append(mock_builder)
        self.builders = mocked

        rnr = Runner(self.builders)
        expected_graph = {1: [0]}
        self.assertEqual(rnr.dependency_graph, expected_graph)
Example #11
0
    def test_build(self):
        """Build bonding docs and check the graph and coordination summary for mp-779001."""
        bond_builder = BondBuilder(self.materials, self.bonding)
        Runner([bond_builder]).run()

        matches = list(self.bonding.query(criteria={'task_id': 'mp-779001'}))
        first_doc = matches[0]
        structure_graph = StructureGraph.from_dict(first_doc['graph'])
        self.assertIsInstance(structure_graph, StructureGraph)
        coordination_envs = first_doc['summary']['coordination_envs']
        self.assertIn('Hf-O(6)', coordination_envs)
Example #12
0
    def test_delete_orphans(self):
        """Run a CopyBuilder with delete_orphans=True after removing k in 0..4 from the source."""
        self.builder = CopyBuilder(self.source, self.target, delete_orphans=True)
        self.source.collection.insert_many(self.old_docs)
        self.source.update(self.new_docs, update_lu=False)
        self.target.collection.insert_many(self.old_docs)

        # Drop the first five keys from the source only
        orphan_keys = list(range(5))
        deletion_criteria = {"k": {"$in": orphan_keys}}
        self.source.collection.delete_many(deletion_criteria)

        Runner([self.builder]).run()

        remaining = self.target.collection.count_documents(deletion_criteria)
        self.assertEqual(remaining, 0)
        doc_k5 = self.target.query_one(criteria={"k": 5})
        self.assertEqual(doc_k5["v"], "new")
        doc_k10 = self.target.query_one(criteria={"k": 10})
        self.assertEqual(doc_k10["v"], "old")
Example #13
0
 def test_incremental_false(self):
     """Check a non-incremental CopyBuilder with a query against a newer target.

     The source holds 20 docs stamped ``tic`` and the target 20 docs stamped
     ``toc`` (one second later). With the query ``k > 5``, docs with k > 5
     are expected to carry ``tic`` afterwards and the rest to keep ``toc``.
     """
     # Whole-second timestamps: presumably the stores are MongoDB-backed,
     # where datetimes are kept at millisecond precision, so microseconds
     # would not survive the round-trip -- TODO confirm against the fixtures.
     tic = datetime.now().replace(microsecond=0)
     toc = tic + timedelta(seconds=1)
     keys = list(range(20))
     earlier = [{"lu": tic, "k": k, "v": "val"} for k in keys]
     later = [{"lu": toc, "k": k, "v": "val"} for k in keys]
     self.source.collection.insert_many(earlier)
     self.target.collection.insert_many(later)
     query = {"k": {"$gt": 5}}
     self.builder = CopyBuilder(self.source, self.target, incremental=False, query=query)
     Runner([self.builder]).run()
     docs = sorted(self.target.query(), key=lambda d: d["k"])
     # Split on the key itself so the boundary matches the "$gt: 5" query;
     # k == 5 is not selected by the query and must stay untouched.
     copied = [d for d in docs if d["k"] > 5]
     untouched = [d for d in docs if d["k"] <= 5]
     # The generator must sit inside all(...); passing a bare generator to
     # assertTrue is always truthy and would check nothing.
     self.assertTrue(all(d["lu"] == tic for d in copied))
     self.assertTrue(all(d["lu"] == toc for d in untouched))
Example #14
0
 def test_builder(self):
     """Process every item directly, run the builder, then check the NaN3 warnings."""
     ec_builder = ElasticBuilder(self.test_tasks,
                                 self.test_elasticity,
                                 self.test_materials,
                                 incremental=False)
     ec_builder.connect()
     # Every item must yield a truthy result when processed on its own
     for item in ec_builder.get_items():
         self.assertTrue(bool(ec_builder.process_item(item)))

     Runner([ec_builder]).run()

     # Test warnings
     nan3_doc = ec_builder.elasticity.query_one(
         criteria={"pretty_formula": "NaN3"})
     warnings_field = nan3_doc['elasticity']['warnings']
     self.assertEqual(warnings_field, None)
Example #15
0
 def test_builder(self):
     """Process every item directly, then run the builder through a Runner."""
     ec_builder = ElasticBuilder(self.test_tasks,
                                 self.test_elasticity,
                                 self.test_materials,
                                 incremental=False)
     ec_builder.connect()
     for t in ec_builder.get_items():
         processed = ec_builder.process_item(t)
         # Fail the test on an empty result instead of dropping into an
         # interactive debugger, which would hang an unattended test run.
         self.assertTrue(bool(processed))
     runner = Runner([ec_builder])
     runner.run()
Example #16
0
 def test_builder(self):
     """Process every item directly, run the builder, then check the NaN3 document."""
     ec_builder = ElasticAnalysisBuilder(self.test_tasks,
                                         self.test_elasticity,
                                         incremental=False)
     ec_builder.connect()
     # Every item must yield a truthy result when processed on its own
     for item in ec_builder.get_items():
         self.assertTrue(bool(ec_builder.process_item(item)))

     Runner([ec_builder]).run()

     # Test warnings
     nan3_doc = ec_builder.elasticity.query_one(
         criteria={"pretty_formula": "NaN3"})
     self.assertEqual(nan3_doc['warnings'], None)
     first_compliance = nan3_doc['compliance_tensor'][0][0]
     self.assertAlmostEqual(first_compliance, 41.576072, places=6)
Example #17
0
    def test_build(self):
        """Build the robocrys database and check the mp-66 document."""
        robocrys_builder = RobocrysBuilder(self.materials, self.robocrys)
        Runner([robocrys_builder]).run()

        matches = list(self.robocrys.query(criteria={'task_id': 'mp-66'}))
        doc = matches[0]

        # Condensed structure fields
        self.assertEqual(doc['condensed_structure']['formula'], 'C')
        self.assertEqual(doc['condensed_structure']['spg_symbol'], 'Fd-3m')
        self.assertEqual(doc['condensed_structure']['mineral']['type'],
                         'diamond')
        self.assertEqual(doc['condensed_structure']['dimensionality'], '3')

        # Fragments expected in the generated description
        for fragment in ("C is diamond structured", "bond lengths are 1.55"):
            self.assertIn(fragment, doc['description'])
Example #18
0
def _get_correlation_values():
    """Run a CorrelationBuilder over the JSON test data and return the builder.

    The data is read from ``correlation_propnet_data.json`` in
    ``CORR_TEST_DIR`` into a MemoryStore; results go to a second MemoryStore.
    """
    source_store = MemoryStore()
    data_path = os.path.join(CORR_TEST_DIR, "correlation_propnet_data.json")
    with open(data_path, 'r') as handle:
        raw_docs = json.load(handle)
    source_store.connect()
    sanitized = jsanitize(raw_docs, strict=True, allow_bson=True)
    source_store.update(sanitized)

    target_store = MemoryStore()
    builder = CorrelationBuilder(source_store,
                                 target_store,
                                 props=PROPNET_PROPS,
                                 funcs='all',
                                 from_quantity_db=False)
    Runner([builder]).run()
    return builder
Example #19
0
    def setUp(self):
        """Connect the test stores, create both builders and run the materials builder.

        ``self.tasks`` is not assigned here; presumably it is provided
        elsewhere in the test class.
        """
        self.materials = MongoStore("emmet_test", "materials")
        self.thermo = MongoStore("emmet_test", "thermo")

        for store in (self.materials, self.thermo):
            store.connect()

        self.mbuilder = MaterialsBuilder(
            self.tasks, self.materials, mat_prefix="", chunk_size=1)
        self.tbuilder = ThermoBuilder(
            self.materials, self.thermo, chunk_size=1)

        # Only the materials builder is run here
        Runner([self.mbuilder]).run()
Example #20
0
def create_correlation_quantity_indexed_docs():
    """
    Writes a JSON file holding the data from create_correlation_test_docs()
    split into individual quantities, mimicking the quantity-indexed store.

    create_correlation_test_docs() must be run first so that its JSON file is
    present in the test directory.

    """
    pn_store = MemoryStore()
    q_store = MemoryStore()
    m_store = MemoryStore()

    source_path = os.path.join(CORR_TEST_DIR, "correlation_propnet_data.json")
    with open(source_path, 'r') as handle:
        raw_docs = json.load(handle)
    pn_store.connect()
    pn_store.update(jsanitize(raw_docs, strict=True, allow_bson=True))

    separation_builder = SeparationBuilder(pn_store, q_store, m_store)
    Runner([separation_builder]).run()

    # Leave out '_id' from the dumped quantity documents
    quantity_docs = list(q_store.query(criteria={}, properties={'_id': False}))
    target_path = os.path.join(CORR_TEST_DIR,
                               "correlation_propnet_quantity_data.json")
    dumpfn(quantity_docs, target_path)
Example #21
0
    def test_sample_size_limit(self):
        """Check n_points for two sample sizes and that sample_size=1 raises ValueError."""
        # (requested sample size, expected n_points in every document)
        cases = ((50, 50), (300, 200))

        for sample_size, n_points in cases:
            correlation_store = MemoryStore()
            limited_builder = CorrelationBuilder(
                self.propstore,
                correlation_store,
                props=['bulk_modulus', 'vickers_hardness'],
                funcs='linlsq',
                sample_size=sample_size,
                from_quantity_db=False)
            Runner([limited_builder]).run()

            for doc in list(correlation_store.query(criteria={})):
                self.assertEqual(doc['n_points'], n_points)

        self.assertRaises(ValueError,
                          CorrelationBuilder,
                          self.propstore,
                          self.correlation,
                          sample_size=1)
Example #22
0
 def test_1(self):
     """The dependency graph of ``self.builders`` must equal ``{1: [0]}``."""
     graph = Runner(self.builders).dependency_graph
     self.assertDictEqual(graph, {1: [0]})
Example #23
0
        self.logger.info("processing item: {}".format(item))
        # time.sleep(random.randint(0,3))
        return {item: "processed"}

    def update_targets(self, items):
        """Log the number of processed items received and the items themselves."""
        log = self.logger.info
        log("Updating targets ...")
        received = len(items)
        log("Received {} processed items".format(received))
        as_list = list(items)
        log("Updated items: {}".format(as_list))

    def finalize(self, cursor=None):
        """Log the two closing messages; ``cursor`` is not used."""
        for message in ("Finalizing ...", "DONE!"):
            self.logger.info(message)


if __name__ == '__main__':
    # Demo: run a single MyDumbBuilder over in-memory stores.
    N = 10
    chunk_size = 3

    # Seven stores named "0" .. "6"
    stores = [MemoryStore(str(index)) for index in range(7)]

    # Store 3 appears both as a source and as a target
    sources = [stores[index] for index in (0, 1, 3)]
    targets = [stores[index] for index in (3, 6)]

    mdb = MyDumbBuilder(N, sources, targets, chunk_size=chunk_size)
    builders = [mdb]
    runner = Runner(builders)

    logstreamhandle(runner)
    runner.run()
Example #24
0
 def test_aggregation(self):
     """Run the builder and check the aggregated elasticity store is not empty."""
     Runner([self.builder]).run()
     aggregated = list(self.test_elasticity_agg.query())
     self.assertTrue(bool(aggregated))
Example #25
0
 def test_multiproc_runner(self):
     """Run a PropnetBuilder through a Runner created with default arguments."""
     propnet_builder = PropnetBuilder(self.materials, self.propstore)
     Runner([propnet_builder]).run()
Example #26
0
 def test_multiproc_runner(self):
     """Run a CorrelationBuilder through a Runner created with max_workers=4."""
     correlation_builder = CorrelationBuilder(
         self.propstore, self.correlation, from_quantity_db=False)
     Runner([correlation_builder], max_workers=4).run()
Example #27
0
 def test_serial_runner(self):
     """Run a CorrelationBuilder through a Runner created with default arguments."""
     correlation_builder = CorrelationBuilder(
         self.propstore, self.correlation, from_quantity_db=False)
     Runner([correlation_builder]).run()
Example #28
0
    def test_database_and_file_write(self):
        """Run a CorrelationBuilder with an output file; check the store and the file."""
        output_path = os.path.join(TEST_DATA_DIR, "test_output.json")
        builder = CorrelationBuilder(self.propstore,
                                     self.correlation,
                                     props=self.propnet_props,
                                     funcs='all',
                                     out_file=output_path,
                                     from_quantity_db=False)
        Runner([builder]).run()

        # Test database output
        docs = list(self.correlation.query(criteria={}))
        # count = n_props**2 * n_funcs
        # n_props = 4, n_funcs = 6
        self.assertEqual(len(docs),
                         96,
                         msg="Are there new built-in funcs in the builder?")

        expected_keys = {
            'property_x', 'property_y', 'correlation', 'correlation_func',
            'n_points', 'shortest_path_length', 'id', '_id', 'last_updated'
        }
        for doc in docs:
            self.assertIsInstance(doc, dict)
            self.assertEqual(set(doc.keys()), expected_keys)
            self.assertEqual(doc['n_points'], 200)

            # Only the two vickers/bulk pairings have reference values
            pair = (doc['property_x'], doc['property_y'])
            if pair == ('vickers_hardness', 'bulk_modulus'):
                reference = self.correlation_values_vickers_bulk
            elif pair == ('bulk_modulus', 'vickers_hardness'):
                reference = self.correlation_values_bulk_vickers
            else:
                continue
            self.assertAlmostEqual(doc['correlation'],
                                   reference[doc['correlation_func']])

        # Test file output
        expected_file_data = loadfn(
            os.path.join(TEST_DATA_DIR, 'correlation_outfile.json'))
        actual_file_data = loadfn(
            os.path.join(TEST_DATA_DIR, 'test_output.json'))

        self.assertIsInstance(actual_file_data, dict)
        self.assertEqual(actual_file_data.keys(), expected_file_data.keys())
        self.assertEqual(set(actual_file_data['properties']),
                         set(expected_file_data['properties']))

        expected_props = expected_file_data['properties']
        actual_props = actual_file_data['properties']

        # Property order may differ between the two files, so every pair is
        # looked up by name in each file's own property list.
        for prop_x in expected_props:
            for prop_y in expected_props:
                iex = expected_props.index(prop_x)
                iey = expected_props.index(prop_y)
                iax = actual_props.index(prop_x)
                iay = actual_props.index(prop_y)

                self.assertEqual(actual_file_data['n_points'][iax][iay],
                                 expected_file_data['n_points'][iex][iey])
                self.assertEqual(
                    actual_file_data['shortest_path_length'][iax][iay],
                    expected_file_data['shortest_path_length'][iex][iey])

                for func_name in builder._funcs.keys():
                    actual_value = actual_file_data['correlation'][func_name][iax][iay]
                    expected_value = expected_file_data['correlation'][func_name][iex][iey]
                    self.assertAlmostEqual(actual_value, expected_value)
Example #29
0
 def test_serial_runner(self):
     """Run a three-store CorrelationBuilder through a Runner with default arguments."""
     correlation_builder = CorrelationBuilder(
         self.propstore, self.materials, self.correlation)
     Runner([correlation_builder]).run()
Example #30
0
 def test_multiproc_runner(self):
     """Run a three-store CorrelationBuilder through a Runner created with max_workers=2."""
     correlation_builder = CorrelationBuilder(
         self.propstore, self.materials, self.correlation)
     Runner([correlation_builder], max_workers=2).run()