Python run_kmc Examples, ligninkmc.kmc_functions.run_kmc Python Examples

Example #1

0

Show file

    def testFindScaling(self):
        test_scaling = False
        if test_scaling:
            # To test scaling: replicate batch runs with different numbers of monomers
            # Here, we are testing with equal amount of S and G (no C)
            times = []
            sg_ratio = 1
            pct_s = sg_ratio / (1 + sg_ratio)

            test_vals = np.linspace(50, 150, num=3, dtype='int32')
            num_repeats = 5
            for num_monos in test_vals:
                print(f"Starting batch simulation with {num_monos} monomers")
                times.append([])
                for i in range(num_repeats):
                    random_seed = 8 + i
                    np.random.seed(random_seed)
                    print(f"    Starting repeat", i)
                    # Generate the initial monomers and events (oxidation)
                    monomer_draw = np.random.rand(num_monos)
                    initial_monomers = create_initial_monomers(
                        pct_s, monomer_draw)
                    initial_events = create_initial_events(
                        initial_monomers, DEF_RXN_RATES)
                    # Set the state and add the option to join initial monomers
                    initial_state = create_initial_state(
                        initial_events, initial_monomers)

                    # Start timing the actual KMC part
                    # noinspection PyUnboundLocalVariable
                    start = time.time()
                    run_kmc(DEF_RXN_RATES,
                            initial_state,
                            initial_events,
                            sg_ratio=sg_ratio,
                            random_seed=random_seed)
                    end = time.time()
                    times[-1].append(end - start)
                print(
                    f'Average time to complete simulation with {num_monos:5n} monomers: '
                    f'{np.sum(times[-1]) / num_repeats:7.2f} seconds')

        # Now we want to fit the times that we just calculated to a generic power law expression $t = aN^b$ to find the
        # scaling of our algorithm.
            meas_t = [np.mean(one_time) for one_time in times]
            # sdev_t = [np.sqrt(np.var(one_time)) for one_time in times]
            meas_n = test_vals

            sim_t = lambda p, n: p[0] * np.power(n, p[1])
            loss = lambda p: np.linalg.norm(sim_t(p, meas_n) - meas_t)

            results = optimize.minimize(loss,
                                        np.asarray([1e-5, 2.5]),
                                        bounds=[[0, 1], [0, 10]],
                                        options={'disp': True})
            opt_p = results.x
            scaling_formula = f'$t = {opt_p[0]:3.1e}N^{{ {opt_p[1]:4.2f} }}$'
            print(f'Scaling: {scaling_formula}')

Example #2

0

Show file

File: test_lignin_kmc_parts.py Project: michaelorella/lignin-kmc

 def testTCLTruncateSegname(self):
     """Check gen_tcl when given a chain_id that is longer than one character.

     A short run_kmc simulation supplies the adjacency matrix and monomer list; gen_tcl is
     then called with chain_id="lignin". The test expects "should be one character" in the
     captured output and the written file to match GOOD_TCL_SHORT. The generated file is
     removed afterwards (subject to DISABLE_REMOVE).
     """
     try:
         # easier to run_kmc to create monomer_list than recreate it here (adj easier) so doing so
         # minimize random calls by providing set list of monomer types
         initial_mono_type_list = [S, S, G, S, S, S, G, S]
         num_monos = len(initial_mono_type_list)
         initial_monomers = [
             Monomer(mono_type, i)
             for i, mono_type in enumerate(initial_mono_type_list)
         ]
         initial_events = create_initial_events(initial_monomers,
                                                DEF_RXN_RATES)
         initial_state = create_initial_state(initial_events,
                                              initial_monomers)
         # since GROW is not added to event_dict, no additional monomers will be added
         result = run_kmc(DEF_RXN_RATES,
                          initial_state,
                          sorted(initial_events),
                          t_max=2,
                          random_seed=8)
         # quick check that run_kmc gives expected results (not what we want to test here);
         # assertEqual reports both values on failure, unlike assertTrue(a == b)
         self.assertEqual(len(result[MONO_LIST]), num_monos)
         # the function we want to test here is below
         with capture_stderr(gen_tcl,
                             result[ADJ_MATRIX],
                             result[MONO_LIST],
                             chain_id="lignin",
                             out_dir=SUB_DATA_DIR) as output:
             self.assertIn("should be one character", output)
         self.assertFalse(diff_lines(TCL_FILE_LOC, GOOD_TCL_SHORT))
     finally:
         silent_remove(TCL_FILE_LOC, disable=DISABLE_REMOVE)

Example #3

0

Show file

File: test_lignin_kmc_parts.py Project: michaelorella/lignin-kmc

 def testNoGrowth(self):
     """Run a simulation with every monomer present from the start and compare the tcl output.

     The monomer types are fixed to keep random calls to a minimum. The file written by
     gen_tcl must match GOOD_TCL_NO_GROW_OUT; it is removed afterwards (subject to
     DISABLE_REMOVE).
     """
     try:
         monomer_types = [
             S, S, S, S, G, S, S, S, S, S, S, G,
             S, S, S, S, S, S, S, S, S, S, S, S,
         ]
         seed = 24
         monomers = [Monomer(kind, idx) for idx, kind in enumerate(monomer_types)]
         events = create_initial_events(monomers, DEF_RXN_RATES)
         state = create_initial_state(events, monomers)
         # no GROW event is added, so no additional monomers will be added
         kmc_result = run_kmc(DEF_RXN_RATES, state, sorted(events), t_max=0.0001, random_seed=seed)
         gen_tcl(kmc_result[ADJ_MATRIX], kmc_result[MONO_LIST],
                 tcl_fname=TCL_FNAME, chain_id="L", out_dir=SUB_DATA_DIR)
         self.assertFalse(diff_lines(TCL_FILE_LOC, GOOD_TCL_NO_GROW_OUT))
     finally:
         silent_remove(TCL_FILE_LOC, disable=DISABLE_REMOVE)

Example #4

0

Show file

File: test_lignin_kmc_parts.py Project: michaelorella/lignin-kmc

    def testB1BondGenMol(self):
        """Check that generate_mol and analyze_adj_matrix agree on the number of fragments.

        A seeded simulation with a GROW event produces the adjacency matrix and monomer
        list. The output captured around generate_mol must be empty, and the number of
        fragments RDKit finds in the resulting mol block must equal the sum of the chain
        counts reported by analyze_adj_matrix.
        """
        seed_monomers = [Monomer(kind, idx) for idx, kind in enumerate([S, S, S, G, S])]
        events = create_initial_events(seed_monomers, DEF_RXN_RATES)
        events.append(Event(GROW, [], rate=1e4))
        state = create_initial_state(events, seed_monomers)
        kmc_result = run_kmc(DEF_RXN_RATES, state, events,
                             n_max=12, t_max=2, random_seed=55, sg_ratio=1.0)
        nodes = kmc_result[MONO_LIST]
        adj = kmc_result[ADJ_MATRIX]

        with capture_stderr(generate_mol, adj, nodes) as output:
            self.assertFalse(output)

        mol_block = generate_mol(adj, nodes)
        rdkit_frags = GetMolFrags(MolFromMolBlock(mol_block))

        chain_lengths = analyze_adj_matrix(adj)[CHAIN_LEN]

        # Make sure there are the same number of separate fragments calculated by RDKIT
        # as we get from just separating the alternate B1
        expected_frag_count = np.sum(list(chain_lengths.values()))
        self.assertEqual(expected_frag_count, len(rdkit_frags))

Example #5

0

Show file

File: test_lignin_kmc_parts.py Project: michaelorella/lignin-kmc

    def testCheckBO4Fraction(self):
        """Compare the average and std dev of the BO4 bond fraction with stored values.

        Three fixed monomer-type lists are each simulated once, with a different seed per
        repeat; useful for comparing output from different versions.
        """
        monomer_types = [[G, S, G, G, S, S, S, G, S, S, G, G, S, G, G, G, G, S, G, G, G, S, G, S, S, S, G, S, S, G, G],
                         [S, S, G, G, S, G, S, G, G, G, G, S, S, S, S, S, G, S, S, S, G, G, S, G, S, G, S, S, G, S, S],
                         [S, S, S, S, G, S, S, G, G, S, G, S, G, G, G, G, S, S, S, S, S, S, S, G, S, S, G, S, G, S, G]]
        num_repeats = len(monomer_types)
        # the repeat index is added to this base so each repeat uses a different seed
        base_seed = 32

        sg_result_list = []
        for rep, type_list in enumerate(monomer_types):
            # Initialize the monomers, events, and state for this repeat
            monomers = [Monomer(kind, idx) for idx, kind in enumerate(type_list)]
            events = create_initial_events(monomers, DEF_RXN_RATES)
            state = create_initial_state(events, monomers)
            sg_result_list.append(run_kmc(DEF_RXN_RATES, state, events,
                                          n_max=len(monomers), t_max=2, random_seed=base_seed + rep))

        av_bo4_bonds, std_bo4_bonds = get_avg_num_bonds_single_option(BO4, sg_result_list, num_repeats)
        print("Average fraction BO4 bonds: {:.3f}".format(av_bo4_bonds))
        print("Std dev fraction BO4 bonds: {:.3f}".format(std_bo4_bonds))
        self.assertTrue(np.allclose(av_bo4_bonds, 0.21020733652312598))
        self.assertTrue(np.allclose(std_bo4_bonds, 0.04743254939825481))

Example #6

0

Show file

File: test_lignin_kmc_parts.py Project: michaelorella/lignin-kmc

 def testTCLGenEmptySegname(self):
     """Check gen_tcl when given a chain_id consisting of a single space.

     A short run_kmc simulation (all monomers available from the start) supplies the
     adjacency matrix and monomer list; gen_tcl is then called with chain_id=" ". The test
     expects "should be one character" in the captured output and the written file to
     match GOOD_TCL_SHORT. The generated file is removed afterwards (subject to
     DISABLE_REMOVE). Increases coverage of gen_tcl.
     """
     try:
         # easier to run_kmc to create monomer_list than recreate it here (adj easier) so doing so
         # minimize random calls by providing set list of monomer types
         initial_mono_type_list = [S, S, G, S, S, S, G, S]
         num_monos = len(initial_mono_type_list)
         initial_monomers = [Monomer(mono_type, i) for i, mono_type in enumerate(initial_mono_type_list)]
         initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
         initial_state = create_initial_state(initial_events, initial_monomers)
         # since GROW is not added to event_dict, no additional monomers will be added
         result = run_kmc(DEF_RXN_RATES, initial_state, sorted(initial_events), t_max=2, random_seed=8)
         # quick checks that run_kmc gives expected results (not what we want to test here);
         # assertEqual reports both values on failure, unlike assertTrue(a == b)
         self.assertAlmostEqual(result[TIME][-1], 0.00015059250794459398)
         self.assertEqual(len(result[MONO_LIST]), num_monos)
         # the function we want to test here is below
         with capture_stderr(gen_tcl, result[ADJ_MATRIX], result[MONO_LIST], chain_id=" ",
                             out_dir=SUB_DATA_DIR) as output:
             self.assertIn("should be one character", output)
         self.assertFalse(diff_lines(TCL_FILE_LOC, GOOD_TCL_SHORT))
     finally:
         silent_remove(TCL_FILE_LOC, disable=DISABLE_REMOVE)

Example #7

0

Show file

File: test_lignin_kmc_parts.py Project: michaelorella/lignin-kmc

 def testMissingRequiredSGRatio(self):
     """run_kmc must raise InvalidDataError when called with a GROW event but no sg_ratio.

     The error message is expected to contain "A numeric sg_ratio".
     """
     # set up variables to allow running run_kmc without specifying sg_ratio
     initial_sg_ratio = 0.75
     num_initial_monos = 3
     monomer_draw = np.around(np.random.rand(num_initial_monos), MAX_NUM_DECIMAL)
     # these are tested separately
     initial_monomers = create_initial_monomers(initial_sg_ratio, monomer_draw)
     initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
     initial_state = create_initial_state(initial_events, initial_monomers)
     events = {initial_events[i] for i in range(num_initial_monos)}
     events.add(Event(GROW, [], rate=1e4))
     # assertRaises fails the test with a clear message if no error is raised, replacing
     # the former assertFalse(<non-empty string>) trick inside a try/except
     with self.assertRaises(InvalidDataError) as context:
         run_kmc(DEF_RXN_RATES, initial_state, sorted(events), n_max=20, t_max=1, random_seed=10)
     self.assertIn("A numeric sg_ratio", context.exception.args[0])

Example #8

0

Show file

File: test_lignin_kmc_parts.py Project: michaelorella/lignin-kmc

def create_sample_kmc_result_c_lignin(num_monos=2, max_monos=12, seed=10):
    """Run a seeded simulation that starts from C-type monomers only and return its result.

    :param num_monos: number of initial C monomers to create
    :param max_monos: passed to run_kmc as n_max
    :param seed: passed to run_kmc as random_seed
    :return: whatever run_kmc returns
    """
    monomers = [Monomer(C, mono_id) for mono_id in range(num_monos)]
    # noinspection PyTypeChecker
    events = create_initial_events(monomers, DEF_RXN_RATES)
    # the state is built from the initial events only; GROW is appended afterwards
    state = create_initial_state(events, monomers)
    events.append(Event(GROW, [], rate=1e4))
    return run_kmc(DEF_RXN_RATES, state, sorted(events), n_max=max_monos, t_max=2, random_seed=seed)

Example #9

0

Show file

File: generate_lignin.py Project: ZimmermanGroup/conformer-rl

def generate_lignin(num_monomers: int = 1) -> Chem.Mol:
    """Simulate lignin growth with run_kmc and return the result as an RDKit molecule.

    Parameters
    ----------
    num_monomers : int
        Number of monomers in lignin molecule (passed to run_kmc as n_max).

    Returns
    -------
    Chem.Mol
        Molecule parsed from the generated mol block, after Chem.AddHs.
    """
    # S to G ratio, and the corresponding fraction of S
    sg_ratio = 0
    pct_s = sg_ratio / (1 + sg_ratio)

    # Start from a single monomer and grow up to the requested number
    ini_num_monos = 1
    max_num_monos = num_monomers

    t_max = 1  # maximum time to simulate, in seconds
    mono_add_rate = 1e4  # monomers/second

    # A random draw plus the S fraction decides the initial monolignol types
    initial_monomers = create_initial_monomers(pct_s, np.random.rand(ini_num_monos))

    # Only oxidation events are present while the initial state is determined; GROW is added
    #     afterwards so that further monomers can join the reaction at the specified rate
    #     and with the specified ratio
    initial_events = create_initial_events(initial_monomers, rxn_rates)
    initial_state = create_initial_state(initial_events, initial_monomers)
    initial_events.append(Event(GROW, [], rate=mono_add_rate))

    # simulate lignin creation
    result = run_kmc(rxn_rates, initial_state, initial_events,
                     n_max=max_num_monos, t_max=t_max, sg_ratio=sg_ratio)

    # convert the simulation output to an RDKit molecule
    nodes = result[MONO_LIST]
    adj = result[ADJ_MATRIX]
    mol = MolFromMolBlock(generate_mol(adj, nodes))
    return Chem.AddHs(mol)

Example #10

0

Show file

File: test_lignin_kmc_parts.py Project: michaelorella/lignin-kmc

    def testIniRates(self):
        """Compare BO4 bond statistics for several monomer addition rates with stored values.

        Note: this test did not increase coverage. Added to help debug notebook.
        """
        num_repeats = 4
        sg_ratio = 1.1
        max_monos = 12
        num_rates = 3
        # the repeat index is added to this base so each repeat uses a different seed
        random_seed = 2

        # minimize random calls by fixing the initial monomer types
        initial_monomers = [Monomer(kind, idx) for idx, kind in enumerate([S, G])]
        initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
        add_rates = np.logspace(4, 12, num_rates)

        add_rates_result_list = []
        for add_rate in add_rates:
            initial_state = create_initial_state(initial_events, initial_monomers)
            # NOTE(review): GROW is appended to the same list on every pass, so from the second
            #     rate onward the list (and the state built from it) still holds the GROW events
            #     of earlier rates. The stored values below were presumably generated with this
            #     behavior; confirm it is intended before changing it.
            initial_events.append(Event(GROW, [], rate=add_rate))
            per_rate_results = []
            for rep in range(num_repeats):
                per_rate_results.append(run_kmc(DEF_RXN_RATES, initial_state, initial_events,
                                                n_max=max_monos, t_max=1, sg_ratio=sg_ratio,
                                                random_seed=(random_seed + rep)))
            add_rates_result_list.append(per_rate_results)

        av_bo4_bonds, std_bo4_bonds = get_avg_num_bonds(BO4, num_rates, add_rates_result_list, num_repeats)

        good_av_bo4 = [0.3680555555555555, 0.2863636363636364, 0.03125]
        good_std_bo4 = [0.08187379251771941, 0.013636363636363641, 0.05412658773652741]
        self.assertTrue(np.allclose(av_bo4_bonds, good_av_bo4))
        self.assertTrue(np.allclose(std_bo4_bonds, good_std_bo4))

Example #11

0

Show file

File: test_lignin_kmc_parts.py Project: michaelorella/lignin-kmc

def create_sample_kmc_result(max_time=1., num_initial_monos=3, max_monos=10, sg_ratio=0.75, seed=10):
    """Run a sample simulation with a GROW event and return the run_kmc result.

    Fixed monomer draws are used for 3 and 20 initial monomers to minimize randomness in
    testing; any other count gets a seeded random draw rounded to MAX_NUM_DECIMAL places
    (machine precision differences in floats were the source of a past bug).

    :param max_time: passed to run_kmc as t_max
    :param num_initial_monos: number of initial monomers; selects which monomer draw is used
    :param max_monos: passed to run_kmc as n_max
    :param sg_ratio: passed to create_initial_monomers and to run_kmc
    :param seed: seed given to np.random.seed before any draw is made
    :return: whatever run_kmc returns
    """
    np.random.seed(seed)
    fixed_draws = {
        3: MONO_DRAW_3,
        20: [0.77132064, 0.02075195, 0.63364823, 0.74880388, 0.49850701, 0.22479665, 0.19806286,
             0.76053071, 0.16911084, 0.08833981, 0.68535982, 0.95339335, 0.00394827, 0.51219226,
             0.81262096, 0.61252607, 0.72175532, 0.29187607, 0.91777412, 0.71457578],
    }
    monomer_draw = fixed_draws.get(num_initial_monos)
    if monomer_draw is None:
        # arbitrary length: draw from the seeded generator and round the values
        monomer_draw = np.around(np.random.rand(num_initial_monos), MAX_NUM_DECIMAL)

    # these are tested separately elsewhere
    monomers = create_initial_monomers(sg_ratio, monomer_draw)
    events = create_initial_events(monomers, DEF_RXN_RATES)
    state = OrderedDict(create_initial_state(events, monomers))
    events.append(Event(GROW, [], rate=1e4))
    # NOTE(review): random_seed is hard-coded to 10 rather than using the ``seed`` argument;
    #     left as-is because stored expected values may depend on it. Confirm which is intended.
    return run_kmc(DEF_RXN_RATES, state, events, n_max=max_monos, t_max=max_time,
                   random_seed=10, sg_ratio=sg_ratio)

Example #12

0

Show file

File: test_lignin_kmc_parts.py Project: michaelorella/lignin-kmc

    def testDynamics(self):
        """Test the procedures used in Dynamics.ipynb.

        Runs a seeded simulation with dynamics=True from a fixed monomer-type list, checks
        the number of saved time steps, and compares per-key sums of the time series
        returned by get_bond_type_v_time_dict with stored values.
        """
        # minimize number of random calls during testing (here, set monomer type distribution)
        monomer_type_list = [G, S, G, G, S, S, S, G, S, S, G, G, S, G, G, G, G, S, G, G, G, S, S, G, S, S, G, G, ]
        num_monos = len(monomer_type_list)
        initial_monomers = [Monomer(mono_type, i) for i, mono_type in enumerate(monomer_type_list)]
        initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
        initial_state = create_initial_state(initial_events, initial_monomers)
        # since GROW is not added to event_dict, no additional monomers will be added (sg_ratio is thus not needed)
        result = run_kmc(DEF_RXN_RATES, initial_state, sorted(initial_events), random_seed=10, dynamics=True)
        # With dynamics, the MONO_LIST will be a list of monomer lists:
        #    the inner list is the usual MONO_LIST, but here it is saved for every time step
        t_steps = result[TIME]
        expected_num_t_steps = 61
        # assertEqual reports both values on failure, unlike assertTrue(a == b)
        self.assertEqual(len(t_steps), expected_num_t_steps)
        self.assertEqual(len(result[MONO_LIST]), expected_num_t_steps)
        self.assertEqual(len(result[MONO_LIST][-1]), num_monos)
        # want dict[key: [], ...] where the inner list is values by timestep
        #                         instead of list of time steps with [[key: val, ...], ... ]
        adj_list = result[ADJ_MATRIX]
        (bond_type_dict, olig_len_dict, sum_list, olig_count_dict,
            sum_count_list) = get_bond_type_v_time_dict(adj_list, sum_len_larger_than=10)

        # test results by checking sums
        good_bond_type_sum_dict = {BO4: 16, BB: 171, B1: 0, B5: 119, C5C5: 0, AO4: 0, C5O4: 23}
        good_olig_len_sum_dict = {1: 1112, 2: 474, 3: 21, 4: 56, 5: 45}
        checks = ((bond_type_dict, good_bond_type_sum_dict),
                  (olig_len_dict, good_olig_len_sum_dict))
        for series_dict, good_sum_dict in checks:
            sum_dict = {}
            for key, val_list in series_dict.items():
                # every series must hold one value per saved time step
                self.assertEqual(len(val_list), expected_num_t_steps)
                sum_dict[key] = sum(val_list)
            self.assertEqual(sum_dict, good_sum_dict)

Example #13

0

Show file

File: step1.py Project: jvermaas/LigninBuilder

# Rate used for the GROW event appended below
mono_add_rate = 1e4  # monomers/second

# Use a random number and the given sg_ratio to determine the monolignol types to be initially modeled
# (ini_num_monos and pct_s are defined earlier in the script, outside this excerpt)
monomer_draw = np.random.rand(ini_num_monos)
initial_monomers = create_initial_monomers(pct_s, monomer_draw)

# Initially allow only oxidation events. After they are used to determine the initial state, add
#     GROW to the events, which allows additional monomers to be added to the reaction at the
#     specified rate and with the specified ratio
initial_events = create_initial_events(initial_monomers, rxn_rates)
initial_state = create_initial_state(initial_events, initial_monomers)
initial_events.append(Event(GROW, [], rate=mono_add_rate))

# Run the simulation. No random_seed is passed here.
# NOTE(review): n_max and t_max presumably cap the number of monomers and the simulated
#     time; confirm against the run_kmc documentation.
result = run_kmc(rxn_rates,
                 initial_state,
                 initial_events,
                 n_max=max_num_monos,
                 t_max=t_max,
                 sg_ratio=sg_ratio)

# Convert the sparse matrix to a full array before printing
print("The adjacency matrix for the simulated lignin is:")
print(result[ADJ_MATRIX].toarray())

# From the list of monomers and the adjacency matrix, we can use LigninKMC to write out a tcl script for psfgen to
# turn into a .psf file.
# fname and sgnames are things that we'd want to change; file name always the same as the segname
gen_tcl(result[ADJ_MATRIX],
        result[MONO_LIST],
        toppar_dir="../smilesdemo/toppar/",
        tcl_fname="psfgen.tcl",
        psf_fname="L",

Example #14

0

Show file

def main(argv=None):
    """
    Runs the main program: for every addition rate, S:G ratio, and repeat in the
    configuration, runs a simulation, prints an analysis of the final adjacency matrix,
    and produces the configured output and plots.

    :param argv: The command line arguments.
    :return: The return code for the program's termination: the code from parse_cmdline if
        parsing did not succeed, INVALID_DATA if an InvalidDataError or KeyError is caught,
        otherwise GOOD_RET.
    """
    print(OPENING_MSG)
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET or args is None:
        return ret

    cfg = args.config

    try:
        # validate up front to catch errors early
        validate_input(cfg)

        for add_rate in cfg[ADD_RATES]:
            # label built from the rate with "+" dropped and "." replaced by "-"
            add_rate_str = f'{add_rate:.{3}g}'.replace("+", "").replace(".", "-")
            adjs_by_sg_ratio = []
            for sg_ratio in cfg[SG_RATIOS]:
                # accumulators for the repeats run at this add rate and S:G ratio
                bond_types = defaultdict(list)
                num_monos = []
                num_oligs = []
                final_adjs = []

                for rep in range(cfg[NUM_REPEATS]):
                    # decide on initial monomers, based on given sg_ratio, and create initial oxidation events
                    initial_events, initial_state = initiate_state(add_rate, cfg, rep, sg_ratio)

                    # begin simulation
                    result = run_kmc(cfg[RXN_RATES], initial_state, initial_events, n_max=cfg[MAX_MONOS],
                                     t_max=cfg[SIM_TIME], sg_ratio=sg_ratio, dynamics=cfg[DYNAMICS])

                    if not cfg[DYNAMICS]:
                        final_adj = result[ADJ_MATRIX]
                        final_monos = result[MONO_LIST]
                    else:
                        # with dynamics, results are indexed per time step; keep the last entry
                        final_adj = result[ADJ_MATRIX][-1]
                        final_monos = result[MONO_LIST][-1]
                        (bond_type_dict, olig_monos_dict, sum_monos_list, olig_count_dict,
                         sum_count_list) = get_bond_type_v_time_dict(result[ADJ_MATRIX], sum_len_larger_than=2)

                        for bond_type in BOND_TYPE_LIST:
                            bond_types[bond_type].append(bond_type_dict[bond_type])
                        num_monos.append(olig_count_dict[1])
                        num_oligs.append(sum_count_list)

                    final_adjs.append(final_adj)

                    # show results
                    summary = analyze_adj_matrix(final_adj, break_co_bonds=cfg[BREAK_CO])
                    adj_analysis_to_stdout(summary, break_co_bonds=cfg[BREAK_CO])

                    # Outputs
                    produce_output(final_adj, final_monos, cfg)

                # save for potential plotting
                adjs_by_sg_ratio.append(final_adjs)

                # all repeats are done; plot the dynamics series if applicable
                if cfg[DYNAMICS]:
                    create_dynamics_plots(add_rate_str, bond_types, cfg, num_monos, num_oligs, sg_ratio)
            if cfg[PLOT_BONDS]:
                create_bond_v_sg_plots(add_rate_str, cfg, adjs_by_sg_ratio)

    except (InvalidDataError, KeyError) as e:
        warning(e)
        return INVALID_DATA

    return GOOD_RET  # success