def testEventIDHash(self):
     """Check that events built from two equivalent monomers are equal and hash alike.

     Two separate ``Monomer(S, 4)`` objects are created; the event lists derived
     from them must compare equal, and their first events must collapse to a
     single member when placed in a set.
     """
     monomer_a = Monomer(S, 4)
     monomer_b = Monomer(S, 4)
     events_a = create_initial_events([monomer_a], DEF_RXN_RATES)
     events_b = create_initial_events([monomer_b], DEF_RXN_RATES)
     # assertEqual (instead of assertTrue(a == b)) reports both operands on failure
     self.assertEqual(events_a, events_b)
     # a set keeps one member only when the two events are equal and share a hash
     self.assertEqual(len({events_a[0], events_b[0]}), 1)
 def testTCLTruncateSegname(self):
     """Check gen_tcl when given a chain_id that is longer than one character.

     gen_tcl is called with ``chain_id="lignin"``; the text captured from stderr
     must contain "should be one character", and diff_lines between the file at
     TCL_FILE_LOC and GOOD_TCL_SHORT must return a falsy value.
     """
     try:
         # easier to run_kmc to create monomer_list than recreate it here (adj easier) so doing so
         # minimize random calls by providing set list of monomer types
         initial_mono_type_list = [S, S, G, S, S, S, G, S]
         num_monos = len(initial_mono_type_list)
         initial_monomers = [
             Monomer(mono_type, i)
             for i, mono_type in enumerate(initial_mono_type_list)
         ]
         initial_events = create_initial_events(initial_monomers,
                                                DEF_RXN_RATES)
         initial_state = create_initial_state(initial_events,
                                              initial_monomers)
         # since GROW is not added to event_dict, no additional monomers will be added
         result = run_kmc(DEF_RXN_RATES,
                          initial_state,
                          sorted(initial_events),
                          t_max=2,
                          random_seed=8)
         # quick tests to make sure run_kmc gives expected results (not what we want to test here)
         # self.assertAlmostEqual(result[TIME][-1], 0.000766574526703574)
         # assertEqual shows both counts if they differ
         self.assertEqual(len(result[MONO_LIST]), num_monos)
         # the function we want to test here is below
         with capture_stderr(gen_tcl,
                             result[ADJ_MATRIX],
                             result[MONO_LIST],
                             chain_id="lignin",
                             out_dir=SUB_DATA_DIR) as output:
             # assertIn prints the captured text when the warning is missing
             self.assertIn("should be one character", output)
         self.assertFalse(diff_lines(TCL_FILE_LOC, GOOD_TCL_SHORT))
     finally:
         # always attempt cleanup, whether or not the assertions passed
         silent_remove(TCL_FILE_LOC, disable=DISABLE_REMOVE)
 def testNoGrowth(self):
     """Run KMC with every monomer present from the start and check the TCL output.

     No GROW event is added to the event list. The file written by gen_tcl is
     compared with GOOD_TCL_NO_GROW_OUT through diff_lines, which must return a
     falsy value.
     """
     try:
         seed = 24
         # a fixed list of monomer types keeps random calls to a minimum
         fixed_types = [
             S, S, S, S, G, S, S, S,
             S, S, S, G, S, S, S, S,
             S, S, S, S, S, S, S, S,
         ]
         monomers = [Monomer(kind, idx) for idx, kind in enumerate(fixed_types)]
         events = create_initial_events(monomers, DEF_RXN_RATES)
         state = create_initial_state(events, monomers)
         # since GROW is not added to the events, no additional monomers will be added
         kmc_out = run_kmc(DEF_RXN_RATES, state, sorted(events), t_max=0.0001, random_seed=seed)
         gen_tcl(kmc_out[ADJ_MATRIX], kmc_out[MONO_LIST], tcl_fname=TCL_FNAME, chain_id="L",
                 out_dir=SUB_DATA_DIR)
         differences = diff_lines(TCL_FILE_LOC, GOOD_TCL_NO_GROW_OUT)
         self.assertFalse(differences)
     finally:
         silent_remove(TCL_FILE_LOC, disable=DISABLE_REMOVE)
    def testB1BondGenMol(self):
        """Check that generate_mol's fragment count agrees with the adjacency-matrix analysis.

        A KMC run with a GROW event is performed, generate_mol is required to write
        nothing to stderr, and the number of fragments reported by GetMolFrags must
        equal the sum of the CHAIN_LEN values from analyze_adj_matrix.
        """
        seed = 55
        monomer_cap = 12
        s_to_g = 1.0
        starting_monomers = [Monomer(kind, idx) for idx, kind in enumerate([S, S, S, G, S])]
        events = create_initial_events(starting_monomers, DEF_RXN_RATES)
        # in this test GROW is appended before the initial state is built
        events.append(Event(GROW, [], rate=1e4))
        state = create_initial_state(events, starting_monomers)
        kmc_out = run_kmc(DEF_RXN_RATES, state, events, n_max=monomer_cap, t_max=2,
                          random_seed=seed, sg_ratio=s_to_g)
        nodes, adj = kmc_out[MONO_LIST], kmc_out[ADJ_MATRIX]

        # first call: only checks that nothing is written to stderr
        with capture_stderr(generate_mol, adj, nodes) as output:
            self.assertFalse(output)

        # second call: the mol block itself is needed for the RDKit fragment count
        rdkit_fragments = GetMolFrags(MolFromMolBlock(generate_mol(adj, nodes)))
        chain_sizes = analyze_adj_matrix(adj)[CHAIN_LEN]

        # Make sure there are the same number of separate fragments calculated by RDKIT
        # as we get from just separating the alternate B1
        expected_fragments = np.sum(list(chain_sizes.values()))
        self.assertEqual(expected_fragments, len(rdkit_fragments))
 def testTCLGenEmptySegname(self):
     """Check gen_tcl when chain_id is a single space; increases coverage of gen_tcl.

     gen_tcl is called with ``chain_id=" "``; the text captured from stderr must
     contain "should be one character", and diff_lines between the file at
     TCL_FILE_LOC and GOOD_TCL_SHORT must return a falsy value.
     """
     # tcl_fname="psfgen.tcl", psf_fname='lignin', chain_id="L", toppar_dir="toppar/"
     # Here, all the monomers are available at the beginning of the simulation
     try:
         # easier to run_kmc to create monomer_list than recreate it here (adj easier) so doing so
         # minimize random calls by providing set list of monomer types
         initial_mono_type_list = [S, S, G, S, S, S, G, S]
         num_monos = len(initial_mono_type_list)
         initial_monomers = [Monomer(mono_type, i) for i, mono_type in enumerate(initial_mono_type_list)]
         initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
         initial_state = create_initial_state(initial_events, initial_monomers)
         # since GROW is not added to event_dict, no additional monomers will be added
         result = run_kmc(DEF_RXN_RATES, initial_state, sorted(initial_events), t_max=2, random_seed=8)
         # quick tests to make sure run_kmc gives expected results (not what we want to test here)
         self.assertAlmostEqual(result[TIME][-1], 0.00015059250794459398)
         # assertEqual shows both counts if they differ
         self.assertEqual(len(result[MONO_LIST]), num_monos)
         # the function we want to test here is below
         with capture_stderr(gen_tcl, result[ADJ_MATRIX], result[MONO_LIST], chain_id=" ",
                             out_dir=SUB_DATA_DIR) as output:
             # assertIn prints the captured text when the warning is missing
             self.assertIn("should be one character", output)
         self.assertFalse(diff_lines(TCL_FILE_LOC, GOOD_TCL_SHORT))
     finally:
         # always attempt cleanup, whether or not the assertions passed
         silent_remove(TCL_FILE_LOC, disable=DISABLE_REMOVE)
    def testCheckBO4Fraction(self):
        """Compare the average and std dev BO4 bond values over three fixed runs with stored values.

        Similar to a test above; was useful for comparing output from different versions.
        """
        monomer_types = [[G, S, G, G, S, S, S, G, S, S, G, G, S, G, G, G, G, S, G, G, G, S, G, S, S, S, G, S, S, G, G],
                         [S, S, G, G, S, G, S, G, G, G, G, S, S, S, S, S, G, S, S, S, G, G, S, G, S, G, S, S, G, S, S],
                         [S, S, S, S, G, S, S, G, G, S, G, S, G, G, G, G, S, S, S, S, S, S, S, G, S, S, G, S, G, S, G]]
        num_repeats = len(monomer_types)

        # the repeat index is added to this base so that each repeat uses a different seed
        base_seed = 32
        sg_result_list = []
        for repeat, type_row in enumerate(monomer_types):
            # Initialize the monomers, events, and state for this repeat
            monomers = [Monomer(kind, m) for m, kind in enumerate(type_row)]
            events = create_initial_events(monomers, DEF_RXN_RATES)
            state = create_initial_state(events, monomers)
            sg_result_list.append(run_kmc(DEF_RXN_RATES, state, events,
                                          n_max=len(monomers), t_max=2, random_seed=base_seed + repeat))

        av_bo4_bonds, std_bo4_bonds = get_avg_num_bonds_single_option(BO4, sg_result_list, num_repeats)
        print("Average fraction BO4 bonds: {:.3f}".format(av_bo4_bonds))
        print("Std dev fraction BO4 bonds: {:.3f}".format(std_bo4_bonds))
        avg_matches = np.allclose(av_bo4_bonds, 0.21020733652312598)
        self.assertTrue(avg_matches)
        std_matches = np.allclose(std_bo4_bonds, 0.04743254939825481)
        self.assertTrue(std_matches)
 def testCreateInitialState(self):
     """Check the size of the initial state and that its first entry holds the first monomer.

     Three fixed draw values are used, so three monomers (and three state
     entries) are expected.
     """
     fixed_draw = [0.48772, 0.15174, 0.7886]
     monomers = create_initial_monomers(0.75, fixed_draw)
     events = create_initial_events(monomers, DEF_RXN_RATES)
     state = create_initial_state(events, monomers)
     self.assertTrue(len(state) == 3)
     # compare by string form, as the original check does
     expected_repr = str(monomers[0])
     actual_repr = str(state[0][MONOMER])
     self.assertTrue(expected_repr == actual_repr)
def create_sample_kmc_result_c_lignin(num_monos=2, max_monos=12, seed=10):
    """Return the run_kmc result for a simulation started from ``num_monos`` monomers of type C.

    A GROW event (rate 1e4) is appended after the initial state is built, and
    the sorted event list is passed to run_kmc with ``n_max=max_monos``,
    ``t_max=2`` and ``random_seed=seed``.
    """
    c_monomers = [Monomer(C, idx) for idx in range(num_monos)]
    # noinspection PyTypeChecker
    events = create_initial_events(c_monomers, DEF_RXN_RATES)
    state = create_initial_state(events, c_monomers)
    events.append(Event(GROW, [], rate=1e4))
    return run_kmc(DEF_RXN_RATES, state, sorted(events), n_max=max_monos, t_max=2, random_seed=seed)
Ejemplo n.º 9
0
    def testFindScaling(self):
        """Optionally time run_kmc for several system sizes and fit a power law to the timings.

        The timing study is switched off by the local ``test_scaling`` flag, so as
        written this test returns immediately without running anything.
        """
        test_scaling = False
        if not test_scaling:
            return

        # To test scaling: replicate batch runs with different numbers of monomers
        # Here, we are testing with equal amount of S and G (no C)
        sg_ratio = 1
        pct_s = sg_ratio / (1 + sg_ratio)
        num_repeats = 5
        test_vals = np.linspace(50, 150, num=3, dtype='int32')

        times = []
        for num_monos in test_vals:
            print(f"Starting batch simulation with {num_monos} monomers")
            batch_times = []
            times.append(batch_times)
            for i in range(num_repeats):
                random_seed = 8 + i
                np.random.seed(random_seed)
                print("    Starting repeat", i)
                # Generate the initial monomers and events (oxidation)
                monomer_draw = np.random.rand(num_monos)
                initial_monomers = create_initial_monomers(pct_s, monomer_draw)
                initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
                # Set the state and add the option to join initial monomers
                initial_state = create_initial_state(initial_events, initial_monomers)

                # Only the KMC call itself is timed
                # noinspection PyUnboundLocalVariable
                start = time.time()
                run_kmc(DEF_RXN_RATES, initial_state, initial_events,
                        sg_ratio=sg_ratio, random_seed=random_seed)
                end = time.time()
                batch_times.append(end - start)
            print(
                f'Average time to complete simulation with {num_monos:5n} monomers: '
                f'{np.sum(times[-1]) / num_repeats:7.2f} seconds')

        # Fit the measured times to a generic power law expression $t = aN^b$ to find the
        # scaling of the algorithm.
        meas_t = [np.mean(one_time) for one_time in times]
        meas_n = test_vals

        def sim_t(p, n):
            # power-law model: p[0] * n ** p[1]
            return p[0] * np.power(n, p[1])

        def loss(p):
            # norm of the difference between modelled and measured times
            return np.linalg.norm(sim_t(p, meas_n) - meas_t)

        results = optimize.minimize(loss,
                                    np.asarray([1e-5, 2.5]),
                                    bounds=[[0, 1], [0, 10]],
                                    options={'disp': True})
        opt_p = results.x
        scaling_formula = f'$t = {opt_p[0]:3.1e}N^{{ {opt_p[1]:4.2f} }}$'
        print(f'Scaling: {scaling_formula}')
 def testMissingRequiredSGRatio(self):
     """Check that run_kmc raises InvalidDataError when a GROW event is given without sg_ratio.

     The error's first argument must contain "A numeric sg_ratio".
     """
     # set up variable to allow running run_kmc without specifying sg_ratio
     initial_sg_ratio = 0.75
     num_initial_monos = 3
     monomer_draw = np.around(np.random.rand(num_initial_monos), MAX_NUM_DECIMAL)
     # these are tested separately
     initial_monomers = create_initial_monomers(initial_sg_ratio, monomer_draw)
     initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
     initial_state = create_initial_state(initial_events, initial_monomers)
     events = set(initial_events[:num_initial_monos])
     events.add(Event(GROW, [], rate=1e4))
     # assertRaises fails the test if no InvalidDataError is raised; this replaces the
     # former assertFalse("non-empty string") trick inside a try/except
     with self.assertRaises(InvalidDataError) as raised:
         run_kmc(DEF_RXN_RATES, initial_state, sorted(events), n_max=20, t_max=1, random_seed=10)
     self.assertIn("A numeric sg_ratio", raised.exception.args[0])
Ejemplo n.º 11
0
def generate_lignin(num_monomers: int = 1) -> Chem.Mol:
    """Simulate lignin formation with run_kmc and return the result as a molecule.

    Parameters
    ----------
    num_monomers : int
        Number of monomers in the lignin molecule; passed to ``run_kmc`` as ``n_max``.

    Returns
    -------
    Chem.Mol
        The molecule parsed from the mol block produced by ``generate_mol``,
        after ``Chem.AddHs`` has been applied.
    """
    # S/G ratio and the S percentage derived from it (both 0 here)
    sg_ratio = 0
    pct_s = sg_ratio / (1 + sg_ratio)

    # The simulation starts from one monomer and runs for at most 1 second,
    # with monomers added at 1e4 monomers/second
    starting_count = 1
    time_limit = 1
    grow_rate = 1e4

    # A random draw together with pct_s decides the initial monolignol type
    draw = np.random.rand(starting_count)
    monomers = create_initial_monomers(pct_s, draw)

    # Only the initial events are used to determine the initial state; GROW is
    # appended afterwards so that further monomers can be added during the run
    events = create_initial_events(monomers, rxn_rates)
    state = create_initial_state(events, monomers)
    events.append(Event(GROW, [], rate=grow_rate))

    # simulate lignin creation
    kmc_out = run_kmc(rxn_rates, state, events, n_max=num_monomers, t_max=time_limit, sg_ratio=sg_ratio)

    # convert the simulation output to a molecule using RDKit
    mol_block = generate_mol(kmc_out[ADJ_MATRIX], kmc_out[MONO_LIST])
    return Chem.AddHs(MolFromMolBlock(mol_block))
    def testIniRates(self):
        """Compare average/std dev BO4 bond values for three monomer-addition rates with stored values.

        For each of three addition rates (np.logspace(4, 12, 3)), run_kmc is run
        ``num_repeats`` times with seeds ``random_seed + i``; the per-rate results
        are passed to get_avg_num_bonds and checked with np.allclose.
        """
        # Note: this test did not increase coverage. Added to help debug notebook.
        # run_multi = False
        # if run_multi:
        #     fun = par.delayed(run_kmc)
        #     num_jobs = num_repeats
        # else:
        num_repeats = 4

        # fun = None
        # num_jobs = None

        sg_ratio = 1.1

        # minimize random calls
        monomer_type_list = [S, G]
        initial_monomers = [Monomer(mono_type, i) for i, mono_type in enumerate(monomer_type_list)]
        max_monos = 12
        initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
        # FYI: np.logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None, axis=0)[source]
        num_rates = 3
        add_rates = np.logspace(4, 12, num_rates)
        add_rates_result_list = []

        # will add to random seed in the iterations to ensure using a different seed for each repeat
        random_seed = 2

        for add_rate in add_rates:
            # NOTE(review): initial_events is never reset inside this loop, so each iteration
            #     appends one more GROW event to the same list, and from the second iteration on
            #     create_initial_state receives a list that already contains GROW event(s).
            #     The stored values below were presumably produced with this behavior; confirm
            #     before changing it.
            initial_state = create_initial_state(initial_events, initial_monomers)
            initial_events.append(Event(GROW, [], rate=add_rate))
            # if run_multi:
            #     results = par.Parallel(n_jobs=num_jobs)([fun(DEF_RXN_RATES, initial_state, initial_events,
            #                                                  n_max=max_monos, t_max=1, sg_ratio=sg_ratio,
            #                                                  random_seed=(random_seed + i))
            #                                              for i in range(num_repeats)])
            # else:
            results = [run_kmc(DEF_RXN_RATES, initial_state, initial_events, n_max=max_monos, t_max=1,
                               sg_ratio=sg_ratio, random_seed=(random_seed + i)) for i in range(num_repeats)]
            add_rates_result_list.append(results)

        av_bo4_bonds, std_bo4_bonds = get_avg_num_bonds(BO4, num_rates, add_rates_result_list, num_repeats)

        # stored expected values, one entry per addition rate
        good_av_bo4 = [0.3680555555555555, 0.2863636363636364, 0.03125]
        good_std_bo4 = [0.08187379251771941, 0.013636363636363641, 0.05412658773652741]
        self.assertTrue(np.allclose(av_bo4_bonds, good_av_bo4))
        self.assertTrue(np.allclose(std_bo4_bonds, good_std_bo4))
def create_sample_kmc_result(max_time=1., num_initial_monos=3, max_monos=10, sg_ratio=0.75, seed=10):
    """Return a run_kmc result for a small sample simulation with a GROW event.

    Fixed monomer draws are used for 3 or 20 initial monomers to minimize
    randomness in testing; for any other count the draw comes from
    ``np.random.rand`` (seeded with ``seed``) and is rounded to MAX_NUM_DECIMAL
    places, because machine-precision differences in floats were a source of
    test differences.

    NOTE(review): ``seed`` only seeds numpy's global generator; run_kmc is
    always called with ``random_seed=10``. Confirm whether ``seed`` was meant
    to be forwarded.
    """
    np.random.seed(seed)

    fixed_draw_20 = [0.77132064, 0.02075195, 0.63364823, 0.74880388, 0.49850701, 0.22479665, 0.19806286,
                     0.76053071, 0.16911084, 0.08833981, 0.68535982, 0.95339335, 0.00394827, 0.51219226,
                     0.81262096, 0.61252607, 0.72175532, 0.29187607, 0.91777412, 0.71457578]
    if num_initial_monos == 3:
        draw = MONO_DRAW_3
    elif num_initial_monos == 20:
        draw = fixed_draw_20
    else:
        # draw of arbitrary length, rounded to limit float-precision differences
        draw = np.around(np.random.rand(num_initial_monos), MAX_NUM_DECIMAL)

    # these are tested separately elsewhere
    monomers = create_initial_monomers(sg_ratio, draw)
    events = create_initial_events(monomers, DEF_RXN_RATES)
    state = OrderedDict(create_initial_state(events, monomers))
    events.append(Event(GROW, [], rate=1e4))
    return run_kmc(DEF_RXN_RATES, state, events, n_max=max_monos, t_max=max_time,
                   random_seed=10, sg_ratio=sg_ratio)
    def testDynamics(self):
        """Run KMC with ``dynamics=True`` and check per-time-step bond and oligomer summaries.

        Tests procedures in the Dynamics.ipynb. Results are verified through the
        number of time steps and through sums over time of each bond type and
        oligomer length.
        """
        # minimize number of random calls during testing (here, set monomer type distribution)
        monomer_type_list = [G, S, G, G, S, S, S, G, S, S, G, G, S, G, G, G, G, S, G, G, G, S, S, G, S, S, G, G, ]
        monomers = [Monomer(kind, idx) for idx, kind in enumerate(monomer_type_list)]
        events = create_initial_events(monomers, DEF_RXN_RATES)
        state = create_initial_state(events, monomers)
        # since GROW is not added to the events, no additional monomers will be added (sg_ratio is thus not needed)
        result = run_kmc(DEF_RXN_RATES, state, sorted(events), random_seed=10, dynamics=True)

        # With dynamics, the MONO_LIST will be a list of monomer lists:
        #    the inner list is the usual MONO_LIST, but here it is saved for every time step
        expected_num_t_steps = 61
        self.assertEqual(len(result[TIME]), expected_num_t_steps)
        self.assertTrue(len(result[MONO_LIST]) == expected_num_t_steps)
        self.assertTrue(len(result[MONO_LIST][-1]) == len(monomer_type_list))

        # want dict[key: [], ...] where the inner list is values by timestep
        #                         instead of list of time steps with [[key: val, ...], ... ]
        bond_type_dict, olig_len_dict, _, _, _ = get_bond_type_v_time_dict(result[ADJ_MATRIX],
                                                                           sum_len_larger_than=10)

        def totals_over_time(series_by_key):
            # each series must have one value per time step; return the sum of each series
            totals = {}
            for key, series in series_by_key.items():
                self.assertEqual(len(series), expected_num_t_steps)
                totals[key] = sum(series)
            return totals

        # test results by checking sums
        good_bond_type_sum_dict = {BO4: 16, BB: 171, B1: 0, B5: 119, C5C5: 0, AO4: 0, C5O4: 23}
        self.assertEqual(totals_over_time(bond_type_dict), good_bond_type_sum_dict)

        good_olig_len_sum_dict = {1: 1112, 2: 474, 3: 21, 4: 56, 5: 45}
        self.assertEqual(totals_over_time(olig_len_dict), good_olig_len_sum_dict)
Ejemplo n.º 15
0
# Example script fragment: run one KMC simulation and print its adjacency matrix.
# NOTE(review): pct_s, sg_ratio and rxn_rates are not defined in this fragment;
#     presumably they are set earlier in the script -- confirm.

# Set the initial and maximum number of monomers to be modeled.
ini_num_monos = 2
max_num_monos = 10

# Maximum time to simulate, in seconds
t_max = 1  # seconds
mono_add_rate = 1e4  # monomers/second

# Use a random number and the given sg_ratio to determine the monolignol types to be initially modeled
monomer_draw = np.random.rand(ini_num_monos)
initial_monomers = create_initial_monomers(pct_s, monomer_draw)

# Initially allow only oxidation events. After they are used to determine the initial state, add
#     GROW to the events, which allows additional monomers to be added to the reaction at the
#     specified rate and with the specified ratio
initial_events = create_initial_events(initial_monomers, rxn_rates)
initial_state = create_initial_state(initial_events, initial_monomers)
initial_events.append(Event(GROW, [], rate=mono_add_rate))

# Run the simulation, stopping at max_num_monos monomers (n_max) or t_max seconds
result = run_kmc(rxn_rates,
                 initial_state,
                 initial_events,
                 n_max=max_num_monos,
                 t_max=t_max,
                 sg_ratio=sg_ratio)

# Convert the sparse matrix to a full array before printing
print("The adjacency matrix for the simulated lignin is:")
print(result[ADJ_MATRIX].toarray())

# From the list of monomers and the adjacency matrix, we can use LigninKMC to write out a tcl script for psfgen to