def testFindScaling(self): test_scaling = False if test_scaling: # To test scaling: replicate batch runs with different numbers of monomers # Here, we are testing with equal amount of S and G (no C) times = [] sg_ratio = 1 pct_s = sg_ratio / (1 + sg_ratio) test_vals = np.linspace(50, 150, num=3, dtype='int32') num_repeats = 5 for num_monos in test_vals: print(f"Starting batch simulation with {num_monos} monomers") times.append([]) for i in range(num_repeats): random_seed = 8 + i np.random.seed(random_seed) print(f" Starting repeat", i) # Generate the initial monomers and events (oxidation) monomer_draw = np.random.rand(num_monos) initial_monomers = create_initial_monomers( pct_s, monomer_draw) initial_events = create_initial_events( initial_monomers, DEF_RXN_RATES) # Set the state and add the option to join initial monomers initial_state = create_initial_state( initial_events, initial_monomers) # Start timing the actual KMC part # noinspection PyUnboundLocalVariable start = time.time() run_kmc(DEF_RXN_RATES, initial_state, initial_events, sg_ratio=sg_ratio, random_seed=random_seed) end = time.time() times[-1].append(end - start) print( f'Average time to complete simulation with {num_monos:5n} monomers: ' f'{np.sum(times[-1]) / num_repeats:7.2f} seconds') # Now we want to fit the times that we just calculated to a generic power law expression $t = aN^b$ to find the # scaling of our algorithm. meas_t = [np.mean(one_time) for one_time in times] # sdev_t = [np.sqrt(np.var(one_time)) for one_time in times] meas_n = test_vals sim_t = lambda p, n: p[0] * np.power(n, p[1]) loss = lambda p: np.linalg.norm(sim_t(p, meas_n) - meas_t) results = optimize.minimize(loss, np.asarray([1e-5, 2.5]), bounds=[[0, 1], [0, 10]], options={'disp': True}) opt_p = results.x scaling_formula = f'$t = {opt_p[0]:3.1e}N^{{ {opt_p[1]:4.2f} }}$' print(f'Scaling: {scaling_formula}')
def testTCLTruncateSegname(self):
    """Check gen_tcl when given a chain_id longer than one character.

    A short KMC run supplies the adjacency matrix and monomer list. gen_tcl
    is then called with ``chain_id="lignin"``; the test expects the text
    "should be one character" in the captured stderr output and expects the
    written file to match GOOD_TCL_SHORT. The output file is removed
    afterwards (subject to DISABLE_REMOVE).
    """
    try:
        # Easier to call run_kmc to create the monomer list and adjacency matrix than to recreate them here.
        # Minimize random calls by providing a set list of monomer types.
        initial_mono_type_list = [S, S, G, S, S, S, G, S]
        num_monos = len(initial_mono_type_list)
        initial_monomers = [Monomer(mono_type, i) for i, mono_type in enumerate(initial_mono_type_list)]
        initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
        initial_state = create_initial_state(initial_events, initial_monomers)
        # since GROW is not added to the events, no additional monomers will be added
        result = run_kmc(DEF_RXN_RATES, initial_state, sorted(initial_events), t_max=2, random_seed=8)

        # Quick sanity check on run_kmc (not what we want to test here); the final-time check is disabled:
        # self.assertAlmostEqual(result[TIME][-1], 0.000766574526703574)
        # assertEqual (rather than assertTrue on a comparison) reports both values on failure
        self.assertEqual(len(result[MONO_LIST]), num_monos)

        # the function we want to test here is below
        with capture_stderr(gen_tcl, result[ADJ_MATRIX], result[MONO_LIST], chain_id="lignin",
                            out_dir=SUB_DATA_DIR) as output:
            # assertIn shows the captured text if the expected warning is missing
            self.assertIn("should be one character", output)
        self.assertFalse(diff_lines(TCL_FILE_LOC, GOOD_TCL_SHORT))
    finally:
        silent_remove(TCL_FILE_LOC, disable=DISABLE_REMOVE)
def testNoGrowth(self):
    """Run KMC with every monomer present from the start and compare the generated tcl file.

    No GROW event is added, so the simulation works only with the 24 monomers
    supplied up front. The tcl file written by gen_tcl must match
    GOOD_TCL_NO_GROW_OUT; the file is removed afterwards (subject to
    DISABLE_REMOVE).
    """
    try:
        # Fixed monomer types keep random calls to a minimum: 24 monomers, G at indices 4 and 11, S elsewhere
        mono_types = [S] * 4 + [G] + [S] * 6 + [G] + [S] * 12
        seed = 24
        monomers = [Monomer(kind, idx) for idx, kind in enumerate(mono_types)]
        events = create_initial_events(monomers, DEF_RXN_RATES)
        state = create_initial_state(events, monomers)
        # GROW is deliberately not added to the events, so no additional monomers will be added
        kmc_result = run_kmc(DEF_RXN_RATES, state, sorted(events), t_max=0.0001, random_seed=seed)
        adj_matrix = kmc_result[ADJ_MATRIX]
        mono_list = kmc_result[MONO_LIST]
        gen_tcl(adj_matrix, mono_list, tcl_fname=TCL_FNAME, chain_id="L", out_dir=SUB_DATA_DIR)
        self.assertFalse(diff_lines(TCL_FILE_LOC, GOOD_TCL_NO_GROW_OUT))
    finally:
        silent_remove(TCL_FILE_LOC, disable=DISABLE_REMOVE)
def testB1BondGenMol(self):
    """Check that generate_mol output agrees with the adjacency-matrix fragment count.

    A seeded KMC run with a GROW event (up to 12 monomers) provides the
    adjacency matrix and monomer list. generate_mol must write nothing to the
    captured stderr, and the number of fragments RDKit finds in the resulting
    mol block must equal the sum of the CHAIN_LEN values reported by
    analyze_adj_matrix.
    """
    seed = 55
    monomer_cap = 12
    sg_ratio = 1.0
    starting_types = [S, S, S, G, S]

    monomers = [Monomer(kind, idx) for idx, kind in enumerate(starting_types)]
    events = create_initial_events(monomers, DEF_RXN_RATES)
    events.append(Event(GROW, [], rate=1e4))
    state = create_initial_state(events, monomers)
    kmc_result = run_kmc(DEF_RXN_RATES, state, events, n_max=monomer_cap, t_max=2, random_seed=seed,
                         sg_ratio=sg_ratio)
    adj = kmc_result[ADJ_MATRIX]
    nodes = kmc_result[MONO_LIST]

    # First call: only checks that nothing is reported on stderr
    with capture_stderr(generate_mol, adj, nodes) as output:
        self.assertFalse(output)

    # Second call: the mol block itself is parsed and split into fragments
    mol_block = generate_mol(adj, nodes)
    fragments = GetMolFrags(MolFromMolBlock(mol_block))

    chain_lengths = analyze_adj_matrix(adj)[CHAIN_LEN]
    # Make sure RDKit finds the same number of separate fragments as the
    # adjacency-matrix analysis, which separates the alternate B1 products
    expected_num_frags = np.sum(list(chain_lengths.values()))
    self.assertEqual(expected_num_frags, len(fragments))
def testCheckBO4Fraction(self):
    """Compare the average and standard deviation of the BO4 bond fraction with stored values.

    Three fixed monomer-type lists are each simulated once with a different
    seed (32, 33, 34) and no GROW event. The statistics come from
    get_avg_num_bonds_single_option. This was useful for comparing output
    from different versions.
    """
    monomer_types = [[G, S, G, G, S, S, S, G, S, S, G, G, S, G, G, G, G, S, G, G, G, S, G, S, S, S, G, S, S, G, G],
                     [S, S, G, G, S, G, S, G, G, G, G, S, S, S, S, S, G, S, S, S, G, G, S, G, S, G, S, S, G, S, S],
                     [S, S, S, S, G, S, S, G, G, S, G, S, G, G, G, G, S, S, S, S, S, S, S, G, S, S, G, S, G, S, G]]
    num_repeats = len(monomer_types)
    # The repeat index is added to this base so that every repeat uses a different seed
    base_seed = 32

    sg_result_list = []
    for repeat, type_list in enumerate(monomer_types):
        # Initialize the monomers, events, and state for this repeat
        monomers = [Monomer(kind, idx) for idx, kind in enumerate(type_list)]
        events = create_initial_events(monomers, DEF_RXN_RATES)
        state = create_initial_state(events, monomers)
        kmc_result = run_kmc(DEF_RXN_RATES, state, events, n_max=len(monomers), t_max=2,
                             random_seed=base_seed + repeat)
        sg_result_list.append(kmc_result)

    av_bo4_bonds, std_bo4_bonds = get_avg_num_bonds_single_option(BO4, sg_result_list, num_repeats)
    print("Average fraction BO4 bonds: {:.3f}".format(av_bo4_bonds))
    print("Std dev fraction BO4 bonds: {:.3f}".format(std_bo4_bonds))
    self.assertTrue(np.allclose(av_bo4_bonds, 0.21020733652312598))
    self.assertTrue(np.allclose(std_bo4_bonds, 0.04743254939825481))
def testTCLGenEmptySegname(self):
    """Check gen_tcl when given a blank chain_id (increases coverage of gen_tcl).

    All monomers are available at the beginning of the simulation. A short
    KMC run supplies the adjacency matrix and monomer list. gen_tcl is then
    called with ``chain_id=" "``; the test expects the text
    "should be one character" in the captured stderr output and expects the
    written file to match GOOD_TCL_SHORT. The output file is removed
    afterwards (subject to DISABLE_REMOVE).
    """
    try:
        # Easier to call run_kmc to create the monomer list and adjacency matrix than to recreate them here.
        # Minimize random calls by providing a set list of monomer types.
        initial_mono_type_list = [S, S, G, S, S, S, G, S]
        num_monos = len(initial_mono_type_list)
        initial_monomers = [Monomer(mono_type, i) for i, mono_type in enumerate(initial_mono_type_list)]
        initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
        initial_state = create_initial_state(initial_events, initial_monomers)
        # since GROW is not added to the events, no additional monomers will be added
        result = run_kmc(DEF_RXN_RATES, initial_state, sorted(initial_events), t_max=2, random_seed=8)

        # quick tests to make sure run_kmc gives expected results (not what we want to test here)
        self.assertAlmostEqual(result[TIME][-1], 0.00015059250794459398)
        # assertEqual (rather than assertTrue on a comparison) reports both values on failure
        self.assertEqual(len(result[MONO_LIST]), num_monos)

        # the function we want to test here is below
        with capture_stderr(gen_tcl, result[ADJ_MATRIX], result[MONO_LIST], chain_id=" ",
                            out_dir=SUB_DATA_DIR) as output:
            # assertIn shows the captured text if the expected warning is missing
            self.assertIn("should be one character", output)
        self.assertFalse(diff_lines(TCL_FILE_LOC, GOOD_TCL_SHORT))
    finally:
        silent_remove(TCL_FILE_LOC, disable=DISABLE_REMOVE)
def testMissingRequiredSGRatio(self):
    """Verify run_kmc raises InvalidDataError when a GROW event is present but sg_ratio is not given.

    The error message is expected to contain "A numeric sg_ratio".
    """
    # set up variables to allow calling run_kmc without specifying sg_ratio
    initial_sg_ratio = 0.75
    num_initial_monos = 3
    # NOTE(review): this draw is unseeded, so the initial monomer types may differ between runs
    monomer_draw = np.around(np.random.rand(num_initial_monos), MAX_NUM_DECIMAL)
    # these are tested separately
    initial_monomers = create_initial_monomers(initial_sg_ratio, monomer_draw)
    initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
    initial_state = create_initial_state(initial_events, initial_monomers)
    events = {initial_events[i] for i in range(num_initial_monos)}
    events.add(Event(GROW, [], rate=1e4))

    # assertRaises fails the test if no InvalidDataError is raised, replacing the
    # manual "should not arrive here" assertion inside a try/except
    with self.assertRaises(InvalidDataError) as raised:
        run_kmc(DEF_RXN_RATES, initial_state, sorted(events), n_max=20, t_max=1, random_seed=10)
    self.assertIn("A numeric sg_ratio", raised.exception.args[0])
def create_sample_kmc_result_c_lignin(num_monos=2, max_monos=12, seed=10):
    """Run a sample KMC simulation that starts from C-type monomers only.

    :param num_monos: number of C monomers present at the start
    :param max_monos: passed to run_kmc as ``n_max``
    :param seed: passed to run_kmc as ``random_seed``
    :return: whatever run_kmc returns for this setup
    """
    c_monomers = [Monomer(C, idx) for idx in range(num_monos)]
    # noinspection PyTypeChecker
    events = create_initial_events(c_monomers, DEF_RXN_RATES)
    # The state is built from the initial events before the GROW event is added
    state = create_initial_state(events, c_monomers)
    events.append(Event(GROW, [], rate=1e4))
    return run_kmc(DEF_RXN_RATES, state, sorted(events), n_max=max_monos, t_max=2, random_seed=seed)
def generate_lignin(num_monomers: int = 1) -> Chem.Mol:
    """Simulate lignin growth with KMC and return the result as an RDKit molecule.

    The simulation starts from one monomer with ``sg_ratio = 0``, adds a GROW
    event at 1e4 monomers/second, and runs for at most one second of
    simulated time. The resulting mol block is parsed by RDKit and explicit
    hydrogens are added.

    Parameters
    ----------
    num_monomers : int
        Maximum number of monomers in the lignin molecule (passed to
        run_kmc as ``n_max``).

    Returns
    -------
    Chem.Mol
        The molecule after ``Chem.AddHs``.
    """
    # NOTE(review): rxn_rates is not defined in this function; presumably a module-level name
    sg_ratio = 0
    # Fraction of S that corresponds to the S:G ratio
    s_fraction = sg_ratio / (1 + sg_ratio)

    starting_count = 1  # initial number of monomers
    monomer_cap = num_monomers  # maximum number of monomers to be modeled
    sim_seconds = 1  # maximum time to simulate, in seconds
    growth_rate = 1e4  # monomers/second

    # A random draw plus the S fraction determines the monolignol types initially modeled
    draw = np.random.rand(starting_count)
    monomers = create_initial_monomers(s_fraction, draw)

    # Only the initial events are used to determine the initial state; GROW is added
    # afterwards so that additional monomers can be added at the specified rate and ratio
    events = create_initial_events(monomers, rxn_rates)
    state = create_initial_state(events, monomers)
    events.append(Event(GROW, [], rate=growth_rate))

    # simulate lignin creation
    kmc_result = run_kmc(rxn_rates, state, events, n_max=monomer_cap, t_max=sim_seconds, sg_ratio=sg_ratio)

    # convert to an RDKit molecule via a mol block, then add hydrogens
    mol_block = generate_mol(kmc_result[ADJ_MATRIX], kmc_result[MONO_LIST])
    return Chem.AddHs(MolFromMolBlock(mol_block))
def testIniRates(self):
    """Compare BO4 bond statistics across three monomer addition rates with stored values.

    Note: this test did not increase coverage; it was added to help debug a
    notebook. Runs are performed serially (a parallel variant existed only as
    commented-out code). Each addition rate is simulated ``num_repeats`` times
    with seeds 2, 3, 4, 5.
    """
    num_repeats = 4
    sg_ratio = 1.1
    max_monos = 12
    # will add to random seed in the iterations to insure using a different seed for each repeat
    random_seed = 2

    # minimize random calls by fixing the starting monomer types
    monomer_type_list = [S, G]
    initial_monomers = [Monomer(kind, idx) for idx, kind in enumerate(monomer_type_list)]
    initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)

    # FYI: np.logspace(start, stop, num=50, endpoint=True, base=10.0, dtype=None, axis=0)
    num_rates = 3
    add_rates = np.logspace(4, 12, num_rates)

    add_rates_result_list = []
    for add_rate in add_rates:
        initial_state = create_initial_state(initial_events, initial_monomers)
        # NOTE(review): GROW is appended to the same list on every pass, so later rates also carry the
        # GROW events of earlier rates; the stored expected values below were produced with this behavior
        initial_events.append(Event(GROW, [], rate=add_rate))
        rate_results = []
        for i in range(num_repeats):
            rate_results.append(run_kmc(DEF_RXN_RATES, initial_state, initial_events, n_max=max_monos, t_max=1,
                                        sg_ratio=sg_ratio, random_seed=(random_seed + i)))
        add_rates_result_list.append(rate_results)

    av_bo4_bonds, std_bo4_bonds = get_avg_num_bonds(BO4, num_rates, add_rates_result_list, num_repeats)

    good_av_bo4 = [0.3680555555555555, 0.2863636363636364, 0.03125]
    good_std_bo4 = [0.08187379251771941, 0.013636363636363641, 0.05412658773652741]
    self.assertTrue(np.allclose(av_bo4_bonds, good_av_bo4))
    self.assertTrue(np.allclose(std_bo4_bonds, good_std_bo4))
def create_sample_kmc_result(max_time=1., num_initial_monos=3, max_monos=10, sg_ratio=0.75, seed=10):
    """Run a sample KMC simulation with a GROW event and return its result.

    :param max_time: passed to run_kmc as ``t_max``
    :param num_initial_monos: number of monomers present at the start; 3 and 20 use fixed draws
    :param max_monos: passed to run_kmc as ``n_max``
    :param sg_ratio: passed to create_initial_monomers and to run_kmc
    :param seed: seeds numpy's global generator and is passed to run_kmc as ``random_seed``
        (previously the run always used a hard-coded 10, silently ignoring this argument;
        results for the default ``seed=10`` are unchanged)
    :return: whatever run_kmc returns for this setup
    """
    # The set lists are to minimize randomness in testing (added while debugging a source of randomness in some
    # tests; left because it does no harm). A monomer_draw of arbitrary length can still be made using the seed,
    # but those numbers are rounded because machine precision differences in floats were the bug.
    np.random.seed(seed)
    if num_initial_monos == 3:
        monomer_draw = MONO_DRAW_3
    elif num_initial_monos == 20:
        monomer_draw = [0.77132064, 0.02075195, 0.63364823, 0.74880388, 0.49850701, 0.22479665, 0.19806286,
                        0.76053071, 0.16911084, 0.08833981, 0.68535982, 0.95339335, 0.00394827, 0.51219226,
                        0.81262096, 0.61252607, 0.72175532, 0.29187607, 0.91777412, 0.71457578]
    else:
        monomer_draw = np.around(np.random.rand(num_initial_monos), MAX_NUM_DECIMAL)

    # these are tested separately elsewhere
    initial_monomers = create_initial_monomers(sg_ratio, monomer_draw)
    initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
    # The state is built from the initial events before the GROW event is added
    initial_state = OrderedDict(create_initial_state(initial_events, initial_monomers))
    initial_events.append(Event(GROW, [], rate=1e4))
    # Use the caller's seed for the simulation too, consistent with create_sample_kmc_result_c_lignin
    result = run_kmc(DEF_RXN_RATES, initial_state, initial_events, n_max=max_monos, t_max=max_time,
                     random_seed=seed, sg_ratio=sg_ratio)
    return result
def testDynamics(self):
    """Test the procedures used in Dynamics.ipynb.

    Runs a seeded KMC simulation with ``dynamics=True`` on a fixed list of 28
    monomers (no GROW event), checks the number of saved time steps, and then
    checks per-bond-type and per-oligomer-length sums computed from
    get_bond_type_v_time_dict against stored values.
    """
    # minimize number of random calls during testing (here, set monomer type distribution)
    monomer_type_list = [G, S, G, G, S, S, S, G, S, S, G, G, S, G, G, G, G, S, G, G, G, S, S, G, S, S, G, G, ]
    num_monos = len(monomer_type_list)
    initial_monomers = [Monomer(mono_type, i) for i, mono_type in enumerate(monomer_type_list)]
    initial_events = create_initial_events(initial_monomers, DEF_RXN_RATES)
    initial_state = create_initial_state(initial_events, initial_monomers)
    # since GROW is not added to the events, no additional monomers will be added (sg_ratio is thus not needed)
    result = run_kmc(DEF_RXN_RATES, initial_state, sorted(initial_events), random_seed=10, dynamics=True)

    # With dynamics, the MONO_LIST will be a list of monomer lists:
    # the inner list is the usual MONO_LIST, but here it is saved for every time step
    t_steps = result[TIME]
    expected_num_t_steps = 61
    self.assertEqual(len(t_steps), expected_num_t_steps)
    # assertEqual (rather than assertTrue on a comparison) reports both values on failure
    self.assertEqual(len(result[MONO_LIST]), expected_num_t_steps)
    self.assertEqual(len(result[MONO_LIST][-1]), num_monos)

    # want dict[key: [], ...] where the inner list is values by timestep
    # instead of list of time steps with [[key: val, ...], ... ]
    adj_list = result[ADJ_MATRIX]
    # only the first two returned items are checked here
    (bond_type_dict, olig_len_dict, _sum_list, _olig_count_dict,
     _sum_count_list) = get_bond_type_v_time_dict(adj_list, sum_len_larger_than=10)

    # test results by checking sums
    good_bond_type_sum_dict = {BO4: 16, BB: 171, B1: 0, B5: 119, C5C5: 0, AO4: 0, C5O4: 23}
    bond_type_sum_dict = {}
    for bond_type, val_list in bond_type_dict.items():
        self.assertEqual(len(val_list), expected_num_t_steps)
        bond_type_sum_dict[bond_type] = sum(val_list)
    self.assertEqual(bond_type_sum_dict, good_bond_type_sum_dict)

    good_olig_len_sum_dict = {1: 1112, 2: 474, 3: 21, 4: 56, 5: 45}
    olig_len_sum_dict = {}
    for olig_len, val_list in olig_len_dict.items():
        self.assertEqual(len(val_list), expected_num_t_steps)
        olig_len_sum_dict[olig_len] = sum(val_list)
    self.assertEqual(olig_len_sum_dict, good_olig_len_sum_dict)
mono_add_rate = 1e4 # monomers/second # Use a random number and the given sg_ratio to determine the monolignol types to be initially modeled monomer_draw = np.random.rand(ini_num_monos) initial_monomers = create_initial_monomers(pct_s, monomer_draw) # Initially allow only oxidation events. After they are used to determine the initial state, add # GROW to the events, which allows additional monomers to be added to the reaction at the # specified rate and with the specified ratio initial_events = create_initial_events(initial_monomers, rxn_rates) initial_state = create_initial_state(initial_events, initial_monomers) initial_events.append(Event(GROW, [], rate=mono_add_rate)) result = run_kmc(rxn_rates, initial_state, initial_events, n_max=max_num_monos, t_max=t_max, sg_ratio=sg_ratio) # Convert the sparse matrix to a full array before printing print("The adjacency matrix for the simulated lignin is:") print(result[ADJ_MATRIX].toarray()) # From the list of monomers and the adjacency matrix, we can use LigninKMC to write out a tcl script for psfgen to # turn into a .psf file. # fname and sgnames are things that we'd want to change; file name always the same as the segname gen_tcl(result[ADJ_MATRIX], result[MONO_LIST], toppar_dir="../smilesdemo/toppar/", tcl_fname="psfgen.tcl", psf_fname="L",
def main(argv=None):
    """
    Runs the main program: for every monomer addition rate and S:G ratio in the configuration,
    runs the requested number of KMC repeats, reports and writes the results, and optionally
    creates plots.

    :param argv: The command line arguments.
    :return: The return code for the program's termination: the parser's code if parsing did not succeed,
        INVALID_DATA if an InvalidDataError or KeyError was caught, otherwise GOOD_RET.
    """
    print(OPENING_MSG)
    args, ret = parse_cmdline(argv)
    if ret != GOOD_RET or args is None:
        return ret

    cfg = args.config

    try:
        # validate at the beginning to catch errors early
        validate_input(cfg)

        for add_rate in cfg[ADD_RATES]:
            # one entry (a list of final adjacency matrices, one per repeat) per S:G ratio
            adjs_per_sg = []
            add_rate_str = f'{add_rate:.{3}g}'.replace("+", "").replace(".", "-")

            for sg_ratio in cfg[SG_RATIOS]:
                # containers for storing data from the repeats
                bonds_by_type = defaultdict(list)
                monomer_counts = []
                oligomer_counts = []
                final_adjs = []

                for rep in range(cfg[NUM_REPEATS]):
                    # decide on initial monomers, based on given sg_ratio, and create the initial events
                    initial_events, initial_state = initiate_state(add_rate, cfg, rep, sg_ratio)

                    # begin simulation
                    result = run_kmc(cfg[RXN_RATES], initial_state, initial_events, n_max=cfg[MAX_MONOS],
                                     t_max=cfg[SIM_TIME], sg_ratio=sg_ratio, dynamics=cfg[DYNAMICS])

                    if not cfg[DYNAMICS]:
                        last_adj = result[ADJ_MATRIX]
                        last_mono_list = result[MONO_LIST]
                    else:
                        # with dynamics, results are sequences and the final entry is the end state
                        last_adj = result[ADJ_MATRIX][-1]
                        last_mono_list = result[MONO_LIST][-1]
                        # only the per-bond-type values, the counts keyed by 1, and the summed counts are used
                        (bond_type_dict, _olig_monos_dict, _sum_monos_list, olig_count_dict,
                         sum_count_list) = get_bond_type_v_time_dict(result[ADJ_MATRIX], sum_len_larger_than=2)
                        for bond_type in BOND_TYPE_LIST:
                            bonds_by_type[bond_type].append(bond_type_dict[bond_type])
                        monomer_counts.append(olig_count_dict[1])
                        oligomer_counts.append(sum_count_list)

                    final_adjs.append(last_adj)

                    # show results
                    summary = analyze_adj_matrix(last_adj, break_co_bonds=cfg[BREAK_CO])
                    adj_analysis_to_stdout(summary, break_co_bonds=cfg[BREAK_CO])

                    # Outputs
                    produce_output(last_adj, last_mono_list, cfg)

                # save for potential plotting
                adjs_per_sg.append(final_adjs)

                # Now that all repeats are done, create plots for dynamics, if applicable
                if cfg[DYNAMICS]:
                    create_dynamics_plots(add_rate_str, bonds_by_type, cfg, monomer_counts, oligomer_counts,
                                          sg_ratio)

            if cfg[PLOT_BONDS]:
                create_bond_v_sg_plots(add_rate_str, cfg, adjs_per_sg)

    except (InvalidDataError, KeyError) as e:
        warning(e)
        return INVALID_DATA

    return GOOD_RET  # success