def bundle_comparison(w_arr, L, shape, scale, E):
    '''bundle (Weibull fibers) response for comparison with the CB model'''
    from scipy.stats import weibull_min
    sV0 = scale * (3.14159 * 0.00345 ** 2 * L) ** (1 / shape)
    eps = w_arr / L * (1. - weibull_min(shape, scale=scale).cdf(w_arr / L))
    plt.plot(w_arr / L, eps * E, lw=4, color='red', ls='dashed',
             label='FB model')
    bundle = Reinforcement(r=0.00345,
                           tau=0.00001,
                           V_f=0.9999,
                           E_f=E,
                           xi=WeibullFibers(shape=shape, sV0=sV0),
                           n_int=50)
    ccb = CompositeCrackBridge(E_m=25e3,
                               reinforcement_lst=[bundle],
                               Ll=L / 2.,
                               Lr=L / 2.)
    ccb_view.model = ccb
    eps = []
    for w in w_arr:
        ccb.w = w
        eps.append(ccb_view.sigma_c / E)
    plt.plot(w_arr / L, np.array(eps) * E, color='blue', lw=2,
             label='CB model')
    plt.legend(loc='best')
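# Hypothetical call of bundle_comparison() (not from the source): the argument
# values are illustrative assumptions, and a module-level
# CompositeCrackBridgeView instance named ccb_view is assumed to exist,
# because the function assigns ccb_view.model internally.
w_arr = np.linspace(0.0, 0.6, 100)   # assumed crack-opening range
bundle_comparison(w_arr, L=100., shape=5., scale=0.02, E=72e3)
plt.show()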
def analytical_comparison():
    '''For the case when tau is deterministic, there is an analytical solution.
    The differences are caused by the additional stiffness of broken fibers,
    which the CB model adds to the matrix stiffness. As the matrix E grows and
    V_f decreases, the two solutions get closer.'''
    tau, E_f, E_m, V_f = 0.1, 72e3, 25e3, 0.2
    r, shape, scale = 0.001, 5., 0.02

    # analytical solution for damage controlled test
    ctrl_damage = np.linspace(0.0, .99, 100)

    def crackbridge(w, tau, E_f, E_m, V_f, r, omega):
        Kf = E_f * V_f * (1 - omega)
        Km = E_m * (1 - V_f) + E_f * V_f * omega
        Kc = Kf + Km
        T = 2. * tau * V_f * (1. - omega) / r
        c = np.sqrt(Kc * T / Km / Kf)
        return c * np.sqrt(w) * (1 - omega)

    def w_omega(tau, E_f, E_m, V_f, r, omega, shape, scale):
        Kf = E_f * V_f * (1 - omega)
        Km = E_m * (1 - V_f) + E_f * V_f * omega
        Kc = Kf + Km
        T = 2. * tau * V_f * (1. - omega) / r
        return (-np.log(1. - omega)) ** (2. / shape) \
            * scale ** 2 * Km * Kf / Kc / T

    w_lst = [w_omega(tau, E_f, E_m, V_f, r, omega, shape, scale)
             for omega in ctrl_damage]
    epsf = crackbridge(np.array(w_lst), tau, E_f, E_m, V_f, r, ctrl_damage)
    plt.plot(np.array(w_lst), epsf * E_f * V_f, color='red', lw=4,
             ls='dashed', label='analytical')

    reinf = Reinforcement(r=r, tau=tau, V_f=V_f, E_f=E_f,
                          xi=RV('weibull_min', shape=shape, scale=scale),
                          n_int=20)
    ccb = CompositeCrackBridge(E_m=E_m,
                               reinforcement_lst=[reinf],
                               Ll=1000., Lr=1000.)
    # assign the freshly built crack bridge to the view (as in
    # bundle_comparison); otherwise reinf and ccb would remain unused
    ccb_view.model = ccb
    stress = []
    w_arr = np.linspace(0.0, np.max(w_lst), 100)
    for w in w_arr:
        ccb_view.model.w = w
        stress.append(ccb_view.sigma_c)
    plt.plot(w_arr, stress, color='blue', lw=2, label='CB model')
    plt.legend(loc='best')
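# A minimal worked check (not from the source): evaluates the closed-form
# relations used in crackbridge()/w_omega() above at a single, arbitrarily
# chosen damage level omega, with the same material parameters as
# analytical_comparison(). Function name and values are purely illustrative.
def analytical_point_check(omega=0.3):
    tau, E_f, E_m, V_f = 0.1, 72e3, 25e3, 0.2
    r, shape, scale = 0.001, 5., 0.02
    Kf = E_f * V_f * (1 - omega)
    Km = E_m * (1 - V_f) + E_f * V_f * omega
    Kc = Kf + Km
    T = 2. * tau * V_f * (1. - omega) / r
    # crack opening corresponding to the prescribed damage level
    w = (-np.log(1. - omega)) ** (2. / shape) * scale ** 2 * Km * Kf / Kc / T
    # fiber strain and composite stress from the analytical crack bridge
    eps_f = np.sqrt(Kc * T / Km / Kf) * np.sqrt(w) * (1 - omega)
    print('omega = %.2f, w = %.4e, sigma_c = %.2f' % (omega, w, eps_f * E_f * V_f))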
class XCS_ER:

    GAMMA = 0.71

    def __init__(self, max_population_size, possible_actions=[], histlen=42):
        self.name = "XCS_ER"
        self.action_size = len(possible_actions)
        self.max_population_size = max_population_size
        self.possible_actions = possible_actions
        self.population = []
        self.time_stamp = 1
        self.action_history = []
        self.old_action_history = []
        self.reinforce = Reinforcement()
        self.ga = CIGeneticAlgorithm(possible_actions)
        #################################
        self.single_testcases = True
        self.histlen = histlen
        #################################
        self.rewards = None
        self.p_explore = 0.25
        self.train_mode = True
        #################################
        # dumb idea that will never work
        #################################
        self.experience_length = 12000
        self.experience_batch_size = 2000
        self.experience = XCSExperienceReplay(max_memory=self.experience_length)
        self.ci_cycle = 0

    def get_action(self, state):
        '''
        :param state: State in Retects. In the XCS world = situation.
        :return: an action
        '''
        theta_mna = len(self.possible_actions)
        matcher = CIMatching(theta_mna, self.possible_actions)
        match_set = matcher.get_match_set(self.population, state, self.time_stamp)
        self.p_explore = (self.p_explore - 0.1) * 0.99 + 0.1
        action_selector = ActionSelection(self.possible_actions, self.p_explore)
        prediction_array = action_selector.get_prediction_array(match_set)
        action = action_selector.select_action(prediction_array, self.train_mode)
        self.action_history.append((state, action))
        return action

    def reward(self, new_rewards):
        try:
            x = float(new_rewards)
            new_rewards = [x] * len(self.action_history)
        except Exception as _:
            if len(new_rewards) < len(self.action_history):
                raise Exception('Too few rewards')
        for i in range(0, len(new_rewards)):
            reward = new_rewards[i]
            state, action = self.action_history[i]
            self.experience.remember((state, action, reward, self.ci_cycle))
        self.action_history = []
        self.ci_cycle += 1
        if self.ci_cycle == 2 or self.ci_cycle % 3 == 0:
            print("start ER")
            self.learn_from_experience()
            print("finish ER")
        print("finished CI cycle " + str(self.ci_cycle - 1))

    def get_average_prediction(self, cycle_id, on_policy=False):
        next_experiences = self.experience.get_get_exp_of_CI_cyle(cycle_id + 1)
        if next_experiences is None:
            return None
        prediction_sum = 0
        for old_experience in next_experiences:
            state, _, _, _ = old_experience
            theta_mna = len(self.possible_actions)
            matcher = CIMatching(theta_mna, self.possible_actions)
            match_set = matcher.get_match_set(self.population, state,
                                              self.time_stamp)
            action_selector = ActionSelection(self.possible_actions, 0)
            prediction_array = action_selector.get_prediction_array(match_set)
            action = action_selector.select_action(prediction_array,
                                                   self.train_mode)
            if on_policy:
                prediction_sum += prediction_array[action]
            else:
                # off policy: accumulate the highest predicted payoff
                # (the original summed the argmax key, i.e. the action id)
                prediction_sum += max(prediction_array.values())
        return prediction_sum / len(next_experiences)

    def learn_from_experience(self):
        experiences = self.experience.get_batch(self.experience_batch_size,
                                                self.ci_cycle - 1)
        states, actions, rewards, ci_cyles = zip(*experiences)
        cycles_of_batch = set(ci_cyles)
        prediction_vals = {}
        for cycle_id in cycles_of_batch:
            prediction_vals[cycle_id] = self.get_average_prediction(cycle_id,
                                                                    False)
        print("retrieved prediction approx.")
        for i in range(0, len(rewards)):
            state = states[i]
            action = actions[i]
            reward = rewards[i]
            cycle = ci_cyles[i]
            if prediction_vals[cycle] is not None:
                discounted_reward = reward + XCS_ER.GAMMA * prediction_vals[cycle]
                # match set
                theta_mna = len(self.possible_actions)  # use covering?
                # len(self.possible_actions)
                matcher = CIMatching(theta_mna, self.possible_actions)
                match_set = matcher.get_match_set(self.population, state,
                                                  self.time_stamp)
                # action set
                action_selector = ActionSelection(self.possible_actions,
                                                  self.p_explore)
                action_set = action_selector.get_action_set(match_set, action)
                if len(action_set) > 0:
                    # update classifiers
                    self.reinforce.reinforce(action_set, discounted_reward)
                    self.ga.perform_iteration(action_set, state,
                                              self.population, self.time_stamp)
                self.time_stamp += 1
            if i % 10 == 0:
                print("finished " + str(i / len(rewards)) + " percent of ER")
        self.delete_from_population()

    def delete_from_population(self):
        '''
        Deletes as many classifiers as necessary until the population size
        is within the defined bounds.
        '''
        total_numerosity = sum(list(map(lambda x: x.numerosity, self.population)))
        while len(self.population) > self.max_population_size:
            total_fitness = sum(list(map(lambda x: x.fitness, self.population)))
            avg_fitness = total_fitness / total_numerosity
            vote_sum = sum(list(map(lambda x: x.deletion_vote(avg_fitness),
                                    self.population)))
            choice_point = random.random() * vote_sum
            vote_sum = 0
            for classifier in self.population:
                vote_sum += classifier.deletion_vote(avg_fitness)
                if vote_sum > choice_point:
                    if classifier.numerosity > 1:
                        classifier.numerosity = classifier.numerosity - 1
                    else:
                        self.population.remove(classifier)
                    break  # delete a single (micro)classifier per pass

    def save(self, filename):
        """ Stores agent as pickled file """
        pickle.dump(self, open(filename + '.p', 'wb'), 2)

    @classmethod
    def load(cls, filename):
        return pickle.load(open(filename + '.p', 'rb'))
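# Minimal usage sketch for XCS_ER (not from the source): the state encoding and
# the concrete action values below are assumptions; in the source they come
# from the surrounding CI test-case prioritization environment.
agent = XCS_ER(max_population_size=2000, possible_actions=[0, 1, 2])
state = [0.0] * agent.histlen      # hypothetical feature vector of length histlen
action = agent.get_action(state)   # builds a match set and selects an action
agent.reward(1.0)                  # a scalar reward is broadcast over the action history
agent.save('xcs_er_agent')         # pickles the agent to 'xcs_er_agent.p'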
'''
from depend_CB_model import CompositeCrackBridge
from depend_CB_postprocessor import CompositeCrackBridgeView
from reinforcement import Reinforcement, WeibullFibers
from spirrid.rv import RV
from matplotlib import pyplot as plt
import numpy as np

if __name__ == '__main__':
    # AR-glass
    reinf1 = Reinforcement(r=0.001,  # RV('uniform', loc=0.012, scale=0.002),
                           tau=0.1,  # RV('uniform', loc=.3, scale=.1),
                           V_f=0.2,
                           E_f=72e3,
                           xi=RV('weibull_min', shape=5., scale=.02),
                           n_int=50)
    # carbon
    reinf2 = Reinforcement(r=RV('uniform', loc=0.002, scale=0.002),
                           tau=RV('uniform', loc=.6, scale=.1),
                           V_f=0.05,
                           E_f=200e3,
                           xi=RV('weibull_min', shape=10., scale=.015),
                           n_int=15)
    # instance of CompCrackBridge with matrix E and BC
    model = CompositeCrackBridge(E_m=25e3,
                                 reinforcement_lst=[reinf1],
def setValues():
    """Default settings

    :Returns:
        - Default settings for the probabilistic models for degradation of concrete.
    """
    # Concrete settings
    concrete = Concrete('C25/30')
    concrete.setWCratio(0.4)      # values: (0.3),0.4,(0.45),0.5
    concrete.setCuringPeriod(1)   # values: 1,3,7,28
    concrete.setGrade(45)         # values: 45,40,25,35

    # Reinforcement settings
    reinforcement = Reinforcement('S500')
    reinforcement.setYieldStress(500)  # values: all
    reinforcement.setDiameter(16)      # values: (8),10,16,27
    reinforcement.setBars(1)           # values: all

    # Geometry settings
    geometrie = Geometrie('Beam')
    geometrie.setCover(30)        # values: all
    geometrie.setBeamWidth(350)
    geometrie.setBeamHeight(550)
    geometrie.setBeamLength(5000)

    # Environment settings
    environment = Environment()
    environment.setZone('Submerged')       # values: 'Submerged','Tidal','Splash','Atmospheric'
    environment.setHumidity(80)            # values: 50,65,80,95,100
    # for the simplified corrosion rate:
    # environment.setExposure('Wet-Dry')   # values: 'Wet','Wet-Dry','Airborne sea water','Tidal'
    environment.setTemperature(20)         # values: all
    environment.setShelter('Unsheltered')  # 'Sheltered','Unsheltered'

    # Chloride
    chloride = Chloride(concrete, geometrie, environment)

    # Carbonation
    carbonation = Carbonation(concrete, geometrie, environment)

    # Propagation
    rate = Propagation(environment)

    # Corrosion
    pitting = Pitting(reinforcement, rate)
    # pitting.setDeltaTime(50)  # values: all

    # Resistance
    resistance = Resistance(concrete, reinforcement, geometrie, rate, pitting)

    return (concrete, reinforcement, geometrie, environment, chloride,
            carbonation, rate, pitting, resistance)
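# Minimal usage sketch (not from the source), assuming the Concrete,
# Reinforcement, Geometrie, Environment, Chloride, Carbonation, Propagation,
# Pitting and Resistance classes used above are importable from the same
# package. The unpacking order mirrors the return statement of setValues().
(concrete, reinforcement, geometrie, environment, chloride,
 carbonation, rate, pitting, resistance) = setValues()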
def get_reward_max(smiles, predictor, threshold, invalid_reward=1.0,
                   get_features=get_fp):
    mol, prop, nan_smiles = predictor.predict([smiles],
                                              get_features=get_features)
    if len(nan_smiles) == 1:
        return invalid_reward
    if prop[0] >= threshold:
        return 10.0
    else:
        return invalid_reward


RL_max = Reinforcement(my_generator_max, my_predictor, get_reward_max)

n_iterations = 60000
data_path = ['../data/egfr_actives.smi',
             '../data/egfr_enamine.smi',
             '../data/egfr_mixed.smi']
save_path = ['../checkpoints/generator/egfr_clf_rnn_primed',
             '../checkpoints/generator/egfr_clf_rnn_enamine_primed',
             '../checkpoints/generator/egfr_clf_rnn_mixed_primed']

for dpath, mpath in zip(data_path, save_path):
    print('Fine-tuning on %s...' % dpath)
    np.random.seed(42)
    torch.manual_seed(42)
def main(n_iterations=20,
         n_policy=10,
         n_policy_replay=15,
         batch_size=16,
         n_fine_tune=None,
         seed=None,
         replay_data_path='../data/gen_actives.smi',
         primed_path='../checkpoints/generator/checkpoint_batch_training',
         save_every=2,
         save_path=None):

    save_path = os.path.splitext(save_path)[0]
    save_path = save_path.split('-')[0]

    if n_fine_tune is None:
        n_fine_tune = n_iterations

    # initialize RNG seeds for reproducibility
    if seed is not None:
        np.random.seed(seed)
        torch.manual_seed(seed)

    gen_data_path = '../data/chembl_22_clean_1576904_sorted_std_final.smi'
    tokens = [' ', '<', '>', '#', '%', ')', '(', '+', '-', '/', '.', '1', '0',
              '3', '2', '5', '4', '7', '6', '9', '8', '=', 'a', '@', 'C', 'B',
              'F', 'I', 'H', 'O', 'N', 'P', 'S', '[', ']', '\\', 'c', 'e', 'i',
              'l', 'o', 'n', 'p', 's', 'r']
    global gen_data
    gen_data = GeneratorData(gen_data_path,
                             delimiter='\t',
                             cols_to_read=[0],
                             keep_header=True,
                             tokens=tokens)

    # Setting up the generative model
    hidden_size = 1500
    stack_width = 1500
    stack_depth = 200
    layer_type = 'GRU'
    optimizer = torch.optim.SGD
    lr = 0.0002
    generator = StackAugmentedRNN(input_size=gen_data.n_characters,
                                  hidden_size=hidden_size,
                                  output_size=gen_data.n_characters,
                                  layer_type=layer_type,
                                  n_layers=1,
                                  is_bidirectional=False,
                                  has_stack=True,
                                  stack_width=stack_width,
                                  stack_depth=stack_depth,
                                  use_cuda=use_cuda,
                                  optimizer_instance=optimizer,
                                  lr=lr)
    # Use a model pre-trained on active molecules
    generator.load_model(primed_path)

    # Setting up the predictor
    model_instance = RFC
    model_params = {'n_estimators': 250, 'n_jobs': 10}
    predictor = VanillaQSAR(model_instance=model_instance,
                            model_params=model_params,
                            model_type='classifier')
    predictor.load_model('../checkpoints/predictor/egfr_rfc')

    # Setting up the reinforcement model
    def get_reward(smiles, predictor, threshold, invalid_reward=1.0,
                   get_features=get_fp):
        mol, prop, nan_smiles = predictor.predict([smiles],
                                                  get_features=get_features)
        if len(nan_smiles) == 1:
            return invalid_reward
        if prop[0] >= threshold:
            return 10.0
        else:
            return invalid_reward

    RL_model = Reinforcement(generator, predictor, get_reward)

    # Define replay update functions
    def update_threshold(cur_threshold, prediction, proportion=0.15, step=0.05):
        if (prediction >= cur_threshold).mean() >= proportion:
            new_threshold = min(cur_threshold + step, 1.0)
            return new_threshold
        else:
            return cur_threshold

    def update_data(smiles, prediction, replay_data, fine_tune_data, threshold):
        for i in range(len(prediction)):
            if prediction[i] >= max(threshold, 0.2):
                fine_tune_data.file.append('<' + smiles[i] + '>')
            if prediction[i] >= threshold:
                replay_data.append(smiles[i])
        return fine_tune_data, replay_data

    fine_tune_data = GeneratorData(replay_data_path,
                                   tokens=tokens,
                                   cols_to_read=[0],
                                   keep_header=True)
    replay_data = GeneratorData(replay_data_path,
                                tokens=tokens,
                                cols_to_read=[0],
                                keep_header=True)
    replay_data = [traj[1:-1] for traj in replay_data.file]

    rl_losses = []
    rewards = []
    n_to_generate = 200
    threshold = 0.05
    start = time.time()
    active_threshold = 0.75

    tmp = sys.stdout
    sys.stdout = sys.__stdout__
    smiles, predictions, gen_metrics = estimate_and_update(
        RL_model.generator,
        RL_model.predictor,
        1000,
        batch_size=batch_size,
        plot=False,
        threshold=active_threshold,
        return_metrics=True)
    sys.stdout = tmp
    mol_data = pd.DataFrame(dict(smiles=smiles, predictions=predictions))
    if save_path:
        save_path_ = save_path + '-0.smi'
        mol_data.to_csv(save_path_, index=False, header=False)
    # log_path = save_path + '.log'
    # with open(log_path, 'wt') as f:
    #     print('starting log', file=f)

    for i in range(n_iterations):
        print('%3.d Training on %d replay instances...' %
              (i + 1, len(replay_data)))
        print('Setting threshold to %f' % threshold)

        print('Policy gradient...')
        for j in trange(n_policy, desc=' %3.d Policy gradient...' % (i + 1)):
            cur_reward, cur_loss = RL_model.policy_gradient(
                gen_data, get_features=get_fp, threshold=threshold)
            rewards.append(simple_moving_average(rewards, cur_reward))
            rl_losses.append(simple_moving_average(rl_losses, cur_loss))
        print('Loss: %f' % rl_losses[-1])
        print('Reward: %f' % rewards[-1])
        smiles_cur, prediction_cur = estimate_and_update(
            RL_model.generator,
            RL_model.predictor,
            n_to_generate,
            batch_size=batch_size,
            get_features=get_fp,
            threshold=active_threshold,
            plot_counts=True,
            plot=False)
        fine_tune_data, replay_data = update_data(smiles_cur, prediction_cur,
                                                  replay_data, fine_tune_data,
                                                  threshold)
        threshold = update_threshold(threshold, prediction_cur)
        print('Sample trajectories:')
        for sm in smiles_cur[:5]:
            print(sm)

        print('Policy gradient replay...')
        for j in trange(n_policy_replay,
                        desc='%3.d Policy gradient replay...' % (i + 1)):
            cur_reward, cur_loss = RL_model.policy_gradient(
                gen_data,
                get_features=get_fp,
                replay=True,
                replay_data=replay_data,
                threshold=threshold)
        smiles_cur, prediction_cur = estimate_and_update(
            RL_model.generator,
            RL_model.predictor,
            n_to_generate,
            batch_size=batch_size,
            get_features=get_fp,
            threshold=active_threshold,
            plot=False)
        fine_tune_data, replay_data = update_data(smiles_cur, prediction_cur,
                                                  replay_data, fine_tune_data,
                                                  threshold)
        threshold = update_threshold(threshold, prediction_cur)
        print('Sample trajectories:')
        for sm in smiles_cur[:5]:
            print(sm)

        print('Fine tuning...')
        RL_model.fine_tune(data=fine_tune_data,
                           n_steps=n_fine_tune,
                           batch_size=batch_size,
                           print_every=10000)
        smiles_cur, prediction_cur = estimate_and_update(
            RL_model.generator,
            RL_model.predictor,
            n_to_generate,
            batch_size=batch_size,
            get_features=get_fp,
            threshold=active_threshold,
            plot=False)
        fine_tune_data, replay_data = update_data(smiles_cur, prediction_cur,
                                                  replay_data, fine_tune_data,
                                                  threshold)
        threshold = update_threshold(threshold, prediction_cur)
        print('Sample trajectories:')
        for sm in smiles_cur[:5]:
            print(sm)
        print('')

        if (i + 1) % save_every == 0:
            # redirect output to keep valid log
            tmp = sys.stdout
            sys.stdout = sys.__stdout__
            smiles, predictions, gen_metrics = estimate_and_update(
                RL_model.generator,
                RL_model.predictor,
                1000,
                batch_size=batch_size,
                plot=False,
                threshold=active_threshold,
                return_metrics=True)
            mol_data = pd.DataFrame(dict(smiles=smiles,
                                         predictions=predictions))
            if save_path:
                save_path_ = save_path + '-%d.smi' % (i + 1)
                mol_data.to_csv(save_path_, index=False, header=False)
            sys.stdout = tmp

    duration = time.time() - start
    train_metrics = {}
    train_metrics['duration'] = duration

    mol_actives = mol_data[mol_data.predictions > active_threshold]
    egfr_data = pd.read_csv('../data/egfr_with_pubchem.csv')
    egfr_actives = egfr_data[egfr_data.predictions > active_threshold]
    mol_actives['molecules'] = mol_actives.smiles.apply(Chem.MolFromSmiles)
    egfr_actives['molecules'] = egfr_actives.smiles.apply(Chem.MolFromSmiles)
    lib_metrics = compare_libraries(mol_actives,
                                    egfr_actives,
                                    properties=['MolWt', 'MolLogP'],
                                    return_metrics=True,
                                    plot=False)

    # collate results of training
    results = {}
    results.update(train_metrics)
    results.update(gen_metrics)
    results.update(lib_metrics)
    params = dict(n_iterations=n_iterations,
                  n_policy=n_policy,
                  n_policy_replay=n_policy_replay,
                  n_fine_tune=n_fine_tune,
                  seed=seed,
                  replay_data_path=replay_data_path,
                  primed_path=primed_path)
    if save_path is not None:
        results['save_path'] = save_path_
    print('Metrics for %s:' % params)
    print(results)
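# Hypothetical entry point (not in the source). Argument values are
# assumptions; note that save_path must be provided because main() calls
# os.path.splitext(save_path) unconditionally.
if __name__ == '__main__':
    main(n_iterations=20,
         seed=42,
         save_path='../checkpoints/generator/egfr_rl_run.smi')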
            print('broyden2 does not converge fast enough: '
                  'switched to fsolve for this step')
            damage = fsolve(self.damage_residuum,
                            0.2 * np.ones_like(self.sorted_depsf))
            print('damage =', np.sum(damage) / len(damage),
                  'iteration time =', time.clock() - ff, 'sec')
        return damage


if __name__ == '__main__':
    from mathkit.mfn.mfn_line.mfn_line import MFnLineArray
    from matplotlib import pyplot as plt

    reinf1 = Reinforcement(r=0.00345,  # RV('uniform', loc=0.001, scale=0.005),
                           tau=RV('uniform', loc=4., scale=2.),
                           V_f=0.2,
                           E_f=70e3,
                           xi=RV('weibull_min', shape=5., scale=0.04),
                           n_int=100,
                           label='AR glass')
    reinf2 = Reinforcement(r=0.003,  # RV('uniform', loc=0.002, scale=0.002),
                           tau=RV('uniform', loc=.3, scale=.05),
                           V_f=0.1,
                           E_f=200e3,
                           xi=WeibullFibers(shape=5., scale=0.02),
                           n_int=100,
                           label='carbon')
    ccb = CompositeCrackBridgeLoop(E_m=25e3,
                                   reinforcement_lst=[reinf1, reinf2],
operation_selection_kinds = ["BREAKPOINTS", "FREE"]
operation_selection_kind = operation_selection_kinds[int(sys.argv[2])]

genome_problems = [rearrangements.Unsigned_Reversal,
                   rearrangements.Transposition,
                   rearrangements.Unsigned_RevTrans,
                   rearrangements.Prefix_Unsigned_Reversal,
                   rearrangements.Prefix_Transposition,
                   rearrangements.Prefix_Unsigned_RevTrans]
genome_problem = genome_problems[int(sys.argv[3])]()

easy_epoch = int(sys.argv[4])
normal_epoch = int(sys.argv[5])

reinforcement = Reinforcement()  ## Very important object

m0 = int(sys.argv[6])
m1 = int(sys.argv[7])

player0 = models.select_player(m0, operation_selection_kind, permutation_size)
player1 = models.select_player(m1, operation_selection_kind, permutation_size)

if len(sys.argv) == 10:
    player0.model.load_weights(sys.argv[8])
    player1.model.load_weights(sys.argv[9])

epoch = 0
while epoch < easy_epoch:
class XCS:

    GAMMA = 0.71

    def __init__(self, max_population_size, possible_actions=[], histlen=42):
        self.name = "XCS"
        self.action_size = len(possible_actions)
        self.max_population_size = max_population_size
        self.possible_actions = possible_actions
        self.population = []
        self.time_stamp = 1
        self.action_history = []
        self.old_action_history = []
        self.reinforce = Reinforcement()
        self.ga = CIGeneticAlgorithm(possible_actions)
        #################################
        self.single_testcases = True
        self.histlen = histlen
        #################################
        # stuff for batch update
        self.max_prediction_sum = 0
        self.rewards = None
        self.p_explore = 0.25
        self.train_mode = True

    def get_action(self, state):
        '''
        :param state: State in Retects. In the XCS world = situation.
        :return: an action
        '''
        theta_mna = len(self.possible_actions)
        matcher = CIMatching(theta_mna, self.possible_actions)
        match_set = matcher.get_match_set(self.population, state, self.time_stamp)
        self.p_explore = (self.p_explore - 0.1) * 0.99 + 0.1
        action_selector = ActionSelection(self.possible_actions, self.p_explore)
        prediction_array = action_selector.get_prediction_array(match_set)
        action = action_selector.select_action(prediction_array, self.train_mode)
        max_val = prediction_array[action]  # on policy
        # max(prediction_array.keys(), key=(lambda k: prediction_array[k]))
        action_set = action_selector.get_action_set(match_set, action)
        self.max_prediction_sum += max_val
        self.action_history.append((state, action_set))
        return action

    def reward(self, new_rewards):
        try:
            x = float(new_rewards)
            new_rewards = [x] * len(self.action_history)
        except Exception as _:
            if len(new_rewards) < len(self.action_history):
                raise Exception('Too few rewards')
        old_rewards = self.rewards
        self.rewards = new_rewards
        if old_rewards is not None:
            avg_max_pred = self.max_prediction_sum / len(self.action_history)
            for i in range(0, len(old_rewards)):
                discounted_reward = old_rewards[i] + XCS.GAMMA * avg_max_pred
                old_sigma, old_action_set = self.old_action_history[i]
                self.reinforce.reinforce(old_action_set, discounted_reward)
                self.ga.perform_iteration(old_action_set, old_sigma,
                                          self.population, self.time_stamp)
                self.time_stamp += 1
        self.max_prediction_sum = 0
        self.old_action_history = self.action_history
        self.action_history = []
        self.delete_from_population()

    def delete_from_population(self):
        '''
        Deletes as many classifiers as necessary until the population size
        is within the defined bounds.
        '''
        total_numerosity = sum(list(map(lambda x: x.numerosity, self.population)))
        while len(self.population) > self.max_population_size:
            total_fitness = sum(list(map(lambda x: x.fitness, self.population)))
            avg_fitness = total_fitness / total_numerosity
            vote_sum = sum(list(map(lambda x: x.deletion_vote(avg_fitness),
                                    self.population)))
            choice_point = random.random() * vote_sum
            vote_sum = 0
            for classifier in self.population:
                vote_sum += classifier.deletion_vote(avg_fitness)
                if vote_sum > choice_point:
                    if classifier.numerosity > 1:
                        classifier.numerosity = classifier.numerosity - 1
                    else:
                        self.population.remove(classifier)
                    break  # delete a single (micro)classifier per pass

    def save(self, filename):
        """ Stores agent as pickled file """
        pickle.dump(self, open(filename + '.p', 'wb'), 2)

    @classmethod
    def load(cls, filename):
        return pickle.load(open(filename + '.p', 'rb'))
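# Minimal usage sketch for XCS (not from the source): states and action values
# are illustrative. Unlike XCS_ER, the batch update in reward() is applied to
# old_action_history, so classifiers are only reinforced from the second
# reward() call onwards.
agent = XCS(max_population_size=2000, possible_actions=[0, 1, 2])
for cycle in range(2):
    for state in ([0.0] * agent.histlen, [1.0] * agent.histlen):
        agent.get_action(state)
    agent.reward(0.5)  # the second call reinforces the previous cycle's action sets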
                                 n_layers=1,
                                 optimizer='Adadelta',
                                 lr=lr)
if use_cuda:
    my_generator = my_generator.cuda()

# my_generator.load_model('/home/mariewelt/Notebooks/PyTorch/Model_checkpoints/generator/policy_gradient_egfr_max')
my_generator.load_model(
    '/home/mariewelt/Notebooks/PyTorch/Model_checkpoints/generator/checkpoint_lstm')

egfr_predictor = RandomForestQSAR(n_estimators=100, n_ensemble=5)
egfr_predictor.load_model('/home/mariewelt/Notebooks/PyTorch/data/RF/EGFR_RF')

RL = Reinforcement(my_generator, egfr_predictor)

replay = ReplayMemory(capacity=10000)

for i in range(len(egfr_data.smiles)):
    if egfr_data.binary_labels[i] == 1.0:
        replay.push(egfr_data.smiles[i])

generated = []
for _ in range(replay.capacity):
    generated.append(my_generator.evaluate(gen_data))

sanitized = sanitize_smiles(generated)
for sm in sanitized:
    if sm is not None:
        replay.push(sm)
    return 1.0


print("done with reinforcement setup")

# plots the RL reward function
x = np.linspace(-5, 12)
reward = lambda x: 11.0 if ((x > 1.0) and (x < 4.0)) else 1.0
plt.plot(x, [reward(i) for i in x])
plt.xlabel('logP value')
plt.ylabel('Reward value')
plt.title('Reward function for logP optimization')
plt.show()

# does the actual reinforcement:
# creates a Reinforcement object, reusing the previous generator and predictor,
# except each SMILES string is now put through the reward function
RL_logp = Reinforcement(my_generator_max, my_predictor, get_reward_logp)
# only the generator is affected by this since we use the same predictor

rewards = []
rl_losses = []

print(n_iterations)
for i in range(n_iterations):
    for j in trange(n_policy, desc='Policy gradient...'):
        cur_reward, cur_loss = RL_logp.policy_gradient(gen_data)
        rewards.append(simple_moving_average(rewards, cur_reward))
        rl_losses.append(simple_moving_average(rl_losses, cur_loss))

plt.plot(rewards)
plt.xlabel('Training iteration')