def __init__(self, exp_file='exp.json', encoder_file=None, decoder_file=None, directory=None):
    """Load parameters, character set, models and data for an experiment.

    Args:
        exp_file: Parameter file handed to ``hyperparameters.load_params``.
        encoder_file: If not None, overrides ``params["encoder_weights_file"]``.
        decoder_file: If not None, overrides ``params["decoder_weights_file"]``.
        directory: If not None, the process temporarily changes into this
            directory (joined onto the current working directory) while
            loading, so relative paths in the parameters resolve against it.
            The previous working directory is restored afterwards, including
            when loading fails.
    """
    previous_dir = None
    if directory is not None:
        previous_dir = os.getcwd()
        os.chdir(os.path.join(previous_dir, directory))

    try:
        # Load parameters, then apply the optional weight-file overrides.
        self.params = hyperparameters.load_params(exp_file, False)
        if encoder_file is not None:
            self.params["encoder_weights_file"] = encoder_file
        if decoder_file is not None:
            self.params["decoder_weights_file"] = decoder_file

        # Character set and the two lookup tables derived from it.
        # The file is opened in a context manager so the handle is closed
        # as soon as parsing finishes.
        with open(self.params['char_file']) as char_file:
            chars = yaml.safe_load(char_file)
        self.chars = chars
        self.params['NCHARS'] = len(chars)
        self.char_indices = {c: i for i, c in enumerate(chars)}
        self.indices_char = {i: c for i, c in enumerate(chars)}

        # Encoder / decoder models and the callables built from them.
        self.enc = load_encoder(self.params)
        self.dec = load_decoder(self.params)
        self.encode, self.decode = self.enc_dec_functions()
        self.data = None
        if self.params['do_prop_pred']:
            self.property_predictor = load_property_predictor(self.params)

        # Load data without normalization as a dataframe. The first column
        # is whitespace-stripped, filtered to entries no longer than MAX_LEN
        # and kept as self.smiles; any remaining columns become self.data.
        df = pd.read_csv(self.params['data_file'])
        df.iloc[:, 0] = df.iloc[:, 0].str.strip()
        df = df[df.iloc[:, 0].str.len() <= self.params['MAX_LEN']]
        self.smiles = df.iloc[:, 0].tolist()
        if df.shape[1] > 1:
            self.data = df.iloc[:, 1:]

        self.estimate_estandarization()
    finally:
        # Always undo the chdir, even if something above raised; otherwise a
        # failed construction would leave the whole process in `directory`.
        if previous_dir is not None:
            os.chdir(previous_dir)
def load_models(params):
    """Build (or reload) the autoencoder and optional property-prediction models.

    Reads these keys from ``params``: ``kl_loss_weight``, ``reload_model``,
    ``do_tgru``, ``do_prop_pred`` and, when property prediction is enabled,
    ``reg_prop_tasks`` / ``logit_prop_tasks``.

    Args:
        params: Mapping of experiment parameters.

    Returns:
        When ``params['do_prop_pred']`` is truthy:
            ``(AE_only_model, AE_PP_model, encoder, decoder,
            property_predictor, kl_loss_var)``
        otherwise:
            ``(AE_only_model, encoder, decoder, kl_loss_var)``

    Raises:
        ValueError: If property prediction is enabled but neither
            ``reg_prop_tasks`` nor ``logit_prop_tasks`` is a non-empty entry
            of ``params``.
    """

    def identity(x):
        return K.identity(x)

    # Backend variable holding the KL loss weight; it is returned to the caller.
    kl_loss_var = K.variable(params['kl_loss_weight'])

    # `== True` is kept on purpose: it preserves the original behaviour for
    # non-bool values (e.g. a non-empty string does not count as "reload").
    reload_model = params['reload_model'] == True  # noqa: E712

    if reload_model:
        encoder = load_encoder(params)
        decoder = load_decoder(params)
    else:
        encoder = encoder_model(params)
        decoder = decoder_model(params)

    x_in = encoder.inputs[0]

    z_mean, enc_output = encoder(x_in)
    z_samp, z_mean_log_var_output = variational_layers(z_mean, enc_output, kl_loss_var, params)

    # Decoder: with `do_tgru` it is also fed the original input.
    if params['do_tgru']:
        x_out = decoder([z_samp, x_in])
    else:
        x_out = decoder(z_samp)

    # Identity Lambda only attaches a fixed name to the output.
    x_out = Lambda(identity, name='x_pred')(x_out)
    model_outputs = [x_out, z_mean_log_var_output]

    # Pass a copy: `model_outputs` is extended in place below for the
    # property heads, and that mutation must not be able to reach this model.
    AE_only_model = Model(x_in, list(model_outputs))

    # `summary()` prints by itself and returns None, so it is called directly
    # rather than wrapped in print(), which would emit an extra "None" line.
    encoder.summary()
    print("---------------------------")
    decoder.summary()
    print("--------------------------")
    AE_only_model.summary()

    if not params['do_prop_pred']:
        return AE_only_model, encoder, decoder, kl_loss_var

    if reload_model:
        property_predictor = load_property_predictor(params)
    else:
        property_predictor = property_predictor_model(params)

    has_reg_tasks = ('reg_prop_tasks' in params) and (len(params['reg_prop_tasks']) > 0)
    has_logit_tasks = ('logit_prop_tasks' in params) and (len(params['logit_prop_tasks']) > 0)

    if has_reg_tasks and has_logit_tasks:
        # Both heads: the predictor yields two outputs, regression first.
        reg_prop_pred, logit_prop_pred = property_predictor(z_mean)
        reg_prop_pred = Lambda(identity, name='reg_prop_pred')(reg_prop_pred)
        logit_prop_pred = Lambda(identity, name='logit_prop_pred')(logit_prop_pred)
        model_outputs.extend([reg_prop_pred, logit_prop_pred])
    elif has_reg_tasks:
        # Regression-only scenario.
        reg_prop_pred = property_predictor(z_mean)
        reg_prop_pred = Lambda(identity, name='reg_prop_pred')(reg_prop_pred)
        model_outputs.append(reg_prop_pred)
    elif has_logit_tasks:
        # Logit-only scenario.
        logit_prop_pred = property_predictor(z_mean)
        logit_prop_pred = Lambda(identity, name='logit_prop_pred')(logit_prop_pred)
        model_outputs.append(logit_prop_pred)
    else:
        raise ValueError('no logit tasks or regression tasks specified for property prediction')

    # Combined model: autoencoder outputs plus the property-prediction heads.
    AE_PP_model = Model(x_in, model_outputs)
    return AE_only_model, AE_PP_model, encoder, decoder, property_predictor, kl_loss_var