def test_gauge_no_fermions(self):
    tf.reset_default_graph()
    # spec
    batch_size = 1000
    N = 2
    m = 10
    num_fermions = 0
    rank = 1
    name = "test_gauge_no_fermions"
    algebra = SU(N)
    # build the model
    with tf.variable_scope(name):
        dim = 2 * algebra.dim
        bosonic_wavefunc = Autoregressive(
            [Mixture([Affine(Normal())] * 2)] * dim, dim, 2)
        fermionic_wavefunc = FermionicWavefunction(algebra, dim, 2,
                                                   num_fermions, rank, dim, 1)
        vectorizer = Vectorizer(algebra, tfp.bijectors.Exp())
        wavefunc = Wavefunction(algebra, vectorizer, bosonic_wavefunc,
                                fermionic_wavefunc)
        bosonic = wavefunc.sample(batch_size)
        log_norm, _ = wavefunc(bosonic)
        # observables
        radius = tf.sqrt(matrix_quadratic_potential(bosonic) / N)
        gauge = matrix_SUN_adjoint_casimir(wavefunc, bosonic)
        rotation = miniBMN_SO3_casimir(wavefunc, bosonic)
        energy = fuzzy_sphere_energy(m, wavefunc, bosonic)
    # training
    print("Training ...")
    output_path = "results/" + name + "/"
    obs = minimize(name,
                   log_norm,
                   energy,
                   10000, {
                       "r": radius,
                       "gauge": gauge,
                       "rotation": rotation
                   },
                   100,
                   5000,
                   lr=1e-3,
                   output_path=output_path)
    obs = minimize(name,
                   log_norm,
                   energy,
                   5000, {
                       "r": radius,
                       "gauge": gauge,
                       "rotation": rotation
                   },
                   100,
                   5000,
                   lr=1e-4,
                   restore_path=output_path)
    self.assertTrue(np.abs(obs["energy"] + 12593) < 5)
    self.assertTrue(np.abs(obs["r"] - 12.99) < 1e-2)
    self.assertTrue(0 < obs["gauge"] < 1e-8)
    self.assertTrue(0 < obs["rotation"] < 1e-2)
def test_gauge_susy(self):
    tf.reset_default_graph()
    # spec
    batch_size = 1000
    N = 2
    m = 10
    num_fermions = 2
    rank = 4
    name = "test_gauge_susy"
    algebra = SU(N)
    # build the model
    with tf.variable_scope(name):
        dim = 2 * algebra.dim
        with open("data/SpinMatrices" + str(N) + ".bin", "rb") as f:
            mats = tf.constant(
                np.reshape(
                    np.fromfile(f, dtype=np.dtype("complex64"),
                                count=3 * N * N), [3, N, N]))
        Sx, Sy, Sz = mats[0], mats[1], mats[2]
        offset = m * tf.stack([-Sz, -Sy, -Sx])
        vectorizer = Vectorizer(algebra, tfp.bijectors.Exp())
        offset = vectorizer.encode(tf.expand_dims(offset, 0))[0]
        bosonic_wavefunc = NormalizingFlow([Normal()] * dim, 0,
                                           tfp.bijectors.Sigmoid(), offset)
        fermionic_wavefunc = FermionicWavefunction(algebra, dim, 2,
                                                   num_fermions, rank, dim, 1)
        wavefunc = Wavefunction(algebra, vectorizer, bosonic_wavefunc,
                                fermionic_wavefunc)
        bosonic = wavefunc.sample(batch_size)
        log_norm, fermionic = wavefunc(bosonic)
        # observables
        radius = tf.sqrt(matrix_quadratic_potential(bosonic) / N)
        kinetic = matrix_kinetic_energy(wavefunc, bosonic)
        bilinear = miniBMN_yukawa_potential(bosonic, fermionic)
        gauge = matrix_SUN_adjoint_casimir(wavefunc, bosonic)
        rotation = miniBMN_SO3_casimir(wavefunc, bosonic)
        energy = miniBMN_energy(m, wavefunc, bosonic)
    # training
    print("Training ...")
    output_path = "results/" + name + "/"
    obs = minimize(name,
                   log_norm,
                   energy,
                   5000, {
                       "r": radius,
                       "kinetic": kinetic,
                       "bilinear": bilinear,
                       "gauge": gauge,
                       "rotation": rotation
                   },
                   100,
                   5000,
                   lr=1e-3,
                   output_path=output_path)
    obs = minimize(name,
                   log_norm,
                   energy,
                   5000, {
                       "r": radius,
                       "kinetic": kinetic,
                       "bilinear": bilinear,
                       "gauge": gauge,
                       "rotation": rotation
                   },
                   100,
                   5000,
                   lr=1e-4,
                   restore_path=output_path)
    self.assertTrue(np.abs(obs["energy"] - 0) < 1)
    self.assertTrue(np.abs(obs["kinetic"] - 55) < 5)
    self.assertTrue(np.abs(obs["r"] - 8.66) < 1e-2)
    self.assertTrue(np.abs(obs["bilinear"] + 40) < 0.4)
    self.assertTrue(0 < obs["gauge"] < 1e-8)
    self.assertTrue(np.abs(obs["rotation"] - 0) < 4e-2)
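# A minimal sketch, not project code: how a file like data/SpinMatrices2.bin
# could be generated. For N = 2 the spin matrices are the Pauli matrices
# divided by two, and the layout (3 * N * N complex64 values, row-major)
# matches the np.fromfile call in test_gauge_susy above.
def _write_spin_matrices_N2(path="data/SpinMatrices2.bin"):
    Sx = np.array([[0, 1], [1, 0]], dtype=np.complex64) / 2
    Sy = np.array([[0, -1j], [1j, 0]], dtype=np.complex64) / 2
    Sz = np.array([[1, 0], [0, -1]], dtype=np.complex64) / 2
    np.stack([Sx, Sy, Sz]).tofile(path)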
def test_no_gauge_single_fermion(self):
    tf.reset_default_graph()
    # spec
    batch_size = 1000
    N = 2
    m = 10
    num_fermions = 1
    rank = 1
    name = "test_no_gauge_single_fermion"
    algebra = SU(N)
    # build the model
    with tf.variable_scope(name):
        bosonic_dim = 3 * algebra.dim
        fermionic_dim = 2 * algebra.dim
        bosonic_wavefunc = Autoregressive(
            [Mixture([Affine(Normal())] * 2)] * bosonic_dim, bosonic_dim, 2)
        fermionic_wavefunc = FermionicWavefunction(algebra, bosonic_dim, 2,
                                                   num_fermions, rank,
                                                   fermionic_dim, 2)
        vectorizer = Vectorizer(algebra)
        wavefunc = Wavefunction(Trivial(), vectorizer, bosonic_wavefunc,
                                fermionic_wavefunc)
        bosonic = wavefunc.sample(batch_size)
        log_norm, fermionic = wavefunc(bosonic)
        # observables
        radius = tf.sqrt(matrix_quadratic_potential(bosonic) / N)
        kinetic = matrix_kinetic_energy(wavefunc, bosonic)
        bilinear = miniBMN_yukawa_potential(bosonic, fermionic)
        gauge = matrix_SUN_adjoint_casimir(wavefunc, bosonic)
        rotation = miniBMN_SO3_casimir(wavefunc, bosonic)
        pre_energy = fuzzy_sphere_energy(m, wavefunc, bosonic)
        energy = miniBMN_energy(m, wavefunc, bosonic)
    # pretraining
    print("Pretraining ...")
    output_path = "results/" + name + "/"
    obs = minimize(name,
                   log_norm,
                   pre_energy,
                   10000, {"r": radius},
                   1000,
                   5000,
                   lr=1e-3,
                   output_path=output_path)
    # training
    print("Training ...")
    obs = minimize(name,
                   log_norm,
                   energy,
                   10000, {
                       "r": radius,
                       "kinetic": kinetic,
                       "bilinear": bilinear,
                       "gauge": gauge,
                       "rotation": rotation
                   },
                   100,
                   5000,
                   lr=1e-3,
                   restore_path=output_path)
    obs = minimize(name,
                   log_norm,
                   energy,
                   5000, {
                       "r": radius,
                       "kinetic": kinetic,
                       "bilinear": bilinear,
                       "gauge": gauge,
                       "rotation": rotation
                   },
                   100,
                   5000,
                   lr=1e-4,
                   restore_path=output_path)
    self.assertTrue(obs["energy"] < 15)
    self.assertTrue(obs["kinetic"] < 65)
    self.assertTrue(np.abs(obs["r"] - 8.66) < 2e-2)
    self.assertTrue(np.abs(obs["bilinear"] + 20) < 5e-2)
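# The tests above share a two-stage schedule: a first minimize() run at
# lr=1e-3 checkpoints to output_path, and a second run at lr=1e-4 restores
# from the same path to refine the variational state. A minimal sketch of
# that checkpoint-then-refine pattern in plain TF1 follows; it is an
# illustration only, not the project's minimize() helper.
def _two_stage_sketch(loss, ckpt_path, steps1=10000, steps2=5000):
    step1 = tf.train.AdamOptimizer(1e-3).minimize(loss)
    step2 = tf.train.AdamOptimizer(1e-4).minimize(loss)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Stage 1: coarse optimization, checkpointed.
        sess.run(tf.global_variables_initializer())
        for _ in range(steps1):
            sess.run(step1)
        saver.save(sess, ckpt_path)
    with tf.Session() as sess:
        # Stage 2: restore and refine with a smaller learning rate.
        sess.run(tf.global_variables_initializer())
        saver.restore(sess, ckpt_path)
        for _ in range(steps2):
            sess.run(step2)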
def fit(request, hyper_params=default_hyper_params, nperiod=288):
    passed_hyper_params = hyper_params
    hyper_params = {}
    hyper_params.update(passed_hyper_params)
    hyper_params.update(request.hyper_params)
    logging.info(f"fitting model with hyper parameters {hyper_params}")
    frame = make_frame(request, hyper_params=hyper_params)
    basal_insulin_curve = expia1(
        np.arange(nperiod),
        request.basal_insulin_parameters.get("delay", 5.0) / 5.0,
        request.basal_insulin_parameters["peak"] / 5.0,
        request.basal_insulin_parameters["duration"] / 5.0,
    )
    # TODO: make this the average carb curve
    default_carb_curve = carb_curve(np.arange(nperiod), 3, 36)

    # Set up parameter schedules.
    #
    # We arrange for each of basals, insulin sensitivities, and carb ratios
    # to have 24 windows in each day.
    #
    # TODO: assign windows for carb ratios based on data density
    #
    # TODO: find a better initialization strategy when no schedules are provided
    #
    # Order is: basals, insulin sensitivities, carb ratios
    if request.insulin_sensitivity_schedule is not None:
        init_insulin_sensitivity_params = attribute_parameters(
            basal_insulin_curve, request.insulin_sensitivity_schedule.index,
            request.insulin_sensitivity_schedule.values)
    else:
        init_insulin_sensitivity_params = 140 * np.ones(24)
    if request.carb_ratio_schedule is not None:
        init_carb_ratio_params = attribute_parameters(
            default_carb_curve, request.carb_ratio_schedule.index,
            request.carb_ratio_schedule.values)
    else:
        init_carb_ratio_params = 15. * np.ones(24)
    if request.basal_rate_schedule is not None:
        init_basal_rate_params = attribute_parameters(
            basal_insulin_curve, request.basal_rate_schedule.index,
            request.basal_rate_schedule.values)
    else:
        init_basal_rate_params = np.zeros(24)
    init_params = np.concatenate([
        init_basal_rate_params, init_insulin_sensitivity_params,
        init_carb_ratio_params
    ])

    def unpack_params(params):
        return params[:24], params[24:48], params[48:72]

    insulin = frame["insulin"].values
    carbs = frame["carb"].values
    deltas = frame["delta"].values
    hour = frame.index.hour
    quantile = hyper_params["quantile_loss_quantile"]

    # Construct bounds based on the allowable tuning limit.
    if request.tuning_limit is not None and request.tuning_limit > 0:
        bounds = list(
            zip(init_params * (1 - request.tuning_limit),
                init_params * (1 + request.tuning_limit)))
    else:
        bounds = None

    # Re-weight entries that have carbohydrate activity so that
    # the model prefers having (much) better carb parameters
    # over slightly worse-fitting sensitivity and basal parameters.
    weights = np.ones_like(deltas)
    weights[frame["carb"] > 0] = (np.sum(frame["carb"] == 0) /
                                  np.sum(frame["carb"] > 0))

    def model(params):
        basals, insulin_sensitivities, carb_ratios = unpack_params(params)
        basal = basals[hour]
        insulin_sensitivity = insulin_sensitivities[hour]
        carb_ratio = carb_ratios[hour]
        return insulin_sensitivity * (carbs / carb_ratio - insulin + basal)

    if bounds is not None:
        lower, upper = zip(*bounds)
        lower, upper = np.array(lower), np.array(upper)
        # This is a hack to get around the fact that basals are summed
        # over multiple hours. These are therefore only approximate bounds,
        # but it's much simpler than the alternative.
        insulin_duration_hours = request.basal_insulin_parameters[
            "duration"] / 60.
        lower[:24] = lower[:24] / insulin_duration_hours
        upper[:24] = upper[:24] / insulin_duration_hours

    def loss(params, iter):
        preds = model(params)
        penalty = -10.0 * np.sum(np.minimum(params, 0.0))
        # Use a barrier function if bounds are provided.
        if bounds is not None:
            # HACK: simulate a "rectified" barrier function here.
            # Note also that this doesn't work for basals since they
            # are summed up.
            epsilon = 0.00001
            penalty_params = params.copy()
            penalty_params[penalty_params >= upper] = \
                upper[penalty_params >= upper] - epsilon
            penalty_params[penalty_params <= lower] = \
                lower[penalty_params <= lower] + epsilon
            penalty += np.sum(
                np.maximum(0., -0.01 * np.log(upper - penalty_params)))
            penalty += np.sum(
                np.maximum(0., -0.01 * np.log(penalty_params - lower)))
        # Quantile (pinball) regression at the configured quantile.
        error = weights * (deltas - preds)
        return np.mean(np.maximum(quantile * error,
                                  (quantile - 1.0) * error)) + penalty

    if hyper_params["optimizer"] == "adam":
        params, training_loss = train.minimize(loss, init_params)
    elif hyper_params["optimizer"] == "scipy.minimize":
        opt = optimize.minimize(loss, init_params, args=(0, ))
        params = opt.x
        training_loss = opt.fun

    # Clip the parameters here in case the loss penalties
    # above were insufficient.
    params = np.maximum(params, 0.0)
    basals, insulin_sensitivities, carb_ratios = unpack_params(params)

    # Now, infer parameter schedules based on the optimized
    # instantaneous parameters. For carbs, we use the average
    # carb curve based on data. We also use the basal insulin
    # parameters for ISF schedules.
    if request.basal_rate_schedule is None:
        # Default: hourly
        basal_rate_index = np.arange(0, 288, 12)
    else:
        basal_rate_index = request.basal_rate_schedule.reindexed(5)
    basal_rate_schedule = (identify_curve(basal_insulin_curve,
                                          basal_rate_index,
                                          np.repeat(basals, 12)) * 12)
    if request.insulin_sensitivity_schedule is None:
        insulin_sensitivity_index = np.arange(0, 288, 12 * 4)
    else:
        insulin_sensitivity_index = \
            request.insulin_sensitivity_schedule.reindexed(5)
    insulin_sensitivity_schedule = identify_curve(
        basal_insulin_curve, insulin_sensitivity_index,
        np.repeat(insulin_sensitivities, 12))
    if request.carb_ratio_schedule is None:
        carb_ratio_index = 12 * 6 + np.arange(0, 12 * 12, 4 * 12)
    else:
        carb_ratio_index = request.carb_ratio_schedule.reindexed(5)
    carb_ratio_schedule = identify_curve(default_carb_curve, carb_ratio_index,
                                         np.repeat(carb_ratios, 12))

    # Finally, "quantize" the basal schedule if needed.
    #
    # TODO: Currently this simply tries to match the closest
    # allowable basal rate. We should try to push this up to the
    # model (e.g., the cost function could encourage values close to
    # allowable values), or split the schedule so that the total
    # amount delivered over the scheduled intervals is equal to the
    # modeled amount, but the rate varies within the intervals.
    #
    # TODO: Another possibility is to perform one model run
    # to fit the basals, then another with the basals "fixed" to the
    # snapped values, allowing the model to adjust the other
    # parameters accordingly.
    #
    # TODO: collapse adjacent entries with the same value.
    if request.allowed_basal_rates is not None:
        allowed = sorted(request.allowed_basal_rates)
        for (i, rate) in enumerate(basal_rate_schedule):
            j = bisect.bisect(allowed, rate)
            # TODO: perhaps be a little more generous here,
            # snapping up when values are (much) closer.
            if j == 0 and rate != allowed[0]:
                basal_rate_schedule[i] = 0.0
            elif j >= len(allowed) or rate != allowed[j]:
                basal_rate_schedule[i] = allowed[j - 1]

    def make_schedule(index, schedule):
        assert len(index) == len(schedule)
        return ((5 * index).tolist(), schedule.tolist())

    return Model(
        params={
            "insulin_sensitivity_schedule":
            make_schedule(insulin_sensitivity_index,
                          insulin_sensitivity_schedule),
            "carb_ratio_schedule":
            make_schedule(carb_ratio_index, carb_ratio_schedule),
            "basal_rate_schedule":
            make_schedule(basal_rate_index, basal_rate_schedule),
        },
        raw_insulin_sensitivities=insulin_sensitivities,
        raw_carb_ratios=carb_ratios,
        raw_basals=basals,
        training_loss=training_loss,
    )
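# A small worked example (hypothetical numbers) of the pinball loss used in
# fit()'s loss above: at quantile 0.5 it is half the mean absolute error, so
# the fit targets the median residual.
def _pinball_loss(deltas, preds, quantile):
    error = deltas - preds
    return np.mean(np.maximum(quantile * error, (quantile - 1.0) * error))

# _pinball_loss(np.array([1.0, -2.0]), np.zeros(2), 0.5) -> 0.75,
# i.e. 0.5 * mean(|1.0|, |-2.0|).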
def train(experiment):
    print('experiment: ', experiment)
    config = initialize_env(experiment)
    X_train, Y_train, X_valid, Y_valid, _, _ = create_Uttdata(config)
    vocab = utt_Vocab(config, X_train + X_valid, Y_train + Y_valid)
    X_train, Y_train = minimize(X_train), minimize(Y_train)
    X_valid, Y_valid = minimize(X_valid), minimize(Y_valid)
    with open('./data/minidata.pkl', 'wb') as f:
        a, b = parallelize(X_train, Y_train)
        pickle.dump([(c, d) for c, d in zip(a, b)], f)
    X_train, Y_train = vocab.tokenize(X_train, Y_train)
    X_valid, Y_valid = vocab.tokenize(X_valid, Y_valid)
    X_train, Y_train = parallelize(X_train, Y_train)
    X_valid, Y_valid = parallelize(X_valid, Y_valid)
    print('Finish create dataset')

    lr = config['lr']
    batch_size = config['BATCH_SIZE']

    encoder = UtteranceEncoder(
        utt_input_size=len(vocab.word2id),
        embed_size=config['UTT_EMBED'],
        utterance_hidden=config['UTT_HIDDEN'],
        padding_idx=vocab.word2id['<UttPAD>']).to(device)
    decoder = UtteranceDecoder(
        utterance_hidden_size=config['DEC_HIDDEN'],
        utt_embed_size=config['UTT_EMBED'],
        utt_vocab_size=config['UTT_MAX_VOCAB']).to(device)
    context = UtteranceContextEncoder(
        utterance_hidden_size=config['UTT_CONTEXT']).to(device)

    encoder_opt = optim.Adam(encoder.parameters(), lr=lr)
    decoder_opt = optim.Adam(decoder.parameters(), lr=lr)
    context_opt = optim.Adam(context.parameters(), lr=lr)

    model = seq2seq(device).to(device)
    criterion = nn.CrossEntropyLoss(ignore_index=vocab.word2id['<UttPAD>'])

    start = time.time()
    print_total_loss = 0
    _valid_loss = None

    for e in range(config['EPOCH']):
        tmp_time = time.time()
        print('Epoch {} start'.format(e + 1))
        indexes = [i for i in range(len(X_train))]
        random.shuffle(indexes)
        k = 0
        while k < len(indexes):
            step_size = min(batch_size, len(indexes) - k)
            encoder_opt.zero_grad()
            decoder_opt.zero_grad()
            context_opt.zero_grad()
            batch_idx = indexes[k:k + step_size]
            print('\r{}/{} pairs training ...'.format(k + step_size,
                                                      len(X_train)),
                  end='')
            X_seq = [X_train[seq_idx] for seq_idx in batch_idx]
            Y_seq = [Y_train[seq_idx] for seq_idx in batch_idx]
            # Pad every sequence in the batch to the batch maximum.
            max_xseq_len = max(len(x) + 1 for x in X_seq)
            max_yseq_len = max(len(y) + 1 for y in Y_seq)
            for si in range(len(X_seq)):
                X_seq[si] = X_seq[si] + [vocab.word2id['<UttPAD>']
                                         ] * (max_xseq_len - len(X_seq[si]))
                Y_seq[si] = Y_seq[si] + [vocab.word2id['<UttPAD>']
                                         ] * (max_yseq_len - len(Y_seq[si]))
            X_tensor = torch.tensor(X_seq).to(device)
            Y_tensor = torch.tensor(Y_seq).to(device)
            loss = model.forward(X=X_tensor,
                                 Y=Y_tensor,
                                 encoder=encoder,
                                 decoder=decoder,
                                 context=context,
                                 step_size=step_size,
                                 criterion=criterion,
                                 config=config)
            print_total_loss += loss
            encoder_opt.step()
            decoder_opt.step()
            context_opt.step()
            k += step_size
        print()
        valid_loss = validation(X=X_valid,
                                Y=Y_valid,
                                model=model,
                                encoder=encoder,
                                decoder=decoder,
                                context=context,
                                vocab=vocab,
                                config=config)
        if _valid_loss is None or valid_loss < _valid_loss:
            # Keep a checkpoint of the best model seen so far.
            torch.save(encoder.state_dict(),
                       os.path.join(config['log_dir'], 'enc_beststate.model'))
            torch.save(decoder.state_dict(),
                       os.path.join(config['log_dir'], 'dec_beststate.model'))
            _valid_loss = valid_loss
        if (e + 1) % config['LOGGING_FREQ'] == 0:
            print_loss_avg = print_total_loss / config['LOGGING_FREQ']
            print_total_loss = 0
            print('steps %d\tloss %.4f\tvalid loss %.4f | exec time %.4f' %
                  (e + 1, print_loss_avg, valid_loss, time.time() - tmp_time))
        if (e + 1) % config['SAVE_MODEL'] == 0:
            print('saving model')
            torch.save(
                encoder.state_dict(),
                os.path.join(config['log_dir'],
                             'enc_state{}.model'.format(e + 1)))
            torch.save(
                decoder.state_dict(),
                os.path.join(config['log_dir'],
                             'dec_state{}.model'.format(e + 1)))
    print()
    print('Finish training | exec time: %.4f [sec]' % (time.time() - start))
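# A sketch of an equivalent batch-padding step using PyTorch's built-in
# pad_sequence in place of the manual loop in train() above. Note that
# train() pads to the batch maximum plus one extra slot (len + 1), while
# the helper below pads only to the batch maximum, so it is an
# approximation rather than a drop-in replacement.
from torch.nn.utils.rnn import pad_sequence

def _pad_batch(seqs, pad_id, device):
    # Each element of seqs is a list of token ids; pad to the batch max.
    return pad_sequence([torch.tensor(s) for s in seqs],
                        batch_first=True,
                        padding_value=pad_id).to(device)

# Usage: X_tensor = _pad_batch(X_seq, vocab.word2id['<UttPAD>'], device)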