def __init__(self, dmrg_env=None):
    """Create an empty SpinBlock.

    Wraps a fresh C-extension raw block and empty bra/ket StateInfo
    objects; sites and neighbor blocks are filled in later by the
    init/build methods.

    Parameters
    ----------
    dmrg_env : optional
        DMRG environment object; forwarded to the StateInfo
        constructors and kept for later use (may be None).
    """
    self._env = dmrg_env
    # handle to the underlying C-extension block object
    self._raw = _dmrg.NewRawSpinBlock()
    # bra/ket symmetry-sector bookkeeping for this block
    self.braStateInfo = stateinfo.StateInfo(dmrg_env)
    self.ketStateInfo = stateinfo.StateInfo(dmrg_env)
    # orbital/site indices owned by this block; empty until initialized
    self.sites = []
    # set when this block is a sum/product of two sub-blocks
    self.leftBlock = None
    self.rightBlock = None
def InitStartingBlock(dmrg_env, forward=True):
    """Build the starting SpinBlock for a DMRG sweep.

    Forward sweeps start from a dot block on the first site(s); when the
    molecule has nonzero total spin and non-interacting orbitals are
    enabled, a dummy spectator block carrying that spin is summed in.
    Backward sweeps start from a tensor-product block over the last
    ``backward_starting_size`` sites.

    Parameters
    ----------
    dmrg_env :
        DMRG environment; supplies spin, starting sizes, tot_sites, flags.
    forward : bool
        Sweep direction (default True = forward).

    Returns
    -------
    SpinBlock
        The initialized starting block.
    """
    # usually molecule_quantum_tot_spin = 0
    molecule_quantum_tot_spin = dmrg_env.spin
    # usually forward_starting_size = backward_starting_size = 1
    forward_starting_size = dmrg_env.forward_starting_size
    backward_starting_size = dmrg_env.backward_starting_size

    startingBlock = SpinBlock(dmrg_env)
    if forward:
        startingBlock.init_dot(True, 0, forward_starting_size,
                               is_complement=True)
        # dmrg_env.add_noninteracting_orbs is always True
        if dmrg_env.add_noninteracting_orbs and molecule_quantum_tot_spin != 0:
            # Build a dummy spectator quantum carrying the molecular spin.
            # Previously this called s.init(nparticle, spin, irrep_id) with
            # three undefined names (NameError at runtime).  Following the
            # original fixme ("nparticle =?= spin") and Block's
            # initblocks.C, the spectator holds nparticle == spin ==
            # molecule_quantum_tot_spin in the trivial irrep (0).
            # TODO(review): confirm against initblocks.C for point groups
            # where the trivial irrep id is not 0.
            s = quanta.SpinQuantum()
            s.init(molecule_quantum_tot_spin, molecule_quantum_tot_spin, 0)
            addstate = stateinfo.StateInfo(dmrg_env)
            addstate.init_by_a_spinquantum(s)
            dummyblock = SpinBlock(dmrg_env)
            dummyblock.init_by_stateinfo(addstate)
            newblk = SpinBlock(dmrg_env)
            newblk.default_op_components(False, startingBlock, dummyblock,
                                         True, True,
                                         dmrg_env.implicitTranspose)
            # no particle/spin constraint: the dummy block only adds quanta
            newblk.BuildSumBlock(param.NO_PARTICLE_SPIN_NUMBER_CONSTRAINT,
                                 startingBlock, dummyblock)
            startingBlock = newblk
    else:
        # last backward_starting_size sites; materialize the range so the
        # C-extension call receives a concrete sequence
        backwardSites = list(range(dmrg_env.tot_sites - backward_starting_size,
                                   dmrg_env.tot_sites))
        startingBlock.default_op_components_compl(False)
        startingBlock.BuildTensorProductBlock(backwardSites)
    return startingBlock
def load(self, start_id, end_id, root_id=0, prefix=None):
    """Load a stored wavefunction from a Block scratch file.

    Locates ``wave-<start>-<end>.0.<root>.tmp`` under *prefix* (or the
    environment's scratch directory / $TMPDIR when *prefix* is None),
    reads it through the C extension, and rebuilds the two-level
    StateInfo tree plus deltaQuantum from the raw handles.

    Raises
    ------
    OSError
        If the wavefunction file does not exist.
    """
    if prefix is None:
        base = (os.environ['TMPDIR'] if self._env is None
                else self._env.scratch_prefix)
        prefix = base + '/'
    wfnfile = '%swave-%d-%d.0.%d.tmp' % (prefix, start_id, end_id, root_id)
    if not os.path.isfile(wfnfile):
        raise OSError('file %s does not exist' % wfnfile)

    self._raw = _dmrg.NewRawWavefunction()

    # Assemble the empty three-level StateInfo tree first; the raw load
    # below fills it in.
    root = stateinfo.StateInfo()
    lhs = stateinfo.StateInfo()
    lhs.leftStateInfo = stateinfo.StateInfo()
    lhs.rightStateInfo = stateinfo.StateInfo()
    rhs = stateinfo.StateInfo()
    rhs.leftStateInfo = stateinfo.StateInfo()
    rhs.rightStateInfo = stateinfo.StateInfo()
    root.leftStateInfo = lhs
    root.rightStateInfo = rhs
    self.stateInfo = root

    # The raw loader returns seven handles in exactly this tree order.
    raw_handles = self._raw.load(wfnfile)
    nodes = (root,
             lhs, lhs.leftStateInfo, lhs.rightStateInfo,
             rhs, rhs.leftStateInfo, rhs.rightStateInfo)
    for node, raw in zip(nodes, raw_handles):
        node.refresh_by(raw)

    self.deltaQuantum = quanta.SpinQuantum()
    self.deltaQuantum.refresh_by(self._raw.get_deltaQuantum())
    self._sync_raw2self()
def step(self, state, reward):
    """Perform one Q-learning step.

    Updates Q(oldstate, oldaction) with the received reward and the best
    Q-value of the new state, then selects the next action
    epsilon-greedily.  On the very first step no previous (state, action)
    pair exists, so a random action is returned without an update.

    Parameters
    ----------
    state : sequence of ints
        Environment observation; normalized to a uint8 tuple for use as
        a dictionary key.
    reward : number
        Reward received for the previous action.

    Returns
    -------
    The next action chosen by the policy.
    """
    state = tuple(np.array(state, dtype=np.uint8))
    #logging.debug("step(): state: %d, reward: %d", state, reward)
    # Maintain the state -> StateInfo table.  Use an explicit membership
    # test: the previous truthiness check, bool(self.stateinfos.get(state)),
    # would silently discard and recreate an entry whose StateInfo object
    # evaluated falsy, losing its learned action values.
    if state in self.stateinfos:
        si = self.stateinfos[state]
    else:
        si = stateinfo.StateInfo(self.action_count)  # create new entry
        self.stateinfos[state] = si
    # establish oldstate/oldaction on the first step, no update possible yet
    if self.firststep:
        self.firststep = False
        self.oldstate = state
        action = si.policy_random()
        self.oldaction = action
        return action
    # standard Q-learning update:
    # Q(s,a) += alpha * (r + gamma * max_a' Q(s',a') - Q(s,a))
    old_si = self.stateinfos[self.oldstate]
    old_q_sa = old_si.actionvalues[self.oldaction]
    max_q_sa = si.get_max_actionvalue()
    old_q_sa = old_q_sa + self.alpha * (
        reward + self.discountrate * max_q_sa - old_q_sa)
    old_si.actionvalues[self.oldaction] = old_q_sa
    # choose the next action
    action = si.policy_epsilon_greedy_q_based(self.epsilon)
    self.oldstate = state
    self.oldaction = action
    # recorded only for drawing in environment.render()
    triple = strucstep.StrucStep(state, reward, action)
    self.episode.append(triple)
    #logging.debug("step(): return action: %d ", action)
    return action
logging.error("env.perform_action(): s = %d", s) sys.exit(0) for i in range(0, 500): s = env.get_random_state() # print(s, end=' | ') y, x = env.state2coord(s) if env.gridworld[y, x] == 1: # Hindernis ? logging.error("env.get_random_state(): y = %d x = %d", y, x) sys.exit(0) # ------------------------------------------------------------- # stateinfo.py # ------------------------------------------------- si = stateinfo.StateInfo(4) si.set_testvalues() #si.cal_actionvalue(1, 5) #if (si.actioncount[1] != 4) or (si.actionvalue[1] != 5): # logging.error("si.cal_actionvalue()") # sys.exit(0) #si.cal_actionvalue(1, 30) #if (si.actioncount[1] != 5) or (si.actionvalue[1] != 10): # logging.error("si.cal_actionvalue()") # sys.exit(0)sys.exit(0 action = si.policy_greedy_q_based() if action != 1: logging.error("policy_greedy_q_based()")