Example #1
import numpy as np

import Hamiltonian


def cmpGen(R, dt, HPLanks, N, b, wa, wc, Emin, Emax):
    HRWA = Hamiltonian.makeHamiltonian(True, N, b, wa, wc, Emin, Emax, HPLanks)
    H = Hamiltonian.makeHamiltonian(False, N, b, wa, wc, Emin, Emax, HPLanks)

    if HRWA.shape[0] != R.shape[0] or H.shape[0] != R.shape[0]:
        return -1

    iteration = 0
    diag1G = Evolution(R.copy(), H, dt, HPLanks)
    diag2G = Evolution(R.copy(), HRWA, dt, HPLanks)
    while 1:
        diag1 = next(diag1G)
        diag2 = next(diag2G)
        diag1 = diag1.diagonal()
        # diag1 = diag1.tolist()
        diag2 = diag2.diagonal()
        # diag2 = diag2.tolist()

        # dif = 0
        # for i in range(len(diag1)):
        #     dif += abs(diag1[0][i] - diag2[0][i])

        dif = np.linalg.norm(diag1 - diag2)

        print(iteration, ":", dif)
        iteration += 1
        yield dif
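
A note on the metric: the value yielded on every step is the Euclidean norm of the difference between the diagonals (populations) of the two evolved density matrices. A minimal, self-contained NumPy sketch of that comparison, using hypothetical stand-in matrices in place of the Evolution/Hamiltonian machinery:

import numpy as np

# Hypothetical stand-ins for the density matrices evolved with and without the RWA.
rho_full = np.diag([0.50, 0.30, 0.20]).astype(complex)
rho_rwa = np.diag([0.48, 0.32, 0.20]).astype(complex)

# Same metric as in cmpGen: norm of the population (diagonal) difference.
dif = np.linalg.norm(rho_full.diagonal() - rho_rwa.diagonal())
print(dif)  # ~0.028
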
Example #2
def Q_learning(N,
               N_episodes,
               alpha_0,
               eta,
               lmbda,
               beta_RL_i,
               beta_RL_inf,
               T_expl,
               m_expl,
               N_tilings,
               N_tiles,
               state_i,
               h_field,
               dh_field,
               bang,
               L,
               max_t_steps,
               delta_time,
               J,
               hz,
               hx_i,
               hx_f,
               psi_i,
               psi_f,
               theta=None,
               tilings=None,
               save=False):
    """
	This function applies modified Watkins' Q-Learning for time-dependent states with
	force-learn replays.

	1st row: RL arguments
	2nd row: physics arguments
	3rd row: optional arguments
	"""

    ### define save directory for data
    # read in local directory path
    str1 = os.getcwd()
    str2 = str1.split('\\')
    n = len(str2)
    my_dir = str2[n - 1]

    ######################################################################

    ##### physical quantities ######
    """
	# define ED Hamiltonian H(t)
	b=state_i[0]
	lin_fun = lambda t: b 
	# define Hamiltonian
	H = Hamiltonian.Hamiltonian(L,fun=lin_fun,**{'J':J,'hz':hz})
	# define matrix exponential; will be changed every time b is overwritten
	exp_H=exp_op(H,a=-1j*delta_time)
	"""

    # preallocate physical state
    psi = np.zeros_like(psi_i)

    ##### RL quantities	#####

    # define actions
    if bang:
        pos_actions = [8.0]
        a_str = '_bang'
        exp_dict_dataname = my_dir + "/unitaries/unitaries_L={}_bang".format(
            L) + '.pkl'
    else:
        pos_actions = [0.1, 0.2, 0.5, 1.0, 2.0, 4.0, 8.0]
        a_str = '_cont'
        exp_dict_dataname = my_dir + "/unitaries/unitaries_L={}_cont".format(
            L) + '.pkl'

    neg_actions = [-i for i in pos_actions]
    actions = np.sort(neg_actions + [0.0] + pos_actions)
    #del pos_actions,neg_actions

    # pre-calculate unitaries
    #expm_dict=Hamiltonian.Unitaries(delta_time,L,J,hz,min(pos_actions),max(h_field),min(h_field),state_i)
    expm_dict = cPickle.load(open(exp_dict_dataname, "rb"))

    N_actions = len(actions)

    if theta is None:
        theta = np.zeros((N_tiles * N_tilings, max_t_steps, N_actions),
                         dtype=np.float64)
    theta_old = theta.copy()
    if tilings is None:
        tilings = np.array([
            h_field + np.random.uniform(0.0, dh_field, 1)
            for j in xrange(N_tilings)
        ])

    # pre-allocate traces variable
    e = np.zeros_like(theta)
    fire_trace = np.ones(N_tilings)

    # pre-allocate usage vector: inverse gradient descent learning rate
    u0 = 1.0 / alpha_0 * np.ones((N_tiles * N_tilings, ), dtype=np.float64)
    u = np.zeros_like(u0)

    # preallocate quantities
    Return_ave = np.zeros((N_episodes, ), dtype=np.float64)
    Return = np.zeros_like(Return_ave)
    Fidelity_ep = np.zeros_like(Return_ave)
    protocol_ep = np.zeros((Fidelity_ep.shape[0], max_t_steps), )

    # initialise best fidelity
    best_R = -1.0  # best encountered fidelity
    # initialise reward
    R = 0.0
    # preallocate theta_inds
    theta_inds_zeros = np.zeros((N_tilings, ), dtype=int)

    # loop over episodes
    for ep in xrange(N_episodes):
        # set traces to zero
        e *= 0.0
        # set initial usage vector
        u[:] = u0[:]

        # set initial state of episode
        S = state_i.copy()

        # get set of features present in S
        theta_inds = find_feature_inds(tilings, S, theta_inds_zeros)
        Q = np.sum(theta[theta_inds, 0, :],
                   axis=0)  # for each action at time t_step=0

        # preallocate physical quantties
        psi[:] = psi_i[:]  # quantum state at time

        # record the actions taken during the episode
        actions_taken = np.zeros((max_t_steps, ), dtype=np.float64)

        #define beta
        beta_RL = explore_beta(ep,
                               m_expl,
                               beta_RL_i,
                               T_expl,
                               beta_RL_const=beta_RL_inf)

        explored = False
        # generate episode
        for t_step in xrange(max_t_steps):

            # calculate available actions from state S
            avail_inds = np.argwhere(
                (S[0] + np.array(actions) <= h_field[-1]) *
                (S[0] + np.array(actions) >= h_field[0])).squeeze()
            avail_actions = actions[avail_inds]

            if beta_RL < beta_RL_inf:  #20.0
                if ep % 2 == 0:
                    A_greedy = avail_actions[random.choice(
                        np.argwhere(
                            Q[avail_inds] == np.amax(Q[avail_inds])).ravel())]
                else:
                    A_greedy = best_actions[t_step]
            else:
                A_greedy = avail_actions[random.choice(
                    np.argwhere(
                        Q[avail_inds] == np.amax(Q[avail_inds])).ravel())]

            if beta_RL < beta_RL_inf:
                # choose a random action
                P = np.exp(beta_RL * Q[avail_inds])
                A = avail_actions[np.searchsorted(np.cumsum(P / np.sum(P)),
                                                  random.uniform(0.0, 1.0))]

                # reset traces if A is exploratory
                if abs(A - A_greedy) > np.finfo(A).eps:
                    e *= 0.0
            else:
                A = A_greedy

            # find the index of A
            indA = np.searchsorted(actions, A)

            # record action taken
            actions_taken[t_step] = A

            # take action A, return state S_prime and actual reward R
            ################################################################################
            ######################    INTERACT WITH ENVIRONMENT    #########################
            ################################################################################

            # define new state
            S_prime = S.copy()
            # calculate new field value
            S_prime[0] += A

            # all physics happens here
            b = S_prime[0]
            #psi = exp_H.dot(psi)
            psi = expm_dict[int(np.rint(
                (b - min(h_field)) / min(pos_actions)))].dot(psi)

            # assign reward
            R *= 0.0
            if t_step == max_t_steps - 1:
                # calculate final fidelity and give it as a reward
                #EGS = H.eigsh(k=1,which='SA',maxiter=1E10,return_eigenvectors=False).squeeze()
                R += abs(
                    psi.conj().dot(psi_f)
                )**2  #-(H.matrix_ele(psi,psi).real-EGS) #-ent_entropy(psi,H.basis)['Sent'] #

            ################################################################################
            ################################################################################
            ################################################################################

            # calculate usage and alpha vectors: alpha_inf = eta
            u[theta_inds] *= (1.0 - eta)
            u[theta_inds] += 1.0
            alpha = 1.0 / (N_tilings * u[theta_inds])

            # Q learning update rule; GD error in time t
            delta_t = R - Q[indA]  # error in gradient descent
            # TO
            Q_old = theta[theta_inds, t_step, indA].sum()

            # update traces
            e[theta_inds, t_step, indA] = alpha * fire_trace

            # check if S_prime is terminal or went out of grid
            if t_step == max_t_steps - 1:
                # update theta
                theta += delta_t * e
                # GD error in field h
                delta_h = Q_old - theta[theta_inds, t_step, indA].sum()
                theta[theta_inds, t_step, indA] += alpha * delta_h
                # go to next episode
                break

            # get set of features present in S_prime
            theta_inds_prime = find_feature_inds(tilings, S_prime,
                                                 theta_inds_zeros)

            # t-dependent Watkin's Q learning
            Q = np.sum(theta[theta_inds_prime, t_step + 1, :], axis=0)

            # update theta
            delta_t += np.max(Q)
            theta += delta_t * e

            # GD error in field h
            delta_h = Q_old - theta[theta_inds, t_step, indA].sum()
            theta[theta_inds, t_step, indA] += alpha * delta_h

            # update traces
            e[theta_inds, t_step,
              indA] -= alpha * e[theta_inds, t_step, indA].sum()
            e *= lmbda

            ################################
            # S <- S_prime
            S[:] = S_prime[:]
            theta_inds[:] = theta_inds_prime[:]

        # if the fidelity of this episode beats the best encountered so far
        if R - best_R > 1E-12:
            print("best encountered fidelity is {}".format(np.around(R, 4)))
            # update list of best actions
            best_actions = actions_taken[:]
            # best reward and fidelity
            best_R = R
            # learn policy
            #if beta_RL<20.0:
            theta = Learn_Policy(state_i, best_actions, best_R, theta, tilings,
                                 actions)

        # force-learn the best encountered protocol periodically (every 2*T_expl episodes)
        if ((ep + 1) % (2 * T_expl) - T_expl == 0
                and ep not in [0, N_episodes - 1]):  # and beta_RL<20.0:
            theta = Learn_Policy(state_i, best_actions, best_R, theta, tilings,
                                 actions)
        elif (ep // T_expl) % 2 == 1 and abs(R - best_R) > 1E-12:
            theta = Learn_Policy(state_i, best_actions, best_R, theta, tilings,
                                 actions)

        #"""
        # check if Q-function converges
        print ep, "beta_RL,R,d_theta:", beta_RL, R, np.max(
            abs(theta.ravel() - theta_old.ravel()))
        theta_old = theta.copy()
        #"""

        # record average return
        Return_ave[ep] = 1.0 / (ep + 1) * (R + ep * Return_ave[ep - 1])
        Return[ep] = R
        Fidelity_ep[ep] = R
        protocol_ep[ep, :] = build_protocol(actions_taken, state_i[0],
                                            delta_time)[0].astype(int)

        if (ep + 1) % (2 * T_expl) == 0:
            print "finished simulating episode {} with fidelity {} at hx_f = {}.".format(
                ep + 1, np.round(R, 5), S_prime[0])
            print 'best encountered fidelity is {}.'.format(np.round(
                best_R, 5))
            #print 'current inverse exploration temperature is {}.'.format(np.round(beta_RL,3))

    # calculate best protocol and fidelity
    protocol_best, t_best = build_protocol(best_actions, state_i[0],
                                           delta_time)
    protocol_greedy, t_greedy = greedy_protocol(theta, tilings, actions,
                                                state_i[0], delta_time,
                                                max_t_steps, h_field)

    obs_best = Hamiltonian.MB_observables(psi_i,
                                          t_best,
                                          protocol_best,
                                          pos_actions,
                                          h_field,
                                          L,
                                          J=J,
                                          hx_i=hx_i,
                                          hx_f=hx_f,
                                          hz=hz,
                                          fin_vals=False,
                                          bang=bang)

    Data_obs_best = np.asarray((np.append(t_best, t_best[-1] + delta_time), ) +
                               obs_best).T

    ##### save data
    Data_fid = np.zeros((N_episodes, 3))

    Data_fid[:, 0] = Fidelity_ep
    Data_fid[:, 1] = Return
    Data_fid[:, 2] = Return_ave
    #
    Data_protocol = np.zeros((max_t_steps, 3))

    Data_protocol[:, 0] = t_best
    Data_protocol[:, 1] = protocol_best
    Data_protocol[:, 2] = protocol_greedy

    # define parameter-dependent part of file name
    args = (N, N_episodes, max_t_steps, L) + tuple(
        truncate([J, hz, hx_i, hx_f], 2))
    data_params = "_N=%s_Nep=%s_T=%s_L=%s_J=%s_hz=%s_hxi=%s_hxf=%s" % args
    data_params += a_str

    # create the save directory if it does not exist
    save_dir = my_dir + "/data"
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_dir = "data/"

    if save:
        # display full strings
        np.set_printoptions(threshold='nan')

        # as txt format
        dataname = save_dir + "RL_data" + data_params + '.txt'
        np.savetxt(dataname, Data_fid)

        dataname = save_dir + "RL_protocols" + data_params + '.txt'
        np.savetxt(dataname, protocol_ep)

        dataname = save_dir + "obs_data_best" + data_params + '.txt'
        np.savetxt(dataname, Data_obs_best)

        dataname = save_dir + "protocol_data" + data_params + '.txt'
        np.savetxt(dataname, Data_protocol)
        # save as pickle
        dataname = save_dir + "theta_data" + data_params + '.pkl'
        cPickle.dump(theta, open(dataname, "wb"))
        #cPickle.load(open(dataname, "rb" ))

        dataname = save_dir + "tilings_data" + data_params + '.pkl'
        cPickle.dump(tilings, open(dataname, "wb"))

        RL_params = {
            "N": N,
            "N_episodes": N_episodes,
            "alpha_0": alpha_0,
            "eta": eta,
            "lmbda": lmbda,
            "beta_RL_i": beta_RL_i,
            "beta_RL_inf": beta_RL_inf,
            "T_expl": T_expl,
            "m_expl": m_expl,
            "N_tilings": N_tilings,
            "N_tiles": N_tiles,
            "state_i": state_i,
            "h_field": h_field,
            "dh_field": dh_field,
            "seed": seed,
        }
        dataname = save_dir + "RL_params_data" + data_params + '.pkl'
        cPickle.dump(RL_params, open(dataname, "wb"))

        phys_params = {
            "L": L,
            "max_t_steps": max_t_steps,
            "delta_time": delta_time,
            "J": J,
            "hz": hz,
            "hx_i": hx_i,
            "hx_f": hx_f,
            "psi_i": psi_i,
            "psi_f": psi_f,
        }
        dataname = save_dir + "phys_params_data" + data_params + '.pkl'
        cPickle.dump(phys_params, open(dataname, "wb"))
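
For reference, the value estimate used throughout Q_learning above is a linear function over tile-coded features: the Q-value of each action at a time step is the sum of the active weights, one active tile per tiling. A minimal toy sketch of that construction (the dimensions and the binning in find_feature_inds_toy are hypothetical, not the scheme used by find_feature_inds above):

import numpy as np

# Hypothetical toy dimensions: 3 tilings x 10 tiles each, 4 time steps, 5 actions.
N_tilings, N_tiles, max_t_steps, N_actions = 3, 10, 4, 5
theta = np.random.rand(N_tiles * N_tilings, max_t_steps, N_actions)

# Offset copies of a coarse grid over the field range; one active tile per tiling.
tilings = [np.linspace(-4.0, 4.0, N_tiles) + 0.1 * j for j in range(N_tilings)]

def find_feature_inds_toy(tilings, S):
    inds = np.zeros(len(tilings), dtype=int)
    for j, grid in enumerate(tilings):
        # bin the scalar field value into this tiling, offset into the flat feature vector
        inds[j] = j * N_tiles + min(np.searchsorted(grid, S[0]), N_tiles - 1)
    return inds

theta_inds = find_feature_inds_toy(tilings, np.array([1.3]))

# Linear Q estimate for every action at time step t_step,
# as in Q = np.sum(theta[theta_inds, t_step, :], axis=0) above.
t_step = 0
Q = theta[theta_inds, t_step, :].sum(axis=0)
print(Q.shape)  # (5,)
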
Example #3
num_orbitals = 4
num_grid_points = 1000

### create the basis orbitals
orbitals = Orbitals.orbitals(num_orbitals)
orbitals.hydrogen_atom(num_grid_points)

### create overlap matrix
overlap_matrix = Overlap.matrix(orbitals)
overlap_matrix.hydrogen_atom(orbitals)

### find transformation matrix to convert to simple eigenvalue problem
overlap_matrix.transform()

### create hamiltonian matrix
hamiltonian = Hamiltonian.hamiltonian(orbitals)
hamiltonian.hydrogen_atom(orbitals)

### see the docstring for hamiltonian.canned_method before using!
#hamiltonian.canned_method(overlap_matrix.matrix)
#np.savetxt('eigvals1.csv',hamiltonian.eigen_vals,fmt='%.6f')
#np.savetxt('eigvecs1.csv',hamiltonian.eigen_vecs,fmt='%.6f')

### change basis of hamiltonian to convert to simple eigenvalue problem
hamiltonian.change_basis(overlap_matrix.transformation_matrix)

### solve
hamiltonian.diagonalize()
hamiltonian.transform_vectors(overlap_matrix.transformation_matrix)

np.savetxt('eigenvalues.csv', hamiltonian.eigen_vals, fmt='%.6f')
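
The Orbitals/Overlap/Hamiltonian classes are not shown here, but the sequence above (transform the overlap matrix, change the Hamiltonian's basis, diagonalize, transform the eigenvectors back) is the standard reduction of the generalized eigenvalue problem H c = E S c to an ordinary one. A minimal NumPy sketch of that reduction via symmetric (Löwdin) orthogonalization, on a hypothetical 2x2 problem:

import numpy as np

# Hypothetical generalized eigenproblem H c = E S c in a non-orthogonal basis.
H = np.array([[-1.0, -0.5],
              [-0.5, -0.8]])
S = np.array([[1.0, 0.4],
              [0.4, 1.0]])

# Transformation matrix X = S^(-1/2) (symmetric orthogonalization).
s_vals, s_vecs = np.linalg.eigh(S)
X = s_vecs @ np.diag(s_vals ** -0.5) @ s_vecs.T

# Change basis, solve the ordinary symmetric problem, transform the vectors back.
H_prime = X.T @ H @ X
E, C_prime = np.linalg.eigh(H_prime)
C = X @ C_prime  # eigenvectors in the original (non-orthogonal) basis
print(E)
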
Example #4
def Q_learning(RL_params,
               physics_params,
               theta=None,
               tilings=None,
               greedy=False):

    ####################################################################
    start_time = time.time()
    ####################################################################
    # display full strings
    np.set_printoptions(threshold='nan')
    ######################################################################
    #######################   read off params 	 #########################
    ######################################################################

    # read off RL_params
    RL_keys = [
        'N_episodes', 'alpha_0', 'eta', 'lmbda', 'beta_RL', 'traces', 'dims',
        'N_tiles', 'state_i', 'h_field', 'dh_field'
    ]
    from numpy import array
    for key, value in RL_params.iteritems():
        #print key, repr(value)
        if key not in RL_keys:
            raise TypeError(
                "Key '{}' not allowed for use in dictionary!".format(key))
        # turn key to variable and assign its value
        exec("{} = {}".format(key, repr(value))) in locals()

    # read off physics params
    physics_keys = [
        'L', 'max_t_steps', 'delta_t', 'J', 'hz', 'hx_i', 'hx_f', 'psi_i',
        'psi_f', 'E_i', 'E_f'
    ]
    for key, value in physics_params.iteritems():
        #print key, repr(value)
        if key not in physics_keys:
            raise TypeError(
                "Key '{}' not allowed for use in dictionary!".format(key))
        # turn key to variable and assign its value
        exec("{} = {}".format(key, repr(value))) in locals()

    ######################################################################

    # define all actions
    actions = RL.all_actions()

    # eta limits # max and min field
    hx_limits = [h_field[0], h_field[-1]]

    # get dimensions
    N_tilings, N_lintiles, N_vars = dims
    N_tiles = N_lintiles**N_vars
    N_actions = len(actions)
    shift_tile_inds = [j * N_tiles for j in xrange(N_tilings)]

    if theta is None:
        theta = np.zeros((N_tiles * N_tilings, max_t_steps, N_actions),
                         dtype=np.float64)

    if tilings is None:
        tilings = RL.gen_tilings(h_field, dh_field, N_tilings)

    # pre-allocate traces variable
    e = np.zeros_like(theta)
    fire_trace = np.ones(N_tilings)

    if not greedy:
        # pre-allocate usage vector: inverse gradient descent learning rate
        u0 = 1.0 / alpha_0 * np.ones((N_tiles * N_tilings, ), dtype=np.float64)
    else:
        u0 = np.inf * np.ones((N_tiles * N_tilings, ), dtype=np.float64)
    u = np.zeros_like(u0)

    #### physical quantities

    # define ED Hamiltonian H(t)
    b = hx_i
    lin_fun = lambda t: b  #+ m*t
    # define Hamiltonian
    H = Hamiltonian.Hamiltonian(L, fun=lin_fun, **{'J': J, 'hz': hz})
    # define matrix exponential
    exp_H = exp_op(H, a=-1j * delta_t)
    #"""
    ''' will not need unless we plot '''
    # define Hamiltonian for any step-like protocol p_vals at times t_vals
    t_vals, p_vals = [0.0, 0.0], [0.0, 0.0]

    def step_protocol(t):
        return p_vals[np.argmin(abs(np.asarray(t_vals) - t))]

    H_fid = Hamiltonian.Hamiltonian(L, fun=step_protocol, **{'J': J, 'hz': hz})
    # calculate final basis
    b = hx_f
    _, Vf = H.eigh(time=0.0)
    b = hx_i
    ''' will not need '''
    #"""

    # average reward
    Return_ave = np.zeros((N_episodes, 1), dtype=np.float64)
    Return = Return_ave.copy()
    Fidelity_ep = Return_ave.copy()

    # initialise best fidelity
    best_fidelity = 0.0  # best encountered fidelity
    # set of actions for best encountered protocol
    best_actions = [random.choice(actions) for j in range(max_t_steps)]

    # calculate importance sampling ratio
    R = 0.0  # instantaneous fidelity

    psi = np.zeros_like(psi_i)

    # loop over episodes
    for ep in xrange(N_episodes):
        # set traces to zero
        e *= 0.0
        # set initial usage vector
        u[:] = u0[:]

        # set initial state of episode
        S = state_i.copy()

        # get set of features present in S
        theta_inds = RL.find_feature_inds(S, tilings, shift_tile_inds)
        Q = np.sum(theta[theta_inds, 0, :],
                   axis=0)  # for each action at time t_step=0

        # preallocate physical quantties
        psi[:] = psi_i[:]  # quantum state at time

        protocol_inst = []
        t_inst = []

        # calculate fidelity for each fixed episode
        Return_ep = 0.0

        # record the actions taken during the episode
        actions_taken = []

        # generate episode
        for t_step in xrange(max_t_steps):  #

            # calculate available actions from state S
            avail_inds = np.argwhere(
                (S[0] + np.array(actions) <= hx_limits[1]) *
                (S[0] + np.array(actions) >= hx_limits[0])).squeeze()
            avail_actions = [actions[_j] for _j in avail_inds]

            # calculate greedy action(s) wrt Q policy
            A_greedy = avail_actions[random.choice(
                np.argwhere(Q[avail_inds] == np.amax(Q[avail_inds])).ravel())]

            # choose a random action
            P = np.exp(beta_RL * Q[avail_inds])
            p = np.cumsum(P / sum(P))
            if greedy or beta_RL > 1E12:
                A = A_greedy
            else:
                A = avail_actions[np.searchsorted(p, random.uniform(0.0, 1.0))]

            # find the index of A
            indA = actions.index(A)

            # reset traces if A is exploratory
            if abs(A - A_greedy) > np.finfo(A).eps:
                e *= 0.0

            # take action A, return state S_prime and actual reward R
            ################################################################################
            ######################    INTERACT WITH ENVIRONMENT    #########################
            ################################################################################

            # define new state
            S_prime = S.copy()
            # calculate new field value
            S_prime[0] += A

            ### assign reward
            R *= 0.0

            # all physics happens here
            # update dynamic arguments in place: ramp = m*t+b

            b = S_prime[0]
            psi = exp_H.dot(psi)

            # assign reward
            if t_step == max_t_steps - 1:
                # calculate final fidelity
                fidelity = abs(psi.conj().dot(psi_f))**2
                # reward
                R += fidelity

            ################################################################################
            ################################################################################
            ################################################################################

            # update episodic return
            Return_ep += R

            # update protocol and time
            protocol_inst.append(S_prime[0])
            t_inst.append(t_step * delta_t)

            # record action taken
            actions_taken.append(A)

            ############################

            # calculate usage and alpha vectors: alpha_inf = eta
            u[theta_inds] *= (1.0 - eta)
            u[theta_inds] += 1.0
            alpha = 1.0 / (N_tilings * u[theta_inds])

            # Q learning update rule; GD error in time t
            delta = R - Q[indA]  # error in gradient descent
            # TO
            Q_old = theta[theta_inds, t_step, indA].sum()

            # update traces
            e[theta_inds, t_step, indA] = alpha * fire_trace

            # check if S_prime is terminal or went out of grid
            if t_step == max_t_steps - 1:
                # update theta
                theta += delta * e
                # GD error in field h
                delta_TO = Q_old - theta[theta_inds, t_step, indA].sum()
                theta[theta_inds, t_step, indA] += alpha * delta_TO
                # go to next episode
                break

            # get set of features present in S_prime
            theta_inds_prime = RL.find_feature_inds(S_prime, tilings,
                                                    shift_tile_inds)

            # t-dependent Watkin's Q learning
            Q = np.sum(theta[theta_inds_prime, t_step + 1, :], axis=0)

            # update theta
            delta += max(Q)
            theta += delta * e

            # GD error in field h
            delta_TO = Q_old - theta[theta_inds, t_step, indA].sum()
            theta[theta_inds, t_step, indA] += alpha * delta_TO

            # update traces
            e[theta_inds, t_step,
              indA] -= alpha * e[theta_inds, t_step, indA].sum()
            e *= lmbda

            ################################
            # S <- S_prime
            S[:] = S_prime[:]
            theta_inds[:] = theta_inds_prime[:]

        if greedy:
            return protocol_inst, t_inst

        # save average return
        Return_ave[ep] = 1.0 / (ep + 1) * (Return_ep + ep * Return_ave[ep - 1])
        Return[ep] = Return_ep
        Fidelity_ep[ep] = fidelity

        # if the fidelity of this episode beats the best encountered so far
        if fidelity - best_fidelity > 1E-12:
            # update list of best actions
            best_actions[:] = actions_taken[:]
            # calculate best protocol and fidelity
            protocol_best, t_best = best_protocol(best_actions, hx_i, delta_t)

            R_best = R
            best_fidelity = fidelity

            theta = Learn_Policy(state_i, theta, tilings, dims, best_actions,
                                 R_best)
            #theta = Replay(50,RL_params,physics_params,theta,tilings,best_actions,R_best)

        # force-learn the best encountered protocol every 40 episodes
        if (ep % 40 == 0 and ep != 0) and (R_best
                                           is not None) and beta_RL < 1E12:
            print 'learned best encountered'
            theta = Learn_Policy(state_i, theta, tilings, dims, best_actions,
                                 R_best)
            #theta = Replay(50,RL_params,physics_params,theta,tilings,best_actions,R_best)

        #'''
        if ep % 20 == 0:
            print "finished simulating episode {} with fidelity {} at hx_f = {}.".format(
                ep + 1, np.round(fidelity, 3), S_prime[0])
            print 'best encountered fidelity is {}.'.format(
                np.round(best_fidelity, 3))
        #'''

        #'''
        # plot protocols and learning rate
        if (ep % 500 == 0 and ep != 0) or (np.round(fidelity, 3) == 1.0):

            RL_params['beta_RL'] = 1E12
            RL_params['lmbda'] = 0.0
            RL_params['alpha_0'] = 0.0

            # fig file name params
            save = False  #True
            save_vars = ['J', 'hz', 'hxi', 'hxf', 'Ei', 'Ef', 'Neps']
            save_vals = truncate([J, hz, hx_i, hx_f, E_i / L, E_f / L, j], 2)
            save_params = "_L={}".format(L) + "".join(
                ['_' + i + '=' + k for i, k in zip(save_vars, save_vals)])

            # calculate greedy fidelity
            Q_args = (RL_params, physics_params)
            Q_kwargs = {'theta': theta, 'tilings': tilings}
            protocol_greedy, t_greedy = Q_learning(*Q_args,
                                                   greedy=True,
                                                   **Q_kwargs)

            # calculate inst fidelities of interpolated protocols
            t_vals, p_vals = t_inst, protocol_inst
            F_inst, E_inst, dE_inst, Sent_inst, Sd_inst = Fidelity(
                psi_i,
                H_fid,
                t_vals,
                delta_t,
                psi_f=psi_f,
                all_obs=True,
                Vf=Vf)

            t_vals, p_vals = t_greedy, protocol_greedy
            F_greedy, E_greedy, dE_greedy, Sent_greedy, Sd_greedy = Fidelity(
                psi_i,
                H_fid,
                t_vals,
                delta_t,
                psi_f=psi_f,
                all_obs=True,
                Vf=Vf)

            t_vals, p_vals = t_best, protocol_best
            F_best, E_best, dE_best, Sent_best, Sd_best = Fidelity(
                psi_i,
                H_fid,
                t_vals,
                delta_t,
                psi_f=psi_f,
                all_obs=True,
                Vf=Vf)

            # prepare plot data
            times = [t_inst, t_greedy, t_best]
            protocols = [protocol_inst, protocol_greedy, protocol_best]
            fidelities = [F_inst, F_greedy, F_best]
            energies = [E_inst, E_greedy, E_best]
            d_energies = [dE_inst, dE_greedy, dE_best]
            s_ents = [Sent_inst, Sent_greedy, Sent_best]
            s_ds = [Sd_inst, Sd_greedy, Sd_best]

            Data = np.zeros((7, max_t_steps))
            Data[0, :] = t_best
            Data[1, :] = protocol_best
            Data[2, :] = F_best
            Data[3, :] = E_best
            Data[4, :] = dE_best
            Data[5, :] = Sent_best
            Data[6, :] = Sd_best

            # plot data
            user_input = raw_input("continue? (y or n) ")
            if user_input == 'y':
                # plot rewards
                #plot_rewards(N_episodes,Return_ave,Return,Fidelity_ep,'rewards',save_params,save)
                # plot protocols
                plot_protocols(times, protocols, fidelities, 'fidelity',
                               save_params, save)
                #plot_protocols(times,protocols,energies,'energy',save_params,save)
                #plot_protocols(times,protocols,d_energies,'energy fluct.',save_params,save)
                #plot_protocols(times,protocols,s_ents,'ent. entropy',save_params,save)
                #plot_protocols(times,protocols,s_ds,'diag. entropy',save_params,save)
                """		
				# calculate approximate Q function
				etas = np.linspace(hx_limits[0],hx_limits[1],101)
				#etas = np.linspace(-1.0,1.0,101)
				Q_plot = RL.Q_greedy(etas,theta,tilings,shift_tile_inds,max_t_steps).T
				
				plot_Q(etas,t_best,-Q_plot,'Q_fn',save_params,save)
				"""

                if save:
                    user_input = raw_input("save data? (y or n) ")
                    if user_input == 'y':
                        args = (L, ) + tuple(np.around([J, hz, hx_i, hx_f], 2))
                        dataname = "best_L=%s_J=%s_hz=%s_hxi=%s_hxf=%s.txt" % args
                        np.savetxt(dataname, Data.T)

            RL_params['beta_RL'] = beta_RL
            RL_params['lmbda'] = lmbda
            RL_params['alpha_0'] = alpha_0
        #'''

    print "Calculating the Q function loop using Q-Learning took", (
        "--- %s seconds ---" % (time.time() - start_time))
Example #5
def main():
    parser = argparse.ArgumentParser(description='Hamiltonian Descent Methods')
    parser.add_argument('--batchsize', '-b', type=int, default=100)
    parser.add_argument('--epoch', '-e', type=int, default=200)
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        choices=[-1, 0, 1, 2, 3])
    parser.add_argument('--out', '-o', type=str, default='verification/')
    parser.add_argument('--data',
                        '-d',
                        type=str,
                        default='mnist',
                        choices=['mnist', 'cifar10'])
    parser.add_argument('--method',
                        '-m',
                        type=str,
                        default='sem',
                        choices=['adam', 'sgd', 'fem', 'sem'])
    args = parser.parse_args()

    # Experiment setup
    if args.data == 'mnist':
        model = MLP(n_units=500, n_out=10)
        train, test = chainer.datasets.get_mnist()
    elif args.data == 'cifar10':
        model = NNet(n_out=10)
        train, test = chainer.datasets.get_cifar10()

    model = L.Classifier(model)
    chainer.cuda.get_device_from_id(args.gpu).use()
    model.to_gpu()

    # Optimizer
    if args.method == 'adam':
        optimizer = chainer.optimizers.Adam()
    elif args.method == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=0.01)
    elif args.method == 'fem':
        optimizer = Hamiltonian.Hamiltonian(approx='first')
    elif args.method == 'sem':
        optimizer = Hamiltonian.Hamiltonian(approx='second')

    optimizer.setup(model)

    # iterator
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Setup a trainer
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    if args.method == 'sgd':
        trainer.extend(extensions.ExponentialShift('lr', 0.1),
                       trigger=(50, 'epoch'))

    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    trainer.extend(extensions.ProgressBar())
    trainer.run()
Example #6
    parameters.h = np.zeros(
        [parameters.kount, parameters.kount],
        dtype=complex)  # the Hamiltonian
    parameters.eval = np.zeros([parameters.kount])  # eigenvalues

    print(' Will now build the Hamiltonian, the dimension of each k-block is:',
          parameters.kount)
    print(
        '********************************************************************')

    # build each k-block of the Hamiltonian, diagonalize it, and calculate the observables
    for k in range(parameters.nlbnd, parameters.nubnd + 1):
        # initialize the Hamiltonian block to zero
        parameters.h[:] = complex(0, 0)
        #build the hamiltonian
        if (parameters.one_state):
            parameters = Hamiltonian.build_h1p(k, parameters)
        if (parameters.two_state):
            parameters = Hamiltonian.build_h2p(k, parameters)
        if (parameters.one_state) and (parameters.two_state):
            parameters = Hamiltonian.build_h1p2p(k, parameters)
        if (parameters.ct_state):
            parameters = Hamiltonian.build_hct(k, parameters)
        if (parameters.one_state) and (parameters.ct_state):
            parameters = Hamiltonian.build_h1pct(k, parameters)
        if (parameters.two_state) and (parameters.ct_state):
            parameters = Hamiltonian.build_h2pct(k, parameters)
        #diagonalize the hamiltonian
        if (k == 0) or (parameters.esnum == parameters.kount):
            parameters.h, parameters.eval = Dia.diagonalize(
                parameters.h, parameters.kount, parameters.eval, 'A',
                parameters.kount)
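
The loop above builds and diagonalizes the Hamiltonian one k-block at a time; since the blocks are independent, each can be handled on its own. A minimal sketch of that per-block diagonalization with numpy.linalg.eigh standing in for the Dia.diagonalize call (the blocks below are hypothetical):

import numpy as np

# Hypothetical k-blocks of a block-diagonal Hermitian Hamiltonian.
blocks = [np.array([[0.0, 1.0],
                    [1.0, 0.0]], dtype=complex),      # k = 0 block
          np.array([[1.0, 0.5j],
                    [-0.5j, -1.0]], dtype=complex)]   # k = 1 block

for k, h_k in enumerate(blocks):
    evals, evecs = np.linalg.eigh(h_k)  # diagonalize this k-block only
    print('k =', k, 'eigenvalues:', evals)
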
Example #7
    N = sqrt_N**2
    print('N =', N)
    X = 2 * (rand(N) > .5) - 1

    imshow(X.reshape([sqrt_N] * 2))

    X_c = int_to_state_vector(int(str(state_vector_to_int(X))), N)
    print(sum(X - X_c) == 0)

    print('------------------------')

    from Hamiltonian import *

    C = (rand(*[5] * 2) > .5) * 1

    H = Hamiltonian(terms=[neighbors_influence, polls_influence],
                    coeffs=[1, 1])

    pop = population(connectivity=C, H=H, beta=1, state=None)

    X = pop.state
    print(X)
    print(pop.N)

    for term in [neighbors_influence, polls_influence]:
        print(term.get_contribution(X, 1, connectivity=C))

    print(pop.connectivity.toarray())
    print(pop.N)

    flip_i = 2
    print(pop.state, pop.get_E())
Example #8
import Evolution
import Hamiltonian
import sys
import matplotlib.pyplot as plt

N = 5
wa = 0.0006
Emin = 0
Emax = 5
dt = 0.0001
HPLANKS = 1
CNTSteps = 10000

Hforsize = Hamiltonian.makeHamiltonian(False, N, 0, 0, 0, Emin, Emax, 1)
R = Evolution.generateDensityMatrix(Hforsize.shape[0])

tests = [(0.0001, 1000), (0.0001, 100), (0.0001, 10), (0.0001, 1), (0.0001, 0.1)]
# tests = [(1, 10000000), (0.0001, 0.1)]

xAxis = [k for k in range(CNTSteps)]
for i in range(len(tests)):
    g = Evolution.cmpGen(R, dt, HPLANKS, N, tests[i][0], wa, tests[i][1], Emin, Emax)

    results = []
    for j in range(CNTSteps):
        results.append(next(g))

    plt.plot(xAxis, results, label='b/(h*wc) =' + str(tests[i][0]/(tests[i][1]*HPLANKS)))

plt.xlabel('steps')
plt.ylabel('mse')
Example #9
# define model params
L = int(sys.argv[4])  # system size
if L == 1:
    J = 0
else:
    J = 1.0  # zz interaction
hz = 1.0  # hz field
hx_i = -2.0  # initial hx coupling
hx_f = +2.0  # final hx coupling

# define dynamic params of H(t)
b = hx_i
lin_fun = lambda t: b
# define Hamiltonian
H_params = {'J': J, 'hz': hz}
H = Hamiltonian.Hamiltonian(L, fun=lin_fun, **H_params)

# calculate initial state
if L == 1:
    E_i, psi_i = H.eigh()
else:
    E_i, psi_i = H.eigsh(time=0,
                         k=2,
                         which='BE',
                         maxiter=1E10,
                         return_eigenvectors=True)
    #E_i, psi_i = H.eigsh(time=0,k=1,sigma=-0.1,maxiter=1E10,return_eigenvectors=True)
E_i = E_i[0]
psi_i = psi_i[:, 0]
# calculate final state
b = hx_f
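
A minimal stand-alone sketch of the eigsh pattern used above for the initial state, assuming scipy-compatible eigsh semantics (the toy matrix is hypothetical): with which='BE' the solver returns eigenvalues from both ends of the spectrum, and index 0 is the ground state, matching E_i[0] and psi_i[:, 0] above.

import numpy as np
from scipy.sparse.linalg import eigsh

# Hypothetical small Hermitian matrix standing in for H at time 0.
H_toy = np.diag(np.arange(6.0)) + 0.1 * (np.eye(6, k=1) + np.eye(6, k=-1))

# k=2, which='BE': one eigenvalue from each end of the spectrum.
E, V = eigsh(H_toy, k=2, which='BE', maxiter=int(1E10), return_eigenvectors=True)
E_ground, psi_ground = E[0], V[:, 0]  # ground-state energy and state
print(E_ground)
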
Example #10
import Basis, Overlap, Hamiltonian, Diag

basis = Basis.gaussian(alpha=[13.00773, 1.962079, 0.444529, 0.1219492])
overlap = Overlap.gaussian(basis)
hamiltonian = Hamiltonian.gaussian(basis, overlap)
energy, eigenvec = Diag.generalized_eigenval(overlap, hamiltonian)

print('\t{:.8f}\tRy'.format(energy[0]))
Example #11
def test_altered_delay_pert(plot=False, eps=1e-5):
    r'''
    We will have a method to shift the delays in the network before the
    commensurate root analysis, which will be based on taking the average
    Delta_delays that result from the nonlinearities over the different
    frequencies. We test this here.

    It also tests the corresponding perturbation in the frequencies.

    We assume that the refraction_index_func and the input delays into
    the Time_Delay_Network have been adjusted so that refraction_index_func
    is close to zero in the desired frequency range.

    There are several effects of the delays being different for different
    modes. The most important one is an effective detuning for different
    modes (as well as decay). There are other effects as well. The effective
    mode volume will also change (this is taken into account in the
    Hamiltonian class). However, this is not taken into account in the Potapov
    expansion because it becomes computationally difficult and the effect
    will be small. This could be done in principle. The time delays in the
    transfer function could be written as a function of frequency,
    :math:`T = T(\omega)`.
    The above function can be analytically continued to the complex plane.
    Then the transfer function would be expressed
    in terms of :math:`\exp(-z T) = \exp(-z T(z))`.
    Once this is done, the complex root-finding procedure can be applied.
    The difficulty in using this approach is that the resulting functions no
    longer have a periodic structure that we could identify when the delays
    were commensurate.
    '''

    Ex = Time_Delay_Network.Example3(max_linewidth=15., max_freq=500.)
    Ex.run_Potapov(commensurate_roots=True)
    modes = Ex.spatial_modes
    A, B, C, D = Ex.get_Potapov_ABCD(doubled=False)
    ham = Hamiltonian.Hamiltonian(Ex.roots,
                                  modes,
                                  Ex.delays,
                                  Omega=-1j * A,
                                  nonlin_coeff=1.)

    ## This nonlinearity will depend on the frequency.
    chi_nonlin_test = Hamiltonian.Chi_nonlin(delay_indices=[0],
                                             start_nonlin=0,
                                             length_nonlin=0.1 * consts.c)
    chi_nonlin_test.refraction_index_func = lambda freq, pol: 1. + abs(freq / (
        5000 * np.pi))
    ham.chi_nonlinearities.append(chi_nonlin_test)

    ## update delays, which are different because of the nonlinearity.
    ham.make_Delta_delays()
    #print ham.Delta_delays

    ## Perturb the roots to account for deviations in the index of refraction
    ## as a function of frequency.

    # print ham.roots
    perturb_func = Ex.get_frequency_pertub_func_z(use_ufuncify=True)
    ham.perturb_roots_z(perturb_func)
    # print ham.roots
    print len(ham.roots)
    # plt.plot(ham.omegas)
    if plot:
        plt.scatter(np.asarray(ham.roots).real, np.asarray(ham.roots).imag)
        plt.show()
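
Restated as a display equation, the substitution described in the docstring is

    e^{-z T} \;\longrightarrow\; e^{-z\,T(z)}, \qquad T(z) = \text{analytic continuation of } T(\omega),

after which the periodic (commensurate-root) structure is lost, so the test instead perturbs the roots numerically via get_frequency_pertub_func_z and perturb_roots_z above.
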
Example #12
import sys

import matplotlib.pyplot as plt

import Evolution
import Hamiltonian

if len(sys.argv) != 10:
    print("Неверное кол-во параметров")

N = int(sys.argv[1])
b = float(sys.argv[2])
wa = float(sys.argv[3])
wc = float(sys.argv[4])
Emin = int(sys.argv[5])
Emax = int(sys.argv[6])
dt = float(sys.argv[7])
HPLANKS = float(sys.argv[8])
CNTSteps = int(sys.argv[9])

# 6.62606957e-27

Hforsize = Hamiltonian.makeHamiltonian(False, N, b, wa, wc, Emin, Emax, 1)
R = Evolution.generateDensityMatrix(Hforsize.shape[0])
g = Evolution.cmpGen(R, dt, HPLANKS, N, b, wa, wc, Emin, Emax)

results = []
for i in range(CNTSteps):
    results.append(next(g))

xAxis = [i for i in range(CNTSteps)]
plt.plot(xAxis, results, '-b')
plt.show()

# print(results)
# print(xAxis)