Example #1
def sample_expert_action(concept_tree, knowledge):
    '''
    Samples an optimal action given the current knowledge and the concept tree.
    Samples uniformly from all optimal actions.
    Returns a StudentAction
    '''
    next_concepts = []

    # find all concepts that have not been learned yet but whose prerequisites are fulfilled
    for i in six.moves.range(concept_tree.n):
        if not knowledge[i]:
            cur_concept = np.zeros((concept_tree.n, ), dtype=int)
            cur_concept[i] = 1
            if fulfilled_prereqs(concept_tree, knowledge, cur_concept):
                next_concepts.append(i)

    if not next_concepts:
        # nothing new can be learned, so just pick a random concept
        next_action = np.random.randint(0, concept_tree.n)
    else:
        # uniformly pick an optimal action
        next_action = np.random.choice(next_concepts)
    next_c = np.zeros((concept_tree.n, ), dtype=int)
    next_c[next_action] = 1
    return st.StudentAction(next_action, next_c)
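
A minimal usage sketch (an assumption, not from the original repo): it assumes sample_expert_action and the concept_dependency_graph module imported in Example #10 are available, builds the default dependency tree used there, and samples an expert action for a hypothetical knowledge vector.

import numpy as np
import concept_dependency_graph as cdg

# default dependency tree with 5 concepts, as constructed in Example #10
dgraph = cdg.ConceptDependencyGraph()
dgraph.init_default_tree(5)

# hypothetical knowledge vector: only concept 0 is known
knowledge = np.zeros((5, ), dtype=int)
knowledge[0] = 1

act = sample_expert_action(dgraph, knowledge)
print(act.concept)  # index of an unlearned concept whose prerequisites are met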
Example #2
    def best_greedy_action(self):
        '''
        For each action, does a 1-step lookahead to determine best action.
        '''
        next_rewards = []

        # probability of observations
        probs = self.model.sample_observations()
        if probs is None:
            # assume [1 0 0 0 0 ...]
            probs = [0] * self.sim.dgraph.n
            probs[0] = 1

        for a in range(self.n_concepts):
            avg_reward = 0.0
            # action
            conceptvec = np.zeros((self.n_concepts, ))
            conceptvec[a] = 1.0
            action = st.StudentAction(a, conceptvec)
            # for each observation, weight reward with probability of seeing observation
            new_model = self.model.copy()
            new_model.advance_simulator(action, 1)
            avg_reward += probs[a] * np.sum(new_model.sample_observations())
            new_model = self.model.copy()
            new_model.advance_simulator(action, 0)
            avg_reward += (1.0 - probs[a]) * np.sum(
                new_model.sample_observations())
            # append next reward
            next_rewards.append(avg_reward)
        return argmaxlist(next_rewards)[0]
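
The loop above computes a probability-weighted one-step expectation: P(correct on a) times the summed predictions after a correct observation, plus (1 - P(correct on a)) times the summed predictions after an incorrect one. A small self-contained sketch of that weighting, with hypothetical numbers standing in for the model's sample_observations() output:

import numpy as np

# hypothetical predicted probability of answering each concept correctly
probs = np.array([0.9, 0.6, 0.2])

# hypothetical one-step-ahead predictions after observing a correct / incorrect
# answer on concept a (stand-ins for new_model.sample_observations())
preds_if_correct = np.array([0.9, 0.8, 0.3])
preds_if_incorrect = np.array([0.9, 0.5, 0.2])

a = 1
expected = (probs[a] * np.sum(preds_if_correct)
            + (1.0 - probs[a]) * np.sum(preds_if_incorrect))
print(expected)  # 0.6 * 2.0 + 0.4 * 1.6 = 1.84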
Example #3
    def advance(self, concept):
        '''
        Advances the true student simulator.
        Creates a new state where the DKT model is advanced according to the result of the true simulator.
        '''
        conceptvec = np.zeros((self.n_concepts, ))
        conceptvec[concept] = 1.0
        action = st.StudentAction(concept, conceptvec)
        # advance the true student simulator
        (ob, r) = self.sim.advance_simulator(action)
        # advance the model with the true observation
        self.model.advance_simulator(action, ob)
Example #4
    def advance(self, concept):
        '''
        Advances both the simulator and model.
        '''
        conceptvec = np.zeros((self.n_concepts, ))
        conceptvec[concept] = 1.0
        action = st.StudentAction(concept, conceptvec)

        # first advance the real world simulator
        self.sim.advance_simulator(action)

        # make a copy of the real world simulator
        self.model = self.sim.copy()
Example #5
def egreedy_expert(concept_tree, knowledge, epsilon):
    '''
    Epsilon-greedy over the expert policy: with probability epsilon pick a random concept, otherwise sample an expert action.
    '''
    if np.random.random() < epsilon:
        # random action
        next_action = np.random.randint(0, concept_tree.n)
        next_c = np.zeros((concept_tree.n, ), dtype=int)
        next_c[next_action] = 1
        next_act = st.StudentAction(next_action, next_c)
    else:
        next_act = sample_expert_action(concept_tree, knowledge)
    return next_act
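
A usage sketch under the same assumptions as the sketch after Example #1 (default tree from Example #10, hypothetical knowledge vector, egreedy_expert importable from its module):

import numpy as np
import concept_dependency_graph as cdg

dgraph = cdg.ConceptDependencyGraph()
dgraph.init_default_tree(5)
knowledge = np.zeros((5, ), dtype=int)
knowledge[0] = 1

# with probability 0.3 the action is random, otherwise it follows the expert
for _ in range(3):
    act = egreedy_expert(dgraph, knowledge, epsilon=0.3)
    print(act.concept, act.conceptvec)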
Example #6
    def __init__(self,
                 model,
                 sim,
                 step,
                 horizon,
                 r_type,
                 dktcache,
                 use_real,
                 new_act=None,
                 new_ob=None,
                 histhash=''):
        '''
        :param model: RnnStudentSim object
        :param sim: StudentExactSim object
        :param step: int, current step
        :param horizon: int, horizon
        :param r_type: reward type
        :param dktcache: a dictionary used for caching the RNN predictions, or None to disable caching
        :param use_real: if True, use sim as the real world; otherwise use the model
        :param new_act: immediate action that led to this state
        :param new_ob: immediate observation that led to this state
        :param histhash: string representation of the current history, used for the dktcache
        '''
        # the model will be passed down when doing real world perform
        self.belief = model
        self._probs = None  # caches the current prob predictions
        self.step = step
        self.horizon = horizon
        self.r_type = r_type
        self.use_real = use_real

        # keep track of history for debugging and various uses
        self.act = new_act
        self.ob = new_ob

        # this sim should be shared between all DKTStates
        # and it is advanced only when real_world_perform is called
        # so every reference to it stays in sync
        self.sim = sim
        self.n_concepts = sim.dgraph.n

        # setup caching rnn queries
        self.dktcache = dktcache
        self.histhash = histhash

        self.actions = []
        for i in range(self.n_concepts):
            concepts = np.zeros((self.n_concepts, ))
            concepts[i] = 1
            self.actions.append(st.StudentAction(i, concepts))
Example #7
def test_drqn_single(dgraph, student, horizon, model, DEBUG=False):
    '''
    Performs a single trajectory with the trained DRQN model and returns the final true student knowledge.
    '''
    n_concepts = dgraph.n

    # create the model and simulators
    student.reset()
    student.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(student, dgraph)

    # initialize state (or alternatively choose random first action)
    act_hist = [0]
    ob_hist = [0]
    for i in range(horizon - 1):
        # print('Step {}'.format(i))
        inputs = construct_drqn_inputs(act_hist, ob_hist, n_concepts)
        best_action, _ = model.predict(inputs, last_timestep_only=True)
        best_action = best_action[0]
        concept = best_action
        conceptvec = np.zeros(n_concepts)
        conceptvec[concept] = 1
        action = st.StudentAction(concept, conceptvec)
        # print(best_action.concept)

        # debug check for whether action is optimal
        if DEBUG:
            opt_acts = compute_optimal_actions(
                sim.dgraph,
                sim.student.knowledge)  # put function code into shared file
            is_opt = action.concept in opt_acts
            if not is_opt:
                print('ERROR {} executed non-optimal action {}'.format(
                    sim.student.knowledge, action.concept))

        # act in the real environment
        (ob, reward) = sim.advance_simulator(action)
        act_hist.append(action.concept)
        ob_hist.append(ob)

        # print('Next state: {}'.format(str(new_root.state)))
    return sim.student.knowledge
Example #8
    def best_greedy_action(self, n_rollouts):
        '''
        For each action, samples n_rollouts next states and averages the immediate reward.
        Returns the action with the largest average next immediate reward.
        '''
        next_rewards = []
        for a in range(self.n_concepts):
            avg_reward = 0.0
            conceptvec = np.zeros((self.n_concepts, ))
            conceptvec[a] = 1.0
            action = st.StudentAction(a, conceptvec)
            # sample next state and reward
            for i in range(n_rollouts):
                new_model = self.model.copy()
                new_model.advance_simulator(action)
                avg_reward += np.sum(new_model.student.knowledge)
            avg_reward /= 1.0 * n_rollouts
            next_rewards.append(avg_reward)
        #print('{} next {}'.format(self, next_rewards))
        return argmaxlist(next_rewards)[0]
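
The inner loop is a plain Monte Carlo estimate: copy the stochastic simulator, advance it once, and average the resulting knowledge sum. A self-contained sketch of the same idea, with a hypothetical learning probability standing in for the simulator copy-and-advance:

import numpy as np

knowledge = np.array([1, 1, 0])

def one_rollout():
    # hypothetical stand-in for copying the simulator and advancing it one step:
    # the student learns concept 2 with probability 0.7
    return np.sum(knowledge) + (np.random.random() < 0.7)

n_rollouts = 1000
avg_reward = sum(one_rollout() for _ in range(n_rollouts)) / float(n_rollouts)
print(avg_reward)  # should be close to 2 + 0.7 = 2.7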
Example #9
    def __init__(self, model, sim, step, horizon, r_type):
        '''
        :param model: StudentExactSim for the model
        :param sim: StudentExactSim for the real world
        :param step: the current timestep (starts from 1)
        :param horizon: the horizon length
        :param r_type: reward type
        '''
        self.belief = None  # not going to use belief at all because we know the exact state
        self.model = model
        self.sim = sim
        self.step = step
        self.horizon = horizon
        self.n_concepts = model.student.knowledge.shape[0]
        self.r_type = r_type

        self.actions = []
        for i in range(self.n_concepts):
            concepts = np.zeros((self.n_concepts, ))
            concepts[i] = 1
            self.actions.append(st.StudentAction(i, concepts))
Example #10
def test_dkt_multistep(model_id, dataset, chkpt=None):
    '''
    Test DKT multistep error on dataset. Dataset is output from generate_data.
    '''
    import concept_dependency_graph as cdg

    n_concepts = dataset[0][0][0].shape[0]
    horizon = len(dataset[0])

    # debug
    #six.print_('n concepts {} horizon {} trajectory {}'.format(n_concepts, horizon, dataset[0]))

    dgraph = cdg.ConceptDependencyGraph()
    dgraph.init_default_tree(n_concepts)

    # create the model and simulators
    student2 = st.Student2(n_concepts, True)
    test_student = student2
    stu = test_student.copy()
    stu.reset()
    stu.knowledge[0] = 1  # initialize the first concept to be known
    sim = st.StudentExactSim(stu, dgraph)

    # load the model
    if chkpt is not None:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=False)
        model.load(chkpt)
    else:
        model = dmc.DynamicsModel(model_id=model_id,
                                  timesteps=horizon,
                                  load_checkpoint=True)
    # initialize the dktcache to speed up DKT queries
    dktcache = dict()

    print('Testing model multistep: {}'.format(model_id))

    # make the model
    dktmodel = dmc.RnnStudentSim(model)

    # accumulate error
    mse_acc = 0.0
    for i in six.moves.range(len(dataset)):
        curr_mse = 0.0
        curr_traj = dataset[i]
        curr_state = DKTState(dktmodel, sim, 1, horizon, SPARSE, dktcache,
                              False)
        for t in six.moves.range(horizon - 1):
            # advance the DKT, then compare prediction with the data, up to the last prediction
            curr_conceptvec = curr_traj[t][0]
            curr_concept = np.nonzero(curr_conceptvec)[0]
            curr_ob = int(curr_traj[t][1])

            next_conceptvec = curr_traj[t + 1][0]
            next_concept = np.nonzero(next_conceptvec)[0]
            next_ob = int(curr_traj[t + 1][1])

            # advance the DKT
            curr_state = curr_state.perform(
                st.StudentAction(curr_concept, curr_conceptvec))
            next_probs = curr_state.get_probs()

            # compute and accumulate the mse
            diff = next_probs[next_concept] - next_ob
            curr_mse += diff * diff

            #debugging
            #six.print_('traj {} step {} actvec {} act {} ob {} next probs {} diff {}'.format(i,t,curr_conceptvec,curr_concept,curr_ob,next_probs,diff))
        # average mse per step
        mse_acc += curr_mse / (horizon - 1)

        #six.print_('mse per step acc {}'.format(mse_acc))
    # return the average MSE per step in a trajectory
    return mse_acc / len(dataset)
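
The error metric is the squared difference between the DKT's predicted probability for the next exercise's concept and the observed 0/1 outcome, averaged per step and then per trajectory. A small sketch of the per-trajectory part with hypothetical predictions and observations:

import numpy as np

# hypothetical predicted probabilities for the next exercise's concept at each step
next_probs = np.array([0.8, 0.6, 0.9])
# hypothetical observed outcomes for those exercises
next_obs = np.array([1, 0, 1])

curr_mse = np.sum((next_probs - next_obs) ** 2)
mse_per_step = curr_mse / len(next_obs)
print(mse_per_step)  # (0.04 + 0.36 + 0.01) / 3 ~= 0.1367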
Example #11
def generate_student_sample(concept_tree,
                            seqlen=100,
                            student=None,
                            initial_knowledge=None,
                            policy=None,
                            epsilon=None,
                            verbose=False):
    '''
    :param concept_tree: concept dependency graph
    :param seqlen: number of exercises the student will do.
    :param student: Student object to use; if None, a new st.Student() is created.
    :param initial_knowledge: initial knowledge of the student. If None, all concepts start unknown except concept 0, which is set to known.
    :param policy: policy used to generate the exercise sequence ('expert', 'egreedy', 'modulo' or 'random').
    :param epsilon: epsilon for the egreedy policy
    :param verbose: if True, print out debugging / progress statements
    :return: tuple of tuples, where each inner tuple consists of
    (exercise concept vector, 0 or 1 indicating success of the student on that exercise,
    knowledge of the student after doing the exercise, student state before the exercise).
    This tuple has length seqlen.
    '''
    n_concepts = concept_tree.n
    if initial_knowledge is None:
        initial_knowledge = np.zeros((n_concepts, ))
        initial_knowledge[0] = 1
    if student is None:
        s = st.Student()
    else:
        s = student
        s.reset()  # make sure to reset to initial conditions for this sample
    s.knowledge = initial_knowledge

    # if not exercise_seq and policy == 'expert':
    #     return _generate_student_sample_with_expert_policy(student=s, seqlen=seqlen, verbose=verbose)

    if (policy == 'modulo' or policy == 'random'):
        # pre-generate the exercise sequence; for the expert/egreedy policies the
        # next exercise has to be chosen online, inside the loop below.
        exercise_seq = []
        for i in six.moves.range(seqlen):
            concepts = np.zeros((n_concepts, ), dtype=int)
            if policy == 'modulo':
                # choose exercise with modulo op. This imposes an ordering on exercises.
                conceptix = i % n_concepts
                concepts[conceptix] = 1
            elif policy == 'random':
                # choose one random concept for this exercise
                conceptix = np.random.randint(n_concepts)
                concepts[conceptix] = 1
            ex = st.StudentAction(conceptix, concepts)
            exercise_seq.append(ex)

    # Go through sequence of exercises and record whether student solved each or not
    student_performance = []
    student_knowledge = []
    student_state = []
    n_exercises_to_mastery = -1
    exercises = []  # store the sequence of exercises as numpy arrays (instead of arrays of exercise objects)
    for i in six.moves.range(seqlen):
        # print (s.knowledge)
        # store current states
        student_state.append(s.get_state())
        if policy == 'expert':
            ex = sample_expert_action(concept_tree, s.knowledge)
        elif policy == 'egreedy':
            ex = egreedy_expert(concept_tree, s.knowledge, epsilon)
        else:
            ex = exercise_seq[i]
        result = s.do_exercise(concept_tree, ex)
        # makes the assumption that an exercise is equivalent to the concepts it practices
        exercises.append(ex.conceptvec)
        student_performance.append(result)
        student_knowledge.append(copy.deepcopy(s.knowledge))
        if np.sum(s.knowledge) == n_concepts and n_exercises_to_mastery == -1:
            # if verbose and n_exercises_to_mastery == -1:
            n_exercises_to_mastery = i + 1
    if verbose:
        if n_exercises_to_mastery != -1:
            print("learned all concepts after {} exercises.".format(
                n_exercises_to_mastery))
        else:
            print(
                "Did not learn all concepts after doing {} exercises.".format(
                    seqlen))
    #six.print_(student_performance)
    student_sample = tuple(
        six.moves.zip(exercises, student_performance, student_knowledge,
                      student_state))
    #six.print_(student_sample)
    return student_sample
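
A usage sketch (my assumption, mirroring the setup in Example #10): build the default dependency tree, then generate one expert-policy trajectory with the default Student created inside the function.

import concept_dependency_graph as cdg

dgraph = cdg.ConceptDependencyGraph()
dgraph.init_default_tree(5)

sample = generate_student_sample(dgraph, seqlen=20, policy='expert', verbose=True)
# each entry: (exercise concept vector, correct 0/1, knowledge after, state before)
conceptvec, correct, knowledge, state = sample[0]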