Example #1
0
 def setUp(self):
     """Create the shared fixture: two variables, three literals, one assignment.

     ``assign`` is called on variable "2" with ``self.a``; variable "1" is
     only wrapped in literals.
     """
     self.v1, self.v2 = Variable("1"), Variable("2")
     # l1 and l2 rely on Literal's default second argument; l3 passes False.
     self.l1, self.l2 = Literal(self.v1), Literal(self.v2)
     self.l3 = Literal(self.v1, False)
     reason = Clause(self.l2)
     self.a = Assignment(1, self.v2, True, reason)
     self.v2.assign(self.a)
Example #2
0
    def forward(self, v, kn, ko, s, hidden):
        """Predict a score for the current step and update the recurrent state.

        Args:
            v: feature vector of the current item.  It is flattened with
                ``v.view(-1, 1)`` for the attention product and concatenated
                along dim 0, so it is presumably 1-D (confirm with caller).
            kn: knowledge vector multiplied against ``self.knowledge_memory``.
            ko: not used by this method; kept so the signature is unchanged.
            s: score tensor appended to the RNN input.  When ``score_mode``
                is not ``'concat'`` it also gates which half of the doubled
                input carries ``v`` (``s >= 0.5`` vs ``s < 0.5``).
            hidden: ``None`` on the first step, otherwise a ``(h, vs, hs)``
                triple: the RNN state, the stored item vectors and the
                stored states that are attended over.

        Returns:
            ``(predict_score.view(1), h, beta)`` where ``h`` is the new RNN
            state and ``beta`` holds the softmaxed top-k attention weights
            (``None`` when ``hidden`` is ``None``).
        """
        if hidden is None:
            h = self.h_initial.view(self.num_layers, self.know_length,
                                    self.seq_hidden_size)
            attn_h = self.h_initial
            beta = None
        else:
            h, vs, hs = hidden

            # beta: softmax over the (at most k) largest dot products
            # between the stored item vectors and the current item
            beta = torch.mm(vs, v.view(-1, 1)).view(-1)
            beta, idx = beta.topk(min(len(beta), self.k), sorted=False)
            beta = nn.functional.softmax(beta.view(1, -1), dim=-1)

            # one flattened row per stored state, then a beta-weighted sum
            hs = hs.view(-1, self.know_length * self.seq_hidden_size)
            attn_h = torch.mm(beta, torch.index_select(hs, 0, idx)).view(-1)

        # alpha: softmax over dot products of the knowledge memory with kn
        alpha = torch.mm(self.knowledge_memory, kn.view(-1, 1)).view(-1)
        alpha = nn.functional.softmax(alpha.view(1, -1), dim=-1)

        # alpha-weighted mix of the per-knowledge states feeds the scorer
        hkp = torch.mm(alpha,
                       attn_h.view(self.know_length,
                                   self.seq_hidden_size)).view(-1)
        pred_v = torch.cat([v, hkp]).view(1, -1)
        predict_score = self.score_layer(pred_v)

        # seq states update
        if self.score_mode == 'concat':
            x = v
        else:
            x = torch.cat([
                v * (s >= 0.5).type_as(v).expand_as(v),
                v * (s < 0.5).type_as(v).expand_as(v)
            ])
        x = torch.cat([x, s])

        # every knowledge slot receives the same input, scaled by its alpha
        xk = x.view(1, -1).expand(self.know_length, -1)
        xk = alpha.view(-1, 1) * xk

        _, h = self.rnn(xk.unsqueeze(0), h)
        return predict_score.view(1), h, beta
Example #3
0
    def interpret(self, lines):
        """Parse DIMACS-style CNF text into a list of linked ``Clause`` objects.

        Every integer token is a literal (a leading ``-`` makes the second
        ``Literal`` argument ``False``); a ``0`` token closes the current
        clause.  Text from a ``c`` or ``p`` character to the end of its line
        is ignored.  ``self.known`` is rebuilt as a ``name -> Variable`` map
        and each variable's ``count`` is incremented once per occurrence.
        Literals that follow the last ``0`` are dropped.

        The ``p [cnf] <vars> <clauses>`` line is now found anywhere in the
        text (typical files start with ``c`` comment lines), not only when
        it is the very first line.

        Args:
            lines: the whole CNF text as a single string.

        Returns:
            The clauses in input order; ``link()`` has been called on each.

        Raises:
            AssertionError: if a problem line is present and its variable or
                clause count disagrees with what was parsed.
        """
        literal_re = re.compile(r"(-?)(\d+)")
        self.known = dict()
        clauses = []
        current = []

        header = re.search(r"^[ \t]*p[ \t]+(?:cnf[ \t]+)?(\d+)[ \t]+(\d+)[ \t]*$",
                           lines, re.MULTILINE)
        checksum = header.groups() if header else None

        # Strip comment/problem text up to (not including) the newline, so a
        # final comment without a trailing newline is removed too and the
        # tokens on neighbouring lines can never be fused together.
        body = re.sub(r"[cp].*", "", lines)
        for token in re.split(r"\s+", body):
            m = literal_re.match(token)
            if not m:
                continue
            varname = m.group(2)
            if varname == "0":
                # clause terminator
                addition = Clause(*current)
                addition.link()
                clauses.append(addition)
                current = []
                continue
            # Only build a Variable the first time a name is seen.
            var = self.known.get(varname)
            if var is None:
                var = Variable(varname)
                self.known[varname] = var
            var.count += 1
            current.append(Literal(var, not bool(m.group(1))))

        if checksum:
            # Raised explicitly so the check also runs under ``python -O``.
            if len(self.known) != int(checksum[0]):
                raise AssertionError(
                    "header declares %s variables, found %d"
                    % (checksum[0], len(self.known)))
            if len(clauses) != int(checksum[1]):
                raise AssertionError(
                    "header declares %s clauses, found %d"
                    % (checksum[1], len(clauses)))
        return clauses
Example #4
0
 def test_assignment(self):
     """An assignment pushed on the stack is popped with its variable intact."""
     var = Variable("1")
     stack = AssignmentStack()
     stack.push(Assignment(1, var, True, Clause(Literal(var))))
     popped = stack.pop()
     self.assertEqual(popped.var, var)
Example #5
0
    def forward(self, v, s, hidden):
        """Score the current item and advance the RNN by one step.

        Args:
            v: feature vector of the current item (used as ``v.view(-1, 1)``
                and concatenated along dim 0).
            s: score tensor appended to the RNN input.
            hidden: ``None`` on the first step, otherwise ``(h, vs, hs)``:
                the RNN state, stored item vectors and stored states.

        Returns:
            ``(score, h)``: the output of ``self.score`` and the new RNN state.
        """
        if hidden is None:
            h = self.initial_h.view(self.num_layers, 1, self.seq_hidden_size)
            attn_h = self.initial_h
            length = Variable(torch.FloatTensor([0.]))
        else:
            h, vs, hs = hidden

            # dot-product similarity of every stored vector with v,
            # restricted to the (at most) k largest and softmaxed
            sims = torch.mm(vs, v.view(-1, 1)).view(-1)
            top_k = min(len(sims), self.k)
            weights, idx = sims.topk(top_k, sorted=False)
            weights = nn.functional.softmax(weights.view(1, -1), dim=-1)

            length = Variable(torch.FloatTensor([weights.size()[1]]))

            # one flattened row per stored state, then a weighted sum
            flat_hs = hs.view(-1, self.num_layers * self.seq_hidden_size)
            picked = torch.index_select(flat_hs, 0, idx)
            attn_h = torch.mm(weights, picked).view(-1)

        # with_last additionally exposes the raw state and attended count
        parts = [v, attn_h]
        if self.with_last:
            parts += [h.view(-1), length]
        pred_v = torch.cat(parts).view(1, -1)
        score = self.score(pred_v)

        if self.score_mode == 'concat':
            x = v
        else:
            high = (s >= 0.5).type_as(v).expand_as(v)
            low = (s < 0.5).type_as(v).expand_as(v)
            x = torch.cat([v * high, v * low])
        x = torch.cat([x, s])

        _, h = self.rnn(x.view(1, 1, -1), h)
        return score, h
Example #6
0
 def PickBranchingVariable(self):
     """Choose the next variable to branch on and the polarity to try.

     Scans ``self.vars`` and keeps the unassigned variable with the highest
     ``count``; on ties the later one wins because the comparison is ``>=``.
     If nothing qualifies, the placeholder ``Variable('tmp')`` is returned.

     Returns:
         ``(variable, polarity)``; the polarity is always ``False``.
     """
     best = Variable('tmp')
     for candidate in self.vars:
         if not candidate.isAssigned() and candidate.count >= best.count:
             best = candidate
     pol = False
     # Log the variable actually chosen.  The loop variable used before was
     # merely the last element scanned and was undefined for an empty list.
     logging.info("Branching dl: %s -> %s = %s", self.dl + 1, best.name, pol)
     return best, pol
Example #7
0
 def __init__(self):
     """Give each tracked statistic its own freshly constructed ``Variable``."""
     stat_names = (
         "resWins", "spyWins",
         "votesRes", "votesSpy",
         "spyVoted", "spySelected",
         "selections",
     )
     for stat in stat_names:
         setattr(self, stat, Variable())
Example #8
0
    def forward(self, v, kn, ko, s, h, beta=None):
        """Predict a score for the current step and update the recurrent state.

        Args:
            v: feature vector of the current item, concatenated along dim 0.
            kn: knowledge vector multiplied against ``self.knowledge_memory``;
                only read when ``beta`` is ``None``.
            ko: not used by this method; kept so the signature is unchanged.
            s: score tensor appended to the RNN input.
            h: current RNN state, or ``None`` to start from ``self.h_initial``.
            beta: optional precomputed weights over the knowledge slots.  When
                omitted they are the softmax of ``knowledge_memory @ kn``.

        Returns:
            ``(predict_score.view(1), h)`` with ``h`` the new RNN state.
        """
        if h is None:
            h = self.h_initial.view(self.num_layers, self.know_length,
                                    self.seq_hidden_size)

        # weights over knowledge slots from a dot product, unless supplied
        if beta is None:
            alpha = torch.mm(self.knowledge_memory, kn.view(-1, 1)).view(-1)
            beta = nn.functional.softmax(alpha.view(1, -1), dim=-1)

        # predict score at time t from the beta-weighted mix of slot states
        hkp = torch.mm(beta, h.view(self.know_length,
                                    self.seq_hidden_size)).view(-1)
        pred_v = torch.cat([v, hkp]).view(1, -1)
        predict_score = self.score_layer(pred_v)

        # seq states update
        if self.score_mode == 'concat':
            x = v
        else:
            x = torch.cat([
                v * (s >= 0.5).type_as(v).expand_as(v),
                v * (s < 0.5).type_as(v).expand_as(v)
            ])
        x = torch.cat([x, s])

        # every knowledge slot receives the same input, scaled by its weight
        xk = x.view(1, -1).expand(self.know_length, -1)
        xk = beta.view(-1, 1) * xk

        _, h = self.rnn(xk.unsqueeze(0), h)
        return predict_score.view(1), h
Example #9
0
class TestLiteral(unittest.TestCase):
    """Equality, negation and value checks for ``Literal``."""

    def setUp(self):
        """Two variables, three literals; ``assign`` is called on variable "2"."""
        self.v1, self.v2 = Variable("1"), Variable("2")
        self.l1, self.l2 = Literal(self.v1), Literal(self.v2)
        self.l3 = Literal(self.v1, False)
        reason = Clause(self.l2)
        self.a = Assignment(1, self.v2, True, reason)
        self.v2.assign(self.a)

    def test_eq(self):
        """A literal equals one built the same way and differs from the others."""
        twin = Literal(self.v1)
        self.assertEqual(twin, self.l1)
        for other in (self.l3, self.l2):
            self.assertNotEqual(self.l1, other)

    def test_neg(self):
        """Unary minus on l1 yields a literal equal to l3."""
        negated = -self.l1
        self.assertEqual(self.l3, negated)

    def test_value(self):
        """l2 wraps the variable assigned in setUp, so its value is truthy."""
        current = self.l2.value()
        self.assertTrue(current)
Example #10
0
 def _estimateProb(self, dic, fullkey):
     """Estimate ``total / samples`` for ``fullkey``, backing off when data is thin.

     Starts from the exact entry (plus a 0.5 / 1 uncertainty bias) and, while
     the sample count stays at or below the bound for the current level,
     mixes in the entries of keys produced by ``maskSome`` with 1, 2, ...
     masked units.
     """
     # masked units -> (sample bound that stops the back-off, starting weight)
     prior_params = {1: (50, 8), 2: (30, 4), 3: (20, 2), 4: (10, 1)}
     est = Variable()
     exact = dic[fullkey]
     est.total = 0.5 + exact.total  # uncertainty bias
     est.samples = 1 + exact.samples
     for skipunits in range(1, len(fullkey) + 1):
         bound, weight = prior_params[skipunits]
         if est.samples > bound:
             break
         for nkey in maskSome(list(fullkey), skipunits):
             key = tuple(nkey)
             if key in dic:
                 tmp = dic[key]
                 if tmp.samples != 0:
                     # weight only ever shrinks within one masking level
                     weight = min(weight, tmp.samples / float(skipunits))
                     est.samples += weight
                     est.total += weight * tmp.total / float(tmp.samples)
     return est.total / float(est.samples)
Example #11
0
 def _estimateProb(self, dic, fullkey):
     """Return a smoothed ratio for ``fullkey``.

     When the exact scenario has too few samples, entries for progressively
     more heavily masked keys (see ``maskSome``) are blended in as a prior.
     """
     prior_params = {
         1: (50, 8),
         2: (30, 4),
         3: (20, 2),
         4: (10, 1),
     }
     v = Variable()
     full_stats = dic[fullkey]
     v.total = 0.5 + full_stats.total   # uncertainty bias
     v.samples = 1 + full_stats.samples
     for skipunits in range(1, len(fullkey) + 1):
         bound, weight = prior_params[skipunits]
         if v.samples > bound:
             # enough evidence already; stop loosening the key
             break
         masked_keys = (tuple(nkey)
                        for nkey in maskSome(list(fullkey), skipunits))
         for masked in masked_keys:
             if masked not in dic:
                 continue
             stats = dic[masked]
             if stats.samples == 0:
                 continue
             cap = stats.samples / float(skipunits)
             weight = min(weight, cap)
             v.samples += weight
             v.total += weight * stats.total / float(stats.samples)
     return v.total / float(v.samples)
Example #12
0
def predict(model, args):
    """Serve predictions interactively: one JSON request per stdin line.

    Each input line must be a JSON object with a ``ref`` list (history items
    with ``fea``, ``s`` and ``t`` keys, used to warm up the hidden state) and
    a ``pred`` list (items with ``fea`` and ``t`` keys to be scored).  For
    every line the list of predicted scores is printed; ``[]`` is printed for
    a line that is not valid JSON, is not an object, or lacks those keys.
    The loop ends at EOF.

    Args:
        model: called as ``model(x, score, t, h)`` and expected to return
            ``(s, h)``; must expose ``args`` and ``words``.
        args: namespace providing ``snapshot`` and ``workspace``.
    """
    try:
        torch.set_grad_enabled(False)
    except AttributeError:
        # torch without set_grad_enabled: the volatile flags below are
        # then what keeps autograd from tracking the inference pass.
        pass
    logging.info('model: %s, setup: %s',
                 type(model).__name__, str(model.args))
    logging.info('loading dataset')

    if args.snapshot is None:
        epoch = load_last_snapshot(model, args.workspace)
    else:
        epoch = args.snapshot
        load_snapshot(model, args.workspace, epoch)
    logging.info('loaded model at epoch %s', str(epoch))

    to_categorical = Categorical('</s>')
    to_categorical.load_dict(model.words)
    trans = to_categorical(Words(':', null='</s>'))

    while True:
        # loop over inputs
        try:
            line = input()
        except EOFError:
            logging.info('bye')
            break

        try:
            # ``encoding=`` was dropped: it was ignored on Python 3 and is
            # rejected with TypeError since Python 3.9.
            obj = json.loads(line)
            ref_seq = obj['ref']
            pred_seq = obj['pred']
        except (json.decoder.JSONDecodeError, KeyError, TypeError):
            # TypeError: valid JSON that is not an object (list, number...)
            print('[]')
            continue

        # warm up the hidden state on the reference (already answered) items
        h = None
        for item in ref_seq:
            x = trans.apply(None, item['fea'])
            x = Variable(torch.LongTensor(x), volatile=True)
            # NOTE(review): 's' is now passed as the score and 't' as the
            # time, matching the prediction loop below where 't' feeds the
            # time argument; the two keys used to be swapped here.
            score = Variable(torch.FloatTensor([item['s']]), volatile=True)
            t = Variable(torch.FloatTensor([item['t']]), volatile=True)
            _, h = model(x, score, t, h)

        pred_scores = []

        # score each requested item against the warmed-up state; h is not
        # advanced between predictions
        for item in pred_seq:
            x = trans.apply(None, item['fea'])
            x = Variable(torch.LongTensor(x), volatile=True)
            score = Variable(torch.FloatTensor([0.]), volatile=True)
            t = Variable(torch.FloatTensor([item['t']]), volatile=True)
            s, _ = model(x, score, t, h)
            pred_scores.append(s.cpu().data[0][0])

        print(pred_scores)
Example #13
0
 def __init__(self,
              topic_size,
              seq_hidden_size,
              k,
              score_mode,
              num_layers=1):
     """Store the sizes and build the GRU, the linear scorer and initial state.

     The GRU input is ``topic_size + 1`` wide in ``'concat'`` mode and
     ``topic_size * 2 + 1`` otherwise.  ``initial_h`` is a plain ``Variable``
     of zeros created with ``requires_grad=True``.
     """
     super(AttnSeqTimeDecayModel, self).__init__()
     self.topic_size = topic_size
     self.seq_hidden_size = seq_hidden_size
     self.num_layers = num_layers
     self.score_mode = score_mode

     if score_mode == 'concat':
         rnn_input_size = topic_size + 1
     else:
         rnn_input_size = topic_size * 2 + 1
     # Submodules are created in the same order (GRU, then Linear) so that
     # seeded parameter initialisation is unchanged.
     self.rnn = nn.GRU(rnn_input_size, seq_hidden_size, num_layers)
     self.score = nn.Linear(topic_size + seq_hidden_size, 1)
     self.k = k

     flat_state = torch.zeros(num_layers * seq_hidden_size)
     self.initial_h = Variable(flat_state, requires_grad=True)
Example #14
0
class TestVariable(unittest.TestCase):
    """String form and assign / unassign behaviour of ``Variable``."""

    def setUp(self):
        """One variable plus an assignment for it that is not yet applied."""
        self.v = Variable("1")
        reason = Clause(Literal(self.v))
        self.a = Assignment(1, self.v, True, reason)

    def test_repr(self):
        """str() of a variable is the name it was built with."""
        name = "somename"
        self.assertEqual(str(Variable(name)), name)

    def test_value(self):
        """value() is None before assign, truthy after; unassign clears it."""
        var = self.v
        # starts unassigned
        self.assertEqual(var.value(), None)
        # apply the assignment built in setUp
        var.assign(self.a)
        self.assertTrue(var.value())
        # and take it back
        var.unassign()
        self.assertFalse(var.isAssigned())
Example #15
0
 def test_repr(self):
     """str() of a variable is the name it was built with."""
     name = "somename"
     self.assertEqual(str(Variable(name)), name)
Example #16
0
 def setUp(self):
     """One variable plus an assignment for it that is not yet applied."""
     self.v = Variable("1")
     reason = Clause(Literal(self.v))
     self.a = Assignment(1, self.v, True, reason)
Example #17
0
 def __init__(self, topic_size, k):
     """Record the sizes and create the one-element initial guess.

     ``initial_guess`` is a plain ``Variable`` holding a single zero,
     created with ``requires_grad=True``.
     """
     super(AttnModel, self).__init__()
     self.k = k
     self.user_emb_size = topic_size
     zero_guess = torch.zeros(1)
     self.initial_guess = Variable(zero_guess, requires_grad=True)
Example #18
0
 def total(self):
     """Return a ``Variable`` combining the ``resWins`` and ``spyWins`` tallies."""
     res, spy = self.resWins, self.spyWins
     combined_total = res.total + spy.total
     combined_samples = res.samples + spy.samples
     return Variable(combined_total, combined_samples)
Example #19
0
 def default_hidden(self, batch_size):
     """Return the all-zero initial hidden state as a 2-tuple.

     The first element always has shape ``(2, batch_size, emb_size)``.  The
     second has shape ``(num_layers - 1, batch_size, emb_size)`` when there
     is more than one layer and is ``None`` otherwise.
     """
     first = Variable(torch.zeros(2, batch_size, self.emb_size))
     rest = None
     if self.num_layers > 1:
         rest = Variable(torch.zeros(self.num_layers - 1,
                                     batch_size, self.emb_size))
     return first, rest
Example #20
0
 def default_hidden(self):
     """Return an all-zero state of shape ``(num_layers, 1, seq_hidden_size)``."""
     shape = (self.num_layers, 1, self.seq_hidden_size)
     return Variable(torch.zeros(*shape))
Example #21
0
class TestClause(unittest.TestCase):
    """Construction, equality, membership, status and resolution of ``Clause``."""

    def setUp(self):
        """Three variables; ``assign`` is called on "2" only.

        l1 wraps variable "1"; l2 and l3 both wrap variable "2", l3 with
        ``False`` as the second ``Literal`` argument.
        """
        self.v1, self.v2, self.v3 = Variable("1"), Variable("2"), Variable("3")
        self.l1 = Literal(self.v1)
        self.l2 = Literal(self.v2)
        self.l3 = Literal(self.v2, False)
        reason = Clause(self.l2)
        self.a1 = Assignment(1, self.v2, True, reason)
        self.v2.assign(self.a1)

    def test_init(self):
        """Neither an empty nor a one-literal clause has revA / revB set."""
        watch_attrs = ('revA', 'revB')
        empty = Clause()
        self.assertFalse(any(hasattr(empty, attr) for attr in watch_attrs))
        single = Clause(Literal(self.v1, True))
        self.assertFalse(any(hasattr(single, attr) for attr in watch_attrs))

    def test_eq(self):
        """Clauses over the same literal are equal, over different ones not."""
        first = Clause(self.l1)
        other = Clause(self.l2)
        twin = Clause(self.l1)
        self.assertEqual(first, twin)
        self.assertNotEqual(first, other)

    def test_contains(self):
        """``in`` reports whether a literal belongs to the clause."""
        with_l1 = Clause(self.l1)
        with_l2 = Clause(self.l2)
        self.assertIn(self.l1, with_l1)
        self.assertNotIn(self.l1, with_l2)

    def test_status(self):
        """status() for empty, one-, two- and three-literal clauses."""
        # (literals, expected status), checked in this order
        unlinked_cases = [
            ((), "UNSAT"),                    # empty clause
            ((self.l1,), "UNIT"),             # single literal, unassigned
            ((self.l2,), "SAT"),              # single literal, satisfied
            ((self.l3,), "UNSAT"),            # single literal, falsified
            ((self.l1, self.l3), "UNIT"),     # two literals, unit
            ((self.l2, self.l3), "SAT"),      # two literals, sat
            ((self.l1, self.l2), "SAT"),
            ((self.l3, self.l3), "UNSAT"),    # two literals, unsat
        ]
        for literals, expected in unlinked_cases:
            self.assertEqual(Clause(*literals).status(), expected)

        # multi-literal
        # c = Clause(self.l3, self.l1, self.l3)
        # self.assertEqual(c.status(),"UNIT")
        unresolved = Clause(self.l3, self.l1, self.l1)
        self.l3.occurrence_link(unresolved)
        self.assertEqual(unresolved.status(), "UNRESOLVED")

        linked = Clause(self.l3, self.l1, self.l2)
        linked.link()
        self.assertEqual(linked.status(), "SAT")

    def test_resolution(self):
        """Resolving (l1, l2) with (l1, l3) leaves the clause (l1)."""
        left = Clause(self.l1, self.l2)
        right = Clause(self.l1, self.l3)
        resolvent = left.resolve(right)
        self.assertEqual(resolvent, Clause(self.l1))
    step_score = [[], []]

    print('Model initialized, starting training...')

    for step in range(n_steps):
        # sample from agent
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)

        # Remove duplicates, ie only consider unique seqs
        unique_idx = unique(seqs)
        seqs = seqs[unique_idx]
        agent_likelihood = agent_likelihood[unique_idx]
        entropy = entropy[unique_idx]

        # Get prior likelihood and score
        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = seq_to_smiles(seqs, voc)
        score = scoring_function(smiles)

        # Calculate augmented likelihood
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)

        # Experience replay
        # First example
        if experience_replay and len(experience) > 4:
            exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
            exp_agent_likelihood, exp_entropy = Agent.likelihood(exp_seqs.long())
            exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
            exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
            loss = torch.cat((loss, exp_loss), 0)
Example #23
0
def main(options=None):
    """Build the counterpoint CSP for a cantus firmus, solve it, write results.

    One ``cp`` (counterpoint) and one ``cf`` (cantus firmus) variable is
    created per bar.  The CSP is solved with backtracking search, optionally
    after AC3, and a deep copy of it is solved with simulated annealing.
    Solutions are written with ``write_solution``; when ``options.testing``
    is set, timing rows are appended to the two CSV files.

    Every ``print`` takes a single argument so the function behaves the same
    under Python 2 and Python 3.

    Args:
        options: parsed options object.  Despite the ``None`` default it is
            required: ``num_bars``, ``solution_file``, ``sa_file``,
            ``test_dir``, ``testing``, ``dfs_csv``, ``sa_csv``,
            ``arc_consistency``, ``preset_song`` and ``random`` are read.

    Returns:
        The solved ``Csp`` when ``options.testing`` is truthy, otherwise
        ``None``.  ``None`` is also returned when AC3 fails or when
        backtracking finds no solution.
    """
    num_bars = options.num_bars
    solution_file = options.solution_file
    sa_file = options.sa_file

    test_dir = options.test_dir
    testing = options.testing
    dfs_csv = options.dfs_csv
    sa_csv = options.sa_csv

    arc_consistency = options.arc_consistency
    preset = options.preset_song

    # preset name -> note pitches; a preset also fixes the number of bars
    preset_notes = {
        'mary': [64, 62, 60, 62, 64, 64, 64, 62, 62, 62, 64, 67, 67, 64, 62,
                 60, 62, 64, 64, 64, 64, 62, 62, 64, 62, 60],
        'ariana': [69, 67, 66, 67, 66, 64, 66, 64],
        'shootingstar': [71, 71, 72, 67, 64, 71, 71, 72, 67, 64],
    }

    if preset in preset_notes:
        print('options.preset_song is {}'.format(preset))
    elif preset != '':
        print('preset song not found, proceeding with default')

    csp = Csp()
    cp = []      # list of counterpoint variables
    cf = []      # list of cantus firmus variables
    binary = []  # list of Constraint objects, one per counterpoint variable

    if preset in preset_notes:
        note_list = preset_notes[preset]
        num_bars = len(note_list)
    elif testing:
        print('Generating a cantus firmus over ' + str(num_bars) + ' bars')
        note_list = [random.choice(NOTE_RANGE) for _ in range(num_bars)]
    else:
        note_list = [57, 60, 59, 57]  # default

    for i in range(num_bars):
        cp.append(Variable('cp' + str(i)))
        csp.addToVariables(cp[i])
        binary.append(Constraint())
        binary[i].setVariable(cp[i])

        cf.append(Variable('cf' + str(i)))
        csp.addToVariables(cf[i])

    # Each cantus firmus variable gets exactly its given note as domain.
    # NOTE(review): this raises IndexError when note_list is longer than
    # num_bars (possible with the 4-note default) - unchanged from before.
    for i, pitch in enumerate(note_list):
        cf[i].addToDomain(Note(pitch))

    print('Cantus firmus sequential note pitches: ')
    print(note_list)

    # Counterpoint domains: a narrow range for the penultimate bar, a wide
    # one everywhere else.  An explicit loop is used because map() is lazy
    # on Python 3 and would leave the domains empty.
    for i in range(num_bars):
        pitches = range(60, 70) if i == (num_bars - 2) else range(30, 100)
        for pitch in pitches:
            cp[i].addToDomain(Note(pitch))

    # binary constraints, p. 109 of Ovans: melodic links between neighbours
    # (an extra harmonic link to binary[i+1] was disabled in the original)
    for i in range(1, num_bars):
        _attach_link(cp[i], binary[i - 1], Note.melodic)
        _attach_link(cp[i - 1], binary[i], Note.melodic)

    _attach_link(cf[0], binary[0], Note.perfectCfHarmonic)

    # harmonic constraints for each cantus firmus note
    for i in range(1, num_bars - 2):
        _attach_link(cf[i], binary[i], Note.harmonic)

    # perfect harmonic constraints in last two bars
    for i in range(num_bars - 2, num_bars):
        _attach_link(cf[i], binary[i], Note.perfectHarmonic)

    # simulated annealing works on its own copy of the unsolved problem
    test_csp = copy.deepcopy(csp)

    if arc_consistency:
        arc_start = timeit.default_timer()
        consistent = csp.AC3()
        arc_stop = timeit.default_timer()
        if not consistent:
            print('Failed to make initial arcs consistent after {} seconds.'.format(arc_stop - arc_start))
            print('Not consistent')
            return None
        print('Made initial arcs consistent in {} seconds.'.format(arc_stop - arc_start))
        print('Arc consistent - looking for a solution with DFS')

        sol_start = timeit.default_timer()
        csp.backtracking_search()
        sol_stop = timeit.default_timer()

        sim_start, sim_stop = _run_sim_annealing(test_csp)
    else:
        sol_start = timeit.default_timer()
        csp.backtracking_search()
        sol_stop = timeit.default_timer()
        print('Attempt to find a DFS solution finished after {} seconds.'.format(sol_stop - sol_start))

        sim_start, sim_stop = _run_sim_annealing(test_csp)
        print('Looking for a solution with DFS')

    if csp.one_sol is None:
        print('No solution found')
        return None

    print('Found a solution with arc consistency! Expanded {} nodes with {} backtracks'.format(csp.getNodes(), csp.getBts()))

    # Log stats in csv file for testing
    if testing:
        dfs_trial_info = '{},{},{},{}\n'.format(num_bars, csp.getNodes(), csp.getBts(), sol_stop - sol_start)
        with open(dfs_csv, 'a+') as f:
            f.write(dfs_trial_info)

    write_solution(csp.one_sol, num_bars=num_bars, solution_file=test_dir + '/' + solution_file, random_length=options.random)
    if test_csp.getCost(test_csp.vars) == 0:
        write_solution(test_csp.vars, num_bars=num_bars, solution_file=test_dir + '/' + sa_file, random_length=options.random)

        # Log stats in csv file for testing
        if testing:
            sim_trial_info = '{},{},{},{}\n'.format(num_bars, test_csp.getCost(test_csp.vars), test_csp.iters, sim_stop - sim_start)
            with open(sa_csv, 'a+') as f:
                f.write(sim_trial_info)
    else:
        print('Simulated annealing failed, not writing to output')

    if testing:
        return csp
    return None


def _attach_link(variable, node, label):
    """Add a ``Link`` to ``node`` carrying ``label`` to ``variable``'s neighbors."""
    link = Link()
    link.setNode(node)
    link.setLabel(label)
    variable.addToNeighbors(link)


def _run_sim_annealing(csp):
    """Run simulated annealing on ``csp``, report it, return ``(start, stop)`` times."""
    print('Trying simulated annealing...')
    start = timeit.default_timer()
    csp.simAnnealing()
    stop = timeit.default_timer()
    print('Completed simulated annealing after {} seconds.'.format(stop - start))
    print('Simulated annealing returns with cost {}, after {} iterations.'.format(csp.getCost(csp.vars), csp.iters))
    return start, stop
Example #24
0
def test(model, args):
    """Evaluate ``model`` on one or more test sets and log RMSE, MAE and accuracy.

    Steps, in order:
      1. turn off gradient tracking when the installed torch supports it;
      2. build ``testsets`` (name, data, reference data) from ``args.dataset``,
         ``args.split_method`` and ``args.ref_set``;
      3. load topic features (a knowledge one-hot table for models whose class
         name starts with ``DK``, ``get_topics`` otherwise);
      4. load the requested or the latest snapshot;
      5. for every test set, run up to 5000 shuffled users through the model
         one item at a time, write the per-item predictions with
         ``print_seq`` and log running metrics every ``args.print_every``
         sequences.

    Args:
        model: called as ``model(x, score, t, h)`` and expected to return
            ``(s, h)``; must expose ``args`` and, for non-DK models, ``words``.
        args: namespace; the fields read here are ``dataset``,
            ``random_level``, ``split_method``, ``frac``, ``ref_set``,
            ``snapshot``, ``workspace``, ``ref_len``, ``test_on_last``,
            ``test_as_seq``, ``test_on_one``, ``loss`` and ``print_every``.
    """
    try:
        torch.set_grad_enabled(False)
    except AttributeError:
        # torch build without set_grad_enabled; carry on regardless
        pass
    logging.info('model: %s, setup: %s' %
                 (type(model).__name__, str(model.args)))
    logging.info('loading dataset')
    data = get_dataset(args.dataset)
    data.random_level = args.random_level

    # Each testsets entry is (name, sequences to score, reference sequences).
    if not args.dataset.endswith('test'):
        if args.split_method == 'user':
            _, data = data.split_user(args.frac)
            testsets = [('user_split', data, {})]
        elif args.split_method == 'future':
            _, data = data.split_future(args.frac)
            testsets = [('future_split', data, {})]
        elif args.split_method == 'old':
            trainset, _, _, _ = data.split()
            data = trainset.get_seq()
            train, user, exam, new = data.split()
            train = train.get_seq()
            user = user.get_seq()
            exam = exam.get_seq()
            new = new.get_seq()
            # pairs: user/no reference, exam/train as reference, new/user
            testsets = zip(['user', 'exam', 'new'], [user, exam, new],
                           [{}, train, user])
        else:
            if args.ref_set:
                ref = get_dataset(args.ref_set)
                ref.random_level = args.random_level
                testsets = [(args.dataset.split('/')[-1], data.get_seq(),
                             ref.get_seq())]
            else:
                testsets = [('student', data.get_seq(), {})]
    else:
        testsets = [('school', data.get_seq(), {})]

    if type(model).__name__.startswith('DK'):
        # DK* models: topic id -> element-wise max over the one-hot rows of
        # its knowledge labels
        # NOTE(review): neither file opened here is closed explicitly.
        topic_dic = {}
        kcat = Categorical(one_hot=True)
        kcat.load_dict(open('data/know_list.txt').read().split('\n'))
        for line in open('data/id_know.txt'):
            uuid, know = line.strip().split(' ')
            know = know.split(',')
            topic_dic[uuid] = \
                torch.LongTensor(kcat.apply(None, know)) \
                .max(0)[0] \
                .type(torch.LongTensor)
        # all-zero fallback used for topics missing from topic_dic
        zero = [0] * len(kcat.apply(None, '<NULL>'))
    else:
        topics = get_topics(args.dataset, model.words)

    if args.snapshot is None:
        epoch = load_last_snapshot(model, args.workspace)
    else:
        epoch = args.snapshot
        load_snapshot(model, args.workspace, epoch)
    logging.info('loaded model at epoch %s', str(epoch))

    # use_cuda is not defined in this function; presumably a module-level flag
    if use_cuda:
        model.cuda()

    for testset, data, ref_data in testsets:
        logging.info('testing on: %s', testset)
        # NOTE(review): f is only closed on the normal path at the end of
        # this loop body; an exception in between leaves it open.
        f = open_result(args.workspace, testset, epoch)

        then = time.time()

        # running sums over the sequences evaluated so far
        total_mse = 0
        total_mae = 0
        total_acc = 0
        total_seq_cnt = 0

        users = list(data)
        random.shuffle(users)
        seq_cnt = len(users)

        MSE = torch.nn.MSELoss()
        MAE = torch.nn.L1Loss()

        # at most 5000 users of the shuffled list are evaluated
        for user in users[:5000]:
            seq = data[user]
            if user in ref_data:
                ref_seq = ref_data[user]
            else:
                ref_seq = []

            # keep only the first occurrence of each topic in the reference
            seq2 = []
            seen = set()
            for item in ref_seq:
                if item.topic in seen:
                    continue
                seen.add(item.topic)
                seq2.append(item)
            ref_seq = seq2

            # same for the test sequence, also dropping topics already seen
            # in the reference
            seq2 = []
            for item in seq:
                if item.topic in seen:
                    continue
                seen.add(item.topic)
                seq2.append(item)
            seq = seq2

            ref_len = len(ref_seq)
            seq = ref_seq + seq
            length = len(seq)

            # a reference shorter than args.ref_len is topped up from the
            # start of the test items; length shrinks by the same amount
            if ref_len < args.ref_len:
                length = length + ref_len - args.ref_len
                ref_len = args.ref_len

            if length < 1:
                continue
            total_seq_cnt += 1

            # per-sequence accumulators
            mse = 0
            mae = 0
            acc = 0

            pred_scores = Variable(torch.zeros(len(seq)))

            s = None
            h = None

            for i, item in enumerate(seq):
                if args.test_on_last:
                    # score the final item of the sequence from the current
                    # state without advancing h
                    # NOTE(review): topics is only assigned in the non-DK
                    # branch above, so this raises NameError for DK* models.
                    x = topics.get(seq[-1].topic).content
                    x = Variable(torch.LongTensor(x), volatile=True)
                    score = Variable(torch.FloatTensor([round(seq[-1].score)]),
                                     volatile=True)
                    t = Variable(torch.FloatTensor([seq[-1].time]),
                                 volatile=True)
                    s, _ = model(x, score, t, h)
                    s_last = torch.clamp(s, 0, 1)
                if type(model).__name__.startswith('DK'):
                    if item.topic in topic_dic:
                        x = topic_dic[item.topic]
                    else:
                        x = zero
                else:
                    x = topics.get(item.topic).content
                x = Variable(torch.LongTensor(x))
                score = Variable(torch.FloatTensor([round(item.score)]),
                                 volatile=True)
                t = Variable(torch.FloatTensor([item.time]), volatile=True)
                if args.test_as_seq and i > ref_len and ref_len > 0:
                    # feed the model its own previous prediction as the score
                    s, h = model(x, s.view(1), t, h)
                else:
                    if ref_len > 0 and i > ref_len and not args.test_on_one:
                        # past the reference: predict but keep h frozen
                        s, _ = model(x, score, t, h)
                    else:
                        s, h = model(x, score, t, h)
                if args.loss == 'cross_entropy':
                    s = F.sigmoid(s)
                else:
                    s = torch.clamp(s, 0, 1)
                if args.test_on_last:
                    pred_scores[i] = s_last
                else:
                    pred_scores[i] = s
                # reference items only warm up the state; they are not scored
                if i < ref_len:
                    continue
                mse += MSE(s, score)
                m = MAE(s, score).data[0]
                mae += m
                acc += m < 0.5

            print_seq(seq,
                      pred_scores.data.cpu().numpy(), ref_len, f,
                      args.test_on_last)

            mse /= length
            mae /= length
            acc /= length

            # NOTE(review): mse is still the int 0 if no item was scored
            # above, in which case .data fails - confirm that length >= 1
            # guarantees at least one scored item.
            total_mse += mse.data[0]
            total_mae += mae
            total_acc += acc

            # report every print_every sequences, and when the counter
            # reaches the total number of users
            if total_seq_cnt % args.print_every != 0 and \
                    total_seq_cnt != seq_cnt:
                continue

            now = time.time()
            duration = (now - then) / 60

            logging.info(
                '[%d/%d] (%.2f seqs/min) '
                'rmse %.6f, mae %.6f, acc %.6f' %
                (total_seq_cnt, seq_cnt,
                 ((total_seq_cnt - 1) % args.print_every + 1) / duration,
                 math.sqrt(total_mse / total_seq_cnt),
                 total_mae / total_seq_cnt, total_acc / total_seq_cnt))
            then = now

        f.close()
Example #25
0
def testseq(model, args):
    """Evaluate ``model`` one user sequence at a time and log running metrics.

    For every test set selected from ``args`` the function replays each
    user's sequence through the model, records at every step the model's
    prediction for the *last* record of the sequence (written out through
    ``print_seq``) and accumulates MSE, MAE and accuracy (absolute error
    below 0.5) over the non-reference part of the sequence.

    Args:
        model: Network whose class name starts with ``'DK'``, ``'RA'`` or
            ``'EK'``; the prefix decides which inputs are fed to it. It must
            expose ``model.args`` (and ``model.words`` when
            ``args.input_text`` is set).
        args: Parsed options. Attributes read here: ``dataset``,
            ``random_level``, ``split_method``, ``frac``, ``ref_set``,
            ``input_knowledge``, ``input_text``, ``snapshot``, ``workspace``,
            ``ref_len``, ``loss`` and ``print_every``.

    Raises:
        ValueError: If the model class name has none of the known prefixes.

    Note:
        Gradient tracking is switched off globally (where the installed
        PyTorch supports it) and is not restored on return.
    """
    try:
        # Older PyTorch releases lack this switch; they rely on the
        # ``volatile=True`` flags passed to ``Variable`` further down.
        torch.set_grad_enabled(False)
    except AttributeError:
        pass
    logging.info('model: %s, setup: %s' %
                 (type(model).__name__, str(model.args)))
    logging.info('loading dataset')

    data = get_dataset(args.dataset)
    data.random_level = args.random_level

    # Each test set is a (name, sequences, reference sequences) triple.
    if not args.dataset.endswith('test'):
        if args.split_method == 'user':
            _, data = data.split_user(args.frac)
            testsets = [('user_split', data, {})]
        elif args.split_method == 'future':
            _, data = data.split_future(args.frac)
            testsets = [('future_split', data, {})]
        elif args.split_method == 'old':
            trainset, _, _, _ = data.split()
            data = trainset.get_seq()
            train, user, exam, new = data.split()
            train = train.get_seq()
            user = user.get_seq()
            exam = exam.get_seq()
            new = new.get_seq()
            testsets = zip(['user', 'exam', 'new'], [user, exam, new],
                           [{}, train, user])
        else:
            if args.ref_set:
                ref = get_dataset(args.ref_set)
                ref.random_level = args.random_level
                testsets = [(args.dataset.split('/')[-1], data.get_seq(),
                             ref.get_seq())]
            else:
                testsets = [('student', data.get_seq(), {})]
    else:
        testsets = [('school', data.get_seq(), {})]

    if args.input_knowledge:
        logging.info('loading knowledge concepts')
        topic_dic = {}
        kcat = Categorical(one_hot=True)
        # Context managers make sure both files are closed again.
        with open(model.args['knows']) as knows_file:
            kcat.load_dict(knows_file.read().split('\n'))
        know_path = 'data/id_firstknow.txt' \
            if 'first' in model.args['knows'] else 'data/id_know.txt'
        with open(know_path) as know_file:
            for line in know_file:
                uuid, know = line.strip().split(' ')
                know = know.split(',')
                # Element-wise max over the per-concept rows returned by
                # ``kcat.apply`` collapses them into a single vector.
                topic_dic[uuid] = torch.LongTensor(
                    kcat.apply(None, know)).max(0)[0]
        # Fallback vector for topics without a knowledge entry.
        zero = [0] * len(kcat.apply(None, '<NULL>'))

    if args.input_text:
        logging.info('loading exercise texts')
        topics = get_topics(args.dataset, model.words)

    if args.snapshot is None:
        epoch = load_last_snapshot(model, args.workspace)
    else:
        epoch = args.snapshot
        load_snapshot(model, args.workspace, epoch)
    logging.info('loaded model at epoch %s', str(epoch))

    if use_cuda:
        model.cuda()

    model_name = type(model).__name__

    def run_model(text, knowledge, score, when, h):
        # Feed the model the inputs its family (name prefix) expects.
        if model_name.startswith('DK'):
            return model(knowledge, score, when, h)
        if model_name.startswith('RA'):
            return model(text, score, when, h)
        if model_name.startswith('EK'):
            return model(text, knowledge, score, when, h)
        raise ValueError('unsupported model type: %s' % model_name)

    # Upper bound on the number of sequences evaluated per test set.
    max_users = 5000

    for testset, data, ref_data in testsets:
        logging.info('testing on: %s', testset)
        f = open_result(args.workspace, testset, epoch)
        try:
            then = time.time()

            total_mse = 0
            total_mae = 0
            total_acc = 0
            total_seq_cnt = 0

            users = list(data)
            random.shuffle(users)
            # Truncate before counting so that the progress log and the
            # forced final report refer to the sequences actually evaluated.
            users = users[:max_users]
            seq_cnt = len(users)

            MSE = torch.nn.MSELoss()
            MAE = torch.nn.L1Loss()

            for user in users:
                total_seq_cnt += 1

                seq = data[user]
                if user in ref_data:
                    ref_seq = ref_data[user]
                else:
                    ref_seq = []

                length = len(seq)
                ref_len = len(ref_seq)
                seq = ref_seq + seq

                # Treat at least ``args.ref_len`` leading records as
                # reference (not scored) ...
                if ref_len < args.ref_len:
                    length = length + ref_len - args.ref_len
                    ref_len = args.ref_len

                # ... but always keep at least one scored record.
                if length < 1:
                    ref_len = ref_len + length - 1
                    length = 1

                mse = 0
                mae = 0
                acc = 0

                pred_scores = Variable(torch.zeros(len(seq)))

                h = None

                for i, item in enumerate(seq):
                    # Inputs the current model family does not use stay None.
                    knowledge = knowledge_last = None
                    text = text_last = None

                    # get last record for testing and current record for
                    # updating
                    if args.input_knowledge:
                        if item.topic in topic_dic:
                            knowledge = topic_dic[item.topic]
                            # The last topic may be unknown even though the
                            # current one is known; fall back to the zero
                            # vector instead of raising KeyError.
                            knowledge_last = topic_dic.get(seq[-1].topic,
                                                           zero)
                        else:
                            knowledge = zero
                            knowledge_last = zero
                        knowledge = Variable(torch.LongTensor(knowledge))
                        knowledge_last = Variable(
                            torch.LongTensor(knowledge_last), volatile=True)

                    if args.input_text:
                        text = topics.get(item.topic).content
                        text = Variable(torch.LongTensor(text))
                        text_last = topics.get(seq[-1].topic).content
                        text_last = Variable(torch.LongTensor(text_last),
                                             volatile=True)

                    score = Variable(torch.FloatTensor([item.score]),
                                     volatile=True)
                    score_last = Variable(
                        torch.FloatTensor([round(seq[-1].score)]),
                        volatile=True)
                    item_time = Variable(torch.FloatTensor([item.time]),
                                         volatile=True)
                    time_last = Variable(torch.FloatTensor([seq[-1].time]),
                                         volatile=True)

                    # test last score of each seq for seq figure; the
                    # returned state is discarded so ``h`` is not advanced
                    s, _ = run_model(text_last, knowledge_last, score_last,
                                     time_last, h)
                    s_last = torch.clamp(s, 0, 1)

                    # update student state h until the fit process reaches
                    # trainset; afterwards predict without updating h
                    if ref_len > 0 and i > ref_len:
                        s, _ = run_model(text, knowledge, score, item_time,
                                         h)
                    else:
                        s, h = run_model(text, knowledge, score, item_time,
                                         h)

                    pred_scores[i] = s_last

                    if args.loss == 'cross_entropy':
                        s = F.sigmoid(s)
                    else:
                        s = torch.clamp(s, 0, 1)
                    # Reference records only warm up the state; no scoring.
                    if i < ref_len:
                        continue

                    mse += MSE(s, score)
                    m = MAE(s, score).data[0]
                    mae += m
                    acc += m < 0.5

                print_seq(seq, pred_scores.data.cpu().numpy(), ref_len, f,
                          True)

                mse /= length
                mae /= length
                acc = float(acc) / length

                total_mse += mse.data[0]
                total_mae += mae
                total_acc += acc

                # Report every ``print_every`` sequences and after the last.
                if total_seq_cnt % args.print_every != 0 and \
                        total_seq_cnt != seq_cnt:
                    continue

                now = time.time()
                duration = (now - then) / 60

                logging.info(
                    '[%d/%d] (%.2f seqs/min) '
                    'rmse %.6f, mae %.6f, acc %.6f' %
                    (total_seq_cnt, seq_cnt,
                     ((total_seq_cnt - 1) % args.print_every + 1) / duration,
                     math.sqrt(total_mse / total_seq_cnt),
                     total_mae / total_seq_cnt, total_acc / total_seq_cnt))
                then = now
        finally:
            # Close the result file even when evaluation fails midway.
            f.close()
Example #26
0
def trainn(model, args):
    """Train ``model`` with one optimizer step per user sequence.

    Each epoch shuffles the users, runs every sequence through the model
    record by record while carrying the hidden state ``h``, averages the
    per-record loss over the sequence, back-propagates and steps an Adam
    optimizer. Snapshots are saved every ``args.save_every`` sequences and
    at the end of each epoch; progress is logged every ``args.print_every``
    sequences and after the last one.

    Args:
        model: Network whose class name starts with ``'DK'``, ``'RA'`` or
            ``'EK'``; the prefix decides which inputs are fed to it. It must
            expose ``model.args`` (and ``model.words`` when
            ``args.input_text`` is set).
        args: Parsed options. Attributes read here: ``dataset``,
            ``random_level``, ``split_method``, ``frac``,
            ``input_knowledge``, ``input_text``, ``workspace``, ``epochs``,
            ``loss``, ``save_every`` and ``print_every``.

    Raises:
        ValueError: If the model class name has none of the known prefixes.
    """
    logging.info('model: %s, setup: %s' %
                 (type(model).__name__, str(model.args)))
    logging.info('loading dataset')
    data = get_dataset(args.dataset)
    data.random_level = args.random_level

    # Keep only the training part of the chosen split.
    if args.split_method == 'user':
        data, _ = data.split_user(args.frac)
    elif args.split_method == 'future':
        data, _ = data.split_future(args.frac)
    elif args.split_method == 'old':
        data, _, _, _ = data.split()

    data = data.get_seq()

    if args.input_knowledge:
        logging.info('loading knowledge concepts')
        topic_dic = {}
        kcat = Categorical(one_hot=True)
        # Context managers make sure both files are closed again.
        with open(model.args['knows']) as knows_file:
            kcat.load_dict(knows_file.read().split('\n'))
        know_path = 'data/id_firstknow.txt' \
            if 'first' in model.args['knows'] else 'data/id_know.txt'
        with open(know_path) as know_file:
            for line in know_file:
                uuid, know = line.strip().split(' ')
                know = know.split(',')
                # Element-wise max over the per-concept rows returned by
                # ``kcat.apply`` collapses them into a single vector.
                topic_dic[uuid] = torch.LongTensor(
                    kcat.apply(None, know)).max(0)[0]
        # Fallback vector for topics without a knowledge entry.
        zero = [0] * len(kcat.apply(None, '<NULL>'))

    if args.input_text:
        logging.info('loading exercise texts')
        topics = get_topics(args.dataset, model.words)

    optimizer = torch.optim.Adam(model.parameters())

    start_epoch = load_last_snapshot(model, args.workspace)
    if use_cuda:
        model.cuda()

    # The model family never changes, so resolve its name once.
    model_name = type(model).__name__

    for epoch in range(start_epoch, args.epochs):
        logging.info('epoch {}:'.format(epoch))
        then = time.time()

        total_loss = 0
        total_mae = 0
        total_acc = 0
        total_seq_cnt = 0

        users = list(data)
        random.shuffle(users)
        seq_cnt = len(users)

        MSE = torch.nn.MSELoss()
        MAE = torch.nn.L1Loss()

        for user in users:
            total_seq_cnt += 1

            seq = data[user]
            seq_length = len(seq)

            optimizer.zero_grad()

            loss = 0
            mae = 0
            acc = 0

            # Hidden state is reset at the start of every sequence.
            h = None

            for item in seq:
                if args.input_knowledge:
                    if item.topic in topic_dic:
                        knowledge = topic_dic[item.topic]
                    else:
                        knowledge = zero
                    knowledge = Variable(torch.LongTensor(knowledge))

                if args.input_text:
                    text = topics.get(item.topic).content
                    text = Variable(torch.LongTensor(text))
                score = Variable(torch.FloatTensor([item.score]))
                item_time = Variable(torch.FloatTensor([item.time]))

                if model_name.startswith('DK'):
                    s, h = model(knowledge, score, item_time, h)
                elif model_name.startswith('RA'):
                    s, h = model(text, score, item_time, h)
                elif model_name.startswith('EK'):
                    s, h = model(text, knowledge, score, item_time, h)
                else:
                    # Fail with a clear message instead of an unbound ``s``.
                    raise ValueError(
                        'unsupported model type: %s' % model_name)

                s = s[0]

                if args.loss == 'cross_entropy':
                    # ``s`` is a logit here; squash it only for the metric.
                    loss += F.binary_cross_entropy_with_logits(
                        s, score.view_as(s))
                    m = MAE(F.sigmoid(s), score).data[0]
                else:
                    loss += MSE(s, score)
                    m = MAE(s, score).data[0]
                mae += m
                # A prediction within 0.5 of the score counts as correct.
                acc += m < 0.5

            loss /= seq_length
            mae /= seq_length
            acc = float(acc) / seq_length

            total_loss += loss.data[0]
            total_mae += mae
            total_acc += acc

            loss.backward()
            optimizer.step()

            if total_seq_cnt % args.save_every == 0:
                save_snapshot(model, args.workspace,
                              '%d.%d' % (epoch, total_seq_cnt))

            # Report every ``print_every`` sequences and after the last one.
            if total_seq_cnt % args.print_every != 0 and \
                    total_seq_cnt != seq_cnt:
                continue

            now = time.time()
            duration = (now - then) / 60

            logging.info(
                '[%d:%d/%d] (%.2f seqs/min) loss %.6f, mae %.6f, acc %.6f' %
                (epoch, total_seq_cnt, seq_cnt,
                 ((total_seq_cnt - 1) % args.print_every + 1) / duration,
                 total_loss / total_seq_cnt, total_mae / total_seq_cnt,
                 total_acc / total_seq_cnt))
            then = now

        save_snapshot(model, args.workspace, epoch + 1)
Example #27
0
 def default_hidden(self):
     """Return an all-zero ``Variable`` of size ``(1, 1, self.topic_size)``."""
     zeros = torch.zeros(1, 1, self.topic_size)
     return Variable(zeros)
Example #28
0
    def __init__(self):
        """Give every tracked statistic a fresh ``Variable`` and unset the label."""
        stat_names = (
            # How well you lead in the context of pass/fail missions
            'leadTeam', 'leadSucMission', 'leadFailMission',
            # How well you voted in the context of pass/fail missions
            'voted', 'votedYes', 'votedNo',
            'votedYesPass', 'votedYesFail', 'votedNoPass', 'votedNoFail',
            # How well you perform when selected, per pass/fail mission
            'selected', 'selectedPass', 'selectedFail',
            # How much sabotaging/passing you were involved in
            'sabotages', 'passeds',
        )
        # One independent Variable per statistic, created in listing order.
        for stat in stat_names:
            setattr(self, stat, Variable())

        # Label True/False: either spy or not
        self.Label = None
Example #29
0
def train(model, args):
    """Train ``model`` with one optimizer step per user sequence.

    Models whose class name starts with ``'DK'`` are fed a knowledge vector
    looked up per topic; every other model is fed the topic's text content.
    Each epoch shuffles the users, runs every sequence record by record
    while carrying the hidden state ``h``, averages the per-record loss over
    the sequence, back-propagates and steps an Adam optimizer. Snapshots are
    saved every ``args.save_every`` sequences and at the end of each epoch.

    Args:
        model: Network to train; must expose ``model.args`` (and
            ``model.words`` for non-``DK`` models).
        args: Parsed options. Attributes read here: ``dataset``,
            ``random_level``, ``split_method``, ``frac``, ``workspace``,
            ``epochs``, ``loss``, ``save_every`` and ``print_every``.
    """
    logging.info('args: %s' % str(args))
    logging.info('model: %s, setup: %s' %
                 (type(model).__name__, str(model.args)))
    logging.info('loading dataset')
    data = get_dataset(args.dataset)
    data.random_level = args.random_level

    # Keep only the training part of the chosen split.
    if args.split_method == 'user':
        data, _ = data.split_user(args.frac)
    elif args.split_method == 'future':
        data, _ = data.split_future(args.frac)
    elif args.split_method == 'old':
        data, _, _, _ = data.split()

    data = data.get_seq()

    # The model family never changes, so decide the input kind once.
    uses_knowledge = type(model).__name__.startswith('DK')

    if uses_knowledge:
        topic_dic = {}
        kcat = Categorical(one_hot=True)
        # Context managers make sure both files are closed again.
        with open('data/know_list.txt') as know_list_file:
            kcat.load_dict(know_list_file.read().split('\n'))
        with open('data/id_know.txt') as id_know_file:
            for line in id_know_file:
                uuid, know = line.strip().split(' ')
                know = know.split(',')
                # Element-wise max over the per-concept rows returned by
                # ``kcat.apply`` collapses them into a single vector.
                topic_dic[uuid] = \
                    torch.LongTensor(kcat.apply(None, know)) \
                    .max(0)[0] \
                    .type(torch.LongTensor)
        # Fallback vector for topics without a knowledge entry.
        zero = [0] * len(kcat.apply(None, '<NULL>'))
    else:
        topics = get_topics(args.dataset, model.words)

    optimizer = torch.optim.Adam(model.parameters())

    start_epoch = load_last_snapshot(model, args.workspace)
    if use_cuda:
        model.cuda()

    for epoch in range(start_epoch, args.epochs):
        logging.info('epoch {}:'.format(epoch))
        then = time.time()

        total_loss = 0
        total_mae = 0
        total_acc = 0
        total_seq_cnt = 0

        users = list(data)
        random.shuffle(users)
        seq_cnt = len(users)

        MSE = torch.nn.MSELoss()
        MAE = torch.nn.L1Loss()

        for user in users:
            total_seq_cnt += 1

            seq = data[user]
            length = len(seq)

            optimizer.zero_grad()

            loss = 0
            mae = 0
            acc = 0

            # Hidden state is reset at the start of every sequence.
            h = None

            for item in seq:
                if uses_knowledge:
                    if item.topic in topic_dic:
                        x = topic_dic[item.topic]
                    else:
                        x = zero
                else:
                    x = topics.get(item.topic).content
                x = Variable(torch.LongTensor(x))
                # Scores are rounded to the nearest integer before use as
                # both model input and target.
                score = Variable(torch.FloatTensor([round(item.score)]))
                t = Variable(torch.FloatTensor([item.time]))
                s, h = model(x, score, t, h)
                if args.loss == 'cross_entropy':
                    # ``s`` is a logit here; squash it only for the metric.
                    loss += F.binary_cross_entropy_with_logits(
                        s, score.view_as(s))
                    m = MAE(F.sigmoid(s), score).data[0]
                else:
                    loss += MSE(s, score)
                    m = MAE(s, score).data[0]
                mae += m
                # A prediction within 0.5 of the score counts as correct.
                acc += m < 0.5

            loss /= length
            mae /= length
            # ``acc`` is an integer count; force true division so the
            # per-sequence accuracy is a fraction on every Python version.
            acc = float(acc) / length

            total_loss += loss.data[0]
            total_mae += mae
            total_acc += acc

            loss.backward()
            optimizer.step()

            if total_seq_cnt % args.save_every == 0:
                save_snapshot(model, args.workspace,
                              '%d.%d' % (epoch, total_seq_cnt))

            # Report every ``print_every`` sequences and after the last one.
            if total_seq_cnt % args.print_every != 0 and \
                    total_seq_cnt != seq_cnt:
                continue

            now = time.time()
            duration = (now - then) / 60

            logging.info(
                '[%d:%d/%d] (%.2f seqs/min) '
                'loss %.6f, mae %.6f, acc %.6f' %
                (epoch, total_seq_cnt, seq_cnt,
                 ((total_seq_cnt - 1) % args.print_every + 1) / duration,
                 total_loss / total_seq_cnt, total_mae / total_seq_cnt,
                 total_acc / total_seq_cnt))
            then = now

        save_snapshot(model, args.workspace, epoch + 1)