def setUp(self):
    """Create two variables, three literals, and register an assignment on variable "2"."""
    first, second = Variable("1"), Variable("2")
    self.v1 = first
    self.v2 = second
    self.l1 = Literal(first)
    self.l2 = Literal(second)
    # Same variable as l1, built with the second constructor argument False.
    self.l3 = Literal(first, False)
    reason = Clause(self.l2)
    self.a = Assignment(1, second, True, reason)
    second.assign(self.a)
def forward(self, v, kn, ko, s, hidden):
    """Score one step and advance the per-knowledge hidden states.

    ``hidden`` is either ``None`` (first step) or a ``(h, vs, hs)`` triple.
    With a triple, the state used for prediction is a softmax-weighted mix
    of the rows of ``hs`` picked by the top-``self.k`` dot products between
    ``vs`` and ``v``.  ``ko`` is accepted but not used.

    Returns ``(predict_score.view(1), h, beta)``; ``beta`` is the sequence
    attention, or ``None`` when ``hidden`` was ``None``.
    """
    if hidden is not None:
        h, vs, hs = hidden
        # sequence attention: dot product of v against every row of vs
        seq_scores = torch.mm(vs, v.view(-1, 1)).view(-1)
        top_k = min(len(seq_scores), self.k)
        seq_scores, idx = seq_scores.topk(top_k, sorted=False)
        beta = nn.functional.softmax(seq_scores.view(1, -1), dim=-1)
        length = Variable(torch.FloatTensor([beta.size()[1]]))  # not read below
        flat_states = hs.view(-1, self.know_length * self.seq_hidden_size)
        picked = torch.index_select(flat_states, 0, idx)
        attn_h = torch.mm(beta, picked).view(-1)
    else:
        h = self.h_initial.view(self.num_layers, self.know_length,
                                self.seq_hidden_size)
        attn_h = self.h_initial
        length = Variable(torch.FloatTensor([0.]))  # not read below
        beta = None

    # knowledge attention: dot product of kn against the knowledge memory
    know_scores = torch.mm(self.knowledge_memory, kn.view(-1, 1)).view(-1)
    alpha = nn.functional.softmax(know_scores.view(1, -1), dim=-1)
    per_knowledge = attn_h.view(self.know_length, self.seq_hidden_size)
    hkp = torch.mm(alpha, per_knowledge).view(-1)
    predict_score = self.score_layer(torch.cat([v, hkp]).view(1, -1))

    # sequence state update
    if self.score_mode == 'concat':
        x = v
    else:
        high = (s >= 0.5).type_as(v).expand_as(v)
        low = (s < 0.5).type_as(v).expand_as(v)
        x = torch.cat([v * high, v * low])
    x = torch.cat([x, s])
    # one copy of the input per knowledge slot, scaled by its attention weight
    xk = alpha.view(-1, 1) * x.view(1, -1).expand(self.know_length, -1)
    _, h = self.rnn(xk.unsqueeze(0), h)
    return predict_score.view(1), h, beta
def interpret(self, lines):
    """Parse CNF text into a list of linked clauses.

    ``lines`` is the whole input as one string.  An optional header of the
    form ``p cnf <a> <b>`` at the very start of the text is remembered as a
    checksum.  Everything from a ``c`` or ``p`` character up to the next
    newline is then dropped, and the remaining whitespace-separated tokens
    are read as signed integers: ``0`` closes the current clause, any other
    number adds a literal (a leading ``-`` yields ``Literal(var, False)``).

    Side effects: ``self.known`` is reset to a fresh dict mapping variable
    names to ``Variable`` objects, and each variable's ``count`` is
    incremented once per occurrence.  Every finished clause has ``link()``
    called on it.  Literals after the last ``0`` are discarded.

    Returns the list of clauses.  Raises ``AssertionError`` if a header was
    found and its two numbers do not match the number of distinct variables
    and clauses read.
    """
    # Raw strings: "\s" in a plain literal is an invalid escape sequence.
    literal_pattern = re.compile(r"(-?)(\d+)")
    self.known = dict()
    clauses = []
    current = []
    header = re.match(r"p (?:cnf )?(.+) (.+)\s*\n", lines)
    checksum = header.groups() if header else None
    lines = re.sub(r"[cp].*?\n", "", lines)
    for token in re.split(r"[\s\n]+", lines):
        found = literal_pattern.match(token)
        if not found:
            continue
        varname = found.group(2)
        if varname != "0":
            var = self.known.get(varname, Variable(varname))
            var.count += 1
            self.known[varname] = var
            current.append(Literal(var, not bool(found.group(1))))
        else:
            # "0" terminates the clause collected so far.
            addition = Clause(*current)
            addition.link()
            clauses.append(addition)
            current = []
    if checksum:
        assert (len(self.known) == int(checksum[0]))
        assert (len(clauses) == int(checksum[1]))
    return clauses
def test_assignment(self):
    """An assignment pushed on the stack is returned by pop() with its variable."""
    var = Variable("1")
    stack = AssignmentStack()
    stack.push(Assignment(1, var, True, Clause(Literal(var))))
    popped = stack.pop()
    self.assertEqual(popped.var, var)
def forward(self, v, s, hidden):
    """Score one step and advance the recurrent state.

    ``hidden`` is ``None`` on the first step, otherwise ``(h, vs, hs)``.
    With a triple, the attended state is a softmax-weighted mix of the rows
    of ``hs`` selected by the top-``self.k`` dot products between ``vs``
    and ``v``.  When ``self.with_last`` is set, the current state and the
    number of attended rows are appended to the prediction input.

    Returns ``(score, h)``.
    """
    if hidden is not None:
        h, vs, hs = hidden
        # attention weights from dot products with the stored inputs
        weights = torch.mm(vs, v.view(-1, 1)).view(-1)
        top_k = min(len(weights), self.k)
        weights, idx = weights.topk(top_k, sorted=False)
        weights = nn.functional.softmax(weights.view(1, -1), dim=-1)
        length = Variable(torch.FloatTensor([weights.size()[1]]))
        # flatten each stored state to one row
        flat_states = hs.view(-1, self.num_layers * self.seq_hidden_size)
        picked = torch.index_select(flat_states, 0, idx)
        attn_h = torch.mm(weights, picked).view(-1)
    else:
        h = self.initial_h.view(self.num_layers, 1, self.seq_hidden_size)
        attn_h = self.initial_h
        length = Variable(torch.FloatTensor([0.]))

    parts = [v, attn_h]
    if self.with_last:
        parts = [v, attn_h, h.view(-1), length]
    score = self.score(torch.cat(parts).view(1, -1))

    if self.score_mode == 'concat':
        x = v
    else:
        high = (s >= 0.5).type_as(v).expand_as(v)
        low = (s < 0.5).type_as(v).expand_as(v)
        x = torch.cat([v * high, v * low])
    x = torch.cat([x, s])
    _, h = self.rnn(x.view(1, 1, -1), h)
    return score, h
def PickBranchingVariable(self):
    """Pick the unassigned variable with the highest ``count`` to branch on.

    Starts from a placeholder ``Variable('tmp')`` and replaces it with every
    unassigned variable in ``self.vars`` whose ``count`` is at least the
    current best, so on ties the later variable wins.  If no variable
    qualifies, the placeholder itself is returned.

    Returns ``(variable, polarity)``; the polarity is always ``False``.
    """
    best = Variable('tmp')
    for candidate in self.vars:
        if not candidate.isAssigned() and candidate.count >= best.count:
            best = candidate
    pol = False
    # Log the variable actually chosen, not the last one iterated over
    # (which was also undefined when self.vars is empty).
    logging.info("Branching dl: %s -> %s = %s", self.dl + 1, best.name, pol)
    return best, pol
def __init__(self):
    """Give every tracked counter its own fresh ``Variable``."""
    counters = (
        "resWins",
        "spyWins",
        "votesRes",
        "votesSpy",
        "spyVoted",
        "spySelected",
        "selections",
    )
    for attr in counters:
        setattr(self, attr, Variable())
def forward(self, v, kn, ko, s, h, beta=None):
    """Score one step and advance the per-knowledge hidden states.

    ``h`` may be ``None``, in which case ``self.h_initial`` is used.  When
    ``beta`` is not supplied it is computed as the softmax of the dot
    products between ``self.knowledge_memory`` and ``kn``.  ``ko`` is
    accepted but not used.

    Returns ``(predict_score.view(1), h)``.
    """
    if h is None:
        h = self.h_initial.view(self.num_layers, self.know_length,
                                self.seq_hidden_size)
        length = Variable(torch.FloatTensor([0.]))  # not read below

    if beta is None:
        # knowledge attention weights from dot products
        know_scores = torch.mm(self.knowledge_memory, kn.view(-1, 1)).view(-1)
        beta = nn.functional.softmax(know_scores.view(1, -1), dim=-1)

    # predict the score from the attention-weighted state
    per_knowledge = h.view(self.know_length, self.seq_hidden_size)
    hkp = torch.mm(beta, per_knowledge).view(-1)
    predict_score = self.score_layer(torch.cat([v, hkp]).view(1, -1))

    # sequence state update
    if self.score_mode == 'concat':
        x = v
    else:
        high = (s >= 0.5).type_as(v).expand_as(v)
        low = (s < 0.5).type_as(v).expand_as(v)
        x = torch.cat([v * high, v * low])
    x = torch.cat([x, s])
    # one copy of the input per knowledge slot, scaled by its weight
    xk = beta.view(-1, 1) * x.view(1, -1).expand(self.know_length, -1)
    _, h = self.rnn(xk.unsqueeze(0), h)
    return predict_score.view(1), h
class TestLiteral(unittest.TestCase):
    """Checks equality, negation and value lookup of ``Literal``."""

    def setUp(self):
        first, second = Variable("1"), Variable("2")
        self.v1 = first
        self.v2 = second
        self.l1 = Literal(first)
        self.l2 = Literal(second)
        self.l3 = Literal(first, False)
        reason = Clause(self.l2)
        self.a = Assignment(1, second, True, reason)
        second.assign(self.a)

    def test_eq(self):
        twin = Literal(self.v1)
        self.assertEqual(twin, self.l1)
        # differs from l1 in the second constructor argument / in the variable
        for other in (self.l3, self.l2):
            self.assertNotEqual(self.l1, other)

    def test_neg(self):
        negated = -self.l1
        self.assertEqual(self.l3, negated)

    def test_value(self):
        value = self.l2.value()
        self.assertTrue(value)
def _estimateProb(self, dic, fullkey):
    """Estimate a probability for ``fullkey``, backing off to looser keys.

    Starts from the statistics stored under ``fullkey`` (plus a small
    uncertainty bias).  While the sample count does not exceed the bound
    for the current level, statistics of keys produced by
    ``maskSome(list(fullkey), skipunits)`` are blended in with a capped
    weight.  Returns ``total / samples`` of the blended estimate.
    """
    # level -> (sample bound above which backing off stops, maximum weight)
    prior_params = {1: (50, 8), 2: (30, 4), 3: (20, 2), 4: (10, 1)}
    exact = dic[fullkey]
    estimate = Variable()
    estimate.total = 0.5 + exact.total  # uncertainty bias
    estimate.samples = 1 + exact.samples

    for skipunits in range(1, len(fullkey) + 1):
        bound, weight = prior_params[skipunits]
        if estimate.samples > bound:
            break
        for nkey in maskSome(list(fullkey), skipunits):
            masked = tuple(nkey)
            if masked in dic:
                partial = dic[masked]
                if partial.samples != 0:
                    # the cap carries over to later keys of the same level
                    weight = min(weight, partial.samples / float(skipunits))
                    estimate.samples += weight
                    estimate.total += weight * partial.total / float(partial.samples)

    return estimate.total / float(estimate.samples)
def _estimateProb(self, dic, fullkey):
    """Return a smoothed probability for ``fullkey``.

    The exact statistics for ``fullkey`` seed the estimate.  If they hold
    too few samples, statistics for progressively more masked variants of
    the key (from ``maskSome``) are mixed in, each with a bounded weight,
    so that a usable prior is available.
    """
    prior_params = {1: (50, 8), 2: (30, 4), 3: (20, 2), 4: (10, 1)}
    seed = dic[fullkey]
    acc = Variable()
    acc.total = 0.5 + seed.total  # uncertainty bias
    acc.samples = 1 + seed.samples

    key_length = len(fullkey)
    for skipunits in range(1, key_length + 1):
        bound, weight = prior_params[skipunits]
        if acc.samples > bound:
            # enough evidence already; stop loosening the key
            break
        divisor = float(skipunits)
        for nkey in maskSome(list(fullkey), skipunits):
            candidate = tuple(nkey)
            if candidate not in dic:
                continue
            stats = dic[candidate]
            if stats.samples == 0:
                continue
            weight = min(weight, stats.samples / divisor)
            acc.samples += weight
            acc.total += weight * stats.total / float(stats.samples)

    return acc.total / float(acc.samples)
def predict(model, args):
    """Answer prediction requests read as JSON lines from standard input.

    Loads a snapshot into ``model`` (the latest one when ``args.snapshot``
    is ``None``), then loops until EOF.  Each input line must be a JSON
    object with a ``'ref'`` list, which is replayed through the model to
    build up state, and a ``'pred'`` list, for which one score per item is
    predicted without updating the state.  The list of predicted scores is
    printed per line; a line that is not valid JSON or lacks either key
    prints ``[]``.
    """
    try:
        torch.set_grad_enabled(False)
    except AttributeError:
        # torch builds without set_grad_enabled: rely on volatile=True below
        pass

    logging.info('model: %s, setup: %s' % (type(model).__name__, str(model.args)))
    logging.info('loading dataset')

    if args.snapshot is None:
        epoch = load_last_snapshot(model, args.workspace)
    else:
        epoch = args.snapshot
        load_snapshot(model, args.workspace, epoch)
    logging.info('loaded model at epoch %s', str(epoch))

    to_categorical = Categorical('</s>')
    to_categorical.load_dict(model.words)
    trans = to_categorical(Words(':', null='</s>'))

    while True:
        # loop over inputs
        try:
            line = input()
        except EOFError:
            logging.info('bye')
            break

        try:
            # json.loads no longer accepts an ``encoding`` keyword
            # (removed in Python 3.9); ``line`` is already a str.
            obj = json.loads(line)
            ref_seq = obj['ref']
            pred_seq = obj['pred']
        except (json.decoder.JSONDecodeError, KeyError):
            print('[]')
            continue

        h = None
        for item in ref_seq:
            x = trans.apply(None, item['fea'])
            x = Variable(torch.LongTensor(x), volatile=True)
            # NOTE(review): here the score comes from 't' and the third
            # model argument from 's', while the prediction loop below
            # passes item['t'] as the third argument - confirm which key
            # is which against the request schema.
            score = Variable(torch.FloatTensor([item['t']]), volatile=True)
            t = Variable(torch.FloatTensor([item['s']]), volatile=True)
            _, h = model(x, score, t, h)

        pred_scores = []
        for item in pred_seq:
            x = trans.apply(None, item['fea'])
            x = Variable(torch.LongTensor(x), volatile=True)
            score = Variable(torch.FloatTensor([0.]), volatile=True)
            t = Variable(torch.FloatTensor([item['t']]), volatile=True)
            # state h is deliberately not advanced while predicting
            s, _ = model(x, score, t, h)
            # convert to a plain float so the printed list is numbers on
            # every torch version
            pred_scores.append(float(s.cpu().data[0][0]))
        print(pred_scores)
def __init__(self, topic_size, seq_hidden_size, k, score_mode, num_layers=1):
    """Set up the GRU, the linear scoring layer and the initial state.

    The GRU input is ``topic_size + 1`` wide in ``'concat'`` score mode and
    ``topic_size * 2 + 1`` wide otherwise.  ``k`` is stored for later use.
    """
    super(AttnSeqTimeDecayModel, self).__init__()
    self.topic_size = topic_size
    self.seq_hidden_size = seq_hidden_size
    self.num_layers = num_layers
    self.score_mode = score_mode

    if score_mode == 'concat':
        rnn_input_size = topic_size + 1
    else:
        rnn_input_size = topic_size * 2 + 1
    self.rnn = nn.GRU(rnn_input_size, seq_hidden_size, num_layers)
    self.score = nn.Linear(topic_size + seq_hidden_size, 1)

    self.k = k
    flat_state_size = self.num_layers * self.seq_hidden_size
    self.initial_h = Variable(torch.zeros(flat_state_size), requires_grad=True)
class TestVariable(unittest.TestCase):
    """Checks the string form and the assign/unassign cycle of ``Variable``."""

    def setUp(self):
        self.v = Variable("1")
        unit = Clause(Literal(self.v))
        self.a = Assignment(1, self.v, True, unit)

    def test_repr(self):
        named = Variable("somename")
        self.assertEqual(str(named), "somename")

    def test_value(self):
        var = self.v
        # a fresh variable has no value
        self.assertEqual(var.value(), None)
        # after assigning, the value is truthy
        var.assign(self.a)
        self.assertTrue(var.value())
        # after unassigning, it reports as not assigned
        var.unassign()
        self.assertFalse(var.isAssigned())
def test_repr(self):
    """str() of a Variable is the name it was created with."""
    name = "somename"
    self.assertEqual(str(Variable("somename")), name)
def setUp(self):
    """Create variable "1" and an Assignment for it (not yet applied)."""
    var = Variable("1")
    self.v = var
    unit = Clause(Literal(var))
    self.a = Assignment(1, var, True, unit)
def __init__(self, topic_size, k):
    """Store the sizes and create a one-element zero tensor that tracks gradients."""
    super(AttnModel, self).__init__()
    self.user_emb_size, self.k = topic_size, k
    zero = torch.zeros(1)
    self.initial_guess = Variable(zero, requires_grad=True)
def total(self):
    """Return a Variable built from the summed totals and sample counts of ``resWins`` and ``spyWins``."""
    res, spy = self.resWins, self.spyWins
    combined_total = res.total + spy.total
    combined_samples = res.samples + spy.samples
    return Variable(combined_total, combined_samples)
def default_hidden(self, batch_size):
    """Return a pair of zero states for ``batch_size`` sequences.

    The first element is always a ``(2, batch_size, emb_size)`` zero
    tensor.  The second is a ``(num_layers - 1, batch_size, emb_size)``
    zero tensor when ``num_layers > 1`` and ``None`` otherwise.
    """
    first = Variable(torch.zeros(2, batch_size, self.emb_size))
    if self.num_layers > 1:
        rest = Variable(torch.zeros(self.num_layers - 1, batch_size, self.emb_size))
    else:
        rest = None
    return first, rest
def default_hidden(self):
    """Return a zero state of shape ``(num_layers, 1, seq_hidden_size)``."""
    shape = (self.num_layers, 1, self.seq_hidden_size)
    zeros = torch.zeros(*shape)
    return Variable(zeros)
class TestClause(unittest.TestCase):
    """Exercises construction, equality, membership, status and resolution of ``Clause``."""

    def setUp(self):
        self.v1, self.v2, self.v3 = (Variable(name) for name in ("1", "2", "3"))
        self.l1 = Literal(self.v1)
        self.l2 = Literal(self.v2)
        self.l3 = Literal(self.v2, False)
        reason = Clause(self.l2)
        self.a1 = Assignment(1, self.v2, True, reason)
        self.v2.assign(self.a1)

    def test_init(self):
        empty = Clause()
        self.assertFalse(any(hasattr(empty, name) for name in ('revA', 'revB')))
        single = Clause(Literal(self.v1, True))
        self.assertFalse(any(hasattr(single, name) for name in ('revA', 'revB')))

    def test_eq(self):
        first = Clause(self.l1)
        different = Clause(self.l2)
        same = Clause(self.l1)
        self.assertEqual(first, same)
        self.assertNotEqual(first, different)

    def test_contains(self):
        with_l1 = Clause(self.l1)
        with_l2 = Clause(self.l2)
        self.assertTrue(self.l1 in with_l1)
        self.assertFalse(self.l1 in with_l2)

    def test_status(self):
        # empty clause
        self.assertEqual(Clause().status(), "UNSAT")

        # single literal: unassigned, sat, unsat
        singles = (
            (self.l1, "UNIT"),
            (self.l2, "SAT"),
            (self.l3, "UNSAT"),
        )
        for literal, expected in singles:
            self.assertEqual(Clause(literal).status(), expected)

        # two literals: unit, sat, sat, unsat
        pairs = (
            ((self.l1, self.l3), "UNIT"),
            ((self.l2, self.l3), "SAT"),
            ((self.l1, self.l2), "SAT"),
            ((self.l3, self.l3), "UNSAT"),
        )
        for literals, expected in pairs:
            self.assertEqual(Clause(*literals).status(), expected)

        # multi-literal
        # (disabled case: Clause(self.l3, self.l1, self.l3) expected "UNIT")
        unresolved = Clause(self.l3, self.l1, self.l1)
        self.l3.occurrence_link(unresolved)
        self.assertEqual(unresolved.status(), "UNRESOLVED")

        satisfied = Clause(self.l3, self.l1, self.l2)
        satisfied.link()
        self.assertEqual(satisfied.status(), "SAT")

    def test_resolution(self):
        left = Clause(self.l1, self.l2)
        right = Clause(self.l1, self.l3)
        resolvent = left.resolve(right)
        self.assertEqual(resolvent, Clause(self.l1))
# Fragment of an agent training loop.  Each step samples sequences from the
# agent, scores them, and builds a squared-error loss between the agent
# likelihood and a prior likelihood shifted by sigma * score.
# NOTE(review): only part of the loop body is visible here; whatever consumes
# `loss`, `entropy` and `step_score` is not shown.
step_score = [[], []]
print('Model initialized, starting training...')
for step in range(n_steps):
    # Sample from the agent
    seqs, agent_likelihood, entropy = Agent.sample(batch_size)
    # Remove duplicates, i.e. only consider unique seqs; the same index is
    # applied to the likelihood and entropy so the three stay aligned
    unique_idx = unique(seqs)
    seqs = seqs[unique_idx]
    agent_likelihood = agent_likelihood[unique_idx]
    entropy = entropy[unique_idx]
    # Get prior likelihood and score
    prior_likelihood, _ = Prior.likelihood(Variable(seqs))
    smiles = seq_to_smiles(seqs, voc)
    score = scoring_function(smiles)
    # Calculate augmented likelihood
    augmented_likelihood = prior_likelihood + sigma * Variable(score)
    loss = torch.pow((augmented_likelihood - agent_likelihood), 2)
    # Experience replay: once more than 4 entries are stored, sample 4 of
    # them, compute the same squared-error loss for them and append it to
    # this step's loss
    if experience_replay and len(experience) > 4:
        exp_seqs, exp_score, exp_prior_likelihood = experience.sample(4)
        exp_agent_likelihood, exp_entropy = Agent.likelihood(exp_seqs.long())
        exp_augmented_likelihood = exp_prior_likelihood + sigma * exp_score
        exp_loss = torch.pow((Variable(exp_augmented_likelihood) - exp_agent_likelihood), 2)
        loss = torch.cat((loss, exp_loss), 0)
def main(options=None):
    """Build a counterpoint CSP from ``options``, then solve it two ways.

    Creates one ``cp<i>`` and one ``cf<i>`` variable per bar, fixes the
    ``cf`` domains from a note list (a preset song, a random list when
    ``options.testing`` is set, or a four-note default), links the
    variables with melodic and harmonic constraints, and then runs both
    ``backtracking_search`` on the CSP and ``simAnnealing`` on a deep copy.
    Solutions are written with ``write_solution``; when ``options.testing``
    is set, timing statistics are appended to the two CSV files.

    Returns ``None`` early when arc consistency fails or no DFS solution is
    found; otherwise returns the CSP when ``options.testing`` is set and
    falls off the end (``None``) when it is not.

    NOTE(review): the default ``options=None`` cannot work, since attributes
    are read from ``options`` immediately.  This is Python 2 code (print
    statements; ``map`` is relied on to run eagerly).
    """
    num_bars = options.num_bars
    cantus_file = options.cantus_file  # read but not used below
    solution_file = options.solution_file
    sa_file = options.sa_file
    test_dir = options.test_dir
    testing = options.testing
    dfs_csv = options.dfs_csv
    sa_csv = options.sa_csv
    song_list = ['mary', 'ariana', 'shootingstar']
    arc_consistency = options.arc_consistency
    extra_harmonic = options.extra_harmonic  # only referenced in disabled code
    if options.preset_song in song_list:
        print 'options.preset_song is {}'.format(options.preset_song)
    elif options.preset_song != '':
        print 'preset song not found, proceeding with default'
    csp = Csp()
    cp = [] # list of counterpoint variables
    cf = [] # list of __ variables
    binary = [] # list of Constraint objects
    # Choose the note list; preset songs also override num_bars.
    if options.preset_song == song_list[0]:
        note_list = [64, 62, 60, 62, 64, 64, 64, 62, 62, 62, 64, 67, 67, 64, 62, \
                     60, 62, 64, 64, 64, 64, 62, 62, 64, 62, 60]
        num_bars = len(note_list)
    elif options.preset_song == song_list[1]:
        num_bars = 8
        note_list = [69, 67, 66, 67, 66, 64, 66, 64]
    elif options.preset_song == song_list[2]:
        num_bars = 10
        note_list = [71, 71, 72, 67, 64, 71, 71, 72, 67, 64]
    elif testing:
        print('Generating a cantus firmus over ' + str(num_bars) + ' bars')
        note_list = []
        for i in range(num_bars):
            note_list.append(random.choice(NOTE_RANGE))
    else:
        note_list = [57,60,59,57] # default
    # One cp variable, one constraint on it, and one cf variable per bar.
    for i in range(num_bars):
        cp.append(Variable('cp' + str(i)))
        csp.addToVariables(cp[i])
        binary.append(Constraint())
        binary[i].setVariable(cp[i])
        cf.append(Variable('cf' + str(i)))
        csp.addToVariables(cf[i])
    # Each cf variable gets exactly the one note from the list.
    # NOTE(review): indexes cf by note position, so a note list longer than
    # num_bars would raise IndexError - confirm callers keep them aligned.
    for i in range(len(note_list)):
        note = Note(note_list[i])
        cf[i].addToDomain(note)
    print('Cantus firmus sequential note pitches: ')
    print(note_list)
    # cp domains: pitches 30..99, except the second-to-last bar (60..69).
    for i in range(num_bars):
        cp_note_list = range(30, 100)
        if i != (num_bars - 2):
            map(lambda x: cp[i].addToDomain(Note(x)), cp_note_list)
        else:
            cp_note_list = range(60, 70)
            map(lambda x: cp[i].addToDomain(Note(x)), cp_note_list)
    # binary constraints, p. 109 of Ovans
    # Melodic links in both directions between neighbouring cp variables.
    for i in range(1, num_bars):
        L = Link()
        L.setNode(binary[i-1])
        L.setLabel(Note.melodic)
        cp[i].addToNeighbors(L)
        L = Link()
        L.setNode(binary[i])
        L.setLabel(Note.melodic)
        cp[i-1].addToNeighbors(L)
        # The string below is disabled code kept as a no-op expression.
        """# extra constraint!!!! if extra_harmonic and i != num_bars - 1: L = Link() L.setNode(binary[i+1]) L.setLabel(Note.harmonic) cp[i].addToNeighbors(L)"""
    # First bar uses its own label.
    L = Link()
    L.setNode(binary[0])
    L.setLabel(Note.perfectCfHarmonic)
    cf[0].addToNeighbors(L)
    # harmonic constraints for each cantus firmus note
    for i in range(1, num_bars-2):
        L = Link()
        L.setNode(binary[i])
        L.setLabel(Note.harmonic)
        cf[i].addToNeighbors(L)
    # perfect harmonic constraints in last two bars
    for i in range(num_bars-2, num_bars):
        L = Link()
        L.setNode(binary[i])
        L.setLabel(Note.perfectHarmonic)
        cf[i].addToNeighbors(L)
    # Simulated annealing works on its own copy, taken before AC3 or the
    # search touch csp.
    test_csp = copy.deepcopy(csp)
    if arc_consistency:
        arc_start = timeit.default_timer()
        if csp.AC3():
            arc_stop = timeit.default_timer()
            print 'Made initial arcs consistent in {} seconds.'.format(arc_stop - arc_start)
            print 'Arc consistent - looking for a solution with DFS'
            sol_start = timeit.default_timer()
            csp.backtracking_search()
            sol_stop = timeit.default_timer()
            print('Trying simulated annealing...')
            sim_start = timeit.default_timer()
            test_csp.simAnnealing()
            sim_stop = timeit.default_timer()
            print 'Completed simulated annealing after {} seconds.'.format(sim_stop - sim_start)
            print 'Simulated annealing returns with cost {}, after {} iterations.'.format(test_csp.getCost(test_csp.vars), test_csp.iters)
        else:
            arc_stop = timeit.default_timer()
            print 'Failed to make initial arcs consistent after {} seconds.'.format(arc_stop - arc_start)
            print('Not consistent')
            return None
    else:
        sol_start = timeit.default_timer()
        csp.backtracking_search()
        sol_stop = timeit.default_timer()
        print('Attempt to find a DFS solution finished after {} seconds.'.format(sol_stop - sol_start))
        print('Trying simulated annealing...')
        sim_start = timeit.default_timer()
        test_csp.simAnnealing()
        sim_stop = timeit.default_timer()
        print 'Completed simulated annealing after {} seconds.'.format(sim_stop - sim_start)
        print 'Simulated annealing returns with cost {}, after {} iterations.'.format(test_csp.getCost(test_csp.vars), test_csp.iters)
        # NOTE(review): nesting of this print was reconstructed from a
        # whitespace-collapsed source - confirm it belongs in this branch.
        print('Looking for a solution with DFS')
    if csp.one_sol is not None:
        print('Found a solution with arc consistency! Expanded {} nodes with {} backtracks'.format(csp.getNodes(), csp.getBts()))
        # Log stats in csv file for testing
        if testing:
            dfs_trial_info = '{},{},{},{}\n'.format(num_bars, csp.getNodes(), csp.getBts(), sol_stop - sol_start)
            with open(dfs_csv, 'a+') as f:
                f.write(dfs_trial_info)
            f.closed  # bare attribute access; has no effect
    else:
        print('No solution found')
        return None
    write_solution(csp.one_sol, num_bars=num_bars, solution_file=test_dir + '/' + solution_file, random_length=options.random)
    # The annealing result is only written when it reached cost 0.
    if test_csp.getCost(test_csp.vars) == 0:
        write_solution(test_csp.vars, num_bars=num_bars,solution_file=test_dir + '/' + sa_file, random_length=options.random)
        # Log stats in csv file for testing
        if testing:
            sim_trial_info = '{},{},{},{}\n'.format(num_bars, test_csp.getCost(test_csp.vars), test_csp.iters, sim_stop - sim_start)
            with open(sa_csv, 'a+') as f:
                f.write(sim_trial_info)
            f.closed  # bare attribute access; has no effect
    else:
        print('Simulated annealing failed, not writing to output')
    if testing:
        return csp
def test(model, args):
    """Evaluate ``model`` on one or more test sets and log error metrics.

    Picks the test sets from ``args.dataset`` / ``args.split_method`` /
    ``args.ref_set``, loads a snapshot into the model, then for up to 5000
    shuffled users per test set replays the reference records followed by
    the test records (each topic kept only once) and accumulates MSE, MAE
    and accuracy on the records at or after ``ref_len``.  Per-sequence
    predictions are written through ``print_seq`` to the file returned by
    ``open_result``; running averages are logged every
    ``args.print_every`` sequences.
    """
    try:
        torch.set_grad_enabled(False)
    except AttributeError:
        # torch builds without set_grad_enabled: volatile=True is used below
        pass
    logging.info('model: %s, setup: %s' % (type(model).__name__, str(model.args)))
    logging.info('loading dataset')
    data = get_dataset(args.dataset)
    data.random_level = args.random_level
    # Each test set is a (name, sequences, reference sequences) triple.
    if not args.dataset.endswith('test'):
        if args.split_method == 'user':
            _, data = data.split_user(args.frac)
            testsets = [('user_split', data, {})]
        elif args.split_method == 'future':
            _, data = data.split_future(args.frac)
            testsets = [('future_split', data, {})]
        elif args.split_method == 'old':
            trainset, _, _, _ = data.split()
            data = trainset.get_seq()
            train, user, exam, new = data.split()
            train = train.get_seq()
            user = user.get_seq()
            exam = exam.get_seq()
            new = new.get_seq()
            testsets = zip(['user', 'exam', 'new'], [user, exam, new], [{}, train, user])
        else:
            if args.ref_set:
                ref = get_dataset(args.ref_set)
                ref.random_level = args.random_level
                testsets = [(args.dataset.split('/')[-1], data.get_seq(), ref.get_seq())]
            else:
                testsets = [('student', data.get_seq(), {})]
    else:
        testsets = [('school', data.get_seq(), {})]
    # Models whose class name starts with 'DK' take a knowledge vector per
    # topic; all others take the topic's text content.
    if type(model).__name__.startswith('DK'):
        topic_dic = {}
        kcat = Categorical(one_hot=True)
        kcat.load_dict(open('data/know_list.txt').read().split('\n'))
        for line in open('data/id_know.txt'):
            uuid, know = line.strip().split(' ')
            know = know.split(',')
            # element-wise max over the rows returned by kcat.apply
            topic_dic[uuid] = \
                torch.LongTensor(kcat.apply(None, know)) \
                .max(0)[0] \
                .type(torch.LongTensor)
        # fallback input for topics missing from topic_dic
        zero = [0] * len(kcat.apply(None, '<NULL>'))
    else:
        topics = get_topics(args.dataset, model.words)
    if args.snapshot is None:
        epoch = load_last_snapshot(model, args.workspace)
    else:
        epoch = args.snapshot
        load_snapshot(model, args.workspace, epoch)
    logging.info('loaded model at epoch %s', str(epoch))
    if use_cuda:
        model.cuda()
    for testset, data, ref_data in testsets:
        logging.info('testing on: %s', testset)
        f = open_result(args.workspace, testset, epoch)
        then = time.time()
        total_mse = 0
        total_mae = 0
        total_acc = 0
        total_seq_cnt = 0
        users = list(data)
        random.shuffle(users)
        seq_cnt = len(users)
        MSE = torch.nn.MSELoss()
        MAE = torch.nn.L1Loss()
        # At most 5000 users are evaluated per test set.
        for user in users[:5000]:
            seq = data[user]
            if user in ref_data:
                ref_seq = ref_data[user]
            else:
                ref_seq = []
            # Keep only the first record per topic, first within the
            # reference sequence, then within the test sequence (topics
            # already seen in the reference are dropped there too).
            seq2 = []
            seen = set()
            for item in ref_seq:
                if item.topic in seen:
                    continue
                seen.add(item.topic)
                seq2.append(item)
            ref_seq = seq2
            seq2 = []
            for item in seq:
                if item.topic in seen:
                    continue
                seen.add(item.topic)
                seq2.append(item)
            seq = seq2
            ref_len = len(ref_seq)
            seq = ref_seq + seq
            length = len(seq)
            # Enforce a minimum reference length; `length` shrinks by the
            # number of records moved into the reference part.
            if ref_len < args.ref_len:
                length = length + ref_len - args.ref_len
                ref_len = args.ref_len
            if length < 1:
                continue
            total_seq_cnt += 1
            mse = 0
            mae = 0
            acc = 0
            pred_scores = Variable(torch.zeros(len(seq)))
            s = None
            h = None
            for i, item in enumerate(seq):
                if args.test_on_last:
                    # Predict the last record of the sequence from the
                    # current state, without updating the state.
                    # NOTE(review): `topics` is only defined for non-'DK'
                    # models, so this branch would raise NameError for a
                    # 'DK' model - confirm the combination is never used.
                    x = topics.get(seq[-1].topic).content
                    x = Variable(torch.LongTensor(x), volatile=True)
                    score = Variable(torch.FloatTensor([round(seq[-1].score)]), volatile=True)
                    t = Variable(torch.FloatTensor([seq[-1].time]), volatile=True)
                    s, _ = model(x, score, t, h)
                    s_last = torch.clamp(s, 0, 1)
                if type(model).__name__.startswith('DK'):
                    if item.topic in topic_dic:
                        x = topic_dic[item.topic]
                    else:
                        x = zero
                else:
                    x = topics.get(item.topic).content
                x = Variable(torch.LongTensor(x))
                score = Variable(torch.FloatTensor([round(item.score)]), volatile=True)
                t = Variable(torch.FloatTensor([item.time]), volatile=True)
                if args.test_as_seq and i > ref_len and ref_len > 0:
                    # feed the previous prediction back in place of the score
                    s, h = model(x, s.view(1), t, h)
                else:
                    if ref_len > 0 and i > ref_len and not args.test_on_one:
                        # past the reference part: predict only, keep h
                        s, _ = model(x, score, t, h)
                    else:
                        s, h = model(x, score, t, h)
                if args.loss == 'cross_entropy':
                    s = F.sigmoid(s)
                else:
                    s = torch.clamp(s, 0, 1)
                if args.test_on_last:
                    pred_scores[i] = s_last
                else:
                    pred_scores[i] = s
                # records inside the reference part do not count towards
                # the metrics
                if i < ref_len:
                    continue
                mse += MSE(s, score)
                m = MAE(s, score).data[0]
                mae += m
                acc += m < 0.5
            print_seq(seq, pred_scores.data.cpu().numpy(), ref_len, f, args.test_on_last)
            mse /= length
            mae /= length
            acc /= length
            # NOTE(review): `mse` is still the int 0 if no record reached
            # the metric lines above, in which case `.data` would fail -
            # confirm that cannot happen for the data in use.
            total_mse += mse.data[0]
            total_mae += mae
            total_acc += acc
            # Log only every print_every sequences and at the very end.
            if total_seq_cnt % args.print_every != 0 and \
                    total_seq_cnt != seq_cnt:
                continue
            now = time.time()
            duration = (now - then) / 60
            logging.info(
                '[%d/%d] (%.2f seqs/min) '
                'rmse %.6f, mae %.6f, acc %.6f' %
                (total_seq_cnt, seq_cnt,
                 ((total_seq_cnt - 1) % args.print_every + 1) / duration,
                 math.sqrt(total_mse / total_seq_cnt),
                 total_mae / total_seq_cnt,
                 total_acc / total_seq_cnt))
            then = now
        f.close()
def testseq(model, args):
    """Evaluate ``model`` while tracking its prediction for each sequence's last record.

    Test sets are chosen as in the sibling evaluation routine.  For every
    record of a user's sequence the model first predicts the score of the
    sequence's final record from the current state (stored in
    ``pred_scores``), then processes the current record; the state is only
    updated while ``i <= ref_len`` (or always when ``ref_len`` is 0).
    MSE, MAE and accuracy are accumulated for records at or after
    ``ref_len`` and logged every ``args.print_every`` sequences.
    """
    try:
        torch.set_grad_enabled(False)
    except AttributeError:
        # torch builds without set_grad_enabled: volatile=True is used below
        pass
    logging.info('model: %s, setup: %s' % (type(model).__name__, str(model.args)))
    logging.info('loading dataset')
    data = get_dataset(args.dataset)
    data.random_level = args.random_level
    # Each test set is a (name, sequences, reference sequences) triple.
    if not args.dataset.endswith('test'):
        if args.split_method == 'user':
            _, data = data.split_user(args.frac)
            testsets = [('user_split', data, {})]
        elif args.split_method == 'future':
            _, data = data.split_future(args.frac)
            testsets = [('future_split', data, {})]
        elif args.split_method == 'old':
            trainset, _, _, _ = data.split()
            data = trainset.get_seq()
            train, user, exam, new = data.split()
            train = train.get_seq()
            user = user.get_seq()
            exam = exam.get_seq()
            new = new.get_seq()
            testsets = zip(['user', 'exam', 'new'], [user, exam, new], [{}, train, user])
        else:
            if args.ref_set:
                ref = get_dataset(args.ref_set)
                ref.random_level = args.random_level
                testsets = [(args.dataset.split('/')[-1], data.get_seq(), ref.get_seq())]
            else:
                testsets = [('student', data.get_seq(), {})]
    else:
        testsets = [('school', data.get_seq(), {})]
    if args.input_knowledge:
        logging.info('loading knowledge concepts')
        topic_dic = {}
        kcat = Categorical(one_hot=True)
        kcat.load_dict(open(model.args['knows']).read().split('\n'))
        # the id->knowledge file is chosen from the name of the knowledge list
        know = 'data/id_firstknow.txt' if 'first' in model.args['knows'] \
            else 'data/id_know.txt'
        for line in open(know):
            uuid, know = line.strip().split(' ')
            know = know.split(',')
            # element-wise max over the rows returned by kcat.apply
            topic_dic[uuid] = torch.LongTensor(kcat.apply(None, know)).max(0)[0]
        # fallback input for topics missing from topic_dic
        zero = [0] * len(kcat.apply(None, '<NULL>'))
    if args.input_text:
        logging.info('loading exercise texts')
        topics = get_topics(args.dataset, model.words)
    if args.snapshot is None:
        epoch = load_last_snapshot(model, args.workspace)
    else:
        epoch = args.snapshot
        load_snapshot(model, args.workspace, epoch)
    logging.info('loaded model at epoch %s', str(epoch))
    if use_cuda:
        model.cuda()
    for testset, data, ref_data in testsets:
        logging.info('testing on: %s', testset)
        f = open_result(args.workspace, testset, epoch)
        then = time.time()
        total_mse = 0
        total_mae = 0
        total_acc = 0
        total_seq_cnt = 0
        users = list(data)
        random.shuffle(users)
        seq_cnt = len(users)
        MSE = torch.nn.MSELoss()
        MAE = torch.nn.L1Loss()
        # At most 5000 users are evaluated per test set.
        for user in users[:5000]:
            total_seq_cnt += 1
            seq = data[user]
            if user in ref_data:
                ref_seq = ref_data[user]
            else:
                ref_seq = []
            length = len(seq)
            ref_len = len(ref_seq)
            seq = ref_seq + seq
            # Enforce a minimum reference length, but always leave at least
            # one record to evaluate.
            if ref_len < args.ref_len:
                length = length + ref_len - args.ref_len
                ref_len = args.ref_len
            if length < 1:
                ref_len = ref_len + length - 1
                length = 1
            mse = 0
            mae = 0
            acc = 0
            # (disabled) per-topic de-duplication of seq:
            # seq2 = []
            # seen = set()
            # for item in seq:
            #     if item.topic in seen:
            #         continue
            #     seen.add(item.topic)
            #     seq2.append(item)
            # seq = seq2
            # length = len(seq) - ref_len
            pred_scores = Variable(torch.zeros(len(seq)))
            s = None
            h = None
            for i, item in enumerate(seq):
                # get last record for testing and current record for updating
                if args.input_knowledge:
                    # NOTE(review): the lookup for the last record is gated
                    # on the *current* topic being known - confirm that
                    # seq[-1].topic is always in topic_dic in that case.
                    if item.topic in topic_dic:
                        knowledge = topic_dic[item.topic]
                        knowledge_last = topic_dic[seq[-1].topic]
                    else:
                        knowledge = zero
                        knowledge_last = zero
                    knowledge = Variable(torch.LongTensor(knowledge))
                    knowledge_last = Variable(torch.LongTensor(knowledge_last), volatile=True)
                if args.input_text:
                    text = topics.get(item.topic).content
                    text = Variable(torch.LongTensor(text))
                    text_last = topics.get(seq[-1].topic).content
                    text_last = Variable(torch.LongTensor(text_last), volatile=True)
                score = Variable(torch.FloatTensor([item.score]), volatile=True)
                score_last = Variable(torch.FloatTensor([round(seq[-1].score) ]), volatile=True)
                item_time = Variable(torch.FloatTensor([item.time]), volatile=True)
                time_last = Variable(torch.FloatTensor([seq[-1].time]), volatile=True)
                # test last score of each seq for seq figure; the model is
                # selected by the prefix of its class name and the state h
                # is not updated by this call
                if type(model).__name__.startswith('DK'):
                    s, _ = model(knowledge_last, score_last, time_last, h)
                elif type(model).__name__.startswith('RA'):
                    s, _ = model(text_last, score_last, time_last, h)
                elif type(model).__name__.startswith('EK'):
                    s, _ = model(text_last, knowledge_last, score_last, time_last, h)
                s_last = torch.clamp(s, 0, 1)
                # update student state h until the fit process reaches trainset
                if ref_len > 0 and i > ref_len:
                    # predict only; h is left unchanged
                    if type(model).__name__.startswith('DK'):
                        s, _ = model(knowledge, score, item_time, h)
                    elif type(model).__name__.startswith('RA'):
                        s, _ = model(text, score, item_time, h)
                    elif type(model).__name__.startswith('EK'):
                        s, _ = model(text, knowledge, score, item_time, h)
                else:
                    if type(model).__name__.startswith('DK'):
                        s, h = model(knowledge, score, item_time, h)
                    elif type(model).__name__.startswith('RA'):
                        s, h = model(text, score, item_time, h)
                    elif type(model).__name__.startswith('EK'):
                        s, h = model(text, knowledge, score, item_time, h)
                pred_scores[i] = s_last
                if args.loss == 'cross_entropy':
                    s = F.sigmoid(s)
                else:
                    s = torch.clamp(s, 0, 1)
                # records inside the reference part do not count towards
                # the metrics
                if i < ref_len:
                    continue
                mse += MSE(s, score)
                m = MAE(s, score).data[0]
                mae += m
                acc += m < 0.5
            print_seq(seq, pred_scores.data.cpu().numpy(), ref_len, f, True)
            mse /= length
            mae /= length
            acc = float(acc) / length
            total_mse += mse.data[0]
            total_mae += mae
            total_acc += acc
            # Log only every print_every sequences and at the very end.
            if total_seq_cnt % args.print_every != 0 and total_seq_cnt != seq_cnt:
                continue
            now = time.time()
            duration = (now - then) / 60
            logging.info(
                '[%d/%d] (%.2f seqs/min) '
                'rmse %.6f, mae %.6f, acc %.6f' %
                (total_seq_cnt, seq_cnt,
                 ((total_seq_cnt - 1) % args.print_every + 1) / duration,
                 math.sqrt(total_mse / total_seq_cnt),
                 total_mae / total_seq_cnt,
                 total_acc / total_seq_cnt))
            then = now
        f.close()
def trainn(model, args):
    """Train ``model`` with Adam, one optimizer step per user sequence.

    Selects the training split according to ``args.split_method``, loads
    knowledge vectors and/or exercise texts as requested by
    ``args.input_knowledge`` / ``args.input_text``, resumes from the epoch
    returned by ``load_last_snapshot`` and trains up to ``args.epochs``.
    The loss for a sequence is the mean per-record loss (binary
    cross-entropy with logits when ``args.loss == 'cross_entropy'``, MSE
    otherwise).  Snapshots are saved every ``args.save_every`` sequences
    and at the end of each epoch; running averages are logged every
    ``args.print_every`` sequences.
    """
    logging.info('model: %s, setup: %s' % (type(model).__name__, str(model.args)))
    logging.info('loading dataset')
    data = get_dataset(args.dataset)
    data.random_level = args.random_level
    if args.split_method == 'user':
        data, _ = data.split_user(args.frac)
    elif args.split_method == 'future':
        data, _ = data.split_future(args.frac)
    elif args.split_method == 'old':
        data, _, _, _ = data.split()
        data = data.get_seq()
    if args.input_knowledge:
        logging.info('loading knowledge concepts')
        topic_dic = {}
        kcat = Categorical(one_hot=True)
        kcat.load_dict(open(model.args['knows']).read().split('\n'))
        # the id->knowledge file is chosen from the name of the knowledge list
        know = 'data/id_firstknow.txt' if 'first' in model.args['knows'] \
            else 'data/id_know.txt'
        for line in open(know):
            uuid, know = line.strip().split(' ')
            know = know.split(',')
            # element-wise max over the rows returned by kcat.apply
            topic_dic[uuid] = torch.LongTensor(kcat.apply(None, know)).max(0)[0]
        # fallback input for topics missing from topic_dic
        zero = [0] * len(kcat.apply(None, '<NULL>'))
    if args.input_text:
        logging.info('loading exercise texts')
        topics = get_topics(args.dataset, model.words)
    optimizer = torch.optim.Adam(model.parameters())
    start_epoch = load_last_snapshot(model, args.workspace)
    if use_cuda:
        model.cuda()
    for epoch in range(start_epoch, args.epochs):
        logging.info('epoch {}:'.format(epoch))
        then = time.time()
        total_loss = 0
        total_mae = 0
        total_acc = 0
        total_seq_cnt = 0
        users = list(data)
        random.shuffle(users)
        seq_cnt = len(users)
        MSE = torch.nn.MSELoss()
        MAE = torch.nn.L1Loss()
        for user in users:
            total_seq_cnt += 1
            seq = data[user]
            seq_length = len(seq)
            optimizer.zero_grad()
            loss = 0
            mae = 0
            acc = 0
            # the recurrent state is reset for every user
            h = None
            for i, item in enumerate(seq):
                # score = round(item.score)
                if args.input_knowledge:
                    if item.topic in topic_dic:
                        knowledge = topic_dic[item.topic]
                    else:
                        knowledge = zero
                    # (disabled) build a knowledge vector concatenated with
                    # a copy carrying the score at the active positions:
                    # knowledge = torch.LongTensor(knowledge).view(-1).type(torch.FloatTensor)
                    # one_index = torch.nonzero(knowledge).view(-1)
                    # expand_vec = torch.zeros(knowledge.size()).view(-1)
                    # expand_vec[one_index] = score
                    # cks = torch.cat([knowledge, expand_vec]).view(1, -1)
                    knowledge = Variable(torch.LongTensor(knowledge))
                    # cks = Variable(cks)
                if args.input_text:
                    text = topics.get(item.topic).content
                    text = Variable(torch.LongTensor(text))
                score = Variable(torch.FloatTensor([item.score]))
                item_time = Variable(torch.FloatTensor([item.time]))
                # the call signature depends on the model's class-name prefix
                if type(model).__name__.startswith('DK'):
                    s, h = model(knowledge, score, item_time, h)
                elif type(model).__name__.startswith('RA'):
                    s, h = model(text, score, item_time, h)
                elif type(model).__name__.startswith('EK'):
                    s, h = model(text, knowledge, score, item_time, h)
                # NOTE(review): nesting of this statement was reconstructed
                # from a whitespace-collapsed source - confirm it applies to
                # every model type rather than only the 'EK' branch.
                s = s[0]
                if args.loss == 'cross_entropy':
                    loss += F.binary_cross_entropy_with_logits(
                        s, score.view_as(s))
                    m = MAE(F.sigmoid(s), score).data[0]
                else:
                    loss += MSE(s, score)
                    m = MAE(s, score).data[0]
                mae += m
                acc += m < 0.5
            # average over the sequence before the backward pass
            loss /= seq_length
            mae /= seq_length
            acc = float(acc) / seq_length
            total_loss += loss.data[0]
            total_mae += mae
            total_acc += acc
            loss.backward()
            optimizer.step()
            if total_seq_cnt % args.save_every == 0:
                save_snapshot(model, args.workspace, '%d.%d' % (epoch, total_seq_cnt))
            # Log only every print_every sequences and at the very end.
            if total_seq_cnt % args.print_every != 0 and total_seq_cnt != seq_cnt:
                continue
            now = time.time()
            duration = (now - then) / 60
            logging.info(
                '[%d:%d/%d] (%.2f seqs/min) loss %.6f, mae %.6f, acc %.6f' %
                (epoch, total_seq_cnt, seq_cnt,
                 ((total_seq_cnt - 1) % args.print_every + 1) / duration,
                 total_loss / total_seq_cnt,
                 total_mae / total_seq_cnt,
                 total_acc / total_seq_cnt))
            then = now
        save_snapshot(model, args.workspace, epoch + 1)
def default_hidden(self):
    """Return an all-zero hidden state of size ``(1, 1, self.topic_size)``."""
    shape = (1, 1, self.topic_size)
    zeros = torch.zeros(*shape)
    return Variable(zeros)
def __init__(self):
    """Give every tracked statistic its own ``Variable`` and clear the label."""
    stat_names = (
        # How well the player led, in the context of passed/failed missions.
        'leadTeam', 'leadSucMission', 'leadFailMission',
        # How well the player voted, in the context of passed/failed
        # missions.
        'voted', 'votedYes', 'votedNo',
        'votedYesPass', 'votedYesFail', 'votedNoPass', 'votedNoFail',
        # How the player performed when selected, in the context of
        # passed/failed missions.
        'selected', 'selectedPass', 'selectedFail',
        # How much sabotaging/passing the player was involved in.
        'sabotages', 'passeds',
    )
    for stat_name in stat_names:
        # Variable() is called once per name so no two attributes share an
        # instance.
        setattr(self, stat_name, Variable())

    # True/False label (spy or not); starts out as None.
    self.Label = None
def train(model, args):
    """Train ``model`` on per-user sequences, one optimizer step per sequence.

    The dataset named by ``args.dataset`` is loaded, optionally split
    according to ``args.split_method`` (``'user'``, ``'future'`` or
    ``'old'``; any other value leaves it unsplit) and converted to
    sequences with ``get_seq()``.  For every epoch the users are visited in
    random order; the per-item losses of one sequence are averaged,
    back-propagated and applied with Adam.

    Models whose class name starts with ``DK`` are fed knowledge vectors
    built from ``data/know_list.txt`` and ``data/id_know.txt``; all other
    models are fed the exercise content returned by ``get_topics``.

    Args:
        model: network to train, called as ``model(x, score, t, hidden)``
            and expected to return ``(prediction, hidden)``.  Reads
            ``model.words`` for non-``DK`` models.
        args: run configuration.  Attributes read here: ``dataset``,
            ``random_level``, ``split_method``, ``frac``, ``workspace``,
            ``epochs``, ``loss``, ``save_every`` and ``print_every``.
    """
    model_name = type(model).__name__
    is_dk = model_name.startswith('DK')

    logging.info('args: %s', args)
    logging.info('model: %s, setup: %s', model_name, model.args)

    logging.info('loading dataset')
    data = get_dataset(args.dataset)
    data.random_level = args.random_level

    if args.split_method == 'user':
        data, _ = data.split_user(args.frac)
    elif args.split_method == 'future':
        data, _ = data.split_future(args.frac)
    elif args.split_method == 'old':
        data, _, _, _ = data.split()
    data = data.get_seq()

    if is_dk:
        topic_dic = {}
        kcat = Categorical(one_hot=True)
        # Context managers close both files deterministically.
        with open('data/know_list.txt') as vocab_file:
            kcat.load_dict(vocab_file.read().split('\n'))
        with open('data/id_know.txt') as know_file:
            for line in know_file:
                uuid, concepts = line.strip().split(' ')
                # Element-wise maximum over the encodings of all concepts
                # of one exercise (presumably a multi-hot vector -- depends
                # on what Categorical.apply returns).
                encoded = torch.LongTensor(
                    kcat.apply(None, concepts.split(',')))
                topic_dic[uuid] = encoded.max(0)[0].type(torch.LongTensor)
        # Fallback vector for exercises without a knowledge entry.
        zero = [0] * len(kcat.apply(None, '<NULL>'))
    else:
        topics = get_topics(args.dataset, model.words)

    optimizer = torch.optim.Adam(model.parameters())
    start_epoch = load_last_snapshot(model, args.workspace)
    if use_cuda:
        model.cuda()

    # The loss modules hold no state, so one instance serves every epoch.
    mse_loss = torch.nn.MSELoss()
    mae_loss = torch.nn.L1Loss()

    for epoch in range(start_epoch, args.epochs):
        logging.info('epoch %s:', epoch)
        then = time.time()

        total_loss = 0
        total_mae = 0
        total_acc = 0
        total_seq_cnt = 0

        users = list(data)
        random.shuffle(users)
        seq_cnt = len(users)

        for user in users:
            total_seq_cnt += 1

            seq = data[user]
            length = len(seq)

            optimizer.zero_grad()

            loss = 0
            mae = 0
            acc = 0
            h = None  # the model is handed None on the first item

            for item in seq:
                if is_dk:
                    if item.topic in topic_dic:
                        x = topic_dic[item.topic]
                    else:
                        x = zero
                else:
                    x = topics.get(item.topic).content
                x = Variable(torch.LongTensor(x))

                # The target is the score rounded to the nearest integer.
                score = Variable(torch.FloatTensor([round(item.score)]))
                t = Variable(torch.FloatTensor([item.time]))

                s, h = model(x, score, t, h)

                if args.loss == 'cross_entropy':
                    loss += F.binary_cross_entropy_with_logits(
                        s, score.view_as(s))
                    m = mae_loss(F.sigmoid(s), score).data[0]
                else:
                    loss += mse_loss(s, score)
                    m = mae_loss(s, score).data[0]
                mae += m
                # An item counts as correct when its absolute error is
                # below 0.5.
                acc += m < 0.5

            loss /= length
            mae /= length
            # ``acc`` is an integer count; convert before dividing so the
            # ratio is not truncated under Python 2 integer division.
            acc = float(acc) / length

            total_loss += loss.data[0]
            total_mae += mae
            total_acc += acc

            loss.backward()
            optimizer.step()

            if total_seq_cnt % args.save_every == 0:
                save_snapshot(model, args.workspace,
                              '%d.%d' % (epoch, total_seq_cnt))

            # Report every ``print_every`` sequences and once more after
            # the final sequence of the epoch.
            if total_seq_cnt % args.print_every != 0 and \
                    total_seq_cnt != seq_cnt:
                continue

            now = time.time()
            duration = (now - then) / 60

            logging.info(
                '[%d:%d/%d] (%.2f seqs/min) '
                'loss %.6f, mae %.6f, acc %.6f',
                epoch, total_seq_cnt, seq_cnt,
                ((total_seq_cnt - 1) % args.print_every + 1) / duration,
                total_loss / total_seq_cnt,
                total_mae / total_seq_cnt,
                total_acc / total_seq_cnt)
            then = now

        save_snapshot(model, args.workspace, epoch + 1)