def dmn_mid(args): #assert args.word_vector_size in [50, 100, 200, 300] network_name = args.prefix + '%s.mh%d.n%d.bs%d%s%s%s.babi%s' % ( args.network, args.memory_hops, args.dim, args.batch_size, ".na" if args.normalize_attention else "", ".bn" if args.batch_norm else "", (".d" + str(args.dropout)) if args.dropout>0 else "", args.babi_id) babi_train_raw, babi_test_raw = utils.get_babi_raw(args.babi_id, args.babi_test_id) #word2vec = utils.load_glove(args.word_vector_size) word2vec = {} args_dict = dict(args._get_kwargs()) args_dict['babi_train_raw'] = babi_train_raw args_dict['babi_test_raw'] = babi_test_raw args_dict['word2vec'] = word2vec if args.network == 'dmn_tied': import dmn_tied if (args.batch_size != 1): print "==> not using minibatch training in this mode" args.batch_size = 1 dmn = dmn_tied.DMN_tied(**args_dict) elif args.network == 'dmn_untied': import dmn_untied if (args.batch_size != 1): print "==> not using minibatch training in this mode" args.batch_size = 1 dmn = dmn_untied.DMN_untied(**args_dict) else: raise Exception("No such network known: " + args.network) if args.load_state != "": dmn.load_state(args.load_state) return args, network_name, dmn
def dmn_mid(args): #assert args.word_vector_size in [50, 100, 200, 300] network_name = args.prefix + '%s.mh%d.n%d.bs%d%s%s%s.babi%s' % ( args.network, args.memory_hops, args.dim, args.batch_size, ".na" if args.normalize_attention else "", ".bn" if args.batch_norm else "", (".d" + str(args.dropout)) if args.dropout > 0 else "", args.babi_id) babi_train_raw, babi_test_raw = utils.get_babi_raw(args.babi_id, args.babi_test_id) #word2vec = utils.load_glove(args.word_vector_size) word2vec = {} args_dict = dict(args._get_kwargs()) args_dict['babi_train_raw'] = babi_train_raw args_dict['babi_test_raw'] = babi_test_raw args_dict['word2vec'] = word2vec if args.network == 'dmn_tied': import dmn_tied if (args.batch_size != 1): print "==> not using minibatch training in this mode" args.batch_size = 1 dmn = dmn_tied.DMN_tied(**args_dict) elif args.network == 'dmn_untied': import dmn_untied if (args.batch_size != 1): print "==> not using minibatch training in this mode" args.batch_size = 1 dmn = dmn_untied.DMN_untied(**args_dict) else: raise Exception("No such network known: " + args.network) if args.load_state != "": dmn.load_state(args.load_state) return args, network_name, dmn
# NOTE(review): fragment of a CLI driver script, collapsed onto one line by the
# extraction. It begins AFTER `parser` is created (earlier add_argument calls
# are outside this view) and is cut off mid `elif` chain, so it is left
# byte-identical rather than reformatted.
# NOTE(review): `type=bool` on --normalize_attention is an argparse pitfall —
# bool("False") is True, so any non-empty value enables the flag; a
# store_true action would be the conventional fix. Flagged only, not changed.
# Flow (as far as visible): define remaining flags -> parse args -> build
# network_name from hyper-parameters -> load bAbI data and GloVe vectors ->
# assemble constructor kwargs -> dispatch on args.network ('dmn_batch' branch
# visible, 'dmn_basic' branch truncated).
parser.add_argument('--normalize_attention', type=bool, default=False, help='flag for enabling softmax on attention vector') parser.add_argument('--log_every', type=int, default=1, help='print information every x iteration') parser.add_argument('--save_every', type=int, default=1, help='save state every x epoch') parser.add_argument('--prefix', type=str, default="", help='optional prefix of network name') parser.add_argument('--no-shuffle', dest='shuffle', action='store_false') parser.add_argument('--babi_test_id', type=int, default=-1, help='babi_id of test set') parser.set_defaults(shuffle=True) args = parser.parse_args() assert args.word_vector_size in [50, 100, 200, 300] network_name = args.prefix + '%s.mh%d.n%d.bs%d%s.babi%s' % (args.network, args.memory_hops, args.dim, args.batch_size, ".na" if args.normalize_attention else "", args.babi_id) babi_train_raw, babi_test_raw = utils.get_babi_raw(args.babi_id, args.babi_test_id) word2vec = utils.load_glove(args.word_vector_size) args_dict = dict(args._get_kwargs()) args_dict['babi_train_raw'] = babi_train_raw args_dict['babi_test_raw'] = babi_test_raw args_dict['word2vec'] = word2vec # init class if args.network == 'dmn_batch': import dmn_batch dmn = dmn_batch.DMN_batch(**args_dict) elif args.network == 'dmn_basic':
# NOTE(review): fragment collapsed onto one line; it starts mid-method (the
# for-loop uses `self.f` and locals `l`, `a_loss`, `y_true`, `y_pred` bound
# before this view), so it is left byte-identical rather than reformatted.
# Visible pieces: (1) tail of a test loop — per-sample loss/prediction,
# accumulating y_true/y_pred and printing exact-match accuracy as a percent;
# (2) shuffle(...) — zips five parallel train lists, random.shuffle's the
# combined list, and unzips (relies on Python 2 zip returning a list —
# random.shuffle on a Py3 zip iterator would fail); (3) driver code that loads
# a bAbI task (`task`, `vocab`, `DMN`, `_process_input`, `word_vector_size`
# are defined outside this view — TODO confirm), restores a saved state from
# 'states_orig/states7/DMN_orig.epoch20', and evaluates on train and test
# splits (training call commented out).
for i in range(0,l): loss,pred = self.f(test_inp[i],test_q[i],test_mask[i],test_ans[i]) a_loss = a_loss + loss y_true.append(test_ans[i]) y_pred.append(pred.argmax(axis=0)) accuracy = sum([1 if t == p else 0 for t, p in zip(y_true, y_pred)]) print "accuracy: %.2f percent" % (accuracy * 100.0 / l) def shuffle(train_input,train_q,train_answer,train_input_mask,train_sf): print "==> Shuffling the train set" combined = zip(train_input,train_q,train_answer,train_input_mask,train_sf) random.shuffle(combined) train_input, train_q, train_answer, train_input_mask, train_sf = zip(*combined) return train_input,train_q,train_answer,train_input_mask,train_sf babi_train_raw, babi_test_raw = utils.get_babi_raw(task,task) train_input, train_q, train_answer, train_input_mask, train_sf = _process_input(babi_train_raw) test_input, test_q, test_answer, test_input_mask, test_sf = _process_input(babi_test_raw) vocab_size = len(vocab) a1 = train_input a2 = train_q a3 = train_input_mask a4 = train_answer dmn = DMN(word_vector_size,vocab_size) dmn.load_state('states_orig/states7/DMN_orig.epoch20') #dmn.train(a1,a2,a4,a3,train_sf) dmn.test(a1,a2,a4,a3) dmn.test(test_input,test_q,test_answer,test_input_mask)
# NOTE(review): fragment collapsed onto one line; it starts MID-EXPRESSION
# (inside a utils.process_word(...) call whose opening is outside this view),
# so it cannot be safely reformatted and is kept byte-identical.
# Visible pieces: tail of `_process_input` — registers the answer token
# x["A"] in the vocab, records the supporting-fact count len(x["S"]), and
# returns (sent_len, gate_len); then a driver loop over bAbI tasks 1..20 that
# loads 50-dim GloVe vectors once, rebuilds vocab/ivocab per task, and prints
# vocab size plus min/max statistics of gate counts and sentence lengths for
# the train and test splits. Near-duplicate of the next fragment — presumably
# two revisions of the same script concatenated; verify before deduplicating.
word_vector_size=word_vector_size, to_return="word2vec") for w in q ] utils.process_word( word=x["A"].lower(), # TODO: add .lower() here word2vec=word2vec, vocab=vocab, ivocab=ivocab, word_vector_size=word_vector_size, to_return="index") gate_len.append(len(x["S"])) return sent_len, gate_len word_vector_size = 50 word2vec = utils.load_glove(word_vector_size) for babi_id in range(1, 21): babi_id = str(babi_id) print "processing babi." + babi_id vocab = {} ivocab = {} babi_train_raw, babi_test_raw = utils.get_babi_raw(babi_id, babi_id) train_len, train_gate = _process_input(babi_train_raw, word2vec, vocab, ivocab, word_vector_size) test_len, test_gate = _process_input(babi_test_raw, word2vec, vocab, ivocab, word_vector_size) print len(vocab) print max(train_gate), min(train_gate), max(test_gate), min(test_gate) print max(train_len), min(train_len), max(test_len), min(test_len)
# NOTE(review): fragment collapsed onto one line; it starts mid-function
# (locals `sent_len`, `inp_vector`, `q`, `x` are bound before this view), so
# it is kept byte-identical rather than reformatted.
# Visible pieces: tail of `_process_input` — records a sentence length,
# embeds the question words via utils.process_word, registers the answer
# token x["A"], appends the supporting-fact count len(x["S"]), and returns
# (sent_len, gate_len); then the same bAbI-1..20 statistics driver as the
# previous fragment (different whitespace only). Near-duplicate — presumably
# two revisions of the same script concatenated; verify before deduplicating.
sent_len.append(len(inp_vector)) q_vector = [utils.process_word(word = w.lower(), word2vec = word2vec, vocab = vocab, ivocab = ivocab, word_vector_size = word_vector_size, to_return = "word2vec") for w in q] utils.process_word(word = x["A"].lower(), # TODO: add .lower() here word2vec = word2vec, vocab = vocab, ivocab = ivocab, word_vector_size = word_vector_size, to_return = "index") gate_len.append(len(x["S"])) return sent_len, gate_len word_vector_size=50 word2vec = utils.load_glove(word_vector_size) for babi_id in range(1,21): babi_id = str(babi_id) print "processing babi."+babi_id vocab = {} ivocab = {} babi_train_raw, babi_test_raw = utils.get_babi_raw(babi_id, babi_id) train_len, train_gate = _process_input(babi_train_raw,word2vec,vocab,ivocab,word_vector_size) test_len, test_gate = _process_input(babi_test_raw,word2vec,vocab,ivocab,word_vector_size) print len(vocab) print max(train_gate), min(train_gate), max(test_gate), min(test_gate) print max(train_len),min(train_len),max(test_len),min(test_len)
# NOTE(review): fragment collapsed onto one line; it starts mid-function
# (locals `split_context`, `split_question`, `k`, `v`, `correct_answer`,
# `prediction`, `data` are bound before this view — presumably inside a loop
# over an entity-id -> surface-form mapping; confirm against the full file),
# so it is kept byte-identical rather than reformatted.
# Visible pieces: (1) tail of a de-anonymization routine — replaces token `k`
# with `v` in context/question/answer/prediction, rejoins the strings, and
# renders "@placeholder" as a blank; (2) a __main__ driver that loads the CNN
# dataset test split and GloVe vectors, builds a DMN_smooth with
# module-level hyper-parameters (dim, mode, memory_hops, etc. — defined
# outside this view), restores `state`, and serves via app.run (Flask-style
# app object, also defined outside this view — TODO confirm).
for i, w in enumerate(split_context): if w == k: split_context[i] = v for i, w in enumerate(split_question): if w == k: split_question[i] = v if (correct_answer == k): data["A"] = v if (prediction == k): data["P"] = v data["C"] = " ".join(split_context) data["Q"] = " ".join(split_question).replace("@placeholder", "________") return data if __name__ == "__main__": babi_train_raw, babi_test_raw = utils.get_babi_raw("cnn", "") word2vec = utils.load_glove(word_vector_size) dmn = dmn_smooth.DMN_smooth( None, babi_test_raw, word2vec, word_vector_size, dim, mode, answer_module, input_mask_mode, memory_hops, l2, normalize_attention, batch_norm, dropout, learning_rate) dmn.load_state(state) print "==> running server" app.run(debug=True, use_reloader=False)