Example #1
	def test(self):
		#self.merge()
		#self.compress()
		#return
		embedding_size = 100
		for CLUSTER_MIN_SIZE in range(4,19,2):
			for dsname in ['webkb','er']:
				mln = MLN(dsname)
				db = DBManager(dsname,mln)
				print('merge db dom sizes:')
				dom_obj_map = db.get_dom_objs_map(mln,db.merge_db_file)
				cf = common_f()
				#cf.delete_files(mln.pickle_location)
				#cf.remove_irrelevant_atoms()
				embedding_size += 100
				embedding_size = embedding_size%1000

				db.set_atoms()
				bmf = bmf_cluster(dsname)
				bmf.cluster(db,1,mln.pdm,dom_obj_map)

				print('original db dom sizes(after compression):')
				orig_dom_objs_map = db.get_dom_objs_map(mln,mln.orig_db_file)
				CLUSTER_MIN_SIZE = 10  # overrides the loop variable; the minimum cluster size is effectively fixed at 10
				w2v = word2vec(dsname,db,CLUSTER_MIN_SIZE,embedding_size)
				print('w2v cluster dom sizes:')
				w2v_dom_objs_map = db.get_dom_objs_map(mln,w2v.w2v__cluster_db_file)
				cr = cf.calculate_cr(orig_dom_objs_map,w2v_dom_objs_map)

				print('cr : ' + str(cr))
				rc = random_cluster(dsname)
				rc.generate_random_db(db,w2v.pred_atoms_reduced_numbers,mln,w2v_dom_objs_map)
				print('random cluster dom sizes')
				db.get_dom_objs_map(mln,mln.random__cluster_db_file)

				kmc = kmeans_cluster(dsname)
				kmc.cluster(db,str(cr),mln.pdm,w2v_dom_objs_map,mln.dom_pred_map)
				print('kmeans cluster dom sizes:')
				kmeans_dom_objs_map = db.get_dom_objs_map(mln,kmc.kmeans__cluster_db_file)
				mln.create_magician_mln()
				#magician(dsname,mln)
				tuffy(dsname)
				orig_meta_map = {}

				orig_meta_map['bmf'] = bmf.bmf_orig_meta_map
				orig_meta_map['w2v'] = w2v.w2v_orig_meta_map
				orig_meta_map['random'] = rc.rand_orig_meta_map
				orig_meta_map['kmeans'] = kmc.kmeans_orig_meta_map
				print('Dataset : ' + dsname +  '; CR : ' + str(cr))
				p = performance(dsname,embedding_size)
				p.compare_marginal(mln,orig_meta_map,cr)
				p.compare_map(mln,orig_meta_map,cr)
			break  # exit after the first CLUSTER_MIN_SIZE iteration
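
The calculate_cr helper used above is not included in this listing. If cr stands for a compression ratio over the evidence-database domains, one plausible reading (a guess for illustration only, not the repo's actual implementation) is the ratio of total domain objects after clustering to the total before:

def calculate_cr_sketch(orig_dom_objs_map, clustered_dom_objs_map):
    # Illustrative guess: both maps are assumed to be {domain: list of objects}.
    orig_total = sum(len(objs) for objs in orig_dom_objs_map.values())
    clustered_total = sum(len(objs) for objs in clustered_dom_objs_map.values())
    return clustered_total / float(orig_total)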
Example #2
    def __init__(self, dsname):
        MLN.__init__(self, dsname)
        self.set_pred_pairs()
Example #3
    def test(self):
        #self.merge()
        #self.compress()
        #return
        embedding_size = 100
        for CLUSTER_MIN_SIZE in range(4, 19, 2):
            for dsname in ['webkb']:
                mln = MLN(dsname)
                db = DBManager(dsname, mln)
                print('merge db dom sizes:')
                db.set_doms_atoms(mln, db.merge_db_file)

                cf = common_f()
                #cf.delete_files(mln.pickle_location)
                if dsname == 'er':
                    cf.remove_irrelevant_atoms()

                embedding_size = 300
                print('generating sentences')
                start = time.time()
                cnn_atoms, ntn_atoms = db.pred_atoms, db.pred_atoms
                # NOTE: this loop has no break, so execution never proceeds past it;
                # it looks like a leftover from experimentation.
                while True:
                    #cnn_atoms = self.embed(cnn_atoms,mln.pdm,mln.dom_sizes_map,True)
                    ntn_atoms = self.embed(ntn_atoms, mln.pdm,
                                           mln.dom_sizes_map, False)

                sg = None
                if dsname == 'review':

                    return
                    #end = time.time()
                    #print('Time : ',end-start)
                else:
                    sg = sentence_generator(mln.pdm, db.pred_atoms,
                                            db.TEST_SIZE, db)
                    #print('calling w2v')
                    #wv = word2vec_cnn()
                    #print('making images')
                    #wv.make_images(sg.sentences,mln.pdm,db.pred_atoms,mln.dom_sizes_map,dsname,sg.train_atoms,sg.test_atoms,db.TEST_SIZE)

                cor = corrupt(dsname, db.pred_atoms, mln.pdm, db.dom_objs_map,
                              sg.sentences)
                # NOTE: this early return makes the clustering pipeline below
                # unreachable in this variant (dom_obj_map is also not defined
                # here; compare Example #1).
                return
                bmf = bmf_cluster(dsname)
                bmf.cluster(db, 1, mln.pdm, dom_obj_map)

                print('original db dom sizes(after compression):')
                orig_dom_objs_map = db.get_dom_objs_map(mln, mln.orig_db_file)
                CLUSTER_MIN_SIZE = 10
                w2v = word2vec(dsname, db, CLUSTER_MIN_SIZE, embedding_size)
                print('w2v cluster dom sizes:')
                w2v_dom_objs_map = db.get_dom_objs_map(
                    mln, w2v.w2v__cluster_db_file)
                cr = cf.calculate_cr(orig_dom_objs_map, w2v_dom_objs_map)

                print('cr : ' + str(cr))
                rc = random_cluster(dsname)
                rc.generate_random_db(db, w2v.pred_atoms_reduced_numbers, mln,
                                      w2v_dom_objs_map)
                print('random cluster dom sizes')
                db.get_dom_objs_map(mln, mln.random__cluster_db_file)

                kmc = kmeans_cluster(dsname)
                kmc.cluster(db, str(cr), mln.pdm, w2v_dom_objs_map,
                            mln.dom_pred_map)
                print('kmeans cluster dom sizes:')
                kmeans_dom_objs_map = db.get_dom_objs_map(
                    mln, kmc.kmeans__cluster_db_file)
                mln.create_magician_mln()
                magician(dsname, mln)
                #tuffy(dsname)
                orig_meta_map = {}

                orig_meta_map['bmf'] = bmf.bmf_orig_meta_map
                orig_meta_map['w2v'] = w2v.w2v_orig_meta_map
                orig_meta_map['random'] = rc.rand_orig_meta_map
                orig_meta_map['kmeans'] = kmc.kmeans_orig_meta_map
                print('Dataset : ' + dsname + '; CR : ' + str(cr))
                p = performance(dsname, embedding_size)
                p.compare_marginal(mln, orig_meta_map, cr)
Example #4
    def compress(self):
        for dsname in ['er', 'protein', 'webkb']:
            mln = MLN(dsname)
            db = DBManager(dsname, mln)
            db.compress(mln, .3)
Example #5
    def merge(self):
        for dsname in ['protein']:
            mln = MLN(dsname)
            db = DBManager(dsname, mln)
            db.merge()
Example #6
from DBManager import DBManager,dbconfig
from mln import MLN
from word2vec import word2vec

dsname = 'er'
mln = MLN(dsname)
db = DBManager(dsname,mln)
#db.merge()
db.compress(mln,.1)
#db.calculate_dom_sizes(mln)
#w2v = word2vec(dsname,db)
Example #7
def train():
    # Note: this example targets Python 2 and a pre-1.0 TensorFlow API
    # (tf.initialize_all_variables, tf.merge_all_summaries, tf.train.SummaryWriter,
    # xrange); newer TensorFlow releases removed these names.
    rnd_seed = 0
    np.random.seed(rnd_seed)
    tf.set_random_seed(rnd_seed)
    sim = GymSim('CartPole-v0', 5000, seed=rnd_seed)
    sim.act_sample_batch(
        5000, FLAGS.sample_neg_ratio)  # bootstrap with random actions
    sim.print_stats()
    #embed()
    #sys.exit()
    q_network = MLN(sim.INPUT_DIM, sim.ACTION_DIM)
    target_network = MLN(sim.INPUT_DIM, sim.ACTION_DIM, name_scope='target')

    with tf.Graph().as_default():

        global_step = tf.Variable(0, trainable=False)

        action_pl = tf.placeholder(tf.int64, name='action_pl')
        reward_pl = tf.placeholder(tf.float32, name='reward_pl')
        state_pl = tf.placeholder(tf.float32, (None, sim.INPUT_DIM),
                                  name='state_pl')
        observ_pl = tf.placeholder(tf.float32, (None, sim.INPUT_DIM),
                                   name='observ_pl')

        action_q = q_network.inference(state_pl)
        target_q = tf.stop_gradient(target_network.inference(observ_pl))
        target_q_pt = tf.Print(target_q, [target_q])
        action_q_pt = tf.Print(action_q, [action_q])

        loss = dqn.td_loss(action_pl, sim.ACTION_DIM, action_q, reward_pl,
                           target_q)

        train_op = dqn.train(FLAGS.learning_rate, loss, global_step)

        saver = tf.train.Saver(tf.all_variables())

        summary_op = tf.merge_all_summaries()

        action_op = tf.argmax(action_q, 1, name='action_op')

        copy_var = q_network.copy_to('target')

        init = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        #initialize variables
        sess.run(init)

        summary_writer = tf.train.SummaryWriter(
            os.path.join(FLAGS.train_dir, 'logs'), sess.graph)

    for step in xrange(FLAGS.max_steps):
        start_time = time.time()

        if step % 4 == 0:
            sess.run(copy_var)

        feed = sim.feed_batch(state_pl, action_pl, reward_pl, observ_pl,
                              FLAGS.batch_size)

        _, loss_value = sess.run([train_op, loss], feed_dict=feed)

        duration = time.time() - start_time

        assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

        if step % 10 == 0:
            num_examples_per_step = FLAGS.batch_size
            examples_per_sec = num_examples_per_step / duration
            sec_per_batch = float(duration)

            format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                          'sec/batch)')
            print(format_str % (datetime.now(), step, loss_value,
                                examples_per_sec, sec_per_batch))

        if step > FLAGS.sample_after:
            pred_act = sess.run(action_op, feed_dict={state_pl: sim.state})
            pred_act = pred_act[0]
            sim.act_sample_once(pred_act,
                                neg_ratio=FLAGS.sample_neg_ratio,
                                append_db=True)

        # visualization
        if step % 1000 == 0 and step != 0:
            sim.reset()
            survive = 0
            for _ in range(200):
                pred_act = sess.run(action_op, feed_dict={state_pl: sim.state})
                pred_act = pred_act[0]
                done = sim.act_demo(pred_act)
                if not done:
                    survive += 1
                else:
                    print('Survived for %i frame' % survive)
                    survive = 0

        #if step % 100 == 0:
        #    summary_str = sess.run(summary_op)
        #    summary_writer.add_summary(summary_str, step)

        # Save the model checkpoint periodically.
        if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
            checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step=step)
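
The dqn.td_loss and dqn.train helpers used above are not part of this listing. For reference, a standard DQN temporal-difference loss over a batch looks like the NumPy sketch below; the function name, argument layout, and the discount default are assumptions for illustration, not the repo's actual API.

import numpy as np

def td_loss_sketch(actions, action_dim, action_q, rewards, target_q, gamma=0.99):
    # Illustrative sketch, not dqn.td_loss itself.
    # actions:  (batch,)            integer action taken in each state
    # action_q: (batch, action_dim) Q(s, a) from the online network
    # rewards:  (batch,)            observed rewards
    # target_q: (batch, action_dim) Q(s', a') from the frozen target network
    # Bellman target: r + gamma * max_a' Q_target(s', a')
    td_target = rewards + gamma * target_q.max(axis=1)
    # Q-value of the action actually taken, selected via a one-hot mask
    taken_q = (np.eye(action_dim)[actions] * action_q).sum(axis=1)
    # mean squared TD error
    return np.mean((td_target - taken_q) ** 2)

A full implementation would also zero the bootstrap term for terminal transitions, which this sketch omits for brevity.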