コード例 #1
0
    def test_train_and_load_embedding(self):
        """Train embeddings, save them to a temp file, then load them back.

        The reloaded mapping is expected to hold 26 entries (the alphabet has
        26 characters), each of length 4 (the configured ``embedding_size``).
        """
        passwords = tf.contrib.data.Dataset.from_tensor_slices(
            ['passj', 'word', 'db'])

        with tempfile.NamedTemporaryFile(mode='w') as ofile:
            settings = {
                'alphabet': 'abcdefghijklmnopqrstuvwxyz',
                'password_batch': 5,
                'embedding_window_size': 1,
                'batch_size': 2,
                'embedding_size': 4,
                'embedding_num_neg_samples': 2,
                'logging_freq': 1,
                'num_train_epochs': 1,
            }
            config = pe.EmbeddingConfig(**settings)
            trainer = pe.EmbeddingTrainer(config)
            with self.test_session() as session:
                trainer.train_and_save(passwords, session, ofile)

            # Push buffered output to disk before reopening the file by name.
            ofile.flush()

            with open(ofile.name, 'r') as ifile:
                embeddings = pe.CharEmbeddingLoader(config).read_from_file(
                    ifile)

                self.assertEqual(26, len(embeddings))
                for vector in embeddings.values():
                    self.assertEqual(4, len(vector))
コード例 #2
0
    def test_skipgram_randomize(self):
        """Smoke test: randomized skipgram tensors can be evaluated once.

        An ``OutOfRangeError`` while evaluating is tolerated; nothing is
        asserted about the produced values.
        """
        passwords = tf.contrib.data.Dataset.from_tensor_slices(
            ['passj', 'word', 'db'])

        settings = {
            'alphabet': 'abcdefghijklmnopqrstuvwxyz',
            'password_batch': 5,
            'batch_size': 10,
            'embedding_window_size': 3,
        }
        trainer = pe.EmbeddingTrainer(pe.EmbeddingConfig(**settings))
        examples, labels = trainer.skipgram(passwords, randomize=True)

        with self.test_session() as session:
            session.run([tf.tables_initializer()])
            coordinator = tf.train.Coordinator()
            runners = tf.train.start_queue_runners(coord=coordinator)
            try:
                session.run([examples, labels])
            except tf.errors.OutOfRangeError:
                # Running out of input is acceptable for this smoke test.
                pass
            finally:
                # Always ask the queue-runner threads to stop.
                coordinator.request_stop()

            coordinator.join(runners)
コード例 #3
0
ファイル: test_pass_encoder.py プロジェクト: mmahmad/cs598-gw
    def test_expand_one_hot(self):
        """Check ``encode_training`` one-hot output for alphabet ``'abc'``.

        Per the expected values below, a prefix code of 0 yields an all-zero
        row, and the label ``'\\n'`` maps to the fourth label column.
        """
        with self.test_session() as sess:
            config = pe.EmbeddingConfig(alphabet='abc', batch_size=6)
            encoder = p_enc.encoder_from_config(config)
            encoder.one_time_tensor_initialize()

            a, b, c = ord('a'), ord('b'), ord('c')
            prefix = tf.convert_to_tensor(
                [[0, 0], [a, 0], [a, b], [0, 0], [b, 0], [b, c]])
            labels = tf.convert_to_tensor([ord(ch) for ch in 'ab\nbc\n'])
            seq_len = tf.convert_to_tensor([0, 1, 2, 0, 1, 2])
            one_hot_prefix, one_hot_label = encoder.encode_training(
                prefix, labels)
            sess.run(encoder.initializers())

            prefix_out, label_out, seq_len_out = sess.run(
                [one_hot_prefix, one_hot_label, seq_len])

            # One-hot rows for prefix positions; `none` is the all-zero row.
            none, hot_a, hot_b, hot_c = (
                [0, 0, 0], [1, 0, 0], [0, 1, 0], [0, 0, 1])
            expected_prefix = [
                [none, none],
                [hot_a, none],
                [hot_a, hot_b],
                [none, none],
                [hot_b, none],
                [hot_b, hot_c],
            ]
            expected_labels = [
                [1, 0, 0, 0],
                [0, 1, 0, 0],
                [0, 0, 0, 1],
                [0, 1, 0, 0],
                [0, 0, 1, 0],
                [0, 0, 0, 1],
            ]

            self.assertAllClose(expected_prefix, prefix_out)
            self.assertAllClose(expected_labels, label_out)
            self.assertAllEqual([0, 1, 2, 0, 1, 2], seq_len_out)
コード例 #4
0
 def test_graph_builds(self):
     """Smoke test: ``build_graph`` runs on a small dataset without raising."""
     settings = {
         'alphabet': 'abcdefghijklmnopqrstuvwxyz',
         'password_batch': 5,
         'embedding_window_size': 3,
     }
     config = pe.EmbeddingConfig(**settings)
     passwords = tf.contrib.data.Dataset.from_tensor_slices(
         ['passj', 'word', 'db'])
     trainer = pe.EmbeddingTrainer(config)
     with self.test_session() as session:
         trainer.build_graph(passwords, session)
コード例 #5
0
ファイル: test_pass_encoder.py プロジェクト: mmahmad/cs598-gw
 def test_input_prefix_to_tensor_padding(self):
     """Check ``encode_inputs`` for alphabet ``'abc'`` with trailing newlines.

     Per the expected values, each letter becomes its one-hot row and the
     trailing ``'\\n'`` code becomes an all-zero row.
     """
     with self.test_session() as sess:
         encoder = p_enc.encoder_from_config(
             pe.EmbeddingConfig(alphabet='abc'))
         encoder.one_time_tensor_initialize()
         newline = ord('\n')
         encoded = encoder.encode_inputs(
             [[ord('a'), ord('b'), newline], [ord('c'), ord('c'), newline]])
         sess.run(encoder.initializers())

         hot_a, hot_b, hot_c = [1., 0., 0.], [0., 1., 0.], [0., 0., 1.]
         blank = [0., 0., 0.]
         expected = [[hot_a, hot_b, blank], [hot_c, hot_c, blank]]
         self.assertAllEqual(expected, sess.run(encoded))
コード例 #6
0
    def test_alphabet_not_float(self):
        """Loading must fail when an embedding value is not a number.

        The input holds the string ``"asdf"`` where a float is expected (and a
        key ``"c"`` outside the configured alphabet ``'ab'``), so
        ``read_from_file`` is expected to raise ``LoadException``.
        """
        ifile = io.StringIO("""{ "a": [0.4], "b" : ["asdf"], "c" : [0.2] }""")
        config = pe.EmbeddingConfig(alphabet='ab', embedding_size=1)
        loader = pe.CharEmbeddingLoader(config)
        # assertRaises names the missing exception on failure, unlike a
        # boolean flag that only reports "False is not true".
        with self.assertRaises(pe.CharEmbeddingLoader.LoadException):
            loader.read_from_file(ifile)
コード例 #7
0
    def test_alphabet_not_equal(self):
        """Loading must fail when the file does not cover the alphabet.

        The configuration uses the 26 lowercase letters while the input only
        defines ``"a"`` and ``"b"``, so ``read_from_file`` is expected to
        raise ``LoadException``.
        """
        config = pe.EmbeddingConfig(alphabet='abcdefghijklmnopqrstuvwxyz',
                                    embedding_size=1)
        ifile = io.StringIO("""{ "a" : [0.4], "b" : [0.1]}""")
        loader = pe.CharEmbeddingLoader(config)
        # assertRaises names the missing exception on failure, unlike a
        # boolean flag that only reports "False is not true".
        with self.assertRaises(pe.CharEmbeddingLoader.LoadException):
            loader.read_from_file(ifile)
コード例 #8
0
    def test_train_loop(self):
        """Run ``train`` and check that the result has shape ``[26, 4]``.

        26 is the alphabet length and 4 the configured ``embedding_size``.
        """
        settings = {
            'alphabet': 'abcdefghijklmnopqrstuvwxyz',
            'password_batch': 5,
            'embedding_window_size': 1,
            'batch_size': 2,
            'embedding_size': 4,
            'embedding_num_neg_samples': 2,
            'logging_freq': 1,
            'num_train_epochs': 1,
        }
        config = pe.EmbeddingConfig(**settings)
        passwords = tf.contrib.data.Dataset.from_tensor_slices(
            ['passj', 'word', 'db'])
        trainer = pe.EmbeddingTrainer(config)
        with self.test_session() as session:
            trained = trainer.train(passwords, session)

        self.assertAllEqual([26, 4], trained.shape)
コード例 #9
0
    def test_train_save(self):
        """Smoke test: ``train_and_save`` writes to a temp file without raising."""
        with tempfile.NamedTemporaryFile(mode='w') as ofile:
            settings = {
                'alphabet': 'abcdefghijklmnopqrstuvwxyz',
                'password_batch': 5,
                'embedding_window_size': 1,
                'batch_size': 2,
                'embedding_size': 4,
                'embedding_num_neg_samples': 2,
                'logging_freq': 1,
                'num_train_epochs': 1,
            }
            config = pe.EmbeddingConfig(**settings)
            passwords = tf.contrib.data.Dataset.from_tensor_slices(
                ['passj', 'word', 'db'])

            trainer = pe.EmbeddingTrainer(config)
            with self.test_session() as session:
                trainer.train_and_save(passwords, session, ofile)

            ofile.flush()
コード例 #10
0
    def test_initial_counts_partial_batch(self):
        """Check ``initial_counts`` results with ``password_batch=1``.

        Verifies the per-letter counts for the passwords ``pass``, ``word``
        and ``db`` and that the reported number of samples is 7.
        """
        alphabet = 'abcdefghijklmnopqrstuvwxyz'
        passwords = tf.contrib.data.Dataset.from_tensor_slices(
            ['pass', 'word', 'db'])

        config = pe.EmbeddingConfig(alphabet=alphabet,
                                    password_batch=1,
                                    embedding_window_size=1)
        trainer = pe.EmbeddingTrainer(config)
        with self.test_session() as session:
            counts, samples = trainer.initial_counts(passwords, session)

        # Only letters that occur in the passwords are listed; every other
        # letter of the alphabet is expected to have a count of zero.
        nonzero = {'a': 1, 'b': 1, 'd': 2, 'o': 1,
                   'p': 1, 'r': 1, 's': 2, 'w': 1}
        expect = [nonzero.get(letter, 0) for letter in alphabet]

        self.assertAllEqual(expect, counts)
        self.assertEqual(7, samples)
コード例 #11
0
def main(args):
    """Train character embeddings from a password file and save them.

    Args:
        args: Parsed command-line namespace. The attributes read are
            ``input_file``, ``output_file``, ``help_config``, ``config`` and
            ``tensorboard_logdir``.

    Returns:
        None. If ``help_config`` is set, the configuration help is printed
        and nothing else happens. If a required file argument is missing, a
        message is written to stderr and the function returns without
        training.

    Raises:
        pe.ConfigurationException, ValueError: Re-raised after being logged
            when the configuration file cannot be read.
    """
    if args.help_config:
        # Printing the configuration help needs neither an input nor an
        # output file, so answer it before validating those arguments.
        import pass_embedding as pe
        sys.stdout.write(pe.EmbeddingConfig.__init__.__doc__ + "\n")
        return

    if not args.input_file or not args.output_file:
        sys.stderr.write('--input-file and --output-file are required\n')
        return

    log_format = '%(asctime)s %(levelname)s %(message)s'
    logging.basicConfig(level=logging.INFO, format=log_format)

    # Imported inside the function rather than at module level.
    # NOTE(review): presumably to defer a heavy import until the arguments
    # have been validated -- confirm.
    import pass_embedding as pe
    import pass_dataset as pd

    logging.info('Called with %s', vars(args))

    if args.config is None:
        config = pe.EmbeddingConfig()
    else:
        logging.info('Reading configuration from %s', args.config)
        try:
            config = pe.EmbeddingConfig.from_config_file(args.config)
        except (pe.ConfigurationException, ValueError) as e:
            # Log for the operator, then let the error propagate unchanged.
            logging.critical('Error while reading configuration: %s', e)
            raise

    dataset = pd.PasswordDatasetMakerFromFile([args.input_file]).make()
    with tf.Graph().as_default():
        with tf.Session() as session:
            trainer = pe.EmbeddingTrainer(
                config, tensorboard_logdir=args.tensorboard_logdir)
            trainer.train_and_save(dataset, session, args.output_file)