Code example #1
    def setUp(self):
        """
        Constructs the data and reads in the embeddings from the example data in the test_data directory
        """
        test_data_dir = Path(__file__).resolve().parents[1]
        embeddings_file = str(
            test_data_dir.joinpath("test_data").joinpath("embeddings.txt"))
        train_dataset = str(
            test_data_dir.joinpath("test_data").joinpath("train_data.txt"))

        self._unknown_word_key = "<unk>"
        word_embeddings = data.read_word_embeddings(embeddings_file,
                                                    self._unknown_word_key)
        self._word_index = word_embeddings.wv.vocab

        self._db = data.generate_instances(
            batch_size=3,
            file_path=train_dataset,
            word_index=self._word_index,
            unknown_word_key=self._unknown_word_key,
            separator=" ")
        # Map each mh pair of the training set to a matrix index; the
        # unknown vector gets a matrix index of its own.
        self._mh_index_map, self._unk_matrix_id = matrix_mapping.create_matrix_mapping(
            train_mh=self._db.mh_set, unk_vec_id=self._db.unk_vector_id)
        self._lookup = word_embeddings.wv.syn0  # raw embedding matrix
        tf.set_random_seed(1)  # fixed seed for reproducible TF ops
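The setUp above is an instance method cut out of a unittest test case; its imports and class are not shown. A minimal scaffold sketch, assuming the project-local modules data and matrix_mapping are importable as in the snippet (the class and test names here are hypothetical):

import unittest
from pathlib import Path

import tensorflow as tf

import data            # project-local module used in setUp (assumption)
import matrix_mapping  # project-local module used in setUp (assumption)


class GenerateInstancesTest(unittest.TestCase):  # hypothetical class name

    # ... setUp exactly as in the example above ...

    def test_test_data_layout(self):
        # Sanity check, independent of setUp: the files the fixture
        # reads must exist under test_data/.
        test_data_dir = Path(__file__).resolve().parents[1] / "test_data"
        for name in ("embeddings.txt", "train_data.txt"):
            self.assertTrue((test_data_dir / name).is_file(), name)


if __name__ == "__main__":
    unittest.main()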
Code example #2
File: test_evaluation.py Project: sfb833-a3/commix
    def setUp(self):
        """Reads the embeddings from test_data and stores the path to the gold-standard predictions file."""
        test_data_dir = Path(__file__).resolve().parents[1]
        embeddings_file = str(test_data_dir.joinpath("test_data").joinpath("embeddings.txt"))
        self._predictions_file = str(test_data_dir.joinpath("test_data").joinpath("gold_standard.txt"))

        self._unknown_word_key = "<unk>"
        self._word_embeddings = data.read_word_embeddings(embeddings_file, self._unknown_word_key)
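read_word_embeddings returns a gensim model; the examples here access it through the pre-1.0-style attributes wv.vocab and wv.syn0. A small inspection sketch, assuming only those attributes plus ordinary key lookup:

def describe_embeddings(word_embeddings, unknown_word_key="<unk>"):
    # Vocabulary size and dimensionality via the old gensim attribute names
    # (wv.vocab, wv.syn0) that the surrounding examples rely on.
    vocab_size = len(word_embeddings.wv.vocab)
    embedding_dim = word_embeddings.wv.syn0.shape[1]
    print("vocabulary: %d words, dimension: %d" % (vocab_size, embedding_dim))
    # The unknown word is looked up like any other vocabulary entry.
    print(word_embeddings.wv[unknown_word_key][:5])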
Code example #3
    def setUp(self):
        """Reads the embeddings from test_data and stores the paths to the training and validation sets."""
        test_data_dir = Path(__file__).resolve().parents[1]
        embeddings_file = str(
            test_data_dir.joinpath("test_data").joinpath("embeddings.txt"))
        self._train_dataset = str(
            test_data_dir.joinpath("test_data").joinpath("train_data.txt"))
        self._validation_dataset = str(
            test_data_dir.joinpath("test_data").joinpath("valid_data.txt"))

        self._unknown_word_key = "<unk>"
        self._separator = " "
        gensim_model = data.read_word_embeddings(embeddings_file,
                                                 self._unknown_word_key)
        self._word_index = gensim_model.wv.vocab
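This setUp only stores the paths and separator; the actual batching happens later. A sketch of that later call, using data.generate_instances exactly as examples #1 and #4 do (the batch size of 3 is an arbitrary choice):

        batches = data.generate_instances(
            batch_size=3,
            file_path=self._train_dataset,
            word_index=self._word_index,
            unknown_word_key=self._unknown_word_key,
            separator=self._separator)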
Code example #4
    def setUp(self):
        """
        Sets up the data and properties (e.g. embedding dimension, batch size)
        that all models are built on. These properties are fixed and shared by
        every test class inheriting from this class.
        """
        self._test_data_dir = Path(__file__).resolve().parents[1]
        embeddings_file = str(
            self._test_data_dir.joinpath("test_data").joinpath(
                "embeddings.txt"))
        train_dataset = str(
            self._test_data_dir.joinpath("test_data").joinpath(
                "train_data.txt"))
        validation_dataset = str(
            self._test_data_dir.joinpath("test_data").joinpath(
                "valid_data.txt"))

        self._unknown_word_key = "<unk>"

        self._embedding_model = data.read_word_embeddings(
            embeddings_file, self._unknown_word_key)
        self._word_index = self._embedding_model.wv.vocab
        self._lookup = self._embedding_model.wv.syn0
        self._embedding_dim = self._lookup.shape[1]
        self._batch_size = 3

        self._db = data.generate_instances(
            batch_size=self._batch_size,
            file_path=train_dataset,
            word_index=self._word_index,
            unknown_word_key=self._unknown_word_key,
            separator=" ")
        self._vd = data.generate_instances(
            batch_size=self._batch_size,
            file_path=validation_dataset,
            word_index=self._word_index,
            unknown_word_key=self._unknown_word_key,
            separator=" ")
        self._comp_model = None
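Per the docstring, concrete model tests inherit this shared setUp. A sketch of such a subclass, assuming the base class is named ModelTest (a hypothetical name) and relying only on attributes assigned above:

class ExampleModelTest(ModelTest):  # base class name is an assumption

    def test_shared_properties(self):
        # The shared setUp fixed the batch size at 3 and derived the
        # embedding dimension from the lookup table.
        self.assertEqual(self._batch_size, 3)
        self.assertEqual(self._embedding_dim, self._lookup.shape[1])
        # Training and validation instances were generated with the same
        # vocabulary, unknown-word key, and separator.
        self.assertIsNotNone(self._db)
        self.assertIsNotNone(self._vd)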
Code example #5
        "--unknown_word_key",
        type=str,
        help=
        "string corresponding to the unknown word embedding in the embedding file",
        default="<unk>")
    parser.add_argument("--max_rank",
                        type=int,
                        help="maximum rank",
                        default=1000)
    parser.add_argument("--batch_size",
                        type=int,
                        help="how many instances per batch",
                        default=500)
    args = parser.parse_args()

    embeddings = data.read_word_embeddings(args.embeddings,
                                           args.unknown_word_key)
    ranks = get_all_ranks(predictions_file=args.predictions,
                          word_embeddings=embeddings,
                          max_rank=args.max_rank,
                          batch_size=args.batch_size,
                          path_to_ranks=args.ranks)
    print("ranks\n")
    print(sorted(ranks))
    print("quartiles\n")
    print(evaluation.calculate_quartiles(ranks))

    # get_loss relies on eager execution (TF 1.x API).
    tf.enable_eager_execution()
    loss = evaluation.get_loss(predictions_file=args.predictions,
                               word_embeddings=embeddings,
                               batch_size=args.batch_size)
    print("loss %.5f\n" % loss)