Example #1
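All of the snippets below exercise deepmatcher's MatchingField and related classes. A minimal sketch of the imports and fixtures they appear to assume, based on deepmatcher's deepmatcher.data package layout (the test_dir_path definition is a placeholder):

import os
import shutil
from collections import Counter
from urllib.parse import urljoin
from urllib.request import pathname2url

import torch
from torchtext.vocab import Vectors

from deepmatcher.data.dataset import MatchingDataset
from deepmatcher.data.field import (MatchingField, MatchingVocab,
                                    reset_vector_cache)

# Placeholder: the tests resolve data files relative to their own directory.
test_dir_path = os.path.dirname(os.path.abspath(__file__))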
    def test_init_2(self):
        mf = MatchingField()
        seq = 'Hello, This is a test sequence for tokenizer.'
        tok_seq = [
            'Hello', ',', 'This', 'is', 'a', 'test', 'sequence', 'for',
            'tokenizer', '.'
        ]
        self.assertEqual(mf.tokenize(seq), tok_seq)
Example #2
    def test_preprocess_args_1(self):
        mf = MatchingField()
        arg_dict = mf.preprocess_args()
        res_dict = {'sequential': True, 'init_token': None,
                    'eos_token': None, 'lower': False,
                    'preprocessing': None, 'tokenizer_arg': 'moses',
                    'unk_token': '<unk>'}
        self.assertEqual(arg_dict, res_dict)
Example #3
    def test_init_1(self):
        fields = [('left_a', MatchingField()), ('right_a', MatchingField())]
        col_naming = {'id': 'id', 'label': 'label', 'left': 'left',
                      'right': 'right'}
        path = os.path.join('.', 'test_datasets', 'sample_table_small.csv')
        md = MatchingDataset(fields, col_naming, path=path)
        self.assertEqual(md.id_field, 'id')
        self.assertEqual(md.label_field, 'label')
        self.assertEqual(md.all_left_fields, ['left_a'])
        self.assertEqual(md.all_right_fields, ['right_a'])
        self.assertEqual(md.all_text_fields, ['left_a', 'right_a'])
        self.assertEqual(md.canonical_text_fields, ['_a'])
Example #4
    def test_extend_vectors_1(self):
        vectors_cache_dir = '.cache'
        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir)

        pathdir = os.path.abspath(os.path.join(test_dir_path, 'test_datasets'))
        filename = 'fasttext_sample.vec'
        file = os.path.join(pathdir, filename)
        url_base = urljoin('file:', pathname2url(file))
        vecs = Vectors(name=filename, cache=vectors_cache_dir, url=url_base)
        self.assertIsInstance(vecs, Vectors)

        vec_data = MatchingField._get_vector_data(vecs, vectors_cache_dir)
        v = MatchingVocab(Counter())
        v.vectors = torch.Tensor(1, vec_data[0].dim)
        v.unk_init = torch.Tensor.zero_
        tokens = {'hello', 'world'}
        v.extend_vectors(tokens, vec_data)
        self.assertEqual(len(v.itos), 4)
        self.assertEqual(v.vectors.size(), torch.Size([4, 300]))
        self.assertEqual(list(v.vectors[2][0:10]), [0.0] * 10)
        self.assertEqual(list(v.vectors[3][0:10]), [0.0] * 10)

        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir)
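Examples #4, #6, and #8 all point torchtext's Vectors at a local .vec file by turning its path into a file:// URL, so nothing is downloaded. A standalone sketch of that idiom (the path below is hypothetical):

import os
from urllib.parse import urljoin
from urllib.request import pathname2url

vec_file = os.path.abspath(os.path.join('test_datasets', 'fasttext_sample.vec'))
local_url = urljoin('file:', pathname2url(vec_file))
# e.g. 'file:///home/user/project/test_datasets/fasttext_sample.vec'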
Example #5
def test_class_matching_dataset():
    fields = [("left_a", MatchingField()), ("right_a", MatchingField())]
    col_naming = {
        "id": "id",
        "label": "label",
        "left": "left",
        "right": "right"
    }
    path = os.path.join(test_dir_path, "test_datasets",
                        "sample_table_small.csv")
    md = MatchingDataset(fields, col_naming, path=path)
    assert md.id_field == "id"
    assert md.label_field == "label"
    assert md.all_left_fields == ["left_a"]
    assert md.all_right_fields == ["right_a"]
    assert md.all_text_fields == ["left_a", "right_a"]
    assert md.canonical_text_fields == ["_a"]
Example #6
    def test_extend_vocab_1(self):
        vectors_cache_dir = '.cache'
        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir)

        mf = MatchingField()
        lf = MatchingField(id=True, sequential=False)
        fields = [('id', lf), ('left_a', mf), ('right_a', mf), ('label', lf)]
        col_naming = {
            'id': 'id',
            'label': 'label',
            'left': 'left_',
            'right': 'right_'
        }

        pathdir = os.path.abspath(os.path.join(test_dir_path, 'test_datasets'))
        filename = 'fasttext_sample.vec'
        file = os.path.join(pathdir, filename)
        url_base = urljoin('file:', pathname2url(file))
        vecs = Vectors(name=filename, cache=vectors_cache_dir, url=url_base)

        data_path = os.path.join(test_dir_path, 'test_datasets',
                                 'sample_table_small.csv')
        md = MatchingDataset(fields, col_naming, path=data_path)

        mf.build_vocab()
        mf.vocab.vectors = torch.Tensor(len(mf.vocab.itos), 300)
        mf.extend_vocab(md, vectors=vecs)
        self.assertEqual(len(mf.vocab.itos), 6)
        self.assertEqual(mf.vocab.vectors.size(), torch.Size([6, 300]))
Example #7
def _make_fields(header, id_attr, label_attr, ignore_columns, lower, tokenize,
                 include_lengths):
    """Create field metadata, i.e., attribute processing specification for each attribute.

    This includes fields for label and ID columns.

    Returns:
        list(tuple(str, MatchingField)): A list of tuples containing column name
            (e.g. "left_address") and corresponding :class:`~data.MatchingField` pairs,
            in the same order that the columns occur in the CSV file.

    """

    text_field = MatchingField(
        lower=lower,
        tokenize=tokenize,
        init_token="<<<",
        eos_token=">>>",
        batch_first=True,
        include_lengths=include_lengths,
    )
    numeric_field = MatchingField(sequential=False,
                                  preprocessing=lambda x: int(x),
                                  use_vocab=False)
    id_field = MatchingField(sequential=False, use_vocab=False, id=True)

    fields = []
    for attr in header:
        if attr == id_attr:
            fields.append((attr, id_field))
        elif attr == label_attr:
            fields.append((attr, numeric_field))
        elif attr in ignore_columns:
            fields.append((attr, None))
        else:
            fields.append((attr, text_field))
    return fields
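As a hypothetical illustration (the header and column names below are invented), _make_fields can be driven straight from a CSV header row:

header = ['id', 'left_name', 'left_address', 'right_name', 'right_address',
          'label']
fields = _make_fields(header,
                      id_attr='id',
                      label_attr='label',
                      ignore_columns=set(),
                      lower=True,
                      tokenize='moses',
                      include_lengths=True)
for name, field in fields:
    # 'id' gets the id field, 'label' the numeric field, and every remaining
    # column the shared text field.
    print(name, type(field).__name__ if field is not None else None)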
Example #8
    def test_get_vector_data(self):
        vectors_cache_dir = '.cache'
        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir)

        pathdir = os.path.abspath(os.path.join('.', 'test_datasets'))
        filename = 'fasttext_sample.vec'
        file = os.path.join(pathdir, filename)
        url_base = urljoin('file:', pathname2url(file))
        vecs = Vectors(name=filename, cache=vectors_cache_dir, url=url_base)
        self.assertIsInstance(vecs, Vectors)

        vec_data = MatchingField._get_vector_data(vecs, vectors_cache_dir)
        self.assertEqual(len(vec_data), 1)

        if os.path.exists(vectors_cache_dir):
            shutil.rmtree(vectors_cache_dir)
Example #9
    def test_init_1(self):
        mf = MatchingField()
        self.assertTrue(mf.sequential)
Example #10
    def test_reset_vector_cache_1(self):
        mf = MatchingField()
        reset_vector_cache()
        self.assertDictEqual(mf._cached_vec_data, {})
Example #11
    def test_numericalize_2(self):
        mf = MatchingField()
        arr = [['a'], ['b'], ['c']]
        # No vocab has been built, so numericalizing tokens is expected to
        # fail (presumably with an AttributeError from the missing vocab).
        with self.assertRaises(AttributeError):
            mf.numericalize(arr)
Example #12
    def test_numericalize_1(self):
        mf = MatchingField(id=True)
        arr = [[1], [2], [3]]
        mf.numericalize(arr)
        self.assertEqual(arr, [[1], [2], [3]])
Example #13
    def test_build_vocab_3(self):
        mf = MatchingField()
        vector_file_name = 'fasttext.crawl_test.vec'
        cache_dir = os.path.join(test_dir_path, 'test_datasets')
        vec_data = mf.build_vocab(vectors=vector_file_name, cache=cache_dir)
        self.assertIsNone(vec_data)
Example #14
    def test_build_vocab_2(self):
        mf = MatchingField()
        vector_file_name = 'fasttext.wiki_test.vec'
        cache_dir = os.path.join(test_dir_path, 'test_datasets')
        vec_data = mf.build_vocab(vectors=vector_file_name, cache=cache_dir)
Example #15
    def test_build_vocab_1(self):
        mf = MatchingField()
        mf.build_vocab()
Example #16
    def test_init_3(self):
        # An unrecognized tokenizer name is expected to be rejected.
        with self.assertRaises(ValueError):
            MatchingField(tokenize='random string')