Example #1
def test_findall():
    pattern = "[aA]"
    s = ["hello", "and héllo", "this was empty", ""]
    nvstrs = nvstrings.to_device(s)
    got = nvstrs.findall(pattern)[0]
    expected = [None, "a", "a", None]
    assert_eq(got, expected)
Example #2
def test_ftos():
    s = np.array([0, 103, -254848.5929, 8395794.248339, np.nan, np.inf],
                 dtype=np.float32)
    got = nvstrings.ftos(s)
    expected = nvstrings.to_device(
        ['0.0', '103.0', '-254848.5938', '8395794.0', 'NaN', 'Inf'])
    assert_eq(got, expected)
Example #3
def test_match(pattern):
    s = ["hello", "and héllo", None, ""]
    pstrs = pd.Series(s)
    nvstrs = nvstrings.to_device(s)
    got = nvstrs.match(pattern)
    expected = pstrs.str.match(pattern).values
    assert_eq(got, expected)
Example #4
def test_count(pattern):
    s = ["hello", "and héllo", "this was empty", ""]
    pstrs = pd.Series(s)
    nvstrs = nvstrings.to_device(s)
    got = nvstrs.count(pattern)
    expected = pstrs.str.count(pattern).values
    assert_eq(got, expected)
Example #5
def test_indexes_for_key():
    strs = nvstrings.to_device(
        ["eee", "aaa", "eee", "ddd", "ccc", "ccc", "ccc", "eee", "aaa"])
    cat = nvcategory.from_strings(strs)
    got = cat.indexes_for_key("ccc")
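    # indexes_for_key returns the row positions of "ccc" in the original strings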
    expected = [4, 5, 6]
    assert_eq(got, expected)
Example #6
def test_decode_url():
    s = nvstrings.to_device(urls2)
    got = s.url_decode()
    expected = []
    for url in urls2:
        expected.append(urllib.parse.unquote(url))
    assert_eq(got, expected)
Example #7
def test_from_offsets_with_bitmask():
    values = np.array([97, 112, 112, 108, 101], dtype=np.int8)
    offsets = np.array([0, 1, 2, 3, 4, 5], dtype=np.int32)
    bitmask = np.array([29], dtype=np.int8)
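    # bitmask 29 == 0b11101: bit i marks element i valid, so element 1 is null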
    s = nvstrings.from_offsets(values, offsets, 5, bitmask, 1)
    expected = ['a', None, 'p', 'l', 'e']
    assert_eq(s, expected)
Example #8
    def eval(self, train_xs, test_xs):
        batch_size = 1000
        utils.assert_eq(len(train_xs) % batch_size, 0)
        utils.assert_eq(len(test_xs) % batch_size, 0)
        train_fes = np.zeros(len(train_xs) // batch_size)
        test_fes = np.zeros(len(test_xs) // batch_size)

        x_shape = (batch_size, ) + train_xs.shape[1:]
        x_node = Variable(torch.cuda.FloatTensor(*x_shape))
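        # Variable and .data[0] are legacy (pre-0.4) PyTorch idioms; modern
        # code would use a plain tensor and .item()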

        for i in range(len(train_fes)):
            x_node.data.copy_(
                torch.from_numpy(train_xs[i * batch_size:(i + 1) *
                                          batch_size]))
            train_fes[i] = self.net_f(x_node).data[0]
        for i in range(len(test_fes)):
            x_node.data.copy_(
                torch.from_numpy(test_xs[i * batch_size:(i + 1) * batch_size]))
            test_fes[i] = self.net_f(x_node).data[0]

        mean_train_fes = train_fes.mean()
        mean_test_fes = test_fes.mean()
        log = 'Eval:\n'
        log += '\tfree_energy on train: %s;\n' % mean_train_fes
        log += '\tfree_energy on test: %s;\n' % mean_test_fes
        log += '\tratio: %s' % np.exp(mean_train_fes - mean_test_fes)
        print(log)
        return log
Example #9
def test_fillna(repl):
    s = ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""]
    strs = nvstrings.to_device(s)
    pstrs = pd.Series(s)
    got = strs.fillna(repl)
    expected = pstrs.fillna(repl)
    assert_eq(got.to_host(), expected)
Example #10
    def tokenize(self, max_length=14):
        """Tokenizes the questions.

        This will add q_token in each entry of the dataset.
        -1 represent nil, and should be treated as padding_idx in embedding
        """
        for entry in self.entries:
            tokens = self.dictionary.tokenize(entry['question'], False)
            tokens = tokens[:max_length]
            if len(tokens) < max_length:
                # Note here we pad in front of the sentence
                padding = [self.dictionary.padding_idx] * (max_length - len(tokens))
                tokens = padding + tokens
            utils.assert_eq(len(tokens), max_length)
            entry['q_token'] = tokens

            if entry['caption']:
                tokens = self.dictionary.tokenize(entry['caption'], False)
                tokens = tokens[:50]
                if len(tokens) < 50:
                    # Note here we pad in front of the sentence
                    padding = [self.dictionary.padding_idx] * (50 - len(tokens))
                    tokens = padding + tokens
                utils.assert_eq(len(tokens), 50)
                entry['c_token'] = tokens
            else:
                entry['c_token'] = [0] * 50
Example #11
def test_get():
    index = 0
    s = ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""]
    strs = nvstrings.to_device(s)
    got = strs.get(index)
    expected = ['a', '0', '9', None, 'a', '']
    assert_eq(got.to_host(), expected)
Example #12
def test_replace(find, replace):
    s = ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""]
    pstrs = pd.Series(s)
    nvstrs = nvstrings.to_device(s)
    got = nvstrs.replace(find, replace, regex=False)
    expected = pstrs.str.replace(find, replace, regex=False)
    assert_eq(got, expected)
Example #13
def test_slice_replace(start, stop, repl):
    s = ["abcdefghij", "0123456789", "9876543210", None, "accénted", ""]
    strs = nvstrings.to_device(s)
    pstrs = pd.Series(s)
    got = strs.slice_replace(start, stop, repl)
    expected = pstrs.str.slice_replace(start, stop, repl)
    assert_eq(got.to_host(), expected)
Example #14
def test_slice_from():
    strs = nvstrings.to_device(
        ["hello world", "holy accéntéd", "batman", None, ""])
    d_arr = rmm.to_device(np.asarray([2, 3, -1, -1, -1], dtype=np.int32))
    got = strs.slice_from(starts=d_arr.device_ctypes_pointer.value)
    expected = ['llo world', 'y accéntéd', '', None, '']
    assert_eq(got, expected)
Example #15
def test_cat():
    strs = nvstrings.to_device(
        ["abc", "def", None, "", "jkl", "mno", "accént"]
    )
    got = strs.cat()
    expected = ['abcdefjklmnoaccént']
    assert_eq(got, expected)

    # non-default separator
    got = strs.cat(sep=':')
    expected = ['abc:def::jkl:mno:accént']
    assert_eq(got, expected)

    # non-default separator and na_rep
    got = strs.cat(sep=':', na_rep='_')
    expected = ['abc:def:_::jkl:mno:accént']
    assert_eq(got, expected)

    # others with separator and na_rep
    strs2 = nvstrings.to_device(["1", "2", "3", "4", "5", "é", None])
    got = strs.cat(strs2, sep=":", na_rep="_")
    expected = ['abc:1', 'def:2', '_:3', ':4', 'jkl:5', 'mno:é', 'accént:_']
    assert_eq(got, expected)

    # others without na_rep: a null on either side yields null
    strs2 = nvstrings.to_device(["1", "2", "3", None, "5", "é", ""])
    got = strs.cat(strs2)
    expected = ['abc1', 'def2', None, None, 'jkl5', 'mnoé', 'accént']
    assert_eq(got, expected)
Example #16
def test_rstrip():
    s = ["  hello  ", "  there  ", "  world  ", None, "  accénté  ", ""]
    strs = nvstrings.to_device(s)
    pstrs = pd.Series(s)
    got = strs.rstrip()
    expected = pstrs.str.rstrip()
    assert_eq(got.to_host(), expected)
Example #17
def test_encode_url():
    s = nvstrings.to_device(urls1)
    got = s.url_encode()
    expected = []
    for url in urls1:
        expected.append(urllib.parse.quote(url, safe="~"))
    assert_eq(got, expected)
Example #18
def test_values():
    strs = nvstrings.to_device(
        ["eee", "aaa", "eee", "ddd", "ccc", "ccc", "ccc", "eee", "aaa"])
    cat = nvcategory.from_strings(strs)
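    # keys are stored sorted: ['aaa', 'ccc', 'ddd', 'eee']; values() maps each
    # row to the index of its key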
    got = cat.values()
    expected = [3, 0, 3, 2, 1, 1, 1, 3, 0]
    assert_eq(got, expected)
Example #19
def test_extract():
    pattern = r"Flight:([A-Z]+)(\d+)"
    s = [
        "ALA-PEK Flight:HU7934",
        "HKT-PEK Flight:CA822",
        "FRA-PEK Flight:LA8769",
        "FRA-PEK Flight:LH7332",
        "",
        None,
        "Flight:ZZ",
    ]
    nvstrs = nvstrings.to_device(s)
    got = nvstrs.extract(pattern)
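    # extract() is column-wise: got[0] and got[1] hold capture groups 1 and 2
    # across all strings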
    expected = np.array(
        [
            ["HU", "7934"],
            ["CA", "822"],
            ["LA", "8769"],
            ["LH", "7332"],
            [None, None],
            [None, None],
            [None, None],
        ]
    )
    assert_eq(got[0], expected[:, 0])
    assert_eq(got[1], expected[:, 1])
Example #20
def test_remove_unused_keys():
    strs1 = nvstrings.to_device(["a", "b", "b", "f", "c", "f"])
    strs2 = nvstrings.to_device(["b", "c", "e", "d"])
    cat = nvcategory.from_strings(strs1)
    cat1 = cat.set_keys(strs2)
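    # after set_keys, 'e' and 'd' are keys that occur in no row, so
    # remove_unused_keys() drops them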
    cat1_unused_removed = cat1.remove_unused_keys()
    assert_eq(cat1_unused_removed.keys(), ["b", "c"])
Example #21
def test_dtos():
    s = np.array([0, 103342.313, -25.4294, 839542223232.794248339],
                 dtype=np.float64)
    got = nvstrings.dtos(s)
    expected = nvstrings.to_device(
        ['0', '103342.313', '-25.4294', '8.395422232e+11'])
    assert_eq(got, expected)
Example #22
    def tokenize(self, max_qu_length=14, max_cap_length=18):
        """Tokenizes the questions.

        This will add q_token in each entry of the dataset.
        -1 represent nil, and should be treated as padding_idx in embedding
        """
        for entry in self.entries:
            qtokens = self.question_dictionary.tokenize(
                entry['question'], False)
            qtokens = qtokens[:max_qu_length]
            ctokens = [self.caption_dictionary.word2idx['<start>']]
            ctokens.extend(
                self.caption_dictionary.tokenize(entry['caption'], False))
            ctokens.append(self.caption_dictionary.word2idx['<end>'])
            ctokens = ctokens[:max_cap_length]
            if len(qtokens) < max_qu_length:
                # Note here we pad in front of the sentence
                qpadding = ([self.question_dictionary.padding_idx] *
                            (max_qu_length - len(qtokens)))
                qtokens = qpadding + qtokens
            utils.assert_eq(len(qtokens), max_qu_length)
            entry['c_len'] = len(ctokens)
            if len(ctokens) < max_cap_length:
                # Note here we pad in front of the sentence
                cpadding = ([self.caption_dictionary.padding_idx] *
                            (max_cap_length - len(ctokens)))
                ctokens = ctokens + cpadding
            utils.assert_eq(len(ctokens), max_cap_length)
            entry['q_token'] = qtokens
            entry['c_token'] = ctokens
Example #23
def test_extract_record():
    pattern = r"Flight:([A-Z]+)(\d+)"
    s = [
        "ALA-PEK Flight:HU7934",
        "HKT-PEK Flight:CA822",
        "FRA-PEK Flight:LA8769",
        "FRA-PEK Flight:LH7332",
        "",
        None,
        "Flight:ZZ",
    ]
    nvstrs = nvstrings.to_device(s)
    got = nvstrs.extract_record(pattern)
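    # extract_record() is row-wise: got[i] holds both capture groups for string i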
    expected = np.array(
        [
            ["HU", "7934"],
            ["CA", "822"],
            ["LA", "8769"],
            ["LH", "7332"],
            [None, None],
            [None, None],
            [None, None],
        ]
    )

    for i in range(len(got)):
        assert_eq(got[i], expected[i, :])
Example #24
def _load_dataset(dataroot, name, img_id2val, label2ans, args):
    """Load entries

    img_id2val: dict {img_id -> val}; val can be used to retrieve image or features_path
    dataroot: root path of dataset
    name: 'train', 'val', 'test-dev2015', 'test2015'
    """
    question_path = os.path.join(dataroot,
                                 'questions/%s_questions.json' % (name))

    print(f"Question Path : {question_path}")
    if name == 'trainval':
        combine_trainval(dataroot)
    questions = json.load(open(question_path))
    if 'questions' in questions:
        questions = questions['questions']
    questions = sorted(questions, key=lambda x: x['question_id'])
    answer_not_found = 0
    print(name)
    if 'test_dev' not in name:
        qn_id_to_ans = {}
        answer_path = os.path.join(dataroot, 'features',
                                   '%s_target.json' % name)
        print(f"Answer Path : {answer_path}")
        answers = json.load(open(answer_path, 'r'))
        for answer in answers:
            qn_id_to_ans[str(answer['question_id'])] = answer

        entries = []
        for question in questions:
            if str(question['question_id']) in qn_id_to_ans:
                answer = qn_id_to_ans[str(question['question_id'])].copy()
            else:
                answer_not_found += 1
                answer = {
                    'question_id': question['question_id'],
                    'image_id': question['image_id'],
                    'scores': [],
                    'labels': []
                }
            try:
                utils.assert_eq(question['question_id'], answer['question_id'])
                utils.assert_eq(question['image_id'], answer['image_id'])
            except AssertionError as e:
                print(e)

            img_id = question['image_id']

            entries.append(
                _create_entry(img_id2val[str(img_id)], question, answer))
    else:  # test2015
        entries = []
        for question in questions:
            img_id = question['image_id']
            entries.append(
                _create_entry(img_id2val[str(img_id)], question, None))
    print("answers not found {}".format(answer_not_found))
    return entries
Example #25
def test_match_strings():
    s1 = ["hello", "here", None, "accéntéd", None, ""]
    s2 = ["hello", "there", "world", "accéntéd", None, ""]
    strs1 = nvstrings.to_device(s1)
    strs2 = nvstrings.to_device(s2)
    got = strs1.match_strings(strs2)
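    # element-wise comparison of the two string arrays; two nulls compare equal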
    expected = [True, False, False, True, True, True]
    assert_eq(got, expected)
Example #26
def test_values():
    narr = np.array([4, 1, 2, 3, 2, 1, 4, 1, 1])
    cat = nvcategory.from_numbers(narr)
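    # from_numbers stores the unique values sorted: [1, 2, 3, 4], so 4 maps to 3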
    values = np.empty([cat.size()], dtype=np.int32)
    cat.values(values)
    got = values.tolist()
    expected = [3, 0, 1, 2, 1, 0, 3, 0, 0]
    assert_eq(got, expected)
Example #27
def test_to_numbers():
    narr = np.array([2, 1, 1.25, 1.5, 1, 1.25, 1, 1, 2])
    cat = nvcategory.from_numbers(narr)
    nbrs = np.empty([cat.size()], dtype=narr.dtype)
    cat.to_numbers(nbrs)
    got = nbrs.tolist()
    expected = narr.tolist()
    assert_eq(got, expected)
Example #28
def test_order_length_alphabetical():
    strs = nvstrings.to_device([
        "abc", "defghi", None, "jkl", "mno", "pqr", "stu", "dog and cat",
        "accénted", ""
    ])
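    # order(3) returns sorted indices; per the test name, sorttype 3 orders by
    # length and then alphabetically, with nulls first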
    sorted_strs = strs.order(3)
    expected = [2, 9, 0, 3, 4, 5, 6, 1, 8, 7]
    assert_eq(sorted_strs, expected)
Example #29
def test_keys():
    narr = np.array([2, 1, 1.25, 1.5, 1, 1.25, 1, 1, 2])
    cat = nvcategory.from_numbers(narr)
    keys = np.empty([cat.keys_size()], dtype=narr.dtype)
    cat.keys(keys)
    got = keys.tolist()
    expected = [1.0, 1.25, 1.5, 2.0]
    assert_eq(got, expected)
Example #30
def test_from_offsets():
    values = np.array([97, 112, 112, 108, 101], dtype=np.int8)
    offsets = np.array([0, 1, 2, 3, 4, 5], dtype=np.int32)
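    # the five int8 values are the bytes of "apple"; each consecutive offset
    # pair delimits a one-character string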
    cat = nvcategory.from_offsets(values, offsets, 5)
    expected_keys = ['a', 'e', 'l', 'p']
    expected_values = [0, 3, 3, 2, 1]
    assert_eq(cat.keys(), expected_keys)
    assert_eq(cat.values(), expected_values)
Example #31
    def __init__(self, state, action, reward, next_state, end):
        utils.assert_eq(type(state), type(next_state))

        self._state = (state * 255.0).astype(np.uint8)
        self._next_state = (next_state * 255.0).astype(np.uint8)
        self.action = action
        self.reward = reward
        self.end = end
Example #32
    def loss(self, states, actions, targets):
        """
        params:
            states: Variable [batch, channel, w, h]
            actions: Variable [batch, num_actions] one hot encoding
            targets: Variable [batch]
        """
        utils.assert_eq(actions.size(1), self.num_actions)

        qs = self.online_q_net(states)
        preds = (qs * actions).sum(1)
        err = nn.functional.smooth_l1_loss(preds, targets)
        return err
Example #33
    def loss(self, states, actions, targets):
        """
        params:
            states: Variable [batch, channel, w, h]
            actions: Variable [batch, num_actions] one hot encoding
            targets: Variable [batch, num_atoms]
        """
        utils.assert_eq(actions.size(1), self.num_actions)

        actions = actions.unsqueeze(2)
        probs = self.online_q_net(states) # [batch, num_actions, num_atoms]
        probs = (probs * actions).sum(1) # [batch, num_atoms]
        xent = -(targets * torch.log(probs.clamp(min=utils.EPS))).sum(1)
        xent = xent.mean(0)
        return xent
Example #34
    def tokenize(self, max_length=14):
        """Tokenizes the questions.

        This will add q_token to each entry of the dataset.
        -1 represents nil and should be treated as padding_idx in the embedding.
        """
        for entry in self.entries:
            tokens = self.dictionary.tokenize(entry['question'], False)
            tokens = tokens[:max_length]
            if len(tokens) < max_length:
                # Note here we pad in front of the sentence
                padding = [self.dictionary.padding_idx] * (max_length - len(tokens))
                tokens = padding + tokens
            utils.assert_eq(len(tokens), max_length)
            entry['q_token'] = tokens
Example #35
def _load_dataset(dataroot, name, img_id2val):
    """Load entries

    img_id2val: dict {img_id -> val}; val can be used to retrieve image or features
    dataroot: root path of dataset
    name: 'train', 'val'
    """
    question_path = os.path.join(
        dataroot, 'v2_OpenEnded_mscoco_%s2014_questions.json' % name)
    questions = sorted(json.load(open(question_path))['questions'],
                       key=lambda x: x['question_id'])
    answer_path = os.path.join(dataroot, 'cache', '%s_target.pkl' % name)
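    # cPickle is the Python 2 module name; under Python 3 this would be pickle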
    answers = cPickle.load(open(answer_path, 'rb'))
    answers = sorted(answers, key=lambda x: x['question_id'])

    utils.assert_eq(len(questions), len(answers))
    entries = []
    for question, answer in zip(questions, answers):
        utils.assert_eq(question['question_id'], answer['question_id'])
        utils.assert_eq(question['image_id'], answer['image_id'])
        img_id = question['image_id']
        entries.append(_create_entry(img_id2val[img_id], question, answer))

    return entries