Example #1
0
    def decode(self, mode, write_fp, decode_fn):
        """Greedily decode every instance of the given split and dump a TSV.

        Writes one tab-separated row (prediction, target, loss, edit
        distance) per instance to ``<write_fp>.<mode>.tsv`` and logs how
        many instances were decoded.

        Args:
            mode: dataset split to decode (e.g. 'dev' or 'test').
            write_fp: path prefix for the output TSV file.
            decode_fn: callable ``(model, src) -> (prediction, extra)``;
                must also support ``reset()`` between decoding runs.
        """
        self.model.eval()
        cnt = 0
        sampler, nb_instance = self.iterate_instance(mode)
        decode_fn.reset()
        # str.format (not f-strings) is used deliberately for pre-3.6
        # compatibility; the old commented-out f-string variants are removed.
        with open('{0}.{1}.tsv'.format(write_fp, mode), 'w') as fp:
            fp.write('prediction\ttarget\tloss\tdist\n')
            for src, trg in tqdm(sampler(), total=nb_instance):
                pred, _ = decode_fn(self.model, src)
                # trg carries begin/end sentinel symbols; strip them
                # before computing the edit distance.
                dist = util.edit_distance(pred, trg.view(-1).tolist()[1:-1])

                src_mask = dummy_mask(src)
                trg_mask = dummy_mask(trg)
                data = (src, src_mask, trg, trg_mask)
                loss = self.model.get_loss(data).item()

                trg = self.data.decode_target(trg)[1:-1]
                pred = self.data.decode_target(pred)
                fp.write('{0}\t{1}\t{2}\t{3}\n'.format(
                    " ".join(pred), " ".join(trg), loss, dist))
                cnt += 1
        decode_fn.reset()
        self.logger.info('finished decoding {0} {1} instance'.format(
            cnt, mode))
Example #2
0
    def decode(self, mode, batch_size, write_fp, decode_fn):
        """Batch-decode the given split, write per-instance rows to a TSV,
        and return the evaluator's aggregate results.
        """
        self.model.eval()
        cnt = 0
        sampler, nb_batch = self.iterate_batch(mode, batch_size)
        with open(f"{write_fp}.{mode}.tsv", "w") as fp:
            fp.write("prediction\ttarget\tloss\tdist\n")
            progress = tqdm(sampler(batch_size), total=nb_batch)
            for src, src_mask, trg, trg_mask in progress:
                pred, _ = decode_fn(self.model, src, src_mask)
                self.evaluator.add(src, pred, trg)

                # Per-instance (unreduced) losses, moved off the device.
                losses = self.model.get_loss(
                    (src, src_mask, trg, trg_mask), reduction=False).cpu()

                unpacked_preds = util.unpack_batch(pred)
                unpacked_trgs = util.unpack_batch(trg)
                for p, t, loss in zip(unpacked_preds, unpacked_trgs, losses):
                    dist = util.edit_distance(p, t)
                    p = self.data.decode_target(p)
                    t = self.data.decode_target(t)
                    fp.write(
                        f'{" ".join(p)}\t{" ".join(t)}\t{loss.item()}\t{dist}\n'
                    )
                    cnt += 1
        self.logger.info(f"finished decoding {cnt} {mode} instance")
        return self.evaluator.compute(reset=True)
Example #3
0
 def __add(self, tree, root, word):
     """Insert `word` into the BK-tree subtree hanging off `root`.

     `root` is a ``(word, distance)`` tuple keying into `tree`.  The new
     word descends into the child whose edge distance equals its distance
     to the root's word; if no such child exists it becomes a new leaf.
     """
     distance = edit_distance(root[0], word)
     # Iterating the dict directly yields its (child, child_distance) keys;
     # no need for .keys() or a separate collision flag.
     for child in tree[root]:
         if distance == child[1]:
             # A child already sits at this edge distance: recurse into it.
             self.__add(tree[root], child, word)
             break
     else:
         # No collision with an existing edge: attach the word as a leaf.
         tree[root][(word, distance)] = {}
Example #4
0
 def __add(self, tree, root, word):
     """Add a word."""
     dist = edit_distance(root[0], word)
     subtree = tree[root]
     # Find the first child sharing this edge distance, if any.
     match = next(
         (key for key in subtree.keys() if key[1] == dist), None)
     if match is None:
         # No child at this distance: the word becomes a new leaf here.
         subtree[(word, dist)] = {}
     else:
         # Delegate the insertion to the child at the same distance.
         self.__add(subtree, match, word)
Example #5
0
    def distance(self, other_state):
        """ Returns the distance between two WorldStates.

        The distance is the total edit distance between corresponding
        beakers of the two states.

        Inputs:
            other_state (AlchemyState): The other alchemy state to compute the
                distance from.

        Returns:
            float representing the distance.
        """
        return sum(
            edit_distance(ours, theirs)
            for ours, theirs in zip(self._beakers, other_state.beakers()))
	def subcluster_by_editdistance(self, center, item_list, threshold=2):
		"""Cluster `item_list` around `center` by stemmed-token edit distance.

		Each item joins the first existing cluster whose key stems to a
		token list within `threshold` edits of the item's; otherwise the
		item seeds a new cluster of its own.

		Args:
			center: seed string that always forms the first cluster.
			item_list: iterable of candidate strings.
			threshold: strict upper bound on the edit distance for joining.

		Returns:
			defaultdict mapping cluster-center string -> list of members.
		"""
		clusters = defaultdict(list)
		clusters[center].append(center)
		# Cache stemmed token lists: the original re-stemmed every cluster
		# key on every inner iteration (accidental O(n^2) stemming work).
		stem_cache = {}

		def stemmed(text):
			if text not in stem_cache:
				stem_cache[text] = self.stemmer.stem(text.encode('utf-8')).split()
			return stem_cache[text]

		for item in item_list:
			if item in clusters:  # membership on the dict itself, not .keys()
				continue
			list_item = stemmed(item)
			for goal in clusters:
				if edit_distance(stemmed(goal), list_item) < threshold:
					clusters[goal].append(item)
					break
			else:
				# No existing cluster is close enough; start a new one.
				clusters[item].append(item)
		return clusters
def autocomplete(suggest_tree, bktree, prefix, count=5):
    """Suggest top completions for a prefix given a SuggestTree and BKTree.

    Completions for a given prefix are weighted primarily by their weight in
    the suggest tree, and secondarily by their Levenshtein distance to words
    in the BK-tree (where nearby words are weighted higher).

    Args:
        suggest_tree: provides completion_weights(prefix) -> {completion: weight}.
        bktree: provides search(prefix) -> iterable of fuzzy matches.
        prefix: the string to complete.
        count: maximum number of suggestions to return.

    Returns:
        A list of at most `count` suggested completions.
    """
    completion_weights = suggest_tree.completion_weights(prefix)
    if completion_weights:
        # Named functions instead of assigned lambdas (PEP 8); iterating the
        # dict directly yields its keys, so .keys() is unnecessary.
        def selection_criteria(completion):
            return (completion_weights[completion],
                    completion_proximity_score(prefix, completion))

        return heapq.nlargest(count, completion_weights,
                              key=selection_criteria)

    # No exact-prefix completions: fall back to fuzzy matches, nearest first.
    def proximity(completion):
        return edit_distance(prefix, completion)

    return heapq.nsmallest(count, bktree.search(prefix), key=proximity)
Example #8
0
    def search(self, prefix, tolerance=2, tree=None, root=None, matches=None):
        """Search for words within a given edit distance of prefix.

        Args:
            prefix: the query string.
            tolerance: maximum edit distance for a word to count as a match.
            tree, root, matches: recursion state; leave as None when
                calling from outside.

        Returns:
            Set of matching words.
        """
        # TODO: Number of arguments can be reduced by defining BKTree
        # recursively (i.e. root and tree args shouldn't be necessary).
        if root is None:
            root = self.root
        if tree is None:
            tree = self.tree[self.root]
        if matches is None:
            matches = set()

        prefix_distance = edit_distance(prefix, root[0])
        if prefix_distance <= tolerance:
            matches.add(root[0])

        # Iterate the dict directly (keys are (word, distance) tuples) and
        # reuse the key itself instead of rebuilding the tuple.  Triangle
        # inequality: only children whose edge distance lies within
        # `tolerance` of prefix_distance can contain matches.
        for child in tree:
            if abs(prefix_distance - child[1]) <= tolerance:
                self.search(prefix, tolerance, tree[child], child, matches)

        return matches
Example #9
0
    def search(self, prefix, tolerance=2, tree=None, root=None, matches=None):
        """Search for words within a given edit distance of prefix."""
        # TODO: Number of arguments can be reduced by defining BKTree
        # recursively (i.e. root and tree args shouldn't be necessary).
        # Default the recursion state on the top-level call.
        root = self.root if root is None else root
        tree = self.tree[self.root] if tree is None else tree
        matches = set() if matches is None else matches

        prefix_distance = edit_distance(prefix, root[0])
        if prefix_distance <= tolerance:
            matches.add(root[0])

        for word, distance in tree.keys():
            # Triangle-inequality pruning on the child's edge distance.
            if abs(prefix_distance - distance) > tolerance:
                continue
            self.search(prefix, tolerance, tree[(word, distance)],
                        (word, distance), matches)

        return matches
Example #10
0
    def decode(self, mode, write_fp, decode_fn):
        """Decode every instance of the given split and write a results TSV.

        One row per instance: prediction, target, loss, edit distance.
        """
        self.model.eval()
        sampler, nb_instance = self.iterate_instance(mode)
        decode_fn.reset()
        cnt = 0
        with open(f"{write_fp}.{mode}.tsv", "w") as fp:
            fp.write("prediction\ttarget\tloss\tdist\n")
            for src, trg in tqdm(sampler(), total=nb_instance):
                pred, _ = decode_fn(self.model, src)
                # Strip begin/end sentinels from the gold sequence.
                gold = trg.view(-1).tolist()[1:-1]
                dist = util.edit_distance(pred, gold)

                batch = (src, dummy_mask(src), trg, dummy_mask(trg))
                loss = self.model.get_loss(batch).item()

                trg = self.data.decode_target(trg)[1:-1]
                pred = self.data.decode_target(pred)
                fp.write(f'{" ".join(pred)}\t{" ".join(trg)}\t{loss}\t{dist}\n')
                cnt += 1
        decode_fn.reset()
        self.logger.info(f"finished decoding {cnt} {mode} instance")
Example #11
0
### 5. Step: Determine mean edit distance
test_error = np.zeros([rn.batch_quantity('test'), rn.batch_size()])

for j in xrange(rn.batch_quantity('test')):
    net_out = rn.forward_fn(mb_test_x[j], mb_test_m[j])

    for b in xrange(rn.batch_size()):
        true_out = mb_test_y[j][b, :]
        # Drop the padding/blank symbol (label 10) before scoring.
        cln_true_out = np.delete(true_out, np.where(true_out == 10))

        # BUG FIX: the original did `net_out = net_out[:, b, :]`, clobbering
        # the full batch output, so every iteration with b > 0 indexed an
        # already-sliced 2-D array.  Keep the per-sample slice in its own name.
        sample_out = net_out[:, b, :]
        arg_net_out = np.argmax(sample_out, axis=1)
        cln_net_out = np.delete(arg_net_out, np.where(arg_net_out == 10))

        test_error[j, b] = edit_distance(cln_true_out, cln_net_out)

print("Test set mean edit distance: " + "{0:.4f}".format(np.mean(test_error)))

# Plot results
# NOTE(review): naming is misleading here — `sample_no` actually selects a
# minibatch (it indexes mb_test_x), while `batch` selects the sample
# within that minibatch; confirm against how mb_test_* are built.
sample_no = 1
batch = 0
# Re-run the forward pass on the chosen minibatch to get the output signal.
net_out = rn.forward_fn(mb_test_x[sample_no], mb_test_m[sample_no])
# Slice out the chosen sample: input, mask, and network output over time.
sample = mb_test_x[sample_no][:, batch, :]
mask = mb_test_m[sample_no][:, batch, :]
signal = net_out[:, batch, :]

# Two-row figure: top panel shows the input image of numbers.
fig = plt.figure()
fig.suptitle('Numbers recognition - Sample')
plt.subplot(2, 1, 1)
plt.xlabel('Image of numbers')