Python subsequences Examples

Programming Language: Python

Namespace/Package Name: spelling.edits

Method/Function: subsequences

Examples at hotexamples.com: 2

Python subsequences - 2 examples found. These are the top-rated real-world Python examples of spelling.edits.subsequences, extracted from open source projects. You can rate examples to help us improve their quality.

Example #1

Show file

File: test_edits.py Project: ndronen/spelling

 def test_subsequences(self):
     """Check what ``subsequences`` yields for the word "which".

     The expected output is every contiguous substring of the word,
     ordered by start position and, within a start position, by
     increasing length.
     """
     word = "which"
     want = [
         'w', 'wh', 'whi', 'whic', 'which',  # substrings starting at index 0
         'h', 'hi', 'hic', 'hich',           # ... at index 1
         'i', 'ic', 'ich',                   # ... at index 2
         'c', 'ch',                          # ... at index 3
         'h',                                # ... at index 4
     ]
     got = list(subsequences(word))
     # Compare sizes first so a count mismatch is reported on its own,
     # before the element-by-element comparison.
     self.assertEqual(len(want), len(got))
     self.assertEqual(want, got)

Example #2

Show file

File: jobs.py Project: ndronen/spelling

    def run(self):
        """Generate artificial spelling errors for the words in ``self.real_words``.

        For every word, the edits that ``self.edit_db`` reports (with a positive
        count) for each subsequence of the word are collected.  Groups of edits
        are then sampled with probability proportional to their count and
        applied to the word with an ``EditFinder``, until
        ``self.enough_errors_for_word`` is satisfied or too few of the attempts
        are producing errors.

        Sampled edits are skipped when they have already been drawn for the
        word; an attempt is abandoned when any callable in ``self.constraints``
        returns a true value for ``(word, edit)``, or when the resulting error
        is in ``self.blacklist``.

        Returns:
            A list of ``(word, number_of_possible_edits, edit, error)`` tuples,
            where ``edit`` is the list of ``(subseq, error_subseq)`` pairs that
            was applied and ``error`` is the value returned by
            ``EditFinder.apply``.
        """
        errors = []
        pbar = build_progressbar(self.real_words)

        finder = EditFinder()

        # The distribution over "how many edits make up one error" is the same
        # for every word and every attempt, so build it once.  A size k is
        # drawn with probability proportional to 1/k.
        edit_sizes = np.arange(1, self.max_edits_per_error + 1)
        edit_size_probs = 1. / edit_sizes
        edit_size_probs /= edit_size_probs.sum()

        for word_idx, word in enumerate(self.real_words):
            pbar.update(word_idx + 1)

            # Find all the edits we can make to this word.
            possible_edits = []
            counts = []
            for subseq in subsequences(word):
                for _, error_subseq, count in self.edit_db.edits(subseq):
                    # Keep only edits with a positive count: the counts are
                    # normalised into sampling weights below.
                    if count > 0:
                        possible_edits.append((subseq, error_subseq))
                        counts.append(count)

            if not possible_edits:
                continue

            probs = np.array(counts)
            probs = probs / float(probs.sum())

            seen_edits = set()
            errors_for_word = []
            attempts = 0.

            # Try to generate up to the requested number of errors per word.
            while True:
                try:
                    attempts += 1.

                    if self.enough_errors_for_word(word, errors_for_word):
                        # Generated enough errors for this word.
                        break
                    elif attempts > 10 and len(errors_for_word) / attempts < 0.1:
                        # Not finding many errors to apply.  Break out.
                        break

                    # Sample the number of edits.
                    size = self.random_state.choice(edit_sizes, size=1, replace=False,
                            p=edit_size_probs)[0]
                    # Sampling without replacement cannot draw more edits than
                    # this word has; without the clamp ``choice`` raises
                    # ValueError for words with few possible edits.
                    size = min(size, len(possible_edits))

                    # Sample edits with probability proportional to the edit's frequency.
                    edit_idx = self.random_state.choice(len(probs), size=size, replace=False,
                            p=probs)

                    edit = []
                    for idx in edit_idx:
                        candidate = possible_edits[idx]
                        if candidate in seen_edits:
                            continue
                        seen_edits.add(candidate)
                        edit.append(candidate)

                    if not edit:
                        # Everything sampled had been tried before; the attempt
                        # still counts towards the give-up threshold above.
                        continue

                    # Avoid applying edits that result in unlikely errors.
                    for constraint in self.constraints:
                        for candidate in edit:
                            if constraint(word, candidate):
                                raise EditConstraintError("can't apply edit %s=>%s to word '%s'" % \
                                        (candidate[0], candidate[1], word))

                    error = finder.apply(word, edit)
                    if error in self.blacklist:
                        # Skip blacklisted words (i.e. non-words in a corpus used to generate the
                        # edit patterns in the edit database).
                        continue

                    errors_for_word.append((word, len(possible_edits), edit, error))

                except EditConstraintError as exc:
                    # A rejected edit only ends this attempt; keep sampling.
                    if self.verbose:
                        print(exc)

            errors.extend(errors_for_word)

        pbar.finish()

        return errors