Esempi in Python per enweight, esempi in Python per utils.enweight

Esempio n. 1

0

Mostra file

 def split_match(fragments, start=0, end=-1):
     """Align a list of fragments by divide and conquer (generator).

     Candidates are tried in priority order. The first one whose
     ``search.find_best`` score exceeds ``(n - 1) / (2 * n)`` (``n`` being the
     number of fragments in this call) is annotated in place with the keys
     "match-start", "match-end", "sws" and "substitutions". The fragments to
     its left are then aligned within ``start .. match_start`` and those to
     its right within ``match_end .. end``.

     :param fragments: Sequence of dict-like fragments with a "transcript" entry
     :param start: Lower search bound forwarded to ``search.find_best``
     :param end: Upper search bound forwarded to ``search.find_best``
     :return: Yields one item per input fragment, in input order: the
         annotated fragment when it was matched, otherwise None
     """
     count = len(fragments)
     if count < 1:
         return
     if count == 1:
         candidates = [(0, fragments[0])]
     else:
         # Pair every (index, fragment) entry with a priority. ``enweight`` is
         # defined elsewhere; presumably its weight is lowest near the center
         # of the list, so ``1 - weight`` favors central fragments, and the
         # transcript length favors long ones.
         prioritized = [
             (pair[0], (1 - pair[1]) * len(pair[0][1]["transcript"]))
             for pair in enweight(enumerate(fragments))
         ]
         # Highest priority first; the sort is stable, so ties keep list order.
         prioritized.sort(key=lambda item: item[1], reverse=True)
         # Only the (index, fragment) entries are needed from here on.
         candidates = [item[0] for item in prioritized]
     # Acceptance threshold: 0 for a single fragment, approaching 0.5 as the
     # number of fragments grows.
     threshold = (count - 1) / (2 * count)
     for position, candidate in candidates:
         found = search.find_best(candidate["transcript"], start=start, end=end)
         found_start, found_end, score, substitutions = found
         if score > threshold:
             candidate["match-start"] = found_start
             candidate["match-end"] = found_end
             candidate["sws"] = score
             candidate["substitutions"] = substitutions
             # The accepted match splits the problem: everything before the
             # fragment must lie before the match, everything after it must
             # lie after the match.
             yield from split_match(fragments[0:position],
                                    start=start,
                                    end=found_start)
             yield candidate
             yield from split_match(fragments[position + 1:],
                                    start=found_end,
                                    end=end)
             return
     # No candidate scored high enough: emit one None placeholder per fragment
     # so the output stays aligned with the input.
     for _ in candidates:
         yield None

Esempio n. 2

0

Mostra file

File: text.py Progetto: mlcommons/peoples-speech

def weighted_ngrams(s, size, direction=0):
    """
    Decomposes a string into its N-grams (left to right) and pairs every occurrence with a positional weight.
    The positional weight progresses quadratically.
    :param s: String to decompose
    :param size: N-gram size
    :param direction: Selects how positional weights are assigned:
        direction < 0: first N-gram gets weight 1.0, last one gets 0.0
        direction > 0: first N-gram gets weight 0.0, last one gets 1.0
        direction == 0: first and last N-gram get weight 1.0, center N-gram(s) get a weight near or equal to 0
    :return: Produces (string, float) tuples, each holding an N-gram and its positional weight value
    """
    # Split into N-grams first, then let enweight attach the weights.
    grams = ngrams(s, size)
    return enweight(grams, direction=direction)

Esempio n. 3

0

Mostra file

 def split_match(fragments, start=0, end=-1):
     """Recursively match fragments against the search space (generator).

     One fragment is picked as an anchor: the first, in priority order, whose
     ``search.find_best`` score is greater than ``(n - 1) / (2 * n)``, where
     ``n`` is the number of fragments passed to this call. The anchor is
     annotated in place ("match-start", "match-end", "sws", "substitutions").
     The fragments before it are then matched between ``start`` and the anchor's
     match start, and the fragments after it between the anchor's match end
     and ``end``.

     :param fragments: Sequence of dict-like fragments with a "transcript" entry
     :param start: Lower search bound forwarded to ``search.find_best``
     :param end: Upper search bound forwarded to ``search.find_best``
     :return: Yields one item per input fragment, in input order: the
         annotated fragment if it was matched, None otherwise
     """
     total = len(fragments)
     if total < 1:
         return
     if total == 1:
         ordered = [(0, fragments[0])]
     else:
         # ``enweight`` (defined elsewhere) yields pairs of an (index, fragment)
         # entry and a weight. Rank by ``(1 - weight) * transcript length``,
         # highest first; the original index travels along with each fragment.
         ranked = sorted(
             enweight(enumerate(fragments)),
             key=lambda fw: (1 - fw[1]) * len(fw[0][1]["transcript"]),
             reverse=True,
         )
         ordered = [fw[0] for fw in ranked]
     min_score = (total - 1) / (2 * total)
     for idx, frag in ordered:
         hit_start, hit_end, hit_score, hit_substitutions = search.find_best(
             frag["transcript"], start=start, end=end
         )
         if hit_score > min_score:
             frag["match-start"] = hit_start
             frag["match-end"] = hit_end
             frag["sws"] = hit_score
             frag["substitutions"] = hit_substitutions
             # Each recursive call receives a strictly shorter slice, so the
             # recursion ends at the empty list.
             before = split_match(fragments[0:idx], start=start, end=hit_start)
             after = split_match(fragments[idx + 1:], start=hit_end, end=end)
             yield from before
             yield frag
             yield from after
             return
     # Nothing was accepted at this level: one None per fragment keeps the
     # results positionally aligned with the input.
     yield from (None for _ in ordered)

Esempio n. 4

0

Mostra file

File: align.py Progetto: Fakhraddin/DSAlign

 def split_match(fragments, start=0, end=-1):
     """Align a list of fragments by divide and conquer (generator).

     Fragments are tried in priority order. The first one whose
     ``search.find_best`` score exceeds ``(n - 1) / (2 * n)`` (``n`` being the
     number of fragments in this call) is annotated in place with the keys
     'match-start', 'match-end', 'sws' and 'substitutions'. The fragments
     before it are then aligned within ``start .. match_start`` and those
     after it within ``match_end .. end``.

     The generator finishes with a plain ``return``. Raising ``StopIteration``
     inside a generator body is turned into ``RuntimeError`` by PEP 479
     (Python 3.7+), which would break both the empty-input case and every
     successful match.

     :param fragments: Sequence of dict-like fragments with a 'transcript' entry
     :param start: Lower search bound forwarded to ``search.find_best``
     :param end: Upper search bound forwarded to ``search.find_best``
     :return: Yields one item per input fragment, in input order: the
         annotated fragment when it was matched, otherwise None
     """
     n = len(fragments)
     if n < 1:
         # Empty input: end the generator without yielding anything.
         return
     elif n == 1:
         weighted_fragments = [(0, fragments[0])]
     else:
         # so we later know the original index of each fragment
         weighted_fragments = enumerate(fragments)
         # assigns high values to long statements near the center of the list
         weighted_fragments = enweight(weighted_fragments)
         weighted_fragments = map(
             lambda fw: (fw[0], (1 - fw[1]) * len(fw[0][1]['transcript'])),
             weighted_fragments)
         # fragments with highest weights first
         weighted_fragments = sorted(weighted_fragments,
                                     key=lambda fw: fw[1],
                                     reverse=True)
         # strip weights
         weighted_fragments = list(map(lambda fw: fw[0],
                                       weighted_fragments))
     for index, fragment in weighted_fragments:
         match = search.find_best(fragment['transcript'],
                                  start=start,
                                  end=end)
         match_start, match_end, sws_score, match_substitutions = match
         # Threshold is 0 for a single fragment and approaches 0.5 as n grows.
         if sws_score > (n - 1) / (2 * n):
             fragment['match-start'] = match_start
             fragment['match-end'] = match_end
             fragment['sws'] = sws_score
             fragment['substitutions'] = match_substitutions
             # Fragments left of the anchor must match before it ...
             for f in split_match(fragments[0:index],
                                  start=start,
                                  end=match_start):
                 yield f
             yield fragment
             # ... and fragments right of the anchor must match after it.
             for f in split_match(fragments[index + 1:],
                                  start=match_end,
                                  end=end):
                 yield f
             # Done at this level; do not try further candidates.
             return
     # No candidate was accepted: one None per fragment keeps the output
     # positionally aligned with the input.
     for _, _ in weighted_fragments:
         yield None