def make_data(entries):
    """Build (aggregation, distance) training pairs grouped by triple count.

    For each entry, the "good" reference aggregations are the
    ``sorted_triples`` of lexes whose ``comment`` is ``'good'``, whose
    ``sorted_triples`` is non-empty, and whose groups together contain as
    many items as the entry has triples. Entries without any good
    aggregation contribute nothing.

    Every partition of ``entry.triples`` is converted to a tuple of tuples
    and paired with ``calc_distance(partition, good_aggs)``.

    Returns:
        A ``defaultdict(list)`` keyed by ``len(entry.triples)`` whose values
        are lists of ``(aggregation, distance)`` tuples.
    """
    data_train = defaultdict(list)

    for entry in entries:

        good_aggs = []
        for lex in entry.lexes:
            if lex['comment'] != 'good' or not lex['sorted_triples']:
                continue
            covered = sum(len(group) for group in lex['sorted_triples'])
            if covered == len(entry.triples):
                good_aggs.append(lex['sorted_triples'])

        # Without a reference aggregation there is nothing to measure against.
        if not good_aggs:
            continue

        n_triples = len(entry.triples)

        for candidate in partitions(entry.triples):
            agg = tuple(tuple(group) for group in candidate)
            distance = calc_distance(agg, good_aggs)
            data_train[n_triples].append((agg, distance))

    return data_train
예제 #2
0
def acc_majority_agg(train_sa_db, test_sa_db):
    """Accuracy of always picking the partition with the most frequent pattern.

    Pattern counts come from ``analyze_agg_patterns(train_sa_db)`` and are
    looked up as ``counts[len(ts)][pattern]``. For each ``(ts, aggs)`` pair in
    ``test_sa_db`` the partition of ``ts`` with the highest count is chosen
    (the first one wins on ties) and counted as a hit when, converted to a
    list of tuples, it is contained in ``aggs``.

    Returns:
        Number of hits divided by ``len(test_sa_db)``.
    """
    from more_itertools import partitions
    from operator import itemgetter

    pattern_counts = analyze_agg_patterns(train_sa_db)

    hits = 0

    for ts, aggs in test_sa_db:

        size = len(ts)

        candidates = list(partitions(ts))
        patterns = [extract_agg_pattern(candidate) for candidate in candidates]
        scored = [(candidate, pattern_counts[size][pattern])
                  for candidate, pattern in zip(candidates, patterns)]

        # max() keeps the earliest candidate among equal counts.
        best = max(scored, key=itemgetter(1))[0]

        if [tuple(part) for part in best] in aggs:
            hits += 1

    return hits / len(test_sa_db)
예제 #3
0
def partition_mit(s: str) -> list:
    """Yield every way of cutting ``s`` into contiguous substrings.

    Each yielded item is a list of strings that concatenate back to ``s``.
    An empty ``s`` yields a single empty list.

    Note: despite the ``list`` annotation this is a generator function.
    """
    # Copied from: https://stackoverflow.com/questions/4904430/find-all-list-permutations-of-splitting-a-string-in-python
    if not s:
        yield []
        return
    for pieces in mit.partitions(s):
        yield ["".join(chunk) for chunk in pieces]
예제 #4
0
    def select_sentence_aggregation(self, dp, n_triples):
        """Return all partitions of ``dp`` ordered by descending score.

        Scores are obtained from ``self.sa_scorer(partitions, n_triples)``;
        the partitions are then sorted together with their scores in reverse
        order and only the sorted partitions are returned.
        """
        candidates = list(partitions(dp))
        scores = self.sa_scorer(candidates, n_triples)

        # sort_together returns the sorted columns; index 1 is the partitions.
        ranked = sort_together([scores, candidates], reverse=True)
        return ranked[1]
 def _span_partitions(self,
                      span: Span) -> Generator[List[Token], None, None]:
     """Yield token lists, one per partition of ``span``.

     For a span of length one, the only partition is the span's single
     element, which is yielded as a one-item list. Otherwise every
     partition of ``span`` is turned into a list of tokens by joining each
     partition element with ``''`` and passing the result to
     ``spacy_utlis.make_token`` together with ``self._nlp``.
     """
     if len(span) == 1:
         # A bare ``return value`` inside a generator function produces no
         # item at all (the value is only attached to StopIteration), so the
         # single-token case must be yielded explicitly.
         yield [span[0]]
         return
     for partition in partitions(span):
         yield [
             spacy_utlis.make_token(nlp=self._nlp, word=''.join(element))
             for element in partition
         ]
예제 #6
0
    def select_sa(self, dp):
        """Return the top ``self.max_sa`` partitions of ``dp`` by score.

        All partitions of ``dp`` are scored with ``self.sa_scorer``, sorted
        together with their scores in descending order, logged at debug
        level, and the leading ``self.max_sa`` are returned with each part
        converted to a tuple.
        """
        candidates = list(partitions(dp))
        scores = self.sa_scorer(candidates)
        ranked_scores, ranked = sort_together([scores, candidates],
                                              reverse=True)

        report = '\n'.join(f'{score:.3f} -> {sa}'
                           for score, sa in zip(ranked_scores, ranked))
        self.logger.debug('Sentence Aggregation: {}'.format(report))

        best = ranked[:self.max_sa]
        return [[tuple(sa_part) for sa_part in sa] for sa in best]
예제 #7
0
def is_split_number(what: int):
    """Return 1 if the digits of ``what`` can be cut into pieces summing to its root.

    The target is ``int(sqrt(what))``. The decimal string of ``what`` is cut
    into contiguous pieces in every possible way; if the integer values of
    the pieces of any cut add up to the target, 1 is returned, otherwise 0.
    """
    target = int(sqrt(what))
    digits = str(what)

    for pieces in partitions(digits):
        if sum(int(''.join(piece)) for piece in pieces) == target:
            return 1

    return 0
def get_search_terms(current_line: str) -> Dict[int, Set[str]]:
    """Return every run of consecutive words in a line, grouped by word count.

    The line is first passed through ``translation_table``, runs of two or
    more whitespace characters are collapsed to a single space, the line is
    split on single spaces, and trailing commas are stripped from each word.

    Args:
        current_line: The raw line of text.

    Returns:
        A dict mapping a word count ``k`` to the set of all space-joined
        sequences of ``k`` consecutive words. Keys run from 1 up to the
        number of words, in ascending order.
    """
    # Remove unwanted characters
    modified_line = current_line.translate(translation_table)
    # Remove multiple spaces
    modified_line = re.sub(r"\s\s+", " ", modified_line)
    words = [word.rstrip(",") for word in modified_line.split(" ")]

    # The parts occurring across all partitions of ``words`` are exactly its
    # non-empty contiguous slices. Enumerating the partitions themselves
    # takes 2**(n-1) steps, so slide a window of each size instead.
    word_total = len(words)
    results: Dict[int, Set[str]] = {}
    for size in range(1, word_total + 1):
        results[size] = {
            " ".join(words[start:start + size])
            for start in range(word_total - size + 1)
        }
    return results
예제 #9
0
def get_phrases(phrase_pronunciations: PhrasePronunciationList) -> PhraseList:
    """
    Collect the phrases produced by every partition of every pronunciation.

    Each partition of each pronunciation is handed to ``part_to_phrases`` and
    the results are accumulated in one list. Partitions for which a
    ``KeyError`` is raised are skipped.
    """
    print(
        f'Looking over {len(phrase_pronunciations)} different pronunciations of sentence...')

    collected = []
    for pronunciation in phrase_pronunciations:
        for split in partitions(pronunciation):
            try:
                collected += part_to_phrases(split)
            except KeyError:
                # Best effort: this split could not be converted; move on.
                continue

    return collected
예제 #10
0
def multiplicative_partitions(n, k=None):
    """Yield tuples of integers obtained by grouping the prime factors of ``n``.

    The prime factors of ``n`` (repeated by multiplicity, from
    ``sp.factorint``) are partitioned into contiguous groups; each group is
    multiplied out and the products sorted into a tuple. The tuples are
    passed through ``filter_seen`` and every distinct permutation of each
    remaining tuple is yielded.

    Args:
        n: The integer to factor.
        k: When truthy, partitions are consumed only until the first one
            with more than ``k`` groups is encountered.
    """
    prime_factors = it.chain.from_iterable(
        [prime] * exponent for prime, exponent in sp.factorint(n).items())

    # TODO: Try to avoid the filter_seen step by generating distinct partitions
    # directly if possible.
    #
    # Otherwise, perhaps multiplicative partitioning should be implemented from
    # scratch
    # https://stackoverflow.com/questions/8558292/how-to-find-multiplicative-partitions-of-any-integer.

    groupings = mit.partitions(prime_factors)
    if k:
        groupings = it.takewhile(lambda group: len(group) <= k, groupings)

    products = (tuple(sorted(map(math.prod, group))) for group in groupings)

    for combo in filter_seen(products):
        yield from mit.distinct_permutations(combo)
예제 #11
0
def acc_random(sa_db, seed=0):
    """Accuracy of picking a uniformly random partition for each item.

    For every ``(ts, aggs)`` pair in ``sa_db`` one partition of ``ts`` is
    drawn with a ``random.Random`` seeded by ``seed``; it counts as a hit
    when, converted to a list of tuples, it is contained in ``aggs``.

    Returns:
        Number of hits divided by ``len(sa_db)``.
    """
    from random import Random
    from more_itertools import partitions

    rng = Random(seed)
    hits = 0

    for ts, aggs in sa_db:
        candidates = list(partitions(ts))
        picked = [tuple(part) for part in rng.choice(candidates)]

        if picked in aggs:
            hits += 1

    return hits / len(sa_db)
예제 #12
0
 def _preprocess(z, stopwords=stopwords):
     """Tokenize ``z``, drop stopwords, and return all partitions as phrases.

     Each returned item corresponds to one partition of the remaining
     tokens and is a list of strings, one per part, formed by joining that
     part's tokens with single spaces.
     """
     kept = [tok for tok in word_tokenize(z) if tok not in stopwords]
     return [[" ".join(chunk) for chunk in split]
             for split in partitions(kept)]
예제 #13
0
# Script section: insert one word per 3-letter combination of `letters`,
# parse every book in `booklist`, derive formations for each entry of
# `worddict`, then commit and back the database up to "bible.db".

# Running counter passed as the third argument of insert_word; the first
# combination receives 1.
index = 0
for a, b, c in product(
        letters, repeat=3):  # These 3-letter combinations are called "roots"
    index += 1
    # NOTE(review): the meaning of the literal arguments 0, 3 and 1 is defined
    # by insert_word, which is not visible here — confirm against it.
    insert_word(0, 3, index, a + b + c, 1)

# Parse all books of the Tanach, insert all words/verses

for book in booklist:
    # The key selects the XML file; the mapped value is forwarded unchanged.
    parse_book(f"morphhb/wlc/{book}.xml", booklist[book])

# Populate formations table

print("inserting formations for", len(worddict), "items...")
for word, wordnum in worddict.items():
    for part in partitions(word):
        # A one-piece partition is the unsplit word, so only real splits are
        # handed to parse_partition (assuming `partitions` is
        # more_itertools.partitions — TODO confirm).
        if len(part) != 1:
            result = parse_partition(part)
            if result:
                insert_formation(wordnum, result)
        # Three-piece splits are additionally offered to parse_inside, so such
        # a split can contribute two formations.
        if len(part) == 3:
            result = parse_inside(part)
            if result:
                insert_formation(wordnum, result)

# Save the in-memory DB to a file
conn.commit()

# The backup target is opened here and left open for the rest of the script.
db_file = sqlite3.connect("bible.db")
conn.backup(db_file)
예제 #14
0
def understanding_partition_size():
    """Print how the number of partitions grows with the input length.

    For each ``x`` from 1 to 29 one line is printed containing ``x``,
    ``2**x`` and the number of partitions of ``list(range(x))``.
    """
    for x in range(1, 30):
        deck = list(range(x))
        # Count lazily: building a list of every partition just to take its
        # length would hold all of them in memory at once, which becomes
        # prohibitive for the larger values of x.
        partition_count = sum(1 for _ in more_itertools.partitions(deck))
        print(x, 2**x, partition_count)