from collections import defaultdict

from more_itertools import partitions


def make_data(entries):
    data_train = defaultdict(list)
    for e in entries:
        # Reference aggregations: lexicalizations marked 'good' whose
        # sorted triples cover the entry's triple set exactly.
        good_aggs = [
            l['sorted_triples'] for l in e.lexes
            if l['comment'] == 'good' and l['sorted_triples']
            and sum(len(x) for x in l['sorted_triples']) == len(e.triples)
        ]
        if good_aggs:
            for agg in partitions(e.triples):
                agg = tuple(tuple(x) for x in agg)
                # calc_distance is defined elsewhere in the module.
                distance = calc_distance(agg, good_aggs)
                data_train[len(e.triples)].append((agg, distance))
    return data_train

def acc_majority_agg(train_sa_db, test_sa_db):
    from more_itertools import partitions
    from operator import itemgetter

    # Majority baseline: pick the partition whose aggregation pattern was
    # seen most often in the training data for the same number of triples.
    counters = analyze_agg_patterns(train_sa_db)
    tp = 0
    for ts, aggs in test_sa_db:
        n = len(ts)
        all_partitions = list(partitions(ts))
        all_patterns = [extract_agg_pattern(agg) for agg in all_partitions]
        all_counts = [counters[n][pat] for pat in all_patterns]
        chosen_agg = max(zip(all_partitions, all_counts), key=itemgetter(1))[0]
        chosen_agg = [tuple(agg_part) for agg_part in chosen_agg]
        if chosen_agg in aggs:
            tp += 1
    return tp / len(test_sa_db)

import more_itertools as mit


def partition_mit(s: str):
    # Copied from: https://stackoverflow.com/questions/4904430/find-all-list-permutations-of-splitting-a-string-in-python
    # Yields every way of splitting `s` into contiguous substrings.
    if s:
        for lst in mit.partitions(s):
            yield ["".join(sublst) for sublst in lst]
    else:
        yield []

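# Usage sketch for partition_mit (my addition, not from the source): a
# 3-character string has 2 ** (3 - 1) == 4 contiguous splits, yielded in
# more_itertools' cut-point order.
for split in partition_mit("abc"):
    print(split)
# ['abc']
# ['a', 'bc']
# ['ab', 'c']
# ['a', 'b', 'c']
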
from more_itertools import partitions, sort_together


def select_sentence_aggregation(self, dp, n_triples):
    # Score every partition of the data points and return them best-first.
    sas = list(partitions(dp))
    sas_scores = self.sa_scorer(sas, n_triples)
    sas = sort_together([sas_scores, sas], reverse=True)[1]
    return sas

from typing import Generator, List

from more_itertools import partitions
from spacy.tokens import Span, Token


def _span_partitions(self, span: Span) -> Generator[List[Token], None, None]:
    # A single-token span has only the trivial partition.
    if len(span) == 1:
        yield [span[0]]
        return
    for partition in partitions(span):
        # Merge each group of tokens back into a single token.
        # spacy_utlis is the project's own helper module.
        yield [
            spacy_utlis.make_token(
                nlp=self._nlp,
                word=''.join(token.text for token in element))
            for element in partition
        ]

from more_itertools import partitions, sort_together


def select_sa(self, dp):
    # Score every partition, log the ranking, and keep the top max_sa.
    sas = list(partitions(dp))
    sas_scores = self.sa_scorer(sas)
    sas_scores, sas = sort_together([sas_scores, sas], reverse=True)
    self.logger.debug('Sentence Aggregation: {}'.format(
        '\n'.join(f'{score:.3f} -> {sa}'
                  for score, sa in zip(sas_scores, sas))))
    return [[tuple(sa_part) for sa_part in sa] for sa in sas[:self.max_sa]]

from math import sqrt

from more_itertools import partitions


def is_split_number(what: int):
    """Return 1 if some contiguous split of the digits of `what` sums to
    its integer square root, else 0."""
    sqr = int(sqrt(what))
    for comb in partitions(str(what)):
        if sum(int(''.join(item)) for item in comb) == sqr:
            return 1
    return 0

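# Usage sketch (my addition, not from the source): 81 qualifies because
# int(sqrt(81)) == 9 and the split 8 + 1 sums to 9; 82 has no such split.
print(is_split_number(81))  # 1
print(is_split_number(82))  # 0
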
import re
from typing import Dict, Set

from more_itertools import partitions


def get_search_terms(current_line: str) -> Dict[int, Set[str]]:
    # Remove unwanted characters (translation_table is defined elsewhere).
    modified_line = current_line.translate(translation_table)
    # Collapse runs of whitespace into single spaces.
    modified_line = re.sub(r"\s\s+", " ", modified_line)
    words = [word.rstrip(",") for word in modified_line.split(" ")]
    # Group every contiguous run of words by its word count.
    results: Dict[int, Set[str]] = {}
    for partition in partitions(words):
        for subpartition in partition:
            subpartition_word_count = len(subpartition)
            if subpartition_word_count not in results:
                results[subpartition_word_count] = set()
            results[subpartition_word_count].add(" ".join(subpartition))
    return results

from more_itertools import partitions


def get_phrases(phrase_pronunciations: PhrasePronunciationList) -> PhraseList:
    """Convert pronunciations of phrases back into English phrases
    consisting of words."""
    phrases = []
    print(f'Looking over {len(phrase_pronunciations)} different pronunciations of sentence...')
    for pronunciation in phrase_pronunciations:
        for part in partitions(pronunciation):
            try:
                phrases.extend(part_to_phrases(part))
            except KeyError:
                # Skip splits containing a chunk with no known word.
                pass
    return phrases

import itertools as it
import math

import more_itertools as mit
import sympy as sp


def multiplicative_partitions(n, k=None):
    # Flatten the prime factorization of n, e.g. 12 -> [2, 2, 3].
    factors = it.chain.from_iterable([p] * m for p, m in sp.factorint(n).items())
    # TODO: Try to avoid the filter_seen step by generating distinct partitions
    # directly if possible. Otherwise, perhaps multiplicative partitioning
    # should be implemented from scratch:
    # https://stackoverflow.com/questions/8558292/how-to-find-multiplicative-partitions-of-any-integer
    # Note that mit.partitions only splits the factor list contiguously, so
    # some groupings are never produced (e.g. (6, 6) for n == 36).
    ps = mit.partitions(factors)
    # Partitions are yielded in order of increasing part count, so takewhile
    # is a safe cutoff at k parts.
    ps = it.takewhile(lambda p: len(p) <= k, ps) if k else ps
    ps = map(lambda p: tuple(sorted(map(math.prod, p))), ps)
    ps = filter_seen(ps)  # filter_seen (defined elsewhere) drops duplicates
    ps = it.chain.from_iterable(map(mit.distinct_permutations, ps))
    yield from ps

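# Usage sketch for multiplicative_partitions (my addition). filter_seen is
# not shown above; this minimal stand-in, an assumption rather than the
# original helper, drops previously seen items:
def filter_seen(iterable):
    seen = set()
    for item in iterable:
        if item not in seen:
            seen.add(item)
            yield item

print(sorted(multiplicative_partitions(12)))
# [(2, 2, 3), (2, 3, 2), (2, 6), (3, 2, 2), (3, 4), (4, 3), (6, 2), (12,)]
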
def acc_random(sa_db, seed=0):
    from random import Random
    from more_itertools import partitions

    r = Random(seed)
    tp = 0
    for ts, aggs in sa_db:
        all_partitions = list(partitions(ts))
        random_partition = r.choice(all_partitions)
        random_partition = [tuple(part) for part in random_partition]
        if random_partition in aggs:
            tp += 1
    return tp / len(sa_db)

from more_itertools import partitions
from nltk.tokenize import word_tokenize


def _preprocess(z, stopwords=stopwords):  # `stopwords` is a module-level collection
    tokens = [tok for tok in word_tokenize(z) if tok not in stopwords]
    parts = [[" ".join(group) for group in part] for part in partitions(tokens)]
    return parts

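# Usage sketch (my addition), assuming NLTK's 'punkt' and 'stopwords' data
# are available; the original module defines its own `stopwords`. "the" is
# filtered out, leaving 2 ** 2 == 4 partitions of the 3 remaining tokens.
from nltk.corpus import stopwords as nltk_stopwords

print(_preprocess("the quick brown fox",
                  stopwords=set(nltk_stopwords.words("english"))))
# [['quick brown fox'], ['quick', 'brown fox'],
#  ['quick brown', 'fox'], ['quick', 'brown', 'fox']]
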
index = 0
for a, b, c in product(letters, repeat=3):  # These 3-letter combinations are called "roots"
    index += 1
    insert_word(0, 3, index, a + b + c, 1)

# Parse all books of the Tanach, insert all words/verses
for book in booklist:
    parse_book(f"morphhb/wlc/{book}.xml", booklist[book])

# Populate formations table
print("inserting formations for", len(worddict), "items...")
for word, wordnum in worddict.items():
    for part in partitions(word):
        if len(part) != 1:
            result = parse_partition(part)
            if result:
                insert_formation(wordnum, result)
        if len(part) == 3:
            result = parse_inside(part)
            if result:
                insert_formation(wordnum, result)

# Save the in-memory DB to a file
conn.commit()
db_file = sqlite3.connect("bible.db")
conn.backup(db_file)

import more_itertools


def understanding_partition_size():
    # A list of n items has 2 ** (n - 1) contiguous partitions (one per
    # subset of the n - 1 cut points), so the printed count is always half
    # of the 2 ** x column; materializing the list becomes infeasible well
    # before x reaches 30.
    for x in range(1, 30):
        deck = list(range(x))
        print(x, 2 ** x, len(list(more_itertools.partitions(deck))))

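# A quick check of the 2 ** (n - 1) count without materializing every
# partition (my addition, not from the source): more_itertools.ilen consumes
# the generator while only keeping a counter.
from more_itertools import ilen, partitions

for n in range(1, 16):
    assert ilen(partitions(range(n))) == 2 ** (n - 1)
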