def solve_structured_clue(clue):
    """
    Solve one structured clue and return scored candidate answers.

    `clue` is a list whose last element is the answer pattern and whose
    second-to-last element is the answer length. Both are popped off here,
    so the caller's list is mutated. The remaining elements are
    (phrase, kind) pairs; one of them must have kind 'd' (the definition),
    otherwise list.index raises ValueError.

    Returns a list of (answer, similarity) tuples sorted by similarity to
    the definition, highest first. Returns [] as soon as every partial
    answer built so far is longer than `length`.

    Raises ClueUnsolvableException, carrying a leading slice of `clue`,
    when a group produces no candidate strings or when no partial answer
    survives the filtering step.
    """
    pattern = clue.pop()
    length = clue.pop()
    # `d` is the kind tag of the definition pair; it is not used afterwards.
    definition, d = clue[[x[1] for x in clue].index('d')]
    if len(clue) == 1:
        # this must be just a regular crossword clue (no wordplay)
        answers = [(s, semantic_similarity(s, definition))
                   for s in cached_synonyms(definition)
                   if len(s) == length and matches_pattern(s, pattern)]
        return sorted(answers, key=lambda x: x[1], reverse=True)
    # Group indices that are never concatenated into the answer directly.
    # NOTE(review): presumably groups consumed as function arguments or
    # indicators -- confirm in find_skipped_groups.
    groups_to_skip = find_skipped_groups(clue)
    # One set of candidate strings per group, filled in as groups are solved.
    answer_subparts = [set([]) for x in clue]
    # Number of leading groups already folded into active_set.
    groups_added = 0
    # Partial answers built so far; starts with just the empty string.
    active_set = set([''])
    count = 0
    # At most two passes over the groups. Function groups skipped by the
    # `continue` below stay empty and get another chance on the second pass.
    while any(len(s) == 0 for s in answer_subparts) and count < 2:
        count += 1
        for i, group in enumerate(clue):
            # Letters still available, measured against the shortest
            # partial answer.
            remaining_letters = length - min(len(s) for s in active_set)
            if remaining_letters < 0:
                return []
            if len(answer_subparts[i]) == 0:
                phrase, kind = group
                # Kinds whose first three characters appear in FUNCTIONS
                # operate on other groups; all other kinds transform their
                # own phrase.
                if kind[:3] in FUNCTIONS:
                    func, arg_offsets = compute_arg_offsets(i, clue)
                    # Offsets are relative to this group's index.
                    arg_indices = [i + x for x in arg_offsets]
                    if any(len(answer_subparts[j]) == 0 for j in arg_indices):
                        # Arguments not solved yet; try again later.
                        continue
                    # NOTE(review): presumably yields every list made of one
                    # candidate per argument group -- confirm in tree_search.
                    arg_sets = tree_search(
                        [[]],
                        [answer_subparts[ai] for ai in arg_indices],
                        combination_func=lambda s, w: s + [w])
                    for arg_set in arg_sets:
                        # remaining_letters is passed as the final
                        # positional argument.
                        arg_set += [remaining_letters]
                        answer_subparts[i].update(list(FUNCTIONS[func](*arg_set)))
                else:
                    answer_subparts[i] = set(TRANSFORMS[kind](phrase, remaining_letters))
                if len(answer_subparts[i]) == 0:
                    # Report the clue up to this group, extended to its
                    # furthest forward argument for function groups.
                    if kind[:3] in FUNCTIONS:
                        base_clue = clue[:i + max(0, *arg_offsets) + 1]
                    else:
                        base_clue = clue[:i + 1]
                    raise ClueUnsolvableException(base_clue)
            # print "index and subparts", i, answer_subparts
            # Extend the partial answers only once every group up to and
            # including i has candidates and group i itself is not skipped.
            if all(len(s) > 0 for s in answer_subparts[:i + 1]) and i not in groups_to_skip:
                # Skip groups that an earlier iteration already folded in.
                if i >= groups_added:
                    if VERBOSE:
                        print "updating"
                        print "current active set:", active_set
                        print "branching list:", answer_subparts[groups_added:i + 1]
                    # Third argument: maps a position within the slice back
                    # to its group index and rejects skipped groups.
                    # Fourth argument: keeps strings no longer than `length`
                    # that appear in INITIAL_NGRAMS[length][len(x)] and match
                    # the pattern.
                    active_set = set(tree_search(
                        active_set,
                        answer_subparts[groups_added:i + 1],
                        lambda x: (x + groups_added) not in groups_to_skip,
                        lambda x: len(x) <= length and x in INITIAL_NGRAMS[length][len(x)] and matches_pattern(x, pattern)))
                    if VERBOSE:
                        print "new active set:", active_set
                    groups_added = i + 1
                if len(active_set) == 0:
                    # NOTE(review): this slice ends at i + 3 while the one
                    # above ends at i + 1 -- confirm the offset is intended.
                    raise ClueUnsolvableException(clue[:i + 3])
    wordplay_answers = active_set
    # Keep only full-length strings found in WORDS, scored against the
    # definition.
    answers = [(s, semantic_similarity(s, definition))
               for s in wordplay_answers
               if s in WORDS and len(s) == length]
    return sorted(answers, key=lambda x: x[1], reverse=True)
def generate_kinds(phrases):
    """
    Return the candidate kind assignments for a clue split into `phrases`.

    When the number of phrases has an entry in the precomputed `all_kinds`
    table, that entry is returned as-is. Otherwise a warning is printed and
    the assignments are searched for on the fly: every sequence of
    len(phrases) entries drawn from KINDS, pruned with valid_intermediate
    during the search and filtered with valid_kinds at the end.

    The on-the-fly result is a lazy generator, so nothing is computed until
    the caller iterates over it.
    """
    num_phrases = len(phrases)
    if num_phrases in all_kinds:
        return all_kinds[num_phrases]

    # Parenthesised single-string form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("Warning: very long clue. This may take a very long time. Can you make fewer phrases out of this clue?")
    # The search must be seeded with one empty assignment ([[]]); seeding it
    # with an empty list leaves nothing for the list-append combiner to
    # extend, so no assignments would ever be produced.
    potential_kinds = tree_search([[]],
                                  [KINDS] * num_phrases,
                                  combination_func=lambda s, w: s + [w],
                                  member_test=valid_intermediate)
    return (k for k in potential_kinds if valid_kinds(k))
def solve_structured_clue(clue):
    """
    Solve one structured clue and return scored candidate answers.

    `clue` is a list whose last element is the answer pattern and whose
    second-to-last element is the answer length. Both are popped off here,
    so the caller's list is mutated. The remaining elements are
    (phrase, kind) pairs; one of them must have kind 'd' (the definition),
    otherwise list.index raises ValueError.

    Returns a list of (answer, similarity) tuples sorted by similarity to
    the definition, highest first. Returns [] as soon as every partial
    answer built so far is longer than `length`.

    Raises ClueUnsolvableException, carrying a leading slice of `clue`,
    when a group produces no candidate strings or when no partial answer
    survives the filtering step.
    """
    pattern = clue.pop()
    length = clue.pop()
    # `d` is the kind tag of the definition pair; it is not used afterwards.
    definition, d = clue[[x[1] for x in clue].index('d')]
    if len(clue) == 1:
        # this must be just a regular crossword clue (no wordplay)
        answers = [(s, semantic_similarity(s, definition))
                   for s in cached_synonyms(definition)
                   if len(s) == length and matches_pattern(s, pattern)]
        return sorted(answers, key=lambda x: x[1], reverse=True)
    # Group indices that are never concatenated into the answer directly.
    # NOTE(review): presumably groups consumed as function arguments or
    # indicators -- confirm in find_skipped_groups.
    groups_to_skip = find_skipped_groups(clue)
    # One set of candidate strings per group, filled in as groups are solved.
    answer_subparts = [set([]) for x in clue]
    # Number of leading groups already folded into active_set.
    groups_added = 0
    # Partial answers built so far; starts with just the empty string.
    active_set = set([''])
    count = 0
    # At most two passes over the groups. Function groups skipped by the
    # `continue` below stay empty and get another chance on the second pass.
    while any(len(s) == 0 for s in answer_subparts) and count < 2:
        count += 1
        for i, group in enumerate(clue):
            # Letters still available, measured against the shortest
            # partial answer.
            remaining_letters = length - min(len(s) for s in active_set)
            if remaining_letters < 0:
                return []
            if len(answer_subparts[i]) == 0:
                phrase, kind = group
                # Kinds whose first three characters appear in FUNCTIONS
                # operate on other groups; all other kinds transform their
                # own phrase.
                if kind[:3] in FUNCTIONS:
                    func, arg_offsets = compute_arg_offsets(i, clue)
                    # Offsets are relative to this group's index.
                    arg_indices = [i + x for x in arg_offsets]
                    if any(len(answer_subparts[j]) == 0 for j in arg_indices):
                        # Arguments not solved yet; try again later.
                        continue
                    # NOTE(review): presumably yields every list made of one
                    # candidate per argument group -- confirm in tree_search.
                    arg_sets = tree_search(
                        [[]], [answer_subparts[ai] for ai in arg_indices],
                        combination_func=lambda s, w: s + [w])
                    for arg_set in arg_sets:
                        # remaining_letters is passed as the final
                        # positional argument.
                        arg_set += [remaining_letters]
                        answer_subparts[i].update(
                            list(FUNCTIONS[func](*arg_set)))
                else:
                    answer_subparts[i] = set(TRANSFORMS[kind](
                        phrase, remaining_letters))
                if len(answer_subparts[i]) == 0:
                    # Report the clue up to this group, extended to its
                    # furthest forward argument for function groups.
                    if kind[:3] in FUNCTIONS:
                        base_clue = clue[:i + max(0, *arg_offsets) + 1]
                    else:
                        base_clue = clue[:i + 1]
                    raise ClueUnsolvableException(base_clue)
            # print "index and subparts", i, answer_subparts
            # Extend the partial answers only once every group up to and
            # including i has candidates and group i itself is not skipped.
            if all(len(s) > 0 for s in answer_subparts[:i + 1]) and i not in groups_to_skip:
                # Skip groups that an earlier iteration already folded in.
                if i >= groups_added:
                    if VERBOSE:
                        print "updating"
                        print "current active set:", active_set
                        print "branching list:", answer_subparts[
                            groups_added:i + 1]
                    # Third argument: maps a position within the slice back
                    # to its group index and rejects skipped groups.
                    # Fourth argument: keeps strings no longer than `length`
                    # that appear in INITIAL_NGRAMS[length][len(x)] and match
                    # the pattern.
                    active_set = set(
                        tree_search(
                            active_set,
                            answer_subparts[groups_added:i + 1],
                            lambda x: (x + groups_added) not in groups_to_skip,
                            lambda x: len(x) <= length and x in INITIAL_NGRAMS[
                                length][len(x)] and matches_pattern(
                                x, pattern)))
                    if VERBOSE:
                        print "new active set:", active_set
                    groups_added = i + 1
                if len(active_set) == 0:
                    # NOTE(review): this slice ends at i + 3 while the one
                    # above ends at i + 1 -- confirm the offset is intended.
                    raise ClueUnsolvableException(clue[:i + 3])
    wordplay_answers = active_set
    # Keep only full-length strings found in WORDS, scored against the
    # definition.
    answers = [(s, semantic_similarity(s, definition))
               for s in wordplay_answers
               if s in WORDS and len(s) == length]
    return sorted(answers, key=lambda x: x[1], reverse=True)
from utils.kinds import valid_intermediate, valid_kinds, KINDS
from utils.search import tree_search
import cPickle as pickle
import json

# Precompute, for clues of 1 through 9 phrases, every sequence of kinds that
# passes valid_kinds, then store the table as both a pickle and a JSON file.
all_kinds = {}
for i in range(1, 10):
    # Grow kind sequences one phrase at a time, starting from a single empty
    # sequence and discarding prefixes rejected by valid_intermediate.
    potential_kinds = tree_search([[]],
                                  [KINDS] * i,
                                  member_test=valid_intermediate,
                                  combination_func=lambda s, w: s + [w])
    all_kinds[i] = [kinds for kinds in potential_kinds if valid_kinds(kinds)]
    # Progress indicator: the phrase count that was just finished.
    print(i)

with open('data/kinds.pck', 'wb') as pickle_file:
    pickle.dump(all_kinds, pickle_file)

# json.dump writes the integer phrase counts as string keys.
with open('data/kinds.json', 'w') as json_file:
    json.dump(all_kinds, json_file, separators=(',', ':'), indent=0)