def populate_topics_from_phantom_forms(cls):
    """Create ``Topic`` rows from the ``$TOPIC`` options of legislator forms.

    Form elements are fetched from ``phantom_on_the_capitol`` for the
    ``bioguide_id`` of every ``Legislator``. Every option of every step whose
    ``value`` is ``'$TOPIC'`` is stripped, lower-cased and de-duplicated, then
    passed to ``select_solver.choose('test', [name])``:

    * a truthy result creates (or reuses) a ``Topic`` with that name;
    * ``None`` defers the name to the second pass;
    * any other falsy result leaves the name untouched.

    In the second pass each deferred name is stored as a ``Topic`` whose
    ``wikipedia_parent`` is the id of the parentless ``Topic`` with the
    smallest Damerau-Levenshtein distance to it (first one wins on ties).
    A failure on one name is reported and the remaining names are still
    processed.

    ``cls`` is accepted but not used. Returns ``None``.
    """
    def _as_text(value):
        # Avoid the unicode(str(...)) round-trip, which raises on any
        # non-ASCII unicode value under Python 2.
        if isinstance(value, bytes):
            # NOTE(review): byte strings are assumed to be UTF-8 -- confirm.
            return value.decode('utf-8')
        return value

    bioguide_ids = [leg.bioguide_id for leg in Legislator.query.all()]
    all_forms = phantom_on_the_capitol.retrieve_form_elements(bioguide_ids)

    # Distinct, normalised option labels. Lower-casing before de-duplication
    # means labels that differ only in case are handled once.
    topic_names = set()
    for req in all_forms.values():
        for steps in req.values():
            for step in steps:
                if step['value'] != '$TOPIC':
                    continue
                # Iterating a dict yields its keys, so dict- and list-shaped
                # option sets need no separate handling; a missing (None)
                # option set contributes nothing.
                for option in step['options_hash'] or ():
                    topic_names.add(option.strip().lower())

    failed_topics = []
    for name in topic_names:
        result = select_solver.choose('test', [name])
        if result is None:
            failed_topics.append(name)
        elif result:
            db_first_or_create(Topic, name=name)

    if not failed_topics:
        return

    # Materialised once: rows created below always carry a parent id, so they
    # would never qualify as candidates anyway.
    # NOTE(review): assumes Topic.query is a lazy query that would otherwise
    # be re-run on every iteration -- confirm.
    candidates = list(Topic.query.filter_by(wikipedia_parent=None))
    if not candidates:
        print('No parentless topics exist; %d unresolved topics were not added'
              % len(failed_topics))
        return

    for f_topic in failed_topics:
        try:
            target = _as_text(f_topic)
            # min() returns the first minimal element, matching a strict
            # "smaller than the best so far" scan.
            parent = min(
                candidates,
                key=lambda topic: jellyfish.damerau_levenshtein_distance(
                    _as_text(topic.name), target),
            )
            print('Adding ' + f_topic + ' with parent ' + parent.name)
            db_first_or_create(Topic, name=f_topic,
                               wikipedia_parent=parent.id)
        except Exception as exc:
            # Best effort: report the failure and carry on with the rest,
            # without swallowing KeyboardInterrupt/SystemExit.
            print('Skipping %s: %r' % (f_topic, exc))
def topic_for_message(choices, message):
    """Pick a topic for ``message`` from the topics matching ``choices``.

    The lower-cased names of ``Topic.topics_from_choices(choices)`` are
    offered to ``select_solver.choose`` together with ``message.msgbody``;
    whatever ``choose`` returns is passed back unchanged.
    """
    body = message.msgbody
    candidate_names = []
    for topic in Topic.topics_from_choices(choices):
        candidate_names.append(topic.name.lower())
    return select_solver.choose(body, candidate_names)