def refine_pattern(data):
    send('Refine pattern request received')
    send('Loading pattern')
    pattern_id = data['pattern_id']
    feature_dict = data.get('feature_dict')
    if not feature_dict:
        feature_dict = DEFAULT_REFINE_PATTERN_FEATURE_DICT
    role_pattern = db.load_role_pattern(pattern_id)
    send('Loading matches')
    pos_match_id = data['pos_match_id']
    neg_match_ids = data['neg_match_ids']
    pos_match_row = db.fetch_row('matches', pos_match_id, return_type='dict')
    if not pos_match_row:
        emit('error', 'no row found for pos match id: {}'.format(pos_match_id))
        return
    neg_match_rows = db.fetch_rows('matches', neg_match_ids, return_type='dict')
    for id_, row in zip(neg_match_ids, neg_match_rows):
        if not row:
            emit('error', 'no row found for neg match id: {}'.format(id_))
            return
    send('Preparing training data')
    pos_match_sentence_id = pos_match_row['sentence_id']
    pos_match = json.loads(pos_match_row['data'])
    pos_match = db.spacify_match(pos_match, pos_match_sentence_id)
    neg_matches = []
    for neg_match_row in neg_match_rows:
        sentence_id = neg_match_row['sentence_id']
        neg_match = json.loads(neg_match_row['data'])
        neg_match = db.spacify_match(neg_match, sentence_id)
        neg_matches.append(neg_match)
    send('Calculating pattern')
    role_pattern_builder = RolePatternBuilder(feature_dict)
    role_pattern_variants = role_pattern_builder.refine(
        role_pattern, pos_match, neg_matches)
    role_pattern_variants = list(role_pattern_variants)
    try:
        # Take the first variant that satisfies the refinement criteria
        refined_pattern = role_pattern_variants[0]
    except IndexError:
        # No variant meets the criteria
        refined_pattern = None
    if refined_pattern:
        send('Success. Saving pattern')
        pattern_row = {
            'name': 'unnamed_pattern',
            'role_pattern_instance': pickle.dumps(refined_pattern),
        }
        pattern_id = db.insert_row('patterns', pattern_row)
        send('Pattern saved: {}'.format(pattern_id))
    else:
        send('Pattern refinement unsuccessful')
    emit('refine_pattern_success')
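# Usage sketch for refine_pattern. The payload normally arrives over the
# socket connection; the IDs below are hypothetical placeholders:
#
#     refine_pattern({
#         'pattern_id': 1,          # pattern to refine
#         'pos_match_id': 2,        # match the refined pattern must keep
#         'neg_match_ids': [3, 4],  # matches the refined pattern must drop
#     })
#
# When 'feature_dict' is omitted, DEFAULT_REFINE_PATTERN_FEATURE_DICT is used,
# e.g. a token-attribute mapping such as
# {'DEP': 'dep_', 'TAG': 'tag_', 'LOWER': 'lower_'}.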
def build_pattern(data):
    send('Build pattern request received')
    pos_match_id = data['pos_match_id']
    feature_dict = data.get('feature_dict')
    pattern_id = data.get('pattern_id')
    if not feature_dict:
        feature_dict = DEFAULT_BUILD_PATTERN_FEATURE_DICT
    pos_match_row = db.fetch_row('matches', pos_match_id, return_type='dict')
    if not pos_match_row:
        emit('error', 'no row found for pos match id: {}'.format(pos_match_id))
        return
    sentence_id = pos_match_row['sentence_id']
    send('Preparing training data')
    pos_match = json.loads(pos_match_row['data'])['slots']
    pos_match = db.spacify_match(pos_match, sentence_id)
    send('Calculating pattern')
    role_pattern_builder = RolePatternBuilder(feature_dict)
    role_pattern = role_pattern_builder.build(pos_match, validate_pattern=True)
    token_labels = role_pattern.token_labels
    role_pattern_bytes = pickle.dumps(role_pattern)
    pattern_row = {
        'role_pattern_instance': role_pattern_bytes,
        'data': json.dumps({'token_labels': token_labels}),
    }
    # Reuse the supplied ID (overwrite) or let the DB assign a new one
    if pattern_id:
        pattern_row['id'] = pattern_id
    pattern_id = db.insert_row('patterns', pattern_row)
    # Record the positive example the pattern was built from
    pattern_training_match_row = {
        'match_id': pos_match_id,
        'pattern_id': pattern_id,
        'pos_or_neg': 'pos',
    }
    db.insert_row('pattern_training_matches', pattern_training_match_row)
    send('Pattern saved. ID: {}'.format(pattern_id))
    emit('build_pattern_success', {'pattern_id': pattern_id})
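# Usage sketch for build_pattern (hypothetical IDs):
#
#     build_pattern({
#         'pos_match_id': 2,  # match whose slots seed the new pattern
#         'pattern_id': 7,    # optional: write over an existing pattern row
#     })
#
# Omitting 'feature_dict' falls back to DEFAULT_BUILD_PATTERN_FEATURE_DICT,
# which controls the token attributes RolePatternBuilder may generalise over.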
def visualise_match(data):
    match_id = data['match_id']
    send('Loading data')
    match_row = db.fetch_row(
        'matches',
        match_id,
        return_type='dict',
    )
    sentence_id = match_row['sentence_id']
    match_data = json.loads(match_row['data'])
    slots = db.spacify_match(match_data['slots'], sentence_id)
    match_tokens = db.spacify_tokens(match_data['match_tokens'], sentence_id)
    role_pattern_match = RolePatternMatch(slots)
    role_pattern_match.match_tokens = match_tokens
    # Render the match and its legend as Graphviz DOT strings
    graph, legend = role_pattern_match.to_pydot(legend=True)
    graph, legend = graph.to_string(), legend.to_string()
    dot_data = {
        'graph': graph,
        'legend': legend,
    }
    emit('visualise_match_success', dot_data)
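# The handler emits DOT source rather than rendered images, so any Graphviz
# front-end can draw it. A minimal sketch, assuming pydot and Graphviz are
# installed and 'dot_data' is the payload emitted above:
#
#     import pydot
#     (graph,) = pydot.graph_from_dot_data(dot_data['graph'])
#     graph.write_png('match.png')
#     (legend,) = pydot.graph_from_dot_data(dot_data['legend'])
#     legend.write_png('legend.png')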
)
print('pattern_id', pattern_id)
# Load training match from old DB
training_match_row = db.db_query(
    'select * from pattern_training_matches_view where pattern_id = {}'.format(
        pattern_id),
    fetch='one',
)
training_match_row = db.row_to_dict(
    training_match_row, 'pattern_training_matches_view')
training_match_id = training_match_row['id']
training_match_sentence_id = training_match_row['sentence_id']
training_match_data = json.loads(training_match_row['match_data'])
slots = training_match_data['slots']
training_match = db.spacify_match(slots, training_match_sentence_id)
# Load matches from new DB, find training match equivalents,
# and insert into pattern_training_matches
match_rows = db.db_query(
    'select * from pattern_matches_view where pattern_id = {}'.format(
        pattern_id),
    db_path=new_db_path,
)
training_match_equivalent_candidates = []
training_match_equivalent_candidates_ids = []
for match_row in match_rows:
    match_row = db.row_to_dict(match_row, 'pattern_matches_view')
    match_id = match_row['id']
    match_sentence_id = match_row['sentence_id']
    match_data = json.loads(match_row['match_data'])
    slots, match_tokens = match_data['slots'], match_data['match_tokens']
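# The equivalence test itself is cut off above. A plausible sketch, assuming
# two matches are equivalent when they come from the same sentence and their
# slots cover the same tokens; the helper name and the token index field 'i'
# are hypothetical:
#
#     def slots_signature(slots):
#         return {(label, tuple(t['i'] for t in tokens))
#                 for label, tokens in slots.items()}
#
#     if (match_sentence_id == training_match_sentence_id
#             and slots_signature(slots)
#             == slots_signature(training_match_data['slots'])):
#         training_match_equivalent_candidates.append(match_row)
#         training_match_equivalent_candidates_ids.append(match_id)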
role_pattern = db.load_role_pattern(pattern_id)
vis_outpath = '{0}/{1}_original.png'.format(config['vis_output_dir'], pattern_id)
role_pattern.write_vis(vis_outpath)
# Load training matches
training_match_row = db.db_query(
    'select * from pattern_training_matches_view where pattern_id = {}'.format(
        pattern_id),
    fetch='one',
)
training_match_row = db.row_to_dict(
    training_match_row, 'pattern_training_matches_view')
training_match_id = training_match_row['id']
training_match_sentence_id = training_match_row['sentence_id']
training_match = json.loads(training_match_row['match_data'])['slots']
training_match = db.spacify_match(training_match, training_match_sentence_id)
# Load pattern matches
pattern_match_rows = db.db_query(
    'select * from pattern_matches_view where pattern_id = {}'.format(
        pattern_id))
pattern_match_rows = db.rows_to_dicts(
    pattern_match_rows, 'pattern_matches_view')
all_pattern_match_ids = [row['id'] for row in pattern_match_rows]
assert all(
    id_ in all_pattern_match_ids for id_ in pos_match_ids + neg_match_ids
), 'Not all of: {} in: {}'.format(
    pos_match_ids + neg_match_ids, all_pattern_match_ids)
# Map matches to pos or neg annotations
pos_matches = []
neg_matches = []
matches = []
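# A sketch of the mapping that presumably follows, assuming pos_match_ids and
# neg_match_ids carry the user's annotations (loop body hypothetical, built
# from the row-loading idioms above):
#
#     for row in pattern_match_rows:
#         slots = json.loads(row['match_data'])['slots']
#         match = db.spacify_match(slots, row['sentence_id'])
#         matches.append(match)
#         if row['id'] in pos_match_ids:
#             pos_matches.append(match)
#         elif row['id'] in neg_match_ids:
#             neg_matches.append(match)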