Example #1
0
def refine_pattern(data):
    """Refine an existing role pattern against annotated matches.

    Expects ``data`` to carry ``'pattern_id'``, ``'pos_match_id'``,
    ``'neg_match_ids'`` and optionally ``'feature_dict'``. Loads the stored
    pattern and the positive/negative match rows, rebuilds spaCy-backed
    matches, asks ``RolePatternBuilder`` to produce refined variants, and
    saves the first variant (if any) as a new row in ``patterns``.
    Progress and errors are reported through ``send()`` / ``emit()``.
    """
    send('refine pattern request received')
    send('Loading pattern')
    pattern_id = data['pattern_id']
    # Fall back to the module default when the client sends no feature dict.
    feature_dict = data.get('feature_dict')
    if not feature_dict:
        feature_dict = DEFAULT_REFINE_PATTERN_FEATURE_DICT
    role_pattern = db.load_role_pattern(pattern_id)
    send('Loading matches')
    pos_match_id = data['pos_match_id']
    neg_match_ids = data['neg_match_ids']
    pos_match_row = db.fetch_row('matches', pos_match_id, return_type='dict')
    if not pos_match_row:
        emit('error', 'no row found for pos match id: {}'.format(pos_match_id))
        return  # Cannot continue without the positive example row.
    neg_match_rows = db.fetch_rows('matches',
                                   neg_match_ids,
                                   return_type='dict')
    missing_ids = [
        id_ for id_, row in zip(neg_match_ids, neg_match_rows) if not row
    ]
    if missing_ids:
        for id_ in missing_ids:
            emit('error', 'no row found for neg match id: {}'.format(id_))
        return  # A None row would crash the spacify step below.
    send('preparing training data')
    pos_match_sentence_id = pos_match_row['sentence_id']
    pos_match = json.loads(pos_match_row['data'])
    pos_match = db.spacify_match(pos_match, pos_match_sentence_id)
    neg_matches = []
    for neg_match_row in neg_match_rows:
        sentence_id = neg_match_row['sentence_id']
        neg_match = json.loads(neg_match_row['data'])
        neg_match = db.spacify_match(neg_match, sentence_id)
        neg_matches.append(neg_match)
    send('calculating pattern')
    # NOTE: a hard-coded feature_dict previously overwrote the client-supplied
    # or default dict at this point, making the earlier fallback dead code.
    role_pattern_builder = RolePatternBuilder(feature_dict)
    role_pattern_variants = role_pattern_builder.refine(
        role_pattern, pos_match, neg_matches)
    # First variant meeting the criteria, or None when refinement produced none.
    refined_pattern = next(iter(role_pattern_variants), None)
    if refined_pattern:
        send('success. saving pattern')
        pattern_row = {
            'name': 'unamed_pattern',  # (sic) kept byte-identical for DB compat
            # Bug fix: persist the refined pattern, not the original one.
            'role_pattern_instance': pickle.dumps(refined_pattern),
        }
        pattern_id = db.insert_row('patterns', pattern_row)
        send('pattern saved: {}'.format(pattern_id))
    else:
        send('pattern refinement unsuccessful')
    emit('refine_pattern_success')
Example #2
0
def build_pattern(data):
    """Build a new role pattern from one positive match and persist it.

    Expects ``data`` to carry ``'pos_match_id'`` and optionally
    ``'feature_dict'`` and ``'pattern_id'`` (to force the new row's id).
    Builds a validated pattern with ``RolePatternBuilder``, stores the
    pickled pattern plus its token labels in ``patterns``, and records the
    training match in ``pattern_training_matches``. Emits
    ``'build_pattern_success'`` with the new pattern id on completion.
    """
    send('Build pattern request received')
    pos_match_id = data['pos_match_id']
    feature_dict = data.get('feature_dict')
    pattern_id = data.get('pattern_id')
    if not feature_dict:
        feature_dict = DEFAULT_BUILD_PATTERN_FEATURE_DICT
    pos_match_row = db.fetch_row('matches', pos_match_id, return_type='dict')
    # Guard: without this, a missing row raises TypeError on the subscript
    # below instead of reporting a usable error to the client.
    if not pos_match_row:
        emit('error', 'no row found for pos match id: {}'.format(pos_match_id))
        return
    sentence_id = pos_match_row['sentence_id']
    send('Preparing training data')
    pos_match = json.loads(pos_match_row['data'])['slots']
    pos_match = db.spacify_match(pos_match, sentence_id)
    send('Calculating pattern')
    role_pattern_builder = RolePatternBuilder(feature_dict)
    role_pattern = role_pattern_builder.build(pos_match, validate_pattern=True)
    token_labels = role_pattern.token_labels
    pattern_row = {
        'role_pattern_instance': pickle.dumps(role_pattern),
        'data': json.dumps({'token_labels': token_labels}),
    }
    if pattern_id:
        # Client asked for a specific id; pass it through to the insert.
        pattern_row['id'] = pattern_id
    pattern_id = db.insert_row('patterns', pattern_row)
    # Link the pattern to the match it was trained on (positive example).
    pattern_training_match_row = {
        'match_id': pos_match_id,
        'pattern_id': pattern_id,
        'pos_or_neg': 'pos',
    }
    db.insert_row('pattern_training_matches', pattern_training_match_row)
    send('Pattern saved. ID: {}'.format(pattern_id))
    emit('build_pattern_success', {'pattern_id': pattern_id})
Example #3
0
def visualise_match(data):
    """Render a stored match as DOT graph + legend strings and emit them.

    Loads the match row identified by ``data['match_id']``, rebuilds its
    slots and match tokens into spaCy objects, converts the match to pydot
    graphs, and emits ``'visualise_match_success'`` with the DOT sources.
    """
    match_id = data['match_id']
    send('Loading data')
    match_row = db.fetch_row(
        'matches',
        match_id,
        return_type='dict',
    )
    # Guard: a missing row would otherwise raise TypeError on subscripting.
    if not match_row:
        emit('error', 'no row found for match id: {}'.format(match_id))
        return
    # (Removed a stray debug print of the raw match payload.)
    sentence_id = match_row['sentence_id']
    # Parse the JSON payload once instead of twice.
    match_data = json.loads(match_row['data'])
    slots = db.spacify_match(match_data['slots'], sentence_id)
    match_tokens = db.spacify_tokens(match_data['match_tokens'], sentence_id)
    role_pattern_match = RolePatternMatch(slots)
    role_pattern_match.match_tokens = match_tokens
    graph, legend = role_pattern_match.to_pydot(legend=True)
    dot_data = {
        'graph': graph.to_string(),
        'legend': legend.to_string(),
    }
    emit('visualise_match_success', dot_data)
Example #4
0
    )
    print('pattern_id', pattern_id)

    # Load training match from old DB
    training_match_row = db.db_query(
        'select * from pattern_training_matches_view where pattern_id = {}'.
        format(pattern_id),
        fetch='one',
    )
    training_match_row = db.row_to_dict(training_match_row,
                                        'pattern_training_matches_view')
    training_match_id = training_match_row['id']
    training_match_sentence_id = training_match_row['sentence_id']
    training_match_data = json.loads(training_match_row['match_data'])
    slots = training_match_data['slots']
    training_match = db.spacify_match(slots, training_match_sentence_id)

    # Load matches from new DB, find training match equivalents, and insert into pattern_training_matches
    match_rows = db.db_query(
        'select * from pattern_matches_view where pattern_id = {}'.format(
            pattern_id),
        db_path=new_db_path,
    )
    training_match_equivalent_candidates = []
    training_match_equivalent_candidates_ids = []
    for match_row in match_rows:
        match_row = db.row_to_dict(match_row, 'pattern_matches_view')
        match_id = match_row['id']
        match_sentence_id = match_row['sentence_id']
        match_data = json.loads(match_row['match_data'])
        slots, match_tokens = match_data['slots'], match_data['match_tokens']
 role_pattern = db.load_role_pattern(pattern_id)
 vis_outpath = '{0}/{1}_original.png'.format(config['vis_output_dir'],
                                             pattern_id)
 role_pattern.write_vis(vis_outpath)
 # Load training matches
 training_match_row = db.db_query(
     'select * from pattern_training_matches_view where pattern_id = {}'.
     format(pattern_id),
     fetch='one',
 )
 training_match_row = db.row_to_dict(training_match_row,
                                     'pattern_training_matches_view')
 training_match_id = training_match_row['id']
 training_match_sentence_id = training_match_row['sentence_id']
 training_match = json.loads(training_match_row['match_data'])['slots']
 training_match = db.spacify_match(training_match,
                                   training_match_sentence_id)
 # Load pattern matches
 pattern_match_rows = db.db_query(
     'select * from pattern_matches_view where pattern_id = {}'.format(
         pattern_id))
 pattern_match_rows = db.rows_to_dicts(pattern_match_rows,
                                       'pattern_matches_view')
 all_pattern_match_ids = [row['id'] for row in pattern_match_rows]
 assert all([
     id_ in all_pattern_match_ids for id_ in pos_match_ids + neg_match_ids
 ]), ('Not all of: ' + pos_match_ids + neg_match_ids + ' in: ' +
      all_pattern_match_ids)
 # Map matches to pos or neg annotations
 pos_matches = []
 neg_matches = []
 matches = []