Example #1
def find_matches(data):
    # Match a stored role pattern against every sentence in the database
    send('Find matches request received')
    send('Loading pattern')
    pattern_id = data['pattern_id']
    role_pattern = db.load_role_pattern(pattern_id)
    send('Finding matches')
    # Init a minimal vocab to save on deserialisation time and memory
    vocab = util.init_vocab()
    sentence_ids = db.get_ids('sentences')
    match_ids = []
    for sentence_id in sentence_ids:
        doc = db.load_sentence_doc(sentence_id, vocab)
        for token in doc:
            # Debug output: print each token with its custom `valence` extension attribute
            print(token, token._.valence)
        matches = role_pattern.match(doc)
        for match in matches:
            slots, match_tokens = db.despacify_match(match, sentence_id)
            match_row = {
                'sentence_id': sentence_id,
                'data': json.dumps({
                    'slots': slots,
                    'match_tokens': match_tokens
                })
            }
            match_id = db.insert_row('matches', match_row)
            match_ids.append(match_id)
            pattern_match_row = {
                'match_id': match_id,
                'pattern_id': pattern_id,
            }
            db.insert_row('pattern_matches', pattern_match_row)
    send('Matches saved. IDs: {}'.format(match_ids))
    emit('find_matches_success')
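The send and emit calls here match Flask-SocketIO's API, so these handlers presumably run inside a SocketIO server. A minimal sketch of how the handler above might be wired up, assuming that setup (the event name and module layout are assumptions):

from flask import Flask
from flask_socketio import SocketIO

app = Flask(__name__)
socketio = SocketIO(app)

@socketio.on('find_matches')  # event name is an assumption
def handle_find_matches(data):
    find_matches(data)

if __name__ == '__main__':
    socketio.run(app)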
Example #2
def refine_pattern(data):
    send('Refine pattern request received')
    send('Loading pattern')
    pattern_id = data['pattern_id']
    feature_dict = data.get('feature_dict')
    if not feature_dict:
        feature_dict = DEFAULT_REFINE_PATTERN_FEATURE_DICT
    role_pattern = db.load_role_pattern(pattern_id)
    send('Loading matches')
    pos_match_id = data['pos_match_id']
    neg_match_ids = data['neg_match_ids']
    pos_match_row = db.fetch_row('matches', pos_match_id, return_type='dict')
    if not pos_match_row:
        emit('error', 'no row found for pos match id: {}'.format(pos_match_id))
        return
    neg_match_rows = db.fetch_rows('matches',
                                   neg_match_ids,
                                   return_type='dict')
    for id_, row in zip(neg_match_ids, neg_match_rows):
        if not row:
            emit('error', 'no row found for neg match id: {}'.format(id_))
            return
    send('preparing training data')
    pos_match_sentence_id = pos_match_row['sentence_id']
    pos_match = json.loads(pos_match_row['data'])
    pos_match = db.spacify_match(pos_match, pos_match_sentence_id)
    neg_matches = []
    for neg_match_row in neg_match_rows:
        sentence_id = neg_match_row['sentence_id']
        neg_match = json.loads(neg_match_row['data'])
        neg_match = db.spacify_match(neg_match, sentence_id)
        neg_matches.append(neg_match)
    send('calculating pattern')
    role_pattern_builder = RolePatternBuilder(feature_dict)
    role_pattern_variants = role_pattern_builder.refine(
        role_pattern, pos_match, neg_matches)
    role_pattern_variants = list(role_pattern_variants)
    try:  # Take the first pattern variant, if any
        refined_pattern = role_pattern_variants[0]
    except IndexError:  # No variants met the criteria
        refined_pattern = None
    if refined_pattern:
        send('success. saving pattern')
        pattern_row = {
            'name': 'unnamed_pattern',
            # Save the refined pattern, not the original one
            'role_pattern_instance': pickle.dumps(refined_pattern),
        }
        pattern_id = db.insert_row('patterns', pattern_row)
        send('pattern saved: {}'.format(pattern_id))
    else:
        send('pattern refinement unsuccessful')
    emit('refine_pattern_success')
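Judging from the keys the handler reads, the request payload it expects looks like the sketch below (ids are placeholders; feature_dict is optional and falls back to DEFAULT_REFINE_PATTERN_FEATURE_DICT):

data = {
    'pattern_id': 1,            # pattern to refine (placeholder id)
    'pos_match_id': 10,         # match the refined pattern should keep
    'neg_match_ids': [11, 12],  # matches it should exclude
    'feature_dict': {'DEP': 'dep_', 'TAG': 'tag_'},  # optional
}
refine_pattern(data)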
Example #3
def visualise_pattern(data):
    pattern_id = data['pattern_id']
    send('Loading pattern')
    role_pattern = db.load_role_pattern(pattern_id)
    pprint(role_pattern.spacy_dep_pattern)  # Debug: show the underlying spaCy dependency pattern
    send('Generating DOT')
    node_attrs = role_pattern_vis.DEFAULT_NODE_ATTRS
    # Per-token styling example, kept for reference (requires a loaded doc):
    # for token in doc:
    #     token._.plot.update(node_attrs)
    #     token._.plot['label'] = '{0} [{1}]\n({2})'.format(token.orth_, token.i, token.tag_)
    graph, legend = role_pattern.to_pydot(legend=True)
    graph, legend = graph.to_string(), legend.to_string()
    dot_data = {
        'graph': graph,
        'legend': legend,
    }
    emit('visualise_pattern_success', dot_data)
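Because the handler emits plain DOT strings, any Graphviz binding can render them. A minimal server-side sketch using pydot (output file names are arbitrary, and the Graphviz binaries must be installed):

import pydot

graph = pydot.graph_from_dot_data(dot_data['graph'])[0]
legend = pydot.graph_from_dot_data(dot_data['legend'])[0]
graph.write_png('pattern.png')   # render the pattern graph
legend.write_png('legend.png')   # render its legend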
Example #4
progress = util.read_progress()

n_patterns_to_insert = len(pattern_ids_to_insert)
for pattern_id in pattern_ids_to_insert:
    # Skip patterns already migrated on a previous run
    if pattern_id in progress['pattern_ids_inserted']:
        continue
    print('pattern_id', pattern_id)
    # Load RolePattern
    role_pattern_path = os.path.join(config['patterns_output_dir'],
                                     '{}.p'.format(pattern_id))
    try:
        with open(role_pattern_path, 'rb') as f:
            role_pattern = pickle.load(f)
    except (OSError, pickle.UnpicklingError):
        # Fall back to loading the pattern from the existing database
        role_pattern = db.load_role_pattern(pattern_id)
    token_labels = role_pattern.token_labels
    role_pattern_bytes = pickle.dumps(role_pattern)
    pattern_row = {
        'id': pattern_id,
        'role_pattern_instance': role_pattern_bytes,
        'data': json.dumps({'token_labels': token_labels}),
    }
    db.insert_row('patterns',
                  pattern_row,
                  db_path=config['new_db_file_path'])
    # Append the original id so the resume check above works across runs
    progress['pattern_ids_inserted'].append(pattern_id)
    print(len(progress['pattern_ids_inserted']), '/', n_patterns_to_insert)
    util.write_progress(progress)
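The script relies on util.read_progress and util.write_progress to make the migration resumable, but those helpers aren't shown. A minimal sketch of what they might look like, assuming progress is kept in a JSON file (the file name and structure are assumptions):

import json
import os

PROGRESS_PATH = 'progress.json'  # assumed location

def read_progress():
    # Load previous progress, or start fresh if no file exists yet
    if os.path.exists(PROGRESS_PATH):
        with open(PROGRESS_PATH) as f:
            return json.load(f)
    return {'pattern_ids_inserted': []}

def write_progress(progress):
    # Persist progress after every insert so an interrupted run can resume
    with open(PROGRESS_PATH, 'w') as f:
        json.dump(progress, f)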