コード例 #1
0
ファイル: app.py プロジェクト: davechan/dedupe-web
def mark_pair():
    """Record the user's verdict on the current pair, or finish training.

    The ``action`` query parameter selects the behaviour:

    * ``yes``    -- append the current pair to the ``match`` labels.
    * ``no``     -- append the current pair to the ``distinct`` labels.
    * ``finish`` -- write the accumulated labels to a JSON file under
      ``UPLOAD_FOLDER``, call ``dedupeit.delay`` with the field
      definitions, training file path, file object and data sample, store
      the returned key and a start timestamp in the Flask session, and
      drop this session's entry from ``dedupers``.
    * anything else -- counted as ``unsure``.

    Returns:
        A JSON response: ``{"counter": {...}}`` for labelling actions or
        ``{"finished": true}`` for ``finish``.  A 400 JSON error response
        is returned when there is no usable deduper session.
    """
    deduper_id = flask_session.get('session_id')
    # 'finish' deletes the entry in ``dedupers`` but the session id stays in
    # the cookie, so a known session id does not guarantee live state.
    if not deduper_id or deduper_id not in dedupers:
        return make_response(jsonify(status='error', message='need to start a session'), 400)

    action = request.args['action']
    state = dedupers[deduper_id]
    state['last_interaction'] = datetime.now()
    counter = state.get('counter') or {'yes': 0, 'no': 0, 'unsure': 0}
    labels = state.get('training_data') or {'distinct': [], 'match': []}
    deduper = state['deduper']

    if action == 'yes':
        labels['match'].append(state['current_pair'])
        counter['yes'] += 1
        resp = {'counter': counter}
    elif action == 'no':
        labels['distinct'].append(state['current_pair'])
        counter['no'] += 1
        resp = {'counter': counter}
    elif action == 'finish':
        file_io = state['csv']
        training_file_path = os.path.join(UPLOAD_FOLDER, '%s-training.json' % file_io.file_path)
        # Serialise ``labels`` rather than state['training_data']: that key
        # is only set after a first labelling call, so finishing right away
        # would otherwise raise KeyError.  Text mode is used because
        # json.dumps returns str, which a binary file rejects on Python 3.
        with open(training_file_path, 'w') as f:
            f.write(json.dumps(labels, default=_to_json))
        args = {
            'field_defs': state['field_defs'],
            'training_data': training_file_path,
            'file_io': file_io,
            'data_sample': deduper.data_sample,
        }
        rv = dedupeit.delay(**args)
        flask_session['deduper_key'] = rv.key
        resp = {'finished': True}
        flask_session['dedupe_start'] = time.time()
    else:
        counter['unsure'] += 1
        resp = {'counter': counter}

    # Persisted unconditionally so every branch leaves the state consistent.
    deduper.markPairs(labels)
    state['training_data'] = labels
    state['counter'] = counter
    if resp.get('finished'):
        del dedupers[deduper_id]

    response = make_response(json.dumps(resp))
    response.headers['Content-Type'] = 'application/json'
    return response