def mark_pair():
    """Record the user's verdict on the current candidate pair.

    The verdict comes from the ``action`` query parameter:

    * ``yes``    -- append the session's current pair to the ``match`` labels.
    * ``no``     -- append the session's current pair to the ``distinct``
      labels.
    * ``finish`` -- write the collected labels to a JSON file under
      ``UPLOAD_FOLDER``, hand the job to ``dedupeit.delay`` and remove this
      session's entry from ``dedupers``.
    * anything else is counted as ``unsure``.

    Returns:
        A JSON response: ``{"counter": {...}}`` after a labelling action,
        ``{"finished": true}`` after ``finish``, or a 400 error payload when
        no usable session exists.
    """
    deduper_id = flask_session.get('session_id')
    if not deduper_id:
        return make_response(jsonify(status='error', message='need to start a session'), 400)

    action = request.args['action']

    # The session cookie can outlive the server-side entry: 'finish' deletes
    # it below while 'session_id' stays in the cookie. Answer with a client
    # error instead of letting the KeyError surface as a 500.
    try:
        state = dedupers[deduper_id]
    except KeyError:
        return make_response(jsonify(status='error', message='session not found, need to start a new session'), 400)

    state['last_interaction'] = datetime.now()

    # Fall back to fresh tallies/labels on the first request of a session.
    counter = state.get('counter') or {'yes': 0, 'no': 0, 'unsure': 0}
    labels = state.get('training_data') or {'distinct': [], 'match': []}
    deduper = state['deduper']

    if action == 'yes':
        labels['match'].append(state['current_pair'])
        counter['yes'] += 1
        resp = {'counter': counter}
    elif action == 'no':
        labels['distinct'].append(state['current_pair'])
        counter['no'] += 1
        resp = {'counter': counter}
    elif action == 'finish':
        file_io = state['csv']
        training_file_path = os.path.join(UPLOAD_FOLDER, '%s-training.json' % file_io.file_path)
        # Write the labels resolved above rather than re-reading
        # state['training_data'], which is missing when the user finishes
        # before labelling any pair. Text mode: the JSON encoder emits text,
        # which a binary-mode file rejects on Python 3.
        with open(training_file_path, 'w') as f:
            json.dump(labels, f, default=_to_json)
        # NOTE(review): presumably an asynchronous task queue call; only the
        # returned object's `key` is used here.
        rv = dedupeit.delay(
            field_defs=state['field_defs'],
            training_data=training_file_path,
            file_io=file_io,
            data_sample=deduper.data_sample,
        )
        flask_session['deduper_key'] = rv.key
        flask_session['dedupe_start'] = time.time()
        resp = {'finished': True}
    else:
        counter['unsure'] += 1
        resp = {'counter': counter}

    # Runs for every action (including 'finish' and 'unsure'), as before.
    deduper.markPairs(labels)
    state['training_data'] = labels
    state['counter'] = counter

    if resp.get('finished'):
        # The job has been handed off; drop the in-memory session state.
        del dedupers[deduper_id]

    response = make_response(json.dumps(resp))
    response.headers['Content-Type'] = 'application/json'
    return response