Example 1
def get(project_id, run_id):
    log = logger.bind(pid=project_id, rid=run_id)
    log.info("Checking for results of run.")
    parent_span = g.flask_tracer.get_span()
    with opentracing.tracer.start_span('check-auth',
                                       child_of=parent_span) as span:
        # Check the project and run resources exist
        abort_if_run_doesnt_exist(project_id, run_id)
        # Check the caller has a valid results token; abort otherwise.
        token = request.headers.get('Authorization')
        abort_if_invalid_results_token(project_id, token)
        log.info("request to access run result authorized")

    with opentracing.tracer.start_span('get-run-state',
                                       child_of=parent_span) as span:
        dbinstance = db.get_db()
        state = db.get_run_state(dbinstance, run_id)
        log.info("run state is '{}'".format(state))

    # Return the result only for a completed run; 500 on error, otherwise 404
    if state == 'completed':
        with opentracing.tracer.start_span('get-run-result',
                                           child_of=parent_span) as span:
            return get_result(dbinstance, project_id, run_id, token)
    elif state == 'error':
        safe_fail_request(500, message='Error during computation of run')
    else:
        safe_fail_request(404, message='run is not complete')
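For context, here is a minimal sketch of calling this results endpoint from a client. It assumes the requests library and a hypothetical route of the form /projects/<project_id>/runs/<run_id>/result; the real path, port, and token semantics come from the service's API spec.

import requests

BASE_URL = "http://localhost:8851/api/v1"  # hypothetical base URL

def fetch_run_result(project_id, run_id, results_token):
    # The handler above reads the results token from the Authorization header.
    response = requests.get(
        "{}/projects/{}/runs/{}/result".format(BASE_URL, project_id, run_id),
        headers={'Authorization': results_token})
    if response.status_code == 404:
        return None  # run has not completed yet
    response.raise_for_status()
    return response.json()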
Example 2
def status_get():
    """Displays the latest mapping statistics"""

    status = cache.get_status()

    if status is None:
        # We ensure we can connect to the database during the status check
        db1 = db.get_db()

        number_of_mappings = db.query_db(
            db1,
            'SELECT COUNT(*) FROM projects',
            one=True)['count']

        current_rate = db.get_latest_rate(db1)

        status = {
            'status': 'ok',
            'project_count': number_of_mappings,
            'rate': current_rate
        }

        cache.set_status(status)
    return status
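status_get follows a cache-aside pattern: read the cached status, recompute it from the database on a miss, then write it back. Below is a minimal in-memory stand-in for the two cache calls used above, purely illustrative (the real cache module is presumably backed by an external store such as Redis):

_cached_status = None

def get_status():
    # Return the cached status dict, or None on a cache miss.
    return _cached_status

def set_status(status):
    # Store the freshly computed status for subsequent requests.
    global _cached_status
    _cached_status = status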
Example 3
def dataprovider_id_if_authorize(resource_id, receipt_token):
    logger.debug("checking authorization token to fetch mask data")
    if not is_receipt_token_valid(resource_id, receipt_token):
        safe_fail_request(403, message=INVALID_ACCESS_MSG)

    dp_id = db.select_dataprovider_id(get_db(), resource_id, receipt_token)
    return dp_id
Example 4
def get(project_id):
    log = logger.bind(pid=project_id)
    log.info("Listing runs for project: {}".format(project_id))

    authorize_run_listing(project_id)

    log.info("Authorized request to list runs")

    return RunList().dump(get_runs(get_db(), project_id))
Example 5
    def __init__(self, project_id, threshold, name, notes):
        self.project_id = project_id
        self.name = name
        self.notes = notes
        self.threshold = threshold
        self.run_id = generate_code()
        logger.info("Created run id", rid=self.run_id)

        result_type = db.get_project_column(db.get_db(), project_id, 'result_type')
        self.type = 'no_mapping' if result_type == 'similarity_scores' else 'default'
Example 6
def get(project_id, run_id):
    log = logger.bind(pid=project_id, rid=run_id)
    logger.info("request description of a run")
    # Check the project and run resources exist
    abort_if_run_doesnt_exist(project_id, run_id)

    # Check the caller has a valid results token. Yes it should be renamed.
    abort_if_invalid_results_token(project_id,
                                   request.headers.get('Authorization'))

    log.info("request for run description authorized")

    db_conn = db.get_db()
    run_object = db.get_run(db_conn, run_id)

    return RunDescription().dump(run_object)
Example 7
def get_authorization_token_type_or_abort(project_id, token):
    """
    In the case of a permutation with an unencrypted mask, both the result token and
    the receipt tokens are used. The result token reveals the mask; the receipt tokens
    are used by the data providers to fetch their permutations. However, we do not
    know which type of token we have before checking.
    """
    logger.debug("checking if provided authorization is a results_token")
    # If the token is not a valid result token, it should be a receipt token.
    if not is_results_token_valid(project_id, token):
        logger.debug("checking if provided authorization is receipt_token")
        # If the token is not a valid receipt token, we abort.
        if not is_receipt_token_valid(project_id, token):
            safe_fail_request(403, message=INVALID_ACCESS_MSG)
        token_type = 'receipt_token'
    else:
        token_type = 'result_token'

    # Note that at this stage we have EITHER a receipt or result token, and depending on the result_type
    # that might mean the caller is not authorized.
    result_type = get_project_column(get_db(), project_id, 'result_type')
    if result_type in {'mapping', 'similarity_scores'} and token_type == 'receipt_token':
        logger.info("Caller provided receipt token to get results")
        safe_fail_request(403, message=INVALID_ACCESS_MSG)
    return token_type
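A typical call site branches on the returned token type, as Example 15 below does inside its auth span. A condensed, illustrative sketch:

# Illustrative only: names follow the surrounding examples.
auth_token_type = get_authorization_token_type_or_abort(
    project_id, request.headers.get('Authorization'))
if auth_token_type == 'receipt_token':
    # Caller is a data provider entitled to their own permutation.
    log.debug("authorized via receipt_token")
else:
    # Caller holds the project-wide result token.
    log.debug("authorized via result_token")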
Example 8
def projects_get():
    logger.info("Getting list of all projects")
    projects = db.query_db(get_db(),
                           'SELECT project_id, time_added FROM projects')
    return ProjectList().dump(projects)
Example 9
def project_clks_post(project_id):
    """
    Update a project to provide encoded PII data.
    """
    log = logger.bind(pid=project_id)
    headers = request.headers

    parent_span = g.flask_tracer.get_span()

    with opentracing.tracer.start_span('check-auth',
                                       child_of=parent_span) as span:
        abort_if_project_doesnt_exist(project_id)
        if headers is None or 'Authorization' not in headers:
            safe_fail_request(401, message="Authentication token required")

        token = headers['Authorization']

        # Check the caller has valid token -> otherwise 403
        abort_if_invalid_dataprovider_token(token)

    with DBConn() as conn:
        dp_id = db.get_dataprovider_id(conn, token)
        project_encoding_size = db.get_project_schema_encoding_size(
            conn, project_id)

    log = log.bind(dp_id=dp_id)
    log.info("Receiving CLK data.")
    receipt_token = None

    with opentracing.tracer.start_span('upload-data',
                                       child_of=parent_span) as span:
        span.set_tag("project_id", project_id)
        if headers['Content-Type'] == "application/json":
            span.set_tag("content-type", 'json')
            # TODO: Previously the CLKs were accessed in a streaming fashion to
            #       avoid parsing the JSON in one hit, which let the web frontend
            #       run with less memory. However, connexion is very strict about
            #       JSON input validation and always consumes the whole stream to
            #       validate it against the spec, hence the backflip to fully
            #       reading the CLKs as JSON into memory. -> issue #184

            receipt_token, raw_file = upload_json_clk_data(
                dp_id, get_json(), span)
            # Schedule a task to deserialize the hashes, and carry
            # out a pop count.
            handle_raw_upload.delay(project_id,
                                    dp_id,
                                    receipt_token,
                                    parent_span=serialize_span(span))
            log.info("Job scheduled to handle user uploaded hashes")
        elif headers['Content-Type'] == "application/octet-stream":
            span.set_tag("content-type", 'binary')
            log.info("Handling binary CLK upload")
            try:
                count, size = check_binary_upload_headers(headers)
                log.info(
                    f"Headers tell us to expect {count} encodings of {size} bytes"
                )
                span.log_kv({'count': count, 'size': size})
            except Exception:
                log.warning(
                    "Upload failed due to problem with headers in binary upload"
                )
                raise
            # Check against project level encoding size (if it has been set)
            if project_encoding_size is not None and size != project_encoding_size:
                # fail fast - we haven't stored the encoded data yet
                return safe_fail_request(
                    400, "Upload 'Hash-Size' doesn't match project settings")

            # TODO actually stream the upload data straight to Minio. Currently we can't because
            # connexion has already read the data before our handler is called!
            # https://github.com/zalando/connexion/issues/592
            # stream = get_stream()
            stream = BytesIO(request.data)
            log.debug(
                f"Stream size is {len(request.data)} B, and we expect {(6 + size) * count} B"
            )
            if len(request.data) != (6 + size) * count:
                safe_fail_request(
                    400,
                    "Uploaded data did not match the expected size. Check request headers are correct"
                )
            try:
                receipt_token = upload_clk_data_binary(project_id, dp_id,
                                                       stream, count, size)
            except ValueError:
                safe_fail_request(
                    400,
                    "Uploaded data did not match the expected size. Check request headers are correct."
                )
        else:
            safe_fail_request(400, "Content Type not supported")

    return {'message': 'Updated', 'receipt_token': receipt_token}, 201
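The length check in the binary branch above implies a fixed 6-byte per-record overhead on top of each size-byte encoding. A worked example of the expected body size, with illustrative header values:

count, size = 1000, 128            # hypothetical Count / Hash-Size header values
expected = (6 + size) * count      # bytes expected in request.data
assert expected == 134000          # 1000 encodings of 128 B each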
Example 10
def is_receipt_token_valid(resource_id, receipt_token):
    return db.select_dataprovider_id(
        get_db(), resource_id, receipt_token) is not None
Example 11
def is_results_token_valid(project_id, results_token):
    return db.check_project_auth(get_db(), project_id, results_token)
Example 12
def abort_if_invalid_dataprovider_token(update_token):
    logger.debug("checking authorization token to update data")
    resource_exists = db.check_update_auth(get_db(), update_token)
    if not resource_exists:
        safe_fail_request(403, message=INVALID_ACCESS_MSG)
Example 13
def abort_if_run_doesnt_exist(project_id, run_id):
    resource_exists = db.check_run_exists(get_db(), project_id, run_id)
    if not resource_exists:
        logger.info("Requested project or run resource with invalid identifier token")
        safe_fail_request(403, message=INVALID_ACCESS_MSG)
Example 14
def abort_if_project_in_error_state(project_id):
    conn = get_db()
    num_parties_with_error = db.get_encoding_error_count(conn, project_id)
    if num_parties_with_error > 0:
        safe_fail_request(500, message="Can't post run as project has errors")
Example 15
def get(project_id, run_id):
    log = logger.bind(pid=project_id, rid=run_id)
    parent_span = g.flask_tracer.get_span()
    log.debug("request run status")
    with opentracing.tracer.start_span('check-auth',
                                       child_of=parent_span) as span:
        # Check the project and run resources exist
        abort_if_run_doesnt_exist(project_id, run_id)

        # Check the caller has a valid results token. Yes it should be renamed.
        auth_token_type = get_authorization_token_type_or_abort(
            project_id, request.headers.get('Authorization'))
        log.debug(
            "Run status authorized using {} token".format(auth_token_type))

    with opentracing.tracer.start_span('get-status-from-db',
                                       child_of=parent_span) as span:
        dbinstance = get_db()
        run_status = db.get_run_status(dbinstance, run_id)
        project_in_error = db.get_encoding_error_count(dbinstance,
                                                       project_id) > 0
        span.set_tag('stage', run_status['stage'])

    run_type = RUN_TYPES[run_status['type']]
    state = 'error' if project_in_error else run_status['state']
    stage = run_status['stage']
    status = {
        "state": state,
        "time_added": run_status['time_added'],
        "stages": run_type['stages'],
        "current_stage": {
            "number": stage,
            "description": run_type['stage_descriptions'].get(
                stage, "there is no description for this stage")
        }
    }
    # Attach progress information if available
    if stage == 1:
        # waiting for CLKs
        abs_val = db.get_number_parties_uploaded(dbinstance, project_id)
        max_val = db.get_project_column(dbinstance, project_id, 'parties')
    elif stage == 2:
        # Computing similarity
        abs_val = cache.get_progress(run_id)
        if abs_val is not None:
            max_val = db.get_total_comparisons_for_project(
                dbinstance, project_id)
    else:
        # Solving for mapping (no progress)
        abs_val = None
    if abs_val is not None:
        progress = {
            'absolute': abs_val,
            'relative': (abs_val / max_val) if max_val != 0 else 0,
        }
        if progress['relative'] > 1.0:
            log.warning('progress exceeds 100%: abs: {}, max: {}'.format(
                abs_val, max_val))
        if run_status['stage'] in run_type['stage_progress_descriptions']:
            progress['description'] = run_type['stage_progress_descriptions'][
                run_status['stage']]
        status["current_stage"]["progress"] = progress
    if state == 'completed':
        status["time_started"] = run_status['time_started']
        status["time_completed"] = run_status['time_completed']
        return completed().dump(status)
    elif state in {'running', 'queued', 'created'}:
        status["time_started"] = run_status['time_started']
        return running().dump(status)
    elif state == 'error':
        log.warning(
            'handling the run status for state "error" is not implemented')
        return error().dump(status)
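The relative progress above is simply abs_val / max_val with a guard for a zero maximum. For instance, in stage 1 with three of five parties having uploaded:

abs_val, max_val = 3, 5   # illustrative: parties uploaded vs. parties expected
progress = {
    'absolute': abs_val,
    'relative': (abs_val / max_val) if max_val != 0 else 0,   # 0.6
}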