Ejemplo n.º 1
0
    def get(self, model_id, prediction_id):
        """
        Export the rows of a prediction as GeoJSON, line-delimited GeoJSON, CSV or NPZ.

        ``model_id`` is accepted but not used by this handler.

        Query parameters (read from ``request.args``):
            format: ``geojson`` (default), ``geojsonld``, ``csv`` or ``npz``.
            inferences: ``all`` (default) or the name of a single inference.
                When a single inference is named, rows where it is missing or
                not strictly greater than ``threshold`` are skipped.
            threshold: numeric cut-off, default ``0``. When an ``npz`` export
                is requested for ``all`` inferences and no threshold is given,
                ``0.5`` is used to binarize the scores.

        Each exported row is indexed positionally: ``row[0]`` id, ``row[1]``
        quadkey, ``row[2]`` geometry as a JSON string, ``row[3]`` mapping of
        inference name to score, ``row[4]`` validity mapping (may be empty).

        Returns:
            A ``Response`` carrying the export as an attachment, or an
            ``err(...)`` payload paired with status 400 (bad threshold, no
            validated rows for NPZ, malformed binary model) or 501 (unknown
            format).
        """
        mimes = {
            "csv": "text/csv",
            "geojson": "application/geo+json",
            "geojsonld": "application/geo+json-seq",
            "npz": "application/npz",
        }

        req_format = request.args.get('format', 'geojson')
        req_inferences = request.args.get('inferences', 'all')

        # Reject unknown formats before doing any work: a `return` inside the
        # streaming generator cannot produce an error response.
        if req_format not in mimes:
            return err(501, "not a valid export type, valid export types are: geojson, csv, and npz"), 501

        try:
            req_threshold = float(request.args.get('threshold', '0'))
        except ValueError:
            return err(400, "threshold must be a number"), 400

        # NPZ labels for "all" inferences are binarized with a 0.5 default
        # rather than the 0 default used for row filtering.
        if req_inferences == 'all':
            npz_threshold = float(request.args.get('threshold', '0.5'))
        else:
            npz_threshold = req_threshold

        stream = PredictionService.export(prediction_id)
        inferences = PredictionService.inferences(prediction_id)
        pred = PredictionService.get_prediction_by_id(prediction_id)

        if req_inferences != 'all':
            inferences = [req_inferences]

        headers = {
            "Content-Disposition": 'attachment; filename="export.' + req_format + '"'
        }

        def skipped(row):
            """Return True when the row fails the single-inference filter."""
            if req_inferences == 'all':
                return False
            score = row[3].get(req_inferences)
            return score is None or score <= req_threshold

        def generate_npz():
            """
            Build the NPZ payload (tile "x-y-z" -> 0/1 label list) from validated rows.

            Returns the serialized bytes, or None when the model is flagged
            binary but does not have exactly two categories. Raises NoValid
            when no validated row produced a label.
            """
            labels_dict = {}
            for row in stream:
                if skipped(row) or not row[4]:
                    continue

                i_lst = pred.inf_list.split(",")

                # convert raw predictions into 0 or 1 based on threshold
                labels = [1 if row[3][inference] >= npz_threshold else 0 for inference in i_lst]

                # convert quadkey to x-y-z
                tile_key = '-'.join(str(i) for i in mercantile.quadkey_to_tile(row[1]))

                if pred.inf_binary and len(i_lst) != 2:
                    # Signalled to the caller, which turns it into a 400.
                    return None

                if pred.inf_binary:
                    # Validated and true keeps the prediction; validated and
                    # false swaps to the other class.
                    if not list(row[4].values())[0]:
                        labels = [0, 1] if labels == [1, 0] else [1, 0]
                else:
                    # Multi-label: flip every label that was validated as wrong.
                    for key, is_valid in row[4].items():
                        idx = i_lst.index(key)
                        if not is_valid:
                            labels[idx] = 1 - labels[idx]

                labels_dict[tile_key] = labels

            if not labels_dict:
                raise NoValid

            bytestream = io.BytesIO()
            np.savez(bytestream, **labels_dict)
            return bytestream.getvalue()

        def generate():
            """Stream the export chunk by chunk in the requested text format."""
            first = True

            if req_format == "geojson":
                yield '{ "type": "FeatureCollection", "features": ['
            elif req_format == "csv":
                output = io.StringIO()
                rowdata = ["ID", "QuadKey", "QuadKeyGeom"]
                rowdata.extend(inferences)
                csv.writer(output, quoting=csv.QUOTE_NONNUMERIC).writerow(rowdata)
                yield output.getvalue()

            for row in stream:
                if skipped(row):
                    continue

                if req_format == "csv":
                    output = io.StringIO()
                    rowdata = [row[0], row[1], row[2]]
                    rowdata.extend(row[3].get(inf, 0.0) for inf in inferences)
                    csv.writer(output, quoting=csv.QUOTE_NONNUMERIC).writerow(rowdata)
                    yield output.getvalue()
                    continue

                # Copy so that attaching the validity does not mutate the row.
                properties = dict(row[3])
                if row[4]:
                    properties['validity'] = row[4]

                feat = {
                    "id": row[0],
                    "quadkey": row[1],
                    "type": "Feature",
                    "properties": properties,
                    "geometry": json.loads(row[2])
                }

                if req_format == "geojsonld":
                    yield json.dumps(feat) + '\n'
                elif first:
                    first = False
                    yield '\n' + json.dumps(feat)
                else:
                    yield ',\n' + json.dumps(feat)

            if req_format == "geojson":
                yield ']}'

        if req_format == "npz":
            # Build the archive exactly once: the export stream may only be
            # iterable a single time.
            try:
                npz = generate_npz()
            except NoValid:
                return err(400, "Can only return npz if predictions are validated. Currently there are no valid predictions"), 400

            if npz is None:
                return err(400, "binary models must have two catagories"), 400

            return Response(
                response = npz,
                mimetype = mimes[req_format],
                status = 200,
                headers = headers
            )

        return Response(
            generate(),
            mimetype = mimes[req_format],
            status = 200,
            headers = headers
        )
Ejemplo n.º 2
0
    def payload(integration_id: int, payload: dict):
        """
        Push the rows of a prediction to MapRoulette as tasks of a new challenge.

        The integration is looked up by ``integration_id`` and must be of type
        "maproulette". ``payload`` must contain the keys ``prediction``,
        ``project``, ``project_desc``, ``challenge``, ``challenge_instr``,
        ``threshold`` and ``inferences``; an optional ``auth`` key overrides
        the API key stored on the integration.

        The project is fetched by name and created when that lookup raises;
        a challenge is then created under it and every exported row that
        passes the inference/threshold filter is uploaded as a task.

        Returns:
            dict with the MapRoulette ``project`` and ``challenge`` ids.

        Raises:
            IntegrationNotFound: no integration exists for ``integration_id``.
            Exception: wrong integration type or a required key is missing.
        """
        integration = IntegrationService.get_secrets(integration_id)
        if integration is None:
            raise IntegrationNotFound("Integration Not Found")
        if integration.integration != "maproulette":
            raise Exception("Only MapRoulette Integrations supported")

        required_keys = (
            "prediction",
            "project",
            "project_desc",
            "challenge",
            "challenge_instr",
            "threshold",
            "inferences",
        )
        for ele in required_keys:
            if payload.get(ele) is None:
                raise Exception("Missing " + ele + " key in body")

        # A key supplied in the request body takes precedence over the stored one.
        auth = integration.auth
        auth_override = payload.get("auth")
        if auth_override is not None:
            auth = auth_override

        parsed = urlparse(integration.url)
        config = maproulette.Configuration(
            api_key=auth,
            hostname=parsed.netloc,
            protocol=parsed.scheme,
        )
        project_api = maproulette.Project(config)
        challenge_api = maproulette.Challenge(config)

        project_name = payload.get("project")
        try:
            project = project_api.get_project_by_name(project_name=project_name)
        except Exception:
            # Any lookup failure falls back to creating the project.
            new_project = {
                "name": project_name,
                "display_name": project_name,
                "description": payload.get("project_desc"),
                "enabled": True,
            }
            project = project_api.create_project(data=new_project)

        new_challenge = {
            "name": payload.get("challenge"),
            "parent": project["data"]["id"],
            "instruction": payload.get("challenge_instr"),
        }
        challenge = challenge_api.create_challenge(data=new_challenge)

        req_inferences = payload.get("inferences", "all")
        req_threshold = float(payload.get("threshold", "0"))

        stream = PredictionService.export(int(payload.get("prediction")))

        features = []
        for row in stream:
            if req_inferences != "all":
                score = row[3].get(req_inferences)
                if score is None or score <= req_threshold:
                    continue

            # The row's inference mapping is reused as the feature properties.
            properties_dict = row[3]
            if row[4]:
                properties_dict["validity"] = row[4]
            properties_dict["mle:id"] = row[0]

            features.append({
                "quadkey": row[1],
                "type": "Feature",
                "properties": properties_dict,
                "geometry": json.loads(row[2]),
            })

        feats = {"type": "FeatureCollection", "features": features}
        challenge_id = challenge["data"]["id"]
        challenge_api.add_tasks_to_challenge(challenge_id=challenge_id, data=feats)

        return {
            "project": project["data"]["id"],
            "challenge": challenge_id,
        }
Ejemplo n.º 3
0
    def payload(integration_id: int, payload: dict):
        """
        Export a prediction to MapRoulette as the tasks of a newly created challenge.

        The integration is looked up by ``integration_id`` and must be of type
        "maproulette". ``payload`` must contain ``prediction``, ``project``,
        ``project_desc``, ``challenge``, ``challenge_instr``, ``threshold`` and
        ``inferences``; an optional ``auth`` key overrides the API key stored
        on the integration.

        The project is fetched by name and created when that lookup fails; a
        challenge is created under it and every exported row that passes the
        inference/threshold filter is uploaded as a GeoJSON feature.

        Returns:
            dict with the MapRoulette ``project`` and ``challenge`` ids.

        Raises:
            IntegrationNotFound: no integration exists for ``integration_id``.
            Exception: wrong integration type or a required key is missing.
            ValueError: ``threshold`` or ``prediction`` is not numeric.
        """
        integration = IntegrationService.get_secrets(integration_id)

        if integration is None:
            raise IntegrationNotFound('Integration Not Found')

        if integration.integration != "maproulette":
            raise Exception("Only MapRoulette Integrations supported")

        for ele in ['prediction', 'project', 'project_desc', 'challenge', 'challenge_instr', 'threshold', 'inferences']:
            if payload.get(ele) is None:
                raise Exception('Missing ' + ele + ' key in body')

        # Parse the numeric inputs before touching MapRoulette so that a
        # malformed request cannot leave an empty project/challenge behind.
        req_inferences = payload.get('inferences', 'all')
        req_threshold = float(payload.get('threshold', '0'))
        prediction_id = int(payload.get('prediction'))

        auth = integration.auth
        if payload.get('auth') is not None:
            auth = payload.get('auth')

        parsed = urlparse(integration.url)

        config = maproulette.Configuration(
            api_key=auth,
            hostname=parsed.netloc,
            protocol=parsed.scheme
        )

        project_api = maproulette.Project(config)
        challenge_api = maproulette.Challenge(config)

        try:
            project = project_api.get_project_by_name(
                project_name=payload.get('project')
            )
        except Exception:
            # Best effort: any lookup failure is treated as "project does not
            # exist yet". `Exception` (not a bare except) lets
            # KeyboardInterrupt/SystemExit propagate.
            project = project_api.create_project(
                data={
                    "name": payload.get('project'),
                    "display_name": payload.get('project'),
                    "description": payload.get('project_desc'),
                    "enabled": True
                }
            )

        challenge = challenge_api.create_challenge(
            data={
                'name': payload.get('challenge'),
                'parent': project['data']['id'],
                'instruction': payload.get('challenge_instr')
            }
        )

        stream = PredictionService.export(prediction_id)

        fc = {
            'type': 'FeatureCollection',
            'features': []
        }

        for row in stream:
            if req_inferences != 'all':
                score = row[3].get(req_inferences)
                if score is None or score <= req_threshold:
                    continue

            # Always export the inference scores; rows without a validation
            # previously lost their properties entirely. Copy so the exported
            # row itself is not mutated.
            properties_dict = dict(row[3])
            if row[4]:
                properties_dict['validity'] = row[4]

            fc['features'].append({
                "id": row[0],
                "quadkey": row[1],
                "type": "Feature",
                "properties": properties_dict,
                "geometry": json.loads(row[2])
            })

        challenge_api.add_tasks_to_challenge(
            challenge_id=challenge['data']['id'],
            data=fc
        )

        return {
            "project": project['data']['id'],
            "challenge": challenge['data']['id']
        }
Ejemplo n.º 4
0
    def get(self, project_id, prediction_id):
        """
        Export Geospatial Predictions
        ---
        parameters:
            - name: project_id
              in: path
              schema:
                type: integer
                minimum: 0
              description: The ID of the Project

            - name: prediction_id
              in: path
              schema:
                type: integer
                minimum: 0
              description: The ID of the Prediction

            - name: format
              in: query
              schema:
                type: string
                default: geojson
                enum:
                  - geojson
                  - geojsonld
                  - csv
                  - npz
              description: The format to provide records in

            - name: inferences
              in: query
              schema:
                type: string
                default: all
              description: Return all inferences or only a single inference

            - name: threshold
              in: query
              schema:
                type: number
                default: 0
                minimum: 0
                maximum: 1
              description: The confidence threshold to apply to exported inferences
        responses:
            200:
                description: Exported Data
            400:
                description: Invalid threshold, or an NPZ export is not possible for this prediction
            501:
                description: Unsupported export format
        """
        mimes = {
            "csv": "text/csv",
            "geojson": "application/geo+json",
            "geojsonld": "application/geo+json-seq",
            "npz": "application/npz",
        }

        req_format = request.args.get("format", "geojson")
        req_inferences = request.args.get("inferences", "all")

        # Reject unknown formats before doing any work: a `return` inside the
        # streaming generator cannot produce an error response.
        if req_format not in mimes:
            return (
                err(
                    501,
                    "not a valid export type, valid export types are: geojson, csv, and npz",
                ),
                501,
            )

        try:
            req_threshold = float(request.args.get("threshold", "0"))
        except ValueError:
            return err(400, "threshold must be a number"), 400

        # NPZ labels for "all" inferences are binarized with a 0.5 default
        # rather than the 0 default used for row filtering.
        if req_inferences == "all":
            npz_threshold = float(request.args.get("threshold", "0.5"))
        else:
            npz_threshold = req_threshold

        stream = PredictionService.export(prediction_id)
        inferences = PredictionService.inferences(prediction_id)
        pred = PredictionService.get_prediction_by_id(prediction_id)
        hint = pred.hint
        z = pred.tile_zoom
        i_info = ImageryService.get(pred.imagery_id)

        if req_inferences != "all":
            inferences = [req_inferences]

        headers = {
            "Content-Disposition": 'attachment; filename="export.' + req_format + '"'
        }

        def skipped(row):
            """Return True when the row fails the single-inference filter."""
            if req_inferences == "all":
                return False
            score = row[3].get(req_inferences)
            return score is None or score <= req_threshold

        def generate_npz():
            """
            Build the NPZ payload mapping a tile/chip name to its 0/1 label list.

            Returns the serialized bytes, or None when the model is flagged
            binary but does not have exactly two categories. Raises NoValid
            when no row produced a label.
            """
            labels_dict = {}
            # Chip-list GeoDataFrame; downloaded lazily and only once because
            # it does not depend on the row being processed.
            chips = None

            for row in stream:
                if skipped(row):
                    continue

                # set labels.npz key to be x-y-z tile either from quadkey or wkt geometry
                if i_info["fmt"] == "wms":
                    if row[1]:
                        t = "-".join(
                            str(i) for i in mercantile.quadkey_to_tile(row[1])
                        )
                    else:
                        s = shape(json.loads(row[2])).centroid
                        t = "-".join(str(i) for i in mercantile.tile(s.x, s.y, z))
                if i_info["fmt"] == "list":
                    if chips is None:
                        r = requests.get(i_info["url"])
                        df = pd.read_csv(io.StringIO(r.text))
                        df["c"] = df["bounds"].apply(
                            lambda x: box(*[float(n) for n in x.split(",")])
                        )
                        chips = gpd.GeoDataFrame(df, crs="EPSG:4326", geometry=df["c"])
                    # names of the chips whose geometry intersects this row's geometry
                    row_gdf = gpd.GeoDataFrame(
                        {"geometry": [shape(json.loads(row[2]))]}, crs="EPSG:4326"
                    )
                    # TODO: account for the no-overlap case
                    overlap = gpd.overlay(chips, row_gdf, how="intersection")
                    tiles_intersection = overlap["name"].tolist()

                # convert raw predictions into 0 or 1 based on threshold
                i_lst = pred.inf_list.split(",")
                binary_pred_list = [
                    1 if row[3][inference] >= npz_threshold else 0
                    for inference in i_lst
                ]

                # special case for training and not predictions
                if hint == "training":
                    if i_info["fmt"] == "list":
                        for chip_name in tiles_intersection:
                            labels_dict[chip_name] = binary_pred_list
                    else:
                        labels_dict[t] = binary_pred_list
                elif row[4]:
                    t = "-".join(str(i) for i in mercantile.quadkey_to_tile(row[1]))

                    if pred.inf_binary and len(i_lst) != 2:
                        # Signalled to the caller, which turns it into a 400.
                        return None

                    if pred.inf_binary:
                        # Validated and true keeps the prediction; validated
                        # and false swaps to the other class.
                        if not list(row[4].values())[0]:
                            if binary_pred_list == [1, 0]:
                                binary_pred_list = [0, 1]
                            else:
                                binary_pred_list = [1, 0]
                    else:
                        # Multi-label: flip every label validated as wrong.
                        for key, is_valid in row[4].items():
                            idx = i_lst.index(key)
                            if not is_valid:
                                binary_pred_list[idx] = 1 - binary_pred_list[idx]

                    labels_dict[t] = binary_pred_list

            if not labels_dict:
                raise NoValid

            bytestream = io.BytesIO()
            np.savez(bytestream, **labels_dict)
            return bytestream.getvalue()

        def generate():
            """Stream the export chunk by chunk in the requested text format."""
            first = True

            if req_format == "geojson":
                yield '{ "type": "FeatureCollection", "features": ['
            elif req_format == "csv":
                output = io.StringIO()
                rowdata = ["ID", "QuadKey", "QuadKeyGeom"]
                rowdata.extend(inferences)
                csv.writer(output, quoting=csv.QUOTE_NONNUMERIC).writerow(rowdata)
                yield output.getvalue()

            for row in stream:
                if skipped(row):
                    continue

                if req_format == "csv":
                    output = io.StringIO()
                    rowdata = [row[0], row[1], row[2]]
                    rowdata.extend(row[3].get(inf, 0.0) for inf in inferences)
                    csv.writer(output, quoting=csv.QUOTE_NONNUMERIC).writerow(rowdata)
                    yield output.getvalue()
                    continue

                # Copy so that attaching the validity does not mutate the row.
                properties = dict(row[3])
                if row[4]:
                    properties["validity"] = row[4]

                feat = {
                    "id": row[0],
                    "quadkey": row[1],
                    "type": "Feature",
                    "properties": properties,
                    "geometry": json.loads(row[2]),
                }

                if req_format == "geojsonld":
                    yield json.dumps(feat) + "\n"
                elif first:
                    first = False
                    yield "\n" + json.dumps(feat)
                else:
                    yield ",\n" + json.dumps(feat)

            if req_format == "geojson":
                yield "]}"

        if req_format == "npz":
            try:
                npz = generate_npz()
            except NoValid:
                return (
                    err(
                        400,
                        "Can only return npz if predictions are validated. Currently there are no valid predictions",
                    ),
                    400,
                )

            if npz is None:
                return err(400, "binary models must have two catagories"), 400

            return Response(
                response=npz,
                mimetype=mimes[req_format],
                status=200,
                headers=headers,
            )

        return Response(
            generate(),
            mimetype=mimes[req_format],
            status=200,
            headers=headers,
        )