Esempio n. 1
0
def post_node_merge_into(node_id):
    """
    Merge a node into another node.

    URL parameters:
        node_id: Node that is merged.

    Request parameters (body):
        dest_node_id: Node that absorbs the children and objects.

    Returns:
        JSON null.

    Raises:
        KeyError: If the request body does not contain "dest_node_id".
    """
    with database.engine.connect() as connection:
        tree = Tree(connection)

        data = request.get_json()

        print(data)

        dest_node_id = data["dest_node_id"]

        # The merge and its log entry belong together: run both inside one
        # transaction so that a failure in either step does not leave a
        # merge without a log entry (or the other way round).
        with connection.begin():
            # TODO: Unapprove
            tree.merge_node_into(node_id, dest_node_id)

            log(
                connection,
                "merge_node_into({}, {})".format(node_id, dest_node_id),
                node_id=dest_node_id,
            )

        return jsonify(None)
Esempio n. 2
0
def get_node_stats(node_id):
    """
    Calculate the progress of a node and optionally record it in the log.

    URL parameters:
        node_id (int): ID of a node

    Request parameters:
        log (str): If given, a "progress-<log>" entry carrying the
            serialized progress is written to the log.

    Returns:
        JSON-dict
    """

    arg_parser = reqparse.RequestParser()
    arg_parser.add_argument("log", default=None)
    log_tag = arg_parser.parse_args(strict=True)["log"]

    with database.engine.connect() as connection:
        tree = Tree(connection)

        # Calculation and (optional) log entry share one transaction.
        with connection.begin():
            progress = tree.calculate_progress(node_id)

            wants_log_entry = log_tag is not None
            if wants_log_entry:
                action = "progress-{}".format(log_tag)
                log(connection, action, node_id=node_id, data=json_dumps(progress))

            return jsonify(progress)
Esempio n. 3
0
def patch_node(node_id):
    """
    Update a node with the attributes given in the request body.

    URL parameters:
        node_id: ID of the node that is updated.

    GET parameters:
        include_children: Parsed with strtobool and forwarded to _node().
            (Default: 0)

    Request parameters (body):
        Attributes that are passed to tree.update_node(). "starred" is
        converted with strtobool, "parent_id" is rejected.

    Returns:
        JSON representation of the updated node as built by _node().

    Raises:
        ValueError: If the body contains "parent_id".
    """
    with database.engine.connect() as connection:
        tree = Tree(connection)

        payload = request.get_json()
        include_children = request.args.get("include_children", 0, strtobool)

        # TODO: Use argparse
        if "starred" in payload:
            payload["starred"] = strtobool(str(payload["starred"]))

        if "parent_id" in payload:
            raise ValueError(
                "parent_id must not be set directly, use /nodes/<node_id>/adopt."
            )

        # Update, log entry and re-read of the node happen in one transaction.
        with connection.begin():
            tree.update_node(node_id, payload)

            action = "update_node({})".format(json.dumps(payload, sort_keys=True))
            log(connection, action, node_id=node_id)

            updated_node = tree.get_node(node_id, True)

        return jsonify(_node(tree, updated_node, include_children=include_children))
Esempio n. 4
0
    def export_direct_objects(node_id, filename):
        """
        Write the object_id of every object returned by
        tree.get_objects(node_id) to filename, one ID per line.
        """
        with database.engine.connect() as connection:
            with open(filename, "w") as out_file:
                tree = Tree(connection)

                for obj in tree.get_objects(node_id):
                    out_file.write("{}\n".format(obj["object_id"]))
Esempio n. 5
0
def _node_get_recommended_objects(node_id=None, max_n=None):
    """
    Return the objects recommended for a node.

    Both arguments are forwarded to tree.recommend_objects(); every
    recommended object is converted with _object().

    Returns:
        List of converted objects.
    """
    with database.engine.connect() as connection:
        recommendations = Tree(connection).recommend_objects(node_id, max_n)

        # Materialize while the connection is still open.
        return list(map(_object, recommendations))
Esempio n. 6
0
    def export_tree(root_id, tree_fn):
        """
        Export the whole tree below root_id, including its objects, to tree_fn.
        """
        with database.engine.connect() as connection:
            Tree(connection).export_tree(root_id, tree_fn)
Esempio n. 7
0
def node_get_n_sorted(node_id):
    """
    Return the summed "_n_objects_deep" of all nodes yielded by
    tree.get_minlevel_starred(node_id) as JSON.
    """
    with database.engine.connect() as connection:
        tree = Tree(connection)

        total = 0
        for starred_node in tree.get_minlevel_starred(node_id):
            total += starred_node["_n_objects_deep"]

        return jsonify(total)
Esempio n. 8
0
def get_node(node_id):
    """
    Return a node and record the access in the log.

    URL parameters:
        node_id: ID of the requested node.

    GET parameters:
        include_children: Parsed with strtobool and forwarded to _node().
            (Default: 0)

    Returns:
        JSON representation of the node as built by _node().
    """
    with database.engine.connect() as connection:
        tree = Tree(connection)

        include_children = request.args.get("include_children", 0, strtobool)

        requested_node = tree.get_node(node_id)

        log(connection, "get_node", node_id=node_id)

        return jsonify(
            _node(tree, requested_node, include_children=include_children)
        )
Esempio n. 9
0
def get_project(project_id):
    """
    Return a project, optionally with progress information.

    URL parameters:
        project_id: ID of the requested project.

    Request parameters:
        include_progress: Parsed with strtobool. If true, the progress of
            the project's node is stored under "progress". (Default: 0)

    Returns:
        JSON representation of the project.
    """

    arg_parser = reqparse.RequestParser()
    arg_parser.add_argument("include_progress", type=strtobool, default=0)
    include_progress = arg_parser.parse_args(strict=True)["include_progress"]

    with database.engine.connect() as connection:
        tree = Tree(connection)
        project = tree.get_project(project_id)

        if include_progress:
            project["progress"] = tree.calculate_progress(project["node_id"])

        return jsonify(project)
Esempio n. 10
0
def get_subtree(node_id):
    """
    Return the children of a node, ordered by "_n_children DESC".

    URL parameters:
        node_id: ID of the parent node.

    GET parameters:
        supertree: Parsed with strtobool. If true, the children are queried
            with supertree=True and include="starred". (Default: 0)

    Returns:
        JSON list with one _tree_node() entry per child.
    """
    supertree = request.args.get("supertree", 0, strtobool)

    with database.engine.connect() as connection:
        tree = Tree(connection)

        # Both variants share the ordering; the supertree view adds filters.
        query = {"order_by": "_n_children DESC"}
        if supertree:
            query.update(supertree=True, include="starred")

        children = tree.get_children(node_id, **query)

        return jsonify([_tree_node(child, supertree) for child in children])
Esempio n. 11
0
def create_node():
    """
    Create a new node.

    Request parameters (body):
        project_id (optional): Project of the new node. If missing, the
            project of the parent node is used.
        parent_id (optional): Parent of the new node. Required if
            project_id is missing.
        name (optional): Name of the new node.
        members: List of nodes ({node_id: ...}) and objects
            ({object_id: ...}) that are moved into the new node.
        starred (optional): Parsed with strtobool. (Default: "0")

    Returns:
        JSON representation of the created node as built by _node().

    Raises:
        KeyError: If the body does not contain "members".
        ValueError: If neither project_id nor parent_id is given.
    """

    with database.engine.connect() as connection:
        tree = Tree(connection)
        data = request.get_json()

        object_ids = [m["object_id"] for m in data["members"] if "object_id" in m]
        node_ids = [m["node_id"] for m in data["members"] if "node_id" in m]

        project_id = data.get("project_id", None)
        name = data.get("name", None)

        # parent_id is optional: only convert it when it was actually sent.
        # int(None) would otherwise abort the request with a TypeError.
        parent_id = data.get("parent_id", None)
        if parent_id is not None:
            parent_id = int(parent_id)

        starred = strtobool(str(data.get("starred", "0")))

        if project_id is None:
            if parent_id is None:
                raise ValueError(
                    "Either project_id or parent_id is required to create a node."
                )

            # Retrieve project_id for the parent_id
            project_id = tree.get_node(parent_id)["project_id"]

        print(data)

        # Creation, relocation of the members and the log entry are atomic.
        with connection.begin():
            node_id = tree.create_node(
                int(project_id), parent_id=parent_id, name=name, starred=starred
            )

            tree.relocate_nodes(node_ids, node_id)

            tree.relocate_objects(object_ids, node_id)

            log(connection, "create_node", node_id=node_id)

            node = tree.get_node(node_id, require_valid=True)

            print("Created node {}.".format(node_id))

        result = _node(tree, node)

        return jsonify(result)
Esempio n. 12
0
    def progress(root_id, log):
        """
        Report progress on a tree

        If root_id is None, the root nodes of all projects are reported.
        The log argument is not used by this function body.
        """
        with database.engine.connect() as connection:
            tree = Tree(connection)

            root_ids = (
                [root_id]
                if root_id is not None
                else [project["node_id"] for project in tree.get_projects()]
            )

            with Timer("Progress") as timer:
                for rid in root_ids:
                    print("Root {}:".format(rid))

                    # Only the calculation itself is timed, not the printing.
                    with timer.child(str(rid)):
                        node_progress = tree.calculate_progress(rid)

                    for key in sorted(node_progress.keys()):
                        print("{}: {}".format(key, node_progress[key]))
Esempio n. 13
0
def export_project(project_id):
    """
    Dump the tree of a project and save it below PROJECT_EXPORT_DIR.

    The file name consists of the current timestamp, the project ID and
    the project name.

    Returns:
        Path of the written file.
    """
    config = app.config

    # Dump the database tree
    with database.engine.connect() as connection:
        db_tree = Tree(connection)
        root_id = db_tree.get_root_id(project_id)
        project = db_tree.get_project(project_id)
        tree = db_tree.dump_tree(root_id)

    export_dir = config["PROJECT_EXPORT_DIR"]
    archive_name = "{:%Y-%m-%d-%H-%M-%S}--{}--{}.zip".format(
        dt.datetime.now(), project["project_id"], project["name"]
    )
    tree_fn = os.path.join(export_dir, archive_name)

    # The dump is saved after the connection has been released.
    tree.save(tree_fn)

    return tree_fn
Esempio n. 14
0
def node_adopt_members(parent_id):
    """
    Move a list of nodes and objects into a node.

    URL parameters:
        parent_id (int): ID of the node that accepts new members.

    Request parameters:
        members: List of nodes ({node_id: ...}) and objects ({object_id: ...}).

    Returns:
        Empty JSON-dict.
    """
    with database.engine.connect() as connection:
        tree = Tree(connection)

        members = request.get_json()["members"]

        # Split the members by kind; node IDs are converted to int.
        node_ids = []
        for member in members:
            if "node_id" in member:
                node_ids.append(int(member["node_id"]))

        object_ids = []
        for member in members:
            if "object_id" in member:
                object_ids.append(member["object_id"])

        with connection.begin():
            tree.relocate_nodes(node_ids, parent_id)
            tree.relocate_objects(object_ids, parent_id)

        summary = "Node {} adopted {} nodes and {} objects.".format(
            parent_id, len(node_ids), len(object_ids)
        )
        print(summary)

        return jsonify({})
Esempio n. 15
0
def node_get_next_unfilled(node_id):
    """
    Return the next node below node_id that is approved but not filled.

    URL parameters:
        node_id: ID of the node where the search starts.

    Request parameters:
        leaf: Parsed with strtobool, forwarded to tree.get_next_node().
            (Default: False)
        preferred_first: Parsed with strtobool, forwarded to
            tree.get_next_node(). (Default: False)

    Returns:
        JSON representation of the result of tree.get_next_node().
    """
    arg_parser = reqparse.RequestParser()
    for flag_name in ("leaf", "preferred_first"):
        arg_parser.add_argument(flag_name, type=strtobool, default=False)
    arguments = arg_parser.parse_args(strict=True)

    print(arguments)

    # Filter descendants that are approved and unfilled.
    # NOTE(review): the "==" comparisons are kept on purpose; presumably they
    # build column expressions instead of Python booleans - confirm.
    def approved_and_unfilled(subtree):
        is_approved = subtree.c.approved == True  # noqa: E712
        is_unfilled = subtree.c.filled == False  # noqa: E712
        return is_approved & is_unfilled

    with database.engine.connect() as connection:
        next_node = Tree(connection).get_next_node(
            node_id,
            leaf=arguments["leaf"],
            preferred_first=arguments["preferred_first"],
            filter=approved_and_unfilled,
        )

        return jsonify(next_node)
Esempio n. 16
0
def node_get_next(node_id):
    """
    Return the next node below node_id that is not approved.

    URL parameters:
        node_id: ID of the node where the search starts.

    Request parameters:
        leaf: Parsed with strtobool, forwarded to tree.get_next_node().
            (Default: False)

    Returns:
        JSON representation of the result of tree.get_next_node().
    """
    arg_parser = reqparse.RequestParser()
    arg_parser.add_argument("leaf", type=strtobool, default=False)
    arguments = arg_parser.parse_args(strict=True)

    print(arguments)

    # NOTE(review): the "==" comparisons below are kept on purpose; presumably
    # they build column expressions instead of Python booleans - confirm.

    # Descend if the successor is not approved
    # Rationale: Approval is for a whole subtree.
    def descend_into_unapproved(_, successor):
        return successor.c.approved == False  # noqa: E712

    # Filter descendants that are not approved
    def only_unapproved(subtree):
        return subtree.c.approved == False  # noqa: E712

    with database.engine.connect() as connection:
        next_node = Tree(connection).get_next_node(
            node_id,
            leaf=arguments["leaf"],
            recurse_cb=descend_into_unapproved,
            filter=only_unapproved,
        )

        return jsonify(next_node)
Esempio n. 17
0
def post_node_members(node_id):
    """
    Move nodes and objects into a node.

    URL parameters:
        node_id: ID of the node that receives the members.

    Request parameters (body):
        List of nodes ({node_id: ...}) and objects ({object_id: ...}).

    Returns:
        JSON string "ok".
    """
    members = request.get_json()

    # Split the members by kind.
    object_ids = []
    node_ids = []
    for member in members:
        if "object_id" in member:
            object_ids.append(member["object_id"])
        if "node_id" in member:
            node_ids.append(member["node_id"])

    with database.engine.connect() as connection:
        tree = Tree(connection)

        # Both relocations succeed or fail together.
        with connection.begin():
            tree.relocate_nodes(node_ids, node_id)
            tree.relocate_objects(object_ids, node_id)

    return jsonify("ok")
Esempio n. 18
0
    def consolidate(root_id):
        """
        Consolidate one node or the root nodes of several projects.

        root_id may be "all" (root nodes of tree.get_projects()), "visible"
        (root nodes of tree.get_projects(True)) or a single node ID.
        """
        with database.engine.connect() as connection:
            with Timer("Consolidate") as timer:
                tree = Tree(connection)

                if root_id == "all":
                    print("Consolidating all projects...")
                    root_ids = [
                        project["node_id"] for project in tree.get_projects()
                    ]
                elif root_id == "visible":
                    print("Consolidating visible projects...")
                    root_ids = [
                        project["node_id"] for project in tree.get_projects(True)
                    ]
                else:
                    print("Consolidating {}...".format(root_id))
                    root_ids = [root_id]

                # Every root gets its own child timer.
                for rid in root_ids:
                    with timer.child(str(rid)):
                        print("Consolidating {}...".format(rid))
                        tree.consolidate_node(rid)

                print("Done.")
Esempio n. 19
0
def save_project(project_id):
    """
    Save the project at PROJECT_EXPORT_DIR.

    The file name consists of the current timestamp, the project ID and
    the project name.

    Returns:
        JSON-dict with the key "tree_fn" (path of the exported file).
    """
    with database.engine.connect() as connection:
        tree = Tree(connection)

        project = tree.get_project(project_id)
        root_id = tree.get_root_id(project_id)

        export_dir = api.config["PROJECT_EXPORT_DIR"]
        archive_name = "{:%Y-%m-%d-%H-%M-%S}--{}--{}.zip".format(
            datetime.now(), project["project_id"], project["name"]
        )
        tree_fn = os.path.join(export_dir, archive_name)

        tree.export_tree(root_id, tree_fn)

        return jsonify({"tree_fn": tree_fn})
Esempio n. 20
0
    def load_project(tree_fn, project_name, consolidate):
        """
        Load a project from a saved tree.

        If project_name is None, the base name of tree_fn without its
        extension is used. If consolidate is true, the root node of the
        loaded project is consolidated in the same transaction.
        """

        with database.engine.connect() as connection:
            tree = Tree(connection)

            if project_name is None:
                path_without_ext, _ = os.path.splitext(tree_fn)
                project_name = os.path.basename(path_without_ext)

            # Loading and (optional) consolidation are committed together.
            with connection.begin():
                print("Loading {}...".format(tree_fn))
                project_id = tree.load_project(project_name, tree_fn)
                root_id = tree.get_root_id(project_id)

                if consolidate:
                    print("Consolidating ...")
                    tree.consolidate_node(root_id)

            print("Root ID: {}".format(root_id))
            print("Project ID: {}".format(project_id))
Esempio n. 21
0
def post_node_classify(node_id):
    """
    Classify the members of a node into their starred siblings.

    URL parameters:
        node_id: Parent of the classified members.

    GET parameters:
        nodes (boolean): Classify nodes? (Default: False)
        objects (boolean): Classify objects? (Default: False)
        safe (boolean): Perform safe classification (Default: False)
        subnode (boolean): Move classified objects into a child of the target node. (Default: False)

    Returns:
        JSON-dict with the keys "n_predicted_children" and
        "n_predicted_objects". Both are 0 if the node has no starred
        children, because there is nothing to classify into.
    """

    flags = {
        k: request.args.get(k, 0, strtobool)
        for k in ("nodes", "objects", "safe", "subnode")
    }

    print(flags)

    with database.engine.connect() as connection:
        tree = Tree(connection)

        # Reading the children, moving members and logging are one transaction.
        with connection.begin():
            # Split children into starred and unstarred
            children = tree.get_children(node_id)

            starred = []
            unstarred = []
            for c in children:
                (starred if c["starred"] else unstarred).append(c)

            n_predicted_children, n_predicted_objects = _classify_node_members(
                tree, node_id, starred, unstarred, flags
            )

            log(
                connection,
                "classify_members(nodes={nodes},objects={objects})".format(**flags),
                node_id=node_id,
            )

            return jsonify(
                {
                    "n_predicted_children": int(n_predicted_children),
                    "n_predicted_objects": int(n_predicted_objects),
                }
            )


def _classify_node_members(tree, node_id, starred, unstarred, flags):
    """
    Move unstarred children and/or objects of node_id to their predicted starred sibling.

    Parameters:
        tree: Tree used to read objects and to relocate members.
        node_id: Parent of the classified members.
        starred: Starred children of node_id (classification targets).
        unstarred: Unstarred children of node_id (classification candidates).
        flags: Dict with the keys "nodes", "objects", "safe" and "subnode".

    Returns:
        Tuple (n_predicted_children, n_predicted_objects).
    """
    n_predicted_children = 0
    n_predicted_objects = 0

    if not starred:
        # Without starred children the centroid array is empty and 1-D, so
        # np.linalg.norm(..., axis=1) below would fail. There is no target
        # to classify into anyway.
        return n_predicted_children, n_predicted_objects

    starred_centroids = np.array([c["_centroid"] for c in starred])

    print("|starred_centroids|", np.linalg.norm(starred_centroids, axis=1))

    # Initialize classifier
    classifier = Classifier(starred_centroids)

    if flags["subnode"]:

        def _subnode_for(parent_id):
            return tree.create_node(parent_id=parent_id, name="classified")

        # The "classified" child of a target is created on first use only.
        target_nodes = keydefaultdict(_subnode_for)
    else:
        # Members are moved directly into the starred node.
        target_nodes = keydefaultdict(lambda k: k)

    # Predict unstarred children (if any)
    if flags["nodes"] and len(unstarred) > 0:
        unstarred_centroids = np.array([c["_centroid"] for c in unstarred])
        unstarred_ids = np.array([c["node_id"] for c in unstarred])

        print(
            "Predicting {} unstarred children of {}...".format(
                len(unstarred_centroids), node_id
            )
        )
        type_predicted = classifier.classify(unstarred_centroids, safe=flags["safe"])

        # type_predicted holds, per candidate, the index of the starred node.
        for i, starred_node in enumerate(starred):
            nodes_to_move = [int(n) for n in unstarred_ids[type_predicted == i]]

            if len(nodes_to_move):
                target_node_id = target_nodes[starred_node["node_id"]]
                tree.relocate_nodes(nodes_to_move, target_node_id, unapprove=True)

        # Entries > -1 are counted as predicted.
        n_predicted_children = int(np.sum(type_predicted > -1))

    if flags["objects"]:
        # Predict objects
        objects = tree.get_objects(node_id)
        print("Predicting {} objects of {}...".format(len(objects), node_id))

        # Same guard as for the children: do not classify an empty array.
        if len(objects) > 0:
            object_vectors = np.array([o["vector"] for o in objects])
            object_ids = np.array([o["object_id"] for o in objects])

            type_predicted = classifier.classify(object_vectors, safe=flags["safe"])

            for i, starred_node in enumerate(starred):
                objects_to_move = [str(o) for o in object_ids[type_predicted == i]]
                if len(objects_to_move):
                    target_node_id = target_nodes[starred_node["node_id"]]
                    print(
                        "Moving objects {!r} -> {}".format(
                            objects_to_move, target_node_id
                        )
                    )
                    tree.relocate_objects(
                        objects_to_move, target_node_id, unapprove=True
                    )

            n_predicted_objects = int(np.sum(type_predicted > -1))

    return n_predicted_children, n_predicted_objects
Esempio n. 22
0
def _node_get_recommended_children(node_id, max_n):
    """
    Return the children recommended for a node.

    max_n is forwarded to tree.recommend_children(); every recommended
    child is converted with _node().

    Returns:
        List of converted nodes.
    """
    with database.engine.connect() as connection:
        tree = Tree(connection)

        recommended = []
        for child in tree.recommend_children(node_id, max_n=max_n):
            recommended.append(_node(tree, child))

        return recommended
Esempio n. 23
0
def accept_recommended_objects(node_id):
    """
    Accept recommended objects.

    All objects on the recommendation pages 0..last_page are moved into
    the node, except for those listed as rejected, which are stored as
    rejected for this node instead.

    URL parameters:
        node_id (int): ID of the node that accepts recommendations

    Request parameters:
        request_id: URL of the recommendations.
        rejected_members: Rejected members.
        last_page: Last page of accepted recommendations.
        log_data (optional): Additional data to be stored in the log (only if SAVE_RECOMMENDATION_STATS!)

    Returns:
        Empty JSON-dict.

    Raises:
        ValueError: If log_data is given but is not a dict.
    """

    params = request.get_json()

    print(params)

    with Timer("accept_recommended_objects") as timer:

        with timer.child("assemble set of rejected objects"):
            # Rejected members starting with "o" are objects; strip the prefix.
            rejected_object_ids = {
                member[1:]
                for member in params["rejected_members"]
                if member.startswith("o")
            }

        with timer.child("assemble list of accepted objects"):
            object_ids = []
            n_pages = params["last_page"] + 1
            for page in range(n_pages):
                response = _node_get_recommended_objects(
                    node_id=node_id, request_id=params["request_id"], page=page
                )
                page_payload = json.loads(response.data.decode())
                object_ids += [entry["object_id"] for entry in page_payload["data"]]

        # Save list of objects to enable calculation of Average Precision and the like
        if app.config.get("SAVE_RECOMMENDATION_STATS", False):
            print("Saving accept-reject stats...")
            with timer.child("Save accept-reject stats") as stats_timer:
                with stats_timer.child("calc rejected"):
                    rejected = [
                        object_id in rejected_object_ids for object_id in object_ids
                    ]
                with stats_timer.child("assemble DataFrame"):
                    stats = pd.DataFrame(
                        {"object_id": object_ids, "rejected": rejected}
                    )

                stats_name = "{:%Y-%m-%d-%H-%M-%S}--accept-reject--{}.csv".format(
                    datetime.now(), node_id
                )
                stats_fn = os.path.join(app.config["PROJECT_EXPORT_DIR"], stats_name)

                with stats_timer.child("write data"):
                    stats.to_csv(stats_fn, index=False)

        with timer.child("filter accepted objects"):
            # Keep only the objects that were not rejected.
            object_ids = [
                object_id
                for object_id in object_ids
                if object_id not in rejected_object_ids
            ]

        # Assemble log
        log_data = {
            "n_accepted": len(object_ids),
            "n_rejected": len(rejected_object_ids),
        }

        # Store additional log data
        extra_log_data = params.get("log_data")
        if extra_log_data is not None:
            if not isinstance(extra_log_data, dict):
                raise ValueError(
                    "Parameter log_data should be a dict, got a {}!".format(
                        type(extra_log_data)
                    )
                )
            log_data.update(extra_log_data)

        with database.engine.connect() as connection:
            tree = Tree(connection)

            # Accepting and rejecting are committed together.
            with timer.child("save accepted/rejected to database"):
                with connection.begin():
                    tree.relocate_objects(object_ids, node_id)
                    tree.reject_objects(node_id, rejected_object_ids)

            log(
                connection,
                "accept_recommended_objects",
                node_id=node_id,
                data=json_dumps(log_data),
            )

        summary = "Node {} adopted {} objects and rejected {} objects.".format(
            node_id, len(object_ids), len(rejected_object_ids)
        )
        print(summary)

        return jsonify({})
Esempio n. 24
0
 def connect_supertree(root_id):
     """
     Call tree.connect_supertree() for root_id on a fresh connection.
     """
     with database.engine.connect() as connection:
         Tree(connection).connect_supertree(root_id)
Esempio n. 25
0
def _get_node_members(
    node_id,
    nodes=False,
    objects=False,
    arrange_by="",
    starred_first=False,
    descending=False,
):
    """
    Assemble the members (children and/or objects) of a node in the requested order.

    Parameters:
        node_id: ID of the node whose members are returned.
        nodes: Include the children from tree.get_children()?
        objects: Include the objects from tree.get_objects()?
        arrange_by: "" (keep the order of the queries), "sim", "nleaves",
            "starred_sim", "interleaved" or "random". Any other value only
            triggers a warning.
        starred_first: If true, the regular child query is restricted to
            include="unstarred" and the starred children are prepended to
            the (arranged) result.
        descending: Reverse the calculated order (only if arrange_by is set).

    Returns:
        The result of _members(tree, members).
    """
    with database.engine.connect() as connection, Timer("_get_node_members") as timer:
        tree = Tree(connection)

        # With starred_first, starred children are fetched separately below,
        # so the regular child query is restricted to unstarred ones.
        sorted_nodes_include = "unstarred" if starred_first else None

        result = []
        if nodes:
            with timer.child("tree.get_children()"):
                result.extend(tree.get_children(node_id, include=sorted_nodes_include))
        if objects:
            with timer.child("tree.get_objects()"):
                result.extend(tree.get_objects(node_id))

        # The starred children serve as anchors for "starred_sim" and as the
        # prefix of the result for starred_first.
        if arrange_by == "starred_sim" or starred_first:
            with timer.child("tree.get_children(starred)"):
                starred = tree.get_children(node_id, include="starred")

        if arrange_by != "":
            # Object array, so that the members can be reordered by indexing.
            result = np.array(result, dtype=object)

            if arrange_by == "sim":
                with timer.child("sim"):
                    order = _arrange_by_sim(result)
            elif arrange_by == "nleaves":
                with timer.child("nleaves"):
                    order = _arrange_by_nleaves(result)
            elif arrange_by == "starred_sim":
                with timer.child("starred_sim"):
                    # If no starred members yet, arrange by distance to regular children
                    anchors = starred if len(starred) else tree.get_children(node_id)

                    order = _arrange_by_starred_sim(result, anchors)
            elif arrange_by == "interleaved":
                with timer.child("interleaved"):
                    order = _arrange_by_sim(result)
                    if len(order):
                        # Alternate between the first half of the order and
                        # the reversed second half.
                        order0, order1 = np.array_split(order.copy(), 2)
                        order[::2] = order0
                        order[1::2] = order1[::-1]
            elif arrange_by == "random":
                with timer.child("random"):
                    order = np.random.permutation(len(result))
            else:
                warnings.warn("arrange_by={} not supported!".format(arrange_by))
                # NOTE(review): indexing with an empty tuple presumably leaves
                # the member order unchanged - confirm.
                order = ()

            if descending:
                order = order[::-1]

            # ===================================================================
            # if len(order):
            #     try:
            #         assert np.all(np.bincount(order) == 1)
            #     except:
            #         print(order)
            #         print(np.bincount(order))
            #         raise
            # ===================================================================

            result = result[order].tolist()

        # Starred children are prepended even if nodes is False.
        if starred_first:
            result = starred + result

        result = _members(tree, result)

        return result
Esempio n. 26
0
 def export_classifications(root_id, classification_fn):
     """
     Call tree.export_classifications() for root_id and classification_fn
     on a fresh connection.
     """
     with database.engine.connect() as connection:
         Tree(connection).export_classifications(root_id, classification_fn)
Esempio n. 27
0
def node_get_tip(node_id):
    """
    Return the result of tree.get_tip(node_id) as JSON.
    """
    with database.engine.connect() as connection:
        tip = Tree(connection).get_tip(node_id)

        return jsonify(tip)
Esempio n. 28
0
def get_tree_root():
    """
    Return one _tree_root() entry per project from tree.get_projects() as
    a JSON list.
    """
    with database.engine.connect() as connection:
        projects = Tree(connection).get_projects()
        roots = [_tree_root(project) for project in projects]

        return jsonify(roots)
Esempio n. 29
0
def recluster_project(project_id, min_cluster_size, sample_size=1000):
    """
    Recluster the unapproved objects of a project and load the result as a new project.

    The tree of the project is dumped, reclustered using the feature files
    listed in the RECLUSTER_FEATURES setting and loaded into the database
    as a new project named "<name>-<min_cluster_size>", which is then
    consolidated.

    Timeout: 12h

    Parameters:
        project_id: ID of the project that is reclustered.
        min_cluster_size: Forwarded to Recluster.cluster(); also used as
            suffix of the name of the new project.
        sample_size: Forwarded to Recluster.cluster() as sample_size. The
            default of 1000 is the value that was hard-coded before.
            NOTE(review): an old comment mentioned int(1e6) ("1M objects
            maximum") - confirm which value is intended for production.
    """

    config = morphocluster.app.app.config

    # Dump the database tree
    print("Dumping database tree...")
    with database.engine.connect() as conn:
        db_tree = Tree(conn)
        root_id = db_tree.get_root_id(project_id)
        project = db_tree.get_project(project_id)
        tree = db_tree.dump_tree(root_id)

    # Recluster unapproved objects
    print("Reclustering...")
    recluster = Recluster()
    recluster.load_tree(tree)

    for features_fn in config["RECLUSTER_FEATURES"]:
        recluster.load_features(features_fn)

    recluster.cluster(
        ignore_approved=True,
        sample_size=sample_size,
        min_cluster_size=min_cluster_size,
        min_samples=1,
        cluster_selection_method="leaf",
    )

    tree = recluster.merge_trees()

    # Load new tree into the database
    print("Loading tree into database...")
    project_name = "{}-{}".format(project["name"], min_cluster_size)

    with database.engine.connect() as conn:
        db_tree = Tree(conn)

        # Loading and consolidation are committed together.
        with conn.begin():
            project_id = db_tree.load_project(project_name, tree)
            root_id = db_tree.get_root_id(project_id)

            print("Consolidating ...")
            db_tree.consolidate_node(root_id)

        print("Root ID: {}".format(root_id))
        print("Project ID: {}".format(project_id))

    print("Done.")