def post_node_merge_into(node_id):
    """
    Merge a node into another node.

    URL parameters:
        node_id: Node that is merged.

    Request parameters (body):
        dest_node_id: Node that absorbs the children and objects.

    Returns:
        JSON null.
    """
    with database.engine.connect() as connection:
        tree = Tree(connection)
        data = request.get_json()

        # TODO: Unapprove
        # Perform the merge and its log entry atomically, consistent with the
        # other mutating handlers (patch_node, create_node).
        with connection.begin():
            tree.merge_node_into(node_id, data["dest_node_id"])

            log(
                connection,
                "merge_node_into({}, {})".format(node_id, data["dest_node_id"]),
                node_id=data["dest_node_id"],
            )

        return jsonify(None)
def get_node_stats(node_id):
    """
    Return progress information about this node.

    URL parameters:
        node_id (int): ID of a node

    Request parameters:
        log (str): Save an entry to the log?

    Returns:
        JSON-dict
    """
    req_parser = reqparse.RequestParser()
    req_parser.add_argument("log", default=None)
    args = req_parser.parse_args(strict=True)

    with database.engine.connect() as connection:
        tree = Tree(connection)

        with connection.begin():
            progress = tree.calculate_progress(node_id)

            log_tag = args["log"]
            if log_tag is not None:
                # Persist a log entry tagged with the supplied label.
                log(
                    connection,
                    "progress-{}".format(log_tag),
                    node_id=node_id,
                    data=json_dumps(progress),
                )

        return jsonify(progress)
def patch_node(node_id):
    """Update mutable fields of a node; parent changes must go via /adopt."""
    with database.engine.connect() as connection:
        tree = Tree(connection)
        data = request.get_json()

        # TODO: Use argparse
        flags = {
            name: request.args.get(name, 0, strtobool)
            for name in ("include_children",)
        }

        if "starred" in data:
            data["starred"] = strtobool(str(data["starred"]))

        if "parent_id" in data:
            raise ValueError(
                "parent_id must not be set directly, use /nodes/<node_id>/adopt."
            )

        with connection.begin():
            tree.update_node(node_id, data)

            log(
                connection,
                "update_node({})".format(json.dumps(data, sort_keys=True)),
                node_id=node_id,
            )

        updated = tree.get_node(node_id, True)
        return jsonify(_node(tree, updated, **flags))
def export_direct_objects(node_id, filename):
    """Write the object_ids directly below *node_id* to *filename*, one per line."""
    with database.engine.connect() as conn, open(filename, "w") as outfile:
        tree = Tree(conn)
        lines = ("{}\n".format(obj["object_id"]) for obj in tree.get_objects(node_id))
        outfile.writelines(lines)
def _node_get_recommended_objects(node_id=None, max_n=None):
    """Return the serialized recommended objects for a node."""
    with database.engine.connect() as connection:
        tree = Tree(connection)
        recommended = tree.recommend_objects(node_id, max_n)
        return [_object(obj) for obj in recommended]
def export_tree(root_id, tree_fn):
    """
    Export the whole tree with its objects.
    """
    with database.engine.connect() as conn:
        Tree(conn).export_tree(root_id, tree_fn)
def node_get_n_sorted(node_id):
    """Return the summed deep object count of the min-level starred nodes."""
    with database.engine.connect() as connection:
        tree = Tree(connection)
        starred_nodes = tree.get_minlevel_starred(node_id)

        total = 0
        for node in starred_nodes:
            total += node["_n_objects_deep"]

        return jsonify(total)
def get_node(node_id):
    """Fetch a single node and return its JSON representation."""
    with database.engine.connect() as connection:
        tree = Tree(connection)

        flags = {
            name: request.args.get(name, 0, strtobool)
            for name in ("include_children",)
        }

        node = tree.get_node(node_id)

        log(connection, "get_node", node_id=node_id)

        return jsonify(_node(tree, node, **flags))
def get_project(project_id):
    """Return a project, optionally augmented with progress information."""
    req_parser = reqparse.RequestParser()
    req_parser.add_argument("include_progress", type=strtobool, default=0)
    args = req_parser.parse_args(strict=True)

    with database.engine.connect() as connection:
        tree = Tree(connection)
        project = tree.get_project(project_id)

        if args["include_progress"]:
            project["progress"] = tree.calculate_progress(project["node_id"])

        return jsonify(project)
def get_subtree(node_id):
    """Return the children of a node, optionally within the supertree."""
    flags = {name: request.args.get(name, 0, strtobool) for name in ("supertree",)}
    use_supertree = flags["supertree"]

    with database.engine.connect() as connection:
        tree = Tree(connection)

        if use_supertree:
            children = tree.get_children(
                node_id, supertree=True, include="starred", order_by="_n_children DESC"
            )
        else:
            children = tree.get_children(node_id, order_by="_n_children DESC")

        return jsonify([_tree_node(child, use_supertree) for child in children])
def create_node():
    """
    Create a new node.

    Request parameters:
        project_id: Project the node belongs to (optional; derived from
            parent_id if absent).
        name: Name of the new node (optional).
        members: List of nodes ({node_id: ...}) and objects ({object_id: ...})
            that are moved below the new node.
        starred: Star the new node? (Default: "0")
        parent_id: Parent of the new node (required).

    Returns:
        Serialized new node as JSON.
    """
    with database.engine.connect() as connection:
        tree = Tree(connection)
        data = request.get_json()

        # Split the requested members into objects and nodes.
        object_ids = [m["object_id"] for m in data["members"] if "object_id" in m]
        node_ids = [m["node_id"] for m in data["members"] if "node_id" in m]

        project_id = data.get("project_id", None)
        name = data.get("name", None)
        parent_id = int(data.get("parent_id"))
        starred = strtobool(str(data.get("starred", "0")))

        if project_id is None:
            # Retrieve project_id for the parent_id
            project_id = tree.get_node(parent_id)["project_id"]

        # Create the node and move its members atomically.
        with connection.begin():
            node_id = tree.create_node(
                int(project_id), parent_id=parent_id, name=name, starred=starred
            )
            tree.relocate_nodes(node_ids, node_id)
            tree.relocate_objects(object_ids, node_id)

            log(connection, "create_node", node_id=node_id)

        node = tree.get_node(node_id, require_valid=True)

        return jsonify(_node(tree, node))
def progress(root_id, log):
    """
    Report progress on a tree
    """
    with database.engine.connect() as conn:
        tree = Tree(conn)

        if root_id is None:
            # No root given: report on every project.
            root_ids = [project["node_id"] for project in tree.get_projects()]
        else:
            root_ids = [root_id]

        with Timer("Progress") as timer:
            for rid in root_ids:
                print("Root {}:".format(rid))
                with timer.child(str(rid)):
                    prog = tree.calculate_progress(rid)
                    for key in sorted(prog.keys()):
                        print("{}: {}".format(key, prog[key]))
def export_project(project_id):
    """Dump a project's tree into a timestamped zip file and return its path."""
    config = app.config

    # Dump the database tree
    with database.engine.connect() as conn:
        db_tree = Tree(conn)
        root_id = db_tree.get_root_id(project_id)
        project = db_tree.get_project(project_id)
        tree = db_tree.dump_tree(root_id)

        basename = "{:%Y-%m-%d-%H-%M-%S}--{}--{}.zip".format(
            dt.datetime.now(), project["project_id"], project["name"]
        )
        tree_fn = os.path.join(config["PROJECT_EXPORT_DIR"], basename)

        tree.save(tree_fn)

        return tree_fn
def node_adopt_members(parent_id):
    """
    Adopt a list of nodes.

    URL parameters:
        parent_id (int): ID of the node that accepts new members.

    Request parameters:
        members: List of nodes ({node_id: ...}) and objects ({object_id: ...}).

    Returns:
        Nothing.
    """
    with database.engine.connect() as connection:
        tree = Tree(connection)
        members = request.get_json()["members"]

        node_ids = [int(member["node_id"]) for member in members if "node_id" in member]
        object_ids = [member["object_id"] for member in members if "object_id" in member]

        # Move both kinds of members atomically.
        with connection.begin():
            tree.relocate_nodes(node_ids, parent_id)
            tree.relocate_objects(object_ids, parent_id)

        print(
            "Node {} adopted {} nodes and {} objects.".format(
                parent_id, len(node_ids), len(object_ids)
            )
        )

        return jsonify({})
def node_get_next_unfilled(node_id):
    """
    Return the next node that is approved but not yet filled.

    URL parameters:
        node_id (int): Starting node.

    Request parameters:
        leaf (bool): Restrict the search to leaves. (Default: False)
        preferred_first (bool): Visit preferred nodes first. (Default: False)
    """
    parser = reqparse.RequestParser()
    parser.add_argument("leaf", type=strtobool, default=False)
    parser.add_argument("preferred_first", type=strtobool, default=False)
    arguments = parser.parse_args(strict=True)

    with database.engine.connect() as connection:
        tree = Tree(connection)

        # Filter descendants that are approved and unfilled.
        # NOTE: `== True` / `== False` build SQLAlchemy column expressions
        # and must not be replaced by `is` comparisons.
        def _filter(subtree):
            return (subtree.c.approved == True) & (subtree.c.filled == False)

        return jsonify(
            tree.get_next_node(
                node_id,
                leaf=arguments["leaf"],
                preferred_first=arguments["preferred_first"],
                filter=_filter,
            )
        )
def node_get_next(node_id):
    """
    Return the next node that is not yet approved.

    URL parameters:
        node_id (int): Starting node.

    Request parameters:
        leaf (bool): Restrict the search to leaves. (Default: False)
    """
    parser = reqparse.RequestParser()
    parser.add_argument("leaf", type=strtobool, default=False)
    arguments = parser.parse_args(strict=True)

    with database.engine.connect() as connection:
        tree = Tree(connection)

        # Descend if the successor is not approved
        # Rationale: Approval is for a whole subtree.
        # NOTE: `== False` builds a SQLAlchemy column expression and must
        # not be replaced by an `is` comparison.
        def recurse(_, s):
            return s.c.approved == False

        # Filter descendants that are not approved
        def _filter(subtree):
            return subtree.c.approved == False

        return jsonify(
            tree.get_next_node(
                node_id, leaf=arguments["leaf"], recurse_cb=recurse, filter=_filter
            )
        )
def post_node_members(node_id):
    """Move the posted nodes and objects below the given node."""
    payload = request.get_json()
    object_ids = [entry["object_id"] for entry in payload if "object_id" in entry]
    node_ids = [entry["node_id"] for entry in payload if "node_id" in entry]

    with database.engine.connect() as connection:
        tree = Tree(connection)

        with connection.begin():
            tree.relocate_nodes(node_ids, node_id)
            tree.relocate_objects(object_ids, node_id)

        return jsonify("ok")
def consolidate(root_id):
    """Consolidate one project, all projects ("all") or the visible ones ("visible")."""
    with database.engine.connect() as conn, Timer("Consolidate") as timer:
        tree = Tree(conn)

        if root_id == "all":
            print("Consolidating all projects...")
            root_ids = [project["node_id"] for project in tree.get_projects()]
        elif root_id == "visible":
            print("Consolidating visible projects...")
            root_ids = [project["node_id"] for project in tree.get_projects(True)]
        else:
            print("Consolidating {}...".format(root_id))
            root_ids = [root_id]

        for rid in root_ids:
            with timer.child(str(rid)):
                print("Consolidating {}...".format(rid))
                tree.consolidate_node(rid)

        print("Done.")
def save_project(project_id):
    """
    Save the project at PROJECT_EXPORT_DIR.
    """
    with database.engine.connect() as conn:
        tree = Tree(conn)
        project = tree.get_project(project_id)
        root_id = tree.get_root_id(project_id)

        # Timestamped file name so successive exports never collide.
        basename = "{:%Y-%m-%d-%H-%M-%S}--{}--{}.zip".format(
            datetime.now(), project["project_id"], project["name"]
        )
        tree_fn = os.path.join(api.config["PROJECT_EXPORT_DIR"], basename)

        tree.export_tree(root_id, tree_fn)

        return jsonify({"tree_fn": tree_fn})
def load_project(tree_fn, project_name, consolidate):
    """
    Load a project from a saved tree.
    """
    with database.engine.connect() as conn:
        tree = Tree(conn)

        if project_name is None:
            # Derive the project name from the tree file name.
            project_name = os.path.basename(os.path.splitext(tree_fn)[0])

        with conn.begin():
            print("Loading {}...".format(tree_fn))
            project_id = tree.load_project(project_name, tree_fn)
            root_id = tree.get_root_id(project_id)

            if consolidate:
                print("Consolidating ...")
                tree.consolidate_node(root_id)

            print("Root ID: {}".format(root_id))
            print("Project ID: {}".format(project_id))
def post_node_classify(node_id):
    """
    Classify the members of a node into their starred siblings.

    URL parameters:
        node_id: Parent of the classified members.

    GET parameters:
        nodes (boolean): Classify nodes? (Default: False)
        objects (boolean): Classify objects? (Default: False)
        safe (boolean): Perform safe classification (Default: False)
        subnode (boolean): Move classified objects into a child of the target
            node. (Default: False)
    """
    # Parse the boolean flags from the query string.
    flags = {
        k: request.args.get(k, 0, strtobool)
        for k in ("nodes", "objects", "safe", "subnode")
    }

    print(flags)

    n_predicted_children = 0
    n_predicted_objects = 0

    with database.engine.connect() as connection:
        tree = Tree(connection)

        # Split children into starred and unstarred
        with connection.begin():
            children = tree.get_children(node_id)

            starred = []
            unstarred = []
            for c in children:
                (starred if c["starred"] else unstarred).append(c)

            # One centroid per starred child; these act as class prototypes.
            starred_centroids = np.array([c["_centroid"] for c in starred])

            print("|starred_centroids|", np.linalg.norm(starred_centroids, axis=1))

            # Initialize classifier
            classifier = Classifier(starred_centroids)

            if flags["subnode"]:
                # Classified members go into a fresh "classified" child of
                # each starred target node (created lazily on first access).
                def _subnode_for(node_id):
                    return tree.create_node(parent_id=node_id, name="classified")

                target_nodes = keydefaultdict(_subnode_for)
            else:
                # Classified members go directly into the starred node itself.
                target_nodes = keydefaultdict(lambda k: k)

            if flags["nodes"]:
                unstarred_centroids = np.array([c["_centroid"] for c in unstarred])
                unstarred_ids = np.array([c["node_id"] for c in unstarred])

                # Predict unstarred children (if any)
                n_unstarred = len(unstarred_centroids)
                if n_unstarred > 0:
                    print(
                        "Predicting {} unstarred children of {}...".format(
                            n_unstarred, node_id
                        )
                    )

                    type_predicted = classifier.classify(
                        unstarred_centroids, safe=flags["safe"]
                    )

                    # Move every node predicted as class i below target i.
                    for i, starred_node in enumerate(starred):
                        nodes_to_move = [
                            int(n) for n in unstarred_ids[type_predicted == i]
                        ]
                        if len(nodes_to_move):
                            target_node_id = target_nodes[starred_node["node_id"]]
                            tree.relocate_nodes(
                                nodes_to_move, target_node_id, unapprove=True
                            )

                    # Predictions > -1 were actually assigned to a class.
                    n_predicted_children = np.sum(type_predicted > -1)

            if flags["objects"]:
                # Predict objects
                objects = tree.get_objects(node_id)

                print("Predicting {} objects of {}...".format(len(objects), node_id))

                object_vectors = np.array([o["vector"] for o in objects])
                object_ids = np.array([o["object_id"] for o in objects])

                type_predicted = classifier.classify(object_vectors, safe=flags["safe"])

                # Move every object predicted as class i below target i.
                for i, starred_node in enumerate(starred):
                    objects_to_move = [str(o) for o in object_ids[type_predicted == i]]
                    if len(objects_to_move):
                        target_node_id = target_nodes[starred_node["node_id"]]
                        print(
                            "Moving objects {!r} -> {}".format(
                                objects_to_move, target_node_id
                            )
                        )
                        tree.relocate_objects(
                            objects_to_move, target_node_id, unapprove=True
                        )

                # Predictions > -1 were actually assigned to a class.
                n_predicted_objects = np.sum(type_predicted > -1)

            log(
                connection,
                "classify_members(nodes={nodes},objects={objects})".format(**flags),
                node_id=node_id,
            )

        return jsonify(
            {
                "n_predicted_children": int(n_predicted_children),
                "n_predicted_objects": int(n_predicted_objects),
            }
        )
def _node_get_recommended_children(node_id, max_n):
    """Return the serialized recommended children for a node."""
    with database.engine.connect() as connection:
        tree = Tree(connection)
        children = tree.recommend_children(node_id, max_n=max_n)
        return [_node(tree, child) for child in children]
def accept_recommended_objects(node_id):
    """
    Accept recommended objects.

    URL parameters:
        node_id (int): ID of the node that accepts recommendations

    Request parameters:
        request_id: URL of the recommendations.
        rejected_members: Rejected members.
        last_page: Last page of accepted recommendations.
        log_data (optional): Additional data to be stored in the log
            (only if SAVE_RECOMMENDATION_STATS!)

    Returns:
        Nothing.
    """
    parameters = request.get_json()

    print(parameters)

    with Timer("accept_recommended_objects") as t:
        with t.child("assemble set of rejected objects"):
            # Rejected member IDs carry a one-character type prefix ("o" for
            # objects); strip it before building the set.
            rejected_object_ids = set(
                m[1:] for m in parameters["rejected_members"] if m.startswith("o")
            )

        with t.child("assemble list of accepted objects"):
            # Re-fetch every recommendation page the user saw to reconstruct
            # the full candidate list.
            object_ids = []
            for page in range(parameters["last_page"] + 1):
                # NOTE(review): presumably this resolves to a view function
                # that accepts request_id/page and returns a Flask response —
                # confirm the signature (a sibling helper of the same name
                # elsewhere takes (node_id, max_n)).
                response = _node_get_recommended_objects(
                    node_id=node_id, request_id=parameters["request_id"], page=page
                )
                page_object_ids = (
                    v["object_id"] for v in json.loads(response.data.decode())["data"]
                )
                object_ids.extend(page_object_ids)

        # Save list of objects to enable calculation of Average Precision and the like
        if app.config.get("SAVE_RECOMMENDATION_STATS", False):
            print("Saving accept-reject stats...")
            with t.child("Save accept-reject stats") as t2:
                with t2.child("calc rejected"):
                    rejected = [o in rejected_object_ids for o in object_ids]
                with t2.child("assemble DataFrame"):
                    data = pd.DataFrame({"object_id": object_ids, "rejected": rejected})
                data_fn = os.path.join(
                    app.config["PROJECT_EXPORT_DIR"],
                    "{:%Y-%m-%d-%H-%M-%S}--accept-reject--{}.csv".format(
                        datetime.now(), node_id
                    ),
                )
                with t2.child("write data"):
                    data.to_csv(data_fn, index=False)

        with t.child("filter accepted objects"):
            # Filter object_ids
            object_ids = [o for o in object_ids if o not in rejected_object_ids]

        # print(object_ids)

        # Assemble log
        log_data = {
            "n_accepted": len(object_ids),
            "n_rejected": len(rejected_object_ids),
        }

        # Store additional log data
        addlog_data = parameters.get("log_data")
        if isinstance(addlog_data, dict):
            log_data.update(addlog_data)
        elif addlog_data is not None:
            raise ValueError(
                "Parameter log_data should be a dict, got a {}!".format(
                    type(addlog_data)
                )
            )

        with database.engine.connect() as connection:
            tree = Tree(connection)
            # Apply the accept/reject decisions and the log entry atomically.
            with t.child("save accepted/rejected to database"), connection.begin():
                tree.relocate_objects(object_ids, node_id)
                tree.reject_objects(node_id, rejected_object_ids)

                log(
                    connection,
                    "accept_recommended_objects",
                    node_id=node_id,
                    data=json_dumps(log_data),
                )

    print(
        "Node {} adopted {} objects and rejected {} objects.".format(
            node_id, len(object_ids), len(rejected_object_ids)
        )
    )

    return jsonify({})
def connect_supertree(root_id):
    """Connect the tree below *root_id* to the supertree."""
    with database.engine.connect() as conn:
        Tree(conn).connect_supertree(root_id)
def _get_node_members(
    node_id,
    nodes=False,
    objects=False,
    arrange_by="",
    starred_first=False,
    descending=False,
):
    """
    Assemble and arrange the members (child nodes and/or objects) of a node.

    Args:
        node_id: Node whose members are collected.
        nodes (bool): Include child nodes.
        objects (bool): Include objects.
        arrange_by (str): One of "", "sim", "nleaves", "starred_sim",
            "interleaved", "random"; "" keeps the database order.
        starred_first (bool): Prepend the starred children to the result.
        descending (bool): Reverse the computed order.

    Returns:
        Serialized members (via _members).
    """
    with database.engine.connect() as connection, Timer("_get_node_members") as timer:
        tree = Tree(connection)

        # With starred_first, the starred children are fetched separately
        # below, so only the unstarred ones are collected here.
        sorted_nodes_include = "unstarred" if starred_first else None

        result = []
        if nodes:
            with timer.child("tree.get_children()"):
                result.extend(tree.get_children(node_id, include=sorted_nodes_include))
        if objects:
            with timer.child("tree.get_objects()"):
                result.extend(tree.get_objects(node_id))

        if arrange_by == "starred_sim" or starred_first:
            with timer.child("tree.get_children(starred)"):
                starred = tree.get_children(node_id, include="starred")

        if arrange_by != "":
            # object dtype: members are heterogeneous dicts.
            result = np.array(result, dtype=object)

            if arrange_by == "sim":
                with timer.child("sim"):
                    order = _arrange_by_sim(result)
            elif arrange_by == "nleaves":
                with timer.child("nleaves"):
                    order = _arrange_by_nleaves(result)
            elif arrange_by == "starred_sim":
                with timer.child("starred_sim"):
                    # If no starred members yet, arrange by distance to regular children
                    anchors = starred if len(starred) else tree.get_children(node_id)
                    order = _arrange_by_starred_sim(result, anchors)
            elif arrange_by == "interleaved":
                with timer.child("interleaved"):
                    # Interleave the two halves of the similarity order:
                    # first half on even positions, reversed second half on odd.
                    order = _arrange_by_sim(result)
                    if len(order):
                        order0, order1 = np.array_split(order.copy(), 2)
                        order[::2] = order0
                        order[1::2] = order1[::-1]
            elif arrange_by == "random":
                with timer.child("random"):
                    order = np.random.permutation(len(result))
            else:
                warnings.warn("arrange_by={} not supported!".format(arrange_by))
                order = ()

            if descending:
                order = order[::-1]

            # ===================================================================
            # if len(order):
            #     try:
            #         assert np.all(np.bincount(order) == 1)
            #     except:
            #         print(order)
            #         print(np.bincount(order))
            #         raise
            # ===================================================================

            result = result[order].tolist()

        if starred_first:
            result = starred + result

        result = _members(tree, result)

        return result
def export_classifications(root_id, classification_fn):
    """Export the classifications below *root_id* to *classification_fn*."""
    with database.engine.connect() as conn:
        Tree(conn).export_classifications(root_id, classification_fn)
def node_get_tip(node_id):
    """Return the tip of a node as JSON."""
    with database.engine.connect() as connection:
        return jsonify(Tree(connection).get_tip(node_id))
def get_tree_root():
    """Return all projects serialized as tree roots."""
    with database.engine.connect() as connection:
        tree = Tree(connection)
        projects = tree.get_projects()
        return jsonify([_tree_root(project) for project in projects])
def recluster_project(project_id, min_cluster_size):
    """
    Recluster the unapproved objects of a project and load the result as a
    new project.

    Timeout: 12h
    """
    config = morphocluster.app.app.config

    # Dump the database tree
    print("Dumping database tree...")
    with database.engine.connect() as conn:
        db_tree = Tree(conn)
        root_id = db_tree.get_root_id(project_id)
        project = db_tree.get_project(project_id)
        tree = db_tree.dump_tree(root_id)

    # Recluster unapproved objects
    print("Reclustering...")
    recluster = Recluster()
    recluster.load_tree(tree)

    # Load every configured feature file into the reclusterer.
    for features_fn in config["RECLUSTER_FEATURES"]:
        recluster.load_features(features_fn)

    # Cluster 1M objects maximum
    # sample_size = int(1e6)
    # NOTE(review): sample_size=1000 contradicts the comment above and looks
    # like a debugging leftover — confirm the intended sample size.
    sample_size = 1000

    recluster.cluster(
        ignore_approved=True,
        sample_size=sample_size,
        min_cluster_size=min_cluster_size,
        min_samples=1,
        cluster_selection_method="leaf",
    )

    tree = recluster.merge_trees()

    # Load new tree into the database
    print("Loading tree into database...")
    # The new project is named after the original plus the cluster size.
    project_name = "{}-{}".format(project["name"], min_cluster_size)
    with database.engine.connect() as conn:
        db_tree = Tree(conn)
        with conn.begin():
            project_id = db_tree.load_project(project_name, tree)
            root_id = db_tree.get_root_id(project_id)

            print("Consolidating ...")
            db_tree.consolidate_node(root_id)

        print("Root ID: {}".format(root_id))
        print("Project ID: {}".format(project_id))

    print("Done.")