예제 #1
0
def upload(workspace: str, graph: str) -> Any:
    """Create a graph in the database from d3-style JSON in the request body.

    `workspace` - the target workspace
    `graph` - the name of the graph to create
    `data` - the json payload, read from the request body; it is expected to
    carry `nodes` and `links` lists

    Raises `AlreadyExists` when the workspace already has a graph with this
    name, and `ValidationFailed` when `validate_d3_json` reports errors.
    Returns a dict holding the number of nodes and links in the payload.
    """
    ws = Workspace(workspace)
    if ws.has_graph(graph):
        raise AlreadyExists("graph", graph)

    # Parse the request body as json, keeping each object's key order.
    payload = json.load(
        StringIO(decode_data(request.data)), object_pairs_hook=OrderedDict
    )

    # Refuse payloads that do not pass structural validation.
    problems = validate_d3_json(payload)
    if len(problems) > 0:
        raise ValidationFailed(problems)

    nodes_name = f"{graph}_nodes"
    links_name = f"{graph}_links"

    def obtain_table(name, is_edge):
        # Reuse the table when it already exists, otherwise create it.
        if not ws.has_table(name):
            return ws.create_table(name, edge=is_edge)
        return ws.table(name)

    # Rename the d3 fields to their arango counterparts, in place.
    nodes = payload["nodes"]
    for entry in nodes:
        entry["_key"] = str(entry.pop("id"))

    links = payload["links"]
    for entry in links:
        origin = entry.pop("source")
        destination = entry.pop("target")
        entry["_from"] = f"{nodes_name}/{origin}"
        entry["_to"] = f"{nodes_name}/{destination}"

    # Node table first, then edge table.
    node_table = obtain_table(nodes_name, False)
    edge_table = obtain_table(links_name, True)

    # Store the documents and register the graph on top of the edge table.
    node_table.insert(nodes)
    edge_table.insert(links)
    ws.create_graph(graph, links_name)

    return {"nodecount": len(nodes), "edgecount": len(links)}
예제 #2
0
def upload(workspace: str, graph: str) -> Any:
    """
    Store a nested_json tree into the database in coordinated node and edge tables.

    `workspace` - the target workspace.
    `graph` - the target graph.
    `data` - the nested_json data, passed in the request body.

    Raises `AlreadyExists` if the workspace already has a graph named `graph`.
    Returns a dict with the number of edges, internal nodes and leaf nodes
    produced by the analysis.
    """
    loaded_workspace = Workspace(workspace)
    if loaded_workspace.has_graph(graph):
        raise AlreadyExists("graph", graph)

    # Set up the parameters.
    data = request.data.decode("utf8")

    edgetable_name = f"{graph}_edges"
    int_nodetable_name = f"{graph}_internal_nodes"
    leaf_nodetable_name = f"{graph}_leaf_nodes"

    # Analyze the nested_json data before touching the database, so that a
    # payload that fails analysis does not leave newly created, empty tables
    # behind.
    (nodes, edges) = analyze_nested_json(data, int_nodetable_name,
                                         leaf_nodetable_name)
    # `nodes` holds the internal nodes first and the leaf nodes second.
    int_nodes, leaf_nodes = nodes[0], nodes[1]

    def get_or_create_table(name: str, edge: bool) -> Any:
        """Return the named table, creating it first if it is missing."""
        if loaded_workspace.has_table(name):
            return loaded_workspace.table(name)
        return loaded_workspace.create_table(name, edge=edge)

    # Set up the database targets (edge table, then internal, then leaf).
    edgetable = get_or_create_table(edgetable_name, edge=True)
    int_nodetable = get_or_create_table(int_nodetable_name, edge=False)
    leaf_nodetable = get_or_create_table(leaf_nodetable_name, edge=False)

    # Upload the data to the database.
    edgetable.insert(edges)
    int_nodetable.insert(int_nodes)
    leaf_nodetable.insert(leaf_nodes)

    # Create graph
    loaded_workspace.create_graph(graph, edgetable_name)

    return {
        "edgecount": len(edges),
        "int_nodecount": len(int_nodes),
        "leaf_nodecount": len(leaf_nodes),
    }
예제 #3
0
def link_generator(loaded_workspace: Workspace,
                   loaded_graph: Graph) -> Generator[str, None, None]:
    """Yield the graph's edges as fragments of a JSON list of links.

    Every edge row has its `_from`/`_to` keys replaced by `source`/`target`
    and is serialized as compact JSON. Each fragment after the first is
    prefixed with a comma, so the fragments can be concatenated into the body
    of a JSON array.
    """
    # Matches `<table>_nodes/<rest>`; joining the two groups gives the same
    # reference with the `_nodes` suffix removed.
    nodes_suffix = re.compile(r"^([^\d_]\w+)_nodes(/.+)")

    # Kept as a list so the loop still applies in the future case of
    # multiple edge tables.
    edge_tables: List[str] = [loaded_graph.edge_table()]

    separator = ""
    for table_name in edge_tables:
        for edge in loaded_workspace.table(table_name).rows()["rows"]:
            raw_source, raw_target = edge["_from"], edge["_to"]
            source_hit = nodes_suffix.search(raw_source)
            target_hit = nodes_suffix.search(raw_target)

            # The suffix is only stripped when BOTH endpoints match.
            strip = source_hit is not None and target_hit is not None
            edge["source"] = (
                "".join(source_hit.groups()) if strip else raw_source
            )
            edge["target"] = (
                "".join(target_hit.groups()) if strip else raw_target
            )
            del edge["_from"], edge["_to"]

            yield separator + json.dumps(edge, separators=(",", ":"))
            separator = ","
예제 #4
0
def download(workspace: str, table: str) -> Any:
    """
    Stream a table from the database to the client as a CSV attachment.

    `workspace` - the target workspace
    `table` - the target table

    Raises `NotFound` if the workspace has no table with that name.
    Returns a `Response` whose body is generated row by row.
    """
    ws = Workspace(workspace)
    if not ws.has_table(table):
        raise NotFound("table", table)

    source_table = ws.table(table)
    rows = source_table.rows()["rows"]
    columns = source_table.headers()

    def stream_csv() -> Generator[str, None, None]:
        def render(emit):
            # Each chunk is written through its own writer and buffer so the
            # yielded string contains exactly one CSV line.
            buffer = StringIO()
            emit(csv.DictWriter(buffer, fieldnames=columns))
            return buffer.getvalue()

        # Header line first, then one line per filtered document.
        yield render(lambda writer: writer.writeheader())
        for doc in generate_filtered_docs(rows):
            yield render(lambda writer: writer.writerow(doc))

    response = Response(stream_csv(), mimetype="text/csv")
    response.headers["Content-Disposition"] = (
        f"attachment; filename={table}.csv"
    )
    response.headers["Content-type"] = "text/csv"
    return response
예제 #5
0
def node_generator(loaded_workspace: Workspace,
                   loaded_graph: Graph) -> Generator[str, None, None]:
    """Yield the graph's nodes as fragments of a JSON list of nodes.

    Rows from every node table of the graph are emitted in order, each with
    its `_key` exposed as `id` and serialized as compact JSON. Each fragment
    after the first is prefixed with a comma.
    """
    separator = ""
    for table_name in loaded_graph.node_tables():
        rows = loaded_workspace.table(table_name).rows()["rows"]
        for row in rows:
            # Move the value stored under `_key` to `id` (in place).
            row["id"] = row.pop("_key")

            yield separator + json.dumps(row, separators=(",", ":"))
            separator = ","
예제 #6
0
def upload(workspace: str, graph: str) -> Any:
    """
    Load a newick tree from the request body into node and edge tables.

    `workspace` - the target workspace.
    `graph` - the target graph.
    `data` - the newick data, passed in the request body.

    Raises `AlreadyExists` if the workspace already has a graph named `graph`.
    Returns a dict with the number of edges inserted and nodes visited.
    """
    app.logger.info("newick tree")

    ws = Workspace(workspace)
    if ws.has_graph(graph):
        raise AlreadyExists("graph", graph)

    tree = newick.loads(decode_data(request.data))
    validate_newick(tree)

    edges_name = f"{graph}_edges"
    nodes_name = f"{graph}_nodes"

    # Note that the edge table must be created with edge=True or the _from
    # and _to keys will be ignored on insert below.
    edge_table = (
        ws.table(edges_name)
        if ws.has_table(edges_name)
        else ws.create_table(edges_name, edge=True)
    )
    node_table = (
        ws.table(nodes_name)
        if ws.has_table(nodes_name)
        else ws.create_table(nodes_name, edge=False)
    )

    tally = {"edgecount": 0, "nodecount": 0}

    def walk(parent: Optional[str], node: newick.Node) -> None:
        # Unnamed nodes receive a random hex key.
        key = node.name or uuid.uuid4().hex
        if not node_table.row(key):
            node_table.insert([{"_key": key}])
        tally["nodecount"] += 1

        for child in node.descendants:
            walk(key, child)

        # The root is visited with no parent and therefore adds no edge.
        if not parent:
            return
        link = {
            "_from": f"{nodes_name}/{parent}",
            "_to": f"{nodes_name}/{key}",
            "length": node.length,
        }
        edge_table.insert([link])
        tally["edgecount"] += 1

    walk(None, tree[0])

    ws.create_graph(graph, edges_name)

    return {"edgecount": tally["edgecount"], "nodecount": tally["nodecount"]}