Example #1
0
def load_sif(args):
    """Load a tab-separated interaction file into a graph.

    Each input line is split on tabs; column 0 is used as the source
    vertex id, column 1 as the edge label and column 2 as the target
    vertex id.  Rows where either endpoint id starts with ``CHEBI:`` are
    ignored.  Every remaining endpoint is added as a ``Protein`` vertex,
    then the edges are written in bulk batches of 10000.

    Args:
        args: object providing ``server`` (connection URL), ``db`` (graph
            name) and ``input`` (path of the file to read).
    """
    conn = aql.Connection(args.server)
    O = conn.graph(args.db)
    proteins = set()
    rows = []
    with open(args.input) as handle:
        for line in handle:
            row = line.rstrip().split("\t")
            # Blank or truncated lines do not carry the full
            # source/label/target triple; skip them instead of failing
            # with an IndexError on row[2].
            if len(row) < 3:
                continue
            if row[0].startswith("CHEBI:") or row[2].startswith("CHEBI:"):
                continue
            rows.append(row)
            proteins.add(row[0])
            proteins.add(row[2])

    # print() with a single argument behaves the same on Python 2 and 3.
    print("Loading Proteins")
    for protein in proteins:
        O.addVertex(protein, "Protein", {})

    def chunks(l, n):
        """Yield successive n-sized chunks from l."""
        for start in range(0, len(l), n):
            yield l[start:start + n]

    loaded = 0
    for chunk in chunks(rows, 10000):
        # One bulk request per chunk.
        b = O.bulkAdd()
        for row in chunk:
            b.addEdge(row[0], row[2], row[1], {})
        b.commit()
        loaded += len(chunk)
        print("Loaded %s edges" % loaded)
Example #2
0
def run(args):
    """Print a per-barcode mutation status table for one gene.

    The gene symbol in ``args.gene`` is converted with
    ``conversions.hugo_ensembl`` and looked up as the vertex
    ``gene:<id>``.  Barcodes reached through a MUTECT call on a
    "Primary Tumor" sample whose variant has alternate bases different
    from its reference bases get status 2; every other ``CELFile``
    barcode gets status 1.  Both queries are restricted to
    ``args.project`` when it is not None.

    Output is one line per barcode: barcode, barcode, status, separated
    by tabs.
    """
    graph = aql.Connection(args.arachne).graph(args.graph)
    gid = "gene:" + conversions.hugo_ensembl(args.gene)

    status_by_barcode = {}

    # Walk gene -> variant -> callset -> sample -> CEL file, marking each hop.
    variant_query = (
        graph.query().V(gid)
        .in_("variantIn").mark("variant")
        .out("variantCall").where(aql.eq("method", "MUTECT")).mark("callset")
        .out("callSetOf").where(aql.eq("sample_type", "Primary Tumor")).mark("sample")
        .in_("fileFor").mark("cel")
    )
    if args.project is not None:
        variant_query = variant_query.where(aql.eq("project", args.project))
    variant_query = variant_query.mark("cel")
    variant_query = variant_query.select(["variant", "sample", "callset", "cel"])

    for hit in variant_query:
        variant = hit.variant.data
        if variant.alternateBases != variant.referenceBases:
            # setdefault keeps the first status recorded for a barcode.
            status_by_barcode.setdefault(hit.cel.data.individual_barcode, 2)

    cel_query = graph.query().V().where(aql.eq("_label", "CELFile"))
    if args.project is not None:
        cel_query = cel_query.where(aql.eq("project", args.project))

    # Barcodes not already flagged above fall back to status 1.
    for cel in cel_query:
        status_by_barcode.setdefault(cel.data.individual_barcode, 1)

    for barcode, status in status_by_barcode.items():
        print(barcode, barcode, status, sep="\t")
Example #3
0
def load_matrix(args):
    """Load a tab-separated matrix file as per-sample data vertices.

    The file at ``args.input`` is read with its first column as the
    index and then transposed.  Each column of the transposed matrix is
    added as a ``Protein`` vertex when the graph reports a count of zero
    for that id.  Each row of the transposed matrix becomes a ``Sample``
    vertex, a ``Data:<data_type>`` vertex holding the row's non-NaN
    values, and a ``has`` edge from the former to the latter.

    When ``args.debug`` is set, the write operations are printed instead
    of being sent to the graph.
    """
    graph = aql.Connection(args.server).graph(args.db)

    matrix = pandas.read_csv(args.input, sep="\t", index_col=0).transpose()
    data_label = "Data:%s" % (args.data_type)

    for column in matrix.columns:
        hits = list(graph.query().V(column).count())
        if hits[0]['count'] != 0:
            # Vertex already present; nothing to add.
            continue
        if args.debug:
            print("AddVertex", column)
        else:
            graph.addVertex(column, "Protein")

    for name, row in matrix.iterrows():
        src = "%s:%s" % (args.data_type, name)
        print("Loading: %s" % (src))

        # Keep only the cells that actually hold a value.
        data = {}
        for column in matrix.columns:
            value = row[column]
            if math.isnan(value):
                continue
            data[column] = value

        if args.debug:
            print("Add Vertex", name)
            print("AddVertex", data_label)
            print("AddEdge", name)
        else:
            graph.addVertex(name, "Sample")
            graph.addVertex(src, data_label, data)
            graph.addEdge(name, src, "has")
Example #4
0
def load_matrix(args):
    """Load each row of a tab-separated table as a ``Sample`` vertex.

    The file at ``args.input`` is read with its first column as the
    index.  For every row, a vertex with the row's index value as id and
    label ``Sample`` is added, carrying all of the row's cells except
    float NaN values.

    Args:
        args: object providing ``server`` (connection URL), ``db`` (graph
            name) and ``input`` (path of the file to read).
    """
    conn = aql.Connection(args.server)
    O = conn.graph(args.db)

    matrix = pandas.read_csv(args.input, sep="\t", index_col=0)

    for name, row in matrix.iterrows():
        # Series.iteritems() was removed in pandas 2.0; items() is the
        # equivalent that works on both old and new releases.
        data = {
            k: v
            for k, v in row.items()
            # Only floats can be NaN; other cell types are kept as-is.
            if not isinstance(v, float) or not math.isnan(v)
        }
        O.addVertex(name, "Sample", data)
Example #5
0
#!/usr/bin/env python

import aql
import json

conn = aql.Connection("http://localhost:8201")

# Delete and re-create "test-graph" before loading the fixture data below.
conn.delete("test-graph")
conn.new("test-graph")
O = conn.graph("test-graph")

# Vertices as (id, label, properties).
for _gid, _label, _props in (
    ("1", "Person", {"name": "marko", "age": "29"}),
    ("2", "Person", {"name": "vadas", "age": "27"}),
    ("3", "Software", {"name": "lop", "lang": "java"}),
    ("4", "Person", {"name": "josh", "age": "32"}),
    ("5", "Software", {"name": "ripple", "lang": "java"}),
    ("6", "Person", {"name": "peter", "age": "35"}),
):
    O.addVertex(_gid, _label, _props)

# Edges as (source id, target id, label, weight).
for _src, _dst, _label, _weight in (
    ("1", "3", "created", 0.4),
    ("1", "2", "knows", 0.5),
    ("1", "4", "knows", 1.0),
    ("4", "3", "created", 0.4),
    ("6", "3", "created", 0.2),
    ("4", "5", "created", 1.0),
):
    O.addEdge(_src, _dst, _label, {"weight": _weight})
"""
query = O.query().V()
"""
query = O.query().V().match([
    O.mark('a').outgoing('created').mark('b'),
    O.mark('b').has('name', 'lop'),
# Directory containing this script.
BASE = os.path.dirname(os.path.abspath(__file__))
# Directory holding the test modules, next to this script.
TESTS = os.path.join(BASE, "tests")
# "aql/python" under the parent of BASE; added to sys.path below so that
# the following `import aql` can find the client module there.
AQL = os.path.join(os.path.dirname(BASE), "aql", "python")
# Name of the graph the test run operates on.
GRAPH = "test_graph"
sys.path.append(AQL)
import aql

if __name__ == "__main__":
    server = sys.argv[1]
    if len(sys.argv) > 2:
        tests = sys.argv[2:]
    else:
        tests = []

    conn = aql.Connection(server)
    if GRAPH in conn.listGraphs():
        print(list(conn.graph(GRAPH).query().V().count())[0])
        if int(list(conn.graph(GRAPH).query().V().count())[0]['count']) != 0:
            print("Need to start with empty DB: %s" % (GRAPH))
            sys.exit()

    correct = 0
    total = 0
    for a in glob(os.path.join(TESTS, "ot_*.py")):
        name = os.path.basename(a)[:-3]
        if len(tests) == 0 or name[3:] in tests:
            mod = imp.load_source('test.%s' % name, a)
            for f in dir(mod):
                if f.startswith("test_"):
                    func = getattr(mod, f)