def load_sif(args):
    """Load a tab-separated SIF interaction file into a graph.

    Each non-blank input line is split on tabs and read as
    ``source <TAB> edge label <TAB> target``.  Rows whose source or target
    id starts with ``CHEBI:`` are dropped.  Every remaining endpoint is added
    as a ``Protein`` vertex, then the edges are added in bulk batches.

    Args:
        args: object providing ``server`` (passed to ``aql.Connection``),
            ``db`` (graph name) and ``input`` (path of the SIF file).

    Raises:
        IndexError: if a non-blank line has fewer than three tab-separated
            fields.
    """
    conn = aql.Connection(args.server)
    graph = conn.graph(args.db)

    proteins = set()
    rows = []
    with open(args.input) as handle:
        for line in handle:
            line = line.rstrip()
            if not line:
                # A blank (e.g. trailing) line has no columns to index.
                continue
            row = line.split("\t")
            source, target = row[0], row[2]
            if source.startswith("CHEBI:") or target.startswith("CHEBI:"):
                continue
            rows.append(row)
            proteins.add(source)
            proteins.add(target)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Loading Proteins")
    for protein in proteins:
        graph.addVertex(protein, "Protein", {})

    def chunks(seq, size):
        """Yield successive size-sized chunks from seq."""
        for start in range(0, len(seq), size):
            yield seq[start:start + size]

    loaded = 0
    for chunk in chunks(rows, 10000):
        batch = graph.bulkAdd()
        for row in chunk:
            batch.addEdge(row[0], row[2], row[1], {})
        batch.commit()
        loaded += len(chunk)
        print("Loaded %s edges" % loaded)
def run(args):
    """Print a per-sample mutation status table for one gene.

    Starting from the ``gene:<ensembl id>`` vertex, the first query walks
    ``variantIn`` -> ``variantCall`` (method ``MUTECT``) -> ``callSetOf``
    (sample type ``Primary Tumor``) -> ``fileFor``.  Every result whose
    variant has ``alternateBases`` different from ``referenceBases`` gives
    its ``individual_barcode`` the status 2.  Every other ``CELFile`` vertex
    barcode then gets the status 1.  One tab-separated line
    ``barcode, barcode, status`` is printed per barcode.

    Args:
        args: object providing ``arachne`` (server address), ``graph``
            (graph name), ``gene`` (symbol handed to
            ``conversions.hugo_ensembl``) and ``project`` (optional filter;
            ``None`` disables it).
    """
    connection = aql.Connection(args.arachne)
    graph = connection.graph(args.graph)
    gene_vertex = "gene:" + conversions.hugo_ensembl(args.gene)

    status_by_barcode = {}

    mutated = (
        graph.query().V(gene_vertex)
        .in_("variantIn").mark("variant")
        .out("variantCall").where(aql.eq("method", "MUTECT")).mark("callset")
        .out("callSetOf").where(aql.eq("sample_type", "Primary Tumor")).mark("sample")
        .in_("fileFor").mark("cel")
    )
    if args.project is not None:
        mutated = mutated.where(aql.eq("project", args.project))
    # NOTE(review): "cel" is marked a second time here; looks redundant with
    # the mark above, kept as-is -- confirm before removing.
    mutated = mutated.mark("cel").select(["variant", "sample", "callset", "cel"])

    for hit in mutated:
        variant = hit.variant.data
        if variant.alternateBases != variant.referenceBases:
            # First status recorded for a barcode wins.
            status_by_barcode.setdefault(hit.cel.data.individual_barcode, 2)

    cel_files = graph.query().V().where(aql.eq("_label", "CELFile"))
    if args.project is not None:
        cel_files = cel_files.where(aql.eq("project", args.project))

    for vertex in cel_files:
        # Barcodes already flagged as mutated keep their status.
        status_by_barcode.setdefault(vertex.data.individual_barcode, 1)

    for barcode, status in status_by_barcode.items():
        print(barcode, barcode, status, sep="\t")
def load_matrix(args):
    """Load a tab-separated matrix as Sample and Data vertices.

    The file is read with its first column as the index and then transposed.
    For each column of the transposed matrix, a ``Protein`` vertex is added
    when the graph's count query for that id reports 0.  For each row, a
    ``Sample`` vertex, a ``Data:<data_type>`` vertex holding the row's
    non-NaN values, and a ``has`` edge from the sample to the data vertex
    are added.

    When ``args.debug`` is true, no graph writes are made; the operations
    are printed instead.

    Args:
        args: object providing ``server``, ``db``, ``input``, ``data_type``
            and ``debug``.
    """
    connection = aql.Connection(args.server)
    graph = connection.graph(args.db)
    matrix = pandas.read_csv(args.input, sep="\t", index_col=0).transpose()

    for column in matrix.columns:
        existing = list(graph.query().V(column).count())[0]["count"]
        if existing != 0:
            continue
        if args.debug:
            print("AddVertex", column)
        else:
            graph.addVertex(column, "Protein")

    for name, row in matrix.iterrows():
        src = "%s:%s" % (args.data_type, name)
        print("Loading: %s" % (src))

        # Keep only the cells that actually carry a value.
        data = {}
        for column in matrix.columns:
            value = row[column]
            if math.isnan(value):
                continue
            data[column] = value

        if args.debug:
            print("Add Vertex", name)
            print("AddVertex", "Data:%s" % (args.data_type))
            print("AddEdge", name)
        else:
            graph.addVertex(name, "Sample")
            graph.addVertex(src, "Data:%s" % (args.data_type), data)
            graph.addEdge(name, src, "has")
def load_matrix(args):
    """Load each row of a tab-separated matrix as a ``Sample`` vertex.

    The file is read with its first column as the index.  For every row, a
    vertex with the row's index as id and the label ``Sample`` is added; its
    data holds every cell of the row except float NaN cells.

    Args:
        args: object providing ``server`` (passed to ``aql.Connection``),
            ``db`` (graph name) and ``input`` (path of the matrix file).
    """
    conn = aql.Connection(args.server)
    graph = conn.graph(args.db)
    matrix = pandas.read_csv(args.input, sep="\t", index_col=0)
    for name, row in matrix.iterrows():
        # Series.items() replaces Series.iteritems(), which was removed in
        # pandas 2.0; both yield (label, value) pairs.
        data = {
            key: value
            for key, value in row.items()
            # Non-float cells are kept as they are; only float NaN is dropped.
            if not (isinstance(value, float) and math.isnan(value))
        }
        graph.addVertex(name, "Sample", data)
#!/usr/bin/env python import aql import json conn = aql.Connection("http://localhost:8201") conn.delete("test-graph") conn.new("test-graph") O = conn.graph("test-graph") O.addVertex("1", "Person", {"name": "marko", "age": "29"}) O.addVertex("2", "Person", {"name": "vadas", "age": "27"}) O.addVertex("3", "Software", {"name": "lop", "lang": "java"}) O.addVertex("4", "Person", {"name": "josh", "age": "32"}) O.addVertex("5", "Software", {"name": "ripple", "lang": "java"}) O.addVertex("6", "Person", {"name": "peter", "age": "35"}) O.addEdge("1", "3", "created", {"weight": 0.4}) O.addEdge("1", "2", "knows", {"weight": 0.5}) O.addEdge("1", "4", "knows", {"weight": 1.0}) O.addEdge("4", "3", "created", {"weight": 0.4}) O.addEdge("6", "3", "created", {"weight": 0.2}) O.addEdge("4", "5", "created", {"weight": 1.0}) """ query = O.query().V() """ query = O.query().V().match([ O.mark('a').outgoing('created').mark('b'), O.mark('b').has('name', 'lop'),
BASE = os.path.dirname(os.path.abspath(__file__)) TESTS = os.path.join(BASE, "tests") AQL = os.path.join(os.path.dirname(BASE), "aql", "python") GRAPH = "test_graph" sys.path.append(AQL) import aql if __name__ == "__main__": server = sys.argv[1] if len(sys.argv) > 2: tests = sys.argv[2:] else: tests = [] conn = aql.Connection(server) if GRAPH in conn.listGraphs(): print(list(conn.graph(GRAPH).query().V().count())[0]) if int(list(conn.graph(GRAPH).query().V().count())[0]['count']) != 0: print("Need to start with empty DB: %s" % (GRAPH)) sys.exit() correct = 0 total = 0 for a in glob(os.path.join(TESTS, "ot_*.py")): name = os.path.basename(a)[:-3] if len(tests) == 0 or name[3:] in tests: mod = imp.load_source('test.%s' % name, a) for f in dir(mod): if f.startswith("test_"): func = getattr(mod, f)