def insert_nodes_orders(self): graphDB = InitilizeDB.init() try: graphDB.run( 'CREATE CONSTRAINT ON (o:ORDER) ASSERT o.id IS UNIQUE;') except: pass graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS ' 'FROM "file:///orders_neo4j.csv" AS line FIELDTERMINATOR "|" ' # 'WITH DISTINCT line, SPLIT(line.O_ORDERDATE, " / ") AS date ' 'CREATE (order:ORDER { id: TOINTEGER(line.O_ORDERKEY) }) ' 'SET order.O_CUSTKEY = TOINTEGER(line.O_CUSTKEY),' ' order.O_ORDERSTATUS = line.O_ORDERSTATUS, ' ' order.O_ORDERKEY = line.O_ORDERKEY, ' ' order.O_TOTALPRICE = TOFLOAT(line.O_TOTALPRICE),' ' order.O_ORDERDATE = line.O_ORDERDATE,' ' order.O_ORDERPRIORITY = line.O_ORDERPRIORITY,' ' order.O_CLERK = line.O_CLERK,' ' order.O_SHIPPRIORITY = line.O_SHIPPRIORITY,' ' order.O_COMMENT = line.O_COMMENT,' ' order.O_YEAR = TOINTEGER(line.O_YEAR),' ' order.O_MONTH = TOINTEGER(line.O_MONTH),' ' order.O_DAY = TOINTEGER(line.O_DAY);')
def insert_relation_part_partsupp(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS FROM "file:///rel_part_partsupp.csv" AS row FIELDTERMINATOR "|" ' 'MATCH (partsupp:PARTSUPP {PS_PARTKEY: toInteger(row.P_PARTKEY)}) ' 'MATCH (part:PART {id: toInteger(row.P_PARTKEY)}) ' 'MERGE (partsupp)-[:COMPOSED_BY_2]->(part); ')
def insert_relation_nation_supplier(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS FROM "file:///rel_nation_supplier.csv" AS row FIELDTERMINATOR "|" ' 'MATCH (supplier:SUPPLIER {id: toInteger(row.S_SUPPKEY)}) ' 'MATCH (nation:NATION {id: toInteger(row.S_NATIONKEY)}) ' 'MERGE (supplier)-[:BELONGS_TO_1]->(nation); ')
def insert_relation_nation_region(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS FROM "file:///rel_nation_region.csv" AS row FIELDTERMINATOR "|" ' 'MATCH (nation:NATION {id: toInteger(row.N_NATIONKEY)}) ' 'MATCH (region:REGION {id: toInteger(row.N_REGIONKEY)}) ' 'MERGE (nation)-[:FROM_10]->(region); ')
def insert_relation_customer_nation(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS FROM "file:///rel_customer_nation.csv" AS row FIELDTERMINATOR "|" ' 'MATCH (customer:CUSTOMER {id: toInteger(row.C_CUSTKEY)}) ' 'MATCH (nation:NATION {id: toInteger(row.C_NATIONKEY)}) ' 'MERGE (customer)-[:FROM_4]->(nation); ')
def insert_relation_orders_customer(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS FROM "file:///rel_orders_customer.csv" AS row FIELDTERMINATOR "|" ' 'MATCH (orders:ORDER {id: toInteger(row.O_ORDERKEY)}) ' 'MATCH (customer:CUSTOMER {id: toInteger(row.O_CUSTKEY)}) ' 'MERGE (orders)-[:BY_5]->(customer); ')
def insert_relation_supplier_partsupp(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS FROM "file:///rel_supplier_partsupp.csv" AS row FIELDTERMINATOR "|" ' 'MATCH (partsupp:PARTSUPP {PS_PARTKEY:toInteger(row.PS_PARTKEY) , PS_SUPPKEY: ' 'toInteger(row.PS_SUPPKEY)}) ' 'MATCH (supplier:SUPPLIER {id: toInteger(row.PS_SUPPKEY)}) ' 'MERGE (partsupp)-[:SUPPLIED_BY_3]->(supplier); ')
def insert_relation_lineitem_supplier(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS FROM "file:///rel_lineitem_supplier.csv" AS row FIELDTERMINATOR "|" ' 'MATCH (lineitem:LINEITEM {L_ORDERKEY: toInteger(row.L_ORDERKEY), L_PARTKEY:' 'toInteger(row.L_PARTKEY), L_SUPPKEY: toInteger(row.L_SUPPKEY),L_LINENUMBER:' 'toInteger(row.L_LINENUMBER) }) ' 'MATCH (supplier:SUPPLIER {id: toInteger(row.L_SUPPKEY)}) ' 'MERGE (lineitem)-[:SUPPLIED_BY_9]->(supplier);')
def insert_relation_lineitem_orders(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS FROM "file:///rel_lineitem_orders.csv" AS row FIELDTERMINATOR "|" ' 'MATCH (lineitem:LINEITEM {L_ORDERKEY: toInteger(row.L_ORDERKEY), L_PARTKEY: ' 'toInteger(row.L_PARTKEY), L_SUPPKEY: toInteger(row.L_SUPPKEY),L_LINENUMBER: ' 'toInteger(row.L_LINENUMBER) }) ' 'MATCH (orders:ORDER {id: toInteger(row.L_ORDERKEY), O_CUSTKEY: toInteger(row.O_CUSTKEY)}) ' 'MERGE (lineitem)-[:BELONGS_TO_7]->(orders); ')
def insert_relation_lineitem_part(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS FROM "file:///rel_lineitem_part.csv" AS row FIELDTERMINATOR "|" ' 'MATCH (lineitem:LINEITEM {L_ORDERKEY: toInteger(row.L_ORDERKEY), L_PARTKEY: ' 'toInteger(row.L_PARTKEY), L_SUPPKEY: toInteger(row.L_SUPPKEY),L_LINENUMBER: ' 'toInteger(row.L_LINENUMBER) }) ' 'MATCH (part:PART {id: toInteger(row.L_PARTKEY), P_NAME: row.P_NAME }) ' 'MERGE (lineitem)-[:COMPOSED_BY_8]->(part); ')
def insert_nodes_region(self): graphDB = InitilizeDB.init() try: graphDB.run( 'CREATE CONSTRAINT ON (r:REGION) ASSERT r.id IS UNIQUE;') except: pass graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS ' 'FROM "file:///region.csv" AS line FIELDTERMINATOR "|"' 'CREATE (region:REGION { id: TOINTEGER(line.R_REGIONKEY) }) ' 'SET region.R_NAME = line.R_NAME, ' ' region.R_COMMENT = line.R_COMMENT; ')
def insert_nodes_partsupp(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS ' 'FROM "file:///partsupp.csv" AS line FIELDTERMINATOR "|" ' 'CREATE (partsupp:PARTSUPP { PS_PARTKEY: TOINTEGER(line.PS_PARTKEY) }) ' 'SET partsupp.PS_SUPPKEY = TOINTEGER(line.PS_SUPPKEY), ' ' partsupp.PS_AVAILQTY = TOINTEGER(line.PS_AVAILQTY), ' ' partsupp.PS_SUPPLYCOST = TOFLOAT(line.PS_SUPPLYCOST), ' ' partsupp.PS_COMMENT = line.PS_COMMENT; ') graphDB.run('CREATE INDEX ON :PARTSUPP(PS_PARTKEY); ') graphDB.run('CREATE INDEX ON :PARTSUPP(PS_SUPPKEY); ')
def insert_nodes_nation(self): graphDB = InitilizeDB.init() # graphDB.run( # 'CREATE CONSTRAINT ON (n:NATION) ASSERT n.id IS UNIQUE; ' # ) graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS ' 'FROM "file:///nation.csv" AS line FIELDTERMINATOR "|" ' 'CREATE (nation:NATION { id: TOINTEGER(line.N_NATIONKEY) }) ' 'SET nation.N_NAME = line.N_NAME, ' 'nation.N_REGIONKEY = line.N_REGIONKEY, ' 'nation.N_COMMENT = line.N_COMMENT; ')
def delete_node(self): graphDB = InitilizeDB.init() # [LINEITEM | CUSTOMER | NATION | REGION | SUPPLIER | ORDER | PARTSUPP | PART] for i in range(5): graphDB.run( "MATCH (n) " "WITH n LIMIT 100000 " "DETACH DELETE n; " ) # graphDB.run( # 'MATCH (n:LINEITEM) WITH n LIMIT 400000 DETACH DELETE n; ' # ) print("Deleted node")
def insert_nodes_part(self): graphDB = InitilizeDB.init() try: graphDB.run('CREATE CONSTRAINT ON (p:PART) ASSERT p.id IS UNIQUE;') except: pass graphDB.run('USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS ' 'FROM "file:///part.csv" AS line FIELDTERMINATOR "|" ' 'CREATE (part:PART { id: TOINTEGER(line.P_PARTKEY) }) ' 'SET part.P_NAME =line.P_NAME,' ' part.P_MFGR = line.P_MFGR,' ' part.P_BRAND = line.P_BRAND,' ' part.P_TYPE = line.P_TYPE,' ' part.P_SIZE = TOINTEGER(line.P_SIZE),' ' part.P_CONTAINER = line.P_CONTAINER,' ' part.P_RETAILPRICE =TOFLOAT( line.P_RETAILPRICE),' ' part.P_COMMENT = line.P_COMMENT; ')
def insert_nodes_customer(self): graphDB = InitilizeDB.init() # graphDB.run( # 'CREATE CONSTRAINT ON (c:CUSTOMER) ASSERT c.id IS UNIQUE;' # ) graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS ' 'FROM "file:///customer.csv" AS line FIELDTERMINATOR "|" ' 'CREATE (customer:CUSTOMER { id: TOINTEGER(line.C_CUSTKEY) } ) ' 'SET customer.C_NAME = line.C_NAME, ' 'customer.C_ADDRESS = line.C_ADDRESS, ' 'customer.C_NATIONKEY = line.C_NATIONKEY, ' 'customer.C_PHONE = line.C_PHONE, ' 'customer.C_ACCTBAL = toFloat(line.C_ACCTBAL), ' 'customer.C_MKTSEGMENT = line.C_MKTSEGMENT, ' 'customer.C_COMMENT = line.C_COMMENT; ')
def insert_nodes_supplier(self): graphDB = InitilizeDB.init() try: graphDB.run( 'CREATE CONSTRAINT ON (s:SUPPLIER) ASSERT s.id IS UNIQUE; ') except: pass graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS ' 'FROM "file:///supplier.csv" AS line FIELDTERMINATOR "|" ' 'CREATE (supplier:SUPPLIER { id: TOINTEGER(line.S_SUPPKEY) }) ' 'SET supplier.S_NAME = line.S_NAME, ' ' supplier.S_ADDRESS = line.S_ADDRESS, ' ' supplier.S_NATIONKEY = line.S_NATIONKEY, ' ' supplier.S_PHONE = line.S_PHONE, ' ' supplier.S_ACCTBAL = line.S_ACCTBAL, ' ' supplier.S_COMMENT = line.S_COMMENT; ')
def execute(self): try: graphDB = InitilizeDB.init() start_time = time.time() result = graphDB.run( "WITH date('1998-12-01') - duration('P90D') AS my_date " "MATCH (item:LINEITEM) " "WHERE date(item.L_SHIPDATE) <= date(my_date) " "RETURN item.L_RETURNFLAG,item.L_LINESTATUS,sum(item.L_QUANTITY) AS sum_qty, sum(item.L_EXTENDEDPRICE) AS sum_base_price, sum(item.L_EXTENDEDPRICE*(1-item.L_DISCOUNT)) AS sum_disc_price,sum(item.L_EXTENDEDPRICE*(1-item.L_DISCOUNT)*(1+item.L_TAX)) AS sum_charge,avg(item.L_QUANTITY) AS avg_qty, avg(item.L_EXTENDEDPRICE) AS avg_price, avg(item.L_DISCOUNT) AS avg_disc, COUNT(*) AS count_order " "ORDER BY item.L_RETURNFLAG, item.L_LINESTATUS; ") print(result) end_time = time.time() print("---------------Query 1-------------") print("Start time: " + str(start_time)) print("End time: " + str(end_time)) print("In seconds: " + str("{:.7f}".format(end_time - start_time))) except: print("py2neo ERROR:")
def execute(self): try: graphDB = InitilizeDB.init() start_time = time.time() result = graphDB.run( "MATCH (lineitem:LINEITEM)-[:BELONGS_TO_7]->(order:ORDER)-[:BY_5]->(customer:CUSTOMER) " "WHERE customer.C_MKTSEGMENT = 'BUILDING' AND date(order.O_ORDERDATE) < date('1995-03-15') AND date(lineitem.L_SHIPDATE) > date('1995-03-15') " "RETURN order.id, sum(lineitem.L_EXTENDEDPRICE*(1-lineitem.L_DISCOUNT)) AS REVENUE, order.O_ORDERDATE, order.O_SHIPPRIORITY " "ORDER BY REVENUE DESC, order.O_ORDERDATE " "LIMIT 10; ") print(result) end_time = time.time() print("---------------Query 3-------------") print("Start time: " + str(start_time)) print("End time: " + str(end_time)) print("In seconds: " + str("{:.7f}".format(end_time - start_time))) except: print("py2neo ERROR:")
def execute(self): try: graphDB = InitilizeDB.init() start_time = time.time() result = graphDB.run( "WITH date('1993-07-01') + duration('P3M') AS my_date " "MATCH (lineitem: LINEITEM)-[:BELONGS_TO_7]->(order: ORDER) " "WHERE date(lineitem.L_COMMITDATE) < date(lineitem.L_RECEIPTDATE) AND date(order.O_ORDERDATE) >= date('1993-07-01') AND date(order.O_ORDERDATE) < date(my_date) " "RETURN order.O_ORDERPRIORITY, COUNT(*) AS ORDER_COUNT " "ORDER BY order.O_ORDERPRIORITY; ") print(result) end_time = time.time() print("---------------Query 4-------------") print("Start time: " + str(start_time)) print("End time: " + str(end_time)) print("In seconds: " + str("{:.7f}".format(end_time - start_time))) except: print("py2neo ERROR:")
def execute(self): try: graphDB = InitilizeDB.init() start_time = time.time() result = graphDB.run( "WITH date('1994-01-01') + duration('P1Y') AS my_date " "MATCH (lineitem: LINEITEM)-[:BELONGS_TO_7]->(order: ORDER)-[:BY_5]->(customer: CUSTOMER)-[:FROM_4]->(nation: NATION)-[:FROM_10]-(region: REGION) " "WHERE region.R_NAME = 'ASIA' AND date(order.O_ORDERDATE) >= date('1994-01-01') AND date(order.O_ORDERDATE) < date(my_date) " "RETURN nation.N_NAME, SUM(lineitem.L_EXTENDEDPRICE * (1 - lineitem.L_DISCOUNT)) AS REVENUE " "ORDER BY REVENUE DESC; ") print(result) end_time = time.time() print("---------------Query 5-------------") print("Start time: " + str(start_time)) print("End time: " + str(end_time)) print("In seconds: " + str("{:.7f}".format(end_time - start_time))) except: print("py2neo ERROR:")
def main(): print("Neo4j db started...") db = InitilizeDB.init() insert_data = False if insert_data: insert_task = InsertData(db) insert_task.insert_nodes() insert_task.insert_relations() delete_data = False if delete_data: delete_task = DeleteData(db) delete_task.delete_nodes() run_queries = True if run_queries: query = RunQueries() query.run_queries() print("Successful!")
def execute(self): try: graphDB = InitilizeDB.init() start_time = time.time() result = graphDB.run( "MATCH (ps:PARTSUPP)-[:SUPPLIED_BY_3]->(s:SUPPLIER)-[:BELONGS_TO_1]->(n:NATION)-[:FROM_10]->(r:REGION) " "where r.R_NAME = 'EUROPE' with ps, min(ps.PS_SUPPLYCOST) as minvalue " "MATCH (ps:PARTSUPP)-[:COMPOSED_BY_2]->(p:PART) " "MATCH (ps:PARTSUPP)-[:SUPPLIED_BY_3]->(s:SUPPLIER)-[:BELONGS_TO_1]->(n:NATION)-[:FROM_10]->(r:REGION) " "where p.P_SIZE = 15 and p.P_TYPE =~ '.*BRASS.*' and r.R_NAME = 'EUROPE' and ps.PS_SUPPLYCOST = minvalue " "return p.id,p.P_MFGR,s.S_ACCTBAL,s.S_NAME,s.S_ADDRESS,s.S_PHONE,s.S_COMMENT,n.N_NAME, r.R_NAME " "order by s.S_ACCTBAL desc,n.N_NAME,s.S_NAME,p.id; ") print(result) end_time = time.time() print("---------------Query 2-------------") print("Start time: " + str(start_time)) print("End time: " + str(end_time)) print("In seconds: " + str("{:.7f}".format(end_time - start_time))) except: print("py2neo ERROR:")
def insert_nodes_lineItem(self): graphDB = InitilizeDB.init() graphDB.run( 'USING PERIODIC COMMIT ' 'LOAD CSV WITH HEADERS ' 'FROM "file:///lineitem_neo4j.csv" AS line FIELDTERMINATOR "|" ' 'CREATE (lineItem:LINEITEM) ' 'SET lineItem.L_ORDERKEY = toInteger(line.L_ORDERKEY), ' ' lineItem.L_PARTKEY = toInteger(line.L_PARTKEY), ' ' lineItem.L_SUPPKEY = toInteger(line.L_SUPPKEY), ' ' lineItem.L_LINENUMBER = toInteger(line.L_LINENUMBER), ' ' lineItem.L_QUANTITY = toFloat(line.L_QUANTITY), ' ' lineItem.L_EXTENDEDPRICE = toFloat(line.L_EXTENDEDPRICE),' ' lineItem.L_DISCOUNT = toFloat(line.L_DISCOUNT),' ' lineItem.L_TAX = toFloat(line.L_TAX),' ' lineItem.L_RETURNFLAG = line.L_RETURNFLAG,' ' lineItem.L_LINESTATUS = line.L_LINESTATUS,' ' lineItem.L_SHIPDATE = line.L_SHIPDATE,' ' lineItem.L_COMMITDATE = line.L_COMMITDATE,' ' lineItem.L_RECEIPTDATE = line.L_RECEIPTDATE,' ' lineItem.L_SHIPINSTRUCT = line.L_SHIPINSTRUCT,' ' lineItem.L_SHIPMODE = line.L_SHIPMODE,' ' lineItem.L_COMMENT = line.L_COMMENT, ' ' lineItem.L_SHIPDATE_DAY = TOINTEGER(line.L_SHIPDATE_DAY),' ' lineItem.L_SHIPDATE_MONTH = TOINTEGER(line.L_SHIPDATE_MONTH),' ' lineItem.L_SHIPDATE_YEAR = TOINTEGER(line.L_SHIPDATE_YEAR),' ' lineItem.L_COMMITDATE_DAY = TOINTEGER(line.L_COMMITDATE_DAY),' ' lineItem.L_COMMITDATE_MONTH = TOINTEGER(line.L_COMMITDATE_MONTH),' ' lineItem.L_COMMITDATE_YEAR =TOINTEGER(line.L_COMMITDATE_YEAR),' ' lineItem.L_RECEIPTDATE_DAY = TOINTEGER(line.L_RECEIPTDATE_DAY),' ' lineItem.L_RECEIPTDATE_MONTH = TOINTEGER(line.L_RECEIPTDATE_MONTH),' ' lineItem.L_RECEIPTDATE_YEAR = TOINTEGER(line.L_RECEIPTDATE_YEAR);' ) graphDB.run('CREATE INDEX ON :LINEITEM(L_SUPPKEY); ') graphDB.run('CREATE INDEX ON :LINEITEM(L_ORDERKEY); ') graphDB.run('CREATE INDEX ON :LINEITEM(L_PARTKEY); ') graphDB.run('CREATE INDEX ON :LINEITEM(L_LINENUMBER); ')