# Session-level MySQL settings applied before loading: foreign-key checks off,
# unique checks off, READ-UNCOMMITTED isolation, binary logging off.
# NOTE(review): presumably a bulk-load speed-up; confirm these are restored
# once loading has finished.
for sessionSetting in (
    "SET FOREIGN_KEY_CHECKS = 0;",
    "SET UNIQUE_CHECKS = 0;",
    "SET SESSION tx_isolation='READ-UNCOMMITTED'",
    "SET sql_log_bin = 0;",
):
    createDbCursor.execute(sessionSetting)
createDbCursor.close()

# Dictionaries and arrays for SQL queries
snpInserts = {}   # Dictionary for rsid/insert for SNP data
lociInserts = []  # Array for loci insert queries
rsidList = {}     # Dictionary of RSIDs that will also hold the
                  # primary key for each SNP in SQL

# Load each chromosome into the database
for curChr in chromosomes:
    # One Result record per chromosome, labelled with the method and run tag
    result = Result()
    result.method = "MySQL"
    result.tag = tag
    print("Chromosome " + str(curChr))
    result.chromosome = str(curChr)

    # Set file paths for current chromosome
    curSnpFilePath = snpFilePath.format(curChr)
    curLociFilePath = lociFilePath.format(curChr)
    if len(path) > 0:
        # Strip any trailing separator with rstrip. The previous code called
        # rsplit for the loci path, which returns a list and raised TypeError
        # on concatenation. A trailing '/' is stripped too, matching the
        # PostgreSQL loaders.
        pathPrefix = path.rstrip('\\').rstrip('/') + '\\'
        curSnpFilePath = pathPrefix + curSnpFilePath
        curLociFilePath = pathPrefix + curLociFilePath

    # Clear dictionaries for loading multiple chromosomes
    snpInserts.clear()
    lociInserts = []
# Switch off all triggers on the snp and locus tables, then release the cursor
for disableTriggersSql in (
    "ALTER TABLE snp DISABLE trigger ALL;",
    "ALTER TABLE locus DISABLE trigger ALL;",
):
    createDbCursor.execute(disableTriggersSql)
createDbCursor.close()

# Containers used while building the SQL inserts
snpInserts = {}   # rsid -> insert for SNP data
lociInserts = []  # loci insert queries
rsidList = {}     # RSIDs; will also hold the primary key of each SNP in SQL

# Load each chromosome into database
for curChr in chromosomes:
    chrLabel = str(curChr)

    # Fresh Result record for this chromosome
    result = Result()
    result.method = "pgsql"
    result.tag = tag
    print("Chromosome " + chrLabel)
    result.chromosome = chrLabel

    # Input file names for this chromosome, prefixed with the base path
    # (minus any trailing '\' or '/') when one was supplied
    curSnpFilePath = snpFilePath.format(curChr)
    curLociFilePath = lociFilePath.format(curChr)
    if len(path) > 0:
        baseDir = path.rstrip('\\').rstrip('/')
        curSnpFilePath = baseDir + '\\' + curSnpFilePath
        curLociFilePath = baseDir + '\\' + curLociFilePath

    # Start every chromosome with empty insert containers
    snpInserts.clear()
    lociInserts = []
# NOTE(review): the next three statements are the tail of the logic that
# narrows `chromosomes` down to `startList`. The enclosing loop/condition
# headers are not visible here, so the original nesting could not be
# recovered -- confirm the indentation against the full file.
hitMin = True
if hitMin:
    startList.append(cur)
chromosomes = startList

# Create MongoDB connection and select the target database and collection
mongoClient = MongoClient(mongoHost)
mongoDb = mongoClient[databaseName]
mongoCollection = mongoDb[collectionName]

# Dictionary for MongoDB SNP/loci documents
documents = {}

for curChr in chromosomes:
    # One Result record per chromosome; the method label records which
    # load options (bulk, mongoimport) are active
    result = Result()
    result.method = "Mongo"
    if bulk:
        result.method += "-Bulk"
    if mongoimport:
        result.method += "-jsonImport"
    result.tag = tag
    print("Chromosome " + str(curChr))
    result.chromosome = str(curChr)

    # Set file paths for current chromosome
    curSnpFilePath = snpFilePath.format(curChr)
    curLociFilePath = lociFilePath.format(curChr)
    if len(path) > 0:
        # Strip any trailing separator with rstrip. The previous code called
        # rsplit for the loci path, which returns a list and raised TypeError
        # on concatenation. A trailing '/' is stripped too, matching the
        # PostgreSQL loaders.
        pathPrefix = path.rstrip('\\').rstrip('/') + '\\'
        curSnpFilePath = pathPrefix + curSnpFilePath
        curLociFilePath = pathPrefix + curLociFilePath
# Open results file, print headers
resultsFileName = 'qresults-pgsql-nosql'
# Append the tag only when there is one. The previous guard tested
# resultsFileName itself, which had just been set to a non-empty literal and
# was therefore always true.
if tag != "":
    resultsFileName += '-' + tag
resultsFileName += '.txt'
resultsFile = open(resultsFileName, 'w')
result = Result()
resultsFile.write(result.toHeader() + '\n')

# Create pgsql connection
# NOTE(review): everything between `" user="` and the first gene name was
# destroyed by credential redaction in the source text. The remainder of the
# connection string, the cursor creation and the `genes = [` opener below are
# reconstructed; `username` and `password` are assumed names -- TODO confirm
# them and any further connection fields (e.g. host) against the real script.
postgresConnection = psycopg2.connect(
    "dbname=" + databaseName + " user=" + username + " password=" + password
)
cursor = postgresConnection.cursor()

# Genes queried in every pass
genes = [
    "ACSL6", "ZDHHC8", "TPH1", "SYN2", "DISC1", "DISC2",
    "COMT", "FXYD6", "ERBB4", "DAOA", "MEGF10", "SLC18A1",
    "DYM", "SREBF2", "NXRN1", "CSF2RA", "IL3RA", "DRD2",
]

# Ten passes (z = 1..10) over the gene list; each query's wall-clock time is
# stored on a fresh Result and written as one line of the results file.
for z in range(1, 11):
    for g in genes:
        result = Result()
        result.method = "pgsql-jsonb-QrySet" + str(z)
        result.tag = tag + "-" + g + "/" + str(z)
        print("Running queries: " + g + "/" + str(z))
        sys.stdout.flush()

        # Count snp rows whose jsondata 'loci' array contains this gene
        qryStart = time.time()
        cursor.execute('SELECT count(*) FROM snp WHERE jsondata->\'loci\' @> \'[{"gene":"' + g + '"}]\'')
        qryEnd = time.time()
        result.qryByGene = qryEnd - qryStart

        resultsFile.write(result.toString() + '\n')

print("Run complete!")
gs = gspread.Client(auth=(gusername,gpassword)) gs.login() ss = gs.open_by_key(docKey) ws = ss.add_worksheet(tag + "-" + str(time.time()),1,1) ws.append_row(result.headerArr()) # Create MySQL database, tables if not exists mysqlConnection = MySQLdb.connect(host=sqlHost,user=username,passwd=password,db=databaseName) cursor = mysqlConnection.cursor() genes = ["ACSL6","ZDHHC8","TPH1","SYN2","DISC1","DISC2","COMT","FXYD6","ERBB4","DAOA","MEGF10","SLC18A1","DYM","SREBF2","NXRN1","CSF2RA","IL3RA","DRD2"] for z in range(1,11): for g in genes: result = Result() result.method = "MySQL-QrySet" + str(z) result.tag = tag + "-" + g + "/" + str(z) print "Running queries: " + g + "/" + str(z) qryStart = time.time() cursor.execute("SELECT count(distinct s.rsid) FROM locus l, snp s WHERE l.snp_id = s.id AND l.gene = '" + g + "'") qryEnd = time.time() result.qryByGene = qryEnd-qryStart qryStart = time.time() cursor.execute("SELECT count(distinct s.rsid) FROM locus l, snp s WHERE l.snp_id = s.id AND l.gene = '" + g + "' AND s.has_sig = true") qryEnd = time.time() result.qryByGeneSig = qryEnd-qryStart resultsFile.write(result.toString() + '\n') if remote:
gs.login() ss = gs.open_by_key(docKey) ws = ss.add_worksheet(tag + "-" + str(time.time()),1,1) ws.append_row(result.headerArr()) # Create MongoDB connection mongoClient = MongoClient(mongoHost) mongoDb = mongoClient[databaseName] mongoCollection = mongoDb[collectionName] genes = ["ACSL6","ZDHHC8","TPH1","SYN2","DISC1","DISC2","COMT","FXYD6","ERBB4","DAOA","MEGF10","SLC18A1","DYM","SREBF2","NXRN1","CSF2RA","IL3RA","DRD2"] for z in range(1,11): for g in genes: result = Result() result.method = "Mongo-QrySet" + str(z) result.tag = tag + "-" + g + "/" + str(z) print "Running queries: " + g + "/" + str(z) qryStart = time.time() temptotal = mongoCollection.find({"loci.gene":g}).count() qryEnd = time.time() result.qryByGene = qryEnd-qryStart qryStart = time.time() temptotal = mongoCollection.find({"has_sig":True,"loci.gene":g}).count() qryEnd = time.time() result.qryByGeneSig = qryEnd-qryStart resultsFile.write(result.toString() + '\n') if remote:
# Run the DDL statement of every entry in TABLES, then commit.
# NOTE(review): the collapsed source does not show whether commit() sat
# inside or after this loop; it is placed after the loop here -- confirm.
for name, ddl in TABLES.iteritems():
    createDbCursor.execute(ddl)
postgresConnection.commit()

# Switch off all triggers on the snp table, then release the cursor
createDbCursor.execute("ALTER TABLE snp DISABLE trigger ALL;")
createDbCursor.close()

# Dictionary of SNP/loci documents, emptied for every chromosome
documents = {}

for curChr in chromosomes:
    chrLabel = str(curChr)

    # Fresh Result record; the method label reflects the jsonb flag
    result = Result()
    result.method = "pgsql-json"
    if jsonb:
        result.method = "pgsql-jsonb"
    result.tag = tag
    print("Chromosome " + chrLabel)
    result.chromosome = chrLabel

    # Input file names for this chromosome, prefixed with the base path
    # (minus any trailing '\' or '/') when one was supplied
    curSnpFilePath = snpFilePath.format(curChr)
    curLociFilePath = lociFilePath.format(curChr)
    if len(path) > 0:
        baseDir = path.rstrip('\\').rstrip('/')
        curSnpFilePath = baseDir + '\\' + curSnpFilePath
        curLociFilePath = baseDir + '\\' + curLociFilePath

    documents.clear()