# Setup script: connects to the dev MongoDB instance and to a PostgreSQL
# server, then takes a 1000-document sample cursor over the source collection.
import collections
import datetime
import hashlib
import json
import os
import pprint
import sys

from pgdb import connect
# MongoClient and guiutils are used below; import them here so this fragment
# does not depend on a later part of the file having run first.
from pymongo import MongoClient
from commonpyutils import guiutils

# Authenticate against the "admin" database, then switch to the working one.
# NOTE(review): Database.authenticate() is not available in PyMongo >= 4 --
# confirm the driver version this script is pinned to.
client = MongoClient(os.environ["MONGODEV_INSTANCE"])
mongodbHandle = client["admin"]
mongodbHandle.authenticate(os.environ["MONGODEV_UNAME"],
                           os.environ["MONGODEV_PASS"])
mongodbHandle = client["eva_testing"]
srcCollHandle = mongodbHandle["variant_chr21_1_1_sample_mod"]

# PostgreSQL connection details are prompted for interactively; the third
# argument "*" is passed only for the password prompt.
postgresDBHandle = connect(
    database='template1',
    host=guiutils.promptGUIInput("PostgreSQL Host", "PostgreSQL Host"),
    user=guiutils.promptGUIInput("PostgreSQL Username", "PostgreSQL Username"),
    password=guiutils.promptGUIInput("PostgreSQL password",
                                     "PostgreSQL password", "*"))
postgresCursor = postgresDBHandle.cursor()


def getDictValueOrNull(dict, key):
    """Return ``dict[key]`` when ``key`` is present, otherwise ``None``.

    The first parameter keeps its original name (which shadows the builtin
    ``dict`` inside this function) so existing keyword callers keep working.
    """
    if key in dict:
        return dict[key]
    return None


# Cursor over the first 1000 documents of the source collection.
sampleDocs = srcCollHandle.find().limit(1000)
print("Start Time:" + str(datetime.datetime.now()))
# Script fragment (collapsed onto one physical line; the original line
# structure has been lost). Visible steps, in order:
#   1. Connect to the dev MongoDB named by MONGODEV_INSTANCE, authenticate
#      against "admin" with MONGODEV_UNAME / MONGODEV_PASS, then rebind the
#      handle to the "eva_testing" database.
#   2. Connect to the production hosts listed in MONGO_PROD_INSTANCES, prompt
#      for a user and password, authenticate against "admin", then open
#      "eva_hsapiens_grch37" with read preference SECONDARY_PREFERRED and
#      read concern level "local".
#   3. Bind the "variants_1_1" and "variants_1_2" collections.
#   4. Begin chromosome_LB_UB_Map, a list of dicts with the keys "_id",
#      "minStart", "maxStart" and "numEntries" (presumably per-chromosome
#      start-position bounds and row counts -- confirm against the producer).
# NOTE(review): the chromosome_LB_UB_Map literal is cut off after the entry
# with "_id" "7"; the remaining entries and the closing bracket are missing.
# NOTE(review): Database.authenticate() is not available in PyMongo >= 4 --
# confirm the driver version this script is pinned to.
from commonpyutils import guiutils from bson import CodecOptions, SON, json_util from pymongo import MongoClient import pymongo import collections, datetime, random import sys, json, os, pprint import multiprocessing, psycopg2, socket from multiprocessing import Process, Pipe mongoDevClient = MongoClient(os.environ["MONGODEV_INSTANCE"]) mongoDevDBHandle = mongoDevClient["admin"] mongoDevDBHandle.authenticate(os.environ["MONGODEV_UNAME"], os.environ["MONGODEV_PASS"]) mongoDevDBHandle = mongoDevClient["eva_testing"] mongoProdClient = MongoClient("mongodb://{0}".format(os.environ["MONGO_PROD_INSTANCES"])) mongoProdUname = guiutils.promptGUIInput("User", "User") mongoProdPwd = guiutils.promptGUIInput("Pass", "Pass", "*") mongoProdDBHandle = mongoProdClient["admin"] mongoProdDBHandle.authenticate(mongoProdUname, mongoProdPwd) mongoProdDBHandle = mongoProdClient.get_database("eva_hsapiens_grch37", read_preference= pymongo.ReadPreference.SECONDARY_PREFERRED, read_concern=pymongo.read_concern.ReadConcern(level="local")) mongoProdCollHandle = mongoProdDBHandle["variants_1_1"] mongoProdCollHandle_2 = mongoProdDBHandle["variants_1_2"] chromosome_LB_UB_Map = [{ "_id" : "1", "minStart" : 10020, "maxStart" : 249240605, "numEntries" : 12422239 }, { "_id" : "2", "minStart" : 10133, "maxStart" : 243189190, "numEntries" : 13217397 }, { "_id" : "3", "minStart" : 60069, "maxStart" : 197962381, "numEntries" : 10891260 }, { "_id" : "4", "minStart" : 10006, "maxStart" : 191044268, "numEntries" : 10427984 }, { "_id" : "5", "minStart" : 10043, "maxStart" : 180905164, "numEntries" : 9742153 }, { "_id" : "6", "minStart" : 61932, "maxStart" : 171054104, "numEntries" : 9340928 }, { "_id" : "7", "minStart" : 10010, "maxStart" : 159128653, "numEntries" : 8803393 },
# Script fragment (collapsed onto one physical line; the original line
# structure has been lost). Visible steps, in order:
#   1. Prompt for a MongoDB host, user and password, authenticate against
#      "admin", then bind the "variants_1_2" collection of
#      "eva_hsapiens_grch37".
#   2. Read the PostgreSQL host and username from the console, open a
#      psycopg2 connection to database "postgres" and create a cursor.
#   3. Begin chromosome_LB_UB_Map, a list of dicts keyed by "_id",
#      "minStart" and "maxStart".
# NOTE(review): the inline "# Mongo credentials" marker sits in the middle of
# this collapsed line, so the interpreter treats everything after it as
# comment text; the line breaks must be restored before any of it can run.
# NOTE(review): the DSN string contains user='******' and host='{1}', so
# format() argument 0 (postgresUser) is never substituted -- this looks like
# a redacted '{0}' placeholder; confirm against the original script.
# NOTE(review): the DSN carries an empty password (password='').
# NOTE(review): getpass._raw_input is an underscore-private helper of the
# getpass module, not part of its documented API.
# NOTE(review): the chromosome_LB_UB_Map literal is cut off inside its first
# entry, after "maxStart".
import datetime import random import getpass import psycopg2 from commonpyutils import guiutils import pymongo from pymongo import MongoClient # Mongo credentials mongoProdClient = MongoClient(guiutils.promptGUIInput("Host", "Host")) mongoProdUname = guiutils.promptGUIInput("User", "User") mongoProdPwd = guiutils.promptGUIInput("Pass", "Pass", "*") mongoProdDBHandle = mongoProdClient["admin"] mongoProdDBHandle.authenticate(mongoProdUname, mongoProdPwd) mongoProdDBHandle = mongoProdClient["eva_hsapiens_grch37"] mongoProdCollHandle_2 = mongoProdDBHandle["variants_1_2"] # Citus credentials postgresHost = getpass._raw_input("PostgreSQL Host:\n") postgresUser = getpass._raw_input("PostgreSQL Username:\n") postgresConnHandle = psycopg2.connect( "dbname='postgres' user='******' host='{1}' password=''".format( postgresUser, postgresHost)) resultCursor = postgresConnHandle.cursor() chromosome_LB_UB_Map = [ { "_id": "1", "minStart": 10020, "maxStart": 249240605,
# Script fragment (collapsed onto one physical line). It opens partway
# through a function whose header and loop start are not visible here.
# Visible tail of that function:
#   - adds `duration` to mongoCumulativeExecTime and prints a summary line;
#   - advances startFirstPos and endFirstPos by `step`, and accumulates
#     totalNumRecords and numScans;
#   - breaks once startFirstPos >= startLastPos or endFirstPos >= endLastPos;
#   - closes devMongoClient and prodClient and returns the tuple
#     (totalNumRecords, mongoCumulativeExecTime).
# Module-level code after the function:
#   - prompts for the production MongoDB host and credentials, authenticates
#     against "admin" and switches to "eva_hsapiens_grch37";
#   - loads every document of "files_1_2" into filesCache and indexes them in
#     filesCacheLookup under the key doc["fid"] + "_" + doc["sid"];
#   - closes the client.
# NOTE(review): the user/password prompt call is garbled by "******" runs.
# What is left is a single mongoProdUser assignment, while mongoProdPwd is
# passed to authenticate() with no intact assignment visible -- restore both
# prompt statements from the original script.
mongoCumulativeExecTime += duration print("Mongo: Joined {0} records in {1} seconds".format( numVarRecords, duration)) print("****************") startFirstPos += step endFirstPos += step totalNumRecords += numVarRecords numScans += 1 if (startFirstPos >= startLastPos) or (endFirstPos >= endLastPos): break devMongoClient.close() prodClient.close() return (totalNumRecords, mongoCumulativeExecTime) mongoProdHost = guiutils.promptGUIInput("MongoDB Production Host:", "MongoDB Production Host:") mongoProdUser = guiutils.promptGUIInput("MongoDB Production User:"******"MongoDB Production User:"******"MongoDB Production Password:"******"MongoDB Production Password:"******"*") prodClient = MongoClient(mongoProdHost) prodMongoHandle = prodClient["admin"] prodMongoHandle.authenticate(mongoProdUser, mongoProdPwd) prodMongoHandle = prodClient["eva_hsapiens_grch37"] filesCache = list(filesCollHandle_grch37.find()) filesCacheLookup = {} for doc in filesCache: filesCacheLookup[doc["fid"] + "_" + doc["sid"]] = doc prodClient.close()