from datatable import dt, f, count, as_type, fread, rbind, by
from dotenv import load_dotenv
from os import environ
from datetime import datetime
# import pandas as pd
import re

# Load settings from the environment and declare which release is processed.
load_dotenv()
currentReleaseType = 'patch'  # or 'release'
currentFreeze = 'freeze1'  # 'freeze2'
currentPatch = 'patch3'  # 'patch1'

# Open a session against the acceptance server; swap the host line below to
# target production instead.
# host=environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(
    username=environ['MOLGENIS_ACC_USR'],
    password=environ['MOLGENIS_ACC_PWD']
)

# Derive entity IDs and file paths from the selected freeze and patch.
paths = buildRd3Paths(
    freeze=currentFreeze,
    patch=currentPatch,
    baseFilePath=environ['CLUSTER_BASE']
)

# def __unpack__phenotypicfeatures(phenotypicFeatures):
#     """Unpack Phenotypic Features
#     Extract `phenotypicFeatures` and separate into observed and unobserved
#     phenotypic codes
#     @param phenotypicFeatures : output from data['phenopacket']['phenotypicFeatures']
#     @return a dictionary with observed and unobserved phenotype codes
#     """
#' CREATED: 2022-01-18
#' MODIFIED: 2022-05-13
#' PURPOSE: Update the Menu setting in Application Settings
#' STATUS: stable
#' PACKAGES: rd3tools, dotenv, os, json
#' COMMENTS: NA
#'////////////////////////////////////////////////////////////////////////////

from rd3.api.molgenis import Molgenis
from dotenv import load_dotenv
from os import environ
import json

# set vars and init sessions for both Molgenis instances
load_dotenv()
rd3_acc = Molgenis(url=environ['MOLGENIS_ACC_HOST'])
rd3_prod = Molgenis(url=environ['MOLGENIS_PROD_HOST'])

rd3_prod.login(
    username=environ['MOLGENIS_PROD_USR'],
    password=environ['MOLGENIS_PROD_PWD']
)
rd3_acc.login(
    username=environ['MOLGENIS_ACC_USR'],
    password=environ['MOLGENIS_ACC_PWD']
)

# read contents of the menu config file
# The `with` statement closes the handle when the block exits, so no explicit
# close() call is needed.
with open('rd3/molgenis_menu.json', 'r') as file:
    menu = file.read()

# parse json and stringify
molgenisMenuJson = json.loads(menu)
#' COMMENTS: NA
#'////////////////////////////////////////////////////////////////////////////

from rd3.api.molgenis import Molgenis
from rd3.utils.utils import statusMsg
import re

# for local dev, set credentials
from dotenv import load_dotenv
from os import environ
load_dotenv()

# Connect to the acceptance server; swap the host line below for production.
# host=environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(
    username=environ['MOLGENIS_ACC_USR'],
    password=environ['MOLGENIS_ACC_PWD']
)

# Fetch the phenopacket file entries from the portal cluster table.
files = rd3.get(
    entity='rd3_portal_cluster',
    q='type=="phenopacket"',
    attributes='release,name,type',
    batch_size=10000
)

# Fetch the freeze1 subject identifiers and their patch information.
subjects = rd3.get(
    entity='rd3_freeze1_subject',
    attributes='id,subjectID,patch',
    batch_size=10000
)

# Report how many records each request returned.
statusMsg('File metadata entries pulled: {}'.format(len(files)))
statusMsg('Subject metadata entries pulled: {}'.format(len(subjects)))
timestamp, toKeyPairs) from datatable import dt, f import functools import operator # LOCAL DEV USE ONLY # If you are running this script locally, make sure you have a valid token # saved in the .env file. If not, generate one and register it in the RD3- # Molgenis database. Switch host and token when pushing data to PROD or ACC. from dotenv import load_dotenv from os import environ load_dotenv() host = environ['MOLGENIS_PROD_HOST'] # host=environ['MOLGENIS_ACC_HOST'] rd3 = Molgenis(url=host) # rd3.login( # username=environ['MOLGENIS_ACC_USR'], # password=environ['MOLGENIS_ACC_PWD'] # ) rd3.login(username=environ['MOLGENIS_PROD_USR'], password=environ['MOLGENIS_PROD_PWD']) # SET EXISTING NOVELOMICS RELEASES # Since there are many substudies in the Novel Omics space, these releases are # imported into their own EMX package. In novelOmicsReleases = { 'deepwes': 'rd3_noveldeepwes', 'rnaseq': 'rd3_novelrnaseq', 'srwgs': 'rd3_novelsrwgs', 'lrwgs': 'rd3_novellrwgs'
# Metadata describing the release that this script processes.
patchinfo = {
    'name': 'freeze3',  # name of the RD3 Release
    'id': 'freeze3_original',  # ID labels `<name>_original`
    'type': 'freeze',  # 'freeze' or 'patch'
    'date': '2022-05-09',  # Date of release, yyyy-mm-dd
    'description': 'Data Freeze 3'  # a nice description
}

# for local dev use only
from dotenv import load_dotenv
from os import environ
load_dotenv()

# Connect to the acceptance server; swap the host line below for production.
# host=environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(
    username=environ['MOLGENIS_ACC_USR'],
    password=environ['MOLGENIS_ACC_PWD']
)

# migrate data from one server to the other:
# pull data then switch tokens and restart connection
# portalData = rd3.get(releaseName,batch_size=10000)
# rd3.importData(entity='rd3_portal_release_freeze3', data=portalData)

#//////////////////////////////////////////////////////////////////////////////

# ~ 0 ~
# Create Reference Datasets
# Pull reference tables to create mapping tables for recoding raw values into
# RD3 terminology. Add additional mappings as needed.
# PACKAGES: **see below** # COMMENTS: NA #////////////////////////////////////////////////////////////////////////////// from rd3.api.molgenis import Molgenis from rd3.utils.utils import dtFrameToRecords from datatable import dt, f, fread, as_type # for local dev from dotenv import load_dotenv from os import environ load_dotenv() # host=environ['MOLGENIS_PROD_HOST'] host = environ['MOLGENIS_ACC_HOST'] rd3 = Molgenis(url=host) rd3.login(username=environ['MOLGENIS_ACC_USR'], password=environ['MOLGENIS_ACC_PWD']) #/////////////////////////////////////// # ~ 1 ~ # Merge Datasets # Join the external xlsx file with the RD3 data. # ~ 1a ~ # Load data from external file # Download the latest file and import the contents. Select the columns of # interest and rename them to align with the RD3 EMX. Set the key as well. newData = fread('')[:, [
from rd3.utils.utils import (buildRd3Paths, statusMsg, pedtools)
from dotenv import load_dotenv
from os import environ
from datetime import datetime
import pandas as pd
import re

# set vars
# Populate os.environ from a local .env file (if one exists) before the
# credential and path lookups below. Without this call the imported
# load_dotenv is never used and the environ[...] reads raise KeyError when the
# values are only defined in .env.
load_dotenv()
currentReleaseType = 'patch'  # or 'release'
currentFreeze = 'freeze1'
currentPatch = 'patch3'

# host=environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(
    username=environ['MOLGENIS_ACC_USR'],
    password=environ['MOLGENIS_ACC_PWD']
)

# build entity IDs and paths based on current freeze and patch
paths = buildRd3Paths(
    freeze=currentFreeze,
    patch=currentPatch,
    baseFilePath=environ['CLUSTER_BASE']
)

#//////////////////////////////////////////////////////////////////////////////

# ~ 1 ~
# Start Molgenis Session and Pull Required Data
#
# For processing the PED files, we will need the following attributes from the
# RD3 `rd3_freeze[x]_subject` where `[x]` is the freeze that the new PED files
# ~ 0 ~ # Fetch Metadata for all releases # init database connection statusMsg('Connecting to RD3....') load_dotenv() # rd3=Molgenis(url=environ['MOLGENIS_ACC_HOST']) # rd3.login( # username=environ['MOLGENIS_ACC_USR'], # password=environ['MOLGENIS_ACC_PWD'] # ) rd3=Molgenis(url=environ['MOLGENIS_PROD_HOST']) rd3.login( username=environ['MOLGENIS_PROD_USR'], password=environ['MOLGENIS_PROD_PWD'] ) # SET RELEASES # If new releases are added to RD3, add package identifier to the list below. # Since the aim of the table is to be able to identify subjects that are # lacking genetic data, it is best to separate novelomics releases from # regular freezes, and then merge the two arrays regularReleases=[ 'freeze1', 'freeze2',
#'//////////////////////////////////////////////////////////////////////////// from rd3.api.molgenis import Molgenis from rd3.utils.clustertools import clustertools, statusMsg from datetime import datetime import pandas as pd import re import os # for local dev only from dotenv import load_dotenv load_dotenv() # host=os.environ['MOLGENIS_PROD_HOST'] host = os.environ['MOLGENIS_ACC_HOST'] rd3 = Molgenis(url=host) rd3.login(username=os.environ['MOLGENIS_ACC_USR'], password=os.environ['MOLGENIS_ACC_PWD']) # list available directories directories = [ row for row in clustertools.listFiles(path=os.environ['CLUSTER_BASE']) if row['filename'] != 'master' ] # collate file metadata: release, name, path, type, etc. files = [] for dir in directories: statusMsg('Processing files in', dir['filepath']) # gather list of pedigree (.ped) and phenopacket files (.json)
#' PACKAGES: **see below** #' COMMENTS: NA #'//////////////////////////////////////////////////////////////////////////// from rd3.api.molgenis import Molgenis from dotenv import load_dotenv import pandas as pd # for local dev, set credentials from dotenv import load_dotenv from os import environ load_dotenv() # host=environ['MOLGENIS_PROD_HOST'] host = environ['MOLGENIS_ACC_HOST'] rd3 = Molgenis(url=host) rd3.login(username=environ['MOLGENIS_ACC_USR'], password=environ['MOLGENIS_ACC_PWD']) # What are the most commonly reported phenotypes def getPhenotypicData(entityId): """Get Phenotypic Data Pull observed phenotype data from entities @param entityId RD3 table identifier @return list of dictionaries """ return rd3.get(entity=entityId, attributes='phenotype', q='phenotype!=""',