Beispiel #1
0
from datatable import dt, f, count, as_type, fread, rbind, by
from dotenv import load_dotenv
from os import environ
from datetime import datetime
# import pandas as pd
import re

# Release configuration. load_dotenv() runs first, ahead of the environ
# lookups below (presumably so they can be satisfied from a local .env file).
load_dotenv()
currentReleaseType = 'patch'  # or 'release'
currentFreeze = 'freeze1'  # 'freeze2'
currentPatch = 'patch3'  # 'patch1'

# Open a Molgenis session against the ACC host and sign in with the ACC
# credentials. The commented line is the PROD host alternative.
# host=environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(username=environ['MOLGENIS_ACC_USR'],
          password=environ['MOLGENIS_ACC_PWD'])

# build entity IDs and paths based on current freeze and patch; the base
# file path is read from the CLUSTER_BASE environment variable
paths = buildRd3Paths(freeze=currentFreeze,
                      patch=currentPatch,
                      baseFilePath=environ['CLUSTER_BASE'])

# def __unpack__phenotypicfeatures(phenotypicFeatures):
#     """Unpack Phenotypic Features
#     Extract `phenotypicFeatures` and separate into observed and unobserved
#     phenotypic codes
#     @param phenotypicFeatures : output from data['phenopacket']['phenotypicFeatures']
#     @return a dictionary with observed and unobserved phenotype codes
#     """
Beispiel #2
0
#' CREATED: 2022-01-18
#' MODIFIED: 2022-05-13
#' PURPOSE: Update the Menu setting in Application Settings
#' STATUS: stable
#' PACKAGES: rd3tools, dotenv, os, json
#' COMMENTS: NA
#'////////////////////////////////////////////////////////////////////////////

from rd3.api.molgenis import Molgenis
from dotenv import load_dotenv
from os import environ
import json

# set vars and init sessions for both Molgenis instances
# One client is created per instance (ACC and PROD); host URLs and
# credentials are all read from environment variables.
load_dotenv()
rd3_acc = Molgenis(url=environ['MOLGENIS_ACC_HOST'])
rd3_prod = Molgenis(url=environ['MOLGENIS_PROD_HOST'])

# sign in to PROD with the PROD credentials
rd3_prod.login(username=environ['MOLGENIS_PROD_USR'],
               password=environ['MOLGENIS_PROD_PWD'])

# sign in to ACC with the ACC credentials
rd3_acc.login(username=environ['MOLGENIS_ACC_USR'],
              password=environ['MOLGENIS_ACC_PWD'])

# read contents of the menu config file
# The `with` statement closes the handle on exit, so no explicit close() is
# needed. The file is decoded as UTF-8 instead of the platform default
# encoding, since it holds JSON text.
with open('rd3/molgenis_menu.json', 'r', encoding='utf-8') as file:
    menu = file.read()

# parse json and stringify: this step parses the raw text into Python objects
molgenisMenuJson = json.loads(menu)
Beispiel #3
0
#' COMMENTS: NA
#'////////////////////////////////////////////////////////////////////////////

from rd3.api.molgenis import Molgenis
from rd3.utils.utils import statusMsg
import re

# for local dev, set credentials
from dotenv import load_dotenv
from os import environ

# Called before the environ lookups below (presumably to pick up values from
# a local .env file).
load_dotenv()

# Open a Molgenis session against the ACC host and sign in with the ACC
# credentials. The commented line is the PROD host alternative.
# host=environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(username=environ['MOLGENIS_ACC_USR'],
          password=environ['MOLGENIS_ACC_PWD'])

# pull RD3 data
# File metadata: rows of `rd3_portal_cluster` matching the query
# type=="phenopacket", requesting the release, name and type attributes.
files = rd3.get(entity='rd3_portal_cluster',
                q='type=="phenopacket"',
                attributes='release,name,type',
                batch_size=10000)

# Subject metadata: `rd3_freeze1_subject`, requesting the id, subjectID and
# patch attributes (no query filter is passed).
subjects = rd3.get(entity='rd3_freeze1_subject',
                   attributes='id,subjectID,patch',
                   batch_size=10000)

# Report how many entries each request returned.
statusMsg('File metadata entries pulled: {}'.format(len(files)))
statusMsg('Subject metadata entries pulled: {}'.format(len(subjects)))
                             timestamp, toKeyPairs)
from datatable import dt, f
import functools
import operator

# LOCAL DEV USE ONLY
# If you are running this script locally, make sure you have a valid token
# saved in the .env file.  If not, generate one and register it in the RD3-
# Molgenis database. Switch host and token when pushing data to PROD or ACC.
from dotenv import load_dotenv
from os import environ
# Called before the environ lookups below (presumably to pick up values from
# a local .env file).
load_dotenv()

# This run targets the PROD host; the ACC host and ACC login are kept as
# commented-out alternatives.
host = environ['MOLGENIS_PROD_HOST']
# host=environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
# rd3.login(
#     username=environ['MOLGENIS_ACC_USR'],
#     password=environ['MOLGENIS_ACC_PWD']
# )
# sign in with the PROD credentials, matching the PROD host selected above
rd3.login(username=environ['MOLGENIS_PROD_USR'],
          password=environ['MOLGENIS_PROD_PWD'])

# SET EXISTING NOVELOMICS RELEASES
# Since there are many substudies in the Novel Omics space, these releases are
# imported into their own EMX package. In
novelOmicsReleases = {
    'deepwes': 'rd3_noveldeepwes',
    'rnaseq': 'rd3_novelrnaseq',
    'srwgs': 'rd3_novelsrwgs',
    'lrwgs': 'rd3_novellrwgs'
# Release metadata record; each key is described by its inline comment.
patchinfo = {
    'name': 'freeze3',  # name of the RD3 Release
    'id': 'freeze3_original',  # ID labels `<name>_original`
    'type': 'freeze',  # 'freeze' or 'patch'
    'date': '2022-05-09',  # Date of release, yyyy-mm-dd
    'description': 'Data Freeze 3'  # a nice description
}

# for local dev use only
from dotenv import load_dotenv
from os import environ
# Called before the environ lookups below (presumably to pick up values from
# a local .env file).
load_dotenv()

# Open a Molgenis session against the ACC host and sign in with the ACC
# credentials. The commented line is the PROD host alternative.
# host=environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(username=environ['MOLGENIS_ACC_USR'],
          password=environ['MOLGENIS_ACC_PWD'])

# migrate data from one server to the other:
# pull data then switch tokens and restart connection
# portalData = rd3.get(releaseName,batch_size=10000)
# rd3.importData(entity='rd3_portal_release_freeze3', data=portalData)

#//////////////////////////////////////////////////////////////////////////////

# ~ 0 ~
# Create Reference Datasets
# Pull reference tables to create mapping tables for recoding raw values into
# RD3 terminology. Add additional mappings as needed.
# PACKAGES: **see below**
# COMMENTS: NA
#//////////////////////////////////////////////////////////////////////////////

from rd3.api.molgenis import Molgenis
from rd3.utils.utils import dtFrameToRecords
from datatable import dt, f, fread, as_type

# for local dev
from dotenv import load_dotenv
from os import environ
# Called before the environ lookups below (presumably to pick up values from
# a local .env file).
load_dotenv()

# Open a Molgenis session against the ACC host and sign in with the ACC
# credentials. The commented line is the PROD host alternative.
# host=environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(username=environ['MOLGENIS_ACC_USR'],
          password=environ['MOLGENIS_ACC_PWD'])

#///////////////////////////////////////

# ~ 1 ~
# Merge Datasets
# Join the external xlsx file with the RD3 data.

# ~ 1a ~
# Load data from external file
# Download the latest file and import the contents. Select the columns of
# interest and rename them to align with the RD3 EMX. Set the key as well.

newData = fread('')[:, [
from rd3.utils.utils import (buildRd3Paths, statusMsg, pedtools)

from dotenv import load_dotenv
from os import environ
from datetime import datetime
import pandas as pd
import re

# set vars
# Load variables from a local .env file (if one exists) before any environ
# lookup below. load_dotenv is imported above but was never called, so the
# MOLGENIS_* and CLUSTER_BASE keys had to be exported in the shell already or
# the lookups raised KeyError. By default load_dotenv() does not override
# variables that are already set.
load_dotenv()
currentReleaseType = 'patch'  # or 'release'
currentFreeze = 'freeze1'
currentPatch = 'patch3'

# Open a Molgenis session against the ACC host and sign in with the ACC
# credentials. The commented line is the PROD host alternative.
# host=environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(username=environ['MOLGENIS_ACC_USR'],
          password=environ['MOLGENIS_ACC_PWD'])

# build entity IDs and paths based on current freeze and patch; the base
# file path is read from the CLUSTER_BASE environment variable
paths = buildRd3Paths(freeze=currentFreeze,
                      patch=currentPatch,
                      baseFilePath=environ['CLUSTER_BASE'])

#//////////////////////////////////////////////////////////////////////////////

# ~ 1 ~
# Start Molgenis Session and Pull Required Data
#
# For processing the PED files, we will need the following attributes from the
# RD3 `rd3_freeze[x]_subject` where `[x]` is the freeze that the new PED files

# ~ 0 ~ 
# Fetch Metadata for all releases

# init database connection
# Announce the connection attempt, then call load_dotenv() ahead of the
# environ lookups below (presumably to pick up values from a local .env file).
statusMsg('Connecting to RD3....')
load_dotenv()

# ACC alternative, kept commented out:
# rd3=Molgenis(url=environ['MOLGENIS_ACC_HOST'])
# rd3.login(
#     username=environ['MOLGENIS_ACC_USR'],
#     password=environ['MOLGENIS_ACC_PWD']
# )

# Active connection: PROD host with the PROD credentials.
rd3=Molgenis(url=environ['MOLGENIS_PROD_HOST'])
rd3.login(
    username=environ['MOLGENIS_PROD_USR'],
    password=environ['MOLGENIS_PROD_PWD']
)


# SET RELEASES
# If new releases are added to RD3, add package identifier to the list below.
# Since the aim of the table is to be able to identify subjects that are
# lacking genetic data, it is best to separate novelomics releases from 
# regular freezes, and then merge the two arrays

regularReleases=[
    'freeze1',
    'freeze2',
#'////////////////////////////////////////////////////////////////////////////

from rd3.api.molgenis import Molgenis
from rd3.utils.clustertools import clustertools, statusMsg
from datetime import datetime
import pandas as pd
import re
import os

# for local dev only
from dotenv import load_dotenv
# Called before the os.environ lookups below (presumably to pick up values
# from a local .env file).
load_dotenv()

# Open a Molgenis session against the ACC host and sign in with the ACC
# credentials. The commented line is the PROD host alternative.
# host=os.environ['MOLGENIS_PROD_HOST']
host = os.environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(username=os.environ['MOLGENIS_ACC_USR'],
          password=os.environ['MOLGENIS_ACC_PWD'])

# List what is available under CLUSTER_BASE, keeping every entry except the
# one whose filename is 'master'.
directories = list(filter(
    lambda entry: entry['filename'] != 'master',
    clustertools.listFiles(path=os.environ['CLUSTER_BASE']),
))

# collate file metadata: release, name, path, type, etc.
files = []
for dir in directories:
    statusMsg('Processing files in', dir['filepath'])

    # gather list of pedigree (.ped) and phenopacket files (.json)
#' PACKAGES: **see below**
#' COMMENTS: NA
#'////////////////////////////////////////////////////////////////////////////

from rd3.api.molgenis import Molgenis
from dotenv import load_dotenv
import pandas as pd

# for local dev, set credentials
from dotenv import load_dotenv
from os import environ
# Called before the environ lookups below (presumably to pick up values from
# a local .env file).
load_dotenv()

# Open a Molgenis session against the ACC host and sign in with the ACC
# credentials. The commented line is the PROD host alternative.
# host=environ['MOLGENIS_PROD_HOST']
host = environ['MOLGENIS_ACC_HOST']
rd3 = Molgenis(url=host)
rd3.login(username=environ['MOLGENIS_ACC_USR'],
          password=environ['MOLGENIS_ACC_PWD'])


# What are the most commonly reported phenotypes
def getPhenotypicData(entityId):
    """Get Phenotypic Data
    Pull observed phenotype data from entities

    @param entityId RD3 table identifier
    @return list of dictionaries
    """
    return rd3.get(entity=entityId,
                   attributes='phenotype',
                   q='phenotype!=""',