def __init__(self, biolink, config=None):
    """ Store the Biolink model and configuration and resolve the bulk-load separator.

    :param biolink: Biolink model object, kept as self.biolink.
    :param config: Configuration mapping. When falsy, get_config() supplies it.
    """
    self.biolink = biolink
    resolved = config if config else get_config()
    self.config = resolved
    # The separator may be configured either as an integer code point
    # or directly as a string; '|' is used when nothing is configured.
    delimiter = resolved.get('bulk_loader', {}).get('separator', '|')
    if isinstance(delimiter, int):
        delimiter = chr(delimiter)
    self.separator = delimiter
def __init__(self, biolink, config=None):
    """ Resolve the configuration and build a Biolink model for the configured version.

    :param biolink: Accepted for interface compatibility; not read by this
                    initializer, which constructs its own BiolinkModel.
    :param config: Configuration mapping. When falsy, get_config() supplies it.
    """
    self.config = config if config else get_config()
    # The version is read from the 'kgx' section of the configuration.
    kgx_settings = self.config.get('kgx')
    self.biolink_version = kgx_settings.get('biolink_model_version')
    log.debug(f"Trying to get biolink version : {self.biolink_version}")
    self.biolink = BiolinkModel(self.biolink_version)
def __init__(self, to_string=False, config=None):
    """ Initialize.

    :param to_string: Log messages to a string, available as
                      self.log_stream.getvalue() after execution completes.
    :param config: Configuration object. When falsy, get_config() supplies it.
    """
    import logging

    self.has_string_handler = to_string
    config = config or get_config()
    self.config = config

    if self.has_string_handler:
        # Add a stream handler to enable to_string.
        stream = StringIO()
        handler = logging.StreamHandler(stream)
        self.log_stream = stream
        self.string_handler = handler
        log.addHandler(handler)

    # One Biolink model instance is handed to both helpers.
    biolink = BiolinkModel()
    self.biolink = biolink
    self.kgx = KGXModel(biolink, config=config)
    self.bulk = BulkLoad(biolink, config=config)
def task_wrapper(python_callable, **kwargs):
    """
    Run a task callable with the default config overridden by the Airflow DAG run conf.

    :param python_callable: Callable invoked as
                            python_callable(to_string=True, config=config).
    :param kwargs: Keyword arguments from the caller; only 'dag_run' is read here.
    :return: Whatever python_callable returns.
    """
    # get dag config provided
    dag_run = kwargs.get('dag_run')
    dag_conf = {}
    logger = get_logger()
    config = get_config()
    if dag_run:
        # A run started without a conf payload can carry conf=None; treat
        # that as "no overrides" instead of passing None to update().
        dag_conf = dag_run.conf or {}
        # Drop the run object from kwargs.
        # NOTE(review): the remaining kwargs are not forwarded to
        # python_callable below -- confirm whether that is intended.
        del kwargs['dag_run']
    # overrides values
    config.update(dag_conf)
    logger.info("Config")
    logger.info(config)
    return python_callable(to_string=True, config=config)
def dug_crawl_path(name):
    """ Form a path under <data_root>/dug/crawl.

    :param name: Name of the object to get a path for.
    """
    return os.path.join(get_config()['data_root'], "dug", "crawl", name)
def dug_kgx_path(name):
    """ Form a path under <data_root>/dug/kgx.

    :param name: Name of the object to get a path for.
    """
    return os.path.join(get_config()['data_root'], "dug", "kgx", name)
def dug_annotation_path(name):
    """ Form a path under <data_root>/dug/annotations.

    :param name: Name of the object to get a path for.
    """
    return os.path.join(get_config()['data_root'], "dug", "annotations", name)
def bulk_path(name):
    """ Form the path to a bulk load object, under <data_root>/bulk.

    :param name: Name of the object.
    """
    return os.path.join(get_config()['data_root'], "bulk", name)
def schema_path(name):
    """ Form the path to a schema object, under <data_root>/schema.

    :param name: Name of the object to get a path for.
    """
    return os.path.join(get_config()['data_root'], "schema", name)
def merge_path(name):
    """ Form the path to a merged KGX object, under <data_root>/merge.

    :param name: Name of the merged KGX object.
    """
    return os.path.join(get_config()['data_root'], "merge", name)
def kgx_path(name):
    """ Form the path to a KGX object, under <data_root>/kgx.

    :param name: Name of the KGX object.
    """
    return os.path.join(get_config()['data_root'], "kgx", name)
import sys
import time
import yaml
from bmt import Toolkit
from collections import defaultdict
from enum import Enum
from io import StringIO
# get_config is an alias for roger.Config.get_default_config.
from roger.Config import get_default_config as get_config
from roger.roger_util import get_logger
from roger.components.data_conversion_utils import TypeConversionUtil
from redisgraph_bulk_loader.bulk_insert import bulk_insert
from roger.roger_db import RedisGraph
from string import Template

# Module-level logger and configuration; both calls run once, at import time.
log = get_logger()
config = get_config()


class SchemaType(Enum):
    """ High level semantic metadata concepts.
    Categories are classes in an ontological model like Biolink.
    Predicates are links between nodes. """
    # Member values are the lowercase string forms of the member names.
    CATEGORY = "category"
    PREDICATE = "predicate"


class FileFormat(Enum):
    """ File formats this module knows about. """
    # Member values are the lowercase string forms of the member names.
    JSON = "json"
    YAML = "yaml"