Esempio n. 1
0
from typing import Callable

import bioversions
import pystow

# Names exported by ``from <module> import *``.
__all__ = [
    "RAW_DIRECTORY",
    "DATABASE_DIRECTORY",
    "SPECIES_REMAPPING",
    "get_sqlalchemy_uri",
    "version_getter",
]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# pystow handle for the "pyobo" key and its "raw" / "database" submodules.
# Each *_DIRECTORY constant is the ``base`` attribute of the matching handle.
PYOBO_MODULE = pystow.module("pyobo")
RAW_MODULE = PYOBO_MODULE.submodule("raw")
RAW_DIRECTORY = RAW_MODULE.base
DATABASE_MODULE = PYOBO_MODULE.submodule("database")
DATABASE_DIRECTORY = DATABASE_MODULE.base

# Maps a species name (key) to the name that should replace it (value).
SPECIES_REMAPPING = {
    "Canis familiaris": "Canis lupus familiaris",
}

# Identifiers to skip everywhere.
# NOTE(review): presumably ontology prefixes -- confirm against the callers.
GLOBAL_SKIP = {
    "rnao",
    "mo",  # deprecated
    "resid",  # deprecated
    "adw",  # deprecated
}
Esempio n. 2
0
from pathlib import Path

import pystow

# Names exported by ``from <module> import *``.
__all__ = [
    'PYKEEN_HOME',
    'PYKEEN_DATASETS',
    'PYKEEN_BENCHMARKS',
    'PYKEEN_EXPERIMENTS',
    'PYKEEN_CHECKPOINTS',
]

#: A manager around the PyKEEN data folder. It defaults to ``~/.data/pykeen``.
#: This can be overridden with the envvar ``PYKEEN_HOME``.
#: For more information, see https://github.com/cthoyt/pystow
PYKEEN_MODULE: pystow.Module = pystow.module('pykeen')
#: A path representing the PyKEEN data folder
PYKEEN_HOME: Path = PYKEEN_MODULE.base
# NOTE(review): the sub-paths below are obtained with ``Module.get``; confirm the
# pinned pystow release still provides it (other pystow-based code calls ``Module.join``).
#: A subdirectory of the PyKEEN data folder for datasets, defaults to ``~/.data/pykeen/datasets``
PYKEEN_DATASETS: Path = PYKEEN_MODULE.get('datasets')
#: A subdirectory of the PyKEEN data folder for benchmarks, defaults to ``~/.data/pykeen/benchmarks``
PYKEEN_BENCHMARKS: Path = PYKEEN_MODULE.get('benchmarks')
#: A subdirectory of the PyKEEN data folder for experiments, defaults to ``~/.data/pykeen/experiments``
PYKEEN_EXPERIMENTS: Path = PYKEEN_MODULE.get('experiments')
#: A subdirectory of the PyKEEN data folder for checkpoints, defaults to ``~/.data/pykeen/checkpoints``
PYKEEN_CHECKPOINTS: Path = PYKEEN_MODULE.get('checkpoints')

# Default checkpoint file name.
# NOTE(review): presumably resolved relative to PYKEEN_CHECKPOINTS -- confirm at the call sites.
PYKEEN_DEFAULT_CHECKPOINT = "PyKEEN_just_saved_my_day.pt"

# Keyword description of a float range from 0.0 to 0.5 with q=0.1.
# NOTE(review): q is presumably the step size used when searching dropout values -- confirm.
DEFAULT_DROPOUT_HPO_RANGE = dict(type=float, low=0.0, high=0.5, q=0.1)
#: We define the embedding dimensions as a multiple of 16 because it is computationally beneficial (on a GPU)
Esempio n. 3
0
# Absolute path of the ``.config`` folder inside the current user's home directory.
_USER_CONFIG_DIRECTORY = os.path.abspath(os.path.join(os.path.expanduser('~'), '.config'))

# Configuration file locations, in this order: four bare (relative) file names,
# followed by locations underneath the per-user configuration directory.
DEFAULT_CONFIG_PATHS = [
    'bio2bel.cfg',
    'bio2bel.ini',
    'pybel.cfg',
    'pybel.ini',
    *(
        os.path.join(_USER_CONFIG_DIRECTORY, *parts)
        for parts in (
            ('bio2bel.ini',),
            ('bio2bel.cfg',),
            ('bio2bel', 'config.ini'),
            ('bio2bel', 'bio2bel.cfg'),
            ('bio2bel', 'bio2bel.ini'),
            ('pybel.ini',),
            ('pybel.cfg',),
            ('pybel', 'config.ini'),
            ('pybel', 'pybel.cfg'),
            ('pybel', 'pybel.ini'),
        )
    ),
]

# pystow handle for the 'bio2bel' key; BIO2BEL_HOME is its ``base`` attribute.
BIO2BEL_MODULE = pystow.module('bio2bel')
BIO2BEL_HOME = BIO2BEL_MODULE.base

# Reusable click option (-d / --directory) that accepts directories but not files.
# NOTE: the default is os.getcwd() evaluated once, when this module is imported,
# not at the moment the decorated command runs.
directory_option = click.option(
    '-d',
    '--directory',
    type=click.Path(file_okay=False, dir_okay=True),
    default=os.getcwd(),
    help='output directory, defaults to current.',
    show_default=True,
)
Esempio n. 4
0
# Names exported by ``from <module> import *``. Other module-level names defined
# further down (for example COLLECTIONS_PATH and DOCS) are not listed here.
__all__ = [
    'HERE',
    'DATA_DIRECTORY',
    'BIOREGISTRY_PATH',
    'METAREGISTRY_PATH',
    'BIOREGISTRY_MODULE',
    'EnsureEntry',
]

# Absolute path of the directory that contains this module.
HERE = pathlib.Path(os.path.dirname(os.path.abspath(__file__)))
# Paths of the JSON resources kept in the ``data`` folder beside this module.
DATA_DIRECTORY = HERE.joinpath('data')
BIOREGISTRY_PATH = DATA_DIRECTORY.joinpath('bioregistry.json')
METAREGISTRY_PATH = DATA_DIRECTORY.joinpath('metaregistry.json')
COLLECTIONS_PATH = DATA_DIRECTORY.joinpath('collections.json')

# pystow handle for the 'bioregistry' key.
BIOREGISTRY_MODULE = pystow.module('bioregistry')
# Type alias; currently nothing more specific than ``Any``.
EnsureEntry = Any

# Absolute path of the ``docs`` folder two levels above this module's directory,
# plus its ``_data`` and ``img`` subfolders (plain strings, not ``Path`` objects).
DOCS = os.path.abspath(os.path.join(HERE, os.pardir, os.pardir, 'docs'))
DOCS_DATA = os.path.join(DOCS, '_data')
DOCS_IMG = os.path.join(DOCS, 'img')

#: The URL of the remote Bioregistry site. Taken from
#: ``pystow.get_config('bioregistry', 'url')``; when that returns a falsy value
#: the public site https://bioregistry.io is used instead.
BIOREGISTRY_REMOTE_URL = pystow.get_config('bioregistry',
                                           'url') or 'https://bioregistry.io'

#: Resolution is broken on identifiers.org for the following
IDOT_BROKEN = {
    'gramene.growthstage',
    'oma.hog',
    'obi',
Esempio n. 5
0
from typing import Callable

import bioversions
import pystow

# Names exported by ``from <module> import *``.
__all__ = [
    'RAW_DIRECTORY',
    'DATABASE_DIRECTORY',
    'SPECIES_REMAPPING',
    'get_sqlalchemy_uri',
    'version_getter',
]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# pystow handle for the 'pyobo' key and its 'raw' / 'database' submodules.
# Each *_DIRECTORY constant is the ``base`` attribute of the matching handle.
PYOBO_MODULE = pystow.module('pyobo')
RAW_MODULE = PYOBO_MODULE.submodule('raw')
RAW_DIRECTORY = RAW_MODULE.base
DATABASE_MODULE = PYOBO_MODULE.submodule('database')
DATABASE_DIRECTORY = DATABASE_MODULE.base

# Maps a species name (key) to the name that should replace it (value).
SPECIES_REMAPPING = {
    "Canis familiaris": "Canis lupus familiaris",
}

# Identifiers to skip everywhere.
# NOTE(review): presumably ontology prefixes -- confirm against the callers.
GLOBAL_SKIP = {
    'rnao',
    'mo',  # deprecated
    'resid',  # deprecated
    'adw',  # deprecated
}
Esempio n. 6
0
import click
import pandas as pd
import pybel
import pybel.constants as pc
import pyobo
import pystow
from more_click import verbose_option
from pybel.dsl import BaseConcept
from pyobo.xrefdb.sources.famplex import _get_famplex_df
from tqdm.autonotebook import tqdm

from causal_precedence_training.resources import HERE

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# pystow handle for the ('causal_precedence_training', 'selventa') key path.
module = pystow.module('causal_precedence_training', 'selventa')

# Table returned by pyobo's private FamPlex helper; fetched when this module is imported.
fplx_df = _get_famplex_df()
# Lookup target_id -> source_id, built only from the rows whose target_ns is 'BEL'.
bel_fplx = dict(fplx_df.loc[fplx_df['target_ns'] == 'BEL',
                            ['target_id', 'source_id']].values)


@click.command()
@verbose_option
@click.option('--force', is_flag=True)
def main(force: bool):
    # Print a preview of the normalized dataframe for each corpus; ``force`` is
    # forwarded unchanged to get_normalized_dataframe.
    # (Kept as a comment on purpose: click would show a docstring as the help text.)
    corpora = ('large_corpus', 'small_corpus')
    for graph_name in corpora:
        header = f'{graph_name} results:'
        click.secho(header, fg='blue')
        preview = get_normalized_dataframe(graph_name=graph_name, force=force).head()
        click.echo(preview)
Esempio n. 7
0
import pystow
import yaml

# Absolute path of the directory that contains this module.
HERE = os.path.dirname(os.path.abspath(__file__))

# Local resources: files that sit next to this module.
VOCABULARY_PATH = os.path.join(HERE, 'vocabulary.json')
RESULTS = os.path.join(HERE, 'results.json')

# The ``_data`` folder is a sibling of this module's directory.
DATA_PATH = os.path.join(os.path.dirname(HERE), '_data')
SURVEY_PATH = os.path.join(DATA_PATH, 'surveys.yml')
BENCHMARKS_PATH = os.path.join(DATA_PATH, 'benchmarks.yml')
CURATION_PATH = os.path.join(DATA_PATH, 'curation.yml')

# Cached resources
# MOD is the pystow handle for the ('pykeen', 'metareview') key path; PAPERS_PATH
# is what that handle returns for the file name 'papers.json'.
MOD = pystow.module('pykeen', 'metareview')
PAPERS_PATH = MOD.join(name='papers.json')

# Loaders


def _load_yaml(path):
    """Parse the YAML file at *path* and return the resulting object.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the locale encoding of the machine running the code.

    :param path: Location of the YAML file to read.
    :return: Whatever ``yaml.safe_load`` produces for the file's content.
    """
    with open(path, encoding='utf-8') as file:
        return yaml.safe_load(file)


def _load_json(path):
    """Parse the JSON file at *path* and return the decoded object.

    The file is decoded explicitly as UTF-8 (the encoding JSON text is exchanged
    in) so non-ASCII content is read the same way on every platform instead of
    following the locale encoding.

    :param path: Location of the JSON file to read.
    :return: The object produced by ``json.load`` for the file's content.
    """
    with open(path, encoding='utf-8') as file:
        return json.load(file)

Esempio n. 8
0
import bioversions
import click
import more_click
import pystow
from tabulate import tabulate
from tqdm import tqdm
from zenodo_client import Creator, Metadata, ensure_zenodo

# unlike BioGRID, ExCAPE-DB can be considered a static resource
# URL of the xz-compressed ExCAPE-DB TSV hosted on Zenodo, and the version label used for it.
EXCAPE_URL = "https://zenodo.org/record/2543724/files/pubchem.chembl.dataset4publication_inchi_smiles_v2.tsv.xz"
EXCAPE_VERSION = "v2"

# URL of DisGeNET's gzip-compressed TSV of curated gene-disease associations.
DISGENET_URL = "https://www.disgenet.org/static/disgenet_ap1/files/downloads/curated_gene_disease_associations.tsv.gz"

# pystow handles for the "nsockg" and "bio" keys.
NSOCKG_MODULE = pystow.module("nsockg")
BIO = pystow.module("bio")

# Zenodo record metadata: title, upload type, description and a single creator.
# NOTE(review): presumably handed to ensure_zenodo when publishing -- confirm at the call site.
metadata = Metadata(
    title="Not Scared of Chemistry Knowledge Graph",
    upload_type="dataset",
    description="A combination of ExCAPE-DB, BioGRID, HomoloGene, and chemical similarities in a knowledge graph.",
    creators=[
        Creator(
            name="Hoyt, Charles Tapley",
            affiliation="Harvard Medical School",
            orcid="0000-0003-4423-4370",
        ),
    ],
)
Esempio n. 9
0
)

# Names exported by ``from <module> import *``.
__all__ = [
    "PYKEEN_HOME",
    "PYKEEN_DATASETS",
    "PYKEEN_BENCHMARKS",
    "PYKEEN_EXPERIMENTS",
    "PYKEEN_CHECKPOINTS",
    "PYKEEN_LOGS",
    "AGGREGATIONS",
]

#: A manager around the PyKEEN data folder. It defaults to ``~/.data/pykeen``.
#: This can be overridden with the envvar ``PYKEEN_HOME``.
#: For more information, see https://github.com/cthoyt/pystow
PYKEEN_MODULE: pystow.Module = pystow.module("pykeen")
#: A path representing the PyKEEN data folder
PYKEEN_HOME: Path = PYKEEN_MODULE.base
#: A subdirectory of the PyKEEN data folder for datasets, defaults to ``~/.data/pykeen/datasets``
PYKEEN_DATASETS: Path = PYKEEN_MODULE.join("datasets")
#: A subdirectory of the PyKEEN data folder for benchmarks, defaults to ``~/.data/pykeen/benchmarks``
PYKEEN_BENCHMARKS: Path = PYKEEN_MODULE.join("benchmarks")
#: A subdirectory of the PyKEEN data folder for experiments, defaults to ``~/.data/pykeen/experiments``
PYKEEN_EXPERIMENTS: Path = PYKEEN_MODULE.join("experiments")
#: A subdirectory of the PyKEEN data folder for checkpoints, defaults to ``~/.data/pykeen/checkpoints``
PYKEEN_CHECKPOINTS: Path = PYKEEN_MODULE.join("checkpoints")
#: A subdirectory for PyKEEN logs
PYKEEN_LOGS: Path = PYKEEN_MODULE.join("logs")

# Default checkpoint file name.
# NOTE(review): presumably resolved relative to PYKEEN_CHECKPOINTS -- confirm at the call sites.
PYKEEN_DEFAULT_CHECKPOINT = "PyKEEN_just_saved_my_day.pt"