def pipeline(dataset):
    """
    Work out which pipeline applies to a dataset, build it and run it via luigi.build.
    :param analysis_driver.dataset.Dataset dataset:
    """
    # Suppress Luigi's own logging setup; registering 'luigi-interface' with
    # log_cfg routes its messages through our handlers instead.
    luigi.interface.setup_interface_logging.has_run = True  # turn off Luigi's default logging setup
    log_cfg.get_logger('luigi-interface', 20)  # just calling log_cfg.get_logger registers the luigi-interface

    dataset.resolve_pipeline_and_toolset()
    dataset.start()
    terminal_stage = dataset.pipeline.build_pipeline(dataset)

    build_args = {
        'tasks': [terminal_stage],
        'local_scheduler': cfg.query('luigi', 'local_scheduler'),
        'workers': cfg.query('luigi', 'max_parallel_jobs', ret_default=4)
    }
    if build_args['local_scheduler'] is not True:
        # Not explicitly local, so point Luigi at the central scheduler.
        build_args['scheduler_url'] = cfg['luigi']['scheduler_url']

    ok = luigi.build(**build_args)

    # if any exception occurred during the pipeline raise them here again
    dataset.raise_exceptions()

    return 0 if ok is True else 9
import os
from egcg_core import util, archive_management
from egcg_core.app_logging import logging_default as log_cfg
from analysis_driver.exceptions import AnalysisDriverError

app_logger = log_cfg.get_logger(__name__)


def create_output_links(input_dir, output_cfg, link_dir, **kwargs):
    """
    Symlink pipeline outputs described by output_cfg from input_dir into link_dir.

    :param str input_dir: directory in which source files are searched for
    :param output_cfg: config whose .content values each describe one output
                       record ('location', 'basename', optional 'new_name')
    :param str link_dir: directory in which the symlinks are created
    :param kwargs: values interpolated into the configured path patterns
    """
    # NOTE(review): exit_status and links are accumulated but no return is
    # visible here — this block may be truncated; confirm against the full file.
    exit_status = 0
    links = []

    for output_record in output_cfg.content.values():
        # Build the search pattern: input_dir / configured location / basename,
        # with any {placeholders} filled in from kwargs.
        src_pattern = os.path.join(
            input_dir,
            os.path.join(*output_record.get('location', [''])),
            output_record['basename']
        ).format(**kwargs)

        source = util.find_file(src_pattern)
        if source:
            # Link name defaults to the source's basename unless 'new_name' overrides it.
            link_file = os.path.join(
                link_dir,
                output_record.get('new_name', os.path.basename(source))
            ).format(**kwargs)
            # Replace a pre-existing link rather than letting os.symlink fail.
            if os.path.islink(link_file):
                os.unlink(link_file)
            os.symlink(source, link_file)
            links.append(link_file)
        else:
            app_logger.warning('No file found for pattern ' + src_pattern)
import datetime
import pymongo
from egcg_core import clarity
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import ConfigError
from egcg_core.rest_communication import Communicator

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import rest_config as rest_cfg
from config import reporting_app_config as app_cfg
from bin.retrigger_aggregation import retrigger_aggregation_run_elements


if __name__ == '__main__':
    # Migration script: operates directly on the 'samples' Mongo collection
    # (truncated in this chunk after the collection is selected).
    log_cfg.add_stdout_handler()
    app_logger = log_cfg.get_logger('migration_v0.15_v0.16')

    # NOTE(review): argparse is used below but no 'import argparse' is visible
    # in this chunk — confirm it is imported in the full file.
    a = argparse.ArgumentParser()
    a.add_argument('--username', required=True)
    a.add_argument('--password', required=True)
    args = a.parse_args()

    # check the config is set
    if not rest_cfg or not app_cfg or 'db_host' not in rest_cfg or 'rest_api' not in app_cfg:
        raise ConfigError('Configuration file was not set or is missing values')

    # Connect directly to the backing Mongo database.
    cli = pymongo.MongoClient(rest_cfg['db_host'], rest_cfg['db_port'])
    db = cli[rest_cfg['db_name']]

    # Rename the variable in the mongo database
    collection = db['samples']
import os
import re
import subprocess
from time import sleep
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import ArchivingError

app_logger = log_cfg.get_logger(__name__)
state_re = re.compile('^(.+): \((0x\w+)\)(.+)?')


def _get_stdout(cmd):
    """Run cmd, log the outcome and return its decoded stdout, or None on failure."""
    proc = subprocess.Popen(
        cmd.split(' '),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE
    )
    status = proc.wait()
    out = proc.stdout.read()
    err = proc.stderr.read()
    summary = '%s -> (%s, %s, %s)' % (cmd, status, out, err)
    if status:
        # Non-zero exit: log at error level and signal failure with None.
        app_logger.error(summary)
        return None
    app_logger.debug(summary)
    return out.decode('utf-8').strip()


def archive_states(file_path):
    """Query 'lfs hsm_state' for file_path and parse its HSM state output."""
    # NOTE(review): _get_stdout returns None on failure, which would make
    # state_re.match raise TypeError here — confirm callers guard against it.
    val = _get_stdout('lfs hsm_state ' + file_path)
    match = state_re.match(val)
    if match:
        # group(1) is the file-name portion before ': (0x...)'.
        file_name = match.group(1)
Example #5
0
import sys
import argparse
import datetime
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import ConfigError
from egcg_core.rest_communication import Communicator

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from rest_api import settings
from config import reporting_app_config as app_cfg
from bin.retrigger_aggregation import retrigger_aggregation_run_elements, retrigger_aggregation_projects

if __name__ == '__main__':
    # Migration script: retriggers Rest API aggregation (truncated in this chunk).

    log_cfg.add_stdout_handler()
    app_logger = log_cfg.get_logger('migration_v0.16_v0.17')

    a = argparse.ArgumentParser()
    a.add_argument('--username', required=True)
    a.add_argument('--password', required=True)
    args = a.parse_args()

    # check the config is set
    if not app_cfg or 'rest_api' not in app_cfg:
        raise ConfigError(
            'Configuration file was not set or is missing values')

    # NOTE(review): name says 'two_month_ago' but the delta is 90 days
    # (~three months) — confirm which is intended.
    two_month_ago = datetime.datetime.now() - datetime.timedelta(days=90)

    app_logger.info('Retrigger aggregation')
    # only retrigger aggregation on recent runs
import os
import sys
import argparse
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import ConfigError
from egcg_core.rest_communication import Communicator

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import reporting_app_config as app_cfg
from bin.retrigger_aggregation import retrigger_aggregation_samples

if __name__ == '__main__':
    # Migration script: retrigger sample aggregation via the Rest API.
    log_cfg.add_stdout_handler()
    app_logger = log_cfg.get_logger('migration_v0.18_v0.19')

    parser = argparse.ArgumentParser()
    parser.add_argument('--username', required=True)
    parser.add_argument('--password', required=True)
    args = parser.parse_args()

    # Refuse to run without a usable Rest API configuration.
    if not app_cfg or 'rest_api' not in app_cfg:
        raise ConfigError('Configuration file was not set or is missing values')

    app_logger.info('Retrigger aggregation')
    communicator = Communicator(
        auth=(args.username, args.password),
        baseurl=app_cfg['rest_api']
    )
    retrigger_aggregation_samples(communicator)
import os
import logging
import argparse
import sys
from egcg_core import executor, util, rest_communication, archive_management
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core import constants as c
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from analysis_driver.util import bash_commands
from analysis_driver.config import default as cfg, load_config
from analysis_driver.exceptions import PipelineError
from analysis_driver.dataset import RunDataset
from analysis_driver.report_generation import RunCrawler

app_logger = log_cfg.get_logger('Remove_phix')


def main():
    """Entry point: parse the CLI, load config, set up debug logging, then remove PhiX."""
    args = _parse_args()
    load_config()
    # Log at debug level, echoing to stdout for interactive use.
    log_cfg.default_level = logging.DEBUG
    log_cfg.add_stdout_handler(logging.DEBUG)
    remove_phix(args.sample_id)


def _parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--sample_id', required=True)
    return parser.parse_args()

Example #8
0
import re
from genologics.lims import Lims
from egcg_core.config import cfg
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import EGCGError

app_logger = log_cfg.get_logger('clarity')
try:
    from egcg_core.ncbi import get_species_name
except ImportError:
    # egcg_core.ncbi depends on sqlite3; fall back to a stub with the same
    # signature so callers fail loudly at call time rather than import time.
    app_logger.warning(
        'Could not import egcg_core.ncbi. Is sqlite3 available?')

    def get_species_name(query_species):
        # Stub: always raises, since the real lookup is unavailable.
        raise EGCGError(
            'Could not import egcg_core.ncbi.get_species_name - sqlite3 seems to be unavailable.'
        )


_lims = None


def connection():
    """Return the module-level cached Lims connection, creating it lazily."""
    global _lims
    if _lims:
        return _lims
    # First use (or falsy cache): build the connection from the 'clarity' config.
    _lims = Lims(**cfg.get('clarity'))
    return _lims


def get_valid_lanes(flowcell_name):
    """
Example #9
0
import os
import os.path
import shutil
from glob import glob
from egcg_core.exceptions import EGCGError
from egcg_core.app_logging import logging_default as log_cfg

app_logger = log_cfg.get_logger('util')


def find_files(*path_parts):
    """Glob the joined path parts and return all matches in sorted order."""
    pattern = os.path.join(*path_parts)
    return sorted(glob(pattern))


def find_file(*path_parts):
    """Return the first (sorted) file matching the joined path parts, or None."""
    matches = find_files(*path_parts)
    return matches[0] if matches else None


def str_join(*parts, separator=''):
    """Concatenate the given string parts, inserting separator between them."""
    joined = separator.join(parts)
    return joined


def find_fastqs(location, project_id, sample_id, lane=None):
    """
    Find all .fastq.gz files in an input folder 'location/project_id'.
    :param location: Top-level directory
    :param str project_id: Project subdirectory to search
    :param str sample_id: Sample subdirectory to search
    :param lane: Specific lane to search for (optional)
import os
import sys
import pymongo
import argparse
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.rest_communication import Communicator

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import rest_config as rest_cfg


if __name__ == '__main__':
    # Migration script: process samples that have a 'gender_validation' field,
    # working directly on Mongo plus a Rest API client (truncated in this chunk).
    log_cfg.add_stdout_handler()
    app_logger = log_cfg.get_logger('migration_v0.21_v0.22')

    a = argparse.ArgumentParser()
    a.add_argument('baseurl')
    a.add_argument('username')
    a.add_argument('password')
    args = a.parse_args()

    # Rest client for API-side work, plus a direct Mongo connection.
    c = Communicator(auth=(args.username, args.password), baseurl=args.baseurl)
    cli = pymongo.MongoClient(rest_cfg['db_host'], rest_cfg['db_port'])
    db = cli[rest_cfg['db_name']]
    collection = db['samples']

    query = {'gender_validation': {'$exists': True}}
    app_logger.info('Renaming gender to sex for %s samples', collection.count(query))
    count = 0
    # NOTE(review): the loop body is not visible in this chunk — truncated here.
    for s in collection.find(query):
        count += 1
import os
import sys
import logging
import argparse
import signal
import traceback
from egcg_core import rest_communication
from egcg_core.executor import stop_running_jobs
from egcg_core.app_logging import logging_default as log_cfg
from analysis_driver import exceptions
from analysis_driver.config import default as cfg, load_config
from analysis_driver.dataset_scanner import RunScanner, SampleScanner, ProjectScanner, DATASET_READY,\
    DATASET_FORCE_READY, DATASET_NEW, DATASET_REPROCESS, DATASET_RESUME

app_logger = log_cfg.get_logger('client')


def main(argv=None):
    """
    Client entry point: load config, configure logging and select a dataset
    scanner based on the CLI flags.
    :param argv: argument list to parse (None means use sys.argv)
    """
    args = _parse_args(argv)

    load_config()

    # Debug level everywhere, then apply handler config from the 'logging' section.
    log_cfg.set_log_level(logging.DEBUG)
    log_cfg.cfg = cfg.get('logging', {})
    log_cfg.configure_handlers_from_config()

    # One of --run/--sample/--project selects the scanner type.
    # NOTE(review): this block is truncated in this chunk after the last elif.
    if args.run:
        scanner = RunScanner()
    elif args.sample:
        scanner = SampleScanner()
    elif args.project:
Example #12
0
import re
import sqlite3
import requests
from egcg_core.config import cfg
from egcg_core.app_logging import logging_default as log_cfg

app_logger = log_cfg.get_logger('ncbi')

data_cache = None
cursor = None


def _connect():
    """Open the sqlite cache configured under 'ncbi_cache' and bind the module globals."""
    global data_cache, cursor
    data_cache = sqlite3.connect(cfg['ncbi_cache'])
    cursor = data_cache.cursor()


def _create_tables():
    """Ensure the 'species' and 'aliases' cache tables exist in the sqlite cache."""
    _create = 'CREATE TABLE IF NOT EXISTS '
    # Same two DDL statements as before, issued in the same order.
    schemas = (
        'species (taxid text UNIQUE, scientific_name text UNIQUE, common_name text)',
        'aliases (query_name text UNIQUE, taxid text REFERENCES species(taxid))'
    )
    for schema in schemas:
        cursor.execute(_create + schema)
import os
import re
import subprocess
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import EGCGError

app_logger = log_cfg.get_logger('archive_management')
state_re = re.compile('^(.+): \((0x\w+)\)(.+)?')


class ArchivingError(EGCGError):
    """Raised when an archive management operation fails."""


def _get_stdout(cmd):
    """Execute cmd, log '(exit, stdout, stderr)' and return decoded stdout, or None on failure."""
    proc = subprocess.Popen(cmd.split(' '),
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE)
    rc = proc.wait()
    out, err = proc.stdout.read(), proc.stderr.read()
    msg = '%s -> (%s, %s, %s)' % (cmd, rc, out, err)
    if rc:
        # Command failed: report at error level and return None.
        app_logger.error(msg)
        return None
    app_logger.debug(msg)
    return out.decode('utf-8').strip()


def archive_states(file_path):
    cmd = 'lfs hsm_state %s' % file_path
Example #14
0
import argparse
from egcg_core.rest_communication import Communicator
from egcg_core.app_logging import logging_default as log_cfg
"""
This script runs through all run elements and patches each with a null entity, retriggering database hook aggregation.
Superelements will be re-aggregated automatically.
"""

app_logger = log_cfg.get_logger('retrigger_aggregation')


def main():
    """Parse credentials and base URL from the CLI, then retrigger run-element aggregation."""
    parser = argparse.ArgumentParser()
    for opt in ('--baseurl', '--username', '--password'):
        parser.add_argument(opt, required=True)
    args = parser.parse_args()

    log_cfg.add_stdout_handler()
    communicator = Communicator((args.username, args.password), args.baseurl)
    retrigger_aggregation_run_elements(communicator)


def retrigger_aggregation_run_elements(communicator, **params):
    """
    Iterate over all runs to retrigger database-hook aggregation.
    :param communicator: Rest API Communicator used to fetch the runs
    :param params: extra query parameters forwarded to get_documents
    """
    all_runs = communicator.get_documents('runs', all_pages=True, **params)
    app_logger.info('%s runs to process', len(all_runs))
    count = 0
    # NOTE(review): the per-run work is truncated in this chunk — only the
    # progress logging (every 100 runs) is visible here.
    for r in all_runs:
        count += 1
        if count % 100 == 0:
            app_logger.info('%s runs processed', count)
import statistics
from cached_property import cached_property
from egcg_core.app_logging import logging_default
from rest_api import cfg

logger = logging_default.get_logger(__name__)


class Expression:
    """Base class for composable expressions evaluated against a data element."""

    def __init__(self, *args, filter_func=None):
        """Store the operand arguments and an optional element filter function."""
        self.filter_func = filter_func
        self.args = args

    def evaluate(self, e):
        """Compute this expression for element e — subclasses must override."""
        raise NotImplementedError


class Calculation(Expression):
    """Expression that derives a value from element fields (class continues past this chunk)."""
    # Fallback result value — presumably returned when the calculation cannot
    # be performed; confirm against the rest of the class (not visible here).
    default_return_value = None

    def _expression(self, *args):
        # Subclasses implement the actual computation over resolved values.
        raise NotImplementedError

    def _resolve_element(self, element, query_string):
        """Follow a dotted query string (e.g. 'a.b.c') into a nested dict, returning the leaf value or None."""
        node = element.copy()
        *parents, leaf = query_string.split('.')
        # Walk intermediate keys, substituting {} for any missing level so the
        # final lookup degrades to None instead of raising.
        for key in parents:
            node = node.get(key, {})
        return node.get(leaf)