def pipeline(dataset):
    """
    Decide which pipeline to run on a dataset, and run luigi.build.
    :param analysis_driver.dataset.Dataset dataset:
    """
    luigi.interface.setup_interface_logging.has_run = True  # turn off Luigi's default logging setup
    log_cfg.get_logger('luigi-interface', 20)  # just calling log_cfg.get_logger registers the luigi-interface logger

    dataset.resolve_pipeline_and_toolset()
    dataset.start()
    final_stage = dataset.pipeline.build_pipeline(dataset)

    luigi_params = {
        'tasks': [final_stage],
        'local_scheduler': cfg.query('luigi', 'local_scheduler'),
        'workers': cfg.query('luigi', 'max_parallel_jobs', ret_default=4)
    }
    if luigi_params['local_scheduler'] is not True:
        luigi_params['scheduler_url'] = cfg['luigi']['scheduler_url']

    success = luigi.build(**luigi_params)

    # if any exceptions occurred during the pipeline, raise them here again
    dataset.raise_exceptions()

    return 0 if success is True else 9
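# Illustrative sketch (not part of the module above) of the 'luigi' config section that
# pipeline() expects. The key names come from the cfg.query calls above; the values and
# the scheduler URL are placeholders.
example_luigi_cfg = {
    'luigi': {
        'local_scheduler': False,                 # when not True, 'scheduler_url' is also required
        'max_parallel_jobs': 4,                   # passed to luigi.build as 'workers'
        'scheduler_url': 'http://localhost:8082'  # central Luigi scheduler (placeholder URL)
    }
}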
import os
from egcg_core import util, archive_management
from egcg_core.app_logging import logging_default as log_cfg
from analysis_driver.exceptions import AnalysisDriverError

app_logger = log_cfg.get_logger(__name__)


def create_output_links(input_dir, output_cfg, link_dir, **kwargs):
    exit_status = 0
    links = []
    for output_record in output_cfg.content.values():
        src_pattern = os.path.join(
            input_dir,
            os.path.join(*output_record.get('location', [''])),
            output_record['basename']
        ).format(**kwargs)

        source = util.find_file(src_pattern)
        if source:
            link_file = os.path.join(
                link_dir,
                output_record.get('new_name', os.path.basename(source))
            ).format(**kwargs)
            if os.path.islink(link_file):
                os.unlink(link_file)
            os.symlink(source, link_file)
            links.append(link_file)
        else:
            app_logger.warning('No file found for pattern ' + src_pattern)
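# Illustrative usage of create_output_links (hypothetical paths and sample IDs). The real
# output_cfg comes from analysis_driver's output file configuration; the stand-in below
# only mirrors the fields the function reads: 'location', 'basename' and 'new_name'.
from types import SimpleNamespace

example_output_cfg = SimpleNamespace(content={
    'vcf': {
        'location': ['vcf'],                   # joined onto input_dir
        'basename': '{sample_id}.vcf.gz',      # formatted with **kwargs
        'new_name': '{user_sample_id}.vcf.gz'  # optional new name for the symlink
    }
})
create_output_links(
    '/path/to/jobs/a_sample', example_output_cfg, '/path/to/linked_output',
    sample_id='a_sample', user_sample_id='a_user_sample'
)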
import os
import sys
import argparse
import datetime
import pymongo
from egcg_core import clarity
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import ConfigError
from egcg_core.rest_communication import Communicator

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import rest_config as rest_cfg
from config import reporting_app_config as app_cfg
from bin.retrigger_aggregation import retrigger_aggregation_run_elements

if __name__ == '__main__':
    log_cfg.add_stdout_handler()
    app_logger = log_cfg.get_logger('migration_v0.15_v0.16')

    a = argparse.ArgumentParser()
    a.add_argument('--username', required=True)
    a.add_argument('--password', required=True)
    args = a.parse_args()

    # check the config is set
    if not rest_cfg or not app_cfg or 'db_host' not in rest_cfg or 'rest_api' not in app_cfg:
        raise ConfigError('Configuration file was not set or is missing values')

    cli = pymongo.MongoClient(rest_cfg['db_host'], rest_cfg['db_port'])
    db = cli[rest_cfg['db_name']]

    # Rename the variable in the mongo database
    collection = db['samples']
import os
import re
import subprocess
from time import sleep
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import ArchivingError

app_logger = log_cfg.get_logger(__name__)

state_re = re.compile(r'^(.+): \((0x\w+)\)(.+)?')


def _get_stdout(cmd):
    p = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    exit_status = p.wait()
    o, e = p.stdout.read(), p.stderr.read()
    msg = '%s -> (%s, %s, %s)' % (cmd, exit_status, o, e)
    if exit_status:
        app_logger.error(msg)
        return None
    else:
        app_logger.debug(msg)
        return o.decode('utf-8').strip()


def archive_states(file_path):
    val = _get_stdout('lfs hsm_state ' + file_path)
    match = state_re.match(val)
    if match:
        file_name = match.group(1)
import os
import sys
import argparse
import datetime
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import ConfigError
from egcg_core.rest_communication import Communicator

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from rest_api import settings
from config import reporting_app_config as app_cfg
from bin.retrigger_aggregation import retrigger_aggregation_run_elements, retrigger_aggregation_projects

if __name__ == '__main__':
    log_cfg.add_stdout_handler()
    app_logger = log_cfg.get_logger('migration_v0.16_v0.17')

    a = argparse.ArgumentParser()
    a.add_argument('--username', required=True)
    a.add_argument('--password', required=True)
    args = a.parse_args()

    # check the config is set
    if not app_cfg or 'rest_api' not in app_cfg:
        raise ConfigError('Configuration file was not set or is missing values')

    two_month_ago = datetime.datetime.now() - datetime.timedelta(days=90)

    app_logger.info('Retrigger aggregation')
    # only retrigger aggregation on recent runs
import os
import sys
import argparse
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import ConfigError
from egcg_core.rest_communication import Communicator

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import reporting_app_config as app_cfg
from bin.retrigger_aggregation import retrigger_aggregation_samples

if __name__ == '__main__':
    log_cfg.add_stdout_handler()
    app_logger = log_cfg.get_logger('migration_v0.18_v0.19')

    a = argparse.ArgumentParser()
    a.add_argument('--username', required=True)
    a.add_argument('--password', required=True)
    args = a.parse_args()

    # check the config is set
    if not app_cfg or 'rest_api' not in app_cfg:
        raise ConfigError('Configuration file was not set or is missing values')

    app_logger.info('Retrigger aggregation')
    c = Communicator(auth=(args.username, args.password), baseurl=app_cfg['rest_api'])
    retrigger_aggregation_samples(c)
import os
import logging
import argparse
import sys
from egcg_core import executor, util, rest_communication, archive_management
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core import constants as c

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from analysis_driver.util import bash_commands
from analysis_driver.config import default as cfg, load_config
from analysis_driver.exceptions import PipelineError
from analysis_driver.dataset import RunDataset
from analysis_driver.report_generation import RunCrawler

app_logger = log_cfg.get_logger('Remove_phix')


def main():
    args = _parse_args()
    load_config()
    log_cfg.default_level = logging.DEBUG
    log_cfg.add_stdout_handler(logging.DEBUG)
    remove_phix(args.sample_id)


def _parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--sample_id', required=True)
    return parser.parse_args()
import re
from genologics.lims import Lims
from egcg_core.config import cfg
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import EGCGError

app_logger = log_cfg.get_logger('clarity')

try:
    from egcg_core.ncbi import get_species_name
except ImportError:
    app_logger.warning('Could not import egcg_core.ncbi. Is sqlite3 available?')

    def get_species_name(query_species):
        raise EGCGError('Could not import egcg_core.ncbi.get_species_name - sqlite3 seems to be unavailable.')

_lims = None


def connection():
    global _lims
    if not _lims:
        _lims = Lims(**cfg.get('clarity'))
    return _lims


def get_valid_lanes(flowcell_name):
    """
import os
import os.path
import shutil
from glob import glob
from egcg_core.exceptions import EGCGError
from egcg_core.app_logging import logging_default as log_cfg

app_logger = log_cfg.get_logger('util')


def find_files(*path_parts):
    return sorted(glob(os.path.join(*path_parts)))


def find_file(*path_parts):
    files = find_files(*path_parts)
    if files:
        return files[0]


def str_join(*parts, separator=''):
    return separator.join(parts)


def find_fastqs(location, project_id, sample_id, lane=None):
    """
    Find all .fastq.gz files in an input folder 'location/project_id'.
    :param location: Top-level directory
    :param str project_id: Project subdirectory to search
    :param str sample_id: Sample subdirectory to search
    :param lane: Specific lane to search for (optional)
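# Illustrative calls to the helpers above; the directory layout and file names are
# placeholders, not paths from the real pipeline.
example_fastqs = find_files('/data/a_run', 'a_project', 'a_sample', '*.fastq.gz')  # sorted list, possibly empty
example_sheet = find_file('/data/a_run', 'SampleSheet*.csv')                       # first match, or None
assert str_join('a_project', 'a_sample', separator='/') == 'a_project/a_sample'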
import os
import sys
import pymongo
import argparse
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.rest_communication import Communicator

sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from config import rest_config as rest_cfg

if __name__ == '__main__':
    log_cfg.add_stdout_handler()
    app_logger = log_cfg.get_logger('migration_v0.21_v0.22')

    a = argparse.ArgumentParser()
    a.add_argument('baseurl')
    a.add_argument('username')
    a.add_argument('password')
    args = a.parse_args()

    c = Communicator(auth=(args.username, args.password), baseurl=args.baseurl)
    cli = pymongo.MongoClient(rest_cfg['db_host'], rest_cfg['db_port'])
    db = cli[rest_cfg['db_name']]
    collection = db['samples']

    query = {'gender_validation': {'$exists': True}}
    app_logger.info('Renaming gender to sex for %s samples', collection.count(query))

    count = 0
    for s in collection.find(query):
        count += 1
import os
import sys
import logging
import argparse
import signal
import traceback
from egcg_core import rest_communication
from egcg_core.executor import stop_running_jobs
from egcg_core.app_logging import logging_default as log_cfg
from analysis_driver import exceptions
from analysis_driver.config import default as cfg, load_config
from analysis_driver.dataset_scanner import RunScanner, SampleScanner, ProjectScanner, DATASET_READY,\
    DATASET_FORCE_READY, DATASET_NEW, DATASET_REPROCESS, DATASET_RESUME

app_logger = log_cfg.get_logger('client')


def main(argv=None):
    args = _parse_args(argv)
    load_config()

    log_cfg.set_log_level(logging.DEBUG)
    log_cfg.cfg = cfg.get('logging', {})
    log_cfg.configure_handlers_from_config()

    if args.run:
        scanner = RunScanner()
    elif args.sample:
        scanner = SampleScanner()
    elif args.project:
import re
import sqlite3
import requests
from egcg_core.config import cfg
from egcg_core.app_logging import logging_default as log_cfg

app_logger = log_cfg.get_logger('ncbi')

data_cache = None
cursor = None


def _connect():
    global data_cache
    global cursor
    data_cache = sqlite3.connect(cfg['ncbi_cache'])
    cursor = data_cache.cursor()


def _create_tables():
    _create = 'CREATE TABLE IF NOT EXISTS '
    cursor.execute(_create + 'species (taxid text UNIQUE, scientific_name text UNIQUE, common_name text)')
    cursor.execute(_create + 'aliases (query_name text UNIQUE, taxid text REFERENCES species(taxid))')
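# Illustrative direct query against the local cache set up above. The table and column
# names are taken from the CREATE statements in _create_tables; the species value is a
# placeholder and requires 'ncbi_cache' to be set in the config.
_connect()
_create_tables()
cursor.execute(
    'SELECT taxid, scientific_name, common_name FROM species WHERE scientific_name=?',
    ('Homo sapiens',)
)
cached_species = cursor.fetchone()  # None if this species has not been cached yet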
import os
import re
import subprocess
from egcg_core.app_logging import logging_default as log_cfg
from egcg_core.exceptions import EGCGError

app_logger = log_cfg.get_logger('archive_management')

state_re = re.compile(r'^(.+): \((0x\w+)\)(.+)?')


class ArchivingError(EGCGError):
    pass


def _get_stdout(cmd):
    p = subprocess.Popen(cmd.split(' '), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    exit_status = p.wait()
    o, e = p.stdout.read(), p.stderr.read()
    msg = '%s -> (%s, %s, %s)' % (cmd, exit_status, o, e)
    if exit_status:
        app_logger.error(msg)
        return None
    else:
        app_logger.debug(msg)
        return o.decode('utf-8').strip()


def archive_states(file_path):
    cmd = 'lfs hsm_state %s' % file_path
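# Example of the 'lfs hsm_state' output that state_re is written against. The sample line
# below is illustrative; real output depends on the file's HSM flags.
example_line = '/lustre/a_project/a_sample.bam: (0x0000000d) released exists archived, archive_id:1'
m = state_re.match(example_line)
# m.group(1) -> '/lustre/a_project/a_sample.bam'
# m.group(2) -> '0x0000000d'
# m.group(3) -> ' released exists archived, archive_id:1'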
import argparse
from egcg_core.rest_communication import Communicator
from egcg_core.app_logging import logging_default as log_cfg

"""
This script runs through all run elements and patches each with a null entity, retriggering database hook
aggregation. Superelements will be re-aggregated automatically.
"""

app_logger = log_cfg.get_logger('retrigger_aggregation')


def main():
    a = argparse.ArgumentParser()
    a.add_argument('--baseurl', required=True)
    a.add_argument('--username', required=True)
    a.add_argument('--password', required=True)
    args = a.parse_args()

    log_cfg.add_stdout_handler()
    c = Communicator((args.username, args.password), args.baseurl)
    retrigger_aggregation_run_elements(c)


def retrigger_aggregation_run_elements(communicator, **params):
    all_runs = communicator.get_documents('runs', all_pages=True, **params)
    app_logger.info('%s runs to process', len(all_runs))
    count = 0
    for r in all_runs:
        count += 1
        if count % 100 == 0:
            app_logger.info('%s runs processed', count)
import statistics
from cached_property import cached_property
from egcg_core.app_logging import logging_default
from rest_api import cfg

logger = logging_default.get_logger(__name__)


class Expression:
    def __init__(self, *args, filter_func=None):
        self.args = args
        self.filter_func = filter_func

    def evaluate(self, e):
        raise NotImplementedError


class Calculation(Expression):
    default_return_value = None

    def _expression(self, *args):
        raise NotImplementedError

    def _resolve_element(self, element, query_string):
        e = element.copy()
        queries = query_string.split('.')
        for q in queries[:-1]:
            e = e.get(q, {})
        return e.get(queries[-1])
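# Minimal illustrative Calculation subclass, assuming evaluate() (not shown above) resolves
# each query string in self.args with _resolve_element and feeds the values to _expression.
class Mean(Calculation):
    default_return_value = None

    def _expression(self, *args):
        data = [a for a in args if a is not None]
        return statistics.mean(data) if data else self.default_return_value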