Example no. 1
def wasp_step_2(name, remapped, pipeline=None, dependency=None):
    """Run filter_remapped_reads.py following second mapping.

    :name:       The name of the original mapped bam or sam, used to make file
                 names
    :remapped:   The file created by the second mapping.
    :pipeline:   The path to the WASP pipeline.
    :dependency: The job number of the remapping step.
    :returns:    The job number.

    """
    command = os.path.join(os.path.abspath(pipeline),
                           'filter_remapped_reads.py') \
        if pipeline else 'filter_remapped_reads.py'
    # Trim the name
    shortname = '.'.join(name.split('.')[:-1]) if name.endswith('.bam') \
        or name.endswith('.sam') else name
    logme.log('Submitting wasp step 2 for {}'.format(shortname), level='debug')
    return slurmy.monitor_submit(slurmy.make_job_file(
        'python2 {} {} {} {} {}'.format(command,
                                        shortname + '.to.remap.bam',
                                        remapped,
                                        shortname + '.remap.keep.bam',
                                        shortname + '.to.remap.num.gz'),
        shortname + '_step2', '16:00:00', 8, '30000', partition=PARTITION,
        modules=['python/2.7.5']), dependency, MAX_JOBS)
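A hedged usage sketch, not part of the source: the job number returned by the remapping submission becomes the dependency of wasp_step_2 for the same sample. The sample name, remapped bam, pipeline path, and job number below are hypothetical.

# Hypothetical chaining of WASP step 2 behind a finished remapping job.
remap_job = 123456  # hypothetical SLURM job number from the remap step
step2_job = wasp_step_2('sample1.bam',
                        'sample1_remapAligned.sortedByCoord.out.bam',
                        pipeline='/opt/WASP/mapping',
                        dependency=remap_job)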
Example no. 2
def sqlite_from_bed(bedfile):
    """Make an sqlite file from a bedfile."""
    db_name = bedfile if bedfile.endswith('.db') else bedfile + '.db'
    # If that db doesn't exist, check for an alternately named one
    exists = False
    if not os.path.exists(db_name):
        if bedfile.endswith('.gz'):
            alt_path = '.'.join(bedfile.split('.')[:-1]) + '.db'
        else:
            alt_path = bedfile + '.gz' + '.db'
        if os.path.exists(alt_path):
            db_name = alt_path
            exists = True
    else:
        exists = True

    # If the database already exists, use it
    if exists:
        logme.log('Using existing db, if this is ' +
                    'not what you want, delete ' + db_name,
                    level='info')
        conn = sqlite3.connect(db_name)
        c = conn.cursor()
        return db_name

    # Create an sqlite database from bed file
    logme.log('Creating sqlite database, this ' +
                'may take a long time.', level='info')
    conn = sqlite3.connect(db_name)
    c = conn.cursor()

    with open_zipped(bedfile) as infile:
        for line in infile:
            f = line.rstrip().split('\t')
            if len(f) < 4:
                continue
            # Create a table for this chromosome if it doesn't exist yet
            expr = ("SELECT * FROM sqlite_master WHERE name = '{}' " +
                    "and type='table';").format(f[0])
            c.execute(expr)
            if not c.fetchall():
                exp = ("CREATE TABLE '{}' (name text, start int, " +
                        "end int);").format(f[0])
                c.execute(exp)
                conn.commit()
            expr = ("INSERT INTO '{}' VALUES " +
                    "('{}','{}','{}')").format(f[0], f[3], f[1], f[2])
            c.execute(expr)
        conn.commit()
        # Create indices
        c.execute('''SELECT name FROM sqlite_master WHERE type='table';''')
        for i in c.fetchall():
            exp = ("CREATE INDEX '{0}_start_end' ON '{0}' " +
                    "(start, end)").format(i[0])
            c.execute(exp)
            conn.commit()

    return db_name
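A hedged sketch, not part of the source, of querying the database this function builds: every chromosome in column 1 of the bed becomes a table of (name, start, end) rows with a (start, end) index, so span lookups are done per chromosome. The db file name and position below are hypothetical.

import sqlite3

conn = sqlite3.connect('regions.bed.db')  # hypothetical output of sqlite_from_bed
c = conn.cursor()
# Find features on chr1 that span a position of interest.
c.execute("SELECT name, start, end FROM 'chr1' WHERE start <= ? AND end >= ?",
          (1000000, 1000000))
print(c.fetchall())
conn.close()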
Example no. 3
def sqlite_from_bed(bedfile):
    """Make an sqlite file from a bedfile."""
    db_name = bedfile if bedfile.endswith('.db') else bedfile + '.db'
    # If that db doesn't exist, check for an alternately named one
    exists = False
    if not os.path.exists(db_name):
        if bedfile.endswith('.gz'):
            alt_path = '.'.join(bedfile.split('.')[:-1]) + '.db'
        else:
            alt_path = bedfile + '.gz' + '.db'
        if os.path.exists(alt_path):
            db_name = alt_path
            exists = True
    else:
        exists = True

    # If the database already exists, use it
    if exists:
        logme.log('Using existing db, if this is ' +
                  'not what you want, delete ' + db_name,
                  level='info')
        conn = sqlite3.connect(db_name)
        c = conn.cursor()
        return db_name

    # Create an sqlite database from bed file
    logme.log('Creating sqlite database, this ' + 'may take a long time.',
              level='info')
    conn = sqlite3.connect(db_name)
    c = conn.cursor()

    with open_zipped(bedfile) as infile:
        for line in infile:
            f = line.rstrip().split('\t')
            if len(f) < 4:
                continue
            # Create a table for this chromosome if it doesn't exist yet
            expr = ("SELECT * FROM sqlite_master WHERE name = '{}' " +
                    "and type='table';").format(f[0])
            c.execute(expr)
            if not c.fetchall():
                exp = ("CREATE TABLE '{}' (name text, start int, " +
                       "end int);").format(f[0])
                c.execute(exp)
                conn.commit()
            expr = ("INSERT INTO '{}' VALUES " + "('{}','{}','{}')").format(
                f[0], f[3], f[1], f[2])
            c.execute(expr)
        conn.commit()
        # Create indices
        c.execute('''SELECT name FROM sqlite_master WHERE type='table';''')
        for i in c.fetchall():
            exp = ("CREATE INDEX '{0}_start_end' ON '{0}' " +
                   "(start, end)").format(i[0])
            c.execute(exp)
            conn.commit()

    return db_name
def get_gene_counts(bed_file, alleleseq_output, chrom_to_num=False,
                    logfile=sys.stderr):
    """Return a list of Gene objects from all snps in exons.

    :chrom_to_num: If true, convert 'chr1' to 1
    """
    logme.log('Parsing gene bed')
    exons, genes = parse_gene_bed(bed_file)

    # Stats
    total_count = 0
    not_in_gene = 0

    snps = []

    # Parse the file
    logme.log('Parsing alleleseq output')
    lines = int(check_output(['wc', '-l', alleleseq_output]).decode().split(' ')[0])
    with open_zipped(alleleseq_output) as fin:
        # File format test
        header = fin.readline()
        if not header.startswith('chrm'):
            raise Exception("Invalid alleleseq file format")
        # Loop through the file
        siter = tqdm(fin, unit='snps', total=lines) if 'PS1' in os.environ \
                else fin
        for line in siter:
            snpinfo = line.rstrip().split('\t')
            total_count += 1
            chrm = chr2num(snpinfo[0]) if chrom_to_num else snpinfo[0]
            gene = exons[chrm].find(int(snpinfo[1]))
            # Skip everything not in a gene
            if gene is not None:
                # The SNP adds itself to the genes list
                s = SNP(gene, snpinfo)
                snps.append(s)
            else:
                not_in_gene += 1

    newgenes = {}
    for name, gene in genes.items():
        if gene:
            gene.sum_counts()
            gene.calc_pval()
            gene.calc_winner()
            newgenes[name] = gene
    return newgenes
    def calc_winner(self):
        """Sum winners, try to pick Gene-level winner.

        Only chooses a Gene-level winner if all SNPs agree.

        Sets self.win to one of:
            'mat', 'pat', 'WEIRD', '?', or 'NA' (when no p-value is available)

        Ratio calculations use total SNP counts, not the sum of the parental
        alleles.
        """
        for snp in self.snps:
            if not hasattr(snp, 'win') or not snp.win:
                snp.calc_winner()
            if not snp.win:
                continue
            if snp.win == 'M':
                self.mat_win += 1
            elif snp.win == 'P':
                self.pat_win += 1
            elif snp.win == '?':
                self.failed += 1  # Not bothering with greater res now.
            if snp.cls == 'Sym':
                self.no_ase += 1
            elif snp.cls == 'Asym':
                self.has_ase += 1
            elif snp.cls == 'Weird':
                self.weird += 1

        # The winning parent must account for more than 60% of the gene's
        # SNPs, and the gene must have a p-value
        if not self.pval:
            self.calc_pval()
        if not self.pval:
            logme.log('No pvalue for gene {}'.format(self), 'debug')
            self.win = 'NA'
            return

        if self.weird/len(self) > 0.4:
            self.win = 'WEIRD'
        elif self.mat_win > self.pat_win and self.mat_win/len(self) > 0.6:
            self.win = 'mat'
        elif self.pat_win > self.mat_win and self.pat_win/len(self) > 0.6:
            self.win = 'pat'
        else:
            self.win = '?'
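A hedged sketch, not part of the source, of reading the result after get_gene_counts() has run calc_winner() on every gene; the input file names are hypothetical and only the attributes set above (win, pval) are used.

# Hypothetical post-processing: report genes with a clear parental winner.
genes = get_gene_counts('exons.bed', 'alleleseq_counts.txt')
for name, gene in genes.items():
    if gene.win in ('mat', 'pat'):
        print('{}\t{}\t{}'.format(name, gene.win, gene.pval))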
Example no. 6
def wasp_step_1(fl, snp_dir, pipeline=None, dependency=None):
    """Run find_intersecting_snps.py on fl.

    :fl:         The sam or bam file to run on.
    :snp_dir:    The SNP directory required by WASP.
    :pipeline:   The path to the WASP pipeline.
    :dependency: The job number of the initial mapping step.
    :returns:    The job number.
    """
    command = os.path.join(os.path.abspath(pipeline),
                           'find_intersecting_snps.py') \
        if pipeline else 'find_intersecting_snps.py'
    logme.log('Submitting wasp step 1 for {}'.format(fl), level='debug')
    return slurmy.monitor_submit(slurmy.make_job_file(
        'python2 {} -m 1000000 {} {}'.format(command, fl, snp_dir),
        fl + '_step1', '16:00:00', 8, '30000', partition=PARTITION,
        modules=['python/2.7.5']), dependency, MAX_JOBS)
Example no. 7
def test_module_logger(caplog, scope):
    logger = logme.log(scope=scope, config='my_test_logger', name='blah')

    assert type(logger) == ModuleLogger
    assert logger.name == 'blah'

    logger.info('module logger message.')

    assert caplog.record_tuples[0] == ('blah', 20, 'module logger message.')
Example no. 8
def peak_merge(peak_file, outfile=sys.stdout, overlap=.75, logfile=sys.stderr):
    """Merge peaks.

    :peak_file: A file handle or sequence file, currently bed only. File
                extension used for parsing, gzip or bzip compression OK, file
                handle OK.
    :outfile:   A bed file of merged peaks with the following fields:
                chr, start, end, count, mean_fold_change, mean_log10pval
                count is the number of members that went into the peak.
    :overlap:   The minimum fraction a peak must overlap the prior cluster to
                stay in it; peaks overlapping less start a new cluster.
    :logfile:   A file to contain some summary stats.
    """
    # Make sure overlap is specified
    if not isinstance(overlap, float):
        overlap = .75
        logme.log('Overlap not specified or not float, using .75', 'info')
    # Specify an offset that is subtracted from peaks to avoid clustering of
    # minor overlaps
    offset         = 4
    # Load file generator
    if isinstance(peak_file, str):
        line_count = int(check_output(
            "wc -l data/all_peaks_sorted.bed | sed 's/ .*//'", shell=True)
                         .decode().rstrip())
    else:
        line_count = None
    pfile          = peak_file_parser(peak_file)
    # Create objects to track stats
    lines          = 0
    clusters       = 0
    cluster_sizes  = {}
    extra_pops     = {}
    # First peak
    prior_peak     = next(pfile)
    cluster        = Cluster(prior_peak)
    lines         += 1
    clusters      += 1
    # Current peak; we read one peak ahead, so we keep a one-peak lag.
    peak           = next(pfile)
    lines         += 1
    # Use progress bar if outfile not open already.
    piter = pfile if not isinstance(outfile, str) or logme.MIN_LEVEL == 'debug'\
        else tqdm(pfile, total=line_count, unit='lines')
    # Open outfile and run algorithm
    with open_zipped(outfile, 'w') as fout:
        # Loop through peaks
        for next_peak in piter:
            lines += 1
            overlap_prior = peak.start < prior_peak.end - offset
            logme.log('Prior overlap: {}'.format(overlap_prior), 'debug')
            overlap_next = next_peak.start < peak.end - offset
            logme.log('Next overlap: {}'.format(overlap_next), 'debug')

            ######################
            #  Actual Algorithm  #
            ######################

            # Overlap both, decide by overlap amount.
            if overlap_prior and overlap_next:
                prior_overlap_amount = abs(float(prior_peak.end - peak.start) /
                                           float(cluster.len))
                logme.log('Prior overlap amount: {}'
                          .format(prior_overlap_amount), 'debug')
                prior_peak = peak
                # Overlap prior more than 75%, add to prior cluster.
                if prior_overlap_amount > overlap:
                    cluster.add(peak)
                    # Prep for next run
                    prior_peak = peak
                    peak       = next_peak
                    continue
                # Overlap prior less than 75%, make new cluster.
                else:
                    # Stats
                    if cluster.count in cluster_sizes:
                        cluster_sizes[cluster.count] += 1
                    else:
                        cluster_sizes[cluster.count]  = 1
                    diff = len(cluster.pops)-len(set(cluster.pops))
                    if diff:
                        if diff in extra_pops:
                            extra_pops[diff] += 1
                        else:
                            extra_pops[diff]  = 1
                    clusters    += 1
                    # Write cluster and make new one
                    cluster.write(fout)
                    cluster = Cluster(peak)
                    # Prep for next run
                    prior_peak = peak
                    peak       = next_peak
                    continue
            # Overlap only prior, add to cluster.
            elif overlap_prior:
                cluster.add(peak)
                # Prep for next run
                prior_peak = peak
                peak       = next_peak
                continue
            # Overlap next or none, make new cluster.
            else:
                # Stats
                if cluster.count in cluster_sizes:
                    cluster_sizes[cluster.count] += 1
                else:
                    cluster_sizes[cluster.count]  = 1
                diff = len(cluster.pops)-len(set(cluster.pops))
                if diff:
                    if diff in extra_pops:
                        extra_pops[diff] += 1
                    else:
                        extra_pops[diff]  = 1
                clusters    += 1
                # Write cluster and make new one
                cluster.write(fout)
                cluster      = Cluster(peak)
                # Prep for next run
                prior_peak = peak
                peak       = next_peak
                continue

            # Prep for next run
            prior_peak = peak
            peak       = next_peak

        # Last line
        logme.log('Last peak of file: {}_{}'.format(peak.chrom, peak.start),
                  'debug')
        overlap_prior = peak.start - offset < prior_peak.end
        logme.log('Prior overlap: {}'.format(overlap_prior), 'debug')
        if overlap_prior:
            cluster.add(peak)
            prior_peak = peak
        # Overlap next or none, make new cluster.
        else:
            # Stats
            if cluster.count in cluster_sizes:
                cluster_sizes[cluster.count] += 1
            else:
                cluster_sizes[cluster.count]  = 1
            diff = len(cluster.pops)-len(set(cluster.pops))
            if diff:
                if diff in extra_pops:
                    extra_pops[diff] += 1
                else:
                    extra_pops[diff]  = 1
            clusters    += 1
            # Write cluster and make new one
            cluster.write(fout)
            cluster      = Cluster(peak)
        # This is the end so write the last cluster
        # Stats
        if cluster.count in cluster_sizes:
            cluster_sizes[cluster.count] += 1
        else:
            cluster_sizes[cluster.count]  = 1
        diff = len(cluster.pops)-len(set(cluster.pops))
        if diff:
            if diff in extra_pops:
                extra_pops[diff] += 1
            else:
                extra_pops[diff]  = 1
        # Write cluster and make new one
        cluster.write(fout)

        # Done.

    # Print stats
    logfile.write('\n')
    logme.log('Clustering complete,\nstats:', 'info')
    logfile.write('Total lines:\t{}\n'.format(lines) +
                  'Total clusters:\t{}\n'.format(clusters) +
                  'Total clustered:\t{}\n'.format(
                      sum([k*v for k,v in cluster_sizes.items()])) +
                  'Cluster sizes:\n')
    for k, v in sorted(cluster_sizes.items()):
        logfile.write('\t{}:\t{}\n'.format(k, v))
    if extra_pops:
        logfile.write('Extra peaks for a single population in clusters:\n')
        for k, v in sorted(extra_pops.items()):
            logfile.write('\t{}:\t{}\n'.format(k, v))
    else:
        logfile.write('No extra peaks in any cluster.\n')
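A hedged invocation sketch of peak_merge, not part of the source; the paths are hypothetical and the input is assumed to be a coordinate-sorted peak bed, since the algorithm only compares each peak with its immediate predecessor.

# Hypothetical run: merge a sorted peak bed and keep the summary stats.
with open('peak_merge_stats.txt', 'w') as stats:
    peak_merge('all_peaks_sorted.bed', outfile='merged_peaks.bed',
               overlap=0.75, logfile=stats)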
def main(argv=None):
    """Run as a script."""
    if not argv:
        argv = sys.argv[1:]

    parser  = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)

    # Positional arguments
    parser.add_argument('exon_positions_bed',
                        help="A bed file of exons to include.")
    parser.add_argument('alleleseq_output',
                        help="The output of CombineSnpCounts.py, filtered " +
                        "FDR")

    parser.add_argument('-i', '--ind', help='Individual name')
    parser.add_argument('-n', '--tonum', action='store_true',
                        help='Convert chr# to #')

    # Optional Files
    optfiles = parser.add_argument_group('Optional Files')
    optfiles.add_argument('-o', '--outfile', default=sys.stdout,
                          help="Output file, Default STDOUT")
    optfiles.add_argument('-l', '--logfile', default=sys.stderr,
                          help="Log File, Default STDERR (append mode)")
    optfiles.add_argument('--data',
                          help="Output raw gene dictionary to this file.")
    optfiles.add_argument('--pandas',
                          help="Output a pickled pandas dataframe here.")

    # FDR Calculations
    fdrcalc = parser.add_argument_group('FDR Calculation')
    fdrcalc.add_argument('--filter-fdr', action='store_true',
                         help="Filter the output by FDR")
    fdrcalc.add_argument('-f', '--fdr-cutoff', type=float, default=0.1,
                         metavar='', help="FDR cutoff (Default 0.1).")
    fdrcalc.add_argument('-s', '--simulations', type=int, default=10,
                         metavar='',
                         help="# simulations for FDR calculation " +
                         "(Default: 10)")

    args = parser.parse_args(argv)

    ind = args.ind if args.ind \
            else os.path.basename(args.alleleseq_output).split('.')[0]

    genes = get_gene_counts(args.exon_positions_bed, args.alleleseq_output,
                            args.tonum)

    giter = tqdm(genes.values(), unit='genes') if 'PS1' in os.environ \
            else genes.values()
    for gene in giter:
        gene.sum_counts()

    if args.data:
        with open(args.data, 'wb') as fout:
            pickle.dump(genes, fout)

    df    = genes_to_df(genes, ind)

    fdr_pval = calc_fdr(
        [tuple(x) for x in df[['Mat_Counts', 'Pat_Counts', 'pval']].values],
        target=args.fdr_cutoff, sims=args.simulations)

    logme.log('In ind {} p-values smaller than {} beat FDR of {}'
              .format(ind, fdr_pval, args.fdr_cutoff), 'info')

    # Filter by FDR if requested
    if args.filter_fdr:
        logme.log('Filtering genes by FDR less than {}'
                  .format(args.fdr_cutoff), 'info')
        df = df[df.pval < fdr_pval]

    if args.pandas:
        df.to_pickle(args.pandas)

    with open_zipped(args.outfile, 'w') as fout:
        df.to_csv(fout, sep='\t')

    return 0
Example no. 10
from paths import NODE_PATH, LND_PATH, LNCLI_PATH, MASTER_NODE_PATH
import shutil
import subprocess
from time import sleep

import logme

log = logme.log(scope="module", name="node_runner")
LOGLEVEL = log.logger.master_level


def create_node_folder(path):
    node_folder = path / ".lnd"
    return node_folder


def create_node_command(node_folder, port, rpcport, restport):
    lnd_cmd = [
        f"{LND_PATH}",
        "--bitcoin.active",
        "--bitcoin.testnet",
        "--bitcoin.node=neutrino",
        "--neutrino.connect=faucet.lightning.community",
        f"--lnddir={node_folder}",
        f"--listen=localhost:{port}",
        f"--rpclisten=localhost:{rpcport}",
        f"--restlisten=localhost:{restport}",
        # "--no-macaroons",
        "--debuglevel=debug",
    ]
    print(lnd_cmd)
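A hedged sketch, not shown in the snippet, of how the assembled command might be launched; the node directory and ports are hypothetical, and only the subprocess module imported above is used.

from pathlib import Path

# Hypothetical launch of a throwaway node with the command built above.
node_folder = create_node_folder(Path.home() / "test_node")
cmd = create_node_command(node_folder, port=9735, rpcport=10009, restport=8080)
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
log.info(f"Started lnd (pid {proc.pid}) with lnddir {node_folder}")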
Example no. 11
from pathlib import Path
import json
import pycountry
import logme
from lxml import etree

from qa.common import (
    fetch_url,
    check,
)

log = logme.log(scope='module', name='inspire_qa')

# In theory, Protected Sites labels can be found using INSPIRE Theme documents as well,
# e.g.:
#  http://inspire.ec.europa.eu/theme/ps/ps.ro.xml
# However, resources for some languages (Catalan, Norwegian) are currently missing,
# so the GEMET API is queried instead.
PS_URL_PATTERN = "https://www.eionet.europa.eu/gemet/getConcept?concept_uri=" \
                 "http://inspire.ec.europa.eu/theme/ps&language={language_code}"

DEFAULT_PS_LANGUAGES = (
    "Bulgarian",
    "Catalan",
    "Croatian",
    "Czech",
    "Danish",
    "Dutch",
    "English",
    "Estonian",
    "Finnish",
Example no. 12
import os
import sys
import logme
import email

from io import StringIO
from imapclient import IMAPClient, exceptions
from settings import EMAIL_SERVER, EMAIL_USER, EMAIL_PASS, FOLDER_TO_SCAN
from sylk_parser import SylkParser

SERVER = None
ATTACHMENTS_PATH = "./attachments"
logger = logme.log(scope="module")


def setup():
    global SERVER

    if not all([EMAIL_USER, EMAIL_PASS, EMAIL_SERVER]):
        logger.error("Please provide all of the login information: email, password and host")
        sys.exit(1)

    logger.info(f"Connecting to {EMAIL_SERVER}")
    SERVER = IMAPClient(EMAIL_SERVER)

    logger.info(f"Logging in: {EMAIL_USER}")
    SERVER.login(EMAIL_USER, EMAIL_PASS)


def get_attachments():
    try:
Example no. 13
    LND_PATH,
    LNCLI_PATH,
    MASTER_NODE_PATH,
    MASTER_NODE_LNDDIR_PATH,
    MASTER_NODE_LND_DATAGRAPH,
    MASTER_NODE_LND_LOGS,
    MASTER_NODE_TLS_CERT,
    MASTER_NODE_TLS_KEY,
)
import shutil
import subprocess
from time import sleep

import logme

log = logme.log(scope="module", name="node_sync")
LOGLEVEL = log.logger.master_level

lnd_cmd = [
    f"{LND_PATH}",
    "--bitcoin.active",
    "--bitcoin.testnet",
    "--bitcoin.node=neutrino",
    "--neutrino.connect=faucet.lightning.community",
    f"--lnddir={MASTER_NODE_LNDDIR_PATH}",
]

print("removing master node")
shutil.rmtree(MASTER_NODE_LNDDIR_PATH, ignore_errors=True)

Example no. 14
def run_wasp(files, snp_dir, genome, algorithm='star', gtf=None, pipeline=None,
             step=1, remapped_bam=None):
    """Run the complete WASP pipeline.

    :files:     All the files to run on, can be fastq or sam/bam. If fastq, or
                directory, an initial mapping is done.
    :snp_dir:   The SNP directory required by WASP.
    :genome:    A genome directory for tophat, or the STAR index directory for
                STAR
    :algorithm: 'star' or 'tophat'
    :gtf:       A GTF of genes for tophat, not required.
    :pipeline:  The location of the WASP pipeline
    :step:      Start at steps 1, 2, 3, or 4 instead of at the beginning,
                ignored if files are fastq.
    :returns:   0 on completion.

    """
    all_jobs  = {}
    save_file = 'wasp_jobs.pickle'
    # Detect if need to run mapping
    if files[0].endswith('.fq') or files[0].endswith('.fastq') \
            or os.path.isdir(files[0]):
        logme.log('File contains fastq, running initial mapping',
                  also_write='stderr')
        initial_map = True
    else:
        initial_map = False

    initial_step = step
    if step == 2:
        step_1 = None
    elif step == 3:
        remapped = None
        remap    = None
    elif step == 4:
        step_2 = None

    # Loop through every file and run all steps of the pipeline.
    for fl in files:
        step = initial_step
        map_job = None

        # Initial mapping
        if initial_map:
            if os.path.isdir(fl):
                # Collect fastq paths in a new list so fl keeps the dir path
                fq_files = []
                for i in os.listdir(fl):
                    if os.path.isfile(os.path.join(fl, i)):
                        if 'fq' in i.split('.') \
                                or 'fastq' in i.split('.'):
                            fq_files.append(os.path.join(fl, i))
                single = []
                pair_1 = []
                pair_2 = []
                for i in fq_files:
                    if '_1' in i:
                        pair_1.append(i)
                    elif '_2' in i:
                        pair_2.append(i)
                    else:
                        single.append(i)
                if single and (pair_1 or pair_2):
                    raise Exception('Cannot have both single and paired')
                if single:
                    map_files = ','.join(single)
                else:
                    map_files = ' '.join([','.join(pair_1),
                                          ','.join(pair_2)])
            else:
                map_files = fl
            map_job, bamfile = run_mapping(fl + '_map1', map_files, genome,
                                           algorithm, gtf)
            fl = bamfile  # Switch back to normal mode
            all_jobs[map_job] = fl + '_map1'
            with open(save_file, 'wb') as outf:
                pickle.dump(all_jobs, outf)

        # WASP 1
        if step == 1:
            step_1 = wasp_step_1(fl, snp_dir, pipeline, map_job)
            logme.log('{} WASP step 1: {}'.format(fl, step_1))
            step += 1
            all_jobs[step_1] = fl + '_step1'
            with open(save_file, 'wb') as outf:
                pickle.dump(all_jobs, outf)

        # Remapping
        if step == 2:
            readfile = '.'.join(fl.split('.')[:-1]) + '.remap.fq.gz'
            remap, remapped = run_mapping(fl.split('_')[0] + '_remap',
                                          readfile, genome, algorithm, gtf,
                                          step_1)
            logme.log('{} Remapping: {}'.format(fl, remap))
            step += 1
            all_jobs[remap] = fl + '_remap'
            with open(save_file, 'wb') as outf:
                pickle.dump(all_jobs, outf)

        # WASP 2
        if step == 3:
            if not remapped:
                remapped = fl + '_remapAligned.sortedByCoord.out.bam'
            step_2 = wasp_step_2(fl, remapped, pipeline, remap)
            logme.log('{} WASP step 2: {}'.format(fl, step_2))
            step += 1
            all_jobs[step_2] = fl + '_step2'
            with open(save_file, 'wb') as outf:
                pickle.dump(all_jobs, outf)

        # Merge Files
        if step == 4:
            merge_job = merge_bams(fl, step_2)
            logme.log('{} Merge Step: {}'.format(fl, merge_job))
            all_jobs[merge_job] = fl + '_merge'
            with open(save_file, 'wb') as outf:
                pickle.dump(all_jobs, outf)

    return 0
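A hedged call sketch, not part of the source; the fastq files, SNP directory, STAR index, and pipeline path are hypothetical.

# Hypothetical full run: fastq inputs trigger the initial STAR mapping, then
# WASP steps 1-2 and the final merge are chained through job dependencies.
run_wasp(['sample1.fq', 'sample2.fq'],
         snp_dir='wasp_snps',
         genome='star_index',
         algorithm='star',
         pipeline='/opt/WASP/mapping')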
Example no. 15
import luigi
import logme
import wget
import tarfile
from pathlib import Path

from paths import ROOT_PATH, BIN_PATH, DOWNLOAD_URL, DOWNLOAD_FILE_PATH, EXTRACTED_FOLDER_PATH

log = logme.log(scope="module", name="downloader")


class DownloadFileTask(luigi.Task):
    def output(self):
        return luigi.LocalTarget(str(DOWNLOAD_FILE_PATH))

    def run(self):
        log.info("Downloading LND.")
        wget.download(DOWNLOAD_URL, self.output().path)


class ExtractFilesTask(luigi.Task):
    def requires(self):
        return DownloadFileTask()

    def output(self):
        return luigi.LocalTarget(str(EXTRACTED_FOLDER_PATH))

    def run(self):
        log.info("Extracting LND.")
        with tarfile.open(self.input().path) as tar:
            tar.extractall(ROOT_PATH)
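A hedged entry-point sketch, not in the snippet: luigi.build() with the local scheduler runs ExtractFilesTask and its DownloadFileTask requirement in dependency order.

if __name__ == "__main__":
    # Run the two-task graph without a central luigi scheduler.
    luigi.build([ExtractFilesTask()], local_scheduler=True)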
Example no. 16
from qa.gemet import check_gemet_thesaurus, check_ps_keyword

from qa.inspire import (
    WFS_PROTO,
    check_priority_ds_thesaurus,
    check_n2k_keywords,
    get_online_resources,
    check_supported_protocols,
    check_list_stored_queries_support,
    check_n2k_stored_query_exists,
    get_n2k_spatial_data,
)

from qa.etf import check_md_conformance

log = logme.log(scope="module", name="inspire_qa")

DEFAULT_ETF_CHECK_INTERVAL = 30
DEFAULT_ETF_TEST_TIMEOUT = 180


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--urls-csv")
    parser.add_argument("--files-path", default=Path.cwd())
    parser.add_argument("--etf-url")
    parser.add_argument("--etf-timeout", default=DEFAULT_ETF_TEST_TIMEOUT)
    parser.add_argument("--etf-interval", default=DEFAULT_ETF_CHECK_INTERVAL)
    args = parser.parse_args()

    urls = {}
Example no. 17

@logme.log
class DummyClassDefault:
    pass


@logme.log(config='my_test_logger', name='custom_class_logger')
class DummyClassCustom:
    pass


class DummyClassWithMethods:
    @logme.log(name='class_with_methods')
    def method_one(self, logger=None):
        logger.debug('test class with method logging message.')
        pass

    @logme.log(name='another_logger_with_args', config='my_test_logger')
    def method_with_args(self, name, age, logger=None, **kwargs):
        logger.info('method logger with args')
        return name, age, kwargs


module_logger = logme.log(scope='module', name='change_config')


def log_this():
    module_logger.info('change my config.')
    return module_logger
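A hedged usage sketch, not part of the source: logme injects the logger keyword into the decorated methods, so callers only pass their own arguments; the argument values are hypothetical.

# Hypothetical calls against the dummies defined above.
dummy = DummyClassWithMethods()
dummy.method_one()
name, age, extra = dummy.method_with_args('blah', 42, note='hypothetical')
logger = log_this()  # returns the module-level 'change_config' logger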
Example no. 18
class DummyClassWithMethods:
    @logme.log(name='class_with_methods')
    def method_one(self, logger=None):
        logger.debug('test class with method logging message.')
        pass

    @logme.log(name='another_logger_with_args', config='my_test_logger')
    def method_with_args(self, name, age, logger=None, **kwargs):
        logger.warning('method logger with args')
        return name, age, kwargs


# ---------------------------------------------------------------------------
# Dummy module logger
# ---------------------------------------------------------------------------
module_logger = logme.log(scope='module', name='change_config')


def log_this():
    module_logger.info('change my config.')
    return module_logger


# null module handler test
null_module_logger = logme.log(scope='module',
                               config='null_config',
                               name='null_module')


def my_log_null():
    null_module_logger.critical('expect output after config_change')