Esempio n. 1
0
import os
import sys
import time
import logging
import argparse
import tempfile
import resource
import subprocess
import collections
import distutils.spawn
import parallel_tools
import seqtools
import shims
# There can be problems with the submodules, but none are essential.
# Try to load these modules, but if there's a problem, load a harmless dummy and continue.
simplewrap = shims.get_module_or_shim('utillib.simplewrap')
version = shims.get_module_or_shim('utillib.version')
phone = shims.get_module_or_shim('ET.phone')

#TODO: Warn if it looks like the two input FASTQ files are the same (i.e. the _1 file was given
#      twice). Can tell by whether the alpha and beta (first and last 12bp) portions of the barcodes
#      are always identical. This would be a good thing to warn about, since it's an easy mistake
#      to make, but it's not obvious that it happened. The pipeline won't fail, but will just
#      produce pretty weird results.

# Usage examples showing the two ways to supply the input: as a file argument, or piped in on
# stdin. The "%(prog)s" placeholders are presumably filled in by argparse when this is used as
# the parser's usage string (the parser construction is not visible here -- confirm).
USAGE = """$ %(prog)s [options] families.tsv > families.msa.tsv
       $ cat families.tsv | %(prog)s [options] > families.msa.tsv"""
# Short summary of what this script does.
# NOTE(review): the usage examples take a families.tsv file, while this text says "sorted FASTQ
# data" -- confirm which wording matches the actual input format.
DESCRIPTION = """Read in sorted FASTQ data and do multiple sequence alignments of each family."""

def make_argparser():
Esempio n. 2
0
#!/usr/bin/env python3
import argparse
import gzip
import logging
import os
import subprocess
import sys
import shims
# Refuse to run under Python 2.
# NOTE(review): assert statements are skipped when Python is run with -O, so this check is not
# enforced in optimized mode.
assert sys.version_info.major >= 3, 'Python 3 required'
# Load the version module through shims. Presumably this falls back to a stand-in object if the
# real module can't be imported -- confirm against shims.get_module_or_shim.
version = shims.get_module_or_shim('utillib.version')

# Directory containing this script, computed from the symlink-resolved path of __file__.
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
# Description text given to argparse.ArgumentParser in make_argparser().
DESCRIPTION = """Run the entire Du Novo pipeline."""


def make_argparser():
    parser = argparse.ArgumentParser(description=DESCRIPTION, add_help=False)
    io = parser.add_argument_group('Inputs and outputs')
    io.add_argument('fastq1',
                    metavar='reads_1.fq',
                    type=open_as_text_or_gzip,
                    help='Input reads (mate 1). Can be gzipped.')
    io.add_argument('fastq2',
                    metavar='reads_2.fq',
                    type=open_as_text_or_gzip,
                    help='Input reads (mate 2). Can be gzipped.')
    io.add_argument(
        '-o',
        '--outdir',
        help=
        'The directory to create the output (and intermediate) files in. Must exist already and '
Esempio n. 3
0
#!/usr/bin/env python
from __future__ import division
import os
import sys
import time
import logging
import argparse
import resource
import collections
import parallel_tools
import consensus
import swalign
import shims
# There can be problems with the submodules, but none are essential.
# Try to load these modules, but if there's a problem, load a harmless dummy and continue.
simplewrap = shims.get_module_or_shim('utillib.simplewrap')
version = shims.get_module_or_shim('utillib.version')
phone = shims.get_module_or_shim('ET.phone')

# The ascii values that represent a 0 PHRED score.
# Keyed by the name of the quality score encoding.
QUAL_OFFSETS = {'sanger':33, 'solexa':64}
# Usage examples showing the two ways to supply the input: as a file argument, or piped in on
# stdin. The "%(prog)s" placeholders are presumably filled in by argparse (the parser
# construction is not visible here -- confirm).
USAGE = """$ %(prog)s [options] families.msa.tsv -1 duplexes_1.fa -2 duplexes_2.fa
       $ cat families.msa.tsv | %(prog)s [options] -1 duplexes_1.fa -2 duplexes_2.fa"""
DESCRIPTION = """Build consensus sequences from read aligned families. Prints duplex consensus \
sequences in FASTA to stdout. The sequence ids are BARCODE.MATE, e.g. "CTCAGATAACATACCTTATATGCA.1", \
where "BARCODE" is the input barcode, and "MATE" is "1" or "2" as an arbitrary designation of the \
two reads in the pair. The id is followed by the count of the number of reads in the two families \
(one from each strand) that make up the duplex, in the format READS1/READS2. If the duplex is \
actually a single-strand consensus because the matching strand is missing, only one number is \
listed.
Rules for consensus building: Single-strand consensus sequences are made by counting how many of \
Esempio n. 4
0
from __future__ import print_function
import os
import sys
import gzip
import time
import logging
import argparse
import resource
import subprocess
import networkx
import parallel_tools
import swalign
import shims
# There can be problems with the submodules, but none are essential.
# Try to load these modules, but if there's a problem, load a harmless dummy and continue.
version = shims.get_module_or_shim('utillib.version')
phone = shims.get_module_or_shim('ET.phone')

# A custom logging level halfway between DEBUG and INFO (integer division keeps it an int).
VERBOSE = (logging.DEBUG+logging.INFO)//2
# Usage string given to argparse.ArgumentParser in make_argparser().
USAGE = '$ %(prog)s [options] families.tsv barcodes.fa barcodes.sam > families.corrected.tsv'
# Description given to argparse.ArgumentParser in make_argparser(). The literal spans several
# source lines, so the string itself contains newline characters.
DESCRIPTION = """Correct barcodes using an alignment of all barcodes to themselves. Reads the
alignment in SAM format and corrects the barcodes in an input "families" file (the output of
make-barcodes.awk). It will print the "families" file to stdout with barcodes (and orders)
corrected."""


def make_argparser():
  parser = argparse.ArgumentParser(usage=USAGE, description=DESCRIPTION)
  parser.add_argument('families', type=open_as_text_or_gzip,
    help='The sorted output of make-barcodes.awk. The important part is that it\'s a tab-delimited '
         'file with at least 2 columns: the barcode sequence and order, and it must be sorted in '