Exemple #1
0
            sys.exit(0)
    else:
        opt, args = parser.parse_args()

    break

# Progress reporting: suppressed when results go to stdout (no -o given).
progress = None
if not opt.output:
    opt.quiet = True
# PEP 8: compare against None with `is`, not `==`.
if opt.maxreads is None:
    opt.maxreads = 1e+20  # effectively unlimited
progress = ProgressText(quiet=opt.quiet)

from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNV data", len(opt.snvs))
# Required SNV columns, in output order. Materialized as a list: under
# Python 3, filter() returns a one-shot iterator that would be exhausted
# after a single traversal.
snvheaders = [_f for _f in """
CHROM POS REF ALT
""".split() if _f]

snvdata = {}
# extrasnvheaders = []
# usedsnvheaders = set()
snvchroms = defaultdict(set)
for filename in opt.snvs:

    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()
    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'vcf':
Exemple #2
0
base = os.path.split(os.path.abspath(opt.counts))[0]

# Read-count rows keyed by (CHROM, POS), one dict per sample type
# (names suggest tumor/normal RNA and germline/somatic DNA -- confirm).
TRNA = {}
NRNA = {}
GDNA = {}
SDNA = {}

from chromreg import ChromLabelRegistry

chrreg = ChromLabelRegistry()
# Accept numeric chromosome labels 1..99 plus sex and mitochondrial labels.
labels = list(map(str, list(range(1, 100)))) + ["X", "Y", "MT"]
chrreg.add_labels(opt.counts, labels)
chrreg.default_chrom_order()


# PEP 8 (E731): a named def instead of a lambda assignment.
def chrorder(l):
    """Sort key: canonical chromosome order for a label from the counts file."""
    return chrreg.chrom_order(chrreg.label2chrom(opt.counts, l))


progress.stage("Parsing read-counts")
f = open(opt.counts, mode='rt', encoding='utf8')
reader = csv.DictReader(f, delimiter='\t')
types2files = defaultdict(set)
files2types = defaultdict(set)
for row in reader:
    key = (row['CHROM'], row['POS'])
    filename = row['AlignedReads']
    for k in row:
        if k.endswith('Count') and row[k] != "":
            row[k] = int(row[k])
        if k.endswith('Sc') and row[k] != "":
            row[k] = float(row[k])
    if re.search(regex["GDNA"], filename) and key not in GDNA:
        GDNA[key] = row
        types2files["GDNA"].add(filename)
Exemple #3
0
        except UserCancelledError:
            sys.exit(0)
    else:
        opt, args = parser.parse_args()

    break

# Progress reporting: suppressed when results go to stdout (no -o given).
progress = None
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

from pysamimport import pysam
from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNV data", len(opt.snvs))
# Required SNV columns, in output order; the comprehension drops empty tokens.
snvheaders = [_f for _f in """
CHROM POS REF ALT
""".split() if _f]

# Accumulators filled while reading each SNV input file below.
snvdata = {}
extrasnvheaders = []
usedsnvheaders = set()
for filename in opt.snvs:

    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()
    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        snvs = VCFFile(filename=filename)
Exemple #4
0
            sys.exit(0)
    else:
        opt, args = parser.parse_args()

    break

# Progress reporting: suppressed when results go to stdout (no -o given).
progress = None
if not opt.output:
    opt.quiet = True
# PEP 8: compare against None with `is`, not `==`.
if opt.maxreads is None:
    opt.maxreads = 1e+20  # effectively unlimited
progress = ProgressText(quiet=opt.quiet)

from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNV data", len(opt.snvs))
# Required SNV columns, in output order. Materialized as a list: under
# Python 3, filter() returns a one-shot iterator that would be exhausted
# after a single traversal.
snvheaders = [_f for _f in """
CHROM POS REF ALT
""".split() if _f]

snvdata = {}
# extrasnvheaders = []
# usedsnvheaders = set()
snvchroms = defaultdict(set)
for filename in opt.snvs:

    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()
    if extn == 'csv':
        snvs = CSVFileTable(filename=filename)
    elif extn == 'vcf':
Exemple #5
0
  ReadCounts Files (-c): %s
  Matrix Output (-M):    %s
  Min. Reads (-m):       %s%s
  Quiet (-q):            %s
  Outfile File (-o):     %s

Command-Line: readCountsMatrix %s
""" % (", ".join(opt.counts), None if not matrix else opt.matrix, opt.minreads,
       "" if opt.matrix not in ("Ref:Var", "Ref;Var") or opt.minreads == 0 else
       " (ignored)", opt.quiet, opt.output, cmdargs)

progress.message(execution_log)

from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable

progress.stage("Read ReadCounts input files", len(opt.counts))

# Column names expected in each read-count input table.
headers = [
    "CHROM", "POS", "REF", "ALT",
    "ReadGroup", "RefCount", "SNVCount", "GoodReads",
]
# NOTE: This *MUST* correspond to the columns in the readCounts .txt file output
txtheaders = [
    "CHROM", "POS", "REF", "ALT", "ReadGroup",
    "SNVCountForward", "SNVCountReverse",
    "RefCountForward", "RefCountReverse",
    "SNVCount", "RefCount", "GoodReads",
]

allrg = set()
vafmatrix = defaultdict(dict)
for filename in opt.counts:
    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()
    if extn == 'csv':
        counts = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        counts = VCFFile(filename=filename)
    elif extn == 'tsv':
Exemple #6
0
        except UserCancelledError:
            sys.exit(0)
    else:
        opt, args = parser.parse_args()

    break

# Progress reporting: suppressed when results go to stdout (no -o given).
progress = None
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

import pysam
from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNP data", len(opt.snps))
# Required SNP columns, in output order. Materialized as a list: under
# Python 3, filter() returns a one-shot iterator that would be exhausted
# after a single traversal.
snpheaders = [_f for _f in """
CHROM POS REF ALT
""".split() if _f]

snpdata = {}
extrasnpheaders = []
usedsnpheaders = set()
for filename in opt.snps:

    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()
    if extn == 'csv':
        snps = CSVFileTable(filename=filename)
    elif extn == 'vcf':
        snps = VCFFile(filename=filename)
Exemple #7
0
# Regular expression used to classify tumor-transcriptome read files.
regex["TRNA"] = opt.tumortransre

progress = ProgressText()

base = os.path.split(os.path.abspath(opt.counts))[0]

# Read-count rows keyed by (CHROM, POS), one dict per sample type.
# (Split from a single semicolon-chained line for readability.)
TRNA = {}
NRNA = {}
GDNA = {}
SDNA = {}

from chromreg import ChromLabelRegistry

chrreg = ChromLabelRegistry()
# list(...) is required: under Python 3 map() returns an iterator, and
# `map(...) + [...]` raises TypeError.
labels = list(map(str, range(1, 100))) + ["X", "Y", "MT"]
chrreg.add_labels(opt.counts, labels)
chrreg.default_chrom_order()


# PEP 8 (E731): a named def instead of a lambda assignment.
def chrorder(l):
    """Sort key: canonical chromosome order for a label from the counts file."""
    return chrreg.chrom_order(chrreg.label2chrom(opt.counts, l))


progress.stage("Parsing read-counts")
f = open(opt.counts, 'r')
reader = csv.DictReader(f, delimiter='\t')
types2files = defaultdict(set)
files2types = defaultdict(set)
for row in reader:
    key = (row['CHROM'],row['POS'])
    filename = row['AlignedReads']
    for k in row:
        if k.endswith('Count') and row[k] != "":
            row[k] = int(row[k])
        if k.endswith('Sc') and row[k] != "":
            row[k] = float(row[k])
    if re.search(regex["GDNA"],filename) and key not in GDNA:
        GDNA[key] = row; types2files["GDNA"].add(filename); files2types[filename].add("GDNA")
    if re.search(regex["NRNA"],filename) and key not in NRNA:
Exemple #8
0
    else:
        opt, args = parser.parse_args()

    break

# NOTE(review): `opts` (plural) is assigned here while every other reference
# in this fragment uses `opt` -- possible typo; confirm a separate `opts`
# object exists before renaming.
opts.mates = False

# Progress reporting: suppressed when results go to stdout (no -o given).
progress = None
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

import pysam
from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNP data", len(opt.snps))
# Required SNP columns, in output order; the comprehension drops empty tokens.
snpheaders = [_f for _f in """
CHROM POS REF ALT
""".split() if _f]

# Accumulators filled while reading each SNP input file below.
snvdata = {}
snvchroms = defaultdict(set)
extrasnpheaders = []
usedsnpheaders = set()
for filename in opt.snps:
    filename0 = filename
    base, extn = filename.rsplit('.', 1)
    extn = extn.lower()
    tempfilename = None
    if opt.exoncoords:
        if extn != 'vcf':
Exemple #9
0
        except UserCancelledError:
            sys.exit(0)
    else:
        opt, args = parser.parse_args()

    break

# Progress reporting: suppressed when results go to stdout (no -o given).
progress = None
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

from pysamimport import pysam
from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable, BEDFile, VCFFile

progress.stage("Read SNV data", len(opt.snvs))
# Required SNV columns, in output order. Materialized as a list: under
# Python 3, filter() returns a one-shot iterator that would be exhausted
# after a single traversal.
snvheaders = [
    _f
    for _f in """
CHROM POS REF ALT
""".split()
    if _f
]

snvdata = {}
extrasnvheaders = []
usedsnvheaders = set()
for filename in opt.snvs:

    base, extn = filename.rsplit(".", 1)
    extn = extn.lower()
    if extn == "csv":
Exemple #10
0
    break

# Progress reporting: suppressed when results go to stdout (no -o given).
progress = None
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

# Count column names. str.split() on whitespace already yields clean,
# non-empty tokens, so no stripping or filtering step is needed.
sumkeys = """
SNPJuncIntronCount SNPJuncNoIntronCount NoSNPJuncIntronCount NoSNPJuncNoIntronCount SNPMateCount NoSNPMateCount SNPCount NoSNPCount MatesCount NotMatesCount IntronCount NoIntronCount SpanningReads RemovedDuplicateReads SNPLociReads""".split()
countdata = defaultdict(dict)
progress.stage("Read SNP/Junction counts")
from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable
countheaders = None
for filename in opt.counts:
    base, extn = filename.rsplit('.', 1)
    path, base = os.path.split(base)
    extn = extn.lower()
    if extn == 'csv':
        counts = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        counts = TSVFileTable(filename=filename)
    elif extn == 'xls':
        counts = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        counts = XLSXFileTable(filename=filename)
    else:
Exemple #11
0
        except UserCancelledError:
            sys.exit(0)
    else:
        opt, args = parser.parse_args()

    break

# Progress reporting: suppressed when results go to stdout (no -o given).
progress = None
if not opt.output:
    opt.quiet = True
progress = ProgressText(quiet=opt.quiet)

# Count column names. Materialized as a list: under Python 3, filter()
# returns a one-shot iterator that would be exhausted after a single
# traversal.
sumkeys = [_f for _f in map(str.strip, """
SNPJuncIntronCount SNPJuncNoIntronCount NoSNPJuncIntronCount NoSNPJuncNoIntronCount SNPMateCount NoSNPMateCount SNPCount NoSNPCount MatesCount NotMatesCount IntronCount NoIntronCount SpanningReads RemovedDuplicateReads SNPLociReads""".split()) if _f]
countdata = defaultdict(dict)
progress.stage("Read SNP/Junction counts")
from dataset import XLSFileTable, CSVFileTable, TSVFileTable, XLSXFileTable, TXTFileTable
countheaders = None
for filename in opt.counts:
    base, extn = filename.rsplit('.', 1)
    path, base = os.path.split(base)
    extn = extn.lower()
    if extn == 'csv':
        counts = CSVFileTable(filename=filename)
    elif extn == 'tsv':
        counts = TSVFileTable(filename=filename)
    elif extn == 'xls':
        counts = XLSFileTable(filename=filename)
    elif extn == 'xlsx':
        counts = XLSXFileTable(filename=filename)
    else: