Beispiel #1
0
            if bam.startswith(prefix):
                sampleDict[individual][tissue].append(bam)
# Validate samples: every individual needs a 'NORM' tissue and every tissue
# list must hold exactly two entries (unpacked later as sample name and BAM)
for individual, indvTissues in sampleDict.items():
    # A normal sample is mandatory
    if 'NORM' not in indvTissues:
        raise IOError('no normal for %s' % (individual))
    # Fewer than two entries means no BAM was appended; more means several
    for tissue, tissueEntries in indvTissues.items():
        entryCount = len(tissueEntries)
        if entryCount < 2:
            raise IOError('no bam found for %s %s' % (individual, tissue))
        elif entryCount > 2:
            raise IOError(
                'multiple bams found for %s %s' % (individual, tissue)
            )
# Create the object used to submit jobs, then loop through each individual's
# data
slurmJobs = slurm.submitJobs()
for individual, indvData in sampleDict.items():
    # Create per-individual lists; presumably they collect pileup files and
    # job IDs further down the loop (not visible here) - TODO confirm
    pileupFileList = []
    pileupJobList = []
    varscanJobList = []
    # Create log directory '<outdir>/<individual>.log' unless it already
    # exists as a directory
    logDir = os.path.join(args['<outdir>'], individual + '.log')
    if not os.path.isdir(logDir):
        os.mkdir(logDir)
    # Create and store pileup commands
    for tissue, tissueData in indvData.items():
        # Extract sample name and BAM file; the unpacking requires exactly
        # two entries per tissue and raises ValueError otherwise
        name, bam = tissueData
        # Create and store file names: the pileup file is named after the
        # sample and placed directly in <outdir>
        pileup = os.path.join(args['<outdir>'], name + '.mpileup')
Beispiel #2
0
        read1List.extend(read1)
    read1List = [os.path.abspath(x) for x in read1List]
else:
    for prefix in args['<prefix>']:
        read1, read2 = fastqFind.findFastq(prefix=prefix,
                                           dirList=args['<indir>'],
                                           pair=True)
        read1List.extend(read1)
        read2List.extend(read2)
    read1List = [os.path.abspath(x) for x in read1List]
    read2List = [os.path.abspath(x) for x in read2List]
# Abort if the search did not identify any FASTQ files
if not read1List:
    raise IOError('Failed to find FASTQ files')
# Create the job-submission object and the lists that track BAM files and jobs
jobObject = slurm.submitJobs()
bamList, jobList = [], []
# The log folder is the output BAM path minus its last four characters
# (presumably the '.bam' extension); os.mkdir raises if it already exists
args['<logfolder>'] = args['<outbam>'][:-4]
os.mkdir(args['<logfolder>'])
# Perform alignment for each pair of FASTQ files
for readgroup, (read1, read2) in enumerate(
        itertools.izip_longest(read1List, read2List)):
    # Modify and format read group and library id
    readgroup = format(readgroup + 1, '03d')
    # Create file names for alignment
    alignLog = os.path.join(args['<logfolder>'],
                            'rg{}.align.log'.format(readgroup))
    alignBam = args['<outbam>'][:-4] + '.rg{}.sort.bam'.format(readgroup)
    # Generate command for alignment
    alignCommand = fastqAlign.bwaMemAlign(
# Generate and store the standard output directories: each stage gets a
# folder under <outdir> plus a per-sample subfolder named after the sample.
# Each row is (args key for stage dir, args key for sample dir, folder name).
stageLayout = (
    ('fastqDir', 'fastqSampleDir', 'trimFastq'),
    ('rsemTranDir', 'rsemTranSampleDir', 'rsemAlign'),
    ('tophatDir', 'tophatSampleDir', 'tophat2Align'),
)
dirList = []
for stageKey, sampleKey, folderName in stageLayout:
    args[stageKey] = os.path.join(args['<outdir>'], folderName)
    args[sampleKey] = os.path.join(args[stageKey], args['name'])
    # Parent is listed before its child so os.mkdir can create them in order
    dirList.extend([args[stageKey], args[sampleKey]])
# Create any output directory that does not exist yet
for directory in dirList:
    if os.path.exists(directory):
        continue
    os.mkdir(directory)
# Extract path and module data, then create the object used to submit commands
pmDict = slurm.parsePathModule(args['<paths>'])
slurmJobs = slurm.submitJobs()

###############################################################################
## Find and process fastq files
###############################################################################
# Extract fastq files and check
if args['--singleend']:
    args['read1']  = fastqFind.findFastq(
        prefix = args['prefix'],
        dirList = args['<indir>'].split(','),
        pair = False
    )
else:
    args['read1'], args['read2'] = fastqFind.findFastq(
        prefix = args['prefix'],
        dirList = args['<indir>'].split(','),