Example #1
0
# One cover/depth matrix path per non-empty sequence name:
# <inPath><sequence>/<sequence>_CoverDepthMatrix.txt
coverFiles = [
    inPath + sequence + '/' + sequence + '_CoverDepthMatrix.txt'
    for sequence in sequences
    if sequence != ''
]


# Comma-separated header line: a fixed first label, then one entry per input
# file (appended inside the loop below).
output = "replicon__gene"
outCover = outPath + refName + "_CoverMatrix.csv"
outDepth = outPath + refName + "_DepthMatrix.csv"

# Accumulators indexed by line number within the input files:
#   geneList[i]  - first comma-separated field of line i
#   coverList[i] - second field of line i, one entry per input file
#   depthList[i] - third field of line i, one entry per input file
geneList = []
coverList = []
depthList = []
# NOTE(review): first_file is never set to False in the code visible here;
# presumably that happens further down - confirm, otherwise every file
# re-appends to geneList and adds empty rows to coverList/depthList.
first_file = True
for file in coverFiles:
    (prefix, name, ext) = splitPath(file)
    # Drop the last 17 characters (the length of "_CoverDepthMatrix") so that
    # only the leading part of the name goes into the header.
    output = output + "," + name[:-17]
    count = 0
    # 'with' closes the handle even if a short or malformed line raises below.
    with open(file) as coverFile:
        for line in coverFile:
            # Strip the trailing newline, if the line has one.
            if line.find('\n') != -1:
                line = line[:-1]
            cover = line.split(',')
            if first_file:
                # Create the row for this line the first time it is seen.
                geneList.append(cover[0])
                coverList.append([])
                depthList.append([])
            coverList[count].append(cover[1])
            depthList[count].append(cover[2])
            count += 1
Example #2
0
If HetsVCF is set to 'True', the heterozygous SNP calls will be written to a VCF

example: 
python finalFilter.py <raw>.vcf <q30>.vcf <outHetFile> <HetsVCF>

Created:	24/01/2013
Modified:	13/11/2015
author: David Edwards
"""
import sys
from pipe_utils import splitPath

# Command-line arguments: input VCF, output VCF, location for the het report,
# and the flag that enables the het VCF.
inFile = sys.argv[1]
outFile = sys.argv[2]
(prefix, middle, ext) = splitPath(outFile)

# Both het outputs reuse the output file's name with its last three
# characters removed.
hetStem = middle[:-3]
outHetFile = sys.argv[3] + "/" + hetStem + "het.txt"

# Only the exact string "True" enables the het VCF.
HetsVCF = (sys.argv[4] == "True")

vcfIn = open(inFile)
vcfOut = open(outFile, "w")
hetOut = open(outHetFile, "w")
if HetsVCF:
    # The het VCF is written under the prefix returned by splitPath(outFile).
    outHetPosFile = prefix + "/" + hetStem + "het.vcf"
    hetVcfOut = open(outHetPosFile, "w")
hetCount = 0

for line in vcfIn:
Example #3
0
example:
python deriveRepStats.py <isolate>_rep_cover.txt replicon depth_fail cover_fail runType mapped_fail check_reads_mapped

Created:	29042013
Modified:	16072014
author: David Edwards
'''
import sys
from pipe_utils import splitPath

repCoverFile = sys.argv[1]
repCover = open(repCoverFile, "r")
(prefix, middle, ext) = splitPath(repCoverFile)
# drop the last ten characters of the name (the length of "_rep_cover",
# per the usage line above)
seq_name = middle[:-10]

# the output line starts with that name followed by a tab
output_RepStats = seq_name + "\t"

# keep only the part of the replicon argument before the first '.'
replicon = sys.argv[2].partition('.')[0]
replicon_names = []

#get % cover and depth for each replicon
depth_test_value = 0.0
for line in repCover:
	entry = line.split()
	replicon_names.append(entry[0])
Example #4
0
Last Modified: 17/02/2015 - fixed locus tag designation
author: David Edwards
'''
import sys, glob
from pipe_utils import splitPath, get_key
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation
from Bio.Alphabet import IUPAC

# Command-line arguments: output directory prefix, GenBank reference file,
# and the cover file name.
outPath = sys.argv[1]
genbankName = sys.argv[2]
coverFileName = sys.argv[3]

(prefix, name, ext) = splitPath(coverFileName)
# The cover file's name minus its last nine characters is used both as a
# sub-directory of outPath and as the stem of the output file.
outCover = "%s%s/%s_CoverDepthMatrix.txt" % (outPath, name[:-9], name[:-9])

# Accumulators, all starting empty.
geneList, geneCoverList, geneDepthList = [], [], []
totalBases = 0
repliconList = []

records = SeqIO.parse(genbankName, "genbank")
for record in records:
    feature_count = 0
    for f in record.features:
        if f.type == "CDS":
#        if f.type == "misc_feature":
            feature_count += 1
Example #5
0
example:
python getSNPList.py <statsFile> <replicon> <output>

Created:	12052013
Modified:	15042014
author: David Edwards
'''
import sys, glob
from pipe_utils import splitPath
# Command-line arguments: stats file, replicon name, output file name.
statsFile_name = sys.argv[1]
replicon = sys.argv[2]
outputFile_name = sys.argv[3]
outputFile = open(outputFile_name, "w")
finalList = []
(prefix, name, ext) = splitPath(statsFile_name)
# Glob pattern for this replicon's q30 VCFs in the 'vcf' directory under the
# prefix returned by splitPath for the stats file.
vcfs = "%s/vcf/*_%s_q30.vcf" % (prefix, replicon)
for file in glob.glob(vcfs):
    (prefix, name, ext) = splitPath(file)    
    name = name[:(-1*(5+len(replicon)))]
    statsFile = open(statsFile_name)
    for sample in statsFile:
        if sample.startswith("Isolate") != True:
            splitSample = sample.split("\t")
            if (name == splitSample[0]) and (splitSample[-1].startswith("f") != True):
                vcfFile = open(file)
                varList = []
                mergeList = []
                for line in vcfFile:
                    if line.startswith("#") == False:
                        splitLine = line.split("\t")        
Example #6
0
# Start one row per line of the SNP list, holding that line's first
# whitespace-separated field.
SNP.extend([line.split()[0]] for line in snpList)
snpList.close()

#populate reference calls from reference fasta

references = SeqIO.parse(reference_name, "fasta")
for reference in references:
    # Only the record whose name matches the requested replicon is used.
    if reference.name != replicon:
        continue
    for snp in SNP:
        # snp[0] is a 1-based position; append the upper-cased reference base.
        snp.append(reference.seq[int(snp[0]) - 1].upper())

# call bases for each SNP from consensus file of the strain
(prefix, name, ext) = splitPath(consensus_in)
# A merge_prefix of '-' selects the consensus file's own prefix (minus its
# last four characters) as the location of the warning file; presumably '-'
# means "no merge directory given" - confirm against the caller.
warning_stem = prefix[:-4] if merge_prefix == '-' else merge_prefix
warning_file = warning_stem + replicon + '_' + name + '_warning.txt'
# The warning file name above uses the full name; from here on it is trimmed
# by four characters.
name = name[:-4]
consensus = SeqIO.parse(consensus_in, "fastq")
statsFile = open(stats)
for sample in statsFile:
    if sample.startswith("Isolate") != True:
        splitSample = sample.split("\t")
        record_found = False
        if (name == splitSample[0]) and (splitSample[-1].startswith("f") != True):
            header = header + ',' + name
            for record in consensus:
                if record.name == replicon:
Example #7
0
example:
python mergeRepStats.py <new_replicon_RepStats.tab> sdOutgroupMutiplier reads_to_replace <mergeDirectory> runType 

Created:	23/10/2012
Modified:	28/10/2013 to mergeRepStats from mergeStats
            15/04/2014 changed to produce outgroup.txt file if there are any outgroups to report
            20/05/2014 fix to outgroup reporting
            04/07/2014 change to replace_reads handling

author: David Edwards
'''
import sys, glob
from pipe_utils import splitPath

# Command-line arguments, in the order given by the usage line:
# new RepStats file, outgroup SD multiplier, reads to replace,
# merge directory, run type.
inFileName = sys.argv[1]
(inPrefix, inName, inExt) = splitPath(inFileName)
sdOutgroupMutiplier = int(sys.argv[2])
# The merged file keeps the input's name and extension, prefixed by the
# merge directory argument.
mergeFileName = "".join((sys.argv[4], inName, inExt))
replace = sys.argv[3]
runType = sys.argv[5]
# Drop the last nine characters of the input name (the length of "_RepStats")
# to build the outgroup report name.
outgroup_outfile_name = "".join((sys.argv[4], inName[:-9], '_outgroups.txt'))
outgroups = []

# Running statistics and the output buffer all start empty.
average = sd = count = 0
output = ""

# Combine the two sets
# note: only need to count SNPs for 'phylogeny' runType
Example #8
0
	parser.add_option("-i", "--input_file", action="store", dest="input_file", help="allele table (required)", default="")
	parser.add_option("-o", "--output_file", action="store", dest="output_file", help="name for output file (no ext, only required if '_alleles' missing from input name), Note: if '_alleles(_*)' ending present, this is removed and '_SNP_diff.nxs' added [setting for RedDog pipeline] (default: none)", default="")
	parser.add_option("-d", "--directory", action="store", dest="directory", help="directory to send output files (default: none)", default="")
		
	return parser.parse_args()

if __name__ == "__main__":

	(options, args) = main()

	### MAIN PROCESS
	inputName = options.input_file
	inputFile = open(inputName)

	(prefix, name, ext) = splitPath(inputName)

	strains = []

	if name.find('_alleles') != -1:
		split_name = name.split('_')
		split = 0
		refName = split_name[split]
		split += 1
		while split_name[split] != 'alleles':
			refName += '_' + split_name[split]
			split += 1 
		directory = options.directory		
		if directory == '':
			if prefix != '':
				outName = prefix + '/' + refName + '_SNP_diff.nxs'