Ejemplo n.º 1
0
def sortInvcmd(inversion, data):
    """Run ``samtools sort`` on the BAM file of one inversion.

    The input is ``<data['out']>/bams/<inversion>.bam`` and the second
    argument given to samtools is the same path with a ``.sort`` suffix
    instead of ``.bam``.

    Args:
        inversion: name of the inversion; used as the file stem.
        data: dict providing the ``'out'`` and ``'tempdir'`` entries.

    Returns:
        Whatever ``run`` returns for the command.
    """
    stem = data['out'] + "/bams" + "/" + inversion
    pieces = ['samtools sort ', stem + ".bam", stem + ".sort"]
    command = ' '.join(pieces)
    return run('sortInvcmd', command, data['tempdir'] + "/log.sort")
Ejemplo n.º 2
0
def createIndex(data, log):
    """Generate the STAR genome index unless one is already on disk.

    The index lives in ``<data['refdir']>/genome`` (created when missing).
    If ``<genome dir>/Genome`` exists nothing is run. Otherwise every entry
    of ``data['refdir']`` whose name contains ``.fa`` at a position other
    than the first character is passed to ``--genomeFastaFiles``.

    Args:
        data: dict providing ``'refdir'``, ``'threads'`` and ``'tempdir'``.
        log: logger-like object; ``info`` and ``debug`` are called on it.

    Returns:
        0 when the index already exists, otherwise the value returned by
        ``run``.
    """
    log.info("index with STAR")
    genome_dir = data['refdir'] + "/genome"
    if not os.path.exists(genome_dir):
        os.makedirs(genome_dir)

    if os.path.exists(genome_dir + "/Genome"):
        log.info("genome already exists")
        return 0

    # Each selected file contributes " <refdir>/<name>" to the argument.
    fasta_args = "".join(
        " " + data['refdir'] + "/" + name
        for name in os.listdir(data['refdir'])
        if name.find(".fa") > 0)

    command = " ".join([
        'STAR --runMode genomeGenerate --genomeDir ', genome_dir,
        ' --genomeFastaFiles', fasta_args, ' --runThreadN ',
        str(data['threads'])
    ])
    log.debug(command)
    index_log = data['tempdir'] + "/log.index.genome"
    status = run("create index", command, index_log, index_log)
    log.info(status)
    return status
Ejemplo n.º 3
0
def normalizationref(data, log):
    """Run the reference normalization R script when size factors are missing.

    The script ``$INVFUSION/R/normalization.R`` is invoked on
    ``<data['outcounts']>/reference.tab`` only if
    ``<data['outcounts']>/sizefactors`` does not exist yet.

    Args:
        data: dict providing ``'outcounts'`` and ``'tempdir'``.
        log: unused here.

    Returns:
        0 when the size-factor file already exists, otherwise the value
        returned by ``run``.
    """
    counts_dir = data['outcounts']
    if os.path.exists(counts_dir + "/sizefactors"):
        return 0
    command = ' '.join(['Rscript $INVFUSION/R/normalization.R ',
                        counts_dir + "/reference.tab"])
    norm_log = data['tempdir'] + "/log.norm"
    return run('normalizationref', command, norm_log, norm_log)
Ejemplo n.º 4
0
def normalizationref(data, log):
    """Normalize the reference counts unless size factors already exist.

    Builds ``Rscript $INVFUSION/R/normalization.R  <outcounts>/reference.tab``
    and runs it only when ``<outcounts>/sizefactors`` is absent.

    Args:
        data: dict providing ``'outcounts'`` and ``'tempdir'``.
        log: unused here.

    Returns:
        The value returned by ``run``, or 0 when the command was skipped.
    """
    reference_table = "/".join((data['outcounts'], "reference.tab"))
    command = ' '.join(['Rscript $INVFUSION/R/normalization.R ',
                        reference_table])
    sizefactors = "/".join((data['outcounts'], "sizefactors"))

    status = 0
    already_done = os.path.exists(sizefactors)
    if not already_done:
        # The same log path is passed to run twice.
        log_path = data['tempdir'] + "/log.norm"
        status = run('normalizationref', command, log_path, log_path)
    return status
Ejemplo n.º 5
0
def normalizationcandidates(inv, data, log):
    """Run the candidate normalization R script for one inversion.

    The script ``$INVFUSION/R/candidatesnorm.R`` is run on
    ``<data['outcounts']>/<inv>.tab`` only when that table exists and its
    ``.norm1`` companion does not.

    Args:
        inv: inversion name; file stem of the count table.
        data: dict providing ``'outcounts'``, ``'genotype'``, ``'fasta'``
            and ``'tempdir'``.
        log: unused here.

    Returns:
        0 when nothing was run, otherwise the value returned by ``run``.
    """
    counts_dir = data['outcounts']
    table = counts_dir + "/" + inv + ".tab"
    script_args = [
        'Rscript $INVFUSION/R/candidatesnorm.R ',
        table,
        data['genotype'],
        counts_dir + "/sizefactors",
        data['fasta'],
        inv,
    ]
    command = ' '.join(script_args)

    needs_run = os.path.exists(table) and not os.path.exists(table + ".norm1")
    if not needs_run:
        return 0
    log_path = data['tempdir'] + "/log.cannorm"
    return run('normalizationcandidates', command, log_path, log_path)
Ejemplo n.º 6
0
def createFigures(data, log):
    """Render figures with the R script when ``svdg.rda`` is available.

    Ensures ``<data['out']>/img`` exists, then runs
    ``$INVFUSION/R/figures.R`` on ``<data['out']>/svdg.rda`` with the image
    directory as second argument. Nothing is run if the ``.rda`` file is
    missing.

    Args:
        data: dict providing ``'out'`` and ``'tempdir'``.
        log: unused here.

    Returns:
        0 when the input file is missing, otherwise the value returned by
        ``run``.
    """
    out_root = data['out']
    img_dir = out_root + "/img"
    if not os.path.exists(img_dir):
        os.makedirs(img_dir)

    rda_file = out_root + "/svdg.rda"
    if not os.path.exists(rda_file):
        return 0

    command = ' '.join(['Rscript $INVFUSION/R/figures.R ', rda_file, img_dir])
    fig_log = data['tempdir'] + "/log.fig"
    return run('createFigures', command, fig_log, fig_log)
Ejemplo n.º 7
0
def createFigures(data, log):
    """Create the image directory and, if possible, draw the figures.

    The directory ``<data['out']>/img`` is created when absent. The R
    script ``$INVFUSION/R/figures.R`` is then run with
    ``<data['out']>/svdg.rda`` and the image directory as arguments, but
    only if the ``.rda`` file exists.

    Args:
        data: dict providing ``'out'`` and ``'tempdir'``.
        log: unused here.

    Returns:
        The value returned by ``run``, or 0 when the script was not run.
    """
    figures_dir = "".join([data['out'], "/img"])
    if not os.path.exists(figures_dir):
        os.makedirs(figures_dir)

    rdata_path = "".join([data['out'], "/svdg.rda"])
    script_call = ['Rscript $INVFUSION/R/figures.R ', rdata_path, figures_dir]

    status = 0
    if os.path.exists(rdata_path):
        log_path = data['tempdir'] + "/log.fig"
        status = run('createFigures', ' '.join(script_call), log_path,
                     log_path)
    return status
Ejemplo n.º 8
0
def mergeInvcmd(listindv, inversion, data):
    """Merge the per-individual BAMs of one inversion with ``bamtools filter``.

    A list file ``<data['tempdir']>/inversion.list`` is (re)written with one
    ``<data['out']>/bams/<individual>.grep.sort.bam`` path per entry of
    ``listindv``. bamtools is then called with ``-region <inversion>`` and
    that list, writing to ``<data['out']>/bams/<inversion>.bam``.

    Args:
        listindv: iterable of individual names (file stems).
        inversion: value passed to ``-region``; also the output file stem.
        data: dict providing ``'out'`` and ``'tempdir'``.

    Returns:
        Whatever ``run`` returns for the command.
    """
    newpath = data['out'] + "/bams"
    outfile = newpath + "/" + inversion + ".bam"
    infile = data['tempdir'] + "/" + "inversion.list"

    # The context manager closes (and flushes) the list file even if a
    # write fails, so bamtools never sees a half-open handle.
    with open(infile, 'w') as inf:
        for individual in listindv:
            inf.write(newpath + "/" + individual + ".grep.sort.bam\n")

    cmd = ' '.join(['bamtools filter -region', inversion, ' -list', infile,
                    '-out', outfile])
    merge_log = data['tempdir'] + "/log.merge"
    return run('mergeInvcmd', cmd, merge_log, merge_log)
Ejemplo n.º 9
0
def insilico(invname, bedfile, fastaref, tmp, output, size, log):
    """Run ``makeref.insilico.sh`` to build the in-silico sequence.

    The script receives, in this order: ``invname``, ``bedfile``,
    ``fastaref``, ``size``, ``tmp`` and ``output``.

    Args:
        invname: inversion name; logged and used in the log file name.
        bedfile: passed through to the script.
        fastaref: passed through to the script.
        tmp: directory that receives ``log.<invname>``; also passed through.
        output: passed through to the script.
        size: passed through to the script (must already be a string).
        log: logger-like object; ``info`` and ``debug`` are called on it.

    Returns:
        Whatever ``run`` returns for the command.
    """
    log.info(invname)
    script_args = ['$INVFUSION/makeref.insilico.sh ', invname, bedfile,
                   fastaref, size, tmp, output]
    command = " ".join(script_args)
    log.debug(command)

    # The same log path is passed to run twice.
    log_path = tmp + "/log." + invname
    status = run("create insilico sequence", command, log_path, log_path)
    log.info(status)
    return status
Ejemplo n.º 10
0
def countscmd(data, gtf, outfile, bamfiles, threads=1,
              metafeature="-g exon_id -O "):
    """Run ``featureCounts`` on the given BAM files.

    Args:
        data: dict providing ``'tempdir'`` (location of ``log.counts``).
        gtf: annotation passed to ``-a``.
        outfile: path passed to ``-o``.
        bamfiles: string appended at the end of the command line.
        threads: value passed to ``-T``.
        metafeature: option string inserted right after the program name.

    Returns:
        Whatever ``run`` returns for the command.
    """
    pieces = ['featureCounts ', metafeature, '-T', str(threads), ' -a ',
              gtf, '-o', outfile, bamfiles]
    command = ' '.join(pieces)
    return run('countscmd', command, data['tempdir'] + "/log.counts")
Ejemplo n.º 11
0
def normalizationcandidates(inv, data, log):
    """Normalize the candidate counts of one inversion when still pending.

    ``$INVFUSION/R/candidatesnorm.R`` is called with the count table
    ``<outcounts>/<inv>.tab``, ``data['genotype']``, the size-factor file,
    ``data['fasta']`` and ``inv``. The call is skipped when the table is
    missing or when ``<table>.norm1`` already exists.

    Args:
        inv: inversion name; file stem of the count table.
        data: dict providing ``'outcounts'``, ``'genotype'``, ``'fasta'``
            and ``'tempdir'``.
        log: unused here.

    Returns:
        The value returned by ``run``, or 0 when the call was skipped.
    """
    count_table = "/".join((data['outcounts'], inv + ".tab"))
    size_factor_file = "/".join((data['outcounts'], "sizefactors"))
    command = ' '.join([
        'Rscript $INVFUSION/R/candidatesnorm.R ', count_table,
        data['genotype'], size_factor_file, data['fasta'], inv
    ])

    if not os.path.exists(count_table):
        return 0
    if os.path.exists(count_table + ".norm1"):
        return 0

    log_path = data['tempdir'] + "/log.cannorm"
    return run('normalizationcandidates', command, log_path, log_path)
Ejemplo n.º 12
0
def mapGenome(data, log):
    """Map every sample of the sample sheet with STAR, skipping finished ones.

    ``data['fasta']`` is read as a tab-separated sheet: column 1 is the
    sample name, columns 2 and 3 are handed to ``--readFilesIn``. For each
    sample whose ``<data['out']>/bams/<name>.bam`` does not exist yet, a
    STAR command piped into ``samtools view -bS`` is run to create it.

    Args:
        data: dict providing ``'refdir'``, ``'out'``, ``'fasta'``,
            ``'threads'`` and ``'tempdir'``.
        log: logger-like object; ``info`` and ``debug`` are called on it.

    Returns:
        The status of the last sample in the sheet: the value returned by
        ``run`` if it was mapped, 0 if it was skipped. 0 for an empty sheet.
    """
    log.info("mapping with STAR")
    genomepath = data['refdir'] + "/genome"
    newpath = data['out'] + "/bams"
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    # Bound before the loop so an empty sheet returns 0 instead of raising
    # UnboundLocalError at the return statement.
    p = 0
    with open(data['fasta'], 'r') as samples:
        for line in samples:
            cols = line.strip().split("\t")
            if not cols[0]:
                # Blank line (e.g. trailing newline): nothing to map.
                continue
            output = newpath + "/" + cols[0] + ".bam"
            p = 0
            if not os.path.exists(output):
                log.info(cols[0])
                # NOTE(review): the literal "genome" token right after the
                # genome directory looks unintended - confirm against the
                # STAR command line before changing it.
                cmd = ' '.join(["STAR  --genomeDir", genomepath, "genome --readFilesIn", cols[1], cols[2], "--readFilesCommand zcat --runThreadN ", str(data['threads']), " --outStd SAM --outSAMmode Full --outSAMattributes All --outFilterType BySJout --outSJfilterReads Unique   --outFilterMultimapNmax 1 --outSAMstrandField intronMotif  | samtools view -bS - >", output])
                log.debug(cmd)
                maplog = data['tempdir'] + "/log.map.genome"
                p = run("mapping " + cols[0], cmd, maplog, maplog)
                log.info(p)
    return p
Ejemplo n.º 13
0
def CLASScmd(inv, listindv, data, log):
    """Merge, sort and run ``class.sh`` for one inversion.

    Steps, in order: ``mergeInvcmd`` on the individuals, ``sortInvcmd`` on
    the merged BAM, then ``$INVFUSION/class.sh`` with
    ``<data['out']>/bams/<inv>.sort`` as input and
    ``<data['out']>/gtf/<inv>.gtf`` as output.

    Args:
        inv: inversion name.
        listindv: individuals forwarded to ``mergeInvcmd``.
        data: dict providing ``'list'``, ``'out'`` and ``'tempdir'``.
        log: logger-like object; ``info`` and ``debug`` are called on it.

    Returns:
        The value returned by ``run`` for the ``class.sh`` step. The
        results of the merge and sort steps are not inspected.

    Raises:
        IOError/OSError: if ``data['list']`` cannot be opened.
    """
    # The list file was opened but never read or closed here. Opening it is
    # kept so a missing/unreadable list still fails before any work starts,
    # but the handle is now released immediately instead of leaking.
    with open(data['list'], 'r'):
        pass

    # merge individuals
    mergeInvcmd(listindv, inv, data)
    # sort
    sortInvcmd(inv, data)

    # run class
    log.info("Running CLASS")
    newpath = data['out']
    infile = newpath + "/bams/" + inv + ".sort"
    output = newpath + "/gtf/" + inv + ".gtf"
    cmd = ' '.join(['$INVFUSION/class.sh', infile, output])
    log.debug(cmd)
    return run('CLASScmd', cmd, data['tempdir'] + "/log.class")
Ejemplo n.º 14
0
def sortBam(data, log):
    """Sort every sample BAM with ``bamtools sort``, skipping finished ones.

    ``data['fasta']`` is read as a tab-separated sheet whose first column is
    the sample name. For each sample, ``<data['out']>/bams/<name>.bam`` is
    sorted into ``<name>.sort.bam`` unless the sorted file already exists.

    Args:
        data: dict providing ``'out'``, ``'fasta'`` and ``'tempdir'``.
        log: logger-like object; ``info`` is called on it.

    Returns:
        The value returned by ``run`` for the last sample that was actually
        sorted, or 0 when nothing had to be sorted.
    """
    log.info("Sort bams")
    newpath = data['out'] + "/bams"
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    # Bound up front: when every sorted BAM already exists (or the sheet is
    # empty) no command runs, and the final log/return previously raised
    # UnboundLocalError.
    p = 0
    with open(data['fasta'], 'r') as samples:
        for line in samples:
            cols = line.strip().split("\t")
            if not cols[0]:
                # Blank line: there is no sample to sort.
                continue
            log.info(cols[0])
            infile = newpath + "/" + cols[0] + ".bam"
            outfile = newpath + "/" + cols[0] + ".sort.bam"
            if not os.path.exists(outfile):
                cmd = ' '.join(['bamtools sort -in', infile, '-out', outfile])
                p = run('sortBam with' + infile, cmd,
                        data['tempdir'] + "/log.sort")
                log.info(p)
                if p == 0:
                    # Deleting the unsorted input is currently disabled;
                    # only the log message is emitted.
                    log.info("remove file")
    log.info(p)
    return p
Ejemplo n.º 15
0
def createIndex(data, log):
    """Create the STAR index in ``<data['refdir']>/genome`` if it is missing.

    The genome directory is created when absent. When it does not contain
    ``Genome`` yet, STAR is run in ``genomeGenerate`` mode over every entry
    of ``data['refdir']`` whose name has ``.fa`` somewhere after its first
    character.

    Args:
        data: dict providing ``'refdir'``, ``'threads'`` and ``'tempdir'``.
        log: logger-like object; ``info`` and ``debug`` are called on it.

    Returns:
        The value returned by ``run``, or 0 when the index already exists.
    """
    log.info("index with STAR")
    ref_dir = data['refdir']
    index_dir = ref_dir + "/genome"
    status = 0
    if not os.path.exists(index_dir):
        os.makedirs(index_dir)

    if not os.path.exists(index_dir + "/Genome"):
        fasta_paths = []
        for entry in os.listdir(ref_dir):
            if entry.find(".fa") > 0:
                fasta_paths.append(ref_dir + "/" + entry)
        # Leading empty element reproduces the " path path ..." layout
        # (each path preceded by one space).
        fasta_arg = " ".join([""] + fasta_paths)

        star_call = ['STAR --runMode genomeGenerate --genomeDir ', index_dir,
                     ' --genomeFastaFiles', fasta_arg, ' --runThreadN ',
                     str(data['threads'])]
        command = " ".join(star_call)
        log.debug(command)
        log_path = data['tempdir'] + "/log.index.genome"
        status = run("create index", command, log_path, log_path)
        log.info(status)
    else:
        log.info("genome already exists")
    return status
Ejemplo n.º 16
0
def createIntronsBam(inv, data, log):
    """Derive intron intervals from the spliced reads of one inversion.

    First a samtools/awk/bedtools pipeline is run over
    ``<data['out']>/bams/<inv>.sort.bam`` and ``<inv>_filter.gtf``. The
    resulting ``<data['out']>/gtf/<inv>.bed`` is then scanned: for
    consecutive records sharing the same name (4th column), the gap between
    the end of the previous record and the start of the current one is
    written to ``<inv>_bam_introns.bed`` as
    ``chrom, start, end, <name>_i<k>`` with ``k`` counting from 1 per name.

    Args:
        inv: inversion name; file stem of all files involved.
        data: dict providing ``'out'`` and ``'tempdir'``.
        log: unused here.

    Returns:
        None. The value returned by ``run`` is not propagated.
    """
    gtf_prefix = data['out'] + "/gtf/" + inv
    cmd = ' '.join(['samtools view -h ', data['out'] + "/bams/" + inv + ".sort.bam | awk '$6~/N/ || $0~/@/' | samtools view -Sb - |bedtools intersect -abam - -b", gtf_prefix + "_filter.gtf", "| bedtools bamtobed -split -i -  "])
    # The .bed path is handed to run as its fourth argument; the loop below
    # reads the pipeline's records from that file.
    run('createIntronsBam', cmd, data['tempdir'] + "/log.bedtools",
        gtf_prefix + ".bed")

    # None (not '') as the "no previous record" marker, so a first record
    # with an empty name cannot reach the intron branch before start/idx
    # have been set.
    gen = None
    start = None
    idx = 0
    # Context managers close both files even if a malformed line raises.
    with open(gtf_prefix + "_bam_introns.bed", 'w') as out:
        with open(gtf_prefix + ".bed", 'r') as blocks:
            for line in blocks:
                cols = line.strip().split("\t")
                tr = cols[3]
                if gen == tr:
                    # Same name as the previous record: emit the gap.
                    idx += 1
                    intron = tr + "_i" + str(idx)
                    end = cols[1]
                    out.write("%s\t%s\t%s\t%s\n" % (cols[0], start, end, intron))
                    start = cols[2]
                else:
                    # New name: remember where its first record ends.
                    gen = tr
                    start = cols[2]
                    idx = 0
Ejemplo n.º 17
0
def mapGenome(data, log):
    """Map each sample listed in ``data['fasta']`` with STAR.

    The sheet is tab-separated: sample name, then the two values given to
    ``--readFilesIn``. A sample is mapped only when
    ``<data['out']>/bams/<name>.bam`` does not exist; STAR's SAM output is
    piped through ``samtools view -bS`` into that file.

    Args:
        data: dict providing ``'refdir'``, ``'out'``, ``'fasta'``,
            ``'threads'`` and ``'tempdir'``.
        log: logger-like object; ``info`` and ``debug`` are called on it.

    Returns:
        The status of the last sample in the sheet: the value returned by
        ``run`` if it was mapped, 0 if it was skipped. 0 for an empty sheet.
    """
    log.info("mapping with STAR")
    genomepath = data['refdir'] + "/genome"
    newpath = data['out'] + "/bams"
    if not os.path.exists(newpath):
        os.makedirs(newpath)

    # Defined before the loop: with an empty sheet the loop body never runs
    # and the return used to raise UnboundLocalError.
    p = 0
    with open(data['fasta'], 'r') as sheet:
        for row in sheet:
            cols = row.strip().split("\t")
            if not cols[0]:
                # Skip blank lines instead of failing on the missing columns.
                continue
            output = newpath + "/" + cols[0] + ".bam"
            p = 0
            if os.path.exists(output):
                continue
            log.info(cols[0])
            # NOTE(review): the literal "genome" token right after the
            # genome directory looks unintended - confirm against the STAR
            # command line before changing it.
            cmd = ' '.join([
                "STAR  --genomeDir", genomepath, "genome --readFilesIn",
                cols[1], cols[2], "--readFilesCommand zcat --runThreadN ",
                str(data['threads']),
                " --outStd SAM --outSAMmode Full --outSAMattributes All --outFilterType BySJout --outSJfilterReads Unique   --outFilterMultimapNmax 1 --outSAMstrandField intronMotif  | samtools view -bS - >",
                output
            ])
            log.debug(cmd)
            map_log = data['tempdir'] + "/log.map.genome"
            p = run("mapping " + cols[0], cmd, map_log, map_log)
            log.info(p)
    return p
Ejemplo n.º 18
0
def convertGenes(inv, data, log):
    """Run ``genesConversion.sh`` for one inversion.

    The script receives, in order: ``inv``, ``<refdir>/mask.bed``,
    ``refdir``, ``data['annotation']``, ``data['tempdir']`` and
    ``<data['out']>/gtf``.

    Args:
        inv: inversion name.
        data: dict providing ``'refdir'``, ``'annotation'``, ``'tempdir'``
            and ``'out'``.
        log: unused here.

    Returns:
        Whatever ``run`` returns for the command.
    """
    script_args = [
        "$INVFUSION/genesConversion.sh",
        inv,
        data['refdir'] + "/mask.bed",
        data['refdir'],
        data['annotation'],
        data['tempdir'],
        data['out'] + "/gtf",
    ]
    command = ' '.join(script_args)
    log_path = data['tempdir'] + "/log.convert"
    return run('convertGenes', command, log_path, log_path)
Ejemplo n.º 19
0
def overlapKnownGenes(inv, data, log):
    """Run ``bedtools coverage -hist`` of known genes against filtered models.

    ``-a`` is ``<data['out']>/gtf/<inv>.knowngenes.gtf`` and ``-b`` is
    ``<data['out']>/gtf/<inv>_filter.gtf``. The path
    ``<data['out']>/gtf/<inv>.overlap`` is passed to ``run`` as its fourth
    argument.

    Args:
        inv: inversion name; file stem.
        data: dict providing ``'out'`` and ``'tempdir'``.
        log: unused here.

    Returns:
        Whatever ``run`` returns for the command.
    """
    stem = data['out'] + "/gtf/" + inv
    command = ' '.join(['bedtools coverage -hist -a ',
                        stem + ".knowngenes.gtf",
                        '-b',
                        stem + "_filter.gtf"])
    return run('overlapKnownGenes', command,
               data['tempdir'] + "/log.overlapKnownGenes",
               stem + ".overlap")
Ejemplo n.º 20
0
def countscmd(data, gtf, outfile, bamfiles, threads=1, metafeature="-g exon_id -O "):
    """Count reads with ``featureCounts``.

    Args:
        data: dict providing ``'tempdir'`` (location of ``log.counts``).
        gtf: annotation passed to ``-a``.
        outfile: path passed to ``-o``.
        bamfiles: string appended at the end of the command line.
        threads: value passed to ``-T``.
        metafeature: option string placed right after the program name.

    Returns:
        Whatever ``run`` returns for the command.
    """
    options = [metafeature, '-T', str(threads), ' -a ', gtf, '-o', outfile]
    command = ' '.join(['featureCounts '] + options + [bamfiles])
    counts_log = data['tempdir'] + "/log.counts"
    return run('countscmd', command, counts_log)
Ejemplo n.º 21
0
def countJuntions(inv, data, log):
    """Count the reads supporting each annotated intron of one inversion.

    Intersects ``<inv>_introns.bed`` with ``<inv>_bam_introns.bed`` (both in
    ``<data['out']>/gtf``) using ``bedtools intersect -r -f 0.95 -wo``,
    filters the pairs with awk, and tallies column 4 with
    ``sort | uniq -c``. The path ``<data['out']>/gtf/<inv>.junctions`` is
    passed to ``run`` as its fourth argument.

    Args:
        inv: inversion name; file stem.
        data: dict providing ``'out'`` and ``'tempdir'``.
        log: unused here.

    Returns:
        Whatever ``run`` returns for the command.
    """
    stem = data['out'] + "/gtf/" + inv
    annotated = stem + "_introns.bed"
    # The shell pipeline is appended directly to the second file name.
    observed_and_filter = stem + "_bam_introns.bed | awk '($2-5)<$6 && ($2+5)>$6 && ($3-5)<$7 && ($3+5)>$7'| cut -f 4 | sort | uniq -c "
    command = ' '.join(['bedtools intersect -r -f 0.95 -wo  -a ', annotated,
                        ' -b ', observed_and_filter])
    return run('countJuntions', command, data['tempdir'] + "/log.counts",
               stem + ".junctions")
Ejemplo n.º 22
0
if __name__ == '__main__':
    # Script entry point: build a dropout MLP on the MNIST data and hand
    # everything to utils.run.

    data = utils.get_mnist_data()

    # Network topology, from input to output:
    #   784 -> input size, image (28 x 28)
    #   512 -> layer 1
    #   512 -> layer 2
    #   256 -> layer 3
    #   10  -> output, number of classes
    n_features = [784, 512, 512, 256, 10]

    x, y, weights, biases, dropout_keep_prob = create_network(n_features)

    # Prediction
    pred = multilayer_perceptron(x, weights, biases, dropout_keep_prob)

    # Loss, optimizer and the two accuracy-related tensors
    cost, optm, corr, accr = utils.get_functions(pred, y)

    # Run parameters
    training_epochs = 20
    batch_size = 100
    display_step = 4

    # Extra values keyed by the dropout placeholder returned above.
    additional_inputs = {dropout_keep_prob: [0.6, 1.0]}

    utils.run(
        x, y, data, pred, cost, optm, corr, accr,
        training_epochs, batch_size, display_step, additional_inputs)
Ejemplo n.º 23
0
def grepBamcmd(infile, outfile, data):
    """Filter a BAM file with ``bamtools filter`` and the bundled script.

    The filter script is ``$INVFUSION/libs/filterbam``.

    Args:
        infile: BAM passed to ``-in``.
        outfile: BAM passed to ``-out``.
        data: dict providing ``'tempdir'`` (location of ``log.grep``).

    Returns:
        Whatever ``run`` returns for the command.
    """
    pieces = ['bamtools filter -script $INVFUSION/libs/filterbam -in',
              infile, '-out', outfile]
    command = ' '.join(pieces)
    return run('grepBamcmd', command, data['tempdir'] + "/log.grep")
Ejemplo n.º 24
0
def indexBamcmd(infile, data):
    """Index a BAM file with ``samtools index``.

    Args:
        infile: BAM file to index.
        data: dict providing ``'tempdir'`` (location of ``log.index``).

    Returns:
        Whatever ``run`` returns for the command.
    """
    command = ' '.join(('samtools index ', infile))
    index_log = data['tempdir'] + "/log.index"
    return run('indexBamcmd', command, index_log)
Ejemplo n.º 25
0
    # Network output built from the inputs, weights and biases.
    # NOTE(review): x, weights, biases and data are defined before this
    # fragment, outside the visible code - confirm their origin.
    pred = multilayer_perceptron(x, weights, biases)

    # Loss and optimizer
    # Mean of the softmax cross-entropy between the logits `pred` and the
    # labels `y`.
    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))

    # Optimizer
    # Gradient descent with a fixed learning rate; the Adam alternative is
    # kept below, commented out.
    rate = 0.1
    optm = tf.train.GradientDescentOptimizer(learning_rate=rate).minimize(cost)
    # optm = tf.train.AdamOptimizer(learning_rate=rate).minimize(cost)
    # corr: element-wise equality of the argmax (along axis 1) of pred and y.
    corr = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    # accr: mean of corr after casting it to float.
    accr = tf.reduce_mean(tf.cast(corr, 'float'))
    print('Functions Ready')

    # parameters
    training_epochs = 50
    batch_size = 100
    display_step = 1

    # Hand the graph pieces and the run parameters to the shared runner.
    utils.run(x,
              y,
              data,
              pred,
              cost,
              optm,
              corr,
              accr,
              training_epochs=training_epochs,
              batch_size=batch_size,
              display_step=display_step)
Ejemplo n.º 26
0
def complexity(inv, data, log):
    """Count, per filtered model, how many filtered models it overlaps.

    Runs ``bedtools intersect -wa -c`` with
    ``<data['out']>/gtf/<inv>_filter.gtf`` as both ``-a`` and ``-b``. The
    path ``<data['out']>/gtf/<inv>.complexity`` is passed to ``run`` as its
    fourth argument.

    Args:
        inv: inversion name; file stem.
        data: dict providing ``'out'`` and ``'tempdir'``.
        log: unused here.

    Returns:
        Whatever ``run`` returns for the command.
    """
    stem = data['out'] + "/gtf/" + inv
    filtered = stem + "_filter.gtf"
    command = ' '.join(['bedtools intersect -wa -c -a ', filtered, '-b',
                        filtered])
    return run('complexity', command, data['tempdir'] + "/log.complexity",
               stem + ".complexity")
Ejemplo n.º 27
0
def intronBP(inv, data, log):
    """Run ``bedtools coverage -hist`` of the inversion BED against introns.

    ``-a`` is ``<data['refdir']>/<inv>.bed`` and ``-b`` is
    ``<data['out']>/gtf/<inv>_introns.bed``. Lines containing ``all`` are
    dropped and only rows with column 8 below 0.9 are kept. The path
    ``<data['out']>/gtf/<inv>.intronBP`` is passed to ``run`` as its fourth
    argument; the log file name is ``log.overlapKnownGenes``.

    Args:
        inv: inversion name; file stem.
        data: dict providing ``'out'``, ``'refdir'`` and ``'tempdir'``.
        log: unused here.

    Returns:
        Whatever ``run`` returns for the command.
    """
    gtf_stem = data['out'] + "/gtf/" + inv
    introns_bed = gtf_stem + "_introns.bed"
    inversion_bed = data['refdir'] + "/" + inv + ".bed"
    pieces = ['bedtools coverage -hist -a ', inversion_bed, '-b', introns_bed,
              "| grep -v all  | awk '$8<0.9'"]
    command = ' '.join(pieces)
    return run('intronBP', command,
               data['tempdir'] + "/log.overlapKnownGenes",
               gtf_stem + ".intronBP")
Ejemplo n.º 28
0
def mask(fastaref, maskbed, output, param, log):
    """Run ``mask.genome.sh`` on a reference FASTA.

    Args:
        fastaref: first argument of the script.
        maskbed: second argument of the script.
        output: third argument of the script.
        param: dict providing ``'tempdir'`` (location of ``log.mask``).
        log: logger-like object; ``info`` is called on it.

    Returns:
        Whatever ``run`` returns for the command.
    """
    log.info("masking genome")
    script_args = ['$INVFUSION/mask.genome.sh ', fastaref, maskbed, output]
    command = " ".join(script_args)
    # The same log path is passed to run twice.
    mask_log = param['tempdir'] + "/log.mask"
    return run("masking genome", command, mask_log, mask_log)