#!/usr/bin/python
# script for computing small RNA phasing from a bowtie output
# version 1 4-1-2013
# Usage small_RNA_phasing.py <bowtie input> <minsize query> <maxsize query> <min scope> <max scope> <output> <bowtie index>

import sys, subprocess
from smRtools import get_fasta, get_fasta_from_history, antipara, RNAtranslate, SmRNAwindow
from collections import defaultdict

# Reference sequences are loaded from argv[7]; a trailing "--extract_index"
# flag selects get_fasta() instead of get_fasta_from_history().
if sys.argv[-1] == "--extract_index":
    geneDic = get_fasta(sys.argv[7])
else:
    geneDic = get_fasta_from_history(sys.argv[7])

# One SmRNAwindow per reference name, created the first time a read hits it.
objDic = {}
# F is the bowtie output taken as input; the with-block guarantees the handle
# is closed even if a malformed line raises while parsing.
with open(sys.argv[1], "r") as F:
    for line in F:
        fields = line.split()
        if not fields:
            # Blank line (e.g. trailing newline at end of file): nothing to add.
            continue
        polarity = fields[1]
        gene = fields[2]
        offset = int(fields[3])
        size = len(fields[4])  # read length = length of the sequence column
        # Explicit membership test instead of try/except KeyError around
        # addread(): a KeyError raised inside addread() must not silently
        # replace an already-populated window with a fresh one.
        if gene not in objDic:
            objDic[gene] = SmRNAwindow(gene, geneDic[gene])
        objDic[gene].addread(polarity, offset, size)

minquery = int(sys.argv[2])
#!/usr/bin/python
# script for computing z10 signature from a bowtie genomic output
# version 1 8-5-2012 using SmRNAtools class imported from smRtools
# Usage z10_genome.py <bowtie input> <windowsize> <outputpipi> <outputpisi> <outputsisi> <bowtie index>

import sys, subprocess
from smRtools import get_fasta, antipara, RNAtranslate, SmRNAwindow
from collections import defaultdict
from numpy import mean, std


def split_len(seq, length):
    """Return consecutive slices of seq, each `length` long (the last may be shorter)."""
    return [seq[i:i + length] for i in range(0, len(seq), length)]


geneDic = get_fasta(sys.argv[6])
# objDic[chrom][window_start] -> SmRNAwindow covering that slice of chrom.
objDic = defaultdict(dict)
windowsize = int(sys.argv[2])
for chrom in geneDic:
    for windowindex, sequencewindow in enumerate(split_len(geneDic[chrom], windowsize)):
        window_start = windowindex * windowsize
        objDic[chrom][window_start] = SmRNAwindow(chrom, sequencewindow, window_start)

# F is the bowtie output taken as input; the with-block guarantees the handle
# is closed even if a malformed line raises while parsing.
with open(sys.argv[1], "r") as F:
    for line in F:
        fields = line.split()
        if not fields:
            # Blank line (e.g. trailing newline at end of file): nothing to add.
            continue
        polarity = fields[1]
        chrom = fields[2]
        offset = int(fields[3])
        size = len(fields[4])  # read length = length of the sequence column
        # Floor division is required to land on a window start key: plain "/"
        # only floors for ints on Python 2 and yields a float on Python 3,
        # which would miss the integer keys built above.
        window_start = offset // windowsize * windowsize
        objDic[chrom][window_start].addread(polarity, offset, size)
#!/usr/bin/python
# script for computing overlap signatures from a bowtie output
# version 3 17-5-2012 complete refactoring with OOP approach
# Usage pairer.py <bowtie input> <minsize query> <maxsize query> <minsize target> <maxsize target> <output> <output1> <output2> <output3> <output4> <output5> <output6> <bowtie index>

import sys, subprocess
from collections import defaultdict
from smRtools import get_fasta, antipara, RNAtranslate, SmRNAwindow

fasta_dic = get_fasta(sys.argv[13])

# One SmRNAwindow per reference name, created the first time a read hits it.
objDic = {}
# F is the bowtie output taken as input; the with-block guarantees the handle
# is closed even if a malformed line raises while parsing.
with open(sys.argv[1], "r") as F:
    for line in F:
        fields = line.split()
        if not fields:
            # Blank line (e.g. trailing newline at end of file): nothing to add.
            continue
        polarity = fields[1]
        gene = fields[2]
        offset = int(fields[3])
        size = len(fields[4])  # read length = length of the sequence column
        # Explicit membership test instead of try/except KeyError around
        # addread(): a KeyError raised inside addread() must not silently
        # replace an already-populated window with a fresh one.
        if gene not in objDic:
            objDic[gene] = SmRNAwindow(gene, fasta_dic[gene])
        objDic[gene].addread(polarity, offset, size)

# NOTE(review): OUT is left as a plain open() because the code that writes to
# and closes it is not visible in this excerpt.
OUT = open(sys.argv[6], "w")
for x in objDic:
    # NOTE(review): the size bounds are passed as raw command-line strings and
    # the leading 10 is presumably the overlap length -- confirm both against
    # SmRNAwindow.pairer() in smRtools.
    sequence_list = objDic[x].pairer(10, sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])
#!/usr/bin/python
# script for outputting pairABLE reads from a bowtie output
# version 1 version 3-12-2012
# Usage pairable.py <bowtie input> <minsize query> <maxsize query> <minsize target> <maxsize target> <output> <bowtie index>

import sys, subprocess
from collections import defaultdict
from smRtools import get_fasta, antipara, RNAtranslate, SmRNAwindow

fasta_dic = get_fasta(sys.argv[7])

# One SmRNAwindow per reference name, created the first time a read hits it.
objDic = {}
# F is the bowtie output taken as input; the with-block guarantees the handle
# is closed even if a malformed line raises while parsing.
with open(sys.argv[1], "r") as F:
    for line in F:
        fields = line.split()
        if not fields:
            # Blank line (e.g. trailing newline at end of file): nothing to add.
            continue
        polarity = fields[1]
        gene = fields[2]
        offset = int(fields[3])
        size = len(fields[4])  # read length = length of the sequence column
        # Explicit membership test instead of try/except KeyError around
        # addread(): a KeyError raised inside addread() must not silently
        # replace an already-populated window with a fresh one.
        if gene not in objDic:
            objDic[gene] = SmRNAwindow(gene, fasta_dic[gene])
        objDic[gene].addread(polarity, offset, size)

# NOTE(review): OUT is left as a plain open() because the code that writes to
# and closes it is not visible in this excerpt.
OUT = open(sys.argv[6], "w")
for x in objDic:
    # NOTE(review): the size bounds are passed as raw command-line strings and
    # the leading 10 is presumably the overlap length -- confirm both against
    # SmRNAwindow.newpairable_bowtie() in smRtools.
    sequence_list = objDic[x].newpairable_bowtie(10, sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])