def getFastaSeqs():
    """Print FASTA-formatted genomic sequences for gene names read from stdin.

    Options are parsed from the command line with ``OptionParser``:

    * ``-g/--gtf_file``, ``-f/--fasta_file``, ``-t/--tab_file`` -- paths that
      are passed through ``gtm.getGTF`` / ``gtm.getFASTA`` / ``gtm.getTAB``
      and then loaded into a ``GTF2.Parse_GTF`` object.
    * ``-r/--ranges`` -- integer forwarded as ``ranges`` to
      ``genomicSequence`` (default 0).
    * ``-a/--5end`` and ``-b/--3end`` -- integer slice bounds applied to every
      sequence as ``sequence[five_end:three_end]`` (both default to ``None``,
      i.e. the sequence is printed untrimmed).

    One gene name is expected per stdin line. For each name a ``>name``
    header line is printed, followed by the sliced sequence and an empty
    line. Blank input lines are skipped.
    """
    parser = OptionParser(usage="List of genes as std input and parameters")
    parser.add_option("-g", "--gtf_file", dest="gtf_file",
                      help="Provide the path to your gtf file.",
                      type="str", default=None)
    parser.add_option("-f", "--fasta_file", dest="fasta_file",
                      help="Provide the path to your fasta file.",
                      type="str", default=None)
    parser.add_option("-t", "--tab_file", dest="tab_file",
                      help="Provide the path to your genom tab file.",
                      type="str", default=None)
    parser.add_option("-r", "--ranges", dest="ranges",
                      help="Provide ranges(flanks) for genes.",
                      type="int", default=0)
    parser.add_option("-a", "--5end", dest="five_end",
                      help="Set up 5` flank. If minus then print only 3` end. Python slicing [a:b] i.e. [200:401] - from 200 to 400; [-200:] - last 200; "
                           "[:-200] from begining till -200 before end",
                      type="int", default=None)
    # The help text used to say "5` flank" here (copied from -a); this option
    # is the 3` bound of the slice.
    parser.add_option("-b", "--3end", dest="three_end",
                      help="Set up 3` flank. If minus then print only 5` end. Python slicing [a:b]",
                      type="int", default=None)
    options, _ = parser.parse_args()

    # Restore the default SIGPIPE action so the script ends quietly when the
    # reader of stdout goes away (e.g. when piped into `head`).
    signal(SIGPIPE, SIG_DFL)

    # Build the annotation object from the GTF, FASTA and TAB files.
    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))
    gtf.read_FASTA(gtm.getFASTA(options.fasta_file))
    gtf.read_TAB(gtm.getTAB(options.tab_file))

    for line in sys.stdin:
        gene_name = line.strip()
        if not gene_name:
            # A blank line is not a gene name; do not look it up.
            continue
        genomic_seq = gtf.genomicSequence(gene_name, ranges=options.ranges)
        # Single-argument parenthesised print behaves the same on Python 2 and 3.
        print('>' + gene_name)
        print(genomic_seq[options.five_end:options.three_end] + '\n')
# getFastaSeqs: command-line entry point that reads gene names from stdin (one
# per line) and prints, for each, a '>' + name header followed by the gene's
# genomic sequence sliced as [five_end:three_end] plus a trailing newline.
#
# Options are parsed with OptionParser: -g / -f / -t give the GTF, FASTA and
# TAB paths (each passed through gtm.getGTF / gtm.getFASTA / gtm.getTAB before
# being loaded into a GTF2.Parse_GTF object), -r is the integer `ranges`
# argument forwarded to genomicSequence (default 0), and -a/--5end and
# -b/--3end are the integer slice bounds (default None, i.e. no trimming).
#
# signal(SIGPIPE, SIG_DFL) installs the default SIGPIPE action before any
# output is written.
#
# NOTE(review): this definition is flattened onto one physical line, so its
# original indentation cannot be recovered from here.
# NOTE(review): the help text of -b/--3end begins with "Set up 5` flank", which
# looks copy-pasted from -a/--5end -- confirm and correct.
# NOTE(review): the trailing gtf.codingSequence() call passes no arguments and
# its result is unused; it is unclear whether it belongs inside the loop, after
# it, or outside the function -- confirm it is intended at all.
def getFastaSeqs(): parser = OptionParser(usage="List of genes as std input and parameters") parser.add_option("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.", type="str", default=None) parser.add_option("-f", "--fasta_file", dest="fasta_file", help="Provide the path to your fasta file.", type="str", default=None) parser.add_option("-t", "--tab_file", dest="tab_file", help="Provide the path to your genom tab file.", type="str", default=None) parser.add_option("-r", "--ranges", dest="ranges", help="Provide ranges(flanks) for genes.", type="int", default=0) parser.add_option( "-a", "--5end", dest="five_end", help= "Set up 5` flank. If minus then print only 3` end. Python slicing [a:b] i.e. [200:401] - from 200 to 400; [-200:] - last 200; " "[:-200] from begining till -200 before end", type="int", default=None) parser.add_option( "-b", "--3end", dest="three_end", help= "Set up 5` flank. If minus then print only 5` end. Python slicing [a:b]", type="int", default=None) (options, args) = parser.parse_args() signal(SIGPIPE, SIG_DFL) # to manage with stdin and stdout #crating gtf object gtf = GTF2.Parse_GTF() gtf.read_GTF(gtm.getGTF(options.gtf_file)) gtf.read_FASTA(gtm.getFASTA(options.fasta_file)) gtf.read_TAB(gtm.getTAB(options.tab_file)) for i in sys.stdin: gene_name = str(i.strip()) genomic_seq = gtf.genomicSequence(gene_name, ranges=options.ranges) print '>' + gene_name print genomic_seq[options.five_end:options.three_end] + '\n' gtf.codingSequence()
# Set up the command-line option parser.
parser = argparse.ArgumentParser(
    description='Usage: ruffus scirpt designed to make concat file from *.novo files. Make new folder, cp or ln into all novofiles and run novo2concat. IMPORTANT: name of novo file should be name of experiment')
parser.add_argument("-g", "--gtf_file", dest="gtf_file",
                    help="Provide the path to your gtf file.",
                    type=str, default=None)
parser.add_argument("-t", "--tab_file", dest="tab_file",
                    help="Provide the path to your tab genome file.",
                    type=str, default=None)
parser.add_argument("-r", dest="ranges",
                    help="Set up ranges for pyPileup. Default = 250",
                    default=250)
parser.add_argument("--3end", dest="three_end",
                    help="Use pyPileup option --3end to only report counts for the 3' end of the reads. Default = False",
                    action="store_true", default=False)
parser.add_argument("-l", dest="list_file",
                    help="Provide the FULL path to your gene_names.list file.",
                    type=str, default=None, required=True)
parser.add_argument("--tree", dest="tree",
                    help="If you want to leave tree of catalogs including pilups within. Default = None.",
                    action="store_true", default=False)
parser.add_argument("-p", dest="prefix",
                    help="Prefix for concat file name",
                    type=str, default="")
args = parser.parse_args()

# Resolve the annotation files through gtm; ranges is kept as a string.
gtf, tab, ranges = gtm.getGTF(args.gtf_file), gtm.getTAB(args.tab_file), str(args.ranges)
# Single-argument parenthesised print behaves the same on Python 2 and 3.
print("Using GTF file: " + gtf)
print("Using TAB genome file: " + tab)

# List the *.novo files in the current directory; each one gets a directory
# named after the file with its ".novo" suffix removed.
files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.novo')]
directories = [re.sub(r'\.novo$', '', f) for f in files]
links = []
root_dir = os.getcwd()

# Create one directory per novo file and place a relative symlink to the file
# inside it. The working directory is deliberately left untouched: changing
# into each new directory without returning would nest every later directory
# inside the previous one and make the "../<file>" target wrong.
for f, d in zip(files, directories):
    os.mkdir(d)
    link_path = os.path.join(root_dir, d, f)
    # os.symlink avoids building a shell command from file names, which
    # breaks on names containing spaces or shell metacharacters.
    os.symlink(os.path.join(os.pardir, f), link_path)
    links.append(link_path)
# Remaining command-line options (the parser itself is created earlier).
parser.add_argument("-r", dest="ranges",
                    help="Set up ranges for pyPileup. Default = 250",
                    default=250)
parser.add_argument("--3end", dest="three_end",
                    help="Use pyPileup option --3end to only report counts for the 3' end of the reads. Default = False",
                    action="store_true", default=False)
parser.add_argument("--5end", dest="five_end",
                    help="Use pyPileup option --5end to only report counts for the 5' end of the reads. Default = False",
                    action="store_true", default=False)
parser.add_argument("-l", dest="list_file",
                    help="Provide the FULL path to your gene_names.list file.",
                    type=str, default=None, required=True)
parser.add_argument("--tree", dest="tree",
                    help="If you want to leave tree of catalogs including pilups within. Default = None.",
                    action="store_true", default=False)
parser.add_argument("--anti", dest="anti",
                    help="Create additional concat file with antisense reads Default = None.",
                    action="store_true", default=False)
parser.add_argument("-p", dest="prefix",
                    help="Prefix for concat file name",
                    type=str, default="")
args = parser.parse_args()

# Resolve the annotation files through gtm; ranges is kept as a string.
gtf, tab, ranges = gtm.getGTF(args.gtf_file), gtm.getTAB(args.tab_file), str(args.ranges)
# Single-argument parenthesised print behaves the same on Python 2 and 3.
print("Using GTF file: " + gtf)
print("Using TAB genome file: " + tab)

# List the *.novo files in the current directory; each one gets a directory
# named after the file with its ".novo" suffix removed.
files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.novo')]
directories = [re.sub(r'\.novo$', '', f) for f in files]
links = []
root_dir = os.getcwd()

# Create one directory per novo file and place a relative symlink to the file
# inside it. The working directory is deliberately left untouched: changing
# into each new directory without returning would nest every later directory
# inside the previous one and make the "../<file>" target wrong.
for f, d in zip(files, directories):
    os.mkdir(d)
    link_path = os.path.join(root_dir, d, f)
    # os.symlink avoids building a shell command from file names, which
    # breaks on names containing spaces or shell metacharacters.
    os.symlink(os.path.join(os.pardir, f), link_path)
    links.append(link_path)