Example #1
0
def getFastaSeqs():
    """Print FASTA-style genomic sequences for gene names read from stdin.

    Command-line options select the GTF, FASTA and genome TAB files, the
    flank size handed to ``genomicSequence`` as ``ranges``, and two optional
    slice bounds (``-a``/``--5end`` and ``-b``/``--3end``) that are applied
    to every sequence as ``sequence[a:b]``.

    For each non-blank input line a ``>gene_name`` header is printed,
    followed by the sliced sequence and an empty separator line.
    """
    parser = OptionParser(usage="List of genes as std input and parameters")
    parser.add_option("-g", "--gtf_file", dest="gtf_file",
                      help="Provide the path to your gtf file.",
                      type="str", default=None)
    parser.add_option("-f", "--fasta_file", dest="fasta_file",
                      help="Provide the path to your fasta file.",
                      type="str", default=None)
    parser.add_option("-t", "--tab_file", dest="tab_file",
                      help="Provide the path to your genom tab file.",
                      type="str", default=None)
    parser.add_option("-r", "--ranges", dest="ranges",
                      help="Provide ranges(flanks) for genes.",
                      type="int", default=0)
    parser.add_option("-a", "--5end", dest="five_end",
                      help="Set up 5` flank. If minus then print only 3` end. Python slicing [a:b] i.e. [200:401] - from 200 to 400; [-200:] - last 200; "
                           "[:-200] from begining till -200 before end",
                      type="int", default=None)
    # The help text used to say "5` flank" here (copied from -a); this option
    # is the upper slice bound, i.e. the 3` side.
    parser.add_option("-b", "--3end", dest="three_end",
                      help="Set up 3` flank. If minus then print only 5` end. Python slicing [a:b]",
                      type="int", default=None)
    options, _ = parser.parse_args()

    # Restore the default SIGPIPE action so that a closed downstream pipe
    # (e.g. `| head`) ends the process instead of raising an I/O error.
    signal(SIGPIPE, SIG_DFL)

    # creating gtf object and loading annotation, sequence and genome table
    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))
    gtf.read_FASTA(gtm.getFASTA(options.fasta_file))
    gtf.read_TAB(gtm.getTAB(options.tab_file))

    for line in sys.stdin:
        gene_name = line.strip()
        if not gene_name:
            # Skip blank lines (such as a trailing newline in the gene list)
            # instead of looking up an empty gene name.
            continue
        genomic_seq = gtf.genomicSequence(gene_name, ranges=options.ranges)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('>' + gene_name)
        print(genomic_seq[options.five_end:options.three_end] + '\n')
Example #2
0
def getFastaSeqs():
    """Print FASTA-style genomic sequences for gene names read from stdin.

    Command-line options select the GTF, FASTA and genome TAB files, the
    flank size handed to ``genomicSequence`` as ``ranges``, and two optional
    slice bounds (``-a``/``--5end`` and ``-b``/``--3end``) that are applied
    to every sequence as ``sequence[a:b]``.

    For each non-blank input line a ``>gene_name`` header is printed,
    followed by the sliced sequence and an empty separator line. After all
    input is consumed ``gtf.codingSequence()`` is called once.
    """
    parser = OptionParser(usage="List of genes as std input and parameters")
    parser.add_option("-g",
                      "--gtf_file",
                      dest="gtf_file",
                      help="Provide the path to your gtf file.",
                      type="str",
                      default=None)
    parser.add_option("-f",
                      "--fasta_file",
                      dest="fasta_file",
                      help="Provide the path to your fasta file.",
                      type="str",
                      default=None)
    parser.add_option("-t",
                      "--tab_file",
                      dest="tab_file",
                      help="Provide the path to your genom tab file.",
                      type="str",
                      default=None)
    parser.add_option("-r",
                      "--ranges",
                      dest="ranges",
                      help="Provide ranges(flanks) for genes.",
                      type="int",
                      default=0)
    parser.add_option(
        "-a",
        "--5end",
        dest="five_end",
        help=
        "Set up 5` flank. If minus then print only 3` end. Python slicing [a:b] i.e. [200:401] - from 200 to 400; [-200:] - last 200; "
        "[:-200] from begining till -200 before end",
        type="int",
        default=None)
    # The help text used to say "5` flank" here (copied from -a); this option
    # is the upper slice bound, i.e. the 3` side.
    parser.add_option(
        "-b",
        "--3end",
        dest="three_end",
        help=
        "Set up 3` flank. If minus then print only 5` end. Python slicing [a:b]",
        type="int",
        default=None)
    options, _ = parser.parse_args()

    # Restore the default SIGPIPE action so that a closed downstream pipe
    # (e.g. `| head`) ends the process instead of raising an I/O error.
    signal(SIGPIPE, SIG_DFL)

    # creating gtf object and loading annotation, sequence and genome table
    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))
    gtf.read_FASTA(gtm.getFASTA(options.fasta_file))
    gtf.read_TAB(gtm.getTAB(options.tab_file))

    for line in sys.stdin:
        gene_name = line.strip()
        if not gene_name:
            # Skip blank lines (such as a trailing newline in the gene list)
            # instead of looking up an empty gene name.
            continue
        genomic_seq = gtf.genomicSequence(gene_name, ranges=options.ranges)
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('>' + gene_name)
        print(genomic_seq[options.five_end:options.three_end] + '\n')

    # NOTE(review): the return value is discarded and no arguments are
    # passed; confirm this call is intentional and not leftover debugging.
    gtf.codingSequence()
Example #3
0
# Setting up the option parser: input annotation files, pyPileup settings
# and output naming for the concat file.
parser = argparse.ArgumentParser(description='Usage: ruffus scirpt designed to make concat file from *.novo files. Make new folder, cp or ln into all novofiles and run novo2concat. IMPORTANT: name of novo file should be name of experiment')
parser.add_argument("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.",
                    type=str, default=None)
parser.add_argument("-t", "--tab_file", dest="tab_file", help="Provide the path to your tab genome file.",
                    type=str, default=None)
parser.add_argument("-r", dest="ranges", help="Set up ranges for pyPileup. Default = 250", default=250)
parser.add_argument("--3end", dest="three_end", help="Use pyPileup option --3end to only report counts for the 3' end of the reads. Default = False",
                    action="store_true", default=False)
parser.add_argument("-l", dest="list_file", help="Provide the FULL path to your gene_names.list file.", type=str, default=None, required=True)
parser.add_argument("--tree", dest="tree", help="If you want to leave tree of catalogs including pilups within. Default = None.",
                    action="store_true", default=False)
parser.add_argument("-p", dest="prefix", help="Prefix for concat file name", type=str, default="")
args = parser.parse_args()

# "-r" has no type, so it is an int by default but a str when given on the
# command line; str() normalises both cases.
gtf, tab, ranges = gtm.getGTF(args.gtf_file), gtm.getTAB(args.tab_file), str(args.ranges)
# Single-argument print(...) behaves the same on Python 2 and 3.
print("Using GTF file: " + gtf)
print("Using TAB genome file: " + tab)

# Listing *.novo files in the current directory; each one gets a directory
# named after the file without its ".novo" suffix.
files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.novo')]
directories = [re.sub(r'\.novo$', '', f) for f in files]  # dot escaped: strip the literal suffix only
links = []
root_dir = os.getcwd()

# Making directories, each holding a relative symlink back to its novo file.
for f, d in zip(files, directories):
    # Start every iteration from the root directory; otherwise each new
    # directory would be nested inside the previous one and the "../" link
    # target would not exist.
    os.chdir(root_dir)
    os.mkdir(d)
    os.chdir(d)
    # os.symlink instead of a shell "ln -s" string: file names containing
    # spaces or shell metacharacters are handled correctly.
    os.symlink(os.path.join(os.pardir, f), f)
    links.append(os.path.abspath(f))
Example #4
0
# Remaining command-line options; `parser` is created before this fragment.
parser.add_argument("-r", dest="ranges", help="Set up ranges for pyPileup. Default = 250", default=250)
parser.add_argument("--3end", dest="three_end",
                    help="Use pyPileup option --3end to only report counts for the 3' end of the reads. Default = False",
                    action="store_true", default=False)
parser.add_argument("--5end", dest="five_end",
                    help="Use pyPileup option --5end to only report counts for the 5' end of the reads. Default = False",
                    action="store_true", default=False)
parser.add_argument("-l", dest="list_file", help="Provide the FULL path to your gene_names.list file.", type=str, default=None, required=True)
parser.add_argument("--tree", dest="tree", help="If you want to leave tree of catalogs including pilups within. Default = None.",
                    action="store_true", default=False)
parser.add_argument("--anti", dest="anti", help="Create additional concat file with antisense reads Default = None.",
                    action="store_true", default=False)
parser.add_argument("-p", dest="prefix", help="Prefix for concat file name", type=str, default="")
args = parser.parse_args()

# "-r" has no type, so it is an int by default but a str when given on the
# command line; str() normalises both cases.
gtf, tab, ranges = gtm.getGTF(args.gtf_file), gtm.getTAB(args.tab_file), str(args.ranges)
# Single-argument print(...) behaves the same on Python 2 and 3.
print("Using GTF file: " + gtf)
print("Using TAB genome file: " + tab)

# Listing *.novo files in the current directory; each one gets a directory
# named after the file without its ".novo" suffix.
files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.novo')]
directories = [re.sub(r'\.novo$', '', f) for f in files]  # dot escaped: strip the literal suffix only
links = []
root_dir = os.getcwd()

# Making directories, each holding a relative symlink back to its novo file.
for f, d in zip(files, directories):
    # Start every iteration from the root directory; otherwise each new
    # directory would be nested inside the previous one and the "../" link
    # target would not exist.
    os.chdir(root_dir)
    os.mkdir(d)
    os.chdir(d)
    # os.symlink instead of a shell "ln -s" string: file names containing
    # spaces or shell metacharacters are handled correctly.
    os.symlink(os.path.join(os.pardir, f), f)
    links.append(os.path.abspath(f))