Code example #1 (score: 0)
File: parserTools.py — Project: tturowski/gwide
def getGeneNamesFromGTF():
    """Print unique names of genes of a chosen biotype found in a GTF file.

    Reads the GTF given with -f (resolved through gtm.getGTF), keeps feature
    lines whose second column matches the biotype given with -g, optionally
    filters for intron-containing / intron-less genes (-i, using the parsed
    GTF's intron coordinates), and writes the unique gene names one per line
    to -o or standard output.
    """
    parser = OptionParser(usage="getGenesNames; type usage: %prog [options] -f filename")
    files = OptionGroup(parser, "File input options")
    files.add_option("-f", "--input_file", dest="gtf_file",
                     help="Provide the path to your gtf data file. Default is standard input.",
                     type="str", default=None)
    files.add_option("-g", "--genes", dest="genes",
                     help="Which biotype of features to get: mRNA, tRNA, rRNA, snRNA, snoRNA",
                     type="str", default='tRNA')
    # BUG FIX: removed stray trailing comma after this call, which turned the
    # statement into a throwaway one-element tuple.
    files.add_option("-i", "--introns", dest="introns",
                     help="Introns? both - not discriminate; int_cont -only intron containing; int_less - only int less",
                     choices=["both", "int_cont", "int_less"], default="both")
    files.add_option("-o", "--output_file", dest="output_file",
                     help="Use this flag to provide an output file name. Default is standard output.", default=None)
    parser.add_option_group(files)
    (options, args) = parser.parse_args()

    ### By default, input and output are expected from the standard input or standard output.
    # Restore default SIGPIPE handling so piping the output (e.g. to `head`)
    # terminates the script cleanly instead of raising IOError.
    signal(SIGPIPE, SIG_DFL)
    outfile = sys.stdout
    if options.output_file:
        outfile = open(options.output_file, "w")

    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))

    names_list = list()  # output order: first occurrence in the GTF
    seen = set()         # O(1) duplicate check (was O(n) `in list`)

    ### for loop extracting gene names of the requested biotype
    for line in open(gtm.getGTF(options.gtf_file), "r"):
        if line.startswith('#'):
            continue
        line_elements = line.strip().split('\t')
        if str(line_elements[1]) != options.genes:
            continue
        match = re.search(r"gene_name\s\"(.*?)\"", str(line_elements[8]))
        if match is None:
            # BUG FIX: the original `try/except: pass` left `name` unbound on
            # the first failed search (NameError) and silently reused the
            # previous line's name afterwards. Skip unparseable lines instead.
            continue
        name = match.group(1)
        if name in seen:
            continue
        # Apply the intron filter; "both" accepts every gene.
        if options.introns == "both" \
                or (options.introns == "int_cont" and gtf.intronCoordinates(name)) \
                or (options.introns == "int_less" and not gtf.intronCoordinates(name)):
            seen.add(name)
            names_list.append(name)

    outfile.write('\n'.join(names_list) + '\n')
    outfile.close()
Code example #2 (score: 0)
File: parserTools.py — Project: tturowski/gwide
def getFastaSeqs():
    """Print FASTA-formatted genomic sequences for gene names read from stdin.

    For every gene name on standard input the genomic sequence (optionally
    extended by -r flanks) is printed as a FASTA record; -a/-b apply
    Python-style slicing to the sequence before printing.
    """
    parser = OptionParser(usage="List of genes as std input and parameters")
    parser.add_option("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.",
                     type="str", default=None)
    parser.add_option("-f", "--fasta_file", dest="fasta_file", help="Provide the path to your fasta file.",
                     type="str", default=None)
    parser.add_option("-t", "--tab_file", dest="tab_file", help="Provide the path to your genom tab file.",
                     type="str", default=None)
    parser.add_option("-r", "--ranges", dest="ranges",
                     help="Provide ranges(flanks) for genes.",
                     type="int", default=0)
    parser.add_option("-a", "--5end", dest="five_end",
                     help="Set up 5` flank. If minus then print only 3` end. Python slicing [a:b] i.e. [200:401] - from 200 to 400; [-200:] - last 200; "
                          "[:-200] from begining till -200 before end",
                     type="int", default=None)
    # BUG FIX: help text used to say "Set up 5` flank" (copy-paste from -a);
    # this option slices the 3' end.
    parser.add_option("-b", "--3end", dest="three_end",
                     help="Set up 3` flank. If minus then print only 5` end. Python slicing [a:b]",
                     type="int", default=None)
    (options, args) = parser.parse_args()

    signal(SIGPIPE,SIG_DFL) # to manage with stdin and stdout
    # creating gtf object and loading annotation, sequences and genome table
    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))
    gtf.read_FASTA(gtm.getFASTA(options.fasta_file))
    gtf.read_TAB(gtm.getTAB(options.tab_file))

    for i in sys.stdin:
        gene_name = str(i.strip())
        genomic_seq = gtf.genomicSequence(gene_name, ranges=options.ranges)
        # Parenthesized single-expression print: identical output under
        # Python 2, also valid Python 3 syntax.
        print('>'+gene_name)
        print(genomic_seq[options.five_end:options.three_end]+'\n')
Code example #3 (score: 0)
File: parserTools.py — Project: MingleiYang/gwide
def getNameFromId4Tab():
    """Replace gene IDs with gene names in a two-column tab stream.

    Standard input supplies lines of "<gene_id>\t<sequence>"; each line is
    echoed to standard output with the ID translated to its gene name using
    the mapping held in the parsed GTF file.
    """
    parser = OptionParser(usage="usage: List of genes as std input")
    parser.add_option("-g",
                      "--gtf_file",
                      dest="gtf_file",
                      help="Provide the path to your gtf file.",
                      type="str",
                      default=None)
    (options, args) = parser.parse_args()
    signal(SIGPIPE, SIG_DFL)

    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))

    # Invert the name -> id mapping kept by the GTF parser.
    id_to_gene = dict((gtf.genes[name]['gene_id'], name)
                      for name in gtf.genes)

    for raw_line in sys.stdin:
        fields = raw_line.strip().split("\t")
        print(id_to_gene[fields[0]] + '\t' + fields[1])
Code example #4 (score: 0)
File: gwidemRNA.py — Project: tturowski/gwide
def mRNA():
    """Command-line entry point for gwide mRNA analysis.

    Builds an argparse CLI, constructs an mRNAFromConcat worker from the
    GTF/flank/threshold options, reads the concat file given with -i and
    runs the routine selected with -o (currently only 'bind', which
    computes binding windows with the chosen experiment and window size).
    Output file names are prefixed with -p and, when -n is set,
    'normalized_'.
    """
    usage = "Usage: To create input concat file run novo2concat.py"
    parser = argparse.ArgumentParser(usage=usage)

    files = parser.add_argument_group('Options for input files')
    files.add_argument("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.",
                     type=str, default=None)
    files.add_argument("-i", "--input_file", dest="input_file", help="Provide the path to your concat file. REQUIRED.",
                     metavar="FILE", default=None, required=True)
    files.add_argument("--5flank", dest="five_prime_flank", type=int, help="Set up 5 prime flank in pileup file. Default = 0", default=0)
    files.add_argument("--3flank", dest="three_prime_flank", type=int, help="Set up 3 prime flank in pileup file. Default = 0", default=0)

    universal = parser.add_argument_group('Universal options')
    universal.add_argument("-t", "--hits_threshold", dest="hits_threshold", type=int, help="Set up threshold for pileup. Default 0 reads",
                      default=0)
    universal.add_argument("-n", "--normalized", dest="normalized", action="store_true", help="Use when you want to work on data normalized 'reads per Milion'. Default: False", default=False)

    output = parser.add_argument_group('Options for output files')
    output.add_argument("-p", "--prefix", dest="out_prefix", type=str, help="Prefix for output files. Default to standard output. Not supported for -o ratio.", default=None)
    output.add_argument("-o", dest="output_files", choices=['bind'], help="Select from following options:"
                        "(1) Print binding windows in fasta file", default="bind")
    output.add_argument("--peaks", dest="print_peaks", action="store_true", help="print peaks on plots. Default: False", default=False)
    output.add_argument("--valleys", dest="print_valleys", action="store_true", help="print valleys on plots. Default: False", default=False)

    special = parser.add_argument_group('Special options for some -o choices')
    special.add_argument("--lookahead", dest="lookahead", type=int, help="Set up lookahead parameter for pypeaks function. Default = 20", default=20)
    special.add_argument("-w", "--window", dest="window", type=int, help="Set up size of window for bind calculation (-o bind). Default: 10",
                      default=10)
    special.add_argument("-e", dest="experiment_to_use", type=str, help="For -o bind, which experiment to use.")
    # special.add_argument("--ntotal", dest="ntotal", action="store_true", help="Normalize data to sum of all reads (sum = 1). Default: False", default=False)
    # special.add_argument("--nmax", dest="nmax", action="store_true", help="Normalize data to maximal value (max = 1). Default: False", default=False)
    # special.add_argument("-a", dest="to_divide", type=str, help="experiment to divide by -b (-o fig_ratio)",
    #                   default=None)
    # special.add_argument("-b", dest="divisor", type=str, help="experiment being divisor for -a (-o fig_ratio)",
    #                   default=None)
    options = parser.parse_args()

    #checking input
    input_file = options.input_file

    #preparing naming of output files
    if options.out_prefix:
        prefix = options.out_prefix+'_'
    else:
        prefix = str()
    if options.normalized == True:
        prefix = 'normalized_'+prefix

    # mRNAFromConcat is the project's worker class; it performs the actual
    # parsing and calculations (npM = normalize to reads-per-million).
    data = mRNAFromConcat(gtf_file=gtm.getGTF(options.gtf_file), five_prime_flank=options.five_prime_flank, three_prime_flank=options.three_prime_flank,
                          hits_threshold=options.hits_threshold, lookahead=options.lookahead, prefix=prefix, npM=options.normalized)

    if options.output_files == "bind":
        #reading csv file
        data.read_csv(input_file, use='deletions')
        #calculating readthrough, details, normalize
        # data.calculate(details=options.details, ntotal=options.ntotal, nmax=options.nmax)
        data.bind(exp_to_use=options.experiment_to_use, window=options.window)

    print '# Done.'
Code example #5 (score: 0)
File: parserTools.py — Project: MingleiYang/gwide
def getFastaSeqs():
    """Print FASTA-formatted genomic sequences for gene names read from stdin.

    For every gene name on standard input the genomic sequence (optionally
    extended by -r flanks) is printed as a FASTA record; -a/-b apply
    Python-style slicing to the sequence before printing.
    """
    parser = OptionParser(usage="List of genes as std input and parameters")
    parser.add_option("-g",
                      "--gtf_file",
                      dest="gtf_file",
                      help="Provide the path to your gtf file.",
                      type="str",
                      default=None)
    parser.add_option("-f",
                      "--fasta_file",
                      dest="fasta_file",
                      help="Provide the path to your fasta file.",
                      type="str",
                      default=None)
    parser.add_option("-t",
                      "--tab_file",
                      dest="tab_file",
                      help="Provide the path to your genom tab file.",
                      type="str",
                      default=None)
    parser.add_option("-r",
                      "--ranges",
                      dest="ranges",
                      help="Provide ranges(flanks) for genes.",
                      type="int",
                      default=0)
    parser.add_option(
        "-a",
        "--5end",
        dest="five_end",
        help=
        "Set up 5` flank. If minus then print only 3` end. Python slicing [a:b] i.e. [200:401] - from 200 to 400; [-200:] - last 200; "
        "[:-200] from begining till -200 before end",
        type="int",
        default=None)
    # BUG FIX: help text used to say "Set up 5` flank" (copy-paste from -a);
    # this option slices the 3' end.
    parser.add_option(
        "-b",
        "--3end",
        dest="three_end",
        help=
        "Set up 3` flank. If minus then print only 5` end. Python slicing [a:b]",
        type="int",
        default=None)
    (options, args) = parser.parse_args()

    signal(SIGPIPE, SIG_DFL)  # to manage with stdin and stdout
    # creating gtf object and loading annotation, sequences and genome table
    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))
    gtf.read_FASTA(gtm.getFASTA(options.fasta_file))
    gtf.read_TAB(gtm.getTAB(options.tab_file))

    for i in sys.stdin:
        gene_name = str(i.strip())
        genomic_seq = gtf.genomicSequence(gene_name, ranges=options.ranges)
        # Parenthesized single-expression print: identical output under
        # Python 2, also valid Python 3 syntax.
        print('>' + gene_name)
        print(genomic_seq[options.five_end:options.three_end] + '\n')

    # NOTE(review): return value is discarded; kept in case the call has side
    # effects inside the GTF parser -- confirm before removing.
    gtf.codingSequence()
Code example #6 (score: 0)
File: gwiderRNA.py — Project: tturowski/gwide
def rRNA():
    """Command-line entry point for gwide rRNA analysis.

    Builds an optparse CLI, constructs an rRNAFromConcat worker from the
    GTF/flank/threshold options, reads the concat file given with -i and
    dispatches on -o: std / single plots, log2 ratio (plain or smoothed)
    of experiment -a over -b, or cross-experiment correlations.
    """
    usage = "Usage: create pileups with pyPileup (pyCRAC package) then in directory containing pileup files type run i.e.:"+"\n"+ \
            "cat file.concat | gwiderRNA.py or gwiderRNA.py -i file.concat"
    parser = OptionParser(usage=usage)
    parser.add_option("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.",
                     metavar="FILE", default=None)
    parser.add_option("-i", dest="input_file", help="Provide the path to your concat file.",
                     metavar="FILE", default=None)
    parser.add_option("--5flank", dest="five_prime_flank", type="int", help="Set up 5 prime flank in pileup file. Default = 1000", default=1000)
    parser.add_option("--3flank", dest="three_prime_flank", type="int", help="Set up 3 prime flank in pileup file. Default = 1000", default=1000)
    parser.add_option("-l", "--lookahead", dest="lookahead", type="int", help="Set up lookahead parameter for pypeaks function. Default = 20", default=20)
    parser.add_option("-t", "--hits_threshold", dest="hits_threshold", type="int", help="Set up threshold for pileup. Default 100 reads",
                      default=100)
    # parser.add_option("-r", "--readthrough", dest="readthrough", type="int", help="Set up when readthrough should start countin. Default: 0",
    #                   default=0)
    parser.add_option("-p", "--prefix", dest="out_prefix", type="str", help="Prefix for output files. Default to standard output", default=None)
    parser.add_option("--peaks", dest="print_peaks", action="store_true", help="Add into command line if you want to print peaks on plots. Default: False",
                      default=False)
    parser.add_option("-o", "--output", dest="output_files", choices=["std", "ratio", "single", "correlations", "ratio_smooth"], help="Select from following options: (1) std - RDN37-1; experiment after experimen ;"+'\n'
                                                                                                   "(2)ratio - ratio for -a divided by -b; (3)single - plot RDN37-1 plots 1 per page; (4) correlations - calculate correlations for different experiments; (5)ratio_smooth - ratio for -a divided by -b", default="std")
    parser.add_option("-a", dest="to_divide", type="str", help="experiment to divide by -b", default=None)
    parser.add_option("-b", dest="divisor", type="str", help="experiment being divisor for -a", default=None)
    parser.add_option("-n", "--normalized", dest="normalized", action="store_true", help="Use when you want to work on data normalized reads per Milion? Default: False", default=False)
    (options, args) = parser.parse_args()

    gtf_file = gtm.getGTF(options.gtf_file)

    if options.out_prefix:
        prefix = options.out_prefix+'_'
    else:
        prefix = str()

    # Ratio plots only make sense on normalized data, so -o ratio forces -n.
    if options.output_files == "ratio":
        options.normalized = True

    # rRNAFromConcat is the project's worker class doing the actual work.
    data = rRNAFromConcat(gtf_file=gtf_file, five_prime_flank=options.five_prime_flank, print_peaks=options.print_peaks,
                             three_prime_flank=options.three_prime_flank, hits_threshold=options.hits_threshold, lookahead=options.lookahead, prefix=prefix, normalized=options.normalized)
    data.read_csv(options.input_file)
    data.slice_data()
    if options.print_peaks == True:
        data.find_peaks()
    if options.output_files == "std":
        data.print_rRNA()   # RDN37 should be prepared with 1000 nt flanks
    if options.output_files == "single":
        data.single_rRNA()   # RDN37 should be prepared with 1000 nt flanks
    if options.output_files == "ratio":
        # data.fig_ratio(options.to_divide, options.divisor)  # plots ratio to_divide/divisor
        data.fig_log2ratio(options.to_divide, options.divisor)  # plots log2 ratio to_divide/divisor
    if options.output_files == "ratio_smooth":
        data.fig_smoothlog2ratio(options.to_divide, options.divisor)  # plots log2 ratio to_divide/divisor using smoothed data
    if options.output_files == "correlations":
        data.correlations()
    print '# Done.'
Code example #7 (score: 0)
File: parserTools.py — Project: tturowski/gwide
def getGeneLength():
    """Print "<gene name>\t<length>" for each gene name read from stdin."""
    parser = OptionParser(usage="usage: List of genes as std input")
    parser.add_option("-g", "--gtf_file", dest="gtf_file",
                      help="Provide the path to your gtf file.",
                      type="str", default=None)
    (options, args) = parser.parse_args()

    # Restore default SIGPIPE handling so piping the output works cleanly.
    signal(SIGPIPE, SIG_DFL)
    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))

    for raw_line in sys.stdin:
        name = str(raw_line.strip())
        print(name + "\t" + str(gtf.geneLength(name)))
Code example #8 (score: 0)
File: parserTools.py — Project: MingleiYang/gwide
def getGeneLength():
    """For every gene name on stdin, print the name and its length, tab-separated."""
    parser = OptionParser(usage="usage: List of genes as std input")
    parser.add_option("-g", "--gtf_file", dest="gtf_file",
                      type="str", default=None,
                      help="Provide the path to your gtf file.")
    (options, args) = parser.parse_args()

    # Default SIGPIPE handling lets downstream pipes close us quietly.
    signal(SIGPIPE, SIG_DFL)
    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))

    for entry in sys.stdin:
        gene = str(entry.strip())
        print("%s\t%s" % (gene, str(gtf.geneLength(gene))))
Code example #9 (score: 0)
File: parserTools.py — Project: tturowski/gwide
def getNameFromId():
    """Translate gene IDs read from stdin into gene names, one per line."""
    parser = OptionParser(usage="usage: List of genes as std input")
    parser.add_option("-g", "--gtf_file", dest="gtf_file",
                      help="Provide the path to your gtf file.",
                      type="str", default=None)
    (options, args) = parser.parse_args()
    signal(SIGPIPE, SIG_DFL)

    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))

    # Build the reverse lookup (gene_id -> gene_name) in one pass.
    id_to_gene = {gtf.genes[name]['gene_id']: name for name in gtf.genes}

    for raw_line in sys.stdin:
        print(id_to_gene[str(raw_line.strip())])
Code example #10 (score: 0)
File: gwideHittable.py — Project: tturowski/gwide
def hittable():
    """Command-line entry point for downstream analysis of pyReadCounter hittables.

    Builds an argparse CLI, wraps the hittables found in the working
    directory (or stdin with --stdin) in a ghc.HittableClass object and
    dispatches on --output: 'correlation', 'count' or 'piechart'.
    """
    ## option parser
    usage = "For more options type -h"
    description = "Downstream analysis on hittables crated by pyReadCounter. Chose type of analysys Usage: create hittables using pyReadCounter then run script in the folder containing hittables"
    parser = argparse.ArgumentParser(usage=usage, description=description)
    #functions
    parser.add_argument('--output', required=True, dest="function", choices=['correlation', 'count', 'piechart'], help='REQUIRED, Calculate "correlations"; '
                                                                                                                         '"count" hittables for further analysis. Ideal to work with multiple experiments; '
                                                                                                                         'Plot "piechart"s for hittable classes')
    # parser for input files options
    files = parser.add_argument_group('Input file options')
    files.add_argument("-g", dest="gtf_file", help="Provide the path to your gtf file.", type=str, default=None)
    files.add_argument("--stdin", dest="stdin", action="store_true", help="Use standard input instead ./*hittable* Default: False", default=False)
    # universal options
    universal = parser.add_argument_group('universal options')
    universal.add_argument("-n", dest="normalized", action="store_true", help="Use when you want to work on data normalized 'reads per Milion'. Default: False", default=False)
    universal.add_argument("-w", dest="whole_name", action="store_true", help="As defauls scripts takes 'a_b_c' from a_b_c_hittable_reads.txt as experiment name. Use this option if your file names do not suit to this pattern. Default: False", default=False)
    universal.add_argument("-p", dest="out_prefix", type=str, help="Prefix for output files.", default=None)
    # parser specific for counts
    # NOTE(review): `corr_group` is rebound to the correlation group below;
    # the "counts options" group object is not kept separately.
    corr_group = parser.add_argument_group("counts options")
    corr_group.add_argument("--rpkm", dest="rpkm", action="store_true", help="Use RPKM instead of hits. Default: False", default=False)
    # parser specific for correlations
    corr_group = parser.add_argument_group("correlation options")
    corr_group.add_argument("-c", dest="gene_class", action="store_true", help="Calculate Pearson coefficient for different classes separately. Default: False", default=False)
    corr_group.add_argument("-o", dest="output", choices=["p", "s", "k", "a"], help="Select from following options: p - Pearson (standard correlation coefficient); s - Spearman rank correlation; k - Kendall Tau correlation coefficient; a - all at once", default="p")
    #parser specific for piecharts
    piechart_group = parser.add_argument_group("piechart options")
    piechart_group.add_argument("-s", "--single", dest="print_single", help="Print hittables in single files",
                             action="store_true", default=False)
    options = parser.parse_args()

    ## Creating HittableClass object
    data = ghc.HittableClass(gtf=gtm.getGTF(options.gtf_file), whole_name=options.whole_name, n_rpM=options.normalized, out_prefix=options.out_prefix, read_stdin=options.stdin)

    #running chosen function
    if options.function == 'correlation':
        data.correlation(output=options.output, gene_class=options.gene_class)
    elif options.function == 'count':
        data.count(normalize=options.normalized, use_RPKM=options.rpkm)
    elif options.function == 'piechart':
        data.plot(print_single=options.print_single)

    print "Done."
Code example #11 (score: 0)
def rRNA():
    """Command-line entry point for gwide rRNA analysis (yapf-formatted variant).

    Builds an optparse CLI, constructs an rRNAFromConcat worker from the
    GTF/flank/threshold options, reads the concat file given with -i and
    dispatches on -o: std / single plots, log2 ratio (plain or smoothed)
    of experiment -a over -b, or cross-experiment correlations.
    """
    usage = "Usage: create pileups with pyPileup (pyCRAC package) then in directory containing pileup files type run i.e.:"+"\n"+ \
            "cat file.concat | gwiderRNA.py or gwiderRNA.py -i file.concat"
    parser = OptionParser(usage=usage)
    parser.add_option("-g",
                      "--gtf_file",
                      dest="gtf_file",
                      help="Provide the path to your gtf file.",
                      metavar="FILE",
                      default=None)
    parser.add_option("-i",
                      dest="input_file",
                      help="Provide the path to your concat file.",
                      metavar="FILE",
                      default=None)
    parser.add_option(
        "--5flank",
        dest="five_prime_flank",
        type="int",
        help="Set up 5 prime flank in pileup file. Default = 1000",
        default=1000)
    parser.add_option(
        "--3flank",
        dest="three_prime_flank",
        type="int",
        help="Set up 3 prime flank in pileup file. Default = 1000",
        default=1000)
    parser.add_option(
        "-l",
        "--lookahead",
        dest="lookahead",
        type="int",
        help="Set up lookahead parameter for pypeaks function. Default = 20",
        default=20)
    parser.add_option("-t",
                      "--hits_threshold",
                      dest="hits_threshold",
                      type="int",
                      help="Set up threshold for pileup. Default 100 reads",
                      default=100)
    # parser.add_option("-r", "--readthrough", dest="readthrough", type="int", help="Set up when readthrough should start countin. Default: 0",
    #                   default=0)
    parser.add_option(
        "-p",
        "--prefix",
        dest="out_prefix",
        type="str",
        help="Prefix for output files. Default to standard output",
        default=None)
    parser.add_option(
        "--peaks",
        dest="print_peaks",
        action="store_true",
        help=
        "Add into command line if you want to print peaks on plots. Default: False",
        default=False)
    parser.add_option(
        "-o",
        "--output",
        dest="output_files",
        choices=["std", "ratio", "single", "correlations", "ratio_smooth"],
        help=
        "Select from following options: (1) std - RDN37-1; experiment after experimen ;"
        + '\n'
        "(2)ratio - ratio for -a divided by -b; (3)single - plot RDN37-1 plots 1 per page; (4) correlations - calculate correlations for different experiments; (5)ratio_smooth - ratio for -a divided by -b",
        default="std")
    parser.add_option("-a",
                      dest="to_divide",
                      type="str",
                      help="experiment to divide by -b",
                      default=None)
    parser.add_option("-b",
                      dest="divisor",
                      type="str",
                      help="experiment being divisor for -a",
                      default=None)
    parser.add_option(
        "-n",
        "--normalized",
        dest="normalized",
        action="store_true",
        help=
        "Use when you want to work on data normalized reads per Milion? Default: False",
        default=False)
    (options, args) = parser.parse_args()

    gtf_file = gtm.getGTF(options.gtf_file)

    if options.out_prefix:
        prefix = options.out_prefix + '_'
    else:
        prefix = str()

    # Ratio plots only make sense on normalized data, so -o ratio forces -n.
    if options.output_files == "ratio":
        options.normalized = True

    # rRNAFromConcat is the project's worker class doing the actual work.
    data = rRNAFromConcat(gtf_file=gtf_file,
                          five_prime_flank=options.five_prime_flank,
                          print_peaks=options.print_peaks,
                          three_prime_flank=options.three_prime_flank,
                          hits_threshold=options.hits_threshold,
                          lookahead=options.lookahead,
                          prefix=prefix,
                          normalized=options.normalized)
    data.read_csv(options.input_file)
    data.slice_data()
    if options.print_peaks == True:
        data.find_peaks()
    if options.output_files == "std":
        data.print_rRNA()  # RDN37 should be prepared with 1000 nt flanks
    if options.output_files == "single":
        data.single_rRNA()  # RDN37 should be prepared with 1000 nt flanks
    if options.output_files == "ratio":
        # data.fig_ratio(options.to_divide, options.divisor)  # plots ratio to_divide/divisor
        data.fig_log2ratio(
            options.to_divide,
            options.divisor)  # plots log2 ratio to_divide/divisor
    if options.output_files == "ratio_smooth":
        data.fig_smoothlog2ratio(
            options.to_divide, options.divisor
        )  # plots log2 ratio to_divide/divisor using smoothed data
    if options.output_files == "correlations":
        data.correlations()
    print '# Done.'
Code example #12 (score: 0)
def hittable():
    """Command-line entry point for downstream analysis of pyReadCounter hittables.

    Builds an argparse CLI, wraps the hittables found in the working
    directory (or stdin with --stdin) in a ghc.HittableClass object and
    dispatches on --output: 'correlation', 'count', 'piechart' or
    'classes' (dump class membership to a tab file).
    """
    ## option parser
    usage = "For more options type -h"
    description = "Downstream analysis on hittables crated by pyReadCounter. Chose type of analysys Usage: create hittables using pyReadCounter then run script in the folder containing hittables"
    parser = argparse.ArgumentParser(usage=usage, description=description)
    #functions
    parser.add_argument(
        '--output',
        required=True,
        dest="function",
        choices=['correlation', 'count', 'piechart', 'classes'],
        help='REQUIRED, Calculate "correlations"; '
        '"count" hittables for further analysis. Ideal to work with multiple experiments; '
        'Plot "piechart"s for hittable classes')
    # parser for input files options
    files = parser.add_argument_group('Input file options')
    files.add_argument("-g",
                       dest="gtf_file",
                       help="Provide the path to your gtf file.",
                       type=str,
                       default=None)
    files.add_argument(
        "--stdin",
        dest="stdin",
        action="store_true",
        help="Use standard input instead ./*hittable* Default: False",
        default=False)
    # universal options
    universal = parser.add_argument_group('universal options')
    universal.add_argument(
        "-n",
        dest="normalized",
        action="store_true",
        help=
        "Use when you want to work on data normalized 'reads per Milion'. Default: False",
        default=False)
    universal.add_argument(
        "-w",
        dest="whole_name",
        action="store_true",
        help=
        "As defauls scripts takes 'a_b_c' from a_b_c_hittable_reads.txt as experiment name. Use this option if your file names do not suit to this pattern. Default: False",
        default=False)
    universal.add_argument("-p",
                           dest="out_prefix",
                           type=str,
                           help="Prefix for output files.",
                           default=None)
    # parser specific for counts
    # NOTE(review): `corr_group` is rebound to the correlation group below;
    # the "counts options" group object is not kept separately.
    corr_group = parser.add_argument_group("counts options")
    corr_group.add_argument("--rpkm",
                            dest="rpkm",
                            action="store_true",
                            help="Use RPKM instead of hits. Default: False",
                            default=False)
    # parser specific for correlations
    corr_group = parser.add_argument_group("correlation options")
    corr_group.add_argument(
        "-c",
        dest="gene_class",
        action="store_true",
        help=
        "Calculate Pearson coefficient for different classes separately. Default: False",
        default=False)
    corr_group.add_argument(
        "-o",
        dest="output",
        choices=["p", "s", "k", "a"],
        help=
        "Select from following options: p - Pearson (standard correlation coefficient); s - Spearman rank correlation; k - Kendall Tau correlation coefficient; a - all at once",
        default="p")
    #parser specific for piecharts
    piechart_group = parser.add_argument_group("piechart options")
    piechart_group.add_argument("-s",
                                "--single",
                                dest="print_single",
                                help="Print hittables in single files",
                                action="store_true",
                                default=False)
    options = parser.parse_args()

    ## Creating HittableClass object
    data = ghc.HittableClass(gtf=gtm.getGTF(options.gtf_file),
                             whole_name=options.whole_name,
                             n_rpM=options.normalized,
                             out_prefix=options.out_prefix,
                             read_stdin=options.stdin)

    #running chosen function
    if options.function == 'correlation':
        data.correlation(output=options.output, gene_class=options.gene_class)
    elif options.function == 'count':
        data.count(normalize=options.normalized, use_RPKM=options.rpkm)
    elif options.function == 'piechart':
        data.plot(print_single=options.print_single)
    elif options.function == 'classes':
        data.classes_to_tab()
    print "Done."
Code example #13 (score: 0)
File: gwidetRNA.py — Project: MingleiYang/gwide
def tRNA():
    """Command-line entry point for genome-wide tRNA analysis.

    Builds the argument parser, validates option combinations, constructs a
    tRNAFromConcatv2 object from the concat input file (created with
    novo2concat) and dispatches to the plotting / calculation routine
    selected with -o.  No return value; output goes to figures and files.
    """
    usage = "Usage: UNDER CONSTRUCTION - not all functions are available. To create concat file run novo2concat"
    parser = argparse.ArgumentParser(usage=usage)

    files = parser.add_argument_group('Options for input files')
    files.add_argument("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.",
                       type=str, default=None)
    files.add_argument("-i", "--input_file", dest="input_file", help="Provide the path to your concat file. REQUIRED.",
                       metavar="FILE", default=None, required=True)
    files.add_argument("--5flank", dest="five_prime_flank", type=int,
                       help="Set up 5 prime flank in pileup file. Default = 250", default=250)
    files.add_argument("--3flank", dest="three_prime_flank", type=int,
                       help="Set up 3 prime flank in pileup file. Default = 250", default=250)

    universal = parser.add_argument_group('Universal options')
    universal.add_argument("-t", "--hits_threshold", dest="hits_threshold", type=int,
                           help="Set up threshold for pileup. Default 0 reads", default=0)
    universal.add_argument("-r", "--readthrough", dest="readthrough", type=int,
                           help="Set up when readthrough starts. Default: 15", default=15)
    universal.add_argument("-n", "--normalized", dest="normalized", action="store_true",
                           help="Use when you want to work on data normalized 'reads per Milion'. Default: False",
                           default=False)

    output = parser.add_argument_group('Options for output files')
    output.add_argument("-p", "--prefix", dest="out_prefix", type=str,
                        help="Prefix for output files. Default to standard output. Not supported for -o ratio.",
                        default=None)
    output.add_argument("-d", "--details", dest="details", action="store_true",
                        help="Print details in text file. WARNING: works only with one experiment.",
                        default=False)
    output.add_argument("-o", dest="output_files",
                        choices=["fig", "fig_std", "fig_tight", "fig_ratio", "fig_boxes", "nuc3",
                                 "nuc5", "nuc_gene", "nuc_energy", "termination_valleys",
                                 "termination", "termination_text", "stat_text", "both"],
                        help="Select from following options:"
                             "(1) fig - plot tRNA genes coverage; (2) figstd - plot gene after gene; (3) fig_tight; (4) fig_ratio - log2 for -a divided by -b, uses normalized data"
                             "(5) fig_boxes - mark A and B boxes (provide path to boxes position file)"
                             "(6) nuc3 - for nucleotide 3' end resolution; (7) nuc_gene - for nucleotide resolution of gene only"
                             "(8) nuc_energy - plots 3' end under nucleotide resolution with energy plots"
                             "(9) termination_valleys - for each valley calculate termination efficiency"
                             "(10) termination - calculate termination efficiency for last 20 nt"
                             "(11) termination_text - calculate termination efficiency for first 20 nt of 3' end and print text file"
                             "(12) stat_text - tab-deliminated; (13) both - fig and stat_text; (14) - nuc5 - for nucleotide 5' end resolution",
                        default="both")
    output.add_argument("--peaks", dest="print_peaks", action="store_true",
                        help="print peaks on plots. Default: False", default=False)
    output.add_argument("--valleys", dest="print_valleys", action="store_true",
                        help="print valleys on plots. Default: False", default=False)
    output.add_argument("--mark", dest="mark", choices=["A", "T", "C", "G"],
                        help="mark nucleotide on plots. Default: None", default=None)

    special = parser.add_argument_group('Special options for some -o choices')
    special.add_argument("--lookahead", dest="lookahead", type=int,
                         help="Set up lookahead parameter for pypeaks function. Default = 20", default=20)
    # NOTE: -w/--window is parsed but currently unused because the
    # "nuc_energy" branch below is commented out.
    special.add_argument("-w", "--window", dest="window", type=int,
                         help="Set up size of window for energy calculation (-o nuc_energy). Default: 5",
                         default=5)
    special.add_argument("--ntotal", dest="ntotal", action="store_true",
                         help="Normalize data to sum of all reads (sum = 1). Default: False", default=False)
    special.add_argument("--nmax", dest="nmax", action="store_true",
                         help="Normalize data to maximal value (max = 1). Default: False", default=False)
    special.add_argument("-a", dest="to_divide", type=str,
                         help="experiment to divide by -b (-o fig_ratio)", default=None)
    special.add_argument("-b", dest="divisor", type=str,
                         help="experiment being divisor for -a (-o fig_ratio)", default=None)
    special.add_argument("--abox", dest="abox_file", help="Provide the path to your tab file with A box start.",
                         metavar="FILE", default=None)
    special.add_argument("--bbox", dest="bbox_file", help="Provide the path to your tab file with B box start.",
                         metavar="FILE", default=None)
    options = parser.parse_args()

    # checking input (use 'is None' per PEP 8, not '== None')
    input_file = options.input_file
    if options.output_files == 'fig_boxes' and (options.abox_file is None or options.bbox_file is None):
        exit('Please provide path to both box.tab files using options --abox and --bbox.')
    if options.output_files == 'fig_ratio' and (options.to_divide is None or options.divisor is None):
        exit('Please provide experiments names using options -a and -b.')

    # preparing naming of output files; the *.list file name encodes the main
    # run parameters (readthrough, lookahead, hits threshold)
    base = options.out_prefix + '_' if options.out_prefix else ''
    prefix = base
    filename = '%srt%s_l%s_t%s.list' % (base, options.readthrough,
                                        options.lookahead, options.hits_threshold)
    if options.print_peaks:
        prefix += 'peaks_'
    if options.print_valleys:
        prefix += 'valleys_'
    if options.normalized:
        prefix = 'normalized_' + prefix

    # setting up dependencies between options (before the data object is
    # created, so it sees the forced values)
    if options.output_files == "fig_ratio":
        options.normalized = True  # ratio plots require normalized data
    if options.output_files == 'termination_valleys':
        options.print_peaks = True
        options.print_valleys = True

    data = tRNAFromConcatv2(gtf_file=gtm.getGTF(options.gtf_file),
                            five_prime_flank=options.five_prime_flank,
                            print_valleys=options.print_valleys,
                            print_peaks=options.print_peaks,
                            readthrough_start=options.readthrough,
                            three_prime_flank=options.three_prime_flank,
                            hits_threshold=options.hits_threshold,
                            lookahead=options.lookahead,
                            prefix=prefix,
                            normalized=options.normalized)

    # reading csv file (fig_ratio path intentionally skips it; the
    # pseudocount variant below is kept commented out as in the original)
    if options.output_files != "fig_ratio":
        data.read_csv(input_file, null_substitution=False)
    # elif options.output_files == "fig_ratio":
    #     data.read_csv(input_file, null_substitution=True) ## makes all 0 as 1 in hittable

    # finding peaks
    if (options.print_peaks or options.print_valleys) and options.output_files != "fig_ratio":
        data.find_peaks()

    # calculating readthrough, details, normalize; fig_ratio adds pseudocounts
    # so that log2 ratios are defined everywhere
    if options.output_files == "fig_ratio":
        data.calculate(details=options.details, ntotal=options.ntotal,
                       nmax=options.nmax, pscounts=True)
    else:
        data.calculate(details=options.details, ntotal=options.ntotal,
                       nmax=options.nmax)

    # making text files ('with' closes the handle; the original leaked it)
    if options.output_files in ("stat_text", "both"):
        with open(filename, "w") as text_file:
            data.make_text_file(text_file, details=options.details,
                                ntotal=options.ntotal, nmax=options.nmax)

    if options.output_files in ("fig", "both"):
        data.slice_dataframe()
        data.fig_gene_pp()

    if options.output_files == "fig_tight":
        data.slice_dataframe()
        data.fig_gene_pp_tight()

    # NOTE: the original checked "markT"/"markTCG" here, but those values are
    # not in the -o choices list, so argparse can never produce them; the
    # unreachable branches were removed.

    if options.output_files == "fig_ratio":
        data.slice_dataframe()
        data.fig_ratio(options.to_divide, options.divisor)

    if options.output_files == "nuc3":
        data.fig_3end_nucleotide_resolution()

    if options.output_files == "nuc5":
        data.fig_5end_nucleotide_resolution()

    if options.output_files == "nuc_gene":
        print('Needs update. Talk to Tomasz.')
    #     data.fig_nucleotide_gene()

    # if options.output_files == "nuc_energy":
    #     data.fig_energy(options.window)

    if options.output_files == "fig_std":
        data.slice_dataframe()
        data.fig_gene_after_gene()

    if options.output_files == "fig_boxes":
        print('Needs update. Talk to Tomasz.')
        data.slice_dataframe()
    #     data.fig_boxes(open(options.abox_file), open(options.bbox_file))

    if options.output_files == "termination_valleys":
        print('Needs update. Talk to Tomasz.')
        data.slice_dataframe()
    #     data.termination_efficency_valleys()

    if options.output_files == "termination":
        print('Needs update. Talk to Tomasz.')
        data.slice_dataframe()
    #     data.termination_efficency()

    if options.output_files == "termination_text":
        data.calculate_dG()
        with open(filename, "w") as text_file:
            data.make_text_file(text_file, print_dG=True)

    print('# Done.')
コード例 #14
0
ファイル: gwidetRNA.py プロジェクト: tturowski/gwide
def tRNA():
    """Command-line entry point for genome-wide tRNA analysis.

    Builds the argument parser, validates option combinations, constructs a
    tRNAFromConcatv2 object from the concat input file (created with
    novo2concat) and dispatches to the plotting / calculation routine
    selected with -o.  No return value; output goes to figures and files.
    """
    usage = "Usage: UNDER CONSTRUCTION - not all functions are available. To create concat file run novo2concat"
    parser = argparse.ArgumentParser(usage=usage)

    files = parser.add_argument_group('Options for input files')
    files.add_argument("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.",
                       type=str, default=None)
    files.add_argument("-i", "--input_file", dest="input_file", help="Provide the path to your concat file. REQUIRED.",
                       metavar="FILE", default=None, required=True)
    files.add_argument("--5flank", dest="five_prime_flank", type=int,
                       help="Set up 5 prime flank in pileup file. Default = 250", default=250)
    files.add_argument("--3flank", dest="three_prime_flank", type=int,
                       help="Set up 3 prime flank in pileup file. Default = 250", default=250)

    universal = parser.add_argument_group('Universal options')
    universal.add_argument("-t", "--hits_threshold", dest="hits_threshold", type=int,
                           help="Set up threshold for pileup. Default 0 reads", default=0)
    universal.add_argument("-r", "--readthrough", dest="readthrough", type=int,
                           help="Set up when readthrough starts. Default: 15", default=15)
    universal.add_argument("-n", "--normalized", dest="normalized", action="store_true",
                           help="Use when you want to work on data normalized 'reads per Milion'. Default: False",
                           default=False)

    output = parser.add_argument_group('Options for output files')
    output.add_argument("-p", "--prefix", dest="out_prefix", type=str,
                        help="Prefix for output files. Default to standard output. Not supported for -o ratio.",
                        default=None)
    output.add_argument("-d", "--details", dest="details", action="store_true",
                        help="Print details in text file. WARNING: works only with one experiment.",
                        default=False)
    output.add_argument("-o", dest="output_files",
                        choices=["fig", "fig_std", "fig_tight", "fig_ratio", "fig_boxes", "nuc3",
                                 "nuc5", "nuc_gene", "nuc_energy", "termination_valleys",
                                 "termination", "termination_text", "stat_text", "both"],
                        help="Select from following options:"
                             "(1) fig - plot tRNA genes coverage; (2) figstd - plot gene after gene; (3) fig_tight; (4) fig_ratio - log2 for -a divided by -b, uses normalized data"
                             "(5) fig_boxes - mark A and B boxes (provide path to boxes position file)"
                             "(6) nuc3 - for nucleotide 3' end resolution; (7) nuc_gene - for nucleotide resolution of gene only"
                             "(8) nuc_energy - plots 3' end under nucleotide resolution with energy plots"
                             "(9) termination_valleys - for each valley calculate termination efficiency"
                             "(10) termination - calculate termination efficiency for last 20 nt"
                             "(11) termination_text - calculate termination efficiency for first 20 nt of 3' end and print text file"
                             "(12) stat_text - tab-deliminated; (13) both - fig and stat_text; (14) - nuc5 - for nucleotide 5' end resolution",
                        default="both")
    output.add_argument("--peaks", dest="print_peaks", action="store_true",
                        help="print peaks on plots. Default: False", default=False)
    output.add_argument("--valleys", dest="print_valleys", action="store_true",
                        help="print valleys on plots. Default: False", default=False)
    output.add_argument("--mark", dest="mark", choices=["A", "T", "C", "G"],
                        help="mark nucleotide on plots. Default: None", default=None)

    special = parser.add_argument_group('Special options for some -o choices')
    special.add_argument("--lookahead", dest="lookahead", type=int,
                         help="Set up lookahead parameter for pypeaks function. Default = 20", default=20)
    # NOTE: -w/--window is parsed but currently unused because the
    # "nuc_energy" branch below is commented out.
    special.add_argument("-w", "--window", dest="window", type=int,
                         help="Set up size of window for energy calculation (-o nuc_energy). Default: 5",
                         default=5)
    special.add_argument("--ntotal", dest="ntotal", action="store_true",
                         help="Normalize data to sum of all reads (sum = 1). Default: False", default=False)
    special.add_argument("--nmax", dest="nmax", action="store_true",
                         help="Normalize data to maximal value (max = 1). Default: False", default=False)
    special.add_argument("-a", dest="to_divide", type=str,
                         help="experiment to divide by -b (-o fig_ratio)", default=None)
    special.add_argument("-b", dest="divisor", type=str,
                         help="experiment being divisor for -a (-o fig_ratio)", default=None)
    special.add_argument("--abox", dest="abox_file", help="Provide the path to your tab file with A box start.",
                         metavar="FILE", default=None)
    special.add_argument("--bbox", dest="bbox_file", help="Provide the path to your tab file with B box start.",
                         metavar="FILE", default=None)
    options = parser.parse_args()

    # checking input (use 'is None' per PEP 8, not '== None')
    input_file = options.input_file
    if options.output_files == 'fig_boxes' and (options.abox_file is None or options.bbox_file is None):
        exit('Please provide path to both box.tab files using options --abox and --bbox.')
    if options.output_files == 'fig_ratio' and (options.to_divide is None or options.divisor is None):
        exit('Please provide experiments names using options -a and -b.')

    # preparing naming of output files; the *.list file name encodes the main
    # run parameters (readthrough, lookahead, hits threshold)
    base = options.out_prefix + '_' if options.out_prefix else ''
    prefix = base
    filename = '%srt%s_l%s_t%s.list' % (base, options.readthrough,
                                        options.lookahead, options.hits_threshold)
    if options.print_peaks:
        prefix += 'peaks_'
    if options.print_valleys:
        prefix += 'valleys_'
    if options.normalized:
        prefix = 'normalized_' + prefix

    # setting up dependencies between options (before the data object is
    # created, so it sees the forced values)
    if options.output_files == "fig_ratio":
        options.normalized = True  # ratio plots require normalized data
    if options.output_files == 'termination_valleys':
        options.print_peaks = True
        options.print_valleys = True

    data = tRNAFromConcatv2(gtf_file=gtm.getGTF(options.gtf_file),
                            five_prime_flank=options.five_prime_flank,
                            print_valleys=options.print_valleys,
                            print_peaks=options.print_peaks,
                            readthrough_start=options.readthrough,
                            three_prime_flank=options.three_prime_flank,
                            hits_threshold=options.hits_threshold,
                            lookahead=options.lookahead,
                            prefix=prefix,
                            normalized=options.normalized)

    # reading csv file (fig_ratio path intentionally skips it; the
    # pseudocount variant below is kept commented out as in the original)
    if options.output_files != "fig_ratio":
        data.read_csv(input_file, null_substitution=False)
    # elif options.output_files == "fig_ratio":
    #     data.read_csv(input_file, null_substitution=True) ## makes all 0 as 1 in hittable

    # finding peaks
    if (options.print_peaks or options.print_valleys) and options.output_files != "fig_ratio":
        data.find_peaks()

    # calculating readthrough, details, normalize; fig_ratio adds pseudocounts
    # so that log2 ratios are defined everywhere
    if options.output_files == "fig_ratio":
        data.calculate(details=options.details, ntotal=options.ntotal,
                       nmax=options.nmax, pscounts=True)
    else:
        data.calculate(details=options.details, ntotal=options.ntotal,
                       nmax=options.nmax)

    # making text files ('with' closes the handle; the original leaked it)
    if options.output_files in ("stat_text", "both"):
        with open(filename, "w") as text_file:
            data.make_text_file(text_file, details=options.details,
                                ntotal=options.ntotal, nmax=options.nmax)

    if options.output_files in ("fig", "both"):
        data.slice_dataframe()
        data.fig_gene_pp()

    if options.output_files == "fig_tight":
        data.slice_dataframe()
        data.fig_gene_pp_tight()

    # NOTE: the original checked "markT"/"markTCG" here, but those values are
    # not in the -o choices list, so argparse can never produce them; the
    # unreachable branches were removed.

    if options.output_files == "fig_ratio":
        data.slice_dataframe()
        data.fig_ratio(options.to_divide, options.divisor)

    if options.output_files == "nuc3":
        data.fig_3end_nucleotide_resolution()

    if options.output_files == "nuc5":
        data.fig_5end_nucleotide_resolution()

    if options.output_files == "nuc_gene":
        print('Needs update. Talk to Tomasz.')
    #     data.fig_nucleotide_gene()

    # if options.output_files == "nuc_energy":
    #     data.fig_energy(options.window)

    if options.output_files == "fig_std":
        data.slice_dataframe()
        data.fig_gene_after_gene()

    if options.output_files == "fig_boxes":
        print('Needs update. Talk to Tomasz.')
        data.slice_dataframe()
    #     data.fig_boxes(open(options.abox_file), open(options.bbox_file))

    if options.output_files == "termination_valleys":
        print('Needs update. Talk to Tomasz.')
        data.slice_dataframe()
    #     data.termination_efficency_valleys()

    if options.output_files == "termination":
        print('Needs update. Talk to Tomasz.')
        data.slice_dataframe()
    #     data.termination_efficency()

    if options.output_files == "termination_text":
        data.calculate_dG()
        with open(filename, "w") as text_file:
            data.make_text_file(text_file, print_dG=True)

    print('# Done.')
コード例 #15
0
def plot():
    """ Script working with concat file generated by pileupsToConcat.py script. Read concat file and according to options.
    Can plot intron, and peaks found by pypeaks script.

    Command-line entry point: parses options, constructs a GenomeWidePlot
    object and dispatches to the routine selected with -o.  No return value;
    output goes to figures and files.
    """

    #setup option parser
    usage = "Usage: gwide function -i input -o output [options]"
    parser = argparse.ArgumentParser(usage=usage)

    files = parser.add_argument_group('Options for input files')
    files.add_argument("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.",
                       type=str, default=None)
    files.add_argument("-i", "--input_file", dest="input_file", help="Provide the path to your input file. Required.",
                       metavar="FILE", default=None, required=True)
    # files.add_argument("--input_type", dest="input_type", choices=['concat'], help="Type of input file. Default: concat",
    #                  type=str, default='concat')
    files.add_argument("--5flank", dest="five_prime_flank", type=int,
                       help="Set up 5 prime flank. Default = 250", default=250)
    files.add_argument("--3flank", dest="three_prime_flank", type=int,
                       help="Set up 3 prime flank. Default = 250", default=250)
    files.add_argument("-l", "--list_file", dest="list_file",
                       help="Provide the path to your (tab) file genes.list. Only listed genes will be plotted. Can be aligned as second column",
                       type=str)

    peaks = parser.add_argument_group('Option for peaks finder (pypeaks')
    peaks.add_argument("--lookahead", dest="lookahead", type=int,
                       help="Set up lookahead parameter for pypeaks function. Default = 20", default=20)

    universal = parser.add_argument_group('Universal options')
    universal.add_argument("-t", "--hits_threshold", dest="hits_threshold", type=int,
                           help="Set up threshold for pileup. Default 100 reads. Genes with highest peak below are not included",
                           default=0)
    universal.add_argument("-r", "--readthrough", dest="readthrough", type=int,
                           help="Set up nt when readthrough should start countin. Default: 15", default=15)
    universal.add_argument("-n", "--normalized", dest="normalized", action="store_true",
                           help="to work on data normalized 'reads per Milion'. Default: False", default=False)

    output = parser.add_argument_group('Options for output files')
    output.add_argument("-p", "--prefix", dest="out_prefix", type=str,
                        help="Prefix for output files. Default to standard output. Not supported for -o ratio.",
                        default=None)
    output.add_argument("-o", "--output", dest="output",
                        choices=["std", "ratio", "aligner", "RTendalign", "table", "Tdensity",
                                 "makeGTF", "transcript_length", "makeRTGTF"],
                        help="Select from following options:" + '\n'
                             "(1) std - 5` and 3` end aligned only; (2) ratio - plot gwide ratio a exp / b exp" + '\n'
                             "(3) aligner - std plus chosen aligner from file (-l option)" + '\n'
                             "(4) RTendalign - std and aligned to 3` end of read-through (-l option). -e works to choose experiment to align and filter" + '\n'
                             "(5) table - make *.csv file to plot heatmaps; (6) Tdensity - calculate p-value for non-canonical termination" + '\n'
                             "(7) makeGTF - make GTF file with transcripts length ; (8) transcript_length - save *.txt file with trancripts length for all experiment; "
                             "(9) makeRTGTF - make GTF with tRNA extensions only",
                        default="std")
    special = parser.add_argument_group('Special options for some -o choices')
    special.add_argument("--ntotal", dest="ntotal", action="store_true",
                         help="Normalize to sum of all reads (sum = 1). Default: False", default=False)
    special.add_argument("--nmax", dest="nmax", action="store_true",
                         help="Normalize to maximal value (max = 1). Default: False", default=False)
    special.add_argument("--publish", dest="publish", action="store_true",
                         help="Print plots as separate figures in publication quality. Works with -o ratio and std",
                         default=False)
    special.add_argument("--LRR", dest="left_right_ratio", action="store_true",
                         help="Print ratio between left and right part of the metaprofiles (before and after aligning line)",
                         default=False)
    special.add_argument("-f", dest="filter", type=str,
                         help="Filter in results factor_above_value; type i.e. RT_above_0.25 or a_below_1.5. To chose: RT, a, b, i, e, f, intron",
                         default=None)
    special.add_argument("-e", dest="experiment", type=str,
                         help="Filter according to values from one experiment only", default=None)
    special.add_argument("-a", dest="to_divide", type=str,
                         help="experiment to divide by -b (-o ratio)", default=None)
    special.add_argument("-b", dest="divisor", type=str,
                         help="experiment being divisor for -a (-o ratio)", default=None)
    special.add_argument("--select", dest="select", type=str,
                         help="To print additional plot with selecter area and no titles keep form 200_300 (range from 200 to 300)",
                         default=None)
    special.add_argument("--peak_min", dest="peak_min", type=int,
                         help="minimum of peak average for -o Tdensity. Default = 300", default=300)
    special.add_argument("--peak_size", dest="peak_size", type=int,
                         help="peak size for -o Tdensity. Default = 20", default=20)

    options = parser.parse_args()

    gtf_file = gtm.getGTF(options.gtf_file)
    list_file = options.list_file

    # preparing naming of output files; the *.list file name encodes the main
    # run parameters (readthrough, lookahead, hits threshold)
    base = options.out_prefix + '_' if options.out_prefix else ''
    prefix = base
    filename = '%srt%s_l%s_t%s.list' % (base, options.readthrough,
                                        options.lookahead, options.hits_threshold)
    if options.normalized:
        prefix = 'nRpM_' + prefix

    # setting up dependencies BEFORE the data object is constructed.  The
    # original set options.normalized only after GenomeWidePlot had already
    # been built with normalized=options.normalized, so the forced flag never
    # reached the data object (the tRNA() entry point sets it first); fixed
    # here by moving the assignment up.
    if options.output == "ratio":
        options.normalized = True  # ratio plots require normalized data

    data = GenomeWidePlot(gtf_file=gtf_file,
                          five_prime_flank=options.five_prime_flank,
                          readthrough_start=options.readthrough,
                          three_prime_flank=options.three_prime_flank,
                          hits_threshold=options.hits_threshold,
                          lookahead=options.lookahead,
                          prefix=prefix,
                          normalized=options.normalized,
                          publish=options.publish,
                          left_right_ratio=options.left_right_ratio)

    #reading csv file
    data.read_csv(options.input_file, skip_nucleotide=True)

    #plotting
    if options.output == 'std':
        data.calculate(details=False, ntotal=True, nmax=True)
        data.std(filter=options.filter, experiment_to_filter=options.experiment)
        if options.ntotal:
            data.std(filter=options.filter, experiment_to_filter=options.experiment,
                     exp_to_use='_ntotal')
        if options.nmax:
            data.std(filter=options.filter, experiment_to_filter=options.experiment,
                     exp_to_use='_nmax')

    if options.output == 'aligner':
        if not list_file:
            print("Please provide path how to align files using -l file.list")
        else:
            data.calculate(details=True, ntotal=False, nmax=False)
            data.read_list(list_file)
            data.aligner(file=os.path.basename(list_file), filter=options.filter,
                         experiment_to_filter=options.experiment)

    if options.output == 'RTendalign':
        data.calculate(details=True, ntotal=False, nmax=False)
        data.RT_aligner(filter=options.filter, experiment_to_align=options.experiment)

    if options.output == "ratio":
        # pseudocounts keep the ratio defined where one experiment has zeros
        data.calculate(details=False, ntotal=True, nmax=True, pscounts=True)
        if options.ntotal:
            data.ratio(to_divide=options.to_divide, divisor=options.divisor,
                       exp_to_use='_ntotal', filter=options.filter)
            if options.select:
                data.ratio(to_divide=options.to_divide, divisor=options.divisor,
                           exp_to_use='_ntotal', select=options.select,
                           filter=options.filter)
        if options.nmax:
            data.ratio(to_divide=options.to_divide, divisor=options.divisor,
                       exp_to_use='_nmax', filter=options.filter)
        data.ratio(to_divide=options.to_divide, divisor=options.divisor,
                   filter=options.filter)

    if options.output == "makeRTGTF":
        data.find_peaks()
        data.makeRTGTF()

    if options.output == "table":
        data.table(filter=options.filter, experiment_to_filter=options.experiment)

    if options.output == "Tdensity":
        data.find_peaks()
        # data.calculate()
        data.Tdensity(peak_min=options.peak_min, size=options.peak_size)

    if options.output == "makeGTF":
        data.find_peaks()
        data.maketranscriptGTF()

    if options.output == "transcript_length":
        data.find_peaks()
        data.printTrancriptLength()

    print('# Done.')
コード例 #16
0
ファイル: gwidePlot.py プロジェクト: tturowski/gwide
def plot():
    """Entry point of gwidePlot (Python 2 CLI script).

    Works on a concat file generated by the pileupsToConcat.py script:
    reads the concat file and, according to the -o option, plots
    genome-wide profiles (5'/3' aligned, ratios between experiments,
    profiles aligned via a list file or to read-through 3' ends, peaks
    found by pypeaks), or emits derived tables / GTF files / transcript
    lengths.  Progress and results go to files named from --prefix or to
    standard output.
    """

    #setup option parser
    usage = "Usage: gwide function -i input -o output [options]"
    parser = argparse.ArgumentParser(usage=usage)

    files = parser.add_argument_group('Options for input files')
    files.add_argument("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.",
                     type=str, default=None)
    files.add_argument("-i", "--input_file", dest="input_file", help="Provide the path to your input file. Required.",
                     metavar="FILE", default=None, required=True)
    # files.add_argument("--input_type", dest="input_type", choices=['concat'], help="Type of input file. Default: concat",
    #                  type=str, default='concat')
    files.add_argument("--5flank", dest="five_prime_flank", type=int, help="Set up 5 prime flank. Default = 250", default=250)
    files.add_argument("--3flank", dest="three_prime_flank", type=int, help="Set up 3 prime flank. Default = 250", default=250)
    files.add_argument("-l", "--list_file", dest="list_file", help="Provide the path to your (tab) file genes.list. Only listed genes will be plotted. Can be aligned as second column",
                     type=str)

    peaks = parser.add_argument_group('Option for peaks finder (pypeaks')
    peaks.add_argument("--lookahead", dest="lookahead", type=int, help="Set up lookahead parameter for pypeaks function. Default = 20", default=20)

    universal = parser.add_argument_group('Universal options')
    universal.add_argument("-t", "--hits_threshold", dest="hits_threshold", type=int, help="Set up threshold for pileup. Default 100 reads. Genes with highest peak below are not included",
                      default=0)
    universal.add_argument("-r", "--readthrough", dest="readthrough", type=int, help="Set up nt when readthrough should start countin. Default: 15",
                      default=15)
    universal.add_argument("-n", "--normalized", dest="normalized", action="store_true", help="to work on data normalized 'reads per Milion'. Default: False", default=False)

    output = parser.add_argument_group('Options for output files')
    output.add_argument("-p", "--prefix", dest="out_prefix", type=str, help="Prefix for output files. Default to standard output. Not supported for -o ratio.", default=None)
    output.add_argument("-o", "--output", dest="output", choices=["std", "ratio", "aligner", "RTendalign", "table", "Tdensity", "makeGTF", "transcript_length", "makeRTGTF"], help="Select from following options:"+'\n'
                       "(1) std - 5` and 3` end aligned only; (2) ratio - plot gwide ratio a exp / b exp"+'\n'
                        "(3) aligner - std plus chosen aligner from file (-l option)"+'\n'
                        "(4) RTendalign - std and aligned to 3` end of read-through (-l option). -e works to choose experiment to align and filter"+'\n'
                        "(5) table - make *.csv file to plot heatmaps; (6) Tdensity - calculate p-value for non-canonical termination"+'\n'
                        "(7) makeGTF - make GTF file with transcripts length ; (8) transcript_length - save *.txt file with trancripts length for all experiment; "
                        "(9) makeRTGTF - make GTF with tRNA extensions only", default="std")
    special = parser.add_argument_group('Special options for some -o choices')
    special.add_argument("--ntotal", dest="ntotal", action="store_true", help="Normalize to sum of all reads (sum = 1). Default: False", default=False)
    special.add_argument("--nmax", dest="nmax", action="store_true", help="Normalize to maximal value (max = 1). Default: False", default=False)
    special.add_argument("--publish", dest="publish", action="store_true", help="Print plots as separate figures in publication quality. Works with -o ratio and std", default=False)
    special.add_argument("--LRR", dest="left_right_ratio", action="store_true", help="Print ratio between left and right part of the metaprofiles (before and after aligning line)", default=False)
    special.add_argument("-f", dest="filter", type=str, help="Filter in results factor_above_value; type i.e. RT_above_0.25 or a_below_1.5. To chose: RT, a, b, i, e, f, intron", default=None)
    special.add_argument("-e", dest="experiment", type=str, help="Filter according to values from one experiment only", default=None)
    special.add_argument("-a", dest="to_divide", type=str, help="experiment to divide by -b (-o ratio)",
                      default=None)
    special.add_argument("-b", dest="divisor", type=str, help="experiment being divisor for -a (-o ratio)", default=None)
    special.add_argument("--select", dest="select", type=str, help="To print additional plot with selecter area and no titles keep form 200_300 (range from 200 to 300)", default=None)
    special.add_argument("--peak_min", dest="peak_min", type=int, help="minimum of peak average for -o Tdensity. Default = 300", default=300)
    special.add_argument("--peak_size", dest="peak_size", type=int, help="peak size for -o Tdensity. Default = 20", default=20)


    options = parser.parse_args()

    # gtm.getGTF resolves the GTF path (presumably falls back to an
    # environment default when -g is omitted -- TODO confirm).
    gtf_file = gtm.getGTF(options.gtf_file)
    list_file = options.list_file

    #preparing naming of output files
    # File names encode the key parameters: readthrough, lookahead, threshold.
    if options.out_prefix:
        prefix = options.out_prefix+'_'
        filename = options.out_prefix+'_rt'+str(options.readthrough)+'_l'+str(options.lookahead)+'_t'+str(options.hits_threshold)+'.list'
    else:
        prefix = str()
        filename = 'rt'+str(options.readthrough)+'_l'+str(options.lookahead)+'_t'+str(options.hits_threshold)+'.list'
    if options.normalized == True:
        prefix = 'nRpM_'+prefix

    data = GenomeWidePlot(gtf_file=gtf_file, five_prime_flank=options.five_prime_flank, readthrough_start=options.readthrough,
                          three_prime_flank=options.three_prime_flank, hits_threshold=options.hits_threshold, lookahead=options.lookahead,
                          prefix=prefix, normalized=options.normalized, publish=options.publish, left_right_ratio=options.left_right_ratio)

    #setting up dependencies
    if options.output == "ratio":
        # NOTE(review): this flips the flag only AFTER `prefix` and `data`
        # were built above, so it affects neither the 'nRpM_' file prefix nor
        # the GenomeWidePlot constructor -- confirm this ordering is intended.
        options.normalized = True

    #reading csv file
    data.read_csv(options.input_file, skip_nucleotide=True)

    #plotting -- dispatch on the requested -o mode
    # std: 5' and 3' end aligned metaprofiles (raw, plus optional ntotal/nmax
    # normalized variants).
    if options.output == 'std':
        data.calculate(details=False, ntotal=True, nmax=True)
        data.std(filter=options.filter, experiment_to_filter=options.experiment)
        if options.ntotal == True:
            data.std(filter=options.filter, experiment_to_filter=options.experiment, exp_to_use='_ntotal')
        if options.nmax == True:
            data.std(filter=options.filter, experiment_to_filter=options.experiment, exp_to_use='_nmax')

    # aligner: std plus alignment positions taken from the -l list file.
    if options.output == 'aligner':
        if not list_file:
            print "Please provide path how to align files using -l file.list"
        else:
            data.calculate(details=True, ntotal=False, nmax=False)
            data.read_list(list_file)
            data.aligner(file=os.path.basename(list_file), filter=options.filter, experiment_to_filter=options.experiment)

    # RTendalign: align profiles to the 3' end of the read-through.
    if options.output == 'RTendalign':
        data.calculate(details=True, ntotal=False, nmax=False)
        data.RT_aligner(filter=options.filter, experiment_to_align=options.experiment)

    # ratio: divide experiment -a by experiment -b; always emits the raw
    # ratio, plus _ntotal/_nmax variants when requested.
    if options.output == "ratio":
        data.calculate(details=False, ntotal=True, nmax=True, pscounts=True)
        if options.ntotal == True:
            data.ratio(to_divide=options.to_divide, divisor=options.divisor, exp_to_use='_ntotal', filter=options.filter)
            if options.select:
                data.ratio(to_divide=options.to_divide, divisor=options.divisor, exp_to_use='_ntotal', select=options.select, filter=options.filter)
        if options.nmax == True:
            data.ratio(to_divide=options.to_divide, divisor=options.divisor, exp_to_use='_nmax', filter=options.filter)
        data.ratio(to_divide=options.to_divide, divisor=options.divisor, filter=options.filter)

    # makeRTGTF: GTF of tRNA read-through extensions derived from peaks.
    if options.output == "makeRTGTF":
        data.find_peaks()
        data.makeRTGTF()

    # table: CSV suitable for plotting heatmaps.
    if options.output == "table":
        data.table(filter=options.filter, experiment_to_filter=options.experiment)

    # Tdensity: p-values for non-canonical termination from peak density.
    if options.output == "Tdensity":
        data.find_peaks()
        # data.calculate()
        data.Tdensity(peak_min=options.peak_min, size=options.peak_size)

    # makeGTF: GTF with transcript lengths derived from peaks.
    if options.output == "makeGTF":
        data.find_peaks()
        data.maketranscriptGTF()

    # transcript_length: text file with transcript lengths per experiment.
    if options.output == "transcript_length":
        data.find_peaks()
        data.printTrancriptLength()

    print '# Done.'
コード例 #17
0
ファイル: aminoacidCounter.py プロジェクト: MingleiYang/gwide
                   metavar="FILE", default=None)
# Remaining CLI options (the file-input options -f/-g precede this chunk).
files.add_argument("-w", dest="window", help="size of sliding window. Default = 10",
                   type=int, default=10)
files.add_argument("-a", dest="aa_type", help="Type of filter applied i.e. b_a_70 = basic aminoacides above or equal 70% within window "
                                              "or ar_b_20 = aromatic aminoacids below 20% within window. Option for position 1: positive, negative, charged, polar, hydrophobic, aromatic"
                                              "Options for position 2: a - above or equal. Position 3 is percent within sliding window i.e 20 = 2/10 or 3/15"
                                              "Not used when -c used",
                   type=str, default=None)
files.add_argument("-c", dest="config_list", help="Config.list. Default=False",
                   metavar="FILE", default=None)
files.add_argument("--id", dest="id_given", help="gene ID given instead of gene names", action="store_true", default=False)
args = parser.parse_args()

#reading GTF file to GTF parser and creating id_to_gene list
gtf = GTF2.Parse_GTF()
gtf.read_GTF(gtm.getGTF(args.gtf_file))

# Reverse mapping gene_id -> gene_name, used to translate IDs when --id is set.
id_to_gene = dict()
for gene_name in gtf.genes:
    gene_id = gtf.genes[gene_name]['gene_id']
    id_to_gene[gene_id] = gene_name

#reading fasta file
in_seq_handle = open(args.fasta_file)
seq_dict = SeqIO.to_dict(SeqIO.parse(in_seq_handle, "fasta")) #dictionary with sequences keyed by fasta record id
in_seq_handle.close()
seq_dict_keys = seq_dict.keys()

#function to check -a or config.list syntax
def filter_parser(input_str):
    #checks
コード例 #18
0
ファイル: novo2concat.py プロジェクト: tturowski/gwide
# Setting up the option parser (Python 2 ruffus pipeline driver).
parser = argparse.ArgumentParser(description='Usage: ruffus scirpt designed to make concat file from *.novo files. Make new folder, cp or ln into all novofiles and run novo2concat. IMPORTANT: name of novo file should be name of experiment')
parser.add_argument("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.",
                     type=str, default=None)
parser.add_argument("-t", "--tab_file", dest="tab_file", help="Provide the path to your tab genome file.",
                     type=str, default=None)
parser.add_argument("-r", dest="ranges", help="Set up ranges for pyPileup. Default = 250", default=250)
parser.add_argument("--3end", dest="three_end", help="Use pyPileup option --3end to only report counts for the 3' end of the reads. Default = False",
                    action="store_true", default=False)
parser.add_argument("-l", dest="list_file", help="Provide the FULL path to your gene_names.list file.", type=str, default=None, required=True)
parser.add_argument("--tree", dest="tree", help="If you want to leave tree of catalogs including pilups within. Default = None.",
                     action="store_true", default=False)
parser.add_argument("-p", dest="prefix", help="Prefix for concat file name", type=str, default="")
args = parser.parse_args()

# Resolve GTF/TAB paths via gwide's helpers; ranges is stringified because it
# is later passed on a command line (default 250 stays an int until here).
gtf, tab, ranges = gtm.getGTF(args.gtf_file), gtm.getTAB(args.tab_file), str(args.ranges)
print "Using GTF file: " + gtf
print "Using TAB genome file: " + tab

#listing novo files
files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.novo')]  #gives list of files in current directory
directories = [re.sub(r'.novo$', '', d) for d in files]
links = []
root_dir = os.getcwd()

# Making one working directory per experiment and symlinking its .novo file
# inside; absolute link paths are collected for the downstream pipeline.
# NOTE(review): os.chdir(d) inside the loop without chdir back means each
# mkdir is relative to the previous directory -- confirm nesting is intended.
for f, d in zip(files, directories):
    os.mkdir(d)
    os.chdir(d)
    subprocess.call('ln -s ../' + f + ' ' + f, shell=True)
    links.append(os.path.abspath('./'+f))
コード例 #19
0
ファイル: novo2concat.py プロジェクト: MingleiYang/gwide
# Remaining CLI options (parser construction precedes this chunk).
parser.add_argument("-r", dest="ranges", help="Set up ranges for pyPileup. Default = 250", default=250)
parser.add_argument("--3end", dest="three_end",
                    help="Use pyPileup option --3end to only report counts for the 3' end of the reads. Default = False",
                    action="store_true", default=False)
parser.add_argument("--5end", dest="five_end",
                    help="Use pyPileup option --5end to only report counts for the 5' end of the reads. Default = False",
                    action="store_true", default=False)
parser.add_argument("-l", dest="list_file", help="Provide the FULL path to your gene_names.list file.", type=str, default=None, required=True)
parser.add_argument("--tree", dest="tree", help="If you want to leave tree of catalogs including pilups within. Default = None.",
                     action="store_true", default=False)
parser.add_argument("--anti", dest="anti",  help="Create additional concat file with antisense reads Default = None.",
                    action="store_true", default=False)
parser.add_argument("-p", dest="prefix", help="Prefix for concat file name", type=str, default="")
args = parser.parse_args()

# Resolve GTF/TAB paths via gwide's helpers; ranges is stringified for use on
# a downstream command line.
gtf, tab, ranges = gtm.getGTF(args.gtf_file), gtm.getTAB(args.tab_file), str(args.ranges)
print "Using GTF file: " + gtf
print "Using TAB genome file: " + tab

#listing novo files
files = [f for f in os.listdir('.') if os.path.isfile(f) and f.endswith('.novo')]  #gives list of files in current directory
directories = [re.sub(r'.novo$', '', d) for d in files]
links = []
root_dir = os.getcwd()

# Making one working directory per experiment and symlinking its .novo file
# inside; absolute link paths are collected for the downstream pipeline.
# NOTE(review): os.chdir(d) inside the loop without chdir back means each
# mkdir is relative to the previous directory -- confirm nesting is intended.
for f, d in zip(files, directories):
    os.mkdir(d)
    os.chdir(d)
    subprocess.call('ln -s ../' + f + ' ' + f, shell=True)
    links.append(os.path.abspath('./'+f))
コード例 #20
0
ファイル: codonCounter.py プロジェクト: tturowski/gwide
# CLI for codonCounter (usage/RawTextHelpFormatter defined before this chunk).
parser = argparse.ArgumentParser(usage=usage, formatter_class=RawTextHelpFormatter)
files = parser.add_argument_group('Options for input files')
files.add_argument("-f", dest="fasta_file", help="Provide the path to your fasta file",
                 metavar="FILE", default=None)
files.add_argument("-g", "--gtf_file", dest="gtf_file", help="Provide the path to your gtf file.",
                   metavar="FILE", default=None)
files.add_argument("-c", dest="codone", help="codone that want to count",
                   type=str, default='CGA')
files.add_argument("--all", dest="save_matrix", help="Saves number of all codones as a matrix. Default=False",
                   action="store_true", default=False)
files.add_argument("--id", dest="id_given", help="gene ID given instead of gene names", action="store_true", default=False)
args = parser.parse_args()

# Parse the GTF and build the reverse mapping gene_id -> gene_name.
gtf = GTF2.Parse_GTF()
gtf.read_GTF(gtm.getGTF(args.gtf_file))

id_to_gene = dict()
for gene_name in gtf.genes:
    gene_id = gtf.genes[gene_name]['gene_id']
    id_to_gene[gene_id] = gene_name

# NOTE(review): leftover/debug line -- relies on gene_id leaking from the loop
# above and merely re-binds gene_name to the last gene iterated (raises
# NameError if gtf.genes is empty). Confirm it can be removed.
gene_name = id_to_gene[gene_id]

# Read all fasta records into a dict keyed by record id.
in_seq_handle = open(args.fasta_file)
seq_dict = SeqIO.to_dict(SeqIO.parse(in_seq_handle, "fasta"))
in_seq_handle.close()
seq_dict_keys =  seq_dict.keys()

# Accumulator for the optional --all per-codon count matrix.
matrix = pd.DataFrame()
コード例 #21
0
ファイル: gwidemRNA.py プロジェクト: MingleiYang/gwide
def mRNA():
    """Entry point of gwidemRNA (Python 2 CLI script).

    Reads a concat file produced by novo2concat.py and, for the default
    (and only) -o choice 'bind', computes binding windows over mRNA
    profiles and prints them as fasta.
    """
    usage = "Usage: To create input concat file run novo2concat.py"
    parser = argparse.ArgumentParser(usage=usage)

    files = parser.add_argument_group('Options for input files')
    files.add_argument("-g",
                       "--gtf_file",
                       dest="gtf_file",
                       help="Provide the path to your gtf file.",
                       type=str,
                       default=None)
    files.add_argument("-i",
                       "--input_file",
                       dest="input_file",
                       help="Provide the path to your concat file. REQUIRED.",
                       metavar="FILE",
                       default=None,
                       required=True)
    files.add_argument("--5flank",
                       dest="five_prime_flank",
                       type=int,
                       help="Set up 5 prime flank in pileup file. Default = 0",
                       default=0)
    files.add_argument("--3flank",
                       dest="three_prime_flank",
                       type=int,
                       help="Set up 3 prime flank in pileup file. Default = 0",
                       default=0)

    universal = parser.add_argument_group('Universal options')
    universal.add_argument("-t",
                           "--hits_threshold",
                           dest="hits_threshold",
                           type=int,
                           help="Set up threshold for pileup. Default 0 reads",
                           default=0)
    universal.add_argument(
        "-n",
        "--normalized",
        dest="normalized",
        action="store_true",
        help=
        "Use when you want to work on data normalized 'reads per Milion'. Default: False",
        default=False)

    output = parser.add_argument_group('Options for output files')
    output.add_argument(
        "-p",
        "--prefix",
        dest="out_prefix",
        type=str,
        help=
        "Prefix for output files. Default to standard output. Not supported for -o ratio.",
        default=None)
    output.add_argument("-o",
                        dest="output_files",
                        choices=['bind'],
                        help="Select from following options:"
                        "(1) Print binding windows in fasta file",
                        default="bind")
    output.add_argument("--peaks",
                        dest="print_peaks",
                        action="store_true",
                        help="print peaks on plots. Default: False",
                        default=False)
    output.add_argument("--valleys",
                        dest="print_valleys",
                        action="store_true",
                        help="print valleys on plots. Default: False",
                        default=False)

    special = parser.add_argument_group('Special options for some -o choices')
    special.add_argument(
        "--lookahead",
        dest="lookahead",
        type=int,
        help="Set up lookahead parameter for pypeaks function. Default = 20",
        default=20)
    special.add_argument(
        "-w",
        "--window",
        dest="window",
        type=int,
        help=
        "Set up size of window for bind calculation (-o bind). Default: 10",
        default=10)
    # -e has no explicit default; argparse leaves it as None when omitted.
    special.add_argument("-e",
                         dest="experiment_to_use",
                         type=str,
                         help="For -o bind, which experiment to use.")
    # special.add_argument("--ntotal", dest="ntotal", action="store_true", help="Normalize data to sum of all reads (sum = 1). Default: False", default=False)
    # special.add_argument("--nmax", dest="nmax", action="store_true", help="Normalize data to maximal value (max = 1). Default: False", default=False)
    # special.add_argument("-a", dest="to_divide", type=str, help="experiment to divide by -b (-o fig_ratio)",
    #                   default=None)
    # special.add_argument("-b", dest="divisor", type=str, help="experiment being divisor for -a (-o fig_ratio)",
    #                   default=None)
    options = parser.parse_args()

    #checking input
    input_file = options.input_file

    #preparing naming of output files
    if options.out_prefix:
        prefix = options.out_prefix + '_'
    else:
        prefix = str()
    if options.normalized == True:
        prefix = 'normalized_' + prefix

    data = mRNAFromConcat(gtf_file=gtm.getGTF(options.gtf_file),
                          five_prime_flank=options.five_prime_flank,
                          three_prime_flank=options.three_prime_flank,
                          hits_threshold=options.hits_threshold,
                          lookahead=options.lookahead,
                          prefix=prefix,
                          npM=options.normalized)

    if options.output_files == "bind":
        #reading csv file
        # use='deletions' presumably selects the deletions column of the
        # concat file -- confirm against mRNAFromConcat.read_csv.
        data.read_csv(input_file, use='deletions')
        #calculating readthrough, details, normalize
        # data.calculate(details=options.details, ntotal=options.ntotal, nmax=options.nmax)
        data.bind(exp_to_use=options.experiment_to_use, window=options.window)

    print '# Done.'
コード例 #22
0
ファイル: parserTools.py プロジェクト: MingleiYang/gwide
def getGeneNamesFromGTF():
    """Print the unique gene names of one feature biotype found in a GTF file.

    CLI options: -f GTF path, -g biotype (default 'tRNA'), -i intron filter
    (both / int_cont / int_less), -o output file (default stdout).
    Writes one gene name per line, preserving first-seen order.
    """
    parser = OptionParser(
        usage="getGenesNames; type usage: %prog [options] -f filename")
    files = OptionGroup(parser, "File input options")
    files.add_option(
        "-f",
        "--input_file",
        dest="gtf_file",
        help=
        "Provide the path to your gtf data file. Default is standard input.",
        type="str",
        default=None)
    files.add_option(
        "-g",
        "--genes",
        dest="genes",
        help=
        "Which biotype of features to get: mRNA, tRNA, rRNA, snRNA, snoRNA",
        type="str",
        default='tRNA')
    # Note: the original had a stray trailing comma after this call (a no-op
    # tuple expression); removed.
    files.add_option(
        "-i",
        "--introns",
        dest="introns",
        help=
        "Introns? both - not discriminate; int_cont -only intron containing; int_less - only int less",
        choices=["both", "int_cont", "int_less"],
        default="both")
    files.add_option(
        "-o",
        "--output_file",
        dest="output_file",
        help=
        "Use this flag to provide an output file name. Default is standard output.",
        default=None)
    parser.add_option_group(files)
    (options, args) = parser.parse_args()

    ### By default, input and output are expected from the standard input or standard output.
    # Restore default SIGPIPE behaviour so piping stdout into e.g. `head`
    # terminates the script instead of raising IOError.
    signal(SIGPIPE, SIG_DFL)
    outfile = sys.stdout
    if options.output_file:
        outfile = open(options.output_file, "w")

    # The parsed GTF is needed only for intron lookups (-i int_cont/int_less).
    gtf = GTF2.Parse_GTF()
    gtf.read_GTF(gtm.getGTF(options.gtf_file))
    names_list = list()

    ### Scan the raw GTF, collecting unique gene names of the requested biotype.
    for line in open(gtm.getGTF(options.gtf_file), "r"):
        if line.startswith('#'):  # skip header/comment lines
            continue
        line_elements = line.strip().split('\t')
        if str(line_elements[1]) != options.genes:
            continue
        match = re.search(r"gene_name\s\"(.*?)\"", str(line_elements[8]))
        if match is None:
            # BUG FIX: the original used a bare `except: pass` here, which
            # left `name` bound to the value from a previous line and could
            # silently re-process it (or raise NameError on the first line).
            # Lines without a gene_name attribute are now skipped explicitly.
            continue
        name = match.group(1)
        if options.introns == "both":
            if name not in names_list:
                names_list.append(name)
        elif options.introns == "int_cont":
            # Keep only genes whose GTF entry has intron coordinates.
            if gtf.intronCoordinates(name) and name not in names_list:
                names_list.append(name)
        elif options.introns == "int_less":
            if not gtf.intronCoordinates(name) and name not in names_list:
                names_list.append(name)

    outfile.write('\n'.join(names_list) + '\n')
    # BUG FIX: do not close sys.stdout when no -o file was given.
    if outfile is not sys.stdout:
        outfile.close()