Ejemplo n.º 1
0
def add_tfbscan_arguments(parser):
	"""Attach the TFBScan command-line interface to ``parser``.

	Sets the help formatter and description, removes the last default
	argument group from the help listing, and registers the required,
	optional and run argument groups.

	Args:
		parser: The ``argparse.ArgumentParser`` (or subparser) to configure.
			It is modified in place.

	Returns:
		The same ``parser`` object that was passed in.
	"""

	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=35, width=90)

	description = (
		"Find positions of Transcription Factor Binding Sites (TFBS) in FASTA sequences by scanning with motifs.\n\n"
		"Usage:\nTOBIAS TFBScan --motifs <motifs.txt> --fasta <genome.fa> \n\n"
		"By setting --outdir, the output files are:\n- <outdir>/<TF1>.bed\n- <outdir>/<TF2>.bed\n- (...)\n\n"
		"By setting --outfile, all TFBS are written to one file (with motif specified in the 4th column of the .bed)."
	)
	parser.description = format_help_description("TFBScan", description)

	# Drop the last default action group (the one holding -h) so that only
	# the custom groups defined below show up in the help output.
	parser._action_groups.pop()

	required_group = parser.add_argument_group('Required arguments')
	required_group.add_argument('-m', '--motifs', metavar="", help='File containing motifs in either MEME, PFM or JASPAR format')
	required_group.add_argument('-f', '--fasta', metavar="", help='A fasta file of sequences to use for scanning motifs')

	# All remaining arguments are optional.
	optional_group = parser.add_argument_group('Optional arguments')
	optional_group.add_argument('-r', '--regions', metavar="", help='Subset scanning to regions of interest')
	# NOTE(review): the help text advertises ./tfbscan_output/ as the default,
	# but the argparse default is None - presumably the caller fills it in
	# when neither --outdir nor --outfile is given; confirm.
	optional_group.add_argument('--outdir', metavar="", help='Output directory for TFBS sites in one file per motif (default: ./tfbscan_output/). NOTE: Select either --outdir or --outfile.', default=None)
	optional_group.add_argument('--outfile', metavar="", help='Output file for TFBS sites joined in one bed-file (default: not set). NOTE: Select either --outdir or --outfile.', default=None)

	optional_group.add_argument('--naming', metavar="", help="Naming convention for bed-ids and output files ('id', 'name', 'name_id', 'id_name') (default: 'name_id')", choices=["id", "name", "name_id", "id_name"], default="name_id")
	optional_group.add_argument('--gc', metavar="", type=lambda x: restricted_float(x,0,1), help='Set the gc content for background regions (default: will be estimated from fasta)')
	optional_group.add_argument('--pvalue', metavar="", type=lambda x: restricted_float(x,0,1), help='Set p-value for motif matches (default: 0.0001)', default=0.0001)
	optional_group.add_argument('--keep-overlaps', action='store_true', help='Keep overlaps of same motifs (default: overlaps are resolved by keeping best-scoring site)')
	optional_group.add_argument('--add-region-columns', action='store_true', help="Add extra information columns (starting from 4th column) from --regions to the output .bed-file(s) (default: off)")

	run_group = parser.add_argument_group('Run arguments')
	run_group.add_argument('--split', metavar="<int>", type=int, help="Split of multiprocessing jobs (default: 100)", default=100)
	run_group.add_argument('--cores', metavar="", type=int, help='Number of cores to use (default: 1)', default=1)
	run_group.add_argument('--debug', action="store_true", help=argparse.SUPPRESS)	# hidden from the help output

	# Register the shared logger options on the run group, matching the other
	# add_*_arguments functions (previously they were attached to the
	# optional group while the run-group variable was overwritten).
	add_logger_args(run_group)

	return parser
Ejemplo n.º 2
0
def add_bindetect_arguments(parser):
	"""Attach the BINDetect command-line interface to ``parser``.

	Sets the help formatter and description, removes the last default
	argument group from the help listing, and registers the required,
	optional and run argument groups.

	Args:
		parser: The ``argparse.ArgumentParser`` (or subparser) to configure.
			It is modified in place.

	Returns:
		The same ``parser`` object that was passed in.
	"""

	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=35, width=90)

	description = (
		"BINDetect takes motifs, signals (footprints) and genome as input to estimate bound transcription factor binding sites and differential binding between conditions. "
		"The underlying method is a modified motif enrichment test to see which motifs have the largest differences in signal across input conditions. "
		"The output is an in-depth overview of global changes as well as the individual binding site signal-differences.\n\n"
		"Usage:\nTOBIAS BINDetect --signals <bigwig1> (<bigwig2> (...)) --motifs <motifs.txt> --genome <genome.fasta> --peaks <peaks.bed>\n\n"
		"Output files:\n- <outdir>/<prefix>_figures.pdf\n- <outdir>/<prefix>_results.{txt,xlsx}\n- <outdir>/<prefix>_distances.txt\n"
		"- <outdir>/<TF>/<TF>_overview.{txt,xlsx} (per motif)\n- <outdir>/<TF>/beds/<TF>_all.bed (per motif)\n"
		"- <outdir>/<TF>/beds/<TF>_<condition>_bound.bed (per motif-condition pair)\n- <outdir>/<TF>/beds/<TF>_<condition>_unbound.bed (per motif-condition pair)\n\n"
	)
	parser.description = format_help_description("BINDetect", description)

	# Drop the last default action group (the one holding -h) so that only
	# the custom groups defined below show up in the help output.
	parser._action_groups.pop()

	required = parser.add_argument_group('Required arguments')
	required.add_argument('--signals', metavar="<bigwig>", help="Signal per condition (.bigwig format)", nargs="*")
	required.add_argument('--peaks', metavar="<bed>", help="Peaks.bed containing open chromatin regions across all conditions")
	required.add_argument('--motifs', metavar="<motifs>", help="Motif file(s) in pfm/jaspar/meme format", nargs="*")
	required.add_argument('--genome', metavar="<fasta>", help="Genome .fasta file")

	optargs = parser.add_argument_group('Optional arguments')
	optargs.add_argument('--cond-names', metavar="<name>", nargs="*", help="Names of conditions fitting to --signals (default: prefix of --signals)")
	optargs.add_argument('--peak-header', metavar="<file>", help="File containing the header of --peaks separated by whitespace or newlines (default: peak columns are named \"_additional_<count>\")")
	optargs.add_argument('--naming', metavar="<string>", help="Naming convention for TF output files ('id', 'name', 'name_id', 'id_name') (default: 'name_id')", choices=["id", "name", "name_id", "id_name"], default="name_id")
	optargs.add_argument('--motif-pvalue', metavar="<float>", type=lambda x: restricted_float(x, 0, 1), help="Set p-value threshold for motif scanning (default: 1e-4)", default=0.0001)
	optargs.add_argument('--bound-pvalue', metavar="<float>", type=lambda x: restricted_float(x, 0, 1), help="Set p-value threshold for bound/unbound split (default: 0.001)", default=0.001)
	# TODO: --volcano-diff-thresh (default 0.2) and --volcano-p-thresh
	# (default 0.05) are not yet implemented.

	optargs.add_argument('--pseudo', type=float, metavar="<float>", help="Pseudocount for calculating log2fcs (default: estimated from data)", default=None)
	optargs.add_argument('--time-series', action='store_true', help="Will only compare signals1<->signals2<->signals3 (...) in order of input, and skip all-against-all comparison.")
	optargs.add_argument('--skip-excel', action='store_true', help="Skip creation of excel files - for large datasets, this will speed up BINDetect considerably")

	runargs = parser.add_argument_group("Run arguments")
	runargs.add_argument('--outdir', metavar="<directory>", help="Output directory to place TFBS/plots in (default: bindetect_output)", default="bindetect_output")
	# --prefix is listed next to --outdir (which its help text refers to);
	# it was previously registered on the optional group from inside this
	# run-arguments section.
	runargs.add_argument('--prefix', metavar="<prefix>", help="Prefix for overview files in --outdir folder (default: bindetect)", default="bindetect")
	runargs.add_argument('--cores', metavar="<int>", type=int, help="Number of cores to use for computation (default: 1)", default=1)
	runargs.add_argument('--split', metavar="<int>", type=int, help="Split of multiprocessing jobs (default: 100)", default=100)
	runargs.add_argument('--debug', help=argparse.SUPPRESS, action='store_true')	# hidden from the help output

	# Shared logger options are registered on the run group.
	add_logger_args(runargs)

	return parser
Ejemplo n.º 3
0
def add_aggregate_arguments(parser):
    """Attach the PlotAggregate command-line interface to ``parser``.

    Sets the help formatter and description, removes the last default
    argument group from the help listing, and registers the input/output,
    plot and run argument groups.

    Args:
        parser: The ``argparse.ArgumentParser`` (or subparser) to configure.
            It is modified in place.

    Returns:
        The same ``parser`` object that was passed in.
    """

    parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(
        prog, max_help_position=40, width=90)
    description = ""
    parser.description = format_help_description("PlotAggregate", description)

    # Drop the last default action group (the one holding -h) so that only
    # the custom groups defined below show up in the help output.
    parser._action_groups.pop()

    io_group = parser.add_argument_group('Input / output arguments')
    # --TFBS and --signals have no explicit default, so they are None when
    # not given on the command line.
    io_group.add_argument(
        '--TFBS',
        metavar="<bed>",
        nargs="*",
        help="TFBS sites (*required)")
    io_group.add_argument(
        '--signals',
        metavar="<bigwig>",
        nargs="*",
        help="Signals in bigwig format (*required)")
    io_group.add_argument(
        '--regions',
        metavar="<bed>",
        nargs="*",
        help="Regions to overlap with TFBS (optional)",
        default=[])
    io_group.add_argument(
        '--whitelist',
        metavar="<bed>",
        nargs="*",
        help="Only plot sites overlapping whitelist (optional)",
        default=[])
    io_group.add_argument(
        '--blacklist',
        metavar="<bed>",
        nargs="*",
        help="Exclude sites overlapping blacklist (optional)",
        default=[])
    io_group.add_argument(
        '--output',
        metavar="",
        help="Path to output plot (default: TOBIAS_aggregate.pdf)",
        default="TOBIAS_aggregate.pdf")
    io_group.add_argument(
        '--output-txt',
        metavar="",
        help="Path to output file for aggregates in .txt-format (default: None)")

    plot_group = parser.add_argument_group('Plot arguments')
    plot_group.add_argument(
        '--title',
        metavar="",
        help="Title of plot (default: \"Aggregated signals\")",
        default="Aggregated signals")
    plot_group.add_argument(
        '--flank',
        metavar="",
        help="Flanking basepairs (+/-) to show in plot (counted from middle of the TFBS) (default: 60)",
        default=60,
        type=int)
    plot_group.add_argument(
        '--TFBS-labels',
        metavar="",
        help="Labels used for each TFBS file (default: prefix of each --TFBS)",
        nargs="*")
    plot_group.add_argument(
        '--signal-labels',
        metavar="",
        help="Labels used for each signal file (default: prefix of each --signals)",
        nargs="*")
    plot_group.add_argument(
        '--region-labels',
        metavar="",
        help="Labels used for each regions file (default: prefix of each --regions)",
        nargs="*")
    # The help text now spells the flag as "--share-y" in both examples; the
    # previous "--share_y" spelling is not an option this parser defines.
    plot_group.add_argument(
        '--share-y',
        metavar="",
        help="Share y-axis range across plots (none/signals/sites/both). Use \"--share-y signals\" if bigwig signals have similar ranges. Use \"--share-y sites\" if sites per bigwig are comparable, but bigwigs themselves aren't comparable (default: none)",
        choices=["none", "signals", "sites", "both"],
        default="none")

    # Signals / regions
    plot_group.add_argument(
        '--normalize',
        action='store_true',
        help="Normalize the aggregate signal(s) to be between 0-1 (default: the true range of values is shown)")
    plot_group.add_argument(
        '--negate',
        action='store_true',
        help="Negate overlap with regions")
    plot_group.add_argument(
        '--smooth',
        metavar="<int>",
        type=int,
        help="Smooth output signal by taking the mean of <smooth> bp windows (default: 1 (no smooth))",
        default=1)
    plot_group.add_argument(
        '--log-transform',
        help="Log transform the signals before aggregation",
        action="store_true")
    plot_group.add_argument(
        '--plot-boundaries',
        help="Plot TFBS boundaries (Note: estimated from first region in each --TFBS)",
        action='store_true')
    plot_group.add_argument(
        '--signal-on-x',
        help="Show signals on x-axis and TFBSs on y-axis (default: signal is on y-axis)",
        action='store_true')
    # "%%" is an escaped percent sign: argparse %-formats help strings.
    plot_group.add_argument(
        '--remove-outliers',
        metavar="<float>",
        help="Value between 0-1 indicating the percentile of regions to include, e.g. 0.99 to remove the sites with 1%% highest values (default: 1)",
        type=lambda x: restricted_float(x, 0, 1),
        default=1)

    # The run group only carries the shared logger options.
    run_group = parser.add_argument_group("Run arguments")
    add_logger_args(run_group)

    return parser