예제 #1
0
def add_network_arguments(parser):

    parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(
        prog, max_help_position=40, width=90)
    description = "Creates a TF-TF gene regulation network from annotated transcription factor binding sites"
    parser.description = format_help_description("CreateNetwork", description)

    parser._action_groups.pop()  #pop -h

    #Required arguments
    required = parser.add_argument_group('Required arguments')
    required.add_argument(
        '--TFBS',
        metavar="",
        help="File(s) containing TFBS (with annotation) to create network from",
        nargs="*")
    required.add_argument('--origin',
                          metavar="",
                          help="File containing mapping of TF <-> origin gene")

    #Optional arguments
    optional = parser.add_argument_group('Optional arguments')
    optional.add_argument('--start',
                          metavar="",
                          help="Name of node to start in (default: all nodes)")
    optional.add_argument(
        '--max-len',
        metavar="",
        help="Maximum number of nodes in paths through graph (default: 4)",
        type=int,
        default=4)
    #optional.add_argument('--unique', action='store_true', help="Only include edges once (default: edges can occur multiple times in case of multiple binding sites)")

    runargs = parser.add_argument_group("Run arguments")
    runargs.add_argument(
        '--outdir',
        metavar="",
        help="Path to output directory (default: tobias_network)",
        default="tobias_network")
    runargs = add_logger_args(runargs)

    return (parser)
예제 #2
0
파일: parsers.py 프로젝트: smallfade/TOBIAS
def add_formatmotifs_arguments(parser):

	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=40, width=90)
	description = ""
	parser.description = format_help_description("FormatMotifs", description) 
	
	parser._action_groups.pop()	#pop -h

	#Required arguments
	required = parser.add_argument_group('Required arguments')
	required.add_argument('--input', metavar="", nargs="*", help="One or more input motif files (required)")			
	required.add_argument('--output', metavar="", help="If task == join, output is the joined output file; if task == split, output is a directory (required)")
	
	additional = parser.add_argument_group('Additional arguments')
	additional.add_argument('--format', metavar="", help="Desired motif output format (pfm, jaspar, meme) (default: \"jaspar\")", choices=["pfm", "jaspar", "meme"], default="jaspar")
	additional.add_argument('--task', metavar="", help="Which task to perform on motif files (join/split) (default: join)", choices=["join", "split"], default="join")
	additional.add_argument('--filter', metavar="", help="File containing list of motif names/ids to filter on. Only motifs fitting entries in filter will be output.")
	additional = add_logger_args(additional)

	return(parser)
예제 #3
0
파일: parsers.py 프로젝트: smallfade/TOBIAS
def add_downloaddata_arguments(parser):

	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=40, width=90)
	description = "Download test data for the TOBIAS commandline examples"
	parser.description = format_help_description("DownloadData", description) 

	parser._action_groups.pop()	#pop -h

	arguments = parser.add_argument_group('Arguments')
	arguments.add_argument('--endpoint', metavar="", help="Link to the s3 server (default: The loosolab s3 server)", default="https://s3.mpi-bn.mpg.de")
	arguments.add_argument('--bucket', metavar="", help="Name of bucket to download (default: data-tobias-2020)", default="data-tobias-2020")
	#arguments.add_argument('--target', metavar="", help="Name of directory to save files to (default: name of bucket)")
	arguments.add_argument('--patterns', metavar="", help="List of patterns for files to download e.g. '*.txt' (default: *)", default="*")
	arguments.add_argument('--username', metavar="", help="Username for endpoint (default: None set)")
	arguments.add_argument("--key", metavar="", help="Access key for endpoint (default: None set)")
	arguments.add_argument("--yaml", metavar="", help="Set the endpoint/bucket/access information through a config file in .yaml format (NOTE: overwrites commandline arguments)")
	arguments.add_argument("--force", action="store_true", help="Force download of already existing files (default: warn and skip)")

	arguments = add_logger_args(arguments)

	return parser
예제 #4
0
파일: parsers.py 프로젝트: smallfade/TOBIAS
def add_maxpos_arguments(parser):

	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=40, width=90)
	description = "MaxPos identifies the position of maximum signal (from bigwig) within a given set of .bed-regions. Used to identify peak of signals such as accessibility or footprint scores.\n\n"
	description += "Usage:\nTOBIAS MaxPos --bed <regions.bed> --bigwig <signal.bw>\n\n"
	description += "The output is a 4-column .bed-file (default output is stdout, but you can use --output to set a specific output file).\n"
	parser.description = format_help_description("MaxPos", description)

	parser._action_groups.pop()	#pop -h
	
	#Required arguments
	required = parser.add_argument_group('Required arguments')
	required.add_argument('--bed', metavar="", help="Regions to search for max position within")
	required.add_argument('--bigwig', metavar="", help="Scores used to identify maximum value")

	#Optional arguments
	optional = parser.add_argument_group('Optional arguments')
	optional.add_argument('--output', metavar="", help="Path to output .bed-file (default: scored sites are written to stdout)") 
	optional.add_argument('--invert', help="Find minimum position instead of maximum position", action='store_true', default=False)

	return(parser)
예제 #5
0
파일: parsers.py 프로젝트: smallfade/TOBIAS
def add_plotchanges_arguments(parser):

	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=35, width=90)
	description = "PlotChanges is a utility to plot the changes in TF binding across multiple conditions as predicted by TOBIAS BINDetect.\n\n"
	description += "Example usage:\n$ echo CTCF GATA > TFS.txt\n$ TOBIAS PlotChanges --bindetect <bindetect_results.txt> --TFS TFS.txt\n\n"

	parser.description = format_help_description("PlotChanges", description)

	parser._action_groups.pop()	#pop -h

	required_arguments = parser.add_argument_group('Required arguments')
	required_arguments.add_argument('--bindetect', metavar="", help='Bindetect_results.txt file from BINDetect run')
	
	#All other arguments are optional
	optional_arguments = parser.add_argument_group('Optional arguments')
	optional_arguments.add_argument('--TFS', metavar="", help='Text file containing names of TFs to show in plot (one per line)') 
	optional_arguments.add_argument('--output', metavar="", help='Output file for plot (default: bindetect_changes.pdf)', default="bindetect_changes.pdf")
	optional_arguments.add_argument('--conditions', metavar="", help="Ordered list of conditions to show (default: conditions are ordered as within the bindetect file)", nargs="*")
	optional_arguments = add_logger_args(optional_arguments)
	
	return(parser)
예제 #6
0
파일: parsers.py 프로젝트: tomtommie/TOBIAS
def add_filterfragments_arguments(parser):

    parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(
        prog, max_help_position=40, width=90)
    description = "FilterFragments can filter out fragments from a .bam-file of paired-end reads based on the overlap with regions in the given .bed-file."
    #description += "Usage: "
    parser.description = format_help_description("FilterFragments",
                                                 description)

    parser._action_groups.pop()  #pop -h

    IO = parser.add_argument_group('Input / output arguments')
    IO.add_argument('--bam', metavar="", help=".bam-file to filter")
    IO.add_argument(
        '--regions',
        metavar="",
        help=".bed-file containing regions to filter fragments from")
    IO.add_argument(
        '--mode',
        help=
        "Mode 1: Remove fragment if both reads overlap .bed-regions. Mode 2: Remove whole fragment if one read is overlapping .bed-regions (default: 1)",
        choices=[1, 2],
        default=1)
    IO.add_argument(
        '--output',
        metavar="",
        help=
        "Path to the filtered .bam-file (default: <prefix of --bam>_filtered.bam)"
    )
    IO.add_argument(
        '--threads',
        metavar="",
        type=int,
        help=
        "Number of threads used for decompressing/compressing bam (default: 10)",
        default=10)

    IO = add_logger_args(IO)

    return (parser)
예제 #7
0
파일: parsers.py 프로젝트: smallfade/TOBIAS
def add_scorebigwig_arguments(parser):

	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=40, width=90)
	description = "ScoreBigwig calculates scores (such as footprint-scores) from bigwig files (such as ATAC-seq cutsites calculated using the ATACorrect tool).\n\n"
	description += "Usage: ScoreBigwig --signal <cutsites.bw> --regions <regions.bed> --output <output.bw>\n\n"
	description += "Output:\n- <output.bw>"
	parser.description = format_help_description("ScoreBigwig", description)
	
	parser._action_groups.pop()	#pop -h

	#Required arguments
	required = parser.add_argument_group('Required arguments')
	required.add_argument('-s', '--signal', metavar="<bigwig>", help="A .bw file of ATAC-seq cutsite signal")
	required.add_argument('-o', '--output', metavar="<bigwig>", help="Full path to output bigwig")			
	required.add_argument('-r', '--regions', metavar="<bed>", help="Genomic regions to run footprinting within")

	optargs = parser.add_argument_group('Optional arguments')
	optargs.add_argument('--score', metavar="<score>", choices=["footprint", "sum", "mean", "none"], help="Type of scoring to perform on cutsites (footprint/sum/mean/none) (default: footprint)", default="footprint")
	optargs.add_argument('--absolute', action='store_true', help="Convert bigwig signal to absolute values before calculating score")
	optargs.add_argument('--extend', metavar="<int>", type=int, help="Extend input regions with bp (default: 100)", default=100)
	optargs.add_argument('--smooth', metavar="<int>", type=int, help="Smooth output signal by mean in <bp> windows (default: no smoothing)", default=1)
	optargs.add_argument('--min-limit', metavar="<float>", type=float, help="Limit input bigwig value range (default: no lower limit)") 		#default none
	optargs.add_argument('--max-limit', metavar="<float>", type=float, help="Limit input bigwig value range (default: no upper limit)") 		#default none

	footprintargs = parser.add_argument_group('Parameters for score == footprint')
	footprintargs.add_argument('--fp-min', metavar="<int>", type=int, help="Minimum footprint width (default: 20)", default=20)
	footprintargs.add_argument('--fp-max', metavar="<int>", type=int, help="Maximum footprint width (default: 50)", default=50)
	footprintargs.add_argument('--flank-min', metavar="<int>", type=int, help="Minimum range of flanking regions (default: 10)", default=10)
	footprintargs.add_argument('--flank-max', metavar="<int>", type=int, help="Maximum range of flanking regions (default: 30)", default=30)
	
	sumargs = parser.add_argument_group('Parameters for score == sum')
	sumargs.add_argument('--window', metavar="<int>", type=int, help="The window for calculation of sum (default: 100)", default=100)

	runargs = parser.add_argument_group('Run arguments')
	runargs.add_argument('--cores', metavar="<int>", type=int, help="Number of cores to use for computation (default: 1)", default=1)
	runargs.add_argument('--split', metavar="<int>", type=int, help="Split of multiprocessing jobs (default: 100)", default=100)
	runargs = add_logger_args(runargs)

	return(parser)
예제 #8
0
파일: parsers.py 프로젝트: smallfade/TOBIAS
def add_motifclust_arguments(parser):
	"""Parsing arguments using argparse

	Returns
	-------
	ArgumentParser
		an object containing all parameters given.
	"""

	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=40, width=90)
	description = "Cluster motifs based on similarity and create one consensus motif per cluster.\n\n"
	description += "Usage:\nTOBIAS ClusterMotifs --motifs <motifs.jaspar>\n\n"
	description += "NOTE: This tool requres the python package 'gimmemotifs'."
	parser.description = format_help_description("ClusterMotifs", description) 

	parser._action_groups.pop() #pop -h

	required = parser.add_argument_group('Required arguments')
	required.add_argument("-m", "--motifs", required=True, help="One or more motif files to compare and cluster", nargs="*", metavar="")
	
	optional = parser.add_argument_group('Optional arguments')
	optional.add_argument("-t", "--threshold", metavar="", help="Clustering threshold (Default = 0.3)", type=float, default=0.3)  
	optional.add_argument('--dist_method', metavar="", help="Method for calculating similarity between motifs (default: pcc)", choices=["pcc", "seqcor", "ed", "distance", "wic", "chisq", "akl", "sdd"], default="pcc")
	optional.add_argument('--clust_method', metavar="", help="Method for clustering (See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.cluster.hierarchy.linkage.html)", default="average", choices=["single","complete","average","weighted","centroid","median","ward"])
	optional.add_argument("-a", "--cons_format", metavar="", choices= ['transfac', 'meme', 'pwm', 'pfm', 'jaspar'], help="Format of consensus motif file [‘transfac’, ‘meme’, ‘pwm’, 'pfm', 'jaspar'] (Default: jaspar)", default="jaspar")
	optional.add_argument("-p", "--prefix", metavar="", help="Output prefix (default: 'motif_comparison')", default="motif_comparison")
	optional.add_argument("-o", "--outdir", metavar="", help="Output directory (default: 'clustermotifs_output')", default="clustermotifs_output")
	optional = add_logger_args(optional)

	visualisation = parser.add_argument_group('Visualisation arguments')
	visualisation.add_argument("-e", "--type", dest="type", choices= ['png', 'pdf', 'jpg'], help="Plot file type [png, pdf, jpg] (Default: pdf)", default="pdf")
	#visualisation.add_argument("-x", "--width", dest="width", help="Width of Heatmap (Default: autoscaling)", type=int)
	#visualisation.add_argument("-y", "--height", dest="height", help="Height of Heatmap (Default: autoscaling)", type=int)
	visualisation.add_argument("-d", "--dpi", dest="dpi", help="Dpi for plots (Default: 100)", type=int, default=100)
	#visualisation.add_argument("-ncc", "--no_col_clust", dest="ncc", help="No column clustering", action="store_true")
	#visualisation.add_argument("-nrc", "--no_row_clust", dest="nrc", help="No row clustering", action="store_true")
	visualisation.add_argument("-c", "--color_palette", dest="color", help="Color palette (All possible paletts: https://python-graph-gallery.com/197-available-color-palettes-with-matplotlib/. Add '_r' to reverse palette.)", default="YlOrRd_r")

	return parser
예제 #9
0
파일: parsers.py 프로젝트: smallfade/TOBIAS
def add_subsample_arguments(parser):

	parser.formatter_class=lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=40, width=100)
	description = "Subsample a .bam-file into random subsets (of percentage size --start to --end with step --step).\n"
	description = "NOTE: Requires 'samtools' command available at runtime."
	parser.description = format_help_description("SubsampleBam", description)

	parser._action_groups.pop()	#pop -h

	#Required arguments
	args = parser.add_argument_group('Input arguments')
	args.add_argument('--bam', metavar="", help="Path to .bam-file")
	args.add_argument('--no_rand', metavar="", type=int, help="Number of randomizations (per step) (default: 3)", default=3)
	args.add_argument('--start', metavar="", type=int, help="Start of percent subsample (default: 0)", default=0)
	args.add_argument('--end', metavar="", type=int, help="End of percent subsample (default: 100)", default=100)
	args.add_argument('--step', metavar="", type=int, help="Step between --start and --end (default: 10)", default=10)
	args.add_argument('--cores', metavar="", type=int, help="Cores for multiprocessing (default: 1)", default=1)
	args.add_argument('--outdir', metavar="", help="Output directory (default: subsamplebam_output)", default="subsamplebam_output")
	args.add_argument('--prefix', metavar="", help="Prefix for output files (default: prefix of .bam)")
	args.add_argument('--force', action="store_true", help="Force creation of subsampled .bam-files (default: only create if not existing)")
	args = add_logger_args(args)

	return(parser)
예제 #10
0
파일: parsers.py 프로젝트: smallfade/TOBIAS
def add_aggregate_arguments(parser):

	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=40, width=90)
	description = ""
	parser.description = format_help_description("PlotAggregate", description)

	parser._action_groups.pop()	#pop -h

	IO = parser.add_argument_group('Input / output arguments')
	IO.add_argument('--TFBS', metavar="<bed>", nargs="*", help="TFBS sites (*required)") 						#default is None
	IO.add_argument('--signals', metavar="<bigwig>", nargs="*", help="Signals in bigwig format (*required)")	#default is None
	IO.add_argument('--regions', metavar="<bed>", nargs="*", help="Regions to overlap with TFBS (optional)", default=[])
	IO.add_argument('--whitelist', metavar="<bed>", nargs="*", help="Only plot sites overlapping whitelist (optional)", default=[])
	IO.add_argument('--blacklist', metavar="<bed>", nargs="*", help="Exclude sites overlapping blacklist (optional)", default=[])
	IO.add_argument('--output', metavar="", help="Path to output (default: TOBIAS_aggregate.pdf)", default="TOBIAS_aggregate.pdf")

	PLOT = parser.add_argument_group('Plot arguments')
	PLOT.add_argument('--title', metavar="", help="Title of plot (default: \"Aggregated signals\")", default="Aggregated signals")
	PLOT.add_argument('--flank', metavar="", help="Flanking basepairs (+/-) to show in plot (counted from middle of the TFBS) (default: 60)", default=60, type=int)
	PLOT.add_argument('--TFBS-labels', metavar="", help="Labels used for each TFBS file (default: prefix of each --TFBS)", nargs="*")
	PLOT.add_argument('--signal-labels', metavar="", help="Labels used for each signal file (default: prefix of each --signals)", nargs="*")
	PLOT.add_argument('--region-labels', metavar="", help="Labels used for each regions file (default: prefix of each --regions)", nargs="*")
	PLOT.add_argument('--share-y', metavar="", help="Share y-axis range across plots (none/signals/sites/both). Use \"--share-y signals\" if bigwig signals have similar ranges. Use \"--share_y sites\" if sites per bigwig are comparable, but bigwigs themselves aren't comparable (default: none)", choices=["none", "signals", "sites", "both"], default="none")
	
	#Signals / regions
	PLOT.add_argument('--normalize', action='store_true', help="Normalize the aggregate signal(s) to be between 0-1 (default: the true range of values is shown)")
	PLOT.add_argument('--negate', action='store_true', help="Negate overlap with regions")
	PLOT.add_argument('--smooth', metavar="<int>", type=int, help="Smooth output signal by taking the mean of <smooth> bp windows (default: 1 (no smooth)", default=1)
	PLOT.add_argument('--log-transform', help="Log transform the signals before aggregation", action="store_true")
	PLOT.add_argument('--plot-boundaries', help="Plot TFBS boundaries (Note: estimated from first region in each --TFBS)", action='store_true')
	PLOT.add_argument('--signal-on-x', help="Show signals on x-axis and TFBSs on y-axis (default: signal is on y-axis)", action='store_true')
	PLOT.add_argument('--remove-outliers', metavar="<float>", help="Value between 0-1 indicating the percentile of regions to include, e.g. 0.99 to remove the sites with 1%% highest values (default: 1)", type=lambda x: restricted_float(x, 0, 1), default=1)

	RUN = parser.add_argument_group("Run arguments")
	RUN = add_logger_args(RUN)
	
	return(parser)
예제 #11
0
파일: parsers.py 프로젝트: smallfade/TOBIAS
def add_scorebed_arguments(parser):

	parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(prog, max_help_position=40, width=90)
	description = "ScoreBed is a utility to score .bed-file regions with values from a .bigwig-file. The output is a .bed-file with the bigwig value(s) as extra column(s). Options --position and --math can be used to adjust scoring scheme."
	parser.description = format_help_description("ScoreBed", description)

	parser._action_groups.pop()	#pop -h
	
	#Required arguments
	required = parser.add_argument_group('Required arguments')
	required.add_argument('--bed', metavar="", help="Sites to score (.bed file)")
	required.add_argument('--bigwigs', metavar="", nargs="*",  help="Scores to assign to regions in .bed (.bw file(s))")
	
	#Optional arguments
	optional = parser.add_argument_group('Optional arguments')
	optional.add_argument('--output', metavar="", help="Path to output .bed-file (default: scored sites are written to stdout)") 
	optional.add_argument('--subset', metavar="", help="Subset scoring to .bed regions and set all other sites to --null value (default: all sites in input file will be scored)")
	optional.add_argument('--null', metavar="", help="If --subset is given, which score/label to add to non-scored regions (default: 0)", default="0", type=float)
	optional.add_argument('--position', metavar="", help="Position in sites to score (start/mid/end/full) (default: full)", choices=["mid", "start", "end", "full"], default="full")
	optional.add_argument('--math', metavar="", help="If position == full, choose math to perform on signal (min/max/mean/sum) (default: mean)", choices=["min", "max", "mean", "sum"], default="mean")
	optional = add_logger_args(optional)
	#optional.add_argument('--buffer', metavar="", help="Lines to buffer before writing (default: 10000)", type=int, default=10000)

	return(parser)
예제 #12
0
파일: parsers.py 프로젝트: tomtommie/TOBIAS
def add_tracks_arguments(parser):

    parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(
        prog, max_help_position=40, width=90)
    description = "Plot genomic tracks (such as cutsite or footprint scores) in specific genomic regions.\n"
    description += "This function is a wrapper for the svist4get package (Artyom A. Egorov, \"svist4get: a simple visualization tool for genomic tracks from sequencing experiments\", BMC Bioinformatics, Volume 20, 2019, 113)"
    description += " which allows for automatic creation of multiple plots by using bigwigs/bedfiles."
    parser.description = format_help_description("PlotTracks", description)
    parser._action_groups.pop()  #pop -h

    IO = parser.add_argument_group('Input / output arguments')
    IO.add_argument(
        '--bigwigs',
        metavar="",
        action='append',
        nargs="*",
        help=
        "One or more bigwigs to show. Note: All bigwigs within one \"--bigwigs\" argument will be normalized to each other. "
        +
        "It is possible to give multiple \"--bigwigs\" arguments, which will be normalized independently per group (required)",
        default=[])
    IO.add_argument('--regions',
                    metavar="",
                    help="Genomic regions to plot (required)")
    IO.add_argument('--sites',
                    metavar="",
                    help="Genomic sites to show in plot (optional)")
    IO.add_argument('--highlight',
                    metavar="",
                    help="Regions to highlight in plot (optional)")
    IO.add_argument('--gtf',
                    metavar="",
                    help="GTF file containing genes to show (optional)")

    IO.add_argument('--width',
                    metavar="",
                    help="Width of plot in cm (default 15)",
                    type=float,
                    default=15)
    #IO.add_argument('--height', metavar="")
    IO.add_argument('--colors',
                    metavar="",
                    nargs="*",
                    help="List of specific colors to use for plotting tracks",
                    default=None)
    IO.add_argument('--labels',
                    metavar="",
                    nargs="*",
                    help="Labels for tracks (default: prefix of bigwig)")
    IO.add_argument(
        '--max-transcripts',
        metavar="",
        type=int,
        help=
        "Set a limit on number of transcripts per gene shown in plot (default: 1)",
        default=1)

    IO.add_argument('--outdir',
                    metavar="",
                    help="Output folder (default: plottracks_output)",
                    default="plottracks_output")
    IO = add_logger_args(IO)

    return (parser)
예제 #13
0
파일: parsers.py 프로젝트: tomtommie/TOBIAS
def add_heatmap_arguments(parser):

    parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(
        prog, max_help_position=40, width=90)
    description = "PlotHeatmap plots a heatmap of signals from bigwig(s) (each row is one site) as well as the aggregate signal across all sites."
    parser.description = format_help_description("PlotHeatmap", description)

    parser._action_groups.pop()  #pop -h

    IO = parser.add_argument_group('Input / output arguments')
    IO.add_argument('--TFBS',
                    metavar="",
                    nargs="*",
                    action='append',
                    help="TFBS sites per column (*required)"
                    )  #if more than one, set to next column
    IO.add_argument('--signals',
                    metavar="",
                    nargs="*",
                    help="Signals in bigwig format (*required)")
    IO.add_argument('--output',
                    metavar="",
                    help="Output filename (default: TOBIAS_heatmap.pdf)",
                    default="TOBIAS_heatmap.pdf")

    PLOT = parser.add_argument_group('Plot arguments')
    PLOT.add_argument('--plot-boundaries',
                      help="Plot TFBS boundaries",
                      action='store_true')
    PLOT.add_argument(
        '--share-colorbar',
        help=
        "Share colorbar across all bigwigs (default: estimate colorbar per bigwig)",
        action='store_true')
    PLOT.add_argument('--flank', metavar="", help="", type=int, default=75)

    PLOT.add_argument('--title', metavar="", default="TOBIAS heatmap")
    PLOT.add_argument('--TFBS-labels',
                      metavar="",
                      nargs="*",
                      action='append',
                      help="Labels of TFBS (default: basename of --TFBS)")
    PLOT.add_argument(
        '--signal-labels',
        metavar="",
        nargs="*",
        help="Labels of signals (default: basename of --signals)")

    PLOT.add_argument(
        '--show-columns',
        nargs="*",
        metavar="",
        type=int,
        help=
        "Show scores from TFBS column besides heatmap. Set to 0-based python coordinates (for example -1 for last column) (default: None)",
        default=[])
    PLOT.add_argument(
        '--sort-by',
        metavar="",
        help=
        "Columns in .bed to sort heatmap by (default: input .beds are not sorted)",
        type=int)

    RUN = parser.add_argument_group('Run arguments')
    RUN = add_logger_args(RUN)

    return (parser)
예제 #14
0
파일: parsers.py 프로젝트: tomtommie/TOBIAS
def add_tfbscan_arguments(parser):

    parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(
        prog, max_help_position=35, width=90)
    description = "Find positions of Transcription Factor Binding Sites (TFBS) in FASTA sequences by scanning with motifs.\n\n"
    description += "Usage:\nTOBIAS TFBScan --motifs <motifs.txt> --fasta <genome.fa> \n\n"
    description += "By setting --outdir, the output files are:\n- <outdir>/<TF1>.bed\n- <outdir>/<TF2>.bed\n- (...)\n\n"
    description += "By setting --outfile, all TFBS are written to one file (with motif specified in the 4th column of the .bed)."
    parser.description = format_help_description("TFBScan", description)

    parser._action_groups.pop()  #pop -h

    required_arguments = parser.add_argument_group('Required arguments')
    required_arguments.add_argument(
        '-m',
        '--motifs',
        metavar="",
        help='File containing motifs in either MEME, PFM or JASPAR format')
    required_arguments.add_argument(
        '-f',
        '--fasta',
        metavar="",
        help='A fasta file of sequences to use for scanning motifs'
    )  # whole genome file or regions of interest in FASTA format to be scanned with motifs')

    #all other arguments are optional
    optional_arguments = parser.add_argument_group('Optional arguments')
    optional_arguments.add_argument(
        '-r',
        '--regions',
        metavar="",
        help='Subset scanning to regions of interest')
    optional_arguments.add_argument(
        '--outdir',
        metavar="",
        help=
        'Output directory for TFBS sites in one file per motif (default: ./tfbscan_output/). NOTE: Select either --outdir or --outfile.',
        default=None)
    optional_arguments.add_argument(
        '--outfile',
        metavar="",
        help=
        'Output file for TFBS sites joined in one bed-file (default: not set). NOTE: Select either --outdir or --outfile.',
        default=None)

    optional_arguments.add_argument(
        '--naming',
        metavar="",
        help=
        "Naming convention for bed-ids and output files ('id', 'name', 'name_id', 'id_name') (default: 'name_id')",
        choices=["id", "name", "name_id", "id_name"],
        default="name_id")
    optional_arguments.add_argument(
        '--gc',
        metavar="",
        type=lambda x: restricted_float(x, 0, 1),
        help=
        'Set the gc content for background regions (default: will be estimated from fasta)'
    )
    optional_arguments.add_argument(
        '--pvalue',
        metavar="",
        type=lambda x: restricted_float(x, 0, 1),
        help='Set p-value for motif matches (default: 0.0001)',
        default=0.0001)
    optional_arguments.add_argument(
        '--keep-overlaps',
        action='store_true',
        help=
        'Keep overlaps of same motifs (default: overlaps are resolved by keeping best-scoring site)'
    )
    optional_arguments.add_argument(
        '--add-region-columns',
        action='store_true',
        help=
        "Add extra information columns (starting from 4th column) from --regions to the output .bed-file(s) (default: off)"
    )

    RUN = parser.add_argument_group('Run arguments')
    RUN.add_argument('--split',
                     metavar="<int>",
                     type=int,
                     help="Split of multiprocessing jobs (default: 100)",
                     default=100)
    RUN.add_argument('--cores',
                     metavar="",
                     type=int,
                     help='Number of cores to use (default: 1)',
                     default=1)
    RUN.add_argument('--debug', action="store_true", help=argparse.SUPPRESS)
    RUN = add_logger_args(optional_arguments)

    return (parser)
예제 #15
0
파일: parsers.py 프로젝트: tomtommie/TOBIAS
def add_bindetect_arguments(parser):

    parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(
        prog, max_help_position=35, width=90)
    description = "BINDetect takes motifs, signals (footprints) and genome as input to estimate bound transcription factor binding sites and differential binding between conditions. "
    description += "The underlying method is a modified motif enrichment test to see which motifs have the largest differences in signal across input conditions. "
    description += "The output is an in-depth overview of global changes as well as the individual binding site signal-differences.\n\n"
    description += "Usage:\nTOBIAS BINDetect --signals <bigwig1> (<bigwig2> (...)) --motifs <motifs.txt> --genome <genome.fasta> --peaks <peaks.bed>\n\n"
    description += "Output files:\n- <outdir>/<prefix>_figures.pdf\n- <outdir>/<prefix>_results.{txt,xlsx}\n- <outdir>/<prefix>_distances.txt\n"
    description += "- <outdir>/<TF>/<TF>_overview.{txt,xlsx} (per motif)\n- <outdir>/<TF>/beds/<TF>_all.bed (per motif)\n"
    description += "- <outdir>/<TF>/beds/<TF>_<condition>_bound.bed (per motif-condition pair)\n- <outdir>/<TF>/beds/<TF>_<condition>_unbound.bed (per motif-condition pair)\n\n"
    parser.description = format_help_description("BINDetect", description)

    parser._action_groups.pop()  #pop -h

    required = parser.add_argument_group('Required arguments')
    required.add_argument('--signals',
                          metavar="<bigwig>",
                          help="Signal per condition (.bigwig format)",
                          nargs="*")
    required.add_argument(
        '--peaks',
        metavar="<bed>",
        help="Peaks.bed containing open chromatin regions across all conditions"
    )
    required.add_argument('--motifs',
                          metavar="<motifs>",
                          help="Motif file(s) in pfm/jaspar/meme format",
                          nargs="*")
    required.add_argument('--genome',
                          metavar="<fasta>",
                          help="Genome .fasta file")

    optargs = parser.add_argument_group('Optional arguments')
    optargs.add_argument(
        '--cond-names',
        metavar="<name>",
        nargs="*",
        help=
        "Names of conditions fitting to --signals (default: prefix of --signals)"
    )
    optargs.add_argument(
        '--peak-header',
        metavar="<file>",
        help=
        "File containing the header of --peaks separated by whitespace or newlines (default: peak columns are named \"_additional_<count>\")"
    )
    optargs.add_argument(
        '--naming',
        metavar="<string>",
        help=
        "Naming convention for TF output files ('id', 'name', 'name_id', 'id_name') (default: 'name_id')",
        choices=["id", "name", "name_id", "id_name"],
        default="name_id")
    optargs.add_argument(
        '--motif-pvalue',
        metavar="<float>",
        type=lambda x: restricted_float(x, 0, 1),
        help="Set p-value threshold for motif scanning (default: 1e-4)",
        default=0.0001)
    optargs.add_argument(
        '--bound-pvalue',
        metavar="<float>",
        type=lambda x: restricted_float(x, 0, 1),
        help="Set p-value threshold for bound/unbound split (default: 0.001)",
        default=0.001)
    #optargs.add_argument('--volcano-diff-thresh', metavar="<float>", help="", default=0.2)	#not yet implemented
    #optargs.add_argument('--volcano-p-thresh', metavar="<float>", help="", default=0.05)	#not yet implemented

    optargs.add_argument(
        '--pseudo',
        type=float,
        metavar="<float>",
        help=
        "Pseudocount for calculating log2fcs (default: estimated from data)",
        default=None)
    optargs.add_argument(
        '--time-series',
        action='store_true',
        help=
        "Will only compare signals1<->signals2<->signals3 (...) in order of input, and skip all-against-all comparison."
    )
    optargs.add_argument(
        '--skip-excel',
        action='store_true',
        help=
        "Skip creation of excel files - for large datasets, this will speed up BINDetect considerably"
    )
    optargs.add_argument(
        '--output-peaks',
        metavar="<bed>",
        help=
        """Gives the possibility to set the output peak set differently than the input --peaks.
													 				This will limit all analysis to the regions in --output-peaks. 
																	NOTE: --peaks must still be set to the full peak set!""")

    runargs = parser.add_argument_group("Run arguments")
    runargs.add_argument(
        '--outdir',
        metavar="<directory>",
        help=
        "Output directory to place TFBS/plots in (default: bindetect_output)",
        default="bindetect_output")
    optargs.add_argument(
        '--prefix',
        metavar="<prefix>",
        help=
        "Prefix for overview files in --outdir folder (default: bindetect)",
        default="bindetect")
    runargs.add_argument(
        '--cores',
        metavar="<int>",
        type=int,
        help="Number of cores to use for computation (default: 1)",
        default=1)
    runargs.add_argument('--split',
                         metavar="<int>",
                         type=int,
                         help="Split of multiprocessing jobs (default: 100)",
                         default=100)
    runargs.add_argument(
        '--debug', help=argparse.SUPPRESS,
        action='store_true')  #creates extra output for debugging

    runargs = add_logger_args(runargs)

    return (parser)
예제 #16
0
파일: parsers.py 프로젝트: tomtommie/TOBIAS
def add_atacorrect_arguments(parser):

    parser.formatter_class = lambda prog: argparse.RawDescriptionHelpFormatter(
        prog, max_help_position=35, width=90)

    description = "ATACorrect corrects the cutsite-signal from ATAC-seq with regard to the underlying sequence preference of Tn5 transposase.\n\n"
    description += "Usage:\nTOBIAS ATACorrect --bam <reads.bam> --genome <genome.fa> --peaks <peaks.bed>\n\n"
    description += "Output files:\n"
    description += "\n".join([
        "- <outdir>/<prefix>_{0}.bw".format(track)
        for track in ["uncorrected", "bias", "expected", "corrected"]
    ]) + "\n"
    description += "- <outdir>/<prefix>_atacorrect.pdf"
    parser.description = format_help_description("ATACorrect", description)

    parser._action_groups.pop()  #pop -h

    #Required arguments
    reqargs = parser.add_argument_group('Required arguments')
    reqargs.add_argument('-b',
                         '--bam',
                         metavar="<bam>",
                         help="A .bam-file containing reads to be corrected")
    reqargs.add_argument(
        '-g',
        '--genome',
        metavar="<fasta>",
        help="A .fasta-file containing whole genomic sequence")
    reqargs.add_argument('-p',
                         '--peaks',
                         metavar="<bed>",
                         help="A .bed-file containing ATAC peak regions")

    #Optional arguments
    optargs = parser.add_argument_group('Optional arguments')
    optargs.add_argument(
        '--regions-in',
        metavar="<bed>",
        help=
        "Input regions for estimating bias (default: regions not in peaks.bed)"
    )
    optargs.add_argument('--regions-out',
                         metavar="<bed>",
                         help="Output regions (default: peaks.bed)")
    optargs.add_argument(
        '--blacklist',
        metavar="<bed>",
        help="Blacklisted regions in .bed-format (default: None)"
    )  #file containing blacklisted regions to be excluded from analysis")
    optargs.add_argument(
        '--extend',
        metavar="<int>",
        type=int,
        help=
        "Extend output regions with basepairs upstream/downstream (default: 100)",
        default=100)
    optargs.add_argument('--split-strands',
                         help="Write out tracks per strand",
                         action="store_true")
    optargs.add_argument(
        '--norm-off',
        help="Switches off normalization based on number of reads",
        action='store_true')
    optargs.add_argument(
        '--track-off',
        metavar="<track>",
        help=
        "Switch off writing of individual .bigwig-tracks (uncorrected/bias/expected/corrected)",
        nargs="*",
        choices=["uncorrected", "bias", "expected", "corrected"],
        default=[])

    optargs = parser.add_argument_group(
        'Advanced ATACorrect arguments (no need to touch)')
    optargs.add_argument(
        '--k_flank',
        metavar="<int>",
        help="Flank +/- of cutsite to estimate bias from (default: 12)",
        type=int,
        default=12)
    optargs.add_argument(
        '--read_shift',
        metavar="<int>",
        help="Read shift for forward and reverse reads (default: 4 -5)",
        nargs=2,
        type=int,
        default=[4, -5])
    optargs.add_argument(
        '--bg_shift',
        metavar="<int>",
        type=int,
        help=
        "Read shift for estimation of background frequencies (default: 100)",
        default=100)
    optargs.add_argument(
        '--window',
        metavar="<int>",
        help="Window size for calculating expected signal (default: 100)",
        type=int,
        default=100)
    optargs.add_argument(
        '--score_mat',
        metavar="<mat>",
        help=
        "Type of matrix to use for bias estimation (PWM/DWM) (default: DWM)",
        choices=["PWM", "DWM"],
        default="DWM")

    runargs = parser.add_argument_group('Run arguments')
    runargs.add_argument(
        '--prefix',
        metavar="<prefix>",
        help="Prefix for output files (default: same as .bam file)")
    runargs.add_argument(
        '--outdir',
        metavar="<directory>",
        help="Output directory for files (default: current working directory)",
        default="")
    runargs.add_argument(
        '--cores',
        metavar="<int>",
        type=int,
        help="Number of cores to use for computation (default: 1)",
        default=1)
    runargs.add_argument('--split',
                         metavar="<int>",
                         type=int,
                         help="Split of multiprocessing jobs (default: 100)",
                         default=100)

    runargs = add_logger_args(runargs)

    return (parser)