コード例 #1
0
def parse_input(args):
    """Parse and validate the command line arguments of ``PolTools metaplot``.

    Args:
        args: Sequence of argument strings handed to ``argparse``.

    Returns:
        tuple: ``(read_type, regions_filename, sequencing_files_list,
        region_length, max_threads)``. ``read_type`` is one of ``"five"``,
        ``"three"`` or ``"whole"``; ``max_threads`` defaults to the CPU count.

    Raises:
        SystemExit: Raised by ``argparse`` when the arguments are malformed
            (for example a non-positive thread count).

    Anything raised by ``verify_bed_files``, ``determine_region_length`` or
    ``verify_region_length_is_even`` (defined outside this function)
    propagates unchanged.
    """
    def positive_int(num):
        """Convert ``num`` to an int, rejecting anything that is not > 0."""
        try:
            val = int(num)
        except (TypeError, ValueError):
            # Only conversion failures are handled here; a bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            val = None

        if val is None or val <= 0:
            raise argparse.ArgumentTypeError("{} must be positive".format(num))

        return val

    cpu_count = multiprocessing.cpu_count()

    parser = argparse.ArgumentParser(
        prog="PolTools metaplot",
        description=
        "Compute the number of 5'/3'/whole prime reads at each position of the given region.\n"
        + "More information can be found at " +
        "https://github.com/GeoffSCollins/PolTools/blob/master/docs/metaplot.rst"
    )

    parser.add_argument('read_type',
                        metavar='read type',
                        type=str,
                        choices=["five", "three", "whole"],
                        help='either five, three, or whole')

    parser.add_argument(
        'regions_file',
        metavar='regions_file',
        type=str,
        help=
        'Bed formatted file containing all the regions you want to average the sequences'
    )

    parser.add_argument(
        'seq_files',
        metavar='sequencing_files',
        nargs='+',
        type=str,
        help='Bed formatted files from the sequencing experiment')

    # With nargs='?' a bare "-t" yields ``const``; without an explicit const
    # that would be None instead of a usable thread count.
    parser.add_argument('-t',
                        '--threads',
                        dest='threads',
                        metavar='threads',
                        type=positive_int,
                        nargs='?',
                        const=cpu_count,
                        default=cpu_count)

    args = parser.parse_args(args)
    regions_filename = args.regions_file
    sequencing_files_list = args.seq_files
    max_threads = args.threads

    verify_bed_files(regions_filename)
    region_length = determine_region_length(regions_filename)
    verify_region_length_is_even(region_length)

    return args.read_type, regions_filename, sequencing_files_list, region_length, max_threads
コード例 #2
0
def parse_args(args):
    """Parse and validate the arguments of ``PolTools tps_distance_per_gene``.

    Args:
        args: Sequence of argument strings handed to ``argparse``.

    Returns:
        tuple: ``(regions_filename, sequencing_files_list, region_length,
        max_threads)``. ``max_threads`` defaults to the CPU count.

    Raises:
        SystemExit: Raised by ``argparse`` when the arguments are malformed
            (for example a non-positive thread count).

    Anything raised by ``determine_region_length`` or
    ``verify_region_length_is_even`` (defined outside this function)
    propagates unchanged.
    """
    def positive_int(num):
        """Convert ``num`` to an int, rejecting anything that is not > 0."""
        try:
            val = int(num)
        except (TypeError, ValueError):
            # Only conversion failures are handled here; a bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            val = None

        if val is None or val <= 0:
            raise argparse.ArgumentTypeError("{} must be positive".format(num))

        return val

    cpu_count = multiprocessing.cpu_count()

    parser = argparse.ArgumentParser(
        prog='PolTools tps_distance_per_gene',
        description=
        'Determine the most common pausing distance from the max TSS for each gene.\n'
        + "More information can be found at " +
        "https://geoffscollins.github.io/PolTools/tps_distance_per_gene.html")

    parser.add_argument(
        'regions_filename',
        metavar='regions_filename',
        type=str,
        help=
        'Bed formatted regions file with an even region length or a region length of one.'
    )

    # nargs='+' makes argparse itself reject an empty list of sequencing
    # files, so no additional emptiness check is needed afterwards.
    parser.add_argument(
        'seq_files',
        metavar='sequencing_files',
        nargs='+',
        type=str,
        help='Bed formatted files from the sequencing experiment')

    # With nargs='?' a bare "-t" yields ``const``; without an explicit const
    # that would be None instead of a usable thread count.
    parser.add_argument('-t',
                        '--threads',
                        dest='threads',
                        metavar='threads',
                        type=positive_int,
                        nargs='?',
                        const=cpu_count,
                        default=cpu_count)

    args = parser.parse_args(args)

    regions_filename = args.regions_filename
    sequencing_files_list = args.seq_files
    max_threads = args.threads

    # A region length of one is accepted as-is; any other length must be even.
    region_length = determine_region_length(regions_filename)
    if region_length != 1:
        verify_region_length_is_even(region_length)

    return regions_filename, sequencing_files_list, region_length, max_threads
コード例 #3
0
def parse_args(args):
    """Parse the arguments of ``PolTools sequence_from_region_around_max_tss``.

    Args:
        args: Sequence of argument strings handed to ``argparse``.

    Returns:
        tuple: ``(underscore, max_tss_file, search)`` where ``search`` is
        ``[[sign, distance], [sign, distance]]`` for the left and right ends;
        ``sign`` is ``"+"`` or ``"-"`` and ``distance`` is the absolute value.

    Raises:
        SystemExit: With status 2 when ``argparse`` rejects the arguments, or
            status 1 when ``left``/``right`` is 0 or the regions in the max
            TSS file are not 1 bp long.

    Side effects:
        Entries of ``sys.argv`` that look like negative numbers get a leading
        space prepended.
    """
    # Argparse does not allow the input numbers to start with a -. So, we add a space to the input so it works.
    # NOTE(review): this rewrites sys.argv, not the ``args`` parameter, so it
    # only influences parsing when the caller passes sys.argv itself (or None).
    for i, arg in enumerate(sys.argv):
        # The length guard keeps "" and a lone "-" from raising IndexError.
        if len(arg) > 1 and arg[0] == '-' and arg[1].isdigit():
            sys.argv[i] = ' ' + arg

    parser = argparse.ArgumentParser(prog='PolTools sequence_from_region_around_max_tss',
                                     description='Get the sequence from a certain region around the max TSS\n' +
                                                 "More information can be found at " +
                                                 "https://geoffscollins.github.io/PolTools/sequence_from_region_around_max_tss.html"
                                     )

    parser.add_argument('max_tss_file', metavar='max_tss_file', type=str,
                        help='Bed formatted file which has the base of the max TSS')

    parser.add_argument('left', metavar='left', type=int,
                        help='Left end of the region to get the sequence.')

    parser.add_argument('right', metavar='right', type=int,
                        help='Right end of the region to get the sequence.')

    parser.add_argument('-u', '--underscore', action="store_true", dest='underscore',
                        default=False, help='Split the sequences by an underscore instead of a new line.')

    args = parser.parse_args(args)
    max_tss_file = args.max_tss_file
    left = args.left
    right = args.right

    if 0 in (left, right):
        sys.stderr.write("There is no 0 nucleotide. Please try again.\n")
        sys.exit(1)

    region_length = determine_region_length(max_tss_file)

    if region_length != 1:
        sys.stderr.write("The maxTSS bed file must have regions of 1 bp.\n")
        sys.exit(1)

    # Convert search region syntax to a list of [sign, distance] pairs
    search = [
        ["+" if position > 0 else "-", abs(position)]
        for position in (left, right)
    ]

    return args.underscore, max_tss_file, search
コード例 #4
0
def parse_args(args):
    """Parse and validate the arguments of ``PolTools divergent_pileup_metaplot``.

    Args:
        args: Sequence of argument strings handed to ``argparse``.

    Returns:
        tuple: ``(regions_filename, sequencing_files_list, region_length,
        max_threads)``. ``max_threads`` defaults to the CPU count.

    Raises:
        SystemExit: Raised by ``argparse`` when the arguments are malformed
            (for example a non-positive thread count).

    Anything raised by ``determine_region_length`` or
    ``verify_region_length_is_even`` (defined outside this function)
    propagates unchanged.
    """
    def positive_int(num):
        """Convert ``num`` to an int, rejecting anything that is not > 0."""
        try:
            val = int(num)
        except (TypeError, ValueError):
            # Only conversion failures are handled here; a bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            val = None

        if val is None or val <= 0:
            raise argparse.ArgumentTypeError("{} must be positive".format(num))

        return val

    cpu_count = multiprocessing.cpu_count()

    parser = argparse.ArgumentParser(
        prog='PolTools divergent_pileup_metaplot',
        description='Generate a metaplot of the pileup reads.\n' +
        'More information can be found at https://geoffscollins.github.io/PolTools/divergent_pileup_metaplot.html'
    )

    parser.add_argument(
        'regions_file',
        metavar='regions_file',
        type=str,
        help=
        'Bed formatted file containing all the regions you want to average the sequences'
    )

    parser.add_argument(
        'seq_files',
        metavar='sequencing_files',
        nargs='+',
        type=str,
        help='Bed formatted files from the sequencing experiment')

    # With nargs='?' a bare "-t" yields ``const``; without an explicit const
    # that would be None instead of a usable thread count.
    parser.add_argument('-t',
                        '--threads',
                        dest='threads',
                        metavar='threads',
                        type=positive_int,
                        nargs='?',
                        const=cpu_count,
                        default=cpu_count)

    args = parser.parse_args(args)
    regions_filename = args.regions_file
    sequencing_files_list = args.seq_files
    max_threads = args.threads

    region_length = determine_region_length(regions_filename)
    verify_region_length_is_even(region_length)

    return regions_filename, sequencing_files_list, region_length, max_threads
コード例 #5
0
def parse_args(args):
    """Parse the arguments of ``PolTools base_distribution``.

    Args:
        args: Sequence of argument strings handed to ``argparse``.

    Returns:
        tuple: ``(regions_file, region_length)``.

    Raises:
        SystemExit: Raised by ``argparse`` when the arguments are malformed.

    Anything raised by ``verify_bed_files``, ``determine_region_length`` or
    ``verify_region_length_is_even`` (defined outside this function)
    propagates unchanged.
    """
    summary = (
        'Compute the average base composition at each position of the given region.\n'
        "More information can be found at "
        "https://geoffscollins.github.io/PolTools/base_distribution.html"
    )
    arg_parser = argparse.ArgumentParser(prog='PolTools base_distribution',
                                         description=summary)
    arg_parser.add_argument(
        'regions_file',
        metavar='regions_file',
        type=str,
        help='Bed formatted file containing all the regions you want to average the sequences',
    )

    parsed = arg_parser.parse_args(args)
    bed_path = parsed.regions_file

    # Validate the bed file first, then measure it and check the length.
    verify_bed_files(bed_path)
    length = determine_region_length(bed_path)
    verify_region_length_is_even(length)

    return parsed.regions_file, length
コード例 #6
0
def parse_input(args):
    """Parse the arguments of ``PolTools sequence_searches``.

    Each ``search`` argument must look like ``SEQUENCE,left:right`` (for
    example ``TATA,-30:-20``). ``left`` and ``right`` are parsed as ints,
    ``right`` may not be smaller than ``left``, and both must lie within
    half of the region length on either side.

    Args:
        args: Sequence of argument strings handed to ``argparse``.

    Returns:
        tuple: ``(regions_file, cleaned_search_sequences, region_length)``
        where every cleaned search is ``[sequence, left, right]``.

    Raises:
        SystemExit: Raised by ``argparse`` when the arguments are malformed.
        InvalidSearchException: When a search cannot be parsed or falls
            outside the allowed bounds.
    """
    arg_parser = argparse.ArgumentParser(
        prog='PolTools sequence_searches',
        description=(
            'Determine if a sequence in present in a region around the max TSS\n'
            'More information can be found at '
            'https://geoffscollins.github.io/PolTools/sequence_searches.html'
        ),
    )
    arg_parser.add_argument(
        'regions_filename',
        metavar='regions_filename',
        type=str,
        help='Bed formatted regions file with an even region length or a region length of one.',
    )
    arg_parser.add_argument(
        'search',
        metavar='search',
        type=str,
        nargs='+',
        help='Search region formatted as follows: (Sequence),(-/+)left:(-/+)right. Ex: TATA,-30:-20',
    )

    parsed = arg_parser.parse_args(args)
    regions_file = parsed.regions_filename

    region_length = determine_region_length(regions_file)
    verify_region_length_is_even(region_length)

    # Positions may extend this far to either side of the region centre.
    half_length = int(region_length / 2)

    cleaned_search_sequences = []
    for raw_search in parsed.search:
        try:
            sequence, span = raw_search.split(",")
            left_text, right_text = span.split(":")
            left, right = int(left_text), int(right_text)
        except Exception:
            raise InvalidSearchException(raw_search)

        reversed_bounds = right < left
        outside_region = left < (half_length * -1) or right > half_length
        if reversed_bounds or outside_region:
            raise InvalidSearchException(raw_search)

        cleaned_search_sequences.append([sequence, left, right])

    return regions_file, cleaned_search_sequences, region_length
コード例 #7
0
def parse_input(args):
    """Parse the arguments of ``PolTools pausing_distance_distribution_from_maxTSS``.

    Args:
        args: Sequence of argument strings handed to ``argparse``.

    Returns:
        tuple: ``(regions_filename, sequencing_files, max_transcript_length,
        max_threads)``. ``max_transcript_length`` defaults to 100 and
        ``max_threads`` to the CPU count.

    Raises:
        SystemExit: Raised by ``argparse`` when the arguments are malformed
            (for example a non-positive thread count or transcript length).

    Anything raised by ``determine_region_length`` or
    ``verify_region_length_is_even`` (defined outside this function)
    propagates unchanged.
    """
    def positive_int(num):
        """Convert ``num`` to an int, rejecting anything that is not > 0."""
        try:
            val = int(num)
        except (TypeError, ValueError):
            # Only conversion failures are handled here; a bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            val = None

        if val is None or val <= 0:
            raise argparse.ArgumentTypeError("{} must be positive".format(num))

        return val

    default_transcript_length = 100
    cpu_count = multiprocessing.cpu_count()

    parser = argparse.ArgumentParser(
        prog='PolTools pausing_distance_distribution_from_maxTSS',
        description=
        'Quantify the number of transcripts at each length originating from the max TSS\n'
        + "More information can be found at " +
        "https://geoffscollins.github.io/PolTools/pausing_distance_distribution_from_maxTSS.html"
    )

    parser.add_argument(
        'regions_filename',
        metavar='regions_filename',
        type=str,
        help=
        'Bed formatted regions file with an even region length or a region length of one.'
    )

    parser.add_argument(
        'seq_files',
        metavar='sequencing_files',
        nargs='+',
        type=str,
        help='Bed formatted files from the sequencing experiment')

    # With nargs='?' a bare flag yields ``const``; without an explicit const
    # the option would silently become None instead of its default.
    parser.add_argument(
        '-m',
        '--max_transcript_length',
        dest='max_transcript_length',
        metavar='max_transcript_length',
        type=positive_int,
        nargs='?',
        help=
        'Set the maximum transcript length to be outputted. Default is 100',
        const=default_transcript_length,
        default=default_transcript_length)

    parser.add_argument('-t',
                        '--threads',
                        dest='threads',
                        metavar='threads',
                        type=positive_int,
                        nargs='?',
                        const=cpu_count,
                        default=cpu_count)

    args = parser.parse_args(args)
    regions_filename = args.regions_filename
    sequencing_files = args.seq_files
    max_transcript_length = args.max_transcript_length
    max_threads = args.threads

    # A region length of one is accepted as-is; any other length must be even.
    region_length = determine_region_length(regions_filename)

    if region_length != 1:
        verify_region_length_is_even(region_length)

    return regions_filename, sequencing_files, max_transcript_length, max_threads
コード例 #8
0
def parse_input(args):
    """Parse and validate the arguments of ``PolTools region_heatmap``.

    Args:
        args: Sequence of argument strings handed to ``argparse``.

    Returns:
        tuple: ``(read_type, filenames, seq_files_data, heatmap_parameters,
        repeat_amounts, tick_parameters, threads)`` where

        * ``filenames`` is ``(regions_file, output_prefix)``
        * ``seq_files_data`` is a list of ``(seq_file, correction_factor)``
        * ``heatmap_parameters`` is ``(max_black, gamma)``
        * ``repeat_amounts`` is ``(repeat_amount, vertical_averaging)``
        * ``tick_parameters`` is ``(minor, major)``, each multiplied by
          ``repeat_amount``, or ``(None, None)`` unless both were supplied

    Raises:
        SystemExit: With status 2 when ``argparse`` rejects the arguments, or
            status 1 when a sequencing file does not exist.
        argparse.ArgumentTypeError: When a correction factor given with
            ``-s`` is not a positive number (it is checked after parsing).

    Anything raised by ``verify_bed_files``, ``determine_region_length`` or
    ``verify_region_length_is_even`` (defined outside this function)
    propagates unchanged.
    """
    def _positive(convert):
        """Build an argparse ``type`` callback accepting only values > 0."""
        def checker(num):
            try:
                val = convert(num)
            except (TypeError, ValueError):
                # Only conversion failures are handled here; a bare
                # ``except`` would also swallow KeyboardInterrupt/SystemExit.
                val = None

            # ``not val > 0`` (rather than ``val <= 0``) also rejects NaN,
            # for which every ordering comparison is False.
            if val is None or not val > 0:
                raise argparse.ArgumentTypeError("{} must be positive".format(num))

            return val

        return checker

    positive_float = _positive(float)
    positive_int = _positive(int)

    cpu_count = multiprocessing.cpu_count()

    parser = argparse.ArgumentParser(prog='PolTools region_heatmap')

    parser.add_argument('read_type',
                        metavar='read type',
                        type=str,
                        choices=["five", "three", "whole"],
                        help='either five, three, or whole')

    parser.add_argument(
        'regions_file',
        metavar='regions_file',
        type=str,
        help=
        'Bed formatted file containing all the regions you want to average the sequences'
    )

    parser.add_argument(
        '-s',
        '--seq_file',
        action='append',
        nargs=2,
        metavar=('seq_file', 'spike_in'),
        required=True,
        help=
        'Provide the sequencing file with its correction factor. You can supply '
        'more than one sequencing file by adding multiple -s arguments.')

    parser.add_argument('output_prefix',
                        metavar='output_prefix',
                        type=str,
                        help='Prefix for the output filename')

    parser.add_argument(
        '-m',
        '--max_black',
        metavar='max_black',
        dest='max_black',
        type=int,
        default=None,
        help=
        'Max black value of the heatmap. Default is the maximum possible value'
    )

    parser.add_argument(
        '-r',
        '--repeat_amount',
        metavar='repeat_amount',
        dest='repeat_amount',
        type=int,
        default=1,
        help=
        'Each base will be shown in this number of pixels. Default is 1 -- no repeating'
    )

    parser.add_argument(
        '-v',
        '--vertical_averaging',
        metavar='vertical_averaging',
        dest='vertical_averaging',
        type=int,
        default=1,
        help=
        'Average this number of rows into one row. Default is 1 -- no vertical averaging'
    )

    parser.add_argument(
        '-g',
        '--gamma',
        metavar='gamma',
        dest='gamma',
        type=positive_float,
        default=2.2,
        help=
        'Gamma value of the heatmap. Default is 2.2, which is no gamma correction.'
    )

    parser.add_argument(
        '--minor_ticks',
        metavar='minor_ticks',
        dest='minor_ticks',
        type=positive_int,
        default=None,
        help='Distance between minor ticks (bp). Default is no ticks.')

    parser.add_argument(
        '--major_ticks',
        metavar='major_ticks',
        dest='major_ticks',
        type=positive_int,
        default=None,
        help='Distance between major ticks (bp). Default is no ticks.')

    # With nargs='?' a bare "-t" yields ``const``; without an explicit const
    # that would be None instead of a usable thread count.
    parser.add_argument('-t',
                        '--threads',
                        dest='threads',
                        metavar='threads',
                        type=positive_int,
                        nargs='?',
                        const=cpu_count,
                        default=cpu_count)

    args = parser.parse_args(args)

    verify_bed_files(args.regions_file)
    region_length = determine_region_length(args.regions_file)
    verify_region_length_is_even(region_length)

    filenames = args.regions_file, args.output_prefix
    repeat_amounts = args.repeat_amount, args.vertical_averaging
    heatmap_parameters = args.max_black, args.gamma

    seq_files_data = []

    for seq_file, corr_factor in args.seq_file:
        # The correction factor arrives as a raw string because the option
        # takes two values of different kinds; validate it here.
        corr_factor = positive_float(corr_factor)
        seq_files_data.append((seq_file, corr_factor))

        if not os.path.isfile(seq_file):
            sys.stderr.write("File " + seq_file + " was not found.\n")
            sys.exit(1)

    # Ticks are only reported when both spacings were supplied; they are
    # scaled by the horizontal repeat amount.
    if args.minor_ticks is not None and args.major_ticks is not None:
        tick_parameters = (args.minor_ticks * args.repeat_amount,
                           args.major_ticks * args.repeat_amount)
    else:
        tick_parameters = (None, None)

    return args.read_type, filenames, seq_files_data, heatmap_parameters, repeat_amounts, tick_parameters, args.threads