def parse_args(raw_args):
    """Parse the command line of the HotNet results-website generator.

    Registered options:
        -r / --results_files      one or more results.json paths (required)
        -ef / --edge_file         edge-list TSV of the network (required)
        -nn / --network_name      display name, defaults to 'Network'
        -o / --output_directory   where the website is written (required)

    Args:
        raw_args: Argument strings; forwarded unchanged to the parser.

    Returns:
        The object returned by the parser's ``parse_args`` call.
    """
    # fromfile_prefix_chars='@' is passed through to hnap.HotNetArgParser;
    # presumably this enables '@file' argument files as in argparse.
    cli = hnap.HotNetArgParser(
        fromfile_prefix_chars='@',
        description='Creates a website showing the subnetworks output by HotNet.')

    # Inputs produced by HotNet plus the network topology.
    cli.add_argument('-r', '--results_files', required=True, nargs='+',
                     help='Paths to results.json files output by HotNet')
    cli.add_argument('-ef', '--edge_file', required=True,
                     help='Path to TSV file listing edges of the interaction network, where\
                              each row contains the indices of two genes that are connected in the\
                              network.')

    # Presentation and output location.
    cli.add_argument('-nn', '--network_name', default='Network',
                     help='Display name for the interaction network.')
    cli.add_argument('-o', '--output_directory', required=True,
                     help='Output directory in which the website should be generated.')

    return cli.parse_args(raw_args)
Esempio n. 2
0
def parse_args(raw_args):
    """Parse the command line of the HotNet output-annotation script.

    The only option is the required ``--hotnet_output_json`` path.

    Args:
        raw_args: Argument strings; forwarded unchanged to the parser.

    Returns:
        The object returned by the parser's ``parse_args`` call.
    """
    summary = "For HotNet output from mutation data, generates an annotated output file that\
                   includes the number of samples mutated in each gene and the mutation frequency\
                   of each subnetwork."
    cli = hnap.HotNetArgParser(fromfile_prefix_chars='@', description=summary)
    cli.add_argument(
        '--hotnet_output_json',
        help='Path to JSON output file produced by running "runHotNet.py"',
        required=True)

    parsed = cli.parse_args(raw_args)
    return parsed
Esempio n. 3
0
def parse_args(raw_args):
    """Parse the command line of the simple-run HotNet helper script.

    Required options are the influence matrix (``-mf``), its index file
    (``-if``) and the heat file (``-hf``). Everything else is optional:
    ``min_cc_size`` defaults to 3, ``num_permutations`` to 100,
    ``output_directory`` to 'hotnet_output', ``network_name`` to 'Network',
    and ``parallel`` to False (toggled by ``--parallel`` / ``--no-parallel``).

    Args:
        raw_args: Argument strings; forwarded unchanged to the parser.

    Returns:
        The object returned by the parser's ``parse_args`` call.
    """
    # Fixed a stray '.' after 'HotNet,' that showed up in the --help output.
    description = "Helper script for simple runs of generalized HotNet,\
                   including automated parameter selection."
    parser = hnap.HotNetArgParser(description=description, fromfile_prefix_chars='@')

    parser.add_argument('-r', '--runname', help='Name of run / disease.')
    parser.add_argument('-mf', '--infmat_file', required=True,
                        help='Path to .mat file containing influence matrix')
    parser.add_argument('-if', '--infmat_index_file', required=True,
                        help='Path to tab-separated file containing an index in the first column\
                              and the name of the gene represented at that index in the second\
                              column of each line.')
    parser.add_argument('-hf', '--heat_file', required=True,
                        help='Path to a tab-separated file containing a gene name in the first\
                              column and the heat score for that gene in the second column of\
                              each line.')
    # No default here: None lets the consumer distinguish "not given" from 0.
    parser.add_argument('-ms', '--min_heat_score', type=float,
                        help='Minimum heat score for a gene to be eligible for inclusion in a\
                              returned connected component. By default, all genes with positive\
                              heat scores will be included. (To include genes with score zero, set\
                              min_heat_score to 0).')
    parser.add_argument('-ccs', '--min_cc_size', type=int, default=3,
                        help='Minimum size connected components that should be returned.')
    parser.add_argument('-n', '--num_permutations', type=int, default=100,
                        help='Number of permutations that should be used for parameter selection\
                              and statistical significance testing.')
    parser.add_argument('-o', '--output_directory', default='hotnet_output',
                        help='Output directory. Files results.json, components.txt, and\
                              significance.txt will be generated in subdirectories for each delta.')
    # --parallel / --no-parallel write to the same destination; the default
    # (False) is set explicitly below.
    parser.add_argument('--parallel', dest='parallel', action='store_true',
                        help='Run permutation tests in parallel. Only recommended for machines\
                              with at least 8 cores.')
    parser.add_argument('--no-parallel', dest='parallel', action='store_false',
                        help='Run permutation tests sequentially. Recommended for machines\
                              with fewer than 8 cores.')
    parser.add_argument('-ef', '--edge_file',
                        help='Path to TSV file listing edges of the interaction network, where\
                              each row contains the indices of two genes that are connected in the\
                              network. This is used to create subnetwork visualizations; if not\
                              provided, visualizations will not be made.')
    parser.add_argument('-nn', '--network_name', default='Network',
                        help='Display name for the interaction network. (Used for subnetwork\
                              visualizations)')
    parser.set_defaults(parallel=False)

    return parser.parse_args(raw_args)
Esempio n. 4
0
def parse_args(raw_args):
    """Parse the command line of the heat-file generator.

    Two subcommands are defined; each stores its loader under ``heat_fn``:

    * ``scores``   -- pre-computed heat scores (``heat_fn=load_direct_heat``)
    * ``mutation`` -- SNV/CNA mutation data (``heat_fn=load_mutation_heat``)

    Both inherit the shared ``-o/--output_file`` option from a parent parser.

    A subcommand is mandatory. On Python 3 argparse treats subcommands as
    optional unless told otherwise, which would yield a namespace without
    ``heat_fn`` (or any other option) when none is given; the parser now
    rejects that case with a usage error instead.

    Args:
        raw_args: Argument strings; forwarded unchanged to the parser.

    Returns:
        The object returned by the parser's ``parse_args`` call. Besides the
        chosen subcommand's options it carries ``heat_fn`` and ``heat_type``
        (the name of the selected subcommand).
    """
    description = "Generates a JSON heat file for input to runHotNet."
    parser = hnap.HotNetArgParser(description=description,
                                  fromfile_prefix_chars='@')

    # Options shared by every subcommand live on a help-less parent parser.
    parent_parser = hnap.HotNetArgParser(add_help=False,
                                         fromfile_prefix_chars='@')
    parent_parser.add_argument(
        '-o',
        '--output_file',
        help='File path to which heat score information should be output \
                                     in JSON format. If none given, output will be written to stdout.'
    )

    # dest gives the action a name (needed for a clean "required" error on
    # older Python 3 releases); required is set as an attribute rather than a
    # keyword because the keyword only exists from Python 3.7 on.
    subparsers = parser.add_subparsers(title='Heat score type',
                                       dest='heat_type')
    subparsers.required = True

    # --- 'scores': heat read directly from a gene/score table ---------------
    heat_parser = subparsers.add_parser('scores',
                                        help='Pre-computed heat scores',
                                        parents=[parent_parser])
    heat_parser.add_argument(
        '-hf',
        '--heat_file',
        required=True,
        help='Path to a tab-separated file containing a gene name in the first\
                                   column and the heat score for that gene in the second column of\
                                   each line.')
    heat_parser.add_argument(
        '-ms',
        '--min_heat_score',
        type=float,
        help='Minimum heat score for including genes in the resulting output\
                                   file. By default, all genes with positive heat scores will be\
                                   included.')
    heat_parser.add_argument(
        '-gff',
        '--gene_filter_file',
        default=None,
        help='Path to file listing genes whose heat scores should be\
                                   preserved, one per line. If present, all other heat scores\
                                   will be discarded.')
    heat_parser.add_argument(
        '-e',
        '--excluded_genes_output_file',
        help='File path to which the list of genes that were excluded from\
                                   the heat score output due to the specified filtering parameters\
                                   should be written, one gene per line. If no genes were filtered,\
                                   the file will be empty.')
    heat_parser.set_defaults(heat_fn=load_direct_heat)

    # --- 'mutation': heat derived from SNV / CNA data ------------------------
    mutation_parser = subparsers.add_parser('mutation',
                                            help='Mutation data',
                                            parents=[parent_parser])
    mutation_parser.add_argument(
        '--snv_file',
        required=True,
        help='Path to a tab-separated file containing SNVs where the first\
                                       column of each line is a sample ID and subsequent columns\
                                       contain the names of genes with SNVs in that sample. Lines\
                                       starting with "#" will be ignored.')
    mutation_parser.add_argument(
        '--cna_file',
        required=True,
        help='Path to a tab-separated file containing CNAs where the first\
                                       column of each line is a sample ID and subsequent columns\
                                       contain gene names followed by "(A)" or "(D)" indicating an\
                                       amplification or deletion in that gene for the sample.\
                                       Lines starting with "#" will be ignored.'
    )
    mutation_parser.add_argument(
        '--sample_file',
        default=None,
        help='File listing samples. Any SNVs or CNAs in samples not listed\
                                       in this file will be ignored. If HotNet is run with mutation\
                                       permutation testing, all samples in this file will be eligible\
                                       for random mutations even if the sample did not have any\
                                       mutations in the real data. If not provided, the set of samples\
                                       is assumed to be all samples that are provided in the SNV\
                                       or CNA data.')
    mutation_parser.add_argument(
        '--gene_file',
        default=None,
        help='File listing tested genes. SNVs or CNAs in genes not listed\
                                       in this file will be ignored. If HotNet is run with mutation\
                                       permutation testing, every gene in this file will be eligible\
                                       for random mutations even if the gene did not have mutations\
                                       in any samples in the original data. If not provided, the set\
                                       of tested genes is assumed to be all genes that have mutations\
                                       in either the SNV or CNA data.')
    mutation_parser.add_argument(
        '--min_freq',
        type=int,
        default=1,
        help='The minimum number of samples in which a gene must have an\
                                       SNV to be considered mutated in the heat score calculation.'
    )
    # valid_cna_filter_thresh is defined elsewhere; per the help text it is
    # expected to accept only values > .5 -- TODO confirm against its source.
    mutation_parser.add_argument(
        '--cna_filter_threshold',
        type=valid_cna_filter_thresh,
        default=None,
        help='Proportion of CNAs in a gene across samples that must share\
                                       the same CNA type in order for the CNAs to be included. This\
                                       must either be > .5, or the default, None, in which case all\
                                       CNAs will be included.')
    mutation_parser.set_defaults(heat_fn=load_mutation_heat)

    return parser.parse_args(raw_args)
Esempio n. 5
0
def parse_args(raw_args):
    """Parse the command line of the HotNet threshold-finding script.

    The permutation technique is chosen with a subcommand whose name is
    stored in ``perm_type``:

    * ``heat``      -- permute heat scores
    * ``mutations`` -- permute mutation data (needs gene length/order files
      and a default background mutation rate)

    Options common to both (influence matrix, heat file, number of
    permutations, parallel switch, output file, ...) are declared once on a
    parent parser and inherited by each subcommand.

    A subcommand is mandatory: every required option is attached to the
    subparsers, so on Python 3 (where subcommands are optional by default) an
    invocation without one would silently return a namespace holding nothing
    but ``perm_type=None``. The parser now reports a usage error instead.

    Args:
        raw_args: Argument strings; forwarded unchanged to the parser.

    Returns:
        The object returned by the parser's ``parse_args`` call.
    """
    description = "Runs HotNet threshold-finding procedure.\
                   Note that some or all parameters can be specified via a configuration file by\
                   passing '@<ConfigFileName>' as a command-line parameter, e.g.\
                   'python findThreshold.py @testConf.txt --runname TestRun'."

    parser = hnap.HotNetArgParser(description=description,
                                  fromfile_prefix_chars='@')

    # Parent parser for arguments common to both permutation types.
    parent_parser = hnap.HotNetArgParser(add_help=False,
                                         fromfile_prefix_chars='@')
    parent_parser.add_argument('-r',
                               '--runname',
                               help='Name of run / disease.')
    parent_parser.add_argument(
        '-mf',
        '--infmat_file',
        required=True,
        help='Path to .mat file containing influence matrix')
    parent_parser.add_argument(
        '-mn',
        '--infmat_name',
        default='Li',
        help='Variable name of the influence matrix in the .mat file')
    # The original also passed default=None here, which is meaningless for a
    # required option (and None is the implicit default anyway).
    parent_parser.add_argument(
        '-if',
        '--infmat_index_file',
        required=True,
        help='Path to tab-separated file containing an index in the first\
                                      column and the name of the gene represented at that index in\
                                      the second column of each line.')
    parent_parser.add_argument(
        '-hf',
        '--heat_file',
        required=True,
        help='JSON heat score file generated via generateHeat.py')
    parent_parser.add_argument('-n',
                               '--num_permutations',
                               type=int,
                               required=True,
                               help='Number of permuted data sets to generate')
    # --parallel / --no-parallel write to the same destination; the default
    # (False) is set explicitly below.
    parent_parser.add_argument(
        '--parallel',
        dest='parallel',
        action='store_true',
        help='Run permutation tests in parallel. Only recommended for machines\
                                     with at least 8 cores.')
    parent_parser.add_argument(
        '--no-parallel',
        dest='parallel',
        action='store_false',
        help='Run permutation tests sequentially. Recommended for machines\
                                     with fewer than 8 cores.')
    parent_parser.add_argument(
        '-o',
        '--output_file',
        help='Output file.  If none given, output will be written to stdout.')
    parent_parser.set_defaults(parallel=False)

    subparsers = parser.add_subparsers(title='Permutation techniques',
                                       dest='perm_type')
    # Set as an attribute (not a keyword) because add_subparsers() only
    # accepts required= from Python 3.7 on.
    subparsers.required = True

    # Subparser for options for permuting heat scores.
    heat_parser = subparsers.add_parser('heat',
                                        help='Permute heat scores',
                                        parents=[parent_parser])
    heat_parser.add_argument(
        '-pgf',
        '--permutation_genes_file',
        default=None,
        help='Path to file containing a list of additional genes that can have\
                                   permuted heat values assigned to them in permutation tests'
    )

    # Subparser for options for permuting mutation data.
    mutation_parser = subparsers.add_parser('mutations',
                                            help='Permute mutation data',
                                            parents=[parent_parser])
    mutation_parser.add_argument(
        '-glf',
        '--gene_length_file',
        required=True,
        help='Path to tab-separated file containing gene names in the\
                                       first column and the length of the gene in base pairs in\
                                       the second column')
    mutation_parser.add_argument(
        '-gof',
        '--gene_order_file',
        required=True,
        help='Path to file containing tab-separated lists of genes on\
                                 each chromosome, in order of their position on the chromosome, one\
                                  chromosome per line')
    mutation_parser.add_argument('-b',
                                 '--bmr',
                                 type=float,
                                 required=True,
                                 help='Default background mutation rate')
    mutation_parser.add_argument(
        '-bf',
        '--bmr_file',
        help='File listing gene-specific BMRs. If none, the default BMR\
                                       will be used for all genes.')

    return parser.parse_args(raw_args)
Esempio n. 6
0
def parse_args(raw_args):
    """Parse the command line of the main HotNet run script.

    Top-level options describe the run itself (influence matrix, heat file,
    ``delta`` threshold, minimum component size, output directory). The
    significance-testing mode is picked with a subcommand whose name is
    stored in ``permutation_type``:

    * ``none``      -- no permutation tests (takes no further options)
    * ``heat``      -- permute heat scores
    * ``mutations`` -- permute mutation data

    ``heat`` and ``mutations`` inherit the permutation options
    (``num_permutations``, ``cc_start_size``, ``cc_stop_size`` and the
    parallel switch) from a shared parent parser.

    Args:
        raw_args: Argument strings; forwarded unchanged to the parser.

    Returns:
        The object returned by the parser's ``parse_args`` call.
    """
    description = "Runs HotNet algorithm.\
                   Note that some or all parameters can be specified via a configuration file by\
                   passing '@<ConfigFileName>' as a command-line parameter, e.g.\
                   'python runHotNet.py --runname TestRun @testConf.txt'."

    parser = hnap.HotNetArgParser(description=description,
                                  fromfile_prefix_chars='@')

    parser.add_argument('-r', '--runname', help='Name of run / disease.')
    parser.add_argument('-mf',
                        '--infmat_file',
                        required=True,
                        help='Path to .mat file containing influence matrix')
    parser.add_argument(
        '-mn',
        '--infmat_name',
        default='Li',
        help='Variable name of the influence matrix in the .mat file')
    parser.add_argument(
        '-if',
        '--infmat_index_file',
        required=True,
        help='Path to tab-separated file containing an index in the first column\
                              and the name of the gene represented at that index in the second\
                              column of each line.')
    parser.add_argument(
        '-hf',
        '--heat_file',
        required=True,
        help='JSON heat score file generated via generateHeat.py')
    parser.add_argument('-d',
                        '--delta',
                        type=float,
                        required=True,
                        help='Weight threshold for edge removal')
    parser.add_argument(
        '-ccs',
        '--min_cc_size',
        type=int,
        default=3,
        help='Minimum size connected components that should be returned.')
    parser.add_argument(
        '-o',
        '--output_directory',
        required=True,
        help='Output directory. Files results.json, components.txt, and\
                              significance.txt will be generated.')

    # Parent parser for arguments common to all permutation types.
    parent_parser = hnap.HotNetArgParser(add_help=False,
                                         fromfile_prefix_chars='@')
    parent_parser.add_argument(
        '-n',
        '--num_permutations',
        type=int,
        required=True,
        help='Number of permutation tests to run; set to 0 to skip running\
                                     permutation tests.')
    parent_parser.add_argument(
        '-s',
        '--cc_start_size',
        type=int,
        default=2,
        help='Smallest connected component size to count in permutation tests')
    parent_parser.add_argument(
        '-l',
        '--cc_stop_size',
        type=int,
        default=10,
        help='Largest connected component size to count in permutation tests')
    # --parallel / --no-parallel write to the same destination; the default
    # (False) is set explicitly below.
    parent_parser.add_argument(
        '--parallel',
        dest='parallel',
        action='store_true',
        help='Run permutation tests in parallel. Only recommended for machines\
                                     with at least 8 cores.')
    parent_parser.add_argument(
        '--no-parallel',
        dest='parallel',
        action='store_false',
        help='Run permutation tests sequentially. Recommended for machines\
                                     with fewer than 8 cores.')
    parent_parser.set_defaults(parallel=False)

    # NOTE(review): on Python 3 argparse treats subcommands as optional, so
    # omitting the subcommand yields permutation_type=None rather than a usage
    # error. Confirm how the caller handles None before making it required.
    subparsers = parser.add_subparsers(title='Heat score type',
                                       dest='permutation_type')

    subparsers.add_parser(
        'none',
        help='Do not perform statistical significance permutation tests')

    heat_parser = subparsers.add_parser('heat',
                                        help='Permute heat scores',
                                        parents=[parent_parser])
    heat_parser.add_argument(
        '-pgf',
        '--permutation_genes_file',
        help='Path to file containing a list of additional genes that can have\
                                   permuted heat values assigned to them in permutation tests'
    )

    mutation_parser = subparsers.add_parser('mutations',
                                            help='Permute mutation data',
                                            parents=[parent_parser])
    mutation_parser.add_argument(
        '-glf',
        '--gene_length_file',
        required=True,
        help='Path to tab-separated file containing gene names in the\
                                       first column and the length of the gene in base pairs in\
                                       the second column')
    # Help text typo fixed: 'chromosme' -> 'chromosome'.
    mutation_parser.add_argument(
        '-gof',
        '--gene_order_file',
        required=True,
        help='Path to file containing tab-separated lists of genes on\
                                 each chromosome, in order of their position on the chromosome, one\
                                  chromosome per line')
    mutation_parser.add_argument('-b',
                                 '--bmr',
                                 type=float,
                                 required=True,
                                 help='Default background mutation rate')
    mutation_parser.add_argument(
        '-bf',
        '--bmr_file',
        help='File listing gene-specific BMRs. If none, the default BMR\
                                       will be used for all genes.')

    return parser.parse_args(raw_args)