Python getHalGenomes Exemples, hal.stats.halStats.getHalGenomes Python Exemples

Exemple #1

0

Afficher le fichier

def main(argv=None):
    """Command-line entry point: validate inputs, then run computeTreePhyloP.

    Args:
        argv: Full argument vector, program name first (same layout as
            ``sys.argv``).  Defaults to ``sys.argv`` when None.

    Raises:
        RuntimeError: if the hal or mod file is missing, the output
            directory cannot be created, or the root / subtree genome is
            not among the genomes reported by getHalGenomes.

    Side effects:
        Creates ``outWigDir`` if it does not exist.  Adds ``halGenomes``,
        ``root`` (when not given) and ``tempID`` to the parsed namespace
        before handing it to computeTreePhyloP (defined elsewhere).
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Compute PhyloP scores (in wig format) for each genome in"
        " an alignment.  Scores are computed once per column with "
        "halPhyloPMP.py and iteratively lifted down the tree using "
        "halWiggleLiftover (starting at the root).")

    parser.add_argument("hal", help="input hal")
    parser.add_argument("mod", help="model file for PhyloP.  Can be created "
                        "with halPhyloPTrain.py")
    parser.add_argument("outWigDir", help="directory where output wig files"
                        " will be written")
    parser.add_argument("--root", help="Name of root.  If not specified the"
                        " HAL root will be used", default=None)
    parser.add_argument("--numProc",
                        help="Maximum number of processes.",
                        type=int, default=1)
    parser.add_argument("--bigWig",
                        help="Run wigToBigWig on each generated wiggle",
                        action="store_true", default=False)
    parser.add_argument("--subtree",
                        help="Run clade-specific acceleration/conservation on subtree below this node",
                        default=None)
    parser.add_argument("--prec",
                        help="Number of decimal places in wig output", type=int,
                        default=None)

    # need phyloP options here:

    # Honour the argv parameter (argv[0] is the program name); the bare
    # parse_args() call always read sys.argv and silently ignored it.
    args = parser.parse_args(argv[1:])

    if not os.path.isfile(args.hal):
        raise RuntimeError("Input hal file %s not found" % args.hal)
    if not os.path.isfile(args.mod):
        raise RuntimeError("Input mod file %s not found" % args.mod)
    if not os.path.isdir(args.outWigDir):
        os.makedirs(args.outWigDir)
    if not os.path.isdir(args.outWigDir):
        raise RuntimeError("%s not found" % args.outWigDir)

    args.halGenomes = getHalGenomes(args.hal)
    if args.root is None:
        args.root = getHalRootName(args.hal)

    if args.root not in args.halGenomes:
        raise RuntimeError("Root genome %s not found." % args.root)

    # Validate the subtree name itself; the previous condition re-tested
    # args.root, so an unknown --subtree was never caught here.
    if args.subtree is not None and args.subtree not in args.halGenomes:
        raise RuntimeError("Subtree root %s not found." % args.subtree)

    # make a little id tag for temporary maf slices
    S = string.ascii_uppercase + string.digits
    args.tempID = 'halTreePhyloP' + ''.join(random.choice(S) for _ in range(5))

    computeTreePhyloP(args)

Exemple #2

0

Afficher le fichier

Fichier : halLodBenchmark.py Projet : pombredanne/hal-1

def getScanTime(inHalPath, outDir, step):
    """Time one halBranchMutations run and return it as a one-item list.

    For ``step > 0`` the command is run on the path built by
    ``makePath(inHalPath, outDir, step, "lod", "hal")``; otherwise on
    ``inHalPath`` itself.  The genome scanned is the second entry returned
    by getHalGenomes for ``inHalPath``.

    Returns:
        ``[seconds]`` -- wall-clock duration of the shell command.
    """
    if step > 0:
        scanPath = makePath(inHalPath, outDir, step, "lod", "hal")
    else:
        scanPath = inHalPath

    genomeNames = getHalGenomes(inHalPath)
    # index 1 is used below, so at least two genomes are required
    assert len(genomeNames) > 1
    target = genomeNames[1]
    refBed = makePath(inHalPath, outDir, step, target, "bed")

    started = time.time()
    runShellCommand(
        "halBranchMutations %s %s --refFile %s" % (scanPath, target, refBed))
    return [time.time() - started]

Exemple #3

0

Afficher le fichier

def getScanTime(inHalPath, outDir, step):
    """Measure how long halBranchMutations takes on one genome.

    The file scanned is ``inHalPath`` when ``step`` is 0 (or negative), and
    the "lod"/"hal" path produced by makePath for that step otherwise.  The
    genome name is taken from position 1 of getHalGenomes(inHalPath).

    Returns:
        A single-element list holding the elapsed wall-clock seconds.
    """
    useLod = step > 0
    halToScan = (makePath(inHalPath, outDir, step, "lod", "hal")
                 if useLod else inHalPath)

    allGenomes = getHalGenomes(inHalPath)
    # the second genome is picked below, so one genome is not enough
    assert len(allGenomes) > 1
    chosen = allGenomes[1]
    outBed = makePath(inHalPath, outDir, step, chosen, "bed")

    tStart = time.time()
    cmdArgs = (halToScan, chosen, outBed)
    runShellCommand("halBranchMutations %s %s --refFile %s" % cmdArgs)
    tEnd = time.time()
    return [tEnd - tStart]

Exemple #4

0

Afficher le fichier

Fichier : halPhyloPTrain.py Projet : ifiddes/hal

def main(argv=None):
    """Command-line entry point: validate inputs, then run computeModel.

    Args:
        argv: Full argument vector, program name first (same layout as
            ``sys.argv``).  Defaults to ``sys.argv`` when None.

    Raises:
        RuntimeError: on a missing hal/bed input, an unknown substitution
            model, an output path without a ``.mod`` extension, or a
            reference/target genome name not found in the HAL tree or the
            tree given with --tree.
        IOError/OSError: if the output model path cannot be opened for
            writing.

    Side effects:
        Creates (or truncates) the output model file as a writability
        probe.  Adds bedFiles, halGenomes, tree, outDir, outName,
        outMafName, outMafPath, outMafAllPaths and outMafSS to the parsed
        namespace before calling computeModel (defined elsewhere).
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Compute a neutral substitution model for use with "
        "phyloP or halPhlyoP")

    parser.add_argument("hal", help="input hal")
    parser.add_argument("refGenome", help="Name of reference genome")
    parser.add_argument("bedDir", help="BED file or directory containing BED "
                        "files.  By "
                        "default, these files are interpreted to contain only"
                        " coordinates of coding exons, and fourfold degenerate"
                        " sites will automatically be extracted from them."
                        " To disable this behaviour and train on the entire "
                        " file, use the --no4d option.", default=None)
    parser.add_argument("outMod", help="Path to output model file")
    parser.add_argument("--no4d", help="Do not extract fourfold degenerate"
                        " positions from the input bed files.  Rather use "
                        "all bases they contain.",
                        default=False, action="store_true")
    parser.add_argument("--numProc",
                        help="Maximum number of processes for hal2maf.",
                        type=int, default=1)
    parser.add_argument("--noAncestors",
                        help="Don't write ancestral genomes in hal2maf",
                        action="store_true", default=False)
    parser.add_argument("--maxBedLines",
                        help="Split bed files so they have at most this many"
                        " lines",
                        type=int, default=None)
    parser.add_argument("--sliceSize",
                        help="Slice size for hal2maf.",
                        type=int, default=None)
    parser.add_argument("--tree",
                        help="String describing phylogeny in NEWICK format "
                        "that will be used instead of the tree stored in the"
                        " HAL file.  This tree should contain all the species"
                        " in the alignment. Note that it is best to enclose"
                        " this string in quotes",
                        default=None)
    parser.add_argument("--targetGenomes", default=None, nargs='+',
                        help="space separated list of targetGenomes to pass to "
                        "hal2maf. If used, the tree given to --tree should match.")
    parser.add_argument("--substMod", help="Substitution model for phyloFit"
                        ": valid options are JC69|F81|HKY85|HKY85+Gap|REV|"
                        "SSREV|UNREST|R2|R2S|U2|U2S|R3|R3S|U3|U3S",
                        default = "SSREV")
    parser.add_argument("--noModFreqs", help="By default, equilibrium "
                        "frequencies for the nucleotides of the trained model"
                        " are corrected with the observed frequencies of "
                        "the reference genome (using the PHAST modFreqs"
                        " tool.  This flag disables this step, and keeps the"
                        " trained frequencies", action="store_true",
                        default=False)
    parser.add_argument("--error", help="File in which to output confidence"
                        " intervals for the parameters in the model",
                        default=None)
    # Honour the argv parameter (argv[0] is the program name); the bare
    # parse_args() call always read sys.argv and silently ignored it.
    args = parser.parse_args(argv[1:])

    # validate inputs
    if not os.path.isfile(args.hal):
        raise RuntimeError("Input hal file %s not found" % args.hal)
    if not os.path.exists(args.bedDir):
        raise RuntimeError("%s not found" % args.bedDir)

    # validate substitution model
    if args.substMod not in "JC69|F81|HKY85|HKY85+Gap|REV|SSREV|UNREST|R2|R2S|U2|U2S|R3|R3S|U3|U3S".split("|"):
        raise RuntimeError("Invalid substitution model: %s" % args.substMod)

    # validate BEDs
    if os.path.isdir(args.bedDir):
        args.bedFiles = [os.path.join(args.bedDir, f) for f
                         in os.listdir(args.bedDir)
                         if os.path.isfile(os.path.join(args.bedDir, f))]
    else:
        args.bedFiles = [args.bedDir]

    # test output is writeable and has valid extension.  open() raises on
    # failure (a truthiness test on the handle can never fire), and the
    # with-block closes the probe handle instead of leaking it.
    with open(args.outMod, "w"):
        pass
    if os.path.splitext(args.outMod)[1] != ".mod":
        raise RuntimeError("Output model must have .mod extension")

    # if targetGenomes is set, use those. Otherwise, extract from HAL
    if args.targetGenomes is not None:
        args.halGenomes = args.targetGenomes
    else:
        args.halGenomes = getHalGenomes(args.hal)

    # if tree is set, use that. Otherwise, extract from HAL
    if args.tree is None:
        args.tree = getHalTree(args.hal)

    # Make sure that all members of halGenomes and tree are in the actual HAL
    # NOTE(review): a user-supplied --tree is a plain string, so "in" is a
    # substring match there; presumably getHalTree also returns a string --
    # confirm.
    halTree = getHalTree(args.hal)
    if args.refGenome not in halTree:
        raise RuntimeError("Reference genome %s not found." % args.refGenome)
    for targetGenome in args.halGenomes:
        if targetGenome not in halTree:
            raise RuntimeError("Target genome %s not in HAL." % targetGenome)
        if targetGenome not in args.tree:
            raise RuntimeError("Target genome %s not in --tree." % targetGenome)
    args.halGenomes = ','.join(args.halGenomes)

    # temporary MAF / SS paths live next to the output model
    args.outDir = os.path.dirname(args.outMod)
    args.outName = os.path.splitext(os.path.basename(args.outMod))[0]
    args.outMafName = args.outName + "_halPhyloPTrain_temp.maf"
    args.outMafPath = os.path.join(args.outDir, args.outMafName)
    args.outMafAllPaths = args.outMafPath.replace("_halPhyloPTrain_temp.maf",
                                                  "_halPhyloPTrain_temp*.maf")
    args.outMafSS = args.outMafPath.replace("_halPhyloPTrain_temp.maf",
                                            "_halPhyloPTrain_temp.ss")
    computeModel(args)

Exemple #5

0

Afficher le fichier

def getHalTotalSegments(halPath):
    """Add up the segment counts of every genome in a HAL file.

    getHalNumSegments is called once per genome returned by getHalGenomes;
    elements 0 and 1 of each result are summed independently.

    Returns:
        A 2-tuple ``(sum of element 0, sum of element 1)``; ``(0, 0)`` when
        the file lists no genomes.
    """
    firstSum = 0
    secondSum = 0
    for name in getHalGenomes(halPath):
        counts = getHalNumSegments(halPath, name)
        firstSum = firstSum + counts[0]
        secondSum = secondSum + counts[1]
    return (firstSum, secondSum)

Exemple #6

0

Afficher le fichier

Fichier : halPhyloPMP.py Projet : 5l1v3r1/hal-1

def main(argv=None):
    """Command-line entry point: validate inputs, then run runParallelSlices.

    Args:
        argv: Full argument vector, program name first (same layout as
            ``sys.argv``).  Defaults to ``sys.argv`` when None.

    Raises:
        RuntimeError: if the hal or mod file is missing, or the reference
            genome is not among the genomes reported by getHalGenomes.
        IOError/OSError: if the wiggle or chrom-sizes output path cannot be
            written.

    Side effects:
        Briefly creates and then deletes the wiggle (and chrom-sizes)
        output path as a writability probe.  Adds ``halGenomes`` and
        ``tempID`` to the parsed namespace before calling
        runParallelSlices (defined elsewhere).
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Multi-Process wrapper for halPhyloP.")

    parser.add_argument("halFile", help="Input HAL file")
    parser.add_argument("refGenome", help="Reference genome to scan")
    parser.add_argument("modFile",
                        help="Neutral model for PhyloP. Can be "
                        "generated with halPhyloPTrain.py")
    parser.add_argument("wiggleFile", help="Output Wiggle file")

    parser.add_argument("--numProc",
                        help="Maximum number of processes to create.  If "
                        " neither --sliceSize or --splitBySequence are "
                        " specified, then the reference genome will be "
                        "sliced to require --numProc jobs",
                        type=int,
                        default=1)
    parser.add_argument("--sliceSize",
                        help="Maximum slice of reference sequence to process "
                        "in a single process.",
                        type=int,
                        default=None)
    parser.add_argument("--chromSizes",
                        help="Path of file to output chromosome sizes to. "
                        "Necessary for wigToBigWig",
                        default=None)

    ##################################################################
    #HDF5 OPTIONS (as copied from hal/api/hdf5_impl/hdf5CLParser.cpp)
    ##################################################################
    hdf5Grp = parser.add_argument_group('HDF5 HAL Options')
    hdf5Grp.add_argument("--cacheMDC",
                         help="number of metadata slots in hdf5 cache",
                         type=int,
                         default=None)
    hdf5Grp.add_argument("--cacheRDC",
                         help="number of regular slots in hdf5 cache.  "
                         "should be"
                         " a prime number ~= 10 * DefaultCacheRDCBytes / "
                         "chunk",
                         type=int,
                         default=None)
    hdf5Grp.add_argument("--cacheBytes",
                         help="maximum size in bytes of regular hdf5 cache",
                         type=int,
                         default=None)
    hdf5Grp.add_argument("--cacheW0",
                         help="w0 parameter fro hdf5 cache",
                         type=int,
                         default=None)
    hdf5Grp.add_argument("--inMemory",
                         help="load all data in memory (& disable hdf5 cache)",
                         action="store_true",
                         default=False)

    ##################################################################
    #HALPHYLOP OPTIONS (as copied from hal/maf/impl/hal2maf.cpp)
    ##################################################################
    hppGrp = parser.add_argument_group('halPhyloP Options')
    hppGrp.add_argument("--refSequence",
                        help="name of reference sequence within reference "
                        "genome (all sequences if empty)",
                        default=None)
    hppGrp.add_argument("--start",
                        help="coordinate within reference genome (or sequence"
                        " if specified) to start at",
                        type=int,
                        default=None)
    hppGrp.add_argument("--length",
                        help="length of the reference genome (or sequence"
                        " if specified) to convert.  If set to 0,"
                        " the entire thing is converted",
                        type=int,
                        default=None)
    hppGrp.add_argument("--targetGenomes",
                        help="comma-separated (no spaces) list of target "
                        "genomes (others are excluded) (vist all if empty)",
                        default=None)
    hppGrp.add_argument("--dupType",
                        help="Which duplications to mask according to dupMask "
                        "option. Choices are: "
                        "\"all\": Any duplicated region; or "
                        "\"ambiguous\": Regions within duplications where "
                        "alignments from the same species do not contain"
                        " the same base.",
                        default=None)
    hppGrp.add_argument(
        "--dupMask",
        help="What to do with duplicated regions. Choices are: "
        "\"hard\": mask entire alignment column if any "
        "duplications occur; or "
        "\"soft\": mask species where duplications occur.",
        default=None)
    hppGrp.add_argument("--step", help="step size", type=int, default=None)
    hppGrp.add_argument("--refBed",
                        help="Bed file with coordinates to annotate in the "
                        "reference genome to stream from standard "
                        " input.",
                        default=None)
    hppGrp.add_argument(
        "--subtree",
        help="Subtree root for lineage-specific acceleration/conservation",
        default=None)
    hppGrp.add_argument("--prec",
                        help="Number of decimal places in wig output",
                        type=int,
                        default=None)

    # Honour the argv parameter (argv[0] is the program name); the bare
    # parse_args() call always read sys.argv and silently ignored it.
    args = parser.parse_args(argv[1:])

    if not os.path.isfile(args.halFile):
        raise RuntimeError("Input hal file %s not found" % args.halFile)
    if not os.path.isfile(args.modFile):
        raise RuntimeError("Input mod file %s not found" % args.modFile)
    args.halGenomes = getHalGenomes(args.halFile)
    if args.refGenome not in args.halGenomes:
        raise RuntimeError("Reference genome %s not found." % args.refGenome)

    # Writability probe: write one byte, then delete the file again.  The
    # with-block guarantees the handle is closed even if the write fails.
    with open(args.wiggleFile, "w") as test:
        test.write("\n")
    os.remove(args.wiggleFile)

    if args.chromSizes is not None:
        with open(args.chromSizes, "w") as test:
            test.write("\n")
        os.remove(args.chromSizes)

    # make a little id tag for temporary slices
    S = string.ascii_uppercase + string.digits
    args.tempID = 'halPhyloPTemp' + ''.join(random.choice(S) for _ in range(5))

    runParallelSlices(args)

Exemple #7

0

Afficher le fichier

def main(argv=None):
    """Command-line entry point: validate inputs, then run computeModel.

    Args:
        argv: Full argument vector, program name first (same layout as
            ``sys.argv``).  Defaults to ``sys.argv`` when None.

    Raises:
        RuntimeError: on a missing hal/bed input, an unknown substitution
            model, an output path without a ``.mod`` extension, or a
            reference/target genome name not found in the HAL tree or the
            tree given with --tree.
        IOError/OSError: if the output model path cannot be opened for
            writing.

    Side effects:
        Creates (or truncates) the output model file as a writability
        probe.  Adds bedFiles, halGenomes, tree, outDir, outName,
        outMafName, outMafPath, outMafAllPaths and outMafSS to the parsed
        namespace before calling computeModel (defined elsewhere).
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Compute a neutral substitution model for use with "
        "phyloP or halPhlyoP")

    parser.add_argument("hal", help="input hal")
    parser.add_argument("refGenome", help="Name of reference genome")
    parser.add_argument("bedDir",
                        help="BED file or directory containing BED "
                        "files.  By "
                        "default, these files are interpreted to contain only"
                        " coordinates of coding exons, and fourfold degenerate"
                        " sites will automatically be extracted from them."
                        " To disable this behaviour and train on the entire "
                        " file, use the --no4d option.",
                        default=None)
    parser.add_argument("outMod", help="Path to output model file")
    parser.add_argument("--no4d",
                        help="Do not extract fourfold degenerate"
                        " positions from the input bed files.  Rather use "
                        "all bases they contain.",
                        default=False,
                        action="store_true")
    parser.add_argument("--numProc",
                        help="Maximum number of processes for hal2maf.",
                        type=int,
                        default=1)
    parser.add_argument("--noAncestors",
                        help="Don't write ancestral genomes in hal2maf",
                        action="store_true",
                        default=False)
    parser.add_argument("--maxBedLines",
                        help="Split bed files so they have at most this many"
                        " lines",
                        type=int,
                        default=None)
    parser.add_argument("--tree",
                        help="String describing phylogeny in NEWICK format "
                        "that will be used instead of the tree stored in the"
                        " HAL file.  This tree should contain all the species"
                        " in the alignment. Note that it is best to enclose"
                        " this string in quotes",
                        default=None)
    parser.add_argument(
        "--targetGenomes",
        default=None,
        nargs='+',
        help="space separated list of targetGenomes to pass to "
        "hal2maf. If used, the tree given to --tree should match.")
    parser.add_argument("--substMod",
                        help="Substitution model for phyloFit"
                        ": valid options are JC69|F81|HKY85|HKY85+Gap|REV|"
                        "SSREV|UNREST|R2|R2S|U2|U2S|R3|R3S|U3|U3S",
                        default="SSREV")
    parser.add_argument("--noModFreqs",
                        help="By default, equilibrium "
                        "frequencies for the nucleotides of the trained model"
                        " are corrected with the observed frequencies of "
                        "the reference genome (using the PHAST modFreqs"
                        " tool.  This flag disables this step, and keeps the"
                        " trained frequencies",
                        action="store_true",
                        default=False)
    parser.add_argument("--precision",
                        help="Precision to pass to phyloFit (default MED)",
                        choices=["HIGH", "MED", "LOW"],
                        default="MED")
    parser.add_argument("--error",
                        help="File in which to output confidence"
                        " intervals for the parameters in the model",
                        default=None)
    # Honour the argv parameter (argv[0] is the program name); the bare
    # parse_args() call always read sys.argv and silently ignored it.
    args = parser.parse_args(argv[1:])

    # validate inputs
    if not os.path.isfile(args.hal):
        raise RuntimeError("Input hal file %s not found" % args.hal)
    if not os.path.exists(args.bedDir):
        raise RuntimeError("%s not found" % args.bedDir)

    # validate substitution model
    if args.substMod not in "JC69|F81|HKY85|HKY85+Gap|REV|SSREV|UNREST|R2|R2S|U2|U2S|R3|R3S|U3|U3S".split(
            "|"):
        raise RuntimeError("Invalid substitution model: %s" % args.substMod)

    # validate BEDs
    if os.path.isdir(args.bedDir):
        args.bedFiles = [
            os.path.join(args.bedDir, f) for f in os.listdir(args.bedDir)
            if os.path.isfile(os.path.join(args.bedDir, f))
        ]
    else:
        args.bedFiles = [args.bedDir]

    # test output is writeable and has valid extension.  open() raises on
    # failure (a truthiness test on the handle can never fire), and the
    # with-block closes the probe handle instead of leaking it.
    with open(args.outMod, "w"):
        pass
    if os.path.splitext(args.outMod)[1] != ".mod":
        raise RuntimeError("Output model must have .mod extension")

    # if targetGenomes is set, use those. Otherwise, extract from HAL
    if args.targetGenomes is not None:
        args.halGenomes = args.targetGenomes
    else:
        args.halGenomes = getHalGenomes(args.hal)

    # if tree is set, use that. Otherwise, extract from HAL
    if args.tree is None:
        args.tree = getHalTree(args.hal)

    # Make sure that all members of halGenomes and tree are in the actual HAL
    # NOTE(review): a user-supplied --tree is a plain string, so "in" is a
    # substring match there; presumably getHalTree also returns a string --
    # confirm.
    halTree = getHalTree(args.hal)
    if args.refGenome not in halTree:
        raise RuntimeError("Reference genome %s not found." % args.refGenome)
    for targetGenome in args.halGenomes:
        if targetGenome not in halTree:
            raise RuntimeError("Target genome %s not in HAL." % targetGenome)
        if targetGenome not in args.tree:
            raise RuntimeError("Target genome %s not in --tree." %
                               targetGenome)
    args.halGenomes = ','.join(args.halGenomes)

    args.outDir = os.path.dirname(args.outMod)
    args.outName = os.path.splitext(os.path.basename(args.outMod))[0]
    # Random suffix so two runs don't collide
    # (range instead of xrange: identical here and valid on Python 2 and 3)
    suffix = "".join(
        [random.choice(string.ascii_uppercase) for _ in range(7)])
    args.outMafName = args.outName + "_halPhyloPTrain_temp_%s.maf" % suffix
    args.outMafPath = os.path.join(args.outDir, args.outMafName)
    args.outMafAllPaths = args.outMafPath.replace(
        "_halPhyloPTrain_temp_%s.maf" % suffix,
        "_halPhyloPTrain_temp_%s*.maf" % suffix)
    # replace .maf suffix with .ss
    args.outMafSS = args.outMafPath[:-4] + ".ss"
    computeModel(args)

Exemple #8

0

Afficher le fichier

def main(argv=None):
    """Command-line entry point: validate inputs, then run computeModel.

    Args:
        argv: Full argument vector, program name first (same layout as
            ``sys.argv``).  Defaults to ``sys.argv`` when None.

    Raises:
        RuntimeError: on a missing hal/bed input, a reference genome not
            reported by getHalGenomes, an output path without a ``.mod``
            extension, or an unknown substitution model.
        IOError/OSError: if the output model path cannot be opened for
            writing.

    Side effects:
        Creates (or truncates) the output model file as a writability
        probe.  Adds bedFiles, halGenomes, outDir, outName, outMafName,
        outMafPath, outMafAllPaths and outMafSS to the parsed namespace
        before calling computeModel (defined elsewhere).
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Compute a neutral substitution model for use with "
        "phyloP or halPhlyoP")

    parser.add_argument("hal", help="input hal")
    parser.add_argument("refGenome", help="Name of reference genome")
    parser.add_argument("bedDir", help="BED file or directory containing BED "
                        "files.  By "
                        "default, these files are interpreted to contain only"
                        " coordinates of coding exons, and fourfold degenerate"
                        " sites will automatically be extracted from them."
                        " To disable this behaviour and train on the entire "
                        " file, use the --no4d option.", default=None)
    parser.add_argument("outMod", help="Path to output model file")
    parser.add_argument("--no4d", help="Do not extract fourfold degenerate"
                        " positions from the input bed files.  Rather use "
                        "all bases they contain.",
                        default=False, action="store_true")
    parser.add_argument("--numProc",
                        help="Maximum number of processes for hal2maf.",
                        type=int, default=1)
    parser.add_argument("--noAncestors",
                        help="Don't write ancestral genomes in hal2maf",
                        action="store_true", default=False)
    parser.add_argument("--maxBedLines",
                        help="Split bed files so they have at most this many"
                        " lines",
                        type=int, default=None)
    parser.add_argument("--sliceSize",
                        help="Slice size for hal2maf.",
                        type=int, default=None)
    parser.add_argument("--tree",
                        help="String describing phylogeny in NEWICK format "
                        "that will be used instead of the tree stored in the"
                        " HAL file.  This tree should contain all the species"
                        " in the alignment. Note that it is best to enclose"
                        " this string in quotes",
                        default=None)
    parser.add_argument("--substMod", help="Substitution model for phyloFit"
                        ": valid options are JC69|F81|HKY85|HKY85+Gap|REV|"
                        "SSREV|UNREST|R2|R2S|U2|U2S|R3|R3S|U3|U3S",
                        default = "SSREV")
    parser.add_argument("--noModFreqs", help="By default, equilibrium "
                        "frequencies for the nucleotides of the trained model"
                        " are corrected with the observed frequencies of "
                        "the reference genome (using the PHAST  modFreqs"
                        " tool.  This flag disables this step, and keeps the"
                        " trained frequencies", action="store_true",
                        default=False)
    parser.add_argument("--error", help="File in which to output confidence"
                        " intervals for the parameters in the model",
                        default=None)
    # Honour the argv parameter (argv[0] is the program name); the bare
    # parse_args() call always read sys.argv and silently ignored it.
    args = parser.parse_args(argv[1:])

    if not os.path.isfile(args.hal):
        raise RuntimeError("Input hal file %s not found" % args.hal)
    if not os.path.exists(args.bedDir):
        raise RuntimeError("%s not found" % args.bedDir)
    # bedDir may name a single BED file or a directory of them
    if os.path.isdir(args.bedDir):
        args.bedFiles = [os.path.join(args.bedDir, f) for f
                         in os.listdir(args.bedDir)
                         if os.path.isfile(os.path.join(args.bedDir, f))]
    else:
        args.bedFiles = [args.bedDir]
    # Writability probe.  open() raises on failure (a truthiness test on the
    # handle can never fire), and the with-block closes the probe handle
    # instead of leaking it.
    with open(args.outMod, "w"):
        pass
    args.halGenomes = getHalGenomes(args.hal)
    if args.refGenome not in args.halGenomes:
        raise RuntimeError("Reference genome %s not found." % args.refGenome)
    if os.path.splitext(args.outMod)[1] != ".mod":
        raise RuntimeError("Output model must have .mod extension")
    if args.substMod not in "JC69|F81|HKY85|HKY85+Gap|REV|SSREV|UNREST|R2|R2S|U2|U2S|R3|R3S|U3|U3S".split("|"):
        raise RuntimeError("Invalid substitution model: %s" % args.substMod)

    # temporary MAF / SS paths live next to the output model
    args.outDir = os.path.dirname(args.outMod)
    args.outName = os.path.splitext(os.path.basename(args.outMod))[0]
    args.outMafName = args.outName + "_halPhyloPTrain_temp.maf"
    args.outMafPath = os.path.join(args.outDir, args.outMafName)
    args.outMafAllPaths = args.outMafPath.replace("_halPhyloPTrain_temp.maf",
                                                  "_halPhyloPTrain_temp*.maf")
    args.outMafSS = args.outMafPath.replace("_halPhyloPTrain_temp.maf",
                                            "_halPhyloPTrain_temp.ss")
    computeModel(args)

Exemple #9

0

Afficher le fichier

Fichier : halPhyloPMP.py Projet : dayin1989/hal

def main(argv=None):
    """Command-line entry point: validate inputs, then run runParallelSlices.

    Args:
        argv: Full argument vector, program name first (same layout as
            ``sys.argv``).  Defaults to ``sys.argv`` when None.

    Raises:
        RuntimeError: if the hal or mod file is missing, or the reference
            genome is not among the genomes reported by getHalGenomes.
        IOError/OSError: if the wiggle or chrom-sizes output path cannot be
            written.

    Side effects:
        Briefly creates and then deletes the wiggle (and chrom-sizes)
        output path as a writability probe.  Adds ``halGenomes`` and
        ``tempID`` to the parsed namespace before calling
        runParallelSlices (defined elsewhere).
    """
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="Multi-Process wrapper for halPhyloP.")

    parser.add_argument("halFile", help="Input HAL file")
    parser.add_argument("refGenome", help="Reference genome to scan")
    parser.add_argument("modFile", help="Neutral model for PhyloP. Can be "
                        "generated with halPhyloPTrain.py")
    parser.add_argument("wiggleFile", help="Output Wiggle file")

    parser.add_argument("--numProc",
                        help="Maximum number of processes to create.  If "
                        " neither --sliceSize or --splitBySequence are "
                        " specified, then the reference genome will be "
                        "sliced to require --numProc jobs",
                        type=int, default=1)
    parser.add_argument("--sliceSize",
                        help="Maximum slice of reference sequence to process "
                        "in a single process.", type=int,
                        default=None)
    parser.add_argument("--chromSizes",
                        help="Path of file to output chromosome sizes to. "
                        "Necessary for wigToBigWig",
                        default=None)

    ##################################################################
    #HDF5 OPTIONS (as copied from hal/api/hdf5_impl/hdf5CLParser.cpp)
    ##################################################################
    hdf5Grp = parser.add_argument_group('HDF5 HAL Options')
    hdf5Grp.add_argument("--cacheMDC",
                         help="number of metadata slots in hdf5 cache",
                         type=int,
                         default=None)
    hdf5Grp.add_argument("--cacheRDC",
                         help="number of regular slots in hdf5 cache.  "
                         "should be"
                         " a prime number ~= 10 * DefaultCacheRDCBytes / "
                         "chunk",
                         type=int,
                         default=None)
    hdf5Grp.add_argument("--cacheBytes",
                         help="maximum size in bytes of regular hdf5 cache",
                         type=int,
                         default=None)
    hdf5Grp.add_argument("--cacheW0",
                         help="w0 parameter fro hdf5 cache", type=int,
                         default=None)
    hdf5Grp.add_argument("--inMemory",
                         help="load all data in memory (& disable hdf5 cache)",
                         action="store_true",
                         default=False)

    ##################################################################
    #HALPHYLOP OPTIONS (as copied from hal/maf/impl/hal2maf.cpp)
    ##################################################################
    hppGrp = parser.add_argument_group('halPhyloP Options')
    hppGrp.add_argument("--refSequence",
                        help="name of reference sequence within reference "
                        "genome (all sequences if empty)",
                        default=None)
    hppGrp.add_argument("--start",
                        help="coordinate within reference genome (or sequence"
                        " if specified) to start at", type=int,
                        default=None)
    hppGrp.add_argument("--length",
                        help="length of the reference genome (or sequence"
                        " if specified) to convert.  If set to 0,"
                        " the entire thing is converted", type=int,
                        default=None)
    hppGrp.add_argument("--targetGenomes",
                        help="comma-separated (no spaces) list of target "
                        "genomes (others are excluded) (vist all if empty)",
                        default=None)
    hppGrp.add_argument("--dupType",
                        help="Which duplications to mask according to dupMask "
                        "option. Choices are: "
                        "\"all\": Any duplicated region; or "
                        "\"ambiguous\": Regions within duplications where "
                        "alignments from the same species do not contain"
                        " the same base.",
                        default=None)
    hppGrp.add_argument("--dupMask",
                        help="What to do with duplicated regions. Choices are: "
                        "\"hard\": mask entire alignment column if any "
                        "duplications occur; or "
                        "\"soft\": mask species where duplications occur.",
                        default=None)
    hppGrp.add_argument("--step",
                        help="step size", type=int, default=None)
    hppGrp.add_argument("--refBed",
                        help="Bed file with coordinates to annotate in the "
                        "reference genome to stream from standard "
                        " input.",
                        default=None)
    hppGrp.add_argument("--subtree",
                        help="Subtree root for lineage-specific acceleration/conservation",
                        default=None)
    hppGrp.add_argument("--prec",
                        help="Number of decimal places in wig output", type=int,
                        default=None)

    # Honour the argv parameter (argv[0] is the program name); the bare
    # parse_args() call always read sys.argv and silently ignored it.
    args = parser.parse_args(argv[1:])

    if not os.path.isfile(args.halFile):
        raise RuntimeError("Input hal file %s not found" % args.halFile)
    if not os.path.isfile(args.modFile):
        raise RuntimeError("Input mod file %s not found" % args.modFile)
    args.halGenomes = getHalGenomes(args.halFile)
    if args.refGenome not in args.halGenomes:
        raise RuntimeError("Reference genome %s not found." % args.refGenome)

    # Writability probe: write one byte, then delete the file again.  The
    # with-block guarantees the handle is closed even if the write fails.
    with open(args.wiggleFile, "w") as test:
        test.write("\n")
    os.remove(args.wiggleFile)

    if args.chromSizes is not None:
        with open(args.chromSizes, "w") as test:
            test.write("\n")
        os.remove(args.chromSizes)

    # make a little id tag for temporary slices
    S = string.ascii_uppercase + string.digits
    args.tempID = 'halPhyloPTemp' + ''.join(random.choice(S) for _ in range(5))

    runParallelSlices(args)