Ejemplo n.º 1
0
 def __str__(self):
     """Format this result as a single tab-separated row.

     Columns, in order: sample category count, sample total, sample
     percentage, background category count, background total, background
     percentage, ratio, p-value, probability of over-representation and
     probability of under-representation.
     """
     sample_percent = IOTools.prettyPercent(
         self.mSampleCountsCategory, self.mSampleCountsTotal)
     background_percent = IOTools.prettyPercent(
         self.mBackgroundCountsCategory, self.mBackgroundCountsTotal)
     ratio = IOTools.val2str(self.mRatio)

     columns = (
         self.mSampleCountsCategory,
         self.mSampleCountsTotal,
         sample_percent,
         self.mBackgroundCountsCategory,
         self.mBackgroundCountsTotal,
         background_percent,
         ratio,
         self.mPValue,
         self.mProbabilityOverRepresentation,
         self.mProbabilityUnderRepresentation,
     )
     return "%i\t%i\t%s\t%i\t%i\t%s\t%s\t%6.4e\t%6.4e\t%6.4e" % columns
Ejemplo n.º 2
0
    def __str__(self):
        """Format all counters as one tab-separated row.

        The row starts with the 18 raw gene/exon/base counters (total,
        overlapping and unique, each for set 1 and set 2) and is followed
        by 12 percentages relating every overlapping/unique counter to the
        total of its own set.
        """
        counters = (
            self.mGenes1, self.mGenes2,
            self.mGenesOverlapping1, self.mGenesOverlapping2,
            self.mGenesUnique1, self.mGenesUnique2,
            self.mExons1, self.mExons2,
            self.mExonsOverlapping1, self.mExonsOverlapping2,
            self.mExonsUnique1, self.mExonsUnique2,
            self.mBases1, self.mBases2,
            self.mBasesOverlapping1, self.mBasesOverlapping2,
            self.mBasesUnique1, self.mBasesUnique2,
        )

        # (part, total) pairs that are turned into percentages
        fractions = (
            (self.mGenesOverlapping1, self.mGenes1),
            (self.mGenesOverlapping2, self.mGenes2),
            (self.mGenesUnique1, self.mGenes1),
            (self.mGenesUnique2, self.mGenes2),
            (self.mExonsOverlapping1, self.mExons1),
            (self.mExonsOverlapping2, self.mExons2),
            (self.mExonsUnique1, self.mExons1),
            (self.mExonsUnique2, self.mExons2),
            (self.mBasesOverlapping1, self.mBases1),
            (self.mBasesOverlapping2, self.mBases2),
            (self.mBasesUnique1, self.mBases1),
            (self.mBasesUnique2, self.mBases2),
        )

        fields = [str(value) for value in counters]
        fields.extend(
            IOTools.prettyPercent(part, total) for part, total in fractions)
        return "\t".join(fields)
Ejemplo n.º 3
0
def summary(infile, outfile):
    '''Compute mapping statistics for a track and write them to *outfile*.

    The names of the PSL files that are counted are derived from
    *outfile*, which is expected to end in ``.mapped.summary``. *infile*
    is not read by this function.

    The output is a two-line tab-separated table: a header and one row
    with the track name, the number of input, merged and mapped entries
    and the corresponding percentages.
    '''

    def _getfiles(filename):
        # Derive the track name and the names of the input, merged and
        # mapped PSL files from the name of the summary file.
        track = filename[:-len(".mapped.summary")]
        if track.endswith(".merged"):
            xtrack = track[:-len(".merged")]
            finput = "%s.psl.gz" % xtrack
            fmerged = "%s.transcripts.transcripts.psl" % xtrack
        else:
            finput = "%s.psl.gz" % track
            fmerged = finput
        fmapped = "%s.mapped.psl" % track
        return track, finput, fmerged, fmapped

    def countPSL(filename):
        # Count the lines that are not comments. If the first such line
        # is a "psLayout" header, the five header lines are subtracted.
        # A file without any non-comment line counts as 0.
        opener = gzip.open if filename.endswith(".gz") else open
        nlines = 0
        has_header = False
        # text mode, so that lines are str for both plain and gzip files
        with opener(filename, "rt") as inf:
            for line in inf:
                if line.startswith("#"):
                    continue
                if nlines == 0:
                    has_header = line.startswith("psLayout")
                nlines += 1
        if has_header:
            return nlines - 5
        return nlines

    track, finput, fmerged, fmapped = _getfiles(outfile)
    ninput = countPSL(finput)
    # subtract header
    # NOTE(review): countPSL already removes the header lines when it
    # finds a psLayout header, and for unmerged tracks fmerged is the
    # same file as finput. The subtraction is kept as in the original -
    # confirm that it is intended.
    nmerged = countPSL(fmerged) - 5
    nmapped = countPSL(fmapped)

    # Write the table only after all counts succeeded, so that a failure
    # does not leave a header-only output file behind.
    with open(outfile, "w") as outf:
        outf.write(
            "track\tinput\tmerged\tpmerged\tmapped\tpmapped\tpoutput\n")
        outf.write("%s\t%i\t%i\t%s\t%i\t%s\t%s\n" %
                   (track, ninput, nmerged,
                    IOTools.prettyPercent(nmerged, ninput),
                    nmapped,
                    IOTools.prettyPercent(nmapped, nmerged),
                    IOTools.prettyPercent(nmapped, ninput)))
Ejemplo n.º 4
0
def main(argv=None):
    """script main.

    Parses the command line and then does one of the following:

    * with ``--go2goslim``: converts GO assignments to GOslim
      assignments and exits,
    * with ``--filename-dump``: dumps GO category assignments from the
      database into a flat file and exits,
    * otherwise: runs the GO analysis for every ontology and every
      foreground gene list, writes the per-section result files and
      finally writes a summary table to ``options.stdout``.

    :param argv: command line arguments; ``sys.argv`` is used if not
        given.
    """

    if argv is None:
        argv = sys.argv

    parser = E.OptionParser(
        version="%prog version: $Id$",
        usage=globals()["__doc__"])

    parser.add_option(
        "-s", "--species", dest="species", type="string",
        help="species to use [default=%default].")

    parser.add_option(
        "-i", "--slims", dest="filename_slims", type="string",
        help="filename with GO SLIM categories "
        "[default=%default].")

    parser.add_option(
        "-g", "--genes-tsv-file", dest="filename_genes", type="string",
        help="filename with genes to analyse "
        "[default=%default].")

    parser.add_option(
        "-b", "--background-tsv-file", dest="filename_background",
        type="string",
        help="filename with background genes to analyse "
        "[default=%default].")

    parser.add_option(
        "-m", "--min-counts", dest="minimum_counts",
        type="int",
        help="minimum count - ignore all categories that have "
        "fewer than # number of genes"
        " [default=%default].")

    parser.add_option(
        "-o", "--sort-order", dest="sort_order", type="choice",
        choices=("fdr", "pvalue", "ratio"),
        help="output sort order [default=%default].")

    parser.add_option(
        "--ontology", dest="ontology", type="string",
        action="append",
        help="go ontologies to analyze. Ontologies are tested "
        "separately [default=%default].")

    parser.add_option(
        "-t", "--threshold", dest="threshold", type="float",
        help="significance threshold [>1.0 = all ]. If --fdr is set, this "
        "refers to the fdr, otherwise it is a cutoff for p-values.")

    parser.add_option(
        "--filename-dump", dest="filename_dump", type="string",
        help="dump GO category assignments into a flatfile "
        "[default=%default].")

    parser.add_option(
        "--gene2name-map-tsv-file", dest="filename_gene2name", type="string",
        help="optional filename mapping gene identifiers to gene names "
        "[default=%default].")

    parser.add_option(
        "--filename-ontology", dest="filename_ontology", type="string",
        help="filename with ontology in OBO format [default=%default].")

    parser.add_option(
        "--filename-input", dest="filename_input", type="string",
        help="read GO category assignments from a flatfile "
        "[default=%default].")

    parser.add_option(
        "--sample-size", dest="sample", type="int",
        help="do sampling (with # samples) [default=%default].")

    parser.add_option(
        "--filename-output-pattern", "--output-filename-pattern",
        dest="output_filename_pattern", type="string",
        help="pattern with output filename pattern "
        "(should contain: %(go)s and %(section)s ) [default=%default]")

    parser.add_option(
        "--fdr", dest="fdr", action="store_true",
        help="calculate and filter by FDR [default=%default].")

    parser.add_option(
        "--go2goslim", dest="go2goslim", action="store_true",
        help="convert go assignments in STDIN to goslim assignments and "
        "write to STDOUT [default=%default].")

    parser.add_option(
        "--gene-pattern", dest="gene_pattern", type="string",
        help="pattern to transform identifiers to GO gene names "
        "[default=%default].")

    parser.add_option(
        "--filename-map-slims", dest="filename_map_slims", type="string",
        help="write mapping between GO categories and GOSlims "
        "[default=%default].")

    parser.add_option(
        "--get-genes", dest="get_genes", type="string",
        help="list all genes in the with a certain GOID [default=%default].")

    parser.add_option(
        "--strict", dest="strict", action="store_true",
        help="require all genes in foreground to be part of background. "
        "If not set, genes in foreground will be added to the background "
        "[default=%default].")

    parser.add_option(
        "-q", "--fdr-method", dest="qvalue_method", type="choice",
        choices=("empirical", "storey", "BH"),
        help="method to perform multiple testing correction by controlling "
        "the fdr [default=%default].")

    parser.add_option(
        "--pairwise", dest="compute_pairwise", action="store_true",
        help="compute pairwise enrichment for multiple gene lists. "
        "[default=%default].")

    # parser.add_option( "--fdr-lambda", dest="qvalue_lambda", type="float",
    #                   help="fdr computation: lambda [default=%default]."  )

    # parser.add_option( "--qvalue-pi0-method", dest="qvalue_pi0_method", type="choice",
    #                    choices = ("smoother", "bootstrap" ),
    # help="fdr computation: method for estimating pi0 [default=%default]."  )

    parser.set_defaults(species=None,
                        filename_genes="-",
                        filename_background=None,
                        filename_slims=None,
                        minimum_counts=0,
                        ontology=[],
                        filename_dump=None,
                        sample=0,
                        fdr=False,
                        output_filename_pattern=None,
                        threshold=0.05,
                        filename_map_slims=None,
                        gene_pattern=None,
                        sort_order="ratio",
                        get_genes=None,
                        strict=False,
                        qvalue_method="empirical",
                        pairs_min_observed_counts=3,
                        compute_pairwise=False,
                        filename_gene2name=None
                        )

    (options, args) = E.start(parser, argv=argv, add_database_options=True)

    if options.go2goslim:
        GO.convertGo2Goslim(options)
        E.stop()
        sys.exit(0)

    if options.fdr and options.sample == 0:
        E.warn("fdr will be computed without sampling")

    #############################################################
    # dump GO
    if options.filename_dump:
        # set default ontologies to GO
        if not options.ontology:
            options.ontology = [
                "biol_process", "mol_function", "cell_location"]

        E.info("dumping GO categories to %s" % (options.filename_dump))

        dbhandle = connectToEnsembl(options)

        outfile = IOTools.open_file(options.filename_dump, "w", create_dir=True)
        GO.DumpGOFromDatabase(outfile,
                              dbhandle,
                              options)
        outfile.close()
        E.stop()
        sys.exit(0)

    #############################################################
    # read GO categories from file
    if options.filename_input:
        E.info("reading association of categories and genes from %s" %
               (options.filename_input))
        infile = IOTools.open_file(options.filename_input)
        gene2gos, go2infos = GO.ReadGene2GOFromFile(infile)
        infile.close()

    if options.filename_gene2name:
        E.info("reading gene identifier to gene name mapping from %s" %
               options.filename_gene2name)
        infile = IOTools.open_file(options.filename_gene2name)
        gene2name = IOTools.readMap(infile, has_header=True)
        infile.close()
        E.info("read %i gene names for %i gene identifiers" %
               (len(set(gene2name.values())),
                len(gene2name)))
    else:
        # use identity mapping
        # NOTE(review): gene2gos is only assigned when --filename-input
        # is given; without it this line raises a NameError - confirm
        # the intended fallback.
        gene2name = dict([(x, x) for x in list(gene2gos.keys())])

    #############################################################
    # read GO ontology from file
    if options.filename_ontology:
        E.info("reading ontology from %s" % (options.filename_ontology))

        infile = IOTools.open_file(options.filename_ontology)
        ontology = GO.readOntology(infile)
        infile.close()

        def _g():
            return collections.defaultdict(GO.GOInfo)
        go2infos = collections.defaultdict(_g)

        # substitute go2infos
        for go in list(ontology.values()):
            go2infos[go.mNameSpace][go.mId] = GO.GOInfo(
                go.mId,
                go_type=go.mNameSpace,
                description=go.mName)

    #############################################################
    # get foreground gene list
    input_foreground, genelists = GO.ReadGeneLists(
        options.filename_genes,
        gene_pattern=options.gene_pattern)

    E.info("read %i genes for forground in %i gene lists" %
           (len(input_foreground), len(genelists)))

    #############################################################
    # get background
    if options.filename_background:

        # nick - bug fix: background is the first tuple element from
        # ReadGeneLists
        input_background = GO.ReadGeneLists(
            options.filename_background,
            gene_pattern=options.gene_pattern)[0]
        E.info("read %i genes for background" % len(input_background))
    else:
        input_background = None

    #############################################################
    # sort out which ontologies to test
    if not options.ontology:
        if options.filename_input:
            options.ontology = list(gene2gos.keys())

    E.info("found %i ontologies: %s" %
           (len(options.ontology), options.ontology))

    # lines of the summary table, written to stdout at the very end
    summary = []
    summary.append("\t".join((
        "genelist",
        "ontology",
        "significant",
        "threshold",
        "ngenes",
        "ncategories",
        "nmaps",
        "nforegound",
        "nforeground_mapped",
        "nbackground",
        "nbackground_mapped",
        "nsample_counts",
        "nbackground_counts",
        "psample_assignments",
        "pbackground_assignments",
        "messages")) + "\n")

    #############################################################
    # get go categories for genes
    for test_ontology in sorted(options.ontology):

        # store results for aggregate output of multiple gene lists
        all_results = []
        all_significant_results = []
        all_genelists_with_results = []

        E.info("working on ontology %s" % test_ontology)
        #############################################################
        # get/read association of GO categories to genes
        if options.filename_input:
            gene2go, go2info = gene2gos[test_ontology], go2infos[test_ontology]
        else:
            E.info("reading data from database ...")

            # NOTE(review): dbhandle is only assigned in the
            # --filename-dump branch, which exits; it looks undefined
            # here - confirm how the connection should be created.
            dbhandle.Connect(options)
            gene2go, go2info = GO.ReadGene2GOFromDatabase(
                dbhandle,
                test_ontology,
                options.database, options.species)

            E.info("finished")

        if len(go2info) == 0:
            E.warn(
                "could not find information for terms - "
                "could be mismatch between ontologies")

        ngenes, ncategories, nmaps, counts_per_category = GO.CountGO(gene2go)
        E.info("assignments found: %i genes mapped to %i categories "
               "(%i maps)" %
               (ngenes, ncategories, nmaps))

        if options.minimum_counts > 0:
            to_remove = set(
                [x for x, y in counts_per_category.items()
                 if y < options.minimum_counts])
            E.info("removing %i categories with less than %i genes" %
                   (len(to_remove), options.minimum_counts))
            GO.removeCategories(gene2go, to_remove)

            ngenes, ncategories, nmaps, counts_per_category = \
                GO.CountGO(gene2go)
            E.info("assignments after filtering: %i genes mapped "
                   "to %i categories (%i maps)" % (
                       ngenes, ncategories, nmaps))

        for genelist_name, foreground in sorted(genelists.items()):

            msgs = []
            E.info("processing %s with %i genes" %
                   (genelist_name, len(foreground)))
            ##################################################################
            ##################################################################
            ##################################################################
            # build background - reconcile with foreground
            ##################################################################
            if input_background is None:
                background = list(gene2go.keys())
            else:
                background = list(input_background)

            # nick - bug-fix backgorund included the foreground in a tuple.
            # background is the first tuple element
            missing = foreground.difference(set(background))

            if options.strict:
                assert len(missing) == 0, \
                    "%i genes in foreground but not in background: %s" % (
                        len(missing), str(missing))
            else:
                if len(missing) != 0:
                    E.warn("%i genes in foreground that are not in "
                           "background - added to background of %i" %
                           (len(missing), len(background)))

                background.extend(missing)

            E.info("(unfiltered) foreground=%i, background=%i" %
                   (len(foreground), len(background)))

            # sort foreground and background, important for reproducibility
            # under random seed
            foreground = sorted(foreground)
            background = sorted(background)

            #############################################################
            # sanity checks:
            # are all of the foreground genes in the dataset
            # missing = set(genes).difference( set(gene2go.keys()) )
            # assert len(missing) == 0, "%i genes in foreground set without GO annotation: %s" % (len(missing), str(missing))

            #############################################################
            # read GO slims and map GO categories to GO slim categories
            if options.filename_slims:
                # close the slims file once it has been parsed
                slims_file = IOTools.open_file(options.filename_slims, "r")
                try:
                    go_slims = GO.GetGOSlims(slims_file)
                finally:
                    slims_file.close()

                if options.loglevel >= 1:
                    v = set()
                    for x in list(go_slims.values()):
                        for xx in x:
                            v.add(xx)
                    options.stdlog.write(
                        "# read go slims from %s: go=%i, slim=%i\n" %
                        (options.filename_slims,
                         len(go_slims),
                         len(v)))

                if options.filename_map_slims:
                    if options.filename_map_slims == "-":
                        outfile = options.stdout
                    else:
                        outfile = IOTools.open_file(
                            options.filename_map_slims, "w")

                    outfile.write("GO\tGOSlim\n")
                    for go, go_slim in sorted(list(go_slims.items())):
                        outfile.write("%s\t%s\n" % (go, go_slim))

                    if outfile != options.stdout:
                        outfile.close()

                # NOTE(review): ontology is only assigned when
                # --filename-ontology is given - confirm that the slims
                # mapping is never requested without it.
                gene2go = GO.MapGO2Slims(gene2go, go_slims, ontology=ontology)

                if options.loglevel >= 1:
                    ngenes, ncategories, nmaps, counts_per_category = \
                        GO.CountGO(gene2go)
                    options.stdlog.write(
                        "# after go slim filtering: %i genes mapped to "
                        "%i categories (%i maps)\n" % (
                            ngenes, ncategories, nmaps))

            #############################################################
            # Just dump out the gene list
            if options.get_genes:
                fg, bg, ng = [], [], []

                # foreground and background are sorted lists at this
                # point; sets give constant-time membership tests
                foreground_set = set(foreground)
                background_set = set(background)

                for gene, vv in list(gene2go.items()):
                    for v in vv:
                        if v.mGOId == options.get_genes:
                            if gene in foreground_set:
                                fg.append(gene)
                            elif gene in background_set:
                                bg.append(gene)
                            else:
                                ng.append(gene)

                # skip to next GO class
                if not (bg or ng):
                    continue

                options.stdout.write(
                    "# genes in GO category %s\n" % options.get_genes)
                options.stdout.write("gene\tset\n")
                for x in sorted(fg):
                    options.stdout.write("%s\t%s\n" % ("fg", x))
                for x in sorted(bg):
                    options.stdout.write("%s\t%s\n" % ("bg", x))
                for x in sorted(ng):
                    options.stdout.write("%s\t%s\n" % ("ng", x))

                E.info("nfg=%i, nbg=%i, nng=%i" % (len(fg), len(bg), len(ng)))

                E.stop()
                sys.exit(0)

            #############################################################
            # write the foreground gene list; the file is only closed
            # when an output filename pattern has been given
            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='foreground',
                                     set=genelist_name)

            outfile.write("gene_id\n%s\n" % ("\n".join(sorted(foreground))))
            if options.output_filename_pattern:
                outfile.close()

            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='background',
                                     set=genelist_name)

            # Jethro bug fix - see section 'build background' for assignment
            outfile.write("gene_id\n%s\n" % ("\n".join(sorted(background))))
            if options.output_filename_pattern:
                outfile.close()

            #############################################################
            # do the analysis
            go_results = GO.AnalyseGO(gene2go, foreground, background)

            if len(go_results.mSampleGenes) == 0:
                E.warn("%s: no genes with GO categories - analysis aborted" %
                       genelist_name)
                continue

            pairs = list(go_results.mResults.items())

            #############################################################
            # calculate fdr for each hypothesis
            if options.fdr:
                fdrs, samples, method = GO.computeFDRs(go_results,
                                                       foreground,
                                                       background,
                                                       options,
                                                       test_ontology,
                                                       gene2go,
                                                       go2info)
                # attach the first element of each fdr entry to the
                # corresponding result as its q-value
                for go_id, result in pairs:
                    result.mQValue = fdrs[go_id][0]
            else:
                fdrs, samples, method = {}, {}, None

            msgs.append("fdr=%s" % method)

            if options.sort_order == "fdr":
                pairs.sort(key=lambda x: x[1].mQValue)
            elif options.sort_order == "ratio":
                pairs.sort(key=lambda x: x[1].mRatio)
            elif options.sort_order == "pvalue":
                pairs.sort(key=lambda x: x[1].mPValue)

            #############################################################
            #############################################################
            #############################################################
            # output the full result
            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='overall',
                                     set=genelist_name)

            GO.outputResults(
                outfile, pairs, go2info, options, fdrs=fdrs, samples=samples)

            if options.output_filename_pattern:
                outfile.close()

            #############################################################
            #############################################################
            #############################################################
            # filter significant results and output
            filtered_pairs = GO.selectSignificantResults(pairs, fdrs, options)

            nselected = len(filtered_pairs)
            nselected_up = len([x for x in filtered_pairs if x[1].mRatio > 1])
            nselected_down = len(
                [x for x in filtered_pairs if x[1].mRatio < 1])

            assert nselected_up + nselected_down == nselected

            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='results',
                                     set=genelist_name)

            GO.outputResults(outfile,
                             filtered_pairs,
                             go2info,
                             options,
                             fdrs=fdrs,
                             samples=samples)

            if options.output_filename_pattern:
                outfile.close()

            #############################################################
            #############################################################
            #############################################################
            # save results for multi-gene-list analysis
            all_results.append(pairs)
            all_significant_results.append(filtered_pairs)
            all_genelists_with_results.append(genelist_name)

            #############################################################
            #############################################################
            #############################################################
            # output parameters
            ngenes, ncategories, nmaps, counts_per_category = \
                GO.CountGO(gene2go)

            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='parameters',
                                     set=genelist_name)

            nbackground = len(background)
            if nbackground == 0:
                nbackground = len(go_results.mBackgroundGenes)

            outfile.write(
                "# input go mappings for gene list '%s' and category '%s'\n" %
                (genelist_name, test_ontology))
            outfile.write("parameter\tvalue\tdescription\n")
            outfile.write("mapped_genes\t%i\tmapped genes\n" % ngenes)
            outfile.write(
                "mapped_categories\t%i\tmapped categories\n" % ncategories)
            outfile.write("mappings\t%i\tmappings\n" % nmaps)
            outfile.write("genes_in_fg\t%i\tgenes in foreground\n" %
                          len(foreground))
            outfile.write(
                "genes_in_fg_with_assignment\t%i\tgenes in foreground with GO assignments\n" %
                (len(go_results.mSampleGenes)))
            outfile.write(
                "genes_in_bg\t%i\tinput background\n" % nbackground)
            outfile.write(
                "genes_in_bg_with_assignment\t%i\tgenes in background with GO assignments\n" % (
                    len(go_results.mBackgroundGenes)))
            outfile.write(
                "associations_in_fg\t%i\tassociations in sample\n" %
                go_results.mSampleCountsTotal)
            outfile.write(
                "associations_in_bg\t%i\tassociations in background\n" %
                go_results.mBackgroundCountsTotal)
            outfile.write(
                "percent_genes_in_fg_with_association\t%s\tpercent genes in sample with GO assignments\n" % (
                    IOTools.prettyPercent(len(go_results.mSampleGenes),
                                          len(foreground), "%5.2f")))
            outfile.write(
                "percent_genes_in_bg_with_associations\t%s\tpercent genes background with GO assignments\n" % (
                    IOTools.prettyPercent(len(go_results.mBackgroundGenes),
                                          nbackground, "%5.2f")))
            outfile.write(
                "significant\t%i\tsignificant results reported\n" % nselected)
            outfile.write(
                "significant_up\t%i\tsignificant up-regulated results reported\n" % nselected_up)
            outfile.write(
                "significant_down\t%i\tsignificant down-regulated results reported\n" % nselected_down)
            outfile.write(
                "threshold\t%6.4f\tsignificance threshold\n" % options.threshold)

            if options.output_filename_pattern:
                outfile.close()

            summary.append("\t".join(map(str, (
                genelist_name,
                test_ontology,
                nselected,
                options.threshold,
                ngenes,
                ncategories,
                nmaps,
                len(foreground),
                len(go_results.mSampleGenes),
                nbackground,
                len(go_results.mBackgroundGenes),
                go_results.mSampleCountsTotal,
                go_results.mBackgroundCountsTotal,
                IOTools.prettyPercent(
                    len(go_results.mSampleGenes), len(foreground), "%5.2f"),
                IOTools.prettyPercent(
                    len(go_results.mBackgroundGenes), nbackground, "%5.2f"),
                ",".join(msgs)))) + "\n")

            #############################################################
            #############################################################
            #############################################################
            # output the fg patterns
            outfile = GO.getFileName(options,
                                     go=test_ontology,
                                     section='withgenes',
                                     set=genelist_name)

            GO.outputResults(outfile, pairs, go2info, options,
                             fdrs=fdrs,
                             samples=samples,
                             gene2go=gene2go,
                             foreground=foreground,
                             gene2name=gene2name)

            if options.output_filename_pattern:
                outfile.close()

        if len(genelists) > 1:

            ###################################################################
            # output various summary files
            # significant results
            GO.outputMultipleGeneListResults(all_significant_results,
                                             all_genelists_with_results,
                                             test_ontology,
                                             go2info,
                                             options,
                                             section='significant')

            # all results
            GO.outputMultipleGeneListResults(all_results,
                                             all_genelists_with_results,
                                             test_ontology,
                                             go2info,
                                             options,
                                             section='all')

            if options.compute_pairwise:
                GO.pairwiseGOEnrichment(all_results,
                                        all_genelists_with_results,
                                        test_ontology,
                                        go2info,
                                        options)

    outfile_summary = options.stdout
    outfile_summary.write("".join(summary))

    E.stop()
Ejemplo n.º 5
0
 def _write(outs, text, numerator, denominator, base):
     """Write one tab-separated row to *outs*.

     The row holds *text*, *numerator*, the percentage of *numerator*
     relative to *denominator*, and *base*.
     """
     row = (
         text,
         numerator,
         IOTools.prettyPercent(numerator, denominator),
         base,
     )
     outs.write('%s\t%i\t%s\t%s\n' % row)
Ejemplo n.º 6
0
def _formatComparisonColumns(result):
    """Return the tab-separated count and percentage columns for one direction.

    *result* is the comparison entry for the current key. It must expose
    ``total``, ``same``, ``different`` and ``unique`` and be iterable; one
    percentage (relative to ``total``) is emitted per value yielded by
    iterating over it. If *result* is ``None`` (key absent from the
    comparison), eight zero columns are returned so that the row stays
    aligned with the header.
    """
    if result is None:
        return "\t".join(["0"] * 8)

    counts = "%i\t%i\t%i\t%i" % (
        result.total, result.same, result.different, result.unique)
    percentages = "\t".join(
        [IOTools.prettyPercent(x, result.total) for x in result])
    return counts + "\t" + percentages


def main(argv=None):
    """script main.

    parses command line options in sys.argv, unless *argv* is given.

    Expects exactly two chain files as positional arguments. Both files
    are validated, converted into pairs and compared in both directions.
    A tab-separated table with one row per key plus a final ``total`` row
    is written to ``options.stdout``.

    Returns 1 if one of the chain files fails validation, otherwise None.

    Raises ValueError if the number of positional arguments is not two.
    """

    if not argv:
        argv = sys.argv

    # setup command line parser
    parser = E.OptionParser(
        version=
        "%prog version: $Id: chain2psl.py 2899 2010-04-13 14:37:37Z andreas $",
        usage=globals()["__doc__"])

    parser.add_option("-m",
                      "--output-mismatches",
                      dest="output_mismatches",
                      action="store_true",
                      help="output mismatches [%default]")

    parser.add_option("-a",
                      "--output-matches",
                      dest="output_matches",
                      action="store_true",
                      help="output matches [%default]")

    parser.add_option("-u",
                      "--output-unique",
                      dest="output_unique",
                      action="store_true",
                      help="output unique positions [%default]")

    parser.add_option(
        "-r",
        "--restrict",
        dest="restrict",
        type="string",
        help="restrict analysis to a chromosome pair (chr1:chr1:+) [%default]")

    # output_matches gets an explicit default like its sibling flags so
    # that it is a proper boolean and the help text does not show "None".
    parser.set_defaults(output_mismatches=False,
                        output_matches=False,
                        output_unique=False,
                        restrict=None)

    # add common options (-h/--help, ...) and parse command line
    (options, args) = E.start(parser, argv=argv)

    if len(args) != 2:
        raise ValueError("expected two chain files")

    filename_chain1, filename_chain2 = args
    filenames = (filename_chain1, filename_chain2)

    # Validate both files before building any pairs. Files are opened in
    # a ``with`` block so that the handles are closed again.
    for number, filename in enumerate(filenames, start=1):
        E.info("validating chain %i" % number)
        with IOTools.open_file(filename) as infile:
            is_valid = validateChain(infile)
        if not is_valid:
            E.warn("validation failed - exiting")
            return 1

    all_pairs = []
    for filename in filenames:
        E.info("building pairs for %s" % filename)
        with IOTools.open_file(filename) as infile:
            pairs = buildPairs(infile)
        E.info("read %i pairs" % len(pairs))
        all_pairs.append(pairs)
    pairs1, pairs2 = all_pairs

    if options.restrict:
        # the restriction is given as colon-separated fields and used
        # directly as a key into both pair collections
        restrict = tuple(options.restrict.split(":"))
        pairs1 = {restrict: pairs1[restrict]}
        pairs2 = {restrict: pairs2[restrict]}

    E.info("comparing 1 -> 2")
    comparison1 = compareChains(pairs1, pairs2)
    E.info("comparing 2 -> 1")
    comparison2 = compareChains(pairs2, pairs1)

    all_keys = sorted(set(comparison1.keys()).union(comparison2.keys()))

    outfile = options.stdout
    headers = ("mapped", "identical", "different", "unique")
    columns = ["contig1", "contig2", "strand"]
    for pattern in ("%s1", "p%s1", "%s2", "p%s2"):
        columns.extend([pattern % x for x in headers])
    outfile.write("\t".join(columns) + "\n")

    totals = E.Counter()

    for key in all_keys:
        result1 = comparison1[key] if key in comparison1 else None
        result2 = comparison2[key] if key in comparison2 else None

        # Every row carries the same number of columns as the header,
        # whether or not the key is present in both comparisons.
        outfile.write("%s\t%s\t%s" % key)
        outfile.write("\t" + _formatComparisonColumns(result1))
        outfile.write("\t" + _formatComparisonColumns(result2))
        outfile.write("\n")

        if result1 is not None:
            totals.total1 += result1.total
            totals.same1 += result1.same
            totals.different1 += result1.different
            totals.unique1 += result1.unique

        if result2 is not None:
            totals.total2 += result2.total
            totals.same2 += result2.same
            totals.different2 += result2.different
            totals.unique2 += result2.unique

    # summary row: counts followed by percentages, for each direction
    total_fields = []
    for counts in (
            (totals.total1, totals.same1,
             totals.different1, totals.unique1),
            (totals.total2, totals.same2,
             totals.different2, totals.unique2)):
        total_fields.extend(counts)
        total_fields.extend(
            [IOTools.prettyPercent(x, counts[0]) for x in counts])

    outfile.write("total\ttotal\t.\t")
    outfile.write("\t".join(map(str, total_fields)) + "\n")

    # output mismapped residues
    # NOTE(review): --output-matches on its own does not trigger any
    # output here; confirm whether that is intended.
    if options.output_mismatches or options.output_unique:
        outputMismatches(
            pairs1,
            pairs2,
            output_mismatches=options.output_mismatches,
            output_unique=options.output_unique,
            output_matches=options.output_matches,
        )

    # write footer and output benchmark information.
    E.stop()