Example #1
0
def find_all_names(stoplist):
    """
    Build an index from name strings to the epydoc value docs that they
    may refer to.

    A doc index is built for the C{nltk} package (with submodules), and
    the value docs reachable from it (with C{imports=False} and
    C{private=False}) are scanned.  For every value doc whose canonical
    name is known and starts with C{'nltk'}, each trailing slice
    C{name[i:]} of that name is converted to a string and used as a key.
    One-character keys and keys found in C{stoplist} are skipped.

    @param stoplist: A collection of name strings that should not be
        indexed.  Its items must be hashable.
    @return: An C{nltk.defaultdict(list)} that maps each key string to
        the list of value docs that were registered under it.
    """
    ROOT = ['nltk']

    # The stoplist is consulted once per key, so hash it a single time
    # instead of scanning a list on every lookup.
    stopwords = frozenset(stoplist)

    # Drop the logger verbosity to 0 while the index is built.  The
    # finally clause puts it back to 5 even if epydoc raises, so a
    # failure here cannot leave the logger at verbosity 0.
    logger._verbosity = 0
    try:
        docindex = epydoc.docbuilder.build_doc_index(ROOT,
                                                     add_submodules=True)
        valdocs = sorted(docindex.reachable_valdocs(
            imports=False,
            private=False))
    finally:
        logger._verbosity = 5

    names = nltk.defaultdict(list)
    n = 0  # Number of value docs that contributed at least one slice.
    for valdoc in valdocs:
        name = valdoc.canonical_name
        if (name is not epydoc.apidoc.UNKNOWN and
            name is not None and name[0] == 'nltk'):
            n += 1
            # The index i is needed for slicing: every trailing slice
            # of the canonical name becomes a candidate key.
            for i in range(len(name)):
                key = str(name[i:])
                if len(key) == 1: continue
                if key in stopwords: continue
                names[key].append(valdoc)

    log.info('Found %s names from %s objects' % (len(names), n))

    return names
Example #2
0
def scan_xml(filenames, names):
    """
    Append index terms to the given XML files and print a usage report.

    Each file is read, and every region matched by C{SCAN_RE} is
    rewritten: within such a region, each span matched by C{LINE_RE} is
    searched with C{TOKEN_RE}, and for every token that is a key of
    C{names} the C{INDEXTERM} template (formatted with the token) is
    appended to that span.  The result is written to
    C{TARGET_DIR + filename}.  Finally, one line per indexed token is
    printed to standard output with its count.

    @param filenames: The paths of the files to scan.
    @param names: A mapping from name strings to lists of value docs
        (see L{find_all_names}).
    """
    fdist = nltk.FreqDist()

    def linesub(match):
        # Return the matched line followed by one index term per
        # recognized token.
        line = match.group()
        for token in TOKEN_RE.findall(line):
            if token in names:
                targets = names[token]
                fdist.inc(token)
                if len(targets) > 1:
                    log.warning('%s is ambiguous: %s' % (
                        token, ', '.join(str(v.canonical_name)
                                         for v in targets)))
                line += INDEXTERM % token
        return line

    def scansub(match):
        # Apply linesub to every line inside a scanned region.
        return LINE_RE.sub(linesub, match.group())

    for filename in filenames:
        log.info('  %s' % filename)
        # Context managers close both handles even when reading,
        # substitution or writing raises.
        with open(filename, 'rb') as infile:
            src = infile.read()
        src = SCAN_RE.sub(scansub, src)
        with open(TARGET_DIR + filename, 'wb') as out:
            out.write(src)

    for word in fdist:
        # Only the first target is reported; its canonical name is
        # shown without the final component.
        namestr = ('\n'+38*' ').join([str(v.canonical_name[:-1])
                                      for v in names[word][:1]])
        print('[%3d]  %-30s %s' % (fdist[word], word, namestr))
        sys.stdout.flush()
Example #3
0
def scan_xml(filenames, names):
    """
    Append index terms to the given XML files and print a usage report.

    Each file is read, and every region matched by C{SCAN_RE} is
    rewritten: within such a region, each span matched by C{LINE_RE} is
    searched with C{TOKEN_RE}, and for every token that is a key of
    C{names} the C{INDEXTERM} template (formatted with the token) is
    appended to that span.  The result is written to
    C{TARGET_DIR + filename}.  Finally, one line per indexed token is
    printed to standard output with its count.

    @param filenames: The paths of the files to scan.
    @param names: A mapping from name strings to lists of value docs
        (see L{find_all_names}).
    """
    fdist = nltk.FreqDist()

    def linesub(match):
        # Return the matched line followed by one index term per
        # recognized token.
        line = match.group()
        for token in TOKEN_RE.findall(line):
            if token in names:
                targets = names[token]
                fdist.inc(token)
                if len(targets) > 1:
                    log.warning('%s is ambiguous: %s' % (token, ', '.join(
                        str(v.canonical_name) for v in targets)))
                line += INDEXTERM % token
        return line

    def scansub(match):
        # Apply linesub to every line inside a scanned region.
        return LINE_RE.sub(linesub, match.group())

    for filename in filenames:
        log.info('  %s' % filename)
        # Context managers close both handles even when reading,
        # substitution or writing raises.
        with open(filename, 'rb') as infile:
            src = infile.read()
        src = SCAN_RE.sub(scansub, src)
        with open(TARGET_DIR + filename, 'wb') as out:
            out.write(src)

    for word in fdist:
        # Only the first target is reported; its canonical name is
        # shown without the final component.
        namestr = ('\n' + 38 * ' ').join(
            [str(v.canonical_name[:-1]) for v in names[word][:1]])
        # A single parenthesized argument prints the same text under
        # both the Python 2 print statement and the Python 3 function.
        print('[%3d]  %-30s %s' % (fdist[word], word, namestr))
        sys.stdout.flush()
Example #4
0
def merge_bases(baselist1, baselist2, precedence, cyclecheck, path):
    """
    Merge two lists of base-class docs position by position.

    The lists are merged only when they have the same length and no
    pair of corresponding bases has two known canonical names that
    differ.  Merging the wrong pair could join unrelated classes (e.g.,
    yield a class that is its own subclass), so any doubt means that
    nothing is merged.

    @param baselist1: The list returned when C{precedence} is
        C{'introspect'}.  Updated in place by a successful merge.
    @param baselist2: The list returned for any other C{precedence}.
        Updated in place by a successful merge.
    @param precedence: Selects which list is returned when no merge
        takes place.
    @param cyclecheck: Passed through to C{merge_docs}.
    @param path: A description of the owning object, used in log
        messages and extended for each merged base.
    @return: C{baselist1} after a merge; otherwise the unmodified list
        selected by C{precedence}.
    """
    def unmerged():
        # The list that wins when the two cannot be merged safely.
        if precedence == 'introspect':
            return baselist1
        return baselist2

    # Lists of different lengths cannot be paired up.  This is most
    # often caused by __metaclass__.
    if len(baselist1) != len(baselist2):
        sizes = (path, len(baselist1), len(baselist2))
        log.info("Not merging the introspected & parsed base lists "
                 "for %s, since their lengths don't match (%s vs %s)" %
                 sizes)
        return unmerged()

    # Refuse to merge as soon as one pair has two known names that
    # disagree.
    for lhs, rhs in zip(baselist1, baselist2):
        both_named = (lhs.canonical_name not in (None, UNKNOWN) and
                      rhs.canonical_name not in (None, UNKNOWN))
        if both_named and lhs.canonical_name != rhs.canonical_name:
            log.info("Not merging the parsed & introspected base "
                     "lists for %s, since the bases' names don't match "
                     "(%s vs %s)" % (path, lhs.canonical_name,
                                     rhs.canonical_name))
            return unmerged()

    # Every pair is compatible: merge each one and store the result in
    # both lists.
    for index, pair in enumerate(zip(baselist1, baselist2)):
        lhs, rhs = pair
        merged = merge_docs(lhs, rhs, cyclecheck,
                            '%s.__bases__[%d]' % (path, index))
        baselist1[index] = baselist2[index] = merged

    return baselist1
Example #5
0
def find_all_names(stoplist):
    """
    Build an index from name strings to the epydoc value docs that they
    may refer to.

    A doc index is built for the C{nltk} package (with submodules), and
    the value docs reachable from it (with C{imports=False} and
    C{private=False}) are scanned.  For every value doc whose canonical
    name is known and starts with C{'nltk'}, each trailing slice
    C{name[i:]} of that name is converted to a string and used as a key.
    One-character keys and keys found in C{stoplist} are skipped.

    @param stoplist: A collection of name strings that should not be
        indexed.  Its items must be hashable.
    @return: An C{nltk.defaultdict(list)} that maps each key string to
        the list of value docs that were registered under it.
    """
    ROOT = ['nltk']

    # Hash the stoplist once; it is consulted for every candidate key.
    stopwords = frozenset(stoplist)

    # Build the index with the logger verbosity at 0.  The finally
    # clause resets it to 5 on both the normal and the error path, so
    # an epydoc failure cannot leave the verbosity at 0.
    logger._verbosity = 0
    try:
        docindex = epydoc.docbuilder.build_doc_index(ROOT,
                                                     add_submodules=True)
        valdocs = sorted(
            docindex.reachable_valdocs(
                imports=False,
                private=False))
    finally:
        logger._verbosity = 5

    names = nltk.defaultdict(list)
    n = 0  # Number of value docs that contributed at least one slice.
    for valdoc in valdocs:
        name = valdoc.canonical_name
        if (name is not epydoc.apidoc.UNKNOWN and name is not None
                and name[0] == 'nltk'):
            n += 1
            # Slicing needs the index: each trailing slice of the
            # canonical name is a candidate key.
            for i in range(len(name)):
                key = str(name[i:])
                if len(key) == 1: continue
                if key in stopwords: continue
                names[key].append(valdoc)

    log.info('Found %s names from %s objects' % (len(names), n))

    return names
Example #6
0
def merge_bases(baselist1, baselist2, precedence, cyclecheck, path):
    """
    Combine the two base lists of a class, one position at a time.

    Merging is attempted only if both lists have the same length and
    no two corresponding bases carry known canonical names that
    differ.  A wrong merge could join two unrelated classes (e.g.,
    produce a class that is its own subclass), so whenever the lists
    look incompatible one of them is returned untouched.

    @param baselist1: Returned as-is when merging is refused and
        C{precedence} is C{'introspect'}.  On a merge, its items are
        replaced in place.
    @param baselist2: Returned as-is when merging is refused and
        C{precedence} is anything else.  On a merge, its items are
        replaced in place.
    @param precedence: Decides which list is returned when merging is
        refused.
    @param cyclecheck: Forwarded unchanged to C{merge_docs}.
    @param path: Names the owning object in log messages; it is
        extended with C{.__bases__[i]} for each merged base.
    @return: C{baselist1} if the lists were merged, otherwise the list
        chosen by C{precedence}.
    """
    # The list to hand back whenever merging is refused.
    fallback = baselist1 if precedence == 'introspect' else baselist2

    # Differing lengths (most often caused by __metaclass__) mean the
    # bases cannot be matched up.
    if len(baselist1) != len(baselist2):
        message = ("Not merging the introspected & parsed base lists "
                   "for %s, since their lengths don't match (%s vs %s)" %
                   (path, len(baselist1), len(baselist2)))
        log.info(message)
        return fallback

    # Give up if any pair has two known names that disagree.
    for first, second in zip(baselist1, baselist2):
        names_known = (first.canonical_name not in (None, UNKNOWN)
                       and second.canonical_name not in (None, UNKNOWN))
        if names_known and first.canonical_name != second.canonical_name:
            message = ("Not merging the parsed & introspected base "
                       "lists for %s, since the bases' names don't match "
                       "(%s vs %s)" %
                       (path, first.canonical_name, second.canonical_name))
            log.info(message)
            return fallback

    # All pairs passed the checks: merge each pair and store the
    # merged doc in both lists.
    for position, (first, second) in enumerate(zip(baselist1, baselist2)):
        subpath = '%s.__bases__[%d]' % (path, position)
        combined = merge_docs(first, second, cyclecheck, subpath)
        baselist1[position] = baselist2[position] = combined

    return baselist1
Example #7
0
def main():
    """
    Load the stoplist, build the name index, and scan the XML files.

    The stoplist is read from C{STOPLIST} as whitespace-separated
    words, passed to L{find_all_names}, and the resulting index is
    applied to C{FILENAMES} by L{scan_xml}.
    """
    log.info('Loading stoplist...')
    # Use a context manager so that the stoplist file is closed as soon
    # as it has been read, even if reading fails.
    with open(STOPLIST) as stopfile:
        stoplist = stopfile.read().split()
    log.info('  Stoplist contains %d words' % len(stoplist))

    log.info('Running epydoc to build a name index...')
    names = find_all_names(stoplist)

    log.info('Scanning xml files...')
    scan_xml(FILENAMES, names)
Example #8
0
def main():
    """
    Load the stoplist, build the name index, and scan the XML files.

    The stoplist is read from C{STOPLIST} as whitespace-separated
    words, passed to L{find_all_names}, and the resulting index is
    applied to C{FILENAMES} by L{scan_xml}.
    """
    log.info('Loading stoplist...')
    # The with-block guarantees that the stoplist file handle is
    # released instead of being left for the garbage collector.
    with open(STOPLIST) as stoplist_file:
        stoplist = stoplist_file.read().split()
    log.info('  Stoplist contains %d words' % len(stoplist))

    log.info('Running epydoc to build a name index...')
    names = find_all_names(stoplist)

    log.info('Scanning xml files...')
    scan_xml(FILENAMES, names)
Example #9
0
def main():
    """
    Load the stoplist, build the name index, and scan the XML files.

    The stoplist is read from C{STOPLIST} as whitespace-separated
    words, passed to L{find_all_names}, and the resulting index is
    applied to C{FILENAMES} by L{scan_xml}.
    """
    log.info("Loading stoplist...")
    # Close the stoplist file deterministically rather than leaking
    # the handle returned by open().
    with open(STOPLIST) as stopfile:
        stoplist = stopfile.read().split()
    log.info(f"  Stoplist contains {len(stoplist)} words")

    log.info("Running epydoc to build a name index...")
    names = find_all_names(stoplist)

    log.info("Scanning xml files...")
    scan_xml(FILENAMES, names)
Example #10
0
def _merge_posargs_and_defaults(introspect_doc, parse_doc, path):
    """
    Reconcile C{posargs} and C{posarg_defaults} between the two docs.

    Both attributes are always copied together and from the same doc,
    so that the argument list and its defaults stay in step.  Nothing
    is changed if either C{posargs} is C{UNKNOWN} or if the two
    C{posargs} values are already equal.

    @param introspect_doc: The doc obtained by introspection.
    @param parse_doc: The doc obtained by parsing.
    @param path: A description of the object, used in the log message.
    """
    # An unknown argument list is left for merge_attrib to handle.
    if introspect_doc.posargs == UNKNOWN:
        return
    if parse_doc.posargs == UNKNOWN:
        return

    placeholder = ['...']
    if (introspect_doc.posargs == placeholder and
        parse_doc.posargs != placeholder):
        # The introspected doc just has '...', so trust the parsed doc.
        winner, loser = parse_doc, introspect_doc
    elif introspect_doc.posargs != parse_doc.posargs:
        # The lists are incompatible; let the precedence decide.
        log.info("Not merging the parsed & introspected arg "
                 "lists for %s, since they don't match (%s vs %s)"
                 % (path, introspect_doc.posargs, parse_doc.posargs))
        precedence = MERGE_PRECEDENCE.get('posargs',
                                          DEFAULT_MERGE_PRECEDENCE)
        if precedence == 'introspect':
            winner, loser = introspect_doc, parse_doc
        else:
            winner, loser = parse_doc, introspect_doc
    else:
        # The argument lists already agree.
        return

    loser.posargs = winner.posargs
    loser.posarg_defaults = winner.posarg_defaults
Example #11
0
def _merge_posargs_and_defaults(introspect_doc, parse_doc, path):
    """
    Make the two docs agree on C{posargs} and C{posarg_defaults}.

    The two attributes are tied together: whichever doc supplies the
    argument list also supplies the defaults.  The docs are left alone
    when either C{posargs} is C{UNKNOWN} or when both C{posargs}
    values are equal.

    @param introspect_doc: The doc obtained by introspection.
    @param parse_doc: The doc obtained by parsing.
    @param path: A description of the object, used in the log message.
    """
    def overwrite(target, source):
        # Copy the argument list and its defaults as one unit.
        target.posargs = source.posargs
        target.posarg_defaults = source.posarg_defaults

    # If either list is unknown, then merge_attrib will handle it.
    if introspect_doc.posargs == UNKNOWN or parse_doc.posargs == UNKNOWN:
        return

    # An introspected list that is only '...' carries no information,
    # so the parsed list replaces it.
    if introspect_doc.posargs == ['...'] and parse_doc.posargs != ['...']:
        overwrite(introspect_doc, parse_doc)
        return

    # Equal lists need no work.
    if not (introspect_doc.posargs != parse_doc.posargs):
        return

    # The lists conflict, so the configured precedence picks the one
    # that survives.
    log.info("Not merging the parsed & introspected arg "
             "lists for %s, since they don't match (%s vs %s)" %
             (path, introspect_doc.posargs, parse_doc.posargs))
    chosen = MERGE_PRECEDENCE.get('posargs', DEFAULT_MERGE_PRECEDENCE)
    if chosen == 'introspect':
        overwrite(parse_doc, introspect_doc)
    else:
        overwrite(introspect_doc, parse_doc)
Example #12
0
def parse_docstring(api_doc, docindex, suppress_warnings=[]):
    """
    Process the given C{APIDoc}'s docstring.  In particular, populate
    the C{APIDoc}'s C{descr} and C{summary} attributes, and add any
    information provided by fields in the docstring.

    If C{api_doc} is a C{ClassDoc} whose C{__init__} variable holds a
    C{RoutineDoc}, then that constructor's docstring is parsed first,
    and the constructor fields found in the class docstring are
    applied to it.

    @param api_doc: The C{APIDoc} whose docstring should be processed.
        It is modified in place.
    @param docindex: A DocIndex, used to find the containing
        module (to look up the docformat); and to find any
        user docfields defined by containing objects.
    @param suppress_warnings: A set of objects for which docstring
        warnings should be suppressed.
    """
    # NOTE: the mutable default for suppress_warnings is never mutated
    # here; it is only tested for membership and forwarded to the
    # recursive call below.

    # A metadata value other than UNKNOWN is treated as "already
    # processed".  That is expected for __init__ routines (a class
    # parses its constructor ahead of time, see below), so only other
    # repeats are logged.
    if api_doc.metadata is not UNKNOWN:
        if not (isinstance(api_doc, RoutineDoc)
                and api_doc.canonical_name[-1] == '__init__'):
            log.debug("%s's docstring processed twice" %
                      api_doc.canonical_name)
        return

    initialize_api_doc(api_doc)

    # If there's no docstring, then check for special variables (e.g.,
    # __version__), and then return -- there's nothing else to do.
    if (api_doc.docstring in (None, UNKNOWN)):
        if isinstance(api_doc, NamespaceDoc):
            for field in STANDARD_FIELDS + user_docfields(api_doc, docindex):
                add_metadata_from_var(api_doc, field)
        return

    # Remove leading indentation from the docstring.
    api_doc.docstring = unindent_docstring(api_doc.docstring)

    # Decide which docformat is used by this module.
    docformat = get_docformat(api_doc, docindex)

    # A list of markup errors from parsing.
    parse_errors = []

    # Extract a signature from the docstring, if it has one.  This
    # overrides any signature we got via introspection/parsing.
    if isinstance(api_doc, RoutineDoc):
        parse_function_signature(api_doc, None, docformat, parse_errors)

    # Parse the docstring.  Any errors encountered are stored as
    # `ParseError` objects in the errors list.
    parsed_docstring = markup.parse(api_doc.docstring, docformat, parse_errors)

    # Divide the docstring into a description and a list of
    # fields.
    descr, fields = parsed_docstring.split_fields(parse_errors)
    api_doc.descr = descr

    # Warnings collected while processing fields; reported (or
    # suppressed) together with parse_errors at the end.
    field_warnings = []

    # Handle the constructor fields that have been defined in the class
    # docstring. This code assumes that a class docstring is parsed before
    # the same class __init__ docstring.
    if isinstance(api_doc, ClassDoc):

        # Parse ahead the __init__ docstring for this class
        initvar = api_doc.variables.get('__init__')
        if initvar and isinstance(initvar.value, RoutineDoc):
            init_api_doc = initvar.value
            parse_docstring(init_api_doc, docindex, suppress_warnings)

            parse_function_signature(init_api_doc, api_doc, docformat,
                                     parse_errors)
            # NOTE(review): presumably split_init_fields also removes
            # the constructor fields from `fields`, so that the loop
            # further down does not process them again -- confirm.
            init_fields = split_init_fields(fields, field_warnings)

            # Process fields
            for field in init_fields:
                try:
                    process_field(init_api_doc, docindex, field.tag(),
                                  field.arg(), field.body())
                except ValueError as e:
                    field_warnings.append(str(e))

    # Process fields
    for field in fields:
        try:
            process_field(api_doc, docindex, field.tag(), field.arg(),
                          field.body())
        except ValueError as e:
            field_warnings.append(str(e))

    # Check to make sure that all type parameters correspond to
    # some documented parameter.
    check_type_fields(api_doc, field_warnings)

    # Check for special variables (e.g., __version__)
    if isinstance(api_doc, NamespaceDoc):
        for field in STANDARD_FIELDS + user_docfields(api_doc, docindex):
            add_metadata_from_var(api_doc, field)

    # Extract a summary
    if api_doc.summary is None and api_doc.descr is not None:
        api_doc.summary, api_doc.other_docs = api_doc.descr.summary()

    # If the summary is empty, but the return field is not, then use
    # the return field to generate a summary description.
    if (isinstance(api_doc, RoutineDoc) and api_doc.summary is None
            and api_doc.return_descr is not None):
        s, o = api_doc.return_descr.summary()
        api_doc.summary = RETURN_PDS + s
        api_doc.other_docs = o

    # [XX] Make sure we don't have types/param descrs for unknown
    # vars/params?

    # Report any errors that occurred
    if api_doc in suppress_warnings:
        if parse_errors or field_warnings:
            log.info("Suppressing docstring warnings for %s, since it "
                     "is not included in the documented set." %
                     api_doc.canonical_name)
    else:
        report_errors(api_doc, docindex, parse_errors, field_warnings)
Example #13
0
    # If the summary is empty, but the return field is not, then use
    # the return field to generate a summary description.
    if (isinstance(api_doc, RoutineDoc) and api_doc.summary is None
            and api_doc.return_descr is not None):
        s, o = api_doc.return_descr.summary()
        api_doc.summary = RETURN_PDS + s
        api_doc.other_docs = o

    # [XX] Make sure we don't have types/param descrs for unknown
    # vars/params?

    # Report any errors that occurred
    if api_doc in suppress_warnings:
        if parse_errors or field_warnings:
            log.info("Suppressing docstring warnings for %s, since it "
                     "is not included in the documented set." %
                     api_doc.canonical_name)
    else:
        report_errors(api_doc, docindex, parse_errors, field_warnings)


def add_metadata_from_var(api_doc, field):
    for varname in field.varnames:
        # Check if api_doc has a variable w/ the given name.
        if varname not in api_doc.variables: continue

        # This check was moved here from before the for loop because we
        # expect to reach this point only rarely. The loop below runs more
        # than once only for fields with more than one varname, which
        # currently is only 'author'.
        for md in api_doc.metadata:
Example #14
0
def parse_docstring(api_doc, docindex, suppress_warnings=[]):
    """
    Process the given C{APIDoc}'s docstring.  In particular, populate
    the C{APIDoc}'s C{descr} and C{summary} attributes, and add any
    information provided by fields in the docstring.

    If C{api_doc} is a C{ClassDoc} whose C{__init__} variable holds a
    C{RoutineDoc}, then that constructor's docstring is parsed first,
    and the constructor fields found in the class docstring are
    applied to it.

    @param api_doc: The C{APIDoc} whose docstring should be processed.
        It is modified in place.
    @param docindex: A DocIndex, used to find the containing
        module (to look up the docformat); and to find any
        user docfields defined by containing objects.
    @param suppress_warnings: A set of objects for which docstring
        warnings should be suppressed.
    """
    # NOTE: the mutable default for suppress_warnings is never mutated
    # here; it is only tested for membership and forwarded to the
    # recursive call below.

    # A metadata value other than UNKNOWN is treated as "already
    # processed".  That is expected for __init__ routines (a class
    # parses its constructor ahead of time, see below), so only other
    # repeats are logged.
    if api_doc.metadata is not UNKNOWN:
        if not (isinstance(api_doc, RoutineDoc)
                and api_doc.canonical_name[-1] == '__init__'):
            log.debug("%s's docstring processed twice" %
                      api_doc.canonical_name)
        return

    initialize_api_doc(api_doc)

    # If there's no docstring, then check for special variables (e.g.,
    # __version__), and then return -- there's nothing else to do.
    if (api_doc.docstring in (None, UNKNOWN)):
        if isinstance(api_doc, NamespaceDoc):
            for field in STANDARD_FIELDS + user_docfields(api_doc, docindex):
                add_metadata_from_var(api_doc, field)
        return

    # Remove leading indentation from the docstring.
    api_doc.docstring = unindent_docstring(api_doc.docstring)

    # Decide which docformat is used by this module.
    docformat = get_docformat(api_doc, docindex)

    # A list of markup errors from parsing.
    parse_errors = []

    # Extract a signature from the docstring, if it has one.  This
    # overrides any signature we got via introspection/parsing.
    if isinstance(api_doc, RoutineDoc):
        parse_function_signature(api_doc, None, docformat, parse_errors)

    # Parse the docstring.  Any errors encountered are stored as
    # `ParseError` objects in the errors list.
    parsed_docstring = markup.parse(api_doc.docstring, docformat,
                                    parse_errors)

    # Divide the docstring into a description and a list of
    # fields.
    descr, fields = parsed_docstring.split_fields(parse_errors)
    api_doc.descr = descr

    # Warnings collected while processing fields; reported (or
    # suppressed) together with parse_errors at the end.
    field_warnings = []

    # Handle the constructor fields that have been defined in the class
    # docstring. This code assumes that a class docstring is parsed before
    # the same class __init__ docstring.
    if isinstance(api_doc, ClassDoc):

        # Parse ahead the __init__ docstring for this class
        initvar = api_doc.variables.get('__init__')
        if initvar and isinstance(initvar.value, RoutineDoc):
            init_api_doc = initvar.value
            parse_docstring(init_api_doc, docindex, suppress_warnings)

            parse_function_signature(init_api_doc, api_doc,
                                     docformat, parse_errors)
            init_fields = split_init_fields(fields, field_warnings)

            # Process fields.  "except ... as" is accepted by both
            # Python 2.6+ and Python 3, unlike the comma form.
            for field in init_fields:
                try:
                    process_field(init_api_doc, docindex, field.tag(),
                                  field.arg(), field.body())
                except ValueError as e:
                    field_warnings.append(str(e))

    # Process fields
    for field in fields:
        try:
            process_field(api_doc, docindex, field.tag(),
                          field.arg(), field.body())
        except ValueError as e:
            field_warnings.append(str(e))

    # Check to make sure that all type parameters correspond to
    # some documented parameter.
    check_type_fields(api_doc, field_warnings)

    # Check for special variables (e.g., __version__)
    if isinstance(api_doc, NamespaceDoc):
        for field in STANDARD_FIELDS + user_docfields(api_doc, docindex):
            add_metadata_from_var(api_doc, field)

    # Extract a summary
    if api_doc.summary is None and api_doc.descr is not None:
        api_doc.summary, api_doc.other_docs = api_doc.descr.summary()

    # If the summary is empty, but the return field is not, then use
    # the return field to generate a summary description.
    if (isinstance(api_doc, RoutineDoc) and api_doc.summary is None and
        api_doc.return_descr is not None):
        s, o = api_doc.return_descr.summary()
        api_doc.summary = RETURN_PDS + s
        api_doc.other_docs = o

    # [XX] Make sure we don't have types/param descrs for unknown
    # vars/params?

    # Report any errors that occurred
    if api_doc in suppress_warnings:
        if parse_errors or field_warnings:
            log.info("Suppressing docstring warnings for %s, since it "
                     "is not included in the documented set." %
                     api_doc.canonical_name)
    else:
        report_errors(api_doc, docindex, parse_errors, field_warnings)
Example #15
0
def merge_docs(introspect_doc, parse_doc, cyclecheck=None, path=None):
    """
    Merge the API documentation information that was obtained from
    introspection with information that was obtained from parsing.
    C{introspect_doc} and C{parse_doc} should be two C{APIDoc} instances
    that describe the same object.  C{merge_docs} combines the
    information from these two instances, and returns the merged
    C{APIDoc}.

    If C{introspect_doc} and C{parse_doc} are compatible, then they will
    be I{merged} -- i.e., they will be coerced to a common class, and
    their state will be stored in a shared dictionary.  Once they have
    been merged, any change made to the attributes of one will affect
    the other.  The value for each of the merged C{APIDoc}'s
    attributes is formed by combining the values of the source
    C{APIDoc}s' attributes, as follows:

      - If either of the source attributes' value is C{UNKNOWN}, then
        use the other source attribute's value.
      - Otherwise, if an attribute merge function has been registered
        for the attribute, then use that function to calculate the
        merged value from the two source attribute values.
      - Otherwise, if L{MERGE_PRECEDENCE} is defined for the
        attribute, then use the attribute value from the source that
        it indicates.
      - Otherwise, use the attribute value from the source indicated
        by L{DEFAULT_MERGE_PRECEDENCE}.

    If C{introspect_doc} and C{parse_doc} are I{not} compatible (e.g., if
    their values have incompatible types), then C{merge_docs()} will
    simply return either C{introspect_doc} or C{parse_doc}, depending on
    the value of L{DEFAULT_MERGE_PRECEDENCE}.  The two input
    C{APIDoc}s will not be merged or modified in any way.

    @param introspect_doc: The C{APIDoc} obtained by introspection.
    @param parse_doc: The C{APIDoc} obtained by parsing.
    @param cyclecheck, path: These arguments should only be provided
        when C{merge_docs()} is called by an attribute merge
        function.  See L{register_attribute_mergefunc()} for more
        details.
    @return: The merged C{APIDoc}, or one of the two inputs if they
        were not merged.
    """
    assert isinstance(introspect_doc, APIDoc)
    assert isinstance(parse_doc, APIDoc)

    # A top-level call starts a fresh set of visited pairs and derives
    # the path used in log messages from whichever name is known.
    if cyclecheck is None:
        cyclecheck = set()
        if introspect_doc.canonical_name not in (None, UNKNOWN):
            path = '%s' % introspect_doc.canonical_name
        elif parse_doc.canonical_name not in (None, UNKNOWN):
            path = '%s' % parse_doc.canonical_name
        else:
            path = '??'

    # If we've already examined this pair, then there's nothing
    # more to do.  The reason that we check id's here is that we
    # want to avoid hashing the APIDoc objects for now, so we can
    # use APIDoc.merge_and_overwrite() later.
    if (id(introspect_doc), id(parse_doc)) in cyclecheck:
        return introspect_doc
    cyclecheck.add( (id(introspect_doc), id(parse_doc)) )

    # If these two are already merged, then we're done.  (Two
    # APIDoc's compare equal iff they are identical or have been
    # merged.)
    if introspect_doc == parse_doc:
        return introspect_doc

    # If both values are GenericValueDoc, then we don't want to merge
    # them.  E.g., we don't want to merge 2+2 with 4.  So just copy
    # the inspect_doc's pyval to the parse_doc, and return the parse_doc.
    if type(introspect_doc) == type(parse_doc) == GenericValueDoc:
        parse_doc.pyval = introspect_doc.pyval
        parse_doc.docs_extracted_by = 'both'
        return parse_doc

    # Perform several sanity checks here -- if we accidentally
    # merge values that shouldn't get merged, then bad things can
    # happen.
    mismatch = None
    if (introspect_doc.__class__ != parse_doc.__class__ and
        not (issubclass(introspect_doc.__class__, parse_doc.__class__) or
             issubclass(parse_doc.__class__, introspect_doc.__class__))):
        mismatch = ("value types don't match -- i=%r, p=%r." %
                    (introspect_doc.__class__, parse_doc.__class__))
    if (isinstance(introspect_doc, ValueDoc) and
        isinstance(parse_doc, ValueDoc)):
        if (introspect_doc.pyval is not UNKNOWN and
            parse_doc.pyval is not UNKNOWN and
            introspect_doc.pyval is not parse_doc.pyval):
            mismatch = "values don't match."
        elif (introspect_doc.canonical_name not in (None, UNKNOWN) and
            parse_doc.canonical_name not in (None, UNKNOWN) and
            introspect_doc.canonical_name != parse_doc.canonical_name):
            mismatch = "canonical names don't match."
    if mismatch is not None:
        log.info("Not merging the parsed & introspected values of %s, "
                 "since their %s" % (path, mismatch))
        if DEFAULT_MERGE_PRECEDENCE == 'introspect':
            return introspect_doc
        else:
            return parse_doc

    # If one apidoc's class is a superclass of the other's, then
    # specialize it to the more specific class.
    if introspect_doc.__class__ is not parse_doc.__class__:
        if issubclass(introspect_doc.__class__, parse_doc.__class__):
            parse_doc.specialize_to(introspect_doc.__class__)
        if issubclass(parse_doc.__class__, introspect_doc.__class__):
            introspect_doc.specialize_to(parse_doc.__class__)
    assert introspect_doc.__class__ is parse_doc.__class__

    # The posargs and defaults are tied together -- if we merge
    # the posargs one way, then we need to merge the defaults the
    # same way.  So check them first.  (This is a minor hack)
    if (isinstance(introspect_doc, RoutineDoc) and
        isinstance(parse_doc, RoutineDoc)):
        _merge_posargs_and_defaults(introspect_doc, parse_doc, path)

    # Merge the two api_doc's attributes.  The attribute names are
    # snapshotted into a set before any merging happens.  Converting
    # each __dict__ to a list first (instead of adding the results of
    # keys()) works on Python 3, where key views cannot be
    # concatenated, and yields the same names on Python 2.
    for attrib in set(list(introspect_doc.__dict__) +
                      list(parse_doc.__dict__)):
        # Be sure not to merge any private attributes (especially
        # __mergeset or __has_been_hashed!)
        if attrib.startswith('_'): continue
        merge_attribute(attrib, introspect_doc, parse_doc,
                             cyclecheck, path)

    # Set the dictionaries to be shared.
    return introspect_doc.merge_and_overwrite(parse_doc)
Example #16
0
def merge_docs(introspect_doc, parse_doc, cyclecheck=None, path=None):
    """
    Merge the API documentation information that was obtained from
    introspection with information that was obtained from parsing.
    C{introspect_doc} and C{parse_doc} should be two C{APIDoc} instances
    that describe the same object.  C{merge_docs} combines the
    information from these two instances, and returns the merged
    C{APIDoc}.

    If C{introspect_doc} and C{parse_doc} are compatible, then they will
    be I{merged} -- i.e., they will be coerced to a common class, and
    their state will be stored in a shared dictionary.  Once they have
    been merged, any change made to the attributes of one will affect
    the other.  The value for each of the merged C{APIDoc}'s
    attributes is formed by combining the values of the source
    C{APIDoc}s' attributes, as follows:

      - If either of the source attributes' value is C{UNKNOWN}, then
        use the other source attribute's value.
      - Otherwise, if an attribute merge function has been registered
        for the attribute, then use that function to calculate the
        merged value from the two source attribute values.
      - Otherwise, if L{MERGE_PRECEDENCE} is defined for the
        attribute, then use the attribute value from the source that
        it indicates.
      - Otherwise, use the attribute value from the source indicated
        by L{DEFAULT_MERGE_PRECEDENCE}.

    If C{introspect_doc} and C{parse_doc} are I{not} compatible (e.g., if
    their values have incompatible types), then C{merge_docs()} will
    simply return either C{introspect_doc} or C{parse_doc}, depending on
    the value of L{DEFAULT_MERGE_PRECEDENCE}.  The two input
    C{APIDoc}s will not be merged or modified in any way.

    @param introspect_doc: The C{APIDoc} obtained by introspection.
    @param parse_doc: The C{APIDoc} obtained by parsing.
    @param cyclecheck, path: These arguments should only be provided
        when C{merge_docs()} is called by an attribute merge
        function.  See L{register_attribute_mergefunc()} for more
        details.
    @return: The merged C{APIDoc}, or one of the two inputs if they
        were not merged.
    """
    assert isinstance(introspect_doc, APIDoc)
    assert isinstance(parse_doc, APIDoc)

    # A top-level call starts a fresh set of visited pairs and derives
    # the path used in log messages from whichever name is known.
    if cyclecheck is None:
        cyclecheck = set()
        if introspect_doc.canonical_name not in (None, UNKNOWN):
            path = '%s' % introspect_doc.canonical_name
        elif parse_doc.canonical_name not in (None, UNKNOWN):
            path = '%s' % parse_doc.canonical_name
        else:
            path = '??'

    # If we've already examined this pair, then there's nothing
    # more to do.  The reason that we check id's here is that we
    # want to avoid hashing the APIDoc objects for now, so we can
    # use APIDoc.merge_and_overwrite() later.
    if (id(introspect_doc), id(parse_doc)) in cyclecheck:
        return introspect_doc
    cyclecheck.add((id(introspect_doc), id(parse_doc)))

    # If these two are already merged, then we're done.  (Two
    # APIDoc's compare equal iff they are identical or have been
    # merged.)
    if introspect_doc == parse_doc:
        return introspect_doc

    # If both values are GenericValueDoc, then we don't want to merge
    # them.  E.g., we don't want to merge 2+2 with 4.  So just copy
    # the inspect_doc's pyval to the parse_doc, and return the parse_doc.
    if type(introspect_doc) == type(parse_doc) == GenericValueDoc:
        parse_doc.pyval = introspect_doc.pyval
        parse_doc.docs_extracted_by = 'both'
        return parse_doc

    # Perform several sanity checks here -- if we accidentally
    # merge values that shouldn't get merged, then bad things can
    # happen.
    mismatch = None
    if (introspect_doc.__class__ != parse_doc.__class__ and
            not (issubclass(introspect_doc.__class__, parse_doc.__class__) or
                 issubclass(parse_doc.__class__, introspect_doc.__class__))):
        mismatch = ("value types don't match -- i=%r, p=%r." %
                    (introspect_doc.__class__, parse_doc.__class__))
    if (isinstance(introspect_doc, ValueDoc)
            and isinstance(parse_doc, ValueDoc)):
        if (introspect_doc.pyval is not UNKNOWN
                and parse_doc.pyval is not UNKNOWN
                and introspect_doc.pyval is not parse_doc.pyval):
            mismatch = "values don't match."
        elif (introspect_doc.canonical_name not in (None, UNKNOWN)
              and parse_doc.canonical_name not in (None, UNKNOWN)
              and introspect_doc.canonical_name != parse_doc.canonical_name):
            mismatch = "canonical names don't match."
    if mismatch is not None:
        log.info("Not merging the parsed & introspected values of %s, "
                 "since their %s" % (path, mismatch))
        if DEFAULT_MERGE_PRECEDENCE == 'introspect':
            return introspect_doc
        else:
            return parse_doc

    # If one apidoc's class is a superclass of the other's, then
    # specialize it to the more specific class.
    if introspect_doc.__class__ is not parse_doc.__class__:
        if issubclass(introspect_doc.__class__, parse_doc.__class__):
            parse_doc.specialize_to(introspect_doc.__class__)
        if issubclass(parse_doc.__class__, introspect_doc.__class__):
            introspect_doc.specialize_to(parse_doc.__class__)
    assert introspect_doc.__class__ is parse_doc.__class__

    # The posargs and defaults are tied together -- if we merge
    # the posargs one way, then we need to merge the defaults the
    # same way.  So check them first.  (This is a minor hack)
    if (isinstance(introspect_doc, RoutineDoc)
            and isinstance(parse_doc, RoutineDoc)):
        _merge_posargs_and_defaults(introspect_doc, parse_doc, path)

    # Merge the two api_doc's attributes.  The attribute names are
    # snapshotted into a set before any merging happens.  Converting
    # each __dict__ to a list first (instead of adding the results of
    # keys()) works on Python 3, where key views cannot be
    # concatenated, and yields the same names on Python 2.
    for attrib in set(list(introspect_doc.__dict__) +
                      list(parse_doc.__dict__)):
        # Be sure not to merge any private attributes (especially
        # __mergeset or __has_been_hashed!)
        if attrib.startswith('_'): continue
        merge_attribute(attrib, introspect_doc, parse_doc, cyclecheck, path)

    # Set the dictionaries to be shared.
    return introspect_doc.merge_and_overwrite(parse_doc)