コード例 #1
0
ファイル: summarizer.py プロジェクト: vsurge/barista
def language_summarizer(resource, children, keep_details=False):
    """
    Summarize the programming language of `resource` combined with the
    existing language summaries of its direct `children`.

    The summary is a list of mappings such as
        {value: "programming_language", count: "count of occurrences"}
    as built by summarize_languages() and ordered by sorted_counter(). It is
    handed to set_resource_summary() for `resource` and returned.

    `keep_details` is forwarded as `as_attribute` to the summary get/set
    helpers.
    """
    attr_name = 'programming_language'

    own_language = getattr(resource, attr_name, [])
    if own_language:
        collected = [own_language]
    elif resource.is_file:
        # a file without a detected language is still counted, under None
        collected = [None]
    else:
        collected = []

    # Expand every child summary entry back into `count` repeated values so
    # that they can be counted again together with the current resource.
    for kid in children:
        kid_summaries = get_resource_summary(
            kid, key=attr_name, as_attribute=keep_details)
        for entry in kid_summaries or []:
            collected.extend([entry['value']] * entry['count'])

    ranked = sorted_counter(summarize_languages(collected))
    set_resource_summary(
        resource, key=attr_name, value=ranked, as_attribute=keep_details)
    return ranked
コード例 #2
0
ファイル: summarizer.py プロジェクト: vsurge/barista
def license_summarizer(resource, children, keep_details=False):
    """
    Summarize the license expressions of `resource` combined with the
    existing license expression summaries of its direct `children`.

    The summary is a list of mappings such as
        {value: "expression", count: "count of occurrences"}
    as built by summarize_licenses() and ordered by sorted_counter(). It is
    handed to set_resource_summary() for `resource` and returned.

    `keep_details` is forwarded as `as_attribute` to the summary get/set
    helpers.
    """
    LIC_EXP = 'license_expressions'
    license_expressions = []

    # Collect current data. Branch on the detected value first so that a
    # falsy non-list value (such as None) on a non-file resource is never
    # passed to list.extend(), which would raise a TypeError.
    lic_expressions = getattr(resource, LIC_EXP, [])
    if lic_expressions:
        license_expressions.extend(lic_expressions)
    elif resource.is_file:
        # also count files with no detection
        license_expressions.append(None)

    # Collect direct children expression summary
    for child in children:
        child_summaries = get_resource_summary(
            child, key=LIC_EXP, as_attribute=keep_details) or []
        for child_summary in child_summaries:
            # Each summary entry is expanded back into `count` repeated
            # values so that it is counted again with the current data.
            # TODO: review this: this feels rather weird
            values = [child_summary['value']] * child_summary['count']
            license_expressions.extend(values)

    # summarize proper
    licenses_counter = summarize_licenses(license_expressions)
    summarized = sorted_counter(licenses_counter)
    set_resource_summary(resource,
                         key=LIC_EXP,
                         value=summarized,
                         as_attribute=keep_details)
    return summarized
コード例 #3
0
def build_summary(resource,
                  children,
                  attribute,
                  summarizer,
                  keep_details=False):
    """
    Summarize the `attribute` values of the `resource` Resource together with
    the existing summaries of its `children` Resources, hand the result to
    set_resource_summary() for `resource` and return it.

     - `attribute` is the name of the attribute ('copyrights', 'holders' etc.)
     - `summarizer` is a function that takes the list of collected texts and
       returns summarized texts with counts
     - `keep_details` is forwarded as `as_attribute` to the summary get/set
       helpers
    """
    own_entries = getattr(resource, attribute, [])

    # count of items for which nothing was detected, tracked apart from texts
    undetected = 0
    texts = []

    if not own_entries:
        if resource.is_file:
            undetected += 1
    else:
        # the resource own data is kept as plain values
        texts = [item.get('value') for item in own_entries]

    # Merge in the summaries already computed for the direct children
    for kid in children:
        kid_summaries = get_resource_summary(
            kid, key=attribute, as_attribute=keep_details)
        for entry in kid_summaries or []:
            occurrences = entry['count']
            text = entry['value']
            if not text:
                # an entry without a value stands for "no detection"
                undetected += occurrences
                continue
            texts.append(Text(text, text, occurrences))

    summary = summarizer(texts)

    # re-inject the no-detection count under the None key
    if undetected:
        summary.update({None: undetected})

    summary = sorted_counter(summary)
    if TRACE:
        logger_debug('COPYRIGHT summarized:', summary)
    set_resource_summary(
        resource, key=attribute, value=summary, as_attribute=keep_details)
    return summary
コード例 #4
0
ファイル: summarizer.py プロジェクト: vsurge/barista
def package_summarizer(resource, children, keep_details=False):
    """
    Build the packages summary of `resource`: its own packages, as processed
    by add_files(), followed by the packages summaries of its direct
    `children`. The list is handed to set_resource_summary() for `resource`
    and returned.

    Note: `keep_details` is never used, as we are not keeping details of
    packages as this has no value.
    """
    # Collect current data. A missing `packages` attribute is treated the
    # same as an empty one, consistent with the other summarizers which all
    # read their attribute with a default.
    current_packages = getattr(resource, 'packages', None) or []

    if TRACE_LIGHT and current_packages:
        # imported locally: only needed when tracing
        from packagedcode.models import Package
        packs = [Package.create(**p) for p in current_packages]
        logger_debug('package_summarizer: for:', resource,
                     'current_packages are:', packs)

    packages = []
    packages.extend(add_files(current_packages, resource))

    if TRACE_LIGHT and packages:
        logger_debug()
        from packagedcode.models import Package  # NOQA
        packs = [Package.create(**p) for p in packages]
        logger_debug('package_summarizer: for:', resource, 'packages are:',
                     packs)

    # Collect direct children packages summary
    for child in children:
        child_summaries = get_resource_summary(
            child, key='packages', as_attribute=False) or []
        packages.extend(child_summaries)

    # summarize proper: packages are always stored with as_attribute=False
    set_resource_summary(resource,
                         key='packages',
                         value=packages,
                         as_attribute=False)
    return packages