def language_summarizer(resource, children, keep_details=False):
    """
    Summarize the programming language of `resource` together with the
    existing summaries of its direct `children`.

    The summary is a list of mappings such as
        {value: "programming_language", count: "count of occurrences"}
    sorted by decreasing count. It is passed to set_resource_summary() for
    this `resource` and returned.

    `keep_details` is forwarded as the `as_attribute` flag when reading the
    children summaries and when saving this resource summary.
    """
    key = 'programming_language'

    own_language = getattr(resource, key, [])
    if own_language:
        collected = [own_language]
    elif resource.is_file:
        # a file with no detected language is still counted, as None
        collected = [None]
    else:
        collected = []

    # Expand each child summary entry back into `count` repeated values so
    # that everything can be counted again in one pass.
    for child in children:
        entries = get_resource_summary(
            child, key=key, as_attribute=keep_details)
        for entry in entries or []:
            collected += [entry['value']] * entry['count']

    # summarize proper
    summary = sorted_counter(summarize_languages(collected))
    set_resource_summary(
        resource, key=key, value=summary, as_attribute=keep_details)
    return summary
def license_summarizer(resource, children, keep_details=False):
    """
    Populate a license_expressions list of mappings such as
        {value: "expression", count: "count of occurrences"}
    sorted by decreasing count.

    The summary combines the license expressions of `resource` itself with
    the existing summaries of its direct `children`. It is passed to
    set_resource_summary() for this `resource` and returned.

    `keep_details` is forwarded as the `as_attribute` flag when reading the
    children summaries and when saving this resource summary.
    """
    LIC_EXP = 'license_expressions'
    license_expressions = []

    # Collect current data. The attribute may be missing or explicitly None:
    # normalize both to an empty list so that extending never fails.
    lic_expressions = getattr(resource, LIC_EXP, None) or []
    if lic_expressions:
        license_expressions.extend(lic_expressions)
    elif resource.is_file:
        # also count files with no detection
        license_expressions.append(None)

    # Collect direct children expression summary
    for child in children:
        child_summaries = get_resource_summary(
            child, key=LIC_EXP, as_attribute=keep_details) or []
        for child_summary in child_summaries:
            # A child summary stores each value once with its count: repeat
            # the value `count` times so that summarize_licenses() receives
            # a flat list of expressions, same as for the current data.
            values = [child_summary['value']] * child_summary['count']
            license_expressions.extend(values)

    # summarize proper
    licenses_counter = summarize_licenses(license_expressions)
    summarized = sorted_counter(licenses_counter)
    set_resource_summary(
        resource, key=LIC_EXP, value=summarized, as_attribute=keep_details)
    return summarized
def build_summary(resource, children, attribute, summarizer, keep_details=False):
    """
    Compute a summary of the `attribute` values of the `resource` Resource and
    of its direct `children` Resources, save it with set_resource_summary()
    for `resource` and return it.

    - `attribute` is the name of the attribute ('copyrights', 'holders' etc.)
    - `summarizer` is a function that takes a list of texts and returns
      summarized texts with counts
    - `keep_details` is forwarded as the `as_attribute` flag when reading the
      children summaries and when saving this resource summary.
    """
    undetected = 0

    # Start with the data of this resource, kept as plain strings
    own_entries = getattr(resource, attribute, [])
    if own_entries:
        texts = [item.get('value') for item in own_entries]
    else:
        texts = []
        if resource.is_file:
            # a file without any detection is counted too
            undetected = 1

    # Then add the summaries that already exist for the direct children
    for child in children:
        previous = get_resource_summary(
            child, key=attribute, as_attribute=keep_details)
        for item in previous or []:
            count = item['count']
            value = item['value']
            if not value:
                # an empty value stands for "nothing detected"
                undetected += count
                continue
            texts.append(Text(value, value, count))

    # summarize proper using the provided function
    summary = summarizer(texts)

    # add back the counter of things without detection
    if undetected:
        summary.update({None: undetected})

    summary = sorted_counter(summary)

    if TRACE:
        logger_debug('COPYRIGHT summarized:', summary)

    set_resource_summary(
        resource, key=attribute, value=summary, as_attribute=keep_details)
    return summary
def package_summarizer(resource, children, keep_details=False):
    """
    Populate a packages summary list of packages mappings.

    The list combines the packages of `resource` itself, after they have
    been passed through add_files(), with the existing packages summaries of
    its direct `children`. It is passed to set_resource_summary() for this
    `resource` and returned.

    Note: `keep_details` is never used, as we are not keeping details of
    packages as this has no value.
    """
    packages = []

    # Collect current data. Use a default so that a resource without a
    # `packages` attribute is treated as having no packages instead of
    # raising an AttributeError, as done in the other summarizers.
    current_packages = getattr(resource, 'packages', None) or []

    if TRACE_LIGHT and current_packages:
        # imported here as this is needed only when tracing
        from packagedcode.models import Package
        packs = [Package.create(**p) for p in current_packages]
        logger_debug(
            'package_summarizer: for:', resource,
            'current_packages are:', packs)

    current_packages = add_files(current_packages, resource)
    packages.extend(current_packages)

    if TRACE_LIGHT and packages:
        logger_debug()
        from packagedcode.models import Package  # NOQA
        packs = [Package.create(**p) for p in packages]
        logger_debug(
            'package_summarizer: for:', resource,
            'packages are:', packs)

    # Collect direct children packages summary
    for child in children:
        child_summaries = get_resource_summary(
            child, key='packages', as_attribute=False) or []
        packages.extend(child_summaries)

    # summarize proper
    set_resource_summary(
        resource, key='packages', value=packages, as_attribute=False)
    return packages