def load(self):
    """
    Read the YAML data file at self.data_file and bind each of its top-level
    entries as an attribute of this object. Text files are not loaded.

    The 'minimum_coverage' and 'relevance' values are converted to int, and
    the 'key' value is asserted to be the same as self.key. On any failure a
    banner naming the data file is printed and the error is re-raised.
    """
    integer_fields = ('minimum_coverage', 'relevance')
    try:
        with io.open(self.data_file, encoding='utf-8') as yaml_file:
            content = yaml_file.read()

        for name, value in saneyaml.load(content).items():
            if name in integer_fields:
                value = int(value)

            if name == 'key':
                assert self.key == value, (
                    'Inconsistent YAML key and file names for %r' % self.key)

            setattr(self, name, value)

    except Exception as error:
        # this is a rare case: fail loudly
        separator = '#############################'
        print()
        print(separator)
        print('INVALID LICENSE YAML FILE:', 'file://' + self.data_file)
        print(separator)
        print(error)
        print(separator)
        raise
def get_yaml_data(location):
    """
    Parse the YAML file at `location` and return the loaded metadata.

    The file is decoded as UTF-8 and handed to saneyaml.load unchanged.
    The returned object is whatever saneyaml.load produces for that text.
    """
    # Pass the text through untouched: joining the lines with an extra
    # newline would put a blank line after every line and change the value
    # of multi-line block scalars.
    with io.open(location, encoding='utf-8') as loc:
        text = loc.read()
    return saneyaml.load(text)
def load_license_policy(license_policy_location):
    """
    Return a license_policy dictionary loaded from a license policy file.

    Return an empty dictionary when `license_policy_location` is empty, does
    not exist or is a directory. Otherwise the file is read as UTF-8 text and
    the result of saneyaml.load on its content is returned.
    """
    # Local import so this function does not depend on the module importing io.
    import io

    if not license_policy_location or not exists(license_policy_location):
        return {}
    if isdir(license_policy_location):
        return {}

    # Decode explicitly as UTF-8 rather than with the platform default
    # encoding so the same policy file loads the same way everywhere.
    with io.open(license_policy_location, encoding='utf-8') as conf:
        conf_content = conf.read()
    return saneyaml.load(conf_content)
def parse(location):
    """
    Return a Package object built from the +COMPACT_MANIFEST file at
    `location`, or None when is_freebsd_manifest rejects `location`.
    """
    if is_freebsd_manifest(location):
        # The open file object itself is handed to the YAML loader.
        with io.open(location, encoding='utf-8') as manifest_file:
            manifest_data = saneyaml.load(manifest_file)
        return build_package(manifest_data)
    return None
def __attrs_post_init__(self, *args, **kwargs):
    """
    Complete the initialization of this license test.

    Set self.test_file_name from self.test_file when there is a test file.
    When self.data_file is set, load it as YAML and take the
    'license_expressions', 'notes' and 'expected_failure' entries from it.

    Each license expression is parsed with self.licensing and replaced by
    its rendered form.

    Raise an Exception when the data file cannot be read, when it contains
    any other entry, when an expression cannot be parsed, or when there are
    neither license expressions nor notes.
    """
    if self.test_file:
        _, _, self.test_file_name = self.test_file.partition(
            os.path.join('licensedcode', 'data') + os.sep)

    # Used in error messages: stays a string even when there is no data
    # file so that reporting an error cannot itself fail with a TypeError.
    data_location = 'file://' + (self.data_file or '')

    data = {}
    if self.data_file:
        try:
            with io.open(self.data_file, encoding='utf-8') as df:
                data = saneyaml.load(df.read()) or {}
        except Exception as e:
            raise Exception('Failed to read:', data_location, e)

    self.license_expressions = data.pop('license_expressions', [])
    self.notes = data.pop('notes', None)

    # True if the test is expected to fail
    self.expected_failure = data.pop('expected_failure', False)

    # Everything known has been popped: anything left over is unknown.
    if data:
        raise Exception(
            'Unknown data elements: ' + repr(data) +
            ' for: ' + data_location)

    if self.license_expressions:
        # Iterate over a copy since the list is updated in place.
        for i, exp in enumerate(self.license_expressions[:]):
            try:
                expression = self.licensing.parse(exp)
            except Exception:
                # Not a bare except: KeyboardInterrupt and SystemExit must
                # propagate unchanged.
                raise Exception(
                    'Unable to parse License rule expression: '
                    + repr(exp) + ' for: ' + data_location + '\n'
                    + traceback.format_exc()
                )
            if expression is None:
                raise Exception(
                    'Unable to parse License rule expression: '
                    + repr(exp) + ' for:' + repr(self.data_file))

            self.license_expressions[i] = expression.render()

    elif not self.notes:
        raise Exception(
            'A license test without expected license_expressions should '
            'have explanatory notes: for: ' + data_location)
def load(self, load_notes=False):
    """
    Load self from a .RULE YAML file stored in self.data_file.
    Does not load the rule text file.

    `load_notes` is accepted but not used by the code visible here.
    If the data file cannot be read or parsed, print a banner naming the
    file and re-raise the original error.
    """
    try:
        with codecs.open(self.data_file, encoding='utf-8') as f:
            # NOTE(review): `data` is not used in the visible code; confirm
            # whether the rest of this loader is missing.
            data = saneyaml.load(f.read())
    except Exception as e:
        # "except Exception as e" is valid on Python 2.6+ and Python 3,
        # unlike the Python 2-only "except Exception, e" form.
        print('#############################')
        print('INVALID LICENSE RULE FILE:', self.data_file)
        print('#############################')
        print(e)
        print('#############################')
        # this is a rare case, but yes we abruptly stop.
        # A bare raise keeps the original traceback.
        raise
def load(self, src_dir):
    """
    Read and parse the license YAML file stored in self.data_file.
    Does not load text files.

    `src_dir` is accepted but not used by the code visible here.
    If the data file cannot be read or parsed, print a banner naming the
    file and re-raise the original error.
    """
    try:
        with codecs.open(self.data_file, encoding='utf-8') as f:
            # NOTE(review): `data` is not used in the visible code; confirm
            # whether the rest of this loader is missing.
            data = saneyaml.load(f.read())
    except Exception as e:
        # "except Exception as e" is valid on Python 2.6+ and Python 3,
        # unlike the Python 2-only "except Exception, e" form.
        # this is a rare case: fail loudly
        print()
        print('#############################')
        print('INVALID LICENSE YAML FILE:', self.data_file)
        print('#############################')
        print(e)
        print('#############################')
        raise
def get_yaml_data(location):
    """
    Parse the YAML file at `location` after substituting its variables and
    return the loaded data.

    The variables come from get_variables(location). Lines that look like a
    variable assignment (start with '{%', end with '%}' and contain '=')
    are dropped. In every other line containing both '{{' and '}}', each
    '{{ name }}' placeholder is replaced by the value of that variable.
    """
    variables = get_variables(location)
    yaml_lines = []
    with io.open(location, encoding='utf-8') as loc:
        for line in loc:
            pure_line = line.strip()
            is_assignment = (
                pure_line.startswith('{%')
                and pure_line.endswith('%}')
                and '=' in pure_line
            )
            if is_assignment:
                continue

            # Replace the variable with the value
            if '{{' in line and '}}' in line:
                for variable, value in variables.items():
                    line = line.replace('{{ ' + variable + ' }}', value)

            yaml_lines.append(line)

    # Each line still ends with its own newline: join without a separator
    # so that no blank line is inserted between lines, which would change
    # the value of multi-line block scalars.
    return saneyaml.load(''.join(yaml_lines))
def __attrs_post_init__(self, *args, **kwargs):
    """
    Complete the initialization of this object from its YAML data file.

    When self.data_file is set, load it and set every entry that has a
    truthy value as an attribute of self. Then convert the 'count' of each
    item of self.holders_summary, self.copyrights_summary and
    self.authors_summary to an int.

    Raise an Exception carrying the data file location, the repr of self
    and the formatted traceback when loading fails.
    """
    if self.data_file:
        try:
            with io.open(self.data_file, encoding='utf-8') as df:
                for key, value in saneyaml.load(df.read()).items():
                    if value:
                        setattr(self, key, value)
        except Exception:
            # Not a bare except: KeyboardInterrupt and SystemExit must
            # propagate unchanged instead of being wrapped below.
            import traceback
            msg = (
                'file://' + self.data_file + '\n'
                + repr(self) + '\n'
                + traceback.format_exc()
            )
            raise Exception(msg)

    # fix counts to be ints: sane yaml loads everything as string
    for holders_sum in self.holders_summary:
        holders_sum['count'] = int(holders_sum['count'])

    for copyrs_sum in self.copyrights_summary:
        copyrs_sum['count'] = int(copyrs_sum['count'])

    for auths_sum in self.authors_summary:
        auths_sum['count'] = int(auths_sum['count'])
def load(self):
    """
    Load self from a .RULE YAML file stored in self.data_file.
    Does not load the rule text file.

    Return self once its attributes have been set from the data file.

    Raise an Exception when the data file has attributes that are not
    fields of this class, when a license_expression is missing from a rule
    that is neither negative nor a false positive, when the relevance or
    minimum_coverage is out of range, when referenced_filenames is not a
    list, or when a negative or false positive rule has no notes.
    If the data file cannot be read or parsed, print a banner naming the
    file and re-raise the original error.
    """
    try:
        with io.open(self.data_file, encoding='utf-8') as f:
            data = saneyaml.load(f.read())
    except Exception as e:
        print('#############################')
        print('INVALID LICENSE RULE FILE:', 'file://' + self.data_file)
        print('#############################')
        print(e)
        print('#############################')
        # this is a rare case, but yes we abruptly stop.
        raise

    known_attributes = set(attr.fields_dict(self.__class__))
    data_file_attributes = set(data)
    unknown_attributes = data_file_attributes.difference(known_attributes)
    if unknown_attributes:
        unknown_attributes = ', '.join(sorted(unknown_attributes))
        msg = 'License rule {} data file has unknown attributes: {}'
        raise Exception(msg.format(self, unknown_attributes))

    self.license_expression = data.get('license_expression')
    self.is_negative = data.get('is_negative', False)
    self.is_false_positive = data.get('is_false_positive', False)

    if not self.license_expression and not (self.is_negative or self.is_false_positive):
        msg = 'License rule {} is missing a license_expression.'
        raise Exception(msg.format(self))

    relevance = float(data.get('relevance', 0))
    if relevance:
        if relevance <= 0 or relevance > 100:
            msg = ('License rule {} data file has an invalid relevance. '
                   'Should be above 0 and 100 or less: {}')
            raise Exception(msg.format(self, repr(relevance)))
        # Keep track if we have a stored relevance of not.
        self.relevance = relevance
        self.has_stored_relevance = True

    self.minimum_coverage = float(data.get('minimum_coverage', 0))
    self._minimum_containment = self.minimum_coverage / 100

    if not (0 <= self.minimum_coverage <= 100):
        # Two adjacent literals on separate lines: a single-quoted string
        # cannot contain a raw line break.
        msg = (
            'License rule {} data file has an invalid minimum_coverage. '
            'Should be between 0 and 100: {}')
        raise Exception(msg.format(self, self.minimum_coverage))

    self.is_license_text = data.get('is_license_text', False)
    self.is_license_notice = data.get('is_license_notice', False)
    self.is_license_tag = data.get('is_license_tag', False)
    self.is_license_reference = data.get('is_license_reference', False)
    self.only_known_words = data.get('only_known_words', False)

    # An explicit empty or null value in the data file also yields [].
    self.referenced_filenames = data.get('referenced_filenames', []) or []
    if not isinstance(self.referenced_filenames, list):
        msg = (
            'License rule {} data file has an invalid referenced_filenames. '
            'Should be a list: {}')
        raise Exception(msg.format(self, self.referenced_filenames))

    # these are purely informational and not used at run time
    notes = data.get('notes')
    if notes:
        self.notes = notes.strip()

    if not self.notes and (self.is_negative or self.is_false_positive):
        msg = 'Special License rule {} is missing explanatory notes.'
        raise Exception(msg.format(self))

    self.ignorable_copyrights = data.get('ignorable_copyrights', [])
    self.ignorable_holders = data.get('ignorable_holders', [])
    self.ignorable_authors = data.get('ignorable_authors', [])
    self.ignorable_urls = data.get('ignorable_urls', [])
    self.ignorable_emails = data.get('ignorable_emails', [])

    return self
def test_load_with_and_without_tags(self):
    """
    Check that saneyaml.load returns equal data for the 'metadata1' and
    'metadata1.notag' test files.
    """
    test_file_with_tag = self.get_test_loc('saneyaml/metadata1')
    test_file_without_tag = self.get_test_loc('saneyaml/metadata1.notag')

    # Read through context managers so both files are closed promptly.
    with open(test_file_with_tag, 'rb') as tagged:
        with_tags = saneyaml.load(tagged.read())
    with open(test_file_without_tag, 'rb') as untagged:
        without_tags = saneyaml.load(untagged.read())

    assert with_tags == without_tags