def parse(self):
    """Extract the embedded RDF and remember the work about this document.

    Calls ``self._extractRdf()`` (presumably what fills
    ``self.rdf_chunks`` -- confirm against that method) and then walks
    ``self.rdf_chunks``.  Each chunk is parsed with ``ccrdf.ccRdf``; a
    chunk that fails to parse is reported on stdout and skipped.

    A work is accepted when its subject is empty, equals ``self.url``, or
    occurs somewhere inside ``self.url``.  Every accepted work is stored
    in ``self.ccWork``, so the last accepted one wins.  ``self.ccWork`` is
    not assigned at all when no work is accepted.
    """
    # TODO: before parsing the RDF here, maybe we have a link to some
    # other page via a link rel?
    self._extractRdf()

    # Go through all the RDF chunks, looking for the one that is about
    # this local document.
    for rdf_chunk in self.rdf_chunks:
        cc_rdf = ccrdf.ccRdf()
        try:
            cc_rdf.parse(rdf_chunk)
        except Exception as err:
            # Best effort: one malformed chunk must not hide the others.
            print("BAD PARSING: %s" % err)
            continue

        for work in cc_rdf.works():
            subject = work.subject

            # Is it about the right document?
            if subject != "" and subject != self.url:
                # Special case: also accept a subject contained in the URL.
                # NOTE(review): this is a substring test, not a strict
                # prefix test -- confirm that is what is wanted.
                if subject not in self.url:
                    print("BAD SUBJECT: %s" % subject)
                    continue

            # We are dealing with the right thing; store the work.
            self.ccWork = work
def xmp(self):
    """Return the parsed XMP metadata embedded in ``self.filename``.

    On the first call the file is read and scanned once for every
    (start, end) marker pair taken from ``self.START_STRINGS`` and
    ``self.END_STRINGS``.  Each complete packet found is fed to a
    ``ccrdf.ccRdf`` instance, and the raw text of the last packet found
    is kept in ``self.xmp_data`` (``""`` when nothing matched).  Later
    calls return the cached instance without touching the file again.

    Returns the ``ccrdf.ccRdf`` instance; nothing has been parsed into it
    when no marker pair matched.
    """
    if self.__xmp is None:
        # Build into locals and publish only once the scan has finished,
        # so a failed read or parse does not leave a half-filled object
        # cached for every later call.
        parsed = ccrdf.ccRdf()
        raw_packet = ""

        # Close the handle deterministically instead of leaking it.
        with open(self.filename, 'r') as source:
            file_contents = source.read()

        for start_tag, end_tag in zip(self.START_STRINGS, self.END_STRINGS):
            start = file_contents.find(start_tag)
            if start == -1:
                continue

            # Search for the end marker from the start marker onwards; a
            # missing end marker means there is no complete packet to
            # slice out, so skip this pair rather than parse garbage.
            end = file_contents.find(end_tag, start)
            if end == -1:
                continue

            # found the embedded RDF: keep the raw string and parse it
            raw_packet = file_contents[start:end + len(end_tag)]
            parsed.parse(raw_packet)

        self.xmp_data = raw_packet
        self.__xmp = parsed

    return self.__xmp
def verify(filename):
    """Check a file's embedded license claim against its verification page.

    The claim is read from the file's metadata, the page named by the
    claim's 'verify at' entry is retrieved, and every RDF block extracted
    from it is searched for a work whose subject equals the file's
    ``urn:sha1:`` hash and which lists the claimed license.

    Returns one of the following status codes:
     1    Verified
     0    No RDF
    -1    Work information not found (possible SHA1 mismatch)
    -2    Verification license does not match claim.

    Raises cctag.exceptions.NotLicensedException when the file has no
    claim.
    """
    claim = metadata(filename).getClaim()
    if claim is None:
        raise cctag.exceptions.NotLicensedException

    fileinfo = parseClaim(claim)
    fileinfo['sha'] = 'urn:sha1:%s' % cctag.rdf.fileHash(filename)

    extractor = rdfextract.RdfExtractor()
    verify_page = rdfextract.retrieveUrl(fileinfo['verify at'])
    rdf_blocks = extractor.extractRdfText(verify_page)

    # Finding any RDF at all moves the outcome from "no RDF" (0) to
    # "work information not found" (-1).
    if len(rdf_blocks) > 0:
        outcome = -1
    else:
        outcome = 0

    # Inspect every block: a verification page may also embed its own
    # license RDF next to the work information.
    for rdf_block in rdf_blocks:
        parsed = ccrdf.ccRdf()
        parsed.parse(rdf_block)

        for work in parsed.works():
            if not (work.subject == fileinfo['sha']):
                continue

            # The work information exists, so the only remaining way to
            # fail is a license mismatch.
            outcome = -2

            for claimed in work.licenses():
                if claimed == fileinfo['license']:
                    return 1

    # Either the work was not found or its license did not match.
    return outcome