Esempio n. 1
0
    def parse(self):
        """Extract the page's RDF and pick the work that describes this URL.

        Calls ``self._extractRdf()`` and then walks ``self.rdf_chunks``.
        Each chunk is parsed with a fresh ``ccrdf.ccRdf``; a chunk that
        fails to parse is reported on stdout and skipped.

        A work is accepted when its subject is empty, equals ``self.url``,
        or is a prefix of ``self.url``.  Any other work is reported on
        stdout and skipped.  Every accepted work is stored in
        ``self.ccWork``, so the last accepted one wins.
        """
        # Before we parse the RDF here, maybe we have a link to some
        # other page via a link rel?

        # Okay, parse the RDF
        self._extractRdf()

        # We go through all the RDF chunks, looking for the one
        # that is about this local document, and we get all the information
        # from there.
        # NOTE(review): `found` is not read in the visible part of this
        # method; it is kept in case later code depends on it.
        found = 0
        for rdf_chunk in self.rdf_chunks:
            cc_rdf = ccrdf.ccRdf()
            try:
                cc_rdf.parse(rdf_chunk)
            except Exception as strerror:
                # Best effort: one broken chunk must not hide the others.
                print("BAD PARSING: %s" % strerror)
                continue

            # We check all the works
            for work in cc_rdf.works():
                subject = work.subject

                # Is it about the right document?
                if subject != "" and subject != self.url:
                    # Special case: the subject may be a prefix of the URL.
                    # Use startswith() rather than find() so a subject that
                    # merely occurs somewhere inside the URL is rejected.
                    if not self.url.startswith(subject):
                        print("BAD SUBJECT: %s" % subject)
                        continue

                # Okay, we are dealing with the right thing
                found = 1

                # Store the cc work data structure
                self.ccWork = work
Esempio n. 2
0
    def xmp(self):
        """Return the XMP metadata embedded in the file, scanning on first use.

        If ``self.__xmp`` is ``None`` the file named by ``self.filename`` is
        read and searched for each (start, end) marker pair taken from
        ``self.START_STRINGS`` and ``self.END_STRINGS``.  Every complete
        marker-delimited span that is found is parsed into a single
        ``ccrdf.ccRdf`` object, and the raw text of the last such span is
        kept in ``self.xmp_data`` (``""`` when none was found).

        Later calls return the cached object without touching the file.

        Returns the ``ccrdf.ccRdf`` instance stored in ``self.__xmp``.
        """

        if self.__xmp is None:
            # Build into locals and publish only on success, so a failed
            # read or parse is not cached as if it were a valid result.
            parsed = ccrdf.ccRdf()
            raw_xmp = ""

            # Read the whole file and close the handle deterministically.
            source = open(self.filename, 'r')
            try:
                file_contents = source.read()
            finally:
                source.close()

            for start_tag, end_tag in \
                    zip(self.START_STRINGS, self.END_STRINGS):

                start = file_contents.find(start_tag)
                if start == -1:
                    continue

                # Search for the end marker only after the start marker; an
                # earlier occurrence cannot close this span.
                end = file_contents.find(end_tag, start + len(start_tag))
                if end == -1:
                    # Unterminated span: skip it instead of slicing with -1.
                    continue

                # found the embedded RDF
                rdf_string = file_contents[start:end + len(end_tag)]

                # store the raw string
                raw_xmp = rdf_string

                # parse the RDF
                parsed.parse(rdf_string)

            self.xmp_data = raw_xmp
            self.__xmp = parsed

        return self.__xmp
Esempio n. 3
0
def verify(filename):
    """Check a file's embedded license claim against its verification page.

    The claim is read from the file's metadata, the RDF published at the
    claim's 'verify at' URL is fetched, and that RDF is searched for a work
    whose subject is the file's ``urn:sha1:`` hash and whose licenses
    include the claimed one.

    Returns one of the following status codes:
    1     Verified
    0     No RDF
    -1    Work information not found (possible SHA1 mismatch)
    -2    Verification license does not match claim.

    Raises cctag.exceptions.NotLicensedException when the file carries no
    claim at all.
    """

    claim = metadata(filename).getClaim()
    if claim is None:
        raise cctag.exceptions.NotLicensedException

    fileinfo = parseClaim(claim)
    expected_subject = 'urn:sha1:%s' % cctag.rdf.fileHash(filename)
    fileinfo['sha'] = expected_subject

    # fetch the verification page and pull out every RDF block it contains
    extractor = rdfextract.RdfExtractor()
    page = rdfextract.retrieveUrl(fileinfo['verify at'])
    rdf_blocks = extractor.extractRdfText(page)

    # no RDF at all -> 0; some RDF but (so far) no matching work -> -1
    if len(rdf_blocks) > 0:
        outcome = -1
    else:
        outcome = 0

    # a verification page may also have its own license RDF embedded,
    # so every block has to be examined
    for rdf_text in rdf_blocks:
        parsed = ccrdf.ccRdf()
        parsed.parse(rdf_text)

        for work in parsed.works():
            # skip works that describe some other file
            if work.subject != expected_subject:
                continue

            # the work was found; the only remaining way to fail is a
            # license mismatch
            outcome = -2

            for candidate in work.licenses():
                if candidate == fileinfo['license']:
                    return 1

    # either the work wasn't found, or the license didn't match
    return outcome