Ejemplo n.º 1
0
    def publish(self):
        """
        Try and publish; if something goes wrong, remove the zip end and re-raise.

        Makes sure resource_dir and publish_dir exist and then delegates to
        do_publish(). When do_publish() raises, the error recovery
          - deletes the zip-end zip, xml and manifest files from publish_dir,
          - drops the zip-end entries from an existing resource-dump.xml,
          - re-raises the original exception.

        :return: the result of do_publish(), documented as
                 (  boolean indicating if change in sink directory or subdirectories,
                    amount of resources definitively packaged,
                    the difference of resources provisionally packaged)
        :raises: whatever do_publish() raised, after the recovery walk-through.
        """
        for directory in (self.resource_dir, self.publish_dir):
            if not os.path.isdir(directory):
                os.makedirs(directory)

        try:
            return self.do_publish()
        except:
            # The bare except is intentional: this is a cleanup handler that
            # always ends in a bare raise, so nothing is swallowed.
            # Something went wrong. Best we can do is clean up end of zip chain.
            zip_end_patterns = (
                PREFIX_END_PART + "*.zip",
                PREFIX_END_PART + "*.xml",
                PREFIX_MANIFEST + PREFIX_END_PART + "*.xml",
            )
            for pattern in zip_end_patterns:
                for stale_path in glob(os.path.join(self.publish_dir, pattern)):
                    os.remove(stale_path)
                    print("error recovery: removed %s" % stale_path)

            # Remove zip-end entries from resource-dump.xml. Only an existing
            # file is rewritten: creating an empty one here would leave a dump
            # without start time and 'up' link, because publish_metadata only
            # sets those when the file does not exist yet.
            rs_dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
            if os.path.isfile(rs_dump_path):
                rs_dump = ResourceDump()
                with open(rs_dump_path, "r") as rs_dump_file:
                    sm = Sitemap()
                    sm.parse_xml(rs_dump_file, resources=rs_dump)

                prefix = self.publish_url + PREFIX_END_PART

                # Iterate over a snapshot of the keys: entries are deleted
                # inside the loop, which is an error on a live key view.
                for uri in list(rs_dump.resources.keys()):
                    if uri.startswith(prefix):
                        del rs_dump.resources[uri]
                        print("error recovery: removed %s from %s" % (uri,
                                                                      rs_dump_path))

                with open(rs_dump_path, "w") as rs_dump_file:
                    rs_dump_file.write(rs_dump.as_xml())

            print("error recovery: walk through error recovery completed. Now raising ...")
            raise
Ejemplo n.º 2
0
 def test_build_ex_17(self):
     """Resource Dump with 3 entries and some metadata"""
     dump = ResourceDump()
     dump.up = 'http://example.com/dataset1/capabilitylist.xml'
     dump.md_at = "2013-01-03T09:00:00Z"
     dump.md_completed = "2013-01-03T09:04:00Z"

     # One row per zip part: (uri, length, md_at, md_completed, manifest href)
     parts = (
         ('http://example.com/resourcedump-part1.zip', 4765,
          "2013-01-03T09:00:00Z", "2013-01-03T09:02:00Z",
          "http://example.com/resourcedump_manifest-part1.xml"),
         ('http://example.com/resourcedump-part2.zip', 9875,
          "2013-01-03T09:01:00Z", "2013-01-03T09:03:00Z",
          "http://example.com/resourcedump_manifest-part2.xml"),
         ('http://example.com/resourcedump-part3.zip', 2298,
          "2013-01-03T09:03:00Z", "2013-01-03T09:04:00Z",
          "http://example.com/resourcedump_manifest-part3.xml"),
     )
     for uri, length, started, finished, manifest_href in parts:
         part = Resource(uri=uri,
                         mime_type="application/zip",
                         length=length,
                         md_at=started,
                         md_completed=finished)
         part.link_set(rel="contents",
                       href=manifest_href,
                       mime_type="application/xml")
         dump.add(part)

     # Compare the generated XML with the stored example document.
     expected_xml = self._open_ex('resourcesync_ex_17').read()
     self._assert_xml_equal(dump.as_xml(), expected_xml)
Ejemplo n.º 3
0
    def publish_metadata(self, new_zips, exluded_zip=None):
        """
        (Re)publish resource-dump.xml, and capability-list.xml when it is missing.

        An existing resource dump is loaded and extended with new_zips; otherwise a
        fresh one is started, stamped with the current time and linked 'up' to the
        capability list. An excluded zip is removed from the previously published
        metadata before the new zips are added.

        :param new_zips: a resourcelist with newly created zip resources
        :param exluded_zip: local path to zip file that will be removed from previously published metadata.
        :raises RuntimeError: if exluded_zip is given but not listed in the resource dump.
        """
        dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
        dump_url = self.publish_url + RS_RESOURCE_DUMP_XML
        capability_path = os.path.join(self.publish_dir, RS_CAPABILITY_LIST_XML)
        capability_url = self.publish_url + RS_CAPABILITY_LIST_XML

        dump = ResourceDump()

        if not os.path.isfile(dump_path):
            # Nothing published yet: record the start time and the 'up' link.
            dump.md_at = w3cdt.datetime_to_str(no_fractions=True)
            dump.link_set(rel="up", href=capability_url)
        else:
            # Continue from what was published before.
            with open(dump_path, "r") as dump_file:
                Sitemap().parse_xml(dump_file, resources=dump)

        # Remove excluded zip, if any
        if exluded_zip:
            excluded_loc = self.publish_url + os.path.basename(exluded_zip)
            if excluded_loc not in dump.resources:
                raise RuntimeError("Could not find %s in %s" %
                                   (excluded_loc, dump_path))
            del dump.resources[excluded_loc]

        # Add new zips
        for zip_resource in new_zips:
            dump.add(zip_resource)

        # Write resource-dump.xml
        dump.md_completed = w3cdt.datetime_to_str(no_fractions=True)
        with open(dump_path, "w") as dump_file:
            dump_file.write(dump.as_xml())

        # The name of the publish directory is decoded with the url-safe
        # base64 alphabet; the result is only used in the message below.
        encoded_name = os.path.basename(self.publish_dir)
        iri = base64.urlsafe_b64decode(encoded_name).rstrip('\n')

        print("New %s for graph %s" % (RS_RESOURCE_DUMP_XML, iri))
        print("See %s" % dump_url)

        # Write capability-list.xml, but never overwrite an existing one.
        if os.path.isfile(capability_path):
            return

        capability_list = CapabilityList()
        capability_list.link_set(rel="up", href=self.src_desc_url)
        capability_list.add_capability(dump, dump_url)
        with open(capability_path, "w") as capability_file:
            capability_file.write(capability_list.as_xml())

        print("New %s. See %s" % (RS_CAPABILITY_LIST_XML, capability_url))
Ejemplo n.º 4
0
 def test01_as_xml(self):
     """Serialise a two-entry ResourceDump and check the capability and entry 'a' in the XML."""
     dump = ResourceDump()
     for name, stamp in (('a.zip', 1), ('b.zip', 2)):
         dump.add(Resource(name, timestamp=stamp))
     serialised = dump.as_xml()

     # Each row is (regular expression expected to match, assertion message).
     expectations = (
         (r'<rs:md .*capability="resourcedump"', 'XML has capability'),
         (r'<url><loc>a.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>',
          'XML has resource a'),
     )
     for pattern, message in expectations:
         self.assertTrue(re.search(pattern, serialised), message)
Ejemplo n.º 5
0
 def test_build_ex_04(self):
     """Simple Resource Dump document """
     stamp = '2013-01-03T09:00:00Z'
     dump = ResourceDump()
     dump.md_at = stamp
     zip_entry = Resource(uri='http://example.com/resourcedump.zip',
                          lastmod=stamp)
     dump.add(zip_entry)

     # Compare the generated XML with the stored example document.
     expected_xml = self._open_ex('resourcesync_ex_4').read()
     self._assert_xml_equal(dump.as_xml(), expected_xml)
Ejemplo n.º 6
0
    def publish_metadata(self, new_zips, exluded_zip=None):
        """
        Write resource-dump.xml with new_zips added, and capability-list.xml if absent.

        A previously published resource dump is read back and updated. When there
        is none, a new dump gets the current time as start time and an 'up' link
        to the capability list. If exluded_zip is given, its entry is deleted from
        the dump before new_zips are added.

        :param new_zips: a resourcelist with newly created zip resources
        :param exluded_zip: local path to zip file that will be removed from previously published metadata.
        :raises RuntimeError: if the entry for exluded_zip is not in the resource dump.
        """
        publish_dir = self.publish_dir
        publish_url = self.publish_url
        rs_dump_url = publish_url + RS_RESOURCE_DUMP_XML
        rs_dump_path = os.path.join(publish_dir, RS_RESOURCE_DUMP_XML)
        capa_list_url = publish_url + RS_CAPABILITY_LIST_XML
        capa_list_path = os.path.join(publish_dir, RS_CAPABILITY_LIST_XML)

        resource_dump = ResourceDump()
        has_previous_dump = os.path.isfile(rs_dump_path)

        if has_previous_dump:
            # Load existing resource-dump.
            with open(rs_dump_path, "r") as source:
                parser = Sitemap()
                parser.parse_xml(source, resources=resource_dump)
        else:
            # First dump for this directory: set start time and 'up' link.
            resource_dump.md_at = w3cdt.datetime_to_str(no_fractions=True)
            resource_dump.link_set(rel="up", href=capa_list_url)

        if exluded_zip:
            # The published location is the publish url plus the zip file name.
            loc = publish_url + os.path.basename(exluded_zip)
            if loc not in resource_dump.resources:
                raise RuntimeError("Could not find %s in %s" % (loc, rs_dump_path))
            del resource_dump.resources[loc]

        for new_zip in new_zips:
            resource_dump.add(new_zip)

        # Stamp completion time and write resource-dump.xml.
        resource_dump.md_completed = w3cdt.datetime_to_str(no_fractions=True)
        with open(rs_dump_path, "w") as target:
            target.write(resource_dump.as_xml())

        # The directory name is decoded with the url-safe base64 alphabet;
        # the decoded value is only reported in the message below.
        dir_name = os.path.basename(publish_dir)
        iri = base64.urlsafe_b64decode(dir_name).rstrip("\n")

        print("New %s for graph %s" % (RS_RESOURCE_DUMP_XML, iri))
        print("See %s" % rs_dump_url)

        # An existing capability-list.xml is left untouched.
        if not os.path.isfile(capa_list_path):
            capabilities = CapabilityList()
            capabilities.link_set(rel="up", href=self.src_desc_url)
            capabilities.add_capability(resource_dump, rs_dump_url)
            with open(capa_list_path, "w") as target:
                target.write(capabilities.as_xml())

            print("New %s. See %s" % (RS_CAPABILITY_LIST_XML, capa_list_url))
Ejemplo n.º 7
0
    def publish(self):
        """
        Try and publish or remove zip end if something went wrong.

        Creates resource_dir and publish_dir when they are missing, then calls
        do_publish(). If do_publish() raises, the zip-end files (zip, xml and
        manifest) are removed from publish_dir, zip-end entries are removed from
        resource-dump.xml if that file exists, and the exception is re-raised.

        :return: the result of do_publish(), documented as
                 (  boolean indicating if change in sink directory or subdirectories,
                    amount of resources definitively packaged,
                    the difference of resources provisionally packaged)
        :raises: the exception raised by do_publish(), once recovery has run.
        """
        if not os.path.isdir(self.resource_dir):
            os.makedirs(self.resource_dir)

        if not os.path.isdir(self.publish_dir):
            os.makedirs(self.publish_dir)

        try:
            return self.do_publish()
        except:
            # Cleanup handler: deliberately catches everything, because it
            # always re-raises at the end.
            # Something went wrong. Best we can do is clean up end of zip chain.
            for file_pattern in (PREFIX_END_PART + "*.zip",
                                 PREFIX_END_PART + "*.xml",
                                 PREFIX_MANIFEST + PREFIX_END_PART + "*.xml"):
                for leftover in glob(os.path.join(self.publish_dir, file_pattern)):
                    os.remove(leftover)
                    print("error recovery: removed %s" % leftover)

            # remove zip-end entries from resource-dump.xml
            rs_dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
            # Without an existing file there is nothing to remove. Writing an
            # empty dump here would produce a file lacking the start time and
            # 'up' link that publish_metadata sets only for a missing file.
            if os.path.isfile(rs_dump_path):
                rs_dump = ResourceDump()
                with open(rs_dump_path, "r") as rs_dump_file:
                    sm = Sitemap()
                    sm.parse_xml(rs_dump_file, resources=rs_dump)

                prefix = self.publish_url + PREFIX_END_PART

                # Copy the keys first; the loop deletes from the same mapping.
                for uri in list(rs_dump.resources.keys()):
                    if uri.startswith(prefix):
                        del rs_dump.resources[uri]
                        print("error recovery: removed %s from %s" % (uri, rs_dump_path))

                with open(rs_dump_path, "w") as rs_dump_file:
                    rs_dump_file.write(rs_dump.as_xml())

            print("error recovery: walk through error recovery completed. Now raising ...")
            raise
Ejemplo n.º 8
0
 def test01_as_xml(self):
     """Check that as_xml() output carries the resourcedump capability and the entry for a.zip."""
     resource_dump = ResourceDump()
     first = Resource('a.zip', timestamp=1)
     second = Resource('b.zip', timestamp=2)
     resource_dump.add(first)
     resource_dump.add(second)
     document = resource_dump.as_xml()

     capability_match = re.search(r'<rs:md .*capability="resourcedump"',
                                  document)
     self.assertTrue(capability_match, 'XML has capability')

     entry_pattern = (
         r'<url><loc>a.zip</loc><lastmod>1970-01-01T00:00:01Z</lastmod></url>')
     entry_match = re.search(entry_pattern, document)
     self.assertTrue(entry_match, 'XML has resource a')
Ejemplo n.º 9
0
    def get_resource_dump_xml(self, from_date=None, to_date=None):
        """
        Get content of resource dump.

        Every item returned for this repository's index tree becomes one zip
        resource; when resource_dump_manifest is set, each resource also gets a
        'contents' link to its manifest. Items whose '_updated' date falls
        outside the optional date range are left out.

        :param from_date: optional lower bound, parsed with parse_date;
            items updated before it are skipped.
        :param to_date: optional upper bound, parsed with parse_date;
            items updated after it are skipped.
        :return: (xml) resource dump content, or None when validation fails
        """
        if not self._validation():
            return None

        from .utils import parse_date

        lower_bound = parse_date(from_date) if from_date else from_date
        upper_bound = parse_date(to_date) if to_date else to_date

        items = get_items_by_index_tree(self.repository_id)
        dump = ResourceDump()
        dump.up = INVENIO_CAPABILITY_URL.format(request.url_root)

        for item in items:
            if not item:
                continue

            source = item.get('_source')
            updated = source.get('_updated')
            updated_on = parse_date(updated)
            if lower_bound and lower_bound > updated_on:
                continue
            if upper_bound and upper_bound < updated_on:
                continue

            record_id = str(source.get('control_number'))
            zip_url = '{}resync/{}/{}/file_content.zip'.format(
                request.url_root, self.repository_id, record_id)
            resource = Resource(zip_url, lastmod=updated, ln=[])

            if self.resource_dump_manifest:
                manifest_link = {
                    'rel': 'contents',
                    'href': '{}resync/{}/{}/resourcedump_manifest.xml'.format(
                        request.url_root, self.repository_id, record_id),
                    'type': 'application/xml'
                }
                resource.ln.append(manifest_link)

            dump.add(resource)

        return dump.as_xml()