Beispiel #1
0
    def get_resource_list_xml(self, from_date=None, to_date=None):
        """
        Get content of resource list.

        Items whose ``_updated`` value, once parsed, falls before
        ``from_date`` or after ``to_date`` are left out of the list.

        :param from_date: optional lower bound, in a form accepted by
            ``str_to_datetime``; ignored when falsy
        :param to_date: optional upper bound, in a form accepted by
            ``str_to_datetime``; ignored when falsy
        :return: (xml) resource list content, or None when validation fails
        """
        if not self._validation():
            return None
        r = get_items_by_index_tree(self.repository_id)

        rl = ResourceList()
        rl.up = INVENIO_CAPABILITY_URL.format(request.url_root)

        # The bounds are identical for every item, so parse them once here
        # instead of once per item inside the loop.
        lower_bound = str_to_datetime(from_date) if from_date else None
        upper_bound = str_to_datetime(to_date) if to_date else None

        for item in r:
            if not item:
                continue
            source = item.get('_source')
            updated = source.get('_updated')
            resource_date = str_to_datetime(updated)
            # A bound is only applied when the caller actually supplied it.
            if from_date and lower_bound > resource_date:
                continue
            if to_date and upper_bound < resource_date:
                continue
            url = '{}resync/{}/records/{}'.format(
                request.url_root,
                str(self.repository_id),
                str(source.get('control_number')))
            rl.add(Resource(url, lastmod=updated))
        return rl.as_xml()
Beispiel #2
0
 def __process_resource__(self, resource):
     """Process the resource dump that ``resource`` points to, if needed.

     The dump at ``resource.uri`` is processed when no state is stored for
     that uri, when the parsed ``md_at`` is None, or when ``md_at`` is
     greater than the stored state. Otherwise the uri is reported as in
     sync.
     """
     # The 'at' attribute is optional on this resource ('may have').
     dump_time = w3c.str_to_datetime(resource.md_at)
     synced_time = ClientState().get_state(resource.uri)
     up_to_date = (synced_time is not None
                   and dump_time is not None
                   and not dump_time > synced_time)
     if up_to_date:
         des.reporter.instance().log_status(uri=resource.uri, in_sync=True)
         return
     self.__process_dump__(resource.uri)
Beispiel #3
0
 def __process_resource__(self, resource):
     """Decide whether the dump behind ``resource`` has to be processed.

     ``resource.uri`` is handed to ``__process_dump__`` unless a stored
     state exists, the parsed ``md_at`` is not None, and ``md_at`` is not
     greater than that state; in that case the uri is logged as in sync.
     """
     # 'md_at' may be absent on a resource that points to a resource dump.
     at_time = w3c.str_to_datetime(resource.md_at)
     previous_sync = ClientState().get_state(resource.uri)
     needs_processing = (previous_sync is None
                         or at_time is None
                         or at_time > previous_sync)
     if not needs_processing:
         des.reporter.instance().log_status(uri=resource.uri, in_sync=True)
     else:
         self.__process_dump__(resource.uri)
Beispiel #4
0
 def test02_str_to_datetime(self):
     """Check the numeric value str_to_datetime returns for known strings."""
     expectations = (
         # Spellings of the epoch itself.
         ("1970-01-01T00:00:00Z", 0),
         ("1970-01-01T00:00:00.000Z", 0),
         ("1970-01-01T00:00:00+00:00", 0),
         ("1970-01-01T00:00:00-00:00", 0),
         # Fractional seconds.
         ("1970-01-01T00:00:00.000001Z", 0.000001),
         ("1970-01-01T00:00:00.1Z", 0.1),
         ("1970-01-01T00:00:00.100000Z", 0.1),
         # Random other datetime
         ("2009-02-13T23:31:30Z", 1234567890),
     )
     for text, expected in expectations:
         self.assertEqual(str_to_datetime(text), expected)
Beispiel #5
0
 def test03_same(self):
     """Different spellings of the same instant must parse to equal values."""
     reference_str = '2012-01-01T00:00:00Z'
     equivalents = (
         '2012',
         '2012-01',
         '2012-01-01',
         '2012-01-01T00:00Z',
         '2012-01-01T00:00:00Z',
         '2012-01-01T00:00:00.000000Z',
         '2012-01-01T00:00:00.000000000000Z',
         '2012-01-01T00:00:00.000000000001Z',  # below resolution
         '2012-01-01T00:00:00.00+00:00',
         '2012-01-01T00:00:00.00-00:00',
         '2012-01-01T02:00:00.00-02:00',
         '2011-12-31T23:00:00.00+01:00',
     )
     reference = str_to_datetime(reference_str)
     for candidate_str in equivalents:
         candidate = str_to_datetime(candidate_str)
         message = '%s (%f) == %s (%f)' % (
             reference_str, reference, candidate_str, candidate)
         self.assertEqual(reference, candidate, message)
Beispiel #6
0
    def __process_lower__(self):
        """Process every resource of the source document unless it is in sync.

        The document counts as in sync when a state is stored for
        ``self.source_uri`` and the document's parsed ``md_at`` is not
        greater than it. Otherwise each resource is processed and, if
        ``self.exceptions`` is empty afterwards, ``md_at`` is stored as the
        new state.
        """
        # The source document is a urlset whose url/loc entries point to
        # packaged resources; its 'at' attribute is mandatory ('must have').
        document_time = w3c.str_to_datetime(self.source_document.md_at)
        synced_time = ClientState().get_state(self.source_uri)

        if synced_time is not None and not document_time > synced_time:
            self.logger.debug("In sync: %s" % self.source_uri)
            des.reporter.instance().log_status(uri=self.source_uri, in_sync=True)
            return

        for resource in self.source_document.resources:
            self.__process_resource__(resource)

        # Only remember the new state when self.exceptions stayed empty.
        if len(self.exceptions) == 0:
            ClientState().set_state(self.source_uri, document_time)
Beispiel #7
0
    def __process_lower__(self):
        """Synchronise the resources listed in the source document.

        When no state is stored for ``self.source_uri``, or the document's
        parsed ``md_at`` is greater than the stored state, every resource is
        passed to ``__process_resource__``; the state is then updated to
        ``md_at`` provided ``self.exceptions`` is empty. In all other cases
        the uri is logged and reported as in sync.
        """
        # The source document is a urlset with url/loc's pointing to
        # packaged resources. Its 'at' attribute is a 'must have'.
        at_time = w3c.str_to_datetime(self.source_document.md_at)
        previous_sync = ClientState().get_state(self.source_uri)
        out_of_sync = previous_sync is None or at_time > previous_sync

        if not out_of_sync:
            self.logger.debug("In sync: %s" % self.source_uri)
            reporter = des.reporter.instance()
            reporter.log_status(uri=self.source_uri, in_sync=True)
            return

        for resource in self.source_document.resources:
            self.__process_resource__(resource)

        if len(self.exceptions) == 0:
            ClientState().set_state(self.source_uri, at_time)
Beispiel #8
0
    def extract(self, rl):
        """Fold the statistics of one ResourceList into this object.

        Counters and collections are added to rather than reset, so
        calling extract() on several ResourceLists yields aggregate
        stats.
        """
        # Update offsets are measured against the list's md_at value when
        # it is set, otherwise against the current time.
        ref_datetime = str_to_datetime(rl.md_at) if rl.md_at else time.time()
        self.resource_count += len(rl)
        for r in rl:
            length = r.length
            if length is None:
                size = 0
                self.no_size += 1
            else:
                size = length
                self.sizes.append(size)
                if size > 0:
                    self.sizes_log.append(math.log10(size))
                if size > self.sizes_max:
                    self.sizes_max = size

            stamp = r.timestamp
            if stamp is None:
                self.no_timestamp += 1
            else:
                self.updates.append(stamp - ref_datetime)  # seconds
                # An oldest/newest value of 0 is treated as "not set yet".
                if self.oldest == 0 or stamp < self.oldest:
                    self.oldest = stamp
                if self.newest == 0 or stamp > self.newest:
                    self.newest = stamp

            # URI/file extension as a surrogate for media type
            ext = self.extension(r.uri)
            counts = self.extensions_count
            totals = self.extensions_size
            counts[ext] = counts.get(ext, 0) + 1
            totals[ext] = totals.get(ext, 0) + size
Beispiel #9
0
    def extract(self, rl):
        """Accumulate size, timestamp and extension stats from a ResourceList.

        Nothing is reset between calls: data is appended to this object's
        collections, so calling extract() on multiple ResourceLists gives
        aggregate stats.
        """
        # Reference point for update offsets: the list's md_at value when
        # present, the current time otherwise.
        ref_datetime = str_to_datetime(rl.md_at) if rl.md_at else time.time()
        self.resource_count += len(rl)
        for r in rl:
            length = r.length
            if length is None:
                size = 0
                self.no_size += 1
            else:
                size = length
                self.sizes.append(size)
                if size > 0:
                    self.sizes_log.append(math.log10(size))
                if size > self.sizes_max:
                    self.sizes_max = size

            stamp = r.timestamp
            if stamp is None:
                self.no_timestamp += 1
            else:
                self.updates.append(stamp - ref_datetime)  # seconds
                # 0 in oldest/newest means no timestamp has been seen yet.
                if self.oldest == 0 or stamp < self.oldest:
                    self.oldest = stamp
                if self.newest == 0 or stamp > self.newest:
                    self.newest = stamp

            # URI/file extension as a surrogate for media type
            ext = self.extension(r.uri)
            counts = self.extensions_count
            totals = self.extensions_size
            counts[ext] = counts.get(ext, 0) + 1
            totals[ext] = totals.get(ext, 0) + size
Beispiel #10
0
def rt(dts):
    """Round-trip ``dts`` through str_to_datetime and datetime_to_str."""
    parsed = str_to_datetime(dts)
    return datetime_to_str(parsed)