def get_resource_list_xml(self, from_date=None, to_date=None):
    """Get content of resource list.

    :return: (xml) resource list content
    """
    if not self._validation():
        return None
    r = get_items_by_index_tree(self.repository_id)
    rl = ResourceList()
    rl.up = INVENIO_CAPABILITY_URL.format(request.url_root)
    for item in r:
        if item:
            resource_date = str_to_datetime(
                item.get('_source').get('_updated'))
            if from_date and str_to_datetime(from_date) > resource_date:
                continue
            if to_date and str_to_datetime(to_date) < resource_date:
                continue
            id_item = item.get('_source').get('control_number')
            # url = '{}records/{}'.format(request.url_root, str(id_item))
            url = '{}resync/{}/records/{}'.format(request.url_root,
                                                  str(self.repository_id),
                                                  str(id_item))
            rl.add(
                Resource(url, lastmod=item.get('_source').get('_updated')))
    return rl.as_xml()
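# A minimal, self-contained sketch of the ResourceList assembly performed above,
# assuming Resource/ResourceList come from the resync library (the imports are
# not shown in the source); the URL and lastmod values here are illustrative only.
from resync.resource import Resource
from resync.resource_list import ResourceList

rl = ResourceList()
rl.up = 'https://repo.example.org/resync/capability.xml'  # hypothetical capability URL
rl.add(Resource('https://repo.example.org/resync/1/records/42',
                lastmod='2012-01-01T00:00:00Z'))
print(rl.as_xml())  # sitemap-style <urlset> XML, one <url> entry per resource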
def __process_resource__(self, resource):
    # the resource points to a resource dump.
    md_at = w3c.str_to_datetime(resource.md_at)  # 'may have' at attribute
    last_synced = ClientState().get_state(resource.uri)
    if last_synced is None or md_at is None or md_at > last_synced:
        self.__process_dump__(resource.uri)
    else:
        des.reporter.instance().log_status(uri=resource.uri, in_sync=True)
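# The freshness check above, isolated for clarity: a resource dump is fetched
# again only when there is no recorded client state, no md_at timestamp, or the
# dump changed after the last recorded sync. needs_processing() is a
# hypothetical helper, not part of the class above.
def needs_processing(md_at, last_synced):
    """Return True when the remote resource should be (re)processed."""
    return last_synced is None or md_at is None or md_at > last_synced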
def test02_str_to_datetime(self):
    """Reading."""
    self.assertEqual(str_to_datetime("1970-01-01T00:00:00Z"), 0)
    self.assertEqual(str_to_datetime("1970-01-01T00:00:00.000Z"), 0)
    self.assertEqual(str_to_datetime("1970-01-01T00:00:00+00:00"), 0)
    self.assertEqual(str_to_datetime("1970-01-01T00:00:00-00:00"), 0)
    self.assertEqual(
        str_to_datetime("1970-01-01T00:00:00.000001Z"), 0.000001)
    self.assertEqual(str_to_datetime("1970-01-01T00:00:00.1Z"), 0.1)
    self.assertEqual(str_to_datetime("1970-01-01T00:00:00.100000Z"), 0.1)
    # Random other datetime
    self.assertEqual(str_to_datetime("2009-02-13T23:31:30Z"), 1234567890)
def test03_same(self):
    """Datetime values that are the same."""
    astr = '2012-01-01T00:00:00Z'
    a = str_to_datetime(astr)
    for bstr in ('2012',
                 '2012-01',
                 '2012-01-01',
                 '2012-01-01T00:00Z',
                 '2012-01-01T00:00:00Z',
                 '2012-01-01T00:00:00.000000Z',
                 '2012-01-01T00:00:00.000000000000Z',
                 '2012-01-01T00:00:00.000000000001Z',  # below resolution
                 '2012-01-01T00:00:00.00+00:00',
                 '2012-01-01T00:00:00.00-00:00',
                 '2012-01-01T02:00:00.00+02:00',
                 '2011-12-31T23:00:00.00-01:00'):
        b = str_to_datetime(bstr)
        self.assertEqual(a, b, ('%s (%f) == %s (%f)' % (astr, a, bstr, b)))
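# The behaviour pinned down by the two tests above, shown as plain expressions
# (values taken from the assertions; assumes the same str_to_datetime under test,
# which returns a numeric Unix-epoch timestamp in seconds):
str_to_datetime("1970-01-01T00:00:00Z")   # -> 0
str_to_datetime("2009-02-13T23:31:30Z")   # -> 1234567890
str_to_datetime("2012-01-01T02:00:00.00+02:00") == str_to_datetime("2012-01-01T00:00:00Z")  # -> True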
def __process_lower__(self):
    # the source document is a urlset with url/loc's pointing to packaged resources.
    md_at = w3c.str_to_datetime(
        self.source_document.md_at)  # 'must have' at attribute
    last_synced = ClientState().get_state(self.source_uri)
    if last_synced is None or md_at > last_synced:
        for resource in self.source_document.resources:
            self.__process_resource__(resource)
        if len(self.exceptions) == 0:
            ClientState().set_state(self.source_uri, md_at)
    else:
        self.logger.debug("In sync: %s" % self.source_uri)
        des.reporter.instance().log_status(uri=self.source_uri, in_sync=True)
def extract(self, rl):
    """Extract stats from ResourceList.

    Will append data to the collection for this object so we get aggregate
    stats if extract() is called on multiple ResourceLists.
    """
    if (rl.md_at):
        ref_datetime = str_to_datetime(rl.md_at)
    else:
        ref_datetime = time.time()
    self.resource_count += len(rl)
    for r in rl:
        size = 0
        if (r.length is not None):
            size = r.length
            self.sizes.append(size)
            if (size > 0):
                self.sizes_log.append(math.log10(size))
            if (size > self.sizes_max):
                self.sizes_max = size
        else:
            self.no_size += 1
        if (r.timestamp is not None):
            updated = (r.timestamp - ref_datetime)  # seconds
            self.updates.append(updated)
            if (self.oldest == 0 or r.timestamp < self.oldest):
                self.oldest = r.timestamp
            if (self.newest == 0 or r.timestamp > self.newest):
                self.newest = r.timestamp
        else:
            self.no_timestamp += 1
        # URI/file extensions as surrogate for media type
        ext = self.extension(r.uri)
        self.extensions_count[ext] = self.extensions_count.get(ext, 0) + 1
        self.extensions_size[ext] = self.extensions_size.get(ext, 0) + size
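# Illustrative call of extract() on a two-entry ResourceList; "Stats" stands in
# for the (unnamed) class these methods belong to and is assumed to initialise
# resource_count, sizes, sizes_log, sizes_max, no_size, updates, oldest, newest,
# no_timestamp, extensions_count and extensions_size in its constructor.
rl = ResourceList()
rl.add(Resource('https://repo.example.org/files/a.pdf', length=1024, timestamp=1325376000))
rl.add(Resource('https://repo.example.org/files/b.xml', length=2048, timestamp=1325375000))
stats = Stats()
stats.extract(rl)
# stats.resource_count == 2; extensions_count/extensions_size are keyed by the
# URI extension reported by self.extension() (likely 'pdf' and 'xml' here)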
def rt(dts):
    """Do simple round-trip."""
    return datetime_to_str(str_to_datetime(dts))
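# Example round-trips through rt(), assuming str_to_datetime/datetime_to_str are
# the W3C datetime helpers used elsewhere in this code; the exact string returned
# depends on how datetime_to_str formats its output, so only the instant in time
# (not the literal text) is guaranteed to survive the round-trip.
rt("2012-01-01T00:00:00Z")        # same instant, rendered back as a W3C datetime string
rt("2012-01-01T02:00:00+02:00")   # normalised to the equivalent UTC instant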