def publish_metadata(self, new_zips, exluded_zip=None):
    """
    (Re)publish metadata with addition of new_zips.

    An excluded zip will be removed from previously published metadata.
    The resource dump is always (re)written; the capability list is only
    written when it does not exist yet.

    :param new_zips: a resourcelist with newly created zip resources
    :param exluded_zip: local path to zip file that will be removed from
        previously published metadata.
    :raises RuntimeError: if exluded_zip is given but its published URL is
        not present in the resource dump.
    """
    rs_dump_url = self.publish_url + RS_RESOURCE_DUMP_XML
    rs_dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
    capa_list_url = self.publish_url + RS_CAPABILITY_LIST_XML
    capa_list_path = os.path.join(self.publish_dir, RS_CAPABILITY_LIST_XML)

    rs_dump = ResourceDump()

    # Load existing resource-dump, if any. Else set start time.
    if os.path.isfile(rs_dump_path):
        with open(rs_dump_path, "r") as rs_dump_file:
            sm = Sitemap()
            sm.parse_xml(rs_dump_file, resources=rs_dump)
    else:
        rs_dump.md_at = w3cdt.datetime_to_str(no_fractions=True)
        rs_dump.link_set(rel="up", href=capa_list_url)

    # Remove excluded zip, if any
    if exluded_zip:
        loc = self.publish_url + os.path.basename(exluded_zip)
        if loc in rs_dump.resources:
            del rs_dump.resources[loc]
        else:
            raise RuntimeError("Could not find %s in %s" % (loc, rs_dump_path))

    # Add new zips
    for resource in new_zips:
        rs_dump.add(resource)

    # Write resource-dump.xml
    rs_dump.md_completed = w3cdt.datetime_to_str(no_fractions=True)
    with open(rs_dump_path, "w") as rs_dump_file:
        rs_dump_file.write(rs_dump.as_xml())

    # The basename of the publish directory is decoded as url-safe base64
    # and reported as the graph iri.
    iri = base64.urlsafe_b64decode(os.path.basename(self.publish_dir))
    if not isinstance(iri, str):
        # On Python 3 the decoder returns bytes; convert to text so that
        # stripping and %-formatting behave as they do on Python 2.
        iri = iri.decode("utf-8")
    iri = iri.rstrip("\n")
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("New %s for graph %s" % (RS_RESOURCE_DUMP_XML, iri))
    print("See %s" % rs_dump_url)

    # Write capability-list.xml
    if not os.path.isfile(capa_list_path):
        capa_list = CapabilityList()
        capa_list.link_set(rel="up", href=self.src_desc_url)
        capa_list.add_capability(rs_dump, rs_dump_url)
        with open(capa_list_path, "w") as capa_list_file:
            capa_list_file.write(capa_list.as_xml())

        print("New %s. See %s" % (RS_CAPABILITY_LIST_XML, capa_list_url))
def summary_page1(self):
    """Plot summary stats (page 1).

    Creates a new 10x8 figure via plt.figure, titled with self.title, and
    fills a grid of 3 rows by 2 columns. The left column holds histograms
    of self.sizes, self.sizes_log and self.updates (or a placeholder text
    when the respective sequence is empty); the right column holds the
    matching text summaries drawn with self.text_plot.

    Nothing is returned; the figure is neither saved nor shown here.
    """
    fig = plt.figure(figsize=(10, 8))
    fig.suptitle(self.title + " (page 1)")
    rows = 3
    cols = 2

    # Row 1: resource sizes
    f1l = fig.add_subplot(rows, cols, 1)
    if len(self.sizes) > 0:
        f1l.hist(self.sizes, bins=self.bins)
        f1l.set_title('Histogram of resource sizes')
        f1l.set_xlabel('Size (%s)' % (self.sizes_unit))
        f1l.set_ylabel('Number of resources')
    else:
        f1l.text(0.1, 0.5, 'No resources with size')
    f1r = fig.add_subplot(rows, cols, 2)
    lines = [
        '%d resources' % self.resource_count,
        '%d resources with size' % len(self.sizes),
        '%d resources with no size (omitted)' % self.no_size,
        'max resource size = %s' % (self.human_size(self.sizes_max)),
    ]
    self.text_plot(f1r, lines, rows=6)

    # Row 2: log10 of the non-zero resource sizes
    f2l = fig.add_subplot(rows, cols, 3)
    if len(self.sizes_log) > 0:
        f2l.hist(self.sizes_log, bins=self.bins)
        f2l.set_title('Histogram of log10(resource sizes)')
        f2l.set_xlabel('log10( Size (bytes) )')
        f2l.set_ylabel('Number of resources')
    else:
        f2l.text(0.1, 0.5, 'No resources with non-zero size')
    f2r = fig.add_subplot(rows, cols, 4)
    lines = [
        '%d resources with non-zero size' % len(self.sizes_log),
        '%d resources with zero size (omitted)'
        % (len(self.sizes) - len(self.sizes_log)),
    ]
    self.text_plot(f2r, lines, rows=6)

    # Row 3: resource update times
    f3l = fig.add_subplot(rows, cols, 5)
    if len(self.updates) > 0:
        f3l.hist(self.updates, bins=self.bins)
        # Title previously ended with a stray, unbalanced ')'.
        f3l.set_title('Histogram of resource update times')
        f3l.set_xlabel('Update time (%s before now)' % (self.updates_unit))
        f3l.set_ylabel('Number of resources')
    else:
        f3l.text(0.1, 0.5, 'No resources with timestamp')
    f3r = fig.add_subplot(rows, cols, 6)
    lines = [
        '%d resources with timestamp' % len(self.updates),
        'oldest: %s' % datetime_to_str(self.oldest),
        'newest: %s' % datetime_to_str(self.newest),
        '%d resources with no timestamp (omitted)' % self.no_timestamp,
    ]
    self.text_plot(f3r, lines, rows=6)

    # Extra spacing so axis labels and titles of neighbouring panels
    # do not overlap.
    fig.subplots_adjust(left=None, bottom=None, right=None, top=None,
                        wspace=0.5, hspace=0.5)
def publish_metadata(self, new_zips, exluded_zip=None):
    """
    (Re)publish metadata with addition of new_zips.

    An excluded zip will be removed from previously published metadata.
    The resource dump is always (re)written; the capability list is only
    written when it does not exist yet.

    :param new_zips: a resourcelist with newly created zip resources
    :param exluded_zip: local path to zip file that will be removed from
        previously published metadata.
    :raises RuntimeError: if exluded_zip is given but its published URL is
        not present in the resource dump.
    """
    rs_dump_url = self.publish_url + RS_RESOURCE_DUMP_XML
    rs_dump_path = os.path.join(self.publish_dir, RS_RESOURCE_DUMP_XML)
    capa_list_url = self.publish_url + RS_CAPABILITY_LIST_XML
    capa_list_path = os.path.join(self.publish_dir, RS_CAPABILITY_LIST_XML)

    rs_dump = ResourceDump()

    # Load existing resource-dump, if any. Else set start time.
    if os.path.isfile(rs_dump_path):
        with open(rs_dump_path, "r") as rs_dump_file:
            sm = Sitemap()
            sm.parse_xml(rs_dump_file, resources=rs_dump)
    else:
        rs_dump.md_at = w3cdt.datetime_to_str(no_fractions=True)
        rs_dump.link_set(rel="up", href=capa_list_url)

    # Remove excluded zip, if any
    if exluded_zip:
        loc = self.publish_url + os.path.basename(exluded_zip)
        if loc in rs_dump.resources:
            del rs_dump.resources[loc]
        else:
            raise RuntimeError("Could not find %s in %s" % (loc, rs_dump_path))

    # Add new zips
    for resource in new_zips:
        rs_dump.add(resource)

    # Write resource-dump.xml
    rs_dump.md_completed = w3cdt.datetime_to_str(no_fractions=True)
    with open(rs_dump_path, "w") as rs_dump_file:
        rs_dump_file.write(rs_dump.as_xml())

    # The basename of the publish directory is decoded as url-safe base64
    # and reported as the graph iri.
    iri = base64.urlsafe_b64decode(os.path.basename(self.publish_dir))
    if not isinstance(iri, str):
        # On Python 3 the decoder returns bytes; convert to text so that
        # stripping and %-formatting behave as they do on Python 2.
        iri = iri.decode("utf-8")
    iri = iri.rstrip("\n")
    # Parenthesised single-argument print works on both Python 2 and 3.
    print("New %s for graph %s" % (RS_RESOURCE_DUMP_XML, iri))
    print("See %s" % rs_dump_url)

    # Write capability-list.xml
    if not os.path.isfile(capa_list_path):
        capa_list = CapabilityList()
        capa_list.link_set(rel="up", href=self.src_desc_url)
        capa_list.add_capability(rs_dump, rs_dump_url)
        with open(capa_list_path, "w") as capa_list_file:
            capa_list_file.write(capa_list.as_xml())

        print("New %s. See %s" % (RS_CAPABILITY_LIST_XML, capa_list_url))
def test1_datetime_to_str(self):
    """Check datetime_to_str output for a series of epoch-second inputs."""
    expectations = (
        (0, "1970-01-01T00:00:00Z"),
        (0.000001, "1970-01-01T00:00:00.000001Z"),
        (0.1, "1970-01-01T00:00:00.100000Z"),
        (1, "1970-01-01T00:00:01Z"),
        (60, "1970-01-01T00:01:00Z"),
        (60 * 60, "1970-01-01T01:00:00Z"),
        (60 * 60 * 24, "1970-01-02T00:00:00Z"),
        (60 * 60 * 24 * 31, "1970-02-01T00:00:00Z"),
        (60 * 60 * 24 * 365, "1971-01-01T00:00:00Z"),
    )
    for seconds, expected in expectations:
        self.assertEqual(datetime_to_str(seconds), expected)
    # Left disabled, as in the original:
    # self.assertEqual(datetime_to_str(1234567890), "2009-02-13T23:31:30Z")
def summary_page1(self):
    """Plot summary stats (page 1).

    Creates a new 10x8 figure via plt.figure, titled with self.title, and
    fills a grid of 3 rows by 2 columns. The left column holds histograms
    of self.sizes, self.sizes_log and self.updates (or a placeholder text
    when the respective sequence is empty); the right column holds the
    matching text summaries drawn with self.text_plot.

    Nothing is returned; the figure is neither saved nor shown here.
    """
    fig = plt.figure(figsize=(10, 8))
    fig.suptitle(self.title + " (page 1)")
    rows = 3
    cols = 2

    # Row 1: resource sizes
    f1l = fig.add_subplot(rows, cols, 1)
    if len(self.sizes) > 0:
        f1l.hist(self.sizes, bins=self.bins)
        f1l.set_title('Histogram of resource sizes')
        f1l.set_xlabel('Size (%s)' % (self.sizes_unit))
        f1l.set_ylabel('Number of resources')
    else:
        f1l.text(0.1, 0.5, 'No resources with size')
    f1r = fig.add_subplot(rows, cols, 2)
    lines = ['%d resources' % self.resource_count,
             '%d resources with size' % len(self.sizes),
             '%d resources with no size (omitted)' % self.no_size,
             'max resource size = %s' % (self.human_size(self.sizes_max))]
    self.text_plot(f1r, lines, rows=6)

    # Row 2: log10 of the non-zero resource sizes
    f2l = fig.add_subplot(rows, cols, 3)
    if len(self.sizes_log) > 0:
        f2l.hist(self.sizes_log, bins=self.bins)
        f2l.set_title('Histogram of log10(resource sizes)')
        f2l.set_xlabel('log10( Size (bytes) )')
        f2l.set_ylabel('Number of resources')
    else:
        f2l.text(0.1, 0.5, 'No resources with non-zero size')
    f2r = fig.add_subplot(rows, cols, 4)
    lines = ['%d resources with non-zero size' % len(self.sizes_log),
             '%d resources with zero size (omitted)'
             % (len(self.sizes) - len(self.sizes_log))]
    self.text_plot(f2r, lines, rows=6)

    # Row 3: resource update times
    f3l = fig.add_subplot(rows, cols, 5)
    if len(self.updates) > 0:
        f3l.hist(self.updates, bins=self.bins)
        # Title previously ended with a stray, unbalanced ')'.
        f3l.set_title('Histogram of resource update times')
        f3l.set_xlabel('Update time (%s before now)' % (self.updates_unit))
        f3l.set_ylabel('Number of resources')
    else:
        f3l.text(0.1, 0.5, 'No resources with timestamp')
    f3r = fig.add_subplot(rows, cols, 6)
    lines = ['%d resources with timestamp' % len(self.updates),
             'oldest: %s' % datetime_to_str(self.oldest),
             'newest: %s' % datetime_to_str(self.newest),
             '%d resources with no timestamp (omitted)' % self.no_timestamp]
    self.text_plot(f3r, lines, rows=6)

    # Extra spacing so axis labels and titles of neighbouring panels
    # do not overlap.
    fig.subplots_adjust(left=None, bottom=None, right=None, top=None,
                        wspace=0.5, hspace=0.5)
def test1_datetime_to_str(self):
    """Verify the formatted timestamp for several seconds-since-epoch values."""
    minute = 60
    hour = 60 * 60
    day = 60 * 60 * 24
    cases = [
        (0, "1970-01-01T00:00:00Z"),
        (0.000001, "1970-01-01T00:00:00.000001Z"),
        (0.1, "1970-01-01T00:00:00.100000Z"),
        (1, "1970-01-01T00:00:01Z"),
        (minute, "1970-01-01T00:01:00Z"),
        (hour, "1970-01-01T01:00:00Z"),
        (day, "1970-01-02T00:00:00Z"),
        (day * 31, "1970-02-01T00:00:00Z"),
        (day * 365, "1971-01-01T00:00:00Z"),
    ]
    for value, text in cases:
        self.assertEqual(datetime_to_str(value), text)
    # Left disabled, as in the original:
    # self.assertEqual(datetime_to_str(1234567890), "2009-02-13T23:31:30Z")
def summarize(self, opt):
    """Generate complete summary in requested format.

    Sets self.title from opt.title and self.now, then renders the three
    summary pages. When opt.pdf is set the pages are saved into that PDF
    file; otherwise each page is shown interactively and the user is
    prompted before moving on.
    """
    if opt.title:
        source = opt.title + ' summary'
    else:
        source = 'Summary'
    self.title = "%s at %s" % (source, datetime_to_str(self.now, True))

    pages = (self.summary_page1, self.summary_page2, self.summary_page3)

    # Write PDF or display in interactive viewer?
    if opt.pdf:
        with PdfPages(opt.pdf) as pdf:
            for draw_page in pages:
                draw_page()
                pdf.savefig()
                plt.close()
            info = pdf.infodict()
            info['Title'] = self.title
            info['CreationDate'] = datetime.datetime.today()
            info['ModDate'] = datetime.datetime.today()
        print("Plot saved as %s" % (opt.pdf))
    else:
        prompts = (
            "Press [enter] for page2.",
            "Press [enter] for page3.",
            "Press [enter] to exit.",
        )
        plt.ion()
        print("Showing page1.")
        for position, (draw_page, prompt) in enumerate(zip(pages, prompts)):
            if position:
                # Dismiss the previous page before drawing the next one.
                plt.close()
            draw_page()
            plt.show()
            input(prompt)  # wait for input from the user
def summarize(self, opt):
    """Generate complete summary in requested format.

    Builds self.title from opt.title and the formatted self.now, then
    either walks the user through the three summary pages interactively
    or, when opt.pdf is set, saves them to that PDF file.
    """
    heading = 'Summary'
    if opt.title:
        heading = opt.title + ' summary'
    timestamp = datetime_to_str(self.now, True)
    self.title = "%s at %s" % (heading, timestamp)

    renderers = [self.summary_page1, self.summary_page2, self.summary_page3]

    if not opt.pdf:
        # Interactive viewer: show each page and wait for the user.
        plt.ion()
        print("Showing page1.")
        steps = zip(renderers, ["Press [enter] for page2.",
                                "Press [enter] for page3.",
                                "Press [enter] to exit."])
        first = True
        for render, message in steps:
            if not first:
                plt.close()
            first = False
            render()
            plt.show()
            input(message)
        return

    # PDF output: one saved figure per page, then the document metadata.
    with PdfPages(opt.pdf) as pdf:
        for render in renderers:
            render()
            pdf.savefig()
            plt.close()
        meta = pdf.infodict()
        meta['Title'] = self.title
        meta['CreationDate'] = datetime.datetime.today()
        meta['ModDate'] = datetime.datetime.today()
    print("Plot saved as %s" % (opt.pdf))
def rt(dts):
    """Round-trip dts: parse with str_to_datetime, format with datetime_to_str."""
    parsed = str_to_datetime(dts)
    return datetime_to_str(parsed)
def test01_datetime_to_str(self):
    """Check datetime_to_str formatting, rounding and the no-fractions mode."""
    with_fractions = (
        (0, "1970-01-01T00:00:00Z"),
        (0.000001, "1970-01-01T00:00:00.000001Z"),
        (0.1, "1970-01-01T00:00:00.100000Z"),
        (1, "1970-01-01T00:00:01Z"),
        (60, "1970-01-01T00:01:00Z"),
        (60 * 60, "1970-01-01T01:00:00Z"),
        (60 * 60 * 24, "1970-01-02T00:00:00Z"),
        (60 * 60 * 24 * 31, "1970-02-01T00:00:00Z"),
        (60 * 60 * 24 * 365, "1971-01-01T00:00:00Z"),
        # Random other datetime
        (1234567890, "2009-02-13T23:31:30Z"),
        # Rounding issues
        (0.199999, "1970-01-01T00:00:00.199999Z"),
        (0.1999991, "1970-01-01T00:00:00.199999Z"),
        (0.1999999, "1970-01-01T00:00:00.200000Z"),
        (0.200000, "1970-01-01T00:00:00.200000Z"),
        (0.2000001, "1970-01-01T00:00:00.200000Z"),
        (0.2000009, "1970-01-01T00:00:00.200001Z"),
        (0.200001, "1970-01-01T00:00:00.200001Z"),
    )
    for seconds, expected in with_fractions:
        self.assertEqual(datetime_to_str(seconds), expected)

    # No fractions, flag passed positionally
    positional_flag = (
        (100, "1970-01-01T00:01:40Z"),
        (0.2000009, "1970-01-01T00:00:00Z"),
    )
    for seconds, expected in positional_flag:
        self.assertEqual(datetime_to_str(seconds, True), expected)

    # No fractions, flag passed by keyword
    self.assertEqual(datetime_to_str(0.200001, no_fractions=True),
                     "1970-01-01T00:00:00Z")