def test_remote_bz2(self):
    """Run statistics on the ``heb.rdf.bz2`` file located under ``http_base``.

    Passes when ``get_stats_results()`` contains more than five entries.
    """
    uri = http_base + 'heb.rdf.bz2'
    rdfstats = RDFStats(uri)
    # Progress callbacks are left unregistered here; to trace a run, enable:
    # rdfstats.set_callback_function_download(test_callback_download)
    # rdfstats.set_callback_function_extraction(test_callback_extraction)
    rdfstats.start_statistics()
    # assertGreater rather than a bare ``assert``: the check survives
    # ``python -O`` and a failure reports both operands.
    self.assertGreater(len(rdfstats.get_stats_results()), 5)
def test_404_remote_tar_gz(self):
    """Expect ``tarfile.ReadError`` for ``DOESNOTEXIST.nt.tgz`` under ``http_base``."""
    import tarfile

    missing_archive = http_base + 'DOESNOTEXIST.nt.tgz'
    # FIXME this should probably be some different exception
    with self.assertRaises(tarfile.ReadError):
        # Construction and the statistics run both happen inside the
        # context manager, so an error from either one satisfies the test.
        RDFStats(missing_archive).start_statistics()
def test_remote_bz2(self):
    """Check that ``heb.rdf.bz2`` under ``http_base`` yields statistics.

    The run must leave more than five entries in ``get_stats_results()``.
    """
    uri = http_base + 'heb.rdf.bz2'
    rdfstats = RDFStats(uri)
    # Optional progress hooks, currently disabled:
    # rdfstats.set_callback_function_download(test_callback_download)
    # rdfstats.set_callback_function_extraction(test_callback_extraction)
    rdfstats.start_statistics()
    result_count = len(rdfstats.get_stats_results())
    # A bare ``assert`` is compiled away under ``python -O``; the TestCase
    # helper is always evaluated and prints the compared values on failure.
    self.assertGreater(result_count, 5)
def test_owl_subclasses(self):
    """Check the A4Subsumption result for ``owl_subclasses.nt``.

    The ``restrictionTypeOccurrence`` value reported by the
    ``subsumptionDetectorOwlSubClassOf`` detector must equal 4.
    """
    source = 'file://' + testfile_path + 'owl_subclasses.nt'
    runner = RDFStats(source, format="nt", stats=[A4Subsumption])
    runner.start_statistics()

    # Walk the nested result dictionary one level at a time.
    subsumption = runner.get_stats_results()['a4subsumption']
    detector = subsumption['detectors']['subsumptionDetectorOwlSubClassOf']
    occurrence = detector['results']['restrictionTypeOccurrence']
    self.assertEqual(occurrence['value'], 4)
def test_amount_pattern_restrictions(self):
    """Check ``amount_xsd_patterns`` for ``literalPatternMatching.nt``.

    Running only A20LiteralPatternMatching on the file must report 2.
    """
    source = 'file://' + testfile_path + 'literalPatternMatching.nt'
    runner = RDFStats(source, format="nt", stats=[A20LiteralPatternMatching])
    runner.start_statistics()

    pattern_stats = runner.get_stats_results()['a20literalpatternmatching']
    self.assertEqual(pattern_stats['amount_xsd_patterns'], 2)
def runLODStats(self, uri_file=None, file_format=None):
    """Parse a source with RDFStats and compute the ``lodstats_set`` statistics.

    A falsy ``uri_file`` or ``file_format`` falls back to ``self.uri_file``
    or ``self.file_format`` respectively.  Parsing reports through
    ``self.callback_parse`` and the statistics pass through
    ``self.callback_stats``.

    Returns the RDFStats object, which is also stored on ``self.rdfstats``.
    """
    source = uri_file or self.uri_file
    fmt = file_format or self.file_format

    stats_run = RDFStats(source, format=fmt, stats=lodstats_set)
    stats_run.parse(callback_fun=self.callback_parse)
    stats_run.do_stats(callback_fun=self.callback_stats)

    self.rdfstats = stats_run
    return stats_run
def process_dataset(self, id):
    """Register this process as a worker for one dataset and run RDFStats on it.

    Steps visible in this method:

    1. Configure logging and install ``self.term_handler`` for SIGINT/SIGTERM.
    2. Fetch the work item through ``self._get_dataset(id)``; return 0 when
       that yields ``None``.
    3. Create a ``model.WorkerProc`` carrying this process's pid, add it to
       ``Session`` and flag the item as ``worked_on``.
    4. If the item's current stats carry ``errors == 'broken'``, undo the
       registration, commit, and terminate the process via ``sys.exit(0)``.
    5. Attach a fresh ``model.StatResult`` as the item's current stats,
       commit, and run ``RDFStats`` with the download, extraction and
       statistics callbacks of this object.

    NOTE(review): ``last_stat_result``, ``error`` and ``modified`` are
    assigned but never read in the portion of the method shown here.
    """
    self.logging_file_config(config_file)
    log = logging.getLogger(__name__)
    # Reset per-run state before the signal handlers are installed.
    self.worker_proc = None
    self.rdfdoc_to_do = None
    signal.signal(signal.SIGINT, self.term_handler)
    signal.signal(signal.SIGTERM, self.term_handler)
    rdfdoc_to_do = self._get_dataset(id)
    if rdfdoc_to_do is None:
        log.warning("rdfdoc_to_do is None")
        return 0
    # register this worker
    self.worker_proc = model.WorkerProc()
    self.worker_proc.pid = os.getpid()
    self.worker_proc.rdfdoc = rdfdoc_to_do
    Session.add(self.worker_proc)
    rdfdoc_to_do.worked_on = True
    self.rdfdoc_to_do = rdfdoc_to_do
    log.debug("worker %i working on %i" % (self.worker_proc.pid,
                                           self.rdfdoc_to_do.id))
    # Items whose current stats are marked 'broken' are not processed again:
    # release the item, drop the worker registration and exit the process.
    if rdfdoc_to_do.current_stats and rdfdoc_to_do.current_stats.errors == 'broken':
        rdfdoc_to_do.worked_on = False
        rdfdoc_to_do.last_updated = datetime.now()
        Session.delete(self.worker_proc)
        Session.commit()
        sys.exit(0)
    # Keep a reference to the previous stats before they are replaced below.
    last_stat_result = rdfdoc_to_do.current_stats
    stat_result = model.StatResult()
    self.stat_result = stat_result
    rdfdoc_to_do.stats.append(stat_result)
    rdfdoc_to_do.current_stats = stat_result
    # Start the new result with every progress field cleared.
    stat_result.triples_done = None
    stat_result.content_length = None
    stat_result.bytes_download = None
    stat_result.bytes = None
    stat_result.warnings = None
    stat_result.last_warning = None
    Session.commit()
    log.info(rdfdoc_to_do.format)
    error = None
    modified = True # set True if remote file has been modified
    try:
        rdfdocstats = RDFStats(rdfdoc_to_do.uri.encode('utf-8'),
                               format=rdfdoc_to_do.format,
                               stats=lodstats_stats)
        rdfdocstats.set_callback_function_download(self.callback_function_download)
        rdfdocstats.set_callback_function_extraction(self.callback_function_extraction)
        rdfdocstats.set_callback_function_statistics(self.callback_stats)
        rdfdocstats.start_statistics()
    except NotModified, errorstr:
        # NotModified is logged as a warning and only clears the flag.
        log.warning("not modified")
        modified = False
def test_max(self):
    """Check ``max_disjoint`` for ``disjointProperties.nt``.

    Running only A69DisjointProperties on the file must report 4.0.
    """
    source = 'file://' + testfile_path + 'disjointProperties.nt'
    runner = RDFStats(source, format="nt", stats=[A69DisjointProperties])
    runner.start_statistics()

    disjoint_stats = runner.get_stats_results()['a69disjointproperties']
    self.assertEqual(disjoint_stats['max_disjoint'], 4.0)
def test_remote_not_usual_extension(self):
    """Run statistics on a URL whose path ends in a query string.

    The format is passed explicitly as ``"rdf"``.  Passes when the value
    returned by ``voidify("turtle")`` has a length greater than five.
    """
    uri = "https://data.kingcounty.gov/api/views/jqei-rbgf/rows.rdf?accessType=DOWNLOAD"
    rdfstats = RDFStats(uri, format="rdf")
    rdfstats.start_statistics()
    # assertGreater rather than a bare ``assert``: the check survives
    # ``python -O`` and a failure reports both operands.
    self.assertGreater(len(rdfstats.voidify("turtle")), 5)
def test_remote_tar(self):
    """Run statistics on the ``heb.nt.tgz`` archive located under ``http_base``.

    Passes when ``get_stats_results()`` contains more than five entries.
    """
    uri = http_base + 'heb.nt.tgz'
    rdfstats = RDFStats(uri)
    rdfstats.start_statistics()
    # assertGreater rather than a bare ``assert``: the check survives
    # ``python -O`` and a failure reports both operands.
    self.assertGreater(len(rdfstats.get_stats_results()), 5)
def test_local_rdf(self):
    """Run statistics on the local ``heb-original.rdf`` test file.

    Passes when the value returned by ``voidify("turtle")`` has a length
    greater than five.
    """
    uri = 'file://' + testfile_path + 'heb-original.rdf'
    rdfstats = RDFStats(uri)
    rdfstats.start_statistics()
    # assertGreater rather than a bare ``assert``: the check survives
    # ``python -O`` and a failure reports both operands.
    self.assertGreater(len(rdfstats.voidify("turtle")), 5)
def test_remote_tar(self):
    """Check that ``heb.nt.tgz`` under ``http_base`` yields statistics.

    The run must leave more than five entries in ``get_stats_results()``.
    """
    uri = http_base + 'heb.nt.tgz'
    rdfstats = RDFStats(uri)
    rdfstats.start_statistics()
    result_count = len(rdfstats.get_stats_results())
    # A bare ``assert`` is compiled away under ``python -O``; the TestCase
    # helper is always evaluated and prints the compared values on failure.
    self.assertGreater(result_count, 5)
def test_local_rdf(self):
    """Check that the local ``heb-original.rdf`` file can be voidified.

    The value returned by ``voidify("turtle")`` must be longer than five.
    """
    uri = 'file://' + testfile_path + 'heb-original.rdf'
    rdfstats = RDFStats(uri)
    rdfstats.start_statistics()
    void_length = len(rdfstats.voidify("turtle"))
    # A bare ``assert`` is compiled away under ``python -O``; the TestCase
    # helper is always evaluated and prints the compared values on failure.
    self.assertGreater(void_length, 5)
def test_remote_not_usual_extension(self):
    """Check a URL that ends in a query string, with an explicit format.

    ``format="rdf"`` is supplied to RDFStats.  The value returned by
    ``voidify("turtle")`` must be longer than five.
    """
    uri = "https://data.kingcounty.gov/api/views/jqei-rbgf/rows.rdf?accessType=DOWNLOAD"
    rdfstats = RDFStats(uri, format="rdf")
    rdfstats.start_statistics()
    void_length = len(rdfstats.voidify("turtle"))
    # A bare ``assert`` is compiled away under ``python -O``; the TestCase
    # helper is always evaluated and prints the compared values on failure.
    self.assertGreater(void_length, 5)