Example #1
0
 def test_remote_bz2(self):
     """Run statistics on ``http_base + 'heb.rdf.bz2'`` and expect more than five result entries."""
     stats_runner = RDFStats(http_base + 'heb.rdf.bz2')
     # No download/extraction progress callbacks are registered for this run.
     stats_runner.start_statistics()
     results = stats_runner.get_stats_results()
     assert len(results) > 5
Example #2
0
 def test_404_remote_tar_gz(self):
     """Expect ``tarfile.ReadError`` when processing ``http_base + 'DOESNOTEXIST.nt.tgz'``."""
     import tarfile
     missing_uri = http_base + 'DOESNOTEXIST.nt.tgz'
     # FIXME: a different, more specific exception would probably fit better here.
     with self.assertRaises(tarfile.ReadError):
         # Construction and the run both happen inside the assertion scope.
         RDFStats(missing_uri).start_statistics()
Example #3
0
 def test_remote_bz2(self):
     """Check that the ``heb.rdf.bz2`` file under ``http_base`` yields more than five stats results."""
     bz2_uri = http_base + 'heb.rdf.bz2'
     runner = RDFStats(bz2_uri)
     # Progress callbacks for download and extraction are left unregistered.
     runner.start_statistics()
     result_count = len(runner.get_stats_results())
     assert result_count > 5
Example #4
0
 def test_404_remote_tar_gz(self):
     """A ``.tgz`` URI named ``DOESNOTEXIST`` under ``http_base`` must raise ``tarfile.ReadError``."""
     import tarfile
     absent_archive = http_base + 'DOESNOTEXIST.nt.tgz'
     # FIXME: this should probably be some different exception.
     with self.assertRaises(tarfile.ReadError):
         # Both the constructor and the run are covered by the assertion.
         RDFStats(absent_archive).start_statistics()
 def test_owl_subclasses(self):
     """Check the restriction-type occurrence reported by the owl:subClassOf detector.

     Runs only the ``A4Subsumption`` statistic on ``owl_subclasses.nt`` and
     expects the detector's ``restrictionTypeOccurrence`` value to be 4.
     """
     source = 'file://' + testfile_path + 'owl_subclasses.nt'
     runner = RDFStats(source, format="nt", stats=[A4Subsumption])
     runner.start_statistics()
     # Walk the nested result dictionary one level at a time.
     detectors = runner.get_stats_results()['a4subsumption']['detectors']
     detector_results = detectors['subsumptionDetectorOwlSubClassOf']['results']
     occurrence = detector_results['restrictionTypeOccurrence']['value']
     self.assertEqual(occurrence, 4)
 def test_amount_pattern_restrictions(self):
     """Expect two XSD patterns to be counted for ``literalPatternMatching.nt``.

     Only the ``A20LiteralPatternMatching`` statistic is enabled for the run.
     """
     source = 'file://' + testfile_path + 'literalPatternMatching.nt'
     runner = RDFStats(source, format="nt", stats=[A20LiteralPatternMatching])
     runner.start_statistics()
     pattern_stats = runner.get_stats_results()['a20literalpatternmatching']
     self.assertEqual(pattern_stats['amount_xsd_patterns'], 2)
    def runLODStats(self, uri_file=None, file_format=None):
        """Parse a source and compute statistics with the ``lodstats_set`` statistics.

        Falsy arguments fall back to ``self.uri_file`` / ``self.file_format``.
        The resulting ``RDFStats`` object is stored on ``self.rdfstats`` and
        also returned.
        """
        source = uri_file if uri_file else self.uri_file
        source_format = file_format if file_format else self.file_format

        stats_runner = RDFStats(source, format=source_format, stats=lodstats_set)
        # Parsing runs first, then the statistics pass, each with its own callback.
        stats_runner.parse(callback_fun=self.callback_parse)
        stats_runner.do_stats(callback_fun=self.callback_stats)

        self.rdfstats = stats_runner
        return stats_runner
Example #8
0
    def process_dataset(self, id):
        """Set up and start a statistics run for the dataset identified by *id*.

        Configures logging from ``config_file``, installs ``self.term_handler``
        for SIGINT and SIGTERM, registers a ``model.WorkerProc`` for this
        process, attaches a fresh ``model.StatResult`` to the dataset and then
        runs ``RDFStats`` on the dataset's URI.

        Returns 0 when ``self._get_dataset(id)`` yields ``None``. Calls
        ``sys.exit(0)`` when the dataset's current stats have ``errors`` equal
        to ``'broken'``.

        NOTE(review): the visible part of this method ends inside the
        ``NotModified`` handler; ``last_stat_result``, ``error`` and
        ``modified`` are assigned here but not consumed in this excerpt.
        """
        self.logging_file_config(config_file)
        log = logging.getLogger(__name__)

        # Reset per-run state before the signal handlers are installed.
        self.worker_proc = None
        self.rdfdoc_to_do = None

        signal.signal(signal.SIGINT, self.term_handler)
        signal.signal(signal.SIGTERM, self.term_handler)

        rdfdoc_to_do = self._get_dataset(id)
        if rdfdoc_to_do is None:
            log.warning("rdfdoc_to_do is None")
            return 0

        # register this worker
        self.worker_proc = model.WorkerProc()
        self.worker_proc.pid = os.getpid()
        self.worker_proc.rdfdoc = rdfdoc_to_do
        Session.add(self.worker_proc)
        rdfdoc_to_do.worked_on = True
        self.rdfdoc_to_do = rdfdoc_to_do
        log.debug("worker %i working on %i" % (self.worker_proc.pid, self.rdfdoc_to_do.id))

        # Datasets whose current stats are flagged 'broken' are not processed:
        # undo the registration, stamp the update time and exit the process.
        if rdfdoc_to_do.current_stats and rdfdoc_to_do.current_stats.errors == 'broken':
            rdfdoc_to_do.worked_on = False
            rdfdoc_to_do.last_updated = datetime.now()
            Session.delete(self.worker_proc)
            Session.commit()
            sys.exit(0)

        # Keep a handle on the previous result before replacing it.
        last_stat_result = rdfdoc_to_do.current_stats
        stat_result = model.StatResult()
        self.stat_result = stat_result
        rdfdoc_to_do.stats.append(stat_result)
        rdfdoc_to_do.current_stats = stat_result
        # Progress and warning fields start out unset for the new result.
        stat_result.triples_done = None
        stat_result.content_length = None
        stat_result.bytes_download = None
        stat_result.bytes = None
        stat_result.warnings = None
        stat_result.last_warning = None
        Session.commit()

        log.info(rdfdoc_to_do.format)

        error = None
        modified = True # set True if remote file has been modified
        try:
            rdfdocstats = RDFStats(rdfdoc_to_do.uri.encode('utf-8'), format=rdfdoc_to_do.format, stats=lodstats_stats)
            rdfdocstats.set_callback_function_download(self.callback_function_download)
            rdfdocstats.set_callback_function_extraction(self.callback_function_extraction)
            rdfdocstats.set_callback_function_statistics(self.callback_stats)
            rdfdocstats.start_statistics()
        # "except X as name" is valid on Python 2.6+ and Python 3, whereas the
        # comma form is a SyntaxError on Python 3.
        except NotModified as errorstr:
            log.warning("not modified")
            modified = False
Example #9
0
 def test_max(self):
     """Expect ``max_disjoint`` to be 4.0 for ``disjointProperties.nt``.

     Only the ``A69DisjointProperties`` statistic is enabled for the run.
     """
     source = 'file://' + testfile_path + 'disjointProperties.nt'
     runner = RDFStats(source, format="nt", stats=[A69DisjointProperties])
     runner.start_statistics()
     disjoint_stats = runner.get_stats_results()['a69disjointproperties']
     self.assertEqual(disjoint_stats['max_disjoint'], 4.0)
Example #10
0
 def test_remote_not_usual_extension(self):
     """Process a URL that ends in a query string, passing the format explicitly.

     The Turtle output of ``voidify`` must be longer than five.
     """
     source = "https://data.kingcounty.gov/api/views/jqei-rbgf/rows.rdf?accessType=DOWNLOAD"
     runner = RDFStats(source, format="rdf")
     runner.start_statistics()
     void_output = runner.voidify("turtle")
     assert len(void_output) > 5
Example #11
0
 def test_remote_tar(self):
     """Run statistics on ``http_base + 'heb.nt.tgz'`` and expect more than five result entries."""
     runner = RDFStats(http_base + 'heb.nt.tgz')
     runner.start_statistics()
     results = runner.get_stats_results()
     assert len(results) > 5
Example #12
0
 def test_local_rdf(self):
     """Run statistics on the local ``heb-original.rdf`` file and check the Turtle output.

     The value returned by ``voidify("turtle")`` must be longer than five.
     """
     runner = RDFStats('file://' + testfile_path + 'heb-original.rdf')
     runner.start_statistics()
     void_output = runner.voidify("turtle")
     assert len(void_output) > 5
Example #13
0
 def test_remote_tar(self):
     """Check that the ``heb.nt.tgz`` archive under ``http_base`` yields more than five stats results."""
     archive_uri = http_base + 'heb.nt.tgz'
     stats_runner = RDFStats(archive_uri)
     stats_runner.start_statistics()
     result_count = len(stats_runner.get_stats_results())
     assert result_count > 5
Example #14
0
 def test_local_rdf(self):
     """Check that ``voidify("turtle")`` for ``heb-original.rdf`` returns something longer than five."""
     local_uri = 'file://' + testfile_path + 'heb-original.rdf'
     stats_runner = RDFStats(local_uri)
     stats_runner.start_statistics()
     output_length = len(stats_runner.voidify("turtle"))
     assert output_length > 5
Example #15
0
 def test_remote_not_usual_extension(self):
     """Check a URL with a trailing query string, using an explicit ``format="rdf"``.

     Expects ``voidify("turtle")`` to return something longer than five.
     """
     query_uri = "https://data.kingcounty.gov/api/views/jqei-rbgf/rows.rdf?accessType=DOWNLOAD"
     stats_runner = RDFStats(query_uri, format="rdf")
     stats_runner.start_statistics()
     output_length = len(stats_runner.voidify("turtle"))
     assert output_length > 5