def _index_item(self, uri, num, batch_num):
        """ queries the triplestore for an item sends it to elasticsearch """

        # pull every triple for the item, wrap it in an RdfDataset and
        # serialize the base class to an elasticsearch JSON document
        item_triples = get_all_item_data(uri, self.namespace)
        es_doc = RdfDataset(item_triples, uri).base_class.es_json()
        self.batch_data[batch_num].append(es_doc)
        self.count += 1
    def setUp(self):
        """Build the rdfframework fixtures shared by the tests.

        Loads the framework (without resetting), grabs the config
        manager, then pulls all triples for a known item into an
        ``RdfDataset`` stored on ``self.x``.
        """
        self.r = rdfframework.rdfclass

        self.rdf_framework = RdfFramework(reset=False, config=config)

        self.cfg = RdfConfigManager()

        self.item_uri = "<http://library.kean.edu/173849#Work>"
        # Bug fix: the original read the bare names ``cfg``, ``item_uri``,
        # ``conn`` and ``data`` even though they were only ever assigned as
        # instance attributes above -- a NameError at runtime.
        self.conn = self.cfg.data_tstore
        self.data = get_all_item_data(self.item_uri, self.conn)
        self.x = RdfDataset(self.data, self.item_uri)
    def setUp(self):
        """Build the rdfframework fixtures shared by the tests.

        Loads the framework (without resetting), grabs the config
        manager, then pulls all triples for a known item into an
        ``RdfDataset`` stored on ``self.x``.
        """
        self.r = rdfframework.rdfclass

        self.rdf_framework = RdfFramework(reset=False, config=config)

        self.cfg = RdfConfigManager()

        self.item_uri = "<http://library.kean.edu/173849#Work>"
        # Bug fix: the original read the bare names ``cfg``, ``item_uri``,
        # ``conn`` and ``data`` even though they were only ever assigned as
        # instance attributes above -- a NameError at runtime.
        self.conn = self.cfg.data_tstore
        self.data = get_all_item_data(self.item_uri, self.conn)
        self.x = RdfDataset(self.data, self.item_uri)
# --- Example n. 4 (snippet-site scrape separator; original text: "Esempio n. 4", vote count "0") ---
    def _index_sub(self, uri_list, num, batch_num):
        """
        Converts a list of uris to elasticsearch json objects

        args:
            uri_list: list of uris to convert
            num: the ending count within the batch
            batch_num: the batch number
        """
        bname = '%s-%s' % (batch_num, num)
        log.debug("batch_num '%s' starting es_json conversion",
                  bname)
        # each entry in uri_list is a tuple whose first element is the uri
        subjects = [row[0] for row in uri_list]
        qry_data = get_all_item_data(subjects,
                                     self.tstore_conn,
                                     rdfclass=self.rdf_class)
        log.debug("batch_num '%s-%s' query_complete | count: %s",
                  batch_num,
                  num,
                  len(qry_data))
        data = RdfDataset(qry_data)
        del qry_data
        log.debug("batch_num '%s-%s' RdfDataset Loaded", batch_num, num)
        main_batch = self.batch_data[batch_num]['main']
        for row in uri_list:
            try:
                main_batch.append(data[row[0]].es_json())
                self.count += 1
            except KeyError:
                # uri missing from the queried dataset -- skip it
                pass
        # fan out any secondary classes to their dedicated indexers
        for name, indexer in self.other_indexers.items():
            for item in data.json_qry("$.:%s" % name.pyuri):
                es_doc = item.es_json()
                if not es_doc:
                    continue
                self.batch_data[batch_num][name].append(es_doc)
                self.batch_uris[batch_num].append(item.subject)
        # free the large intermediates before the next batch
        del data
        del uri_list
        log.debug("batch_num '%s-%s' converted to es_json", batch_num, num)
# --- Example n. 5 (snippet-site scrape separator; original text: "Esempio n. 5", vote count "0") ---
 def run(self, **kwargs):
     """
     Executes the SPARQL processor against a fresh output graph.

     kwargs:
         limit / offset: optional paging values copied onto the instance
         no_json: when truthy, disables the json_qry data path
         dataset: a pre-built RdfDataset; when absent one is built by
                  querying (via ``data_query`` or ``iri_key``)
         iri_key: name of the kwarg holding the subject iri to query

     returns:
         the processed output graph (also stored on ``self.output``)

     raises:
         KeyError: when json_qry is active, no dataset/data_query is
                   supplied and ``iri_key`` is missing from kwargs
     """
     kwargs['output'] = self.__graph__()
     if "limit" in kwargs:
         self.limit = kwargs.get('limit')
     if "offset" in kwargs:
         self.offset = kwargs.get('offset')
     if kwargs.get("no_json"):
         self.use_json_qry = False
     else:
         self.use_json_qry = self.default_use_json_qry
     if self.use_json_qry:
         if not kwargs.get('dataset'):
             if self.data_query:
                 sparql = PREFIX + self.data_query.format(**kwargs)
                 data = self.ext_conn.query(sparql)
             else:
                 try:
                     data = get_all_item_data(
                         items=kwargs[kwargs['iri_key']],
                         conn=self.ext_conn,
                         output='json',
                         debug=False)
                     log.debug("data triple count: %s", len(data))
                 except KeyError:
                     # message fixed: "contians" typo and the
                     # unterminated quote in the usage example
                     raise KeyError(
                         "missing kwarg['iri_key'] defining which"
                         " kwarg to use that contains the subject"
                         " uri used to query for data. Example: "
                         "iri_key='instance_iri', instance_iri="
                         "<http://some.iri>")
             kwargs['dataset'] = RdfDataset(data)
     super(SPARQLProcessor, self).run(**kwargs)
     self.output = kwargs['output']
     return kwargs['output']
    'o': {
        'type': 'uri',
        'value': 'http://id.loc.gov/ontologies/bibframe/Instance'
    },
    'p': {
        'type': 'uri',
        'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'
    },
    's': {
        'type': 'uri',
        'value':
        'https://plains2peaks.org/d573941e-82c6-11e7-b159-005056c00008'
    }
}]

if __name__ == '__main__':
    # NOTE(review): each scenario is timed twice in a row -- presumably to
    # expose warm-up/caching effects; confirm this duplication is intentional.
    time_test(DATA)
    time_test(DATA, multiprocessing=True)
    time_test(DATA)
    time_test(DATA, multiprocessing=True)
    # deferred imports: only needed for the live-triplestore timing run below
    from rdfframework.sparql import get_all_item_data
    from rdfframework.connections import Blazegraph
    from rdfframework.datatypes import RdfNsManager
    RdfNsManager({"bf": "http://id.loc.gov/ontologies/bibframe/"})
    data_iri = "<https://plains2peaks.org/d573941e-82c6-11e7-b159-005056c00008>"
    # requires a reachable Blazegraph instance with the "plain2peak" namespace
    conn = Blazegraph(namespace="plain2peak")
    data = get_all_item_data(data_iri, conn)
    print("data count: ", len(data))
    time_test(data)
    time_test(data, multiprocessing=True)
        'value': 'http://id.loc.gov/ontologies/bibframe/carrier'},
  's': {'type': 'uri',
        'value': 'https://plains2peaks.org/d573941e-82c6-11e7-b159-005056c00008'}},
 {'o': {'type': 'bnode', 'value': 't3190361'},
  'p': {'type': 'uri',
        'value': 'http://id.loc.gov/ontologies/bibframe/identifiedBy'},
  's': {'type': 'uri',
        'value': 'https://plains2peaks.org/d573941e-82c6-11e7-b159-005056c00008'}},
 {'o': {'type': 'uri',
        'value': 'http://id.loc.gov/ontologies/bibframe/Instance'},
  'p': {'type': 'uri',
        'value': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type'},
  's': {'type': 'uri',
        'value': 'https://plains2peaks.org/d573941e-82c6-11e7-b159-005056c00008'}}]

if __name__ == '__main__':
    # NOTE(review): each scenario is timed twice in a row -- presumably to
    # expose warm-up/caching effects; confirm this duplication is intentional.
    time_test(DATA)
    time_test(DATA, multiprocessing=True)
    time_test(DATA)
    time_test(DATA, multiprocessing=True)
    # deferred imports: only needed for the live-triplestore timing run below
    from rdfframework.sparql import get_all_item_data
    from rdfframework.connections import Blazegraph
    from rdfframework.datatypes import RdfNsManager
    RdfNsManager({"bf": "http://id.loc.gov/ontologies/bibframe/"})
    data_iri = "<https://plains2peaks.org/d573941e-82c6-11e7-b159-005056c00008>"
    # requires a reachable Blazegraph instance with the "plain2peak" namespace
    conn = Blazegraph(namespace="plain2peak")
    data = get_all_item_data(data_iri, conn)
    print("data count: ", len(data))
    time_test(data)
    time_test(data, multiprocessing=True)