def dump_articles():
    """Dump every crawled article from the local MongoDB backup to the
    crawling POST endpoint."""
    from pymongo import MongoClient
    from scripts.remote.remote import post_curling
    import json

    connection = MongoClient('localhost', 27017)
    db = connection.PTEST_BACKUP
    # exclude the ObjectId so each record is JSON-serializable
    results = db.crawling.find({}, {'_id': False})
    # sample document from the crawling collection:
    """
    {
        "_id" : ObjectId("54dd29d2b396811764a01330"),
        "url" : "http://www.nasa.gov/pdf/55395main_12%20Earth%20Science.pdf",
        "home" : "NASA",
        "abstract" : "The mission of NASA's Earth Science ... and help answer questions concerning many related aspects of ... forecasters in assessing particulate pollutio ...",
        "title" : "Earth Science - NASA",
        "keyword" : "aerosols+(pollution+aspects)",
        "stored" : true,
        "complete" : false,
        "key" : "aerosols (pollution aspects)",
        "hashed" : "aHR0cDovL3d3dy5uYXNhLmdvdi9wZGYvNTUzOTVtYWluXzEyJTIwRWFydGglMjBTY2llbmNlLnBkZg=="
    }
    """
    # upload each record via the POST endpoint
    for record in results:
        post_curling(_CRAWLING_POST['local'],
                     {'resource': json.dumps(record), 'pwd': _TEMP_SECRET},
                     display=True)
    # close the connection to MongoDB
    connection.close()
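# post_curling is the project's HTTP helper from scripts.remote.remote; its
# implementation is not shown in this section. A minimal sketch, assuming it
# is a thin wrapper over requests.post (the real helper may differ, e.g. it
# could shell out to curl instead):

import requests

def post_curling(url, params, display=False):
    """Hypothetical stand-in for scripts.remote.remote.post_curling."""
    # POST the form-encoded params and optionally echo the response
    response = requests.post(url, data=params)
    if display:
        print response.status_code
        print response.text
    return response.text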
def test_post_to_component_endpoint(self):
    """
    WARNING: it stores the JSON-LD from testdata.component in the local
    datastore.
    :return:
    """
    # from models import Component
    from scripts.testdata.component import component
    from scripts.remote.remote import post_curling
    post_curling(url='http://localhost:8080/database/cots/store',
                 params={'pwd': _TEMP_SECRET, 'data': component},
                 display=True)
def test_dumping_pipe_full(cls):
    """
    Test the full pipeline: create Python instances > dump JSON-LD >
    translate into N-Triples.
    :return:
    """
    print " --- full test for generating components and dumping them in JSON-LD"
    jsons = [SubSystem.generate_py_instance(k, v)
             for k, v in tech_constrains.items()]
    jsonlds = [SubSystem.generate_jsonld(c) for c in jsons]
    print json.dumps(jsonlds, indent=4)
    print " --- Translating the components into N-Triples via RDFtranslator"
    from scripts.remote.remote import post_curling
    url = 'http://rdf-translator.appspot.com/convert/json-ld/nt/content'
    post_curling(url, {'content': json.dumps(jsonlds)}, display=True)
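# The rdf-translator service above takes a form-encoded 'content' field and
# returns the converted serialization in the response body. A sketch of the
# same call without the post_curling helper, using requests directly (the
# JSON-LD document here is illustrative; real payloads come from
# SubSystem.generate_jsonld()):

def example_translate_jsonld_to_nt():
    import json
    import requests
    doc = {'@context': {'name': 'http://schema.org/name'},
           '@id': 'http://example.org/subsystem/1',
           'name': 'example subsystem'}
    r = requests.post(
        'http://rdf-translator.appspot.com/convert/json-ld/nt/content',
        data={'content': json.dumps(doc)})
    print r.text  # N-Triples rendering of the JSON-LD document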
def set_typeof_and_ingraph_properties(self, bookmark='start'):
    print "Fetching page: " + bookmark
    articles = get_curling(
        _ENV[self.test_env]['_SERVICE'] + '/datastore/index',
        {'token': _CLIENT_TOKEN,
         'bookmark': bookmark if bookmark != 'start' else ''})
    articles = json.loads(articles)
    # fetch each article on the page
    for a in articles['articles']:
        res = get_curling(
            _ENV[self.test_env]['_SERVICE'] + '/datastore/webresource',
            {'retrieve': a,
             'token': _CLIENT_TOKEN})
        res = json.loads(res)
        if 'type_of' not in res:
            # classify the resource: numeric-only titles are tweets, empty
            # titles are media or plain links, anything else is a feed item
            try:
                int(res['title'])
                update = {'type_of': 'tweet', 'in_graph': False}
            except (TypeError, ValueError):
                if res['title'] == '':
                    if res['url'].endswith(('jpg', 'jpeg', 'png', 'mp3', 'mp4')):
                        update = {'type_of': 'media', 'in_graph': False}
                    else:
                        update = {'type_of': 'link', 'in_graph': False}
                else:
                    update = {'type_of': 'feed', 'in_graph': False}
            print update
            post_curling(
                _ENV[self.test_env]['_SERVICE'] + '/datastore/webresource',
                {'token': _CLIENT_TOKEN,
                 'update': a,
                 'properties': json.dumps(update)},
                display=True)
    if not articles['next']:
        return None
    # recurse to process the next page of results
    return self.set_typeof_and_ingraph_properties(bookmark=articles['next'])
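# The type_of heuristic above is easy to get subtly wrong; a pure, standalone
# version (a hypothetical helper, not part of the original class) makes the
# classification rules unit-testable in isolation:

def classify_resource(title, url):
    """Pure version of the type_of heuristic used above (hypothetical)."""
    try:
        int(title)
        return 'tweet'  # numeric-only titles are treated as tweets
    except (TypeError, ValueError):
        pass
    if title == '':
        if url.endswith(('jpg', 'jpeg', 'png', 'mp3', 'mp4')):
            return 'media'  # direct link to an image/audio/video file
        return 'link'  # bare link with no title
    return 'feed'  # titled resources are treated as feed items

assert classify_resource('1234567890', 'http://t.co/x') == 'tweet'
assert classify_resource('', 'http://example.org/pic.jpg') == 'media'
assert classify_resource('', 'http://example.org/page') == 'link'
assert classify_resource('Earth Science - NASA', 'http://www.nasa.gov/') == 'feed'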
def dump_concepts_to_graph(self, list_of_ids, url):
    """
    Take a list of unique ids, create a graph of concepts, serialize it
    to triples and dump them to the right graph in the shard.
    :param list_of_ids: a list() of numerical ids from the datastore
    :param url: the url of the sparql endpoint of the shard
    :return: None
    """
    g = Graph()
    for uuid in list_of_ids:
        for triple in self.create_concepts_triples(uuid):
            g.add(triple)
    triples = g.serialize(format='nt')
    post_curling(url,
                 {'token': _CLIENT_TOKEN,
                  'triple': triples,
                  'graph_id': _CONCEPTS_GRAPH_ID},
                 display=True)
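# create_concepts_triples() is assumed to yield rdflib (subject, predicate,
# object) tuples. A minimal sketch of how such triples serialize to the
# N-Triples payload posted above (the URIs and label are illustrative):

from rdflib import Graph, URIRef, Literal

def example_concept_graph_to_nt():
    g = Graph()
    # one illustrative concept triple; real ones come from create_concepts_triples()
    g.add((URIRef('http://example.org/concept/42'),
           URIRef('http://www.w3.org/2000/01/rdf-schema#label'),
           Literal('aerosols (pollution aspects)')))
    return g.serialize(format='nt')  # one line of N-Triples per triple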
def dump_webresources_to_graph(self, list_of_ids, url):
    """
    Take a list of unique ids, create a graph of webresources, serialize
    it to triples and dump them to the right graph in the shard.
    :param list_of_ids: a list() of numerical ids from the datastore
    :param url: the url of the sparql endpoint of the shard
    :return: None
    """
    g = Graph()
    for uuid in list_of_ids:
        print uuid
        g.add(self.create_webresource_triple(uuid))
    triples = g.serialize(format='nt')
    post_curling(url,
                 {'token': _CLIENT_TOKEN,
                  'triple': triples,
                  'graph_id': _WEBRES_GRAPH_ID},
                 display=True)
def test_n3_endpoints(self):
    """
    Test the N3/N-Triples datastore endpoints.
    """
    print "Running test_n3_endpoints"
    env = self.test_env
    base_url_resource = _ENV[env]['_SERVICE'] + "/datastore/webresource"
    base_url_concept = _ENV[env]['_SERVICE'] + "/datastore/concept"
    test_concepts = []
    response = post_curling(base_url_resource, {"token": _CLIENT_TOKEN})
    print response