def setUp(self):
    """Create the DB factory and the manager used by every test.

    The manager is requested for the 'SOLR' backend and the 'test_core'
    schema; both objects are stored on the test instance.
    """
    factory = DBFactory()
    self.dbFactory = factory
    self.dbManager = factory.get_specific_db_manager_and_schema(
        'SOLR', 'test_core')
def postProcessing(options):
    """
        Executes some curating operations over imported data.

        Records whose link matches the mygoblet.org courses pattern and whose
        resource type is "Training Material" are read, deleted and inserted
        again with resource type "Event". Only records whose link passes
        util.existURL are inserted again; the rest stay deleted.

        * options {dict} specific configurations for initialization. May be None.
            ds_name {string} specific dataset/database to use with the DB manager
    """
    init_logger()
    logger.info('> Executing ckan postprocessing... ')

    # Callers may pass None instead of a dict; treat that as "no options"
    # rather than failing on None.keys().
    ds_name = None
    if options is not None and 'ds_name' in options:
        ds_name = options['ds_name']

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Both values are read before either is assigned, so a missing
        # password never leaves a user name without its password.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        # Best effort: without credentials we continue with user/passw = None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(ds_name, user, passw)

    # We want to change all courses from mygoblet.org tagged as Training Materials
    ckan_conditions = [
        ['AND', [
            ['EQ', 'link', 'http://www.mygoblet.org//training-portal/courses/*'],
            ['EQ', 'resource_type', ["Training Material"]],
        ]],
    ]

    # Fetch first, then delete, then insert the modified copies.
    # TODO: replace with an update operation once AbstractManager implements one.
    results = dbManager.get_data_by_conditions(ckan_conditions)
    dbManager.delete_data_by_conditions(ckan_conditions)

    numSuccess = 0
    for result in results:
        if not util.existURL(result.get("link")):
            continue
        success = dbManager.insert_data({
            "title": result.get("title"),
            "description": result.get("description"),
            "field": result.get("field"),
            "source": result.get("source"),
            "resource_type": ["Event"],  # Now they are Events, not Training Materials!
            "insertion_date": result.get("insertion_date"),
            "created": result.get("created"),
            "audience": result.get("audience"),
            "link": result.get("link"),
        })
        if success:
            numSuccess += 1

    logger.info('Changed ' + str(numSuccess) + ' mygoblet.org records tagged as Training Materials to Events')
    logger.info('< Finished ckan postprocessing')
class TestSolrDB(unittest.TestCase):
    """Tests for the DB manager obtained for the 'SOLR' backend and 'test_core' schema.

    Every test talks to the manager returned by DBFactory, so the data
    store behind it must be reachable when the tests run.
    """

    def setUp(self):
        """Create the factory and the manager shared by the test methods."""
        self.dbFactory = DBFactory()
        self.dbManager = self.dbFactory.get_specific_db_manager_and_schema('SOLR', 'test_core')

    def _count_all(self):
        """Return how many records the manager currently reports."""
        return len(self.dbManager.get_all_data())

    def _reset_with_three_titles(self):
        """Empty the store and insert the 'first', 'second' and 'third' test records."""
        self.dbManager.delete_all_data()
        for ordinal in ('first', 'second', 'third'):
            self.dbManager.insert_data({'title': ordinal + ' test title', 'field': 'test-field'})
        self.assertEqual(3, self._count_all())

    def _count_matching(self, conditions):
        """Return how many records match the conditions (no sorting, no extra argument)."""
        return len(self.dbManager.get_data_by_conditions_full(conditions, None, None))

    def test_insertion(self):
        """Inserting one record increases the total count by exactly one."""
        print ('> Insertion test')
        previous_count = self._count_all()
        self.dbManager.insert_data({'title': 'test title', 'field': 'test-field'})
        self.assertEqual(previous_count + 1, self._count_all())

    def test_all_deletion(self):
        """delete_all_data leaves no records behind."""
        print ('> All deletion test')
        self.dbManager.insert_data({'title': 'test title', 'field': 'test-field'})
        self.dbManager.delete_all_data()
        self.assertEqual(0, self._count_all())

    def test_delete(self):
        """Conditional deletion removes only the matching records."""
        print ('> Delete test')
        self._reset_with_three_titles()

        self.dbManager.delete_data_by_conditions([['EQ', 'title', 'first test title']])
        self.assertEqual(2, self._count_all())

        # The 'first' record is already gone, so only 'second' is removed here.
        self.dbManager.delete_data_by_conditions([
            ['OR', [
                ['AND', [
                    ['EQ', 'field', 'test-field'],
                    ['EQ', 'title', 'second test title'],
                ]],
                ['AND', [
                    ['EQ', 'field', 'test-field'],
                    ['EQ', 'title', 'first test title'],
                ]],
            ]],
        ])
        self.assertEqual(1, self._count_all())

    def test_select(self):
        """Selection by simple, combined and nested conditions, plus sorting."""
        print ('> Select test')
        self._reset_with_three_titles()

        # (conditions, expected number of matching records)
        cases = [
            ([['EQ', 'title', 'first test title']], 1),
            ([['EQ', 'field', 'test-field']], 3),
            ([['NO', 'title', 'second test title']], 2),
            ([['EQ', 'field', 'test-field'],
              ['EQ', 'title', 'second test title']], 1),
            ([['EQ', 'field', 'test-field'],
              ['NO', 'title', 'second test title']], 2),
            ([['AND', [
                ['EQ', 'field', 'test-field'],
                ['EQ', 'title', 'second test title'],
            ]]], 1),
            ([['OR', [
                ['EQ', 'field', 'test-field'],
                ['EQ', 'title', 'second test title'],
            ]]], 3),
            ([['OR', [
                ['AND', [
                    ['EQ', 'field', 'test-field'],
                    ['EQ', 'title', 'second test title'],
                ]],
                ['AND', [
                    ['EQ', 'field', 'test-field'],
                    ['EQ', 'title', 'first test title'],
                ]],
            ]]], 2),
            ([['OR', [
                ['NO', [
                    ['AND', [
                        ['EQ', 'field', 'test-field'],
                        ['EQ', 'title', 'second test title'],
                    ]],
                ]],
                ['AND', [
                    ['EQ', 'field', 'test-field'],
                    ['EQ', 'title', 'first test title'],
                ]],
            ]]], 1),
        ]
        for conditions, expected in cases:
            # The message identifies the failing case, since all share one loop.
            self.assertEqual(expected, self._count_matching(conditions),
                             'unexpected match count for conditions %r' % (conditions,))

        # Testing sorting rules...
        for direction, expected_title in (('ASC', 'first test title'),
                                          ('DESC', 'third test title')):
            sorted_results = self.dbManager.get_data_by_conditions_full(
                [['EQ', 'field', 'test-field']], [['title', direction]], None)
            # Check the length before indexing so an empty result fails as an
            # assertion instead of an IndexError.
            self.assertEqual(3, len(sorted_results))
            self.assertEqual(expected_title, sorted_results[0]['title'])
def postProcessing(options):
    """
        Executes some curating operations over imported data.

        Records whose link matches the mygoblet.org courses pattern and whose
        resource type is "Training Material" are read, deleted and inserted
        again with resource type "Event". Only records whose link passes
        util.existURL are inserted again; the rest stay deleted.

        * options {dict} specific configurations for initialization. May be None.
            ds_name {string} specific dataset/database to use with the DB manager
    """
    init_logger()
    logger.info('> Executing ckan postprocessing... ')

    # Callers may pass None instead of a dict; treat that as "no options"
    # rather than failing on None.keys().
    ds_name = None
    if options is not None and 'ds_name' in options:
        ds_name = options['ds_name']

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Both values are read before either is assigned, so a missing
        # password never leaves a user name without its password.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        # Best effort: without credentials we continue with user/passw = None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    # We want to change all courses from mygoblet.org tagged as Training Materials
    ckan_conditions = [[
        'AND',
        [['EQ', 'link', 'http://www.mygoblet.org//training-portal/courses/*'],
         ['EQ', 'resource_type', ["Training Material"]]]
    ]]

    # Fetch first, then delete, then insert the modified copies.
    # TODO: replace with an update operation once AbstractManager implements one.
    results = dbManager.get_data_by_conditions(ckan_conditions)
    dbManager.delete_data_by_conditions(ckan_conditions)

    numSuccess = 0
    for result in results:
        if not util.existURL(result.get("link")):
            continue
        success = dbManager.insert_data({
            "title": result.get("title"),
            "description": result.get("description"),
            "field": result.get("field"),
            "source": result.get("source"),
            "resource_type": ["Event"],  # Now they are Events, not Training Materials!
            "insertion_date": result.get("insertion_date"),
            "created": result.get("created"),
            "audience": result.get("audience"),
            "link": result.get("link")
        })
        if success:
            numSuccess += 1

    logger.info('Changed ' + str(numSuccess) +
                ' mygoblet.org records tagged as Training Materials to Events')
    logger.info('< Finished ckan postprocessing')
def main_options(options):
    """
        Executes the main functionality of this script: it extracts JSON data from each Training Material found
        and inserts its main data into the DB.

        * options {dict} specific configurations for initialization. May be None, in which case
          the defaults shown below are used.
            ds_name {string} specific dataset/database to use with the DB manager (default None)
            delete_all_old_data {boolean} specifies if we should delete all previous ckanData in our DataBase (default False)
            registriesFromTime {datetime} only registries more recent than this time are inserted (default None: no filter)
            updateRegistries {boolean} if we want to get new registries or not (default True)
    """
    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting ckanData importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() keeps the log line from failing on a None / non-string name.
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'registriesFromTime' in options:
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting ckanData importing process...')

    materials_names = None
    if updateRegistries:
        materials_names = get_materials_names()

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Both values are read before either is assigned, so a missing
        # password never leaves a user name without its password.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        # Best effort: without credentials we continue with user/passw = None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(ds_name, user, passw)

    if delete_all_old_data:
        ckan_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(ckan_conditions)
        dbManager.delete_data_by_conditions(ckan_conditions)
        new_count = dbManager.count_data_by_conditions(ckan_conditions)
        # Only report the difference when both counts are available.
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) + ' registries')

    if materials_names is not None:
        numSuccess = 0
        for material_name in materials_names:
            json_data = get_json_from_material_name(material_name)
            if json_data is None:
                continue
            # If we have registriesFromTime, each registry's creation date
            # must be more recent than it.
            if registriesFromTime is not None and not isDataMoreRecentThan(json_data, registriesFromTime):
                continue
            success = dbManager.insert_data({
                "title": get_title(json_data),
                "description": get_notes(json_data),
                "field": get_field(json_data),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_created(json_data),
                "audience": get_audience(json_data),
                "link": get_link(json_data),
            })
            if success:
                numSuccess += 1
        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    if updateRegistries:
        # postProcessing looks up keys on its argument, so hand it an empty
        # dict rather than None when no options were given.
        postProcessing(options if options is not None else {})

    logger.info('<< Finished ckanData importing process.')
def setUp(self):
    """Create the DB factory and the manager used by every test.

    The manager is requested for the 'SOLR' backend and the 'test_core'
    schema; both objects are stored on the test instance.
    """
    factory = DBFactory()
    self.dbFactory = factory
    self.dbManager = factory.get_specific_db_manager_and_schema(
        'SOLR', 'test_core')
def main_options(options):
    """
        Executes the main functionality of this script: it extracts JSON data from each Training Material found
        and inserts its main data into the DB.

        * options {dict} specific configurations for initialization. May be None, in which case
          the defaults shown below are used.
            ds_name {string} specific dataset/database to use with the DB manager (default None)
            delete_all_old_data {boolean} specifies if we should delete all previous ckanData in our DataBase (default False)
            registriesFromTime {datetime} only registries more recent than this time are inserted (default None: no filter)
            updateRegistries {boolean} if we want to get new registries or not (default True)
    """
    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting ckanData importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() keeps the log line from failing on a None / non-string name.
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'registriesFromTime' in options:
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting ckanData importing process...')

    materials_names = None
    if updateRegistries:
        materials_names = get_materials_names()

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Both values are read before either is assigned, so a missing
        # password never leaves a user name without its password.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        # Best effort: without credentials we continue with user/passw = None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        ckan_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(ckan_conditions)
        dbManager.delete_data_by_conditions(ckan_conditions)
        new_count = dbManager.count_data_by_conditions(ckan_conditions)
        # Only report the difference when both counts are available.
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) +
                        ' registries')

    if materials_names is not None:
        numSuccess = 0
        for material_name in materials_names:
            json_data = get_json_from_material_name(material_name)
            if json_data is None:
                continue
            # If we have registriesFromTime, each registry's creation date
            # must be more recent than it.
            if registriesFromTime is not None and not isDataMoreRecentThan(
                    json_data, registriesFromTime):
                continue
            success = dbManager.insert_data({
                "title": get_title(json_data),
                "description": get_notes(json_data),
                "field": get_field(json_data),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_created(json_data),
                "audience": get_audience(json_data),
                "link": get_link(json_data)
            })
            if success:
                numSuccess += 1
        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    if updateRegistries:
        # postProcessing looks up keys on its argument, so hand it an empty
        # dict rather than None when no options were given.
        postProcessing(options if options is not None else {})

    logger.info('<< Finished ckanData importing process.')
class TestSolrDB(unittest.TestCase):
    """Tests for the DB manager obtained for the 'SOLR' backend and 'test_core' schema.

    Every test talks to the manager returned by DBFactory, so the data
    store behind it must be reachable when the tests run.
    """

    def setUp(self):
        """Create the factory and the manager shared by the test methods."""
        self.dbFactory = DBFactory()
        self.dbManager = self.dbFactory.get_specific_db_manager_and_schema(
            'SOLR', 'test_core')

    def _count_all(self):
        """Return how many records the manager currently reports."""
        return len(self.dbManager.get_all_data())

    def _reset_with_three_titles(self):
        """Empty the store and insert the 'first', 'second' and 'third' test records."""
        self.dbManager.delete_all_data()
        for ordinal in ('first', 'second', 'third'):
            self.dbManager.insert_data({
                'title': ordinal + ' test title',
                'field': 'test-field'
            })
        self.assertEqual(3, self._count_all())

    def _count_matching(self, conditions):
        """Return how many records match the conditions (no sorting, no extra argument)."""
        return len(
            self.dbManager.get_data_by_conditions_full(conditions, None, None))

    def test_insertion(self):
        """Inserting one record increases the total count by exactly one."""
        print('> Insertion test')
        previous_count = self._count_all()
        self.dbManager.insert_data({
            'title': 'test title',
            'field': 'test-field'
        })
        self.assertEqual(previous_count + 1, self._count_all())

    def test_all_deletion(self):
        """delete_all_data leaves no records behind."""
        print('> All deletion test')
        self.dbManager.insert_data({
            'title': 'test title',
            'field': 'test-field'
        })
        self.dbManager.delete_all_data()
        self.assertEqual(0, self._count_all())

    def test_delete(self):
        """Conditional deletion removes only the matching records."""
        print('> Delete test')
        self._reset_with_three_titles()

        self.dbManager.delete_data_by_conditions(
            [['EQ', 'title', 'first test title']])
        self.assertEqual(2, self._count_all())

        # The 'first' record is already gone, so only 'second' is removed here.
        self.dbManager.delete_data_by_conditions([[
            'OR',
            [[
                'AND',
                [['EQ', 'field', 'test-field'],
                 ['EQ', 'title', 'second test title']]
            ], [
                'AND',
                [['EQ', 'field', 'test-field'],
                 ['EQ', 'title', 'first test title']]
            ]]
        ]])
        self.assertEqual(1, self._count_all())

    def test_select(self):
        """Selection by simple, combined and nested conditions, plus sorting."""
        print('> Select test')
        self._reset_with_three_titles()

        # (conditions, expected number of matching records)
        cases = [
            ([['EQ', 'title', 'first test title']], 1),
            ([['EQ', 'field', 'test-field']], 3),
            ([['NO', 'title', 'second test title']], 2),
            ([['EQ', 'field', 'test-field'],
              ['EQ', 'title', 'second test title']], 1),
            ([['EQ', 'field', 'test-field'],
              ['NO', 'title', 'second test title']], 2),
            ([[
                'AND',
                [['EQ', 'field', 'test-field'],
                 ['EQ', 'title', 'second test title']]
            ]], 1),
            ([[
                'OR',
                [['EQ', 'field', 'test-field'],
                 ['EQ', 'title', 'second test title']]
            ]], 3),
            ([[
                'OR',
                [[
                    'AND',
                    [['EQ', 'field', 'test-field'],
                     ['EQ', 'title', 'second test title']]
                ], [
                    'AND',
                    [['EQ', 'field', 'test-field'],
                     ['EQ', 'title', 'first test title']]
                ]]
            ]], 2),
            ([[
                'OR',
                [[
                    'NO',
                    [[
                        'AND',
                        [['EQ', 'field', 'test-field'],
                         ['EQ', 'title', 'second test title']]
                    ]]
                ], [
                    'AND',
                    [['EQ', 'field', 'test-field'],
                     ['EQ', 'title', 'first test title']]
                ]]
            ]], 1),
        ]
        for conditions, expected in cases:
            # The message identifies the failing case, since all share one loop.
            self.assertEqual(
                expected, self._count_matching(conditions),
                'unexpected match count for conditions %r' % (conditions, ))

        # Testing sorting rules...
        for direction, expected_title in (('ASC', 'first test title'),
                                          ('DESC', 'third test title')):
            sorted_results = self.dbManager.get_data_by_conditions_full(
                [['EQ', 'field', 'test-field']], [['title', direction]], None)
            # Check the length before indexing so an empty result fails as an
            # assertion instead of an IndexError.
            self.assertEqual(3, len(sorted_results))
            self.assertEqual(expected_title, sorted_results[0]['title'])
def main_options(options):
    """
        Executes the main functionality of this script: it extracts information from iAnn events data
        and inserts some of them into the DB. Only events whose link passes util.existURL are inserted.

        * options {dict} specific configurations for initialization. May be None, in which case
          the defaults shown below are used.
            ds_name {string} specific dataset/database to use with the DB manager (default None)
            delete_all_old_data {boolean} specifies if we should delete all previous data from this source in our DataBase (default False)
            registriesFromTime {datetime} time from which registries will be obtained; passed to get_iann_data (default None)
            updateRegistries {boolean} if we want to get new registries or not (default True)

        In this script we will insert these fields into each registry:
            "title" {string} Title for the event;
            "start" - Date the event starts;
            "end" - Date the event ends;
            "city" {string} City where the event is hosted;
            "country" {string} Country where the event is hosted;
            "field" {string} Branches of science in molecular biology.
            "provider" {string} Organization hosting the event;
            "link" {string} Link to the data registry.
            "source" Value returned by get_source_field().
            "resource_type" Value returned by get_resource_type_field().
            "insertion_date" Value returned by get_insertion_date_field().
            "created" {date} Date and time of creation of the original registry.
        See more eg: http://iann.pro/iann-web-services
    """
    # IannDataLocking.lock()
    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting iann importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() keeps the log line from failing on a None / non-string name.
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'registriesFromTime' in options:
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting iann importing process...')

    iann_data = None
    if updateRegistries:
        iann_data = get_iann_data(registriesFromTime)

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Both values are read before either is assigned, so a missing
        # password never leaves a user name without its password.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        # Best effort: without credentials we continue with user/passw = None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(ds_name, user, passw)

    if delete_all_old_data:
        iann_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(iann_conditions)
        dbManager.delete_data_by_conditions(iann_conditions)
        new_count = dbManager.count_data_by_conditions(iann_conditions)
        # Only report the difference when both counts are available.
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) + ' registries')

    if iann_data is not None:
        numSuccess = 0
        for result in iann_data:
            if result is None:
                continue
            # Check the link of the event being processed. The previous code
            # referenced an undefined name ('record') here.
            if not util.existURL(get_link(result)):
                continue
            success = dbManager.insert_data({
                "title": get_title(result),
                "start": get_start(result),
                "end": get_end(result),
                "city": get_city(result),
                "country": get_country(result),
                "field": get_field(result),
                "provider": get_provider(result),
                "link": get_link(result),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_creation_date_field(result),
            })
            if success:
                numSuccess += 1
        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    logger.info('<< Finished iann importing process.')
def main_options(options):
    """
        Executes the main functionality of this script: it extracts JSON data from each record found on Elixir's registry
        and inserts its main data into the DB.

        * options {dict} specific configurations for initialization. May be None, in which case
          the defaults shown below are used.
            ds_name {string} specific dataset/database to use with the DB manager (default None)
            delete_all_old_data {boolean} specifies if we should delete all previous Elixir registry data in our DataBase (default False)
            updateRegistries {boolean} if we want to get new registries or not (default True)
          A 'registriesFromTime' entry is not read by this importer.

        In this script we will insert these fields into each registry:
            "title" {string} Title for the data registry.
            "description" {string} Description for the data registry.
            "link" {string} Link to the data registry.
            "field" Value returned by get_field(record).
            "source" Value returned by get_source_field().
            "resource_type" Value returned by get_resource_type_field(record).
            "insertion_date" Value returned by get_insertion_date_field().
    """
    init_logger()

    ds_name = None
    delete_all_old_data = False
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting Elixir registry importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() keeps the log line from failing on a None / non-string name.
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting Elixir registry importing process...')

    records = None
    if updateRegistries:
        records = get_records()

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Both values are read before either is assigned, so a missing
        # password never leaves a user name without its password.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        # Best effort: without credentials we continue with user/passw = None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(ds_name, user, passw)

    if delete_all_old_data:
        registry_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(registry_conditions)
        dbManager.delete_data_by_conditions(registry_conditions)
        new_count = dbManager.count_data_by_conditions(registry_conditions)
        # Only report the difference when both counts are available.
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) + ' registries')

    if records is not None:
        numSuccess = 0
        for record in records:
            # NOTE: the util.existURL(get_link(record)) check is disabled
            # here; every record is inserted without verifying its link.
            success = dbManager.insert_data({
                "title": get_title(record),
                "description": get_description(record),
                "link": get_link(record),
                "field": get_field(record),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(record),
                "insertion_date": get_insertion_date_field(),
            })
            if success:
                numSuccess += 1
        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    logger.info('<< Finished Elixir registry importing process...')
def main_options(options):
    """
        Executes the main functionality of this script: it extracts JSON data from each record found on Elixir's registry
        and inserts its main data into the DB.

        * options {dict} specific configurations for initialization. May be None, in which case
          the defaults shown below are used.
            ds_name {string} specific dataset/database to use with the DB manager (default None)
            delete_all_old_data {boolean} specifies if we should delete all previous Elixir registry data in our DataBase (default False)
            updateRegistries {boolean} if we want to get new registries or not (default True)
          A 'registriesFromTime' entry is not read by this importer.

        In this script we will insert these fields into each registry:
            "title" {string} Title for the data registry.
            "description" {string} Description for the data registry.
            "link" {string} Link to the data registry.
            "field" Value returned by get_field(record).
            "source" Value returned by get_source_field().
            "resource_type" Value returned by get_resource_type_field(record).
            "insertion_date" Value returned by get_insertion_date_field().
    """
    init_logger()

    ds_name = None
    delete_all_old_data = False
    updateRegistries = True

    if options is not None:
        logger.info(
            '>> Starting Elixir registry importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() keeps the log line from failing on a None / non-string name.
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting Elixir registry importing process...')

    records = None
    if updateRegistries:
        records = get_records()

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Both values are read before either is assigned, so a missing
        # password never leaves a user name without its password.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        # Best effort: without credentials we continue with user/passw = None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        registry_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(
            registry_conditions)
        dbManager.delete_data_by_conditions(registry_conditions)
        new_count = dbManager.count_data_by_conditions(registry_conditions)
        # Only report the difference when both counts are available.
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) +
                        ' registries')

    if records is not None:
        numSuccess = 0
        for record in records:
            # NOTE: the util.existURL(get_link(record)) check is disabled
            # here; every record is inserted without verifying its link.
            success = dbManager.insert_data({
                "title": get_title(record),
                "description": get_description(record),
                "link": get_link(record),
                "field": get_field(record),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(record),
                "insertion_date": get_insertion_date_field()
            })
            if success:
                numSuccess += 1
        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    logger.info('<< Finished Elixir registry importing process...')
def main_options(options):
    """
        Executes the main functionality of this script: it extracts information from iAnn events data
        and inserts some of them into the DB. Only events whose link passes util.existURL are inserted.

        * options {dict} specific configurations for initialization. May be None, in which case
          the defaults shown below are used.
            ds_name {string} specific dataset/database to use with the DB manager (default None)
            delete_all_old_data {boolean} specifies if we should delete all previous data from this source in our DataBase (default False)
            registriesFromTime {datetime} time from which registries will be obtained; passed to get_iann_data (default None)
            updateRegistries {boolean} if we want to get new registries or not (default True)

        In this script we will insert these fields into each registry:
            "title" {string} Title for the event;
            "start" - Date the event starts;
            "end" - Date the event ends;
            "city" {string} City where the event is hosted;
            "country" {string} Country where the event is hosted;
            "field" {string} Branches of science in molecular biology.
            "provider" {string} Organization hosting the event;
            "link" {string} Link to the data registry.
            "source" Value returned by get_source_field().
            "resource_type" Value returned by get_resource_type_field().
            "insertion_date" Value returned by get_insertion_date_field().
            "created" {date} Date and time of creation of the original registry.
        See more eg: http://iann.pro/iann-web-services
    """
    # IannDataLocking.lock()
    init_logger()

    ds_name = None
    delete_all_old_data = False
    registriesFromTime = None
    updateRegistries = True

    if options is not None:
        logger.info('>> Starting iann importing process... params: ')
        if 'ds_name' in options:
            ds_name = options['ds_name']
            # str() keeps the log line from failing on a None / non-string name.
            logger.info('ds_name=' + str(ds_name))
        if 'delete_all_old_data' in options:
            delete_all_old_data = options['delete_all_old_data']
            logger.info('delete_all_old_data=' + str(delete_all_old_data))
        if 'registriesFromTime' in options:
            registriesFromTime = options['registriesFromTime']
            logger.info('registriesFromTime=' + str(registriesFromTime))
        if 'updateRegistries' in options:
            updateRegistries = options['updateRegistries']
            logger.info('updateRegistries=' + str(updateRegistries))
    else:
        logger.info('>> Starting iann importing process...')

    iann_data = None
    if updateRegistries:
        iann_data = get_iann_data(registriesFromTime)

    user = None
    passw = None
    try:
        config = ConfigParser.RawConfigParser()
        config.read('ConfigFile.properties')
        # Both values are read before either is assigned, so a missing
        # password never leaves a user name without its password.
        user, passw = (
            config.get('AuthenticationSection', 'database.user'),
            config.get('AuthenticationSection', 'database.password'),
        )
    except Exception as e:
        # Best effort: without credentials we continue with user/passw = None.
        logger.info("Not user info found, using anonymous user... ")
        logger.info(e)

    dbFactory = DBFactory()
    dbManager = dbFactory.get_default_db_manager_with_username(
        ds_name, user, passw)

    if delete_all_old_data:
        iann_conditions = [['EQ', 'source', get_source_field()]]
        previous_count = dbManager.count_data_by_conditions(iann_conditions)
        dbManager.delete_data_by_conditions(iann_conditions)
        new_count = dbManager.count_data_by_conditions(iann_conditions)
        # Only report the difference when both counts are available.
        if previous_count is not None and new_count is not None:
            logger.info('Deleted ' + str(previous_count - new_count) +
                        ' registries')

    if iann_data is not None:
        numSuccess = 0
        for result in iann_data:
            if result is None:
                continue
            # Check the link of the event being processed. The previous code
            # referenced an undefined name ('record') here.
            if not util.existURL(get_link(result)):
                continue
            success = dbManager.insert_data({
                "title": get_title(result),
                "start": get_start(result),
                "end": get_end(result),
                "city": get_city(result),
                "country": get_country(result),
                "field": get_field(result),
                "provider": get_provider(result),
                "link": get_link(result),
                "source": get_source_field(),
                "resource_type": get_resource_type_field(),
                "insertion_date": get_insertion_date_field(),
                "created": get_creation_date_field(result)
            })
            if success:
                numSuccess += 1
        logger.info('Inserted ' + str(numSuccess) + ' new registries')

    logger.info('<< Finished iann importing process.')