def routine():
    """Fetch the item records of every context and dump each to ITEMS_DIR."""
    if not os.path.exists(ITEMS_DIR):
        os.makedirs(ITEMS_DIR)

    ################################
    ### RETRIEVE RECORDS OF CTXs ###
    ################################
    ctxs = utils.read_json(os.path.join(CTX_DIR, "all.json"))
    # context objectId -> human-readable context name
    ctx_meta = {rec['data']['objectId']: rec['data']['name'] for rec in ctxs['records']}

    client = Client()
    for ctx_idx in tqdm(sorted(ctx_meta)):
        print("retrieve data of context:", ctx_meta[ctx_idx])
        ctx_data = client.get_data(ctx_id=ctx_idx)
        utils.write_json(os.path.join(ITEMS_DIR, ctx_idx + ".json"), ctx_data.collection)
def routine():
    """Download the CoNE journal list and the per-journal detail records."""
    if not os.path.exists(JOUR_DIR):
        os.makedirs(JOUR_DIR)

    #################################
    ### RETRIEVE LIST OF JOURNALS ###
    #################################
    jour_controller = JournalConeController()
    journals = jour_controller.get_entities()
    utils.write_json(os.path.join(JOUR_DIR, 'all.json'), journals)

    ################################################
    ### RETRIEVE INDIVIDUAL ENTRIES FOR JOURNALS ###
    ################################################
    # journal id (last segment of the CoNE URI) -> display value
    jour_meta = {jour['id'].split("/")[-1]: jour['value'] for jour in journals}

    # request data
    jour_data = {idx: jour_controller.get_entity(idx) for idx in tqdm(list(jour_meta))}
    utils.write_json(os.path.join(JOUR_DIR, 'collection.json'), jour_data)
def routine():
    """Download the CoNE language list (ISO 639) and per-language details."""
    if not os.path.exists(LANG_DIR):
        os.makedirs(LANG_DIR)

    #################################################
    ### RETRIEVE LIST OF LANGUAGES (CoNE/ISO 639) ###
    #################################################
    lang_controller = LanguageConeController()
    langs = lang_controller.get_entities()
    utils.write_json(os.path.join(LANG_DIR, 'all.json'), langs)

    #################################################
    ### RETRIEVE INDIVIDUAL ENTRIES FOR LANGUAGES ###
    #################################################
    # language id (last segment of the CoNE URI) -> display value
    lang_meta = {lang['id'].split("/")[-1]: lang['value'] for lang in langs}

    # request data
    lang_data = {idx: lang_controller.get_entity(idx) for idx in tqdm(list(lang_meta))}
    utils.write_json(os.path.join(LANG_DIR, 'collection.json'), lang_data)
def routine():
    """Fetch all PuRe contexts and store them as one JSON file in CTX_DIR."""
    if not os.path.exists(CTX_DIR):
        os.makedirs(CTX_DIR)

    ################################
    ### RETRIEVE CONTEXTS (CTXs) ###
    ################################
    controller = ContextRestController()
    utils.write_json(os.path.join(CTX_DIR, 'all.json'), controller.get_all())
def routine():
    """Fetch all organizational units from PuRe and store them in OUS_DIR."""
    if not os.path.exists(OUS_DIR):
        os.makedirs(OUS_DIR)

    ###########################################
    ### RETRIEVE ORGANIZATIONAL UNITS (OUS) ###
    ###########################################
    controller = OrgUnitRestController()
    utils.write_json(os.path.join(OUS_DIR, "all.json"), controller.get_all())
def routine():
    """Merge the English and German MPI -> OU mappings into mpi_ous.json.

    English matches take precedence; a German entry is only added when its
    OU id was not already claimed by an English entry.
    """
    merged = {}
    seen_ous = []

    eng_fnd = utils.read_json(os.path.join(MAP_DIR, "ous_fnd_eng.json"))
    for name, ou_id in eng_fnd.items():
        merged[name] = ou_id
        seen_ous.append(ou_id)

    deu_fnd = utils.read_json(os.path.join(MAP_DIR, "ous_fnd_deu.json"))
    for name, ou_id in deu_fnd.items():
        if ou_id not in seen_ous:
            merged[name] = ou_id
            seen_ous.append(ou_id)

    utils.write_json(os.path.join(MAP_DIR, "mpi_ous.json"), merged)
def routine():
    """Download CoNE persons, deduplicate them by id, and fetch details."""
    if not os.path.exists(PERS_DIR):
        os.makedirs(PERS_DIR)

    #######################################
    ### RETRIEVE LIST OF PERSONS (CoNE) ###
    #######################################
    pers_controller = PersonConeController()
    pers = pers_controller.get_entities()
    utils.write_json(os.path.join(PERS_DIR, 'all.json'), pers)

    ##############################
    ### EXTRACT UNIQUE PERSONS ###
    ##############################
    # group every display value under the person's CoNE id (last URI segment)
    pers_unique = {}
    for p in pers:
        idx = p['id'].split("/")[-1]
        pers_unique.setdefault(idx, []).append(p['value'])
    utils.write_json(os.path.join(PERS_DIR, 'unique.json'), pers_unique)

    ###############################################
    ### RETRIEVE INDIVIDUAL ENTRIES FOR PERSONS ###
    ###############################################
    # request data
    pers_data = {}
    for idx in tqdm(sorted(pers_unique)):
        pers_data[idx] = pers_controller.get_entity(idx)
    utils.write_json(os.path.join(PERS_DIR, 'collection.json'), pers_data)
def routine():
    """Map scraped English MPI names to PuRe OU object ids.

    Writes three files to MAP_DIR:
      * ous_fnd_eng.json    -- confirmed name -> OU id matches
      * ous_ideas_eng.json  -- fuzzy candidates for unmatched names
      * ous_not_fnd_eng.txt -- names without any match

    Refactor: the four copy-pasted fuzzy-matching loops (differing only in
    the separator and which split part is compared) are folded into one
    nested helper; output behavior is unchanged.
    """
    if not os.path.exists(MAP_DIR):
        os.makedirs(MAP_DIR)

    ##############################################################
    ### READ FILES CONTAINING ALL MAX PLANCK INSTITUTES (MPIs) ###
    ### AND ORGANIZATIONAL UNITS (OUs) FROM PURE #################
    ##############################################################
    mpis = utils.read_json(os.path.join(SCRAPE_DIR, 'all.json'))
    pure_ous = utils.read_json(os.path.join(OUS_DIR, 'all.json'))

    ############################
    ### EXTRACT NAMES OF OUs ###
    ############################
    names = {}
    for record in pure_ous['records']:
        idx = record['data']['objectId']
        metadata = record['data']['metadata']
        name = metadata['name'].strip()
        names[name] = idx
        # alternativeNames may exist but hold only an empty first entry
        if 'alternativeNames' in metadata and metadata['alternativeNames'][0]:
            for altname in metadata['alternativeNames']:
                names[altname.strip()] = idx

    m = list(mpis.keys())
    n = list(names.keys())

    #############################
    ### MAP MPIs TO NAMES/OUs ###
    #############################
    not_fnd = []
    fnd = {}
    for mpi in m:
        if mpi in n:
            fnd[mpi] = names[mpi]
        elif mpi.replace('Max Planck Institute', 'MPI') in n:
            fnd[mpi] = names[mpi.replace('Max Planck Institute', 'MPI')]
        elif mpi.split(",")[0] in n:
            # e.g. "Max Planck Institute for Software Systems, Kaiserslautern site"
            #      "Max Planck Institute for Intelligent Systems, Tübingen site"
            fnd[mpi] = names[mpi.split(",")[0]]
        elif mpi.split(" (")[0] in n:
            # e.g. "Max Planck Institute for Gravitational Physics (Hannover)"
            #      "Max Planck Institute for Plasma Physics (Greifswald)"
            fnd[mpi] = names[mpi.split(" (")[0]]
        elif mpi == 'Research Group Social Neuroscience':
            # part of the Max Planck Institute for Human Cognitive and Brain Sciences
            continue
        else:
            not_fnd.append(mpi)

    def collect_ideas(idea, separator, part_index):
        """Record every OU name containing the chosen fragment of an unmatched MPI."""
        for no_eq in not_fnd:
            parts = no_eq.split(separator)
            if len(parts) > 1:
                fragment = parts[part_index].strip().lower()
                for ou in n:
                    if fragment in ou.lower():
                        if no_eq in idea:
                            if ou not in idea[no_eq]:
                                idea[no_eq].append(ou)
                        else:
                            idea[no_eq] = [ou]

    # four fuzzy passes, from most to least specific separator
    idea = {}
    collect_ideas(idea, 'Max Planck Institute for', 1)
    collect_ideas(idea, 'for', 1)
    collect_ideas(idea, 'of', 1)
    collect_ideas(idea, ',', 0)

    ###############################
    ### PRINT AND WRITE RESULTS ###
    ###############################
    print("")
    print("found matches for")
    counter = 0
    for mpi in m:
        if mpi not in not_fnd:
            counter += 1
            print(mpi)
    print(str(counter), "in total")
    utils.write_json(os.path.join(MAP_DIR, "ous_fnd_eng.json"), fnd)

    print("")
    print("found possible matches for")
    counter = 0
    for nt_eq in idea:
        counter += 1
        print(nt_eq)
    print(str(counter), "in total")
    utils.write_json(os.path.join(MAP_DIR, "ous_ideas_eng.json"), idea)

    print("")
    print("no match found for")
    counter = 0
    for nt_eq in not_fnd:
        if nt_eq not in idea:
            counter += 1
            print(nt_eq)
    print(str(counter), "in total")
    print("")
    utils.write_json(os.path.join(MAP_DIR, "ous_not_fnd_eng.txt"), not_fnd)
# NOTE(review): fragment — the enclosing `for no_eq ...` / `for ou ...` loops
# begin before this chunk; indentation reconstructed for readability.
if no_eq in idea:
    if ou not in idea[no_eq]:
        idea[no_eq].append(ou)
else:
    idea[no_eq] = [ou]

# Print/write results of the German MPI -> OU mapping. This revision builds
# paths via string concatenation on MPIS_DIR (the newer variant uses
# os.path.join on MAP_DIR).
print("")
print("found matches for")
counter = 0
for mpi in m:
    if mpi not in not_fnd:
        counter += 1
        print(mpi)
print(str(counter), "in total")
utils.write_json(MPIS_DIR + "map/ous_fnd_deu.json", fnd)

print("")
print("found possible matches for")
counter = 0
for nt_eq in idea:
    counter += 1
    print(nt_eq)
print(str(counter), "in total")
utils.write_json(MPIS_DIR + "map/ous_ideas_deu.json", idea)

print("")
print("no match found for")
counter = 0
for nt_eq in not_fnd:
    # (loop body continues beyond this chunk — presumably counts/prints the
    #  unmatched names, as in the English variant; TODO confirm)
from pybman import utils
from pybman.rest import ContextRestController

from .utils_paths import BASE_DIR

# root of the PuRe data dump
PURE_DIR = BASE_DIR + 'pure/'

################################
### RETRIEVE CONTEXTS (CTXs) ###
################################
ctx_controller = ContextRestController()
ctxs = ctx_controller.get_all()
out_path = PURE_DIR + "ctx/all.json"
utils.write_json(out_path, ctxs)
def routine():
    """Map scraped German MPI names to PuRe OU object ids.

    Writes three files to MAP_DIR:
      * ous_fnd_deu.json    -- confirmed name -> OU id matches
      * ous_ideas_deu.json  -- fuzzy candidates for unmatched names
      * ous_not_fnd_deu.txt -- names without any match

    Refactor: the three copy-pasted fuzzy-matching loops (differing only in
    the separator and which split part is compared) are folded into one
    nested helper; output behavior is unchanged.
    """
    if not os.path.exists(MAP_DIR):
        os.makedirs(MAP_DIR)

    ######################################################################
    ### READ FILES CONTAINING METADATA ON MAX PLANCK INSTITUTES (MPIs) ###
    ### AND ORGANIZATIONAL UNITS (OUs) FROM PURE #########################
    ######################################################################
    mpis = utils.read_json(os.path.join(SCRAPE_DIR, 'all_deu.json'))
    pure_ous = utils.read_json(os.path.join(OUS_DIR, 'all.json'))

    ############################
    ### EXTRACT NAMES OF OUs ###
    ############################
    names = {}
    for record in pure_ous['records']:
        idx = record['data']['objectId']
        metadata = record['data']['metadata']
        name = metadata['name'].strip()
        names[name] = idx
        # alternativeNames may exist but hold only an empty first entry
        if 'alternativeNames' in metadata and metadata['alternativeNames'][0]:
            for altname in metadata['alternativeNames']:
                names[altname.strip()] = idx

    m = list(mpis.keys())
    n = list(names.keys())

    #############################
    ### MAP MPIs TO NAMES/OUs ###
    #############################
    not_fnd = []
    fnd = {}
    for mpi in m:
        if mpi in n:
            fnd[mpi] = names[mpi]
        elif mpi.replace('-', ' ') in n:
            # OU names sometimes use spaces where the scrape uses hyphens
            fnd[mpi] = names[mpi.replace('-', ' ')]
        else:
            not_fnd.append(mpi)

    def collect_ideas(idea, separator, part_index):
        """Record every OU name containing the chosen fragment of an unmatched MPI."""
        for no_eq in not_fnd:
            parts = no_eq.split(separator)
            if len(parts) > 1:
                fragment = parts[part_index].strip().lower()
                for ou in n:
                    if fragment in ou.lower():
                        if no_eq in idea:
                            if ou not in idea[no_eq]:
                                idea[no_eq].append(ou)
                        else:
                            idea[no_eq] = [ou]

    # three fuzzy passes, from most to least specific separator
    idea = {}
    collect_ideas(idea, 'Max-Planck-Institut für', 1)
    collect_ideas(idea, 'für', 1)
    collect_ideas(idea, ',', 0)

    ###############################
    ### PRINT AND WRITE RESULTS ###
    ###############################
    print("")
    print("found matches for")
    counter = 0
    for mpi in m:
        if mpi not in not_fnd:
            counter += 1
            print(mpi)
    print(str(counter), "in total")
    utils.write_json(os.path.join(MAP_DIR, "ous_fnd_deu.json"), fnd)

    print("")
    print("found possible matches for")
    counter = 0
    for nt_eq in idea:
        counter += 1
        print(nt_eq)
    print(str(counter), "in total")
    utils.write_json(os.path.join(MAP_DIR, "ous_ideas_deu.json"), idea)

    print("")
    print("no match found for")
    counter = 0
    for nt_eq in not_fnd:
        if nt_eq not in idea:
            counter += 1
            print(nt_eq)
    print(str(counter), "in total")
    print("")
    utils.write_json(os.path.join(MAP_DIR, "ous_not_fnd_deu.txt"), not_fnd)
from pybman import utils
from pybman.rest import OrgUnitRestController

from .utils_paths import BASE_DIR

# root of the PuRe data dump
PURE_DIR = BASE_DIR + 'pure/'

###########################################
### RETRIEVE ORGANIZATIONAL UNITS (OUS) ###
###########################################
ou_controller = OrgUnitRestController()
ous = ou_controller.get_all()
out_path = PURE_DIR + "ous/all.json"
utils.write_json(out_path, ous)
from tqdm import tqdm

from pybman import utils
from pybman.rest import LanguageConeController

from .utils_paths import BASE_DIR

# root of the PuRe data dump
PURE_DIR = BASE_DIR + 'pure/'

#################################################
### RETRIEVE LIST OF LANGUAGES (CoNE/ISO 639) ###
#################################################
lang_controller = LanguageConeController()
langs = lang_controller.get_entities()
utils.write_json(PURE_DIR + "lang/all.json", langs)

#################################################
### RETRIEVE INDIVIDUAL ENTRIES FOR LANGUAGES ###
#################################################
# language id (last segment of the CoNE URI) -> display value
lang_meta = {lang['id'].split("/")[-1]: lang['value'] for lang in langs}
lang_ids = list(lang_meta.keys())

# request data
lang_data = {}
from tqdm import tqdm

from pybman import utils
from pybman.rest import PersonConeController

from .utils_paths import BASE_DIR

# root of the PuRe data dump
PURE_DIR = BASE_DIR + 'pure/'

#######################################
### RETRIEVE LIST OF PERSONS (CoNE) ###
#######################################
pers_controller = PersonConeController()
pers = pers_controller.get_entities()
utils.write_json(PURE_DIR + "pers/all.json", pers)

##############################
### EXTRACT UNIQUE PERSONS ###
##############################
# group every display value under the person's CoNE id (last URI segment)
pers_unique = {}
for p in pers:
    idx = p['id'].split("/")[-1]  # same result as the original [-1:][0]
    pers_unique.setdefault(idx, []).append(p['value'])
utils.write_json(PURE_DIR + "pers/unique.json", pers_unique)
# NOTE(review): fragment — `mpis`, `BASE_DIR` and `MPIS_DIR` are defined
# before this chunk; indentation reconstructed for readability.
# ("institues" is a typo in a runtime string; left untouched here.)
print("scraped", len(mpis), "institues!")

mpis_mapped = utils.read_json(BASE_DIR + 'mpis/map/mpi_ous.json')  # ous.json
# build the reverse mapping: OU object id -> institute name
o = list(mpis_mapped.values())
m = list(mpis_mapped.keys())
ous_mpi = {}
for i in range(len(o)):
    ou = o[i]
    name = m[i]
    ous_mpi[ou] = name
print("done with reverse mapping!")
utils.write_json(MPIS_DIR + 'mapped/ous_mpi.json', ous_mpi)

# count how many scraped institute names made it into the mapping
m = list(mpis.keys())
n = list(mpis_mapped.keys())
counter = 0
no_map = []
for i in m:
    if i in n:
        counter += 1
        continue
    elif i == 'Research Group Social Neuroscience':
        # part of the Max Planck Institute for Human Cognitive and Brain Sciences
        continue
    else:
        # (body continues beyond this chunk — presumably appends to no_map;
        #  TODO confirm against the full file)
mpis_mapped = utils.read_json(os.path.join(MAP_DIR, "mpi_ous.json")) # ous.json o = list(mpis_mapped.values()) m = list(mpis_mapped.keys()) ous_mpi = {} for i in range(len(o)): ou = o[i] name = m[i] ous_mpi[ou] = name print("done with reverse mapping!") utils.write_json(os.path.join(MAPPED_DIR, 'ous_mpi.json'), ous_mpi) m = list(mpis.keys()) n = list(mpis_mapped.keys()) counter = 0 no_map = [] for i in m: if i in n: counter += 1 continue elif i == 'Research Group Social Neuroscience': # part of the Max Planck Institute for Human Cognitive and Brain Sciences continue else:
# Retrieve the item records of every PuRe context and dump each to ITEMS_DIR.
from tqdm import tqdm  # BUGFIX: tqdm() is called below but was never imported (NameError)

from pybman import utils
from pybman import Client
from pybman.rest import ContextRestController

from .utils_paths import BASE_DIR

PURE_DIR = BASE_DIR + 'pure/'
ITEMS_DIR = BASE_DIR + 'items/'

################################
### RETRIEVE RECORDS OF CTXs ###
################################
ctxs = utils.read_json(PURE_DIR + "ctx/all.json")
# context objectId -> human-readable context name
ctx_meta = {}
for rec in ctxs['records']:
    objectId = rec['data']['objectId']
    ctx_meta[objectId] = rec['data']['name']
ctx_ids = list(ctx_meta.keys())
ctx_ids.sort()

client = Client()
for ctx_idx in tqdm(ctx_ids):
    print("retrieve data of context:", ctx_meta[ctx_idx])
    ctx_data = client.get_data(ctx_id=ctx_idx)
    utils.write_json(ITEMS_DIR + ctx_idx + ".json", ctx_data.collection)
from tqdm import tqdm

from pybman import utils
from pybman.rest import JournalConeController

from .utils_paths import BASE_DIR

# root of the PuRe data dump
PURE_DIR = BASE_DIR + 'pure/'

#################################
### RETRIEVE LIST OF JOURNALS ###
#################################
jour_controller = JournalConeController()
journals = jour_controller.get_entities()
utils.write_json(PURE_DIR + 'jour/all.json', journals)

################################################
### RETRIEVE INDIVIDUAL ENTRIES FOR JOURNALS ###
################################################
# journal id (last segment of the CoNE URI) -> display value
jour_meta = {jour['id'].split("/")[-1]: jour['value'] for jour in journals}
jour_ids = list(jour_meta.keys())

# request data
jour_data = {}
# Merge the English and German MPI -> OU mappings into mpi_ous.json.
# English matches take precedence; a German entry is only added when its
# OU id was not already claimed by an English entry.
from pybman import utils

from .utils_paths import BASE_DIR

MPIS_DIR = BASE_DIR + 'mpis/'

all_ous = []  # OU ids already taken
all_fnd = {}  # merged name -> OU id mapping

eng_fnd = utils.read_json(MPIS_DIR + "map/ous_fnd_eng.json")
for eng_ou in eng_fnd:
    all_fnd[eng_ou] = eng_fnd[eng_ou]
    all_ous.append(eng_fnd[eng_ou])

deu_fnd = utils.read_json(MPIS_DIR + "map/ous_fnd_deu.json")
# (removed the unused `deu_ous = list(deu_fnd.values())` — dead local,
#  already commented out in the newer revision of this routine)
for deu_ou in deu_fnd:
    if deu_fnd[deu_ou] not in all_ous:
        all_fnd[deu_ou] = deu_fnd[deu_ou]
        all_ous.append(deu_fnd[deu_ou])

utils.write_json(MPIS_DIR + "map/mpi_ous.json", all_fnd)