Example #1
def routine():

    if not os.path.exists(ITEMS_DIR):
        os.makedirs(ITEMS_DIR)

    ################################
    ### RETRIEVE RECORDS OF CTXs ###
    ################################

    ctxs = utils.read_json(os.path.join(CTX_DIR, "all.json"))

    ctx_meta = {}

    for rec in ctxs['records']:
        objectId = rec['data']['objectId']
        ctx_meta[objectId] = rec['data']['name']

    ctx_ids = list(ctx_meta.keys())
    ctx_ids.sort()

    client = Client()

    for ctx_idx in tqdm(ctx_ids):
        print("retrieve data of context:", ctx_meta[ctx_idx])
        ctx_data = client.get_data(ctx_id=ctx_idx)
        utils.write_json(os.path.join(ITEMS_DIR, ctx_idx+".json"),
                         ctx_data.collection)
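The helpers utils.read_json and utils.write_json are used throughout these examples but never shown. A minimal sketch of what such wrappers typically look like, assuming they simply delegate to the standard json module (the actual pybman implementation may differ):

import json

def read_json(path):
    # Load a JSON file and return the parsed object.
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)

def write_json(path, data):
    # Serialize data to a JSON file, keeping non-ASCII characters readable.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)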
Example #2
def routine():

    if not os.path.exists(JOUR_DIR):
        os.makedirs(JOUR_DIR)

    #################################
    ### RETRIEVE LIST OF JOURNALS ###
    #################################

    jour_controller = JournalConeController()
    journals = jour_controller.get_entities()

    utils.write_json(os.path.join(JOUR_DIR, 'all.json'), journals)

    ################################################
    ### RETRIEVE INDIVIDUAL ENTRIES FOR JOURNALS ###
    ################################################

    jour_meta = {}

    for jour in journals:
        idx = jour['id'].split("/")[-1]
        jour_meta[idx] = jour['value']

    jour_ids = list(jour_meta.keys())

    # request data
    jour_data = {}
    for idx in tqdm(jour_ids):
        jour_details = jour_controller.get_entity(idx)
        jour_data[idx] = jour_details

    utils.write_json(os.path.join(JOUR_DIR, 'collection.json'), jour_data)
Example #3
def routine():

    if not os.path.exists(LANG_DIR):
        os.makedirs(LANG_DIR)

    #################################################
    ### RETRIEVE LIST OF LANGUAGES (CoNE/ISO 639) ###
    #################################################

    lang_controller = LanguageConeController()
    langs = lang_controller.get_entities()

    utils.write_json(os.path.join(LANG_DIR, 'all.json'), langs)

    #################################################
    ### RETRIEVE INDIVIDUAL ENTRIES FOR LANGUAGES ###
    #################################################

    lang_meta = {}

    for lang in langs:
        idx = lang['id'].split("/")[-1]
        lang_meta[idx] = lang['value']

    lang_ids = list(lang_meta.keys())

    # request data
    lang_data = {}
    for idx in tqdm(lang_ids):
        lang_details = lang_controller.get_entity(idx)
        lang_data[idx] = lang_details

    utils.write_json(os.path.join(LANG_DIR, 'collection.json'), lang_data)
Example #4
def routine():

    if not os.path.exists(CTX_DIR):
        os.makedirs(CTX_DIR)

    ################################
    ### RETRIEVE CONTEXTS (CTXs) ###
    ################################

    ctx_controller = ContextRestController()
    ctxs = ctx_controller.get_all()

    utils.write_json(os.path.join(CTX_DIR, 'all.json'), ctxs)
Example #5
def routine():

    if not os.path.exists(OUS_DIR):
        os.makedirs(OUS_DIR)

    ###########################################
    ### RETRIEVE ORGANIZATIONAL UNITS (OUS) ###
    ###########################################

    ou_controller = OrgUnitRestController()
    ous = ou_controller.get_all()

    utils.write_json(os.path.join(OUS_DIR, "all.json"), ous)
Example #6
def routine():

    all_ous = []
    all_fnd = {}

    eng_fnd = utils.read_json(os.path.join(MAP_DIR, "ous_fnd_eng.json"))

    for eng_ou in eng_fnd:
        all_fnd[eng_ou] = eng_fnd[eng_ou]
        all_ous.append(eng_fnd[eng_ou])

    deu_fnd = utils.read_json(os.path.join(MAP_DIR, "ous_fnd_deu.json"))
    # deu_ous = list(deu_fnd.values())

    for deu_ou in deu_fnd:
        if deu_fnd[deu_ou] not in all_ous:
            all_fnd[deu_ou] = deu_fnd[deu_ou]
            all_ous.append(deu_fnd[deu_ou])

    utils.write_json(os.path.join(MAP_DIR, "mpi_ous.json"), all_fnd)
Example #7
def routine():

    if not os.path.exists(PERS_DIR):
        os.makedirs(PERS_DIR)

    #######################################
    ### RETRIEVE LIST OF PERSONS (CoNE) ###
    #######################################

    pers_controller = PersonConeController()
    pers = pers_controller.get_entities()

    utils.write_json(os.path.join(PERS_DIR, 'all.json'), pers)

    ##############################
    ### EXTRACT UNIQUE PERSONS ###
    ##############################

    pers_unique = {}

    for p in pers:
        idx = p['id'].split("/")[-1]
        if idx in pers_unique:
            pers_unique[idx].append(p['value'])
        else:
            pers_unique[idx] = [p['value']]

    utils.write_json(os.path.join(PERS_DIR, 'unique.json'), pers_unique)

    ###############################################
    ### RETRIEVE INDIVIDUAL ENTRIES FOR PERSONS ###
    ###############################################

    pers_ids = list(pers_unique.keys())
    pers_ids.sort()

    # request data
    pers_data = {}
    for idx in tqdm(pers_ids):
        pers_details = pers_controller.get_entity(idx)
        pers_data[idx] = pers_details

    utils.write_json(os.path.join(PERS_DIR, 'collection.json'), pers_data)
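The unique-person step in Example #7 accumulates every name variant per CoNE id; an equivalent sketch of the same grouping using collections.defaultdict:

from collections import defaultdict

pers_unique = defaultdict(list)
for p in pers:
    idx = p['id'].split("/")[-1]         # CoNE id is the last path segment
    pers_unique[idx].append(p['value'])  # collect every name variant per id
pers_unique = dict(pers_unique)          # plain dict before writing to JSON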
Example #8
def routine():

    if not os.path.exists(MAP_DIR):
        os.makedirs(MAP_DIR)

    ##############################################################
    ### READ FILES CONTAINING ALL MAX PLANCK INSTITUTES (MPIs) ###
    ### AND ORGANIZATIONAL UNITS (OUs) FROM PURE #################
    ##############################################################

    mpis = utils.read_json(os.path.join(SCRAPE_DIR, 'all.json'))
    pure_ous = utils.read_json(os.path.join(OUS_DIR, 'all.json'))

    ############################
    ### EXTRACT NAMES OF OUs ###
    ############################

    names = {}

    for record in pure_ous['records']:
        idx = record['data']['objectId']
        metadata = record['data']['metadata']
        name = metadata['name'].strip()
        names[name] = idx
        if 'alternativeNames' in metadata and metadata['alternativeNames'][0]:
            for altname in metadata['alternativeNames']:
                names[altname.strip()] = idx

    m = list(mpis.keys())
    n = list(names.keys())

    #############################
    ### MAP MPIs TO NAMES/OUs ###
    #############################

    not_fnd = []
    fnd = {}

    for mpi in m:
        if mpi in n:
            fnd[mpi] = names[mpi]
        elif mpi.replace('Max Planck Institute', 'MPI') in n:
            fnd[mpi] = names[mpi.replace('Max Planck Institute', 'MPI')]
        elif mpi.split(",")[0] in n:
            fnd[mpi] = names[mpi.split(",")[0]]
            # Max Planck Institute for Software Systems, Kaiserslautern site
            # Max Planck Institute for Software Systems, Saarbrücken site
            # Max Planck Institute for Intelligent Systems, Stuttgart site
            # Max Planck Institute for Intelligent Systems, Tübingen site
        elif mpi.split(" (")[0] in n:
            fnd[mpi] = names[mpi.split(" (")[0]]
            # Max Planck Institute for Gravitational Physics (Hannover)
            # Max Planck Institute for Ornithology (Radolfzell)
            # Max Planck Institute for Plasma Physics (Greifswald)
        elif mpi == 'Research Group Social Neuroscience':
            # part of the Max Planck Institute for Human Cognitive and Brain Sciences
            continue
        else:
            # print("no equivalent found for", mpi)
            not_fnd.append(mpi)

    idea = {}

    for no_eq in not_fnd:
        parts = no_eq.split('Max Planck Institute for')
        if len(parts) > 1:
            for ou in n:
                if parts[1].strip().lower() in ou.lower():
                    if no_eq in idea:
                        if ou not in idea[no_eq]:
                            idea[no_eq].append(ou)
                    else:
                        idea[no_eq] = [ou]

    for no_eq in not_fnd:
        parts = no_eq.split('for')
        if len(parts) > 1:
            for ou in n:
                if parts[1].strip().lower() in ou.lower():
                    if no_eq in idea:
                        if ou not in idea[no_eq]:
                            idea[no_eq].append(ou)
                    else:
                        idea[no_eq] = [ou]

    for no_eq in not_fnd:
        parts = no_eq.split('of')
        if len(parts) > 1:
            for ou in n:
                if parts[1].strip().lower() in ou.lower():
                    if no_eq in idea:
                        if ou not in idea[no_eq]:
                            idea[no_eq].append(ou)
                    else:
                        idea[no_eq] = [ou]

    for no_eq in not_fnd:
        parts = no_eq.split(',')
        if len(parts) > 1:
            for ou in n:
                if parts[0].strip().lower() in ou.lower():
                    if no_eq in idea:
                        if ou not in idea[no_eq]:
                            idea[no_eq].append(ou)
                    else:
                        idea[no_eq] = [ou]

    ###############################
    ### PRINT AND WRITE RESULTS ###
    ###############################

    print("")
    print("found matches for")
    counter = 0
    for mpi in m:
        if mpi not in not_fnd:
            counter += 1
            print(mpi)

    print(str(counter),"in total")
    utils.write_json(os.path.join(MAP_DIR, "ous_fnd_eng.json"), fnd)

    print("")
    print("found possible matches for")
    counter = 0
    for nt_eq in idea:
        counter += 1
        print(nt_eq)

    print(str(counter),"in total")
    utils.write_json(os.path.join(MAP_DIR, "ous_ideas_eng.json"), idea)

    print("")
    print("no match found for")
    counter = 0
    for nt_eq in not_fnd:
        if nt_eq not in idea:
            counter += 1
            print(nt_eq)

    print(str(counter),"in total")
    print("")
    utils.write_json(os.path.join(MAP_DIR, "ous_not_fnd_eng.txt"), not_fnd)
Example #9
                if no_eq in idea:
                    if ou not in idea[no_eq]:
                        idea[no_eq].append(ou)
                else:
                    idea[no_eq] = [ou]

print("")
print("found matches for")
counter = 0
for mpi in m:
    if mpi not in not_fnd:
        counter += 1
        print(mpi)

print(str(counter), "in total")
utils.write_json(MPIS_DIR + "map/ous_fnd_deu.json", fnd)

print("")
print("found possible matches for")
counter = 0
for nt_eq in idea:
    counter += 1
    print(nt_eq)

print(str(counter), "in total")
utils.write_json(MPIS_DIR + "map/ous_ideas_deu.json", idea)

print("")
print("no match found for")
counter = 0
for nt_eq in not_fnd:
Example #10
from pybman import utils
from pybman.rest import ContextRestController

from .utils_paths import BASE_DIR

PURE_DIR = BASE_DIR + 'pure/'

################################
### RETRIEVE CONTEXTS (CTXs) ###
################################

ctx_controller = ContextRestController()
ctxs = ctx_controller.get_all()

utils.write_json(PURE_DIR + "ctx/all.json", ctxs)
Example #11
def routine():

    if not os.path.exists(MAP_DIR):
        os.makedirs(MAP_DIR)

    ######################################################################
    ### READ FILES CONTAINING METADATA ON MAX PLANCK INSTITUTES (MPIs) ###
    ### AND ORGANIZATIONAL UNITS (OUs) FROM PURE #########################
    ######################################################################

    mpis = utils.read_json(os.path.join(SCRAPE_DIR, 'all_deu.json'))
    pure_ous = utils.read_json(os.path.join(OUS_DIR, 'all.json'))

    ############################
    ### EXTRACT NAMES OF OUs ###
    ############################

    names = {}

    for record in pure_ous['records']:
        idx = record['data']['objectId']
        metadata = record['data']['metadata']
        name = metadata['name'].strip()
        names[name] = idx
        if 'alternativeNames' in metadata and metadata['alternativeNames'][0]:
            for altname in metadata['alternativeNames']:
                names[altname.strip()] = idx

    m = list(mpis.keys())
    n = list(names.keys())

    #############################
    ### MAP MPIs TO NAMES/OUs ###
    #############################

    not_fnd = []
    fnd = {}

    for mpi in m:
        if mpi in n:
            fnd[mpi] = names[mpi]
        elif mpi.replace('-', ' ') in n:
            fnd[mpi] = names[mpi.replace('-', ' ')]
        else:
            # print("no equivalent found for", mpi)
            not_fnd.append(mpi)

    idea = {}

    for no_eq in not_fnd:
        parts = no_eq.split('Max-Planck-Institut für')
        if len(parts) > 1:
            for ou in n:
                if parts[1].strip().lower() in ou.lower():
                    if no_eq in idea:
                        if ou not in idea[no_eq]:
                            idea[no_eq].append(ou)
                    else:
                        idea[no_eq] = [ou]

    for no_eq in not_fnd:
        parts = no_eq.split('für')
        if len(parts) > 1:
            for ou in n:
                if parts[1].strip().lower() in ou.lower():
                    if no_eq in idea:
                        if ou not in idea[no_eq]:
                            idea[no_eq].append(ou)
                    else:
                        idea[no_eq] = [ou]

    for no_eq in not_fnd:
        parts = no_eq.split(',')
        if len(parts) > 1:
            for ou in n:
                if parts[0].strip().lower() in ou.lower():
                    if no_eq in idea:
                        if ou not in idea[no_eq]:
                            idea[no_eq].append(ou)
                    else:
                        idea[no_eq] = [ou]

    ###############################
    ### PRINT AND WRITE RESULTS ###
    ###############################

    print("")
    print("found matches for")
    counter = 0
    for mpi in m:
        if mpi not in not_fnd:
            counter += 1
            print(mpi)

    print(str(counter), "in total")
    utils.write_json(os.path.join(MAP_DIR, "ous_fnd_deu.json"), fnd)

    print("")
    print("found possible matches for")
    counter = 0
    for nt_eq in idea:
        counter += 1
        print(nt_eq)

    print(str(counter), "in total")
    utils.write_json(os.path.join(MAP_DIR, "ous_ideas_deu.json"), idea)

    print("")
    print("no match found for")
    counter = 0
    for nt_eq in not_fnd:
        if nt_eq not in idea:
            counter += 1
            print(nt_eq)

    print(str(counter), "in total")
    print("")
    utils.write_json(os.path.join(MAP_DIR, "ous_not_fnd_deu.txt"), not_fnd)
Example #12
from pybman import utils
from pybman.rest import OrgUnitRestController

from .utils_paths import BASE_DIR

PURE_DIR = BASE_DIR + 'pure/'

###########################################
### RETRIEVE ORGANIZATIONAL UNITS (OUS) ###
###########################################

ou_controller = OrgUnitRestController()
ous = ou_controller.get_all()

utils.write_json(PURE_DIR + "ous/all.json", ous)
Example #13
from tqdm import tqdm
from pybman import utils
from pybman.rest import LanguageConeController

from .utils_paths import BASE_DIR

PURE_DIR = BASE_DIR + 'pure/'

#################################################
### RETRIEVE LIST OF LANGUAGES (CoNE/ISO 639) ###
#################################################

lang_controller = LanguageConeController()
langs = lang_controller.get_entities()

utils.write_json(PURE_DIR + "lang/all.json", langs)

#################################################
### RETRIEVE INDIVIDUAL ENTRIES FOR LANGUAGES ###
#################################################

lang_meta = {}

for lang in langs:
    idx = lang['id'].split("/")[-1]
    lang_meta[idx] = lang['value']

lang_ids = list(lang_meta.keys())

# request data
lang_data = {}
Example #14
from tqdm import tqdm
from pybman import utils
from pybman.rest import PersonConeController

from .utils_paths import BASE_DIR

PURE_DIR = BASE_DIR + 'pure/'

#######################################
### RETRIEVE LIST OF PERSONS (CoNE) ###
#######################################

pers_controller = PersonConeController()
pers = pers_controller.get_entities()

utils.write_json(PURE_DIR + "pers/all.json", pers)

##############################
### EXTRACT UNIQUE PERSONS ###
##############################

pers_unique = {}

for p in pers:
    idx = p['id'].split("/")[-1]
    if idx in pers_unique:
        pers_unique[idx].append(p['value'])
    else:
        pers_unique[idx] = [p['value']]

utils.write_json(PURE_DIR + "pers/unique.json", pers_unique)
Example #15
print("scraped",len(mpis),"institues!")

mpis_mapped = utils.read_json(BASE_DIR + 'mpis/map/mpi_ous.json') # ous.json

o = list(mpis_mapped.values())
m = list(mpis_mapped.keys())

ous_mpi = {}

for i in range(len(o)):
    ou = o[i]
    name = m[i]
    ous_mpi[ou] = name

print("done with reverse mapping!")
utils.write_json(MPIS_DIR + 'mapped/ous_mpi.json', ous_mpi)

m = list(mpis.keys())
n = list(mpis_mapped.keys())

counter = 0
no_map = []

for i in m:
    if i in n:
        counter += 1
        continue
    elif i == 'Research Group Social Neuroscience':
        # part of the Max Planck Institute for Human Cognitive and Brain Sciences
        continue
    else:
Example #16
mpis_mapped = utils.read_json(os.path.join(MAP_DIR,
                                           "mpi_ous.json"))  # ous.json

o = list(mpis_mapped.values())
m = list(mpis_mapped.keys())

ous_mpi = {}

for i in range(len(o)):
    ou = o[i]
    name = m[i]
    ous_mpi[ou] = name

print("done with reverse mapping!")
utils.write_json(os.path.join(MAPPED_DIR, 'ous_mpi.json'), ous_mpi)

m = list(mpis.keys())
n = list(mpis_mapped.keys())

counter = 0
no_map = []

for i in m:
    if i in n:
        counter += 1
        continue
    elif i == 'Research Group Social Neuroscience':
        # part of the Max Planck Institute for Human Cognitive and Brain Sciences
        continue
    else:
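The reverse mapping in Examples #15 and #16 pairs mpis_mapped.values() with mpis_mapped.keys() by index; since both views iterate in the same order, the loop can be written directly as a dict comprehension (a sketch of the same inversion):

# invert {mpi_name: ou_id} into {ou_id: mpi_name}
ous_mpi = {ou_id: mpi_name for mpi_name, ou_id in mpis_mapped.items()}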
Example #17
from tqdm import tqdm
from pybman import utils
from pybman import Client
from pybman.rest import ContextRestController

from .utils_paths import BASE_DIR

PURE_DIR = BASE_DIR + 'pure/'
ITEMS_DIR = BASE_DIR + 'items/'

################################
### RETRIEVE RECORDS OF CTXs ###
################################

ctxs = utils.read_json(PURE_DIR + "ctx/all.json")

ctx_meta = {}

for rec in ctxs['records']:
    objectId = rec['data']['objectId']
    ctx_meta[objectId] = rec['data']['name']

ctx_ids = list(ctx_meta.keys())
ctx_ids.sort()

client = Client()

for ctx_idx in tqdm(ctx_ids):
    print("retrieve data of context:", ctx_meta[ctx_idx])
    ctx_data = client.get_data(ctx_id=ctx_idx)
    utils.write_json(ITEMS_DIR + ctx_idx + ".json", ctx_data.collection)
Example #18
from tqdm import tqdm
from pybman import utils
from pybman.rest import JournalConeController

from .utils_paths import BASE_DIR

PURE_DIR = BASE_DIR + 'pure/'

#################################
### RETRIEVE LIST OF JOURNALS ###
#################################

jour_controller = JournalConeController()
journals = jour_controller.get_entities()

utils.write_json(PURE_DIR + 'jour/all.json', journals)

################################################
### RETRIEVE INDIVIDUAL ENTRIES FOR JOURNALS ###
################################################

jour_meta = {}

for jour in journals:
    idx = jour['id'].split("/")[-1]
    jour_meta[idx] = jour['value']

jour_ids = list(jour_meta.keys())

# request data
jour_data = {}
Example #19
from pybman import utils

from .utils_paths import BASE_DIR

MPIS_DIR = BASE_DIR + 'mpis/'

all_ous = []
all_fnd = {}

eng_fnd = utils.read_json(MPIS_DIR + "map/ous_fnd_eng.json")

for eng_ou in eng_fnd:
    all_fnd[eng_ou] = eng_fnd[eng_ou]
    all_ous.append(eng_fnd[eng_ou])

deu_fnd = utils.read_json(MPIS_DIR + "map/ous_fnd_deu.json")
deu_ous = list(deu_fnd.values())

for deu_ou in deu_fnd:
    if deu_fnd[deu_ou] not in all_ous:
        all_fnd[deu_ou] = deu_fnd[deu_ou]
        all_ous.append(deu_fnd[deu_ou])

utils.write_json(MPIS_DIR + "map/mpi_ous.json", all_fnd)
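Taken together, the routine() functions have a clear dependency order: contexts (Example #4) before items (Example #1), organizational units (Example #5) before the English and German mappings (Examples #8 and #11), and both mappings before the merge (Example #6). A hypothetical driver is sketched below; the module names are invented, since the actual package layout is not shown in these examples.

# Hypothetical module names; the real package layout is not part of these examples.
from . import get_ctxs, get_items, get_ous, map_ous_eng, map_ous_deu, merge_ous

def run_all():
    get_ctxs.routine()      # Example #4: write ctx/all.json
    get_items.routine()     # Example #1: fetch item records per context
    get_ous.routine()       # Example #5: write ous/all.json
    map_ous_eng.routine()   # Example #8: match MPIs to OUs by English names
    map_ous_deu.routine()   # Example #11: match MPIs to OUs by German names
    merge_ous.routine()     # Example #6: merge both mappings into mpi_ous.json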