def routine():
    """Export the complete organisational-unit (OU) graph from PuRe.

    Reads all OUs from ``OUS_DIR/all.json`` and writes two Gephi-style
    CSVs into ``GRAPH_DIR``: one node row per OU (id, cleaned name) and
    one edge row per child->parent affiliation.  Console output is
    redirected to ``LOG_DIR/graph_ous.log`` for the duration of the run.
    """
    os.makedirs(LOG_DIR, exist_ok=True)
    os.makedirs(GRAPH_DIR, exist_ok=True)
    print("console output is redirected to graph_ous.log ...")
    stdout = sys.stdout
    try:
        # Redirect stdout into the log file; restored in ``finally`` even if
        # reading/writing fails, so callers are not left printing into a
        # closed file (the original never restored stdout on error).
        with open(os.path.join(LOG_DIR, "graph_ous.log"), "w+") as log:
            sys.stdout = log
            ous = utils.read_json(os.path.join(OUS_DIR, "all.json"))
            org_nodes = [['Id', 'Label']]
            org_edges = [['Source', 'Target']]
            for record in ous['records']:
                org_unit_id = record['data']['objectId']
                org_unit_name = utils.clean_string(record['data']['name'])
                org_nodes.append([org_unit_id, org_unit_name])
                # Root OUs carry no parentAffiliation and contribute no edge.
                if 'parentAffiliation' in record['data']:
                    parent = record['data']['parentAffiliation']['objectId']
                    org_edges.append([org_unit_id, parent])
            utils.write_csv(os.path.join(GRAPH_DIR, 'pure--ous_nodes.csv'),
                            org_nodes)
            utils.write_csv(os.path.join(GRAPH_DIR, 'pure--ous_ous_edges.csv'),
                            org_edges)
    finally:
        sys.stdout = stdout
def routine():
    """Export institute (MPI) nodes, their contexts, and the edges between.

    Reads all contexts and OUs plus the OU->context mapping
    (``MAPPED_DIR/ous_ctx.json``) and writes three Gephi-style CSVs into
    ``GRAPH_DIR``: institute nodes, institute->context edges, and the
    context nodes referenced by at least one institute.  Console output is
    redirected to ``LOG_DIR/graph_contexts_mpis.log``.
    """
    os.makedirs(LOG_DIR, exist_ok=True)
    os.makedirs(GRAPH_DIR, exist_ok=True)
    print("console output is redirected to graph_contexts_mpis.log ...")
    stdout = sys.stdout
    try:
        # Redirect stdout into the log file; restored in ``finally`` even on
        # error so the process is not left logging into a closed file.
        with open(os.path.join(LOG_DIR, "graph_contexts_mpis.log"), "w+") as log:
            sys.stdout = log
            ctxs = utils.read_json(os.path.join(CTX_DIR, "all.json"))
            ous = utils.read_json(os.path.join(OUS_DIR, "all.json"))
            mpis = utils.read_json(os.path.join(MAPPED_DIR, "ous_ctx.json"))
            # Institute nodes: only OUs that appear in the OU->context map.
            institutes = [['Id', 'Label']]
            for rec in ous['records']:
                if rec['data']['objectId'] in mpis:
                    objectId = rec['data']['objectId']
                    name = utils.clean_string(rec['data']['name'])
                    institutes.append([objectId, name])
            utils.write_csv(os.path.join(GRAPH_DIR, 'mpis--ous_nodes--ctx.csv'),
                            institutes)
            # Institute -> context edges; remember every referenced context id.
            # (set, not list: membership is tested per context record below)
            institutes_contexts = [['Source', 'Target']]
            mpis_ctx = set()
            for mpi in mpis:
                for context in mpis[mpi]:
                    institutes_contexts.append([mpi, context])
                    mpis_ctx.add(context)
            utils.write_csv(os.path.join(GRAPH_DIR, 'mpis--ous_ctx_edges.csv'),
                            institutes_contexts)
            # Context nodes: keep only contexts attached to some institute.
            contexts = [['Id', 'Label', 'Created']]
            for rec in ctxs['records']:
                objectId = rec['data']['objectId']
                if objectId in mpis_ctx:
                    name = rec['data']['name'].replace('"', '')
                    # creationDate looks date-like; keep the leading year only.
                    created = rec['data']['creationDate'].split("-")[0]
                    contexts.append([objectId, name, created])
            utils.write_csv(os.path.join(GRAPH_DIR, 'mpis--ctx_nodes--ous.csv'),
                            contexts)
    finally:
        sys.stdout = stdout
def routine():
    """Export the OU tree rooted at the institutes (MPIs).

    Seeds the tree with all OUs listed in ``MAPPED_DIR/ous_mpi.json``,
    then repeatedly sweeps the full OU list, adopting every OU whose
    parent is already part of the tree, until a sweep adds nothing
    (fixed point).  Writes the full tree nodes/edges, the institute
    nodes alone, and the non-institute ("children") nodes into
    ``GRAPH_DIR``.  Console output goes to ``LOG_DIR/graph_ous_mpis.log``.
    """
    os.makedirs(LOG_DIR, exist_ok=True)
    os.makedirs(GRAPH_DIR, exist_ok=True)
    print("console output is redirected to graph_ous_mpis.log ...")
    stdout = sys.stdout
    try:
        # Redirect stdout into the log file; restored in ``finally`` even on
        # error so the process is not left logging into a closed file.
        with open(os.path.join(LOG_DIR, "graph_ous_mpis.log"), "w+") as log:
            sys.stdout = log
            mpis = utils.read_json(os.path.join(MAPPED_DIR, "ous_mpi.json"))
            ous = utils.read_json(os.path.join(OUS_DIR, "all.json"))
            ous_nodes = [["Id", "Label"]]
            ous_edges = [["Source", "Target"]]
            # Sets instead of lists: membership is tested for every record on
            # every sweep, which was quadratic with list lookups.
            children = set()
            ous_collected = set()
            # First sweep: the institutes themselves, plus any OU whose parent
            # is an institute (or an already-adopted child seen earlier in
            # this same sweep).
            for rec in ous['records']:
                data = rec['data']
                objectId = data['objectId']
                if objectId in mpis:
                    ous_nodes.append([objectId, utils.clean_string(data['name'])])
                    ous_collected.add(objectId)
                    if 'parentAffiliation' in data:
                        ous_edges.append(
                            [objectId, data['parentAffiliation']['objectId']])
                    else:
                        print("no parent:", objectId)
                elif 'parentAffiliation' in data:
                    parent = data['parentAffiliation']['objectId']
                    if parent in mpis or parent in children:
                        ous_nodes.append(
                            [objectId, utils.clean_string(data['name'])])
                        ous_collected.add(objectId)
                        ous_edges.append([objectId, parent])
                        if data['hasChildren']:
                            children.add(objectId)
            # Fixed point: keep sweeping until no further OU can be attached.
            changed = True
            while changed:
                changed = False
                for rec in ous['records']:
                    data = rec['data']
                    objectId = data['objectId']
                    if objectId in ous_collected or 'parentAffiliation' not in data:
                        continue
                    parent = data['parentAffiliation']['objectId']
                    if parent in mpis or parent in children:
                        ous_nodes.append(
                            [objectId, utils.clean_string(data['name'])])
                        ous_collected.add(objectId)
                        changed = True
                        ous_edges.append([objectId, parent])
                        if data['hasChildren']:
                            children.add(objectId)
            utils.write_csv(
                os.path.join(GRAPH_DIR, "mpis--ous_nodes--tree-full.csv"),
                ous_nodes)
            utils.write_csv(
                os.path.join(GRAPH_DIR, "mpis--ous_ous_edges--tree.csv"),
                ous_edges)
            # Institutes
            institutes = [['Id', 'Label']]
            for rec in ous['records']:
                if rec['data']['objectId'] in mpis:
                    institutes.append([rec['data']['objectId'],
                                       utils.clean_string(rec['data']['name'])])
            utils.write_csv(os.path.join(GRAPH_DIR, 'mpis--ous_nodes.csv'),
                            institutes)
            # Children of Institutes: re-read the full tree file and keep
            # every node that is not itself an institute.
            kids_names = [["Id", "Label"]]
            mpis_kids_nodes = utils.read_csv_with_header(
                os.path.join(GRAPH_DIR, 'mpis--ous_nodes--tree-full.csv'))
            mpis_kids_nodes = list(mpis_kids_nodes.values())
            # NOTE(review): column 0 appears to hold the ids and column 1 the
            # labels, with index 0 skipped (presumably the header row) —
            # confirm against utils.read_csv_with_header.
            for i in range(1, len(mpis_kids_nodes[0])):
                kid_id = mpis_kids_nodes[0][i]
                kid_name = utils.clean_string(mpis_kids_nodes[1][i])
                if kid_id not in mpis:
                    kids_names.append([kid_id, kid_name])
            utils.write_csv(
                os.path.join(GRAPH_DIR, 'mpis--ous_nodes--tree-children.csv'),
                kids_names)
    finally:
        sys.stdout = stdout
def routine():
    """Count journal articles per institute (MPI) and write per-MPI stats.

    For every MPI with at least one publication context, collects all
    released ARTICLE items, tallies how many appeared in each journal
    (the first JOURNAL source carrying a title counts, one per article),
    and writes a tab-separated ``<mpi>_jour_art.csv`` ranked by article
    count into ``STATS_DIR/journals``.  Console output is redirected to
    ``LOG_DIR/count_journals.log``.
    """
    os.makedirs(LOG_DIR, exist_ok=True)
    print("console output is redirected to count_journals.log ...")
    stdout = sys.stdout
    try:
        # Redirect stdout into the log file; restored in ``finally`` even on
        # error so the process is not left logging into a closed file.
        with open(os.path.join(LOG_DIR, "count_journals.log"), "w+") as log:
            sys.stdout = log
            from ..utils.local import ld
            JOUR_STATS = os.path.join(STATS_DIR, 'journals')
            os.makedirs(JOUR_STATS, exist_ok=True)
            ous_ctx = utils.read_json(os.path.join(EXTDATA_DIR, 'selected.json'))
            mpis = utils.read_json(os.path.join(MAPPED_DIR, 'ous_mpi.json'))
            print("start processing!")
            start_time = time.time()
            for mpi in mpis:
                if mpi not in ous_ctx:
                    print(mpis[mpi] + " has no contexts!")
                    print("")
                    continue
                print("processing " + mpis[mpi] + "...")
                articles = []
                journals = {}
                counter = 0
                nojour = 0
                for mpi_ctx in ous_ctx[mpi]:
                    print("extracting " + mpi_ctx + " ...")
                    # renamed from 'all', which shadowed the builtin
                    ctx_data = ld.get_data(mpi_ctx)[0]
                    # consider only released items
                    data_set = DataSet(data_id=ctx_data.idx + "_released",
                                       raw=ctx_data.get_items_released())
                    if not data_set.records:
                        print(mpi_ctx + " has no records!")
                        continue
                    print(str(data_set.num) + " records to process...")
                    for record in data_set.records:
                        data = record['data']
                        if data['publicState'] == 'RELEASED' \
                                and data['metadata']['genre'] == 'ARTICLE':
                            articles.append(record)
                for article in articles:
                    jour = False
                    if 'sources' in article['data']['metadata']:
                        for source in article['data']['metadata']['sources']:
                            if source['genre'] == 'JOURNAL':
                                if 'title' in source:
                                    jour = True
                                    counter += 1
                                    title = source['title']
                                    journals[title] = journals.get(title, 0) + 1
                                else:
                                    print(article['data']['objectId']
                                          + " has journal as source without title!")
                                    continue
                            if jour:
                                break  # count each article at most once
                        if not jour:
                            nojour += 1
                    else:
                        print("found article " + article['data']['objectId']
                              + " without any source!")
                print('found ' + str(counter)
                      + ' articles with journals as source')
                # typo fixed: "souce" -> "source"
                print('found ' + str(nojour)
                      + ' articles without a journal as source')
                # Rank journals by article count, descending.
                ranked = sorted(journals.items(), key=lambda x: x[1],
                                reverse=True)
                path = os.path.join(JOUR_STATS, mpi + '_jour_art.csv')
                print("write stats to file: " + path)
                with open(path, 'w', newline='') as csv_file:
                    # quoting=csv.QUOTE_NONE
                    csv_writer = csv.writer(
                        csv_file, delimiter='\t', quotechar='"',
                        quoting=csv.QUOTE_MINIMAL)
                    csv_writer.writerow(['journals', 'articles'])
                    for jour, art in ranked:
                        # Strip tabs/commas so titles cannot break the TSV.
                        jour = jour.replace('\t', ' ')
                        jour = jour.replace(',', '')
                        jour = utils.clean_string(jour)
                        csv_writer.writerow([jour, art])
                print("finished " + mpis[mpi] + "!")
                print("")
            print("finished processing after %s sec!"
                  % round(time.time() - start_time, 2))
    finally:
        sys.stdout = stdout