def person_delete(person_id=''):
    """
    Delete an existing person

    swagger_from_file: api_doc/person_delete.yml
    """
    if is_token_valid(request.headers.get('Authorization')):
        # TODO decide on base of the api key scopes
        # load person
        delete_person_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                  application=secrets.SOLR_APP, core='person',
                                  query='id:%s' % person_id)
        delete_person_solr.request()

        if delete_person_solr.results:
            thedata = json.loads(delete_person_solr.results[0].get('wtf_json'))
            form = PersonAdminForm.from_json(thedata)
            # modify status to 'deleted'
            form.editorial_status.data = 'deleted'
            form.changed.data = timestamp()
            form.note.data = 'Deleted via REST API'
            # save person
            persistence.person2solr(form, action='delete')

            return make_response('person deleted!', 204)
        else:
            return make_response('person resource \'%s\' not found!' % person_id, 404)
    else:
        return make_response('Unauthorized', 401)
def work_delete(work_id=''):
    """
    Delete an existing work

    swagger_from_file: api_doc/work_delete.yml
    """
    if is_token_valid(request.headers.get('Authorization')):
        # TODO decide on base of the api key scopes
        # load work
        delete_work_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                application=secrets.SOLR_APP, core='hb2',
                                query='id:%s' % work_id)
        delete_work_solr.request()

        if delete_work_solr.results:
            thedata = json.loads(delete_work_solr.results[0].get('wtf_json'))
            form = display_vocabularies.PUBTYPE2FORM.get(thedata.get('pubtype')).from_json(thedata)
            # modify status to 'deleted'
            form.editorial_status.data = 'deleted'
            form.changed.data = timestamp()
            form.note.data = 'Deleted via REST API'
            # save work
            persistence.record2solr(form, action='delete')

            return make_response('work deleted!', 204)
        else:
            return make_response('work resource \'%s\' not found!' % work_id, 404)
    else:
        return make_response('Unauthorized', 401)
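# Usage sketch (not part of the app): how a client might call these delete
# endpoints, assuming they are registered as Flask routes of the form
# DELETE /person/<person_id> and DELETE /work/<work_id> with an API key in
# the Authorization header. Base URL, token, and id below are placeholders.
#
#   import requests
#   resp = requests.delete('https://api.example.org/work/some-work-id',
#                          headers={'Authorization': 'some-api-key'})
#   # 204 -> deleted, 404 -> unknown id, 401 -> invalid token (see above)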
def sync_hb_to_orcid():
    get_user = Solr(host=p_secrets.SOLR_HOST, port=p_secrets.SOLR_PORT,
                    application=p_secrets.SOLR_APP, core='hb2_users',
                    query='orcidid:%s' % orcid_id)
    get_user.request()

    if get_user.results:
        if '/activities/update' in get_user.results[0].get('orcidscopes'):
            # records = get_new_records(affiliation=affiliation, query='pnd:"1049808495%23Becker, Hans-Georg"')
            records = get_updated_records(affiliation=affiliation, query='pnd:"1019952040%23Höhner, Kathrin"')
            orcid_update_records(affiliation=affiliation, orcid_id=orcid_id,
                                 access_token=orcid_token, works=records)
def export_solr_dump(core=''):
    """
    Export the wtf_json field of every doc in the index to a new document in
    the users core and to the user's local file system. Uses the current
    user's ID and a timestamp as the document ID and file name.

    :param core:
    """
    dow = days_of_week[datetime.datetime.today().weekday()]
    if core != 'hb2_users':
        filename = '%s/%s/%s_%s.json' % (secrets.BACKUP_DIR, dow,
                                         datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), core)
        export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                           export_field='wtf_json', core=core)
        export_docs = export_solr.export()
        with open(filename, 'w') as fo:
            fo.write(json.dumps(export_docs, indent=4))

        filename = '%s/%s/%s_%s.not_imported.json' % (secrets.BACKUP_DIR, dow,
                                                      datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), core)
        export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                           query='-editorial_status:imported', export_field='wtf_json', core=core)
        export_docs = export_solr.export()
        with open(filename, 'w') as fo:
            fo.write(json.dumps(export_docs, indent=4))
    else:
        filename = '%s/%s/%s_%s.json' % (secrets.BACKUP_DIR, dow,
                                         datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), core)
        export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                           core=core)
        export_docs = export_solr.export()
        with open(filename, 'w') as fo:
            fo.write(json.dumps(export_docs, indent=4))
def export_solr_dump():
    dow = days_of_week[datetime.datetime.today().weekday()]
    filename = '%s/%s/%s_%s.dead_ends.json' % (secrets.BACKUP_DIR, dow,
                                               datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), 'hb2')
    export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                       query='is_part_of:[\'\' TO *]', export_field='wtf_json', core='hb2')
    export_docs = export_solr.export()

    # TODO get id of the host and check if it exists
    dead_ends = []
    for doc in export_docs:
        for part in doc.get('is_part_of'):
            try:
                query = 'id:%s' % part.get('is_part_of')
                get_record_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                       application=secrets.SOLR_APP, core='hb2', query=query,
                                       facet='false', fields=['wtf_json'])
                get_record_solr.request()
                if len(get_record_solr.results) == 0:
                    print('%s is a dead end' % part.get('is_part_of'))
                    if part.get('is_part_of') not in dead_ends:
                        dead_ends.append(part.get('is_part_of'))
            except AttributeError as e:
                print(e)

    with open(filename, 'w') as fo:
        fo.write(json.dumps(dead_ends, indent=4))
def get_updated_records(affiliation='', query='*:*'):
    get_record_solr = Solr(host=p_secrets.SOLR_HOST, port=p_secrets.SOLR_PORT,
                           application=p_secrets.SOLR_APP,
                           query='%s AND orcid_put_code:[\'\' TO *]' % query, rows=100000)
    get_record_solr.request()

    orcid_records = {}
    if len(get_record_solr.results) == 0:
        logging.error('No records found for query: %s' % query)
    else:
        print(len(get_record_solr.results))
        # orcid_records.append(orcid_processor.wtf_orcid(affiliation=affiliation, wtf_records=[json.loads(get_record_solr.results[0].get('wtf_json'))])[0])
        for record in get_record_solr.results:
            wtf = json.loads(record.get('wtf_json'))
            orcid_records.setdefault(record.get('orcid_put_code')[0],
                                     orcid_processor.wtf_orcid(affiliation=affiliation, wtf_records=[wtf]))

    return orcid_records
def export_solr_query(core='', query='*:*', filename=''):
    if core != 'hb2_users':
        if filename != '':
            dow = days_of_week[datetime.datetime.today().weekday()]
            filename = '%s/%s/%s_%s.%s' % (secrets.BACKUP_DIR, dow,
                                           datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), core, filename)
            export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                               query=query, export_field='wtf_json', core=core)
            export_docs = export_solr.export()
            with open(filename, 'w') as fo:
                fo.write(json.dumps(export_docs, indent=4))
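# Usage sketch (assumption: a scheduled job drives these helpers; the core
# names mirror those used elsewhere in this code base):
#
#   for core in ['hb2', 'person', 'organisation', 'hb2_users']:
#       export_solr_dump(core=core)
#   # ad-hoc export, e.g. all deleted works:
#   export_solr_query(core='hb2', query='editorial_status:deleted', filename='deleted.json')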
def bibliography(agent='', agent_id='', style='harvard1'):
    """
    Getting a bibliography

    swagger_from_file: bibliography_doc/bibliography.yml
    """
    format = request.args.get('format', 'html')
    filter_by_year = request.args.get('filter_by_year', '')
    filter_by_type = request.args.get('filter_by_type', '')
    exclude_by_type = request.args.get('exclude_by_type', '')
    filter_by_pr = request.args.get('filter_by_pr', False)
    filter_by_ger = request.args.get('filter_by_ger', False)
    filter_by_eng = request.args.get('filter_by_eng', False)
    filter_by_current_members = request.args.get('filter_by_current_members', False)
    filter_by_former_members = request.args.get('filter_by_former_members', False)
    group_by_year = request.args.get('group_by_year', False)
    # logging.info('group_by_year = %s' % group_by_year)
    group_by_type = request.args.get('group_by_type', False)
    group_by_type_year = request.args.get('group_by_type_year', False)
    pubsort = request.args.get('pubsort', '')
    toc = request.args.get('toc', False)
    locale = request.args.get('locale', '')
    # TODO start-creationdate, end-creationdate >> scenario "Raumplanung"
    start_creationdate = request.args.get('start_creationdate', '')
    end_creationdate = request.args.get('end_creationdate', '')
    reasoning = request.args.get('reasoning', False)
    refresh = request.args.get('refresh', False)

    formats = ['html', 'txt']
    agent_types = {
        'person': 'person',
        'research_group': 'organisation',
        'chair': 'organisation',
        'organisation': 'organisation',
        'working_group': 'group',
        'project': 'group',
    }
    pubsorts = ['stm', 'anh']
    STM_SORT = ['ArticleJournal', 'Chapter', 'Monograph', 'Journal', 'Series', 'Conference', 'Collection',
                'MultivolumeWork', 'SpecialIssue', 'Patent', 'Standard', 'Thesis', 'InternetDocument', 'Report',
                'Lecture', 'Sonstiges', 'ArticleNewspaper', 'PressRelease', 'RadioTVProgram',
                'AudioVideoDocument', 'ResearchData', 'Other']
    ANH_SORT = ['Monograph', 'ArticleJournal', 'ChapterInLegalCommentary', 'Chapter', 'LegalCommentary',
                'Collection', 'MultivolumeWork', 'Conference', 'Edition', 'SpecialIssue', 'Journal', 'Series',
                'Newspaper', 'Thesis', 'ArticleNewspaper', 'Lecture', 'Report', 'InternetDocument',
                'RadioTVProgram', 'AudioVideoDocument', 'PressRelease', 'ResearchData', 'Other']
    # per-pubtype output buffers, derived from the sort orders so that every
    # pubtype (including 'Other') has a key and the lookups below never yield None
    STM_LIST = {k: '' for k in STM_SORT}
    ANH_LIST = {k: '' for k in ANH_SORT}

    if format not in formats:
        return make_response('Bad request: format!', 400)
    elif agent not in agent_types.keys():
        return make_response('Bad request: agent!', 400)
    elif pubsort and pubsort not in pubsorts:
        return make_response('Bad request: pubsort!', 400)

    key = request.full_path.replace('&refresh=true', '').replace('?refresh=true', '?')
    # logging.debug('KEY: %s' % key)
    response = ''
    if not refresh:
        # request in cache?
        try:
            storage_publists_cache = app.extensions['redis']['REDIS_PUBLIST_CACHE']
            if storage_publists_cache.exists(key):
                response = storage_publists_cache.get(key)
        except Exception as e:
            logging.info('REDIS ERROR: %s' % e)

    if response == '':
        group = False
        group_field = ''
        group_limit = 100000
        if str2bool(group_by_year):
            group = True
            group_field = 'fdate'
        elif str2bool(group_by_type):
            group = True
            group_field = 'pubtype'

        filterquery = []
        if str2bool(filter_by_eng):
            filterquery.append('language:eng')
        elif str2bool(filter_by_ger):
            filterquery.append('language:ger')
        elif str2bool(filter_by_pr):
            filterquery.append('peer_reviewed:true')

        if filter_by_type != '':
            entries = filter_by_type.split('|')
            filter_string = ''
            for entry in entries:
                filter_string += 'pubtype:%s' % PUBTYPE_KEYS.get(entry.lower()) + '+OR+'
            filterquery.append(filter_string[:-4])
        if filter_by_year != '':
            entries = filter_by_year.split('|')
            filter_string = ''
            for entry in entries:
                filter_string += 'fdate:%s' % entry + '+OR+'
            filterquery.append(filter_string[:-4])
        if exclude_by_type:
            entries = exclude_by_type.split('|')
            for entry in entries:
                filterquery.append('-pubtype:"%s"' % PUBTYPE_KEYS.get(entry.lower()))

        fquery = ''
        if start_creationdate and not end_creationdate:
            fquery = 'recordCreationDate:[%s TO *]' % (start_creationdate + 'T00:00:00Z')
        elif not start_creationdate and end_creationdate:
            fquery = 'recordCreationDate:[*+TO+%s]' % (end_creationdate + 'T00:00:00Z')
        elif start_creationdate and end_creationdate:
            fquery = 'recordCreationDate:[%s+TO+%s]' % (start_creationdate + 'T00:00:00Z',
                                                        end_creationdate + 'T00:00:00Z')
        if fquery:
            filterquery.append(fquery)

        query = ''
        results = []
        if agent_types.get(agent) == 'person':
            # get facet value
            actor_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                              query='gnd:%s' % agent_id, export_field='wtf_json', core=agent_types.get(agent))
            actor_solr.request()
            if len(actor_solr.results) == 0:
                return make_response('Not Found: Unknown Agent!', 404)
            else:
                name = actor_solr.results[0].get('name')
                query = 'pndid:%s' % agent_id
                # query = 'pnd:"%s%s%s"' % (agent_id, '%23', name)
                # logging.info('query=%s' % query)
        else:
            # get orga/group doc
            actor_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                              query='id:%s' % agent_id, export_field='wtf_json', core=agent_types.get(agent))
            actor_solr.request()
            if actor_solr.results:
                name = actor_solr.results[0].get('pref_label')
                # logging.debug('name = %s' % name)
                if reasoning:
                    # logging.debug('reasoning: %s' % reasoning)
                    orgas = {}
                    orgas.setdefault(agent_id, name)
                    # get all children
                    if actor_solr.results[0].get('children'):
                        children = actor_solr.results[0].get('children')
                        for child_json in children:
                            child = json.loads(child_json)
                            orgas.setdefault(child.get('id'), child.get('label'))
                    query = ''
                    idx_o = 0
                    id_type = agent_types.get(agent)
                    if id_type == 'organisation':
                        id_type = 'affiliation'
                    for orga_id in orgas.keys():
                        fquery = ['gnd:[\'\' TO *]']
                        if not agent_types.get(agent) == 'person':
                            if filter_by_former_members:
                                fquery.append('personal_status:emeritus+OR+personal_status:alumnus')
                            elif filter_by_current_members:
                                fquery.append('-personal_status:emeritus')
                                fquery.append('-personal_status:alumnus')
                        member_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                           application=secrets.SOLR_APP,
                                           query='%s_id:"%s"' % (id_type, orga_id), fquery=fquery,
                                           fields=['gnd', 'name'], rows=100000, core='person')
                        member_solr.request()

                        query_part = ''
                        if member_solr.results and len(member_solr.results) > 0:
                            idx_p = 0
                            for member in member_solr.results:
                                query_part += 'pnd:"%s%s%s"' % (member.get('gnd'), '%23', member.get('name'))
                                idx_p += 1
                                if idx_p < len(member_solr.results) and query_part != '':
                                    query_part += ' OR '
                            if query_part != '':
                                query += query_part

                        idx_o += 1
                        if idx_o < len(orgas) and query != '':
                            query += ' OR '

                    while query.endswith(' OR '):
                        query = query[:-4]
                    # logging.info('query=%s' % query)
                else:
                    logging.debug('reasoning: %s' % reasoning)
                    id_type = agent_types.get(agent)
                    if id_type == 'organisation':
                        id_type = 'affiliation'
                    query = '%s_id:%s' % (id_type, agent_id)
            else:
                return make_response('Not Found: Unknown Agent!', 404)

        biblist_id = str(uuid.uuid4())
        biblist = ''
        biblist_toc = ''
        biblist_coins = ''
        # TOC and COinS buffers mirror the pubtype buffers above
        STM_TOC = {k: '' for k in STM_SORT}
        ANH_TOC = {k: '' for k in ANH_SORT}
        STM_COINS = {k: '' for k in STM_SORT}
        ANH_COINS = {k: '' for k in ANH_SORT}

        if group_by_type_year:
            facet_tree = ('pubtype', 'fdate')
            publist_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                                handler='query', query=query, fquery=filterquery, fields=['wtf_json'], rows=0,
                                facet='true', facet_tree=facet_tree, facet_sort=False, facet_limit=-1,
                                sort='fdate asc', core='hb2')
            publist_solr.request()
            # logging.info('publist_solr.tree: %s' % json.dumps(publist_solr.tree, indent=4))
            list_cnt = 0
            for pubtype in publist_solr.tree.get('pubtype,fdate'):
                # logging.debug('pubtype = %s' % pubtype.get('value'))
                year_list = ''
                year_coins = ''
                if pubtype.get('pivot'):
                    for year in pubtype.get('pivot')[::-1]:
                        # logging.debug('\t%s: %s' % (year.get('value'), year.get('count')))
                        filterquery = []
                        filterquery.append('fdate:%s' % year.get('value'))
                        filterquery.append('pubtype:%s' % pubtype.get('value'))
                        pivot_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                          application=secrets.SOLR_APP, handler='query', query=query,
                                          fields=['wtf_json'], rows=100000, fquery=filterquery, core='hb2')
                        pivot_solr.request()
                        results = pivot_solr.results
                        # logging.debug('PIVOT_PUB_LIST: %s' % results)

                        publist_docs = []
                        for result in results:
                            publist_docs.append(json.loads(result.get('wtf_json')))
                            if format == 'html':
                                year_coins += '<div class="coins"><span class="Z3988" title="%s"></span></div>' \
                                              % openurl_processor.wtf_openurl(json.loads(result.get('wtf_json'))).replace('&', '&amp;')

                        if not group_by_type:
                            if format == 'html':
                                year_list += '<h5>%s</h5>' % year.get('value')
                            else:
                                year_list += '%s\n' % year.get('value')
                        year_list += citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style)
                else:
                    filterquery = []
                    filterquery.append('pubtype:%s' % pubtype.get('value'))
                    pivot_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                      application=secrets.SOLR_APP, handler='query', query=query,
                                      fields=['wtf_json'], rows=100000, fquery=filterquery, core='hb2')
                    pivot_solr.request()
                    results = pivot_solr.results
                    # logging.debug('PIVOT_PUB_LIST: %s' % results)

                    publist_docs = []
                    for result in results:
                        publist_docs.append(json.loads(result.get('wtf_json')))
                        if format == 'html':
                            year_coins += '<div class="coins"><span class="Z3988" title="%s"></span></div>' \
                                          % openurl_processor.wtf_openurl(json.loads(result.get('wtf_json'))).replace('&', '&amp;')
                    year_list += citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style)

                if locale.startswith('de'):
                    group_value = display_vocabularies.PUBTYPE_GER.get(pubtype.get('value'))
                else:
                    group_value = display_vocabularies.PUBTYPE_ENG.get(pubtype.get('value'))

                list_cnt += 1
                if format == 'html':
                    header = '<h4 id="%s_%s">%s</h4>' % (biblist_id, list_cnt, group_value)
                elif format == 'txt':
                    header = '%s\n' % group_value
                else:
                    header = ''
                footer = ''
                if toc and format == 'html':
                    back_string = 'Back to table of contents'
                    if locale.startswith('de'):
                        back_string = 'Zurück zum Inhaltsverzeichnis'
                    footer = '<div class="toc_return"><a href="#%s_citetoc">%s</a></div>' % (biblist_id, back_string)

                if pubsort == 'stm':
                    STM_LIST[pubtype.get('value')] = header + year_list + footer
                    if format == 'html':
                        STM_TOC[pubtype.get('value')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                        STM_COINS[pubtype.get('value')] = year_coins
                elif pubsort == 'anh':
                    ANH_LIST[pubtype.get('value')] = header + year_list + footer
                    if format == 'html':
                        ANH_TOC[pubtype.get('value')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                        ANH_COINS[pubtype.get('value')] = year_coins
                else:
                    biblist += header + year_list
                    if format == 'html':
                        biblist_toc += '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                        biblist_coins += year_coins

            if pubsort == 'anh':
                # logging.debug(ANH_LIST)
                biblist = ''
                biblist_toc = ''
                for pubtype in ANH_SORT:
                    if ANH_LIST.get(pubtype):
                        biblist += ANH_LIST.get(pubtype)
                        if format == 'html':
                            biblist_toc += ANH_TOC.get(pubtype)
                            biblist_coins += ANH_COINS.get(pubtype)
            elif pubsort == 'stm':
                # logging.debug(STM_LIST)
                biblist = ''
                biblist_toc = ''
                for pubtype in STM_SORT:
                    if STM_LIST.get(pubtype):
                        biblist += STM_LIST.get(pubtype)
                        if format == 'html':
                            biblist_toc += STM_TOC.get(pubtype)
                            biblist_coins += STM_COINS.get(pubtype)
        else:
            publist_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                                handler='query', query=query, fields=['wtf_json'], rows=100000,
                                fquery=filterquery, group=group, group_field=group_field,
                                group_limit=group_limit, sort='fdate desc', core='hb2')
            publist_solr.request()
            results.extend(publist_solr.results)
            # print('publist_solr.results: %s' % results)

            publist_docs = []
            if group:
                biblist = ''
                list_cnt = 0
                for result in results:
                    # logging.debug('groupValue: %s' % result.get('groupValue'))
                    # logging.debug('numFound: %s' % result.get('doclist').get('numFound'))
                    # logging.debug('docs: %s' % result.get('doclist').get('docs'))
                    coins = ''
                    for doc in result.get('doclist').get('docs'):
                        publist_docs.append(json.loads(doc.get('wtf_json')))
                        if format == 'html':
                            coins += '<div class="coins"><span class="Z3988" title="%s"></span></div>' \
                                     % openurl_processor.wtf_openurl(json.loads(doc.get('wtf_json'))).replace('&', '&amp;')

                    group_value = result.get('groupValue')
                    if str2bool(group_by_type):
                        if locale.startswith('de'):
                            group_value = display_vocabularies.PUBTYPE_GER.get(result.get('groupValue'))
                        else:
                            group_value = display_vocabularies.PUBTYPE_ENG.get(result.get('groupValue'))

                    list_cnt += 1
                    if format == 'html':
                        header = '<h4 id="%s_%s">%s</h4>' % (biblist_id, list_cnt, group_value)
                    elif format == 'txt':
                        header = '%s' % group_value
                    else:
                        header = ''
                    footer = ''
                    if toc and format == 'html':
                        back_string = 'Back to table of contents'
                        if locale.startswith('de'):
                            back_string = 'Zurück zum Inhaltsverzeichnis'
                        footer = '<div class="toc_return"><a href="#%s_citetoc">%s</a></div>' % (biblist_id, back_string)

                    if str2bool(group_by_type):
                        if pubsort == 'stm':
                            STM_LIST[result.get('groupValue')] = header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                            if format == 'html':
                                STM_TOC[result.get('groupValue')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                                STM_COINS[result.get('groupValue')] = coins
                        elif pubsort == 'anh':
                            ANH_LIST[result.get('groupValue')] = header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                            if format == 'html':
                                ANH_TOC[result.get('groupValue')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                                ANH_COINS[result.get('groupValue')] = coins
                        else:
                            biblist += header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                            if format == 'html':
                                biblist_toc += '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                                biblist_coins += coins
                    elif str2bool(group_by_year):
                        biblist += header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                        if format == 'html':
                            biblist_toc += '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                            biblist_coins += coins
                    else:
                        biblist += header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer

                    # reset the per-group buffer before the next group
                    publist_docs = []

                if str2bool(group_by_type) and pubsort == 'anh':
                    # logging.debug(ANH_LIST)
                    biblist = ''
                    biblist_toc = ''
                    for pubtype in ANH_SORT:
                        if ANH_LIST.get(pubtype):
                            biblist += ANH_LIST.get(pubtype)
                            if format == 'html':
                                biblist_toc += ANH_TOC.get(pubtype)
                                biblist_coins += ANH_COINS.get(pubtype)
                elif str2bool(group_by_type) and pubsort == 'stm':
                    # logging.debug(STM_LIST)
                    biblist = ''
                    biblist_toc = ''
                    for pubtype in STM_SORT:
                        if STM_LIST.get(pubtype):
                            biblist += STM_LIST.get(pubtype)
                            if format == 'html':
                                biblist_toc += STM_TOC.get(pubtype)
                                biblist_coins += STM_COINS.get(pubtype)
            else:
                for result in results:
                    publist_docs.append(json.loads(result.get('wtf_json')))
                biblist = citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style)

        response = ''
        if toc and format == 'html':
            response += '<ul id="%s_citetoc">' % biblist_id + biblist_toc + '</ul>'
        response += biblist + biblist_coins

        if response:
            try:
                storage_publists_cache = app.extensions['redis']['REDIS_PUBLIST_CACHE']
                storage_publists_cache.set(key, response)
                storage_publists_cache.hset(agent_id, key, timestamp())
            except Exception as e:
                logging.error('REDIS: %s' % e)

    resp = make_response(response)
    if format == 'txt':
        resp.headers["Content-Type"] = "text/plain; charset=utf-8"
    else:
        resp.headers["Content-Type"] = "text/html; charset=utf-8"
    return resp
def wtf_bibtex(wtf_records=None):
    # logging.info('wtf_records: %s' % wtf_records)
    if wtf_records is None:
        wtf_records = []
    if len(wtf_records) > 0:
        db = BibDatabase()
        db.entries = []
        for record in wtf_records:
            bibtex_entry = {}

            # fall back to 'misc' for unmapped pubtypes (the original called
            # setdefault() on None here, which would raise an AttributeError)
            bibtex_type = BIBTEX_PUBTYPES.get(record.get('pubtype'))
            if bibtex_type is None:
                bibtex_type = 'misc'
            bibtex_entry.setdefault('ENTRYTYPE', bibtex_type)
            bibtex_entry.setdefault('ID', record.get('id'))

            title = record.get('title')
            if record.get('subtitle'):
                title += ': %s' % record.get('subtitle')
            bibtex_entry.setdefault('title', title)

            if record.get('issued'):
                date_parts = []
                for date_part in str(record.get('issued')).replace('[', '').replace(']', '').split('-'):
                    date_parts.append(date_part)
                bibtex_entry.setdefault('year', date_parts[0])
                if len(date_parts) > 1:
                    bibtex_entry.setdefault('month', date_parts[1])
                if len(date_parts) > 2:
                    bibtex_entry.setdefault('day', date_parts[2])

            if record.get('DOI'):
                bibtex_entry.setdefault('crossref', record.get('DOI')[0])

            author_str = ''
            if record.get('person'):
                for author in record.get('person'):
                    if 'aut' in author.get('role'):
                        if author_str != '':
                            author_str += ' and '
                        author_str += author.get('name')
            bibtex_entry.setdefault('author', author_str)

            # is_part_of
            hosts = []
            if record.get('is_part_of'):
                hosts = record.get('is_part_of')
            for host in hosts:
                if host.get('is_part_of') != '':
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false', fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(ipo_solr.results[0].get('wtf_json'))
                            title = myjson.get('title')
                            if myjson.get('subtitle'):
                                title += ': %s' % myjson.get('subtitle')
                            if bibtex_entry.get('ENTRYTYPE') == 'article':
                                bibtex_entry.setdefault('journal', title)
                            elif bibtex_entry.get('ENTRYTYPE') in ('inbook', 'inproceedings', 'incollection'):
                                bibtex_entry.setdefault('booktitle', title)
                            else:
                                bibtex_entry.setdefault('series', title)
                    except AttributeError as e:
                        logging.error(e)
                if host.get('volume') != '':
                    bibtex_entry.setdefault('volume', host.get('volume'))

            if bibtex_entry:
                db.entries.append(bibtex_entry)

        return bibtexparser.dumps(db)
    else:
        return ''
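# Usage sketch: render one stored record as BibTeX. 'some-record-id' is a
# placeholder; the wtf_json field holds the internal record format that
# wtf_bibtex() expects.
#
#   get_record = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
#                     application=secrets.SOLR_APP, core='hb2',
#                     query='id:some-record-id', fields=['wtf_json'])
#   get_record.request()
#   if get_record.results:
#       print(wtf_bibtex(wtf_records=[json.loads(get_record.results[0].get('wtf_json'))]))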
def sync_orcid_to_hb():
    get_user = Solr(host=p_secrets.SOLR_HOST, port=p_secrets.SOLR_PORT,
                    application=p_secrets.SOLR_APP, core='hb2_users',
                    query='orcidid:%s' % orcid_id)
    get_user.request()

    if get_user.results:
        if '/read-limited' in get_user.results[0].get('orcidscopes'):
            works = orcid_read_works(affiliation=affiliation, orcid_id=orcid_id, access_token=orcid_token)
            logger.info('results from ORCID: %s\n' % len(works))
            if works:
                for work in works:
                    do_break = False
                    hb2_record_id = None
                    orcid_record = None
                    for work_sum in work.get('work-summary'):
                        # - putcode is not in hb2
                        try:
                            response = requests.get('http://*****:*****@tu-dortmund.de']
                            # print(json.dumps(thedata, indent=4))
                        else:
                            # logger.info(json.dumps(orcid_record, indent=4))
                            thedata = orcid_processor.orcid_wtf(orcid_id, orcid_record)
                            print(thedata)

                            # add author via orcid_user_info
                            public_info = orcid_user_info(affiliation=affiliation, orcid_id=orcid_id,
                                                          access_token=orcid_token)
                            person = {
                                'name': '%s, %s' % (public_info.get('name').get('family-name').get('value'),
                                                    public_info.get('name').get('given-names').get('value')),
                                'orcid': orcid_id,
                                'role': ['aut']
                            }
                            if affiliation == 'tudo':
                                person['tudo'] = True
                                person['rubi'] = False
                                thedata['catalog'] = ['Technische Universität Dortmund']
                            elif affiliation == 'rub':
                                person['tudo'] = False
                                person['rubi'] = True
                                thedata['catalog'] = ['Ruhr-Universität Bochum']
                            else:
                                person['tudo'] = False
                                person['rubi'] = False
                                thedata['catalog'] = ['Temporäre Daten']
                            thedata['person'] = [person]

                            if thedata:
                                logger.info('POST /work')
                                # POST request
                                logger.info(json.dumps(thedata, indent=4))
                                try:
                                    # post data
                                    response = requests.post(
                                        '%s/%s' % (orcid_secrets.API, 'work'),
                                        headers={'Content-Type': 'application/json',
                                                 'Authorization': 'Bearer %s' % orcid_secrets.TOKEN},
                                        data=json.dumps(thedata)
                                    )
                                    status = response.status_code
                                    logger.info('STATUS: %s' % status)
                                    if status == 201:
                                        response_json = json.loads(response.content.decode("utf-8"))
                                        logger.info(response_json.get('work'))
                                        if response_json.get('message'):
                                            logger.info(response_json.get('message'))
                                    else:
                                        logger.error('ERROR: %s: %s' % (status, response.content.decode("utf-8")))
                                except requests.exceptions.ConnectionError as e:
                                    logging.error(e)
                            logger.info('')
def export_oa_report(year=''):
    '''
    Getting a bibliography

    swagger_from_file: api_doc/export_oa_report.yml
    '''
    pubtype = request.args.get('pubtype', 'ArticleJournal')

    if theme(request.access_route) == 'dortmund':
        affiliation = 'tudo'
        affiliation_str = 'TU Dortmund'
    elif theme(request.access_route) == 'bochum':
        affiliation = 'rubi'
        affiliation_str = 'Ruhr-Universität Bochum'
    else:
        affiliation = ''
        affiliation_str = ''

    if affiliation:
        csv = '"AU";"TI";"SO";"DT";"RP";"EM";"OI";"PU";"ISSN";"E-ISSN";"DOI";"OA";"RP TUDO";"Fak"\n'

        # TODO search for all publications of the given year
        oa_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                       core='hb2', handler='query', query='*:*', facet='false', rows=100000,
                       fquery=['%s:true' % affiliation, 'fdate:%s' % year, 'pubtype:%s' % pubtype])
        oa_solr.request()
        results = oa_solr.results

        if results:
            for record in results:
                thedata = json.loads(record.get('wtf_json'))

                author = ''
                corresponding_author = ''
                corresponding_affiliation = ''
                faks = ''
                for person in thedata.get('person'):
                    if 'aut' in person.get('role'):
                        author += person.get('name') + ';'
                        if person.get('corresponding_author'):
                            corresponding_author = person.get('name')
                            if person.get('tudo'):
                                corresponding_affiliation = True
                                if person.get('gnd'):
                                    tudo = persistence.get_person(person.get('gnd'))
                                    # print(person.get('gnd'))
                                    if tudo:
                                        if tudo.get('affiliation_id'):
                                            faks = ''
                                            for entry in tudo.get('affiliation_id'):
                                                affil = persistence.get_orga(entry)
                                                fak = ''
                                                if affil:
                                                    has_parent = False
                                                    fak = affil.get('pref_label')
                                                    if affil.get('parent_id'):
                                                        has_parent = True
                                                        fak = '%s / %s' % (affil.get('parent_label'),
                                                                           affil.get('pref_label'))
                                                    # walk up the organisational hierarchy
                                                    while has_parent:
                                                        affil = persistence.get_orga(affil.get('parent_id'))
                                                        if affil.get('parent_id'):
                                                            has_parent = True
                                                            fak = '%s / %s' % (affil.get('parent_label'),
                                                                               affil.get('pref_label'))
                                                        else:
                                                            has_parent = False
                                                else:
                                                    fak = 'LinkError: Person %s' % person.get('gnd')
                                                faks += fak + ';'
                                            faks = faks[:-1]
                author = author[:-1]

                publisher = ''
                journal_title = ''
                issn = ''
                if record.get('is_part_of_id'):
                    if record.get('is_part_of_id')[0]:
                        host = persistence.get_work(record.get('is_part_of_id')[0])
                        if host:
                            record = json.loads(host.get('wtf_json'))
                            # print(json.dumps(record, indent=4))
                            journal_title = record.get('title')
                            if record.get('fsubseries'):
                                journal_title = record.get('fsubseries')
                            if record.get('publisher'):
                                publisher = record.get('publisher')
                            if record.get('ISSN'):
                                for entry in record.get('ISSN'):
                                    if entry:
                                        issn = entry
                                        break

                csv += '"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s"\n' % (
                    author,
                    thedata.get('title'),
                    journal_title,
                    'article',
                    corresponding_author,
                    '',
                    '',
                    publisher,
                    issn,
                    '',
                    thedata.get('DOI')[0] if thedata.get('DOI') else '',
                    thedata.get('oa_funded'),
                    corresponding_affiliation,
                    faks,
                )

        resp = make_response(csv, 200)
        resp.headers['Content-Type'] = 'text/csv; charset=utf-8'
        return resp
    else:
        return make_response('No affiliation parameter set. Please contact the administrator!', 400)
fo.close()

# persons('../../person.json')
# works('../../works.json')
# issued_data('/data/backup/Samstag/2016-10-01_15-28-31_hb2.not_issued.json', '/data/issued/issuedData.json')
# delete_issued('/data/backup/Montag/2016-10-03_09-39-18_hb2.pubtype_Journal.json')

organisations = []
logging.debug('TEST')
try:
    record_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                       core='organisation', rows=100000)
    record_solr.request()
    for result in record_solr.results:
        organisations.append(result.get('id'))
    # print('results: %s' % organisations)
    # print('result_list: %s' % record_solr.results)
except Exception as e:
    logging.error(e)
# organisations = ['TUDO9144040']
def export_openapc(year=''):
    '''
    Getting a bibliography

    swagger_from_file: api_doc/export_openapc.yml
    '''
    if theme(request.access_route) == 'dortmund':
        affiliation = 'tudo'
        affiliation_str = 'TU Dortmund'
    elif theme(request.access_route) == 'bochum':
        affiliation = 'rubi'
        affiliation_str = 'Ruhr-Universität Bochum'
    else:
        affiliation = ''
        affiliation_str = ''

    if affiliation:
        csv = '"institution";"period";"euro";"doi";"is_hybrid";"publisher";"journal_full_title";"issn";"url";"local_id"\n'

        oa_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                       core='hb2', handler='query', query='oa_funds:true', facet='false', rows=100000,
                       fquery=['%s:true' % affiliation, 'fdate:%s' % year])
        oa_solr.request()
        results = oa_solr.results

        if len(results) > 0:
            for record in results:
                thedata = json.loads(record.get('wtf_json'))

                # guard against records without a doi field
                doi = record.get('doi')[0] if record.get('doi') else ''
                is_hybrid = False
                if record.get('is_hybrid'):
                    is_hybrid = record.get('is_hybrid')
                publisher = ''
                journal_title = ''
                issn = ''
                url = ''
                if not doi:
                    if record.get('is_part_of_id'):
                        if record.get('is_part_of_id')[0]:
                            host = persistence.get_work(record.get('is_part_of_id')[0])
                            if host:
                                record = json.loads(host.get('wtf_json'))
                                # print(json.dumps(record, indent=4))
                                journal_title = record.get('title')
                                if record.get('fsubseries'):
                                    journal_title = record.get('fsubseries')
                                if record.get('publisher'):
                                    publisher = record.get('publisher')
                                if record.get('ISSN'):
                                    for entry in record.get('ISSN'):
                                        if entry:
                                            issn = entry
                                            break
                    if thedata.get('uri'):
                        for uri in thedata.get('uri'):
                            url = uri
                            break

                csv += '"%s";%s;%s;"%s";"%s";"%s";"%s";"%s";"%s";"%s"\n' % (
                    affiliation_str, year, 0.00, doi, is_hybrid, publisher,
                    journal_title, issn, url, record.get('id'))

            resp = make_response(csv, 200)
            resp.headers['Content-Type'] = 'text/csv; charset=utf-8'
            return resp
        else:
            return make_response('No results', 404)
    else:
        return make_response('No affiliation parameter set. Please contact the administrator!', 400)
from fuzzywuzzy import fuzz
from utils.solr_handler import Solr

try:
    import local_app_secrets as secrets
except ImportError:
    import app_secrets as secrets

# TODO: deduplicate by last name plus the first letter of the first name
# TODO: first and last name match but the GNDs differ => is that actually a TODO?
# TODO: if the last name matches and the first name in the data is only a single
#       letter, or if it is a full first name, compare the first two letters

results = []

new_titles = Solr(application=secrets.SOLR_APP, facet='false', rows=2000000,
                  fields=['pnd', 'id', 'title', 'pubtype', 'catalog'])
new_titles.request()

for doc in new_titles.results:
    # logging.info(doc)
    if doc.get('pnd'):
        catalog = 'tmp'
        if doc.get('catalog'):
            if 'Ruhr-Universität Bochum' in doc.get('catalog'):
                catalog = 'rub'
            elif 'Technische Universität Dortmund' in doc.get('catalog'):
                catalog = 'tudo'

        result = {'id': doc.get('id'), 'catalog': catalog,
                  'title': doc.get('title'), 'pubtype': doc.get('pubtype')}
        creators = []
        # TODO: as soon as there is one entry with len(ids) != 3, the record does not need to be considered!
def wtf_csl(wtf_records=None):
    csl_records = []
    # logging.info('wtf_records: %s' % wtf_records)
    if wtf_records is None:
        wtf_records = []
    if len(wtf_records) > 0:
        for record in wtf_records:
            # logging.info('record: %s' % record)
            hosts = []
            if record.get('is_part_of'):
                hosts = record.get('is_part_of')
            else:
                hosts.append({'is_part_of': ''})

            for host in hosts:
                csl_record = {}
                # id
                csl_record.setdefault('id', record.get('id'))
                # type
                csl_type = CSL_PUBTYPES.get(record.get('pubtype'))
                if csl_type is None:
                    csl_record.setdefault('pubtype', record.get('pubtype'))
                csl_record.setdefault('type', csl_type)
                # title
                title = record.get('title')
                if record.get('subtitle'):
                    title += ': %s' % record.get('subtitle')
                # TODO title supplements
                csl_record.setdefault('title', title)
                # doi
                if record.get('DOI') and record.get('DOI')[0] != '':
                    csl_record.setdefault('DOI', record.get('DOI')[0].strip())
                    csl_record.setdefault('URL', 'http://dx.doi.org/%s' % record.get('DOI')[0].strip())
                    csl_record.setdefault('uri', 'http://dx.doi.org/%s' % record.get('DOI')[0].strip())
                # uri
                if record.get('uri') and record.get('uri')[0] != '':
                    for uri in record.get('uri'):
                        csl_record.setdefault('URL', uri.strip())
                        csl_record.setdefault('uri', uri.strip())
                # contributors
                if record.get('person'):
                    author = []
                    editor = []
                    contributor = []
                    for person in record.get('person'):
                        # logging.info(person.get('name'))
                        family = person.get('name').split(', ')[0]
                        given = ''
                        if len(person.get('name').split(', ')) > 1:
                            given = person.get('name').split(', ')[1]
                        # logging.info('%s, %s' % (family, given))
                        if person.get('role'):
                            if 'aut' in person.get('role'):
                                author.append({'family': family, 'given': given})
                            elif 'edt' in person.get('role'):
                                editor.append({'family': family, 'given': given})
                            else:
                                contributor.append({'family': family, 'given': given})
                    if len(author) > 0:
                        csl_record.setdefault('author', author)
                    if len(editor) > 0:
                        csl_record.setdefault('editor', editor)
                    if len(contributor) > 0 and record.get('pubtype') == 'Lecture':
                        csl_record.setdefault('author', contributor)
                # container
                if host.get('is_part_of') != '':
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false', fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(ipo_solr.results[0].get('wtf_json'))
                            if myjson.get('pubtype') != 'Series':
                                title = myjson.get('title')
                                if myjson.get('subtitle'):
                                    title += ': %s' % myjson.get('subtitle')
                                csl_record.setdefault('container-title', title)

                                author = []
                                editor = []
                                if myjson.get('person'):
                                    for person in myjson.get('person'):
                                        # logging.info(person.get('name'))
                                        family = person.get('name').split(', ')[0]
                                        given = ''
                                        if len(person.get('name').split(', ')) > 1:
                                            given = person.get('name').split(', ')[1]
                                        # logging.info('%s, %s' % (family, given))
                                        if person.get('role'):
                                            if 'aut' in person.get('role'):
                                                author.append({'family': family, 'given': given})
                                            elif 'edt' in person.get('role'):
                                                editor.append({'family': family, 'given': given})
                                if len(author) > 0:
                                    csl_record.setdefault('author', author)
                                if len(editor) > 0:
                                    csl_record.setdefault('editor', editor)
                            else:
                                if myjson.get('fsubseries'):
                                    title = myjson.get('fsubseries')
                                else:
                                    title = myjson.get('title')
                                csl_record.setdefault('edition', title)
                        else:
                            csl_record.setdefault('container-title', host.get('is_part_of'))
                    except AttributeError as e:
                        logging.error(e)
                # volume
                if host.get('volume') and host.get('volume') != '':
                    csl_record.setdefault('volume', host.get('volume'))
                # issue
                if host.get('issue') and host.get('issue') != '':
                    csl_record.setdefault('issue', host.get('issue'))
                # page_first
                if host.get('page_first') and host.get('page_first') != '':
                    csl_record.setdefault('page_first', host.get('page_first').replace('-', '_'))
                # page_last
                if host.get('page_last') and host.get('page_last') != '':
                    csl_record.setdefault('page_last', host.get('page_last').replace('-', '_'))
                # page
                if host.get('page_first') and host.get('page_first') != '' \
                        and host.get('page_last') and host.get('page_last') != '':
                    csl_record.setdefault('page', '%s-%s' % (host.get('page_first').replace('-', '_'),
                                                             host.get('page_last').replace('-', '_')))
                else:
                    if host.get('page_first') and host.get('page_first') != '':
                        csl_record.setdefault('page', host.get('page_first').replace('-', '_'))
                    if host.get('page_last') and host.get('page_last') != '':
                        csl_record.setdefault('page', host.get('page_last').replace('-', '_'))
                # collection-number
                # collection-author
                # collection-editor
                # number_of_volumes
                if host.get('number_of_volumes') and host.get('number_of_volumes') != '':
                    csl_record.setdefault('number_of_volumes', host.get('number_of_volumes'))
                # language
                if record.get('language') and record.get('language')[0] != '' and record.get('language')[0] != 'None':
                    csl_record.setdefault('language', record.get('language')[0])
                # issued
                if record.get('issued'):
                    issued = {}
                    date_parts = []
                    for date_part in str(record.get('issued')).replace('[', '').replace(']', '').split('-'):
                        date_parts.append(date_part)
                    issued.setdefault('date-parts', []).append(date_parts)
                    csl_record.setdefault('issued', issued)
                # edition
                if record.get('edition'):
                    csl_record.setdefault('edition', record.get('edition'))
                # isbn (note: the record keys are upper-case; the original
                # tested the lower-case keys, which never match)
                if record.get('ISBN'):
                    csl_record.setdefault('isbn', record.get('ISBN')[0])
                # issn
                if record.get('ISSN'):
                    csl_record.setdefault('issn', record.get('ISSN')[0])
                # ismn
                if record.get('ISMN'):
                    csl_record.setdefault('ismn', record.get('ISMN')[0])
                # publisher
                if record.get('publisher'):
                    csl_record.setdefault('publisher', record.get('publisher'))
                    csl_record.setdefault('original-publisher', record.get('publisher'))
                # publisher_place
                if record.get('publisher_place'):
                    csl_record.setdefault('publisher-place', record.get('publisher_place'))
                    csl_record.setdefault('original-publisher-place', record.get('publisher_place'))
                # number_of_pages
                if record.get('number_of_pages'):
                    csl_record.setdefault('number_of_pages', record.get('number_of_pages'))
                # uri
                # WOSID
                if record.get('WOSID'):
                    csl_record.setdefault('WOSID', record.get('WOSID'))
                # PMID
                if record.get('PMID'):
                    csl_record.setdefault('PMID', record.get('PMID'))
                # abstract (guard against records without an abstract field)
                if record.get('abstract') and record.get('abstract')[0].get('content') != '':
                    csl_record.setdefault('abstract', record.get('abstract')[0].get('content'))

                csl_records.append(csl_record)

    return csl_records
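# Usage sketch: wtf_csl() produces CSL-JSON items, which bibliography() feeds
# to citeproc_node() together with the output format, locale and CSL style:
#
#   csl_items = wtf_csl(wtf_records=[json.loads(doc.get('wtf_json'))])
#   html_fragment = citeproc_node(csl_items, 'html', 'en', 'harvard1')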
def wtf_openurl(record=None):
    open_url = 'ctx_ver=Z39.88-2004'

    if record:
        # pubtype
        if record.get('pubtype') and OPENURL_KEV_MTX.get(record.get('pubtype')):
            open_url += '&rft_val_fmt=info:ofi/fmt:kev:mtx:%s' % OPENURL_KEV_MTX.get(record.get('pubtype'))
        else:
            open_url += '&rft_val_fmt=info:ofi/fmt:kev:mtx:%s' % 'book'
        if OPENURL_GENRE.get(record.get('pubtype')):
            open_url += '&rft.genre=%s' % OPENURL_GENRE.get(record.get('pubtype'))
        else:
            open_url += '&rft.genre=%s' % 'unknown'
        # sid
        # open_url += '&info:ofi/nam:info:sid:%s' % str(parse.quote(record.get('id'), 'utf-8'))
        # doi
        if record.get('DOI') and record.get('DOI')[0]:
            open_url += '&info:ofi/nam:info:doi:%s' % parse.quote(record.get('DOI')[0], 'utf-8')
        # authors (guard against records without a person field)
        if record.get('person'):
            for person in record.get('person'):
                open_url += '&rft.au=%s' % parse.quote(person.get('name'), 'utf8')

        if record.get('is_part_of') and record.get('is_part_of')[0] \
                and record.get('is_part_of')[0].get('is_part_of'):
            for host in record.get('is_part_of'):
                if host.get('is_part_of'):
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false', fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(ipo_solr.results[0].get('wtf_json'))
                            if myjson.get('pubtype') == 'journal':
                                open_url += '&rft.jtitle=%s' % parse.quote(myjson.get('title'), 'utf-8')
                                open_url += '&rft.issn=%s' % parse.quote(myjson.get('ISSN')[0], 'utf-8')
                                open_url += '&rft.volume=%s' % parse.quote(host.get('volume'), 'utf-8')
                                open_url += '&rft.issue=%s' % parse.quote(host.get('issue'), 'utf-8')
                                open_url += '&rft.pages=%s' % host.get('page_first')
                                if host.get('page_last'):
                                    open_url += '-%s' % host.get('page_last')
                                # article title
                                open_url += '&rft.atitle=%s' % parse.quote(record.get('title'), 'utf-8')
                            elif myjson.get('pubtype') == 'Monograph' or \
                                    myjson.get('pubtype') == 'Collection' or \
                                    myjson.get('pubtype') == 'Conference' or \
                                    myjson.get('pubtype') == 'LegalCommentary':
                                # btitle
                                open_url += '&rft.btitle=%s' % parse.quote(myjson.get('title'), 'utf-8')
                                open_url += '&rft.isbn=%s' % parse.quote(myjson.get('ISBN')[0], 'utf-8')
                                open_url += '&rft.pages=%s' % host.get('page_first')
                                if host.get('page_last'):
                                    open_url += '-%s' % host.get('page_last')
                    except AttributeError as e:
                        logging.error(e)
                    break

        if 'rft.atitle' not in open_url:
            open_url += '&rft.title=%s' % parse.quote(record.get('title'), 'utf-8')
            if record.get('ISSN'):
                open_url += '&rft.issn=%s' % parse.quote(record.get('ISSN')[0], 'utf-8')
            if record.get('ISBN'):
                open_url += '&rft.isbn=%s' % parse.quote(record.get('ISBN')[0], 'utf-8')
        # origin info
        if record.get('issued'):
            open_url += '&rft.date=%s' % record.get('issued')
        if record.get('publisher_place'):
            open_url += '&rft.place=%s' % parse.quote(record.get('publisher_place'), 'utf-8')
        if record.get('publisher'):
            open_url += '&rft.publisher=%s' % parse.quote(record.get('publisher'), 'utf-8')
        # other (guard against records without a corporation field)
        if record.get('corporation'):
            for corporation in record.get('corporation'):
                open_url += '&rft.inst=%s' % parse.quote(corporation.get('name'), 'utf-8')

    return open_url
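# Usage sketch: bibliography() embeds the OpenURL as a COinS span; the
# ampersand escaping keeps the HTML title attribute well-formed:
#
#   coins = '<div class="coins"><span class="Z3988" title="%s"></span></div>' \
#           % wtf_openurl(record).replace('&', '&amp;')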
def wtf_orcid(affiliation='', wtf_records=None):
    orcid_records = []
    # logging.info('wtf_records: %s' % wtf_records)
    if wtf_records is None:
        wtf_records = []
    if len(wtf_records) > 0:
        for record in wtf_records:
            orcid_record = {}
            db = BibDatabase()
            db.entries = []
            bibtex_entry = {}

            # work type: fall back to 'OTHER' / 'misc' for unmapped pubtypes
            # (the original called setdefault() on None here, which would raise)
            orcid_type = ORCID_PUBTYPES.get(record.get('pubtype'))
            if orcid_type is None:
                orcid_type = 'OTHER'
            orcid_record.setdefault('type', orcid_type)
            bibtex_type = BIBTEX_PUBTYPES.get(record.get('pubtype'))
            if bibtex_type is None:
                bibtex_type = 'misc'
            bibtex_entry.setdefault('ENTRYTYPE', bibtex_type)

            external_ids = {}
            external_id = []
            # ids - record id (source-work-id)
            ext_id = {}
            ext_id.setdefault('external-id-type', 'source-work-id')
            ext_id.setdefault('external-id-value', record.get('id'))
            ext_id.setdefault('external-id-relationship', 'SELF')
            if affiliation and affiliation in affiliation_url:
                ext_id.setdefault('external-id-url',
                                  '%s%s/%s' % (affiliation_url.get(affiliation),
                                               record.get('pubtype'), record.get('id')))
            external_id.append(ext_id)
            bibtex_entry.setdefault('ID', record.get('id'))
            # ids - ISBN (isbn)
            if record.get('ISBN'):
                for isbn in record.get('ISBN'):
                    if isbn:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'isbn')
                        ext_id.setdefault('external-id-value', isbn)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)
            # ids - ISSN (issn)
            if record.get('ISSN'):
                for issn in record.get('ISSN'):
                    if issn:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'issn')
                        ext_id.setdefault('external-id-value', issn)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)
            # ids - ZDB (other-id)
            if record.get('ZDBID'):
                for zdbid in record.get('ZDBID'):
                    if zdbid:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'other-id')
                        ext_id.setdefault('external-id-value', zdbid)
                        ext_id.setdefault('external-id-url', 'http://ld.zdb-services.de/resource/%s' % zdbid)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)
            # ids - PMID (pmid)
            if record.get('PMID'):
                ext_id = {}
                ext_id.setdefault('external-id-type', 'pmid')
                ext_id.setdefault('external-id-value', record.get('PMID'))
                ext_id.setdefault('external-id-url',
                                  'http://www.ncbi.nlm.nih.gov/pubmed/%s' % record.get('PMID'))
                ext_id.setdefault('external-id-relationship', 'SELF')
                external_id.append(ext_id)
            # ids - WOS-ID (wosuid; the original set 'doi' here, most likely a copy-paste slip)
            if record.get('WOSID'):
                ext_id = {}
                ext_id.setdefault('external-id-type', 'wosuid')
                ext_id.setdefault('external-id-value', record.get('WOSID'))
                ext_id.setdefault('external-id-url',
                                  'http://ws.isiknowledge.com/cps/openurl/service?url_ver=Z39.88-2004&rft_id=info:ut/%s' % record.get('WOSID'))
                ext_id.setdefault('external-id-relationship', 'SELF')
                external_id.append(ext_id)
            # ids - doi
            if record.get('DOI'):
                for doi in record.get('DOI'):
                    if doi:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'doi')
                        ext_id.setdefault('external-id-value', doi)
                        ext_id.setdefault('external-id-url', 'http://dx.doi.org/%s' % doi)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)
                # only set the BibTeX doi when a DOI actually exists
                bibtex_entry.setdefault('doi', record.get('DOI')[0])
            if external_id:
                external_ids.setdefault('external-id', external_id)
                orcid_record.setdefault('external-ids', external_ids)

            # titles
            title = {}
            title.setdefault('title', record.get('title'))
            if record.get('subtitle'):
                title.setdefault('subtitle', record.get('subtitle'))
            orcid_record.setdefault('title', title)

            title = record.get('title')
            if record.get('subtitle'):
                title += ': %s' % record.get('subtitle')
            bibtex_entry.setdefault('title', title)

            # issued
            if record.get('issued'):
                publication_date = {}
                date_parts = []
                for date_part in str(record.get('issued')).replace('[', '').replace(']', '').split('-'):
                    date_parts.append(date_part)
                publication_date.setdefault('year', int(date_parts[0]))
                bibtex_entry.setdefault('year', date_parts[0])
                if len(date_parts) > 1:
                    publication_date.setdefault('month', int(date_parts[1]))
                    bibtex_entry.setdefault('month', date_parts[1])
                if len(date_parts) > 2:
                    publication_date.setdefault('day', int(date_parts[2]))
                    bibtex_entry.setdefault('day', date_parts[2])
                orcid_record.setdefault('publication-date', publication_date)

            # contributors
            contributors = {}
            contributor = []
            author_str = ''
            if record.get('person'):
                for author in record.get('person'):
                    if 'aut' in author.get('role'):
                        con = {}
                        con.setdefault('credit-name', author.get('name'))
                        if author.get('orcid'):
                            con.setdefault('contributor-orcid',
                                           {'uri': 'http://orcid.org/%s' % author.get('orcid')})
                        contributor_attributes = {}
                        contributor_attributes.setdefault('contributor-role', 'AUTHOR')
                        con.setdefault('contributor-attributes', contributor_attributes)
                        contributor.append(con)
                        if author_str != '':
                            author_str += ' and '
                        author_str += author.get('name')
            contributors.setdefault('contributor', contributor)
            orcid_record.setdefault('contributors', contributors)
            bibtex_entry.setdefault('author', author_str)

            # language (guard against records without a language field)
            if record.get('language') and record.get('language')[0] and record.get('language')[0] != 'None':
                orcid_record.setdefault('language-code',
                                        str(babelfish.Language.fromalpha3b(record.get('language')[0])))

            # is_part_of
            hosts = []
            if record.get('is_part_of'):
                hosts = record.get('is_part_of')
            for host in hosts:
                if host.get('is_part_of') != '':
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false', fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(ipo_solr.results[0].get('wtf_json'))
                            title = myjson.get('title')
                            if myjson.get('subtitle'):
                                title += ': %s' % myjson.get('subtitle')
                            orcid_record.setdefault('journal-title', title)
                            if bibtex_entry.get('ENTRYTYPE') == 'article':
                                bibtex_entry.setdefault('journal', title)
                            elif bibtex_entry.get('ENTRYTYPE') in ('inbook', 'inproceedings', 'incollection'):
                                bibtex_entry.setdefault('booktitle', title)
                            else:
                                bibtex_entry.setdefault('series', title)
                        else:
                            orcid_record.setdefault('journal-title', host.get('is_part_of'))
                    except AttributeError as e:
                        logging.error(e)
                if host.get('volume') != '':
                    bibtex_entry.setdefault('volume', host.get('volume'))

            if bibtex_entry:
                db.entries.append(bibtex_entry)
                citation = {}
                citation.setdefault('citation-type', 'BIBTEX')
                citation.setdefault('citation', bibtexparser.dumps(db))
                orcid_record.setdefault('citation', citation)

            orcid_records.append(orcid_record)

    return orcid_records
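# Usage sketch: the dicts returned here are the work payloads that the ORCID
# sync (see sync_hb_to_orcid / orcid_update_records) pushes to the member API:
#
#   works = wtf_orcid(affiliation='tudo',
#                     wtf_records=[json.loads(record.get('wtf_json'))])
#   # each entry carries 'external-ids', 'title', 'citation' (BIBTEX), ...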