コード例 #1
0
ファイル: api.py プロジェクト: UB-Dortmund/mms
def person_delete(person_id=''):
    """
        Delete an existing person

        swagger_from_file: api_doc/person_delete.yml
    """

    # reject unauthenticated callers immediately
    if not is_token_valid(request.headers.get('Authorization')):
        return make_response('Unauthorized', 401)

    # TODO decide on base of the api key scopes
    # look up the person record by its id
    person_solr = Solr(host=secrets.SOLR_HOST,
                       port=secrets.SOLR_PORT,
                       application=secrets.SOLR_APP,
                       core='person',
                       query='id:%s' % person_id)
    person_solr.request()

    if not person_solr.results:
        return make_response(
            'person resource \'%s\' not found!' % person_id, 404)

    # soft delete: flip the editorial status instead of removing the record
    thedata = json.loads(person_solr.results[0].get('wtf_json'))
    form = PersonAdminForm.from_json(thedata)
    form.editorial_status.data = 'deleted'
    form.changed.data = timestamp()
    form.note.data = 'Deleted via REST API'
    # persist the status change
    persistence.person2solr(form, action='delete')

    return make_response('person deleted!', 204)
コード例 #2
0
ファイル: api.py プロジェクト: UB-Dortmund/mms
def work_delete(work_id=''):
    """
        Delete an existing work

        swagger_from_file: api_doc/work_delete.yml
    """

    # reject unauthenticated callers immediately
    if not is_token_valid(request.headers.get('Authorization')):
        return make_response('Unauthorized', 401)

    # TODO decide on base of the api key scopes
    # look up the work record by its id
    work_solr = Solr(host=secrets.SOLR_HOST,
                     port=secrets.SOLR_PORT,
                     application=secrets.SOLR_APP,
                     core='hb2',
                     query='id:%s' % work_id)
    work_solr.request()

    if not work_solr.results:
        return make_response('work resource \'%s\' not found!' % work_id,
                             404)

    # soft delete: pick the pubtype-specific form class, then flip the
    # editorial status instead of removing the record
    thedata = json.loads(work_solr.results[0].get('wtf_json'))
    form = display_vocabularies.PUBTYPE2FORM.get(
        thedata.get('pubtype')).from_json(thedata)
    form.editorial_status.data = 'deleted'
    form.changed.data = timestamp()
    form.note.data = 'Deleted via REST API'
    # persist the status change
    persistence.record2solr(form, action='delete')

    return make_response('work deleted!', 204)
コード例 #3
0
ファイル: orcid_sync.py プロジェクト: UB-Dortmund/mms
def sync_hb_to_orcid():
    """Push updated local records to ORCID for the user identified by
    ``orcid_id``.

    NOTE(review): ``orcid_id``, ``affiliation`` and ``orcid_token`` are read
    from the enclosing module scope — confirm they are set before calling.
    """
    user_solr = Solr(host=p_secrets.SOLR_HOST, port=p_secrets.SOLR_PORT,
                     application=p_secrets.SOLR_APP, core='hb2_users',
                     query='orcidid:%s' % orcid_id)
    user_solr.request()

    if not user_solr.results:
        return

    # only sync when the stored token carries the update scope
    if '/activities/update' not in user_solr.results[0].get('orcidscopes'):
        return

    records = get_updated_records(affiliation=affiliation,
                                  query='pnd:"1019952040%23Höhner, Kathrin"')
    orcid_update_records(affiliation=affiliation, orcid_id=orcid_id,
                         access_token=orcid_token, works=records)
コード例 #4
0
def export_solr_dump(core=''):
    """
    Export the wtf_json field of every doc in the index to a new document in the users core and to the user's local file
    system. Uses the current user's ID and a timestamp as the document ID and file name.

    For any core other than 'hb2_users' two dumps are written: a full dump
    and one excluding records with editorial_status 'imported'. The
    'hb2_users' core gets a single full dump.

    :param core: name of the Solr core to export
    """
    dow = days_of_week[datetime.datetime.today().weekday()]

    def _dump(suffix, **solr_kwargs):
        # Run one export against the core and write the result to a
        # timestamped backup file. The timestamp is taken per file, matching
        # the original behaviour of naming each dump at write time.
        filename = '%s/%s/%s_%s%s' % (
            secrets.BACKUP_DIR, dow,
            datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), core,
            suffix)
        export_solr = Solr(host=secrets.SOLR_HOST,
                           port=secrets.SOLR_PORT,
                           application=secrets.SOLR_APP,
                           core=core,
                           **solr_kwargs)
        export_docs = export_solr.export()
        # 'with' guarantees the handle is closed even if json.dumps raises
        with open(filename, 'w') as fo:
            fo.write(json.dumps(export_docs, indent=4))

    if core != 'hb2_users':
        # full dump of the core
        _dump('.json', export_field='wtf_json')
        # second dump without records imported from external sources
        _dump('.not_imported.json',
              query='-editorial_status:imported',
              export_field='wtf_json')
    else:
        # user core: single full dump, no export_field restriction
        _dump('.json')
コード例 #5
0
ファイル: get_dead_links.py プロジェクト: UB-Dortmund/mms
def export_solr_dump():
    """Collect 'is_part_of' references pointing to records that do not exist
    in the 'hb2' core ("dead ends") and write them to a timestamped backup
    file.
    """
    dow = days_of_week[datetime.datetime.today().weekday()]
    filename = '%s/%s/%s_%s.dead_ends.json' % (secrets.BACKUP_DIR, dow,
                                                  datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), 'hb2')

    # export all records that reference a host record
    export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                       application=secrets.SOLR_APP, query='is_part_of:[\'\' TO *]',
                       export_field='wtf_json', core='hb2')
    export_docs = export_solr.export()

    # TODO get id of the host and check if it exists
    dead_ends = []
    for doc in export_docs:
        # guard against docs where the field is missing/None: iterating None
        # would raise outside the try below
        for part in doc.get('is_part_of') or []:
            try:
                host_id = part.get('is_part_of')
                query = 'id:%s' % host_id
                get_record_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                       application=secrets.SOLR_APP, core='hb2', query=query, facet='false',
                                       fields=['wtf_json'])
                get_record_solr.request()
                if len(get_record_solr.results) == 0:
                    print('%s is a dead end' % host_id)
                    if host_id not in dead_ends:
                        dead_ends.append(host_id)
            except AttributeError as e:
                print(e)

    # 'with' guarantees the handle is closed even if json.dumps raises
    with open(filename, 'w') as fo:
        fo.write(json.dumps(dead_ends, indent=4))
コード例 #6
0
ファイル: orcid_sync.py プロジェクト: UB-Dortmund/mms
def get_updated_records(affiliation='', query='*:*'):
    """Fetch records matching *query* that already carry an ORCID put-code
    and return them keyed by that put-code, converted to the ORCID message
    format.

    :param affiliation: affiliation passed through to the ORCID converter
    :param query: Solr query restricting the candidate records
    :return: dict mapping the first orcid_put_code of each record to its
        converted representation
    """
    record_solr = Solr(host=p_secrets.SOLR_HOST, port=p_secrets.SOLR_PORT,
                       application=p_secrets.SOLR_APP,
                       query='%s AND orcid_put_code:[\'\' TO *]' % query, rows=100000)
    record_solr.request()

    orcid_records = {}

    if len(record_solr.results) == 0:
        logging.error('No records found for query: %s' % query)
        return orcid_records

    print(len(record_solr.results))
    for record in record_solr.results:
        wtf = json.loads(record.get('wtf_json'))
        put_code = record.get('orcid_put_code')[0]
        # setdefault keeps the first conversion if a put-code repeats
        orcid_records.setdefault(
            put_code,
            orcid_processor.wtf_orcid(affiliation=affiliation, wtf_records=[wtf]))

    return orcid_records
コード例 #7
0
def export_solr_query(core='', query='*:*', filename=''):
    if core != 'hb2_users':
        if filename != '':
            dow = days_of_week[datetime.datetime.today().weekday()]
            filename = '%s/%s/%s_%s.%s' % (
                secrets.BACKUP_DIR, dow,
                datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), core,
                filename)

            export_solr = Solr(host=secrets.SOLR_HOST,
                               port=secrets.SOLR_PORT,
                               application=secrets.SOLR_APP,
                               query=query,
                               export_field='wtf_json',
                               core=core)
            export_docs = export_solr.export()

            fo = open(filename, 'w')
            fo.write(json.dumps(export_docs, indent=4))
            fo.close()
コード例 #8
0
def bibliography(agent='', agent_id='', style='harvard1'):
    """
        Getting a bibliography

        Renders the publication list of an agent (person, organisation or
        group) as HTML or plain text, filtered/grouped according to the
        request's query parameters, and caches the rendered result in Redis.

        swagger_from_file: bibliography_doc/bibliography.yml

        :param agent: agent type; must be a key of ``agent_types`` below
        :param agent_id: agent identifier (GND id for persons, record id for
            organisations/groups)
        :param style: CSL citation style name passed to ``citeproc_node``
    """
    # --- read rendering options from the query string ---
    # NOTE(review): boolean-ish parameters arrive as strings; some are later
    # checked via str2bool(), others (reasoning, toc, group_by_type_year) are
    # tested for plain truthiness, so e.g. ?reasoning=false is truthy --
    # confirm this asymmetry is intended.
    format = request.args.get('format', 'html')

    filter_by_year = request.args.get('filter_by_year', '')
    filter_by_type = request.args.get('filter_by_type', '')
    exclude_by_type = request.args.get('exclude_by_type', '')
    filter_by_pr = request.args.get('filter_by_pr', False)
    filter_by_ger = request.args.get('filter_by_ger', False)
    filter_by_eng = request.args.get('filter_by_eng', False)
    filter_by_current_members = request.args.get('filter_by_current_members', False)
    filter_by_former_members = request.args.get('filter_by_former_members', False)
    group_by_year = request.args.get('group_by_year', False)
    # logging.info('group_by_year = %s' % group_by_year)
    group_by_type = request.args.get('group_by_type', False)
    group_by_type_year = request.args.get('group_by_type_year', False)
    pubsort = request.args.get('pubsort', '')
    toc = request.args.get('toc', False)
    locale = request.args.get('locale', '')
    # TODO start-creationdate, end-creationdate >> Szenario Raumplanung
    start_creationdate = request.args.get('start_creationdate', '')
    end_creationdate = request.args.get('end_creationdate', '')

    reasoning = request.args.get('reasoning', False)
    refresh = request.args.get('refresh', False)

    # --- allowed values and the mapping from agent type to Solr core ---
    formats = ['html', 'txt']
    agent_types = {
        'person': 'person',
        'research_group': 'organisation',
        'chair': 'organisation',
        'organisation': 'organisation',
        'working_group': 'group',
        'project': 'group',
    }
    pubsorts = ['stm', 'anh']
    # ordered pubtype lists and per-pubtype accumulators for the two
    # supported sort schemes ('stm' and 'anh')
    STM_SORT = ['ArticleJournal', 'Chapter', 'Monograph', 'Journal', 'Series', 'Conference', 'Collection',
                'MultivolumeWork', 'SpecialIssue', 'Patent', 'Standard', 'Thesis', 'InternetDocument', 'Report', 'Lecture', 'Sonstiges',
                'ArticleNewspaper', 'PressRelease', 'RadioTVProgram', 'AudioVideoDocument',
                'ResearchData', 'Other']
    STM_LIST = {
        'ArticleJournal': '',
        'Chapter': '',
        'Monograph': '',
        'Journal': '',
        'Series': '',
        'Conference': '',
        'Collection': '',
        'MultivolumeWork': '',
        'SpecialIssue': '',
        'Patent': '',
        'Standard': '',
        'Thesis': '',
        'InternetDocument': '',
        'Report': '',
        'Lecture': '',
        'ArticleNewspaper': '',
        'PressRelease': '',
        'RadioTVProgram': '',
        'AudioVideoDocument': '',
        'ResearchData': '',
        'Other': '',
    }
    ANH_SORT = ['Monograph', 'ArticleJournal', 'ChapterInLegalCommentary', 'Chapter', 'LegalCommentary', 'Collection',
                 'MultivolumeWork', 'Conference', 'Edition', 'SpecialIssue', 'Journal', 'Series', 'Newspaper', 'Thesis',
                'ArticleNewspaper',
                'Lecture', 'Report', 'InternetDocument', 'RadioTVProgram', 'AudioVideoDocument',
                'PressRelease', 'ResearchData', 'Other']
    ANH_LIST = {
        'Monograph': '',
        'ArticleJournal': '',
        'ChapterInLegalCommentary': '',
        'Chapter': '',
        'LegalCommentary': '',
        'Collection': '',
        'MultivolumeWork': '',
        'Conference': '',
        'Edition': '',
        'SpecialIssue': '',
        'Journal': '',
        'Series': '',
        'Newspaper': '',
        'Thesis': '',
        'ArticleNewspaper': '',
        'Lecture': '',
        'Report': '',
        'InternetDocument': '',
        'RadioTVProgram': '',
        'AudioVideoDocument': '',
        'PressRelease': '',
        'ResearchData': '',
        'Other': '',
    }

    # --- validate request parameters ---
    if format not in formats:
        return make_response('Bad request: format!', 400)
    elif agent not in agent_types.keys():
        return make_response('Bad request: agent!', 400)
    elif pubsort and pubsort not in pubsorts:
        return make_response('Bad request: pubsort!', 400)

    # cache key: the full request path with any refresh parameter stripped
    key = request.full_path.replace('&refresh=true', '').replace('?refresh=true', '?')
    # logging.debug('KEY: %s' % key)
    response = ''
    if not refresh:
        # request in cache?
        try:

            storage_publists_cache = app.extensions['redis']['REDIS_PUBLIST_CACHE']

            if storage_publists_cache.exists(key):
                response = storage_publists_cache.get(key)

        except Exception as e:
            logging.info('REDIS ERROR: %s' % e)

    # cache miss (or refresh requested): build the bibliography from Solr
    if response == '':

        # grouping setup for the Solr request
        group = False
        group_field = ''
        group_limit = 100000
        if str2bool(group_by_year):
            group = True
            group_field = 'fdate'
        elif str2bool(group_by_type):
            group = True
            group_field = 'pubtype'

        # build the Solr filter queries from the request flags
        filterquery = []
        if str2bool(filter_by_eng):
            filterquery.append('language:eng')
        elif str2bool(filter_by_ger):
            filterquery.append('language:ger')
        elif str2bool(filter_by_pr):
            filterquery.append('peer_reviewed:true')

        if filter_by_type != '':
            entries = filter_by_type.split('|')
            filter_string = ''
            for entry in entries:
                filter_string += 'pubtype:%s' % PUBTYPE_KEYS.get(entry.lower()) + '+OR+'
            # strip the trailing '+OR+'
            filterquery.append(filter_string[:-4])

        if filter_by_year != '':
            entries = filter_by_year.split('|')
            filter_string = ''
            for entry in entries:
                filter_string += 'fdate:%s' % entry + '+OR+'
            # strip the trailing '+OR+'
            filterquery.append(filter_string[:-4])

        if exclude_by_type:
            entries = exclude_by_type.split('|')
            for entry in entries:
                filterquery.append('-pubtype:"%s"' % PUBTYPE_KEYS.get(entry.lower()))

        # optional creation-date range filter
        fquery = ''
        if start_creationdate and not end_creationdate:
            fquery = 'recordCreationDate:[%s TO *]' % (start_creationdate + 'T00:00:00Z')
        elif not start_creationdate and end_creationdate:
            fquery = 'recordCreationDate:[*+TO+%s]' % (end_creationdate + 'T00:00:00Z')
        elif start_creationdate and end_creationdate:
            fquery = 'recordCreationDate:[%s+TO+%s]' % (start_creationdate + 'T00:00:00Z', end_creationdate + 'T00:00:00Z')

        if fquery:
            filterquery.append(fquery)

        # --- resolve the agent and build the main publication query ---
        query = ''
        results = []
        if agent_types.get(agent) == 'person':

            # get facet value
            actor_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                              application=secrets.SOLR_APP, query='gnd:%s' % agent_id, export_field='wtf_json',
                              core=agent_types.get(agent))
            actor_solr.request()

            if len(actor_solr.results) == 0:
                return make_response('Not Found: Unknown Agent!', 404)
            else:
                name = actor_solr.results[0].get('name')

                query = 'pndid:%s' % agent_id
                # query = 'pnd:"%s%s%s"' % (agent_id, '%23', name)
                # logging.info('query=%s' % query)

        else:
            # get orga/group doc
            actor_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                              application=secrets.SOLR_APP, query='id:%s' % agent_id,
                              export_field='wtf_json',
                              core=agent_types.get(agent))
            actor_solr.request()

            if actor_solr.results:

                name = actor_solr.results[0].get('pref_label')
                # logging.debug('name = %s' % name)

                # NOTE(review): 'reasoning' is the raw query-string value, so
                # any non-empty string (including 'false') takes this branch,
                # unlike the str2bool() checks above -- confirm intended.
                if reasoning:
                    # logging.debug('reasoning: %s' % reasoning)
                    # expand the agent to itself plus all child organisations,
                    # then OR together the pnd clauses of all their members
                    orgas = {}
                    orgas.setdefault(agent_id, name)
                    # get all children
                    if actor_solr.results[0].get('children'):
                        children = actor_solr.results[0].get('children')
                        for child_json in children:
                            child = json.loads(child_json)
                            orgas.setdefault(child.get('id'), child.get('label'))
                    query = ''
                    idx_o = 0
                    id_type = agent_types.get(agent)
                    if id_type == 'organisation':
                        id_type = 'affiliation'

                    for orga_id in orgas.keys():

                        fquery = ['gnd:[\'\' TO *]']

                        if not agent_types.get(agent) == 'person':
                            if filter_by_former_members:
                                fquery.append('personal_status:emeritus+OR+personal_status:alumnus')
                            elif filter_by_current_members:
                                fquery.append('-personal_status:emeritus')
                                fquery.append('-personal_status:alumnus')

                        member_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                           application=secrets.SOLR_APP, query='%s_id:"%s"' % (id_type, orga_id),
                                           fquery=fquery, fields=['gnd', 'name'], rows=100000,
                                           core='person')
                        member_solr.request()

                        query_part = ''

                        if member_solr.results and len(member_solr.results) > 0:
                            idx_p = 0
                            for member in member_solr.results:
                                query_part += 'pnd:"%s%s%s"' % (member.get('gnd'), '%23', member.get('name'))
                                idx_p += 1
                                if idx_p < len(member_solr.results) and query_part != '':
                                    query_part += ' OR '

                            if query_part != '':
                                query += query_part

                        idx_o += 1
                        if idx_o < len(orgas) and query != '':
                            query += ' OR '

                    # remove any dangling ' OR ' left by empty member lists
                    while query.endswith(' OR '):
                        query = query[:-4]

                    # logging.info('query=%s' % query)

                else:
                    logging.debug('reasoning: %s' % reasoning)
                    id_type = agent_types.get(agent)
                    if id_type == 'organisation':
                        id_type = 'affiliation'
                    query = '%s_id:%s' % (id_type, agent_id)
            else:
                return make_response('Not Found: Unknown Agent!', 404)

        # --- accumulators for the rendered list, its TOC and COinS spans ---
        biblist_id = str(uuid.uuid4())
        biblist = ''
        biblist_toc = ''
        biblist_coins = ''

        STM_TOC = {
            'ArticleJournal': '',
            'Chapter': '',
            'Monograph': '',
            'Journal': '',
            'Series': '',
            'Conference': '',
            'Collection': '',
            'MultivolumeWork': '',
            'SpecialIssue': '',
            'Patent': '',
            'Standard': '',
            'Thesis': '',
            'InternetDocument': '',
            'Report': '',
            'Lecture': '',
            'ArticleNewspaper': '',
            'PressRelease': '',
            'RadioTVProgram': '',
            'AudioVideoDocument': '',
            'ResearchData': '',
        }
        ANH_TOC = {
            'Monograph': '',
            'ArticleJournal': '',
            'ChapterInLegalCommentary': '',
            'Chapter': '',
            'LegalCommentary': '',
            'Collection': '',
            'MultivolumeWork': '',
            'Conference': '',
            'Edition': '',
            'SpecialIssue': '',
            'Journal': '',
            'Series': '',
            'Newspaper': '',
            'Thesis': '',
            'ArticleNewspaper': '',
            'Lecture': '',
            'Report': '',
            'InternetDocument': '',
            'RadioTVProgram': '',
            'AudioVideoDocument': '',
            'PressRelease': '',
            'ResearchData': '',
        }

        STM_COINS = {
            'ArticleJournal': '',
            'Chapter': '',
            'Monograph': '',
            'Journal': '',
            'Series': '',
            'Conference': '',
            'Collection': '',
            'MultivolumeWork': '',
            'SpecialIssue': '',
            'Patent': '',
            'Standard': '',
            'Thesis': '',
            'InternetDocument': '',
            'Report': '',
            'Lecture': '',
            'ArticleNewspaper': '',
            'PressRelease': '',
            'RadioTVProgram': '',
            'AudioVideoDocument': '',
            'ResearchData': '',
        }
        ANH_COINS = {
            'Monograph': '',
            'ArticleJournal': '',
            'ChapterInLegalCommentary': '',
            'Chapter': '',
            'LegalCommentary': '',
            'Collection': '',
            'MultivolumeWork': '',
            'Conference': '',
            'Edition': '',
            'SpecialIssue': '',
            'Journal': '',
            'Series': '',
            'Newspaper': '',
            'Thesis': '',
            'ArticleNewspaper': '',
            'Lecture': '',
            'Report': '',
            'InternetDocument': '',
            'RadioTVProgram': '',
            'AudioVideoDocument': '',
            'PressRelease': '',
            'ResearchData': '',
        }

        # NOTE(review): like 'reasoning', this tests raw string truthiness,
        # not str2bool() -- confirm intended.
        if group_by_type_year:

            # pivot facet: pubtype first, years nested under each pubtype
            facet_tree = ('pubtype', 'fdate')

            publist_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                application=secrets.SOLR_APP, handler='query',
                                query=query, fquery=filterquery,
                                fields=['wtf_json'], rows=0,
                                facet='true', facet_tree=facet_tree, facet_sort=False, facet_limit=-1,
                                sort='fdate asc', core='hb2')
            publist_solr.request()
            # logging.info('publist_solr.tree: %s' % json.dumps(publist_solr.tree, indent=4))

            list_cnt = 0
            for pubtype in publist_solr.tree.get('pubtype,fdate'):
                # logging.debug('pubtype = %s' % pubtype.get('value'))
                # logging.debug('pubtype = %s' % pubtype)
                year_list = ''
                year_coins = ''
                if pubtype.get('pivot'):
                    # one sub-list per year, newest first
                    for year in pubtype.get('pivot')[::-1]:
                        # logging.debug('\t%s: %s' % (year.get('value'), year.get('count')))
                        filterquery = []
                        filterquery.append('fdate:%s' % year.get('value'))
                        filterquery.append('pubtype:%s' % pubtype.get('value'))
                        pivot_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                          application=secrets.SOLR_APP, handler='query',
                                          query=query, fields=['wtf_json'], rows=100000,
                                          fquery=filterquery, core='hb2')
                        pivot_solr.request()
                        results = pivot_solr.results
                        # logging.debug('PIVOT_PUB_LIST: %s' % results)

                        publist_docs = []
                        for result in results:
                            publist_docs.append(json.loads(result.get('wtf_json')))
                            if format == 'html':
                                year_coins += '<div class="coins"><span class="Z3988" title="%s"></span></div>' % openurl_processor.wtf_openurl(json.loads(result.get('wtf_json'))).replace('&', '&amp;')

                        if not group_by_type:
                            if format == 'html':
                                year_list += '<h5>%s</h5>' % year.get('value')
                            else:
                                year_list += '%s\n' % year.get('value')

                        year_list += citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style)
                else:
                    # pubtype without year pivot: render it as one flat list
                    filterquery = []
                    filterquery.append('pubtype:%s' % pubtype.get('value'))
                    pivot_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                      application=secrets.SOLR_APP, handler='query',
                                      query=query, fields=['wtf_json'], rows=100000,
                                      fquery=filterquery, core='hb2')
                    pivot_solr.request()
                    results = pivot_solr.results
                    # logging.debug('PIVOT_PUB_LIST: %s' % results)

                    publist_docs = []
                    for result in results:
                        publist_docs.append(json.loads(result.get('wtf_json')))
                        if format == 'html':
                            year_coins += '<div class="coins"><span class="Z3988" title="%s"></span></div>' % openurl_processor.wtf_openurl(
                                json.loads(result.get('wtf_json'))).replace('&', '&amp;')

                    year_list += citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style)

                # localized heading for the pubtype group
                if locale.startswith('de'):
                    group_value = display_vocabularies.PUBTYPE_GER.get(pubtype.get('value'))
                else:
                    group_value = display_vocabularies.PUBTYPE_ENG.get(pubtype.get('value'))

                list_cnt += 1
                if format == 'html':
                    header = '<h4 id="%s_%s">%s</h4>' % (biblist_id, list_cnt, group_value)
                elif format == 'txt':
                    header = '%s\n' % group_value
                else:
                    header = ''
                footer = ''
                if toc and format == 'html':
                    back_string = 'Back to table of contents'
                    if locale.startswith('de'):
                        back_string = 'Zurück zum Inhaltsverzeichnis'
                    footer = '<div class="toc_return"><a href="#%s_citetoc">%s</a></div>' % (biblist_id, back_string)

                # stash the rendered group under its pubtype so it can be
                # re-emitted later in the configured sort order
                if pubsort == 'stm':
                    STM_LIST[pubtype.get('value')] = header + year_list + footer
                    if format == 'html':
                        STM_TOC[pubtype.get('value')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                        STM_COINS[pubtype.get('value')] = year_coins
                elif pubsort == 'anh':
                    ANH_LIST[pubtype.get('value')] = header + year_list + footer
                    if format == 'html':
                        ANH_TOC[pubtype.get('value')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                        ANH_COINS[pubtype.get('value')] = year_coins
                else:
                    biblist += header + year_list
                    if format == 'html':
                        biblist_toc += '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                        biblist_coins += year_coins

            # re-emit the groups in the configured pubtype order
            if pubsort == 'anh':
                # logging.debug(ANH_LIST)
                biblist = ''
                biblist_toc = ''
                for pubtype in ANH_SORT:
                    if ANH_LIST.get(pubtype):
                        biblist += ANH_LIST.get(pubtype)
                        if format == 'html':
                            biblist_toc += ANH_TOC.get(pubtype)
                            biblist_coins += ANH_COINS.get(pubtype)
            elif pubsort == 'stm':
                # logging.debug(STM_LIST)
                biblist = ''
                biblist_toc = ''
                for pubtype in STM_SORT:
                    if STM_LIST.get(pubtype):
                        biblist += STM_LIST.get(pubtype)
                        if format == 'html':
                            biblist_toc += STM_TOC.get(pubtype)
                            biblist_coins += STM_COINS.get(pubtype)

        else:

            # single Solr request, optionally grouped by year or pubtype
            publist_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                application=secrets.SOLR_APP, handler='query',
                                query=query, fields=['wtf_json'],
                                rows=100000, fquery=filterquery,
                                group=group, group_field=group_field, group_limit=group_limit,
                                sort='fdate desc',
                                core='hb2')
            publist_solr.request()
            results.extend(publist_solr.results)
            # print('publist_solr.results: %s' % results)

            publist_docs = []
            if group:
                biblist = ''
                list_cnt = 0
                for result in results:
                    # logging.debug('groupValue: %s' % result.get('groupValue'))
                    # logging.debug('numFound: %s' % result.get('doclist').get('numFound'))
                    # logging.debug('docs: %s' % result.get('doclist').get('docs'))

                    coins = ''
                    for doc in result.get('doclist').get('docs'):
                        publist_docs.append(json.loads(doc.get('wtf_json')))
                        if format == 'html':
                            coins += '<div class="coins"><span class="Z3988" title="%s"></span></div>' % openurl_processor.wtf_openurl(json.loads(doc.get('wtf_json'))).replace('&', '&amp;')

                    group_value = result.get('groupValue')
                    if str2bool(group_by_type):
                        if locale.startswith('de'):
                            group_value = display_vocabularies.PUBTYPE_GER.get(result.get('groupValue'))
                        else:
                            group_value = display_vocabularies.PUBTYPE_ENG.get(result.get('groupValue'))

                    list_cnt += 1
                    if format == 'html':
                        header = '<h4 id="%s_%s">%s</h4>' % (biblist_id, list_cnt, group_value)
                    elif format == 'txt':
                        header = '%s' % group_value
                    else:
                        header = ''
                    footer = ''
                    if toc and format == 'html':
                        back_string = 'Back to table of contents'
                        if locale.startswith('de'):
                            back_string = 'Zurück zum Inhaltsverzeichnis'
                        footer = '<div class="toc_return"><a href="#%s_citetoc">%s</a></div>' % (biblist_id, back_string)

                    if str2bool(group_by_type):
                        if pubsort == 'stm':
                            STM_LIST[result.get('groupValue')] = header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                            if format == 'html':
                                STM_TOC[result.get('groupValue')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                                STM_COINS[result.get('groupValue')] = coins
                        elif pubsort == 'anh':
                            ANH_LIST[result.get('groupValue')] = header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                            if format == 'html':
                                ANH_TOC[result.get('groupValue')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                                ANH_COINS[result.get('groupValue')] = coins
                        else:
                            biblist += header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                            if format == 'html':
                                biblist_toc += '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                                biblist_coins += coins
                    elif str2bool(group_by_year):
                        biblist += header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                        if format == 'html':
                            biblist_toc += '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                            biblist_coins += coins
                    else:
                        biblist += header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer

                    # NOTE(review): this re-sort runs inside the per-result
                    # loop and resets biblist/biblist_toc on every iteration;
                    # it looks like it was meant to run once after the loop --
                    # confirm before changing, output depends on it.
                    if str2bool(group_by_type) and pubsort == 'anh':
                        # logging.debug(ANH_LIST)
                        biblist = ''
                        biblist_toc = ''
                        for pubtype in ANH_SORT:
                            if ANH_LIST.get(pubtype):
                                biblist += ANH_LIST.get(pubtype)
                                if format == 'html':
                                    biblist_toc += ANH_TOC.get(pubtype)
                                    biblist_coins += ANH_COINS.get(pubtype)
                    elif str2bool(group_by_type) and pubsort == 'stm':
                        # logging.debug(STM_LIST)
                        biblist = ''
                        biblist_toc = ''
                        for pubtype in STM_SORT:
                            if STM_LIST.get(pubtype):
                                biblist += STM_LIST.get(pubtype)
                                if format == 'html':
                                    biblist_toc += STM_TOC.get(pubtype)
                                    biblist_coins += STM_COINS.get(pubtype)

                    publist_docs = []

            else:
                # ungrouped: render all docs as one flat citation list
                for result in results:
                    publist_docs.append(json.loads(result.get('wtf_json')))

                biblist = citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style)

        # assemble the final response: optional TOC, list body, COinS spans
        response = ''

        if toc and format == 'html':
            response += '<ul id="%s_citetoc">' % biblist_id + biblist_toc + '</ul>'

        response += biblist + biblist_coins

    # store the rendered result in the cache (best effort)
    if response:
        try:

            storage_publists_cache = app.extensions['redis']['REDIS_PUBLIST_CACHE']

            storage_publists_cache.set(key, response)
            # track per-agent cache keys with a timestamp for invalidation
            storage_publists_cache.hset(agent_id, key, timestamp())

        except Exception as e:
            logging.error('REDIS: %s' % e)

    resp = make_response(response)
    if format == 'txt':
        resp.headers["Content-Type"] = "text/plain; charset=utf-8"
    else:
        resp.headers["Content-Type"] = "text/html; charset=utf-8"

    return resp
コード例 #9
0
ファイル: bibtex_processor.py プロジェクト: UB-Dortmund/mms
def wtf_bibtex(wtf_records=None):
    """Convert a list of wtf (work) records into a BibTeX string.

    :param wtf_records: list of work dicts in the internal 'wtf' schema
        (keys such as 'pubtype', 'title', 'issued', 'person', 'is_part_of').
    :return: BibTeX serialization of all records, or '' for empty input.
    """
    if wtf_records is None:
        wtf_records = []

    if len(wtf_records) > 0:

        db = BibDatabase()
        db.entries = []

        for record in wtf_records:

            bibtex_entry = {}

            # Map the internal pubtype to a BibTeX entry type; fall back to
            # 'misc' for unknown pubtypes. (The original code called
            # .setdefault() on None here, which raised AttributeError for
            # every unmapped pubtype.)
            bibtex_type = BIBTEX_PUBTYPES.get(record.get('pubtype'))
            if bibtex_type is None:
                bibtex_type = 'misc'
            bibtex_entry.setdefault('ENTRYTYPE', bibtex_type)

            bibtex_entry.setdefault('ID', record.get('id'))

            title = record.get('title')
            if record.get('subtitle'):
                title += ': %s' % record.get('subtitle')
            bibtex_entry.setdefault('title', title)

            # 'issued' is a (possibly bracketed) ISO-like date: YYYY[-MM[-DD]]
            if record.get('issued'):
                date_parts = str(record.get('issued')).replace(
                    '[', '').replace(']', '').split('-')
                bibtex_entry.setdefault('year', date_parts[0])
                if len(date_parts) > 1:
                    bibtex_entry.setdefault('month', date_parts[1])
                if len(date_parts) > 2:
                    bibtex_entry.setdefault('day', date_parts[2])

            # NOTE(review): BibTeX 'crossref' normally references another
            # entry key; storing the DOI here looks questionable — confirm
            # whether downstream consumers expect the DOI in this field.
            if record.get('DOI'):
                bibtex_entry.setdefault('crossref', record.get('DOI')[0])

            # Collect all authors (role 'aut') into one ' and '-separated
            # string; guard against records without a 'person' list.
            author_str = ''
            for author in record.get('person') or []:
                if 'aut' in author.get('role'):
                    if author_str != '':
                        author_str += ' and '
                    author_str += author.get('name')

            bibtex_entry.setdefault('author', author_str)

            # Resolve host records ('is_part_of') via Solr to fill in
            # journal / booktitle / series and volume information.
            hosts = record.get('is_part_of') or []

            for host in hosts:
                if host.get('is_part_of') != '':
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST,
                                        port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false',
                                        fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(
                                ipo_solr.results[0].get('wtf_json'))
                            title = myjson.get('title')
                            if myjson.get('subtitle'):
                                title += ': %s' % myjson.get('subtitle')
                            # Choose the host-title field depending on the
                            # BibTeX entry type of the current record.
                            if bibtex_entry.get('ENTRYTYPE') == 'article':
                                bibtex_entry.setdefault('journal', title)
                            elif bibtex_entry.get('ENTRYTYPE') in (
                                    'inbook', 'inproceedings', 'incollection'):
                                bibtex_entry.setdefault('booktitle', title)
                            else:
                                bibtex_entry.setdefault('series', title)
                    except AttributeError as e:
                        logging.error(e)
                if host.get('volume') != '':
                    bibtex_entry.setdefault('volume', host.get('volume'))

            if bibtex_entry:
                db.entries.append(bibtex_entry)

        return bibtexparser.dumps(db)

    else:
        return ''
コード例 #10
0
ファイル: orcid_sync.py プロジェクト: UB-Dortmund/mms
def sync_orcid_to_hb():
    """Sync works from a user's ORCID profile into the local hb2 index.

    Relies on names from the enclosing scope/module (orcid_id, affiliation,
    orcid_token, logger, p_secrets, orcid_secrets, orcid_processor,
    orcid_read_works, orcid_user_info) -- TODO confirm against the full
    source file.

    NOTE(review): this extract is garbled/redacted (see the '*****:*****'
    URL below); the code between the requests.get() call and the following
    'else:' branch is missing, so the block is not syntactically valid as
    shown and must be restored from the original repository.
    """
    # Look up the local hb2 user record for this ORCID iD.
    get_user = Solr(host=p_secrets.SOLR_HOST, port=p_secrets.SOLR_PORT,
                    application=p_secrets.SOLR_APP, core='hb2_users',
                    query='orcidid:%s' % orcid_id)
    get_user.request()

    if get_user.results:
        # Sync only if the user granted the ORCID '/read-limited' scope.
        if '/read-limited' in get_user.results[0].get('orcidscopes'):
            works = orcid_read_works(affiliation=affiliation, orcid_id=orcid_id, access_token=orcid_token)
            logger.info('results from ORCID: %s\n' % len(works))
            if works:
                for work in works:
                    do_break = False
                    hb2_record_id = None
                    orcid_record = None
                    for work_sum in work.get('work-summary'):
                        # - putcode is not in hb2
                        try:
                            # NOTE(review): URL credentials were scrubbed from
                            # this line during extraction; the surrounding
                            # if/else logic is incomplete here.
                            response = requests.get('http://*****:*****@tu-dortmund.de']
                                # print(json.dumps(thedata, indent=4))
                            else:
                                # logger.info(json.dumps(orcid_record, indent=4))
                                # Convert the ORCID record into the wtf schema.
                                thedata = orcid_processor.orcid_wtf(orcid_id, orcid_record)
                                print(thedata)
                                # add author via orcid_user_info
                                public_info = orcid_user_info(affiliation=affiliation, orcid_id=orcid_id, access_token=orcid_token)
                                person = {
                                    'name': '%s, %s' % (public_info.get('name').get('family-name').get('value'), public_info.get('name').get('given-names').get('value')),
                                    'orcid': orcid_id,
                                    'role': ['aut']
                                }
                                # Tag person flags and the record catalog by affiliation.
                                if affiliation == 'tudo':
                                    person['tudo'] = True
                                    person['rubi'] = False
                                    thedata['catalog'] = ['Technische Universität Dortmund']
                                elif affiliation == 'rub':
                                    person['tudo'] = False
                                    person['rubi'] = True
                                    thedata['catalog'] = ['Ruhr-Universität Bochum']
                                else:
                                    person['tudo'] = False
                                    person['rubi'] = False
                                    thedata['catalog'] = ['Temporäre Daten']

                                thedata['person'] = [person]

                        if thedata:
                            logger.info('POST /work')
                            # POST request
                            logger.info(json.dumps(thedata, indent=4))
                            try:
                                # post data
                                response = requests.post(
                                    '%s/%s' % (orcid_secrets.API, 'work'),
                                    headers={'Content-Type': 'application/json', 'Authorization': 'Bearer %s' % orcid_secrets.TOKEN},
                                    data=json.dumps(thedata)
                                )
                                status = response.status_code
                                logger.info('STATUS: %s' % status)
                                if status == 201:
                                    response_json = json.loads(response.content.decode("utf-8"))
                                    logger.info(response_json.get('work'))
                                    if response_json.get('message'):
                                        logger.info(response_json.get('message'))
                                else:
                                    logger.error('ERROR: %s: %s' % (status, response.content.decode("utf-8")))

                            except requests.exceptions.ConnectionError as e:
                                logging.error(e)
                            logger.info('')
コード例 #11
0
def export_oa_report(year=''):
    '''
        Export an Open-Access report (CSV) for all works of the given year.

        :param year: publication year used as the 'fdate' Solr filter.
        Query parameter 'pubtype' (default 'ArticleJournal') further
        restricts the publication type.

        swagger_from_file: api_doc/export_oa_report.yml
    '''
    pubtype = request.args.get('pubtype', 'ArticleJournal')

    # Derive the affiliation filter from the requesting host (theme).
    if theme(request.access_route) == 'dortmund':
        affiliation = 'tudo'
        affiliation_str = 'TU Dortmund'
    elif theme(request.access_route) == 'bochum':
        affiliation = 'rubi'
        affiliation_str = 'Ruhr-Universität Bochum'
    else:
        affiliation = ''
        affiliation_str = ''

    if affiliation:
        csv = '"AU";"TI";"SO";"DT";"RP";"EM";"OI";"PU";"ISSN";"E-ISSN";"DOI";"OA";"RP TUDO";"Fak"\n'

        # TODO search for all publications of the given year
        oa_solr = Solr(host=secrets.SOLR_HOST,
                       port=secrets.SOLR_PORT,
                       application=secrets.SOLR_APP,
                       core='hb2',
                       handler='query',
                       query='*:*',
                       facet='false',
                       rows=100000,
                       fquery=[
                           '%s:true' % affiliation,
                           'fdate:%s' % year,
                           'pubtype:%s' % pubtype
                       ])
        oa_solr.request()
        results = oa_solr.results

        if results:
            for record in results:
                thedata = json.loads(record.get('wtf_json'))

                author = ''
                corresponding_author = ''
                corresponding_affiliation = ''
                faks = ''
                # Collect authors; for a TUDO corresponding author resolve
                # the faculty path via the person's GND and the orga index.
                # Guard against records without a 'person' list.
                for person in thedata.get('person') or []:
                    if 'aut' in person.get('role'):
                        author += person.get('name') + ';'
                        if person.get('corresponding_author'):
                            corresponding_author = person.get('name')
                            if person.get('tudo'):
                                corresponding_affiliation = True
                                if person.get('gnd'):
                                    tudo = persistence.get_person(
                                        person.get('gnd'))
                                    if tudo and tudo.get('affiliation_id'):
                                        faks = ''
                                        for entry in tudo.get('affiliation_id'):
                                            affil = persistence.get_orga(entry)
                                            fak = ''
                                            if affil:
                                                # Walk up the orga tree,
                                                # building 'parent / child'
                                                # labels until the root.
                                                has_parent = False
                                                fak = affil.get('pref_label')
                                                if affil.get('parent_id'):
                                                    has_parent = True
                                                    fak = '%s / %s' % (
                                                        affil.get('parent_label'),
                                                        affil.get('pref_label'))
                                                while has_parent:
                                                    affil = persistence.get_orga(
                                                        affil.get('parent_id'))
                                                    if affil.get('parent_id'):
                                                        has_parent = True
                                                        fak = '%s / %s' % (
                                                            affil.get('parent_label'),
                                                            affil.get('pref_label'))
                                                    else:
                                                        has_parent = False
                                            else:
                                                fak = 'LinkError: Person %s' % person.get(
                                                    'gnd')
                                            faks += fak + ';'
                                        faks = faks[:-1]

                # Strip the trailing ';' from the author list.
                author = author[:-1]

                publisher = ''
                journal_title = ''
                issn = ''
                # Resolve the first host record for journal title,
                # publisher and ISSN.
                if record.get('is_part_of_id'):
                    if record.get('is_part_of_id')[0]:
                        host = persistence.get_work(
                            record.get('is_part_of_id')[0])
                        if host:
                            record = json.loads(host.get('wtf_json'))
                            journal_title = record.get('title')
                            if record.get('fsubseries'):
                                journal_title = record.get('fsubseries')
                            if record.get('publisher'):
                                publisher = record.get('publisher')
                            if record.get('ISSN'):
                                # take the first non-empty ISSN
                                for entry in record.get('ISSN'):
                                    if entry:
                                        issn = entry
                                        break

                # Guard against records without a DOI (the previous
                # unguarded [0] indexing crashed on DOI-less records).
                doi = ''
                if thedata.get('DOI'):
                    doi = thedata.get('DOI')[0]

                csv += '"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s"\n' % (
                    author,
                    thedata.get('title'),
                    journal_title,
                    'article',
                    corresponding_author,
                    '',
                    '',
                    publisher,
                    issn,
                    '',
                    doi,
                    thedata.get('oa_funded'),
                    corresponding_affiliation,
                    faks,
                )

        resp = make_response(csv, 200)
        resp.headers['Content-Type'] = 'text/csv; charset=utf-8'
        return resp
    else:
        return make_response(
            'No affiliation parameter set. Please contact the administrator!',
            400)
コード例 #12
0
ファイル: preprocessor.py プロジェクト: UB-Dortmund/mms
        fo.close()


# persons('../../person.json')
# works('../../works.json')
# issued_data('/data/backup/Samstag/2016-10-01_15-28-31_hb2.not_issued.json', '/data/issued/issuedData.json')
# delete_issued('/data/backup/Montag/2016-10-03_09-39-18_hb2.pubtype_Journal.json')

# Harvest the ids of every organisation record currently indexed in Solr.
organisations = []

logging.debug('TEST')

try:
    orga_solr = Solr(host=secrets.SOLR_HOST,
                     port=secrets.SOLR_PORT,
                     application=secrets.SOLR_APP,
                     core='organisation',
                     rows=100000)
    orga_solr.request()

    # Keep only the Solr document ids.
    organisations = [doc.get('id') for doc in orga_solr.results]

except Exception as e:
    logging.error(e)

# organisations = ['TUDO9144040']
コード例 #13
0
def export_openapc(year=''):
    '''
        Export an OpenAPC-formatted CSV of all OA-funded works of the year.

        :param year: publication year used as the 'fdate' Solr filter.

        swagger_from_file: api_doc/export_openapc.yml
    '''

    # Derive the affiliation filter from the requesting host (theme).
    if theme(request.access_route) == 'dortmund':
        affiliation = 'tudo'
        affiliation_str = 'TU Dortmund'
    elif theme(request.access_route) == 'bochum':
        affiliation = 'rubi'
        affiliation_str = 'Ruhr-Universität Bochum'
    else:
        affiliation = ''
        affiliation_str = ''

    if affiliation:
        csv = '"institution";"period";"euro";"doi";"is_hybrid";"publisher";"journal_full_title";"issn";"url";"local_id"\n'

        oa_solr = Solr(host=secrets.SOLR_HOST,
                       port=secrets.SOLR_PORT,
                       application=secrets.SOLR_APP,
                       core='hb2',
                       handler='query',
                       query='oa_funds:true',
                       facet='false',
                       rows=100000,
                       fquery=['%s:true' % affiliation,
                               'fdate:%s' % year])
        oa_solr.request()
        results = oa_solr.results

        if len(results) > 0:
            for record in results:
                thedata = json.loads(record.get('wtf_json'))

                # Guard against records without a DOI (the previous
                # unguarded [0] indexing crashed when 'doi' was missing).
                doi = ''
                if record.get('doi'):
                    doi = record.get('doi')[0]
                is_hybrid = False
                if record.get('is_hybrid'):
                    is_hybrid = record.get('is_hybrid')
                publisher = ''
                journal_title = ''
                issn = ''
                url = ''
                # Journal/publisher/URL details are only needed when the
                # record cannot be identified by a DOI.
                if not doi:
                    if record.get('is_part_of_id'):
                        if record.get('is_part_of_id')[0]:
                            host = persistence.get_work(
                                record.get('is_part_of_id')[0])
                            if host:
                                record = json.loads(host.get('wtf_json'))
                                journal_title = record.get('title')
                                if record.get('fsubseries'):
                                    journal_title = record.get('fsubseries')
                                if record.get('publisher'):
                                    publisher = record.get('publisher')
                                if record.get('ISSN'):
                                    # take the first non-empty ISSN
                                    for entry in record.get('ISSN'):
                                        if entry:
                                            issn = entry
                                            break

                    # fall back to the first URI of the work
                    if thedata.get('uri'):
                        for uri in thedata.get('uri'):
                            url = uri
                            break

                csv += '"%s";%s;%s;"%s";"%s";"%s";"%s";"%s";"%s";"%s"\n' % (
                    affiliation_str, year, 0.00, doi, is_hybrid, publisher,
                    journal_title, issn, url, record.get('id'))

            resp = make_response(csv, 200)
            resp.headers['Content-Type'] = 'text/csv; charset=utf-8'
            return resp
        else:
            return make_response('No results', 404)
    else:
        return make_response(
            'No affiliation parameter set. Please contact the administrator!',
            400)
コード例 #14
0
from fuzzywuzzy import fuzz

from utils.solr_handler import Solr

try:
    import local_app_secrets as secrets
except ImportError:
    import app_secrets as secrets


# TODO: Deduplizierung nach Nachname, 1. Buchsctabe des Vornamens
# TODO: Vorname und Nachname sind gleich, aber GNDs unterschiedlich => Ist das ueberhaupt ein TODO?
# TODO: Nachname ist gleich und wenn Vorname in den Daten nur ein Buchstabe oder wenn echter Vorname, dann die
# ersten beiden Buchstaben vergleichen
results = []
new_titles = Solr(application=secrets.SOLR_APP, facet='false', rows=2000000,
                  fields=['pnd', 'id', 'title', 'pubtype', 'catalog'])
new_titles.request()

for doc in new_titles.results:
    # logging.info(doc)
    if doc.get('pnd'):
        catalog = 'tmp'
        if doc.get('catalog'):
            if 'Ruhr-Universität Bochum' in doc.get('catalog'):
                catalog = 'rub'
            elif 'Technische Universität Dortmund' in doc.get('catalog'):
                catalog = 'tudo'

        result = {'id': doc.get('id'), 'catalog': catalog, 'title': doc.get('title'), 'pubtype': doc.get('pubtype')}
        creators = []
        # TODO sobald es einen gibt mit len(ids) != 3 muss der Datensatz nicht betrachtet werden!
コード例 #15
0
ファイル: wtf_csl.py プロジェクト: UB-Dortmund/mms
def _csl_name(name):
    """Split an internal 'Family, Given' name string into a CSL name dict."""
    parts = name.split(', ')
    return {'family': parts[0], 'given': parts[1] if len(parts) > 1 else ''}


def wtf_csl(wtf_records=None):
    """Convert a list of wtf (work) records into CSL-JSON dicts.

    One CSL record is emitted per (record, host) pair taken from the
    record's 'is_part_of' list; a record without host linkage is emitted
    once. Host metadata (container title, volume, issue, pages, ...) is
    resolved from Solr.

    :param wtf_records: list of work dicts in the internal 'wtf' schema.
    :return: list of CSL-JSON dicts.
    """
    csl_records = []

    if wtf_records is None:
        wtf_records = []

    for record in wtf_records:
        # Every host entry yields its own CSL record; works without any
        # 'is_part_of' linkage are processed once with an empty host.
        hosts = record.get('is_part_of')
        if not hosts:
            hosts = [{'is_part_of': ''}]

        for host in hosts:
            csl_record = {}
            # id
            csl_record.setdefault('id', record.get('id'))
            # type: unmapped pubtypes keep their raw value under 'pubtype'
            csl_type = CSL_PUBTYPES.get(record.get('pubtype'))
            if csl_type is None:
                csl_record.setdefault('pubtype', record.get('pubtype'))
            csl_record.setdefault('type', csl_type)
            # title
            title = record.get('title')
            if record.get('subtitle'):
                title += ': %s' % record.get('subtitle')
            # TODO title supplements
            csl_record.setdefault('title', title)

            # doi
            if record.get('DOI') and record.get('DOI')[0] != '':
                csl_record.setdefault('DOI', record.get('DOI')[0].strip())
                csl_record.setdefault('URL', 'http://dx.doi.org/%s' % record.get('DOI')[0].strip())
                csl_record.setdefault('uri', 'http://dx.doi.org/%s' % record.get('DOI')[0].strip())

            # uri (setdefault: only the first URI wins if the DOI set one)
            if record.get('uri') and record.get('uri')[0] != '':
                for uri in record.get('uri'):
                    csl_record.setdefault('URL', uri.strip())
                    csl_record.setdefault('uri', uri.strip())

            # contributors of the work itself
            if record.get('person'):
                author = []
                editor = []
                contributor = []
                for person in record.get('person'):
                    name = _csl_name(person.get('name'))
                    if person.get('role'):
                        if 'aut' in person.get('role'):
                            author.append(name)
                        elif 'edt' in person.get('role'):
                            editor.append(name)
                        else:
                            contributor.append(name)

                if len(author) > 0:
                    csl_record.setdefault('author', author)
                if len(editor) > 0:
                    csl_record.setdefault('editor', editor)
                # Lectures carry no formal authors; use the contributors.
                if len(contributor) > 0 and record.get('pubtype') == 'Lecture':
                    csl_record.setdefault('author', contributor)

            # container: resolve the host record from Solr
            if host.get('is_part_of') != '':
                try:
                    ipo_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                    application=secrets.SOLR_APP, query='id:%s' % host.get('is_part_of'),
                                    facet='false', fields=['wtf_json'])
                    ipo_solr.request()
                    if len(ipo_solr.results) > 0:
                        myjson = json.loads(ipo_solr.results[0].get('wtf_json'))
                        if myjson.get('pubtype') != 'Series':
                            title = myjson.get('title')
                            if myjson.get('subtitle'):
                                title += ': %s' % myjson.get('subtitle')
                            csl_record.setdefault('container-title', title)
                            author = []
                            editor = []
                            for person in myjson.get('person'):
                                name = _csl_name(person.get('name'))
                                if person.get('role'):
                                    if 'aut' in person.get('role'):
                                        author.append(name)
                                    elif 'edt' in person.get('role'):
                                        editor.append(name)

                            if len(author) > 0:
                                csl_record.setdefault('author', author)
                            if len(editor) > 0:
                                csl_record.setdefault('editor', editor)
                        else:
                            # Series hosts are mapped to 'edition'
                            if myjson.get('fsubseries'):
                                title = myjson.get('fsubseries')
                            else:
                                title = myjson.get('title')
                            csl_record.setdefault('edition', title)
                    else:
                        # host not found in the index: fall back to raw id
                        csl_record.setdefault('container-title', host.get('is_part_of'))

                except AttributeError as e:
                    logging.error(e)

            # volume
            if host.get('volume') and host.get('volume') != '':
                csl_record.setdefault('volume', host.get('volume'))
            # issue
            if host.get('issue') and host.get('issue') != '':
                csl_record.setdefault('issue', host.get('issue'))
            # pages: '-' inside a page value is escaped as '_' so that the
            # range separator stays unambiguous
            if host.get('page_first') and host.get('page_first') != '':
                csl_record.setdefault('page_first', host.get('page_first').replace('-', '_'))
            if host.get('page_last') and host.get('page_last') != '':
                csl_record.setdefault('page_last', host.get('page_last').replace('-', '_'))
            if host.get('page_first') and host.get('page_first') != '' and host.get('page_last') and host.get('page_last') != '':
                csl_record.setdefault('page', '%s-%s' % (host.get('page_first').replace('-', '_'), host.get('page_last').replace('-', '_')))
            else:
                if host.get('page_first') and host.get('page_first') != '':
                    csl_record.setdefault('page', host.get('page_first').replace('-', '_'))
                if host.get('page_last') and host.get('page_last') != '':
                    csl_record.setdefault('page', host.get('page_last').replace('-', '_'))

            # collection-number
            # collection-author
            # collection-editor
            # number_of_volumes
            # NOTE(review): CSL-JSON names this 'number-of-volumes'; the
            # underscore form is kept for downstream compatibility.
            if host.get('number_of_volumes') and host.get('number_of_volumes') != '':
                csl_record.setdefault('number_of_volumes', host.get('number_of_volumes'))

            # language
            if record.get('language') and record.get('language')[0] != '' and record.get('language')[0] != 'None':
                csl_record.setdefault('language', record.get('language')[0])
            # issued: YYYY[-MM[-DD]], possibly wrapped in brackets
            if record.get('issued'):
                date_parts = str(record.get('issued')).replace('[', '').replace(']', '').split('-')
                csl_record.setdefault('issued', {'date-parts': [date_parts]})
            # edition
            if record.get('edition'):
                csl_record.setdefault('edition', record.get('edition'))

            # isbn / issn / ismn
            # Fixed: the wtf schema stores these keys upper-case ('ISBN',
            # 'ISSN', 'ISMN' -- the same keys read elsewhere in this file);
            # the previous lower-case checks never matched, so these
            # identifiers were silently dropped.
            if record.get('ISBN'):
                csl_record.setdefault('isbn', record.get('ISBN')[0])
            if record.get('ISSN'):
                csl_record.setdefault('issn', record.get('ISSN')[0])
            if record.get('ISMN'):
                csl_record.setdefault('ismn', record.get('ISMN')[0])

            # publisher
            if record.get('publisher'):
                csl_record.setdefault('publisher', record.get('publisher'))
                csl_record.setdefault('original-publisher', record.get('publisher'))
            # publisher_place
            if record.get('publisher_place'):
                csl_record.setdefault('publisher-place', record.get('publisher_place'))
                csl_record.setdefault('original-publisher-place', record.get('publisher_place'))
            # number_of_pages
            if record.get('number_of_pages'):
                csl_record.setdefault('number_of_pages', record.get('number_of_pages'))

            # WOSID
            if record.get('WOSID'):
                csl_record.setdefault('WOSID', record.get('WOSID'))
            # PMID
            if record.get('PMID'):
                csl_record.setdefault('PMID', record.get('PMID'))
            # abstract
            # Fixed: guard the list access -- records without 'abstract'
            # raised a TypeError on the unguarded [0] indexing.
            if record.get('abstract') and record.get('abstract')[0] and record.get('abstract')[0].get('content') != '':
                csl_record.setdefault('abstract', record.get('abstract')[0].get('content'))

            csl_records.append(csl_record)

    return csl_records
コード例 #16
0
def wtf_openurl(record=None):
    """
        Transform a work record into an OpenURL (Z39.88-2004) KEV string.

        :param record: the 'wtf_json' data of a work as a dict; may be None
        :return: the OpenURL context-object query string, starting with
                 'ctx_ver=Z39.88-2004'
    """

    open_url = 'ctx_ver=Z39.88-2004'

    if record:

        # pubtype: map to an OpenURL metadata format, defaulting to 'book'
        if record.get('pubtype') and OPENURL_KEV_MTX.get(
                record.get('pubtype')):
            open_url += '&rft_val_fmt=info:ofi/fmt:kev:mtx:%s' % OPENURL_KEV_MTX.get(
                record.get('pubtype'))
        else:
            open_url += '&rft_val_fmt=info:ofi/fmt:kev:mtx:%s' % 'book'
        # genre, defaulting to 'unknown'
        if OPENURL_GENRE.get(record.get('pubtype')):
            open_url += '&rft.genre=%s' % OPENURL_GENRE.get(
                record.get('pubtype'))
        else:
            open_url += '&rft.genre=%s' % 'unknown'

        # NOTE(review): the second positional argument of parse.quote() is
        # 'safe', not an encoding; passing 'utf-8' therefore removes '/' from
        # the default safe set. Kept as-is to preserve the emitted URLs —
        # confirm whether encoding='utf-8' was intended.

        # sid
        # open_url += '&info:ofi/nam:info:sid:%s' % str(parse.quote(record.get('id'), 'utf-8'))

        # doi
        if record.get('DOI') and record.get('DOI')[0]:
            open_url += '&info:ofi/nam:info:doi:%s' % parse.quote(
                record.get('DOI')[0], 'utf-8')

        # authors
        # bugfix: 'person' may be absent/None; iterating None raised TypeError.
        # 'utf8' unified to 'utf-8' for consistency (behavior-identical: both
        # are safe-character sets, and letters/'-' are never percent-encoded).
        for person in record.get('person') or []:
            open_url += '&rft.au=%s' % parse.quote(person.get('name'), 'utf-8')

        # host item: enrich from the parent record ('is_part_of') in Solr
        if record.get('is_part_of') and record.get('is_part_of')[
                0] and record.get('is_part_of')[0].get('is_part_of'):
            for host in record.get('is_part_of'):
                if host.get('is_part_of'):
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST,
                                        port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false',
                                        fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(
                                ipo_solr.results[0].get('wtf_json'))
                            if myjson.get('pubtype') == 'journal':
                                open_url += '&rft.jtitle=%s' % parse.quote(
                                    myjson.get('title'), 'utf-8')
                                open_url += '&rft.issn=%s' % parse.quote(
                                    myjson.get('ISSN')[0], 'utf-8')
                                open_url += '&rft.volume=%s' % parse.quote(
                                    host.get('volume'), 'utf-8')
                                open_url += '&rft.issue=%s' % parse.quote(
                                    host.get('issue'), 'utf-8')
                                open_url += '&rft.pages=%s' % host.get(
                                    'page_first')
                                if host.get('page_last'):
                                    open_url += '-%s' % host.get('page_last')
                                # article title
                                open_url += '&rft.atitle=%s' % parse.quote(
                                    record.get('title'), 'utf-8')
                            elif myjson.get('pubtype') == 'Monograph' or \
                                            myjson.get('pubtype') == 'Collection' or \
                                            myjson.get('pubtype') == 'Conference' or \
                                            myjson.get('pubtype') == 'LegalCommentary':
                                # book title
                                open_url += '&rft.btitle=%s' % parse.quote(
                                    myjson.get('title'), 'utf-8')
                                open_url += '&rft.isbn=%s' % parse.quote(
                                    myjson.get('ISBN')[0], 'utf-8')
                                open_url += '&rft.pages=%s' % host.get(
                                    'page_first')
                                if host.get('page_last'):
                                    open_url += '-%s' % host.get('page_last')
                    except AttributeError as e:
                        # a missing field (e.g. volume/issue is None) aborts
                        # the host enrichment but must not break the OpenURL
                        logging.error(e)
                    # only the first host with a parent id is evaluated
                    break

        # record title, unless it was already emitted as the article title
        if 'rft.atitle' not in open_url:
            open_url += '&rft.title=%s' % parse.quote(record.get('title'),
                                                      'utf-8')

        if record.get('ISSN'):
            open_url += '&rft.issn=%s' % parse.quote(
                record.get('ISSN')[0], 'utf-8')
        if record.get('ISBN'):
            open_url += '&rft.isbn=%s' % parse.quote(
                record.get('ISBN')[0], 'utf-8')

        # origin info
        if record.get('issued'):
            open_url += '&rft.date=%s' % record.get('issued')
        if record.get('publisher_place'):
            open_url += '&rft.place=%s' % parse.quote(
                record.get('publisher_place'), 'utf-8')
        if record.get('publisher'):
            open_url += '&rft.publisher=%s' % parse.quote(
                record.get('publisher'), 'utf-8')

        # other
        # bugfix: 'corporation' may be absent/None as well
        for corporation in record.get('corporation') or []:
            open_url += '&rft.inst=%s' % parse.quote(corporation.get('name'),
                                                     'utf-8')

    return open_url
# Code example #17 (votes: 0)
# File: orcid_processor.py — Project: UB-Dortmund/mms
def wtf_orcid(affiliation='', wtf_records=None):
    """
        Transform a list of work records into ORCID work objects, each
        including a BibTeX citation.

        :param affiliation: affiliation key used to build landing-page URLs
                            (looked up in affiliation_url)
        :param wtf_records: list of 'wtf_json' work dicts; may be None
        :return: list of dicts following the ORCID work message schema
    """
    orcid_records = []

    # logging.info('wtf_records: %s' % wtf_records)
    if wtf_records is None:
        wtf_records = []

    if len(wtf_records) > 0:
        for record in wtf_records:

            orcid_record = {}
            db = BibDatabase()
            db.entries = []
            bibtex_entry = {}

            # work type; fall back for unmapped pubtypes
            # (bugfix: the original called setdefault() on None, which raised
            # AttributeError instead of assigning the fallback value)
            orcid_type = ORCID_PUBTYPES.get(record.get('pubtype'))
            if orcid_type is None:
                orcid_type = 'OTHER'
            orcid_record.setdefault('type', orcid_type)

            bibtex_type = BIBTEX_PUBTYPES.get(record.get('pubtype'))
            if bibtex_type is None:
                bibtex_type = 'misc'
            bibtex_entry.setdefault('ENTRYTYPE', bibtex_type)

            external_ids = {}
            external_id = []
            # ids - record id (source-work-id)
            ext_id = {}
            ext_id.setdefault('external-id-type', 'source-work-id')
            ext_id.setdefault('external-id-value', record.get('id'))
            ext_id.setdefault('external-id-relationship', 'SELF')
            if affiliation and affiliation in affiliation_url:
                ext_id.setdefault(
                    'external-id-url',
                    '%s%s/%s' % (affiliation_url.get(affiliation),
                                 record.get('pubtype'), record.get('id')))
            external_id.append(ext_id)
            bibtex_entry.setdefault('ID', record.get('id'))

            # ids - ISBN (isbn)
            if record.get('ISBN'):
                for isbn in record.get('ISBN'):
                    if isbn:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'isbn')
                        ext_id.setdefault('external-id-value', isbn)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)

            # ids - ISSN (issn)
            if record.get('ISSN'):
                for issn in record.get('ISSN'):
                    if issn:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'issn')
                        ext_id.setdefault('external-id-value', issn)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)

            # ids - ZDB (other-id)
            if record.get('ZDBID'):
                for zdbid in record.get('ZDBID'):
                    if zdbid:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'other-id')
                        ext_id.setdefault('external-id-value', zdbid)
                        ext_id.setdefault(
                            'external-id-url',
                            'http://ld.zdb-services.de/resource/%s' % zdbid)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)

            # ids - PMID (pmc)
            if record.get('PMID'):
                ext_id = {}
                ext_id.setdefault('external-id-type', 'pmid')
                ext_id.setdefault('external-id-value', record.get('PMID'))
                ext_id.setdefault(
                    'external-id-url',
                    'http://www.ncbi.nlm.nih.gov/pubmed/%s' %
                    record.get('PMID'))
                ext_id.setdefault('external-id-relationship', 'SELF')
                external_id.append(ext_id)

            # ids - WOS-ID (wosuid)
            if record.get('WOSID'):
                ext_id = {}
                # bugfix: was 'doi' (copy-paste from the DOI branch); the
                # value is a Web of Science UID, not a DOI
                ext_id.setdefault('external-id-type', 'wosuid')
                ext_id.setdefault('external-id-value', record.get('WOSID'))
                ext_id.setdefault(
                    'external-id-url',
                    'http://ws.isiknowledge.com/cps/openurl/service?url_ver=Z39.88-2004&rft_id=info:ut/%s'
                    % record.get('WOSID'))
                ext_id.setdefault('external-id-relationship', 'SELF')
                external_id.append(ext_id)

            # ids - doi
            if record.get('DOI'):
                for doi in record.get('DOI'):
                    if doi:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'doi')
                        ext_id.setdefault('external-id-value', doi)
                        ext_id.setdefault('external-id-url',
                                          'http://dx.doi.org/%s' % doi)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)

                bibtex_entry.setdefault('doi', record.get('DOI')[0])

            # bugfix: register the collected ids for every record — the
            # original did this only inside the DOI branch, so records
            # without a DOI lost all their external ids (including the
            # always-present source-work-id)
            if external_id:
                external_ids.setdefault('external-id', external_id)

            orcid_record.setdefault('external-ids', external_ids)

            # titles
            title = {}
            title.setdefault('title', record.get('title'))
            if record.get('subtitle'):
                title.setdefault('subtitle', record.get('subtitle'))
            orcid_record.setdefault('title', title)

            title = record.get('title')
            if record.get('subtitle'):
                title += ': %s' % record.get('subtitle')
            bibtex_entry.setdefault('title', title)

            # issued: 'issued' looks like '[YYYY-MM-DD]' (with optional
            # month/day); split into year/month/day parts
            if record.get('issued'):
                publication_date = {}
                date_parts = []
                for date_part in str(record.get('issued')).replace(
                        '[', '').replace(']', '').split('-'):
                    date_parts.append(date_part)
                publication_date.setdefault('year', int(date_parts[0]))
                bibtex_entry.setdefault('year', date_parts[0])
                if len(date_parts) > 1:
                    publication_date.setdefault('month', int(date_parts[1]))
                    bibtex_entry.setdefault('month', date_parts[1])
                if len(date_parts) > 2:
                    publication_date.setdefault('day', int(date_parts[2]))
                    bibtex_entry.setdefault('day', date_parts[2])
                orcid_record.setdefault('publication-date', publication_date)

            # contributors: only persons with role 'aut' are exported
            # (bugfix: 'person' may be absent/None; iterating None raised
            # TypeError)
            contributors = {}
            contributor = []
            author_str = ''
            for author in record.get('person') or []:
                if 'aut' in author.get('role'):
                    con = {}
                    con.setdefault('credit-name', author.get('name'))
                    if author.get('orcid'):
                        con.setdefault('contributor-orcid', {
                            'uri':
                            'http://orcid.org/%s' % author.get('orcid')
                        })
                    contributor_attributes = {}
                    contributor_attributes.setdefault('contributor-role',
                                                      'AUTHOR')
                    con.setdefault('contributor-attributes',
                                   contributor_attributes)
                    contributor.append(con)
                    if author_str != '':
                        author_str += ' and '
                    author_str += author.get('name')
            contributors.setdefault('contributor', contributor)
            orcid_record.setdefault('contributors', contributors)

            bibtex_entry.setdefault('author', author_str)

            # language: convert ISO 639-2/B code to the ORCID language code
            # (bugfix: 'language' may be missing or an empty list)
            if record.get('language') and record.get(
                    'language')[0] and record.get('language')[0] != 'None':
                orcid_record.setdefault(
                    'language-code',
                    str(
                        babelfish.Language.fromalpha3b(
                            record.get('language')[0])))

            # is_part_of: resolve host items via Solr for the journal/series
            # title and the matching BibTeX container field
            hosts = []
            if record.get('is_part_of'):
                hosts = record.get('is_part_of')

            for host in hosts:
                if host.get('is_part_of') != '':
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST,
                                        port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false',
                                        fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(
                                ipo_solr.results[0].get('wtf_json'))
                            title = myjson.get('title')
                            if myjson.get('subtitle'):
                                title += ': %s' % myjson.get('subtitle')
                            orcid_record.setdefault('journal-title', title)
                            if bibtex_entry.get('ENTRYTYPE') == 'article':
                                bibtex_entry.setdefault('journal', title)
                            elif bibtex_entry.get('ENTRYTYPE') == 'inbook':
                                bibtex_entry.setdefault('booktitle', title)
                            elif bibtex_entry.get(
                                    'ENTRYTYPE') == 'inproceedings':
                                bibtex_entry.setdefault('booktitle', title)
                            elif bibtex_entry.get(
                                    'ENTRYTYPE') == 'incollection':
                                bibtex_entry.setdefault('booktitle', title)
                            else:
                                bibtex_entry.setdefault('series', title)
                        else:
                            # host not found in Solr: fall back to the raw id
                            orcid_record.setdefault('journal-title',
                                                    host.get('is_part_of'))
                    except AttributeError as e:
                        logging.error(e)
                if host.get('volume') != '':
                    bibtex_entry.setdefault('volume', host.get('volume'))

            if bibtex_entry:
                db.entries.append(bibtex_entry)

            # citation: serialize the single-entry BibTeX database
            citation = {}
            citation.setdefault('citation-type', 'BIBTEX')
            citation.setdefault('citation', bibtexparser.dumps(db))
            orcid_record.setdefault('citation', citation)

            orcid_records.append(orcid_record)

    return orcid_records