def person_delete(person_id=''):
    """
    Delete an existing person

    swagger_from_file: api_doc/person_delete.yml
    """
    if is_token_valid(request.headers.get('Authorization')):
        # TODO decide on base of the api key scopes
        # load person
        delete_person_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                  application=secrets.SOLR_APP, core='person',
                                  query='id:%s' % person_id)
        delete_person_solr.request()

        if delete_person_solr.results:
            thedata = json.loads(delete_person_solr.results[0].get('wtf_json'))
            form = PersonAdminForm.from_json(thedata)
            # modify status to 'deleted'
            form.editorial_status.data = 'deleted'
            form.changed.data = timestamp()
            form.note.data = 'Deleted via REST API'
            # save person
            persistence.person2solr(form, action='delete')

            return make_response('person deleted!', 204)
        else:
            return make_response('person resource \'%s\' not found!' % person_id, 404)
    else:
        return make_response('Unauthorized', 401)
def work_delete(work_id=''):
    """
    Delete an existing work

    swagger_from_file: api_doc/work_delete.yml
    """
    if is_token_valid(request.headers.get('Authorization')):
        # TODO decide on base of the api key scopes
        # load work
        delete_work_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                application=secrets.SOLR_APP, core='hb2',
                                query='id:%s' % work_id)
        delete_work_solr.request()

        if delete_work_solr.results:
            thedata = json.loads(delete_work_solr.results[0].get('wtf_json'))
            form = display_vocabularies.PUBTYPE2FORM.get(thedata.get('pubtype')).from_json(thedata)
            # modify status to 'deleted'
            form.editorial_status.data = 'deleted'
            form.changed.data = timestamp()
            form.note.data = 'Deleted via REST API'
            # save work
            persistence.record2solr(form, action='delete')

            return make_response('work deleted!', 204)
        else:
            return make_response('work resource \'%s\' not found!' % work_id, 404)
    else:
        return make_response('Unauthorized', 401)
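# Usage sketch (not part of the app): how a client might call these delete
# endpoints, assuming they are registered as Flask routes of the form
# DELETE /person/<person_id> and DELETE /work/<work_id> with an API key in
# the Authorization header. Base URL, token, and id below are placeholders.
#
#   import requests
#   resp = requests.delete('https://api.example.org/work/some-work-id',
#                          headers={'Authorization': 'some-api-key'})
#   # 204 -> deleted, 404 -> unknown id, 401 -> invalid token (see above)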
def sync_hb_to_orcid():
    get_user = Solr(host=p_secrets.SOLR_HOST, port=p_secrets.SOLR_PORT,
                    application=p_secrets.SOLR_APP, core='hb2_users',
                    query='orcidid:%s' % orcid_id)
    get_user.request()

    if get_user.results:
        if '/activities/update' in get_user.results[0].get('orcidscopes'):
            # records = get_new_records(affiliation=affiliation, query='pnd:"1049808495%23Becker, Hans-Georg"')
            records = get_updated_records(affiliation=affiliation, query='pnd:"1019952040%23Höhner, Kathrin"')
            orcid_update_records(affiliation=affiliation, orcid_id=orcid_id,
                                 access_token=orcid_token, works=records)
def export_solr_dump(core=''):
    """
    Export the wtf_json field of every doc in the index to a new document in
    the users core and to the user's local file system. Uses the current
    user's ID and a timestamp as the document ID and file name.

    :param core:
    """
    dow = days_of_week[datetime.datetime.today().weekday()]
    if core != 'hb2_users':
        filename = '%s/%s/%s_%s.json' % (secrets.BACKUP_DIR, dow,
                                         datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), core)
        export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                           export_field='wtf_json', core=core)
        export_docs = export_solr.export()
        with open(filename, 'w') as fo:
            fo.write(json.dumps(export_docs, indent=4))

        filename = '%s/%s/%s_%s.not_imported.json' % (secrets.BACKUP_DIR, dow,
                                                      datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), core)
        export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                           query='-editorial_status:imported', export_field='wtf_json', core=core)
        export_docs = export_solr.export()
        with open(filename, 'w') as fo:
            fo.write(json.dumps(export_docs, indent=4))
    else:
        filename = '%s/%s/%s_%s.json' % (secrets.BACKUP_DIR, dow,
                                         datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), core)
        export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                           core=core)
        export_docs = export_solr.export()
        with open(filename, 'w') as fo:
            fo.write(json.dumps(export_docs, indent=4))
def export_solr_dump():
    dow = days_of_week[datetime.datetime.today().weekday()]
    filename = '%s/%s/%s_%s.dead_ends.json' % (secrets.BACKUP_DIR, dow,
                                               datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), 'hb2')
    export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                       query='is_part_of:[\'\' TO *]', export_field='wtf_json', core='hb2')
    export_docs = export_solr.export()

    # TODO get id of the host and check if it exists
    dead_ends = []
    for doc in export_docs:
        for part in doc.get('is_part_of'):
            try:
                query = 'id:%s' % part.get('is_part_of')
                get_record_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                       application=secrets.SOLR_APP, core='hb2', query=query,
                                       facet='false', fields=['wtf_json'])
                get_record_solr.request()
                if len(get_record_solr.results) == 0:
                    print('%s is a dead end' % part.get('is_part_of'))
                    if part.get('is_part_of') not in dead_ends:
                        dead_ends.append(part.get('is_part_of'))
            except AttributeError as e:
                print(e)

    with open(filename, 'w') as fo:
        fo.write(json.dumps(dead_ends, indent=4))
def get_updated_records(affiliation='', query='*:*'):
    get_record_solr = Solr(host=p_secrets.SOLR_HOST, port=p_secrets.SOLR_PORT,
                           application=p_secrets.SOLR_APP,
                           query='%s AND orcid_put_code:[\'\' TO *]' % query, rows=100000)
    get_record_solr.request()

    orcid_records = {}
    if len(get_record_solr.results) == 0:
        logging.error('No records found for query: %s' % query)
    else:
        print(len(get_record_solr.results))
        # orcid_records.append(orcid_processor.wtf_orcid(affiliation=affiliation, wtf_records=[json.loads(get_record_solr.results[0].get('wtf_json'))])[0])
        for record in get_record_solr.results:
            wtf = json.loads(record.get('wtf_json'))
            orcid_records.setdefault(record.get('orcid_put_code')[0],
                                     orcid_processor.wtf_orcid(affiliation=affiliation, wtf_records=[wtf]))

    return orcid_records
def export_solr_query(core='', query='*:*', filename=''):
    if core != 'hb2_users':
        if filename != '':
            dow = days_of_week[datetime.datetime.today().weekday()]
            filename = '%s/%s/%s_%s.%s' % (secrets.BACKUP_DIR, dow,
                                           datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"), core, filename)
            export_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                               query=query, export_field='wtf_json', core=core)
            export_docs = export_solr.export()
            with open(filename, 'w') as fo:
                fo.write(json.dumps(export_docs, indent=4))
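# Usage sketch (assumption: a scheduled job drives these helpers; the core
# names mirror those used elsewhere in this code base):
#
#   for core in ['hb2', 'person', 'organisation', 'hb2_users']:
#       export_solr_dump(core=core)
#   # ad-hoc export, e.g. all deleted works:
#   export_solr_query(core='hb2', query='editorial_status:deleted', filename='deleted.json')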
def bibliography(agent='', agent_id='', style='harvard1'):
    """
    Getting a bibliography

    swagger_from_file: bibliography_doc/bibliography.yml
    """
    format = request.args.get('format', 'html')
    filter_by_year = request.args.get('filter_by_year', '')
    filter_by_type = request.args.get('filter_by_type', '')
    exclude_by_type = request.args.get('exclude_by_type', '')
    filter_by_pr = request.args.get('filter_by_pr', False)
    filter_by_ger = request.args.get('filter_by_ger', False)
    filter_by_eng = request.args.get('filter_by_eng', False)
    filter_by_current_members = request.args.get('filter_by_current_members', False)
    filter_by_former_members = request.args.get('filter_by_former_members', False)
    group_by_year = request.args.get('group_by_year', False)
    # logging.info('group_by_year = %s' % group_by_year)
    group_by_type = request.args.get('group_by_type', False)
    group_by_type_year = request.args.get('group_by_type_year', False)
    pubsort = request.args.get('pubsort', '')
    toc = request.args.get('toc', False)
    locale = request.args.get('locale', '')
    # TODO start-creationdate, end-creationdate >> scenario "Raumplanung"
    start_creationdate = request.args.get('start_creationdate', '')
    end_creationdate = request.args.get('end_creationdate', '')
    reasoning = request.args.get('reasoning', False)
    refresh = request.args.get('refresh', False)

    formats = ['html', 'txt']
    agent_types = {
        'person': 'person',
        'research_group': 'organisation',
        'chair': 'organisation',
        'organisation': 'organisation',
        'working_group': 'group',
        'project': 'group',
    }
    pubsorts = ['stm', 'anh']
    STM_SORT = ['ArticleJournal', 'Chapter', 'Monograph', 'Journal', 'Series', 'Conference', 'Collection',
                'MultivolumeWork', 'SpecialIssue', 'Patent', 'Standard', 'Thesis', 'InternetDocument', 'Report',
                'Lecture', 'Sonstiges', 'ArticleNewspaper', 'PressRelease', 'RadioTVProgram',
                'AudioVideoDocument', 'ResearchData', 'Other']
    ANH_SORT = ['Monograph', 'ArticleJournal', 'ChapterInLegalCommentary', 'Chapter', 'LegalCommentary',
                'Collection', 'MultivolumeWork', 'Conference', 'Edition', 'SpecialIssue', 'Journal', 'Series',
                'Newspaper', 'Thesis', 'ArticleNewspaper', 'Lecture', 'Report', 'InternetDocument',
                'RadioTVProgram', 'AudioVideoDocument', 'PressRelease', 'ResearchData', 'Other']
    # per-pubtype output buffers, derived from the sort orders so that every
    # pubtype (including 'Other') has a key and the lookups below never yield None
    STM_LIST = {k: '' for k in STM_SORT}
    ANH_LIST = {k: '' for k in ANH_SORT}

    if format not in formats:
        return make_response('Bad request: format!', 400)
    elif agent not in agent_types.keys():
        return make_response('Bad request: agent!', 400)
    elif pubsort and pubsort not in pubsorts:
        return make_response('Bad request: pubsort!', 400)

    key = request.full_path.replace('&refresh=true', '').replace('?refresh=true', '?')
    # logging.debug('KEY: %s' % key)
    response = ''
    if not refresh:
        # request in cache?
        try:
            storage_publists_cache = app.extensions['redis']['REDIS_PUBLIST_CACHE']
            if storage_publists_cache.exists(key):
                response = storage_publists_cache.get(key)
        except Exception as e:
            logging.info('REDIS ERROR: %s' % e)

    if response == '':
        group = False
        group_field = ''
        group_limit = 100000
        if str2bool(group_by_year):
            group = True
            group_field = 'fdate'
        elif str2bool(group_by_type):
            group = True
            group_field = 'pubtype'

        filterquery = []
        if str2bool(filter_by_eng):
            filterquery.append('language:eng')
        elif str2bool(filter_by_ger):
            filterquery.append('language:ger')
        elif str2bool(filter_by_pr):
            filterquery.append('peer_reviewed:true')

        if filter_by_type != '':
            entries = filter_by_type.split('|')
            filter_string = ''
            for entry in entries:
                filter_string += 'pubtype:%s' % PUBTYPE_KEYS.get(entry.lower()) + '+OR+'
            filterquery.append(filter_string[:-4])
        if filter_by_year != '':
            entries = filter_by_year.split('|')
            filter_string = ''
            for entry in entries:
                filter_string += 'fdate:%s' % entry + '+OR+'
            filterquery.append(filter_string[:-4])
        if exclude_by_type:
            entries = exclude_by_type.split('|')
            for entry in entries:
                filterquery.append('-pubtype:"%s"' % PUBTYPE_KEYS.get(entry.lower()))

        fquery = ''
        if start_creationdate and not end_creationdate:
            fquery = 'recordCreationDate:[%s TO *]' % (start_creationdate + 'T00:00:00Z')
        elif not start_creationdate and end_creationdate:
            fquery = 'recordCreationDate:[*+TO+%s]' % (end_creationdate + 'T00:00:00Z')
        elif start_creationdate and end_creationdate:
            fquery = 'recordCreationDate:[%s+TO+%s]' % (start_creationdate + 'T00:00:00Z',
                                                        end_creationdate + 'T00:00:00Z')
        if fquery:
            filterquery.append(fquery)

        query = ''
        results = []
        if agent_types.get(agent) == 'person':
            # get facet value
            actor_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                              query='gnd:%s' % agent_id, export_field='wtf_json', core=agent_types.get(agent))
            actor_solr.request()
            if len(actor_solr.results) == 0:
                return make_response('Not Found: Unknown Agent!', 404)
            else:
                name = actor_solr.results[0].get('name')
                query = 'pndid:%s' % agent_id
                # query = 'pnd:"%s%s%s"' % (agent_id, '%23', name)
                # logging.info('query=%s' % query)
        else:
            # get orga/group doc
            actor_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                              query='id:%s' % agent_id, export_field='wtf_json', core=agent_types.get(agent))
            actor_solr.request()
            if actor_solr.results:
                name = actor_solr.results[0].get('pref_label')
                # logging.debug('name = %s' % name)
                if reasoning:
                    # logging.debug('reasoning: %s' % reasoning)
                    orgas = {}
                    orgas.setdefault(agent_id, name)
                    # get all children
                    if actor_solr.results[0].get('children'):
                        children = actor_solr.results[0].get('children')
                        for child_json in children:
                            child = json.loads(child_json)
                            orgas.setdefault(child.get('id'), child.get('label'))
                    query = ''
                    idx_o = 0
                    id_type = agent_types.get(agent)
                    if id_type == 'organisation':
                        id_type = 'affiliation'
                    for orga_id in orgas.keys():
                        fquery = ['gnd:[\'\' TO *]']
                        if not agent_types.get(agent) == 'person':
                            if filter_by_former_members:
                                fquery.append('personal_status:emeritus+OR+personal_status:alumnus')
                            elif filter_by_current_members:
                                fquery.append('-personal_status:emeritus')
                                fquery.append('-personal_status:alumnus')
                        member_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                           application=secrets.SOLR_APP,
                                           query='%s_id:"%s"' % (id_type, orga_id), fquery=fquery,
                                           fields=['gnd', 'name'], rows=100000, core='person')
                        member_solr.request()

                        query_part = ''
                        if member_solr.results and len(member_solr.results) > 0:
                            idx_p = 0
                            for member in member_solr.results:
                                query_part += 'pnd:"%s%s%s"' % (member.get('gnd'), '%23', member.get('name'))
                                idx_p += 1
                                if idx_p < len(member_solr.results) and query_part != '':
                                    query_part += ' OR '
                            if query_part != '':
                                query += query_part

                        idx_o += 1
                        if idx_o < len(orgas) and query != '':
                            query += ' OR '

                    while query.endswith(' OR '):
                        query = query[:-4]
                    # logging.info('query=%s' % query)
                else:
                    logging.debug('reasoning: %s' % reasoning)
                    id_type = agent_types.get(agent)
                    if id_type == 'organisation':
                        id_type = 'affiliation'
                    query = '%s_id:%s' % (id_type, agent_id)
            else:
                return make_response('Not Found: Unknown Agent!', 404)

        biblist_id = str(uuid.uuid4())
        biblist = ''
        biblist_toc = ''
        biblist_coins = ''
        # TOC and COinS buffers mirror the pubtype buffers above
        STM_TOC = {k: '' for k in STM_SORT}
        ANH_TOC = {k: '' for k in ANH_SORT}
        STM_COINS = {k: '' for k in STM_SORT}
        ANH_COINS = {k: '' for k in ANH_SORT}

        if group_by_type_year:
            facet_tree = ('pubtype', 'fdate')
            publist_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                                handler='query', query=query, fquery=filterquery, fields=['wtf_json'], rows=0,
                                facet='true', facet_tree=facet_tree, facet_sort=False, facet_limit=-1,
                                sort='fdate asc', core='hb2')
            publist_solr.request()
            # logging.info('publist_solr.tree: %s' % json.dumps(publist_solr.tree, indent=4))
            list_cnt = 0
            for pubtype in publist_solr.tree.get('pubtype,fdate'):
                # logging.debug('pubtype = %s' % pubtype.get('value'))
                year_list = ''
                year_coins = ''
                if pubtype.get('pivot'):
                    for year in pubtype.get('pivot')[::-1]:
                        # logging.debug('\t%s: %s' % (year.get('value'), year.get('count')))
                        filterquery = []
                        filterquery.append('fdate:%s' % year.get('value'))
                        filterquery.append('pubtype:%s' % pubtype.get('value'))
                        pivot_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                          application=secrets.SOLR_APP, handler='query', query=query,
                                          fields=['wtf_json'], rows=100000, fquery=filterquery, core='hb2')
                        pivot_solr.request()
                        results = pivot_solr.results
                        # logging.debug('PIVOT_PUB_LIST: %s' % results)

                        publist_docs = []
                        for result in results:
                            publist_docs.append(json.loads(result.get('wtf_json')))
                            if format == 'html':
                                year_coins += '<div class="coins"><span class="Z3988" title="%s"></span></div>' \
                                              % openurl_processor.wtf_openurl(json.loads(result.get('wtf_json'))).replace('&', '&amp;')

                        if not group_by_type:
                            if format == 'html':
                                year_list += '<h5>%s</h5>' % year.get('value')
                            else:
                                year_list += '%s\n' % year.get('value')
                        year_list += citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style)
                else:
                    filterquery = []
                    filterquery.append('pubtype:%s' % pubtype.get('value'))
                    pivot_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                      application=secrets.SOLR_APP, handler='query', query=query,
                                      fields=['wtf_json'], rows=100000, fquery=filterquery, core='hb2')
                    pivot_solr.request()
                    results = pivot_solr.results
                    # logging.debug('PIVOT_PUB_LIST: %s' % results)

                    publist_docs = []
                    for result in results:
                        publist_docs.append(json.loads(result.get('wtf_json')))
                        if format == 'html':
                            year_coins += '<div class="coins"><span class="Z3988" title="%s"></span></div>' \
                                          % openurl_processor.wtf_openurl(json.loads(result.get('wtf_json'))).replace('&', '&amp;')
                    year_list += citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style)

                if locale.startswith('de'):
                    group_value = display_vocabularies.PUBTYPE_GER.get(pubtype.get('value'))
                else:
                    group_value = display_vocabularies.PUBTYPE_ENG.get(pubtype.get('value'))

                list_cnt += 1
                if format == 'html':
                    header = '<h4 id="%s_%s">%s</h4>' % (biblist_id, list_cnt, group_value)
                elif format == 'txt':
                    header = '%s\n' % group_value
                else:
                    header = ''
                footer = ''
                if toc and format == 'html':
                    back_string = 'Back to table of contents'
                    if locale.startswith('de'):
                        back_string = 'Zurück zum Inhaltsverzeichnis'
                    footer = '<div class="toc_return"><a href="#%s_citetoc">%s</a></div>' % (biblist_id, back_string)

                if pubsort == 'stm':
                    STM_LIST[pubtype.get('value')] = header + year_list + footer
                    if format == 'html':
                        STM_TOC[pubtype.get('value')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                        STM_COINS[pubtype.get('value')] = year_coins
                elif pubsort == 'anh':
                    ANH_LIST[pubtype.get('value')] = header + year_list + footer
                    if format == 'html':
                        ANH_TOC[pubtype.get('value')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                        ANH_COINS[pubtype.get('value')] = year_coins
                else:
                    biblist += header + year_list
                    if format == 'html':
                        biblist_toc += '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                        biblist_coins += year_coins

            if pubsort == 'anh':
                # logging.debug(ANH_LIST)
                biblist = ''
                biblist_toc = ''
                for pubtype in ANH_SORT:
                    if ANH_LIST.get(pubtype):
                        biblist += ANH_LIST.get(pubtype)
                        if format == 'html':
                            biblist_toc += ANH_TOC.get(pubtype)
                            biblist_coins += ANH_COINS.get(pubtype)
            elif pubsort == 'stm':
                # logging.debug(STM_LIST)
                biblist = ''
                biblist_toc = ''
                for pubtype in STM_SORT:
                    if STM_LIST.get(pubtype):
                        biblist += STM_LIST.get(pubtype)
                        if format == 'html':
                            biblist_toc += STM_TOC.get(pubtype)
                            biblist_coins += STM_COINS.get(pubtype)
        else:
            publist_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                                handler='query', query=query, fields=['wtf_json'], rows=100000,
                                fquery=filterquery, group=group, group_field=group_field,
                                group_limit=group_limit, sort='fdate desc', core='hb2')
            publist_solr.request()
            results.extend(publist_solr.results)
            # print('publist_solr.results: %s' % results)

            publist_docs = []
            if group:
                biblist = ''
                list_cnt = 0
                for result in results:
                    # logging.debug('groupValue: %s' % result.get('groupValue'))
                    # logging.debug('numFound: %s' % result.get('doclist').get('numFound'))
                    # logging.debug('docs: %s' % result.get('doclist').get('docs'))
                    coins = ''
                    for doc in result.get('doclist').get('docs'):
                        publist_docs.append(json.loads(doc.get('wtf_json')))
                        if format == 'html':
                            coins += '<div class="coins"><span class="Z3988" title="%s"></span></div>' \
                                     % openurl_processor.wtf_openurl(json.loads(doc.get('wtf_json'))).replace('&', '&amp;')

                    group_value = result.get('groupValue')
                    if str2bool(group_by_type):
                        if locale.startswith('de'):
                            group_value = display_vocabularies.PUBTYPE_GER.get(result.get('groupValue'))
                        else:
                            group_value = display_vocabularies.PUBTYPE_ENG.get(result.get('groupValue'))

                    list_cnt += 1
                    if format == 'html':
                        header = '<h4 id="%s_%s">%s</h4>' % (biblist_id, list_cnt, group_value)
                    elif format == 'txt':
                        header = '%s' % group_value
                    else:
                        header = ''
                    footer = ''
                    if toc and format == 'html':
                        back_string = 'Back to table of contents'
                        if locale.startswith('de'):
                            back_string = 'Zurück zum Inhaltsverzeichnis'
                        footer = '<div class="toc_return"><a href="#%s_citetoc">%s</a></div>' % (biblist_id, back_string)

                    if str2bool(group_by_type):
                        if pubsort == 'stm':
                            STM_LIST[result.get('groupValue')] = header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                            if format == 'html':
                                STM_TOC[result.get('groupValue')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                                STM_COINS[result.get('groupValue')] = coins
                        elif pubsort == 'anh':
                            ANH_LIST[result.get('groupValue')] = header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                            if format == 'html':
                                ANH_TOC[result.get('groupValue')] = '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                                ANH_COINS[result.get('groupValue')] = coins
                        else:
                            biblist += header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                            if format == 'html':
                                biblist_toc += '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                                biblist_coins += coins
                    elif str2bool(group_by_year):
                        biblist += header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer
                        if format == 'html':
                            biblist_toc += '<li><a href="#%s_%s">%s</a></li>' % (biblist_id, list_cnt, group_value)
                            biblist_coins += coins
                    else:
                        biblist += header + citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style) + footer

                    # reset the per-group buffer before the next group
                    publist_docs = []

                if str2bool(group_by_type) and pubsort == 'anh':
                    # logging.debug(ANH_LIST)
                    biblist = ''
                    biblist_toc = ''
                    for pubtype in ANH_SORT:
                        if ANH_LIST.get(pubtype):
                            biblist += ANH_LIST.get(pubtype)
                            if format == 'html':
                                biblist_toc += ANH_TOC.get(pubtype)
                                biblist_coins += ANH_COINS.get(pubtype)
                elif str2bool(group_by_type) and pubsort == 'stm':
                    # logging.debug(STM_LIST)
                    biblist = ''
                    biblist_toc = ''
                    for pubtype in STM_SORT:
                        if STM_LIST.get(pubtype):
                            biblist += STM_LIST.get(pubtype)
                            if format == 'html':
                                biblist_toc += STM_TOC.get(pubtype)
                                biblist_coins += STM_COINS.get(pubtype)
            else:
                for result in results:
                    publist_docs.append(json.loads(result.get('wtf_json')))
                biblist = citeproc_node(wtf_csl.wtf_csl(publist_docs), format, locale, style)

        response = ''
        if toc and format == 'html':
            response += '<ul id="%s_citetoc">' % biblist_id + biblist_toc + '</ul>'
        response += biblist + biblist_coins

        if response:
            try:
                storage_publists_cache = app.extensions['redis']['REDIS_PUBLIST_CACHE']
                storage_publists_cache.set(key, response)
                storage_publists_cache.hset(agent_id, key, timestamp())
            except Exception as e:
                logging.error('REDIS: %s' % e)

    resp = make_response(response)
    if format == 'txt':
        resp.headers["Content-Type"] = "text/plain; charset=utf-8"
    else:
        resp.headers["Content-Type"] = "text/html; charset=utf-8"
    return resp
def wtf_bibtex(wtf_records=None):
    # logging.info('wtf_records: %s' % wtf_records)
    if wtf_records is None:
        wtf_records = []
    if len(wtf_records) > 0:
        db = BibDatabase()
        db.entries = []
        for record in wtf_records:
            bibtex_entry = {}

            # fall back to 'misc' for unmapped pubtypes (the original called
            # setdefault() on None here, which would raise an AttributeError)
            bibtex_type = BIBTEX_PUBTYPES.get(record.get('pubtype'))
            if bibtex_type is None:
                bibtex_type = 'misc'
            bibtex_entry.setdefault('ENTRYTYPE', bibtex_type)
            bibtex_entry.setdefault('ID', record.get('id'))

            title = record.get('title')
            if record.get('subtitle'):
                title += ': %s' % record.get('subtitle')
            bibtex_entry.setdefault('title', title)

            if record.get('issued'):
                date_parts = []
                for date_part in str(record.get('issued')).replace('[', '').replace(']', '').split('-'):
                    date_parts.append(date_part)
                bibtex_entry.setdefault('year', date_parts[0])
                if len(date_parts) > 1:
                    bibtex_entry.setdefault('month', date_parts[1])
                if len(date_parts) > 2:
                    bibtex_entry.setdefault('day', date_parts[2])

            if record.get('DOI'):
                bibtex_entry.setdefault('crossref', record.get('DOI')[0])

            author_str = ''
            if record.get('person'):
                for author in record.get('person'):
                    if 'aut' in author.get('role'):
                        if author_str != '':
                            author_str += ' and '
                        author_str += author.get('name')
            bibtex_entry.setdefault('author', author_str)

            # is_part_of
            hosts = []
            if record.get('is_part_of'):
                hosts = record.get('is_part_of')
            for host in hosts:
                if host.get('is_part_of') != '':
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false', fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(ipo_solr.results[0].get('wtf_json'))
                            title = myjson.get('title')
                            if myjson.get('subtitle'):
                                title += ': %s' % myjson.get('subtitle')
                            if bibtex_entry.get('ENTRYTYPE') == 'article':
                                bibtex_entry.setdefault('journal', title)
                            elif bibtex_entry.get('ENTRYTYPE') in ('inbook', 'inproceedings', 'incollection'):
                                bibtex_entry.setdefault('booktitle', title)
                            else:
                                bibtex_entry.setdefault('series', title)
                    except AttributeError as e:
                        logging.error(e)
                if host.get('volume') != '':
                    bibtex_entry.setdefault('volume', host.get('volume'))

            if bibtex_entry:
                db.entries.append(bibtex_entry)

        return bibtexparser.dumps(db)
    else:
        return ''
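# Usage sketch: render one stored record as BibTeX. 'some-record-id' is a
# placeholder; the wtf_json field holds the internal record format that
# wtf_bibtex() expects.
#
#   get_record = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
#                     application=secrets.SOLR_APP, core='hb2',
#                     query='id:some-record-id', fields=['wtf_json'])
#   get_record.request()
#   if get_record.results:
#       print(wtf_bibtex(wtf_records=[json.loads(get_record.results[0].get('wtf_json'))]))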
def sync_orcid_to_hb():
    get_user = Solr(host=p_secrets.SOLR_HOST, port=p_secrets.SOLR_PORT,
                    application=p_secrets.SOLR_APP, core='hb2_users',
                    query='orcidid:%s' % orcid_id)
    get_user.request()

    if get_user.results:
        if '/read-limited' in get_user.results[0].get('orcidscopes'):
            works = orcid_read_works(affiliation=affiliation, orcid_id=orcid_id, access_token=orcid_token)
            logger.info('results from ORCID: %s\n' % len(works))
            if works:
                for work in works:
                    do_break = False
                    hb2_record_id = None
                    orcid_record = None
                    for work_sum in work.get('work-summary'):
                        # - putcode is not in hb2
                        try:
                            response = requests.get('http://*****:*****@tu-dortmund.de']
                            # print(json.dumps(thedata, indent=4))
                        else:
                            # logger.info(json.dumps(orcid_record, indent=4))
                            thedata = orcid_processor.orcid_wtf(orcid_id, orcid_record)
                            print(thedata)

                            # add author via orcid_user_info
                            public_info = orcid_user_info(affiliation=affiliation, orcid_id=orcid_id,
                                                          access_token=orcid_token)
                            person = {
                                'name': '%s, %s' % (public_info.get('name').get('family-name').get('value'),
                                                    public_info.get('name').get('given-names').get('value')),
                                'orcid': orcid_id,
                                'role': ['aut']
                            }
                            if affiliation == 'tudo':
                                person['tudo'] = True
                                person['rubi'] = False
                                thedata['catalog'] = ['Technische Universität Dortmund']
                            elif affiliation == 'rub':
                                person['tudo'] = False
                                person['rubi'] = True
                                thedata['catalog'] = ['Ruhr-Universität Bochum']
                            else:
                                person['tudo'] = False
                                person['rubi'] = False
                                thedata['catalog'] = ['Temporäre Daten']
                            thedata['person'] = [person]

                            if thedata:
                                logger.info('POST /work')
                                # POST request
                                logger.info(json.dumps(thedata, indent=4))
                                try:
                                    # post data
                                    response = requests.post(
                                        '%s/%s' % (orcid_secrets.API, 'work'),
                                        headers={'Content-Type': 'application/json',
                                                 'Authorization': 'Bearer %s' % orcid_secrets.TOKEN},
                                        data=json.dumps(thedata)
                                    )
                                    status = response.status_code
                                    logger.info('STATUS: %s' % status)
                                    if status == 201:
                                        response_json = json.loads(response.content.decode("utf-8"))
                                        logger.info(response_json.get('work'))
                                        if response_json.get('message'):
                                            logger.info(response_json.get('message'))
                                    else:
                                        logger.error('ERROR: %s: %s' % (status, response.content.decode("utf-8")))
                                except requests.exceptions.ConnectionError as e:
                                    logging.error(e)
                            logger.info('')
def export_oa_report(year=''):
    '''
    Getting a bibliography

    swagger_from_file: api_doc/export_oa_report.yml
    '''
    pubtype = request.args.get('pubtype', 'ArticleJournal')

    if theme(request.access_route) == 'dortmund':
        affiliation = 'tudo'
        affiliation_str = 'TU Dortmund'
    elif theme(request.access_route) == 'bochum':
        affiliation = 'rubi'
        affiliation_str = 'Ruhr-Universität Bochum'
    else:
        affiliation = ''
        affiliation_str = ''

    if affiliation:
        csv = '"AU";"TI";"SO";"DT";"RP";"EM";"OI";"PU";"ISSN";"E-ISSN";"DOI";"OA";"RP TUDO";"Fak"\n'

        # TODO search for all publications of the given year
        oa_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                       core='hb2', handler='query', query='*:*', facet='false', rows=100000,
                       fquery=['%s:true' % affiliation, 'fdate:%s' % year, 'pubtype:%s' % pubtype])
        oa_solr.request()
        results = oa_solr.results

        if results:
            for record in results:
                thedata = json.loads(record.get('wtf_json'))

                author = ''
                corresponding_author = ''
                corresponding_affiliation = ''
                faks = ''
                for person in thedata.get('person'):
                    if 'aut' in person.get('role'):
                        author += person.get('name') + ';'
                        if person.get('corresponding_author'):
                            corresponding_author = person.get('name')
                            if person.get('tudo'):
                                corresponding_affiliation = True
                                if person.get('gnd'):
                                    tudo = persistence.get_person(person.get('gnd'))
                                    # print(person.get('gnd'))
                                    if tudo:
                                        if tudo.get('affiliation_id'):
                                            faks = ''
                                            for entry in tudo.get('affiliation_id'):
                                                affil = persistence.get_orga(entry)
                                                fak = ''
                                                if affil:
                                                    has_parent = False
                                                    fak = affil.get('pref_label')
                                                    if affil.get('parent_id'):
                                                        has_parent = True
                                                        fak = '%s / %s' % (affil.get('parent_label'),
                                                                           affil.get('pref_label'))
                                                    # walk up the organisational hierarchy
                                                    while has_parent:
                                                        affil = persistence.get_orga(affil.get('parent_id'))
                                                        if affil.get('parent_id'):
                                                            has_parent = True
                                                            fak = '%s / %s' % (affil.get('parent_label'),
                                                                               affil.get('pref_label'))
                                                        else:
                                                            has_parent = False
                                                else:
                                                    fak = 'LinkError: Person %s' % person.get('gnd')
                                                faks += fak + ';'
                                            faks = faks[:-1]
                author = author[:-1]

                publisher = ''
                journal_title = ''
                issn = ''
                if record.get('is_part_of_id'):
                    if record.get('is_part_of_id')[0]:
                        host = persistence.get_work(record.get('is_part_of_id')[0])
                        if host:
                            record = json.loads(host.get('wtf_json'))
                            # print(json.dumps(record, indent=4))
                            journal_title = record.get('title')
                            if record.get('fsubseries'):
                                journal_title = record.get('fsubseries')
                            if record.get('publisher'):
                                publisher = record.get('publisher')
                            if record.get('ISSN'):
                                for entry in record.get('ISSN'):
                                    if entry:
                                        issn = entry
                                        break

                csv += '"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s";"%s"\n' % (
                    author,
                    thedata.get('title'),
                    journal_title,
                    'article',
                    corresponding_author,
                    '',
                    '',
                    publisher,
                    issn,
                    '',
                    thedata.get('DOI')[0] if thedata.get('DOI') else '',
                    thedata.get('oa_funded'),
                    corresponding_affiliation,
                    faks,
                )

        resp = make_response(csv, 200)
        resp.headers['Content-Type'] = 'text/csv; charset=utf-8'
        return resp
    else:
        return make_response('No affiliation parameter set. Please contact the administrator!', 400)
fo.close()

# persons('../../person.json')
# works('../../works.json')
# issued_data('/data/backup/Samstag/2016-10-01_15-28-31_hb2.not_issued.json', '/data/issued/issuedData.json')
# delete_issued('/data/backup/Montag/2016-10-03_09-39-18_hb2.pubtype_Journal.json')

organisations = []
logging.debug('TEST')
try:
    record_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                       core='organisation', rows=100000)
    record_solr.request()
    for result in record_solr.results:
        organisations.append(result.get('id'))
    # print('results: %s' % organisations)
    # print('result_list: %s' % record_solr.results)
except Exception as e:
    logging.error(e)
# organisations = ['TUDO9144040']
def export_openapc(year=''):
    '''
    Getting a bibliography

    swagger_from_file: api_doc/export_openapc.yml
    '''
    if theme(request.access_route) == 'dortmund':
        affiliation = 'tudo'
        affiliation_str = 'TU Dortmund'
    elif theme(request.access_route) == 'bochum':
        affiliation = 'rubi'
        affiliation_str = 'Ruhr-Universität Bochum'
    else:
        affiliation = ''
        affiliation_str = ''

    if affiliation:
        csv = '"institution";"period";"euro";"doi";"is_hybrid";"publisher";"journal_full_title";"issn";"url";"local_id"\n'

        oa_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT, application=secrets.SOLR_APP,
                       core='hb2', handler='query', query='oa_funds:true', facet='false', rows=100000,
                       fquery=['%s:true' % affiliation, 'fdate:%s' % year])
        oa_solr.request()
        results = oa_solr.results

        if len(results) > 0:
            for record in results:
                thedata = json.loads(record.get('wtf_json'))

                # guard against records without a doi field
                doi = record.get('doi')[0] if record.get('doi') else ''
                is_hybrid = False
                if record.get('is_hybrid'):
                    is_hybrid = record.get('is_hybrid')
                publisher = ''
                journal_title = ''
                issn = ''
                url = ''
                if not doi:
                    if record.get('is_part_of_id'):
                        if record.get('is_part_of_id')[0]:
                            host = persistence.get_work(record.get('is_part_of_id')[0])
                            if host:
                                record = json.loads(host.get('wtf_json'))
                                # print(json.dumps(record, indent=4))
                                journal_title = record.get('title')
                                if record.get('fsubseries'):
                                    journal_title = record.get('fsubseries')
                                if record.get('publisher'):
                                    publisher = record.get('publisher')
                                if record.get('ISSN'):
                                    for entry in record.get('ISSN'):
                                        if entry:
                                            issn = entry
                                            break
                    if thedata.get('uri'):
                        for uri in thedata.get('uri'):
                            url = uri
                            break

                csv += '"%s";%s;%s;"%s";"%s";"%s";"%s";"%s";"%s";"%s"\n' % (
                    affiliation_str, year, 0.00, doi, is_hybrid, publisher,
                    journal_title, issn, url, record.get('id'))

            resp = make_response(csv, 200)
            resp.headers['Content-Type'] = 'text/csv; charset=utf-8'
            return resp
        else:
            return make_response('No results', 404)
    else:
        return make_response('No affiliation parameter set. Please contact the administrator!', 400)
from fuzzywuzzy import fuzz
from utils.solr_handler import Solr

try:
    import local_app_secrets as secrets
except ImportError:
    import app_secrets as secrets

# TODO: deduplicate by last name plus the first letter of the first name
# TODO: first and last name match but the GNDs differ => is that actually a TODO?
# TODO: if the last name matches and the first name in the data is only a single
#       letter, or if it is a full first name, compare the first two letters

results = []

new_titles = Solr(application=secrets.SOLR_APP, facet='false', rows=2000000,
                  fields=['pnd', 'id', 'title', 'pubtype', 'catalog'])
new_titles.request()

for doc in new_titles.results:
    # logging.info(doc)
    if doc.get('pnd'):
        catalog = 'tmp'
        if doc.get('catalog'):
            if 'Ruhr-Universität Bochum' in doc.get('catalog'):
                catalog = 'rub'
            elif 'Technische Universität Dortmund' in doc.get('catalog'):
                catalog = 'tudo'

        result = {'id': doc.get('id'), 'catalog': catalog,
                  'title': doc.get('title'), 'pubtype': doc.get('pubtype')}
        creators = []
        # TODO: as soon as there is one entry with len(ids) != 3, the record does not need to be considered!
def wtf_csl(wtf_records=None):
    csl_records = []
    # logging.info('wtf_records: %s' % wtf_records)
    if wtf_records is None:
        wtf_records = []
    if len(wtf_records) > 0:
        for record in wtf_records:
            # logging.info('record: %s' % record)
            hosts = []
            if record.get('is_part_of'):
                hosts = record.get('is_part_of')
            else:
                hosts.append({'is_part_of': ''})

            for host in hosts:
                csl_record = {}
                # id
                csl_record.setdefault('id', record.get('id'))
                # type
                csl_type = CSL_PUBTYPES.get(record.get('pubtype'))
                if csl_type is None:
                    csl_record.setdefault('pubtype', record.get('pubtype'))
                csl_record.setdefault('type', csl_type)
                # title
                title = record.get('title')
                if record.get('subtitle'):
                    title += ': %s' % record.get('subtitle')
                # TODO title supplements
                csl_record.setdefault('title', title)
                # doi
                if record.get('DOI') and record.get('DOI')[0] != '':
                    csl_record.setdefault('DOI', record.get('DOI')[0].strip())
                    csl_record.setdefault('URL', 'http://dx.doi.org/%s' % record.get('DOI')[0].strip())
                    csl_record.setdefault('uri', 'http://dx.doi.org/%s' % record.get('DOI')[0].strip())
                # uri
                if record.get('uri') and record.get('uri')[0] != '':
                    for uri in record.get('uri'):
                        csl_record.setdefault('URL', uri.strip())
                        csl_record.setdefault('uri', uri.strip())
                # contributors
                if record.get('person'):
                    author = []
                    editor = []
                    contributor = []
                    for person in record.get('person'):
                        # logging.info(person.get('name'))
                        family = person.get('name').split(', ')[0]
                        given = ''
                        if len(person.get('name').split(', ')) > 1:
                            given = person.get('name').split(', ')[1]
                        # logging.info('%s, %s' % (family, given))
                        if person.get('role'):
                            if 'aut' in person.get('role'):
                                author.append({'family': family, 'given': given})
                            elif 'edt' in person.get('role'):
                                editor.append({'family': family, 'given': given})
                            else:
                                contributor.append({'family': family, 'given': given})
                    if len(author) > 0:
                        csl_record.setdefault('author', author)
                    if len(editor) > 0:
                        csl_record.setdefault('editor', editor)
                    if len(contributor) > 0 and record.get('pubtype') == 'Lecture':
                        csl_record.setdefault('author', contributor)
                # container
                if host.get('is_part_of') != '':
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false', fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(ipo_solr.results[0].get('wtf_json'))
                            if myjson.get('pubtype') != 'Series':
                                title = myjson.get('title')
                                if myjson.get('subtitle'):
                                    title += ': %s' % myjson.get('subtitle')
                                csl_record.setdefault('container-title', title)

                                author = []
                                editor = []
                                if myjson.get('person'):
                                    for person in myjson.get('person'):
                                        # logging.info(person.get('name'))
                                        family = person.get('name').split(', ')[0]
                                        given = ''
                                        if len(person.get('name').split(', ')) > 1:
                                            given = person.get('name').split(', ')[1]
                                        # logging.info('%s, %s' % (family, given))
                                        if person.get('role'):
                                            if 'aut' in person.get('role'):
                                                author.append({'family': family, 'given': given})
                                            elif 'edt' in person.get('role'):
                                                editor.append({'family': family, 'given': given})
                                if len(author) > 0:
                                    csl_record.setdefault('author', author)
                                if len(editor) > 0:
                                    csl_record.setdefault('editor', editor)
                            else:
                                if myjson.get('fsubseries'):
                                    title = myjson.get('fsubseries')
                                else:
                                    title = myjson.get('title')
                                csl_record.setdefault('edition', title)
                        else:
                            csl_record.setdefault('container-title', host.get('is_part_of'))
                    except AttributeError as e:
                        logging.error(e)
                # volume
                if host.get('volume') and host.get('volume') != '':
                    csl_record.setdefault('volume', host.get('volume'))
                # issue
                if host.get('issue') and host.get('issue') != '':
                    csl_record.setdefault('issue', host.get('issue'))
                # page_first
                if host.get('page_first') and host.get('page_first') != '':
                    csl_record.setdefault('page_first', host.get('page_first').replace('-', '_'))
                # page_last
                if host.get('page_last') and host.get('page_last') != '':
                    csl_record.setdefault('page_last', host.get('page_last').replace('-', '_'))
                # page
                if host.get('page_first') and host.get('page_first') != '' \
                        and host.get('page_last') and host.get('page_last') != '':
                    csl_record.setdefault('page', '%s-%s' % (host.get('page_first').replace('-', '_'),
                                                             host.get('page_last').replace('-', '_')))
                else:
                    if host.get('page_first') and host.get('page_first') != '':
                        csl_record.setdefault('page', host.get('page_first').replace('-', '_'))
                    if host.get('page_last') and host.get('page_last') != '':
                        csl_record.setdefault('page', host.get('page_last').replace('-', '_'))
                # collection-number
                # collection-author
                # collection-editor
                # number_of_volumes
                if host.get('number_of_volumes') and host.get('number_of_volumes') != '':
                    csl_record.setdefault('number_of_volumes', host.get('number_of_volumes'))
                # language
                if record.get('language') and record.get('language')[0] != '' and record.get('language')[0] != 'None':
                    csl_record.setdefault('language', record.get('language')[0])
                # issued
                if record.get('issued'):
                    issued = {}
                    date_parts = []
                    for date_part in str(record.get('issued')).replace('[', '').replace(']', '').split('-'):
                        date_parts.append(date_part)
                    issued.setdefault('date-parts', []).append(date_parts)
                    csl_record.setdefault('issued', issued)
                # edition
                if record.get('edition'):
                    csl_record.setdefault('edition', record.get('edition'))
                # isbn (note: the record keys are upper-case; the original
                # tested the lower-case keys, which never match)
                if record.get('ISBN'):
                    csl_record.setdefault('isbn', record.get('ISBN')[0])
                # issn
                if record.get('ISSN'):
                    csl_record.setdefault('issn', record.get('ISSN')[0])
                # ismn
                if record.get('ISMN'):
                    csl_record.setdefault('ismn', record.get('ISMN')[0])
                # publisher
                if record.get('publisher'):
                    csl_record.setdefault('publisher', record.get('publisher'))
                    csl_record.setdefault('original-publisher', record.get('publisher'))
                # publisher_place
                if record.get('publisher_place'):
                    csl_record.setdefault('publisher-place', record.get('publisher_place'))
                    csl_record.setdefault('original-publisher-place', record.get('publisher_place'))
                # number_of_pages
                if record.get('number_of_pages'):
                    csl_record.setdefault('number_of_pages', record.get('number_of_pages'))
                # uri
                # WOSID
                if record.get('WOSID'):
                    csl_record.setdefault('WOSID', record.get('WOSID'))
                # PMID
                if record.get('PMID'):
                    csl_record.setdefault('PMID', record.get('PMID'))
                # abstract (guard against records without an abstract field)
                if record.get('abstract') and record.get('abstract')[0].get('content') != '':
                    csl_record.setdefault('abstract', record.get('abstract')[0].get('content'))

                csl_records.append(csl_record)

    return csl_records
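# Usage sketch: wtf_csl() produces CSL-JSON items, which bibliography() feeds
# to citeproc_node() together with the output format, locale and CSL style:
#
#   csl_items = wtf_csl(wtf_records=[json.loads(doc.get('wtf_json'))])
#   html_fragment = citeproc_node(csl_items, 'html', 'en', 'harvard1')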
def wtf_openurl(record=None):
    open_url = 'ctx_ver=Z39.88-2004'

    if record:
        # pubtype
        if record.get('pubtype') and OPENURL_KEV_MTX.get(record.get('pubtype')):
            open_url += '&rft_val_fmt=info:ofi/fmt:kev:mtx:%s' % OPENURL_KEV_MTX.get(record.get('pubtype'))
        else:
            open_url += '&rft_val_fmt=info:ofi/fmt:kev:mtx:%s' % 'book'
        if OPENURL_GENRE.get(record.get('pubtype')):
            open_url += '&rft.genre=%s' % OPENURL_GENRE.get(record.get('pubtype'))
        else:
            open_url += '&rft.genre=%s' % 'unknown'
        # sid
        # open_url += '&info:ofi/nam:info:sid:%s' % str(parse.quote(record.get('id'), 'utf-8'))
        # doi
        if record.get('DOI') and record.get('DOI')[0]:
            open_url += '&info:ofi/nam:info:doi:%s' % parse.quote(record.get('DOI')[0], 'utf-8')
        # authors (guard against records without a person field)
        if record.get('person'):
            for person in record.get('person'):
                open_url += '&rft.au=%s' % parse.quote(person.get('name'), 'utf8')

        if record.get('is_part_of') and record.get('is_part_of')[0] \
                and record.get('is_part_of')[0].get('is_part_of'):
            for host in record.get('is_part_of'):
                if host.get('is_part_of'):
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false', fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(ipo_solr.results[0].get('wtf_json'))
                            if myjson.get('pubtype') == 'journal':
                                open_url += '&rft.jtitle=%s' % parse.quote(myjson.get('title'), 'utf-8')
                                open_url += '&rft.issn=%s' % parse.quote(myjson.get('ISSN')[0], 'utf-8')
                                open_url += '&rft.volume=%s' % parse.quote(host.get('volume'), 'utf-8')
                                open_url += '&rft.issue=%s' % parse.quote(host.get('issue'), 'utf-8')
                                open_url += '&rft.pages=%s' % host.get('page_first')
                                if host.get('page_last'):
                                    open_url += '-%s' % host.get('page_last')
                                # article title
                                open_url += '&rft.atitle=%s' % parse.quote(record.get('title'), 'utf-8')
                            elif myjson.get('pubtype') == 'Monograph' or \
                                    myjson.get('pubtype') == 'Collection' or \
                                    myjson.get('pubtype') == 'Conference' or \
                                    myjson.get('pubtype') == 'LegalCommentary':
                                # btitle
                                open_url += '&rft.btitle=%s' % parse.quote(myjson.get('title'), 'utf-8')
                                open_url += '&rft.isbn=%s' % parse.quote(myjson.get('ISBN')[0], 'utf-8')
                                open_url += '&rft.pages=%s' % host.get('page_first')
                                if host.get('page_last'):
                                    open_url += '-%s' % host.get('page_last')
                    except AttributeError as e:
                        logging.error(e)
                    break

        if 'rft.atitle' not in open_url:
            open_url += '&rft.title=%s' % parse.quote(record.get('title'), 'utf-8')
            if record.get('ISSN'):
                open_url += '&rft.issn=%s' % parse.quote(record.get('ISSN')[0], 'utf-8')
            if record.get('ISBN'):
                open_url += '&rft.isbn=%s' % parse.quote(record.get('ISBN')[0], 'utf-8')
        # origin info
        if record.get('issued'):
            open_url += '&rft.date=%s' % record.get('issued')
        if record.get('publisher_place'):
            open_url += '&rft.place=%s' % parse.quote(record.get('publisher_place'), 'utf-8')
        if record.get('publisher'):
            open_url += '&rft.publisher=%s' % parse.quote(record.get('publisher'), 'utf-8')
        # other (guard against records without a corporation field)
        if record.get('corporation'):
            for corporation in record.get('corporation'):
                open_url += '&rft.inst=%s' % parse.quote(corporation.get('name'), 'utf-8')

    return open_url
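# Usage sketch: bibliography() embeds the OpenURL as a COinS span; the
# ampersand escaping keeps the HTML title attribute well-formed:
#
#   coins = '<div class="coins"><span class="Z3988" title="%s"></span></div>' \
#           % wtf_openurl(record).replace('&', '&amp;')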
def wtf_orcid(affiliation='', wtf_records=None):
    orcid_records = []
    # logging.info('wtf_records: %s' % wtf_records)
    if wtf_records is None:
        wtf_records = []
    if len(wtf_records) > 0:
        for record in wtf_records:
            orcid_record = {}
            db = BibDatabase()
            db.entries = []
            bibtex_entry = {}

            # work type: fall back to 'OTHER' / 'misc' for unmapped pubtypes
            # (the original called setdefault() on None here, which would raise)
            orcid_type = ORCID_PUBTYPES.get(record.get('pubtype'))
            if orcid_type is None:
                orcid_type = 'OTHER'
            orcid_record.setdefault('type', orcid_type)
            bibtex_type = BIBTEX_PUBTYPES.get(record.get('pubtype'))
            if bibtex_type is None:
                bibtex_type = 'misc'
            bibtex_entry.setdefault('ENTRYTYPE', bibtex_type)

            external_ids = {}
            external_id = []
            # ids - record id (source-work-id)
            ext_id = {}
            ext_id.setdefault('external-id-type', 'source-work-id')
            ext_id.setdefault('external-id-value', record.get('id'))
            ext_id.setdefault('external-id-relationship', 'SELF')
            if affiliation and affiliation in affiliation_url:
                ext_id.setdefault('external-id-url',
                                  '%s%s/%s' % (affiliation_url.get(affiliation),
                                               record.get('pubtype'), record.get('id')))
            external_id.append(ext_id)
            bibtex_entry.setdefault('ID', record.get('id'))
            # ids - ISBN (isbn)
            if record.get('ISBN'):
                for isbn in record.get('ISBN'):
                    if isbn:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'isbn')
                        ext_id.setdefault('external-id-value', isbn)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)
            # ids - ISSN (issn)
            if record.get('ISSN'):
                for issn in record.get('ISSN'):
                    if issn:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'issn')
                        ext_id.setdefault('external-id-value', issn)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)
            # ids - ZDB (other-id)
            if record.get('ZDBID'):
                for zdbid in record.get('ZDBID'):
                    if zdbid:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'other-id')
                        ext_id.setdefault('external-id-value', zdbid)
                        ext_id.setdefault('external-id-url', 'http://ld.zdb-services.de/resource/%s' % zdbid)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)
            # ids - PMID (pmid)
            if record.get('PMID'):
                ext_id = {}
                ext_id.setdefault('external-id-type', 'pmid')
                ext_id.setdefault('external-id-value', record.get('PMID'))
                ext_id.setdefault('external-id-url',
                                  'http://www.ncbi.nlm.nih.gov/pubmed/%s' % record.get('PMID'))
                ext_id.setdefault('external-id-relationship', 'SELF')
                external_id.append(ext_id)
            # ids - WOS-ID (wosuid; the original set 'doi' here, most likely a copy-paste slip)
            if record.get('WOSID'):
                ext_id = {}
                ext_id.setdefault('external-id-type', 'wosuid')
                ext_id.setdefault('external-id-value', record.get('WOSID'))
                ext_id.setdefault('external-id-url',
                                  'http://ws.isiknowledge.com/cps/openurl/service?url_ver=Z39.88-2004&rft_id=info:ut/%s' % record.get('WOSID'))
                ext_id.setdefault('external-id-relationship', 'SELF')
                external_id.append(ext_id)
            # ids - doi
            if record.get('DOI'):
                for doi in record.get('DOI'):
                    if doi:
                        ext_id = {}
                        ext_id.setdefault('external-id-type', 'doi')
                        ext_id.setdefault('external-id-value', doi)
                        ext_id.setdefault('external-id-url', 'http://dx.doi.org/%s' % doi)
                        ext_id.setdefault('external-id-relationship', 'SELF')
                        external_id.append(ext_id)
                # only set the BibTeX doi when a DOI actually exists
                bibtex_entry.setdefault('doi', record.get('DOI')[0])
            if external_id:
                external_ids.setdefault('external-id', external_id)
                orcid_record.setdefault('external-ids', external_ids)

            # titles
            title = {}
            title.setdefault('title', record.get('title'))
            if record.get('subtitle'):
                title.setdefault('subtitle', record.get('subtitle'))
            orcid_record.setdefault('title', title)

            title = record.get('title')
            if record.get('subtitle'):
                title += ': %s' % record.get('subtitle')
            bibtex_entry.setdefault('title', title)

            # issued
            if record.get('issued'):
                publication_date = {}
                date_parts = []
                for date_part in str(record.get('issued')).replace('[', '').replace(']', '').split('-'):
                    date_parts.append(date_part)
                publication_date.setdefault('year', int(date_parts[0]))
                bibtex_entry.setdefault('year', date_parts[0])
                if len(date_parts) > 1:
                    publication_date.setdefault('month', int(date_parts[1]))
                    bibtex_entry.setdefault('month', date_parts[1])
                if len(date_parts) > 2:
                    publication_date.setdefault('day', int(date_parts[2]))
                    bibtex_entry.setdefault('day', date_parts[2])
                orcid_record.setdefault('publication-date', publication_date)

            # contributors
            contributors = {}
            contributor = []
            author_str = ''
            if record.get('person'):
                for author in record.get('person'):
                    if 'aut' in author.get('role'):
                        con = {}
                        con.setdefault('credit-name', author.get('name'))
                        if author.get('orcid'):
                            con.setdefault('contributor-orcid',
                                           {'uri': 'http://orcid.org/%s' % author.get('orcid')})
                        contributor_attributes = {}
                        contributor_attributes.setdefault('contributor-role', 'AUTHOR')
                        con.setdefault('contributor-attributes', contributor_attributes)
                        contributor.append(con)
                        if author_str != '':
                            author_str += ' and '
                        author_str += author.get('name')
            contributors.setdefault('contributor', contributor)
            orcid_record.setdefault('contributors', contributors)
            bibtex_entry.setdefault('author', author_str)

            # language (guard against records without a language field)
            if record.get('language') and record.get('language')[0] and record.get('language')[0] != 'None':
                orcid_record.setdefault('language-code',
                                        str(babelfish.Language.fromalpha3b(record.get('language')[0])))

            # is_part_of
            hosts = []
            if record.get('is_part_of'):
                hosts = record.get('is_part_of')
            for host in hosts:
                if host.get('is_part_of') != '':
                    try:
                        ipo_solr = Solr(host=secrets.SOLR_HOST, port=secrets.SOLR_PORT,
                                        application=secrets.SOLR_APP,
                                        query='id:%s' % host.get('is_part_of'),
                                        facet='false', fields=['wtf_json'])
                        ipo_solr.request()
                        if len(ipo_solr.results) > 0:
                            myjson = json.loads(ipo_solr.results[0].get('wtf_json'))
                            title = myjson.get('title')
                            if myjson.get('subtitle'):
                                title += ': %s' % myjson.get('subtitle')
                            orcid_record.setdefault('journal-title', title)
                            if bibtex_entry.get('ENTRYTYPE') == 'article':
                                bibtex_entry.setdefault('journal', title)
                            elif bibtex_entry.get('ENTRYTYPE') in ('inbook', 'inproceedings', 'incollection'):
                                bibtex_entry.setdefault('booktitle', title)
                            else:
                                bibtex_entry.setdefault('series', title)
                        else:
                            orcid_record.setdefault('journal-title', host.get('is_part_of'))
                    except AttributeError as e:
                        logging.error(e)
                if host.get('volume') != '':
                    bibtex_entry.setdefault('volume', host.get('volume'))

            if bibtex_entry:
                db.entries.append(bibtex_entry)
                citation = {}
                citation.setdefault('citation-type', 'BIBTEX')
                citation.setdefault('citation', bibtexparser.dumps(db))
                orcid_record.setdefault('citation', citation)

            orcid_records.append(orcid_record)

    return orcid_records
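# Usage sketch: the dicts returned here are the work payloads that the ORCID
# sync (see sync_hb_to_orcid / orcid_update_records) pushes to the member API:
#
#   works = wtf_orcid(affiliation='tudo',
#                     wtf_records=[json.loads(record.get('wtf_json'))])
#   # each entry carries 'external-ids', 'title', 'citation' (BIBTEX), ...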