コード例 #1
0
    def parseAuthor(self, authorStr):
        """Build an author Agent from a Hathi author string and add it to
        the work.

        Hathi data files include an author column that combines the author
        name with their birth and death dates (sometimes). This method splits
        the dates from the name, attaches them to the constructed agent
        record as Date items and appends that record to self.work.agents.

        A single date is stored as a death_date unless the source string
        carries a " b." / " b " marker, in which case the marker is removed
        from the name and the date is stored as a birth_date. When two or
        more hyphen-separated components are found, the first is stored as
        the birth_date and the second as the death_date. Empty components
        (for example the missing death year in an open-ended lifespan such
        as "1950-") are skipped instead of being stored as blank dates.

        Arguments:
            authorStr: Raw author value, e.g. "Smith, John, 1900-1950"
        """
        logger.info('Storing author {} for work {}'.format(
            authorStr, self.work))
        # The first run of four or more date-like characters (digits, "-",
        # "c", "?", "'", ".") is treated as the lifespan part of the string
        authorDateGroup = re.search(r'([0-9\-c?\'.]{4,})', authorStr)
        authorDates = None
        if authorDateGroup is not None:
            authorDates = authorDateGroup.group(1)
            authorName = authorStr.replace(authorDates, '').strip(' ,.')
            logger.debug('Found lifespan dates {}'.format(authorDates))
        else:
            authorName = authorStr
            logger.debug('Found no lifespan dates')

        authorRec = Agent(name=authorName, role='author')

        if authorDates is not None:
            logger.info('Creating date objects for author lifespan')
            lifespan = authorDates.strip(' ,.').split('-')
            if len(lifespan) == 1:
                logger.debug('Found single date, default to death_date')
                dateType = 'death_date'
                datePrefix = re.search(r' b(?: |\.)', authorStr)
                if datePrefix is not None:
                    # Drop the trailing "b" marker left behind in the name
                    authorRec.name = re.sub(r' b(?: |\.|$)', '',
                                            authorName).strip(' ,.')
                    logger.debug('Detected single birth_date (living author)')
                    dateType = 'birth_date'

                logger.debug('Storing single date {} of type {}'.format(
                    lifespan[0], dateType))
                typedDates = [(lifespan[0], dateType)]

            else:
                logger.debug('Storing lifespan {}-{} as dates'.format(
                    lifespan[0], lifespan[1]))
                typedDates = [
                    (lifespan[0], 'birth_date'),
                    (lifespan[1], 'death_date')
                ]

            for dateValue, dateType in typedDates:
                # An open-ended range ("1950-") or a group made only of
                # punctuation yields an empty component; storing it would
                # create a Date with no display value
                if not dateValue:
                    logger.debug('Skipping empty {}'.format(dateType))
                    continue
                authorRec.addClassItem(
                    'dates', Date, **{
                        'display_date': dateValue,
                        'date_range': dateValue,
                        'date_type': dateType
                    })
        logger.debug('Appending agent record {} to work'.format(authorRec))
        self.work.agents.append(authorRec)
コード例 #2
0
def buildAgent(name, role, agentType='personal'):
    """Create an Agent and enrich it from the VIAF lookup service.

    The agent is created with the supplied name and role, then the
    viaf-lookup endpoint is queried with the URL-encoded name and the given
    query type. When the JSON response contains a 'viaf' key, the agent's
    viaf and lcnaf attributes are set from the response. If the response
    also carries a non-empty name that differs from the supplied one, the
    supplied name is kept as an alias and the response name becomes the
    agent's name.

    Arguments:
        name: Agent name as found in the source record
        role: Role assigned to the agent
        agentType: Value sent as the queryType parameter (default 'personal')

    Returns:
        The constructed Agent, enriched when the lookup returned a match.
    """
    newAgent = Agent(name=name, role=role)

    # NOTE(review): no timeout is passed to requests.get here
    viafResp = requests.get('{}{}&queryType={}'.format(
        'https://dev-platform.nypl.org/api/v0.1/research-now/viaf-lookup?queryName=',
        quote_plus(name), agentType))
    responseJSON = viafResp.json()
    logger.debug(responseJSON)
    if 'viaf' in responseJSON:
        # Read the name once with .get so a match without a 'name' key
        # neither raises KeyError nor blanks out the agent's name
        viafName = responseJSON.get('name')
        if viafName and viafName != name:
            newAgent.aliases.append(name)
            newAgent.name = viafName
        newAgent.viaf = responseJSON.get('viaf', None)
        newAgent.lcnaf = responseJSON.get('lcnaf', None)

    return newAgent
コード例 #3
0
ファイル: parseOCLC.py プロジェクト: NYPL/sfr-ingest-pipeline
def buildAgent(name, role):
    """Create an Agent and enrich it from the VIAF lookup service.

    The agent is created with the supplied name and role, then the
    viaf-lookup endpoint is queried with the URL-encoded name. For the
    'publisher' and 'manufacturer' roles the query is sent with
    queryType=corporate. When the JSON response contains a 'viaf' key, the
    agent's viaf and lcnaf attributes are set from the response. If the
    response also carries a non-empty name that differs from the supplied
    one, the supplied name is kept as an alias and the response name becomes
    the agent's name.

    Arguments:
        name: Agent name as found in the source record
        role: Role assigned to the agent; also selects the query type

    Returns:
        The constructed Agent, enriched when the lookup returned a match.
    """
    newAgent = Agent(name=name, role=role)

    queryStr = '{}{}'.format(
        'https://dev-platform.nypl.org/api/v0.1/research-now/viaf-lookup?queryName=',
        quote_plus(name)
    )
    if role in ['publisher', 'manufacturer']:
        queryStr = '{}&{}'.format(queryStr, 'queryType=corporate')

    # NOTE(review): no timeout is passed to requests.get here
    viafResp = requests.get(queryStr)
    responseJSON = viafResp.json()
    logger.debug(responseJSON)
    if 'viaf' in responseJSON:
        # Read the name once with .get so a match without a 'name' key
        # neither raises KeyError nor blanks out the agent's name
        viafName = responseJSON.get('name')
        if viafName and viafName != name:
            newAgent.aliases.append(name)
            newAgent.name = viafName
        newAgent.viaf = responseJSON.get('viaf', None)
        newAgent.lcnaf = responseJSON.get('lcnaf', None)

    return newAgent