Example #1
0
def rowParser(row, columns, countryCodes):
    """Parse a single HathiTrust item entry (corresponding to an item-level
    record in the SFR model) into the SFR data model and pass the resulting
    object to Kinesis for introduction into the SFR data pipeline.

    This method is a manager that handles methods around a HathiRecord object.
    Each method creates/enhances a part of the SFR metadata object, allowing
    for the object to both be built up and its components easily treated
    as separate components if necessary.

    Arguments:
    row -- list of fields from the HathiTrust source CSV file
    columns -- list of columns that corresponds to the source row
    countryCodes -- dict of country code and name translations

    Returns:
    tuple of ('success', 'HathiTrust Item <htid>') once the work record has
    been handed to the Kinesis stream named by the OUTPUT_STREAM environment
    variable

    Raises:
    ProcessingError -- when building the data model raises a DataError, or
    when the Kinesis write raises a KinesisError. The original exception is
    attached as the cause.
    """

    logger.info('Reading entry for HathiTrust item {}'.format(row[0]))

    logger.debug('Generating source dict from row and column names')
    # This quickly builds a dictionary with column names that can be used to
    # retrieve specific values (zip stops at the shorter of the two lists)
    hathiDict = dict(zip(columns, row))
    # Generate a hathi record object with the source dict
    hathiRec = HathiRecord(hathiDict)

    try:
        # Generate an SFR-compliant object
        hathiRec.buildDataModel(countryCodes)
    except DataError as err:
        logger.error('Unable to process record {}'.format(
            hathiRec.ingest['htid']))
        logger.debug(err.message)
        # Chain explicitly so the traceback shows the DataError as the cause
        # rather than as an unrelated error raised while handling it
        raise ProcessingError('DataError', err.message) from err

    try:
        logger.debug('Writing hathi record {} to kinesis for ingest'.format(
            hathiRec.work.primary_identifier.identifier))
        KinesisOutput.putRecord(
            {
                'status': 200,
                'type': 'work',
                'method': 'insert',
                'data': hathiRec.work
            }, os.environ['OUTPUT_STREAM'])
    except KinesisError as err:
        logger.error('Unable to output record {} to Kinesis'.format(
            hathiRec.ingest['htid']))
        logger.debug(err.message)
        # Same explicit chaining as above, for the stream write failure
        raise ProcessingError('KinesisError', err.message) from err

    # On success, return tuple containing status and identifier, verifies
    # record was passed to next step in the data pipeline
    return ('success', 'HathiTrust Item {}'.format(hathiRec.ingest['htid']))
Example #2
0
    def test_build_data_model(self):
        # Smoke test: buildDataModel should run to completion when every
        # builder step on the record has been replaced with a stub.
        sourceRow = dict(
            title='Work Test',
            description='1st of 4',
            bib_key='0000000',
            htid='test.000000000',
            gov_doc='f',
            author='Author, Test',
            copyright_date='2019',
            rights='test_rights',
        )
        record = HathiRecord(sourceRow)

        # Stub the individual builder methods on this instance so that only
        # buildDataModel itself is exercised.
        stubbedSteps = ('buildWork', 'buildInstance', 'buildItem',
                        'createRights')
        for stepName in stubbedSteps:
            setattr(record, stepName, MagicMock())

        # A plain string stands in for the country code lookup here.
        record.buildDataModel('countryCodes')

        # Only checks that the record object is still a HathiRecord.
        self.assertIsInstance(record, HathiRecord)