Example #1
def process_marcpatterns(params, transforms, input_model, main_phase=False):
    if main_phase:
        # Need to sort our way through the input model so that the materializations occur
        # at the same place each time, otherwise canonicalization fails due to the
        # addition of the subfield context (at the end of materialize())

        # XXX Is the int() cast necessary? If not we could do key=operator.itemgetter(0)
        input_model_iter = sorted(params['input_model'], key=lambda x: int(x[0]))
    else:
        input_model_iter = params['input_model']
    params['to_postprocess'] = []
    for lid, marc_link in input_model_iter:
        origin, taglink, val, attribs = marc_link
        if taglink == MARCXML_NS + '/leader':
            params['leader'] = leader = val
            continue
        #Sort out attributes
        params['indicators'] = indicators = { k: v for k, v in attribs.items() if k.startswith('ind') }
        params['subfields'] = subfields = attribs.copy() # preserve class
        for k in list(subfields.keys()):
            if k[:3] in ('tag', 'ind'):
                del subfields[k]
        params['code'] = tag = attribs['tag']
        if taglink.startswith(MARCXML_NS + '/control'):
            #No indicators on control fields. Turn them off, in effect
            indicator_list = ('#', '#')
            key = 'tag-' + tag
            if tag == '006':
                params['fields006'].append(val)
            if tag == '007':
                params['fields007'].append(val)
            if tag == '008':
                params['field008'] = val
            if main_phase:
                params['transform_log'].append((tag, key))
                params['fields_used'].append((tag,))
        elif taglink.startswith(MARCXML_NS + '/data'):
            indicator_list = ((attribs.get('ind1') or ' ')[0].replace(' ', '#'), (attribs.get('ind2') or ' ')[0].replace(' ', '#'))
            key = 'tag-' + tag
            #logger.debug('indicators: ', repr(indicators))
            #indicator_list = (indicators['ind1'], indicators['ind2'])
            if main_phase: params['fields_used'].append(tuple([tag] + list(subfields.keys())))

        #This is where we check each incoming MARC link to see if it matches a transform into an output link (e.g. renaming 001 to 'controlCode')
        to_process = []
        #Start with most specific matches, then to most general

        # "?" syntax in lookups is a single char wildcard
        #First with subfields, with & without indicators:
        for k, v in subfields.items():
            #if indicator_list == ('#', '#'):
            lookups = [
                '{0}-{1}{2}${3}'.format(tag, indicator_list[0], indicator_list[1], k),
                '{0}-?{2}${3}'.format(tag, indicator_list[0], indicator_list[1], k),
                '{0}-{1}?${3}'.format(tag, indicator_list[0], indicator_list[1], k),
                '{0}${1}'.format(tag, k),
            ]
            for valitems in v:
                for lookup in lookups:
                    if lookup in transforms:
                        to_process.append((transforms[lookup], valitems))
                    else:
                        # don't report on subfields for which a code-transform exists,
                        # disregard wildcards
                        if main_phase and tag not in transforms and '?' not in lookup:
                            params['dropped_codes'].setdefault(lookup, 0)
                            params['dropped_codes'][lookup] += 1

        #Now just the tag, with & without indicators
        lookups = [
            '{0}-{1}{2}'.format(tag, indicator_list[0], indicator_list[1]),
            '{0}-?{2}'.format(tag, indicator_list[0], indicator_list[1]),
            '{0}-{1}?'.format(tag, indicator_list[0], indicator_list[1]),
            tag,
        ]

        #Remember how many lookups were successful based on subfields
        subfields_results_len = len(to_process)
        for lookup in lookups:
            if lookup in transforms:
                to_process.append((transforms[lookup], val))

        if main_phase and subfields_results_len == len(to_process) and not subfields:
            # Count as dropped if the field has no subfields and the tag-level lookups found no matches
            params['dropped_codes'].setdefault(tag, 0)
            params['dropped_codes'][tag] += 1

        mat_ent = functools.partial(materialize_entity, ctx_params=params, loop=params['loop'])

        #Apply all the handlers that were found
        for funcinfo, val in to_process:
            #Support multiple actions per lookup
            funcs = funcinfo if isinstance(funcinfo, tuple) else (funcinfo,)

            for func in funcs:
                extras = {
                    WORKID: params['workid'],
                    IID: params['instanceids'][0],
                    'indicators': indicators,
                    'logger': params['logger'],
                    'postprocessing': [],
                    'inputns': MARC,
                }
                #Build Versa processing context
                #Should we include indicators?
                #Should we be passing in taglink rather than tag?
                ctx = bfcontext((origin, tag, val, subfields), input_model,
                                    params['output_model'], extras=extras,
                                    base=params['vocabbase'], idgen=mat_ent,
                                    existing_ids=params['existing_ids'])
                func(ctx)
                params['to_postprocess'].extend(ctx.extras['postprocessing'])

        if main_phase and not to_process:
            #Nothing else has handled this data field; go to the fallback
            fallback_rel_base = '../marcext/tag-' + tag
            if not subfields:
                #Fallback for control field: Captures MARC tag & value
                params['output_model'].add(I(params['workid']), I(iri.absolutize(fallback_rel_base, params['vocabbase'])), val)
            for k, v in subfields.items():
                #Fallback for data field: Captures MARC tag, indicators, subfields & value
                fallback_rel = '{0}-{1}{2}-{3}'.format(
                    fallback_rel_base, indicator_list[0].replace('#', 'X'),
                    indicator_list[1].replace('#', 'X'), k)
                #params['transform_log'].append((code, fallback_rel))
                for valitem in v:
                    try:
                        params['output_model'].add(I(params['workid']), I(iri.absolutize(fallback_rel, params['vocabbase'])), valitem)
                    except ValueError as e:
                        control_code = list(marc_lookup(input_model, '001')) or ['NO 001 CONTROL CODE']
                        dumb_title = list(marc_lookup(input_model, '245$a')) or ['NO 245$a TITLE']
                        params['logger'].warning('{}\nSkipping statement for {}: "{}"'.format(e, control_code[0], dumb_title[0]))

    extra_stmts = set() # prevent duplicate statements
    extra_transforms = params['extra_transforms']
    for origin, k, v in itertools.chain(
                extra_transforms.process_leader(params),
                extra_transforms.process_006(params['fields006'], params),
                extra_transforms.process_007(params['fields007'], params),
                extra_transforms.process_008(params['field008'], params)):
        v = v if isinstance(v, tuple) else (v,)
        for item in v:
            o = origin or I(params['workid'])
            if o and (o, k, item) not in extra_stmts:
                params['output_model'].add(o, k, item)
                extra_stmts.add((o, k, item))
    return
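The wildcard lookup scheme above is easier to follow with concrete keys. A minimal, runnable sketch, assuming a hypothetical 245 field with indicators '1'/'0' and subfield $a (none of these sample values come from the library):

def build_lookups(tag, ind1, ind2, code):
    #Subfield lookups, most specific first: exact indicators, then a one-char
    #'?' wildcard for either indicator, then the bare tag$subfield form
    subfield_lookups = [
        '{0}-{1}{2}${3}'.format(tag, ind1, ind2, code),
        '{0}-?{1}${2}'.format(tag, ind2, code),
        '{0}-{1}?${2}'.format(tag, ind1, code),
        '{0}${1}'.format(tag, code),
    ]
    #Tag-level lookups follow the same pattern without the subfield part
    tag_lookups = [
        '{0}-{1}{2}'.format(tag, ind1, ind2),
        '{0}-?{1}'.format(tag, ind2),
        '{0}-{1}?'.format(tag, ind1),
        tag,
    ]
    return subfield_lookups, tag_lookups

sub_keys, tag_keys = build_lookups('245', '1', '0', 'a')
print(sub_keys)  # ['245-10$a', '245-?0$a', '245-1?$a', '245$a']
print(tag_keys)  # ['245-10', '245-?0', '245-1?', '245']
#Every key found in the transforms mapping is appended to to_process, so a
#specific match and a general match can both fire for the same field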
Example #2
def record_handler( loop, model, entbase=None, vocabbase=BL, limiting=None,
                    plugins=None, ids=None, postprocess=None, out=None,
                    logger=logging, transforms=TRANSFORMS,
                    extra_transforms=default_extra_transforms(),
                    canonical=False, **kwargs):
    '''
    loop - asyncio event loop
    model - the Versa model for the record
    entbase - base IRI used for IDs of generated entity resources
    limiting - mutable pair of [count, limit] used to control the number of records processed
    '''
    _final_tasks = set() #Tasks for the event loop contributing to the MARC processing

    plugins = plugins or []
    if ids is None: ids = idgen(entbase)

    #FIXME: For now always generate instances from ISBNs, but consider working this through the plugins system
    instancegen = isbn_instancegen

    existing_ids = set()
    #Start the process of writing out the JSON representation of the resulting Versa
    if out and not canonical: out.write('[')
    first_record = True

    try:
        while True:
            input_model = yield
            leader = None
            #Add work item record, with actual hash resource IDs based on default or plugged-in algo
            #FIXME: No plug-in support yet
            params = {'input_model': input_model, 'output_model': model, 'logger': logger, 'entbase': entbase, 'vocabbase': vocabbase, 'ids': ids, 'existing_ids': existing_ids, 'plugins': plugins}
            workhash = record_hash_key(input_model)
            workid = materialize_entity('Work', ctx_params=params, loop=loop, hash=workhash)
            is_folded = workid in existing_ids
            existing_ids.add(workid)
            control_code = list(marc_lookup(input_model, '001')) or ['NO 001 CONTROL CODE']
            dumb_title = list(marc_lookup(input_model, '245$a')) or ['NO 245$a TITLE']
            logger.debug('Control code: {0}'.format(control_code[0]))
            logger.debug('Uniform title: {0}'.format(dumb_title[0]))
            logger.debug('Work hash result: {0} from \'{1}\''.format(workid, 'Work' + workhash))

            if entbase:
                workid = I(iri.absolutize(workid, entbase))
            else:
                workid = I(workid)

            folded = [workid] if is_folded else []

            model.add(workid, TYPE_REL, I(iri.absolutize('Work', vocabbase)))

            params['workid'] = workid
            params['folded'] = folded

            #Figure out instances
            params['materialize_entity'] = materialize_entity
            instanceids = instancegen(params, loop, model)
            if instanceids:
                instanceid = instanceids[0]

            params['leader'] = None
            params['workid'] = workid
            params['instanceids'] = instanceids
            params['folded'] = folded
            params['transforms'] = [] # set()
            params['fields_used'] = []
            params['dropped_codes'] = {}
            #Defensive coding against missing leader or 008
            field008 = leader = None
            params['fields006'] = fields006 = []
            params['fields007'] = fields007 = []
            #Prepare cross-references (i.e. 880s)
            #XXX: Figure out a way to declare in TRANSFORMS? We might have to deal with non-standard relationship designators: https://github.com/lcnetdev/marc2bibframe/issues/83
            xrefs = {}
            remove_links = set()
            add_links = []
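            #A $6 linkage value follows the MARC 21 'tag-occurrence[/script]' convention,
            #e.g. '880-01' on a regular field and '245-01/$1' on its 880 partner
            #(illustrative values); the shared occurrence number is what pairs
            #the two fields in the loop below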
            for lid, marc_link in input_model:
                origin, taglink, val, attribs = marc_link
                if taglink == MARCXML_NS + '/leader' or taglink.startswith(MARCXML_NS + '/data/9'):
                    #900 fields are local and might not follow the general xref rules
                    params['leader'] = leader = val
                    continue
                tag = attribs['tag']
                for xref in attribs.get('6', []):
                    xref_parts = xref.split('-')
                    if len(xref_parts) < 2:
                        logger.debug('Invalid $6: {}'.format(xref_parts))
                        continue

                    xreftag, xrefid = xref_parts
                    #Locate the matching taglink
                    if tag == '880' and xrefid.startswith('00'):
                        #Special case, no actual xref, just the non-roman text
                        #Rule for 880s: merge in & add language indicator
                        langinfo = xrefid.split('/')[-1]
                        #Not using langinfo, really, at present because it seems near useless. Eventually we can handle by embedding a lang indicator token into attr values for later postprocessing
                        attribs['tag'] = xreftag
                        add_links.append((origin, MARCXML_NS + '/data/' + xreftag, val, attribs))

                    links = input_model.match(None, MARCXML_NS + '/data/' + xreftag)
                    for link in links:
                        #6 is the cross-reference subfield
                        for dest in link[ATTRIBUTES].get('6', []):
                            if [tag, xrefid] == dest.split('/')[0].split('-'):
                                if tag == '880':
                                    #880s will be handled by merger via xref, so take out for main loop
                                    #XXX: This does, however, make input_model no longer a true representation of the input XML. Problem?
                                    remove_links.add(lid)

                                if xreftag == '880':
                                    #Rule for 880s: merge in & add language indicator
                                    langinfo = dest.split('/')[-1]
                                    #Not using langinfo, really, at present because it seems near useless. Eventually we can handle by embedding a lang indicator token into attr values for later postprocessing
                                    remove_links.add(lid)
                                    copied_attribs = attribs.copy()
                                    for k, v in link[ATTRIBUTES].items():
                                        if k[:3] not in ('tag', 'ind'):
                                            copied_attribs.setdefault(k, []).extend(v)
                                    add_links.append((origin, taglink, val, copied_attribs))

            for lid in remove_links:
                input_model.remove(lid)

            for linfo in add_links:
                input_model.add(*linfo)

            # hook for plugins interested in the input model
            for plugin in plugins:
                if BF_INPUT_TASK in plugin:
                    yield from plugin[BF_INPUT_TASK](loop, input_model, params)

            # need to sort our way through the input model so that the materializations occur
            # at the same place each time, otherwise canonicalization fails due to the
            # addition of the subfield context (at the end of materialize())
            for lid, marc_link in sorted(input_model, key=lambda x: int(x[0])):
                origin, taglink, val, attribs = marc_link
                if taglink == MARCXML_NS + '/leader':
                    params['leader'] = leader = val
                    continue
                #Sort out attributes
                params['indicators'] = indicators = { k: v for k, v in attribs.items() if k.startswith('ind') }
                params['subfields'] = subfields = { k: v for k, v in attribs.items() if k[:3] not in ('tag', 'ind') }
                params['code'] = tag = attribs['tag']
                if taglink.startswith(MARCXML_NS + '/control'):
                    #No indicators on control fields. Turn them off, in effect
                    indicator_list = ('#', '#')
                    key = 'tag-' + tag
                    if tag == '006':
                        params['fields006'].append(val)
                    if tag == '007':
                        params['fields007'].append(val)
                    if tag == '008':
                        params['field008'] = field008 = val
                    params['transforms'].append((tag, key))
                    params['fields_used'].append((tag,))
                elif taglink.startswith(MARCXML_NS + '/data'):
                    indicator_list = ((attribs.get('ind1') or ' ')[0].replace(' ', '#'), (attribs.get('ind2') or ' ')[0].replace(' ', '#'))
                    key = 'tag-' + tag
                    #logger.debug('indicators: ', repr(indicators))
                    #indicator_list = (indicators['ind1'], indicators['ind2'])
                    params['fields_used'].append(tuple([tag] + list(subfields.keys())))

                #This is where we check each incoming MARC link to see if it matches a transform into an output link (e.g. renaming 001 to 'controlCode')
                to_process = []
                #Start with most specific matches, then to most general

                # "?" syntax in lookups is a single char wildcard
                #First with subfields, with & without indicators:
                for k, v in subfields.items():
                    #if indicator_list == ('#', '#'):
                    lookups = [
                        '{0}-{1}{2}${3}'.format(tag, indicator_list[0], indicator_list[1], k),
                        '{0}-?{2}${3}'.format(tag, indicator_list[0], indicator_list[1], k),
                        '{0}-{1}?${3}'.format(tag, indicator_list[0], indicator_list[1], k),
                        '{0}${1}'.format(tag, k),
                    ]
                    for valitems in v:
                        for lookup in lookups:
                            if lookup in transforms:
                                to_process.append((transforms[lookup], valitems))
                            else:
                                # don't report on subfields for which a code-transform exists,
                                # disregard wildcards
                                    if tag not in transforms and '?' not in lookup:
                                        params['dropped_codes'].setdefault(lookup, 0)
                                        params['dropped_codes'][lookup] += 1

                #Now just the tag, with & without indicators
                lookups = [
                    '{0}-{1}{2}'.format(tag, indicator_list[0], indicator_list[1]),
                    '{0}-?{2}'.format(tag, indicator_list[0], indicator_list[1]),
                    '{0}-{1}?'.format(tag, indicator_list[0], indicator_list[1]),
                    tag,
                ]

                #Remember how many lookups were successful based on subfields
                subfields_results_len = len(to_process)
                for lookup in lookups:
                    if lookup in transforms:
                        to_process.append((transforms[lookup], val))

                if subfields_results_len == len(to_process) and not subfields:
                    # Count as dropped if the field has no subfields and the tag-level lookups found no matches
                    params['dropped_codes'].setdefault(tag, 0)
                    params['dropped_codes'][tag] += 1

                mat_ent = functools.partial(materialize_entity, ctx_params=params, loop=loop)
                #Apply all the handlers that were found
                for funcinfo, val in to_process:
                    #Support multiple actions per lookup
                    funcs = funcinfo if isinstance(funcinfo, tuple) else (funcinfo,)

                    for func in funcs:
                        extras = { WORKID: workid, IID: instanceid }
                        #Build Versa processing context
                        #Should we include indicators?
                        #Should we be passing in taglink rather than tag?
                        ctx = bfcontext((origin, tag, val, subfields), input_model, model, extras=extras, base=vocabbase, idgen=mat_ent, existing_ids=existing_ids)
                        func(ctx)

                if not to_process:
                    #Nothing else has handled this data field; go to the fallback
                    fallback_rel_base = '../marcext/tag-' + tag
                    if not subfields:
                        #Fallback for control field: Captures MARC tag & value
                        model.add(I(workid), I(iri.absolutize(fallback_rel_base, vocabbase)), val)
                    for k, v in subfields.items():
                        #Fallback for data field: Captures MARC tag, indicators, subfields & value
                        fallback_rel = '{0}-{1}{2}-{3}'.format(
                            fallback_rel_base, indicator_list[0].replace('#', 'X'),
                            indicator_list[1].replace('#', 'X'), k)
                        #params['transforms'].append((code, fallback_rel))
                        for valitem in v:
                            model.add(I(workid), I(iri.absolutize(fallback_rel, vocabbase)), valitem)

            extra_stmts = set() # prevent duplicate statements
            for origin, k, v in itertools.chain(
                        extra_transforms.process_leader(params),
                        extra_transforms.process_006(fields006, params),
                        extra_transforms.process_007(fields007, params),
                        extra_transforms.process_008(field008, params)):
                v = v if isinstance(v, tuple) else (v,)
                for item in v:
                    o = origin or I(workid)
                    if (o,k,item) not in extra_stmts:
                        model.add(o, k, item)
                        extra_stmts.add((o, k, item))

            instance_postprocess(params)

            logger.debug('+')

            for plugin in plugins:
                #Each plug-in is a task
                #task = asyncio.Task(plugin[BF_MARCREC_TASK](loop, relsink, params), loop=loop)
                if BF_MARCREC_TASK in plugin:
                    yield from plugin[BF_MARCREC_TASK](loop, model, params)
                logger.debug("Pending tasks: %s" % asyncio.Task.all_tasks(loop))
                #FIXME: This blocks and thus serializes the plugin operation, rather than the desired coop scheduling approach
                #For some reason setting to an async task then immediately deferring to the next task via yield from sleep leads to the "yield from wasn't used with future" error (not much clue at: https://codereview.appspot.com/7396044/)
                #yield from asyncio.Task(asyncio.sleep(0.01), loop=loop)
                #yield from asyncio.async(asyncio.sleep(0.01))
                #yield from asyncio.sleep(0.01) #Basically yield to next task

            #Can we somehow move this to passed-in postprocessing?
            if out and not canonical and not first_record: out.write(',\n')
            if out:
                if not canonical:
                    first_record = False
                    last_chunk = None
                    #Using iterencode avoids building one big JSON string in memory, or having to resort to file pointer seeking
                    #Then again it builds a big list of links in memory, so still working on optimization here
                    for chunk in json.JSONEncoder().iterencode([ link for link in model ]):
                        if last_chunk is None:
                            last_chunk = chunk[1:]
                        else:
                            out.write(last_chunk)
                            last_chunk = chunk
                    if last_chunk: out.write(last_chunk[:-1])
            #FIXME: Postprocessing should probably be a task too
            if postprocess: postprocess()
            #limiting--running count of records processed versus the max number, if any
            limiting[0] += 1
            if limiting[1] is not None and limiting[0] >= limiting[1]:
                break
    except GeneratorExit:
        logger.debug('Completed processing {0} record{1}.'.format(limiting[0], '' if limiting[0] == 1 else 's'))
        if out and not canonical: out.write(']')

        #if not plugins: loop.stop()
        for plugin in plugins:
            #Each plug-in is a task
            func = plugin.get(BF_FINAL_TASK)
            if not func: continue
            task = asyncio.Task(func(loop), loop=loop)
            _final_tasks.add(task)
            def task_done(task):
                #print('Task done: ', task)
                _final_tasks.remove(task)
                #logger.debug((plugins))
                #if plugins and len(_final_tasks) == 0:
                    #print("_final_tasks is empty, stopping loop.")
                    #loop = asyncio.get_event_loop()
                #    loop.stop()
            #Once all the plug-in tasks are done, all the work is done
            task.add_done_callback(task_done)
        #print('DONE')
        #raise

    return
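record_handler() here is a push-style coroutine: it parks at `input_model = yield` and receives one parsed Versa model per record. A minimal driver sketch (handler_factory and parsed_records are hypothetical stand-ins, not part of the library):

def drive(handler_factory, parsed_records):
    #handler_factory() stands in for a call like:
    #  record_handler(loop, model, entbase=..., limiting=[0, None], out=out)
    #limiting is the mutable [count, limit] pair from the docstring: the
    #handler increments count per record and breaks once limit is reached
    handler = handler_factory()
    next(handler)                      #prime the coroutine to its first yield
    try:
        for input_model in parsed_records:
            handler.send(input_model)  #push one record's Versa input model
    except StopIteration:
        pass                           #the handler hit its limiting cap
    finally:
        handler.close()                #raises GeneratorExit inside the handler,
                                       #running its completion branch above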
Example #3
def record_handler(loop, relsink, entbase=None, vocabbase=BFZ, limiting=None, plugins=None, ids=None, postprocess=None, out=None, logger=logging, **kwargs):
    '''
    loop - asyncio event loop
    entbase - base IRI used for IDs of generated entity resources
    limiting - mutable pair of [count, limit] used to control the number of records processed
    '''
    _final_tasks = set() #Tasks for the event loop contributing to the MARC processing
    
    plugins = plugins or []
    if ids is None: ids = idgen(entbase)

    #FIXME: For now always generate instances from ISBNs, but consider working this through the plugins system
    instancegen = isbn_instancegen

    existing_ids = set()
    initialize(hashidgen=ids, existing_ids=existing_ids)
    #Start the process of writing out the JSON representation of the resulting Versa
    out.write('[')
    first_record = True
    try:
        while True:
            rec = yield
            leader = None
            #Add work item record, with actual hash resource IDs based on default or plugged-in algo
            #FIXME: No plug-in support yet
            workhash = record_hash_key(rec)
            workid = ids.send('Work:' + workhash)
            existing_ids.add(workid)
            logger.debug('Uniform title from 245$a: {0}'.format(marc_lookup(rec, ['245$a'])))
            logger.debug('Work hash result: {0} from \'{1}\''.format(workid, 'Work' + workhash))

            if entbase: workid = I(iri.absolutize(workid, entbase))
            relsink.add(I(workid), TYPE_REL, I(iri.absolutize('Work', vocabbase)))

            params = {'workid': workid, 'rec': rec, 'logger': logger, 'model': relsink, 'entbase': entbase, 'vocabbase': vocabbase, 'ids': ids, 'existing_ids': existing_ids}

            #Figure out instances
            instanceids = instancegen(params)
            if instanceids:
                instanceid = instanceids[0]

            params['instanceids'] = instanceids
            params['transforms'] = [] # set()
            params['fields_used'] = []
            for row in rec:
                code = None

                if row[0] == LEADER:
                    params['leader'] = leader = row[1]
                elif row[0] == CONTROLFIELD:
                    code, val = row[1], row[2]
                    key = 'tag-' + code
                    if code == '008':
                        params['field008'] = field008 = val
                    params['transforms'].append((code, key))
                    relsink.add(I(instanceid), I(iri.absolutize(key, vocabbase)), val)
                    params['fields_used'].append((code,))
                elif row[0] == DATAFIELD:
                    code, xmlattrs, subfields = row[1], row[2], row[3]
                    #xmlattrs includes the indicators
                    indicators = ((xmlattrs.get('ind1') or ' ')[0].replace(' ', '#'), (xmlattrs.get('ind2') or ' ')[0].replace(' ', '#'))
                    key = 'tag-' + code

                    handled = False
                    params['subfields'] = subfields
                    params['indicators'] = indicators
                    params['fields_used'].append(tuple([code] + list(subfields.keys())))

                    to_process = []
                    #logger.debug(repr(indicators))
                    if indicators != ('#', '#'):
                        #One or other indicator is set, so check the transforms against those as well
                        lookup = '{0}-{1}{2}'.format(*((code,) + indicators))
                        if lookup in TRANSFORMS: to_process.append((TRANSFORMS[lookup], ''))
                    for k, v in subfields.items():
                        lookup = '{0}${1}'.format(code, k)
                        if lookup in TRANSFORMS: to_process.append((TRANSFORMS[lookup], v))

                    if code in TRANSFORMS: to_process.append((TRANSFORMS[code], ''))
                    #if code == '100':
                    #    logger.debug(to_process)

                    #Apply all the handlers that were found
                    for func, val in to_process:
                        #Build Versa processing context
                        ctx = bfcontext(workid, code, [(workid, code, val, subfields)], relsink, base=vocabbase, hashidgen=ids, existing_ids=existing_ids)
                        new_stmts = func(ctx, workid, instanceid)
                        #FIXME: Use add
                        for s in new_stmts: relsink.add(*s)
                        #logger.debug('.')

                    if not to_process:
                        #Nothing else has handled this data field; go to the fallback
                        fallback_rel_base = 'tag-' + code
                        for k, v in subfields.items():
                            fallback_rel = fallback_rel_base + k
                            #params['transforms'].append((code, fallback_rel))
                            relsink.add(I(workid), I(iri.absolutize(fallback_rel, vocabbase)), v)

                params['code'] = code

            special_properties = {}
            for k, v in process_leader(leader):
                special_properties.setdefault(k, set()).add(v)

            for k, v in process_008(field008):
                special_properties.setdefault(k, set()).add(v)
            params['special_properties'] = special_properties

            #We get some repeated values out of leader & 008 processing, and we
            #want to remove dupes, so we work with sets and then convert to lists
            for k, v in special_properties.items():
                special_properties[k] = list(v)
                for item in v:
                    #logger.debug(v)
                    relsink.add(I(instanceid), I(iri.absolutize(k, vocabbase)), item)

            instance_postprocess(params)

            logger.debug('+')

            for plugin in plugins:
                #Each plug-in is a task
                #task = asyncio.Task(plugin[BF_MARCREC_TASK](loop, relsink, params), loop=loop)
                yield from plugin[BF_MARCREC_TASK](loop, relsink, params)
                logger.debug("Pending tasks: %s" % asyncio.Task.all_tasks(loop))
                #FIXME: This blocks and thus serializes the plugin operation, rather than the desired coop scheduling approach
                #For some reason setting to an async task then immediately deferring to the next task via yield from sleep leads to the "yield from wasn't used with future" error (not much clue at: https://codereview.appspot.com/7396044/)
                #yield from asyncio.Task(asyncio.sleep(0.01), loop=loop)
                #yield from asyncio.async(asyncio.sleep(0.01))
                #yield from asyncio.sleep(0.01) #Basically yield to next task

            if not first_record: out.write(',\n')
            first_record = False
            last_chunk = None
            #Using iterencode avoids building one big JSON string in memory, or having to resort to file pointer seeking
            #Then again it builds a big list of links in memory, so still working on optimization here
            for chunk in json.JSONEncoder().iterencode([ link for link in relsink ]):
                if last_chunk is None:
                    last_chunk = chunk[1:]
                else:
                    out.write(last_chunk)
                    last_chunk = chunk
            if last_chunk: out.write(last_chunk[:-1])
            #FIXME: Postprocessing should probably be a task too
            if postprocess: postprocess(rec)
            #limiting--running count of records processed versus the max number, if any
            limiting[0] += 1
            if limiting[1] is not None and limiting[0] >= limiting[1]:
                break
    except GeneratorExit:
        logger.debug('Completed processing {0} record{1}.'.format(limiting[0], '' if limiting[0] == 1 else 's'))
        out.write(']')

        #if not plugins: loop.stop()
        for plugin in plugins:
            #Each plug-in is a task
            task = asyncio.Task(plugin[BF_FINAL_TASK](loop), loop=loop)
            _final_tasks.add(task)
            def task_done(task):
                #print('Task done: ', task)
                _final_tasks.remove(task)
                #logger.debug((plugins))
                #if plugins and len(_final_tasks) == 0:
                    #print("_final_tasks is empty, stopping loop.")
                    #loop = asyncio.get_event_loop()
                #    loop.stop()
            #Once all the plug-in tasks are done, all the work is done
            task.add_done_callback(task_done)
        #print('DONE')
        #raise

    return
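The iterencode loop in both handlers relies on a chunk-shifting trick: drop the leading '[' of the first chunk and the trailing ']' of the last, so each record's list of links can be streamed between a single pair of hand-written brackets. A standalone sketch with illustrative data:

import io, json

def write_inner(out, items):
    last_chunk = None
    for chunk in json.JSONEncoder().iterencode(items):
        if last_chunk is None:
            last_chunk = chunk[1:]     #drop the leading '[' of the first chunk
        else:
            out.write(last_chunk)
            last_chunk = chunk
    if last_chunk:
        out.write(last_chunk[:-1])     #drop the trailing ']' of the last chunk

out = io.StringIO()
out.write('[')
write_inner(out, ['record1-link'])
out.write(',\n')
write_inner(out, ['record2-link'])
out.write(']')
print(out.getvalue())  #one valid JSON array across two writes:
                       #["record1-link",
                       #"record2-link"]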