Ejemplo n.º 1
0
def create_authorlist_ticket(matching_fields, identifier, queue):
    """
    Open a ticket reporting UNDEFINED affiliations found while extracting
    the authors of a collaboration authorlist.

    :param matching_fields: list of (tag, field_instances) for UNDEFINED nodes
    :type matching_fields: list

    :param identifier: OAI identifier of record
    :type identifier: string

    :param queue: the RT queue to send a ticket to
    :type queue: string

    :return: the result of create_ticket(); documented upstream as the ID
             of the created ticket, or None on failure
    :rtype: int or None
    """
    subject = "[OAI Harvest] UNDEFINED affiliations for record %s" % (identifier,)

    # Render every field instance with the tag it was found under, keeping
    # the order in which the fields were handed in.
    rendered_fields = []
    for tag, field_instances in matching_fields:
        for field in field_instances:
            rendered_fields.append(field_xml_output(field, tag))

    # Body of the ticket; the placeholders are filled in from `values` below.
    template = """
Harvested record with identifier %(ident)s has had its authorlist extracted and contains some UNDEFINED affiliations.

To see the record, go here: %(baseurl)s/search?p=%(ident)s

If the record is not there yet, try again later. It may take some time for it to load into the system.

List of unidentified fields:
%(fields)s
    """
    values = {
        'ident': identifier,
        'baseurl': CFG_SITE_URL,
        'fields': "\n".join(rendered_fields),
    }
    text = template % values
    return create_ticket(queue, subject, text)
Ejemplo n.º 2
0
def create_authorlist_ticket(matching_fields, identifier, queue):
    """
    Submit a ticket listing the fields with UNDEFINED affiliations that were
    produced when extracting authors from a collaboration authorlist.

    :param matching_fields: list of (tag, field_instances) for UNDEFINED nodes
    :type matching_fields: list

    :param identifier: OAI identifier of record
    :type identifier: string

    :param queue: the RT queue to send a ticket to
    :type queue: string

    :return: whatever create_ticket() returns; documented upstream as the
             ID of the created ticket, or None on failure
    :rtype: int or None
    """
    subject = "[OAI Harvest] UNDEFINED affiliations for record %s" % identifier if \
        not isinstance(identifier, tuple) else \
        "[OAI Harvest] UNDEFINED affiliations for record %s" % (identifier, )

    # One XML snippet per field instance, grouped by tag in input order.
    xml_snippets = []
    for tag, field_instances in matching_fields:
        xml_snippets.extend(
            [field_xml_output(field, tag) for field in field_instances])
    fields_listing = "\n".join(xml_snippets)

    substitutions = dict(ident=identifier,
                         baseurl=CFG_SITE_URL,
                         fields=fields_listing)
    text = """
Harvested record with identifier %(ident)s has had its authorlist extracted and contains some UNDEFINED affiliations.

To see the record, go here: %(baseurl)s/search?p=%(ident)s

If the record is not there yet, try again later. It may take some time for it to load into the system.

List of unidentified fields:
%(fields)s
    """ % substitutions
    return create_ticket(queue, subject, text)
Ejemplo n.º 3
0
def create_MARC(extracted_image_data, tarball, refno):
    """
    Take the images and their captions and the name of the associated
    tarball and build a MARCXML record for them.

    @param: extracted_image_data ([(string, string or list, list, list), ...]):
        a list of (image_location, caption, <unused>, contexts) tuples for
        this document. A caption given as a list or tuple of strings is
        joined with single spaces. Entries whose image_location is shorter
        than 3 characters are skipped.
    @param: tarball (string): path of the tarball the images came from; it
        is only used to derive the '<tarball>_plots/' directory prefix that
        is stripped from image locations when building the document name.
    @param: refno (string): the value for the 001 controlfield, or None.
        The controlfield is only emitted when refno consists solely of
        digits.

    @return: the MARCXML record as a string (one element per line), or an
        empty string if no FFT field was generated. This function does not
        write any file itself.
    """
    root_dir = os.path.dirname(tarball) + os.sep + os.path.basename(tarball) + \
        '_plots' + os.sep

    marcxml_fft = []
    # Running number of the images actually kept; used as a zero-padded
    # prefix of each description. Skipped images do not consume a number.
    index = 0
    for (image_location, caption, dummy, contexts) in extracted_image_data:
        if len(image_location) < 3:
            # If not useful URL -> move on to next
            continue

        # Merge subfolder into docname, until root directory: drop the
        # extension, then flatten path separators and strip ';' and ':'.
        relative_image_path = image_location.replace(root_dir, '')
        docname = "_".join(relative_image_path.split('.')[:-1])
        docname = docname.replace('/', '_').replace(';', '').replace(':', '')

        if isinstance(caption, (list, tuple)):
            caption = " ".join(caption)

        description = "%05d %s" % (index, caption.replace(' : ', ''))

        if len(caption) < 3:
            # Caption too short to be meaningful: attach as a hidden
            # "PlotMisc" document instead of a regular plot.
            subfields = [
                ('a', image_location),
                ('t', "PlotMisc"),
                ('d', description),
                ('n', docname),
                ('o', "HIDDEN"),
            ]
            marcxml_fft.append(field_xml_output((subfields, ' ', ' ', None), "FFT"))
        else:
            # Add PLOT MARCXML
            subfields = [
                ('a', image_location),
                ('t', "Plot"),
                ('d', description),
                ('n', docname),
            ]
            marcxml_fft.append(field_xml_output((subfields, ' ', ' ', None), "FFT"))
            if contexts:
                # Add CONTEXT MARCXML: a hidden companion file located at
                # '<image_location>.context' under the same document name.
                subfields = [
                    ('a', "%s.context" % (image_location,)),
                    ('t', "Plot"),
                    ('f', ".png;context"),
                    ('n', docname),
                    ('o', "HIDDEN"),
                ]
                marcxml_fft.append(field_xml_output((subfields, ' ', ' ', None), "FFT"))
        index += 1

    if not marcxml_fft:
        # Nothing usable was extracted: no record at all.
        return ""

    # For building result MARCXML
    marcxml = ['<record>']

    # Datafield := (subfields, ind1, ind2, controlfield)
    # Subfield := (code, value)

    #FIXME: Determine what to do without refno
    if refno and refno.isdigit():
        field = (None, ' ', ' ', refno)
        marcxml.append(field_xml_output(field, '001'))
    marcxml.extend(marcxml_fft)
    marcxml.append('</record>')
    return '\n'.join(marcxml)