Beispiel #1
0
def parse_data(data):
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        #print root.tag
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
        elif '{http://www.loc.gov/MARC21/slim}record' == root.tag:
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
        else:
            print 'unrecognized XML format'
            return None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(init_dict=obj)
    else:
        #Marc Binary
        if len(data) != int(data[:5]):
            return json.dumps({'success':False, 'error':'Bad MARC length'})
    
        rec = MarcBinary(data)
        edition = read_edition(rec)
        edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)

    parse_meta_headers(edition_builder)
    
    return edition_builder.get_dict()
Beispiel #2
0
def parse_data(data):
    """
    Takes POSTed data and determines the format, and returns an Edition record
    suitable for adding to OL.

    :param str data: Raw data
    :rtype: (dict|None, str|None)
    :return: (Edition record, format (rdf|opds|marcxml|json|marc)) or (None, None)
    """
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif '{http://www.loc.gov/MARC21/slim}record' == root.tag:
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(
                init_dict=edition)
            format = 'marcxml'
        else:
            print 'unrecognized XML format'
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=obj)
        format = 'json'
    else:
        #Marc Binary
        if len(data) < MARC_LENGTH_POS or len(data) != int(
                data[:MARC_LENGTH_POS]):
            raise DataError('no-marc-record')
        rec = MarcBinary(data)

        edition = read_edition(rec)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=edition)
        format = 'marc'

    parse_meta_headers(edition_builder)

    return edition_builder.get_dict(), format
Beispiel #3
0
def metaxml_to_edition_dict(root):

    ia_to_ol_map = {
        'identifier': 'ocaid',
        'creator': 'author',
        'date': 'publish_date',
        'boxid': 'ia_box_id',
    }

    edition_builder = import_edition_builder()

    for element in root.iter():
        #print("got %s -> %s" % (element.tag, element.text))

        if 'collection' == element.tag:
            key = 'subject'
            values = parse_collection(element.text)
        elif 'isbn' == element.tag:
            key, values = parse_isbn(element.text)
        elif element.tag in ia_to_ol_map:
            key = ia_to_ol_map[element.tag]
            values = [element.text]
        else:
            key = element.tag
            values = [element.text]

        for value in values:
            if key.startswith('ia_'):
                edition_builder.add(key, value, restrict_keys=False)
            else:
                edition_builder.add(key, value)

    return edition_builder.get_dict()
Beispiel #4
0
def metaxml_to_edition_dict(root):

    ia_to_ol_map = {
        'identifier' : 'ocaid',
        'creator'    : 'author',
        'date'       : 'publish_date',
        'boxid'      : 'ia_box_id',
    }

    edition_builder = import_edition_builder()

    for element in root.iter():
        #print("got %s -> %s" % (element.tag, element.text))

        if 'collection' == element.tag:
            key = 'subject'
            values = parse_collection(element.text)
        elif 'isbn' == element.tag:
            key, values = parse_isbn(element.text)
        elif element.tag in ia_to_ol_map:
            key = ia_to_ol_map[element.tag]
            values = [element.text]
        else:
            key = element.tag
            values = [element.text]

        for value in values:
            if key.startswith('ia_'):
                edition_builder.add(key, value, restrict_keys=False)
            else:
                edition_builder.add(key, value)

    return edition_builder.get_dict()
Beispiel #5
0
def parse_data(data):
    """
    Takes POSTed data and determines the format, and returns an Edition record
    suitable for adding to OL.

    :param str data: Raw data
    :rtype: (dict|None, str|None)
    :return: (Edition record, format (rdf|opds|marcxml|json|marc)) or (None, None)
    """
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif '{http://www.loc.gov/MARC21/slim}record' == root.tag:
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
            format = 'marcxml'
        else:
            print('unrecognized XML format')
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(init_dict=obj)
        format = 'json'
    else:
        #Marc Binary
        if len(data) < MARC_LENGTH_POS or len(data) != int(data[:MARC_LENGTH_POS]):
            raise DataError('no-marc-record')
        rec = MarcBinary(data)

        edition = read_edition(rec)
        edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
        format = 'marc'

    parse_meta_headers(edition_builder)

    return edition_builder.get_dict(), format
Beispiel #6
0
def parse_data(data):
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        #print root.tag
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif '{http://www.loc.gov/MARC21/slim}record' == root.tag:
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(
                init_dict=edition)
            format = 'marcxml'
        else:
            print 'unrecognized XML format'
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=obj)
        format = 'json'
    else:
        #Marc Binary
        if len(data) != int(data[:5]):
            return json.dumps({'success': False, 'error': 'Bad MARC length'})

        rec = MarcBinary(data)
        edition = read_edition(rec)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=edition)
        format = 'marc'

    parse_meta_headers(edition_builder)

    return edition_builder.get_dict(), format
Beispiel #7
0
def parse(root):
    edition_builder = import_edition_builder.import_edition_builder()

    for e in root:
        if isinstance(e.tag, basestring):
            #print e.tag
            if e.tag in parser_map:
                key = parser_map[e.tag][0]
                (new_key, val) = parser_map[e.tag][1](e, key)
                if new_key:
                    edition_builder.add(new_key, val)

    return edition_builder
Beispiel #8
0
def parse(root):
    edition_builder = import_edition_builder.import_edition_builder()

    for e in root:
        if isinstance(e.tag, six.string_types):
            #print e.tag
            if e.tag in parser_map:
                key = parser_map[e.tag][0]
                (new_key, val) = parser_map[e.tag][1](e, key)
                if new_key:
                    edition_builder.add(new_key, val)

    return edition_builder
Beispiel #9
0
def parse(root):
    edition_builder = import_edition_builder.import_edition_builder()
    
    for e in root.iter():
        if isinstance(e.tag, basestring): 
            #print e.tag
            if e.tag in parser_map:
                key = parser_map[e.tag][0]
                (new_key, val) = parser_map[e.tag][1](e, key)
                if new_key:
                    if isinstance(val, list):
                        for v in val:
                            edition_builder.add(new_key, v)
                    else:
                        edition_builder.add(new_key, val)
    return edition_builder
Beispiel #10
0
def parse(root):
    edition_builder = import_edition_builder.import_edition_builder()

    for e in root.iter():
        if isinstance(e.tag, six.string_types):
            #print e.tag
            if e.tag in parser_map:
                key = parser_map[e.tag][0]
                (new_key, val) = parser_map[e.tag][1](e, key)
                if new_key:
                    if isinstance(val, list):
                        for v in val:
                            edition_builder.add(new_key, v)
                    else:
                        edition_builder.add(new_key, val)
    return edition_builder
Beispiel #11
0
def parse_data(data):
    """
    Takes POSTed data and determines the format, and returns an Edition record
    suitable for adding to OL.

    :param str data: Raw data
    :rtype: (dict|None, str|None)
    :return: (Edition record, format (rdf|opds|marcxml|json|marc)) or (None, None)
    """
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif '{http://www.loc.gov/MARC21/slim}record' == root.tag:
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
            format = 'marcxml'
        else:
            print 'unrecognized XML format'
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(init_dict=obj)
        format = 'json'
    else:
        # Special case to load IA records, DEPRECATED: use import/ia endpoint
        # Just passing ia:foo00bar is enough to load foo00bar from IA.
        if data.startswith("ia:"):
            source_records = [data]
            itemid = data[len("ia:"):]

            metadata = ia.get_metadata(itemid)
            if not metadata:
                raise DataError("invalid-ia-identifier")

            # see ia_importapi to address `imagecount` limitations
            status = ia.get_item_status(itemid, metadata)
            if status != 'ok':
                raise DataError(status)

            try:
                rec = get_marc_record_from_ia(itemid)

                # skip serials
                if rec and rec.leader()[7] == 's':
                    raise DataError("item-is-serial")
            except IOError:
                raise DataError("no-marc-record")

            if not rec:
                raise DataError("no-marc-record")
        else:
            source_records = None
            itemid = None

            #Marc Binary
            if len(data) != int(data[:5]):
                return json.dumps({'success':False, 'error':'Bad MARC length'})

            rec = MarcBinary(data)

        edition = read_edition(rec)
        if source_records:
            edition['source_records'] = source_records
            edition['ocaid'] = itemid
        edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
        format = 'marc'

    parse_meta_headers(edition_builder)

    return edition_builder.get_dict(), format
Beispiel #12
0
def parse_data(data):
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        #print root.tag
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif '{http://www.loc.gov/MARC21/slim}record' == root.tag:
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
            format = 'marcxml'
        else:
            print 'unrecognized XML format'
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(init_dict=obj)
        format = 'json'
    else:
        # Special case to load IA records
        # Just passing ia:foo00bar is enough to load foo00bar from IA.
        if data.startswith("ia:"):
            source_records = [data]
            itemid = data[len("ia:"):]

            metadata = ia.get_metadata(itemid)
            if not metadata:
                raise DataError("invalid-ia-identifier")

            if not ia.edition_from_item_metadata(itemid, metadata):
                raise DataError("item-not-a-book")

            try:
                rec = get_marc_record_from_ia(itemid)

                # skip serials
                if rec.leader()[7] == 's':
                    raise DataError("item-is-serial")
            except IOError:
                raise DataError("no-marc-record")

            if not rec:
                raise DataError("no-marc-record")
        else:
            source_records = None
            itemid = None

            #Marc Binary
            if len(data) != int(data[:5]):
                return json.dumps({'success':False, 'error':'Bad MARC length'})

            rec = MarcBinary(data)

        edition = read_edition(rec)
        if source_records:
            edition['source_records'] = source_records
            edition['ocaid'] = itemid
        edition_builder = import_edition_builder.import_edition_builder(init_dict=edition)
        format = 'marc'

    parse_meta_headers(edition_builder)
    
    return edition_builder.get_dict(), format
Beispiel #13
0
def parse_data(data):
    data = data.strip()
    if -1 != data[:10].find('<?xml'):
        root = etree.fromstring(data)
        #print root.tag
        if '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}RDF' == root.tag:
            edition_builder = import_rdf.parse(root)
            format = 'rdf'
        elif '{http://www.w3.org/2005/Atom}entry' == root.tag:
            edition_builder = import_opds.parse(root)
            format = 'opds'
        elif '{http://www.loc.gov/MARC21/slim}record' == root.tag:
            if root.tag == '{http://www.loc.gov/MARC21/slim}collection':
                root = root[0]
            rec = MarcXml(root)
            edition = read_edition(rec)
            edition_builder = import_edition_builder.import_edition_builder(
                init_dict=edition)
            format = 'marcxml'
        else:
            print 'unrecognized XML format'
            return None, None
    elif data.startswith('{') and data.endswith('}'):
        obj = json.loads(data)
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=obj)
        format = 'json'
    else:
        # Special case to load IA records
        # Just passing ia:foo00bar is enough to load foo00bar from IA.
        if data.startswith("ia:"):
            source_records = [data]
            itemid = data[len("ia:"):]

            metadata = ia.get_metadata(itemid)
            if not metadata:
                raise DataError("invalid-ia-identifier")

            if not ia.edition_from_item_metadata(itemid, metadata):
                raise DataError("item-not-a-book")

            try:
                rec = get_marc_record_from_ia(itemid)
            except IOError:
                raise DataError("no-marc-record")

            if not rec:
                raise DataError("no-marc-record")
        else:
            source_records = None
            itemid = None

            #Marc Binary
            if len(data) != int(data[:5]):
                return json.dumps({
                    'success': False,
                    'error': 'Bad MARC length'
                })

            rec = MarcBinary(data)

        edition = read_edition(rec)
        if source_records:
            edition['source_records'] = source_records
            edition['ocaid'] = itemid
        edition_builder = import_edition_builder.import_edition_builder(
            init_dict=edition)
        format = 'marc'

    parse_meta_headers(edition_builder)

    return edition_builder.get_dict(), format