Ejemplo n.º 1
0
    def decode_record(self, record):
        r"""
        >>> reader = Reader('http://opac.uthsc.edu', 2)
        >>> raw = "\nLEADER 00000cas  2200517 a 4500 \n001    1481253 \n003    OCoLC \n005    19951109120000.0 \n008    750727c19589999fr qrzp   b   0   b0fre d \n010    sn 86012727 \n022    0003-3995 \n030    AGTQAH \n035    0062827|bMULS|aPITT  NO.  0639600000|asa64872000|bFULS \n040    MUL|cMUL|dFUL|dOCL|dCOO|dNYG|dHUL|dSER|dAIP|dNST|dAGL|dDLC\n       |dTUM \n041 0  engfre|bgeritaspa \n042    nsdp \n049    TUMS \n069 1  A32025000 \n210 0  Ann. genet. \n222  0 Annales de genetique \n229 00 Annales de genetique \n229    Ann Genet \n242 00 Annals on genetics \n245 00 Annales de genetique. \n260    Paris :|bExpansion scientifique,|c1958-2004. \n300    v. :|bill. &#59;|c28 cm. \n310    Quarterly \n321    Two no. a year \n362 0  1,1958-47,2004. \n510 1  Excerpta medica \n510 1  Index medicus|x0019-3879 \n510 2  Biological abstracts|x0006-3169 \n510 2  Chemical abstracts|x0009-2258 \n510 2  Life sciences collection \n510 0  Bulletin signaletique \n510 0  Current contents \n546    French and English, with summaries in German, Italian, and\n       Spanish. \n550    Journal of the Societe francaise de genetique. \n650  2 Genetics|vPeriodicals. \n710 2  Societ\xe9 fran\xe7aise de genetique. \n785 00 |tEuropean journal of medical genetics.  \n856 41 |uhttp://library.uthsc.edu/ems/eresource/3581|zFull text \n       at ScienceDirect: 43(1) Jan 2000 - 47(4) Dec 2004 \n936    Unknown|ajuin 1977 \n"
        >>> record = reader.decode_record(raw)
        >>> print record.title
        Annales de genetique
        """
        
        pseudo_marc = record.strip().split('\n')
        raw_fields = []
        if pseudo_marc[0][0:6] == 'LEADER':
            record = Record()
            record.leader = pseudo_marc[0][7:].strip()
        else:
            return None

        for field in pseudo_marc[1:]:
            tag = field[:3]
            data = unescape_entities(field[6:].decode('latin1')).encode('utf8')

            if tag.startswith(' '):
                # Additional field data needs to be prepended with an extra space 
                # for certain fields ...
                #for special_tag in ('55','260'):
                #    data = " %s" % (data,) if tag.startswith(special_tag) else data
                data = " %s" % (data.strip(),)
                raw_fields[-1]['value'] = "%s%s" % (raw_fields[-1]['value'], data)
                raw_fields[-1]['raw'] = "%s%s" % (raw_fields[-1]['raw'], field.strip())
            else:
                data = data if (tag < '010' and tag.isdigit()) else "a%s" % (data,)
                raw_fields.append({
                    'tag': tag, 
                    'indicator1': field[3], 
                    'indicator2': field[4], 
                    'value': data.strip(), 
                    'raw': field.strip()
                })
        
        for raw in raw_fields:
            tag = raw['tag']
            data = raw['value'].strip()
            field = Field(tag=tag, indicators=[raw['indicator1'], raw['indicator2']], data=data)
            if not field.is_control_field():
                for sub in data.split('|'):
                    try:
                        field.add_subfield(sub[0].strip(), sub[1:].strip())
                    except Exception:
                        # Skip blank/empty subfields
                        continue
            record.add_field(field)
            
        record.parse_leader()
        
        # Disregard record if no title present
        if not record.get_fields('245'):
            return None
        else:
            return record
Ejemplo n.º 2
0
class JsonHandler:
    """Handle JSON."""
    def __init__(self):
        """Init."""
        self.records = []
        self._record = None
        self._field = None
        self._text = []

    def element(self, element_dict, name=None):
        """Converts a JSON `element_dict` to pymarc fields."""
        if not name:
            self._record = Record()
            self.element(element_dict, "leader")
        elif name == "leader":
            self._record.leader = element_dict[name]
            self.element(element_dict, "fields")
        elif name == "fields":
            fields = iter(element_dict[name])
            for field in fields:
                tag, remaining = field.popitem()
                self._field = Field(tag)
                if self._field.is_control_field():
                    self._field.data = remaining
                else:
                    self.element(remaining, "subfields")
                    self._field.indicators.extend(
                        [remaining["ind1"], remaining["ind2"]])
                self._record.add_field(self._field)
            self.process_record(self._record)
        elif name == "subfields":
            subfields = iter(element_dict[name])
            for subfield in subfields:
                code, text = subfield.popitem()
                self._field.add_subfield(code, text)

    def elements(self, dict_list):
        """Sends `dict_list` to `element`."""
        if type(dict_list) is not list:
            dict_list = [dict_list]
        for rec in dict_list:
            self.element(rec)
        return self.records

    def process_record(self, record):
        """Append `record` to `self.records`."""
        self.records.append(record)
Ejemplo n.º 3
0
class JsonHandler:
    def __init__(self):
        self.records = []
        self._record = None
        self._field = None
        self._text = []

    def element(self, element_dict, name=None):
        if not name:
            self._record = Record()
            self.element(element_dict, 'leader')
        elif name == 'leader':
            self._record.leader = element_dict[name]
            self.element(element_dict, 'fields')
        elif name == 'fields':
            fields = iter(element_dict[name])
            for field in fields:
                tag, remaining = field.popitem()
                self._field = Field(tag)
                if self._field.is_control_field():
                    self._field.data = remaining
                else:
                    self.element(remaining, 'subfields')
                    self._field.indicators.extend(
                        [remaining['ind1'], remaining['ind2']])
                self._record.add_field(self._field)
            self.process_record(self._record)
        elif name == 'subfields':
            subfields = iter(element_dict[name])
            for subfield in subfields:
                code, text = subfield.popitem()
                self._field.add_subfield(code, text)

    def elements(self, dict_list):
        if type(dict_list) is not list:
            dict_list = [dict_list]
        for rec in dict_list:
            self.element(rec)
        return self.records

    def process_record(self, record):
        self.records.append(record)