def decode_record(self, record):
    r"""Parse a plain-text "pseudo-MARC" record into a ``Record``.

    The text must begin (after surrounding whitespace is stripped) with a
    ``LEADER`` line, followed by one line per field.  For each field line
    the first three characters are the tag, the characters at offsets 3
    and 4 are used as the two indicators, and everything from offset 6 on
    is the field data, with ``|`` introducing each subfield after the
    first.  A line whose tag area starts with a space continues the
    previous field.

    Field data is decoded as Latin-1, passed through
    ``unescape_entities`` and re-encoded as UTF-8.

    :param record: the raw record text (a byte string).
    :returns: the populated ``Record``, or ``None`` when the input is
        empty, does not start with a ``LEADER`` line, or contains no
        ``245`` field.

    >>> reader = Reader('http://opac.uthsc.edu', 2)
    >>> raw = "\nLEADER 00000cas 2200517 a 4500 \n001 1481253 \n003 OCoLC \n005 19951109120000.0 \n008 750727c19589999fr qrzp b 0 b0fre d \n010 sn 86012727 \n022 0003-3995 \n030 AGTQAH \n035 0062827|bMULS|aPITT NO. 0639600000|asa64872000|bFULS \n040 MUL|cMUL|dFUL|dOCL|dCOO|dNYG|dHUL|dSER|dAIP|dNST|dAGL|dDLC\n |dTUM \n041 0 engfre|bgeritaspa \n042 nsdp \n049 TUMS \n069 1 A32025000 \n210 0 Ann. genet. \n222 0 Annales de genetique \n229 00 Annales de genetique \n229 Ann Genet \n242 00 Annals on genetics \n245 00 Annales de genetique. \n260 Paris :|bExpansion scientifique,|c1958-2004. \n300 v. :|bill. ;|c28 cm. \n310 Quarterly \n321 Two no. a year \n362 0 1,1958-47,2004. \n510 1 Excerpta medica \n510 1 Index medicus|x0019-3879 \n510 2 Biological abstracts|x0006-3169 \n510 2 Chemical abstracts|x0009-2258 \n510 2 Life sciences collection \n510 0 Bulletin signaletique \n510 0 Current contents \n546 French and English, with summaries in German, Italian, and\n Spanish. \n550 Journal of the Societe francaise de genetique. \n650 2 Genetics|vPeriodicals. \n710 2 Societ\xe9 fran\xe7aise de genetique. \n785 00 |tEuropean journal of medical genetics. \n856 41 |uhttp://library.uthsc.edu/ems/eresource/3581|zFull text \n at ScienceDirect: 43(1) Jan 2000 - 47(4) Dec 2004 \n936 Unknown|ajuin 1977 \n"
    >>> record = reader.decode_record(raw)
    >>> print record.title
    Annales de genetique
    """
    # None / empty input carries no record; report it the same way as any
    # other unparseable input instead of failing on ``.strip()``.
    if not record:
        return None

    pseudo_marc = record.strip().split('\n')
    if pseudo_marc[0][0:6] != 'LEADER':
        return None

    parsed = Record()
    parsed.leader = pseudo_marc[0][7:].strip()

    # First pass: merge continuation lines so each entry is one whole field.
    raw_fields = []
    for line in pseudo_marc[1:]:
        if not line.strip():
            # A blank line has neither a tag nor data to contribute.
            continue
        tag = line[:3]
        data = unescape_entities(line[6:].decode('latin1')).encode('utf8')
        if tag.startswith(' '):
            if not raw_fields:
                # Continuation text with no field before it to extend.
                continue
            # Continuation text is joined to the previous field with a
            # single separating space.
            previous = raw_fields[-1]
            previous['value'] = "%s %s" % (previous['value'], data.strip())
            previous['raw'] = "%s%s" % (previous['raw'], line.strip())
        else:
            # Fields other than the numeric tags below 010 get an implicit
            # leading "a" subfield code so the data splits uniformly on "|".
            if not (tag < '010' and tag.isdigit()):
                data = "a%s" % (data,)
            raw_fields.append({
                'tag': tag,
                # Slices, not indexes: a truncated line yields a blank
                # indicator rather than an IndexError.
                'indicator1': line[3:4] or ' ',
                'indicator2': line[4:5] or ' ',
                'value': data.strip(),
                'raw': line.strip(),
            })

    # Second pass: turn the merged lines into Field objects.
    for raw in raw_fields:
        data = raw['value'].strip()
        field = Field(tag=raw['tag'],
                      indicators=[raw['indicator1'], raw['indicator2']],
                      data=data)
        if not field.is_control_field():
            for sub in data.split('|'):
                if not sub:
                    # Skip blank/empty subfields (e.g. from "||").
                    continue
                field.add_subfield(sub[0].strip(), sub[1:].strip())
        parsed.add_field(field)

    parsed.parse_leader()

    # Disregard record if no title present
    if not parsed.get_fields('245'):
        return None
    return parsed
class JsonHandler:
    """Build ``Record`` objects from decoded JSON dictionaries.

    Each record dictionary is read through its ``"leader"`` and
    ``"fields"`` keys.  Every entry of ``"fields"`` is a dictionary whose
    key is the field tag; its value is either the data of a control field
    or a dictionary with ``"subfields"``, ``"ind1"`` and ``"ind2"``.
    Finished records are handed to :meth:`process_record`, which by
    default collects them in ``self.records``.
    """

    def __init__(self):
        """Start with no collected records and nothing in progress."""
        self.records = []
        self._record = None
        self._field = None
        # Not referenced by any method of this class; kept so the set of
        # instance attributes stays unchanged.
        self._text = []

    def element(self, element_dict, name=None):
        """Convert a JSON `element_dict` to pymarc fields.

        `name` selects which part of `element_dict` is handled:

        * falsy (the default): start a new record, then handle "leader";
        * ``"leader"``: store the leader, then handle "fields";
        * ``"fields"``: build every field, add it to the current record
          and pass the finished record to :meth:`process_record`;
        * ``"subfields"``: add every subfield to the current field.

        The input dictionaries are left unmodified.
        """
        if not name:
            self._record = Record()
            self.element(element_dict, "leader")
        elif name == "leader":
            self._record.leader = element_dict[name]
            self.element(element_dict, "fields")
        elif name == "fields":
            for field in element_dict[name]:
                # Pop from a shallow copy: same item and same KeyError on an
                # empty dict as before, but the caller's data is not emptied.
                tag, remaining = dict(field).popitem()
                self._field = Field(tag)
                if self._field.is_control_field():
                    self._field.data = remaining
                else:
                    self.element(remaining, "subfields")
                    self._field.indicators.extend(
                        [remaining["ind1"], remaining["ind2"]])
                self._record.add_field(self._field)
            self.process_record(self._record)
        elif name == "subfields":
            for subfield in element_dict[name]:
                # Copy before popping, for the same reason as above.
                code, text = dict(subfield).popitem()
                self._field.add_subfield(code, text)

    def elements(self, dict_list):
        """Send every record in `dict_list` to :meth:`element`.

        A value that is not a list (including list subclasses counts as a
        list) is treated as a single record.  Returns ``self.records``.
        """
        if not isinstance(dict_list, list):
            dict_list = [dict_list]
        for rec in dict_list:
            self.element(rec)
        return self.records

    def process_record(self, record):
        """Append `record` to `self.records`."""
        self.records.append(record)
class JsonHandler:
    """Turn decoded JSON record dictionaries into ``Record`` objects.

    A record dictionary is read through its ``'leader'`` and ``'fields'``
    keys.  Each entry of ``'fields'`` is a dictionary keyed by the field
    tag, whose value is either control-field data or a dictionary holding
    ``'subfields'``, ``'ind1'`` and ``'ind2'``.  Completed records go to
    :meth:`process_record`, which by default stores them in
    ``self.records``.
    """

    def __init__(self):
        """Create a handler with no records and nothing in progress."""
        self.records = []
        self._record = None
        self._field = None
        # Not referenced by any method of this class; kept so the set of
        # instance attributes stays unchanged.
        self._text = []

    def element(self, element_dict, name=None):
        """Process the part of `element_dict` selected by `name`.

        * falsy `name` (the default): start a new record, then process
          'leader';
        * ``'leader'``: store the leader, then process 'fields';
        * ``'fields'``: build each field, add it to the current record and
          pass the finished record to :meth:`process_record`;
        * ``'subfields'``: add each subfield to the current field.

        The input dictionaries are not modified.
        """
        if not name:
            self._record = Record()
            self.element(element_dict, 'leader')
        elif name == 'leader':
            self._record.leader = element_dict[name]
            self.element(element_dict, 'fields')
        elif name == 'fields':
            for field in element_dict[name]:
                # popitem() on a shallow copy picks the same item (and raises
                # the same KeyError on an empty dict) without emptying the
                # caller's dictionary.
                tag, remaining = dict(field).popitem()
                self._field = Field(tag)
                if self._field.is_control_field():
                    self._field.data = remaining
                else:
                    self.element(remaining, 'subfields')
                    self._field.indicators.extend(
                        [remaining['ind1'], remaining['ind2']])
                self._record.add_field(self._field)
            self.process_record(self._record)
        elif name == 'subfields':
            for subfield in element_dict[name]:
                # Copy before popping, for the same reason as above.
                code, text = dict(subfield).popitem()
                self._field.add_subfield(code, text)

    def elements(self, dict_list):
        """Process every record in `dict_list` and return ``self.records``.

        Anything that is not a list (list subclasses count as lists) is
        treated as a single record.
        """
        if not isinstance(dict_list, list):
            dict_list = [dict_list]
        for rec in dict_list:
            self.element(rec)
        return self.records

    def process_record(self, record):
        """Append `record` to ``self.records``."""
        self.records.append(record)