def decode_record(self, record):
    r"""Parse one pseudo-MARC text record into a ``Record`` object.

    The input is a newline-separated text dump. The first line must start
    with ``LEADER``; every following line is read by fixed column position:
    the tag is columns 0-2, the two indicators are columns 3 and 4, and the
    field data starts at column 6. A line whose tag area starts with a
    space is a continuation of the previous field and is appended to it,
    separated by a single space.

    Field text is decoded as latin1, passed through ``unescape_entities``
    and re-encoded as UTF-8. Data of every field that is not a numeric tag
    below ``010`` is prefixed with ``a`` so that the leading text becomes
    subfield ``a``; subfields are separated by ``|``.

    Blank lines, and continuation lines that appear before any tagged
    field, are ignored. Lines too short to carry indicators get blank
    (space) indicators.

    :param record: the raw pseudo-MARC text of a single record.
    :returns: the populated ``Record``, or ``None`` when the text does not
        start with a ``LEADER`` line or the record has no ``245`` field.

    >>> reader = Reader('http://opac.uthsc.edu', 2)
    >>> raw = "\nLEADER 00000cas 2200517 a 4500 \n001 1481253 \n003 OCoLC \n005 19951109120000.0 \n008 750727c19589999fr qrzp b 0 b0fre d \n010 sn 86012727 \n022 0003-3995 \n030 AGTQAH \n035 0062827|bMULS|aPITT NO. 0639600000|asa64872000|bFULS \n040 MUL|cMUL|dFUL|dOCL|dCOO|dNYG|dHUL|dSER|dAIP|dNST|dAGL|dDLC\n |dTUM \n041 0 engfre|bgeritaspa \n042 nsdp \n049 TUMS \n069 1 A32025000 \n210 0 Ann. genet. \n222 0 Annales de genetique \n229 00 Annales de genetique \n229 Ann Genet \n242 00 Annals on genetics \n245 00 Annales de genetique. \n260 Paris :|bExpansion scientifique,|c1958-2004. \n300 v. :|bill. ;|c28 cm. \n310 Quarterly \n321 Two no. a year \n362 0 1,1958-47,2004. \n510 1 Excerpta medica \n510 1 Index medicus|x0019-3879 \n510 2 Biological abstracts|x0006-3169 \n510 2 Chemical abstracts|x0009-2258 \n510 2 Life sciences collection \n510 0 Bulletin signaletique \n510 0 Current contents \n546 French and English, with summaries in German, Italian, and\n Spanish. \n550 Journal of the Societe francaise de genetique. \n650 2 Genetics|vPeriodicals. \n710 2 Societ\xe9 fran\xe7aise de genetique. \n785 00 |tEuropean journal of medical genetics. \n856 41 |uhttp://library.uthsc.edu/ems/eresource/3581|zFull text \n at ScienceDirect: 43(1) Jan 2000 - 47(4) Dec 2004 \n936 Unknown|ajuin 1977 \n"
    >>> record = reader.decode_record(raw)
    >>> print record.title
    Annales de genetique
    """
    lines = record.strip().split('\n')

    # Anything that does not open with a LEADER line is not a record.
    if lines[0][0:6] != 'LEADER':
        return None

    marc_record = Record()
    marc_record.leader = lines[0][7:].strip()

    # First pass: merge continuation lines into one entry per field.
    raw_fields = []
    for line in lines[1:]:
        if not line.strip():
            # Blank line inside the record: there is no tag, indicators or
            # data to read, so skip it instead of indexing past its end.
            continue

        tag = line[:3]
        # NOTE(review): unescape_entities is defined elsewhere; presumably
        # it resolves HTML entities in the scraped text -- confirm.
        data = unescape_entities(line[6:].decode('latin1')).encode('utf8')

        if tag.startswith(' '):
            if not raw_fields:
                # Continuation with no preceding field to attach to.
                continue
            previous = raw_fields[-1]
            # Wrapped text is re-joined with exactly one space.
            previous['value'] = "%s %s" % (previous['value'], data.strip())
            previous['raw'] = "%s%s" % (previous['raw'], line.strip())
        else:
            # Numeric tags below 010 keep their data verbatim; all other
            # fields get an implicit leading subfield code "a".
            if not (tag < '010' and tag.isdigit()):
                data = "a%s" % (data,)
            raw_fields.append({
                'tag': tag,
                # Slices never raise; a truncated line yields a blank
                # indicator rather than an IndexError.
                'indicator1': line[3:4] or ' ',
                'indicator2': line[4:5] or ' ',
                'value': data.strip(),
                'raw': line.strip(),
            })

    # Second pass: build Field objects and split out the subfields.
    for raw in raw_fields:
        data = raw['value'].strip()
        field = Field(tag=raw['tag'],
                      indicators=[raw['indicator1'], raw['indicator2']],
                      data=data)
        if not field.is_control_field():
            for sub in data.split('|'):
                if not sub:
                    # Empty piece (leading, trailing or doubled '|').
                    continue
                try:
                    # First character is the subfield code, rest the value.
                    field.add_subfield(sub[0].strip(), sub[1:].strip())
                except Exception:
                    # Deliberately best-effort: a subfield that cannot be
                    # added is dropped rather than failing the record.
                    continue
        marc_record.add_field(field)

    marc_record.parse_leader()

    # Disregard record if no title present
    if not marc_record.get_fields('245'):
        return None
    return marc_record