Python Field.add_subfieldの例、pymarc.Field.add_subfield Pythonの例

コード例 #1

0

ファイルを表示

ファイル: iiitools.py プロジェクト: anarchivist/iiitools

    def decode_record(self, record):
        r"""Parse one "pseudo-MARC" text record into a ``Record``.

        The input is a text block whose first line starts with ``LEADER`` and
        whose following lines each hold a three-character tag, two indicator
        characters (columns 3 and 4) and the field data from column 6 on.
        Lines whose tag area starts with a space are continuations of the
        previous field. Subfields are delimited with ``|``.

        Returns ``None`` when the text does not start with a ``LEADER`` line
        or when the parsed record has no 245 field; otherwise the ``Record``.

        NOTE(review): the body uses ``str.decode`` and the doctest uses the
        ``print`` statement, so this looks like Python 2 code -- confirm
        before running it under Python 3.

        >>> reader = Reader('http://opac.uthsc.edu', 2)
        >>> raw = "\nLEADER 00000cas  2200517 a 4500 \n001    1481253 \n003    OCoLC \n005    19951109120000.0 \n008    750727c19589999fr qrzp   b   0   b0fre d \n010    sn 86012727 \n022    0003-3995 \n030    AGTQAH \n035    0062827|bMULS|aPITT  NO.  0639600000|asa64872000|bFULS \n040    MUL|cMUL|dFUL|dOCL|dCOO|dNYG|dHUL|dSER|dAIP|dNST|dAGL|dDLC\n       |dTUM \n041 0  engfre|bgeritaspa \n042    nsdp \n049    TUMS \n069 1  A32025000 \n210 0  Ann. genet. \n222  0 Annales de genetique \n229 00 Annales de genetique \n229    Ann Genet \n242 00 Annals on genetics \n245 00 Annales de genetique. \n260    Paris :|bExpansion scientifique,|c1958-2004. \n300    v. :|bill. &#59;|c28 cm. \n310    Quarterly \n321    Two no. a year \n362 0  1,1958-47,2004. \n510 1  Excerpta medica \n510 1  Index medicus|x0019-3879 \n510 2  Biological abstracts|x0006-3169 \n510 2  Chemical abstracts|x0009-2258 \n510 2  Life sciences collection \n510 0  Bulletin signaletique \n510 0  Current contents \n546    French and English, with summaries in German, Italian, and\n       Spanish. \n550    Journal of the Societe francaise de genetique. \n650  2 Genetics|vPeriodicals. \n710 2  Societ\xe9 fran\xe7aise de genetique. \n785 00 |tEuropean journal of medical genetics.  \n856 41 |uhttp://library.uthsc.edu/ems/eresource/3581|zFull text \n       at ScienceDirect: 43(1) Jan 2000 - 47(4) Dec 2004 \n936    Unknown|ajuin 1977 \n"
        >>> record = reader.decode_record(raw)
        >>> print record.title
        Annales de genetique
        """

        pseudo_marc = record.strip().split('\n')
        if pseudo_marc[0][0:6] != 'LEADER':
            return None

        record = Record()
        record.leader = pseudo_marc[0][7:].strip()

        raw_fields = []
        for field in pseudo_marc[1:]:
            if not field.strip():
                # Blank line inside the record: nothing to parse.
                continue

            tag = field[:3]
            data = unescape_entities(field[6:].decode('latin1')).encode('utf8')

            if tag.startswith(' '):
                if not raw_fields:
                    # Continuation line with no field before it: there is
                    # nothing to append to, so drop it instead of crashing.
                    continue
                # Continuation text is joined to the previous field with a
                # single separating space.
                data = " %s" % (data.strip(),)
                raw_fields[-1]['value'] = "%s%s" % (raw_fields[-1]['value'], data)
                raw_fields[-1]['raw'] = "%s%s" % (raw_fields[-1]['raw'], field.strip())
            else:
                if len(field) < 5:
                    # Too short to carry both indicator columns.
                    continue
                # Tags below 010 keep their data verbatim; every other field
                # gets an "a" prefix so the text before the first "|" is
                # later read as subfield $a.
                data = data if (tag < '010' and tag.isdigit()) else "a%s" % (data,)
                raw_fields.append({
                    'tag': tag,
                    'indicator1': field[3],
                    'indicator2': field[4],
                    'value': data.strip(),
                    'raw': field.strip()
                })

        for raw in raw_fields:
            tag = raw['tag']
            data = raw['value'].strip()
            field = Field(tag=tag, indicators=[raw['indicator1'], raw['indicator2']], data=data)
            if not field.is_control_field():
                for sub in data.split('|'):
                    if not sub:
                        # Empty piece (e.g. "||" or a trailing "|") has no
                        # subfield code; skip it.
                        continue
                    field.add_subfield(sub[0].strip(), sub[1:].strip())
            record.add_field(field)

        record.parse_leader()

        # Disregard record if no title present
        if not record.get_fields('245'):
            return None
        return record

コード例 #2

0

ファイルを表示

ファイル: marc_helpers.py プロジェクト: jermnelson/aristotle-library-apps

    def processURLs(self,
                    marc_record,
                    proxy_location,
                    public_note='View online - Access limited to subscribers',
                    note_prefix='Available via Internet'):
        """Route each 856 URL through a proxy and note the original URL in a 538.

        All existing 538 fields are removed first. Then, for every 856 field
        that has a subfield u, a 538 field holding ``note_prefix`` plus the
        original URL is added, and the 856 is replaced by a new 856 (indicators
        4 and 0) whose subfield u is the proxied URL and whose subfield z is
        the public note. 856 fields without a subfield u are left untouched.

        :param marc_record: MARC record to modify in place
        :param proxy_location: proxy host prepended to the path and query of
            the original URL; may already start with http:// or https://
        :param public_note: subfield z text; every subfield 3 value of the
            original 856 is appended to it, separated by " - "
        :param note_prefix: text placed before the original URL in the 538 note
        :returns: the same ``marc_record``
        """
        for field538 in marc_record.get_fields('538'):
            marc_record.remove_field(field538)

        # The scheme is only added when the proxy location does not already
        # carry one (http or https); this does not depend on the field.
        if re.match(r'https?://', proxy_location):
            protocol = ''
        else:
            protocol = 'http://'

        for field856 in marc_record.get_fields('856'):
            urls = field856.get_subfields('u')
            if not urls:
                # No URL to proxy in this field; keep it as it is.
                continue
            # Only the path and query of the original URL are kept; the
            # network location is replaced by the proxy location.
            raw_url = urlparse.urlparse(urls[0])
            proxy_raw_url = '%s%s%s?%s' % (protocol,
                                           proxy_location,
                                           raw_url.path,
                                           raw_url.query)
            proxy_url = urlparse.urlparse(proxy_raw_url)

            new538 = Field(tag='538',
                           indicators=[' ', ' '],
                           subfields=['a', '%s, %s' % (note_prefix, raw_url.geturl())])
            marc_record.add_field(new538)

            new856 = Field(tag='856',
                           indicators=['4', '0'],
                           subfields=['u', proxy_url.geturl()])
            # Every subfield 3 of the original 856 is carried into the public
            # note (previously only the last one survived the loop).
            subfield3s = field856.get_subfields('3')
            new_public_note = ' - '.join([public_note] + list(subfield3s))
            new856.add_subfield('z', new_public_note)
            marc_record.remove_field(field856)
            marc_record.add_field(new856)
        return marc_record

コード例 #3

0

ファイルを表示

ファイル: aco_functions.py プロジェクト: NYULibraries/aco-karms

def sort_6_subs(rec):
    """Return a new Record in which every subfield $6 leads its field.

    Fields are copied from ``rec`` in their original order. Control fields,
    fields without a subfield $6, and fields whose first subfield is already
    $6 are reused as they are. Any other field is rebuilt with the same tag
    and indicators: its $6 subfields come first, followed by the remaining
    subfields in their original order.
    """
    ordered_fields = []
    for field in rec.get_fields():
        has_link = (not field.is_control_field()
                    and len(field.get_subfields('6')) > 0)
        if not has_link:
            ordered_fields.append(field)
            continue

        # Each item yielded by the field is indexed as [0] = code, [1] = content.
        pairs = [(sub[0], sub[1]) for sub in field]
        if pairs[0][0] == '6':
            # $6 already leads the field; nothing to reorder.
            ordered_fields.append(field)
            continue

        rebuilt = Field(tag=field.tag,
                        indicators=[field.indicator1, field.indicator2],
                        subfields=[])
        for code, content in pairs:
            if code == '6':
                rebuilt.add_subfield(code, content)
        for code, content in pairs:
            if code != '6':
                rebuilt.add_subfield(code, content)
        ordered_fields.append(rebuilt)

    new_rec = Record(to_unicode=True, force_utf8=True)
    for ordered in ordered_fields:
        new_rec.add_field(ordered)
    return new_rec

コード例 #4

0

ファイルを表示

ファイル: marc_helpers.py プロジェクト: Tutt-Library/marc-batch-app

    def processURLs(
        self,
        marc_record,
        proxy_location,
        public_note="View online - Access limited to subscribers",
        note_prefix="Available via Internet",
    ):
        """Route each 856 URL through a proxy and note the original URL in a 538.

        All existing 538 fields are removed first. Then, for every 856 field
        that has a subfield u, a 538 field holding ``note_prefix`` plus the
        original URL is added, and the 856 is replaced by a new 856 (indicators
        4 and 0) whose subfield u is the proxied URL and whose subfield z is
        the public note. 856 fields without a subfield u are left untouched.

        :param marc_record: MARC record to modify in place
        :param proxy_location: proxy host prepended to the path and query of
            the original URL; may already start with http:// or https://
        :param public_note: subfield z text; every subfield 3 value of the
            original 856 is appended to it, separated by " - "
        :param note_prefix: text placed before the original URL in the 538 note
        :returns: the same ``marc_record``
        """
        for field538 in marc_record.get_fields("538"):
            marc_record.remove_field(field538)

        # The scheme is only added when the proxy location does not already
        # carry one (http or https); this does not depend on the field.
        protocol = "" if re.match(r"https?://", proxy_location) else "http://"

        for field856 in marc_record.get_fields("856"):
            urls = field856.get_subfields("u")
            if not urls:
                # No URL to proxy in this field; keep it as it is.
                continue
            # Only the path and query of the original URL are kept; the
            # network location is replaced by the proxy location.
            raw_url = urllib.parse.urlparse(urls[0])
            proxy_raw_url = "{}{}{}?{}".format(protocol, proxy_location, raw_url.path, raw_url.query)
            proxy_url = urllib.parse.urlparse(proxy_raw_url)

            new538 = Field(
                tag="538", indicators=[" ", " "], subfields=["a", "%s, %s" % (note_prefix, raw_url.geturl())]
            )
            marc_record.add_field(new538)

            new856 = Field(tag="856", indicators=["4", "0"], subfields=["u", proxy_url.geturl()])
            # Every subfield 3 of the original 856 is carried into the public
            # note (previously only the last one survived the loop).
            subfield3s = field856.get_subfields("3")
            new_public_note = " - ".join([public_note] + list(subfield3s))
            new856.add_subfield("z", new_public_note)
            marc_record.remove_field(field856)
            marc_record.add_field(new856)
        return marc_record

コード例 #5

0

ファイルを表示

class JsonHandler:
    """Build Record objects from JSON-style dicts.

    Each record dict is expected to have a "leader" entry and a "fields"
    list of single-entry dicts mapping a tag to either a data string (control
    fields) or a dict with "subfields", "ind1" and "ind2". Finished records
    are collected in ``self.records``.
    """

    def __init__(self):
        """Start with no collected records and no record or field in progress."""
        self.records = []
        self._record = None
        self._field = None
        self._text = []

    @staticmethod
    def _last_item(mapping):
        """Return the last (key, value) pair of `mapping` without removing it.

        Used in place of ``dict.popitem`` so the caller's data is left intact
        and can be processed again.
        """
        return list(mapping.items())[-1]

    def element(self, element_dict, name=None):
        """Converts a JSON `element_dict` to pymarc fields.

        Called without `name` it starts a new record and then walks the
        "leader" and "fields" entries in turn; with `name` set to "leader",
        "fields" or "subfields" it handles just that part of `element_dict`.
        """
        if not name:
            self._record = Record()
            self.element(element_dict, "leader")
        elif name == "leader":
            self._record.leader = element_dict[name]
            self.element(element_dict, "fields")
        elif name == "fields":
            for field in element_dict[name]:
                tag, remaining = self._last_item(field)
                self._field = Field(tag)
                if self._field.is_control_field():
                    self._field.data = remaining
                else:
                    # Subfields are added first, then both indicators.
                    self.element(remaining, "subfields")
                    self._field.indicators.extend(
                        [remaining["ind1"], remaining["ind2"]])
                self._record.add_field(self._field)
            self.process_record(self._record)
        elif name == "subfields":
            for subfield in element_dict[name]:
                code, text = self._last_item(subfield)
                self._field.add_subfield(code, text)

    def elements(self, dict_list):
        """Sends each dict in `dict_list` to `element`; returns `self.records`.

        A single record dict (anything that is not a list) is treated as a
        one-item list.
        """
        if not isinstance(dict_list, list):
            dict_list = [dict_list]
        for rec in dict_list:
            self.element(rec)
        return self.records

    def process_record(self, record):
        """Append `record` to `self.records`."""
        self.records.append(record)

コード例 #6

0

ファイルを表示

ファイル: oso.py プロジェクト: Tutt-Library/marc-batch-app

    def generate538(self, marc_record):
        """Method creates a 538 field following a standard pattern

        The new field has blank indicators and a subfield a reading
        "Available via Internet, <url>", where <url> is subfield u of the
        record's 856 field. If the record has no 856 field, or that field has
        no subfield u, no 538 is added and the record is returned unchanged.

        Args:
            marc_record(pymarc.Record): MARC21 record

        Returns:
            pymarc.Record
        """
        # A missing tag or subfield may surface either as None or as a
        # KeyError from the lookup; both are treated as "no URL available".
        try:
            field856 = marc_record['856']
            original_url = None if field856 is None else field856['u']
        except KeyError:
            original_url = None
        if original_url is None:
            return marc_record

        new538 = Field(tag='538', indicators=[' ', ' '])
        new538.add_subfield(
            'a',
            'Available via Internet, {}'.format(original_url))
        marc_record.add_field(new538)
        return marc_record

コード例 #7

0

ファイルを表示

ファイル: aco-1-xml2mrc-oclc-nums.py プロジェクト: NYULibraries/aco-karms

def create_999_field(rec, oclc_nums):
    """Add a 999 field holding the record's 001 value and its OCLC numbers.

    When the record has no 999 field yet, a new one is built with blank
    indicators, the 001 value in subfield i and one subfield o per entry of
    ``oclc_nums``; it is added (in tag order) to both ``rec_orig`` and
    ``rec``. When the record already has a 999 field nothing is changed and
    an error is reported instead.

    Returns the log message describing what happened (the original code
    appended to an uninitialised ``msg`` and therefore failed on every call).
    """
    msg = ''
    rec_001 = rec.get_fields('001')[0].value()

    rec_999s = rec.get_fields('999')
    if rec_999s:
        msg += 'ERROR-MISC:  Record contains at least one 999 field\n'
        for rec_999 in rec_999s:
            # str() is required: the field object cannot be concatenated
            # to a string directly.
            msg += '   ' + str(rec_999) + '\n'
        return msg

    new_999 = Field(tag='999', indicators=[' ', ' '], subfields=['i', rec_001])
    for oclc_num in oclc_nums:
        new_999.add_subfield('o', oclc_num)

    # NOTE(review): rec_orig is not defined in this function; presumably it
    # is a module-level record in the calling script -- confirm.
    rec_orig.add_ordered_field(new_999)
    rec.add_ordered_field(new_999)
    msg += 'Record 999:  ' + new_999.value() + '\n'
    return msg

コード例 #8

0

ファイルを表示

ファイル: aco-1-xml2mrc-oclc-nums.py プロジェクト: aa173/aco-karms

def create_999_field(rec, oclc_nums):
    """Add a 999 field holding the record's 001 value and its OCLC numbers.

    When the record has no 999 field yet, a new one is built with blank
    indicators, the 001 value in subfield i and one subfield o per entry of
    ``oclc_nums``; it is added (in tag order) to both ``rec_orig`` and
    ``rec``. When the record already has a 999 field nothing is changed and
    an error is reported instead.

    Returns the log message describing what happened (the original code
    appended to an uninitialised ``msg`` and therefore failed on every call).
    """
    msg = ''
    rec_001 = rec.get_fields('001')[0].value()

    rec_999s = rec.get_fields('999')
    if rec_999s:
        msg += 'ERROR-MISC:  Record contains at least one 999 field\n'
        for rec_999 in rec_999s:
            # str() is required: the field object cannot be concatenated
            # to a string directly.
            msg += '   ' + str(rec_999) + '\n'
        return msg

    new_999 = Field(tag='999',
                    indicators=[' ', ' '],
                    subfields=['i', rec_001])
    for oclc_num in oclc_nums:
        new_999.add_subfield('o', oclc_num)

    # NOTE(review): rec_orig is not defined in this function; presumably it
    # is a module-level record in the calling script -- confirm.
    rec_orig.add_ordered_field(new_999)
    rec.add_ordered_field(new_999)
    msg += 'Record 999:  ' + new_999.value() + '\n'
    return msg

コード例 #9

0

ファイルを表示

class JsonHandler:
    """Build Record objects from JSON-style dicts.

    Each record dict is expected to have a 'leader' entry and a 'fields'
    list of single-entry dicts mapping a tag to either a data string (control
    fields) or a dict with 'subfields', 'ind1' and 'ind2'. Finished records
    are collected in ``self.records``.
    """

    def __init__(self):
        """Start with no collected records and no record or field in progress."""
        self.records = []
        self._record = None
        self._field = None
        self._text = []

    @staticmethod
    def _last_item(mapping):
        """Return the last (key, value) pair of `mapping` without removing it.

        Used in place of ``dict.popitem`` so the caller's data is left intact
        and can be processed again.
        """
        return list(mapping.items())[-1]

    def element(self, element_dict, name=None):
        """Convert `element_dict` (or the part selected by `name`) to fields.

        Called without `name` it starts a new record and then walks the
        'leader' and 'fields' entries in turn; with `name` set to 'leader',
        'fields' or 'subfields' it handles just that part of `element_dict`.
        """
        if not name:
            self._record = Record()
            self.element(element_dict, 'leader')
        elif name == 'leader':
            self._record.leader = element_dict[name]
            self.element(element_dict, 'fields')
        elif name == 'fields':
            for field in element_dict[name]:
                tag, remaining = self._last_item(field)
                self._field = Field(tag)
                if self._field.is_control_field():
                    self._field.data = remaining
                else:
                    # Subfields are added first, then both indicators.
                    self.element(remaining, 'subfields')
                    self._field.indicators.extend(
                        [remaining['ind1'], remaining['ind2']])
                self._record.add_field(self._field)
            self.process_record(self._record)
        elif name == 'subfields':
            for subfield in element_dict[name]:
                code, text = self._last_item(subfield)
                self._field.add_subfield(code, text)

    def elements(self, dict_list):
        """Send each dict in `dict_list` to `element`; return `self.records`.

        A single record dict (anything that is not a list) is treated as a
        one-item list.
        """
        if not isinstance(dict_list, list):
            dict_list = [dict_list]
        for rec in dict_list:
            self.element(rec)
        return self.records

    def process_record(self, record):
        """Append `record` to `self.records`."""
        self.records.append(record)

コード例 #10

0

ファイルを表示

ファイル: marc_helpers.py プロジェクト: jermnelson/ppacatalog

    def validate245(self,marc_record):
        """
        Rebuild the first 245 field with a subfield 'h' of
        '[electronic resource]'.

        The original 245 is removed and replaced by a new field with the same
        indicators containing, in order: the first subfield 'a' (a trailing
        '/' is dropped, one trailing space is added), the new subfield 'h',
        every subfield 'b' and every subfield 'c'. The 'h' value ends in
        ' : ' when a subfield 'b' exists, in ' / ' when only a subfield 'c'
        exists, and has no suffix otherwise. A record without a 245 field is
        returned unchanged.

        Parameters:
        `marc_record`: Required, MARC record
        """
        title_fields = marc_record.get_fields('245')
        if not title_fields:
            return marc_record

        medium = '[electronic resource]'
        old245 = title_fields[0]
        marc_record.remove_field(old245)

        title = ''
        titles = old245.get_subfields('a')
        ind1, ind2 = old245.indicators
        if titles:
            title = titles[0]
            if title.endswith('/'):
                title = title[:-1].strip()

        rebuilt = Field(tag='245',
                        indicators=[ind1, ind2],
                        subfields=['a', '%s ' % title])
        remainders = old245.get_subfields('b')
        responsibilities = old245.get_subfields('c')

        # The punctuation that closes $h announces whichever subfield follows.
        if remainders:
            h_value = '%s : ' % medium
        elif responsibilities:
            h_value = '%s / ' % medium
        else:
            h_value = medium
        rebuilt.add_subfield('h', h_value)

        for code, values in (('b', remainders), ('c', responsibilities)):
            for value in values:
                rebuilt.add_subfield(code, value)

        marc_record.add_field(rebuilt)
        return marc_record

コード例 #11

0

ファイルを表示

ファイル: base_converter.py プロジェクト: Tutt-Library/rda-enhancement

    def __format245__(self, field245):
        """Return a new 245 field built from the subfields of `field245`.

        Only subfields a, n, p, b and c are copied, in that order; any other
        subfield (including 'h') is left out. The first subfield a loses a
        trailing '.' or backslash, and ends in ' :' when a subfield b exists
        or in ' /' when only a subfield c exists.

        Args:
            field245(pymarc.Field): 245 field

        Returns:
            pymarc.Field, or None when `field245` is not a 245 field
        """
        if field245.tag != '245':
            return

        title = ''
        titles = field245.get_subfields('a')
        ind1, ind2 = field245.indicators
        if titles:
            title = titles[0]
            if title.endswith(('.', '\\')):
                title = title[:-1].strip()

        rebuilt = Field(tag='245',
                        indicators=[ind1, ind2],
                        subfields=['a', u'{0} '.format(title)])
        remainders = field245.get_subfields('b')
        responsibilities = field245.get_subfields('c')
        numbers = field245.get_subfields('n')
        parts = field245.get_subfields('p')

        # Output order is $a $n $p $b $c.
        for code, values in (('n', numbers), ('p', parts)):
            for value in values:
                rebuilt.add_subfield(code, value)

        # Closing punctuation of $a depends on which subfield comes next.
        if remainders:
            pattern = u'{0} :'
        elif responsibilities:
            pattern = u'{0} /'
        else:
            pattern = None
        if pattern is not None and 'a' in rebuilt.subfields:
            rebuilt['a'] = pattern.format(rebuilt['a'].strip())

        for code, values in (('b', remainders), ('c', responsibilities)):
            for value in values:
                rebuilt.add_subfield(code, value)
        return rebuilt

コード例 #12

0

ファイルを表示

def create_subordinate_records(parent_record, subordinate_data_list):
    """Build one MARC record per entry of `subordinate_data_list`.

    Each entry describes an issue or volume of the journal described by
    `parent_record`. Entries with a 'title_full' get fields 006, 007, 008,
    245 and 506; the 008 language code comes from the parent's first
    language (looked up in `lang_map`) when the parent has one. The parent's
    246 $a, when set, is copied into a 490. Entries with a 'url' get an 856
    holding that URL in $u plus the parent's 856 $a (as $a) and 856 $u
    (as $d) when set.

    Returns the list of new records, one per entry, in input order.
    """
    records = []

    for resource in subordinate_data_list:
        child = Record(force_utf8=True)

        if 'title_full' in resource:
            # Minimal physical description shared by every generated record.
            child.add_field(Field(tag='006', data="m"))
            child.add_field(Field(tag='007', data="cr"))

            fixed_008 = "            o       0eng d"  # defaults to English
            parent_has_language = ('languages' in parent_record
                                   and parent_record['languages'] is not None)
            if parent_has_language:
                language_code = lang_map.get(parent_record['languages'][0], "   ")
                fixed_008 = fixed_008[0:21] + language_code + fixed_008[24:]
            child.add_field(Field(tag='008', data=fixed_008))

            title_field = Field(tag='245',
                                indicators=['0', '0'],
                                subfields=['a', resource['title_full'][:9000]])
            child.add_field(title_field)

            access_field = Field(tag='506',
                                 indicators=['0', '#'],
                                 subfields=["a", "Open access"])
            child.add_field(access_field)

        if parent_record['246']['a']:
            series_field = Field(tag='490',
                                 indicators=['0', '0'],
                                 subfields=['a', parent_record['246']['a']])
            child.add_field(series_field)

        if 'url' in resource:
            # $u is the issue/volume URL; $a and $d come from the parent's 856.
            link_field = Field(tag='856', indicators=['0', '0'])
            link_field.add_subfield('u', resource['url'])
            for target_code, parent_code in (('a', 'a'), ('d', 'u')):
                if parent_record['856'][parent_code]:
                    link_field.add_subfield(target_code,
                                            parent_record['856'][parent_code])
            child.add_field(link_field)

        records.append(child)

    return records

コード例 #13

0

ファイルを表示

ファイル: springer.py プロジェクト: Tutt-Library/marc-batch-app

    def validate300(self, marc_record):
        """Replace every 300 field with one in the following RDA format:

        300  1 online resource (xxvi, 368 pages) : $b illustrations

        For each subfield a of the old field a subfield a starting with
        "1 online resource" is written; when the old text matches
        "<word>, <word> p" the two words are added as "(<word>, <word> pages)".
        A subfield b "illustrations" follows whenever the old text contains
        "illus". A 300 without any subfield a gets the bare
        "1 online resource".

        Args:
            marc_record(pymarc.Record): Input MARC21

        Returns:
            pymarc.Record: Modified MARC21
        """
        pagination_pattern = re.compile(r"(\w+), (\w+) p+")
        illustration_pattern = re.compile(r"illus")

        for old300 in marc_record.get_fields('300'):
            extent = "1 online resource"
            replacement = Field(tag='300', indicators=[' ',' '])
            old_extents = old300.get_subfields('a')
            if not old_extents:
                replacement.add_subfield('a', extent)

            for old_text in old_extents:
                illustrated = illustration_pattern.search(old_text) is not None
                pagination = pagination_pattern.search(old_text)

                if pagination is not None:
                    preface, pages = pagination.groups()
                    # extent keeps growing if the field has several $a.
                    extent = "{} ({}, {} pages)".format(
                        extent,
                        preface.lower(),
                        pages)
                    if illustrated:
                        extent += " :"
                replacement.add_subfield('a', extent)
                if illustrated:
                    replacement.add_subfield('b', 'illustrations')

            marc_record.remove_field(old300)
            marc_record.add_field(replacement)
        return marc_record

コード例 #14

0

ファイルを表示

ファイル: csv2marc.py プロジェクト: jindrichmynarz/MARC2CSV

class CSV2MARC (object):
  """
    Builds MARC records from the rows of a CSV file and writes them to
    a .mrc file next to the input.

    Each row holds, in order: sysno, field tag, field tag occurrence,
    indicator 1, indicator 2, subfield label, subfield label occurrence
    and value. Consecutive rows with the same sysno form one record;
    consecutive rows with the same tag and occurrence form one field.
  """

  def __init__(self):
    """
      Open the CSV file named by the first command-line argument and the
      output file (same path with a .mrc extension), then reset the
      parser state.
    """
    if len(sys.argv) <= 1:
      raise Exception(
        "You need to provide a file path to the CSV file as an argument."
      )
    filepath = sys.argv[1]
    try:
      self.reader = csv.reader(open(filepath, "r"), delimiter = ",")
    except IOError:
      print >>sys.stderr, "Cannot open {0}".format(filepath)
      raise SystemExit

    stem = os.path.splitext(filepath)[0]
    self.file = open("{0}.mrc".format(stem), "w")

    # State variables; False means "nothing seen yet".
    for attribute in (
      "sysno",
      "record",
      "field",
      "fieldTag",
      "fieldTagOccurrence",
      "subfieldLabel",
      "subfieldLabelOccurrence",
      "line",
    ):
      setattr(self, attribute, False)

  def checkFieldChange(self, fieldTag, fieldTagOccurrence):
    """True when the tag or its occurrence differs from the current field."""
    return (self.fieldTag != fieldTag) or (self.fieldTagOccurrence != fieldTagOccurrence)

  def checkRecordChange(self, sysno):
    """True when `sysno` differs from the current record's sysno."""
    return sysno != self.sysno

  def writeMARCRecord(self, record):
    """Write `record` to the output file."""
    MARCWriter(self.file).write(record)

  def getNewRecord(self, sysno):
    """Start an empty record for `sysno`."""
    self.sysno = sysno
    self.record = Record()

  def getNewField(self, line):
    """Start a new field from the parsed row `line` and remember its tag."""
    tag = line["fieldTag"]
    self.fieldTag = tag
    self.fieldTagOccurrence = line["fieldTagOccurrence"]
    if not line["subfieldLabel"]:
      # No subfield label: the row's value is the field's data.
      self.field = Field(tag = tag, data = line["value"])
      return
    # Row carries a subfield: create the field with its indicators only.
    self.field = Field(
      tag = tag,
      indicators = [line["indicator1"], line["indicator2"]]
    )

  def main(self):
    """Read every row, group rows into fields and records, write the records."""
    columns = (
      "sysno",
      "fieldTag",
      "fieldTagOccurrence",
      "indicator1",
      "indicator2",
      "subfieldLabel",
      "subfieldLabelOccurrence",
      "value",
    )
    for cells in self.reader:
      line = dict((name, cells[position]) for position, name in enumerate(columns))
      sysno = line["sysno"]

      if not self.sysno:
        self.getNewRecord(sysno)
      if self.checkRecordChange(sysno):
        # Flush the last field of the previous record, write that record,
        # and start over with a clean field state.
        self.record.add_field(self.field)
        self.field = False
        self.fieldTag = False
        self.writeMARCRecord(self.record)
        self.getNewRecord(sysno)

      if not self.fieldTag:
        self.getNewField(line)
      if self.checkFieldChange(line["fieldTag"], line["fieldTagOccurrence"]):
        self.record.add_field(self.field)
        self.getNewField(line)

      if line["subfieldLabel"]:
        self.field.add_subfield(line["subfieldLabel"], line["value"])

    # The loop never flushes the final field and record itself.
    self.record.add_field(self.field)
    self.writeMARCRecord(self.record)
    self.file.close()

コード例 #15

0

ファイルを表示

ファイル: marc_helpers.py プロジェクト: jermnelson/aristotle-library-apps

    def validate245(self,marc_record):
        """
        Rebuild the first 245 field with a subfield 'h' of
        '[electronic resource]'.

        The original 245 is removed and replaced by a new field with the same
        indicators containing, in order: the first subfield 'a' (a trailing
        '.' or backslash is dropped, one trailing space is added), every
        subfield 'n', every subfield 'p', the new subfield 'h', every
        subfield 'b' and every subfield 'c'. The 'h' value ends in ' : ' when
        a subfield 'b' exists, in ' / ' when only a subfield 'c' exists, and
        has no suffix otherwise. A record without a 245 field is returned
        unchanged.

        Parameters:
        `marc_record`: Required, MARC record
        """
        title_fields = marc_record.get_fields('245')
        if not title_fields:
            return marc_record

        medium = '[electronic resource]'
        old245 = title_fields[0]
        marc_record.remove_field(old245)

        title = ''
        titles = old245.get_subfields('a')
        ind1, ind2 = old245.indicators
        if titles:
            title = titles[0]
            if title.endswith(('.', '\\')):
                title = title[:-1].strip()

        rebuilt = Field(tag='245',
                        indicators=[ind1, ind2],
                        subfields=['a', u'{0} '.format(title)])
        remainders = old245.get_subfields('b')
        responsibilities = old245.get_subfields('c')
        numbers = old245.get_subfields('n')
        parts = old245.get_subfields('p')

        # Output order is $a $n $p $h $b $c.
        for code, values in (('n', numbers), ('p', parts)):
            for value in values:
                rebuilt.add_subfield(code, value)

        # The punctuation that closes $h announces whichever subfield follows.
        if remainders:
            h_value = '{0} : '.format(medium)
        elif responsibilities:
            h_value = '{0} / '.format(medium)
        else:
            h_value = medium
        rebuilt.add_subfield('h', h_value)

        for code, values in (('b', remainders), ('c', responsibilities)):
            for value in values:
                rebuilt.add_subfield(code, value)

        marc_record.add_field(rebuilt)
        return marc_record

コード例 #16

0

ファイルを表示

ファイル: aco_functions.py プロジェクト: NYULibraries/aco-karms

def convert_2_eres_rec(rec, rda_rec):
    """Turn a print-version MARC record into an electronic-resource record.

    The record is modified in place: control fields are rewritten for an
    online resource, print-only and local fields are removed, and the ACO
    note, reproduction note, added entries and a 776 link back to the print
    version are added.

    Args:
        rec: MARC record exposing the pymarc-style API used below
            (``get_fields``, ``remove_field``, ``add_ordered_field``).
        rda_rec: truthy when the record is catalogued under RDA. RDA records
            get 336/337/338 fields; non-RDA records get a 245 $h GMD instead.

    Returns:
        tuple: ``(rec, msg)`` where ``msg`` is the accumulated report text
        (``ERROR-MISC`` lines and 020 diagnostics), possibly empty.

    Raises:
        IndexError: if the record has no 003, 001 or 008 field.
    """
    msg = ''
    rec_003_value = rec.get_fields('003')[0].value()    # the partner's institutional code from the 003
    rec_001_value = rec.get_fields('001')[0].value()    # the partner's local record number (BSN) from the 001

    partner = _ACO_PARTNERS.get(rec_003_value)
    if partner is None:
        inst_name = inst_710a = inst_710b = ''
        msg += 'ERROR-MISC:  003 code - '+rec_003_value+' - did not match any of the partner institutions.\n'
    else:
        inst_name, inst_710a, inst_710b = partner

    if rec_001_value.startswith('o'):    # this OCLC record did not get processed in step 4
        msg += 'ERROR-MISC:  003/001 field values did not change to institutional code and BSN\n'
        msg += '   Record 003/001: '+rec_003_value+'_'+rec_001_value+'\n'
        for rec_035 in rec.get_fields('035'):
            msg += '   '+str(rec_035)+'\n'

    # delete the 005 field
    _remove_fields(rec, '005')

    # Rebuild the 008: bytes 00-05 become today's date (yymmdd) and byte 23
    # becomes 'o' for 'online'. Doing both on the string before the field is
    # created avoids looking the new field up again after insertion.
    today = datetime.date.today()
    old_008 = rec.get_fields('008')[0]
    old_008_data = old_008.value()
    new_008_data = today.strftime('%y%m%d') + old_008_data[6:23] + 'o' + old_008_data[24:]
    rec.remove_field(old_008)
    rec.add_ordered_field(Field(tag='008', data=new_008_data))

    # replace any 006/007 with the electronic-resource versions, reporting what was dropped
    for tag, data in (('006', 'm        d        '), ('007', 'cr cn |||m|||a')):
        for old_field in rec.get_fields(tag):
            msg += 'ERROR-MISC: '+tag+'  '+old_field.value()+'\n'
            rec.remove_field(old_field)
        rec.add_ordered_field(Field(tag=tag, data=data))

    # delete fields that relate to the print version
    _remove_fields(rec, '016', '019', '025', '029', '042', '049')

    # create new 040 field for NNU, replacing the existing 040 field(s)
    _remove_fields(rec, '040')
    cat_lang = 'ara' if rec_003_value == 'LeBAU' else 'eng'
    subfields_040 = ['a', 'NNU', 'b', cat_lang]
    if rda_rec:
        subfields_040 += ['e', 'rda']
    subfields_040 += ['c', 'NNU']
    rec.add_ordered_field(Field(tag='040', indicators=[' ', ' '], subfields=subfields_040))

    # correct the 041 language code field when multiple codes exist in the same subfield
    for rec_041 in rec.get_fields('041'):
        # These accumulate over ALL subfields of this 041; resetting them per
        # subfield would keep only the last subfield in the rebuilt field.
        mult_langs = False
        new_041_subs = []
        for code, value in rec_041:
            if len(value) > 3:        # there are multiple language codes in this 041 subfield
                mult_langs = True
                new_041_subs.extend((code, lang) for lang in re.findall('...', value))
            else:
                new_041_subs.append((code, value))

        if mult_langs:
            new_rec_041 = Field(tag='041', indicators=[rec_041.indicator1, rec_041.indicator2], subfields=[])
            for code, lang in new_041_subs:
                new_rec_041.add_subfield(code, lang)
            rec.remove_field(rec_041)
            rec.add_ordered_field(new_rec_041)

    # correct the 050 indicator 2
    for rec_050 in rec.get_fields('050'):
        if rec_050.indicator2 == ' ':
            rec_050.indicator2 = '4'

    # correct the 082 indicator 1
    for rec_082 in rec.get_fields('082'):
        if rec_082.indicator1 == ' ':
            rec_082.indicator1 = '0'

    if not rda_rec:
        # add GMD to 245$h for "[electronic resource]"
        rec_245s = rec.get_fields('245')
        gmd_added = False
        if len(rec_245s) == 0:
            msg += 'ERROR-MISC: Record is missing a 245 field\n'
        elif len(rec_245s) > 1:
            msg += 'ERROR-MISC: Record has multiple 245 fields\n'
        else:
            rec_245 = rec_245s[0]
            new_rec_245 = Field(tag='245', indicators=[rec_245.indicator1, rec_245.indicator2], subfields=[])
            # delete any existing 245 $h GMD subfields
            for rec_245h in rec_245.get_subfields('h'):
                msg += 'ERROR-MISC: Original record for the print contains a 245$h GMD: '+rec_245h+'\n'
                rec_245.delete_subfield('h')

            # Flatten the field to '|$a...|$b...' so the subfield ORDER can be
            # matched with a regex; the list form keeps the leading '' entry.
            rec_245_str = ''.join('|$'+code+value for code, value in rec_245)
            rec_245_list = rec_245_str.split('|')

            for pattern, gmd_after in _GMD_245_PATTERNS:
                if gmd_added or not pattern.search(rec_245_str):
                    continue
                # enumerate (not list.index) so identical repeated subfields
                # each see their own follower.
                for sub_index, sub in enumerate(rec_245_list):
                    post_gmd_sub_code = ''
                    if sub.startswith(gmd_after) and len(rec_245_list) > sub_index+1:
                        post_gmd_sub_code = rec_245_list[sub_index+1][0:2]
                    new_rec_245, gmd_added = add_ordered_gmd(sub, gmd_after, post_gmd_sub_code, new_rec_245, gmd_added)

            rec.remove_field(rec_245)
            rec.add_ordered_field(new_rec_245)

        if not gmd_added:
            msg += 'ERROR-MISC: GMD did not get added to non-RDA record\n'

        # NEED TO FIGURE OUT HOW TO ADD GMD to corresponding 880 field if it exists

    # delete subfield $c from 300 fields, modify punctuation in subfields $a and $b, and add 'online resource' to subfield $a
    for rec_300 in rec.get_fields('300'):
        rec_300a_values = rec_300.get_subfields('a')
        if not rec_300a_values:
            msg += 'ERROR-MISC: 300 field is missing subfield $a\n'
            continue
        if rec_300a_values[0].startswith('online'):
            continue
        rec_300.delete_subfield('c')
        rec_300a_pgs = rec_300a_values[0].strip(' ;').split(' :')[0]
        rec_300.delete_subfield('a')
        rec_300b_values = rec_300.get_subfields('b')
        if rec_300b_values:
            rec_300b = rec_300b_values[0].strip(' ;')
            rec_300.delete_subfield('b')
            rec_300.add_subfield('a', 'online resource ('+rec_300a_pgs+') :')
            rec_300.add_subfield('b', rec_300b)
        else:
            # there is no subfield $b in the 300
            rec_300.add_subfield('a', 'online resource ('+rec_300a_pgs+')')

    if rda_rec:
        # Replace the print 336/337/338 with the e-resource content, media and carrier types
        _remove_fields(rec, '336', '337', '338')
        for tag, term, source in (('336', 'text', 'rdacontent'),
                                  ('337', 'computer', 'rdamedia'),
                                  ('338', 'online resource', 'rdacarrier')):
            rec.add_ordered_field(Field(tag=tag, indicators=[' ', ' '], subfields=['a', term, '2', source]))

    # add ACO note field
    new_500_aco = Field(tag='500', indicators=[' ', ' '], subfields=['a', 'Part of the Arabic Collections Online (ACO) project, contributed by '+inst_name+'.'])
    rec.add_ordered_field(new_500_aco)

    # delete the print record's reference to other formats (530) and any
    # existing 533 fields (e.g. for microform)
    _remove_fields(rec, '530', '533')

    # add 533 field related to electronic reproduction
    new_533 = Field(tag='533', indicators=[' ', ' '], subfields=['a', 'Electronic reproduction.', 'b', 'New York, N.Y. :', 'c', 'New York University,', 'd', str(today.year)+'.', '5', 'NNU'])
    rec.add_ordered_field(new_533)

    # delete any existing 539 fields (e.g. for microform); no replacement 539 is added
    _remove_fields(rec, '539')

    # add headings referring to the ACO project and partners
    subfields_710 = ['a', inst_710a]
    if not inst_710b == '':
        subfields_710 += ['b', inst_710b]
    rec.add_ordered_field(Field(tag='710', indicators=['2', ' '], subfields=subfields_710))

    new_730 = Field(tag='730', indicators=['0', ' '], subfields=['a', 'Arabic Collections Online.'])
    rec.add_ordered_field(new_730)

    # add a new 776 field referencing the relationship to the print version
    new_776 = Field(tag='776', indicators=['0', '8'], subfields=['i', 'Print version:'])

    # capture name entry from 100 or 110 if they exist and insert into new 776 subfield $a to reference print version
    name_fields = rec.get_fields('100', '110')
    if name_fields:
        new_776a = name_fields[0].value()
        if new_776a.startswith('8'):
            # NOTE(review): presumably drops a leading '880-NN ' linkage ($6) - confirm
            new_776a = new_776a[7:]
        new_776.add_subfield('a', new_776a)

    # capture title entry from 245 and insert into new 776 subfield $t to reference print version
    title_fields = rec.get_fields('245')
    title_values = title_fields[0].get_subfields('a') if title_fields else []
    if title_values:
        new_776.add_subfield('t', title_values[0].rstrip(' /:.,'))
    else:
        msg += 'ERROR-MISC: Record has no 245 $a; 776 $t was not added\n'

    # capture institutional ID entry from 003/001 and insert into new 776 subfield $w to reference print version
    new_776.add_subfield('w', '('+rec_003_value+')'+rec_001_value)

    # move the LCCN (first 010 $a) into the 776 and drop that 010
    rec_010s = rec.get_fields('010')
    if rec_010s:
        rec_010a_values = rec_010s[0].get_subfields('a')
        if rec_010a_values:
            new_776.add_subfield('w', '(DLC)'+rec_010a_values[0])
            rec.remove_field(rec_010s[0])

    # move any OCLC number from the 035s into the 776; every 035 is removed
    for rec_035 in rec.get_fields('035'):
        rec_035a_values = rec_035.get_subfields('a')
        if rec_035a_values and rec_035a_values[0].startswith('(OCoLC)'):
            new_776.add_subfield('w', rec_035a_values[0])
        rec.remove_field(rec_035)

    # Print ISBNs: each 020 $a is re-added as its own 020 $z ("invalid" for
    # the e-resource) and also recorded as a 776 $z.
    print_isbns = []
    for rec_020 in rec.get_fields('020'):
        msg += '020s: YES\n'
        for rec_020a in rec_020.get_subfields('a'):
            msg += '020a: '+str(rec_020a)+'\n'
            print_isbns.append(rec_020a)
        rec.remove_field(rec_020)

    for print_isbn in print_isbns:
        rec.add_ordered_field(Field(tag='020', indicators=[' ', ' '], subfields=['z', print_isbn]))
        new_776.add_subfield('z', print_isbn)

    rec.add_ordered_field(new_776)

    # delete any 090 $h/$i subfields, and the 090 itself if nothing is left
    for rec_090 in rec.get_fields('090'):
        for code in ('h', 'i'):
            for _ in rec_090.get_subfields(code):
                rec_090.delete_subfield(code)
        if rec_090.format_field() == '':
            rec.remove_field(rec_090)

    # delete any local fields (9XXs, OWN, AVA)
    _remove_fields(rec, '852', '903', '907', '910', '938', '945', '950', '955', '981', '987', '994', '998', 'OWN', 'AVA')

    return (rec, msg)


# 003 code -> (institution name for the 500 note, 710 $a, 710 $b or '' for none)
_ACO_PARTNERS = {
    'NNU': ('New York University Libraries', 'New York University.', 'Libraries.'),
    'NIC': ('Cornell University Libraries', 'Cornell University.', 'Libraries.'),
    'NNC': ('Columbia University Libraries', 'Columbia University.', 'Libraries.'),
    'NjP': ('Princeton University Libraries', 'Princeton University.', 'Library.'),
    'LeBAU': ("American University of Beirut's Jafet Memorial Library", 'Jafet Memorial Library.', ''),
    'UaCaAUL': ('American University in Cairo Library', 'American University in Cairo.', 'Library.'),
}

# (pattern over the flattened '|$a...|$b...' 245 string, subfield the GMD follows).
# Tried in order until one results in the GMD being added.
_GMD_245_PATTERNS = (
    (re.compile(r'\$a[^\$]*$'), '$a'),                              # $a not followed by any other subfield
    (re.compile(r'\$a[^\$]*\$[^np]'), '$a'),                        # $a not followed by $n or $p
    (re.compile(r'\$a[^\$]*\$n[^\$]*\$[^np]'), '$n'),               # $a $n not followed by $n or $p
    (re.compile(r'\$a[^\$]*\$p[^\$]*\$[^np]'), '$p'),               # $a $p not followed by $n or $p
    (re.compile(r'\$a[^\$]*\$n[^\$]*\$p[^\$]*\$[^np]'), '$p'),      # $a $n $p not followed by $n or $p
)


def _remove_fields(rec, *tags):
    """Remove every field of ``rec`` whose tag is one of ``tags``."""
    for field in rec.get_fields(*tags):
        rec.remove_field(field)

コード例 #17

0

ファイルを表示

ファイル: marc_helpers.py プロジェクト: Tutt-Library/marc-batch-app

    def validate245(self, marc_record):
        """
        Rebuild the first 245 field so it carries a subfield 'h' holding
        the "[electronic resource]" designation.

        The first 245 is removed and a new one is added in its place with
        subfields in the order $a $n $p $h $b $c. A record without any 245
        is returned untouched.

        Parameters:
        `marc_record`: Required, MARC record

        Returns the same `marc_record`.
        """
        gmd = "[electronic resource]"
        existing = marc_record.get_fields("245")
        if len(existing) == 0:
            return marc_record

        old245 = existing[0]
        marc_record.remove_field(old245)

        # Title proper: first $a, minus one trailing '.' or backslash.
        title = ""
        titles = old245.get_subfields("a")
        ind1, ind2 = old245.indicators
        if len(titles) > 0:
            title = titles[0]
            if title[-1:] in (".", "\\"):
                title = title[:-1].strip()

        rebuilt = Field(
            tag="245",
            indicators=[ind1, ind2],
            subfields=["a", u"{0} ".format(title)],
        )

        remainder_values = old245.get_subfields("b")
        responsibility_values = old245.get_subfields("c")
        number_values = old245.get_subfields("n")
        part_values = old245.get_subfields("p")

        # $n and $p come straight after the title.
        for value in number_values:
            rebuilt.add_subfield("n", value)
        for value in part_values:
            rebuilt.add_subfield("p", value)

        # The punctuation closing $h depends on which subfield follows it:
        # ' : ' before a $b, ' / ' before a $c, nothing when it is last.
        if len(remainder_values) > 0:
            h_value = "{0} : ".format(gmd)
        elif len(responsibility_values) > 0:
            h_value = "{0} / ".format(gmd)
        else:
            h_value = gmd
        rebuilt.add_subfield("h", h_value)

        for value in remainder_values:
            rebuilt.add_subfield("b", value)
        for value in responsibility_values:
            rebuilt.add_subfield("c", value)

        marc_record.add_field(rebuilt)
        return marc_record

コード例 #18

0

ファイルを表示

 #--------------------------------------------
 # Create 506 field for the Rights statement
 # (taken from fields[6]; no 506 is added when it is blank after stripping)
 rights = fields[6].strip()
 if not rights == '':
     rec_506 = Field(tag='506',
                     indicators=[' ', ' '],
                     subfields=['a', rights])
     new_marc_rec.add_ordered_field(rec_506)
 #--------------------------------------------
 # Create 260 field for the Publisher and Date Issued fields
 # (fields[7] is the date -> $c, fields[8] is the publisher -> $b)
 date = fields[7].strip()
 pub = fields[8].strip()
 # The 260 is created empty up front; add_260 records whether any subfield
 # was actually filled in, so an empty 260 is never added to the record.
 rec_260 = Field(tag='260', indicators=[' ', ' '])
 add_260 = False
 if not pub == '':
     rec_260.add_subfield('b', pub)
     add_260 = True
 if not date == '':
     rec_260.add_subfield('c', date)
     add_260 = True
 if add_260:
     new_marc_rec.add_ordered_field(rec_260)
 #--------------------------------------------
 # Create 008 field with Date Issued as bytes 07-10 (Date1) and Language as bytes 35-37
 # Descriptions of the 008 fields are at: http://www.oclc.org/bibformats/en/fixedfield.html
 # For breakdown of 008 byte positions, see: http://www.oclc.org/bibformats/en/fixedfield/008summary.html
 # Today's date as a six-character 'yymmdd' string: two-digit year, then
 # zero-padded month and day.
 curr_date = datetime.date.today()
 yy = str(curr_date.year)[2:].zfill(2)
 mm = str(curr_date.month).zfill(2)
 dd = str(curr_date.day).zfill(2)
 # NOTE(review): presumably the 008 "date entered" bytes 00-05 - confirm
 # against the code that assembles the 008 (not visible in this excerpt).
 entered = yy + mm + dd

コード例 #19

0

ファイルを表示

def json_to_marc(infilename, outfilename):
    """Build a MARC record from a JSON metadata file and write it to disk.

    The JSON document is read from ``infilename`` and mapped onto fields
    006, 007, 008, 022, 245, 246, 260, 506, 520, 856 and 866. When the
    JSON carries a URL (its own or its parent's), the record is written to
    ``outfilename`` and one extra file per subordinate record is written
    next to it, named by replacing ".marc" with "-sub<N>.marc".

    Args:
        infilename: path of the JSON file to read.
        outfilename: path of the MARC file to write.

    Returns:
        None.
    """
    print('Processing: ' + infilename)  # progress message
    with open(infilename, "r") as infile:
        data = json.load(infile)
    record = Record(force_utf8=True)  # create MARC record, enforce Unicode

    # add fields 006, 007 and 008 with minimal physical information to every marc file
    record.add_field(Field(tag='006', data="m"))
    record.add_field(Field(tag='007', data="cr"))

    # the iana language code from the json file is taken, checked against the list of language codes,
    # substituted with its iso639-2 equivalent and put in position 21-24 of the field 008 content
    field008val = "            o       0eng d"  # DEFAULT ENG
    try:
        if 'languages' in data and data['languages'][0] is not None:
            field008val = field008val[0:21] + lang_map.get(
                data['languages'][0], "   ") + field008val[24:]
    except IndexError:
        # an empty language list blanks the language code
        field008val = field008val[0:21] + "   " + field008val[24:]

    record.add_field(Field(tag='008', data=field008val))

    # extract issn, in json 'generic' and/or 'electronic', and put into separate subfields of 022
    if "identifiers" in data and "issn" in data["identifiers"]:
        issn = data["identifiers"]["issn"]
        field_issn = Field(tag='022', indicators=['0', '#'])

        if "generic" in issn:
            field_issn.add_subfield('a', issn["generic"][0])

        if "electronic" in issn:
            field_issn.add_subfield('l', issn["electronic"][0])

        record.add_field(field_issn)

    # title of the series or journal
    if data["is_part_of"] is not None and data["is_part_of"]['title_full']:
        record.add_field(
            Field(tag='245',
                  indicators=['0', '0'],
                  subfields=["a", data["is_part_of"]["title_full"][:9000]]))
    if data["title"]:
        record.add_field(
            Field(tag='246',
                  indicators=['0', '0'],
                  subfields=["a", data["title"][:9000]]))

    if data["year"]:
        record.add_field(
            Field(tag="260",
                  indicators=["#", "#"],
                  subfields=["c", data["year"]]))

    # add field 506 to all records, as not present in all json files
    record.add_field(
        Field(tag='506', indicators=['0', '#'], subfields=["a",
                                                           "Open access"]))

    # some json files contain a very long description; the maximum length of data in a variable field
    # in MARC21 is 9,999 bytes, so here only a certain amount of content is put into the 520 field
    if data["description"]:
        record.add_field(
            Field(tag='520',
                  indicators=['2', '#'],
                  subfields=["a", data["description"][:9000]]))

    # keep together the journal url, host and domain as different subfields of field 856
    # check if either exists, before initializing a new field instance
    # NOTE(review): the 866 and all file output below are nested under this
    # URL check, so a JSON file without any URL produces no output at all.
    # Kept as found - confirm whether that is intended.
    if data['url'] or (data['is_part_of'] is not None
                       and data['is_part_of']['url']):
        field = Field(tag='856', indicators=['0', '0'])
        if data['domain']:
            field.add_subfield('a', data['domain'])

        if data['is_part_of'] is not None and data['is_part_of']['url']:
            field.add_subfield('d', data['is_part_of']['url'])

        if data['url']:
            field.add_subfield('u', data['url'])

        record.add_field(field)

        if data["volume"]:
            record.add_field(
                Field(tag='866',
                      indicators=['0', '0'],
                      subfields=["a", data["volume"]]))

        # output marc file with same filename in Output directory
        with open(outfilename, 'wb') as out:
            out.write(record.as_marc())

        # execute function for creating separate records for subordinate resources;
        # default to no records so the loop below is safe when there are none
        subordinate_records = []
        if data['subordinate_resources'] is not None:
            subordinate_records = create_subordinate_records(
                record, data['subordinate_resources'])

        # add counter and "-sub" to filenames of subordinate records
        for counter, subordinate_record in enumerate(subordinate_records):
            sub_filename = outfilename.replace(
                ".marc", "-sub" + str(counter) + ".marc")
            with open(sub_filename, 'wb') as out:
                out.write(subordinate_record.as_marc())