Ejemplo n.º 1
0
def read(inputfile):
    """Copy records that fail the 336-338 check to a new file and print counts.

    Every record read from ``inputfile`` is passed to ``testFor336To338``.
    Records for which it returns a falsy value are printed and written to
    ``<inputfile>_no338_<ISO timestamp>.mrc``.  Records for which
    ``isSupplement`` returns a truthy value are counted separately.  A
    summary is printed once the input is exhausted.

    Args:
        inputfile: Path of the binary MARC file to scan.
    """
    outputfile = inputfile + "_no338_" + \
        datetime.datetime.now().isoformat() + ".mrc"
    has338Count = 0
    no338Count = 0
    totalCount = 0
    supplements = 0
    with open(inputfile, 'rb') as f:
        reader = MARCReader(f)
        writer = MARCWriter(open(outputfile, 'wb'))
        # try/finally releases the output handle even when an exception
        # other than the two handled below escapes the loop.
        try:
            while True:
                try:
                    record = next(reader)
                    totalCount += 1
                    if not testFor336To338(record):
                        print(record)
                        no338Count += 1
                        try:
                            writer.write(record)
                        except Exception:
                            # Best effort: one unwritable record must not
                            # abort the whole scan.
                            print("Error with writing.")
                    else:
                        has338Count += 1
                    if isSupplement(record):
                        supplements += 1
                except UnicodeDecodeError:
                    print("There was a Unicode error.")
                except StopIteration:
                    print("End of file.")
                    break
        finally:
            writer.close()
    print(
        "{0} / {1} ({2} %) records have no 338 field.".format(
            no338Count, totalCount,
            countPercentage(no338Count, totalCount)))
    print("The file contained {0} supplement records.".format(supplements))
def get_blanks_from_iso(file_name: str, file_name_txt: str) -> None:
    """Split an ISO file into its non-blank records and a list of blank ISBNs.

    Records returned by ``read_iso(file_name)`` are tested with
    ``record_is_blank``.  For each blank record, the entry of
    ``get_isbn_from_txt(file_name_txt)`` at the same index is written to
    ``<stem>_空白isbn.txt``.  Every other record is written, one per line,
    to ``<stem>_纯净数据.iso``.  ``<stem>`` is ``file_name`` without its last
    four characters.

    Args:
        file_name: Path of the source ISO file.
        file_name_txt: Path of the text file passed to ``get_isbn_from_txt``.
    """
    stem = file_name[:-4]
    blank_file_name = stem + "_空白isbn.txt"
    data_file_name = stem + "_纯净数据.iso"
    temp_file_name = stem + "_临时数据.iso"
    blanks = []

    try:
        # The output is created (and truncated) before the input is read,
        # as before; it is now opened once instead of once per record.
        with open(data_file_name, 'w', encoding='utf-8') as data_fp:
            records = read_iso(file_name)
            data_nums = get_isbn_from_txt(file_name_txt)
            for index, record in enumerate(records):
                if record_is_blank(record):
                    # Blank record: remember its ISBN for the blank list.
                    blanks.append(data_nums[index])
                    continue
                # Valid record: serialise it through a temporary file and
                # copy its first line into the clean-data file.
                with open(temp_file_name, 'wb') as fh:
                    MARCWriter(fh).write(record)
                with open(temp_file_name, 'r', encoding='utf-8') as temp_fp:
                    data_fp.write(temp_fp.readline())
                data_fp.write('\n')

        with open(blank_file_name, 'w', encoding='utf-8') as blank_fp:
            blank_fp.writelines(blank_num + "\n" for blank_num in blanks)
    finally:
        # The temporary file only exists if at least one record was valid;
        # removing it unconditionally raised FileNotFoundError otherwise.
        if os.path.exists(temp_file_name):
            os.remove(temp_file_name)
Ejemplo n.º 3
0
    def test_unicode(self):
        """Round-trip a record with a non-ASCII 245 $a through writer and reader."""
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
        # MARC is a binary format, so the file is opened in binary mode to
        # keep the bytes intact on every platform.
        writer = MARCWriter(open('test/foo', 'wb'))
        writer.write(record)
        writer.close()

        with open('test/foo', 'rb') as fh:
            reader = MARCReader(fh)
            # next() works with both the Python 2 and Python 3 iterator
            # protocols, unlike reader.next().
            record = next(reader)
        self.assertEqual(record['245']['a'], unichr(0x1234))
def merge_five_isos(directory_name: str) -> None:
    """Merge up to five per-library ISO files into ``五合一.iso``.

    Files in ``directory_name`` whose names end in ``Berkeley.iso``,
    ``Yale.iso``, ``Michigan.iso``, ``US.iso`` or ``British.iso`` are read
    with ``read_iso`` in that priority order.  For every record position the
    first record that is not blank (per ``record_is_blank``) is kept; if all
    are blank, the record from the last file is used.  The result is written
    one record per line to ``五合一.iso`` inside ``directory_name``.

    Args:
        directory_name: Directory holding the input files and the output.

    Raises:
        IndexError: If no matching file exists, or a later file holds fewer
            records than the first one.
    """
    keywords = {
        "Berkeley.iso": 0,
        "Yale.iso": 1,
        "Michigan.iso": 2,
        "US.iso": 3,
        "British.iso": 4
    }
    file_names = [""] * len(keywords)
    for file_name in os.listdir(directory_name):
        for suffix, slot in keywords.items():
            if file_name.endswith(suffix):
                file_names[slot] = file_name
    # Drop the slots of the files that are missing.
    file_names = [name for name in file_names if name]

    # Load every record of every file that was found.
    # os.path.join replaces the hard-coded "\\" so paths also work off Windows.
    datas = [
        read_iso(os.path.join(directory_name, name)) for name in file_names
    ]
    records = []
    data_num = len(datas[0])
    for i in range(data_num):
        # Take the first non-blank record, searching the files in order.
        for data in datas:
            if not record_is_blank(data[i]):
                records.append(data[i])
                break
        else:
            # Nothing found: keep the (blank) record of the last file.
            records.append(datas[-1][i])

    output_file_name = os.path.join(directory_name, "五合一.iso")
    temp_file_name = "临时文件.iso"
    try:
        with open(output_file_name, 'w', encoding='utf-8') as out_fp:
            for record in records:
                # Serialise the record through a temporary file ...
                with open(temp_file_name, 'wb') as fh:
                    MARCWriter(fh).write(record)
                # ... and copy its first line into the merged file.
                with open(temp_file_name, 'r', encoding='utf-8') as temp_fp:
                    out_fp.write(temp_fp.readline())
                out_fp.write('\n')
    finally:
        # The temporary file is absent when there were no records at all.
        if os.path.exists(temp_file_name):
            os.remove(temp_file_name)
def main():
    """Filter Koha bib records by item validity, or compare them with a CSV.

    Without ``--csv``: every bib record of ``options['INPUT']`` that has at
    least one 952 item accepted by ``validate_item`` is written to the file
    named by ``--output`` (``out.mrc`` when that option is empty), and item
    counts are printed.

    With ``--csv``: the IDs of bib records that have at least one valid item
    are collected and compared with the ``GG_ID_COLUMN`` values of the CSV
    file; the sizes of both sets and of their differences are printed.
    """
    total_count = 0
    valid_count = 0
    with open(options['INPUT'], 'rb') as fh:
        reader = MARCReader(fh, to_unicode=True, force_utf8=True)
        # 1) first mode: write a MARC output file
        if not options['--csv']:
            # The operands were reversed before ('out.mrc' or ...), so the
            # --output option could never take effect.
            output_path = options.get('--output') or 'out.mrc'
            writer = MARCWriter(open(output_path, 'wb'))
            try:
                for record in reader:
                    # whether we'll include the _bib_ record in export file
                    include_record = False
                    # Koha stores item data in 952 fields, one per item
                    for item in record.get_fields('952'):
                        total_count += 1
                        if validate_item(item) is True:
                            valid_count += 1
                            # any valid item means the bib is included
                            include_record = True

                    if include_record:
                        writer.write(record)
            finally:
                # Close the output so all records are flushed to disk.
                writer.close()

            print('Total items: %i | Items included: %i' %
                  (total_count, valid_count))
        # 2) second mode: compare against a GreenGlass CSV export
        else:
            koha_record_ids = set()
            for record in reader:
                total_count += 1
                for item in record.get_fields('952'):
                    if validate_item(item):
                        record_id = record.get_fields(
                            MARC_ID_FIELD)[0].get_subfields(
                                MARC_ID_SUBFIELD)[0]
                        koha_record_ids.add(record_id)
                        # stop looking at items after the first valid one
                        break

            with open(options['--csv'], 'r') as csv_fh:
                gg_record_ids = {
                    row[GG_ID_COLUMN] for row in csv.DictReader(csv_fh)
                }

            print('Total Koha Bibs: %i' % total_count)
            print('Koha Bibs with circulating items: %i ' %
                  len(koha_record_ids))
            print('Total GreenGlass Bibs: %i' % len(gg_record_ids))
            print('Weeded Items (I in GG & not in Koha): %i' %
                  len(gg_record_ids - koha_record_ids))
            print('Added Items (I in Koha & not in GG): %i' %
                  len(koha_record_ids - gg_record_ids))
Ejemplo n.º 6
0
 def __init__(self, name, encoding="utf-8", path=""):
     """Set the record constants and open the output file for writing.

     Arguments:
         name {str} -- file name

     Keyword Arguments:
         encoding {str} -- file encoding (default: {"utf-8"}); not read
             by this method
         path {str} -- location prefix put in front of name (default: {""})
     """
     blank_indicator = ' '
     self.CONST_INDICATOR_1 = blank_indicator
     self.CONST_INDICATOR_2 = blank_indicator
     self.CONST_FIELD_LEADER = "nab a22001211a 4500"
     self.CONST_FIELD_008 = "|2018    ne || ||||   ||   ||eng |"
     # The target is opened in binary mode and handed to the MARC writer.
     target = path + name
     self.writer = MARCWriter(open(target, 'wb'))
Ejemplo n.º 7
0
    def test_writing_unicode(self):
        """Write a record with a non-ASCII 245 $a and check it reads back unchanged."""
        original = Record()
        title_field = Field(245, ['1', '0'], ['a', unichr(0x1234)])
        original.add_field(title_field)
        original.leader = '         a              '
        marc_out = MARCWriter(open('test/foo', 'wb'))
        marc_out.write(original)
        marc_out.close()

        marc_in = MARCReader(open('test/foo', 'rb'), to_unicode=True)
        loaded = next(marc_in)
        self.assertEqual(loaded['245']['a'], unichr(0x1234))
        marc_in.close()

        # Clean up the scratch file created above.
        os.remove('test/foo')
Ejemplo n.º 8
0
class file_writer:
    """Writes citation records to a MARC file."""

    def __init__(self, name, encoding="utf-8", path=""):
        """Set the record constants and open the output file for writing.

        Arguments:
            name {str} -- file name

        Keyword Arguments:
            encoding {str} -- file encoding (default: {"utf-8"}); not read
                by this method
            path {str} -- location prefix put in front of name
                (default: {""})
        """
        blank_indicator = ' '
        self.CONST_INDICATOR_1 = blank_indicator
        self.CONST_INDICATOR_2 = blank_indicator
        self.CONST_FIELD_LEADER = "nab a22001211a 4500"
        self.CONST_FIELD_008 = "|2018    ne || ||||   ||   ||eng |"
        target = path + name
        self.writer = MARCWriter(open(target, 'wb'))

    def write_record(self, references, field035="", field008=""):
        """Write one record to the file in ISO 2709 form.

        Arguments:
            references {set(reference)} -- references to store; each one
                contributes the field returned by its to_marc_field()
            field035 -- string holding the data for field 035
            field008 -- string holding the data for field 008; the class
                default is used when it is the empty string
        """
        control_008 = self.CONST_FIELD_008 if field008 == "" else field008
        indicators = [self.CONST_INDICATOR_1, self.CONST_INDICATOR_2]

        record = Record(force_utf8=True)
        record.add_field(Field(tag='008', data=control_008))
        record.add_field(
            Field(tag='035', indicators=indicators,
                  subfields=['a', field035]))
        for reference in references:
            record.add_field(reference.to_marc_field())

        # Put 'n', 'b' and 'a' at leader positions 5, 7 and 18 in one step.
        leader = record.leader
        record.leader = (leader[:5] + 'n' + leader[6:7] + 'b' +
                         leader[8:18] + 'a' + leader[19:])
        self.writer.write(record)

    def close(self):
        """Finish writing and close the file."""
        self.writer.close()
Ejemplo n.º 9
0
def write_marc(outfile, ptype=None):
    """Write Student MARC records to outfile.

    Students whose record cannot be written (TypeError or a Unicode
    encode/decode error) are logged and skipped.

    Args:
        outfile: Path of the MARC file to create.
        ptype: Optional patron type.  When truthy, only students with that
            ``ptype`` are exported; otherwise all students are.
    """
    ecount = count = 0
    # Binary mode: MARC transmission format is bytes, not text.
    writer = MARCWriter(open(outfile, 'wb'))
    try:
        if ptype:
            # filter on patron type
            students = Student.objects.filter(ptype=ptype)
        else:
            students = Student.objects.all()
        for s in students:
            try:
                writer.write(s.as_marc())
                count += 1
            # "except ... as e" is valid on Python 2.6+ and Python 3; the
            # old "except ..., e" form is a syntax error on Python 3.
            except (TypeError, UnicodeDecodeError, UnicodeEncodeError) as e:
                log.info("%s: %s" % (s.student_id, s.full_name()))
                log.info("%s" % (s.as_marc().as_marc(), ))
                log.exception("Error: %s" % (e, ))
                ecount += 1
    finally:
        # Close the output so buffered records reach the disk.
        writer.close()
Ejemplo n.º 10
0
def write_marc(outfile, ptype=None):
    """Write Student MARC records to outfile.

    Students whose record cannot be written (TypeError or a Unicode
    encode/decode error) are logged and skipped.

    Args:
        outfile: Path of the MARC file to create.
        ptype: Optional patron type.  When truthy, only students with that
            ``ptype`` are exported; otherwise all students are.
    """
    ecount = count = 0
    # Binary mode: MARC transmission format is bytes, not text.
    writer = MARCWriter(open(outfile, 'wb'))
    try:
        if ptype:
            # filter on patron type
            students = Student.objects.filter(ptype=ptype)
        else:
            students = Student.objects.all()
        for s in students:
            try:
                writer.write(s.as_marc())
                count += 1
            # "except ... as e" is valid on Python 2.6+ and Python 3; the
            # old "except ..., e" form is a syntax error on Python 3.
            except (TypeError, UnicodeDecodeError, UnicodeEncodeError) as e:
                log.info("%s: %s" % (s.student_id, s.full_name()))
                log.info("%s" % (s.as_marc().as_marc(),))
                log.exception("Error: %s" % (e,))
                ecount += 1
    finally:
        # Close the output so buffered records reach the disk.
        writer.close()
Ejemplo n.º 11
0
 def MakeMARCFile(self, recs, filename):
     """Write ``recs`` to ``<filename minus extension>_OUT.mrc`` and return them.

     Arguments:
         recs -- iterable of records to write
         filename -- name the output file name is derived from

     Returns:
         The ``recs`` argument, unchanged.
     """
     # Escaped dot in a raw string: the old pattern '.\w*$' let '.' match
     # any character, so an extension-less name such as "records" was
     # replaced by the empty string.
     filenameNoExt = re.sub(r'\.\w*$', '', filename)
     mrcFileName = filenameNoExt + '_OUT.mrc'
     print('\n<Compiling file to MARC>\n')
     writer = MARCWriter(open(mrcFileName, "wb"))
     try:
         for r in recs:
             try:
                 # NOTE(review): this passes the serialised record, not the
                 # record itself; if MARCWriter.write() needs a record
                 # object, every record takes the fallback below -- confirm.
                 writer.write(r.as_marc())
             except Exception:
                 # Fallback: force UTF-8 and let the writer serialise it.
                 r.force_utf8 = True
                 writer.write(r)
     finally:
         # Release the output file even if a record cannot be written.
         writer.close()
     return recs
Ejemplo n.º 12
0
 def test_edit_mixed_code(self):
     """Add a 941 field to each record of mixed-code.dat and re-read the output."""
     # open() in binary mode replaces the Python-2-only file() builtin and
     # keeps the MARC bytes intact.
     with open('test/mixed-code.dat', 'rb') as source:
         reader = MARCReader(
             source,
             to_unicode=True,
             force_utf8=True,
             utf8_handling='ignore'
         )
         writer = MARCWriter(open('test/foo', 'wb'))
         try:
             for record in reader:
                 field = Field(
                     tag='941',
                     indicators=['', ''],
                     subfields=['a', 'x']
                 )
                 record.add_field(field)
                 writer.write(record)
         finally:
             writer.close()
     try:
         with open('test/foo', 'rb') as written:
             for record in MARCReader(written, to_unicode=True):
                 self.assertEqual(type(record), Record)
     finally:
         # Remove the scratch file even when an assertion fails.
         os.remove('test/foo')
Ejemplo n.º 13
0
    def writer(self, data_file):
        """
        Open a data file in append mode and yield a MARCWriter bound to it.

        The file is closed once the generator continues past the yield or
        is closed.

        Args:
            data_file (str): The file basename, joined onto ``self.path``.
        """

        target = os.path.join(self.path, data_file)

        with open(target, 'ab') as handle:
            marc_writer = MARCWriter(handle)
            yield marc_writer
def campus_split():
    '''
    Split each format file in the current directory into one file per campus.

    Files whose names match ``.+(bks|ser|maps|vis|other)\\.mrc`` are read. For
    each campus code, the records that have a 049 subfield a containing that
    code are written to ``<name without extension>_<campus>.mrc``. A record
    is written at most once per campus file, even when several of its 049 $a
    values mention the campus.
    '''
    campuses = ['MNGE', 'MNXN']

    for campus in campuses:

        files = [
            f for f in os.listdir()
            if re.match(r'.+(bks|ser|maps|vis|other)\.mrc', f)
        ]

        for name in files:
            # splitext copes with extra dots in the name; unpacking
            # name.split('.') into two values raised ValueError on them.
            fpref = os.path.splitext(str(name))[0]

            with open(name, 'rb') as f:
                writer = MARCWriter(open(fpref + '_' + campus + '.mrc', 'wb'))
                try:
                    for rec in MARCReader(f):
                        held_by_campus = any(
                            campus in suba
                            for field in rec.get_fields("049")
                            for suba in field.get_subfields("a"))
                        if held_by_campus:
                            writer.write(rec)
                finally:
                    # Close the campus file even if reading fails part-way.
                    writer.close()
Ejemplo n.º 15
0
def output_iso_from_iso(output_file_name: str, records: list) -> None:
    """Write ``records`` to an ``.iso`` file, one record per line.

    The target is ``output_file_name`` with its last four characters
    replaced by ``.iso``; it is created (or truncated) even when ``records``
    is empty.  Each record is serialised with ``MARCWriter`` into the
    temporary file ``临时文件.iso`` in the current directory, and the first
    line of that file is copied to the target.

    Args:
        output_file_name: Path whose last four characters are replaced.
        records: Records to write.
    """
    output_file_name = output_file_name[:-4] + ".iso"
    temp_file_name = "临时文件.iso"
    try:
        # Opening with 'w' truncates any previous output.
        with open(output_file_name, 'w', encoding='utf-8') as out_fp:
            for record in records:
                # Serialise the record through the temporary file ...
                with open(temp_file_name, 'wb') as fh:
                    MARCWriter(fh).write(record)
                # ... and copy its first line into the output file.
                with open(temp_file_name, 'r', encoding='utf-8') as temp_fp:
                    out_fp.write(temp_fp.readline())
                out_fp.write('\n')
    finally:
        # The temporary file never exists for an empty record list, so an
        # unconditional os.remove() raised FileNotFoundError.
        if os.path.exists(temp_file_name):
            os.remove(temp_file_name)
Ejemplo n.º 16
0
def save2marc(outfile, bib):
    """Append one MARC record to ``outfile``.

    Args:
        outfile: Path of the MARC file, opened in binary append mode.
        bib: Record handed to ``MARCWriter.write``.

    Raises:
        OSError: If ``outfile`` cannot be opened.  The original error now
            propagates unchanged; before, it was replaced by an empty
            ``WindowsError`` and then masked by an unbound ``writer`` in the
            ``finally`` clause.
    """
    # The context manager closes the file on success and on failure.
    with open(outfile, 'ab') as fh:
        writer = MARCWriter(fh)
        writer.write(bib)
Ejemplo n.º 17
0
def write_marc21(outfile, bib):
    """Append one MARC 21 record to ``outfile``.

    Args:
        outfile: Path of the MARC file, opened in binary append mode.
        bib: Record handed to ``MARCWriter.write``.

    Raises:
        OSError: If ``outfile`` cannot be opened.  The original error now
            propagates unchanged; before, it was replaced by an empty
            ``WindowsError`` and then masked by an unbound ``writer`` in the
            ``finally`` clause.
    """
    # Binary append mode: MARC records are bytes, so text mode "a" is wrong.
    with open(outfile, "ab") as fh:
        writer = MARCWriter(fh)
        writer.write(bib)
Ejemplo n.º 18
0
    def test_writing_unicode(self):
        """Write a record with a non-ASCII 245 $a and check it reads back unchanged."""
        record = Record()
        record.add_field(Field(245, ['1', '0'], ['a', unichr(0x1234)]))
        record.leader = '         a              '
        # Binary mode keeps the MARC bytes intact on every platform.
        writer = MARCWriter(open('test/foo', 'wb'))
        writer.write(record)
        writer.close()

        try:
            with open('test/foo', 'rb') as fh:
                reader = MARCReader(fh, to_unicode=True)
                # next() works with both the Python 2 and Python 3 iterator
                # protocols, unlike reader.next().
                record = next(reader)
            self.assertEqual(record['245']['a'], unichr(0x1234))
        finally:
            # Remove the scratch file even when the assertion fails.
            os.remove('test/foo')
Ejemplo n.º 19
0
    def test_writing_unicode(self):
        """Write a record with a non-ASCII 245 $a and check it reads back unchanged."""
        original = Record()
        title_field = Field(245, ["1", "0"], ["a", chr(0x1234)])
        original.add_field(title_field)
        original.leader = "         a              "
        marc_out = MARCWriter(open("test/foo", "wb"))
        marc_out.write(original)
        marc_out.close()

        marc_in = MARCReader(open("test/foo", "rb"), to_unicode=True)
        loaded = next(marc_in)
        self.assertEqual(loaded["245"]["a"], chr(0x1234))
        marc_in.close()

        # Clean up the scratch file created above.
        os.remove("test/foo")
Ejemplo n.º 20
0
def save2marc(outfile: str, record: Record) -> None:
    """
    Appends a MARC record to outfile

    Args:
        outfile:                    file path
        record:                     MARC record as pymarc object

    Raises:
        OSError:                    if outfile cannot be opened; the error
                                    is no longer masked by an unbound
                                    ``writer`` in a ``finally`` clause
    """
    # The context manager closes the file on success and on failure, which
    # replaces the no-op "except: raise" and the unsafe "finally".
    with open(outfile, "ab") as fh:
        writer = MARCWriter(fh)
        writer.write(record)
    "20": "2 day Reserve Loan",
    "21": "Regular Loan",
    "50": "Audio/Video & Map Loan",
    "x": "Find me"
}

# Module-level state for the item-creation run; the loop that fills these
# is not visible in this part of the file.
multiple930Needed = {}
deletedItems = []

# Loan statuses are loaded from a CSV file and converted by a project helper.
loanStatusReader = readCsvFile("LoanStatuses.csv")
LoanStatusList = transferToLoansArray(loanStatusReader)

# Rebinds deletedItems (initialised to [] above) with the helper's result.
deletedItemsreader = readTextFile("deleted_items_list_20190110.txt")
deletedItems = transferDeletedItems(deletedItemsreader)

# MARC outputs, opened in binary write mode (existing files are truncated).
SerialsRecordsWriter = MARCWriter(open("itemCreatedSerials.mrc", "wb"))
BooksRecordsWriter = MARCWriter(open("itemCreatedBooks.mrc", "wb"))
# Report files created through a project helper in "w" mode.
# NOTE(review): presumably csv writers, judging by the names -- confirm
# against createPrintFiles.
myErrorFile = createPrintFiles("myErrorFile.csv", "w")
mydeletesFile = createPrintFiles("deletedItemsFile.csv", "w")
myMultipleFile = createPrintFiles("multiple930Needed.csv", "w")
myWriterFile = createPrintFiles("itemsCreated.csv", "w")

# Input records; note that the name `file` shadows the Python 2 builtin.
file = open("bibssansitems_with852.mrc", "rb")
reader = MARCReader(file,
                    to_unicode=True,
                    force_utf8=True,
                    utf8_handling="strict")

# Counter initialised before the record loop that follows.
number = 1
for record in reader:
Ejemplo n.º 22
0
 def writeMARCRecord(self, record):
   """Write ``record`` to ``self.file`` through a newly created MARCWriter."""
   MARCWriter(self.file).write(record)
Ejemplo n.º 23
0
    scp_vals = ['CDL', 'UC open access']
    for fld in record.get_fields('856'):
        # 856 can have multiple $x
        for sfld in fld.get_subfields('x'):
            if sfld in scp_vals:
                # Should be only one $u per 856 field
                fld['u'] = 'dummyURL'
                # If fld was modified, break out of the sfld loop
                break


### Main code starts here ###
# Expect exactly three arguments: input file, output file and case number.
if len(sys.argv) != 4:
    raise ValueError(f'Usage: {sys.argv[0]} in_file out_file case# (3-5)')
# Validate the case number before any file is opened, so that a bad
# argument can no longer truncate an existing out_file.
case_no = sys.argv[3]
if case_no not in ['3', '4', '5']:
    raise ValueError(f'Invalid value {case_no}; must be 3, 4, or 5')
reader = MARCReader(open(sys.argv[1], 'rb'), utf8_handling="ignore")
writer = MARCWriter(open(sys.argv[2], 'wb'))

for record in reader:
    delete_590(record)
    delete_599(record)
    delete_793(record)
    if case_no == '3':
        modify_035(record)
        delete_856(record)
    elif case_no == '4':
        modify_035(record)
        modify_856(record)
    else:  #5
Ejemplo n.º 24
0
    if rda:
        continue  #skip record, go onto next record

    #time to fix that pesky misspelling...
    fix_245_misspelling(title_a_raw, word, 'a', title_245)
    fix_245_misspelling(title_b_raw, word, 'b', title_245)

    #get the bib record from the 907 field prior to deletion
    n = marc.get_fields('907')
    for field in n:
        bib_rec_num_raw = field.get_subfields('a')

    bib_rec_num = subfield_to_string(bib_rec_num_raw)

    #add 949 local field for overlay of bib record and creation of order record when record is uploaded into Millennium
    marc.add_field(
        Field(tag='949',
              indicators=[' ', ' '],
              subfields=['a', '*recs-b;ov-%s;' % (bib_rec_num)]))

    #delete 907, 998, 910, 945 fields
    for f in marc.get_fields('907', '998', '910', '945'):
        if f['a'] != '':
            marc.remove_field(f)

    #append record to a generic file.dat file
    writer = MARCWriter(file(word + '.dat', 'a'))
    writer.write(marc)

#closes .dat file
writer.close()
Ejemplo n.º 25
0
"""
Base script for DLF Forum 2014 Listening-Based Python workshop.

Modified from files at https://github.com/LibraryCodeYearIG/MARC-record-edit .
"""

import os
from pymarc import Field, MARCReader, MARCWriter, record_to_xml

# Read the dump in binary mode: MARC transmission format is bytes.
records = MARCReader(open('../../exampledump.mrc', 'rb'),
                     to_unicode=True,
                     force_utf8=True,
                     utf8_handling='ignore')

index = 1

# Write every record of the dump to its own numbered .dat and .xml file.
for marc in records:
    filename_dat = 'examplerecord_%s.dat' % index
    filename_xml = 'examplerecord_%s.xml' % index

    # open() replaces the Python-2-only file() builtin; binary append mode
    # keeps the MARC bytes intact on every platform.
    writer_dat = MARCWriter(open(filename_dat, 'ab'))
    writer_xml = open(filename_xml, 'a')

    writer_dat.write(marc)
    writer_xml.write(record_to_xml(marc) + "\n")

    writer_dat.close()
    writer_xml.close()

    index += 1
Ejemplo n.º 26
0
# Ten replacement ISBN strings, one per input file examplerecord_10 ..
# examplerecord_19; the loop below picks isbns[i - 10] for a record that has
# no 020 field.
isbns = ['025710491', '8105610391', '9781041740192', '9791037492192',
         '1049126950', '819251', '4018597182', '978103784952X',
         '1023894675102', '910384765']

for i in range(10, 20):
    filename = 'examplerecord_%s.dat' % i 
    filename_xml = 'examplerecord_%s.xml' % i 
    filename_out = 'examplerecord_%s.out' % i 

    records = MARCReader(open(filename),
                         to_unicode=True,
                         force_utf8=True,
                         utf8_handling='ignore')

    writer_dat = MARCWriter(file(filename_out,'a'))
    writer_xml = open(filename_xml,'a')

    for marc in records:

        isbn_list = marc.get_fields('020')
        try:
            isbn_field = isbn_list[0]
        except Exception, e:
            j = i - 10
            marc.add_ordered_field(
                Field(
                    tag='020',
                    indicators=[' ', ' '],
                    subfields = ['a', isbns[j]]
                    ))
Ejemplo n.º 27
0
    barcodes = {}
    for line in bc.readlines():
        id_, barcode = (v.strip() for v in line.strip().split('\t'))
        if id_ in barcodes:
            barcodes[id_].append(barcode)
        else:
            barcodes[id_] = [barcode]

# Debug controls and a placeholder for the output writer, which is rebound
# to a MARCWriter on out.mrc once the input file is opened below.
m = 150  # max records to process (for DEBUG)
DEBUG = False
writer = None

# Load bibliographic MARC records to add barcodes
# Output written to file out.mrc
with open(fnbiblio, 'rb') as marcdata:
    writer = MARCWriter(open('out.mrc', 'wb'))
    records = MARCReader(marcdata, to_unicode=True)
    for i, record in enumerate(records):
        if record is None:
            print('None record')
            #continue
        if record['876']:
            #print('HOLDING found!')
            continue
        else:
            print('%s Biblio found' % i)
            enc = record.leader[9]
            if enc != ' ':
                print('%s non MARC8 record found!' % i)

            id_ = record['001'].value().strip()
Ejemplo n.º 28
0
def fchange_sort(MARCfile, fname):
    '''
    Copy the update records of a MARC binary file into per-purpose .mrc files.

    Every record that has a 960 field is written to
    ``<prefix>_update_bibs.mrc``. Records whose 960 $a mentions
    "OCLC control number change" are also written to
    ``<prefix>_new_oclc_num.mrc``, and records mentioning "KB URL change" to
    ``<prefix>_update_URLs.mrc``. Those two files are only created when at
    least one such record exists. Records without a 960 field are ignored.

    The output directories are built from a hard-coded base path plus the
    module-level ``today`` value, and are created when missing.

    MARCfile -- path of the MARC binary file to read
    fname -- name the output prefix is derived from: all dots are removed,
             "mrc" is turned back into ".mrc" and the part before the dot is
             used (ValueError if that leaves anything but exactly one dot)
    '''
    updates_root = "C:/Users/kjthomps/Documents/WCM/file_fetching/updates/"
    # path for the FTP server files - both OCN and BIB updates
    sorted_files_path = updates_root + "sorted_for_FTP " + today
    # files with OCN updates, kept for manual checking
    ocn_updates_path = updates_root + "OCN_updates_" + today
    # files with URL updates, kept for manual checking
    url_updates_path = updates_root + "URL_updates_" + today
    for path in (sorted_files_path, ocn_updates_path, url_updates_path):
        if not os.path.isdir(path):
            os.mkdir(path)

    fname_str = str(fname)
    print(fname)
    fname_str = fname_str.replace(".", "")
    fname_str = fname_str.replace("mrc", ".mrc")
    print(fname_str)
    fpref, fsuf = fname_str.split('.')
    print(fpref)
    print(fsuf)

    print(MARCfile)

    # First pass: count OCN and URL changes to decide which files are needed.
    OCN_change_ct = 0
    URL_change_ct = 0
    with open(MARCfile, 'rb') as f:
        for rec in MARCReader(f):
            if rec['960']:
                field_960 = str(rec['960']['a'])
                if 'OCLC control number change' in field_960:
                    OCN_change_ct += 1
                if 'KB URL change' in field_960:
                    URL_change_ct += 1
        print("OCN_change_ct ", OCN_change_ct)
        # This line printed OCN_change_ct under the URL label before.
        print("URL_change_ct ", URL_change_ct)

    writer_new_oclc_num_manual = None
    writer_update_URLs = None
    writer_update_bibs = None
    # try/finally closes every writer that was opened, even on an error.
    try:
        # create the optional files only when they will receive records
        if OCN_change_ct > 0:
            writer_new_oclc_num_manual = MARCWriter(
                open(ocn_updates_path + "/" + fpref + '_new_oclc_num.mrc',
                     'wb'))
            print(True)
        if URL_change_ct > 0:
            writer_update_URLs = MARCWriter(
                open(url_updates_path + "/" + fpref + '_update_URLs.mrc',
                     'wb'))
            print(True)

        # create a file for all updates
        writer_update_bibs = MARCWriter(
            open(sorted_files_path + "/" + fpref + '_update_bibs.mrc', 'wb'))
        v = 0
        # Second pass: route the records.
        with open(MARCfile, 'rb') as f:
            for rec in MARCReader(f):
                v += 1
                print(v)
                if rec['960']:
                    field_960 = str(rec['960']['a'])
                    print(field_960)
                    # Every branch of the former if/elif chain wrote the
                    # record to the bib-update file, so that write is now
                    # unconditional; the two special files are additional.
                    writer_update_bibs.write(rec)
                    if 'OCLC control number change' in field_960:
                        writer_new_oclc_num_manual.write(rec)
                    if 'KB URL change' in field_960:
                        writer_update_URLs.write(rec)
    finally:
        for opened in (writer_update_bibs, writer_update_URLs,
                       writer_new_oclc_num_manual):
            if opened is not None:
                opened.close()
Ejemplo n.º 29
0
"""
Base script for DLF Forum 2014 Listening-Based Python workshop.

Modified from files at https://github.com/LibraryCodeYearIG/MARC-record-edit .
"""

import os
from pymarc import Field, MARCReader, MARCWriter, record_to_xml

# Read the input in binary mode: MARC transmission format is bytes.
records = MARCReader(open('examplerecord.mrc', 'rb'),
                     to_unicode=True,
                     force_utf8=True,
                     utf8_handling='ignore')

# open() replaces the Python-2-only file() builtin; binary append mode keeps
# the MARC bytes intact on every platform.
writer_dat = MARCWriter(open('file.dat', 'ab'))
writer_xml = open('file.xml', 'a')
# Opening tag of the XML output written by the loop that follows.
writer_xml.write('<collection>')

for marc in records:

    other_identifier_list = marc.get_fields('024')
    other_identifier_field = other_identifier_list[0]
    other_identifier = other_identifier_field.get_subfields('a')

    isbn_list = marc.get_fields('020')
    isbn_field = isbn_list[0]
    isbn = isbn_field.get_subfields('a')[0]

    if len(isbn) == 10:
        isbn = '978' + isbn
        marc.remove_field(isbn_field)
Ejemplo n.º 30
0
def _parse_variable_field(raw: str):
    """Split ``"<indicators>|<code><text>|..."`` into (indicators, subfields)."""
    # "▼" is accepted as an alternative subfield delimiter.
    segments = raw.replace("▼", "|").split("|")
    # Everything before the first delimiter holds the indicators; missing
    # positions are padded with blanks.
    head = segments[0]
    if len(head) == 0:
        indicators = [" ", " "]
    elif len(head) == 1:
        indicators = [head[0], " "]
    else:
        indicators = [head[0], head[1]]
    subfields = []
    for words in segments[1:]:
        if not words:
            # An empty segment ("||" or a trailing "|") has no subfield code;
            # indexing it raised IndexError before.
            continue
        subfields.append(words[0])
        subfields.append(words[1:])
    return indicators, subfields


def output_iso(file_name: str) -> None:
    """Convert a CSV table of MARC fields into an ``.iso`` file.

    Each CSV row becomes one record.  The ``head`` column supplies the
    leader, columns whose name starts with a digit supply fields (tags up to
    ``009`` as control fields, the others as fields with indicators and
    subfields), and every other column is ignored.  Cells that are empty or
    read as ``nan``, and columns whose name contains ``Unnamed`` or
    ``continue``, are skipped.  Records are written one per line to
    ``file_name`` with its last four characters replaced by ``.iso``; that
    file is created even when the table has no rows.

    Args:
        file_name: Path of the UTF-8 encoded CSV file.
    """
    output_file_name = file_name[:-4] + ".iso"
    temp_file_name = "临时文件.iso"
    try:
        # Opening with 'w' truncates any previous output.
        with open(output_file_name, 'w', encoding='utf-8') as out_fp:
            rows = pd.read_csv(file_name, encoding='utf-8',
                               dtype=str).to_dict(orient='records')
            for row in rows:
                # Keep only usable cells, stripped of surrounding whitespace.
                data = {}
                for k in row:
                    v = str(row[k])
                    if (v == 'nan' or len(v.strip()) == 0
                            or "Unnamed" in k or "continue" in k):
                        continue
                    data[k] = v.strip()

                # Drop non-field columns (such as isbn); map "head" to "000".
                data2 = {}
                for key, value in data.items():
                    if key == "head":
                        data2["000"] = value
                    elif '0' <= key[0] <= '9':
                        data2[key] = value

                record = Record()
                # Add the fields in ascending tag order.
                for key in sorted(data2):
                    if key == "000":
                        record.leader = data2[key]
                    elif key <= "009":
                        record.add_field(Field(tag=key, data=data2[key]))
                    else:
                        indicators, subfields = _parse_variable_field(
                            data2[key])
                        record.add_field(
                            Field(tag=key[:3],
                                  indicators=indicators,
                                  subfields=subfields))

                # Serialise the record through the temporary file ...
                with open(temp_file_name, 'wb') as fh:
                    MARCWriter(fh).write(record)
                # ... and copy its first line into the output file.
                with open(temp_file_name, 'r', encoding='utf-8') as temp_fp:
                    out_fp.write(temp_fp.readline())
                out_fp.write('\n')
    finally:
        # The temporary file never exists when the table has no rows, so an
        # unconditional os.remove() raised FileNotFoundError.
        if os.path.exists(temp_file_name):
            os.remove(temp_file_name)
Ejemplo n.º 31
0
# Report writers obtained from a project helper; each gets its header row
# written immediately.
# NOTE(review): presumably csv.writer objects, since writerow() is called on
# them -- confirm against createPrintFiles.
csvFileWriter = createPrintFiles("coverageAnalysis_Results.csv")
csvFileWriter.writerow([
    "System Number", "Oclc Number", "Title", "866 subfield A",
    "930 subfield S", "Analysis result"
])
onlyFileWriter = createPrintFiles("all_explained.csv")
onlyFileWriter.writerow([
    "System Number", "Oclc Number", "Title", "866 subfield A",
    "930 subfield S", "How was record manipulated"
])
deleted930FileWriter = createPrintFiles("deletedItems.csv")
deleted930FileWriter.writerow(
    ["System Number", "Oclc Number", "Title", "A 930 deleted due to status"])

# MARC outputs, opened in binary write mode (existing files are truncated).
changedRecordsWriter = MARCWriter(open("changedRecord.mrc", "wb"))
unchangedRecordsWriter = MARCWriter(open("unchangedRecord.mrc", "wb"))
deletedRecordsWriter = MARCWriter(open("deletedRecord.mrc", "wb"))

# Running totals, initialised before the per-file loop that follows.
rowsDeleted = 0
rowsAdded = 0

# Input directory: "files" under the current working directory.  The path is
# built with Windows-style backslash separators.
currentDir = os.getcwd()
filesDir = currentDir + "\\files\\"

for filename in os.listdir(filesDir):
    print(filename)
    sysNumCountArray = []
    originalItemCount = 0
    finalItemCount = 0
Ejemplo n.º 32
0
#!/usr/bin/env python
"""
write all the records with 856 fields out to an ebooks-only MARC file
"""
from pymarc import MARCReader, MARCWriter

# The MARCReader params come from the penultimate comment here:
# github.com/edsu/pymarc/issues/7
# Basically, these work around mixed character encodings.

errCount = 0

# MARC is a binary format, so both files are opened in binary mode ('rb' /
# 'wb'); the Python-2-only file() builtin is replaced by open().
with open('ebooks.MRC', 'rb') as marc_in:
    allRecords = MARCReader(marc_in, to_unicode=True, force_utf8=True,
                            utf8_handling='ignore')
    onlyEbooks = MARCWriter(open('ebooks-edited.MRC', 'wb'))
    try:
        for rec in allRecords:
            # get_fields() returns a (possibly empty) list, so this test does
            # not depend on how rec['856'] reports a missing tag.
            if rec.get_fields('856'):
                try:
                    onlyEbooks.write(rec)
                except UnicodeError:
                    # UnicodeError covers both the decode and the encode
                    # variant; report the title field and keep going.
                    titles = rec.get_fields('245')
                    print(titles[0] if titles else None)
                    errCount += 1
    finally:
        # Close the output even if reading or writing fails part-way.
        onlyEbooks.close()

print("\nNumber of Errors: ", errCount)
Ejemplo n.º 33
0
#!/usr/bin/env python
"""
write out a MARC file of records with multiple 856 fields
"""
from pymarc import MARCReader, MARCWriter


# Maximum number of input records to examine.
limit = 200000
i = 0

# MARC is a binary format, so both files are opened in binary mode ('rb' /
# 'wb'); the Python-2-only file() builtin is replaced by open().
with open('ebooks.MRC', 'rb') as marc_in:
    reader = MARCReader(marc_in, to_unicode=True, force_utf8=True,
                        utf8_handling='ignore')
    writer = MARCWriter(open('Two856s.MRC', 'wb'))
    try:
        for rec in reader:
            # Stop reading once the limit is reached instead of parsing the
            # rest of the input only to discard it.
            if i >= limit:
                break
            i += 1
            # get_fields() returns an empty list when the tag is absent, so
            # no separate "has an 856" check is needed.
            if len(rec.get_fields('856')) > 1:
                writer.write(rec)
    finally:
        # Close the output even if reading or writing fails part-way.
        writer.close()
	#time to fix that pesky misspelling...
	fix_245_misspelling(title_a_raw,word,'a',title_245)
	fix_245_misspelling(title_b_raw,word,'b',title_245)
	
	#get the bib record from the 907 field prior to deletion
	n = marc.get_fields('907')
	for field in n:
		bib_rec_num_raw = field.get_subfields('a')

	bib_rec_num = subfield_to_string(bib_rec_num_raw)

	#add 949 local field for overlay of bib record and creation of order record when record is uploaded into Millennium
	marc.add_field(
		Field(
			tag = '949',
			indicators = [' ',' '],
			subfields = [
				'a', '*recs-b;ov-%s;' %(bib_rec_num)
			]))	

	#delete 907, 998, 910, 945 fields
	for f in marc.get_fields('907', '998', '910', '945'):
		  if f['a'] != '':
			  marc.remove_field(f) 
	
	#append record to a generic file.dat file
	writer = MARCWriter(file(word+'.dat','a'))
	writer.write(marc)

#closes .dat file
writer.close() 
Ejemplo n.º 35
0
#!/usr/bin/env python
"""
given MARC file:
- proxy certain vendor 856$u URLs
- delete other weird URLs that we don't need
- delete all 856$z subfields (make sense in our OPAC)

outputs to new MARC file. File paths are hardcoded in the "books"
and "processed" variables below.
"""
from pymarc import MARCReader, MARCWriter

# MARC is a binary format, so the input is opened with 'rb' and the output
# with 'wb'; the Python-2-only file() builtin is replaced by open().
# The reader options ask pymarc to decode to unicode, treat the data as UTF-8
# and ignore bytes that are not valid UTF-8.
books = MARCReader(open('MARC/2013-12-13-full-catalog.MRC', 'rb'),
                   to_unicode=True, force_utf8=True, utf8_handling='ignore')
processed = MARCWriter(open('TEST.MRC', 'wb'))

# limit output, for quicker testing
limit = 300000
i = 0
# initialize stat counters
num_total_books = 0
num_proxied_ebooks = 0
num_fields_removed = 0
# list of domains in 856 $u that correspond to an ebook subscription we have
subscription_domains = [
    'hdl.handle.net',  # ACLS Humanities
    'galenet.galegroup.com',  # GVRL
    'find.galengroup.com',
    'www.netlibrary.com',  # NetLibrary / EBSCO
    'www.netLibrary.com',  # case sensitive
def fmt_split(MARCfile):
    '''
    Splits a MARC binary file into separate files for books, serials, maps,
    visual materials, and other formats, based on LDR/06-07 values.

    Every record first gets a 909 field (fixed codes plus today's date) added,
    then is written to exactly one of <prefix>_bks.mrc, <prefix>_ser.mrc,
    <prefix>_maps.mrc, <prefix>_vis.mrc or <prefix>_other.mrc, where <prefix>
    is MARCfile without its extension.

    MARCfile -- path of the input file (passed to open() and str()).
    '''
    # Local import so this function does not rely on a file-level "import os".
    import os

    # splitext() keeps dots in directory or base names intact and copes with
    # a missing extension; split('.') raised ValueError in both cases.
    fpref = os.path.splitext(str(MARCfile))[0]
    today = str(date.today())

    # (output suffix, leader pattern) pairs, tested in this order. Compiled
    # once instead of once per record. "[si]" replaces "[s|i]", which also
    # matched a literal "|" in LDR/07.
    format_patterns = (
        ('bks', re.compile(r'^.{6}am')),
        ('ser', re.compile(r'^.{6}a[si]')),
        ('maps', re.compile(r'^.{6}e')),
        ('vis', re.compile(r'^.{6}k')),
    )

    with open(MARCfile, 'rb') as f:
        writers = {}
        try:
            # opens a file for each format
            for suffix in ('bks', 'ser', 'maps', 'vis', 'other'):
                writers[suffix] = MARCWriter(
                    open(fpref + '_' + suffix + '.mrc', 'wb'))

            for rec in MARCReader(f):
                rec.add_ordered_field(
                    pymarc.Field(tag='909',
                                 indicators=[' ', ' '],
                                 subfields=[
                                     'a', 'bcat', 'b', 'MNU', 'c', today,
                                     'd', 'marcive'
                                 ]))

                # str() so the regexes also work if the leader is exposed as
                # an object rather than a plain string.
                ldr = str(rec.leader)

                # First matching pattern wins; anything else goes to "other".
                target = next(
                    (suffix for suffix, pattern in format_patterns
                     if pattern.match(ldr)),
                    'other')
                writers[target].write(rec)
        finally:
            # closes the format files even if reading or writing failed
            for writer in writers.values():
                writer.close()
    # MARC field 700
    if creators:
        for creator in creators:
            marc_record.add_field(
                Field(
                    tag = '700',
                    indicators = ['1', ''],
                    subfields = [
                        'a', f'{creator}',
                        't', '',
                    ]))
            
    # MARC field 856
    if identifiers:
        for identifier in identifiers:
            marc_record.add_field(
                Field(
                    tag = '856',
                    indicators = ['4', '2'],
                    subfields = [
                        '3', 'Finding aid',
                        'u', f'{identifier}',
                    ]))
    
    # write to MARC output file
    writer = MARCWriter(open(save_file,'ab'))
    writer.write(marc_record)
    writer.close()

# open up MARC record in default viewer (NOTEPAD most likely)    
os.system(save_file)
Ejemplo n.º 38
0
def output_iso_from_data(file_name: str, isbn_total: list,
                         data_total: dict) -> None:
    """Build one MARC record per ISBN and write them to ``file_name``.

    Each record is serialised through a temporary file and then appended to
    ``file_name`` as one line of UTF-8 text. A record whose leader was not
    set from the data (including ISBNs missing from ``data_total``) gets a
    001 field holding the ISBN. A record that cannot be encoded, or whose
    serialised form cannot be decoded as UTF-8, is replaced by a record that
    holds only that 001 field.

    Args:
        file_name: Output path; truncated before anything is written.
        isbn_total: ISBNs to export, in output order.
        data_total: Maps an ISBN to a ``{key: value}`` dict. The first three
            characters of a key are the MARC tag ('000' sets the leader,
            tags up to '009' are control fields). For other tags the first
            two characters of the value are the indicators and the rest
            holds subfields introduced by "|", each starting with its
            one-character code.
    """
    temp_file_name = "临时文件.iso"

    def _placeholder(isbn):
        """Return a record that contains only a 001 field with ``isbn``."""
        placeholder = Record()
        placeholder.add_field(Field(tag='001', data=isbn))
        return placeholder

    def _dump(record):
        """Overwrite the temporary file with ``record`` in MARC format."""
        with open(temp_file_name, 'wb') as fh:
            MARCWriter(fh).write(record)

    def _load():
        """Return the first line of the temporary file decoded as UTF-8."""
        with open(temp_file_name, 'r', encoding='utf-8') as fh:
            return fh.readline()

    # Start from an empty output file; records are appended one at a time.
    with open(file_name, 'w', encoding='utf-8'):
        pass

    try:
        for isbn in isbn_total:
            record = Record()
            for key, value in data_total.get(isbn, {}).items():
                # Strip characters that UTF-8 cannot represent.
                for character in NON_CHARACTERS_IN_UTF_8:
                    key = str(key).replace(character, "")
                    value = str(value).replace(character, "")
                if key == 'continue':
                    continue
                tag = key[:3]
                if tag == '000':
                    record.leader = value
                elif tag <= '009':
                    record.add_field(Field(tag=tag, data=value))
                else:
                    # Literal "$" becomes a space, then "|" is used as the
                    # subfield delimiter.
                    words = value[2:].replace("$", " ").replace("|", "$")
                    subfields = []
                    for word in words.strip().split("$"):
                        word = word.strip()
                        if word:
                            subfields.append(word[0])
                            subfields.append(word[1:])
                    record.add_field(
                        Field(tag=tag,
                              indicators=[value[0], value[1]],
                              subfields=subfields))
            if str(record.leader) == str(Record().leader):  # new data
                record.add_field(Field(tag='001', data=isbn))
            record = record_sorted(record)

            # Record is complete: write it to the temporary file.
            try:
                _dump(record)
            except UnicodeEncodeError:
                print("编号为:{}的数据格式有误,清空数据以利于输出.".format(isbn))
                _dump(_placeholder(isbn))

            # Copy the record from the temporary file to the output file.
            try:
                line = _load()
            except UnicodeDecodeError:
                # Some records do not decode cleanly (e.g. 9780714827308);
                # fall back to the placeholder record.
                _dump(_placeholder(isbn))
                line = _load()
            with open(file_name, 'a', encoding='utf-8') as out:
                out.write(line)
                out.write('\n')
    finally:
        # Delete the temporary file, also when an error interrupted the
        # export. It does not exist when isbn_total is empty.
        try:
            os.remove(temp_file_name)
        except FileNotFoundError:
            pass