def test_07_components_no_blanks(self):
    # Each key of ``lccns`` is a raw call number and each value is the
    # component list that ``components()`` (default arguments) must return.
    for raw, wanted in lccns.items():
        parts = callnumber.LC(raw).components()
        # Guards against an empty/falsy sample key in the fixture itself.
        self.assertTrue(raw)
        # Length is checked first so a size mismatch is reported on its own.
        self.assertEqual(len(wanted), len(parts))
        self.assertEqual(wanted, parts)
def test_08_components_no_blanks(self):
    # Despite the "no_blanks" name, this case calls ``components`` with
    # ``include_blanks=True`` and compares against ``lccns_with_blanks``.
    for raw, wanted in lccns_with_blanks.items():
        parts = callnumber.LC(raw).components(include_blanks=True)
        # Guards against an empty/falsy sample key in the fixture itself.
        self.assertTrue(raw)
        # Length is checked first so a size mismatch is reported on its own.
        self.assertEqual(len(wanted), len(parts))
        self.assertEqual(wanted, parts)
def _write_addons_xml(addons):
    """Write the add-on BSNs to ``app/data/tmp/append_bsns.xml`` as report rows."""
    root = ET.Element('printout')
    for position, bsn in enumerate(addons):
        row = ET.SubElement(root, 'ROW')
        ET.SubElement(row, 'BSN').text = str(bsn)
        # BARCODE is filled with the item's position in the add-on list.
        ET.SubElement(row, 'BARCODE').text = str(position)
    with open('app/data/tmp/append_bsns.xml', 'w') as f:
        f.write(prettify_xml(ET.tostring(root)))


def _load_report_rows():
    """Combine the XML files in the tmp directory and return the ROW entries."""
    # NOTE(review): ``in_file`` is not defined inside this block; it is
    # assumed to be a module-level name -- confirm, or make it a parameter.
    copyfile(f'app/data/in/{in_file}', 'app/data/tmp/report.xml')
    xmlstr = prettify_xml(combine_xml('app/data/tmp/'))
    with open('app/data/out/full_report.xml', "w") as f:
        f.write(xmlstr)

    doc = xmltodict.parse(xmlstr)
    printout = doc.get('printout') or {}
    rows = printout.get('ROW') or []
    # xmltodict yields a single mapping (not a list) when there is only one
    # ROW element; normalise so callers can always iterate over rows.
    if not isinstance(rows, list):
        rows = [rows]
    return rows


def _report_item(row):
    """Reduce one parsed ROW to the fields used when building a record."""
    item = {'bsn': row['BSN']}
    if 'VOLUME_INFO' in row:
        # An empty element is parsed as None; treat it as an empty volume.
        item['volume'] = (row['VOLUME_INFO'] or '').replace('(', ' (')
    if row.get('Z13_IMPRINT'):
        item['imprint'] = row['Z13_IMPRINT']
    return item


def _only_roman_chars(unistr):
    """Return True when every alphabetic character in *unistr* is Latin."""
    # http://stackoverflow.com/a/3308844
    import unicodedata

    return all(
        'LATIN' in unicodedata.name(char, '')
        for char in unistr
        if char.isalpha()
    )


def _bsn_exists(bsn):
    """Return True when the library API response for *bsn* has metadata."""
    urlstring = '%s%s' % (os.getenv('LIBRARY_API'), bsn)
    # Close the HTTP response as soon as the document has been parsed.
    with urllib.request.urlopen(urlstring) as response:
        root = ET.parse(response).getroot()
    return bool(
        root.findall(".//{http://www.openarchives.org/OAI/2.0/}metadata")
    )


def _build_record(item, new_title):
    """Assemble the output record for one report item.

    Values present in the report (imprint, volume) take precedence; the
    remaining fields come from the ``format_*`` methods of *new_title*.
    """
    record = {'bsn': item['bsn']}
    record['title'] = new_title.format_title()
    record['char'] = _only_roman_chars(record['title'])
    record['contributor'] = new_title.format_contributor()
    record['edition'] = new_title.format_edition()

    imprint = item.get('imprint', '').strip()
    if imprint:
        # Drop a single trailing full stop from the report's imprint.
        record['imprint'] = imprint[:-1] if imprint.endswith('.') else imprint
    else:
        record['imprint'] = new_title.format_imprint()

    record['collection'] = new_title.format_collection()
    record['series'] = new_title.format_series()
    # TODO: the empty default for a missing volume was marked "FIX!".
    record['volume'] = item.get('volume', '').replace('.', '. ')

    call_number = new_title.format_callnumber()
    record['callnumber'] = call_number
    if call_number:
        lccn = callnumber.LC(call_number).normalized
        if lccn is None:
            # Not normalisable: fall back to a tidied copy of the raw value.
            lccn = call_number.strip().title()
        record['lccn'] = lccn
        if record['volume']:
            record['callnumber'] = call_number + " " + record['volume']
    else:
        record['lccn'] = "Call number missing"

    record['gift'] = new_title.format_gift()
    record['handle'] = new_title.format_handle()
    return record


def _category_from_callnumber(record, lc_classes):
    """Return the category whose class/range row matches the call number.

    Each row of *lc_classes* is read as (class letters, range start,
    range end, category). 'other' is returned when nothing matches.
    """
    if not record['callnumber']:
        return 'other'

    parts = callnumber.LC(record['callnumber']).components()
    if not parts:
        # NOTE(review): the flattened original left the placement of this
        # diagnostic ambiguous; it is emitted when no components are returned.
        print(record['title'], record['lccn'])
        return 'other'

    if len(parts) > 1:
        # Skip blank CSV lines, which csv.reader yields as empty lists.
        candidates = [row for row in lc_classes if row and row[0] == parts[0]]
        if candidates:
            number = float(parts[1])
            for row in candidates:
                if float(row[1]) <= number <= float(row[2]):
                    return row[3]
    return 'other'


def _record_sort_key(record):
    """Sort by lccn, then by the digits of the volume read as one integer."""
    # isdecimal() only admits characters that int() can actually parse.
    digits = ''.join(char for char in record['volume'] if char.isdecimal())
    return record['lccn'], int('0' + digits)


def process(addons=None):
    """Build the new-titles record list and pickle it.

    Steps:
      1. If *addons* is given, write those BSNs to an extra XML file in
         ``app/data/tmp/``.
      2. Copy the input report into ``app/data/tmp/``, combine the XML files
         there and write the result to ``app/data/out/full_report.xml``.
      3. For every row whose BSN is confirmed by the library API, build a
         record from ``NewTitle`` plus the report's imprint/volume values.
      4. Assign a category from ``app/data/ref/lc_classes.csv``; records
         still in 'other' receive the predicted category and have their
         title prefixed with "*".
      5. Sort the records and pickle them to ``app/data/ref/newtitles.p``.

    Args:
        addons: Optional iterable of BSNs to append to the report.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    if addons:
        _write_addons_xml(addons)

    rows = _load_report_rows()
    print(f"There are {len(rows)} records in this month's report.")
    report = [_report_item(row) for row in rows]

    # Walk the report rows directly: looking rows up again by barcode mapped
    # every duplicate barcode back to the first matching row.
    records = []
    for item in report:
        bsn = item['bsn']
        if not _bsn_exists(bsn):
            print(f'{bsn} is an invalid BSN. Skipping record...')
            continue
        records.append(_build_record(item, NewTitle(bsn)))
    print('\nFinished processing %d records with %d successes.'
          % (len(report), len(records)))

    ## Choose category using call number map
    with open('app/data/ref/lc_classes.csv', 'r', newline='') as f:
        lc_classes = list(csv.reader(f))
    for record in records:
        record['category'] = _category_from_callnumber(record, lc_classes)

    ## Guess category
    # Any categorization algorithm can be put into this module.
    from app.categorize_nt import predict_categories

    titles = [record['title'] for record in records]
    predicted_categories = predict_categories(titles) if titles else []
    for record, category in zip(records, predicted_categories):
        if record['category'] == 'other':
            # The "*" prefix marks titles whose category was predicted.
            record['title'] = "*" + record['title']
            record['category'] = category

    records.sort(key=_record_sort_key)
    with open('app/data/ref/newtitles.p', 'wb') as f:
        pickle.dump(records, f)
def test_06_start_of_range_equivalence(self):
    # The start of a call number's range must equal its normalized form.
    # assertEqual is required here: assertTrue(a, b) treats ``b`` as the
    # failure message and never compares the two values.
    for raw in lccns:
        lccn = callnumber.LC(raw)
        self.assertEqual(lccn.normalized, lccn.range_start)
def test_05_compound_range(self):
    # Range bounds for a call number with a class number and a decimal.
    # assertEqual is required here: assertTrue(a, b) treats ``b`` as the
    # failure message and never compares the two values.
    # NOTE(review): these expected strings were never enforced before this
    # change -- confirm their exact padding against callnumber.LC output.
    lccn = callnumber.LC('A11.1')
    self.assertEqual(lccn.range_start, 'A 001110')
    self.assertEqual(lccn.range_end, 'A 001119~999~999~999')
def test_04_simple_range(self):
    # Range bounds for a call number consisting of a class letter only.
    # assertEqual is required here: assertTrue(a, b) treats ``b`` as the
    # failure message and never compares the two values.
    lccn = callnumber.LC('A')
    self.assertEqual(lccn.range_start, 'A')
    self.assertEqual(lccn.range_end, 'A~~')
def test_01_compound_normalization(self):
    # Round trip of a call number with a class number and a decimal.
    # assertEqual is required here: assertTrue(a, b) treats ``b`` as the
    # failure message and never compares the two values.
    # NOTE(review): the normalized literal was never enforced before this
    # change -- confirm its exact padding against callnumber.LC output.
    lccn = callnumber.LC('A11.1')
    self.assertEqual(lccn.denormalized, 'A11.1')
    self.assertEqual(lccn.normalized, 'A 001110')
def test_00_simple_normalization(self):
    # A bare class letter is expected to be unchanged by both
    # normalization and denormalization.
    # assertEqual is required here: assertTrue(a, b) treats ``b`` as the
    # failure message and never compares the two values.
    lccn = callnumber.LC('A')
    self.assertEqual(lccn.denormalized, 'A')
    self.assertEqual(lccn.normalized, 'A')