Ejemplo n.º 1
0
def record_as_html(rec, bn, db, fn):
    """Render one MARC record as an HTML page and write it to ``fn``.

    Args:
        rec: record object; this function uses its ``author()``,
            ``subjects()``, ``addedentries()``, ``location()`` and
            ``get_fields()`` methods and its ``'245'`` field.
        bn: bibliographic number, passed to ``hip_url``.
        db: catalog database identifier, passed to ``hip_url``.
        fn: path of the HTML file to write.

    Returns:
        None. If ``fn`` cannot be opened, ``write_error(fn)`` is called and
        nothing is written.
    """
    # Query the author once; an empty/missing author yields an empty div.
    author = rec.author()
    creator = marc8_to_unicode(author) if author else ''
    creatordiv = build_div_html(creator, 'creator')
    title = marc8_to_unicode(rec['245'].format_field())
    titlediv = build_div_html(title, 'title')
    scopecontent = build_div(rec.get_fields('520'), 'abstract scopecontent')
    bioghist = build_div(rec.get_fields('545'), 'description bioghist')
    subjdiv = build_div(rec.subjects(), 'subject')
    addedentries = build_div(rec.addedentries(), 'addedentry')
    repos = build_div(rec.location(), 'isLocatedAt repos')
    # NOTE(review): catalogdiv is not assigned anywhere in this function;
    # presumably it is a module-level value -- confirm, otherwise the next
    # statement raises NameError.
    recordbody = '\n%s%s%s%s%s%s%s%s' % (creatordiv, titlediv, scopecontent,
                                         bioghist, subjdiv, addedentries,
                                         repos, catalogdiv)
    recordbodydiv = build_div_html(recordbody, 'record')
    url = hip_url(bn, db)
    # NOTE(review): replace('&', '&') is a no-op as written; it looks like an
    # HTML entity (e.g. '&amp;') was lost on one side -- confirm the intended
    # direction before changing it.
    out = htmltemplate.substitute(creator=creator,
                                  title=title,
                                  url=url,
                                  jsurl=url.replace('&', '&'),
                                  recordbody=recordbodydiv)

    try:
        outfile = open(fn, 'w')
    except (IOError, OSError):
        # Report the failure and stop: there is no file object to write to.
        write_error(fn)
        return
    try:
        # NOTE(review): writing encoded bytes to a text-mode file relies on
        # Python 2 semantics -- confirm the target interpreter.
        outfile.write(out.encode("utf-8"))
    finally:
        # Close the handle even if encoding or writing fails.
        outfile.close()
Ejemplo n.º 2
0
 def test_eszett_euro(self):
     """Eszett (M+C7) and Euro Sign (M+C8) convert to U+00DF and U+20AC.

     Both characters were added to the MARC-8 set in the June 2004
     revision of the MARC-8 mapping.
     """
     expectations = (
         (b'ESZETT SYMBOL: \xc7 is U+00DF',
          u'ESZETT SYMBOL: \u00df is U+00DF'),
         (b'EURO SIGN: \xc8 is U+20AC',
          u'EURO SIGN: \u20ac is U+20AC'),
     )
     for marc8_bytes, expected_text in expectations:
         self.assertEqual(marc8_to_unicode(marc8_bytes), expected_text)
Ejemplo n.º 3
0
def build_div(fields, divclass):
    """Return the concatenated HTML divs for every field in ``fields``.

    Each field's ``format_field()`` text is converted with
    ``marc8_to_unicode`` and wrapped by ``build_div_html`` using
    ``divclass``. The wrapped pieces are joined with no separator, so an
    empty ``fields`` gives an empty string.
    """
    return "".join(
        build_div_html(marc8_to_unicode(entry.format_field()), divclass)
        for entry in fields)
Ejemplo n.º 4
0
 def to_unicode(self):
   """Convert this object's MARC8-encoded text to Unicode.

   The text from ``self.__str__()`` is passed through ``Parser().to_html``
   and the result is decoded with ``marc8_to_unicode``.

   Returns:
     The decoded text. If decoding raises and ``IGNORE_UNICODE_ERRORS`` is
     set, an HTML notice followed by the undecoded ``__str__`` text is
     returned instead.

   Raises:
     Whatever ``marc8_to_unicode`` raised, when ``IGNORE_UNICODE_ERRORS``
     is not set.
   """
   result = self.__str__()
   result_html = Parser().to_html(result)
   try:
     result_out = marc8_to_unicode(result_html)
   except Exception:
     # Catch Exception rather than everything, so KeyboardInterrupt and
     # SystemExit are never swallowed by the ignore-errors fallback.
     if IGNORE_UNICODE_ERRORS:
       # The backslash continuation sits inside the literal, so the next
       # line's leading spaces are part of the emitted message.
       result_out = "<strong>NOTE: MARC8 to Unicode conversion failed on this \
                record.</strong><br/>\n%s" % result
     else:
       raise
   return result_out
Ejemplo n.º 5
0
    def test_marc8_to_unicode(self):
        """Compare marc8_to_unicode output line by line with a UTF-8 file.

        Reads ``test/test_marc8.txt`` and ``test/test_utf8.txt`` in
        lockstep, stopping at the first empty line or end of either file,
        and checks that each converted MARC-8 line, encoded as UTF-8,
        equals the matching reference line. Exactly 1515 line pairs must
        have been compared.
        """
        count = 0

        # open() replaces the Python 2-only file() builtin, and the with
        # blocks close both files even when an assertion fails mid-loop.
        with open('test/test_marc8.txt') as marc8_file:
            with open('test/test_utf8.txt') as utf8_file:
                while True:
                    marc8 = marc8_file.readline().strip("\n")
                    utf8 = utf8_file.readline().strip("\n")
                    if marc8 == '' or utf8 == '':
                        break
                    count += 1
                    # assertEqual: assertEquals is a deprecated alias.
                    self.assertEqual(
                        marc8_to_unicode(marc8).encode('utf8'), utf8)

        self.assertEqual(count, 1515)
Ejemplo n.º 6
0
    def test_marc8_to_unicode(self):
        """Check marc8_to_unicode against the paired MARC-8/UTF-8 fixtures.

        Lines of ``test/test_marc8.txt`` and ``test/test_utf8.txt`` are
        read pairwise until either yields an empty line or is exhausted.
        Each MARC-8 line is converted, re-encoded as UTF-8 and compared
        with its reference line; 1515 pairs are expected in total.
        """
        count = 0

        # Use open() (file() exists only on Python 2) inside with blocks so
        # neither handle leaks, including when an assertion fails.
        with open('test/test_marc8.txt') as marc8_file:
            with open('test/test_utf8.txt') as utf8_file:
                while True:
                    marc8 = marc8_file.readline().strip("\n")
                    utf8 = utf8_file.readline().strip("\n")
                    if marc8 == '' or utf8 == '':
                        break
                    count += 1
                    # assertEquals is a deprecated alias of assertEqual.
                    self.assertEqual(
                        marc8_to_unicode(marc8).encode('utf8'), utf8)

        self.assertEqual(count, 1515)
Ejemplo n.º 7
0
    def test_marc8_to_unicode(self):
        """Compare marc8_to_unicode output line by line with a UTF-8 file.

        Reads ``test/test_marc8.txt`` and ``test/test_utf8.txt`` as bytes
        in lockstep, stopping at the first empty line or end of either
        file. Each converted MARC-8 line, encoded as UTF-8, must equal the
        matching reference line, and exactly 1515 pairs must be compared.
        """
        count = 0

        # The with statement closes both files even when an assertion
        # fails; explicit close() calls after the loop were skipped then.
        with open("test/test_marc8.txt", "rb") as marc8_file, \
                open("test/test_utf8.txt", "rb") as utf8_file:
            while True:
                marc8 = marc8_file.readline().strip(b"\n")
                utf8 = utf8_file.readline().strip(b"\n")
                if marc8 == b"" or utf8 == b"":
                    break
                count += 1
                self.assertEqual(marc8_to_unicode(marc8).encode("utf8"), utf8)

        self.assertEqual(count, 1515)
Ejemplo n.º 8
0
    def test_marc8_to_unicode(self):
        """Check marc8_to_unicode against the paired MARC-8/UTF-8 fixtures.

        Byte lines of ``test/test_marc8.txt`` and ``test/test_utf8.txt``
        are read pairwise until either yields an empty line or is
        exhausted. Each MARC-8 line is converted, re-encoded as UTF-8 and
        compared with its reference line; 1515 pairs are expected.
        """
        count = 0

        # Context managers guarantee both handles are closed; trailing
        # close() calls would not run if an assertion failed first.
        with open('test/test_marc8.txt', 'rb') as marc8_file, \
                open('test/test_utf8.txt', 'rb') as utf8_file:
            while True:
                marc8 = marc8_file.readline().strip(b"\n")
                utf8 = utf8_file.readline().strip(b"\n")
                if marc8 == b'' or utf8 == b'':
                    break
                count += 1
                self.assertEqual(marc8_to_unicode(marc8).encode('utf8'), utf8)

        self.assertEqual(count, 1515)
Ejemplo n.º 9
0
 def test_subscript_2(self):
     """A '2' between the \\x1bb and \\x1bs escapes converts to U+2082.

     Checked once with trailing text and once with the escape sequence at
     the very end of the input.
     """
     cases = [
         ('CO\x1bb2\x1bs is a gas', u'CO\u2082 is a gas'),
         ('CO\x1bb2\x1bs', u'CO\u2082'),
     ]
     for marc8_text, expected in cases:
         self.assertEqual(marc8_to_unicode(marc8_text), expected)
Ejemplo n.º 10
0
 def test_alif(self):
     """The Alif (M+2E) converts to U+02BC.

     The MARC-8 mapping was revised in March 2005 to change the Unicode
     target for the Alif from U+02BE to U+02BC.
     """
     marc8_input = b"ALIF: \xae is U+02BC"
     expected = u"ALIF: \u02bc is U+02BC"
     self.assertEqual(marc8_to_unicode(marc8_input), expected)
Ejemplo n.º 11
0
 def test_subscript_2(self):
     """A '2' between the \\x1bb and \\x1bs escapes converts to U+2082.

     Covers the sequence followed by more text and the sequence ending
     the input.
     """
     with_trailing_text = marc8_to_unicode(b"CO\x1bb2\x1bs is a gas")
     self.assertEqual(with_trailing_text, u"CO\u2082 is a gas")

     at_end_of_input = marc8_to_unicode(b"CO\x1bb2\x1bs")
     self.assertEqual(at_end_of_input, u"CO\u2082")
Ejemplo n.º 12
0
 def test_alif(self):
     """The Alif (M+2E) converts to U+02BC.

     Reflects the March 2005 revision of the MARC-8 mapping, which moved
     the Alif's Unicode target from U+02BE to U+02BC.
     """
     converted = marc8_to_unicode(b'ALIF: \xae is U+02BC')
     self.assertEqual(converted, u'ALIF: \u02bc is U+02BC')
def utf8_join(in_list):
  """Join strings with spaces, convert from MARC-8, and trim punctuation.

  The items of ``in_list`` are joined with single spaces, the joined text
  is passed through ``pymarc.marc8_to_unicode``, and any leading or
  trailing '.', ':', ',', ';', '/' or space characters are stripped from
  the result.
  """
  return pymarc.marc8_to_unicode(' '.join(in_list)).strip('.:,;/ ')
Ejemplo n.º 14
0
 def test_subscript_2(self):
     """A '2' between the \\x1bb and \\x1bs escapes converts to U+2082."""
     expected_by_input = (
         (b'CO\x1bb2\x1bs is a gas', u'CO\u2082 is a gas'),
         (b'CO\x1bb2\x1bs', u'CO\u2082'),
     )
     for raw, expected in expected_by_input:
         self.assertEqual(marc8_to_unicode(raw), expected)
Ejemplo n.º 15
0
def main(marcfile):
    """Collect oral-history interview records from a MARC file into indexes.

    Reads every record in ``marcfile``, keeps those with a 998 field whose
    subfield ``c`` is ``'oh'``, and builds for each a catalog URL, a label
    (author plus interview date) and an optional online-transcript URL.
    The interviews are sorted by ``alafiling(label)`` and grouped by the
    first letter of the label into two dicts of HTML ``<li>`` strings:
    ``ohiindex`` (all interviews) and ``transindex`` (only those with a
    transcript URL).

    Exits via ``sys.exit`` with a status message when no matching record is
    found; otherwise the message is written to stderr.

    NOTE(review): in the code visible here the titles and backlink strings
    are assigned but never used, and neither index is returned or written
    out -- the function may continue beyond this excerpt; confirm.
    """
    # NOTE(review): the file object passed to MARCReader is never closed
    # in the visible code.
    reader = MARCReader(file(marcfile))
    letters = list(string.uppercase)
    interviews = []
    ohiindex = {}
    ohititle = 'Oral History Interviews held by the Niels Bohr Library'
    transtitle = 'Online Oral History Transcripts from the Niels Bohr Library'
    ohibacklink = """A <a href="http://aip.org/history/ohilist/transcripts.html">separate list of transcripts available online</a> is also available."""
    transbacklink = """A <a href="http://aip.org/history/ohilist/">separate list of all transcripts</a> is also available."""
    transindex = {}
    recordcounter = 0

    # Pass 1: gather (url, label, transcript_url, sort key) per interview.
    for record in reader:
        if record['998'] is not None:
            collection = record.get_fields('998')
            # Every 998 field with $c == 'oh' adds an entry, so a record
            # carrying two such fields is appended and counted twice.
            for field in collection:
                collectionC = field['c']
                if collectionC == 'oh':
                    catdb = get_catdb(record)
                    bibno = get_bibno(record)
                    transcript_url = None
                    if record['856'] is not None:
                        links = record.get_fields('856')
                        # This loop reuses the name `field` from the
                        # enclosing loop; the last 856 $u containing the
                        # ohilist prefix wins.
                        for field in links:
                            if 'http://www.aip.org/history/ohilist' in field[
                                    'u']:
                                transcript_url = field['u']
                    url = 'http://www.aip.org/history/catalog/%s/%s.html' % (
                        catdb, bibno)
                    # NOTE(review): assumes record.author() returns a value
                    # marc8_to_unicode accepts for every 'oh' record --
                    # confirm it can never be None here.
                    interviewee = marc8_to_unicode(record.author())
                    date = parse_date(record)
                    interviewdate = '(Interview date: %s)' % date.rstrip(',. ')
                    interview = [interviewee, interviewdate]
                    label = " ".join(interview)
                    interviews.append(
                        (url, label, transcript_url, alafiling(label)))
                    recordcounter += 1
                else:
                    pass

    status = '%d OHI records found in %s' % (recordcounter, marcfile)

    if recordcounter == 0:
        # sys.exit with a string prints it to stderr and exits with status 1.
        sys.exit(status)
    else:
        sys.stderr.write(status)
        sys.stderr.write('\n')

    # Order by the filing key computed by alafiling(label).
    interviews.sort(key=lambda interviewkey: interviewkey[3])

    # Pass 2: bucket the <li> markup under the label's upper-cased initial.
    # Labels whose first character is not in `letters` land in no bucket.
    for interview in interviews:
        for letter in letters:
            initial = interview[1].upper()[0]
            if initial == letter:
                linkdata = '<li>%s' % make_link(interview[0], interview[1])
                if interview[2] is not None:
                    # Probe the transcript URL; failures are only reported
                    # on stderr and the link is still emitted below.
                    try:
                        urllib2.urlopen(interview[2])
                    except urllib2.HTTPError, e:
                        if e.code == 404:
                            sys.stderr.write('404 on %s\n' % interview[2])
                        else:
                            sys.stderr.write('Fail: %d, %s' % (e.code, e.msg))
                    except urllib2.URLError, e:
                        sys.stderr.write('Fail: %s' % e.reason)
                    linkdata = "%s - <strong>%s</strong>" % (
                        linkdata,
                        make_link(interview[2], 'Online transcript available'))
                    transonlydata = '<li>%s</li>\n' % make_link(
                        interview[2], interview[1])
                    transindex.setdefault(letter, []).append(transonlydata)
                linkdata = '%s</li>\n' % linkdata
                ohiindex.setdefault(letter, []).append(linkdata)
Ejemplo n.º 16
0
def main(marcfile):
    reader = MARCReader(file(marcfile))
    xmlURLs = []
    nutchURLs = []
    browsetitles = []
    browserepos = []
    browselinks = []
    '''
	Creating list of preset repository codes.
	'''
    reposmasterlist = [[
        u"American Institute of Physics", "MdCpAIP",
        "http://www.aip.org/history/nbl/index.html"
    ]]
    reposmasterlist.append([
        u"Académie des Sciences", "FrACADEMIE",
        "http://www.academie-sciences.fr/en/archive.htm"
    ])  #not an official MARC code
    reposmasterlist.append([
        u"American Association for the Advancement of Science", "daaas",
        "http://archives.aaas.org/"
    ])
    reposmasterlist.append([
        u"American Philosophical Society", "ppamp",
        "http://www.amphilsoc.org/library"
    ])
    reposmasterlist.append(
        [u"Amherst College", "ma", "https://www.amherst.edu/library/archives"])
    reposmasterlist.append(
        [u"Armagh Observatory", "ukARMAGH",
         "http://www.arm.ac.uk/history/"])  #not an official MARC code
    reposmasterlist.append(
        [u"Auburn University", "aapa", "http://www.lib.auburn.edu/sca/"])
    reposmasterlist.append([
        u"Austin Public Library", "TxAu", "http://www.austinlibrary.com/ahc/"
    ])
    reposmasterlist.append([
        u"Australian Academy of Science", "AUAAS",
        "http://science.org.au/basser/"
    ])  #not an official MARC code
    reposmasterlist.append([
        u"Birmingham Reference Library", "BCA",
        "http://www.birmingham.gov.uk/cs/Satellite/localstudieslibrary?packedargs=website%3D1&rendermode=live"
    ])
    reposmasterlist.append([
        u"Brandeis University", "MWalB",
        "http://lts.brandeis.edu/research/archives-speccoll/"
    ])
    reposmasterlist.append([
        u"Brigham Young University", "upb",
        "http://library.byuh.edu/library/archives"
    ])
    reposmasterlist.append([
        u"British Antarctic Survey", "ukntnls-BAS",
        "http://www.antarctica.ac.uk/about_bas/our_organisation/eid/archives.php"
    ])  #not an official MARC code
    reposmasterlist.append([
        u"California Institute of Technology", "CPT",
        "http://archives.caltech.edu/"
    ])
    reposmasterlist.append(
        [u"Cambridge University", "UkCU", "http://www.lib.cam.ac.uk/"])
    reposmasterlist.append(
        [u"Canisius College", "nbucc", "http://www.canisius.edu/archives/"])
    reposmasterlist.append([
        u"Carnegie Institution of Washington", "dcit",
        "http://carnegiescience.edu/legacy/"
    ])
    reposmasterlist.append(
        [u"Carnegie Mellon University", "cmu", "http://diva.library.cmu.edu/"])
    reposmasterlist.append([
        u"Case Western Reserve University", "oclw",
        "http://library.case.edu/ksl/collections/special/"
    ])
    reposmasterlist.append([
        u"Catholic University of America", "dcu",
        "http://libraries.cua.edu/rarebooks/index.cfm"
    ])
    reposmasterlist.append([
        u"Central Michigan University", "MiMtpT",
        "http://quod.lib.umich.edu/c/clarke/"
    ])
    reposmasterlist.append([
        u"CERN (European Organization for Nuclear Research)", "szgecern",
        "http://library.web.cern.ch/library/Archives/"
    ])
    reposmasterlist.append([
        u"Chemical Heritage Foundation", "paphchf",
        "http://www.chemheritage.org/discover/collections/index.aspx"
    ])
    reposmasterlist.append([
        u"Christ's College", "UkCU-CHR",
        "http://www.christs.cam.ac.uk/current-students/library/"
    ])
    reposmasterlist.append([
        u"Churchill College", "UkCU-CHU", "http://www.chu.cam.ac.uk/archives/"
    ])
    reposmasterlist.append([
        u"Clark University", "MWC", "http://www.clarku.edu/research/archives/"
    ])
    reposmasterlist.append([
        u"Clemson University", "sccleu",
        "http://www.clemson.edu/library/special_collections/"
    ])
    reposmasterlist.append([
        u"College of William and Mary", "viw",
        "https://swem.wm.edu/research/special-collections"
    ])
    reposmasterlist.append(
        [u"Columbia University", "nncrb", "http://library.columbia.edu/"])
    reposmasterlist.append(
        [u"Cornell University", "NIC-RMC", "http://rmc.library.cornell.edu/"])
    reposmasterlist.append([
        u"Dartmouth College", "nhd",
        "http://www.dartmouth.edu/~library/rauner/"
    ])
    reposmasterlist.append([
        u"DePauw University", "ingradi",
        "http://www.depauw.edu/libraries/about/librarylocations/archives/"
    ])
    reposmasterlist.append(
        [u"Dickinson College", "PCarlD", "http://archives.dickinson.edu/"])
    reposmasterlist.append(
        [u"Dudley Observatory", "nald", "http://www.dudleyobservatory.org/"])
    reposmasterlist.append(
        [u"Duke University", "NcD", "http://library.duke.edu/rubenstein/"])
    reposmasterlist.append([
        u"Duke University Medical Center", "NcD-MC",
        "http://archives.mc.duke.edu/"
    ])
    reposmasterlist.append([
        u"Dwight D. Eisenhower Library", "KAbE",
        "http://www.eisenhower.archives.gov/"
    ])
    reposmasterlist.append(
        [u"Earlham College", "InRE", "http://library.earlham.edu/"])
    reposmasterlist.append([
        u"Eastern Kentucky University", "kyre",
        "http://libguides.eku.edu/archives"
    ])
    reposmasterlist.append(
        [u"Fermilab", "IBatF", "http://history.fnal.gov/index.html"])
    reposmasterlist.append(
        [u"George Mason University", "vifgm", "http://sca.gmu.edu/"])
    reposmasterlist.append([
        u"Georgetown University", "dgu",
        "http://www.library.georgetown.edu/dept/speccoll/"
    ])
    reposmasterlist.append([
        u"George Washington University", "DGW",
        "http://library.gwu.edu/collections/scrc"
    ])
    reposmasterlist.append([
        u"Georgia Institute of Technology", "GAT",
        "http://www.library.gatech.edu/"
    ])
    reposmasterlist.append([
        u"Georgia Southern University", "gstg",
        "http://library.georgiasouthern.edu/specialcollections/main.html"
    ])
    reposmasterlist.append([
        u"Gerald R. Ford Library", "MiAaF", "http://www.fordlibrarymuseum.gov/"
    ])
    reposmasterlist.append([
        u"Hagley Museum and Library", "DeGH", "http://www.hagley.org/library"
    ])
    reposmasterlist.append(
        [u"Harry S. Truman Library", "MoIT", "http://www.trumanlibrary.org/"])
    reposmasterlist.append(
        [u"Harvard University", "MH", "http://hul.harvard.edu/"])
    reposmasterlist.append([
        u"Hebrew University of Jerusalem", "IsJJNL",
        "http://www.huji.ac.il/huji/eng/library_e.htm"
    ])
    reposmasterlist.append([
        u"Henry E. Huntington Library", "CSmH",
        "http://www.huntington.org/huntingtonlibrary.aspx?id=554"
    ])
    reposmasterlist.append([
        u"Hoover Institution on War, Revolution and Peace", "csth",
        "http://www.hoover.org/library-and-archives"
    ])
    reposmasterlist.append([
        u"Houston Academy of Medicine", "TxHMC", "http://www.library.tmc.edu/"
    ])
    reposmasterlist.append([
        u"Houston Public Library. Houston Metropolitan Research Center", "TxH",
        "http://www2.houstonlibrary.org/hmrc/"
    ])
    reposmasterlist.append([
        u"Imperial College of Science and Technology", "UkLU-ICA",
        "http://www3.imperial.ac.uk/recordsandarchives"
    ])
    reposmasterlist.append([
        u"Imperial War Museum", "IWM",
        "http://www.iwm.org.uk/collections-research"
    ])
    reposmasterlist.append([
        u"Indiana University", "inu",
        "http://www.indiana.edu/~liblilly/index.php"
    ])
    reposmasterlist.append(
        [u"Institute for Advanced Study", "NjPI", "http://library.ias.edu/"])
    reposmasterlist.append([
        u"Institution of Engineering and Technology", "IET",
        "http://www.theiet.org/resources/library/archives/"
    ])
    reposmasterlist.append([
        u"Iowa State University", "iaamsusc",
        "http://www.lib.iastate.edu/spcl/index.html"
    ])
    reposmasterlist.append([
        u"Johns Hopkins University", "mdbj",
        "http://www.library.jhu.edu/collections/specialcollections/"
    ])
    reposmasterlist.append([
        u"King's College", "UkLU-K",
        "http://www.kcl.ac.uk/library/archivespec/index.aspx"
    ])
    reposmasterlist.append([
        u"Lawrence Berkeley National Laboratory", "culbla",
        "https://commons.lbl.gov/display/aro/Archives+and+Records"
    ])
    reposmasterlist.append([
        u"Lehigh University", "PBL",
        "http://www.lehigh.edu/library/speccoll/index.html"
    ])
    reposmasterlist.append([
        u"Library and Archives Canada", "OONL",
        "http://www.collectionscanada.gc.ca/index-e.html"
    ])
    reposmasterlist.append(
        [u"Library of Congress", "DLC", "http://findingaids.loc.gov/"])
    reposmasterlist.append([
        u"Louisiana State University", "LU", "http://www.lib.lsu.edu/special/"
    ])
    reposmasterlist.append(
        [u"Lowell Observatory", "AzFLO", "http://www.lowell.edu/"])
    reposmasterlist.append([
        u"Massachusetts Institute of Technology", "MCM-B",
        "http://libraries.mit.edu/archives/"
    ])
    reposmasterlist.append([
        u"McMaster University", "OHMA", "http://library.mcmaster.ca/archives/"
    ])
    reposmasterlist.append(
        [u"Michigan State University", "MiEM", "http://archives.msu.edu/"])
    reposmasterlist.append([
        u"Mount Holyoke College", "mshm",
        "http://www.mtholyoke.edu/archives/index.html"
    ])
    reposmasterlist.append([
        u"Museu de Astronomia e Ciências Afins (Brazil)", "BR-MAST",
        "http://www.mast.br/acervos_arquivistico.html"
    ])  #not an official MARC code
    reposmasterlist.append([
        u"National Academy of Sciences", "DNAS",
        "http://www.nasonline.org/about-nas/history/archives/"
    ])
    reposmasterlist.append([
        u"National Archives and Records Administration", "dna",
        "http://www.archives.gov/"
    ])
    reposmasterlist.append([
        u"National Center for Atmospheric Research/University Corporation for Atmospheric Research",
        "CoBA", "http://opensky.library.ucar.edu/"
    ])
    reposmasterlist.append([
        u"National Library of Australia", "AuCNL",
        "http://www.austehc.unimelb.edu.au/"
    ])
    reposmasterlist.append([
        u"National Radio Astronomy Observatory", "ViCRA",
        "http://www.nrao.edu/archives/"
    ])
    reposmasterlist.append(
        [u"New Mexico State University", "NmLcU", "http://rmoa.unm.edu/"])
    reposmasterlist.append(
        [u"New York Public Library", "NN", "http://www.nypl.org/"])
    reposmasterlist.append(
        [u"New York University", "NNU", "http://library.nyu.edu/"])
    reposmasterlist.append(
        [u"Niels Bohr Archive", "DK-KoNBA", "http://nba.nbi.dk/webpage.html"])
    reposmasterlist.append([
        u"North Carolina Department of Cultural Resources", "Nc-Ar",
        "http://www.ncdcr.gov/"
    ])  #not the name associated with this MARC code
    reposmasterlist.append([
        u"North Carolina State University", "ncrhsus",
        "http://www.lib.ncsu.edu/specialcollections/"
    ])
    reposmasterlist.append([
        u"Northwestern University", "IEN",
        "http://www.library.northwestern.edu/libraries-collections/evanston-campus/university-archives"
    ])
    reposmasterlist.append([
        u"Nuffield College, Oxford", "UkOxU-N",
        "http://www.nuffield.ox.ac.uk/library/"
    ])
    reposmasterlist.append(
        [u"Oberlin College", "oo", "http://www.oberlin.edu/library/special/"])
    reposmasterlist.append([
        u"Oregon State University", "orcs",
        "http://osulibrary.oregonstate.edu/"
    ])
    reposmasterlist.append([
        u"Pennsylvania State University", "PSt",
        "http://www.libraries.psu.edu/psul/home.html"
    ])
    reposmasterlist.append(
        [u"Princeton University", "njp", "http://www.princeton.edu/~rbsc/"])
    reposmasterlist.append(
        [u"Queen's University", "canQUEEN",
         "http://archives.queensu.ca/"])  #not an official MARC code
    reposmasterlist.append([
        u"Radcliffe Institute for Advanced Study", "MCR-S",
        "http://www.radcliffe.edu/schlesinger_library.aspx"
    ])
    reposmasterlist.append([
        u"Rice University", "TxHR",
        "http://library.rice.edu/collections/WRC/manuscripts"
    ])
    reposmasterlist.append(
        [u"Rockefeller Archive Center", "NNttR", "http://www.rockarch.org/"])
    reposmasterlist.append([
        u"Royal Astronomical Society", "uklors",
        "http://www.ras.org.uk/library"
    ])
    reposmasterlist.append([
        u"Royal Institution of Great Britain", "UkRi",
        "http://www.rigb.org/contentControl?action=displayContent&id=00000002889"
    ])
    reposmasterlist.append(
        [u"Royal Society", "UkLRs", "http://royalsociety.org/Collections/"])
    reposmasterlist.append([
        u"Rutgers University", "NjR",
        "http://www.libraries.rutgers.edu/rul/libs/scua/scua.shtml"
    ])
    reposmasterlist.append([
        u"Schenectady Museum", "NSchM",
        "http://www.schenectadymuseum.org/archives/archives.html"
    ])
    reposmasterlist.append([
        u"Science Museum (Great Britain)", "UkLS",
        "http://www.sciencemuseum.org.uk/"
    ])
    reposmasterlist.append([
        u"Scripps Institution of Oceanography", "CaLjSIOA",
        "http://libraries.ucsd.edu/locations/sio/scripps-archives/index.html"
    ])
    reposmasterlist.append(
        [u"Simon Fraser University", "BVAS", "http://www.sfu.ca/archives/"])
    reposmasterlist.append([
        u"Smith College", "MNS",
        "http://www.smith.edu/libraries/libs/archives/"
    ])
    reposmasterlist.append([
        u"Smithsonian Institution. Archives", "dsiai",
        "http://siarchives.si.edu/"
    ])
    reposmasterlist.append([
        u"Smithsonian Institution. National Air and Space Museum", "dsinas",
        "http://www.nasm.si.edu/research/"
    ])
    reposmasterlist.append([
        u"Smithsonian Institution. National Museum of American History",
        "dsimah", "http://americanhistory.si.edu/archives/ac-i.htm"
    ])
    reposmasterlist.append([
        u"Southern Methodist University", "TxDaM",
        "http://www.smu.edu/Libraries"
    ])
    reposmasterlist.append([
        u"Stanford Linear Accelerator Center (SLAC)", "CSt-SLAC",
        "http://www.slac.stanford.edu/history/"
    ])  #not an official MARC code
    reposmasterlist.append([
        u"Stanford University", "CSt-SCUA",
        "http://www-sul.stanford.edu/depts/spc/mss/"
    ])
    reposmasterlist.append([
        u"State University of New York at Albany", "nalsu",
        "http://library.albany.edu/speccoll/"
    ])
    reposmasterlist.append([
        u"State University of New York at Buffalo", "nbuuar",
        "http://library.buffalo.edu/specialcollections/"
    ])
    reposmasterlist.append([
        u"State University of New York at Stony Brook", "nsbsu",
        "http://www.stonybrook.edu/libspecial/"
    ])
    reposmasterlist.append(
        [u"Syracuse University", "NSyU", "http://library.syr.edu/find/scrc/"])
    reposmasterlist.append(
        [u"Temple University", "PPT", "http://library.temple.edu/collections"])
    reposmasterlist.append([
        u"Tennessee State Library and Archives", "T",
        "http://www.tn.gov/tsla/Collections.htm"
    ])
    reposmasterlist.append(
        [u"Texas A&M University", "TxCcTAM", "http://library.tamu.edu/"])
    reposmasterlist.append([
        u"Trinity College", "UkCU-TRI", "http://library.trincoll.edu/index.cfm"
    ])
    reposmasterlist.append(
        [u"Tufts University", "MMeT-DCA", "http://sites.tufts.edu/dca/"])
    reposmasterlist.append([
        u"United States Naval Academy", "MdAN", "http://usna.edu/Library/sca/"
    ])
    reposmasterlist.append([
        u"Université Louis Pasteur de Strasbourg", "FrSULP",
        "http://www.hp-physique.org/"
    ])
    reposmasterlist.append([
        u"University College, London", "UCL",
        "http://www.ucl.ac.uk/library/special-coll/"
    ])
    reposmasterlist.append([
        u"University of Adelaide", "AuAU",
        "http://www.adelaide.edu.au/library/special/"
    ])
    reposmasterlist.append([
        u"University of Alaska", "AkU",
        "http://library.uaf.edu/apr-collections"
    ])
    reposmasterlist.append([
        u"University of Alberta", "AEUA", "http://www.ualberta.edu/~archives/"
    ])
    reposmasterlist.append([
        u"University of Arizona", "AzU", "http://speccoll.library.arizona.edu/"
    ])
    reposmasterlist.append(
        [u"University of Bath", "Uk-Bath", "http://www.bath.ac.uk/library/"])
    reposmasterlist.append([
        u"University of Birmingham", "UkBU",
        "http://www.special-coll.bham.ac.uk/"
    ])
    reposmasterlist.append([
        u"University of Bristol", "UkBrU",
        "http://www.bristol.ac.uk/library/resources/specialcollections/"
    ])
    reposmasterlist.append([
        u"University of California, Berkeley", "CU-BANC",
        "http://bancroft.berkeley.edu/"
    ])
    reposmasterlist.append([
        u"University of California, Irvine", "CU-I",
        "http://special.lib.uci.edu/"
    ])
    reposmasterlist.append([
        u"University of California, Los Angeles", "CLSU",
        "http://www2.library.ucla.edu/libraries/special.cfm"
    ])
    reposmasterlist.append([
        u"University of California, San Diego", "CUS",
        "http://libraries.ucsd.edu/collections/sca/index.html"
    ])
    reposmasterlist.append([
        u"University of California, Santa Barbara", "cusb",
        "http://www.library.ucsb.edu/special-collections"
    ])
    reposmasterlist.append([
        u"University of California, Santa Cruz", "cmthl",
        "http://library.ucsc.edu/speccoll"
    ])
    reposmasterlist.append([
        u"University of Chicago", "ICU", "http://www.lib.uchicago.edu/e/scrc/"
    ])
    reposmasterlist.append([
        u"University of Cincinnati", "ohciuar",
        "http://libraries.uc.edu/collections/"
    ])
    reposmasterlist.append([
        u"University of Colorado", "CoU",
        "http://ucblibraries.colorado.edu/archives/index.htm"
    ])
    reposmasterlist.append([
        u"University of Dayton", "odau",
        "http://www.udayton.edu/libraries/archives_and_collections/"
    ])
    reposmasterlist.append([
        u"University of Delaware", "deneuar",
        "http://www.lib.udel.edu/ud/spec/"
    ])
    reposmasterlist.append(
        [u"University of Denver", "CoDU", "http://library.du.edu/site/"])
    reposmasterlist.append(
        [u"University of Florida", "fu", "http://web.uflib.ufl.edu/spec/"])
    reposmasterlist.append([
        u"University of Houston", "TxAHU-Li",
        "http://info.lib.uh.edu/about/campus-libraries-collections/special-collections"
    ])
    reposmasterlist.append([
        u"University of Idaho", "idu",
        "http://www.lib.uidaho.edu/special-collections/"
    ])
    reposmasterlist.append([
        u"University of Illinois at Chicago", "ICIU",
        "http://library.uic.edu/home/collections/manuscripts-and-rare-books"
    ])
    reposmasterlist.append([
        u"University of Illinois at Urbana-Champaign", "IU-Ar",
        "http://archives.library.illinois.edu/"
    ])
    reposmasterlist.append(
        [u"University of Iowa", "IaU", "http://www.lib.uiowa.edu/spec-coll/"])
    reposmasterlist.append([
        u"University of Kansas", "kus",
        "http://spencer.lib.ku.edu/collections/sc/"
    ])
    reposmasterlist.append([
        u"University of Leeds", "UkLeUBL",
        "http://library.leeds.ac.uk/special-collections"
    ])
    reposmasterlist.append([
        u"University of London, Birkbeck College", "UkLU-B",
        "http://www.bbk.ac.uk/lib/"
    ])
    reposmasterlist.append(
        [u"University of London Library", "UkLU", "http://www.ull.ac.uk/"])
    reposmasterlist.append([
        u"University of Manchester", "ukmajru",
        "http://www.library.manchester.ac.uk/specialcollections/"
    ])
    reposmasterlist.append([
        u"University of Maryland", "MdCpUHL", "http://www.lib.umd.edu/special/"
    ])
    reposmasterlist.append([
        u"University of Massachusetts at Amherst", "mu",
        "http://www.library.umass.edu/spcoll/"
    ])
    reposmasterlist.append(
        [u"University of Melbourne", "AuMU", "http://library.unimelb.edu.au/"])
    reposmasterlist.append([
        u"University of Miami", "fmu",
        "http://www.library.miami.edu/specialcollections/"
    ])
    reposmasterlist.append(
        [u"University of Michigan", "miu", "http://bentley.umich.edu/"])
    reposmasterlist.append(
        [u"University of Minnesota", "MnU", "http://special.lib.umn.edu/"])
    reposmasterlist.append([
        u"University of Mississippi", "MsU",
        "http://www.olemiss.edu/depts/general_library/archives/"
    ])
    reposmasterlist.append([
        u"University of Missouri", "mou",
        "http://mulibraries.missouri.edu/specialcollections/"
    ])
    reposmasterlist.append([
        u"University of Nebraska-Lincoln", "NbU",
        "http://libraries.unl.edu/spec"
    ])
    reposmasterlist.append([
        u"University of Nevada, Reno", "nvreusc",
        "http://knowledgecenter.unr.edu/materials/specoll/"
    ])
    reposmasterlist.append([
        u"University of New Hampshire", "nhu",
        "http://www.library.unh.edu/milne/"
    ])
    reposmasterlist.append([
        u"University of North Carolina at Chapel Hill", "ncush",
        "http://www.lib.unc.edu/wilson/"
    ])
    reposmasterlist.append([
        u"University of North Dakota", "NdU",
        "http://webapp.und.edu/dept/library/Collections/"
    ])
    reposmasterlist.append([
        u"University of Notre Dame", "inndh",
        "http://archives.nd.edu/index.htm"
    ])
    reposmasterlist.append([
        u"University of Nottingham", "UkNtU",
        "http://www.nottingham.ac.uk/manuscriptsandspecialcollections/index.aspx"
    ])
    #	reposmasterlist.append([u"University of Oregon", "OrU", "http://libweb.uoregon.edu/speccoll/archives/"])
    reposmasterlist.append([
        u"University of Oxford", "UkOxU", "http://www.bodleian.ox.ac.uk/bodley"
    ])
    reposmasterlist.append([
        u"University of Pennsylvania", "puar", "http://www.library.upenn.edu/"
    ])
    reposmasterlist.append([
        u"University of Pittsburgh", "PPiU",
        "http://www.library.pitt.edu/libraries/archives/archives.html"
    ])
    reposmasterlist.append([
        u"University of Puget Sound", "WaTU",
        "http://www.pugetsound.edu/academics/academic-resources/collins-memorial-library/archives/"
    ])
    reposmasterlist.append([
        u"University of Reading", "UkReU",
        "http://www.reading.ac.uk/special-collections/sp-home.aspx"
    ])
    reposmasterlist.append([
        u"University of Rhode Island", "RUn",
        "http://www.uri.edu/library/special_collections/"
    ])
    reposmasterlist.append([
        u"University of Rochester", "NRU", "http://www.library.rochester.edu/"
    ])
    reposmasterlist.append([
        u"University of Sheffield", "UkShU",
        "http://www.sheffield.ac.uk/library/special"
    ])
    reposmasterlist.append(
        [u"University of South Dakota", "SdU", "http://www.usd.edu/library/"])
    reposmasterlist.append([
        u"University of Tennessee, Knoxville", "tusc",
        "http://www.lib.utk.edu/special/"
    ])
    reposmasterlist.append([
        u"University of Texas at Austin", "TxU", "http://www.cah.utexas.edu/"
    ])
    reposmasterlist.append([
        u"University of Toronto", "OTUP",
        "http://onesearch.library.utoronto.ca/special-collections"
    ])
    reposmasterlist.append([
        u"University of Utah", "uumlsc",
        "http://www.lib.utah.edu/collections/special-collections/"
    ])
    reposmasterlist.append([
        u"University of Virginia", "ViU",
        "http://www.lib.virginia.edu/index.html"
    ])
    reposmasterlist.append([
        u"University of Wales, Aberystwyth", "WlAbUW",
        "https://archives.aber.ac.uk/index.php/"
    ])
    reposmasterlist.append([
        u"University of Washington", "WaU-AR",
        "http://www.lib.washington.edu/specialcollections/"
    ])
    reposmasterlist.append(
        [u"University of Wyoming", "WyU-AH", "http://ahc.uwyo.edu/"])
    reposmasterlist.append([
        u"Vanderbilt University", "TNJ",
        "http://www.library.vanderbilt.edu/speccol/"
    ])
    reposmasterlist.append(
        [u"Vassar College", "NPV", "http://specialcollections.vassar.edu/"])
    reposmasterlist.append([
        u"Virginia Polytechnic Institute and State University", "viblbv",
        "http://spec.lib.vt.edu/"
    ])
    reposmasterlist.append([
        u"Washington University", "moslwua",
        "http://library.wustl.edu/units/spec/"
    ])
    reposmasterlist.append([
        u"Wellcome Institute for the History of Medicine", "UkLW",
        "http://library.wellcome.ac.uk/"
    ])
    reposmasterlist.append([
        u"Wellesley College", "MWelC",
        "http://www.wellesley.edu/lts/collections/archives"
    ])
    #	reposmasterlist.append([u"Western Historical Manuscript Collection", "MoCoJ", "http://shs.umsystem.edu/manuscripts/"])
    reposmasterlist.append([
        u"Woods Hole Oceanographic Institution", "MWhB",
        "http://dla.whoi.edu/dla/"
    ])
    reposmasterlist.append([
        u"Worcester Polytechnic Institute", "MWP",
        "http://www.wpi.edu/academics/library/"
    ])
    reposmasterlist.append(
        [u"Yale University Library", "CtY-BR", "http://www.library.yale.edu/"])

    newreposcounter = 0
    newrepos = ""
    newreposlist = []

    findingaidcounter = 0
    reposcounter = 0

    for record in reader:
        if record['903']:  # Get only records where 903a="PHFAWS"
            phfawsfull = record.get_fields('903')
            for field in phfawsfull:
                phfawsnote = field['a']
                if 'PHFAWS' in phfawsnote:
                    if record[
                            '852'] is not None:  # Get only records where 852/repository is not blank
                        repository = record.get_fields('852')
                        for field in repository:
                            reposname = field['a']
                        reposname = marc8_to_unicode(reposname)
                        reposname = reposname.rstrip('.,')
                        reposcode = None
                        reposurl = None
                        for row in reposmasterlist:  # Match field 852 against the repository list.
                            if row[0] == reposname:  # If it's in the list, use the list to populate our repository-related fields
                                reposcode = row[1]
                                reposurl = row[2]
                        author = marc8_to_unicode(record.author())
                        author = author.rstrip('.,')
                        title = marc8_to_unicode(record.title())
                        date = parse_date(record)
                        title = '%s %s' % (title, date)
                        title = title.rstrip('.,')
                        if record[
                                '856'] is not None:  # Get only records where 856 is not blank
                            links = record.get_fields('856')
                            for field in links:
                                human_url = None
                                titlenote = None
                                human_code = None
                                linksthree = field['3']
                                if linksthree is not None and "online finding aid" in linksthree:
                                    #								if linksthree == '(online finding aid)':	# Use only 856 entries for finding aids. A record may have multiple 856es. #Can this be less strict? (if in...)
                                    if reposcode == None:  # If this record's repository wasn't inthe repository list, we need to create output for the list of new repositories
                                        newreposcounter += 1
                                        newrepos = '%s %s \n' % (newrepos,
                                                                 reposname)
                                        reposcode = "NEWCODE" + str(
                                            newreposcounter)
                                        reposurl = "TEST"
                                        reposmasterlist.append(
                                            [reposname, reposcode, reposurl])
                                        newreposlist.append(
                                            [reposname, reposcode, reposurl])
                                    human_url = field['u']
                                    titlenote = field['z']
                                    if titlenote is not None:
                                        fulltitle = '%s, %s' % (title,
                                                                titlenote)
                                    else:
                                        fulltitle = title
                                    filingtitlejoin = '%s %s' % (author,
                                                                 fulltitle)
                                    linkdata = '<tr><td width="35%%">%s</td><td width="65%%">%s</td></tr>' % (
                                        make_link(human_url, author),
                                        make_link(human_url, fulltitle)
                                    )  # create link rows for the browse page
                                    browselinks.append(
                                        [
                                            alafiling(filingtitlejoin),
                                            linkdata, reposname
                                        ]
                                    )  # add links to the browse page list along with the sorting metadata
                                    findingaidcounter += 1
                                    human_code = field['w']
                                    if record[
                                            '857'] is not None:  # If there's a separate URL for indexing, we need to handle that too.
                                        crawl_code = None
                                        crawllinks = record.get_fields('857')
                                        for crawlfield in crawllinks:
                                            crawl_code = crawlfield['w']
                                            if human_code == crawl_code:  # Since each record may have multiple 856 entries, we need to make sure they're linked with the 857 entries. We do that by matching 856w to 857w
                                                crawl_url = crawlfield['u']
                                                shortstartnum = crawl_url.rfind(
                                                    "/") + 1
                                                if crawl_url[
                                                        shortstartnum:] == -1:
                                                    shorttitle = crawl_url[
                                                        shortstartnum:]
                                                else:
                                                    shortendnum = crawl_url.rfind(
                                                        ".")
                                                    shorttitle = crawl_url[
                                                        shortstartnum:
                                                        shortendnum]
#												xmlURLs.append([fulltitle.replace("&", "&amp;"), reposcode, reposurl.replace("&", "&amp;"), reposname.replace("&", "&amp;"), crawl_url.replace("&", "&amp;"), human_url.replace("&", "&amp;"), shorttitle])
                                                nutchURLs.append([
                                                    reposname,
                                                    alafiling(filingtitlejoin),
                                                    crawl_url, human_url,
                                                    fulltitle, author, reposurl
                                                ])
                                    else:
                                        crawl_url = human_url
                                        shortstartnum = crawl_url.rfind(
                                            "/") + 1
                                        if crawl_url[shortstartnum:].find(
                                                ".") == -1:
                                            shorttitle = crawl_url[
                                                shortstartnum:]
                                        else:
                                            shortendnum = crawl_url.rfind(".")
                                            shorttitle = crawl_url[
                                                shortstartnum:shortendnum]


#										xmlURLs.append([fulltitle.replace("&", "&amp;"), reposcode, reposurl.replace("&", "&amp;"), reposname.replace("&", "&amp;"), crawl_url.replace("&", "&amp;"), human_url.replace("&", "&amp;"), shorttitle])
                                        nutchURLs.append([
                                            reposname,
                                            alafiling(filingtitlejoin),
                                            crawl_url, human_url, fulltitle,
                                            author, reposurl
                                        ])
                                else:
                                    pass
                        else:
                            pass
                    else:
                        pass
                else:
                    pass
        else:
            pass

    # Output lists needed by crawlers
    xmlURLs.sort(key=lambda name: name[0])
    xmlURLs.sort(key=lambda repo: repo[3])
    nutchURLs.sort(key=lambda name: name[1])
    nutchURLs.sort(key=lambda repo: repo[0])
    #	make_xml(xmlURLs, 'ead_urls.xml')	# Output XML for Verity
    make_nutch(nutchURLs, 'nutch')  # Output list for nutch
    #	make_ead_urls(nutchURLs, 'ead_urls.sh')	# Output URLs list for Verity
    #	make_titles_bif(nutchURLs, 'titles.bif')	# Output titles list for Verity

    # Output browse.html
    for row in reposmasterlist:
        browserepos.append([row[0], row[1], row[2], alafiling(row[0])])
        reposcounter += 1
    browselinks.sort(key=lambda name: name[0])
    make_browse_page(browserepos, browselinks, 'browse.html')

    # Output list of new repositories
    newreposlist.sort(key=lambda rep: rep[0])
    if newreposcounter != 0:
        status = '%d new repositories found. you must add information on these repositories, then run phfaws.py again. Please see the newly updated rewrepos.txt for details.' % (
            newreposcounter)
        sys.stderr.write(status)
        make_newrepos_list(newreposlist, 'newrepos.txt')

    # Output file of counts
    make_counts(findingaidcounter, reposcounter, 'phfawscounts.txt')
Ejemplo n.º 17
0
def _docpres_item(record, newsletterissue):
    """Return the docpres dict for ``record``, or None if it is not in the issue.

    A record belongs to the issue when it has a 901 field whose formatted
    value contains ``newsletterissue``.  Missing 904 / 852 / author values
    are replaced by explicit 'RECORD MISSING ...' placeholders so that an
    incomplete record still appears in the output.
    """
    issuefield = record['901']
    if issuefield is None:
        return None
    issue = issuefield.format_field()
    if newsletterissue not in issue:
        return None

    if record['904'] is not None:
        country = record['904']['a']
    else:
        country = 'RECORD MISSING 904 TAG'

    if record['852'] is not None:
        repos = record['852'].format_field()
    else:
        repos = 'RECORD MISSING 852 TAG'

    creator = record.author()
    if creator is None:
        creator = 'RECORD MISSING AUTHOR'

    title = '%s.' % strip_isbd(subfield_list(record['245'], 'akhbcnps'))

    return {
        'issue': issue,
        'country': country,
        'repos': marc8_to_unicode(repos),
        'creator': marc8_to_unicode(creator),
        'title': marc8_to_unicode(title),
        'date': parse_date(record),
        'extent': get_all_tag(record, '300'),
        'restrictions': marc8_to_unicode(get_all_tag(record, '506')),
        'scopecontent': marc8_to_unicode(get_all_tag(record, '520')),
        'bioghist': marc8_to_unicode(get_all_tag(record, '545')),
    }


def main(marcfile, newsletterissue, detailed):
    """Write the docpres HTML pages for one newsletter issue.

    Reads MARC records from ``marcfile``, keeps those whose 901 field
    contains ``newsletterissue``, sorts them by country, repository, creator
    and title, groups them by repository and renders two pages through
    ``dppage``: ``901a.html`` (subtitle 'New Collections') and ``901b.html``
    (subtitle 'New Finding Aids').  A repository group goes to the finding
    aids page when the 901 value of its first record contains
    ``'fa' + newsletterissue``; every other group goes to the collections
    page.

    Args:
        marcfile: path of the MARC file to read.
        newsletterissue: issue identifier looked for in field 901.
        detailed: currently ignored, see the NOTE in the body.
    """
    faissue = 'fa%s' % (newsletterissue,)

    # NOTE(review): the ``detailed`` argument is discarded -- it is always
    # replaced by sys.argv[3] ('' when absent).  Kept as-is because callers
    # may rely on it; confirm whether the parameter should take precedence.
    try:
        detailed = sys.argv[3]
    except IndexError:
        detailed = ''

    docpreslist = []
    # MARC is a binary format; the context manager closes the input even if
    # reading fails part-way through.
    with open(marcfile, 'rb') as marcstream:
        for record in MARCReader(marcstream):
            # Best-effort by design: a slightly incomplete docpres list is
            # better than none, so a broken record is reported and skipped
            # rather than aborting the run.  Only ``Exception`` is caught so
            # that Ctrl-C and SystemExit still stop the program.
            try:
                item = _docpres_item(record, newsletterissue)
            except Exception as exc:
                sys.stderr.write('Skipping record: %r\n' % (exc,))
                continue
            if item is not None:
                docpreslist.append(item)

    # Single parenthesised argument: prints identically under the Python 2
    # print statement and the Python 3 print function.
    print('%s records matching "%s" found.' % (len(docpreslist),
                                               newsletterissue))

    # groupby only merges adjacent items, so the list must be sorted with
    # 'repos' ahead of the per-record keys before grouping.
    docpreslist.sort(key=itemgetter('country', 'repos', 'creator', 'title'))

    newcoll = []
    newfa = []
    for _key, group in groupby(docpreslist, itemgetter('repos')):
        repos = list(group)
        target = newfa if faissue in repos[0]['issue'] else newcoll
        build_list(repos, target, detailed)

    pages = (
        ('901a.html', newcoll, 'New Collections'),
        ('901b.html', newfa, 'New Finding Aids'),
    )
    for filename, chunks, subtitle in pages:
        # Render before opening so a template error cannot leave a
        # truncated, empty output file behind.
        page = dppage.substitute(collections=''.join(chunks),
                                 subtitle=subtitle)
        with open(filename, 'w') as outfile:
            outfile.write(page)
 repos_name = repos_address = repos_country = gc_address = normalized_address = address_source = u''
 repos = {}
 repos_detail = {}
 repos_coll = {}
 if record['852'] is not None:
   repos_name = utf8_join(record['852'].get_subfields('a', 'b'))
   repos_address = utf8_join(record['852'].get_subfields('e'))
 else:
   (repos_name, repos_address) = (default_name, default_address)
 repos = { 'label': repos_name, 'type': 'repository' }
 auth = record.author()
 if auth == None: auth = ''
 repos_detail['id'] = repos_name
 repos_coll['id'] = repos_name
 repos_coll['repository'] = repos_name
 repos_coll['auth'] = pymarc.marc8_to_unicode(auth)
 repos_coll['label'] = pymarc.marc8_to_unicode(record['245'].format_field())
 try:
   repos_country = pymarc.marc8_to_unicode(record['904']['a']).strip('.:,;/ ')
 except:
   pass
 if repos not in repos_list:
   repos_list.extend([repos])
   repos_detail['country'] = repos_country
   repos_detail['address'] = repos_address
   for engine in engines:
     normalized_address = normalize_address(repos_address)
     try:
       canonical_address, (lat, lng) = engine.geocode(normalized_address)
       address_source = engine.__class__.__name__
       repos_detail['normalized_address'] = normalized_address