def callZ3950(search_id, target, depth=0):
    """Fetch a MARC record over Z39.50, retrying while the server returns nothing.

    :param search_id: Identifier to look up.
    :param target: ``'UIU'`` queries the CARLI Voyager database (use attribute
        12); any other value queries lx2.loc.gov (use attribute 9), choosing
        the ``NAF`` database when ``search_id`` contains ``'n'`` and ``SAF``
        otherwise.
    :param depth: Number of retries already performed; the lookup is retried
        (after ``waitSixSeconds``) while this is below 20.
    :return: ``(record, None)`` on success; ``(False, message)`` when a
        returned record has a bad leader or cannot be decoded;
        ``(None, 'RECORD NOT FOUND')`` when nothing usable was found.
    """
    if target == 'UIU':
        print("UIUC NUMBER:  %s" % (search_id,))
        query = zoom.Query('PQF', '@attr 1=12 %s' % str(search_id))
        database_address = 'z3950.carli.illinois.edu'
        username = '******'
        database_name = 'voyager'
    else:
        print("LC NUMBER:  %s" % (search_id,))
        query = zoom.Query('PQF', '@attr 1=9 %s' % str(formatLCCN(search_id)))
        database_address = 'lx2.loc.gov'
        username = ''
        database_name = 'NAF' if 'n' in search_id else 'SAF'

    res = queryZ3950(database_address, username, database_name, query)
    print(len(res))
    print(res)

    if len(res) > 0:
        # Start unbound-safe: with several hits, none may match search_id.
        marc_record = None
        several_hits = len(res) > 1
        for r in res:
            if not checkLeader(r.data[:24]):
                return (False, 'BROKEN LEADER')
            try:
                candidate = Record(data=r.data)
            except UnicodeDecodeError:
                return (False, 'BROKEN CHARACTER IN RECORD')
            if several_hits:
                # Disambiguate by control number (field 001, spaces removed).
                lccn = candidate.get_fields('001')[0].data.replace(" ", "")
                if lccn != search_id:
                    continue
            marc_record = candidate
            fixNames(marc_record)
        if marc_record is None:
            # Previously this path raised UnboundLocalError.
            return (None, 'RECORD NOT FOUND')
        return (marc_record, None)
    elif depth < 20:
        waitSixSeconds(datetime.datetime.now().time())
        return callZ3950(search_id, target, depth=depth + 1)
    else:
        return (None, 'RECORD NOT FOUND')
def read_mc(sys_no):
    """
    Loads marc data from aleph.unibas.ch for one single system number.

    The raw record is also handed to ``__write_to_cache`` before parsing.

    :param sys_no: System number to which the marc entry is to be loaded.
    :return: The first record yielded by ``MARCReader`` for the fetched data,
        or ``None`` when the server cannot be reached or returns no record.
    """
    try:
        conn = zoom.Connection('aleph.unibas.ch', 9909)
        try:
            conn.databaseName = 'dsv05'
            conn.preferredRecordSyntax = 'USMARC'
            query = zoom.Query('PQF', '@attr 1=1032 ' + sys_no)
            res = conn.search(query)
            if len(res) == 0:
                # Avoid IndexError on res[0] when nothing matches.
                print("\n!!! Error: no record found for " + sys_no + " !!!\n")
                return None
            data = bytes(res[0].data)
        finally:
            # Release the connection whether or not the lookup succeeded.
            conn.close()
    except zoom.ConnectionError:
        print("\n!!! Error: could not connect to aleph !!!\n")
        return None

    __write_to_cache(data, sys_no)
    reader = MARCReader(data, force_utf8=True, to_unicode=True)
    return next(reader)
def build_qobject(self, qstring):
    """Create a PQF ``zoom.Query`` from ``qstring`` and return it.

    Both the query type and the query text are UTF-8 encoded before being
    passed to ``zoom.Query``. The resulting object's type and pretty-printed
    form are logged at debug level. Called by search().
    """
    query_type = 'PQF'.encode('utf-8')
    query_text = qstring.encode('utf-8')
    built = zoom.Query(query_type, query_text)
    log.debug('type(qobject), `%s`' % type(built))
    log.debug('pprint.pformat(qobject), `%s`' % pprint.pformat(built))
    return built
def run(self):
    """Issue randomly chosen CCL queries until more than 500 have been run.

    When the module-level ``parse_only`` flag is false, each query is sent to
    the server on localhost:2100 and every returned record is passed to
    ``self.consume``; otherwise queries are only parsed. On completion the
    worker puts itself on ``self.terminate_queue``.
    """
    if not parse_only:
        self.conn = zoom.Connection('localhost', 2100)
        self.conn.preferredRecordSyntax = 'SUTRS'

    while True:
        self.count += 1
        query_str = random.choice(self.queries)
        try:
            parsed = zoom.Query('CCL', query_str)
            if not parse_only:
                for rec in self.conn.search(parsed):
                    self.consume(rec)
        except zoom.Bib1Err:
            # Server-side diagnostics are deliberately ignored.
            pass
        except zoom.QuerySyntaxError as e:
            print("e", e, "q", query_str)

        if self.count <= 500:
            continue

        if not parse_only:
            self.conn.close()
        # should randomly do clean vs. not clean exit
        self.terminate_queue.put(self, 1)
        break
def run():
    """Scan the 'NL' database for ``ti="1066"`` and print the first ten entries."""
    conn = zoom.Connection('amicus.nlc-bnc.ca', 210)
    try:
        conn.databaseName = 'NL'
        q = zoom.Query('CCL', 'ti="1066"')
        ss = conn.scan(q)
        for s in ss[0:10]:
            print(s)
    finally:
        # Close the connection even if the scan fails.
        conn.close()
def RunQuery():
    """Run a Z39.50 query & save MARC results to files.

    Each record of the result set is written to a numbered ``.bin`` file in
    the system temporary directory and then passed to ``ParseRecord``.
    """
    conn = zoom.Connection('z3950.loc.gov', 7090)
    try:
        conn.databaseName = 'VOYAGER'
        conn.preferredRecordSyntax = 'USMARC'

        query = zoom.Query('CCL', 'ti="1066 and all that"')
        res = conn.search(query)

        for ifilecount, r in enumerate(res):
            sSaveAs = os.path.join(
                tempfile.gettempdir(),
                "PyZ3950 search resultset %d.bin" % ifilecount)
            print("Saving as file:", sSaveAs)
            # The context manager closes the file even if the write fails.
            with open(sSaveAs, "wb") as fx:
                fx.write(r.data)
            # parse each record as we save
            ParseRecord(sSaveAs)
    finally:
        # Always release the connection, including on errors.
        conn.close()
def library_search(self, query):
    """
    Search the library catalogue for the given author/title/ISBN/ISSN.

    @param query: The query to be performed
    @type query: molly.apps.library.models.LibrarySearchQuery
    @return: A list of results; empty when the server reports Bib-1
             condition 31 or 108
    @rtype: [LibrarySearchResult]
    """
    connection = self._make_connection()

    # (CCL clause template, value) pairs, in the order they are emitted.
    criteria = (
        ('(au="%s")', query.author),
        ('(ti="%s")', query.title),
        ('(isbn="%s")', query.isbn),
        ('((1,8)="%s")', query.issn),
    )

    # Convert Query object into a Z39.50 query - we escape for the query by
    # removing quotation marks
    clauses = [template % value.replace('"', '')
               for template, value in criteria
               if value is not None]

    z3950_query = zoom.Query('CCL', 'and'.join(clauses))

    try:
        results = self.Results(connection.search(z3950_query), self._wrapper)
    except zoom.Bib1Err as e:
        # 31 = Resources exhausted - no results available
        if e.condition in (31, 108):
            return []
        raise
    return results
def z3950_query(target=None, keyword=None, qualifier='(1,1016)',
                query_type='CCL'):
    """Run ``qualifier=keyword`` against a Z39.50 target.

    :param target: Mapping with the keys ``host``, ``database``, ``port``,
        ``syntax``, ``user`` and ``password``.
    :param keyword: Search term appended after ``qualifier`` and ``=``.
    :param qualifier: Left-hand side of the query string.
    :param query_type: Query notation passed to ``zoom.Query``.
    :return: ``(True, result_set)``.
    :raises ValueError: If ``target`` is ``None``.
    """
    if target is None:
        raise ValueError('Z3950 target not provided.')

    host = target['host']
    database = target['database']
    port = target['port']
    syntax = target['syntax']
    user = target['user']
    password = target['password']

    # Authenticate only when both credentials are supplied.
    has_credentials = user is not None and password is not None
    if has_credentials:
        conn = zoom.Connection(host, port, user=user, password=password)
    else:
        conn = zoom.Connection(host, port)
    conn.databaseName = database
    conn.preferredRecordSyntax = syntax

    query = zoom.Query(query_type, qualifier + '=' + keyword)
    return True, conn.search(query)
def gather_stage(self, harvest_job):
    """Collect the records of a Z39.50 source and queue harvest objects.

    Every record returned by the CCL query ``metadata`` is identified by the
    MD5 hex digest of its raw data. Records whose digest is not already a
    current guid for this source are saved as ``new`` objects; current guids
    that no longer appear are saved as ``delete`` objects.

    :param harvest_job: The job whose source is to be gathered.
    :return: List of ids of the harvest objects created, or ``None`` after
        recording a gather error when anything fails.
    """
    log = logging.getLogger(__name__ + '.WAF.gather')
    log.debug('z3950Harvester gather_stage for job: %r', harvest_job)

    self.harvest_job = harvest_job

    # Get source URL
    source_url = harvest_job.source.url

    self._set_source_config(harvest_job.source.config)

    # get current objects out of db
    current_rows = model.Session.query(
        HarvestObject.guid, HarvestObject.package_id
    ).filter(
        HarvestObject.current == True  # noqa: E712 - SQL expression, not a bool test
    ).filter(
        HarvestObject.harvest_source_id == harvest_job.source.id
    )

    guid_to_package_id = dict((row[0], row[1]) for row in current_rows)
    current_guids = set(guid_to_package_id.keys())
    current_guids_in_harvest = set()

    # Get contents
    try:
        conn = zoom.Connection(source_url,
                               int(self.source_config.get('port', 210)))
        conn.databaseName = self.source_config.get('database', '')
        conn.preferredRecordSyntax = 'XML'
        conn.elementSetName = 'T'
        records = conn.search(zoom.Query('CCL', 'metadata'))

        ids = []
        for result in records:
            digest = hashlib.md5(result.data).hexdigest()
            if digest in current_guids:
                current_guids_in_harvest.add(digest)
                continue
            obj = HarvestObject(
                job=harvest_job, guid=digest,
                extras=[
                    HOExtra(key='status', value='new'),
                    HOExtra(key='original_document',
                            value=result.data.decode('latin-1')),
                    HOExtra(key='original_format', value='fgdc'),
                ])
            obj.save()
            ids.append(obj.id)

        # Anything current that was not seen this time is marked for deletion.
        for guid in (current_guids - current_guids_in_harvest):
            obj = HarvestObject(
                job=harvest_job, guid=guid,
                package_id=guid_to_package_id[guid],
                extras=[HOExtra(key='status', value='delete')])
            obj.save()
            ids.append(obj.id)

        return ids
    except Exception as e:
        self._save_gather_error(
            'Unable to get content for URL: %s: %r' % (source_url, e),
            harvest_job)
        return None
def zoom_record(self, bibid):
    """Return the first record found for ``bibid`` (PQF use attribute 12).

    Returns ``None`` when the search yields no records. Any exception raised
    by the search propagates unchanged.
    """
    pqf = '@attr 1=12 %s' % bibid.encode('utf-8')
    query = zoom.Query('PQF', pqf)
    results = self.conn.search(query)
    if len(results) > 0:
        return results[0]
    return None
def run():
    """Scan the 'NL' database from ``ti=A`` and print each term with its fields.

    ``numberOfEntries`` is set to 80 on the connection before scanning.
    """
    conn = zoom.Connection('amicus.nlc-bnc.ca', 210)
    try:
        conn.databaseName = 'NL'
        q = zoom.Query('CCL', 'ti=A')
        conn.numberOfEntries = 80
        ss = conn.scan(q)
        # The scan set is read through index-based accessors.
        for i in range(len(ss)):
            print("%s %s" % (ss.get_term(i), ss.get_fields(i)))
    finally:
        # Close the connection even if the scan fails.
        conn.close()
def lookup(self, term):
    """Search for ``term`` and return the parsed contents of all records.

    Records are parsed with ``xmlparse`` or ``grs1parse`` according to their
    ``syntax``; records in any other syntax are reported on stdout and
    skipped. The parser outputs are concatenated into one list.
    """
    ccl_text = 'attrset(XD1/(1,1)="%s")' % (term, )
    result_set = self.conn.search(zoom.Query('CCL', ccl_text))
    found = []
    parsers = {'XML': xmlparse, 'GRS-1': grs1parse}
    for record in result_set:
        parse = parsers.get(record.syntax, None)
        if parse is None:
            print("Unknown syntax: %s for %s" % (record.syntax, record.data))
            continue
        found.extend(parse(record.data))
    return found
def search(host, port, database, query, start=1, limit=10):
    """Run a CCL query and return the matching records as parsed XML elements.

    :param host: Z39.50 server host.
    :param port: Z39.50 server port.
    :param database: Database name to search.
    :param query: CCL query; converted with ``str()``.
    :param start: One-based position of the first record to fetch.
    :param limit: Maximum number of records to fetch; ``None`` fetches every
        record from ``start`` onwards.
    :return: ``(elements, total)`` where ``elements`` are the parsed
        ``<record>`` elements and ``total`` is the size of the result set.
    """
    conn = zoom.Connection(host, port)
    try:
        conn.databaseName = database
        conn.preferredRecordSyntax = 'XML'
        res = conn.search(zoom.Query('CCL', str(query)))
        total = len(res)

        first = start - 1
        available = max(total - first, 0)
        # A missing limit used to crash; treat it as "no upper bound".
        to_show = available if limit is None else min(available, limit)

        # would be good to cache the result set for iterative display
        chunks = [str(res[i]).replace('\n', '')
                  for i in range(first, first + to_show)]
    finally:
        # Release the connection on both the success and the error path.
        conn.close()

    raw = "".join(chunks)
    record_pattern = re.compile('<record .*?</record>', re.M)

    parsed = []
    for rec in record_pattern.findall(raw):
        # TODO: fix this ascii/replace, once our z3950/marc encoding
        # issues are sorted out.
        if isinstance(rec, bytes):
            # Byte strings (Python 2 ``str``) are decoded leniently as ASCII.
            rec = rec.decode('ascii', 'replace')
        # replace multiple 'unknown' characters with a single one.
        rec = re.sub(u'\ufffd+', u'\ufffd', rec)
        parsed.append(ET.fromstring(rec.encode('utf-8')))
    return parsed, total
def get_marc(target_name, cclquery, result_offset):
    """Return the raw data of the first record matching a query.

    :param target_name: Key into the module-level ``targets`` mapping; the
        entry's ``identifier`` is split by ``re_identifier`` into host, port
        and database (port defaults to 210 when absent).
    :param cclquery: Query text. NOTE(review): despite the name it is sent
        as a PQF query - confirm what callers pass.
    :param result_offset: Accepted for interface compatibility but not used:
        the original loop returned on its first iteration, so the code that
        consulted this value was unreachable. That behaviour is kept.
    :return: ``data`` of the first record, or ``None`` for an empty result.
    """
    target = targets[target_name]
    m = re_identifier.match(target['identifier'])
    (host, port, db) = m.groups()
    port = int(port) if port else 210
    conn = zoom.Connection(host, port)
    try:
        if db:
            conn.databaseName = db
        conn.preferredRecordSyntax = 'USMARC'
        query = zoom.Query('PQF', cclquery)
        res = conn.search(query)
        for r in res:
            return r.data
        return None
    finally:
        # The connection was previously never closed.
        conn.close()
def run(self):
    """Issue randomly chosen CCL queries in an endless loop.

    When the module-level ``parse_only`` flag is false, each query is sent to
    the server on localhost:2100 and every returned record is passed to
    ``self.consume``; otherwise queries are only parsed. Query syntax errors
    are printed; Bib-1 errors are ignored.

    NOTE(review): no exit condition is visible in this block - confirm that
    this is intended.
    """
    if not parse_only:
        self.conn = zoom.Connection('localhost', 2100)
        self.conn.preferredRecordSyntax = 'SUTRS'

    while True:
        self.count += 1
        query_str = random.choice(self.queries)
        try:
            parsed = zoom.Query('CCL', query_str)
            if not parse_only:
                for rec in self.conn.search(parsed):
                    self.consume(rec)
        except zoom.Bib1Err:
            pass
        except zoom.QuerySyntaxError as e:
            print("e %s q %s" % (e, query_str))
def control_number_search(self, control_number):
    """
    Look up a single item by its control number.

    Double quotes are stripped from the input before it is placed in the
    CCL query.

    @param control_number: The unique ID of the item to be looked up
    @type control_number: str
    @return: The item with this control ID, or None if none can be found
    @rtype: LibrarySearchResult
    """
    sanitized = control_number.replace('"', '')
    ccl_text = '(1,%s)="%s"' % (self._control_number_key, sanitized)
    z3950_query = zoom.Query('CCL', ccl_text)

    connection = self._make_connection()
    results = self.Results(connection.search(z3950_query), self._wrapper)
    return results[0] if len(results) > 0 else None
def open_stream(self, stream):
    """Open a Z39.50 search described by a URL and return its result set.

    The URL is parsed with ``self._parse_url``; its directory part is used as
    the database name and its ``query`` argument as a CQL query. The optional
    ``preferredRecordSyntax`` and ``elementSetName`` arguments default to
    ``'USMARC'`` and ``'F'``. The result count is stored in ``self.total``.

    :param stream: URL of the search to run.
    :return: The result set of the search.
    """
    # Rewrite only the first 'z3950' (the scheme of a z3950:// URL). An
    # unbounded replace() also mangled host names such as z3950.loc.gov.
    server = stream.replace('z3950', 'https', 1)
    (transport, user, passwd, host, port, dirname, filename, args,
     anchor) = self._parse_url(server)

    conn = zoom.Connection(host, port)
    conn.databaseName = dirname
    qo = zoom.Query('CQL', args['query'])

    if 'preferredRecordSyntax' in args:
        conn.preferredRecordSyntax = args['preferredRecordSyntax']
    else:
        conn.preferredRecordSyntax = 'USMARC'

    if 'elementSetName' in args:
        conn.elementSetName = args['elementSetName']
    else:
        conn.elementSetName = 'F'

    rs = conn.search(qo)
    self.total = len(rs)
    return rs
def search(self, query, position=0, size=10, syntax='CCL'):
    """Return the results of a database query.

    :param query: The database query.
    :param position: The position of the first record (zero-based index).
    :param size: The maximum number of records to return.
    :param syntax: The syntax of the query, either CCL, S-CCL, CQL, S-CQL,
        PQF, C2, ZSQL or CQL-TREE.
    :returns: A :class:`Dataset` object containing the raw record data.
    :raises zoom.QuerySyntaxError: If the query cannot be parsed.
    """
    conn = self._connect()
    try:
        parsed = zoom.Query(syntax, query)
    except zoom.QuerySyntaxError:  # pragma: no cover
        raise zoom.QuerySyntaxError("The query could not be parsed.")

    first = int(position)
    last = first + int(size)
    result_set = conn.search(parsed)
    records = [rec.data for rec in result_set[first:last]]
    return Dataset(records, total=len(result_set))
# Map the field name given as the second command-line argument to a CCL
# qualifier; anything unrecognised falls back to a title ("ti") search.
field = sys.argv[2]
if (field == "Author"):
    field = "au"
elif (field == "ISBN"):
    field = "isbn"
elif (field == "LCCN"):
    field = "lccn"
else:
    field = "ti"
conn = zoom.Connection ('z3950.loc.gov', 7090)
conn.databaseName = 'VOYAGER'
conn.preferredRecordSyntax = 'USMARC'
# NOTE(review): `query` is read on the right-hand side before this fragment
# assigns it - presumably it holds the search text set earlier; confirm.
query = zoom.Query ('CCL', field + "=" + query)
# Build the XML skeleton: an <importedData> root containing one <List>
# element named "Library of Congress Import".
doc = Document ()
root = doc.createElement ("importedData")
doc.appendChild (root)
collection = doc.createElement ("List")
collection.setAttribute ("name", "Library of Congress Import")
root.appendChild (collection)
res = conn.search (query)
count = 0
# Wrap each returned record's raw data in a MARC object.
for r in res:
    m = MARC (MARC=r.data)
#!/usr/bin/env python
"""Print every Library of Congress record for author 'Thucydides'.

Records are fetched one at a time (``presentChunk = 1``) with
``zoom.trace_extract`` switched on.
"""
from PyZ3950 import zoom, z3950

zoom.trace_extract = 1
#z3950.msg_size = 0x600
conn = zoom.Connection('z3950.loc.gov', 7090)
#conn = zoom.Connection ('ipac.lib.uchicago.edu', 210)
try:
    conn.databaseName = 'VOYAGER'
    #conn.databaseName = 'uofc'
    conn.preferredRecordSyntax = 'USMARC'
    query = zoom.Query('CCL', 'au=Thucydides')
    res = conn.search(query)
    res.presentChunk = 1
    for a in res:
        print(a)
finally:
    # Close the connection even if the search or a fetch fails.
    conn.close()
#!/usr/bin/env python
"""Fetch records for a fixed title query and print them converted to MODS."""
from PyZ3950 import zoom, zmarc


def make_conn():
    """Return a connection to the Library of Congress VOYAGER database."""
    connection = zoom.Connection('z3950.loc.gov', 7090)
    connection.databaseName = 'VOYAGER'
    connection.preferredRecordSyntax = 'USMARC'
    return connection


def fetch_mods(query):
    """Run ``query`` on the module-level ``conn``; return MODS per record."""
    return [zmarc.MARC(r.data).toMODS() for r in conn.search(query)]


conn = make_conn()
mods_list = fetch_mods(zoom.Query('CCL', 'ti="1066 and all that"'))
print(mods_list)
loc = left + right queryString = 'lccn=' + loc else: queryString = 'ti="' + title + '"' if (authors != ""): queryString = queryString + ' and au="' + authors + '"' conn = zoom.Connection('z3950.loc.gov', 7090) conn.databaseName = 'VOYAGER' conn.preferredRecordSyntax = 'USMARC' sys.stderr.write("<!-- " + queryString + "-->\n") query = zoom.Query('CCL', str(queryString)) doc = Document() root = doc.createElement("importedData") doc.appendChild(root) collection = doc.createElement("List") collection.setAttribute("name", "Library of Congress Import") root.appendChild(collection) res = conn.search(query) count = 0 for r in res: m = MARC(MARC=r.data)
#!/usr/bin/env python
"""Demonstrates adding a CCL qualifier at runtime with ccl.add_qual().

The new AUPERSONAL qualifier is mapped to attribute (1, 1) and then used in
a search; at most the first 20 records are printed."""
from PyZ3950 import zoom, ccl

ccl.add_qual('AUPERSONAL', (1, 1))

conn = zoom.Connection('z3950.loc.gov', 7090)
conn.databaseName = 'VOYAGER'
conn.preferredRecordSyntax = 'USMARC'

query = zoom.Query('CCL', 'aupersonal=MacLane, Saunders')
res = conn.search(query)
first_twenty = res[:20]
for r in first_twenty:
    print(r)
conn.close()
#!/usr/bin/env python
"""Look up one ISBN at the Library of Congress and print the first record."""
from PyZ3950 import zoom

conn = zoom.Connection('z3950.loc.gov', 7090)
try:
    conn.databaseName = 'VOYAGER'
    conn.preferredRecordSyntax = 'USMARC'
    query = zoom.Query('CCL', 'isbn=0253333490')
    res = conn.search(query)
    # Guard the index: an empty result set would otherwise raise IndexError.
    if len(res) > 0:
        print(res[0])
    else:
        print("No record found")
finally:
    # Close the connection even if the search fails.
    conn.close()
def run_query(self, qstr):
    """Run the CCL query ``qstr``, echo it, display and return the results.

    The search goes through the module-level ``conn`` (not ``self``); the
    result set is shown via ``self.disp_res``.
    """
    result_set = conn.search(zoom.Query('CCL', qstr))
    print(qstr)
    self.disp_res(result_set)
    return result_set
#!/usr/bin/env python
"""Print every Library of Congress record matching a fixed title query."""
from PyZ3950 import zoom

conn = zoom.Connection('z3950.loc.gov', 7090)
conn.databaseName = 'VOYAGER'
conn.preferredRecordSyntax = 'USMARC'

query = zoom.Query('CCL', 'ti="1066 and all that"')
res = conn.search(query)
for record in res:
    print(record)

conn.close()
#!/usr/bin/env python
"""Run two author searches on one connection and print results interleaved."""
from __future__ import print_function, absolute_import
from PyZ3950 import zoom

conn = zoom.Connection('ilsz3950.nlm.nih.gov', 7090)
conn.databaseName = 'voyager'
conn.preferredRecordSyntax = 'MARC8'

query1 = zoom.Query('CCL', 'au=Gould, Stephen Jay')
res1 = conn.search(query1)
query2 = zoom.Query('CCL', 'au=Pynchon, Thomas')
res2 = conn.search(query2)

# Walk both result sets in lockstep until the longer one is exhausted.
longest = max(len(res1), len(res2))
for i in range(0, longest):
    if i < len(res1):
        print("1:", res1[i])
    if i < len(res2):
        print("2:", res2[i])

conn.close()
# coding: utf-8
"""
Search the z3950.bnf.fr target for a fixed list of ISBNs with PyZ3950 and
print each query followed by the records it returns.
"""
from PyZ3950 import zoom

ISBNs = ['978-1-905017-60-7', '2-86377-125-6']

conn = zoom.Connection('z3950.bnf.fr', 2211)
conn.databaseName = 'TOUT'
conn.preferredRecordSyntax = 'UNIMARC'

for isbn in ISBNs:
    query_text = 'find @attr 1=7 ' + isbn
    query = zoom.Query('CQL', query_text)
    print(query)
    res = conn.search(query)
    for r in res:
        print(str(r))

conn.close()
"""Search several databases on www.lib.csu.ru at once and print each record
together with the database it came from."""
from PyZ3950 import zoom

conn = zoom.Connection('www.lib.csu.ru', 210)
try:
    # Database names are joined with '+' in a single databaseName value.
    conn.databaseName = 'arefd+knigi+liter+period'
    conn.preferredRecordSyntax = 'USMARC'
    query = zoom.Query('CCL', 'ti=Journal or au=Turgenev')
    res = conn.search(query)
    for r in res:
        print("db: %s %s" % (repr(r.databaseName), r))
finally:
    # The connection was previously never closed.
    conn.close()
def _get_offers_z3950(id, library):
    """Return holdings for a bib id as a list of schema.org ``Offer`` dicts.

    :param id: Bibliographic id to search for (PQF use attribute 12). For
        'George Mason' any leading/trailing ``m`` characters are stripped.
    :param library: ``'Georgetown'`` or ``'George Mason'``; selects the entry
        of ``settings.Z3950_SERVERS`` to query.
    :return: One dict per holdings entry of the first matching record; an
        empty list when nothing matches or the record carries no holdings.
    :raises Exception: If ``library`` is not one of the recognised names.
    """
    offers = []

    # determine which server to talk to
    if library == 'Georgetown':
        conf = settings.Z3950_SERVERS['GT']
    elif library == 'George Mason':
        id = id.strip('m')
        conf = settings.Z3950_SERVERS['GM']
    else:
        raise Exception("unrecognized library %s" % library)

    # search for the id, and get the first record
    z = zoom.Connection(conf['IP'], conf['PORT'])
    z.databaseName = conf['DB']
    z.preferredRecordSyntax = conf['SYNTAX']
    q = zoom.Query('PQF', '@attr 1=12 %s' % id.encode('utf-8'))
    results = z.search(q)
    if len(results) == 0:
        return []
    rec = results[0]

    # normalize holdings information as schema.org offers

    # Also covers a record with no ``data`` at all, which used to fall
    # through and fail on ``rec.data`` below.
    if not hasattr(getattr(rec, 'data', None), 'holdingsData'):
        return []

    for holdings_data in rec.data.holdingsData:
        h = holdings_data[1]
        o = {'@type': 'Offer', 'seller': library}

        if hasattr(h, 'callNumber'):
            o['sku'] = h.callNumber.rstrip('\x00').strip()

        if hasattr(h, 'localLocation'):
            o['availabilityAtOrFrom'] = h.localLocation.rstrip('\x00')

        if hasattr(h, 'publicNote') and library == 'Georgetown':
            note = h.publicNote.rstrip('\x00')
            if note == 'AVAILABLE':
                o['availability'] = 'http://schema.org/InStock'
                o['description'] = 'Available'
            elif note in ('SPC USE ONLY', 'LIB USE ONLY'):
                o['availability'] = 'http://schema.org/InStoreOnly'
                o['description'] = 'Available'
            else:
                # set availabilityStarts from "DUE 09-15-14"
                due = re.match(r'DUE (\d\d)-(\d\d)-(\d\d)', note)
                if due:
                    month, day, year = [int(i) for i in due.groups()]
                    o['availabilityStarts'] = \
                        "20%02i-%02i-%02i" % (year, month, day)
                o['availability'] = 'http://schema.org/OutOfStock'
                o['description'] = 'Checked Out'

        elif hasattr(h, 'circulationData'):
            cd = h.circulationData[0]
            if cd.availableNow is True:
                o['availability'] = 'http://schema.org/InStock'
                o['description'] = 'Available'
            else:
                # The attribute is read under the spelling 'availablityDate';
                # the old guard tested 'availabilityDate', a different name,
                # so this branch could never run. NOTE(review): the spelling
                # appears to follow the Z39.50 OPAC CircRecord definition -
                # confirm against real records.
                available_on = getattr(cd, 'availablityDate', None)
                if available_on:
                    starts = re.match(r"^(\d{4}-\d{2}-\d{2}).+", available_on)
                    if starts:
                        o['availabilityStarts'] = starts.group(1)
                o['availability'] = 'http://schema.org/OutOfStock'
                o['description'] = 'Checked Out'

        else:
            logging.warning("unknown availability: bibid=%s library=%s h=%s",
                            id, library, h)

        # some locations have a weird period before the name
        o['availabilityAtOrFrom'] = \
            o.get('availabilityAtOrFrom', '').lstrip('.')

        offers.append(o)

    return offers