def z3950_query(target=None, keyword=None, qualifier='(1,1016)', query_type='CCL'):
    """Run one search against a Z39.50 target.

    Args:
        target: Mapping with the keys ``host``, ``database``, ``port``,
            ``syntax``, ``user`` and ``password``.
        keyword: Search term; the query string is ``qualifier + '=' + keyword``.
        qualifier: Left-hand side of the query expression.
        query_type: Query notation handed to ``zoom.Query``.

    Returns:
        The tuple ``(True, result_set)``. The connection is deliberately left
        open on success because the result set is returned to the caller.

    Raises:
        ValueError: If ``target`` is None.
        KeyError: If ``target`` lacks one of the expected keys.
        TypeError: If ``keyword`` cannot be concatenated to ``qualifier``.
        zoom.ConnectionError: Propagated unchanged from the zoom layer.
    """
    if target is None:
        raise ValueError('Z3950 target not provided.')

    host = target['host']
    database = target['database']
    port = target['port']
    syntax = target['syntax']
    user = target['user']
    password = target['password']

    # Build the query before touching the network so that a bad keyword or
    # an unparsable query fails without leaving an open connection behind.
    query_str = qualifier + '=' + keyword
    query = zoom.Query(query_type, query_str)

    # Credentials are only sent when both halves are present.
    if user is not None and password is not None:
        conn = zoom.Connection(host, port, user=user, password=password)
    else:
        conn = zoom.Connection(host, port)

    try:
        conn.databaseName = database
        conn.preferredRecordSyntax = syntax
        res = conn.search(query)
    except Exception:
        # Do not leak the socket when configuring or searching fails.
        conn.close()
        raise
    return True, res
def run():
    """Scan the 'NL' database for ti="1066" and print the first ten entries.

    The connection is closed even when the scan or the printing fails.
    """
    conn = zoom.Connection('amicus.nlc-bnc.ca', 210)
    try:
        conn.databaseName = 'NL'
        q = zoom.Query('CCL', 'ti="1066"')
        ss = conn.scan(q)
        for s in ss[0:10]:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(s)
    finally:
        conn.close()
def RunQuery():
    """Run a Z39.50 query & save MARC results to files.

    Every record of the result set is written to a numbered ``.bin`` file in
    the system temp directory and then handed to ``ParseRecord``. The
    connection and each output file are closed even if an error occurs.
    """
    # open connection
    conn = zoom.Connection('z3950.loc.gov', 7090)
    try:
        conn.databaseName = 'VOYAGER'
        conn.preferredRecordSyntax = 'USMARC'

        # setup and run query
        query = zoom.Query('CCL', 'ti="1066 and all that"')
        res = conn.search(query)

        # for each record in the resultset, save as file
        for ifilecount, r in enumerate(res):
            sSaveAs = os.path.join(tempfile.gettempdir(),
                                   "PyZ3950 search resultset %d.bin" % ifilecount)
            print("Saving as file:", sSaveAs)
            with open(sSaveAs, "wb") as fx:
                fx.write(r.data)

            # parse each record as we save
            ParseRecord(sSaveAs)
    finally:
        # close connection
        conn.close()
def run(self):
    """Issue randomly chosen CCL queries until more than 500 have been tried.

    When the module-level ``parse_only`` flag is false, each query is also
    sent to the server on localhost:2100 and every returned record is passed
    to ``self.consume``. On exit the worker puts itself on
    ``self.terminate_queue``.
    """
    if not parse_only:
        self.conn = zoom.Connection('localhost', 2100)
        self.conn.preferredRecordSyntax = 'SUTRS'

    while True:
        self.count += 1
        query_str = random.choice(self.queries)
        try:
            parsed = zoom.Query('CCL', query_str)
            if not parse_only:
                for record in self.conn.search(parsed):
                    self.consume(record)
        except zoom.Bib1Err:
            # Bib-1 diagnostics are ignored here.
            pass
        except zoom.QuerySyntaxError as e:
            print("e", e, "q", query_str)

        if self.count <= 500:
            continue

        if not parse_only:
            self.conn.close()
        # should randomly do clean vs. not clean exit
        self.terminate_queue.put(self, 1)
        return
def read_mc(sys_no):
    """
    Loads marc data from aleph.unibas.ch for one single system number.

    The raw record is also handed to ``__write_to_cache`` before parsing.
    The connection is closed once the record bytes have been read, whether
    or not the lookup succeeded.

    :param sys_no: System number to which the marc entry is to be loaded.
    :return: First record produced by ``MARCReader`` for the fetched data,
        or None when a ``zoom.ConnectionError`` occurs.
    """
    try:
        conn = zoom.Connection('aleph.unibas.ch', 9909)
        try:
            conn.databaseName = 'dsv05'
            conn.preferredRecordSyntax = 'USMARC'
            query = zoom.Query('PQF', '@attr 1=1032 ' + sys_no)
            res = conn.search(query)
            # Copy the payload before the connection goes away.
            # NOTE(review): an empty result set is not handled here.
            data = bytes(res[0].data)
        finally:
            conn.close()
    except zoom.ConnectionError:
        print("\n!!! Error: could not connect to aleph !!!\n")
        return None

    __write_to_cache(data, sys_no)

    # ``data`` is already a bytes object, so no second copy is needed.
    reader = MARCReader(data, force_utf8=True, to_unicode=True)
    return next(reader)
def gather_stage(self, harvest_job):
    """Collect harvest objects for every record returned by the Z39.50 source.

    Each record is identified by the MD5 hex digest of its raw data. Records
    whose digest is not among the source's current objects are saved with
    status ``new``; current objects whose digest was not seen in this run
    are saved with status ``delete``.

    :param harvest_job: The job being gathered; its ``source`` supplies the
        URL, the config and the id used for the database lookup.
    :returns: List of ids of the harvest objects created, or None when any
        exception occurs (the error is recorded via ``_save_gather_error``).
    """
    log = logging.getLogger(__name__ + '.WAF.gather')
    log.debug('z3950Harvester gather_stage for job: %r', harvest_job)

    self.harvest_job = harvest_job

    # Get source URL
    source_url = harvest_job.source.url

    self._set_source_config(harvest_job.source.config)

    # get current objects out of db
    query = (
        model.Session.query(HarvestObject.guid, HarvestObject.package_id)
        # "== True" is intentional: it builds the SQL expression.
        .filter(HarvestObject.current == True)  # noqa: E712
        .filter(HarvestObject.harvest_source_id == harvest_job.source.id)
    )
    guid_to_package_id = dict((row[0], row[1]) for row in query)
    current_guids = set(guid_to_package_id.keys())
    current_guids_in_harvest = set()

    # Get contents
    try:
        conn = zoom.Connection(source_url,
                               int(self.source_config.get('port', 210)))
        try:
            conn.databaseName = self.source_config.get('database', '')
            conn.preferredRecordSyntax = 'XML'
            conn.elementSetName = 'T'
            res = conn.search(zoom.Query('CCL', 'metadata'))

            ids = []
            for result in res:
                guid = hashlib.md5(result.data).hexdigest()
                if guid in current_guids:
                    current_guids_in_harvest.add(guid)
                    continue
                obj = HarvestObject(
                    job=harvest_job,
                    guid=guid,
                    extras=[
                        HOExtra(key='status', value='new'),
                        HOExtra(key='original_document',
                                value=result.data.decode('latin-1')),
                        HOExtra(key='original_format', value='fgdc'),
                    ])
                obj.save()
                ids.append(obj.id)

            # Anything current that did not show up again is marked deleted.
            for guid in (current_guids - current_guids_in_harvest):
                obj = HarvestObject(
                    job=harvest_job,
                    guid=guid,
                    package_id=guid_to_package_id[guid],
                    extras=[HOExtra(key='status', value='delete')])
                obj.save()
                ids.append(obj.id)

            return ids
        finally:
            # Release the server connection on success and on failure alike.
            conn.close()
    except Exception as e:
        self._save_gather_error(
            'Unable to get content for URL: %s: %r' % (source_url, e),
            harvest_job)
        return None
def run():
    """Scan the 'NL' database for ti=A and print each term with its fields.

    Up to 80 entries are requested via ``numberOfEntries``. The connection
    is closed even when the scan fails.
    """
    conn = zoom.Connection('amicus.nlc-bnc.ca', 210)
    try:
        conn.databaseName = 'NL'
        q = zoom.Query('CCL', 'ti=A')
        conn.numberOfEntries = 80
        ss = conn.scan(q)
        # The scan set is read through its index-based accessors.
        for i in range(len(ss)):
            # One formatted string prints identically on Python 2 and 3.
            print("%s %s" % (ss.get_term(i), ss.get_fields(i)))
    finally:
        conn.close()
def establishZ3950Connection(database_address, port, username, database_name):
    """Open a USMARC connection, retrying until one can be established.

    After every ``zoom.ConnectionError`` a message is printed and
    ``waitSixSeconds`` is called before the next attempt. Retrying is done in
    a loop rather than by recursion, so a long outage cannot exhaust the
    interpreter's recursion limit.

    :param database_address: Host name or address of the Z39.50 server.
    :param port: Port of the Z39.50 server.
    :param username: User name passed to the connection.
    :param database_name: Database selected on the connection.
    :returns: The configured connection.
    """
    while True:
        try:
            conn = zoom.Connection(database_address, port, user=username)
            conn.databaseName = database_name
            conn.preferredRecordSyntax = 'USMARC'
            return conn
        except zoom.ConnectionError:
            print("GRACEFUL CLOSE ERROR -- ESTABLISHING CONNECTION")
            waitSixSeconds(datetime.datetime.now().time())
def _connect(self):
    """Open a connection and apply this object's database settings.

    :returns: A connection.
    """
    link = zoom.Connection(self.host, self.port,
                           user=self.user, password=self.password)
    # Applied in this order: database, record syntax, element set.
    for option, value in (
        ('databaseName', self.db),
        ('preferredRecordSyntax', self.syntax),
        ('elementSetName', self.elem_set_name),
    ):
        setattr(link, option, value)
    return link
def connect(self):
    """Open and return a connection to the z3950 server.

    Called by __init__().
    """
    host = self.HOST
    port = int(self.PORT)
    options = {
        'databaseName': self.DB_NAME,
        # Getting records in "opac" format. (Others were not more helpful.)
        'preferredRecordSyntax': u'OPAC',
        'charset': 'utf-8',
    }
    conn = zoom.Connection(host, port, **options)
    log.debug('connection made.')
    return conn
def __init__(self, host, port, db):
    """Connect to ``host:port`` and select database ``db``.

    The connection is stored on ``self.conn`` and asks for element set 'F'
    in GRS-1 record syntax.
    """
    link = self.conn = zoom.Connection(host, port)
    link.databaseName = db
    link.elementSetName = 'F'
    # XML (link.preferredRecordSyntax = 'XML') is not used:
    # currently (2002-7-17) the dbiref.kub.nl seems broken for XML
    # in two ways:
    # 1) Much more data is returned for elementSetName 'B' than 'F'.
    # 2) even for 'B', the term name for the top-level record isn't
    #    returned, but the term id is. The term name isn't marked as
    #    optional in the spec.
    link.preferredRecordSyntax = 'GRS-1'
def _make_connection(self):
    """Open a connection to the Z39.50 server using the stored settings."""
    link = zoom.Connection(self._host, self._port, charset=self._charset)
    # Select the database first, then the record syntax.
    for option, value in (
        ('databaseName', self._database),
        ('preferredRecordSyntax', self._syntax),
    ):
        setattr(link, option, value)
    return link
def get_marc(target_name, cclquery, result_offset):
    """Fetch the raw data of one record from a configured Z39.50 target.

    The previous implementation returned the first record unconditionally,
    which left its own ``result_offset`` handling unreachable. The offset is
    now honoured and the connection is always closed.

    :param target_name: Key into ``targets``; the entry's ``identifier`` is
        split into host, port and database with ``re_identifier``.
    :param cclquery: Query string; note that it is sent as a 'PQF' query.
    :param result_offset: 1-based position of the wanted record. Falsy
        values and values below 1 select the first record, matching what
        the old code did for every input.
    :returns: ``data`` of the selected record, or None when the result set
        has fewer than ``result_offset`` records.
    """
    target = targets[target_name]
    m = re_identifier.match(target['identifier'])
    (host, port, db) = m.groups()
    port = int(port) if port else 210

    wanted = int(result_offset) if result_offset else 1
    if wanted < 1:
        wanted = 1

    conn = zoom.Connection(host, port)
    try:
        if db:
            conn.databaseName = db
        conn.preferredRecordSyntax = 'USMARC'
        query = zoom.Query('PQF', cclquery)
        res = conn.search(query)
        if wanted > len(res):
            return None
        return res[wanted - 1].data
    finally:
        conn.close()
def search(host, port, database, query, start=1, limit=10):
    """Run a CCL search and return the matching XML records as parsed elements.

    NOTE: this function relies on the Python 2 ``unicode`` builtin.

    :param host: Z39.50 server host.
    :param port: Z39.50 server port.
    :param database: Database name to select.
    :param query: Query; converted with ``str()`` and parsed as CCL.
    :param start: 1-based position of the first record to return. Values
        below 1 are treated as 1 so no negative index reaches the result set.
    :param limit: Maximum number of records to return. ``None`` means no
        cap; previously ``None`` made the slice computation fail.
    :returns: ``(parsed, total)`` where ``parsed`` is a list of elements
        built with ``ET.fromstring`` and ``total`` is the size of the whole
        result set.
    """
    conn = zoom.Connection(host, port)
    try:
        conn.databaseName = database
        conn.preferredRecordSyntax = 'XML'
        res = conn.search(zoom.Query('CCL', str(query)))
        total = len(res)

        first = max(start - 1, 0)
        to_show = total - first
        if limit is not None:
            to_show = min(to_show, limit)

        # if we were dealing with marc8 results, the records would probably
        # need translating here (zmarc.MARC8_to_Unicode); evergreen doesn't
        # need this.
        # would be good to cache the result set for iterative display
        collector = [str(res[i]).replace('\n', '')
                     for i in range(first, first + to_show)]
    finally:
        # Close the connection even when the search or the fetch fails.
        conn.close()

    raw = "".join(collector)
    pat = re.compile('<record .*?</record>', re.M)

    parsed = []
    for rec in pat.findall(raw):
        # TODO: fix this ascii/replace, once our z3950/marc encoding
        # issues are sorted out.
        rec = unicode(rec, 'ascii', 'replace')  # noqa: F821 (Python 2 builtin)
        # replace multiple 'unknown' characters with a single one.
        rec = re.sub(u'\ufffd+', u'\ufffd', rec)
        parsed.append(ET.fromstring(rec.encode('utf-8')))
    return parsed, total
def run(self):
    """Repeatedly parse, and optionally execute, randomly chosen CCL queries.

    When the module-level ``parse_only`` flag is false, a connection to
    localhost:2100 is opened first and every query is sent to the server,
    with each returned record passed to ``self.consume``. No exit condition
    is visible in this excerpt; the loop runs until an unhandled exception.

    The exception and print syntax work on Python 2.6+ and Python 3 alike.
    """
    if not parse_only:
        self.conn = zoom.Connection('localhost', 2100)
        self.conn.preferredRecordSyntax = 'SUTRS'
    while 1:
        self.count += 1
        query_str = random.choice(self.queries)
        try:
            q = zoom.Query('CCL', query_str)
            if not parse_only:
                r = self.conn.search(q)
                for rec in r:
                    self.consume(rec)
        except zoom.Bib1Err:
            # Bib-1 diagnostics are ignored here.
            pass
        except zoom.QuerySyntaxError as e:
            # One formatted string prints identically on Python 2 and 3.
            print("e %s q %s" % (e, query_str))
def open_stream(self, stream):
    """Run the CQL query described by a z3950 URL and return its result set.

    'z3950' in ``stream`` is replaced by 'https' before the URL is split by
    ``self._parse_url``. The directory part names the database and the
    ``query`` argument holds the CQL text. Record syntax and element set
    come from the URL arguments when given, otherwise 'USMARC' and 'F'.
    The size of the result set is stored on ``self.total``.
    """
    parts = self._parse_url(stream.replace('z3950', 'https'))
    (_transport, _user, _passwd, host, port,
     dirname, _filename, args, _anchor) = parts

    conn = zoom.Connection(host, port)
    conn.databaseName = dirname
    cql = zoom.Query('CQL', args['query'])

    conn.preferredRecordSyntax = (
        args['preferredRecordSyntax']
        if 'preferredRecordSyntax' in args else 'USMARC'
    )
    conn.elementSetName = (
        args['elementSetName'] if 'elementSetName' in args else 'F'
    )

    results = conn.search(cql)
    self.total = len(results)
    return results
queryString = '' if (isbn != None): # isbn = replace (replace (isbn, "-", ""), " ", ""); queryString = 'isbn="' + isbn + '"' # elif (field == "LCCN"): # field = "lccn" else: queryString = 'ti="' + title + '"' if (authors != ""): queryString = queryString + ' and au="' + authors + '"' conn = zoom.Connection('opac.sbn.it', 3950) conn.databaseName = 'nopac' conn.preferredRecordSyntax = 'USMARC' query = zoom.Query('CCL', str(queryString)) query doc = Document() root = doc.createElement("importedData") doc.appendChild(root) collection = doc.createElement("List") collection.setAttribute("name", "Importato dall\'OPAC SBN") root.appendChild(collection) res = conn.search(query)
def make_conn():
    """Return a connection to z3950.loc.gov:7090 set up for VOYAGER/USMARC."""
    loc = zoom.Connection('z3950.loc.gov', 7090)
    # Database name is assigned first, then the record syntax.
    loc.databaseName, loc.preferredRecordSyntax = 'VOYAGER', 'USMARC'
    return loc
#!/usr/bin/env python
# Look up one ISBN in the VOYAGER database at z3950.loc.gov and print the
# first record of the result set.
from PyZ3950 import zoom

conn = zoom.Connection('z3950.loc.gov', 7090)
try:
    conn.databaseName = 'VOYAGER'
    conn.preferredRecordSyntax = 'USMARC'
    query = zoom.Query('CCL', 'isbn=0253333490')
    res = conn.search(query)
    # Single-argument print() behaves the same on Python 2 and 3.
    print(res[0])
finally:
    # Close the connection even when the search or the lookup fails.
    conn.close()
self.attributes[attr] = 1 if hasattr(ti, 'dbCombinations'): dbcomb = ti.dbCombinations for db1 in dbcomb: for db2 in db1: self.databases[db2] = 1 if hasattr(ti, 'recordSyntaxes'): self.record_syntaxes = ti.recordSyntaxes def disp_res(self, res): for r in res: print r if __name__ == '__main__': # XXX what record syntax does catalogue.bized.ac.uk:2105 want? #conn = zoom.Connection ('z3950.copac.ac.uk', 210) # conn = zoom.Connection ('www.cnshb.ru', 210) # conn = zoom.Connection ('blpcz.bl.uk', 21021) # conn = zoom.Connection('sherlock.berkeley.edu', 2100) conn = zoom.Connection('gondolin.hist.liv.ac.uk', 210) conn.databaseName = 'IR-Explain-1' conn.preferredRecordSyntax = 'EXPLAIN' # conn._cli.test = 1 e = Explainer(conn) e.databases['l5r'] = 1 e.databases['scifi'] = 1 e.run() conn.close()
#!/usr/bin/env python
# Run two author searches on one connection and print their records
# interleaved, each prefixed with the number of the search it came from.
from PyZ3950 import zoom

# conn = zoom.Connection ('z3950.loc.gov', 7090)
# conn.databaseName = 'VOYAGER'
conn = zoom.Connection('z3950.bibsys.no', 2100)
try:
    conn.databaseName = 'BIBSYS'
    conn.preferredRecordSyntax = 'USMARC'
    query1 = zoom.Query('CCL', 'au=Gould, Stephen Jay')
    res1 = conn.search(query1)
    query2 = zoom.Query('CCL', 'au=Pynchon, Thomas')
    res2 = conn.search(query2)
    for i in range(0, max(len(res1), len(res2))):
        # One formatted string prints identically on Python 2 and 3.
        if i < len(res1):
            print("1: %s" % (res1[i],))
        if i < len(res2):
            print("2: %s" % (res2[i],))
finally:
    # Close the connection even when a search fails.
    conn.close()
#!/usr/bin/env python
# Run two author searches on one connection and print their records
# interleaved, each prefixed with the number of the search it came from.
from __future__ import print_function, absolute_import

from PyZ3950 import zoom

conn = zoom.Connection('ilsz3950.nlm.nih.gov', 7090)
conn.databaseName = 'voyager'
conn.preferredRecordSyntax = 'MARC8'

query1 = zoom.Query('CCL', 'au=Gould, Stephen Jay')
res1 = conn.search(query1)
query2 = zoom.Query('CCL', 'au=Pynchon, Thomas')
res2 = conn.search(query2)

labelled = (("1:", res1), ("2:", res2))
for i in range(0, max(len(res1), len(res2))):
    for label, results in labelled:
        # The shorter result set simply stops contributing lines.
        if i < len(results):
            print(label, results[i])

conn.close()
def _getconn(self, ip, port, name, syntax):
    """Return a connection to ``ip:port`` using database ``name`` and ``syntax``."""
    link = zoom.Connection(ip, port)
    # Database name is assigned first, then the record syntax.
    link.databaseName, link.preferredRecordSyntax = name, syntax
    return link
# Search several databases at once (names joined with '+') and print every
# record together with the database it was reported from.
from PyZ3950 import zoom

conn = zoom.Connection('www.lib.csu.ru', 210)
try:
    conn.databaseName = 'arefd+knigi+liter+period'
    conn.preferredRecordSyntax = 'USMARC'
    query = zoom.Query('CCL', 'ti=Journal or au=Turgenev')
    res = conn.search(query)
    for r in res:
        # One formatted string prints identically on Python 2 and 3.
        print("db: %s %s" % (repr(r.databaseName), r))
finally:
    # The original script never closed the connection.
    conn.close()
def _holding_to_offer(h, library, bib_id):
    """Build one schema.org Offer dict from a single holdings entry."""
    o = {'@type': 'Offer', 'seller': library}

    # Values are stripped of trailing NUL characters before use.
    if hasattr(h, 'callNumber'):
        o['sku'] = h.callNumber.rstrip('\x00').strip()

    if hasattr(h, 'localLocation'):
        o['availabilityAtOrFrom'] = h.localLocation.rstrip('\x00')

    if hasattr(h, 'publicNote') and library == 'Georgetown':
        note = h.publicNote.rstrip('\x00')
        if note == 'AVAILABLE':
            o['availability'] = 'http://schema.org/InStock'
            o['description'] = 'Available'
        elif note in ('SPC USE ONLY', 'LIB USE ONLY'):
            o['availability'] = 'http://schema.org/InStoreOnly'
            o['description'] = 'Available'
        else:
            # set availabilityStarts from "DUE 09-15-14"
            due = re.match(r'DUE (\d\d)-(\d\d)-(\d\d)', note)
            if due:
                month, day, year = [int(i) for i in due.groups()]
                o['availabilityStarts'] = "20%02i-%02i-%02i" % (year, month, day)
            o['availability'] = 'http://schema.org/OutOfStock'
            o['description'] = 'Checked Out'

    elif hasattr(h, 'circulationData'):
        cd = h.circulationData[0]
        if cd.availableNow is True:
            o['availability'] = 'http://schema.org/InStock'
            o['description'] = 'Available'
        else:
            # The attribute is spelled "availablityDate" (sic). The old
            # check tested for 'availabilityDate' and so never matched the
            # attribute that was then read.
            available_from = getattr(cd, 'availablityDate', None)
            if available_from:
                found = re.match(r"^(\d{4}-\d{2}-\d{2}).+", available_from)
                if found:
                    o['availabilityStarts'] = found.group(1)
            o['availability'] = 'http://schema.org/OutOfStock'
            o['description'] = 'Checked Out'

    else:
        logging.warning("unknown availability: bibid=%s library=%s h=%s",
                        bib_id, library, h)

    # some locations have a weird period before the name
    o['availabilityAtOrFrom'] = o.get('availabilityAtOrFrom', '').lstrip('.')
    return o


def _get_offers_z3950(id, library):
    """Look up holdings for a bib id over Z39.50 as schema.org offers.

    :param id: Bibliographic id; for 'George Mason' any leading or trailing
        'm' characters are stripped before searching.
    :param library: 'Georgetown' or 'George Mason'; selects the entry of
        ``settings.Z3950_SERVERS`` to query.
    :returns: List of offer dicts, one per holdings entry of the first
        matching record. Empty when nothing matches or the record carries
        no holdings data.
    :raises Exception: If ``library`` is not one of the two known names.
    """
    offers = []

    # determine which server to talk to
    if library == 'Georgetown':
        conf = settings.Z3950_SERVERS['GT']
    elif library == 'George Mason':
        id = id.strip('m')
        conf = settings.Z3950_SERVERS['GM']
    else:
        raise Exception("unrecognized library %s" % library)

    # search for the id, and get the first record
    z = zoom.Connection(conf['IP'], conf['PORT'])
    try:
        z.databaseName = conf['DB']
        z.preferredRecordSyntax = conf['SYNTAX']
        q = zoom.Query('PQF', '@attr 1=12 %s' % id.encode('utf-8'))
        results = z.search(q)
        if len(results) == 0:
            return []
        rec = results[0]

        # A record without ``data`` used to fall through to ``rec.data``
        # and crash; it is now treated like a record without holdings.
        holdings = getattr(getattr(rec, 'data', None), 'holdingsData', None)
        if not holdings:
            return []

        # normalize holdings information as schema.org offers
        for holdings_data in holdings:
            offers.append(_holding_to_offer(holdings_data[1], library, id))
    finally:
        # The connection was previously never closed.
        z.close()

    return offers
# coding: utf-8
"""
Search a Z39.50 target for a list of ISBNs using Python and PyZ3950,
printing each query followed by every record it returns.
"""
from PyZ3950 import zoom

ISBNs = ['978-1-905017-60-7', '2-86377-125-6']

conn = zoom.Connection('z3950.bnf.fr', 2211)
conn.databaseName = 'TOUT'
conn.preferredRecordSyntax = 'UNIMARC'

for isbn in ISBNs:
    # One search per ISBN; the query is echoed before its results.
    query = zoom.Query('CQL', 'find @attr 1=7 ' + isbn)
    print(query)
    res = conn.search(query)
    for r in res:
        print(str(r))

conn.close()