Exemple #1
0
def z3950_query(target=None,
                keyword=None,
                qualifier='(1,1016)',
                query_type='CCL'):
    """Run a search against a Z39.50 target and return its result set.

    :param target: mapping with the keys ``host``, ``database``, ``port``,
        ``syntax``, ``user`` and ``password``. Credentials are only sent
        when both ``user`` and ``password`` are not None.
    :param keyword: search term; it is appended to ``qualifier`` as
        ``qualifier=keyword``.
    :param qualifier: left-hand side of the query expression.
    :param query_type: query notation passed to ``zoom.Query``.
    :returns: the tuple ``(True, result_set)``.
    :raises ValueError: when ``target`` or ``keyword`` is None.

    Errors raised by ``zoom`` (including ``zoom.ConnectionError``) propagate
    to the caller unchanged. The connection is left open because the
    returned result set is produced by it.
    """
    if target is None:
        raise ValueError('Z3950 target not provided.')
    # Fail before opening a connection instead of hitting a TypeError in the
    # string concatenation below with a socket already open.
    if keyword is None:
        raise ValueError('Z3950 search keyword not provided.')

    host = target['host']
    database = target['database']
    port = target['port']
    syntax = target['syntax']
    user = target['user']
    password = target['password']

    if user is not None and password is not None:
        conn = zoom.Connection(host, port, user=user, password=password)
    else:
        conn = zoom.Connection(host, port)

    conn.databaseName = database
    conn.preferredRecordSyntax = syntax
    query = zoom.Query(query_type, qualifier + '=' + keyword)

    return True, conn.search(query)
Exemple #2
0
def run():
    """Scan the 'NL' database with the CCL query ti="1066" and print entries.

    Opens a connection to amicus.nlc-bnc.ca:210, runs a scan and prints the
    first ten entries of the scan result. The connection is closed even if
    the scan fails.
    """
    conn = zoom.Connection('amicus.nlc-bnc.ca', 210)
    try:
        conn.databaseName = 'NL'
        q = zoom.Query('CCL', 'ti="1066"')
        ss = conn.scan(q)
        for s in ss[0:10]:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(s)
    finally:
        conn.close()
Exemple #3
0
def RunQuery():
    """Run a Z39.50 query & save MARC results to files.

    Searches the VOYAGER database at z3950.loc.gov:7090 for the title
    "1066 and all that", writes each record's raw data to a numbered file in
    the system temp directory and hands that file to ``ParseRecord``.
    The connection is closed even when searching, saving or parsing fails.
    """
    # open connection
    conn = zoom.Connection('z3950.loc.gov', 7090)
    try:
        conn.databaseName = 'VOYAGER'
        conn.preferredRecordSyntax = 'USMARC'

        # setup and run query
        query = zoom.Query('CCL', 'ti="1066 and all that"')
        res = conn.search(query)

        # for each record in the resultset, save as file
        for ifilecount, r in enumerate(res):
            sSaveAs = os.path.join(
                tempfile.gettempdir(),
                "PyZ3950 search resultset %d.bin" % ifilecount)
            print("Saving as file:", sSaveAs)
            # The context manager closes the file even if the write fails.
            with open(sSaveAs, "wb") as fx:
                fx.write(r.data)
            # parse each record as we save
            ParseRecord(sSaveAs)
    finally:
        # close connection
        conn.close()
0
    def run(self):
        """Issue randomly chosen CCL queries until the counter exceeds 500.

        Unless the module-level ``parse_only`` flag is set, a connection to
        localhost:2100 is opened, every query is searched and each returned
        record is passed to ``self.consume``. With ``parse_only`` set the
        queries are only constructed. When done, ``self`` is put on
        ``self.terminate_queue``.
        """
        if not parse_only:
            self.conn = zoom.Connection('localhost', 2100)
            self.conn.preferredRecordSyntax = 'SUTRS'

        while True:
            self.count += 1
            chosen = random.choice(self.queries)
            try:
                parsed = zoom.Query('CCL', chosen)
                if not parse_only:
                    for record in self.conn.search(parsed):
                        self.consume(record)
            except zoom.Bib1Err:
                # Bib-1 errors are ignored; the loop just moves on.
                pass
            except zoom.QuerySyntaxError as syntax_err:
                print("e", syntax_err, "q", chosen)

            if self.count <= 500:
                continue

            if not parse_only:
                self.conn.close()
            # should randomly do clean vs. not clean exit
            self.terminate_queue.put(self, 1)
            break
Exemple #5
0
def read_mc(sys_no):
    """
    Fetch the MARC entry for a single system number from aleph.unibas.ch.

    The raw record data is handed to ``__write_to_cache`` before it is parsed.

    :param sys_no: System number whose MARC entry is to be loaded.
    :return: first record produced by ``MARCReader`` for the fetched data,
             or None when a ``zoom.ConnectionError`` occurs.
    """
    try:
        aleph = zoom.Connection('aleph.unibas.ch', 9909)
        aleph.databaseName = 'dsv05'
        aleph.preferredRecordSyntax = 'USMARC'

        hits = aleph.search(zoom.Query('PQF', '@attr 1=1032 ' + sys_no))
        data = bytes(hits[0].data)
    except zoom.ConnectionError:
        print("\n!!! Error: could not connect to aleph !!!\n")
        return

    __write_to_cache(data, sys_no)

    marc_records = MARCReader(bytes(data), force_utf8=True, to_unicode=True)
    return next(marc_records)
Exemple #6
0
    def gather_stage(self, harvest_job):
        """Collect harvest objects for one job from a Z39.50 source.

        Every record returned by the CCL query ``metadata`` is identified by
        the MD5 hex digest of its raw data. Digests not yet known for this
        source produce a HarvestObject with status ``new``; known guids that
        no longer appear produce a HarvestObject with status ``delete``.

        :param harvest_job: job whose ``source`` supplies the URL, the id and
            the configuration (``port`` defaults to 210, ``database`` to '').
        :returns: list of ids of the HarvestObjects saved, or None when any
            exception occurs while contacting the server or saving objects
            (the error is recorded through ``_save_gather_error``).
        """
        log = logging.getLogger(__name__ + '.WAF.gather')
        log.debug('z3950Harvester gather_stage for job: %r', harvest_job)

        self.harvest_job = harvest_job

        # Get source URL
        source_url = harvest_job.source.url

        self._set_source_config(harvest_job.source.config)

        # get current objects out of db
        # NOTE: `== True` is kept on purpose; it is part of the query filter
        # expression, not a plain Python boolean test.
        db_query = model.Session.query(HarvestObject.guid, HarvestObject.package_id).filter(HarvestObject.current==True).\
                                    filter(HarvestObject.harvest_source_id==harvest_job.source.id)

        guid_to_package_id = dict((row[0], row[1]) for row in db_query)
        current_guids = set(guid_to_package_id)
        current_guids_in_harvest = set()

        # Get contents
        try:
            conn = zoom.Connection(source_url,
                                   int(self.source_config.get('port', 210)))
            conn.databaseName = self.source_config.get('database', '')
            conn.preferredRecordSyntax = 'XML'
            conn.elementSetName = 'T'
            results = conn.search(zoom.Query('CCL', 'metadata'))
            ids = []
            for result in results:
                # `digest` rather than `hash`, to avoid shadowing the builtin.
                digest = hashlib.md5(result.data).hexdigest()
                if digest in current_guids:
                    current_guids_in_harvest.add(digest)
                else:
                    obj = HarvestObject(
                        job=harvest_job,
                        guid=digest,
                        extras=[
                            HOExtra(key='status', value='new'),
                            HOExtra(key='original_document',
                                    value=result.data.decode('latin-1')),
                            HOExtra(key='original_format', value='fgdc')
                        ])
                    obj.save()
                    ids.append(obj.id)
            # Known guids that were not seen in this harvest are marked for
            # deletion.
            for guid in (current_guids - current_guids_in_harvest):
                obj = HarvestObject(
                    job=harvest_job,
                    guid=guid,
                    package_id=guid_to_package_id[guid],
                    extras=[HOExtra(key='status', value='delete')])
                obj.save()
                ids.append(obj.id)
            return ids
        except Exception as e:
            # `as` form works on Python 2.6+ and Python 3.
            self._save_gather_error('Unable to get content for URL: %s: %r' % \
                                        (source_url, e),harvest_job)
            return None
Exemple #7
0
def run():
    """Scan the 'NL' database with the CCL query ti=A and print each entry.

    Opens a connection to amicus.nlc-bnc.ca:210, sets ``numberOfEntries``
    to 80 on it, and prints the term and the fields of every scan entry on
    one line. The connection is closed even if the scan fails.
    """
    conn = zoom.Connection('amicus.nlc-bnc.ca', 210)
    try:
        conn.databaseName = 'NL'

        q = zoom.Query('CCL', 'ti=A')
        conn.numberOfEntries = 80
        ss = conn.scan(q)
        # The index is needed because entries are read through
        # get_term(i) / get_fields(i).
        for i in range(len(ss)):
            # One pre-formatted string prints identically on Python 2 and 3.
            print("%s %s" % (ss.get_term(i), ss.get_fields(i)))
    finally:
        conn.close()
Exemple #8
0
def establishZ3950Connection(database_address, port, username, database_name):
    """Open a USMARC connection, retrying until one is established.

    :param database_address: host passed to ``zoom.Connection``.
    :param port: port passed to ``zoom.Connection``.
    :param username: sent as the ``user`` argument of the connection.
    :param database_name: assigned to the connection's ``databaseName``.
    :returns: the configured connection.

    On ``zoom.ConnectionError`` a message is printed, ``waitSixSeconds`` is
    called (defined elsewhere; presumably it blocks for about six seconds)
    and the attempt is repeated. A loop is used instead of recursion so that
    a long outage cannot exhaust the interpreter's recursion limit.
    """
    while True:
        try:
            conn = zoom.Connection(database_address, port, user=username)
            conn.databaseName = database_name
            conn.preferredRecordSyntax = 'USMARC'
            return conn
        except zoom.ConnectionError:
            print("GRACEFUL CLOSE ERROR -- ESTABLISHING CONNECTION")
            waitSixSeconds(datetime.datetime.now().time())
Exemple #9
0
    def _connect(self):
        """Open a connection using this object's host, port and credentials.

        The database name, preferred record syntax and element set name are
        taken from ``self.db``, ``self.syntax`` and ``self.elem_set_name``.

        :returns: The configured connection.
        """
        link = zoom.Connection(
            self.host,
            self.port,
            user=self.user,
            password=self.password,
        )
        link.databaseName = self.db
        link.preferredRecordSyntax = self.syntax
        link.elementSetName = self.elem_set_name
        return link
Exemple #10
0
 def connect(self):
     """ Opens the z3950 connection described by HOST, PORT and DB_NAME.
         According to the original note, called by __init__(). """
     host, port = self.HOST, int(self.PORT)
     # Records are requested in "opac" format. (Others were not more helpful.)
     connection = zoom.Connection(host,
                                  port,
                                  databaseName=self.DB_NAME,
                                  preferredRecordSyntax=u'OPAC',
                                  charset='utf-8')
     log.debug('connection made.')
     return connection
Exemple #11
0
 def __init__(self, host, port, db):
     """Connect to host:port, select database `db`, and request full
     ('F') element sets in GRS-1 record syntax."""
     self.conn = link = zoom.Connection(host, port)
     link.databaseName = db
     link.elementSetName = 'F'
     # XML record syntax is deliberately not requested here
     # (link.preferredRecordSyntax = 'XML' was tried).
     # As of 2002-7-17 dbiref.kub.nl seemed broken for XML in two ways:
     # 1) Much more data is returned for elementSetName 'B' than 'F'.
     # 2) Even for 'B', the term name for the top-level record isn't
     #    returned, but the term id is.  The term name isn't marked as
     #    optional in the spec.
     link.preferredRecordSyntax = 'GRS-1'
Exemple #12
0
    def _make_connection(self):
        """
        Open a connection to the Z39.50 server using this object's host,
        port and charset, then select its database and record syntax.

        Returns the configured connection.
        """
        conn = zoom.Connection(self._host, self._port, charset=self._charset)
        conn.databaseName = self._database
        conn.preferredRecordSyntax = self._syntax
        return conn
Exemple #13
0
def get_marc(target_name, cclquery, result_offset):
    """Return the raw data of one record matching a query on a named target.

    :param target_name: key into the module-level ``targets`` mapping; the
        entry's ``identifier`` is split by ``re_identifier`` into host, port
        and database (port defaults to 210 when absent).
    :param cclquery: query text. NOTE(review): despite the name it is passed
        to ``zoom.Query`` as 'PQF' - confirm which notation callers supply.
    :param result_offset: 1-based position of the wanted record in the
        result set. A missing, zero or negative value selects the first
        record, which is what the function always returned before.
    :returns: ``data`` of the selected record, or None when the result set
        has fewer records than requested.
    """
    target = targets[target_name]
    m = re_identifier.match(target['identifier'])
    (host, port, db) = m.groups()
    port = int(port) if port else 210
    conn = zoom.Connection(host, port)
    if db:
        conn.databaseName = db
    conn.preferredRecordSyntax = 'USMARC'
    query = zoom.Query('PQF', cclquery)
    res = conn.search(query)

    # The previous loop returned unconditionally on its first iteration, so
    # result_offset was never honoured.
    wanted = result_offset if result_offset and result_offset > 0 else 1
    for position, r in enumerate(res, 1):
        if position == wanted:
            return r.data
    return None
Exemple #14
0
def search(host, port, database, query, start=1, limit=10):
    """Run a CCL search and return the requested page of records as XML.

    :param host: Z39.50 server host.
    :param port: Z39.50 server port.
    :param database: database name to select on the connection.
    :param query: CCL query; converted with ``str()`` before use.
    :param start: 1-based position of the first record to fetch.
    :param limit: maximum number of records to fetch. ``None`` means no
        limit (this used to fail when computing the range).
    :returns: tuple ``(parsed, total)`` where ``parsed`` is a list of
        elements built by ``ET.fromstring`` from every ``<record ...>``
        found in the fetched data and ``total`` is the size of the whole
        result set.

    The connection is closed even when the search or a fetch fails.
    """
    conn = zoom.Connection(host, port)
    try:
        conn.databaseName = database
        conn.preferredRecordSyntax = 'XML'

        res = conn.search(zoom.Query('CCL', str(query)))
        # Read the hit count while the connection is still open.
        total = len(res)

        first = start - 1
        to_show = total - first
        if limit is not None:
            to_show = min(to_show, limit)

        # if we were dealing with marc8 results, the records would probably
        # need translating here; evergreen doesn't need this.
        # A non-positive to_show yields an empty range, hence no records.
        collector = [str(res[i]).replace('\n', '')
                     for i in range(first, first + to_show)]
    finally:
        conn.close()

    raw = "".join(collector)
    pat = re.compile('<record .*?</record>', re.M)

    parsed = []
    for rec in pat.findall(raw):
        # TODO: fix this ascii/replace, once our z3950/marc encoding
        # issues are sorted out.
        rec = unicode(rec, 'ascii', 'replace')
        # replace multiple 'unknown' characters with a single one.
        rec = re.sub(u'\ufffd+', u'\ufffd', rec)

        assert isinstance(rec, unicode) # this must be true.
        parsed.append(ET.fromstring(rec.encode('utf-8')))
    return parsed, total
Exemple #15
0
 def run(self):
     """Issue randomly chosen CCL queries in an endless loop.

     Unless the module-level ``parse_only`` flag is set, a connection to
     localhost:2100 is opened, every query is searched and each returned
     record is passed to ``self.consume``. With ``parse_only`` set the
     queries are only constructed.

     NOTE(review): no exit condition is visible in this loop - confirm
     whether a termination step is missing from this copy.
     """
     if not parse_only:
         self.conn = zoom.Connection('localhost', 2100)
         self.conn.preferredRecordSyntax = 'SUTRS'
     while 1:
         self.count += 1
         query_str = random.choice(self.queries)
         try:
             q = zoom.Query('CCL', query_str)
             if not parse_only:
                 r = self.conn.search(q)
                 for rec in r:
                     self.consume(rec)
         except zoom.Bib1Err:
             # Bib-1 errors are ignored; the loop just moves on.
             pass
         except zoom.QuerySyntaxError as e:
             # `as` and a pre-formatted string run on Python 2.6+ and 3
             # with the same output as the old print statement.
             print("e %s q %s" % (e, query_str))
    def open_stream(self, stream):
        """Run the CQL query encoded in a z3950 URL and return its result set.

        'z3950' in `stream` is replaced by 'https' before the URL is split by
        ``self._parse_url``. The URL's directory part is used as database
        name and its ``query`` argument as the CQL query. The arguments
        ``preferredRecordSyntax`` and ``elementSetName`` are optional and
        fall back to 'USMARC' and 'F'. The number of hits is stored in
        ``self.total``.
        """
        url = stream.replace('z3950', 'https')
        (_transport, _user, _passwd, host, port, dirname, _filename, args,
         _anchor) = self._parse_url(url)

        conn = zoom.Connection(host, port)
        conn.databaseName = dirname
        cql_query = zoom.Query('CQL', args['query'])

        conn.preferredRecordSyntax = (
            args['preferredRecordSyntax']
            if 'preferredRecordSyntax' in args else 'USMARC')
        conn.elementSetName = (
            args['elementSetName'] if 'elementSetName' in args else 'F')

        result_set = conn.search(cql_query)
        self.total = len(result_set)
        return result_set
Exemple #17
0
# Build the CCL query: look up by ISBN when one is given, otherwise by title
# and, when present, by author as well.
# NOTE(review): the values are inserted into the query unescaped; a double
# quote inside isbn/title/authors would break the CCL expression.
if isbn is not None:
    queryString = 'isbn="' + isbn + '"'
else:
    queryString = 'ti="' + title + '"'

    if authors != "":
        queryString = queryString + ' and au="' + authors + '"'

conn = zoom.Connection('opac.sbn.it', 3950)
conn.databaseName = 'nopac'
conn.preferredRecordSyntax = 'USMARC'

query = zoom.Query('CCL', str(queryString))

# Skeleton of the output document: <importedData><List name=...>.
doc = Document()
root = doc.createElement("importedData")
doc.appendChild(root)
collection = doc.createElement("List")
collection.setAttribute("name", "Importato dall\'OPAC SBN")
root.appendChild(collection)

res = conn.search(query)
Exemple #18
0
def make_conn():
    """Return a connection to z3950.loc.gov:7090 set up for the VOYAGER
    database with USMARC as preferred record syntax."""
    loc = zoom.Connection('z3950.loc.gov', 7090)
    loc.databaseName, loc.preferredRecordSyntax = 'VOYAGER', 'USMARC'
    return loc
Exemple #19
0
#!/usr/bin/env python
from PyZ3950 import zoom

# Look up one ISBN in the VOYAGER database and print the first hit.
conn = zoom.Connection('z3950.loc.gov', 7090)
try:
    conn.databaseName = 'VOYAGER'
    conn.preferredRecordSyntax = 'USMARC'

    query = zoom.Query('CCL', 'isbn=0253333490')

    res = conn.search(query)
    # Single-argument print() behaves the same on Python 2 and 3.
    print(res[0])
finally:
    # Close the connection even if the search or the indexing fails.
    conn.close()
Exemple #20
0
                    self.attributes[attr] = 1
        if hasattr(ti, 'dbCombinations'):
            dbcomb = ti.dbCombinations
            for db1 in dbcomb:
                for db2 in db1:
                    self.databases[db2] = 1
        if hasattr(ti, 'recordSyntaxes'):
            self.record_syntaxes = ti.recordSyntaxes

    def disp_res(self, res):
        """Print every item of `res` on its own line."""
        for r in res:
            # Single-argument print() behaves the same on Python 2 and 3.
            print(r)


if __name__ == '__main__':
    # Servers that were used here previously (kept for reference):
    #   z3950.copac.ac.uk:210, www.cnshb.ru:210, blpcz.bl.uk:21021,
    #   sherlock.berkeley.edu:2100
    # XXX what record syntax does catalogue.bized.ac.uk:2105 want?
    conn = zoom.Connection('gondolin.hist.liv.ac.uk', 210)
    conn.databaseName = 'IR-Explain-1'
    conn.preferredRecordSyntax = 'EXPLAIN'
    # Debug switch, left disabled: conn._cli.test = 1

    e = Explainer(conn)
    # Register two extra database names before running the explainer.
    for extra_db in ('l5r', 'scifi'):
        e.databases[extra_db] = 1
    e.run()
    conn.close()
Exemple #21
0
#!/usr/bin/env python

from PyZ3950 import zoom
#conn = zoom.Connection ('z3950.loc.gov', 7090)
#conn.databaseName = 'VOYAGER'
# Run two author searches on the same connection and print their records
# interleaved, to show that both result sets stay usable side by side.
conn = zoom.Connection('z3950.bibsys.no', 2100)
try:
    conn.databaseName = 'BIBSYS'

    conn.preferredRecordSyntax = 'USMARC'

    query1 = zoom.Query('CCL', 'au=Gould, Stephen Jay')
    res1 = conn.search(query1)
    query2 = zoom.Query('CCL', 'au=Pynchon, Thomas')
    res2 = conn.search(query2)
    for i in range(0, max(len(res1), len(res2))):
        # Pre-formatted strings print identically on Python 2 and 3.
        if i < len(res1):
            print("1: %s" % (res1[i],))
        if i < len(res2):
            print("2: %s" % (res2[i],))
finally:
    # Close the connection even if a search or fetch fails.
    conn.close()
Exemple #22
0
#!/usr/bin/env python
from __future__ import print_function, absolute_import

from PyZ3950 import zoom
# Two author searches share one connection; their records are printed
# interleaved, each line tagged with the number of its result set.
conn = zoom.Connection('ilsz3950.nlm.nih.gov', 7090)
conn.databaseName = 'voyager'
conn.preferredRecordSyntax = 'MARC8'

query1 = zoom.Query('CCL', 'au=Gould, Stephen Jay')
res1 = conn.search(query1)
query2 = zoom.Query('CCL', 'au=Pynchon, Thomas')
res2 = conn.search(query2)

labelled_sets = (("1:", res1), ("2:", res2))
for i in range(max(len(res1), len(res2))):
    for label, result_set in labelled_sets:
        # The shorter result set simply stops contributing lines.
        if i < len(result_set):
            print(label, result_set[i])

conn.close()
Exemple #23
0
 def _getconn(self, ip, port, name, syntax):
     """Open a connection to ip:port and select database `name` with
     `syntax` as preferred record syntax; return the connection."""
     link = zoom.Connection(ip, port)
     link.databaseName, link.preferredRecordSyntax = name, syntax
     return link
Exemple #24
0
from PyZ3950 import zoom

# Search several databases at once ('+'-joined names) and print, for every
# record, the database it came from followed by the record itself.
conn = zoom.Connection('www.lib.csu.ru', 210)
try:
    conn.databaseName = 'arefd+knigi+liter+period'
    conn.preferredRecordSyntax = 'USMARC'
    query = zoom.Query('CCL', 'ti=Journal or au=Turgenev')
    res = conn.search(query)
    for r in res:
        # A pre-formatted string prints identically on Python 2 and 3.
        print("db: %s %s" % (repr(r.databaseName), r))
finally:
    # The connection used to be left open; release it in every case.
    conn.close()
Exemple #25
0
def _get_offers_z3950(id, library):
    offers = []

    # determine which server to talk to
    if library == 'Georgetown':
        conf = settings.Z3950_SERVERS['GT']
    elif library == 'George Mason':
        id = id.strip('m')
        conf = settings.Z3950_SERVERS['GM']
    else:
        raise Exception("unrecognized library %s" % library)

    # search for the id, and get the first record
    z = zoom.Connection(conf['IP'], conf['PORT'])
    z.databaseName = conf['DB']
    z.preferredRecordSyntax = conf['SYNTAX']
    q = zoom.Query('PQF', '@attr 1=12 %s' % id.encode('utf-8'))
    results = z.search(q)
    if len(results) == 0:
        return []
    rec = results[0]

    # normalize holdings information as schema.org offers

    if hasattr(rec, 'data') and not hasattr(rec.data, 'holdingsData'):
        return []

    for holdings_data in rec.data.holdingsData:
        h = holdings_data[1]
        o = {'@type': 'Offer', 'seller': library}

        if hasattr(h, 'callNumber'):
            o['sku'] = h.callNumber.rstrip('\x00').strip()

        if hasattr(h, 'localLocation'):
            o['availabilityAtOrFrom'] = h.localLocation.rstrip('\x00')

        if hasattr(h, 'publicNote') and library == 'Georgetown':
            note = h.publicNote.rstrip('\x00')
            if note == 'AVAILABLE':
                o['availability'] = 'http://schema.org/InStock'
                o['description'] = 'Available'
            elif note in ('SPC USE ONLY', 'LIB USE ONLY'):
                o['availability'] = 'http://schema.org/InStoreOnly'
                o['description'] = 'Available'
            else:
                # set availabilityStarts from "DUE 09-15-14"
                m = re.match('DUE (\d\d)-(\d\d)-(\d\d)', note)
                if m:
                    m, d, y = [int(i) for i in m.groups()]
                    o['availabilityStarts'] = "20%02i-%02i-%02i" % (y, m, d)

                o['availability'] = 'http://schema.org/OutOfStock'
                o['description'] = 'Checked Out'

        elif hasattr(h, 'circulationData'):
            cd = h.circulationData[0]
            if cd.availableNow is True:
                o['availability'] = 'http://schema.org/InStock'
                o['description'] = 'Available'
            else:
                if hasattr(cd, 'availabilityDate') and cd.availablityDate:
                    m = re.match("^(\d{4}-\d{2}-\d{2}).+", cd.availablityDate)
                    if m:
                        o['availabilityStarts'] = m.group(1)
                o['availability'] = 'http://schema.org/OutOfStock'
                o['description'] = 'Checked Out'

        else:
            logging.warn("unknown availability: bibid=%s library=%s h=%s", id,
                         library, h)

        # some locations have a weird period before the name
        o['availabilityAtOrFrom'] = o.get('availabilityAtOrFrom',
                                          '').lstrip('.')

        offers.append(o)

    return offers
Exemple #26
0
# coding: utf-8
"""
Simple script to search a Z39.50 target using Python
and PyZ3950. 
"""

from PyZ3950 import zoom

ISBNs = ['978-1-905017-60-7', '2-86377-125-6']

conn = zoom.Connection('z3950.bnf.fr', 2211)
conn.databaseName = 'TOUT'
conn.preferredRecordSyntax = 'UNIMARC'

# NOTE(review): the query text looks like PQF ("@attr 1=7") prefixed with
# "find", yet it is submitted as 'CQL' - confirm the intended notation.
for isbn in ISBNs:
    query_text = 'find @attr 1=7 ' + isbn
    query = zoom.Query('CQL', query_text)
    print(query)
    res = conn.search(query)
    # print() already applies str() to each record.
    for r in res:
        print(r)

conn.close()