Example #1
0
def callZ3950(search_id, target, depth=0):
    if target == 'UIU':
        print "UIUC NUMBER: ", search_id
        query = zoom.Query('PQF', '@attr 1=12 %s' % str(search_id))

        database_address = 'z3950.carli.illinois.edu'
        username = '******'
        database_name = 'voyager'
    else:
        print "LC NUMBER: ", search_id
        query = zoom.Query('PQF', '@attr 1=9 %s' % str(formatLCCN(search_id)))

        database_address = 'lx2.loc.gov'
        username = ''
        if 'n' in search_id:
            database_name = 'NAF'
        else:
            database_name = 'SAF'

#	conn = establishZ3950Connection(database_address,210,username,database_name)
    res = queryZ3950(database_address, username, database_name, query)
    print len(res)
    print res

    if len(res) > 0:
        for r in res:
            valid_leader = checkLeader(r.data[:24])

            if valid_leader:
                if len(res) > 1:
                    try:
                        new_record = Record(data=r.data)
                    except UnicodeDecodeError:
                        return (False, 'BROKEN CHARACTER IN RECORD')
                    lccn = new_record.get_fields('001')[0].data.replace(
                        " ", "")
                    if lccn == search_id:
                        marc_record = new_record
                        fixNames(marc_record)
                else:
                    try:
                        marc_record = Record(data=r.data)
                    except UnicodeDecodeError:
                        return (False, 'BROKEN CHARACTER IN RECORD')
                    fixNames(marc_record)
            else:
                return (False, 'BROKEN LEADER')

        return (marc_record, None)
    elif depth < 20:
        waitSixSeconds(datetime.datetime.now().time())
        return callZ3950(search_id, target, depth=depth + 1)
    else:
        return (None, 'RECORD NOT FOUND')
Example #2
0
def read_mc(sys_no):
    """
    Loads marc data from aleph.unibas.ch for one single system number.

    The raw record bytes are passed to ``__write_to_cache`` before being
    parsed with ``MARCReader``.

    :param sys_no: System number to which the marc entry is to be loaded.
    :return: the first record produced by ``MARCReader`` for the downloaded
        data, or ``None`` when the server could not be reached or returned
        no record for ``sys_no``.
    """
    try:
        conn = zoom.Connection('aleph.unibas.ch', 9909)
    except zoom.ConnectionError:
        print("\n!!! Error: could not connect to aleph !!!\n")
        return None

    try:
        conn.databaseName = 'dsv05'
        conn.preferredRecordSyntax = 'USMARC'

        query = zoom.Query('PQF', '@attr 1=1032 ' + sys_no)
        res = conn.search(query)
        if len(res) == 0:
            # Nothing to index into; report it like the connection failure.
            print("\n!!! Error: no record found for " + sys_no + " !!!\n")
            return None
        data = bytes(res[0].data)
    except zoom.ConnectionError:
        print("\n!!! Error: could not connect to aleph !!!\n")
        return None
    finally:
        # Always release the connection; a failure while closing an already
        # broken connection must not mask the result computed above.
        try:
            conn.close()
        except zoom.ConnectionError:
            pass

    __write_to_cache(data, sys_no)

    reader = MARCReader(data, force_utf8=True, to_unicode=True)
    return next(reader)
Example #3
0
 def build_qobject(self, qstring):
     """ Wraps `qstring` in a PyZ3950.zoom.Query of syntax 'PQF' and returns it.
         The syntax name and the query text are both passed UTF-8 encoded;
         the type and pretty-printed form of the result are logged at debug
         level. Called by search() """
     syntax = 'PQF'.encode('utf-8')
     encoded_query = qstring.encode('utf-8')
     query_obj = zoom.Query(syntax, encoded_query)
     log.debug('type(qobject), `%s`' % type(query_obj))
     log.debug('pprint.pformat(qobject), `%s`' % pprint.pformat(query_obj))
     return query_obj
Example #4
0
    def run(self):
        """Issue randomly chosen CCL queries until more than 500 have run.

        Unless the module-level ``parse_only`` flag is set, a connection to
        localhost:2100 is opened (record syntax SUTRS), each query is sent
        and every returned record is passed to ``self.consume``.  With
        ``parse_only`` set, queries are only constructed.  On exit the
        thread puts itself on ``self.terminate_queue``.
        """
        if not parse_only:
            self.conn = zoom.Connection('localhost', 2100)
            self.conn.preferredRecordSyntax = 'SUTRS'

        while True:
            self.count += 1
            query_str = random.choice(self.queries)
            try:
                parsed = zoom.Query('CCL', query_str)
                if not parse_only:
                    for rec in self.conn.search(parsed):
                        self.consume(rec)
            except zoom.Bib1Err:
                # Server-side diagnostics are deliberately ignored here.
                pass
            except zoom.QuerySyntaxError as e:
                print("e", e, "q", query_str)

            if self.count <= 500:
                continue

            if not parse_only:
                self.conn.close()
            # should randomly do clean vs. not clean exit
            self.terminate_queue.put(self, 1)
            break
Example #5
0
def run ():
    """Scan database 'NL' on amicus.nlc-bnc.ca:210 with the CCL query
    ti="1066" and print the first ten entries of the scan result."""
    link = zoom.Connection ('amicus.nlc-bnc.ca', 210)
    link.databaseName = 'NL'
    scan_set = link.scan (zoom.Query ('CCL', 'ti="1066"'))
    for entry in scan_set[0:10]:
        print(entry)
Example #6
0
def RunQuery():
    """Run a Z39.50 query & save MARC results to files.

    Searches the VOYAGER database on z3950.loc.gov:7090 for the title
    "1066 and all that" (CCL), writes each returned record's raw data to
    "PyZ3950 search resultset <n>.bin" in the system temp directory and
    passes every saved file to ``ParseRecord``.

    The connection is closed even if the search, a file write or
    ``ParseRecord`` raises.
    """
    #open connection
    conn = zoom.Connection ('z3950.loc.gov', 7090)
    try:
        conn.databaseName = 'VOYAGER'
        conn.preferredRecordSyntax = 'USMARC'

        #setup and run query
        query = zoom.Query('CCL', 'ti="1066 and all that"')
        res = conn.search(query)

        #for each record in the resultset, save as file
        for ifilecount, r in enumerate(res):
            sSaveAs = os.path.join(tempfile.gettempdir(),
                                   "PyZ3950 search resultset %d.bin" % ifilecount)
            print("Saving as file:", sSaveAs)
            # The context manager closes the file even when write() fails.
            with open(sSaveAs, "wb") as fx:
                fx.write(r.data)
            #parse each record as we save
            ParseRecord(sSaveAs)
    finally:
        #close connection
        conn.close()
Example #7
0
    def library_search(self, query):
        """
        Search the catalogue with a CCL query assembled from whichever of
        author, title, ISBN and ISSN are set on ``query``.

        @param query: The query to be performed
        @type query: molly.apps.library.models.LibrarySearchQuery
        @return: A list of results
        @rtype: [LibrarySearchResult]
        """

        connection = self._make_connection()

        # One (CCL template, value) pair per supported field; values are
        # escaped for the query by removing quotation marks.
        criteria = (
            ('(au="%s")', query.author),
            ('(ti="%s")', query.title),
            ('(isbn="%s")', query.isbn),
            ('((1,8)="%s")', query.issn),
        )
        clauses = [template % value.replace('"', '')
                   for template, value in criteria
                   if value != None]

        z3950_query = zoom.Query('CCL', 'and'.join(clauses))

        try:
            return self.Results(connection.search(z3950_query),
                                self._wrapper)
        except zoom.Bib1Err as e:
            # 31 = Resources exhausted - no results available.
            # Condition 108 is answered with an empty list as well.
            if e.condition not in (31, 108):
                raise
            return []
Example #8
0
def z3950_query(target=None,
                keyword=None,
                qualifier='(1,1016)',
                query_type='CCL'):
    """Search a Z39.50 target for ``qualifier=keyword``.

    ``target`` is a mapping read for the keys ``host``, ``database``,
    ``port``, ``syntax``, ``user`` and ``password``.  Credentials are passed
    to the connection only when both ``user`` and ``password`` are not None.

    Returns ``(True, result_set)``.  Raises ``ValueError`` when ``target``
    is None; connection errors from zoom propagate unchanged.
    """
    if target is None:
        raise ValueError('Z3950 target not provided.')

    host = target['host']
    database = target['database']
    port = target['port']
    syntax = target['syntax']
    user = target['user']
    password = target['password']

    credentials = {}
    if user is not None and password is not None:
        credentials = {'user': user, 'password': password}
    conn = zoom.Connection(host, port, **credentials)

    conn.databaseName = database
    conn.preferredRecordSyntax = syntax
    expression = qualifier + '=' + keyword
    result_set = conn.search(zoom.Query(query_type, expression))

    return True, result_set
Example #9
0
    def gather_stage(self, harvest_job):
        """Create harvest objects for everything the Z39.50 source returns.

        Every record returned for the CCL query 'metadata' is identified by
        the MD5 hex digest of its data.  Digests that are not among the
        current objects of this source get a HarvestObject with status
        'new'; current guids that were not seen in the response get one
        with status 'delete'.

        Returns the list of ids of the objects saved, or None after any
        exception (which is recorded through ``_save_gather_error``).
        """

        log = logging.getLogger(__name__ + '.WAF.gather')
        log.debug('z3950Harvester gather_stage for job: %r', harvest_job)

        self.harvest_job = harvest_job
        source_url = harvest_job.source.url
        self._set_source_config(harvest_job.source.config)

        # guid -> package id for the objects currently held for this source
        current_rows = model.Session.query(
            HarvestObject.guid, HarvestObject.package_id
        ).filter(
            HarvestObject.current==True
        ).filter(
            HarvestObject.harvest_source_id==harvest_job.source.id
        )
        guid_to_package_id = dict((row[0], row[1]) for row in current_rows)
        known_guids = set(guid_to_package_id)
        seen_guids = set()

        try:
            conn = zoom.Connection(source_url,
                                   int(self.source_config.get('port', 210)))
            conn.databaseName = self.source_config.get('database', '')
            conn.preferredRecordSyntax = 'XML'
            conn.elementSetName = 'T'

            object_ids = []
            for result in conn.search(zoom.Query('CCL', 'metadata')):
                digest = hashlib.md5(result.data).hexdigest()
                if digest in known_guids:
                    seen_guids.add(digest)
                    continue

                new_obj = HarvestObject(
                    job=harvest_job,
                    guid=digest,
                    extras=[
                        HOExtra(key='status', value='new'),
                        HOExtra(key='original_document',
                                value=result.data.decode('latin-1')),
                        HOExtra(key='original_format', value='fgdc')
                    ])
                new_obj.save()
                object_ids.append(new_obj.id)

            # Anything we hold that the source no longer returned.
            for guid in known_guids - seen_guids:
                stale_obj = HarvestObject(
                    job=harvest_job,
                    guid=guid,
                    package_id=guid_to_package_id[guid],
                    extras=[HOExtra(key='status', value='delete')])
                stale_obj.save()
                object_ids.append(stale_obj.id)

            return object_ids
        except Exception as e:
            message = 'Unable to get content for URL: %s: %r' % (source_url, e)
            self._save_gather_error(message, harvest_job)
            return None
Example #10
0
 def zoom_record(self, bibid):
     """Search self.conn with PQF use attribute 12 for the UTF-8 encoded
     `bibid` and return the first hit, or None when there is none.
     Errors raised by the search propagate to the caller."""
     pqf = '@attr 1=12 %s' % bibid.encode('utf-8')
     hits = self.conn.search(zoom.Query('PQF', pqf))
     return hits[0] if len(hits) > 0 else None
Example #11
0
def run ():
    conn = zoom.Connection ('amicus.nlc-bnc.ca', 210)
    conn.databaseName = 'NL'

    q = zoom.Query ('CCL', 'ti=A')
    conn.numberOfEntries = 80
    ss = conn.scan (q)
    for i in range (len (ss)):
        print ss.get_term (i), ss.get_fields (i)
Example #12
0
    def lookup(self, term):
        text = 'attrset(XD1/(1,1)="%s")' % (term, )
        q = zoom.Query('CCL', text)
        res = self.conn.search(q)
        l = []
        parsedict = {'XML': xmlparse, 'GRS-1': grs1parse}
        for r in res:
            parser = parsedict.get(r.syntax, None)
            if parser == None:
                print "Unknown syntax:", r.syntax, "for", r.data
                continue

            l += parser(r.data)
        return l
Example #13
0
def search(host, port, database, query, start=1, limit=10):
    """Run a CCL query and return one page of the results as XML elements.

    Connects to ``host``:``port``, selects ``database``, requests records
    in XML syntax and fetches the window beginning at the 1-based position
    ``start``.  The fetched text (newlines removed) is split into
    ``<record ...>...</record>`` chunks and each chunk is parsed with
    ``ET.fromstring``.

    :param limit: maximum number of records to fetch; ``None`` means every
        record from ``start`` onwards.
    :return: ``(parsed, total)`` where ``parsed`` is the list of parsed
        records and ``total`` is the size of the whole result set.
    """
    conn = zoom.Connection(host, port)
    try:
        conn.databaseName = database
        conn.preferredRecordSyntax = 'XML'

        res = conn.search(zoom.Query('CCL', str(query)))
        total = len(res)

        # Clamp the page to what is left after `start`; a window that falls
        # outside the result set yields an empty range below.
        first = start - 1
        remaining = total - first
        to_show = remaining if limit is None else min(remaining, limit)

        # if we were dealing with marc8 results, each record would probably
        # need translating here (zmarc.MARC8_to_Unicode); evergreen doesn't.
        collector = [str(res[idx]).replace('\n', '')
                     for idx in range(first, first + to_show)]
    finally:
        # Release the connection even when the search or a fetch fails.
        conn.close()

    raw = "".join(collector)
    record_pat = re.compile('<record .*?</record>', re.M)

    parsed = []
    for rec in record_pat.findall(raw):
        # TODO: fix this ascii/replace, once our z3950/marc encoding
        # issues are sorted out.
        rec = unicode(rec, 'ascii', 'replace')
        # replace multiple 'unknown' characters with a single one.
        rec = re.sub(u'\ufffd+', u'\ufffd', rec)
        parsed.append(ET.fromstring(rec.encode('utf-8')))
    return parsed, total
Example #14
0
def get_marc(target_name, cclquery, result_offset):
    """Return the raw data of one record from a configured Z39.50 target.

    The target is looked up in ``targets`` and its ``identifier`` is split
    by ``re_identifier`` into host, port (default 210) and database.

    :param target_name: key into ``targets``.
    :param cclquery: query text; note that it is submitted with syntax
        'PQF' despite the parameter name.
    :param result_offset: 1-based position of the wanted record.  Missing
        or smaller-than-1 values select the first record.
    :return: ``data`` of the selected record, or ``None`` when the result
        set has fewer than ``result_offset`` records.
    """
    target = targets[target_name]
    m = re_identifier.match(target['identifier'])
    (host, port, db) = m.groups()
    port = int(port) if port else 210

    # Previously an unconditional `return r.data` at the top of the loop made
    # the offset comparison unreachable, so the first record always came back.
    wanted = max(int(result_offset or 1), 1)

    conn = zoom.Connection (host, port)
    try:
        if db:
            conn.databaseName = db
        conn.preferredRecordSyntax = 'USMARC'
        res = conn.search (zoom.Query ('PQF', cclquery))
        for position, r in enumerate(res, 1):
            if position == wanted:
                return r.data
        return None
    finally:
        conn.close ()
Example #15
0
 def run(self):
     """Repeatedly pick a random entry of self.queries and run it as CCL.

     Unless the module-level `parse_only` flag is set, a connection to
     localhost:2100 is opened first (record syntax SUTRS) and every record
     of every search is handed to self.consume.  With `parse_only` set the
     queries are only constructed, never sent.

     NOTE(review): no exit from the `while` loop is visible in this
     excerpt - confirm whether the termination logic was cut off.
     """
     if not parse_only:
         self.conn = zoom.Connection('localhost', 2100)
         self.conn.preferredRecordSyntax = 'SUTRS'
     while 1:
         # Counts queries attempted, including ones that fail below.
         self.count += 1
         #            if not (self.count % 100):
         #                print "Thread", currentThread (), "Count", self.count
         query_str = random.choice(self.queries)
         try:
             q = zoom.Query('CCL', query_str)
             if not parse_only:
                 r = self.conn.search(q)
                 for rec in r:
                     self.consume(rec)
         except zoom.Bib1Err, err:
             # Bib-1 errors raised while querying are ignored on purpose.
             pass
         except zoom.QuerySyntaxError, e:
             # Report the query text that could not be parsed and carry on.
             print "e", e, "q", query_str
Example #16
0
    def control_number_search(self, control_number):
        """
        Look an item up by its control number using the Bib-1 use attribute
        held in ``self._control_number_key``.

        @param control_number: The unique ID of the item to be looked up
        @type control_number: str
        @return: The item with this control ID, or None if none can be found
        @rtype: LibrarySearchResult
        """

        # Escape input: double quotes would terminate the quoted CCL value
        sanitized = control_number.replace('"', '')
        ccl_text = '(1,%s)="%s"' % (self._control_number_key, sanitized)

        z3950_query = zoom.Query('CCL', ccl_text)
        hits = self.Results(self._make_connection().search(z3950_query),
                            self._wrapper)
        return hits[0] if len(hits) > 0 else None
    def open_stream(self, stream):
        """Open a Z39.50 search described by a ``z3950...`` URL.

        The URL is split by ``self._parse_url`` after its leading ``z3950``
        scheme has been replaced with ``https``.  The host and port select
        the server, the directory part becomes the database name and
        ``args['query']`` is sent as a CQL query.  ``preferredRecordSyntax``
        and ``elementSetName`` are taken from ``args`` when present and
        default to 'USMARC' and 'F'.

        Stores the number of hits in ``self.total`` and returns the result
        set.
        """
        # Swap only the leading scheme.  A plain str.replace() would also
        # rewrite every later 'z3950' in the URL, e.g. inside a host name
        # such as z3950.loc.gov.
        if stream.startswith('z3950'):
            server = 'https' + stream[len('z3950'):]
        else:
            server = stream
        (transport, user, passwd, host, port, dirname, filename, args,
         anchor) = self._parse_url(server)

        conn = zoom.Connection(host, port)
        conn.databaseName = dirname
        q = args['query']
        qo = zoom.Query('CQL', q)

        if 'preferredRecordSyntax' in args:
            conn.preferredRecordSyntax = args['preferredRecordSyntax']
        else:
            conn.preferredRecordSyntax = 'USMARC'
        if 'elementSetName' in args:
            conn.elementSetName = args['elementSetName']
        else:
            conn.elementSetName = 'F'
        rs = conn.search(qo)
        self.total = len(rs)
        return rs
Example #18
0
    def search(self, query, position=0, size=10, syntax='CCL'):
        """Query the database and return one window of the raw results.

        :param query: The database query.
        :param position: The position of the first record (zero-based index).
        :param size: The maximum number of records to return.
        :param syntax: The syntax of the query, either CCL, S-CCL, CQL, S-CQL,
            PQF, C2, ZSQL or CQL-TREE.

        :returns: A :class:`Dataset` object containing the raw record data,
            with ``total`` set to the size of the whole result set.
        :raises zoom.QuerySyntaxError: if the query cannot be parsed.
        """
        conn = self._connect()
        try:
            parsed_query = zoom.Query(syntax, query)
        except zoom.QuerySyntaxError:  # pragma: no cover
            raise zoom.QuerySyntaxError("The query could not be parsed.")

        first = int(position)
        last = first + int(size)
        result_set = conn.search(parsed_query)

        window = [record.data for record in result_set[first:last]]
        return Dataset(window, total=len(result_set))
Example #19
0
# Second command-line argument: the human-readable name of the search field.
field = sys.argv[2]

# Translate the field name into a CCL qualifier; anything that is not
# Author, ISBN or LCCN is searched as a title.
if (field == "Author"):
	field = "au"
elif (field == "ISBN"):
	field = "isbn"
elif (field == "LCCN"):
	field = "lccn"
else:
	field = "ti"

conn = zoom.Connection ('z3950.loc.gov', 7090)
conn.databaseName = 'VOYAGER'
conn.preferredRecordSyntax = 'USMARC'

# NOTE(review): `query` on the right-hand side is not assigned anywhere in
# this excerpt - presumably the search text bound earlier in the script.
# The name is rebound here to the zoom.Query object.
query = zoom.Query ('CCL', field + "=" + query)

# Skeleton of the output document: <importedData><List name="..."/></importedData>
doc = Document ()
root = doc.createElement ("importedData")
doc.appendChild (root)
collection = doc.createElement ("List")
collection.setAttribute ("name", "Library of Congress Import")
root.appendChild (collection)

res = conn.search (query)

count = 0

# Wrap each returned record's raw data in a MARC object.
for r in res:
	m = MARC (MARC=r.data)
Example #20
0
#!/usr/bin/env python
from PyZ3950 import zoom, z3950

# Set the zoom module's trace_extract flag before any traffic happens.
zoom.trace_extract = 1

# Target: Library of Congress, database VOYAGER, records as USMARC.
# (An alternative tried earlier was ipac.lib.uchicago.edu:210, database
# 'uofc', optionally with z3950.msg_size = 0x600.)
session = zoom.Connection('z3950.loc.gov', 7090)
session.databaseName = 'VOYAGER'
session.preferredRecordSyntax = 'USMARC'

hits = session.search(zoom.Query('CCL', 'au=Thucydides'))
# presentChunk is set to 1 on the result set before it is iterated.
hits.presentChunk = 1
for record in hits:
    print(record)

session.close()
Example #21
0
#!/usr/bin/env python

from PyZ3950 import zoom, zmarc

def make_conn ():
    """Return a connection to z3950.loc.gov:7090 set up for database
    'VOYAGER' and record syntax 'USMARC'."""
    link = zoom.Connection ('z3950.loc.gov', 7090)
    link.databaseName = 'VOYAGER'
    link.preferredRecordSyntax = 'USMARC'
    return link


def fetch_mods (query):
    """Run `query` on the module-level `conn` and return a list with the
    toMODS() conversion of every record, in result order."""
    return [zmarc.MARC (record.data).toMODS ()
            for record in conn.search (query)]

# `conn` is read as a global by fetch_mods, so it must be bound first.
conn = make_conn ()
title_query = zoom.Query ('CCL', 'ti="1066 and all that"')
mods_list = fetch_mods (title_query)
print(mods_list)


    
Example #22
0
        loc = left + right

    queryString = 'lccn=' + loc
else:
    queryString = 'ti="' + title + '"'

    if (authors != ""):
        queryString = queryString + ' and au="' + authors + '"'

conn = zoom.Connection('z3950.loc.gov', 7090)
conn.databaseName = 'VOYAGER'
conn.preferredRecordSyntax = 'USMARC'

sys.stderr.write("<!-- " + queryString + "-->\n")

query = zoom.Query('CCL', str(queryString))

doc = Document()
root = doc.createElement("importedData")
doc.appendChild(root)
collection = doc.createElement("List")
collection.setAttribute("name", "Library of Congress Import")
root.appendChild(collection)

res = conn.search(query)

count = 0

for r in res:
    m = MARC(MARC=r.data)
Example #23
0
#!/usr/bin/env python
"""Demonstrates adding qualifiers at runtime by mutating ccl.qual_dict,
and calling ccl.relex ()."""

from PyZ3950 import zoom, ccl

# Register the extra CCL qualifier AUPERSONAL with the value (1, 1) before
# any query that uses it is parsed.
ccl.add_qual('AUPERSONAL', (1, 1))

loc = zoom.Connection('z3950.loc.gov', 7090)
loc.databaseName = 'VOYAGER'
loc.preferredRecordSyntax = 'USMARC'

matches = loc.search(zoom.Query('CCL', 'aupersonal=MacLane, Saunders'))
# Only the first twenty hits are printed.
for record in matches[:20]:
    print(record)

loc.close()
Example #24
0
#!/usr/bin/env python
from PyZ3950 import zoom

# Look up a single ISBN at the Library of Congress and print the first hit.
loc = zoom.Connection('z3950.loc.gov', 7090)
loc.databaseName = 'VOYAGER'
loc.preferredRecordSyntax = 'USMARC'

hits = loc.search(zoom.Query('CCL', 'isbn=0253333490'))
print(hits[0])

loc.close()
Example #25
0
 def run_query(self, qstr):
     """Run `qstr` as a CCL query on the module-level `conn`, print the
     query string, show the hits via self.disp_res and return them."""
     hits = conn.search(zoom.Query('CCL', qstr))
     print(qstr)
     self.disp_res(hits)
     return hits
Example #26
0
#!/usr/bin/env python
from PyZ3950 import zoom

# Title search at the Library of Congress; every hit is printed.
loc = zoom.Connection('z3950.loc.gov', 7090)
loc.databaseName = 'VOYAGER'
loc.preferredRecordSyntax = 'USMARC'

title_query = zoom.Query('CCL', 'ti="1066 and all that"')
for record in loc.search(title_query):
    print(record)
loc.close()
Example #27
0
#!/usr/bin/env python
from __future__ import print_function, absolute_import

from PyZ3950 import zoom
# Two author searches on one connection; their hits are printed
# interleaved, each line tagged with the number of the search it came from.
conn = zoom.Connection('ilsz3950.nlm.nih.gov', 7090)
conn.databaseName = 'voyager'
conn.preferredRecordSyntax = 'MARC8'

res1 = conn.search(zoom.Query('CCL', 'au=Gould, Stephen Jay'))
res2 = conn.search(zoom.Query('CCL', 'au=Pynchon, Thomas'))

for i in range(max(len(res1), len(res2))):
    for tag, result_set in (("1:", res1), ("2:", res2)):
        # The shorter result set simply stops contributing lines.
        if i < len(result_set):
            print(tag, result_set[i])
conn.close()
Example #28
0
# coding: utf-8
"""
Simple script to search a Z39.50 target using Python
and PyZ3950. 
"""

from PyZ3950 import zoom

ISBNs = ['978-1-905017-60-7', '2-86377-125-6']

# Target: z3950.bnf.fr:2211, database TOUT, records as UNIMARC.
bnf = zoom.Connection('z3950.bnf.fr', 2211)
bnf.databaseName = 'TOUT'
bnf.preferredRecordSyntax = 'UNIMARC'

for number in ISBNs:
    # NOTE(review): the text is submitted with syntax 'CQL' although
    # '@attr 1=7' looks like PQF notation - confirm against the target.
    isbn_query = zoom.Query('CQL', 'find @attr 1=7 ' + number)
    print(isbn_query)
    for record in bnf.search(isbn_query):
        print(str(record))

bnf.close()
Example #29
0
from PyZ3950 import zoom

conn = zoom.Connection('www.lib.csu.ru', 210)
conn.databaseName = 'arefd+knigi+liter+period'
conn.preferredRecordSyntax = 'USMARC'
query = zoom.Query('CCL', 'ti=Journal or au=Turgenev')
res = conn.search(query)
for r in res:
    print "db:", repr(r.databaseName), r
Example #30
0
def _get_offers_z3950(id, library):
    """Return the holdings of one bibliographic record as schema.org offers.

    The Z39.50 server is chosen from ``settings.Z3950_SERVERS`` by
    ``library`` ('Georgetown' -> 'GT', 'George Mason' -> 'GM'; for George
    Mason any leading/trailing 'm' characters are stripped from ``id``).
    The record is searched with PQF use attribute 12 and only the first hit
    is used.

    :param id: bibliographic id of the record.
    :param library: 'Georgetown' or 'George Mason'.
    :return: a list of dicts, one per holdings entry; empty when the search
        finds nothing or the record carries no holdings data.
    :raises Exception: for any other ``library`` value.
    """
    # determine which server to talk to
    if library == 'Georgetown':
        conf = settings.Z3950_SERVERS['GT']
    elif library == 'George Mason':
        id = id.strip('m')
        conf = settings.Z3950_SERVERS['GM']
    else:
        raise Exception("unrecognized library %s" % library)

    z = zoom.Connection(conf['IP'], conf['PORT'])
    try:
        z.databaseName = conf['DB']
        z.preferredRecordSyntax = conf['SYNTAX']

        # search for the id, and get the first record
        q = zoom.Query('PQF', '@attr 1=12 %s' % id.encode('utf-8'))
        results = z.search(q)
        if len(results) == 0:
            return []
        rec = results[0]

        # A record without `data`, or data without holdings, has no offers.
        holdings = getattr(getattr(rec, 'data', None), 'holdingsData', None)
        if holdings is None:
            return []

        # Each holdings entry is indexed; element 1 carries the details.
        return [_offer_from_holdings(entry[1], id, library)
                for entry in holdings]
    finally:
        # The connection is only needed while the record is being read.
        z.close()


def _offer_from_holdings(h, bibid, library):
    """Normalize one holdings entry ``h`` into a schema.org Offer dict."""
    o = {'@type': 'Offer', 'seller': library}

    if hasattr(h, 'callNumber'):
        o['sku'] = h.callNumber.rstrip('\x00').strip()

    if hasattr(h, 'localLocation'):
        o['availabilityAtOrFrom'] = h.localLocation.rstrip('\x00')

    if hasattr(h, 'publicNote') and library == 'Georgetown':
        note = h.publicNote.rstrip('\x00')
        if note == 'AVAILABLE':
            o['availability'] = 'http://schema.org/InStock'
            o['description'] = 'Available'
        elif note in ('SPC USE ONLY', 'LIB USE ONLY'):
            o['availability'] = 'http://schema.org/InStoreOnly'
            o['description'] = 'Available'
        else:
            # set availabilityStarts from "DUE 09-15-14"
            due = re.match(r'DUE (\d\d)-(\d\d)-(\d\d)', note)
            if due:
                month, day, year = [int(part) for part in due.groups()]
                o['availabilityStarts'] = "20%02i-%02i-%02i" % (year, month,
                                                                day)

            o['availability'] = 'http://schema.org/OutOfStock'
            o['description'] = 'Checked Out'

    elif hasattr(h, 'circulationData'):
        cd = h.circulationData[0]
        if cd.availableNow is True:
            o['availability'] = 'http://schema.org/InStock'
            o['description'] = 'Available'
        else:
            # The attribute was previously tested under one spelling and read
            # under another, which could never both succeed.  Accept either:
            # 'availablityDate' is the spelling the old code read.
            available_on = (getattr(cd, 'availablityDate', None)
                            or getattr(cd, 'availabilityDate', None))
            if available_on:
                found = re.match(r"^(\d{4}-\d{2}-\d{2}).+", available_on)
                if found:
                    o['availabilityStarts'] = found.group(1)
            o['availability'] = 'http://schema.org/OutOfStock'
            o['description'] = 'Checked Out'

    else:
        logging.warning("unknown availability: bibid=%s library=%s h=%s",
                        bibid, library, h)

    # some locations have a weird period before the name
    o['availabilityAtOrFrom'] = o.get('availabilityAtOrFrom',
                                      '').lstrip('.')
    return o