def test_querydocument(db):
    from dlx.marc import Bib, Auth, QueryDocument, Condition, Or
    from bson import SON
    from json import loads
    import re

    query = QueryDocument(Condition(tag='245', subfields={'a': 'This'}))
    assert isinstance(query.compile(), SON)

    qjson = query.to_json()
    qdict = loads(qjson)
    assert qdict['245']['$elemMatch']['subfields']['$elemMatch']['code'] == 'a'
    assert qdict['245']['$elemMatch']['subfields']['$elemMatch']['value'] == 'This'

    query = QueryDocument(
        Condition(tag='245', subfields={'a': re.compile(r'(This|Another)'), 'b': 'is the', 'c': 'title'}),
        Condition(tag='650', modifier='exists'),
        Or(
            Condition(tag='710', modifier='exists'),
            Condition(tag='520', modifier='not_exists')
        )
    )
    assert len(list(Bib.find(query.compile()))) == 2

    query = QueryDocument(
        Condition(tag='110', subfields={'a': 'Another header'}),
    )
    assert len(list(Auth.find(query.compile()))) == 1
    assert Auth.find_one(query.compile()).id == 2
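# A minimal sketch of how the same query could be run outside the test, assuming a
# DB connection has already been established (as in the standalone script further
# below); the records returned depend entirely on the connected database.
def example_querydocument_usage():
    from dlx.marc import Bib, QueryDocument, Condition

    query = QueryDocument(Condition(tag='245', subfields={'a': 'This'}))
    print(query.to_json())              # JSON view of the compiled $elemMatch filter
    for bib in Bib.find(query.compile()):
        print(bib.id)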
def unbis():
    '''
    Outputs UNBIS Thesaurus subject heading records in MARCXML format.

    /unbis?skip=n&limit=m

    The skip=n URL parameter is used to skip n records. Default is 0.
    The limit=m URL parameter is used to limit the number of records returned. Default is 50.

    Uses the DLX AuthSet.to_xml serialization to output fields 035 and 150 in MARCXML.
    '''
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0

    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50

    print(f"skip is {skp} and limit is {limt}")

    query = QueryDocument(
        Condition(tag='035', subfields={'a': re.compile('^T')}))
    print(query.to_json())

    authset = AuthSet.from_query(query,
                                 projection={'035': 1, '150': 1},
                                 skip=skp,
                                 limit=limt)

    return Response(authset.to_xml(), mimetype='text/xml')
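# A minimal client sketch for the route above (hypothetical host/port; assumes the
# Flask app is running locally and the `requests` package is available). The /unbis
# path and skip/limit parameters come from the docstring above.
def example_unbis_request():
    import requests

    resp = requests.get('http://localhost:5000/unbis', params={'skip': 0, 'limit': 10})
    print(resp.headers.get('Content-Type'))   # expected: text/xml
    print(resp.text[:500])                    # MARCXML with 035 and 150 fields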
def xml(date):
    '''
    Outputs records in MARCXML format for the date provided as a dynamic route,
    in YYYYMMDD or YYYY-MM-DD format.

    /YYYYMMDD/xml?skip=n&limit=m

    The skip=n URL parameter is used to skip n records. Default is 0.
    The limit=m URL parameter is used to limit the number of records returned. Default is 50.
    If the date is in the wrong format, the function falls back to today's records.

    Uses the DLX BibSet.to_xml serialization function to output MARCXML.
    '''
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0

    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50

    print(f"skip is {skp} and limit is {limt}")

    str_date = date.replace('-', '')
    print(f"the original str_date is {str_date}")

    if len(str_date) != 8:
        # fall back to today's date, zero-padded to YYYYMMDD
        str_date = datetime.now().strftime('%Y%m%d')

    print(f"the str_date is {str_date}")

    query = QueryDocument(
        Condition(tag='998', subfields={'z': re.compile('^' + str_date)}),
        Condition(tag='029', subfields={'a': 'JN'}))
    print(query.to_json())

    start_time = datetime.now()
    bibset = BibSet.from_query(query,
                               projection={
                                   '029': 1,
                                   '091': 1,
                                   '191': 1,
                                   '245': 1,
                                   '269': 1,
                                   '650': 1,
                                   '991': 1
                               },
                               skip=skp,
                               limit=limt)
    print(f"duration for 998z was {datetime.now() - start_time}")

    start_time_xml = datetime.now()
    xml = bibset.to_xml()
    # remove double spaces from the xml; they cause problems with the job number on ODS export
    xml = xml.replace("  ", " ")
    print(f"duration for xml serialization was {datetime.now() - start_time_xml}")

    return Response(xml, mimetype='text/xml')
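# A minimal client sketch for the dated MARCXML route (hypothetical host/port; the
# /YYYYMMDD/xml path and skip/limit parameters come from the docstring above).
def example_xml_request():
    import requests

    resp = requests.get('http://localhost:5000/20200101/xml', params={'skip': 0, 'limit': 5})
    print(resp.text[:500])   # MARCXML for records dated 2020-01-01 (998 $z)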
def unbis_tcode(tcode):
    '''
    Looks up UNBIS Thesaurus T codes and returns matching subject heading records.

    The skip=n URL parameter is used to skip n records. Default is 0.
    The limit=m URL parameter is used to limit the number of records returned. Default is 50.

    Returns a JSON object mapping each T code (035 $a) to its labels in the six
    official languages, taken from fields 150 (EN) and 993-997 (FR/ES/AR/ZH/RU).
    '''
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0

    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50

    query = QueryDocument(
        Condition(tag='035', subfields={'a': re.compile(str(tcode).upper())}))
    print(query.to_json())

    dict1 = {}
    authset = AuthSet.from_query(query,
                                 projection={
                                     '035': 1,
                                     '150': 1,
                                     '993': 1,
                                     '994': 1,
                                     '995': 1,
                                     '996': 1,
                                     '997': 1
                                 },
                                 skip=skp,
                                 limit=limt)

    for auth in authset:
        # keep only the 035 $a values that are T codes
        val_035a = auth.get_values('035', 'a')
        val_035a = ''.join([v for v in val_035a if v.startswith('T')])

        dict1[val_035a] = {
            'EN': auth.get_value('150', 'a'),
            'FR': auth.get_value('993', 'a'),
            'ES': auth.get_value('994', 'a'),
            'AR': auth.get_value('995', 'a'),
            'ZH': auth.get_value('996', 'a'),
            'RU': auth.get_value('997', 'a')
        }

    return jsonify(dict1)
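# A minimal client sketch for the T code lookup (the host/port, route path, and T code
# are hypothetical; the response shape follows from the dict built in the function above).
def example_unbis_tcode_request():
    import requests

    resp = requests.get('http://localhost:5000/unbis/tcode/T0100')
    labels = resp.json()
    # shape: {"<T code>": {"EN": ..., "FR": ..., "ES": ..., "AR": ..., "ZH": ..., "RU": ...}}
    for tcode, by_lang in labels.items():
        print(tcode, by_lang.get('EN'))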
def unbis_label(label):
    '''
    Looks up UNBIS Thesaurus labels and returns matching T codes.

    The skip=n URL parameter is used to skip n records. Default is 0.
    The limit=m URL parameter is used to limit the number of records returned. Default is 50.

    Returns a JSON object mapping each matching 150 $a label to its T code (035 $a).
    '''
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0

    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50

    print(f"skip is {skp} and limit is {limt}")

    query = QueryDocument(
        Condition(tag='150', subfields={'a': re.compile(str(label).upper())}))
    print(query.to_json())

    dict1 = {}
    authset = AuthSet.from_query(query,
                                 projection={'035': 1, '150': 1},
                                 skip=skp,
                                 limit=limt)

    for auth in authset:
        # keep only the 035 $a values that are T codes
        val_035a = auth.get_values('035', 'a')
        val_035a = ''.join([v for v in val_035a if v.startswith('T')])
        dict1[auth.get_value('150', 'a')] = val_035a

    return jsonify(dict1)
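# A minimal client sketch for the label lookup (the host/port, route path, and label
# are hypothetical; the response maps each matching 150 $a label to its T code).
def example_unbis_label_request():
    import requests

    resp = requests.get('http://localhost:5000/unbis/label/CLIMATE')
    for label, tcode in resp.json().items():
        print(label, tcode)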
from bson import Regex
from dlx import DB
from dlx.marc import BibSet, QueryDocument, Condition
from config import Config

DB.connect(Config.connect_string)

query = QueryDocument(
    Condition(tag='191', modifier='exists'),
    Condition(tag='269', subfields={'a': Regex('^1975')}))
print(query.to_json())

bibset = BibSet.from_query(query, projection={'191': True}, skip=0, limit=0)
print('There are {} results'.format(bibset.count))

bibset.cache()

for bib in bibset.records:
    print('id: {}, symbol: {}'.format(bib.id, bib.get_value('191', 'a')))

print(bibset.to_xml())
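# A small follow-on sketch (the output filename is hypothetical): keep the serialized
# MARCXML in a variable and write it to disk instead of only printing it.
marcxml = bibset.to_xml()
with open('unga_1975_bibs.xml', 'w', encoding='utf-8') as out:
    out.write(marcxml)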
def votes(topic):
    '''
    Looks up voting data for resolutions whose symbol (191) matches the given topic.

    The skip=n URL parameter is used to skip n records. Default is 0.
    The limit=m URL parameter is used to limit the number of records returned. Default is 50.
    The year_from and year_to URL parameters default to 1980 and 2020.
    The Country URL parameter defaults to CANADA; the Vote parameter defaults to A.

    Returns a JSON object mapping resolution symbols (791 $a) to record ids (001)
    for voting data records in which the given country cast the given vote.
    '''
    try:
        skp = int(request.args.get('skip'))
    except (TypeError, ValueError):
        skp = 0

    try:
        limt = int(request.args.get('limit'))
    except (TypeError, ValueError):
        limt = 50

    yr_from = request.args.get('year_from', '1980')
    yr_to = request.args.get('year_to', '2020')
    cntry = request.args.get('Country', 'CANADA')
    vt = request.args.get('Vote', 'A')

    print(f"skip is {skp} and limit is {limt}")
    print(f"year_from is {yr_from} and year_to is {yr_to}")
    print(f"Country is {cntry}")
    print(f"Vote is {vt}")

    # find authority records for resolutions matching the topic (191 $d)
    # whose symbol (191 $a) starts with 'A'
    query = QueryDocument(
        Condition(tag='191', subfields={'d': re.compile(str(topic))}),
        Condition(tag='191', subfields={'a': re.compile('^A')}))
    print(query.to_json())

    dict_auth_ids = {}
    authset = AuthSet.from_query(query,
                                 projection={'001': 1, '191': 1},
                                 skip=skp,
                                 limit=limt)

    for auth in authset:
        dict_auth_ids[auth.get_value('191', 'a')] = auth.get_value('001')

    dict_bibs = {}
    str_bibs = ''
    votecountry = ''

    for key, value in dict_auth_ids.items():
        print(f"the id of {key} is {value}")

        # find the 'Voting Data' bib records that reference this authority record
        query_bib = QueryDocument(
            Condition(tag='991', subfields={'d': int(value)}),
            Condition(tag='989', subfields={'a': re.compile('Voting Data')}))
        print(query_bib.to_json())

        bibset = BibSet.from_query(query_bib,
                                   projection={'001': 1, '791': 1, '967': 1},
                                   skip=skp,
                                   limit=limt)

        for bib in bibset:
            for field in bib.get_fields('967'):
                votecountry = field.get_value('d') + field.get_value('e')
                # keep only the entries matching both the requested vote and country
                if str(votecountry) == str(vt) + str(cntry):
                    dict_bibs[bib.get_value('791', 'a')] = bib.get_value('001')
                    str_bibs = str_bibs + ' OR 791:[' + bib.get_value('791', 'a') + ']'

    print(str_bibs)
    return jsonify(dict_bibs)
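# A minimal client sketch for the votes route (the host/port, route path, and parameter
# values are hypothetical; the parameter names are the ones read by the function above).
def example_votes_request():
    import requests

    resp = requests.get('http://localhost:5000/votes/climate',
                        params={'Country': 'CANADA', 'Vote': 'Y'})
    # shape: {"<resolution symbol (791 $a)>": "<record id (001)>", ...}
    for symbol, record_id in resp.json().items():
        print(symbol, record_id)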