def test_generate_requests(self):
    # test values when one country goes in
    cntries = ('guam',)
    guam_results = self.t.generate_requests(
        lccn=None,
        oclc=None,
        raw_query=raw_query,
        countries=cntries,
        totals_only=False
    )[0][2]
    expected = ('guam', 'all-yrs', '=')
    self.assertTupleEqual(guam_results, expected)

    # 2 countries in, 2 requests out
    cntries = ('guam', 'virgin island*')
    results = self.t.generate_requests(
        lccn=None,
        oclc=None,
        raw_query=raw_query,
        countries=cntries,
        totals_only=False
    )
    self.assertEqual(len(results), 2)

    # test a bad lccn pull
    results = self.t.generate_requests(lccn='sn-96095007')
    self.assertEqual(results[0][1], '0')

    # test a good lccn pull
    results = self.t.generate_requests(lccn='sn 96095007')
    self.assertEqual(results[0][1], '1')

    # test what is actually returned from a good lccn pull
    r_xml = results[0][0].response
    r_query = extract_elements(r_xml, element=self.srw + 'query')
    self.assertEqual(r_query[0].text, 'srw.dn exact "sn 96095007"')
    r_numofrecs = extract_elements(r_xml,
                                   element=self.srw + 'numberOfRecords')
    self.assertEqual(r_numofrecs[0].text, '1')
    r_schema = extract_elements(r_xml, element=self.srw + 'recordSchema')
    self.assertEqual(r_schema[0].text, 'info:srw/schema/1/marcxml')

    # test requests split by years
    results = self.test_case_recs_range
    self.assertEqual(len(results), 4)
    # confirm that every returned request uses the '=' operator
    operator = list(set([r[2][2] for r in results]))
    self.assertEqual('=', operator[0])

    # test an oclc pull & its results
    r_xml = self.test_case_rec[0][0].response
    r_query = extract_elements(r_xml, element=self.srw + 'query')
    self.assertEqual(r_query[0].text, 'srw.no exact "18475650"')
    r_numofrecs = extract_elements(r_xml,
                                   element=self.srw + 'numberOfRecords')
    self.assertEqual(r_numofrecs[0].text, '1')

    # test something that is not a real country
    results = self.test_case_recs_fake_cntry
    self.assertFalse(bool(results))
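# NOTE: the tests above lean on an extract_elements() helper that pulls
# namespace-qualified elements out of a raw XML response. Its definition
# is not shown in this section; the following is a minimal sketch,
# assuming the response is an XML string/bytes and element names use
# Clark notation ('{namespace}localname'). The real helper may differ.
import xml.etree.ElementTree as ET


def extract_elements(response, element):
    '''
    Hypothetical sketch: parse the raw XML response and return a list
    of every element (the root included) whose fully qualified tag
    matches `element`.
    '''
    root = ET.fromstring(response)
    return list(root.iter(element))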
def next(self):
    _i = extract_elements(
        self.response,
        element='{http://a9.com/-/spec/opensearch/1.1/}startIndex')
    _p = extract_elements(
        self.response,
        element='{http://a9.com/-/spec/opensearch/1.1/}itemsPerPage')
    _t = extract_elements(
        self.response,
        element='{http://a9.com/-/spec/opensearch/1.1/}totalResults')
    try:
        if int(_t[0].text) > (int(_i[0].text) + int(_p[0].text)):
            self.args['start'] = int(_i[0].text) + int(_p[0].text)
        else:
            raise StopIteration
    except ValueError:
        raise StopIteration
def initial_total_count(self, bib_req):
    '''
    This is used to assess the quality of the results returned.
    '''
    bib_req.get_response()
    _total = extract_elements(
        bib_req.response,
        element='{http://www.loc.gov/zing/srw/}numberOfRecords')
    return _total[0].text
def next(self):
    _i = extract_elements(
        self.response,
        element='{http://www.loc.gov/zing/srw/}nextRecordPosition')
    if len(_i) != 0:
        if _i[0].text is not None:
            self.args['startRecord'] = int(_i[0].text)
        else:
            raise StopIteration
    else:
        raise StopIteration
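# NOTE: both next() variants follow the old Python 2 iterator protocol:
# they mutate the request's query arguments to point at the next page
# ('start' for OpenSearch, 'startRecord' for SRU) and raise
# StopIteration when paging is exhausted. A caller can drive either
# flavor with a loop along these lines -- a hypothetical sketch, where
# bib_request stands in for either request object:
def iterate_pages(bib_request):
    # Fetch a page, collect the raw response, then advance until the
    # request object signals exhaustion via StopIteration.
    pages = []
    while True:
        pages.append(bib_request.get_response())
        try:
            bib_request.next()  # bumps 'start' / 'startRecord'
        except StopIteration:
            break
    return pages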
def initial_total_count(self, bib_req):
    '''
    This function hits the request three times and returns a list of
    totals, one from each hit. This is used to assess the quality of
    the results returned.
    '''
    totals = []
    for grab in itertools.repeat(None, 3):
        bib_req.get_response()
        _total = extract_elements(
            bib_req.response,
            element='{http://www.loc.gov/zing/srw/}numberOfRecords')
        totals.append(_total[0].text)
        # TODO: Check the request time with each request.
        # If the request takes more than 10 seconds,
        # kill the function & split the request. Check split requests.
    return totals
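# NOTE: since the upstream index can report inconsistent hit counts,
# a caller might compare the three probes before trusting a request.
# A hypothetical guard, where `checker` is whatever object exposes
# initial_total_count():
def total_is_stable(checker, bib_req):
    # Trust the total only when all three probes agree on
    # numberOfRecords.
    totals = checker.initial_total_count(bib_req)
    return len(set(totals)) == 1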
def grab_content(self, save_path, bib_requests, search_name='ndnp'):
    '''
    Loops over all requests, executes each request & saves the
    response to the designated 'save_path'.
    '''
    # Run each request and save the content.
    if not bib_requests:
        return
    files_saved = 0
    for bib_rec in bib_requests:
        grab_records = True
        counter = 0
        bib_request, total, components = bib_rec
        previous_next = None
        while grab_records:
            # next_pos renamed from 'next' to avoid shadowing the builtin
            bib_resp = next_record = next_pos = end = start = None
            counter += 1
            # grab xml
            bib_resp = bib_request.get_response()
            # locate the nextRecordPosition element in the xml
            next_record = extract_elements(
                bib_request.response,
                element='{http://www.loc.gov/zing/srw/}nextRecordPosition')
            # grab the text from next_record to get the actual value
            try:
                next_pos = next_record[0].text
            except IndexError:
                # no more recs to grab
                grab_records = False
            try:
                end = int(next_pos) - 1
            except TypeError:
                end = total
            try:
                start = int(next_pos) - MAX_RECORDS
            except TypeError:
                if counter == 1:
                    start = 1
                else:
                    start = previous_next
            if start is None:
                grab_records = False
            name_components = []
            for i in components:
                i = i.replace(' ', '-')
                if i in OPERATOR_MAP:
                    i = OPERATOR_MAP[i]
                name_components.append(i)
            batch_name = '_'.join(name_components)
            filename = '_'.join((search_name, batch_name,
                                 str_value(start), str_value(end))) + '.xml'
            if counter == 1 and len(bib_requests) > 1:
                _logger.info('Batch: %s = %s total' % (filename, total))
            file_location = save_path + '/' + filename
            with open(file_location, 'w') as save_file:
                decoded_data = bib_resp.data.decode('utf-8')
                save_file.write(
                    decoded_data.encode('ascii', 'xmlcharrefreplace'))
            files_saved += 1
            try:
                previous_next = next_pos
                bib_request.next()
            except StopIteration:
                # Break the loop and continue to the next year combination
                grab_records = False
    return files_saved
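# NOTE: end to end, the (request, total, components) tuples produced by
# generate_requests() feed straight into grab_content(). A hypothetical
# wiring sketch -- 't' is whatever object exposes both methods (self.t
# in the tests above), and the save path is an assumption:
requests = t.generate_requests(
    lccn=None,
    oclc=None,
    raw_query=raw_query,
    countries=('guam',),
    totals_only=False,
)
saved = t.grab_content('/tmp/ndnp-batches', requests)
_logger.info('%s files saved' % saved)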