def test_date_comparison(self):
    """Check file mtimes against fixed ISO8601 reference dates.

    Exercises both timezone notations accepted by
    compare_datetime_to_iso8601_date: a numeric offset (-0400) and
    the Zulu/UTC suffix (Z). The file was just created, so it must
    not compare as older than these 2013 dates.
    """
    import os

    target_filepath = get_temporary_file(directory="/tmp")
    try:
        file_last_modified = get_file_modified_date(target_filepath)
        # Numeric-offset timezone notation.
        comparison_date = "2013-07-18T16:31:46-0400"
        self.assertFalse(
            compare_datetime_to_iso8601_date(file_last_modified,
                                             comparison_date))
        # Zulu (UTC) timezone notation.
        comparison_date = "2013-07-18T16:31:46Z"
        self.assertFalse(
            compare_datetime_to_iso8601_date(file_last_modified,
                                             comparison_date))
    finally:
        # The original test leaked the temporary file into /tmp;
        # always remove it, even when an assertion fails.
        if os.path.exists(target_filepath):
            os.remove(target_filepath)
def test_date_comparison(self):
    """Verify the new file's mtime is not newer than either ISO8601
    reference date (offset notation and Zulu/UTC notation)."""
    path = get_temporary_file(directory="/tmp")
    modified = get_file_modified_date(path)
    # Same two reference timestamps, expressed in the two supported
    # timezone notations.
    for comparison_date in ("2013-07-18T16:31:46-0400",
                            "2013-07-18T16:31:46Z"):
        self.assertFalse(
            compare_datetime_to_iso8601_date(modified, comparison_date))
def perform_fulltext_harvest(self, record_list, parameters):
    """
    For every record in given list APSRecord(record ID, DOI, date last
    updated), yield a APSRecord with added FFT dictionary containing URL
    to fulltext/metadata XML downloaded locally.

    If a download is unsuccessful, an error message is given.

    @param record_list: records to harvest.
    @param parameters: harvester parameters (unused in the visible body
        -- presumably consumed elsewhere; TODO confirm).
    @return: tuple of (APSRecord, error_message)
    """
    count = 0
    request_end = None
    request_start = None
    for record in record_list:
        task_sleep_now_if_required(can_stop_too=False)

        # Unless this is the first request, sleep for the duration of
        # the previous request to throttle our hit rate on the server.
        if request_end and request_start:
            request_dt = request_end - request_start
            write_message("Checking request time (%d)"
                          % (request_dt,), verbose=3)
            if count and 0 < request_dt < CFG_APSHARVEST_REQUEST_TIMEOUT:
                write_message("Initiating sleep for %.1f seconds"
                              % (request_dt,), verbose=3)
                time.sleep(request_dt)

        count += 1
        task_update_progress("Harvesting record (%d/%d)"
                             % (count, len(record_list)))

        if not record.doi:
            # FIX: use %s, not %d -- record.recid falls back to the
            # empty string here, which %d cannot format (TypeError).
            msg = "No DOI found for record %s" % (record.recid or "",)
            write_message("Error: %s" % (msg,), stream=sys.stderr)
            yield record, msg
            continue

        url = CFG_APSHARVEST_FULLTEXT_URL % {'doi': record.doi}
        result_file = os.path.join(self.zip_folder,
                                   "%s.zip"
                                   % (record.doi.replace('/', '_'),))
        try:
            request_start = time.time()
            if os.path.exists(result_file):
                # File already downloaded recently; only re-download
                # when our local copy is older than the APS version.
                file_last_modified = get_file_modified_date(result_file)
                if record.last_modified and not \
                        compare_datetime_to_iso8601_date(
                            file_last_modified, record.last_modified):
                    # File is not older than APS version; skip download.
                    raise APSHarvesterFileExits

            write_message("Trying to save to %s"
                          % (result_file,), verbose=5)
            result_file = download_url(url=url,
                                       download_to_file=result_file,
                                       content_type="zip",
                                       accept="application/zip",
                                       retry_count=5,
                                       timeout=60.0)
            write_message("Downloaded %s to %s" % (url, result_file),
                          verbose=2)
        except InvenioFileDownloadError:
            # The exception object was never used; the URL itself is
            # the useful context for the caller.
            msg = "URL could not be opened: %s" % (url,)
            write_message("Error: %s" % (msg,), stream=sys.stderr)
            yield record, msg
            continue
        except APSHarvesterFileExits:
            write_message("File exists at %s" % (result_file,),
                          verbose=2)
def perform_fulltext_harvest(self, record_list, parameters):
    """
    For every record in given list APSRecord(record ID, DOI, date last
    updated), yield a APSRecord with added FFT dictionary containing URL
    to fulltext/metadata XML downloaded locally.

    If a download is unsuccessful, an error message is given.

    @param record_list: records to harvest.
    @param parameters: harvester parameters (unused in the visible body
        -- presumably consumed elsewhere; TODO confirm).
    @return: tuple of (APSRecord, error_message)
    """
    count = 0
    request_end = None
    request_start = None
    for record in record_list:
        task_sleep_now_if_required(can_stop_too=False)

        # Unless this is the first request, sleep for the duration of
        # the previous request to throttle our hit rate on the server.
        if request_end and request_start:
            request_dt = request_end - request_start
            write_message("Checking request time (%d)"
                          % (request_dt,), verbose=3)
            if count and 0 < request_dt < CFG_APSHARVEST_REQUEST_TIMEOUT:
                write_message("Initiating sleep for %.1f seconds"
                              % (request_dt,), verbose=3)
                time.sleep(request_dt)

        count += 1
        task_update_progress("Harvesting record (%d/%d)"
                             % (count, len(record_list)))

        if not record.doi:
            # FIX: use %s, not %d -- record.recid falls back to the
            # empty string here, which %d cannot format (TypeError).
            msg = "No DOI found for record %s" % (record.recid or "",)
            write_message("Error: %s" % (msg,), stream=sys.stderr)
            yield record, msg
            continue

        url = CFG_APSHARVEST_FULLTEXT_URL % {'doi': record.doi}
        result_file = os.path.join(self.out_folder,
                                   "%s.zip"
                                   % (record.doi.replace('/', '_'),))
        try:
            request_start = time.time()
            if os.path.exists(result_file):
                # File already downloaded recently; only re-download
                # when our local copy is older than the APS version.
                # FIX: guard on record.last_modified (as the sibling
                # implementation does) so records without a
                # last-modified date do not pass None into
                # compare_datetime_to_iso8601_date.
                file_last_modified = get_file_modified_date(result_file)
                if record.last_modified and not \
                        compare_datetime_to_iso8601_date(
                            file_last_modified, record.last_modified):
                    # File is not older than APS version; skip download.
                    raise APSHarvesterFileExits

            write_message("Trying to save to %s"
                          % (result_file,), verbose=5)
            result_file = download_url(url=url,
                                       download_to_file=result_file,
                                       content_type="zip",
                                       retry_count=5,
                                       timeout=60.0)
            write_message("Downloaded %s to %s" % (url, result_file),
                          verbose=2)
        except InvenioFileDownloadError:
            # The exception object was never used; the URL itself is
            # the useful context for the caller.
            msg = "URL could not be opened: %s" % (url,)
            write_message("Error: %s" % (msg,), stream=sys.stderr)
            yield record, msg
            continue
        except APSHarvesterFileExits:
            write_message("File exists at %s" % (result_file,),
                          verbose=2)