Exemple #1
0
 def test_date_comparison(self):
     """
     Check the date comparison helper against two fixed 2013 timestamps.

     The modification date of the file returned by get_temporary_file() is
     compared with the same instant written once with a numeric UTC offset
     and once with the "Z" suffix; both comparisons must give a false
     result.
     """
     tmp_path = get_temporary_file(directory="/tmp")
     modified_on = get_file_modified_date(tmp_path)
     reference_dates = (
         "2013-07-18T16:31:46-0400",
         "2013-07-18T16:31:46Z",
     )
     for iso_date in reference_dates:
         self.assertFalse(compare_datetime_to_iso8601_date(modified_on, iso_date))
Exemple #2
0
 def test_date_comparison(self):
     """
     Verify that compare_datetime_to_iso8601_date() gives a false result
     when the modification date of the file from get_temporary_file() is
     compared with a 2013 timestamp, for both supported spellings of the
     timezone (numeric offset and "Z").
     """
     modified_on = get_file_modified_date(
         get_temporary_file(directory="/tmp"))
     with_offset = "2013-07-18T16:31:46-0400"
     with_z_suffix = "2013-07-18T16:31:46Z"
     for reference in (with_offset, with_z_suffix):
         outcome = compare_datetime_to_iso8601_date(modified_on, reference)
         self.assertFalse(outcome)
    def perform_fulltext_harvest(self, record_list, parameters):
        """
        Download the fulltext ZIP package for every record in record_list.

        For each record the download URL is built from
        CFG_APSHARVEST_FULLTEXT_URL and the record DOI, and the package is
        stored in self.zip_folder as "<DOI with '/' replaced by '_'>.zip".
        A package that already exists locally is downloaded again only when
        the record has a last-modified date and
        compare_datetime_to_iso8601_date() returns a true value for it.

        If a record has no DOI or the download fails, the record is yielded
        together with an error message and processing moves on to the next
        record.

        @param record_list: sized collection of record objects; each one must
            provide the attributes doi, recid and last_modified.
        @param parameters: not used by the code in this block.

        @return: generator yielding tuples of (record, error_message)
        """
        count = 0
        request_end = None
        request_start = None
        for record in record_list:
            task_sleep_now_if_required(can_stop_too=False)
            # Unless this is the first request, lets sleep a bit.
            # NOTE(review): request_end is not assigned anywhere in this body
            # as shown, so this throttle only triggers if it is updated
            # elsewhere - confirm.
            if request_end and request_start:
                request_dt = request_end - request_start
                write_message("Checking request time (%d)"
                              % (request_dt,), verbose=3)
                if count and request_dt > 0 and request_dt < CFG_APSHARVEST_REQUEST_TIMEOUT:
                    write_message("Initiating sleep for %.1f seconds"
                                  % (request_dt,), verbose=3)
                    time.sleep(request_dt)

            count += 1
            task_update_progress("Harvesting record (%d/%d)" % (count,
                                                                len(record_list)))

            if not record.doi:
                # %s instead of %d: when recid is missing the "" fallback
                # cannot be formatted with %d and would raise TypeError.
                msg = "No DOI found for record %s" % (record.recid or "",)
                write_message("Error: %s" % (msg,), stream=sys.stderr)
                yield record, msg
                continue

            url = CFG_APSHARVEST_FULLTEXT_URL % {'doi': record.doi}
            result_file = os.path.join(self.zip_folder,
                                       "%s.zip" % (record.doi.replace('/', '_')))
            try:
                request_start = time.time()
                if os.path.exists(result_file):
                    # File already downloaded recently, lets see if it is the same
                    file_last_modified = get_file_modified_date(result_file)
                    if record.last_modified and not compare_datetime_to_iso8601_date(file_last_modified, record.last_modified):
                        # File is not older than APS version, we should not download.
                        raise APSHarvesterFileExits

                write_message("Trying to save to %s" % (result_file,), verbose=5)

                result_file = download_url(url=url,
                                           download_to_file=result_file,
                                           content_type="zip",
                                           accept="application/zip",
                                           retry_count=5,
                                           timeout=60.0)
                write_message("Downloaded %s to %s" % (url, result_file), verbose=2)
            except InvenioFileDownloadError:
                # The exception object itself is not needed; report the URL.
                msg = "URL could not be opened: %s" % (url,)
                write_message("Error: %s" % (msg,),
                              stream=sys.stderr)
                yield record, msg
                continue

            except APSHarvesterFileExits:
                write_message("File exists at %s" % (result_file,), verbose=2)
    def perform_fulltext_harvest(self, record_list, parameters):
        """
        Download the fulltext ZIP package for every record in record_list.

        For each record the download URL is built from
        CFG_APSHARVEST_FULLTEXT_URL and the record DOI, and the package is
        stored in self.out_folder as "<DOI with '/' replaced by '_'>.zip".
        A package that already exists locally is downloaded again only when
        the record has a last-modified date and
        compare_datetime_to_iso8601_date() returns a true value for it.

        If a record has no DOI or the download fails, the record is yielded
        together with an error message and processing moves on to the next
        record.

        @param record_list: sized collection of record objects; each one must
            provide the attributes doi, recid and last_modified.
        @param parameters: not used by the code in this block.

        @return: generator yielding tuples of (record, error_message)
        """
        count = 0
        request_end = None
        request_start = None
        for record in record_list:
            task_sleep_now_if_required(can_stop_too=False)
            # Unless this is the first request, lets sleep a bit.
            # NOTE(review): request_end is not assigned in the portion of
            # this method shown here, so this throttle only triggers if it is
            # updated elsewhere - confirm.
            if request_end and request_start:
                request_dt = request_end - request_start
                write_message("Checking request time (%d)"
                              % (request_dt,), verbose=3)
                if count and request_dt > 0 and request_dt < CFG_APSHARVEST_REQUEST_TIMEOUT:
                    write_message("Initiating sleep for %.1f seconds"
                                  % (request_dt,), verbose=3)
                    time.sleep(request_dt)

            count += 1
            task_update_progress("Harvesting record (%d/%d)" % (count,
                                                                len(record_list)))

            if not record.doi:
                # %s instead of %d: when recid is missing the "" fallback
                # cannot be formatted with %d and would raise TypeError.
                msg = "No DOI found for record %s" % (record.recid or "",)
                write_message("Error: %s" % (msg,), stream=sys.stderr)
                yield record, msg
                continue

            url = CFG_APSHARVEST_FULLTEXT_URL % {'doi': record.doi}
            result_file = os.path.join(self.out_folder,
                                       "%s.zip" % (record.doi.replace('/', '_')))
            try:
                request_start = time.time()
                if os.path.exists(result_file):
                    # File already downloaded recently, lets see if it is the same
                    file_last_modified = get_file_modified_date(result_file)
                    # Only compare when the record actually carries a date, so
                    # the comparison helper is never handed an empty value;
                    # without a date the package is simply fetched again.
                    if record.last_modified and not compare_datetime_to_iso8601_date(file_last_modified, record.last_modified):
                        # File is not older than APS version, we should not download.
                        raise APSHarvesterFileExits

                write_message("Trying to save to %s" % (result_file,), verbose=5)

                result_file = download_url(url=url,
                                           download_to_file=result_file,
                                           content_type="zip",
                                           retry_count=5,
                                           timeout=60.0)
                write_message("Downloaded %s to %s" % (url, result_file), verbose=2)
            except InvenioFileDownloadError:
                # The exception object itself is not needed; report the URL.
                msg = "URL could not be opened: %s" % (url,)
                write_message("Error: %s" % (msg,),
                              stream=sys.stderr)
                yield record, msg
                continue

            except APSHarvesterFileExits:
                write_message("File exists at %s" % (result_file,), verbose=2)