def test_get_file_metadata(self):
    """Metadata stored under an md5 key can be read back by that key.

    Registers one record (remote name 'foo', local name 'bar', timestamp
    36003600, md5 'foobar') and expects ``get_file_metadata('foobar')`` to
    return a dict holding the other three fields.
    """
    gtfs_retrieve_FS.add_file_metadata('foo', 'bar', 36003600, 'foobar')
    fetched = gtfs_retrieve_FS.get_file_metadata('foobar')
    wanted = dict(
        local_filename='bar',
        remote_filename='foo',
        timestamp_datetime=36003600,
    )
    self.assertEqual(wanted, fetched)
def download_file(dest_dir, remote_file_name, no_timestamp, no_md5):
    """Fetch ``remote_file_name`` from ``MOT_FTP`` into ``dest_dir`` and record it.

    The file is downloaded when ``gtfs_retrieve_MOT_FTP.get_uptodateness``
    returns a truthy value for the latest local timestamp, or when
    ``no_timestamp`` is truthy. After the download its md5 is looked up via
    ``gtfs_retrieve_FS.get_file_metadata``: if no metadata exists for that md5,
    or ``no_md5`` is truthy, a metadata record is added; otherwise the freshly
    downloaded file is deleted again.

    Args:
        dest_dir: Directory handed to ``gtfs_retrieve_FS.init`` and used as the
            download target directory.
        remote_file_name: Name of the file on the FTP server; also the suffix
            of the timestamp-prefixed local file name.
        no_timestamp: When truthy, download even if the up-to-dateness check
            is falsy.
        no_md5: When truthy, keep and record the file even if its md5 already
            has metadata.

    Returns:
        None.
    """
    gtfs_retrieve_FS.init(dest_dir)
    newest_known = gtfs_retrieve_FS.get_latest_local_timestamp(remote_file_name)

    # Local name is "<current local time>_<remote name>".
    prefix = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S_')
    local_filename = prefix + remote_file_name
    target_path = os.path.abspath(os.path.join(dest_dir, local_filename))

    should_fetch = gtfs_retrieve_MOT_FTP.get_uptodateness(
        newest_known, MOT_FTP, remote_file_name) or no_timestamp
    if not should_fetch:
        logger.debug("No newer (timestamp comparing) file have been found on FTP server skipping downloading")
        return

    logger.debug("New file have been found on '" + MOT_FTP + "' or the 'no_timestamp' flag is on")
    gtfs_retrieve_MOT_FTP.ftp_get_file(target_path, MOT_FTP, remote_file_name)
    digest = gtfs_retrieve_FS.md5_for_file(target_path)

    # A known md5 means this download duplicates an earlier one, unless the
    # caller asked to skip that check.
    known_record = gtfs_retrieve_FS.get_file_metadata(digest)
    if known_record and not no_md5:
        logger.debug("The downloaded file '" + remote_file_name
                     + "' already exists (according to md5 check), removing")
        os.remove(target_path)
        return

    gtfs_retrieve_FS.add_file_metadata(
        remote_file_name, local_filename, int(time.time()), digest)
    logger.debug("MD5 is different from previous downloads or the 'no_md5' flag is on")
def test_get_latest_local_timestamp_not_exist_file(self):
    """An unknown remote file name yields ``MIN_EPOCH_TIME``.

    Two records are registered for remote name 'foo1'; the lookup is done for
    'foo22', which was never registered, and is expected to return
    ``gtfs_retrieve_FS.MIN_EPOCH_TIME``.
    """
    fixtures = (('bar1', 36003600), ('bar2', 99999999))
    for local_name, stamp in fixtures:
        gtfs_retrieve_FS.add_file_metadata('foo1', local_name, stamp, 'foobar')

    found = gtfs_retrieve_FS.get_latest_local_timestamp('foo22')
    fallback = gtfs_retrieve_FS.MIN_EPOCH_TIME
    self.assertEqual(fallback, found)
def test_get_latest_local_timestamp(self):
    """The lookup for 'foo1' returns the larger of its two timestamps.

    Registers timestamps 36003600 and 99999999 for remote name 'foo1' and
    expects ``get_latest_local_timestamp('foo1')`` to return 99999999.
    """
    fixtures = (('bar1', 36003600), ('bar2', 99999999))
    for local_name, stamp in fixtures:
        gtfs_retrieve_FS.add_file_metadata('foo1', local_name, stamp, 'foobar')

    newest = gtfs_retrieve_FS.get_latest_local_timestamp('foo1')
    self.assertEqual(99999999, newest)
def test_get_file_metadata(self):
    """Round-trip one metadata record through add/get by md5.

    After adding (remote 'foo', local 'bar', timestamp 36003600, md5
    'foobar'), ``get_file_metadata('foobar')`` is expected to return the
    remote name, local name and timestamp as a dict.
    """
    remote_name, local_name, stamp, md5_key = 'foo', 'bar', 36003600, 'foobar'
    gtfs_retrieve_FS.add_file_metadata(remote_name, local_name, stamp, md5_key)

    record = gtfs_retrieve_FS.get_file_metadata(md5_key)
    self.assertEqual(
        {
            'local_filename': local_name,
            'remote_filename': remote_name,
            'timestamp_datetime': stamp,
        },
        record,
    )
def download_file(dest_dir, remote_file_name, no_timestamp, no_md5):
    """Fetch ``remote_file_name`` from ``MOT_FTP`` into ``dest_dir`` and record it.

    The file is downloaded when ``gtfs_retrieve_MOT_FTP.get_uptodateness``
    returns a truthy value for the latest local timestamp, or when
    ``no_timestamp`` is truthy. After the download its md5 is looked up via
    ``gtfs_retrieve_FS.get_file_metadata``: if no metadata exists for that md5,
    or ``no_md5`` is truthy, a metadata record is added; otherwise the freshly
    downloaded file is deleted again.

    Args:
        dest_dir: Directory handed to ``gtfs_retrieve_FS.init`` and used as the
            download target directory.
        remote_file_name: Name of the file on the FTP server; also the suffix
            of the timestamp-prefixed local file name.
        no_timestamp: When truthy, download even if the up-to-dateness check
            is falsy.
        no_md5: When truthy, keep and record the file even if its md5 already
            has metadata.

    Returns:
        None.
    """
    gtfs_retrieve_FS.init(dest_dir)
    newest_known = gtfs_retrieve_FS.get_latest_local_timestamp(remote_file_name)

    # Local name is "<current local time>_<remote name>".
    prefix = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M-%S_')
    local_filename = prefix + remote_file_name
    target_path = os.path.abspath(os.path.join(dest_dir, local_filename))

    should_fetch = gtfs_retrieve_MOT_FTP.get_uptodateness(
        newest_known, MOT_FTP, remote_file_name) or no_timestamp
    if not should_fetch:
        logger.debug("No newer (timestamp comparing) file have been found on FTP server skipping downloading")
        return

    logger.debug("New file have been found on '" + MOT_FTP + "' or the 'no_timestamp' flag is on")
    gtfs_retrieve_MOT_FTP.ftp_get_file(target_path, MOT_FTP, remote_file_name)
    digest = gtfs_retrieve_FS.md5_for_file(target_path)

    # A known md5 means this download duplicates an earlier one, unless the
    # caller asked to skip that check.
    known_record = gtfs_retrieve_FS.get_file_metadata(digest)
    if known_record and not no_md5:
        logger.debug("The downloaded file '" + remote_file_name
                     + "' already exists (according to md5 check), removing")
        os.remove(target_path)
        return

    gtfs_retrieve_FS.add_file_metadata(
        remote_file_name, local_filename, int(time.time()), digest)
    logger.debug("MD5 is different from previous downloads or the 'no_md5' flag is on")