def test_same(self):
    """Diffing a GTFS feed .zip against itself must report no differences."""
    this_module_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
    # deliberately compare one archive to itself -- the diff must be empty
    same_zip = os.path.join(this_module_dir, "gtfsB.zip")
    d = Diff(same_zip, same_zip)
    self.assertFalse(d.is_different())
def __init__(self, url, file_name, cache_dir=None, cache_expire=31):
    """Download a GTFS feed from *url* and cache it as *file_name*.

    :param url: remote location of the GTFS feed .zip
    :param file_name: name the feed is stored under in the cache
    :param cache_dir: cache directory (None => default from get_cache_dir)
    :param cache_expire: days before a cached feed is considered stale
    """
    # step 1: temp dir to hold the fresh download
    tmp_dir = self.get_tmp_dir()

    # step 2: cache dir management
    self.cache_dir = self.get_cache_dir(cache_dir)
    self.cache_expire = cache_expire

    # step 3: file name / path of the cached copy
    self.file_name = file_name
    self.file_path = os.path.join(self.cache_dir, self.file_name)

    # step 4: download new gtfs file into the temp dir
    self.url = url
    tmp_path = os.path.join(tmp_dir, self.file_name)
    file_utils.wget(self.url, tmp_path)

    # step 5: check the cache to decide whether we should update or not
    update = False
    if self.is_fresh_in_cache(self.file_path):
        logging.info("diff gtfs file")
        diff = Diff(self.file_path, tmp_path)
        if diff.is_different():
            update = True
    else:
        # no fresh copy in the cache -- always take the new download
        update = True

    # step 6: mv old file to backup, then mv new file from tmp dir to cache
    if update:
        logging.info("move to cache")
        file_utils.bkup(self.file_path)
        os.rename(tmp_path, self.file_path)
def check_feed(self, url, file_name, force_update=False):
    ''' download feed from url, and check it against the cache
        if newer, then replace cached feed .zip file with new version
    '''
    # step 1: resolve the cached path for this feed
    file_path = os.path.join(self.cache_dir, file_name)

    # step 2: path where the new gtfs file is downloaded to
    tmp_path = os.path.join(self.tmp_dir, file_name)

    # step 2b: don't keep downloading a file ... only fetch when the tmp
    #          copy is missing or older than 2 hours (7200 seconds)
    if not os.path.exists(tmp_path) or \
       file_utils.file_age_seconds(tmp_path) > 7200:
        web_utils.wget(url, tmp_path)

    # step 3: check the cache whether we should update or not
    update = force_update
    if not force_update:
        if self.is_fresh_in_cache(file_path):
            log.info("diff {} against cached {}".format(tmp_path, file_path))
            diff = Diff(file_path, tmp_path)
            if diff.is_different():
                update = True
        else:
            # cached copy is stale or missing -- always take the download
            update = True

    # step 4: mv old file to backup, then mv new file from tmp dir to cache
    if update:
        log.info("cp {} to cache {}".format(tmp_path, file_path))
        file_utils.bkup(file_path)
        file_utils.cp(tmp_path, file_path)
def test_diff_calendar(self):
    """Two different GTFS feed .zips must be reported as different."""
    this_module_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
    gtfs_a = os.path.join(this_module_dir, "gtfsA.zip")
    gtfs_b = os.path.join(this_module_dir, "gtfsB.zip")
    d = Diff(gtfs_a, gtfs_b)
    self.assertTrue(d.is_different())
def main():
    """Ad-hoc driver: diff the two test feeds and dump feed info/stats.

    TODO: turn this into real tests for Diff etc. (carried over from diff.py).
    """
    this_module_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
    this_module_dir = os.path.join(this_module_dir, "tests")
    gtfsA = os.path.join(this_module_dir, "gtfsA.zip")
    gtfsB = os.path.join(this_module_dir, "gtfsB.zip")
    diff = Diff(gtfsA, gtfsB)
    diff.is_different()
    print(diff.new_info.get_feed_info())
    print(diff.new_info.get_feed_version())
    print(diff.new_info.get_feed_date_range())
    print(diff.new_info.get_days_since_stats())
    print(diff.new_info.is_gtfs_out_of_date())
def main():
    """Ad-hoc driver: diff the two test feeds and dump feed info/stats.

    TODO: turn this into real tests for Diff etc. (carried over from diff.py).
    """
    this_module_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
    this_module_dir = os.path.join(this_module_dir, "tests")
    gtfsA = os.path.join(this_module_dir, "gtfsA.zip")
    gtfsB = os.path.join(this_module_dir, "gtfsB.zip")
    diff = Diff(gtfsA, gtfsB)
    diff.is_different()
    # NOTE: converted from Python 2 'print x' statements to print() calls,
    # consistent with the other main() in this file
    print(diff.new_info.get_feed_info())
    print(diff.new_info.get_feed_version())
    print(diff.new_info.get_feed_date_range())
    print(diff.new_info.get_days_since_stats())
    print(diff.new_info.is_gtfs_out_of_date())
def cmp_file_to_cached(cls, gtfs_zip_name, cmp_dir):
    """Compare cache/<gtfs_zip_name> against cmp_dir/<gtfs_zip_name>.

    Returns the resulting Diff object for the caller to inspect.
    """
    path_in_cache = os.path.join(cls.get_cache_dir(), gtfs_zip_name)
    path_to_compare = os.path.join(cmp_dir, gtfs_zip_name)
    return Diff(path_in_cache, path_to_compare)
def check_feed(self, url, file_name, force_update=False):
    """ download feed from url, and check it against the cache
        if newer, then replace cached feed .zip file with new version

        :return: True when the cached feed was updated, else False
    """
    # step 1: resolve the cached path for this feed
    file_path = os.path.join(self.cache_dir, file_name)

    # step 2: path where the new gtfs file is downloaded to
    tmp_path = os.path.join(self.tmp_dir, file_name)

    # step 2b: don't keep downloading a file ... only fetch when the tmp
    #          copy is missing or older than 2 hours (7200 seconds)
    if not os.path.exists(tmp_path) or file_utils.file_age_seconds(tmp_path) > 7200:
        web_utils.wget(url, tmp_path)

    # step 3: check the cache whether we should update or not
    update = force_update
    if not force_update:
        if self.is_fresh_in_cache(file_path):
            log.info("diff {} against cached {}".format(tmp_path, file_path))
            diff = Diff(file_path, tmp_path)
            if diff.is_different():
                update = True
        else:
            # cached copy is stale or missing -- always take the download
            update = True

    # step 4: test new .zip for validity before installing it
    if update:
        # step 4a: make sure this new .zip feed has a trips.txt, routes.txt
        #          and stops.txt file ... if not, no update
        if GtfsInfo.feed_looks_valid(tmp_path):
            # step 4b: mv old file to backup, then cp new file from tmp to cache
            log.info("cp {} to cache {}".format(tmp_path, file_path))
            file_utils.bkup(file_path)
            file_utils.cp(tmp_path, file_path)
        else:
            log.warning("something *WRONG* with file: {}".format(tmp_path))
            update = False
    return update