def __init__(self, osm_infile_path, osm_outfile_path, do_bkup=True):
    """
    this class will rename streets in OSM, abbreviating common street prefixes and suffixes
    (e.g., North == N, Southeast == SE, Street == St, Avenue == Ave, etc...)

    :note: this assumes that each OSM <tag /> falls completely upon a single line of the file;
           the parser / renamer will break if targeted tags are spread across multiple lines
    :todo: look at SAX https://gist.github.com/veryhappythings/98604
    :todo: add unit tests
    :todo: fix up hacky parts...
    """
    self.osm_input_path = osm_infile_path
    if osm_outfile_path is None or len(osm_outfile_path) == 0:
        osm_outfile_path = osm_infile_path

    is_same_input_output = False
    if osm_outfile_path == osm_infile_path:
        self.osm_output_path = osm_outfile_path + "temp"
        is_same_input_output = True
    else:
        self.osm_output_path = osm_outfile_path

    self.abbr_parser = OsmAbbrParser()
    self.process_osm_file()

    if is_same_input_output:
        if do_bkup:
            file_utils.bkup(osm_outfile_path)
        file_utils.mv(self.osm_output_path, osm_outfile_path)
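For reference, the in-place case above (output path empty or equal to the input) writes to a temp file, optionally backs up the original, and then moves the temp file over it. A minimal standalone sketch of that pattern, where abbreviate_line is a hypothetical stand-in for OsmAbbrParser and shutil stands in for the project's file_utils helpers:

# hedged, self-contained sketch of the write-to-temp / bkup / mv-over-original pattern used above;
# abbreviate_line is a hypothetical callable, and shutil stands in for file_utils
import shutil

def rewrite_in_place(osm_path, abbreviate_line, do_bkup=True):
    temp_path = osm_path + "temp"
    with open(osm_path) as fin, open(temp_path, 'w') as fout:
        for line in fin:
            fout.write(abbreviate_line(line))
    if do_bkup:
        shutil.copy2(osm_path, osm_path + ".bak")   # rough analogue of file_utils.bkup
    shutil.move(temp_path, osm_path)                # rough analogue of file_utils.mv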
def load_feeds(self):
    """ insert feeds into the configured db (see config/app.ini) """
    print("load_feeds")
    ret_val = True
    err_ext = "-error_loading"

    # get rid of any previously cached error files
    file_utils.purge(self.cache_dir, ".*" + err_ext)

    for f in self.feeds:
        # get cached feed path and feed name (see 'feeds' in config/app.ini)
        feed_path = os.path.join(self.cache_dir, f['name'])
        feed_name = f['name'].rstrip(".zip")

        # make args for gtfsdb
        kwargs = {}
        kwargs['url'] = self.db_url
        if "sqlite:" not in self.db_url:
            kwargs['is_geospatial'] = self.is_geospatial
            kwargs['schema'] = feed_name

        # load this feed into gtfsdb
        log.info("loading {} ({}) into gtfsdb {}".format(feed_name, feed_path, self.db_url))
        try:
            database_load(feed_path, **kwargs)
        except Exception as e:
            ret_val = False
            file_utils.mv(feed_path, feed_path + err_ext)
            log.error("DATABASE ERROR : {}".format(e))
    return ret_val
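The loop above expects each entry of self.feeds (the 'feeds' setting in config/app.ini) to be a dict with a 'name' key naming a cached GTFS .zip; the feed names below are made up, shown only to illustrate the assumed shape:

# assumed shape of the 'feeds' entries consumed by load_feeds() above (illustrative only)
feeds = [
    {'name': 'trimet.zip'},   # would load into schema 'trimet' on a non-sqlite database
    {'name': 'c-tran.zip'},   # would load into schema 'c-tran'
]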
def report(self, dir=None, report_name='otp_report.html'):
    """ render a test pass/fail report with mako """
    ret_val = None
    try:
        # step 1: mako render of the report
        suites = self.test_suites.get_suites()
        r = self.report_template.render(test_suites=suites, test_errors=self.test_suites.has_errors())
        ret_val = r

        # step 2: stream the report to a file
        report_path = report_name
        if dir:
            report_path = os.path.join(dir, report_name)
        file_utils.mv(report_path, report_path + "-old")
        f = open(report_path, 'w')
        if r:
            f.write(r)
        else:
            f.write("Sorry, the template was null...")
        f.flush()
        f.close()
    except NameError as e:
        log.warn("this ERROR probably means your template has a variable not being sent down with render: {}".format(e))
    return ret_val
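report_template above is a mako template object; a hedged sketch of constructing and rendering one with the same variables (the inline markup is invented for illustration):

# hedged sketch of a mako template like report_template above (markup is made up)
from mako.template import Template

report_template = Template("<h1>OTP report</h1> errors: ${test_errors}, suites: ${len(test_suites)}")
print(report_template.render(test_suites=[], test_errors=False))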
def mv_failed_graph_to_good(self):
    """ move the failed graph to the prod graph name if the prod graph doesn't exist and the failed graph does """
    exists = os.path.exists(self.graph_path)
    if not exists:
        fail_path = os.path.join(self.local_cache_dir, self.graph_failed)
        exists = os.path.exists(fail_path)
        if exists:
            file_utils.mv(fail_path, self.graph_path)
def _process_file(self, file_name, do_optimize):
    """
    does the meat of the work in posting files to SOLR. the paths to SOLR instances are pulled from config/app.ini.
    this routine will post to either a single SOLR instance, or manage multiple SOLR instances running on different ports.
    """
    is_success = False

    # step 1: grab file path
    solr_xml_file_path = os.path.join(self.cache.cache_dir, file_name)
    log.debug(solr_xml_file_path)

    # step 2: grab SOLR properties for the url (and optionally the web ports where the SOLR instance(s) run)
    url = self.config.get('url')
    reload_url = self.config.get('reload')
    ports = None
    if ":{}" in url or ":{0}" in url:
        ports = self.config.get_list('ports', def_val='80')

    # step 3: update SOLR
    if ports:
        # step 3a: post the .xml content to the first SOLR instance
        u = url.format(ports[0])
        is_success = self.update_index(u, solr_xml_file_path, do_optimize)

        # step 3b: now refresh all instances of SOLR
        for p in ports:
            u = url.format(p)
            ru = reload_url.format(p) if reload_url else None
            # have to call commit a couple of times to make SOLR instances refresh
            self.commit(u)
            web_utils.get(ru)
            self.commit(u)
            web_utils.get(ru)
    else:
        # step 3c: update and refresh the single instance of SOLR
        is_success = self.update_index(url, solr_xml_file_path, do_optimize)

    # step 4: either warn us, or mv the file to the processed folder so it's not processed again...
    if not is_success:
        log.warn("something happened loading {} into SOLR".format(solr_xml_file_path))
    else:
        to_path = os.path.join(self.post_process_dir, file_name)
        file_utils.mv(solr_xml_file_path, to_path)

    return is_success
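The multi-port branch above is driven by a ':{}' (or ':{0}') placeholder in the configured url; the values below are hypothetical, shown only to illustrate how each port gets formatted into the url and reload_url:

# hypothetical config values (illustrative only) for the multi-instance SOLR branch above
url = "http://localhost:{}/solr/collection1/update"
reload_url = "http://localhost:{}/solr/admin/cores?action=RELOAD&core=collection1"
ports = ['8983', '8984']

print([url.format(p) for p in ports])
# ['http://localhost:8983/solr/collection1/update', 'http://localhost:8984/solr/collection1/update']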
def scp(cls, feeds=None, filter=None, rm_after_scp=True):
    """
    loop thru the servers in app.ini [deploy], looking to scp the pg_dump file over to production
    :returns: number of feeds that were scp'd
    """
    ret_val = 0
    exporter = GtfsdbExporter()
    user = exporter.config.get('user', section='deploy')
    for feed in exporter.check_feeds(feeds):
        dump_path = None
        num_scp = 0

        # scp the feed to the configured servers
        prod_servers = exporter.config.get_json('servers', section='deploy')
        for server in prod_servers:
            if filter is None or filter == 'all' or filter in server:
                dump_path = exporter._scp_dump_file(feed, server, user)
                if dump_path:
                    num_scp += 1

        # move the dump.tar aside (but don't delete it), so it doesn't repeatedly get scp'd
        if dump_path and rm_after_scp:
            file_utils.mv(dump_path, dump_path + "-did_scp")

        # report on the number of times we scp'd to the different servers, etc...
        if num_scp > 0:
            ret_val += 1  # number of feeds scp'd
        num_servers = len(prod_servers)
        feed_name = exporter.get_feed_name(feed)
        if num_scp != num_servers:
            log.warn("tried to scp feed {} to {} production servers, but was only able to scp {} times".format(feed_name, num_servers, num_scp))
        else:
            log.info("successfully scp'd feed {} to {} production servers".format(feed_name, num_servers))
    return ret_val
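scp() above pulls a user and a JSON list of servers from the [deploy] section of app.ini, and the filter argument matches servers by substring; the values below are made-up examples of that shape:

# hypothetical [deploy] values behind the config lookups above (server names are made up)
user = 'otp'
prod_servers = ["maps7.example.com", "maps8.example.com"]

# filter matches by substring, so 'maps7' targets just the first server; None or 'all' targets every server
filter = 'maps7'
targets = [s for s in prod_servers if filter is None or filter == 'all' or filter in s]
print(targets)  # ['maps7.example.com']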
def mv_new_files_into_place(graph_dir, graph_name=GRAPH_NAME, vlog_name=VLOG_NAME, otp_name=OTP_NAME):
    """ go thru the steps of backing up the old graph and moving the new graph into place on the server """
    ret_val = False
    new_graph = file_utils.make_new_path(graph_dir, graph_name)
    new_vlog = file_utils.make_new_path(graph_dir, vlog_name)
    new_otp = file_utils.make_new_path(graph_dir, otp_name)

    # step 1: check if the new OTP GRAPH and VLOG exist ... if both do, proceed
    if file_utils.is_min_sized(new_graph, quiet=True) and file_utils.is_min_sized(new_vlog, 20, quiet=True):
        new_otp_exists = file_utils.is_min_sized(new_otp, quiet=True)

        # step 2: current paths
        curr_graph = os.path.join(graph_dir, graph_name)
        curr_vlog = os.path.join(graph_dir, vlog_name)
        curr_otp = os.path.join(graph_dir, otp_name)

        # step 3: create the OLD folder and build old paths
        old_path = file_utils.make_old_dir(graph_dir)
        old_graph = os.path.join(old_path, graph_name)
        old_vlog = os.path.join(old_path, vlog_name)
        old_otp = os.path.join(old_path, otp_name)

        # step 4: mv current stuff to the OLD directory
        file_utils.mv(curr_graph, old_graph)
        if new_otp_exists:
            file_utils.mv(curr_otp, old_otp)

        # step 5: make sure we moved the old stuff out of the way ... if not, we have to exit
        if file_utils.is_min_sized(curr_graph, quiet=True) or (new_otp_exists and file_utils.is_min_sized(curr_otp, quiet=True)):
            # @todo this should be an email in addition to a log message
            log.error("in trying to deploy a new graph, I wasn't able to mv old {} (or {}) out of the way".format(curr_graph, curr_otp))
        else:
            # step 6: ok, we could move the graph (and maybe otp) to the OLD dir ... now back those files up (rename with a date stamp)
            file_utils.mv(curr_vlog, old_vlog)
            file_utils.bkup(old_vlog)
            file_utils.bkup(old_graph)
            if new_otp_exists:
                file_utils.bkup(old_otp)

            # step 7: move the new stuff into the 'current' position
            file_utils.mv(new_graph, curr_graph)
            file_utils.mv(new_vlog, curr_vlog)
            if new_otp_exists:
                file_utils.mv(new_otp, curr_otp)

            # step 8: last check to make sure we did move things around properly
            if file_utils.is_min_sized(curr_graph) and file_utils.is_min_sized(curr_otp):
                ret_val = True
            else:
                # @todo this should be an email in addition to a log message
                log.error("ruh roh: after trying to deploy a new graph, I don't see either {} or {}".format(curr_graph, curr_otp))
    return ret_val
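All of the snippets above lean on file_utils.mv and file_utils.bkup; below is a minimal sketch of what such helpers could look like, assuming mv is a safe move and bkup makes a date-stamped copy. This is an illustration only, not the project's actual implementation.

# hedged sketch of thin helpers like file_utils.mv and file_utils.bkup (assumed behavior)
import os
import shutil
import datetime


def mv(src, dst):
    """ move src over dst if src exists (assumed to be a no-op otherwise) """
    if os.path.exists(src):
        shutil.move(src, dst)


def bkup(path):
    """ copy path to a date-stamped sibling, e.g. file.2024-01-31 (assumed naming) """
    if os.path.exists(path):
        shutil.copy2(path, "{}.{}".format(path, datetime.date.today().isoformat()))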