def load_feeds(self):
    """
    Insert each cached GTFS feed into the configured db (see 'feeds' in config/app.ini).

    Returns True when every feed loads cleanly, False if any feed raised; a
    failed feed's cached .zip is renamed with an "-error_loading" suffix so
    it is purged and re-fetched on the next run.
    """
    print("load_feeds")
    ret_val = True
    err_ext = "-error_loading"

    # get rid of any previously cached error files
    file_utils.purge(self.cache_dir, ".*" + err_ext)

    for f in self.feeds:
        # get cached feed path and feed name (see 'feeds' in config/app.ini)
        feed_path = os.path.join(self.cache_dir, f['name'])
        # NOTE: the old code used f['name'].rstrip(".zip"), but rstrip strips
        # a *character set* (any trailing '.', 'z', 'i', 'p'), mangling names
        # like "trip.zip" -> "tr"; strip the literal suffix instead.
        feed_name = f['name'][:-len(".zip")] if f['name'].endswith(".zip") else f['name']

        # build kwargs for gtfsdb's loader
        kwargs = {'url': self.db_url}
        if "sqlite:" not in self.db_url:
            # sqlite has neither schemas nor geospatial support, so these
            # options are only passed for other backends (e.g. postgres)
            kwargs['is_geospatial'] = self.is_geospatial
            kwargs['schema'] = feed_name

        # load this feed into gtfsdb
        log.info("loading {} ({}) into gtfsdb {}".format(feed_name, feed_path, self.db_url))
        try:
            database_load(feed_path, **kwargs)
        except Exception as e:  # was py2-only "except Exception, e"
            ret_val = False
            # rename the bad feed so it is purged / re-fetched next run
            file_utils.mv(feed_path, feed_path + err_ext)
            log.error("DATABASE ERROR : {}".format(e))
    # was computed but never returned in the original
    return ret_val
def load_gtfsdb(session=None):
    """ loads gtfs feed .zip data

        note: will also call the method below to calculate stop segments from the new schedule data
        bin/load_gtfsdb -c -g -s test -d postgres://ott@localhost:5432/ott ott/trafficdb/model/inrix/test/gtfs.zip

        :param session: not used in this function body — TODO confirm whether callers rely on it
    """
    # deferred import: gtfsdb is only needed when a load is actually performed
    from gtfsdb.api import database_load
    # NOTE(review): 'args' and 'kwargs' are not defined in this function —
    # presumably module-level globals populated by an argument parser; verify.
    database_load(args.file, **kwargs)
def load_gtfs_data(gtfs_url, cache=False):
    """Fetch the GTFS feed from *gtfs_url* and load it into the sqlite db at GTFS_DB.

    When *cache* is truthy and a db file already exists, the existing data is
    reused and nothing is downloaded or loaded.
    """
    use_cached = cache and os.path.isfile(GTFS_DB)
    if use_cached:
        LOGGER.info('Using cached GTFS data at: {}'.format(GTFS_DB))
        return

    _fetch_gtfs_data(gtfs_url)

    # non-geospatial load of the freshly downloaded feed into sqlite
    sqlite_url = 'sqlite:///{}'.format(GTFS_DB)
    database_load(
        filename=GTFS_DOWNLOAD_FILE,
        url=sqlite_url,
        schema=gtfsdb.config.DEFAULT_SCHEMA,
        batch_size=gtfsdb.config.DEFAULT_BATCH_SIZE,
        is_geospatial=False,
        tables=None,
    )
class BasicModelTests(object):
    # Import-time test fixture: load the bundled large sample feed into a
    # throw-away sqlite db.  The try/except deliberately swallows any failure
    # (logging a warning) so importing the test module never blows up.
    try:
        path = resource_filename('gtfsdb', 'tests')
        gtfs_file = 'file:///{0}'.format(os.path.join(path, 'large-sample-feed.zip'))
        db_file = tempfile.mkstemp()[1]
        url = 'sqlite:///{0}'.format(db_file)
        db = database_load(gtfs_file, url=url)
        log.debug("DATABASE TMP FILE: {0}".format(db_file))
    except Exception as e:
        log.warning(e)
        log.warning("couldn't make the BasicModelTests object for some reason")

    def get_first(self):
        """Return (and lazily cache) the first row of self.model, if a model is set.

        NOTE(review): if no 'model' attribute exists, self._first is never
        assigned and the final return raises AttributeError — confirm callers
        only use this from subclasses that define 'model'.
        """
        try:
            self._first
        except AttributeError:
            if hasattr(self, 'model'):
                self._first = self.db.session.query(self.model).first()
        return self._first

    def test_entity(self):
        """Sanity-check that querying self.model yields instances of that model."""
        if hasattr(self, 'model'):
            for r in self.db.session.query(self.model).limit(5):
                self.assert_(isinstance(r, self.model))
def parse_gtfs_data():
    """Load the downloaded GTFS feed into sqlite, then extract route, shape and stop data."""
    logger.info('loading gtfs data into db ({})...'.format(GTFS_DB))

    db_url = 'sqlite:///{}'.format(GTFS_DB)
    database_load(
        filename=GTFS_DOWNLOAD_FILE,
        url=db_url,
        schema=gtfsdb.config.DEFAULT_SCHEMA,
        batch_size=gtfsdb.config.DEFAULT_BATCH_SIZE,
        is_geospatial=gtfsdb.config.DEFAULT_IS_GEOSPATIAL,
        tables=None,
    )

    # post-process the freshly loaded schedule tables
    with sqlite3.connect(GTFS_DB) as conn:
        cursor = conn.cursor()
        for save in (_save_route_data, _save_shape_data, _save_stop_data):
            save(cursor)
class BasicModelTests(object):
    # Import-time test fixture: load the bundled multi-date feed into a
    # throw-away sqlite db; test methods query it via the class attribute 'db'.
    path = resource_filename('gtfsdb', 'tests')
    gtfs_file = 'file:///{0}'.format(os.path.join(path, 'multi-date-feed.zip'))
    db_file = tempfile.mkstemp()[1]
    url = 'sqlite:///{0}'.format(db_file)
    db = database_load(gtfs_file, url=url)
    log.debug("DATABASE TMP FILE: {0}".format(db_file))
def load_sqlite():
    """Load the bundled multi-date test feed into a temporary sqlite db.

    Returns the loaded gtfsdb database object (with 'current' tables built).
    """
    feed_uri = get_test_file_uri(os.path.join('data', 'gtfs', 'multi-date-feed.zip'))
    # random temp file name for the sqlite db; pass a name to
    # util.make_temp_sqlite_db_uri(...) instead for a fixed file
    sqlite_url = util.make_temp_sqlite_db_uri()
    return database_load(feed_uri, url=sqlite_url, current_tables=True)
def tag_meta(source, database):
    """Record (or update) the Meta bookkeeping row for *source*, load the feed,
    and stamp the row with the load result and upload time.
    """
    db = Database(url=database)
    session = db.session

    meta = session.query(Meta).filter_by(file_name=source).first()
    if not meta:
        # first time we've seen this file: create its bookkeeping row
        meta = Meta(file_name=source)
        session.add(meta)
        session.commit()

    meta.completed = database_load(source, database)
    meta.upload_date = datetime.datetime.utcnow()
    session.commit()
def get_db():
    """Load the bundled multi-date test feed into a temp sqlite db and return it.

    Builds a file:// URI for the feed zip, normalizing Windows path separators
    so the URI is valid on any platform.
    """
    from gtfsdb import api
    from ott.utils import file_utils
    from gtfsdb import util

    # 'dir' shadowed the builtin in the original; renamed
    module_dir = file_utils.get_module_dir(CurrentRoutesListDao)
    gtfs_file = os.path.join(module_dir, '..', 'tests', 'multi-date-feed.zip')
    # normalize Windows drive + backslashes into URI-style forward slashes
    gtfs_file = gtfs_file.replace('c:\\', '/').replace('\\', '/')
    # (the original repeated .replace('\\', '/') after formatting — redundant,
    # since the format string introduces no backslashes)
    gtfs_file = "file://{0}".format(gtfs_file)

    url = util.make_temp_sqlite_db_uri('curr')
    db = api.database_load(gtfs_file, url=url, current_tables=True)
    return db
def load_pgsql(url, schema="current_test"):
    """Load the bundled multi-date test feed into a postgres db (geospatial,
    with 'current' tables) and return the loaded database object.

    To run this test, do the following:
     x) bin/test gtfsdb.tests.test_current
    You might also have to do the following:
     a) emacs setup.py - uncomment install_requires='psycopg2'
     b) buildout  # need psychopg2 in bin/test script
     c) comment out "#SKIP_TESTS = True" below
     d) psql -d postgres -c "CREATE DATABASE test WITH OWNER ott;"
     e) bin/test gtfsdb.tests.test_current
    """
    feed_uri = get_test_file_uri(os.path.join('data', 'gtfs', 'multi-date-feed.zip'))
    return database_load(
        feed_uri,
        url=url,
        schema=schema,
        is_geospatial=True,
        current_tables=True,
    )
class BasicModelTests(object):
    # Import-time test fixture: load the bundled large sample feed into a
    # throw-away sqlite db; test methods query it via the class attribute 'db'.
    path = resource_filename('gtfsdb', 'tests')
    gtfs_file = 'file:///{0}'.format(
        os.path.join(path, 'large-sample-feed.zip'))
    url = 'sqlite:///{0}'.format(tempfile.mkstemp()[1])
    log.debug(url)
    db = database_load(gtfs_file, url=url)

    def get_first(self):
        """Return (and lazily cache) the first row of self.model, if a model is set.

        NOTE(review): if no 'model' attribute exists, self._first is never
        assigned and the final return raises AttributeError — confirm callers
        only use this from subclasses that define 'model'.
        """
        try:
            self._first
        except AttributeError:
            if hasattr(self, 'model'):
                self._first = self.db.session.query(self.model).first()
        return self._first

    def test_entity(self):
        """Sanity-check that querying self.model yields instances of that model."""
        if hasattr(self, 'model'):
            for r in self.db.session.query(self.model).limit(5):
                self.assert_(isinstance(r, self.model))
from gtfsdb.api import database_load
from pkg_resources import resource_filename
import os

if __name__ == '__main__':
    # locate the bundled feed zip inside the installed gtfsdb package
    zips_dir = resource_filename('gtfsdb', 'zips')
    gtfs_file = 'file:///{0}'.format(os.path.join(zips_dir, 'mvkzrt.zip'))

    # write the sqlite db next to this script
    basedir = os.path.abspath(os.path.dirname(__file__))
    url = 'sqlite:///' + os.path.join(basedir, 'mvk.db')

    db = database_load(gtfs_file, url=url)
def test_database_load(self):
    """Smoke test: the bundled sample feed loads via database_load's defaults."""
    tests_dir = resource_filename('gtfsdb', 'tests')
    feed_uri = 'file:///{0}'.format(os.path.join(tests_dir, 'sample-feed.zip'))
    database_load(feed_uri)
# create the target database (geospatial) and seed it with gtfsdb bootstrap data
db = Database(url=db_string, is_geospatial=True)
db.create()
try:
    GTFS.bootstrab_db(db)
except IntegrityError:
    # bootstrap rows already present; safe to continue
    pass

# metadata record describing the feed file to load
meta = {
    "dataexchange_id": "action",
    "file_url": "/Users/rhunter/Desktop/action_20150129_0101.zip",
    "file_name": "action_20150129_0101.zip",
    "file_checksum": "MD5123123123123",
    "date_added": 1213154234.0,
}
database_load(meta, db_url=db_string)


def process_source(source):
    """Load one GTFS *source* file into a fresh geospatial Database.

    Errors are printed rather than raised so a batch over many sources
    keeps going past a bad feed.
    """
    try:
        gtfs = GTFS(filename=source)
        p_db = Database(url=db_string, is_geospatial=True)
        gtfs.load(p_db, filename=source)
    except Exception as e:  # was py2-only "except Exception, e" / "print e"
        print(e)


# Parallel(n_jobs=36)(delayed(process_source)(source) for source in sources)
def run_import(url, **kwargs):
    """Create a scratch database, load the performance dataset into *url*, then drop all tables.

    NOTE(review): the Database object is created against postgresql.url()
    while database_load targets the *url* parameter — these look like they
    should be the same value; confirm against the caller/fixture.
    """
    db = Database(url=postgresql.url())
    db.create()
    database_load(os.path.join(root_dir, 'data/performance-dataset.zip'), db_url=url, **kwargs)
    # tear down so repeated benchmark runs start from a clean slate
    db.drop_all()
def load_sqlite(db_name=None, gtfs_name='multi-date-feed.zip'):
    """Load the bundled test feed *gtfs_name* into a temp sqlite db.

    :param db_name: optional fixed name for the sqlite file (random when None)
    :param gtfs_name: which bundled feed zip to load
    :return: the loaded gtfsdb database object (with 'current' tables built)
    """
    feed_uri = get_gtfs_file_uri(gtfs_name)
    sqlite_url = util.make_temp_sqlite_db_uri(db_name)
    return database_load(feed_uri, url=sqlite_url, current_tables=True)
def gtfsdb_load():
    """CLI entry point: parse command-line options and load the given feed file."""
    parsed, loader_kwargs = get_args()
    database_load(parsed.file, **loader_kwargs)
def test_database_load(self):
    """Smoke test: the bundled sample feed loads with block-id processing disabled."""
    tests_dir = resource_filename('gtfsdb', 'tests')
    feed_uri = 'file:///{0}'.format(os.path.join(tests_dir, 'sample-feed.zip'))
    database_load(feed_uri, ignore_blocks=True)