Code Example #1

# Imports assumed from the gtfsdbloader / gtfslib context (not shown in the original snippet).
from docopt import docopt
from logging import StreamHandler
import logging
import sys

import gtfslib
from gtfslib.dao import Dao


def main():
    arguments = docopt(__doc__, version='gtfsdbloader %s' % gtfslib.__version__)
    if arguments['--id'] is None:
        arguments['--id'] = ""

    # TODO Configure logging properly?
    logger = logging.getLogger('libgtfs')
    logger.setLevel(logging.INFO)
    logger.addHandler(StreamHandler(sys.stdout))

    dao = Dao(arguments['<database>'],
              sql_logging=arguments['--logsql'],
              schema=arguments['--schema'])

    if arguments['--list']:
        for feed in dao.feeds():
            print(feed.feed_id if feed.feed_id != "" else "(default)")

    if arguments['--delete'] or arguments['--load']:
        feed_id = arguments['--id']
        existing_feed = dao.feed(feed_id)
        if existing_feed:
            logger.warning("Deleting existing feed ID '%s'" % feed_id)
            dao.delete_feed(feed_id)
            dao.commit()

    if arguments['--load']:
        dao.load_gtfs(arguments['--load'],
                      feed_id=arguments['--id'],
                      lenient=arguments['--lenient'],
                      disable_normalization=arguments['--disablenormalize'])
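
The main() function above relies on a docopt usage string stored in the module docstring (__doc__), which is not included in the snippet. As a rough, hypothetical sketch (the option names come from the code above, but the layout and wording are invented here), the docstring could look something like the following; docopt then fills the arguments dictionary with <database> as a string, --load, --id and --schema as strings or None, and the remaining options as booleans:

"""Load GTFS data into a database -- hypothetical usage sketch, not the original docstring.

Usage:
  gtfsdbloader <database> [--load=<gtfs>] [--delete] [--list] [--id=<id>]
                          [--schema=<schema>] [--logsql] [--lenient] [--disablenormalize]
  gtfsdbloader (-h | --help)
  gtfsdbloader --version

Options:
  --load=<gtfs>        GTFS zip file to load into the database.
  --delete             Delete the feed selected by --id.
  --list               List the feed IDs already present in the database.
  --id=<id>            Feed ID to load into or delete; empty means the default feed.
  --schema=<schema>    Database schema to use.
  --logsql             Log SQL statements.
  --lenient            Be lenient when parsing the GTFS files.
  --disablenormalize   Disable normalization of the loaded data.
"""

With such a docstring, an invocation like "gtfsdbloader transit.sqlite --load=feed.gtfs.zip --id=mycity" would produce the dictionary that main() inspects.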
Code Example #2
    def test_all_gtfs(self):

        if not ENABLE:
            print("This test is disabled as it is very time-consuming.")
            print("If you want to enable it, please see in the code.")
            return

        # Create temporary directory if not there
        if not os.path.isdir(DIR):
            os.mkdir(DIR)

        # Create a DAO, re-using any existing database if present.
        logging.basicConfig(level=logging.INFO)
        dao = Dao("%s/all_gtfs.sqlite" % (DIR))

        deids = IDS_TO_LOAD
        if deids is None:
            print("Downloading meta-info for all agencies...")
            resource_url = "http://www.gtfs-data-exchange.com/api/agencies?format=json"
            response = requests.get(resource_url).json()
            if response.get('status_code') != 200:
                raise IOError("Error %s fetching the agency list" %
                              response.get('status_code'))
            deids = []
            for entry in response.get('data'):
                deid = entry.get('dataexchange_id')
                deids.append(deid)
            # Randomize the list, otherwise we will always load ABCBus, then ...
            random.shuffle(deids)

        for deid in deids:
            try:
                local_filename = "%s/%s.gtfs.zip" % (DIR, deid)
                if os.path.exists(local_filename) and SKIP_EXISTING:
                    print("Skipping [%s], GTFS already present." % (deid))
                    continue

                print("Downloading meta-info for ID [%s]" % (deid))
                resource_url = "http://www.gtfs-data-exchange.com/api/agency?agency=%s&format=json" % deid
                response = requests.get(resource_url).json()
                status_code = response.get('status_code')
                if status_code != 200:
                    raise IOError("Error %d (%s)" %
                                  (status_code, response.get('status_txt')))
                data = response.get('data')
                agency_data = data.get('agency')
                agency_name = agency_data.get('name')
                agency_area = agency_data.get('area')
                agency_country = agency_data.get('country')

                print("Processing [%s] %s (%s / %s)" %
                      (deid, agency_name, agency_country, agency_area))
                date_max = 0.0
                file_url = None
                file_size = 0
                file_md5 = None
                for datafile in data.get('datafiles'):
                    date_added = datafile.get('date_added')
                    if date_added > date_max:
                        date_max = date_added
                        file_url = datafile.get('file_url')
                        file_size = datafile.get('size')
                        file_md5 = datafile.get('md5sum')
                if file_url is None:
                    print("No datafile available, skipping.")
                    continue

                if file_size > MAX_GTFS_SIZE:
                    print("GTFS too large (%d bytes > max %d), skipping." %
                          (file_size, MAX_GTFS_SIZE))
                    continue

                # If a local copy with a matching MD5 checksum already exists,
                # skip the download.
                try:
                    with open(local_filename, 'rb') as existing_file:
                        existing_md5 = hashlib.md5(
                            existing_file.read()).hexdigest()
                except IOError:
                    existing_md5 = None
                if existing_md5 == file_md5:
                    print("Using existing file '%s': MD5 checksum matches." %
                          (local_filename))
                else:
                    print("Downloading file '%s' to '%s' (%d bytes)" %
                          (file_url, local_filename, file_size))
                    cnx = requests.get(file_url, stream=True)
                    with open(local_filename, 'wb') as local_file:
                        for block in cnx.iter_content(1024):
                            local_file.write(block)
                    cnx.close()

                feed = dao.feed(deid)
                if feed is not None:
                    print("Removing existing data for feed [%s]" % (deid))
                    dao.delete_feed(deid)
                print("Importing into DAO as ID [%s]" % (deid))
                try:
                    dao.load_gtfs("%s/%s.gtfs.zip" % (DIR, deid), feed_id=deid)
                except Exception:
                    error_filename = "%s/%s.error" % (DIR, deid)
                    print("Import of [%s]: FAILED. Logging error to '%s'" %
                          (deid, error_filename))
                    with open(error_filename, 'w') as errfile:
                        errfile.write(traceback.format_exc())
                    raise
                print("Import of [%s]: OK." % (deid))

            except Exception as error:
                logging.exception(error)
                continue
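
The test method above refers to several module-level names that are not part of the snippet (ENABLE, DIR, IDS_TO_LOAD, SKIP_EXISTING, MAX_GTFS_SIZE), plus the usual imports. A minimal sketch of that surrounding setup, with illustrative values chosen here rather than taken from the original module, might look like this:

# Hypothetical module-level setup assumed by test_all_gtfs (values are illustrative only).
import hashlib
import logging
import os
import random
import traceback

import requests

from gtfslib.dao import Dao

ENABLE = False                    # Flip to True to run this long, network-heavy test.
DIR = "all_gtfs_tmp"              # Working directory for downloaded GTFS zips and the SQLite DB.
IDS_TO_LOAD = None                # None = fetch the full agency list; otherwise a list of data-exchange IDs.
SKIP_EXISTING = True              # Skip feeds whose GTFS zip is already on disk.
MAX_GTFS_SIZE = 2 * 1024 * 1024   # Skip GTFS files larger than this many bytes.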