Example #1
0
def set_config(base_dir):
    """Populate the shared ``config`` object with data paths and parameters.

    Every path is placed under ``<base_dir>/tmp_data``. The processed-GTFS
    directory and the output directory are created with ``mkdir_p`` as a
    side effect.

    Args:
        base_dir: root directory under which the ``tmp_data`` tree lives.
    """
    join = os.path.join

    temp_dir = join(base_dir, 'tmp_data')
    gtfs_dir = join(temp_dir, 'gtfs')
    processed_dir = join(gtfs_dir, 'processed_data')
    output_dir = join(temp_dir, 'output')

    config.base = base_dir
    config.temp_data = temp_dir

    # GTFS directories and the processed stop/shape files
    config.gtfs = gtfs_dir
    config.gtfs_raw_data = join(gtfs_dir, 'data')
    config.gtfs_processed_data = processed_dir
    mkdir_p(processed_dir)
    config.gtfs_stop_file = join(processed_dir, 'stop.data')
    config.gtfs_shape_file = join(processed_dir, 'shape.data')

    # report output
    config.output_data = output_dir
    mkdir_p(output_dir)

    # parameters, assigned onto config in the order listed
    minute = 60  # seconds in a minute
    params = (
        ('max_accuracy_radius_meters', 300),
        ('min_accuracy_radius_meters', 200),
        ('route_sampling__min_distance_between_points_meters', 10.0),
        ('station_radius_in_meters', 300),
        # how early / late a train may arrive relative to the actual arrival
        ('early_arrival_max_seconds', 35 * minute),
        ('late_arrival_max_seconds', 35 * minute),
        # how early / late a train may depart relative to the actual departure
        ('early_departure_max_seconds', 15 * minute),
        ('late_departure_max_seconds', 35 * minute),
        ('shape_probability_threshold', 0.80),
        ('stop_discovery_location_timeout_seconds', 60),
        ('stop_discovery_probability_thresh', 0.90),
        ('stop_discovery_count_thresh', 3),
        # a trip list longer than this threshold means there is no match
        # to GTFS
        ('trip_list_length_thresh', 3),
        ('no_report_timegap', datetime.timedelta(minutes=60)),
        ('no_stop_timegap', datetime.timedelta(minutes=60)),
    )
    for name, value in params:
        setattr(config, name, value)
Example #2
0
def download_gtfs_file():
    """Fetch the GTFS zip over FTP and unpack it in a new subfolder.

    The subfolder is created under GTFS_DATA_DIR and named with the value
    returned by ``ot_utils.get_utc_time_underscored()``. The archive is
    stored there as FILE_NAME and then extracted into the same folder.
    """
    timestamp = ot_utils.get_utc_time_underscored()
    target_dir = os.path.join(GTFS_DATA_DIR, timestamp)
    ot_utils.mkdir_p(target_dir)

    archive_path = os.path.join(target_dir, FILE_NAME)
    ot_utils.ftp_get_file(MOT_FTP, FILE_NAME, archive_path)
    ot_utils.unzip_file(archive_path, target_dir)
Example #3
0
def set_config(base_dir):
    """Populate the shared ``config`` object with data paths and parameters.

    Every path is placed under ``<base_dir>/tmp_data``. The processed-GTFS
    directory and the output directory are created with ``mkdir_p`` as a
    side effect.

    Args:
        base_dir: root directory under which the ``tmp_data`` tree lives.
    """
    join = os.path.join

    temp_dir = join(base_dir, 'tmp_data')
    gtfs_dir = join(temp_dir, 'gtfs')
    processed_dir = join(gtfs_dir, 'processed_data')
    output_dir = join(temp_dir, 'output')

    config.base = base_dir
    config.temp_data = temp_dir

    # GTFS directories and the processed stop/shape files
    config.gtfs = gtfs_dir
    config.gtfs_raw_data = join(gtfs_dir, 'data')
    config.gtfs_processed_data = processed_dir
    mkdir_p(processed_dir)
    config.gtfs_stop_file = join(processed_dir, 'stop.data')
    config.gtfs_shape_file = join(processed_dir, 'shape.data')

    # report output, including the shelve file kept inside it
    config.output_data = output_dir
    mkdir_p(output_dir)
    config.output_shelve_file = join(output_dir, 'shelve.data')

    # parameters, assigned onto config in the order listed
    minute = 60  # seconds in a minute
    params = (
        ('max_accuracy_radius_meters', 300),
        ('min_accuracy_radius_meters', 200),
        ('route_sampling__min_distance_between_points_meters', 10.0),
        ('station_radius_in_meters', 300),
        # how early a train can arrive before the actual arrival
        ('early_arrival_max_seconds', 35 * minute),
        # how late a train can arrive after the actual arrival
        ('late_arrival_max_seconds', 35 * minute),
        # how early a train can depart before the actual departure
        ('early_departure_max_seconds', 15 * minute),
        # how late a train can depart after the actual departure
        ('late_departure_max_seconds', 35 * minute),
        ('shape_probability_threshold', 0.80),
        ('stop_discovery_location_timeout_seconds', 60),
        ('stop_discovery_probability_thresh', 0.95),
        ('stop_discovery_count_thresh', 10),
    )
    for name, value in params:
        setattr(config, name, value)
Example #4
0
def download_gtfs_file(force=False,gtfs_url=None):
    """Download the GTFS zip to a temp file and drop it if it is a duplicate.

    The archive is fetched to ``/tmp/<time_suffix>_tmp.zip``, either from
    ``gtfs_url`` when one is given or over FTP from MOT_FTP otherwise.
    Unless ``force`` is true, its MD5 is compared with the FILE_NAME archive
    in the directory returned by ``ot_utils.find_lastest_in_dir``; the
    comparison only happens when that directory contains a ``success``
    marker. On a match the temp file is removed.

    NOTE(review): in the code visible here nothing else is done with the
    downloaded file - when the checksums differ, or ``force`` is set, the
    temp file is simply left in place. The listing looks truncated; confirm
    against the full source.

    Args:
        force: when true, skip the checksum comparison.
        gtfs_url: optional URL to download from instead of the MOT FTP.

    Returns:
        None on every path visible here.
    """
    import shutil  # NOTE(review): unused in the visible part of this function
    time_suffix = ot_utils.get_utc_time_underscored()
    if not os.path.exists(GTFS_DATA_DIR):
        ot_utils.mkdir_p(GTFS_DATA_DIR)
    tmp_file = '/tmp/%s_tmp.zip' % (time_suffix)
    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    print('downloading GTFS to tmp file')
    if not gtfs_url:
        ot_utils.ftp_get_file(MOT_FTP,FILE_NAME,tmp_file)
    else:
        ot_utils.download_url(gtfs_url,tmp_file)
    if not force:
        tmp_md5 = ot_utils.md5_for_file(tmp_file)
        last_dir = ot_utils.find_lastest_in_dir(GTFS_DATA_DIR)
        if last_dir:
            was_success = os.path.exists(os.path.join(last_dir,'success'))
            if not was_success:
                print('Last time was not success')
            else:
                last_file = os.path.join(last_dir,FILE_NAME)
                try:
                    last_md5 = ot_utils.md5_for_file(last_file)
                except Exception as e:
                    # Best effort: an unreadable previous archive is treated
                    # as "different" by using a value no real MD5 can equal.
                    print(e)
                    last_md5 = 'error_in_md5'
                if last_md5 == tmp_md5:
                    print('Checksum is identical - removing tmp file')
                    os.remove(tmp_file)
                    return None
Example #5
0
def download_gtfs_file(force=False):
    """Download the GTFS zip from MOT and install it in its own subfolder.

    The archive is first fetched over FTP to a temp file under ``/tmp``.
    Unless ``force`` is true, its MD5 is compared with the FILE_NAME archive
    in the directory returned by ``ot_utils.find_lastest_in_dir``; the
    comparison only happens when that directory contains a ``success``
    marker. If the checksums match, the temp file is deleted and nothing
    else is done.

    Otherwise a subfolder of GTFS_DATA_DIR named with the value of
    ``ot_utils.get_utc_time_underscored()`` is created, the ``latest`` entry
    in GTFS_DATA_DIR is replaced by a symlink to it, and the archive is
    moved into the subfolder and unzipped there.

    Args:
        force: when true, skip the checksum comparison and always install
            the downloaded archive.

    Returns:
        The path of the new subfolder, or None when the download matched
        the previous archive and was discarded.
    """
    import shutil
    time_suffix = ot_utils.get_utc_time_underscored()
    if not os.path.exists(GTFS_DATA_DIR):
        ot_utils.mkdir_p(GTFS_DATA_DIR)
    tmp_file = '/tmp/{0}_tmp.zip'.format(time_suffix)
    LOGGER.info('downloading GTFS to tmp file')
    ot_utils.ftp_get_file(MOT_FTP, FILE_NAME, tmp_file)
    if not force:
        tmp_md5 = ot_utils.md5_for_file(tmp_file)
        last_dir = ot_utils.find_lastest_in_dir(GTFS_DATA_DIR)
        if last_dir:
            was_success = os.path.exists(os.path.join(last_dir, 'success'))
            if not was_success:
                LOGGER.info('Last time was not success')
            else:
                last_file = os.path.join(last_dir, FILE_NAME)
                try:
                    last_md5 = ot_utils.md5_for_file(last_file)
                except Exception:
                    # Best effort: an unreadable previous archive is treated
                    # as "different" by using a value no real MD5 can equal.
                    LOGGER.exception('failed in md5 for last file - ignoring')
                    last_md5 = 'error_in_md5'
                if last_md5 == tmp_md5:
                    LOGGER.info('Checksum is identical - removing tmp file')
                    os.remove(tmp_file)
                    return None

    LOGGER.info('Checksum is different or force -- copying')
    local_dir = os.path.join(GTFS_DATA_DIR, time_suffix)
    ot_utils.mkdir_p(local_dir)
    latest_link = os.path.join(GTFS_DATA_DIR, 'latest')
    try:
        os.remove(latest_link)
    except (IOError, OSError):
        # Deliberately ignored: a failure to remove the old entry is not
        # treated as an error here; os.symlink below still runs.
        pass
    os.symlink(local_dir, latest_link)
    local_file = os.path.join(local_dir, FILE_NAME)
    shutil.move(tmp_file, local_file)
    ot_utils.unzip_file(local_file, local_dir)
    # Pass the path as a logging argument so interpolation is left to the
    # logger; the emitted message is unchanged.
    LOGGER.info('All gtfs files are in %s', local_dir)
    return local_dir
Example #6
0
        basedir = GTFS_ZIP_DIR

    local_dir = os.path.join(basedir,time_suffix)
    tmp_file = '/tmp/%s_tmp.zip' % (time_suffix)     
    ot_utils.ftp_get_file(MOT_FTP,FILE_NAME,tmp_file)
    tmp_md5 = ot_utils.md5_for_file(tmp_file)
    last_dir = ot_utils.find_lastest_in_dir(basedir)
    last_file = os.path.join(last_dir,FILE_NAME)
    try:
        last_md5 = ot_utils.md5_for_file(last_file)
    except Exception,e:
        print e
        last_md5 = 'error_in_md5'
    if last_md5 != tmp_md5:
        print 'Checksum is different- copying'
        ot_utils.mkdir_p(local_dir)
        local_file = os.path.join(local_dir,FILE_NAME)
        shutil.move(tmp_file,local_file)
    else:
        print 'Checksum is identical - removing tmp file'
        os.remove(tmp_file)
        return
    
    if not download_only:
        ot_utils.unzip_file(local_file,local_dir)   
    
        
def find_gtfs_data_dir():
    """ returns the lastest subfolder in DATA_DIR """
    dirnames = glob.glob("%s/*" % (GTFS_DATA_DIR))
    if not dirnames: