Example #1
def match_station_with_google_maps():
    key = getConfig('googleMapsKey')
    gmaps = googlemaps.Client(key=key)
    all_stops = get_stops_without_location()
    for stop in all_stops:
        # geocode() returns a list of candidate results (empty if nothing matched).
        geocoding = gmaps.geocode(stop.stop_name)
        if geocoding is None or len(geocoding) == 0:
            print(f"couldn't find {stop.stop_name} on google maps")
        else:
            # Use the best (first) candidate and store its WGS84 coordinates.
            location = geocoding[0]['geometry']['location']
            stop.stop_lat = location['lat']
            stop.stop_lon = location['lng']
    commit()
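A geocode() call in the googlemaps client returns a list of candidate results, and the function above uses only the first one. A minimal sketch of the relevant response shape (the coordinate values here are illustrative, not from the project):

example_result = [
    {
        'geometry': {
            'location': {'lat': 48.2082, 'lng': 16.3738},  # WGS84 lat/lng
        },
        # other fields (formatted_address, place_id, ...) omitted
    },
]
location = example_result[0]['geometry']['location']
print(location['lat'], location['lng'])  # -> 48.2082 16.3738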
Example #2
def crawl():
    global stop_times_to_add, finishUp, update_stops_thread, date_arr
    with open('Data/bus_stops.csv') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        row_count = sum(1 for row in csv_reader)
        try:
            begin = int(getConfig('csv.begin')) - 1
        except KeyError:
            begin = 1
        try:
            end = int(getConfig('csv.end')) - 1
        except KeyError:
            end = row_count - 1
        csv_file.seek(0)
        stop_set = set()
        for row in skip_stop(csv_reader, begin, end):
            stop_set.add(row[0])
    try:
        max_stops_to_crawl = getConfig('batchSize')
    except KeyError:
        max_stops_to_crawl = 3
    try:
        date_arr = getConfig('dates')
    except KeyError:
        date_arr = [date_w]
    stop_list = list(stop_set)
    stop_list_deleted = False
    commit()
    get_std_date()
    load_allg_feiertage()
    update_stops_thread = Thread(target=location_data_thread)
    update_stops_thread.daemon = True
    update_stops_thread.start()
    try:
        if getConfig('resetDBstatus'):
            # Mark every stop as uncrawled so the whole network is crawled again.
            for stop in get_from_table(Stop):
                stop.crawled = False
    except KeyError:
        pass
    commit()
    new_session()
    print("started crawling", flush=True)
    batch_count = 0
    while True:
        if stop_list_deleted or len(stop_list) == 0:
            if not stop_list_deleted:
                logging.debug('deleting stop_list, starting database crawl')
                del stop_list
                stop_list_deleted = True
            if not continuesCrawling:
                break
            stop_set = set()
            for stop in (uncrawled := load_all_uncrawled_stops(max_stops_to_crawl)):
                stop_set.add(stop.stop_name)
                stop.crawled = True
            commit()
            if uncrawled is None or len(uncrawled) == 0:
                break
            to_crawl = list(stop_set)
        else:
            to_crawl = stop_list[:max_stops_to_crawl]
            stop_list = stop_list[max_stops_to_crawl:]
        routes = load_all_stops_to_crawl(to_crawl)
        stop_times_to_add = []
        t = Thread(target=load_data_async, args=(routes,))
        t.daemon = True
        t.start()
        commit()
        # Drain pages while the fetcher thread is alive or results remain queued.
        while t.is_alive() or len(q) > 0:
            if len(q) == 0:
                time.sleep(0.01)
                continue
            page = q.pop()
            try:
                process_page(page.url, page.data)
            except TripAlreadyPresentError:
                pass
            except Exception as e:
                logging.error(f'load_route {page.url} {repr(e)}')
        stop_times_executor = ThreadPoolExecutor()
        for tree, page, current_stops_dict, trip in stop_times_to_add:
            stop_times_executor.submit(add_stop_times_from_web_page, tree, page, current_stops_dict,
                                       trip)
        stop_times_executor.shutdown(wait=True)
        commit()
        new_session()
        batch_count += 1
        logging.debug(f'finished batch {batch_count * max_stops_to_crawl}')
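crawl() hands pages from a fetcher thread to the main thread through the shared container q, which is polled rather than blocked on. A minimal sketch of that producer/consumer handoff; Page, the placeholder fetch, and the route list are assumptions modeled on the usage above, not the project's actual code:

import time
from collections import deque
from threading import Thread
from typing import NamedTuple

class Page(NamedTuple):
    url: str
    data: str

q = deque()

def load_data_async(routes):
    # Producer: fetch each route and queue the raw page for the main thread.
    for route in routes:
        q.append(Page(url=route, data='<html>...</html>'))  # placeholder fetch

t = Thread(target=load_data_async, args=(['route/1', 'route/2'],), daemon=True)
t.start()
# Same drain condition as crawl(): run while the producer lives or pages remain.
while t.is_alive() or len(q) > 0:
    if len(q) == 0:
        time.sleep(0.01)
        continue
    page = q.pop()
    print(f'processing {page.url}')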
Example #3
def export_all_tables():
    tables = [Agency, Calendar, CalendarDate, Frequency, Route, Trip, StopTime, Shape, Stop, Transfer]
    file_names = []
    os.chdir('./db')
    try:
        os.remove('./Archiv.zip')
    except FileNotFoundError:
        pass
    try:
        excluded_routes = getConfig('exportOptions.excludeRouteTypes')
    except KeyError:
        excluded_routes = None
    for i in tables:
        try:
            os.remove(f'./{i.__table__.name}.txt')
        except FileNotFoundError:
            pass
    for i in tables:
        file_names.append(f'./{i.__table__.name}.txt')
        new_session()
        if i is StopTime:
            # StopTime is by far the largest table; stream it in primary-key
            # windows instead of loading every row at once.
            q = query_element(i)
            with open(f'./{i.__table__.name}.txt', 'a') as outfile:
                outcsv = csv.writer(outfile, delimiter=',')
                outcsv.writerow(i.firstline())
                for dataset in windowed_query(q, StopTime.stop_times_id, 1000):
                    outcsv.writerow(dataset.tocsv())
        else:
            with open(f'./{i.__table__.name}.txt', 'a') as outfile:
                outcsv = csv.writer(outfile, delimiter=',')
                outcsv.writerow(i.firstline())
                records = get_from_table(i)
                for row in records:
                    outcsv.writerow(row.tocsv())
        end_session()
        print(f'finished {i.__table__.name}', flush=True)

    if excluded_routes is not None:
        print("removing routes")
        all_routes = []
        all_trips = []
        all_stop_times = []
        # Sets give O(1) membership tests in the filtering passes below.
        deleted_route_ids = set()
        deleted_trip_ids = set()
        with open(f'./{Route.__table__.name}.txt', 'r') as routes:
            first_line_routes = routes.readline()
            route_type_index = Route.firstline().index('route_type')
            route_id_index = Route.firstline().index('route_id')
            csv_reader = csv.reader(routes, delimiter=',')
            for route in csv_reader:
                if route[route_type_index] != '' and int(route[route_type_index]) in excluded_routes:
                    deleted_route_ids.add(int(route[route_id_index]))
                    continue
                all_routes.append(route)

        with open(f'./{Trip.__table__.name}.txt', 'r') as trips:
            first_line_trips = trips.readline()
            route_id_of_trip_index = Trip.firstline().index('route_id')
            trip_id_index = Trip.firstline().index('trip_id')
            csv_reader = csv.reader(trips, delimiter=',')
            for trip in csv_reader:
                if int(trip[route_id_of_trip_index]) in deleted_route_ids:
                    deleted_trip_ids.add(int(trip[trip_id_index]))
                    continue
                all_trips.append(trip)

        with open(f'./{StopTime.__table__.name}.txt', 'r') as stop_times:
            first_line_stop_times = stop_times.readline()
            trip_id_of_stop_time_index = StopTime.firstline().index("trip_id")
            csv_reader = csv.reader(stop_times, delimiter=',')
            for stop_time in csv_reader:
                if int(stop_time[trip_id_of_stop_time_index]) in deleted_trip_ids:
                    continue
                all_stop_times.append(stop_time)

        os.remove(f'./{Route.__table__.name}.txt')

        with open(f'./{Route.__table__.name}.txt', 'a') as routes:
            routes.writelines([first_line_routes])
            outcsv = csv.writer(routes, delimiter=',')
            for row in all_routes:
                outcsv.writerow(row)

        os.remove(f'./{Trip.__table__.name}.txt')

        with open(f'./{Trip.__table__.name}.txt', 'a') as trips:
            trips.writelines([first_line_trips])
            outcsv = csv.writer(trips, delimiter=',')
            for row in all_trips:
                outcsv.writerow(row)

        os.remove(f'./{StopTime.__table__.name}.txt')

        with open(f'./{StopTime.__table__.name}.txt', 'a') as stop_times:
            stop_times.writelines([first_line_stop_times])
            outcsv = csv.writer(stop_times, delimiter=',')
            for row in all_stop_times:
                outcsv.writerow(row)
        print(f"done removing routes with type {excluded_routes}")

    with ZipFile('./Archiv.zip', 'w') as archive:  # avoid shadowing built-in zip
        for file in file_names:
            archive.write(file)
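windowed_query() is not defined in these examples; from its use above it streams the very large StopTime table in fixed-size windows ordered by the primary key. A hypothetical sketch of such keyset pagination with SQLAlchemy (the project's real helper may differ):

def windowed_query(q, column, window_size):
    # Yield the rows of query q in windows of window_size, ordered by an
    # integer primary-key column, so only one window is in memory at a time.
    last_id = None
    while True:
        window = q.order_by(column)
        if last_id is not None:
            window = window.filter(column > last_id)
        rows = window.limit(window_size).all()
        if not rows:
            return
        yield from rows
        last_id = getattr(rows[-1], column.key)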
Example #4
    for stop in all_stops:
        geocoding = gmaps.geocode(stop.stop_name)
        if geocoding is None or len(geocoding) == 0:
            print(f"couldn't find {stop.stop_name} on google maps")
        else:
            location = geocoding[0]['geometry']['location']
            lat = location['lat']
            lng = location['lng']
            stop.stop_lat = lat
            stop.stop_lon = lng
    commit()


if __name__ == "__main__":
    try:
        continuesCrawling = getConfig('continues')
    except KeyError:
        continuesCrawling = False
    try:
        fiona_geometry = False
        crawlStopOptions = 'crawlStopOptions' in getConfig()
        try:
            shapefile = getConfig('crawlStopOptions.shapefile')
            fiona_shape = fiona.open(shapefile)
            fiona_iteration = iter(fiona_shape)
            fiona_geometry = []
            for r in fiona_iteration:
                fiona_geometry.append(shape(r['geometry']))
            del fiona_shape
            del fiona_iteration
        except KeyError:
            pass
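The truncated block above loads shapefile polygons into fiona_geometry, presumably to bound the crawl region. A hedged sketch of how such polygons could be used to test whether a stop lies inside that region; stop_in_region is hypothetical, while Point comes from shapely, which the snippet already uses via shape():

from shapely.geometry import Point

def stop_in_region(stop, fiona_geometry):
    # shapely points are (x, y) = (longitude, latitude).
    point = Point(stop.stop_lon, stop.stop_lat)
    return any(polygon.contains(point) for polygon in fiona_geometry)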
Example #5
File: crud.py Project: LeLunZ/aptc
from Models.stop import Stop
from Models.stop_times import StopTime
from Models.trip import Trip
from Models.calendar import Calendar
from Models.calendar_date import CalendarDate
from Models.transport_type_image import TransportTypeImage
from Models.stop_time_text import StopTimeText
import logging
import threading

import sqlalchemy
import pyhash

logger = logging.getLogger(__name__)

hasher = pyhash.fnv1a_64()
lock = threading.Lock()
try:
    # SQLAlchemy's canonical dialect name is 'postgresql+psycopg2'; the bare
    # 'postgres' alias was removed in SQLAlchemy 1.4.
    DATABASE_URI = 'postgresql+psycopg2://' + str(getConfig('postgres'))
except KeyError:
    DATABASE_URI = 'postgresql+psycopg2://postgres:password@localhost:5432/postgres'

from sqlalchemy import create_engine, and_, or_, func, literal_column, Text
from sqlalchemy.orm import sessionmaker

engine = create_engine(DATABASE_URI, executemany_mode='values')

Session = sessionmaker(bind=engine,
                       autoflush=False,
                       autocommit=False,
                       expire_on_commit=True)

s = Session()
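commit(), new_session(), and end_session() are called throughout the other examples but not shown in this excerpt. A minimal sketch consistent with that usage, assuming they manage the module-global session s created above (the project's real helpers may differ):

def commit():
    # Serialize commits across the crawler threads using the module lock.
    with lock:
        s.commit()

def new_session():
    # Discard the current session and start a fresh one.
    global s
    s.close()
    s = Session()

def end_session():
    s.close()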