def import_to_datastore(path, batch_size):
    """Import traffic volume rows from every entry directly under ``path``.

    Directories are processed in sorted order. For each one,
    ``delete_traffic_volume_of_directory`` is called first (presumably to
    clear rows from an earlier import of the same directory -- confirm against
    that helper), then the rows returned by ``get_traffic_volume_rows`` are
    bulk-inserted in chunks of ``batch_size``, committing after every chunk.

    Args:
        path: Directory whose immediate children are scanned (``path/*``).
        batch_size: Maximum number of rows per bulk insert; must be > 0.

    Returns:
        Total number of rows inserted across all directories (0 when ``path``
        has no children).

    Raises:
        Exception: On any failure. The message reports how many rows had
            already been committed and the original error is chained as
            ``__cause__``.
    """
    # Bound before the try block so the error handler can always report it.
    new_items = 0
    try:
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive number")
        for directory in sorted(glob.glob("{0}/*".format(path))):
            started = datetime.now()
            delete_traffic_volume_of_directory(directory)
            traffic_volume_rows = get_traffic_volume_rows(directory)
            directory_items = 0
            logging.info("inserting %s new traffic data rows", len(traffic_volume_rows))
            for traffic_volume_chunk in chunks(traffic_volume_rows, batch_size):
                db.session.bulk_insert_mappings(TrafficVolume, traffic_volume_chunk)
                db.session.commit()
                # Count only what has actually been committed so a failure
                # report is accurate.
                # Assumes chunks() yields sized sequences -- confirm.
                directory_items += len(traffic_volume_chunk)
                new_items += len(traffic_volume_chunk)
            logging.info("\t%s items in %s", directory_items, time_delta(started))
        db.session.commit()
        return new_items
    except Exception as err:
        error = (
            "Traffic Volume import succeeded partially with "
            + str(new_items)
            + " traffic data rows"
        )
        raise Exception(error) from err
def delete_cbs_entries(start_date, batch_size):
    """Delete CBS accident markers created on or after ``start_date``.

    Selects the ids of all ``AccidentMarker`` rows whose ``provider_code`` is
    ``BE_CONST.CBS_ACCIDENT_TYPE_1_CODE`` or ``BE_CONST.CBS_ACCIDENT_TYPE_3_CODE``
    and whose ``created`` value is >= ``start_date``. The ids are processed in
    chunks of ``batch_size``; for every chunk the dependent ``Involved`` and
    ``Vehicle`` rows are deleted first, then the ``AccidentMarker`` rows.
    Each delete is committed separately.

    Args:
        start_date: Date string in ``%Y-%m-%d`` format.
        batch_size: Maximum number of marker ids handled per chunk.
    """
    id_rows = (
        db.session.query(AccidentMarker.id)
        .filter(AccidentMarker.created >= datetime.strptime(start_date, "%Y-%m-%d"))
        .filter(
            or_(
                AccidentMarker.provider_code == BE_CONST.CBS_ACCIDENT_TYPE_1_CODE,
                AccidentMarker.provider_code == BE_CONST.CBS_ACCIDENT_TYPE_3_CODE,
            )
        )
        .all()
    )
    marker_ids_to_delete = [row[0] for row in id_rows]
    logging.info(
        "There are %s accident ids to delete starting %s",
        len(marker_ids_to_delete),
        start_date,
    )

    # Order matters: dependent tables first, the marker table last.
    deletion_plan = (
        (Involved, Involved.accident_id, "Involved"),
        (Vehicle, Vehicle.accident_id, "Vehicle"),
        (AccidentMarker, AccidentMarker.id, "AccidentMarker"),
    )
    for ids_chunk in chunks(marker_ids_to_delete, batch_size):
        logging.info("Deleting a chunk of %s", len(ids_chunk))
        for model, id_column, table_name in deletion_plan:
            q = db.session.query(model).filter(id_column.in_(ids_chunk))
            # Existence probe: fetch at most one row instead of loading every
            # matching object into memory just to test truthiness.
            if q.first() is not None:
                logging.info("deleting entries from %s", table_name)
                q.delete(synchronize_session=False)
                db.session.commit()
def delete_cbs_entries_from_email(provider_code, year, batch_size):
    """Delete the accident markers of one provider code and accident year.

    Selects ``AccidentMarker.provider_and_id`` for every marker whose
    ``accident_year`` equals ``year`` and whose ``provider_code`` equals
    ``provider_code``. The keys are processed in chunks of ``batch_size``;
    for every chunk the matching ``Involved`` and ``Vehicle`` rows are deleted
    first, then the ``AccidentMarker`` rows. Each delete is committed
    separately.

    Args:
        provider_code: Provider code the markers must have.
        year: Accident year the markers must have.
        batch_size: Maximum number of keys handled per chunk.
    """
    id_rows = (
        db.session.query(AccidentMarker.provider_and_id)
        .filter(
            # Both conditions belong inside and_(); the original closed the
            # call after the first one and relied on filter() AND-ing its
            # positional arguments.
            and_(
                AccidentMarker.accident_year == year,
                AccidentMarker.provider_code == provider_code,
            )
        )
        .all()
    )
    marker_ids_to_delete = [row[0] for row in id_rows]
    logging.info(
        "There are %s accident ids to delete for year %s",
        len(marker_ids_to_delete),
        year,
    )

    # Order matters: dependent tables first, the marker table last.
    deletion_plan = (
        (Involved, Involved.provider_and_id, "Involved"),
        (Vehicle, Vehicle.provider_and_id, "Vehicle"),
        (AccidentMarker, AccidentMarker.provider_and_id, "AccidentMarker"),
    )
    for ids_chunk in chunks(marker_ids_to_delete, batch_size):
        logging.info("Deleting a chunk of %s", len(ids_chunk))
        for model, key_column, table_name in deletion_plan:
            q = db.session.query(model).filter(key_column.in_(ids_chunk))
            # Existence probe: fetch at most one row instead of loading every
            # matching object into memory just to test truthiness.
            if q.first() is not None:
                logging.info("deleting entries from %s", table_name)
                q.delete(synchronize_session=False)
                db.session.commit()
def import_to_datastore(filepath, batch_size):
    """Insert the schools read from ``filepath`` that are not yet stored.

    Schools returned by ``get_schools(filepath)`` whose ``"id"`` already
    exists in the ``School`` table are skipped. The rest are bulk-inserted in
    chunks of ``batch_size``, committing after every chunk.

    Args:
        filepath: Passed unchanged to ``get_schools``.
        batch_size: Maximum number of rows per bulk insert; must be > 0.

    Returns:
        Number of schools inserted.

    Raises:
        Exception: On any failure. The message reports how many schools had
            already been committed and the original error is chained as
            ``__cause__``.
    """
    # Bound before the try block so the error handler can always report it.
    new_items = 0
    try:
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive number")
        started = datetime.now()
        schools = get_schools(filepath)
        existing_school_ids = {row[0] for row in db.session.query(School.id).all()}
        schools = [school for school in schools if school["id"] not in existing_school_ids]
        logging.info("inserting %s new schools", len(schools))
        for schools_chunk in chunks(schools, batch_size):
            db.session.bulk_insert_mappings(School, schools_chunk)
            db.session.commit()
            # Count only committed rows so a failure report is accurate.
            # Assumes chunks() yields sized sequences -- confirm.
            new_items += len(schools_chunk)
        logging.info("\t%s items in %s", new_items, time_delta(started))
        return new_items
    except Exception as err:
        # str() is required: concatenating the int counter directly raised
        # TypeError and hid the real failure.
        error = "Schools import succeded partially with " + str(new_items) + " schools"
        raise Exception(error) from err
def import_to_datastore(schools_description_filepath, schools_coordinates_filepath, batch_size):
    """Replace the stored schools-with-description rows with freshly read ones.

    Reads the schools via ``get_schools_with_description``, then calls
    ``truncate_schools_with_description()`` (presumably emptying the target
    table -- confirm against that helper) and bulk-inserts the rows into
    ``SchoolWithDescription`` in chunks of ``batch_size``, committing after
    every chunk.

    Args:
        schools_description_filepath: Passed unchanged to
            ``get_schools_with_description``.
        schools_coordinates_filepath: Passed unchanged to
            ``get_schools_with_description``.
        batch_size: Maximum number of rows per bulk insert; must be > 0.

    Returns:
        Number of schools inserted.

    Raises:
        Exception: On any failure. The message reports how many schools had
            already been committed and the original error is chained as
            ``__cause__``.
    """
    # Bound before the try block so the error handler can always report it.
    new_items = 0
    try:
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive number")
        started = datetime.now()
        # Read the input first so a parsing failure happens before the
        # truncate call below.
        schools = get_schools_with_description(
            schools_description_filepath, schools_coordinates_filepath
        )
        truncate_schools_with_description()
        logging.info("inserting %s new schools", len(schools))
        for schools_chunk in chunks(schools, batch_size):
            db.session.bulk_insert_mappings(SchoolWithDescription, schools_chunk)
            db.session.commit()
            # Count only committed rows so a failure report is accurate.
            # Assumes chunks() yields sized sequences -- confirm.
            new_items += len(schools_chunk)
        logging.info("\t%s items in %s", new_items, time_delta(started))
        return new_items
    except Exception as err:
        # str() is required: concatenating the int counter directly raised
        # TypeError and hid the real failure.
        error = "Schools import succeded partially with " + str(new_items) + " schools"
        raise Exception(error) from err
def delete_invalid_entries(batch_size):
    """Delete every accident marker whose latitude or longitude is NULL.

    The ids of the affected ``AccidentMarker`` rows are processed in chunks
    of ``batch_size``; for every chunk the dependent ``Involved`` and
    ``Vehicle`` rows are deleted first, then the ``AccidentMarker`` rows.
    Each delete is committed separately.

    Args:
        batch_size: Maximum number of marker ids handled per chunk.
    """
    id_rows = (
        db.session.query(AccidentMarker.id)
        .filter(
            # .is_(None) renders IS NULL, same as the former ``== None``.
            or_(
                AccidentMarker.longitude.is_(None),
                AccidentMarker.latitude.is_(None),
            )
        )
        .all()
    )
    marker_ids_to_delete = [row[0] for row in id_rows]
    logging.info("There are %s invalid accident_ids to delete", len(marker_ids_to_delete))

    # Order matters: dependent tables first, the marker table last.
    deletion_plan = (
        (Involved, Involved.accident_id, "Involved"),
        (Vehicle, Vehicle.accident_id, "Vehicle"),
        (AccidentMarker, AccidentMarker.id, "AccidentMarker"),
    )
    for ids_chunk in chunks(marker_ids_to_delete, batch_size):
        logging.info("Deleting a chunk of %s", len(ids_chunk))
        for model, id_column, table_name in deletion_plan:
            q = db.session.query(model).filter(id_column.in_(ids_chunk))
            # Existence probe: fetch at most one row instead of loading every
            # matching object into memory just to test truthiness.
            if q.first() is not None:
                logging.info("deleting invalid entries from %s", table_name)
                q.delete(synchronize_session="fetch")
                db.session.commit()