Example #1
0
def import_to_datastore(path, batch_size):
    """Import the traffic volume rows of every entry found under *path*.

    Every entry matched by ``<path>/*`` is handled in sorted order: its
    previously stored rows are removed with
    ``delete_traffic_volume_of_directory`` and the rows returned by
    ``get_traffic_volume_rows`` are bulk-inserted in chunks of at most
    ``batch_size``, committing after each chunk.

    Args:
        path: Directory whose immediate children are imported.
        batch_size: Chunk size handed to ``chunks``; must be positive.

    Returns:
        The total number of rows inserted across all directories.

    Raises:
        ValueError: If ``batch_size`` is not positive.
        Exception: If any step of the import fails. The message reports how
            many rows were committed before the failure and the original
            error is chained as ``__cause__``.
    """
    # Explicit validation: an ``assert`` would be stripped under ``python -O``.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Bound before the try block so the error handler (and the empty-directory
    # case) can always read it.
    new_items = 0
    try:
        for directory in sorted(glob.glob("{0}/*".format(path))):
            started = datetime.now()
            delete_traffic_volume_of_directory(directory)
            traffic_volume_rows = get_traffic_volume_rows(directory)
            directory_items = 0
            logging.info("inserting " + str(len(traffic_volume_rows)) +
                         " new traffic data rows")
            for traffic_volume_chunk in chunks(traffic_volume_rows,
                                               batch_size):
                db.session.bulk_insert_mappings(TrafficVolume,
                                                traffic_volume_chunk)
                db.session.commit()
                # Count only after the commit so a failure reports exactly
                # what reached the database.
                # NOTE(review): assumes ``chunks`` yields sized sequences.
                directory_items += len(traffic_volume_chunk)
                new_items += len(traffic_volume_chunk)
            logging.info("\t{0} items in {1}".format(directory_items,
                                                     time_delta(started)))
        # Flushes work that was not followed by a chunk commit (for example a
        # directory that produced no rows).
        db.session.commit()
        return new_items
    except Exception as err:
        error = ("Traffic Volume import succeeded partially with " +
                 str(new_items) + " traffic data rows")
        raise Exception(error) from err
Example #2
0
def delete_cbs_entries(start_date, batch_size):
    """
    Delete the CBS accident markers created on or after ``start_date``.

    A marker is selected when its provider code is
    ``BE_CONST.CBS_ACCIDENT_TYPE_1_CODE`` or
    ``BE_CONST.CBS_ACCIDENT_TYPE_3_CODE``. The selected ids are processed in
    chunks; for each chunk the matching rows are deleted from Involved, then
    Vehicle, and finally AccidentMarker, committing after each table.

    Args:
        start_date: Date string in ``%Y-%m-%d`` format; markers whose
            ``created`` value is at or after it are deleted.
        batch_size: Chunk size handed to ``chunks``.
    """
    created_from = datetime.strptime(start_date, "%Y-%m-%d")
    marker_rows = (
        db.session.query(AccidentMarker.id)
        .filter(AccidentMarker.created >= created_from)
        .filter(
            or_(
                (AccidentMarker.provider_code == BE_CONST.CBS_ACCIDENT_TYPE_1_CODE),
                (AccidentMarker.provider_code == BE_CONST.CBS_ACCIDENT_TYPE_3_CODE),
            )
        )
        .all()
    )

    # Each result row is a one-element tuple holding the marker id.
    marker_ids_to_delete = [row[0] for row in marker_rows]

    logging.info(
        "There are "
        + str(len(marker_ids_to_delete))
        + " accident ids to delete starting "
        + str(start_date)
    )

    # Deletion order matters: rows that reference a marker go first, the
    # markers themselves last.
    deletion_targets = (
        ("Involved", Involved, Involved.accident_id),
        ("Vehicle", Vehicle, Vehicle.accident_id),
        ("AccidentMarker", AccidentMarker, AccidentMarker.id),
    )

    for ids_chunk in chunks(marker_ids_to_delete, batch_size):
        logging.info("Deleting a chunk of " + str(len(ids_chunk)))

        for table_name, model, id_column in deletion_targets:
            q = db.session.query(model).filter(id_column.in_(ids_chunk))
            # first() fetches at most one row, which is all that is needed to
            # know whether there is anything to delete.
            if q.first() is not None:
                logging.info("deleting entries from " + table_name)
                q.delete(synchronize_session=False)
                db.session.commit()
Example #3
0
def delete_cbs_entries_from_email(provider_code, year, batch_size):
    """
    Delete the accident markers of one provider code and accident year.

    Markers are selected by ``accident_year == year`` and
    ``provider_code == provider_code``; their ``provider_and_id`` values are
    processed in chunks. For each chunk the matching rows are deleted from
    Involved, then Vehicle, and finally AccidentMarker, committing after each
    table.

    Args:
        provider_code: Provider code the markers must have.
        year: Accident year the markers must have.
        batch_size: Chunk size handed to ``chunks``.
    """
    # filter() joins its criteria with AND, so no explicit and_() is needed.
    marker_rows = (
        db.session.query(AccidentMarker.provider_and_id)
        .filter(
            AccidentMarker.accident_year == year,
            AccidentMarker.provider_code == provider_code,
        )
        .all()
    )

    # Each result row is a one-element tuple holding provider_and_id.
    marker_ids_to_delete = [row[0] for row in marker_rows]

    logging.info(
        "There are "
        + str(len(marker_ids_to_delete))
        + " accident ids to delete for year "
        + str(year)
    )

    # Deletion order matters: rows that reference a marker go first, the
    # markers themselves last.
    deletion_targets = (
        ("Involved", Involved, Involved.provider_and_id),
        ("Vehicle", Vehicle, Vehicle.provider_and_id),
        ("AccidentMarker", AccidentMarker, AccidentMarker.provider_and_id),
    )

    for ids_chunk in chunks(marker_ids_to_delete, batch_size):
        logging.info("Deleting a chunk of " + str(len(ids_chunk)))

        for table_name, model, id_column in deletion_targets:
            q = db.session.query(model).filter(id_column.in_(ids_chunk))
            # first() fetches at most one row, which is all that is needed to
            # know whether there is anything to delete.
            if q.first() is not None:
                logging.info("deleting entries from " + table_name)
                q.delete(synchronize_session=False)
                db.session.commit()
Example #4
0
def import_to_datastore(filepath, batch_size):
    """Insert the schools read from *filepath* that are not stored yet.

    Schools are loaded with ``get_schools``; those whose ``"id"`` already
    exists in the ``School`` table are skipped and the rest are bulk-inserted
    in chunks of at most ``batch_size``, committing after each chunk.

    Args:
        filepath: Path handed to ``get_schools``.
        batch_size: Chunk size handed to ``chunks``; must be positive.

    Returns:
        The number of schools inserted.

    Raises:
        ValueError: If ``batch_size`` is not positive.
        Exception: If any step of the import fails. The message reports how
            many schools were committed before the failure and the original
            error is chained as ``__cause__``.
    """
    # Explicit validation: an ``assert`` would be stripped under ``python -O``.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Bound before the try block so the error handler can always read it.
    new_items = 0
    try:
        started = datetime.now()
        schools = get_schools(filepath)
        # Each result row is a one-element tuple holding the school id.
        existing_school_ids = {row[0] for row in db.session.query(School.id).all()}
        schools = [school for school in schools if school["id"] not in existing_school_ids]
        logging.info("inserting " + str(len(schools)) + " new schools")
        for schools_chunk in chunks(schools, batch_size):
            db.session.bulk_insert_mappings(School, schools_chunk)
            db.session.commit()
            # Count only after the commit so a failure reports exactly what
            # reached the database.
            # NOTE(review): assumes ``chunks`` yields sized sequences.
            new_items += len(schools_chunk)
        logging.info("\t{0} items in {1}".format(new_items, time_delta(started)))
        return new_items
    except Exception as err:
        # str() is required: concatenating the int counter directly raised
        # TypeError and masked the real failure.
        error = "Schools import succeded partially with " + str(new_items) + " schools"
        raise Exception(error) from err
Example #5
0
def import_to_datastore(schools_description_filepath,
                        schools_coordinates_filepath, batch_size):
    """Replace the stored schools-with-description rows with fresh data.

    Schools are loaded with ``get_schools_with_description``, the existing
    rows are removed with ``truncate_schools_with_description`` and the
    loaded schools are bulk-inserted in chunks of at most ``batch_size``,
    committing after each chunk.

    Args:
        schools_description_filepath: First path handed to
            ``get_schools_with_description``.
        schools_coordinates_filepath: Second path handed to
            ``get_schools_with_description``.
        batch_size: Chunk size handed to ``chunks``; must be positive.

    Returns:
        The number of schools inserted.

    Raises:
        ValueError: If ``batch_size`` is not positive.
        Exception: If any step of the import fails. The message reports how
            many schools were committed before the failure and the original
            error is chained as ``__cause__``.
    """
    # Explicit validation: an ``assert`` would be stripped under ``python -O``.
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Bound before the try block so the error handler can always read it.
    new_items = 0
    try:
        started = datetime.now()
        # Load first so a parsing failure does not leave the table truncated.
        schools = get_schools_with_description(schools_description_filepath,
                                               schools_coordinates_filepath)
        truncate_schools_with_description()
        logging.info("inserting " + str(len(schools)) + " new schools")
        for schools_chunk in chunks(schools, batch_size):
            db.session.bulk_insert_mappings(SchoolWithDescription,
                                            schools_chunk)
            db.session.commit()
            # Count only after the commit so a failure reports exactly what
            # reached the database.
            # NOTE(review): assumes ``chunks`` yields sized sequences.
            new_items += len(schools_chunk)
        logging.info("\t{0} items in {1}".format(new_items,
                                                 time_delta(started)))
        return new_items
    except Exception as err:
        # str() is required: concatenating the int counter directly raised
        # TypeError and masked the real failure.
        error = "Schools import succeded partially with " + str(new_items) + " schools"
        raise Exception(error) from err
Example #6
0
def delete_invalid_entries(batch_size):
    """
    Delete all markers in the database with null latitude or longitude.

    The ids of the affected markers are processed in chunks; for each chunk
    the matching rows are deleted from Involved, then Vehicle, and finally
    AccidentMarker, committing after each table.

    Args:
        batch_size: Chunk size handed to ``chunks``.
    """
    # is_(None) renders as IS NULL, same as the "== None" comparison, without
    # looking like a Python identity mistake.
    marker_rows = (
        db.session.query(AccidentMarker.id)
        .filter(
            or_(
                AccidentMarker.longitude.is_(None),
                AccidentMarker.latitude.is_(None),
            )
        )
        .all()
    )

    # Each result row is a one-element tuple holding the marker id.
    marker_ids_to_delete = [row[0] for row in marker_rows]

    logging.info("There are " + str(len(marker_ids_to_delete)) +
                 " invalid accident_ids to delete")

    # Deletion order matters: rows that reference a marker go first, the
    # markers themselves last.
    deletion_targets = (
        ("Involved", Involved, Involved.accident_id),
        ("Vehicle", Vehicle, Vehicle.accident_id),
        ("AccidentMarker", AccidentMarker, AccidentMarker.id),
    )

    for ids_chunk in chunks(marker_ids_to_delete, batch_size):
        logging.info("Deleting a chunk of " + str(len(ids_chunk)))

        for table_name, model, id_column in deletion_targets:
            q = db.session.query(model).filter(id_column.in_(ids_chunk))
            # first() fetches at most one row, which is all that is needed to
            # know whether there is anything to delete.
            if q.first() is not None:
                logging.info("deleting invalid entries from " + table_name)
                q.delete(synchronize_session="fetch")
                db.session.commit()