Esempio n. 1
0
def add_city():
    """Add the region and its districts to the DB. Run only once!

    Saves one ``Region`` and then one ``City`` per district name, each
    linked to that region.
    """
    district_names = (
        'Алексинский район',
        'Арсеньевский район',
        'Белёвский район',
        'Богородицкий район',
        'Венёвский район',
        'Воловский район',
        'Дубенский район',
        'Ефремовский район',
        'Заокский район',
        'Каменский район',
        'Кимовский район',
        'Киреевский район',
        'Куркинский район',
        'Ленинский район',
        'Новомосковский район',
        'Одоевский район',
        'Плавский район',
        'Суворовский район',
        'Тёпло-Огарёвский район',
        'Узловский район',
        'Чернский район',
        'Щёкинский район',
        'Ясногорский район',
    )

    # The parent region is saved first so every district can reference it.
    parent_region = Region()
    parent_region.name = 'Тульская область'
    parent_region.save()

    for district_name in district_names:
        district = City()
        district.region = parent_region
        district.name = district_name
        district.save()
Esempio n. 2
0
def import_data_from_test_csv(db_session, db_uri):
    """Load ``test_files/matches.csv`` into the database.

    Inserts the cities, venues, teams and officials named in the CSV
    through ``db_session``, then replaces the name columns of the match
    rows with the ids of those records and writes the rows to the
    ``Matches`` table, replacing any existing table of that name.

    Args:
        db_session: SQLAlchemy-style session used to insert and query the
            reference records. It is closed before the matches are written.
        db_uri: Database URL passed to ``create_engine`` for writing the
            ``Matches`` table.

    Raises:
        KeyError: If a match row names a city, venue, team or official
            for which no record exists (e.g. a ``winner`` that never
            appears as ``team1`` or ``team2``).
    """
    import pandas as pd
    from sqlalchemy import create_engine
    from app.models import City, Venue, Team, Official

    # read raw matches csv file
    df = pd.read_csv('test_files/matches.csv', parse_dates=['date'])

    # Save cities and venues. A city that hosts several venues appears in
    # several distinct (city, venue) pairs, so remember the record created
    # for each name and reuse it instead of inserting a duplicate city.
    city_by_name = {}
    city_venue_df = df[['city', 'venue']].drop_duplicates()
    city_venue_pairs = zip(city_venue_df['city'], city_venue_df['venue'])
    for city_name, venue_name in city_venue_pairs:
        city = city_by_name.get(city_name)
        if city is None:
            city = City()
            city.name = city_name
            db_session.add(city)
            # flush so city.id is populated before the venue refers to it
            db_session.flush()
            city_by_name[city_name] = city

        venue = Venue()
        venue.city_id = city.id
        venue.name = venue_name
        db_session.add(venue)

    # Save teams. dict.fromkeys de-duplicates while keeping the order of
    # first appearance, so records are inserted in the same order each run.
    for team_name in dict.fromkeys([*df['team1'], *df['team2']]):
        team_model = Team()
        team_model.name = team_name
        db_session.add(team_model)

    # save umpires (same de-duplication as for teams)
    for umpire_name in dict.fromkeys([*df['umpire1'], *df['umpire2']]):
        umpire_model = Official()
        umpire_model.name = umpire_name
        db_session.add(umpire_model)

    db_session.commit()

    # create name -> id mappings for cities, venues, teams and officials
    cities_dict = {city.name: city.id for city in db_session.query(City).all()}
    venues_dict = {
        venue.name: venue.id for venue in db_session.query(Venue).all()
    }
    teams_dict = {team.name: team.id for team in db_session.query(Team).all()}
    officials_dict = {
        official.name: official.id
        for official in db_session.query(Official).all()
    }

    db_session.close()

    # change city, venue, team1, team2, toss_winner, winner, umpire1 and
    # umpire2 names to their corresponding ids from the database
    ids_by_column = {
        'city': cities_dict,
        'venue': venues_dict,
        'team1': teams_dict,
        'team2': teams_dict,
        'toss_winner': teams_dict,
        'winner': teams_dict,
        'umpire1': officials_dict,
        'umpire2': officials_dict,
    }
    for column, ids in ids_by_column.items():
        df[column] = df[column].apply(lambda name, ids=ids: ids[name])

    # The engine is created only for this write, so release its connection
    # pool afterwards even if the insert fails.
    engine = create_engine(db_uri)
    try:
        # Insert dataframe into 'Matches' table
        df.to_sql('Matches', con=engine, if_exists='replace', index=False)
    finally:
        engine.dispose()
Esempio n. 3
0
def load_csv_data_into_tables(db_session):
    """Load the cleaned matches data into the database tables.

    Runs ``clean_null_values_in_csv()``, reads the resulting dataframe with
    ``get_dataframe()``, inserts the cities, venues, teams and officials it
    names through ``db_session``, then replaces the name columns of the
    match rows with the ids of those records and writes the rows to the
    ``Matches`` table, replacing any existing table of that name.

    Args:
        db_session: SQLAlchemy-style session used to insert and query the
            reference records; its bind is also used to write the
            ``Matches`` table. The session is not closed here.

    Raises:
        KeyError: If a match row names a city, venue, team or official
            for which no record exists (e.g. a ``winner`` that never
            appears as ``team1`` or ``team2``).
    """
    from lib.src.d02_intermediate.clean_data import clean_null_values_in_csv
    from lib.src.d03_processing.get_cleaned_data import get_dataframe

    # clean the raw 'matches.csv' file
    clean_null_values_in_csv()

    # get pandas dataframe from the cleaned up csv
    df = get_dataframe()

    # Save cities and venues. A city that hosts several venues appears in
    # several distinct (city, venue) pairs, so remember the record created
    # for each name and reuse it instead of inserting a duplicate city.
    city_by_name = {}
    city_venue_df = df[['city', 'venue']].drop_duplicates()
    city_venue_pairs = zip(city_venue_df['city'], city_venue_df['venue'])
    for city_name, venue_name in city_venue_pairs:
        city = city_by_name.get(city_name)
        if city is None:
            city = City()
            city.name = city_name
            db_session.add(city)
            # flush so city.id is populated before the venue refers to it
            db_session.flush()
            city_by_name[city_name] = city

        venue = Venue()
        venue.city_id = city.id
        venue.name = venue_name
        db_session.add(venue)

    # Save teams from BOTH team columns: 'team2', 'toss_winner' and
    # 'winner' are looked up in the team mapping below, so a team that
    # never appears as 'team1' still needs a record. dict.fromkeys
    # de-duplicates while keeping the order of first appearance.
    for team_name in dict.fromkeys([*df['team1'], *df['team2']]):
        team_model = Team()
        team_model.name = team_name
        db_session.add(team_model)

    # save umpires (same de-duplication as for teams)
    for umpire_name in dict.fromkeys([*df['umpire1'], *df['umpire2']]):
        umpire_model = Official()
        umpire_model.name = umpire_name
        db_session.add(umpire_model)

    db_session.commit()

    # create name -> id mappings for cities, venues, teams and officials
    cities_dict = {city.name: city.id for city in db_session.query(City).all()}
    venues_dict = {
        venue.name: venue.id for venue in db_session.query(Venue).all()
    }
    teams_dict = {team.name: team.id for team in db_session.query(Team).all()}
    officials_dict = {
        official.name: official.id
        for official in db_session.query(Official).all()
    }

    # change city, venue, team1, team2, toss_winner, winner, umpire1 and
    # umpire2 names to their corresponding ids from the database
    ids_by_column = {
        'city': cities_dict,
        'venue': venues_dict,
        'team1': teams_dict,
        'team2': teams_dict,
        'toss_winner': teams_dict,
        'winner': teams_dict,
        'umpire1': officials_dict,
        'umpire2': officials_dict,
    }
    for column, ids in ids_by_column.items():
        df[column] = df[column].apply(lambda name, ids=ids: ids[name])

    # Reuse the engine/connection the session is bound to; it belongs to
    # the session, so it is not disposed here.
    engine = db_session.get_bind()

    # Insert dataframe into 'Matches' table
    df.to_sql('Matches', con=engine, if_exists='replace', index=False)