Beispiel #1
0
    def __init__(self):
        """
        Open the SQL Server connection described by the ``constant`` settings.

        The connection is stored in ``self.conn``. When connecting fails the
        error is written to the log file and ``self.conn`` stays ``None``.
        """
        # Define the attribute up front so it always exists, even when the
        # connection attempt below fails and is only logged.
        self.conn = None

        conn_string = ('DRIVER={ODBC Driver 17 for SQL Server}'
                       + ';SERVER=' + constant.SQL_SERVER
                       + ';DATABASE=' + constant.SQL_DB_NAME
                       + ';UID=' + constant.SQL_USERNAME
                       + ';PWD=' + constant.SQL_PASSWORD)
        try:
            self.conn = pyodbc.connect(conn_string)
        except Exception as err:
            logfile.Log().log_error(err)
Beispiel #2
0
 def get_details_mongo(self):
     """
     Read every document of the attractions collection from MongoDB.

     :return: list of documents, or None when the read fails (the error is
              written to the log file)
     """
     conn = mongodbConnection.MongoDBConn()
     try:
         attraction_collection = conn.get_collection(
             constant.MG_ATTRACTIONS_TABLE)
         # find() only builds a lazy cursor; read it completely here, because
         # the connection is closed in the finally block before the caller
         # gets a chance to iterate.
         return list(attraction_collection.find({}))
     except Exception as err:
         logfile.Log().log_error(err)
         return None
     finally:
         conn.close_conn()
 def __init__(self):
     """
     Build the MongoDB connection URL and open the client.

     The client is stored in ``self.conn`` and checked with ``server_info()``.
     Any error is written to the log file; ``self.conn`` stays ``None`` when
     the client itself could not be created.
     """
     # Define the attribute up front so it exists even if MongoClient raises.
     self.conn = None
     # Both credentials are percent-escaped: reserved characters such as
     # '@', ':' or '/' would otherwise break the URL.
     self.url = ('mongodb+srv://'
                 + urllib.parse.quote_plus(constant.MONGO_USERNAME) + ':'
                 + urllib.parse.quote_plus(constant.MONGO_PASSWORD) + '@'
                 + constant.MG_DB_NAME + '.wurzf.mongodb.net/test')
     try:
         self.conn = pymongo.MongoClient(self.url)
         # Force a round trip so connection problems are logged here.
         self.conn.server_info()
     except Exception as err:
         logfile.Log().log_error(err)
Beispiel #4
0
 def get_mongo_flight_details(self, collection_name):
     """
     Read every document of a collection of the mongo database.

     :param collection_name: Name of collection
     :return: list of documents, or None when the read fails (the error is
              written to the log file)
     """
     mongo_conn = mongodbConnection.MongoDBConn()
     try:
         mongo_collection = mongo_conn.get_collection(collection_name)
         # find() only builds a lazy cursor; read it completely here, because
         # the connection is closed in the finally block before the caller
         # gets a chance to iterate.
         return list(mongo_collection.find({}))
     except Exception as err:
         logfile.Log().log_error(err)
         return None
     finally:
         mongo_conn.close_conn()
Beispiel #5
0
    def data_cleaning(self):
        """
        Copy the attractions documents from MongoDB into the SQL Server
        ``Attractions`` table and plot the average review per city.

        The table is dropped (when it exists) and recreated on every call.
        A failure while loading is logged and rolled back; both database
        connections opened here are closed before the chart is drawn.
        No de-duplication of the documents is performed.
        """
        data = self.get_details_mongo()
        df = pd.DataFrame(list(data))
        # The MongoDB object id has no counterpart in the SQL table.
        df.drop(['_id'], axis='columns', inplace=True)

        sql_conn = sqldb.SqlDBConn().conn
        cursor = sql_conn.cursor()
        try:
            # IF EXISTS keeps the very first run (no table yet) from aborting
            # the whole load.
            cursor.execute('''DROP TABLE IF EXISTS dbo.Attractions''')
            cursor.execute(
                '''CREATE TABLE Attractions ( City nvarchar(200) , Places nvarchar(200) , Address nvarchar(200) , Website nvarchar(200),Phone nvarchar(200) , Rating nvarchar(200) , Review nvarchar(200))'''
            )
            insert_sql = "INSERT INTO Attractions (City,Places,Address,Website,Phone,Rating,Review) values(?,?,?,?,?,?,?)"
            for _, row in df.iterrows():
                cursor.execute(insert_sql, row.City, row.Places, row.Address,
                               row.Website, row.Phone, row.Rating, row.Review)
            sql_conn.commit()
        except Exception as err:
            logfile.Log().log_error(err)
            # Undo the partial load so the previous table content survives.
            sql_conn.rollback()
        finally:
            cursor.close()
            sql_conn.close()

        # A fresh connection is used for the read, as in the original flow.
        sql_conn = sqldb.SqlDBConn().conn
        try:
            output_df = pd.read_sql(
                "SELECT City,Avg(Review) as Avg_Review from dbo.attractions GROUP By City",
                sql_conn)
        finally:
            sql_conn.close()

        # NOTE(review): the keyword arguments match a plotly-express style
        # ``bar`` call (the sibling chart methods use ``px.bar``) - confirm
        # what ``plt`` is bound to in this module.
        fig = plt.bar(output_df,
                      y='Avg_Review',
                      x='City',
                      labels={
                          'Avg_Review': 'Avg Review',
                          'City': 'city'
                      })
        fig.show()
Beispiel #6
0
 def common_insight(self):
     """
     Show a pie chart of the reviews of the attractions in one selected city.

     The city is picked by the query: among cities with scheduled arriving
     flights that also have attractions, the one with the lowest average
     "Rent Index". Errors are written to the log file.
     :return: boolean (always True)
     """
     sql_conn = sqldb.SqlDBConn().conn
     try:
         # NOTE(review): the query filters on a column named "Ratings" -
         # confirm this matches the attractions table schema.
         result_df = pd.read_sql(
             "select Review,Places,City from attractions where City in (select Top 1 cli.City from flights_detail fd inner join cities_countries cc on fd.arrival_city_id=cc.city_id INNER JOIN city_living_index cli on cc.city=cli.City INNER JOIN attractions aa on aa.City=cc.city where flight_status='scheduled' GROUP by cli.city, flight_status order by AVG([Rent Index]) ASC) and Ratings is not null order by Review Desc",
             con=sql_conn)
         # An empty result raises here and is logged by the handler below.
         city = result_df['City'].iloc[0]
         fig = px.pie(result_df,
                      values='Review',
                      names='Places',
                      labels={
                          'Places': 'Places',
                          'Review': 'Review'
                      },
                      title='Top 10 Places and reviews in ' + city)
         fig.show()
     except Exception as err:
         logfile.Log().log_error(err)
     finally:
         # Every call opens its own connection, so release it here.
         sql_conn.close()
     return True
Beispiel #7
0
 def schedule_graph(self):
     """
     Create the map graph which shows the count of scheduled flights per
     departure country. Errors are written to the log file.
     :return: boolean (always True)
     """
     sql_conn = sqldb.SqlDBConn().conn
     try:
         flight_status_df = pd.read_sql(
             "SELECT country,count(country) as c_count FROM flights_detail fd INNER JOIN cities_countries cc on fd.departure_city_id=cc.city_id where flight_status='scheduled' GROUP by cc.country ORDER BY count(country) DESC",
             con=sql_conn)
         fig = px.choropleth(
             flight_status_df,
             locations=flight_status_df['country'],
             locationmode='country names',
             color='c_count',
             labels={'c_count': 'Number of Scheduled Flights'})
         fig.show(renderer="browser")
     except Exception as err:
         logfile.Log().log_error(err)
     finally:
         # Every call opens its own connection, so release it here.
         sql_conn.close()
     return True
Beispiel #8
0
 def arrival_delay(self):
     """
     Create the pie chart which shows the average arrival delay per arrival
     airport; flights with an arrival delay of 0 are excluded by the query.
     Errors are written to the log file.
     :return: boolean (always True)
     """
     sql_conn = sqldb.SqlDBConn().conn
     try:
         flight_status_df = pd.read_sql(
             "SELECT AVG(arrival_delay) as mean, arrival_airport from flights_detail fd inner join cities_countries cc on fd.arrival_city_id=cc.city_id where arrival_delay<>0 GROUP by arrival_airport,city",
             con=sql_conn)
         fig = px.pie(flight_status_df,
                      values='mean',
                      names='arrival_airport',
                      labels={
                          'arrival_airport': 'Arrival Airport name',
                          'mean': 'Average delay'
                      })
         fig.show()
     except Exception as err:
         logfile.Log().log_error(err)
     finally:
         # Every call opens its own connection, so release it here.
         sql_conn.close()
     return True
Beispiel #9
0
 def flight_status(self):
     """
     Create the bar graph which shows the count of all flight statuses.
     Errors are written to the log file.
     :return: boolean (always True)
     """
     sql_conn = sqldb.SqlDBConn().conn
     try:
         flight_status_df = pd.read_sql(
             "SELECT flight_status,count(flight_status) as s_count from flights_detail GROUP by flight_status",
             con=sql_conn)
         fig = px.bar(flight_status_df,
                      y='s_count',
                      x='flight_status',
                      labels={
                          'flight_status': 'Flight Status',
                          's_count': 'Count'
                      })
         fig.show()
     except Exception as err:
         logfile.Log().log_error(err)
     finally:
         # Every call opens its own connection, so release it here.
         sql_conn.close()
     return True
Beispiel #10
0
 def mongo_insert_details(self, file_name, collection_name):
     """
     Read data from a JSON file and insert it into a mongo db collection.

     A non-empty target collection is dropped first, so the call replaces
     its content. Errors are written to the log file and the connection is
     always closed.
     :param file_name: Name of the file (without the ``.json`` extension)
                       inside the ``jsonFiles`` directory next to this package
     :param collection_name: Name of the collection to fill
     :return: None
     """
     mongo_conn = mongodbConnection.MongoDBConn()
     try:
         mongo_collection = mongo_conn.get_collection(collection_name)
         # Collection.count() no longer exists in PyMongo 4;
         # count_documents({}) is its documented replacement.
         if mongo_collection.count_documents({}) > 0:
             mongo_collection.drop()
             mongo_collection = mongo_conn.get_collection(collection_name)
         json_file_path = os.path.join(
             os.path.abspath(os.path.dirname(__file__)), '..',
             'jsonFiles', file_name + '.json')
         with open(json_file_path) as json_file:
             json_obj_list = json.load(json_file)
         # NOTE(review): insert_many expects a list of documents, so each
         # top-level element of the file is presumably itself a list -
         # confirm against the JSON files.
         for json_obj in json_obj_list:
             mongo_collection.insert_many(json_obj)
     except Exception as err:
         logfile.Log().log_error(err)
     finally:
         mongo_conn.close_conn()
Beispiel #11
0
 def insert_cities_countries(self):
     """
     Rebuild the cities table in the MSSQL database from the merged
     cities/countries data.

     The table is dropped (if present), recreated and filled in a single
     transaction; on any error the transaction is rolled back and the error
     is logged. The connection is closed in every case.
     :return:
     """
     merged = self.merge_cities_countries()
     # Only the two remaining columns are written to the (city, country) table.
     trimmed = merged.drop('country_iso2', axis=1).drop('iata_code', axis=1)
     records = list(map(tuple, trimmed.values))

     connection = sqldb.SqlDBConn().conn
     db_cursor = connection.cursor()
     try:
         table_name = constant.CITIES_TABLE
         drop_sql = 'drop table if exists ' + table_name
         create_sql = 'Create table ' + table_name + ' (city_id int NOT NULL IDENTITY(1,1), city varchar(255) NOT NULL, country varchar(255) NOT NULL, PRIMARY KEY(city_id))'
         insert_sql = 'insert into ' + table_name + ' (city,country) values (?, ?)'

         for statement in (drop_sql, create_sql):
             db_cursor.execute(statement)
         db_cursor.executemany(insert_sql, records)
         connection.commit()
     except Exception as err:
         connection.rollback()
         logfile.Log().log_error(err)
     finally:
         connection.close()
Beispiel #12
0
    def data_cleansing(self, data):
        """
        Clean flights dataset and store it in MSSQL.

        The nested ``airline``, ``flight``, ``departure`` and ``arrival``
        columns are flattened, IATA codes are mapped to city names via
        ``merge_cities_countries`` and then to ``city_id`` values read from
        the cities table. Finally the flight table is dropped, recreated and
        filled in one transaction; on error the transaction is rolled back
        and the error is logged. The connection is always closed at the end.

        :param data: flight records; must be accepted by ``pd.DataFrame`` and
                     provide the columns ``airline``, ``flight_status``,
                     ``flight``, ``departure`` and ``arrival``
        :return: None
        """
        # Global pandas display option; it stays changed after this call.
        pd.set_option('display.max_columns', None)
        new_df = pd.DataFrame()
        primary_df = pd.DataFrame(data)
        # Airline: keep only the airline name.
        # NOTE(review): pandas.io.json.json_normalize was deprecated in favour
        # of pandas.json_normalize (pandas 1.0) - confirm the installed version.
        airline_df = pandas.io.json.json_normalize(primary_df['airline'])
        new_df['airline_name'] = airline_df['name']

        # Flight status is already a flat column.
        new_df['flight_status'] = primary_df['flight_status']

        # Flight: number and IATA flight code.
        flight_df = pandas.io.json.json_normalize(primary_df['flight'])
        new_df[['flight_number',
                'flight_iata']] = flight_df[['number', 'iata']]

        # Departure: delay, airport, scheduled time and airport IATA code.
        departure_df = pandas.io.json.json_normalize(primary_df['departure'])
        new_df[[
            'departure_delay', 'departure_airport', 'departure_scheduled',
            'departure_iata'
        ]] = departure_df[['delay', 'airport', 'scheduled', 'iata']]

        # Arrival: same four fields as for the departure.
        arrival_df = pandas.io.json.json_normalize(primary_df['arrival'])
        new_df[[
            'arrival_delay', 'arrival_airport', 'arrival_scheduled',
            'arrival_iata'
        ]] = arrival_df[['delay', 'airport', 'scheduled', 'iata']]

        cities_df = self.merge_cities_countries()

        # Resolve the departure IATA code to a city name. pd.merge defaults
        # to an inner join, so flights without a matching code are dropped.
        new_df = pd.merge(new_df,
                          cities_df,
                          left_on='departure_iata',
                          right_on='iata_code')
        new_df = new_df.rename({'city_name': 'departure_city'}, axis=1)

        # Same lookup for the arrival side. Columns shared with the first
        # merge get pandas' default _x/_y suffixes, which the drop list
        # further down relies on.
        new_df = pd.merge(new_df,
                          cities_df,
                          left_on='arrival_iata',
                          right_on='iata_code')
        new_df = new_df.rename({'city_name': 'arrival_city'}, axis=1)

        sql_conn = sqldb.SqlDBConn().conn
        cursor = sql_conn.cursor()

        # The cities table supplies the city_id values used as foreign keys.
        city_table_df = pd.read_sql("select * from " + constant.CITIES_TABLE,
                                    con=sql_conn)

        # Map the departure city name to its city_id.
        new_df = pd.merge(new_df,
                          city_table_df,
                          left_on='departure_city',
                          right_on='city')
        new_df = new_df.rename({'city_id': 'departure_city_id'}, axis=1)

        # Map the arrival city name to its city_id; 'city'/'country' now
        # collide with the previous merge and become city_x/_y, country_x/_y.
        new_df = pd.merge(new_df,
                          city_table_df,
                          left_on='arrival_city',
                          right_on='city')
        new_df = new_df.rename({'city_id': 'arrival_city_id'}, axis=1)

        # Remove all helper columns; what remains is inserted into the flight
        # table under its data frame column name (see insert_sql below).
        new_df = new_df.drop([
            'departure_iata', 'arrival_iata', 'country_iso2_x', 'iata_code_x',
            'country_name_x', 'country_iso2_y', 'iata_code_y',
            'country_name_y', 'departure_city', 'arrival_city', 'city_x',
            'country_x', 'city_y', 'country_y'
        ],
                             axis=1)

        # Turn NaN into None (presumably so the driver writes SQL NULL - TODO
        # confirm), then default missing delays to 0.
        # NOTE(review): the np.NaN alias was removed in NumPy 2.0 (np.nan is
        # the surviving spelling) - confirm the installed version.
        new_df = new_df.replace({np.NaN: None})
        new_df['departure_delay'] = new_df['departure_delay'].replace(
            {None: 0})
        new_df['arrival_delay'] = new_df['arrival_delay'].replace({None: 0})

        # DDL for the flight table; both city id columns reference the cities
        # table.
        drop_flight_table = 'drop table if exists ' + constant.MG_FLIGHT_TABLE
        flight_table_sql = 'create table ' + constant.MG_FLIGHT_TABLE + ' (id int not null IDENTITY(1,1), airline_name varchar(255), ' \
                                                                        'flight_status varchar(255), flight_number varchar(255), flight_iata varchar(255), ' \
                                                                        'departure_delay decimal, departure_airport varchar(255), departure_scheduled varchar(255), ' \
                                                                        'arrival_delay decimal, arrival_airport varchar(255), arrival_scheduled varchar(255),' \
                                                                        ' departure_city_id int, arrival_city_id int, PRIMARY KEY (id), FOREIGN KEY (arrival_city_id) REFERENCES ' + constant.CITIES_TABLE + ' (city_id), FOREIGN KEY (departure_city_id) REFERENCES ' + constant.CITIES_TABLE + ' (city_id))'

        # Build "INSERT INTO <table>(col1, col2, ...) VALUES (?,?,...)" with
        # one placeholder per data frame column.
        column_name_list = []
        for col in new_df.columns:
            column_name_list.append(col)
        data_list = [tuple(rows) for rows in new_df.values]
        insert_sql = "INSERT INTO " + constant.MG_FLIGHT_TABLE + "(" + ', '.join(
            column_name_list) + ") VALUES (" + "?," * (len(column_name_list) -
                                                       1) + "?)"

        try:
            # Drop, create and bulk insert are committed together.
            cursor.execute(drop_flight_table)
            cursor.execute(flight_table_sql)
            cursor.executemany(insert_sql, data_list)
            sql_conn.commit()
        except Exception as err:
            logfile.Log().log_error(err)
            sql_conn.rollback()
        finally:
            sql_conn.close()