예제 #1
0
import readFromDatabase as rfd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Breakdown names, in the order in which they are read and processed below.
names = ['age', 'gender', 'cars', 'children', 'married', 'social', 'working']
# Fetch every breakdown of the 'visitor' data through the rfd helper.
Data = [rfd.readFrom('visitor', breakdown) for breakdown in names]
(ageData, genderData, carsData, childrenData, marriedData, socialData,
 workingData) = Data
# Per-year sum of 'Count', computed from the 'age' breakdown.
total = ageData.groupby('Year').sum().reset_index()['Count']
for i, frame in enumerate(Data):
    # Sum per (Year, Attribute), then spread the attribute values into
    # columns so that each row is one year.
    yearly = frame.groupby(['Year', 'Attribute']).sum().reset_index()
    wide = yearly.pivot(index='Year', columns='Attribute', values='Count')
    tem = wide.reset_index().drop(columns=['Year'])
    tem['total'] = total
    # Disabled exports of the Pearson / Spearman correlation tables:
    #tem.corr().to_csv('correlation/'+names[i]+'_cor.csv')
    #tem.corr(method='spearman').to_csv('correlation/'+names[i]+'_spearman.csv')
listPearson = np.array([[
    0.383, 0.52, 0.143, 0.815, 0.168, 0.408, 0.911, 0.207, 0.946, -0.083,
    0.914, 0.745, 0.893, 0.201, 0.679, 0.803, 0.424, -0.505, 0.954, -0.236,
    0.497
],
                        [
예제 #2
0
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os
# Read the 'married' breakdown of the visitor data through the rfd helper.
visitorData = rfd.readFrom('visitor', 'married')
plt.figure(figsize=(8, 8))
# Sum the numeric columns for every (Year, Attribute) pair, then turn the
# group keys back into ordinary columns so they can be filtered and plotted.
grouped = visitorData.groupby(['Year', 'Attribute'])
overAll = grouped.sum().reset_index()
married = ['married', 'not married']
# Text styles shared by the axis labels/title and by the tick labels.
headingStyle = {'fontsize': 15, 'weight': 'bold'}
tickStyle = {'fontsize': 12, 'weight': 'bold'}
# One line per attribute value; the plotting order is the legend order.
for status in married:
    rows = overAll.loc[overAll['Attribute'] == status]
    plt.plot(rows['Year'], rows['Count'], marker='o', linewidth=3)
plt.legend(['Married', 'Not married'], fontsize=15)
plt.ylabel('Count(Million)', **headingStyle)
plt.xlabel('Years', **headingStyle)
plt.xlim(2011, 2019)
plt.title('Number of Married Visitors for Day Visit in Scotland',
          **headingStyle)
plt.xticks(**tickStyle)
plt.yticks(**tickStyle)
# Save the chart as a PNG under 'figures' in the parent of the current
# working directory, then display it.
figuresDir = os.path.dirname(os.getcwd()) + '/figures/'
plt.savefig(figuresDir + 'MarriedOverYears.png')
plt.show()
예제 #3
0
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os
# Read the 'children' breakdown of the visitor data through the rfd helper.
visitorData = rfd.readFrom('visitor', 'children')
plt.figure(figsize=(8, 8))
# Aggregate by (Year, Attribute) and flatten the group keys into columns.
overAll = visitorData.groupby(['Year', 'Attribute']).sum().reset_index()
children = ['yes', 'no']
# Draw one line per answer; lines are added in the same order as the
# legend labels below.
for answer in children:
    isAnswer = overAll['Attribute'] == answer
    plt.plot(overAll.loc[isAnswer, 'Year'],
             overAll.loc[isAnswer, 'Count'],
             marker='o',
             linewidth=3)
plt.legend(['Yes', 'No'], fontsize=15)
# Axis labels, x-range and title.
plt.ylabel('Count(Million)', fontsize=15, weight='bold')
plt.xlabel('Years', fontsize=15, weight='bold')
plt.xlim(2011, 2019)
chartTitle = 'Number of Visitors who Have Children for Day Visit in Scotland'
plt.title(chartTitle, fontsize=15, weight='bold')
# Restyle the tick labels on both axes.
plt.xticks(fontsize=12, weight='bold')
plt.yticks(fontsize=12, weight='bold')
# Save the chart as a PNG under 'figures' in the parent of the current
# working directory, then display it.
parentDir = os.path.dirname(os.getcwd())
plt.savefig(parentDir + '/figures/ChildrenOverYears.png')
plt.show()
예제 #4
0
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os
# Read the 'age' breakdown of the visitor data through the rfd helper.
visitorData = rfd.readFrom('visitor', 'age')
plt.figure(figsize=(8, 8))
# Sum per (Year, Attribute) and expose the group keys as regular columns.
perYearAndBand = visitorData.groupby(['Year', 'Attribute']).sum()
overAll = perYearAndBand.reset_index()
age = ['16-24', '25-34', '35-44', '45-54', '55-64', '65+']
# One line per age band, drawn in list order.
for band in age:
    bandRows = overAll.loc[overAll['Attribute'] == band]
    plt.plot(bandRows['Year'], bandRows['Count'], marker='o', linewidth=3)
# The legend labels are the age bands themselves, in plotting order.
plt.legend(age, fontsize=12)
smallBold = {'fontsize': 12, 'weight': 'bold'}
plt.ylabel('Count(Million)', **smallBold)
plt.xlabel('Years', **smallBold)
plt.xlim(2011, 2019)
plt.xticks(**smallBold)
plt.yticks(**smallBold)
plt.title('Number of Visitors from Different Age for Day Visit in Scotland',
          fontsize=15,
          weight='bold')
# Save the chart as a PNG under 'figures' in the parent of the current
# working directory, then display it.
outputPath = os.path.dirname(os.getcwd()) + '/figures/AgeOverYears.png'
plt.savefig(outputPath)
plt.show()
예제 #5
0
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os
# Read the 'social' breakdown of the visitor data through the rfd helper.
visitorData = rfd.readFrom('visitor', 'social')
plt.figure(figsize=(8, 8))
# Aggregate by (Year, Attribute); reset_index() makes 'Year' and
# 'Attribute' plain columns again.
overAll = visitorData.groupby(['Year', 'Attribute']).sum().reset_index()
social = ['ab', 'c1', 'c2', 'de']
# Plot the grades in list order so the lines match the legend entries.
for grade in social:
    gradeMask = overAll['Attribute'] == grade
    gradeRows = overAll[gradeMask]
    plt.plot(gradeRows['Year'], gradeRows['Count'], marker='o', linewidth=3)
plt.legend(['AB', 'C1', 'C2', 'DE'], fontsize=15)
labelFont = dict(fontsize=15, weight='bold')
plt.ylabel('Count(Million)', **labelFont)
plt.xlabel('Years', **labelFont)
plt.xlim(2011, 2019)
plt.title('Number of Visitors from Social Grade for Day Visit in Scotland',
          **labelFont)
tickFont = dict(fontsize=12, weight='bold')
plt.xticks(**tickFont)
plt.yticks(**tickFont)
# Save the chart as a PNG under 'figures' in the parent of the current
# working directory, then display it.
plt.savefig(
    os.path.dirname(os.getcwd()) + '/figures/SocialOverYears.png')
plt.show()
예제 #6
0
import readFromDatabase as rfd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import statsmodels.api as sm

# Attribute values expected in each visitor breakdown.
age = ['16-24', '25-34', '35-44', '45-54', '55-64', '65+']
cars = ['access to car (1+)', 'no access to car (0)']
children = ['yes', 'no']
gender = ['male', 'female']
married = ['married', 'not married']
social = ['ab', 'c1', 'c2', 'de']
working = [
    'employed/self-employed (full or part time)',
    'in full or part time education',
    'unemployed/not working',
]
# Per-year sum of 'Count', computed from the 'age' breakdown; used as the
# regression target below.
ageData = rfd.readFrom('visitor', 'age')
total = ageData.groupby('Year').sum().reset_index()['Count']
# attributes[k] holds the attribute values belonging to breakdown names[k].
attributes = [age, gender, cars, children, married, social, working]
names = ['age', 'gender', 'cars', 'children', 'married', 'social', 'working']

for i, (name, levels) in enumerate(zip(names, attributes)):
    data = rfd.readFrom('visitor', name)
    tem = data.groupby(['Year', 'Attribute']).sum().reset_index()
    plt.figure(figsize=(8, 8))
    for item in levels:
        # Fit total ~ count of this attribute value; both are reshaped into
        # single-column 2-D arrays before fitting.
        # NOTE(review): assumes every attribute value has one row per year so
        # that x and y have equal length - confirm against the data.
        counts = tem.loc[tem['Attribute'] == item, 'Count']
        x = counts.values.reshape(-1, 1)
        y = total.values.reshape(-1, 1)
        reg = LinearRegression().fit(x, y)
        print(item)
        print("The linear model is: y = {:.5} + {:.5}x".format(
            reg.intercept_[0], reg.coef_[0][0]))
        predictions = reg.predict(x)
예제 #7
0
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os
# Read the 'gender' breakdown of the visitor data through the rfd helper.
visitorData = rfd.readFrom('visitor', 'gender')
plt.figure(figsize=(8, 8))
# Sum per (Year, Attribute), then restore the keys as ordinary columns.
summed = visitorData.groupby(['Year', 'Attribute']).sum()
overAll = summed.reset_index()
gender = ['male', 'female']
# One line per gender, drawn in the order used by the legend.
for who in gender:
    selected = overAll.loc[overAll['Attribute'] == who]
    years = selected['Year']
    counts = selected['Count']
    plt.plot(years, counts, marker='o', linewidth=3)
plt.legend(['Male', 'Female'], fontsize=15)
bold15 = {'fontsize': 15, 'weight': 'bold'}
bold12 = {'fontsize': 12, 'weight': 'bold'}
plt.ylabel('Count(Million)', **bold15)
plt.xlabel('Years', **bold15)
plt.xlim(2011, 2019)
plt.title(
    'Number of Visitors from Different Genders for Day Visit in Scotland',
    **bold15)
plt.xticks(**bold12)
plt.yticks(**bold12)
# Save the chart as a PNG under 'figures' in the parent of the current
# working directory, then display it.
workingDirParent = os.path.dirname(os.getcwd())
plt.savefig(workingDirParent + '/figures/GenderOverYears.png')
plt.show()
예제 #8
0
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os
# Read the 'cars' breakdown of the visitor data through the rfd helper.
visitorData = rfd.readFrom('visitor', 'cars')
cars = ['access to car (1+)', 'no access to car (0)']
plt.figure(figsize=(8, 8))
# Aggregate by (Year, Attribute) and flatten the result for filtering.
overAll = visitorData.groupby(['Year', 'Attribute']).sum().reset_index()
# Draw the two car-access groups in list order (matches the legend order).
for access in cars:
    hasAccessValue = overAll['Attribute'] == access
    plt.plot(overAll.loc[hasAccessValue, 'Year'],
             overAll.loc[hasAccessValue, 'Count'],
             marker='o',
             linewidth=3)
plt.legend(['Access to car (1+)', 'No access to car (0)'], fontsize=15)
axisFont = dict(fontsize=15, weight='bold')
plt.ylabel('Count(Million)', **axisFont)
plt.xlabel('Years', **axisFont)
plt.xlim(2011, 2019)
plt.title('Number of Visitors Using Cars in Scotland', **axisFont)
plt.xticks(fontsize=12, weight='bold')
plt.yticks(fontsize=12, weight='bold')
# Save the chart as a PNG under 'figures' in the parent of the current
# working directory, then display it.
pngPath = os.path.dirname(os.getcwd()) + '/figures/CarOverYears.png'
plt.savefig(pngPath)
plt.show()
예제 #9
0
from matplotlib import pyplot as plt
import readFromDatabase as rfd
import os
# Read the 'working' breakdown of the visitor data through the rfd helper.
visitorData = rfd.readFrom('visitor', 'working')
plt.figure(figsize=(8, 8))
# Sum per (Year, Attribute) and expose the group keys as regular columns.
overAll = visitorData.groupby(['Year', 'Attribute']).sum().reset_index()
working = [
    'employed/self-employed (full or part time)',
    'in full or part time education',
    'unemployed/not working',
]
# One line per working status, in the same order as the legend labels.
for statusName in working:
    statusRows = overAll.loc[overAll['Attribute'] == statusName]
    plt.plot(statusRows['Year'],
             statusRows['Count'],
             marker='o',
             linewidth=3)
legendLabels = [
    'Employed/self-employed (full or part time)',
    'In full or part time education',
    'Unemployed/not working',
]
plt.legend(legendLabels, fontsize=12)
largeBold = {'fontsize': 15, 'weight': 'bold'}
plt.ylabel('Count(Million)', **largeBold)
plt.xlabel('Years', **largeBold)
plt.xlim(2011, 2019)
plt.xticks(fontsize=12, weight='bold')
plt.yticks(fontsize=12, weight='bold')
plt.title('Number of Visitors from Different Working Status', **largeBold)
#Save the chart as a 'PNG' file.
    elif input in food:
        return "go for food"
    elif input in entertainment:
        return "go for entertainment"
    elif input in leisure:
        return "go for leisure activities"
    elif input in special_event:
        return "for special event"
    elif input in healthCentre:
        return "go for health centre"
    else:
        return "other activities"

# Purpose labels of interest.
# NOTE(review): presumably a subset of the labels classifyPurpose returns -
# confirm against the full function.
attributes = [
    'visit friends or family',
    'go for food',
    'go for leisure activities',
]
# Read the 'activity' data of the 'action' table through the rfd helper.
purposeData = rfd.readFrom('action', 'activity')
# Attribute values for each visitor breakdown.
age = ['16-24', '25-34', '35-44', '45-54', '55-64', '65+']
cars = ['access to car (1+)', 'no access to car (0)']
children = ['yes', 'no']
gender = ['male', 'female']
married = ['married', 'not married']
social = ['ab', 'c1', 'c2', 'de']
working = [
    'employed/self-employed (full or part time)',
    'in full or part time education',
    'unemployed/not working',
]
# columns[k] lists the attribute values of the breakdown named visitorType[k].
columns = [age, cars, children, gender, social, working, married]
visitorType = [
    'age', 'cars', 'children', 'gender', 'social', 'working', 'married'
]
# Replace each detailed action with the label classifyPurpose assigns to it.
purposeData['Action'] = purposeData['Action'].apply(classifyPurpose)
# Re-aggregate on the relabelled action, since several detailed actions now
# share one label.
purposeKeys = ['Year', 'Action', 'Visitor', 'Attribute']
purposeData = purposeData.groupby(purposeKeys).sum().reset_index()
예제 #11
0
    plt.xticks(fontsize=12, weight='bold')
    plt.yticks(fontsize=12, weight='bold')
    ax2 = f.add_subplot(2, 1, 2)
    plot_pacf(ts,ax=ax2,lags=lags)
    plt.subplots_adjust(hspace=0.5)
    plt.title('Partial Autocorrelation', fontsize=15, weight='bold')
    plt.xticks(fontsize=12, weight='bold')
    plt.yticks(fontsize=12, weight='bold')
    plt.show()
# Build one 'year-month' label for every month from 2011-1 to 2019-12.
date = [
    str(year) + '-' + str(month)
    for year in range(2011, 2020)
    for month in range(1, 13)
]
# Sum the 'age' visitor data per (Year, Month) and index it by month.
# NOTE(review): assigning `date` assumes the grouped frame has exactly one
# row per generated label, in the same order - confirm against the data.
monthlyGroups = rfd.readFrom('visitor', 'age').groupby(['Year', 'Month'])
timeData = monthlyGroups.sum().reset_index()
timeData['date'] = date
monthIndex = pd.to_datetime(timeData['date'], format='%Y-%m')
timeData = timeData.set_index(monthIndex)
timeData = timeData.drop(columns=['Year', 'Month', 'date'])
# Work on the logarithm of the data to reduce its fluctuation range.
tsLog = np.log(timeData)
# Decompose the log series into trend, seasonal and residual components.
decomposition = seasonal_decompose(tsLog)
trend, seasonal, residual = (decomposition.trend, decomposition.seasonal,
                             decomposition.resid)
# First of three stacked panels: the trend component.
plt.figure(figsize=(8, 8))
plt.subplot(3, 1, 1)
trend.plot(color='red', fontsize=15, linewidth=2)
plt.title('The Trend of Data', fontsize=15, weight='bold')