Exemple #1
0
        def proceed_with_method():
            """Scrape the rating page into a ``{date text: rating text}`` dict.

            The page at ``Chateau_rating(self.address).get_rating_data_url()``
            is downloaded and every ``<p>`` element whose markup reduces to a
            positive number is collected in document order.  A number above
            101 is treated as a vintage year when the number right after it is
            below 101, which is then taken as that vintage's rating.

            Returns:
                dict mapping ``str(eomonth(year, 12))`` (with any trailing
                `` 00:00:00`` removed) to the rating text exactly as scraped.
            """
            url_wine = Chateau_rating(self.address).get_rating_data_url()

            # Fetch the page with the user agent supplied by get_random_ua().
            headers = {'user-agent': get_random_ua()}
            r_wine = requests.get(url_wine, headers=headers)
            wine_soup = BeautifulSoup(r_wine.text, 'html.parser')

            # Deleting these five characters turns "<p><b>1990</b></p>" into
            # "1990"; elements that still are not a bare number are skipped.
            markup_chars = str.maketrans('', '', '<>pb/')

            # (raw text, parsed value) for every positive number on the page.
            numbers = []
            for item in wine_soup.find_all('p'):
                item_raw = str(item).translate(markup_chars)
                try:
                    value = float(item_raw)
                except ValueError:
                    continue
                if value > 0:
                    numbers.append((item_raw, value))

            # Pair each year-like value with the rating-like value after it.
            item_dict_raw = {}
            for (year_raw, year), (rating_raw, rating) in zip(
                    numbers, numbers[1:]):
                if year > 101 and rating < 101:
                    item_dict_raw[year_raw] = rating_raw

            # Key each rating by the date eomonth() gives for month 12.
            item_dict = {}
            for year_raw, rating_raw in item_dict_raw.items():
                item_date = eomonth(int(year_raw), 12)
                item_dict[str(item_date).replace(' 00:00:00',
                                                 '')] = rating_raw

            return item_dict
Exemple #2
0
        def proceed_with_method():
            """Regress ratings on monthly weather terms and plot the fit.

            Vintages rated above 96 are split by date: keys after 1970-12-31
            and before 2000-12-31 train the model, keys from 2000-12-31 on
            test it.  Each vintage's feature row holds one quadratic term per
            active month (see ``v_months`` / ``p_months``), evaluated on that
            month's entry in the seasonal weather dicts.  The fitted
            coefficients, test-set MSE and R2, the correlation between
            ratings and predictions and a straight-line fit are printed, and
            a scatter of rating against predicted rating is shown.

            Raises:
                ValueError: if no training or no test vintage has features.
            """
            weather_dict_p_raw = Chateau(self.address).weather_dict('p')
            weather_dict_v_raw = Chateau(self.address).weather_dict('v')

            weather_dict_p = dict_unpacker(weather_dict_p_raw)
            weather_dict_v = dict_unpacker(weather_dict_v_raw)

            rating_dict_raw = Chateau_rating(self.address).get_rating_data()
            rating_dict = dict_unpacker(rating_dict_raw)

            seasonal_weather_dict_p = seasonal_weather_dict(weather_dict_p)
            seasonal_weather_dict_v = seasonal_weather_dict(weather_dict_v)

            # (a, b, c) of f(x) = a*x*x + b*x + c, keyed by calendar month.
            v_coeffs = {
                3: (-1.17, 27.42, -38.69),
                4: (-0.29, 8.03, 42.72),
                5: (-0.24, 8.05, 31.77),
                6: (-0.21, 8.90, 3.81),
                8: (-0.22, 9.64, -7.21),
            }
            p_coeffs = {
                2: (0.02, -0.47, 99.08),
                7: (-1.17, 2.69, 96.88),
                9: (-0.28, 0.46, 98.08),
            }
            # Months whose term is fed to the model; the remaining table
            # entries are kept so they can be switched on again.
            v_months = (4, 5)
            p_months = ()

            def quadratic(coeffs, x):
                """Evaluate a*x*x + b*x + c for ``coeffs = (a, b, c)``."""
                a, b, c = coeffs
                return a * x * x + b * x + c

            def weather_features(year):
                """Return the active weather terms of ``year`` (maybe empty)."""
                features = []
                for month in range(2, 10):
                    try:
                        # A month only counts when both series have it.
                        month_end = eomonth(year, month)
                        av_v = seasonal_weather_dict_v[month_end]
                        av_p = seasonal_weather_dict_p[month_end]

                        terms = []
                        if month in v_months:
                            terms.append(quadratic(v_coeffs[month], av_v))
                        if month in p_months:
                            terms.append(quadratic(p_coeffs[month], av_p))
                    except Exception:
                        # Best effort: a month that cannot be evaluated
                        # contributes nothing.
                        continue
                    features.extend(terms)
                return features

            train_start = datetime(1970, 12, 31)
            test_start = datetime(2000, 12, 31)

            x_values_train, y_values_train, n_values_train = [], [], []
            x_values_test, y_values_test, n_values_test = [], [], []

            for key, rating in rating_dict.items():
                if key >= test_start:
                    x_rows, y_rows, labels = (x_values_test, y_values_test,
                                              n_values_test)
                elif key > train_start and key.year > 1970:
                    x_rows, y_rows, labels = (x_values_train, y_values_train,
                                              n_values_train)
                else:
                    continue

                if not rating > 96:
                    continue

                features = weather_features(key.year)
                if features:
                    x_rows.append(features)
                    y_rows.append(rating)
                    labels.append(key.year)

            if not x_values_train or not x_values_test:
                raise ValueError(
                    "need at least one training and one test vintage with "
                    "weather data to fit and score the model")

            x_train = np.array(x_values_train)
            x_test = np.array(x_values_test)
            x_all = np.array(x_values_train + x_values_test)
            y_values_all = y_values_train + y_values_test
            n_values_all = n_values_train + n_values_test

            # Fit on the training vintages, predict the test set and all rows.
            regr = linear_model.LinearRegression()
            regr.fit(x_train, y_values_train)
            y_values_pred = regr.predict(x_test)
            y_values_pred_all = regr.predict(x_all)

            print('Coefficients: \n', regr.coef_)
            print("Mean squared error: %.2f" %
                  mean_squared_error(y_values_test, y_values_pred))
            print('R2 score: %.2f' % r2_score(y_values_test, y_values_pred))

            # Straight line of rating against predicted rating, for the chart.
            z = np.polyfit(y_values_pred_all, y_values_all, 1)
            p = np.poly1d(z)
            xp = np.linspace(min(y_values_pred_all), max(y_values_pred_all),
                             100)

            # corrcoef returns a 2x2 matrix; flat index 1 is the off-diagonal.
            R = np.corrcoef(y_values_all, y_values_pred_all)
            cor = R.item(1)
            print("\nCorrelation coefficient: %0.2f" % cor)

            print("\nSuggested polynomial a*x + b has [a, b]: %0.2f, %0.2f" %
                  tuple(z.tolist()))

            # Size the output
            plt.figure(dpi=128, figsize=(10, 6))

            # Chart gridlines
            plt.grid(None, 'major', 'both')

            # Axis tick formats
            axes = plt.gca()
            for tick in axes.get_xticklabels():
                tick.set_fontname("Calibri")
                tick.set_fontsize(12)
                tick.set_rotation('vertical')
            for tick in axes.get_yticklabels():
                tick.set_fontname("Calibri")
                tick.set_fontsize(12)

            # Axis labels, scatter of the vintages and the fitted line
            color = 'tab:blue'
            plt.xlabel("Rating Estimate (weather fundamentals)", fontsize=12)
            plt.ylabel("Rating", color='black', fontsize=12)
            plt.scatter(y_values_pred_all, y_values_all, color=color)
            plt.plot(xp, p(xp), color='red')
            plt.tick_params(axis='y', labelcolor=color)

            # Label every point with its vintage year
            for i, txt in enumerate(n_values_all):
                plt.annotate(txt, (y_values_pred_all[i], y_values_all[i]))

            # Remove the top border
            plt.gca().spines['top'].set_visible(False)

            # Chart title
            plt.title(str(self.address) + " Rating vs Estimate", fontsize=14)

            # Show chart
            plt.show()
Exemple #3
0
        def proceed_with_method():
            """Regress ratings on month-6 weather terms plus a strike count.

            Vintages are split by date: keys after 1970-12-31 and before
            2000-12-31 train the model, keys from 2000-12-31 on test it.  A
            vintage's feature row is ``[func_v(v), func_p(p), strikes]`` where
            ``v`` and ``p`` are the month-6 entries of the two seasonal
            weather dicts and ``strikes`` comes from ``count_strikes``.
            Vintages without month-6 data are left out.  Rows are keyed by
            year, so a repeated year keeps only its latest row.

            The fitted coefficients, test-set MSE and R2, the correlation
            between ratings and predictions and a straight-line fit are
            printed, and a scatter of rating against predicted rating is
            shown.

            Raises:
                ValueError: if no training or no test vintage has features.
            """
            weather_dict_p_raw = Chateau(self.address).weather_dict('p')
            weather_dict_v_raw = Chateau(self.address).weather_dict('v')

            weather_dict_p = dict_unpacker(weather_dict_p_raw)
            weather_dict_v = dict_unpacker(weather_dict_v_raw)

            rating_dict_raw = Chateau_rating(self.address).get_rating_data()
            rating_dict = dict_unpacker(rating_dict_raw)

            seasonal_weather_dict_p = seasonal_weather_dict(weather_dict_p)
            seasonal_weather_dict_v = seasonal_weather_dict(weather_dict_v)

            av_seasonal_weather_dict_p = average_seasonal_weather_dict(
                weather_dict_p)
            av_seasonal_weather_dict_v = average_seasonal_weather_dict(
                weather_dict_v)

            def func_p(x):
                return -0.57 * x * x + 2.23 * x + 92.78

            def func_v(x):
                return -0.29 * x * x + 12.85 * x - 43.96

            def count_strikes(year):
                """Count the months of ``year`` that fall outside the average.

                One strike for each month 4-9 whose 'v' value is below that
                month's average, plus one for each month 5-9 whose 'p' value
                exceeds 1.5 times that month's average.  Months that cannot
                be looked up are ignored.
                """
                # NOTE(review): earlier code assigned a weight here (0.5 for
                # months 7-9, otherwise 1) but never applied it, so strikes
                # stay unweighted -- confirm whether weighting was intended.
                strikes = 0
                for month in range(4, 10):
                    try:
                        if seasonal_weather_dict_v[eomonth(
                                year, month)] < av_seasonal_weather_dict_v[
                                    month]:
                            strikes += 1
                    except Exception:
                        continue
                for month in range(5, 10):
                    try:
                        if seasonal_weather_dict_p[eomonth(
                                year,
                                month)] > 1.5 * av_seasonal_weather_dict_p[
                                    month]:
                            strikes += 1
                    except Exception:
                        continue
                return strikes

            def feature_row(year):
                """Return the feature row of ``year``, or None without data."""
                try:
                    av_v = seasonal_weather_dict_v[eomonth(year, 6)]
                    av_p = seasonal_weather_dict_p[eomonth(year, 6)]
                    row = [func_v(av_v), func_p(av_p)]
                except Exception:
                    # Best effort: no usable month-6 data for this vintage.
                    return None
                row.append(count_strikes(year))
                return row

            train_start = datetime(1970, 12, 31)
            test_start = datetime(2000, 12, 31)

            # year -> (feature row, rating).  Every qualifying vintage is
            # kept, including the last one of each set.
            train_rows, test_rows = {}, {}
            for key, rating in rating_dict.items():
                if key >= test_start:
                    rows = test_rows
                elif key > train_start and key.year > 1970:
                    rows = train_rows
                else:
                    continue

                row = feature_row(key.year)
                if row is not None:
                    rows[key.year] = (row, rating)

            if not train_rows or not test_rows:
                raise ValueError(
                    "need at least one training and one test vintage with "
                    "weather data to fit and score the model")

            x_values_train = [row for row, _ in train_rows.values()]
            y_values_train = [rating for _, rating in train_rows.values()]
            n_values_train = list(train_rows)

            x_values_test = [row for row, _ in test_rows.values()]
            y_values_test = [rating for _, rating in test_rows.values()]
            n_values_test = list(test_rows)

            x_train = np.array(x_values_train)
            x_test = np.array(x_values_test)
            x_all = np.array(x_values_train + x_values_test)
            y_values_all = y_values_train + y_values_test
            n_values_all = n_values_train + n_values_test

            # Fit on the training vintages, predict the test set and all rows.
            regr = linear_model.LinearRegression()
            regr.fit(x_train, y_values_train)
            y_values_pred = regr.predict(x_test)
            y_values_pred_all = regr.predict(x_all)

            print('Coefficients: \n', regr.coef_)
            print("Mean squared error: %.2f" %
                  mean_squared_error(y_values_test, y_values_pred))
            print('R2 score: %.2f' % r2_score(y_values_test, y_values_pred))

            # Straight line of rating against predicted rating, for the chart.
            z = np.polyfit(y_values_pred_all, y_values_all, 1)
            p = np.poly1d(z)
            xp = np.linspace(min(y_values_pred_all), max(y_values_pred_all),
                             100)

            # corrcoef returns a 2x2 matrix; flat index 1 is the off-diagonal.
            R = np.corrcoef(y_values_all, y_values_pred_all)
            cor = R.item(1)
            print("\nCorrelation coefficient: %0.2f" % cor)

            print("\nSuggested polynomial a*x + b has [a, b]: %0.2f, %0.2f" %
                  tuple(z.tolist()))

            # Size the output
            plt.figure(dpi=128, figsize=(10, 6))

            # Chart gridlines
            plt.grid(None, 'major', 'both')

            # Axis tick formats
            axes = plt.gca()
            for tick in axes.get_xticklabels():
                tick.set_fontname("Calibri")
                tick.set_fontsize(12)
                tick.set_rotation('vertical')
            for tick in axes.get_yticklabels():
                tick.set_fontname("Calibri")
                tick.set_fontsize(12)

            # Axis labels, scatter of the vintages and the fitted line
            color = 'tab:blue'
            plt.xlabel("Rating Estimate (weather fundamentals)", fontsize=12)
            plt.ylabel("Rating", color='black', fontsize=12)
            plt.scatter(y_values_pred_all, y_values_all, color=color)
            plt.plot(xp, p(xp), color='red')
            plt.tick_params(axis='y', labelcolor=color)

            # Label every point with its vintage year
            for i, txt in enumerate(n_values_all):
                plt.annotate(txt, (y_values_pred_all[i], y_values_all[i]))

            # Remove the top border
            plt.gca().spines['top'].set_visible(False)

            # Chart title
            plt.title(str(self.address) + " Rating vs Estimate", fontsize=14)

            # Show chart
            plt.show()
Exemple #4
0
        def proceed_with_method():
            """Plot rating against a weather "strike" magnitude per vintage.

            For every rating keyed after 1970-12-31 (and with a year above
            1970) the strike value is the sum of two parts:

            * for months 4-9, how far the 'v' value lies below that month's
              average, counted only when it is below;
            * for months 5-9, how far the 'p' value lies above that month's
              average, counted only when it exceeds 1.5 times the average.

            Months that cannot be looked up are ignored.  A straight line is
            fitted to (strike, rating), its coefficients and the correlation
            between ratings and fitted values are printed, and the scatter
            with the fitted line is shown.
            """
            weather_dict_p_raw = Chateau(self.address).weather_dict('p')
            weather_dict_v_raw = Chateau(self.address).weather_dict('v')

            weather_dict_p = dict_unpacker(weather_dict_p_raw)
            weather_dict_v = dict_unpacker(weather_dict_v_raw)

            rating_dict_raw = Chateau_rating(self.address).get_rating_data()
            rating_dict = dict_unpacker(rating_dict_raw)

            seasonal_weather_dict_p = seasonal_weather_dict(weather_dict_p)
            seasonal_weather_dict_v = seasonal_weather_dict(weather_dict_v)

            av_seasonal_weather_dict_p = average_seasonal_weather_dict(
                weather_dict_p)
            av_seasonal_weather_dict_v = average_seasonal_weather_dict(
                weather_dict_v)

            x_values, y_values, n_values = [], [], []

            for key, rating in rating_dict.items():
                if not (key > datetime(1970, 12, 31) and key.year > 1970):
                    # Filtered-out vintages must not reach the plot at all.
                    continue

                year = key.year

                strike_v = 0
                for month in range(4, 10):
                    try:
                        observed = seasonal_weather_dict_v[eomonth(
                            year, month)]
                        average = av_seasonal_weather_dict_v[month]
                        if observed < average:
                            strike_v = strike_v + (average - observed)
                    except Exception:
                        # Best effort: skip months that cannot be evaluated.
                        continue

                strike_p = 0
                for month in range(5, 10):
                    try:
                        observed = seasonal_weather_dict_p[eomonth(
                            year, month)]
                        average = av_seasonal_weather_dict_p[month]
                        if observed > 1.5 * average:
                            strike_p = strike_p + (observed - average)
                    except Exception:
                        continue

                # Recorded only for vintages that passed the date filter, so
                # each point carries its own strike value.
                x_values.append(strike_v + strike_p)
                y_values.append(rating)
                n_values.append(year)

            # Best-fit straight line through (strike, rating)
            z = np.polyfit(x_values, y_values, 1)
            p = np.poly1d(z)
            xp = np.linspace(min(x_values), max(x_values), 100)

            # Correlation between the ratings and the fitted values; corrcoef
            # returns a 2x2 matrix and flat index 1 is the off-diagonal.
            R = np.corrcoef(y_values, p(x_values))
            cor = R.item(1)
            print("\nCorrelation coefficient: %0.2f" % cor)

            print("\nSuggested polynomial a*x + b has [a, b]: %0.2f, %0.2f" %
                  tuple(z.tolist()))

            # Size the output
            plt.figure(dpi=128, figsize=(10, 6))

            # Chart gridlines
            plt.grid(None, 'major', 'both')

            # Axis tick formats
            axes = plt.gca()
            for tick in axes.get_xticklabels():
                tick.set_fontname("Calibri")
                tick.set_fontsize(12)
                tick.set_rotation('vertical')
            for tick in axes.get_yticklabels():
                tick.set_fontname("Calibri")
                tick.set_fontsize(12)

            # Axis labels, scatter of the vintages and the fitted line
            # NOTE(review): the x axis holds the combined strike value, yet
            # it is labelled "Temp" and the title says "Rating vs Price" --
            # confirm the intended wording.
            color = 'tab:blue'
            plt.xlabel("Temp", fontsize=12)
            plt.ylabel("Rating", color='black', fontsize=12)
            plt.scatter(x_values, y_values, color=color)
            plt.plot(xp, p(xp), color='red')
            plt.tick_params(axis='y', labelcolor=color)

            # Label every point with its vintage year
            for i, txt in enumerate(n_values):
                plt.annotate(txt, (x_values[i], y_values[i]))

            # Remove the top border
            plt.gca().spines['top'].set_visible(False)

            # Chart title
            plt.title(str(self.address) + " Rating vs Price", fontsize=14)

            # Show chart
            plt.show()
Exemple #5
0
        def proceed_with_method():
            """Plot rating against the mean 'v' weather value of a month window.

            ``start_month`` and ``end_month`` are read from the enclosing
            scope.  For every rating above 96 keyed after 1970-12-31, the 'v'
            samples dated strictly between ``eomonth(year, start_month - 1)``
            and ``eomonth(year, end_month - 1)`` are averaged; vintages
            without any sample in that window are skipped.  A quadratic is
            fitted to (mean, rating), its coefficients and the correlation
            between ratings and fitted values are printed, and the scatter
            with the fitted curve is shown.

            Only the 'v' weather series and the ratings are loaded; nothing
            else feeds into the result.
            """
            weather_dict_v_raw = Chateau(self.address).weather_dict('v')
            weather_dict_v = dict_unpacker(weather_dict_v_raw)

            rating_dict_raw = Chateau_rating(self.address).get_rating_data()
            rating_dict = dict_unpacker(rating_dict_raw)

            x_values, y_values, n_values = [], [], []

            for key, rating in rating_dict.items():
                if not (key > datetime(1970, 12, 31) and rating > 96):
                    continue

                # The window bounds depend only on the vintage year, so work
                # them out once instead of once per weather sample.
                window_end = eomonth(key.year, end_month - 1)
                window_start = eomonth(key.year, start_month - 1)

                v_values = [
                    float(data) for w_date, data in weather_dict_v.items()
                    if w_date < window_end and w_date > window_start
                ]

                if v_values:
                    x_values.append(statistics.mean(v_values))
                    y_values.append(rating)
                    n_values.append(key.year)

            # Best-fit quadratic through (mean weather value, rating)
            z = np.polyfit(x_values, y_values, 2)
            p = np.poly1d(z)
            xp = np.linspace(min(x_values), max(x_values), 100)

            # Correlation between the ratings and the fitted values; corrcoef
            # returns a 2x2 matrix and flat index 1 is the off-diagonal.
            R = np.corrcoef(y_values, p(x_values))
            cor = R.item(1)
            print("\n For month:" + str(start_month))
            print("\nCorrelation coefficient: %0.2f" % cor)

            print("\nSuggested polynomial a*x^2 + bx + c has [a, b, c]: "
                  "%0.2f, %0.2f, %0.2f" % tuple(z.tolist()))

            # Size the output
            plt.figure(dpi=128, figsize=(10, 6))

            # Chart gridlines
            plt.grid(None, 'major', 'both')

            # Axis tick formats
            axes = plt.gca()
            for tick in axes.get_xticklabels():
                tick.set_fontname("Calibri")
                tick.set_fontsize(12)
                tick.set_rotation('vertical')
            for tick in axes.get_yticklabels():
                tick.set_fontname("Calibri")
                tick.set_fontsize(12)

            # Axis labels, scatter of the vintages and the fitted curve
            # NOTE(review): the title says "Rating vs Price" although the x
            # axis is a weather mean -- confirm the intended wording.
            color = 'tab:blue'
            plt.xlabel("Temp", fontsize=12)
            plt.ylabel("Rating", color='black', fontsize=12)
            plt.scatter(x_values, y_values, color=color)
            plt.plot(xp, p(xp), color='red')
            plt.tick_params(axis='y', labelcolor=color)

            # Label every point with its vintage year
            for i, txt in enumerate(n_values):
                plt.annotate(txt, (x_values[i], y_values[i]))

            # Remove the top border
            plt.gca().spines['top'].set_visible(False)

            # Chart title
            plt.title(str(self.address) + " Rating vs Price", fontsize=14)

            # Show chart
            plt.show()