def login_and_getdata():
    print('Fetching cookies...')
    try:
        s.cookies = get_cookie_from_file()
    except Exception:
        print("Failed to load cookies from file...\n"
              "Trying a form login to obtain them instead...")
        s.cookies = get_cookie_from_net()
    # start scraping the data
    get_all_data(s, headers)

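# A hedged sketch of what get_cookie_from_file might look like, assuming the
# cookie jar is pickled to a local file; the helper's real body is not shown
# in this snippet, and the file name below is a hypothetical default.
import pickle

def get_cookie_from_file(path='cookies.pkl'):
    # load a previously saved cookie jar; raises (and is caught by the
    # caller above) if no cookie file exists yet
    with open(path, 'rb') as f:
        return pickle.load(f)
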
def forecast_regression(dt, car_park):
    """Regression-based forecast of time series."""
    # get training data (for 'car_park')
    df = get_data.get_all_data()
    # build X and y for model fitting
    # TODO
    # train random forest model
    rf = RandomForestRegressor()

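# A minimal sketch of the missing X/y construction, assuming the dataframe
# exposes the same columns forecast_linear uses below (datetime, name, cap);
# the time-based features are an illustrative choice, not the author's design.
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

def forecast_regression_sketch(dt, car_park, df):
    # restrict to past observations of this car park
    hist = df[(df.datetime < str(dt)) & (df.name == car_park)]
    X = pd.DataFrame({'hour': hist.datetime.dt.hour,
                      'weekday': hist.datetime.dt.dayofweek})
    y = hist.cap
    rf = RandomForestRegressor()
    rf.fit(X, y)
    # predict capacity at the requested timestamp
    return rf.predict(pd.DataFrame({'hour': [dt.hour],
                                    'weekday': [dt.dayofweek]}))[0]
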
def forecast_linear(dt, car_park):
    """Simple linear prediction."""
    # get the latest two observations before the given point in time
    df = get_data.get_all_data()
    df2 = df[(df.datetime < str(dt)) & (df.name == car_park)]
    df3 = df2.sort_values(by='datetime')
    df4 = df3.tail(n=2)
    second_latest = df4.head(n=1)
    latest = df4.tail(n=1)
    # calculate delta for capacity (per second); use total_seconds() so that
    # gaps longer than a day are not silently truncated by .seconds
    diff_time = latest.datetime.iloc[0] - second_latest.datetime.iloc[0]
    diff_sec = diff_time.total_seconds()
    diff_cap = latest.cap.iloc[0] - second_latest.cap.iloc[0]
    d_cap_per_sec = diff_cap / diff_sec
    # linearly extrapolate to the requested point in time
    fcst_sec = (dt - latest.datetime.iloc[0]).total_seconds()
    fcst_cap = latest.cap.iloc[0] + d_cap_per_sec * fcst_sec
    # clamp the prediction at zero (capacity cannot be negative)
    fcst_cap = max(fcst_cap, 0)
    return fcst_cap

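# Example call (the car-park name is hypothetical):
# >>> import pandas as pd
# >>> forecast_linear(pd.Timestamp('2019-06-01 12:00:00'), 'example-car-park')
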
import get_data as gd

'''Read all records from a JSON file:
get_all_data("面積.json")      -> area data
get_all_data("人口結構.json")  -> population-structure data
get_all_data("所得.json")      -> income data
get_all_data("性別.json")      -> gender data
get_all_data("社會增加.json")  -> net-migration ("social increase") data
get_all_data("教育程度.json")  -> education-level data
'''
d = gd.get_all_data("面積.json")

# read the data for a single neighborhood
d_a = gd.get_data_by_neighborhood("性別.json", "頂寮里")

print(d)
print(d_a)

def load_ema_data(self, path):
    self.emas = get_all_data(path, 'ema')

def load_lsf_data(self, path):
    self.lsfs = get_all_data(path, 'lsf')

def write_fixture_ranks(tfd, curr_gw, n_avgs, look_ahead):
    # function header assumed from the call in __main__ below
    os.makedirs(OUT_PARENT_DIR, exist_ok=True)  # don't fail if the dir exists
    outf_path = '%s/%s' % (OUT_PARENT_DIR, 'fr%s.csv' % curr_gw)
    with open(outf_path, 'w', encoding='utf8') as outf:
        wrtr = csv.DictWriter(outf, hdrs)
        wrtr.writeheader()
        if curr_gw:
            # natural number -> 0-index
            curr_gw -= 1
        rfs = gen_ranked_fixtures(tfd, curr_gw, n_avgs, look_ahead)
        rfd = [dict(zip(hdrs, rf)) for rf in rfs]
        # presorted_rfs = sorted(rfd, key=lambda rf: float(rf[T0AVG]))  # sort by diff
        presorted_rfs = sorted(rfd, key=lambda rf: rf['team'])
        wrtr.writerows(presorted_rfs)
        # truncate the final newline to facilitate auto-csv-rendering on GitHub
        outf.truncate(outf.tell() - len(os.linesep) * 2)


if __name__ == '__main__':
    get_data.get_all_data()
    N_AVGS = 3
    LOOK_AHEAD = 4
    td = gen_teams()
    tfd = gen_team_fixture(td)
    # manual update, e.g. for double gameweeks
    if update_team2fixts:
        tfd = update_fixture(tfd, gameweek)
    write_fixture_ranks(tfd, gameweek, N_AVGS, LOOK_AHEAD)

import get_data as gd

result = input()
result = open(result, "w", encoding="utf8")
d_soc_inc = gd.get_all_data("社會增加.json")
list_dist = ["下寮里", "大村里", "大庄里", "中正里", "中和里", "文化里",
             "永安里", "永寧里", "安仁里", "南簡里", "草湳里", "頂寮里",
             "福德里", "興農里"]
list_rate = []
for dist in list_dist:
    rate = d_soc_inc[dist][3]
    list_rate.append((dist, rate))
# sort descending by rate, so index 0 is the highest and index 13 the lowest
list_rate = sorted(list_rate, key=lambda x: (float(x[1]), x[0]), reverse=True)

rresult = []
rate_low = float(list_rate[13][1])
rate_adj = 10 / (float(list_rate[0][1]) - rate_low)
for i in range(14):
    dist = list_rate[i][0]
    rate = float(list_rate[i][1])
    if i == 0:
        com = [dist, 10]   # highest rate gets the full score
    elif i == 13:
        com = [dist, 0]    # lowest rate gets zero
    elif rate < 0:
        com = [dist, 0]    # negative rates are floored at zero
    else:
        score = (rate - rate_low) * rate_adj  # min-max scale onto 0-10
        com = [dist, score]
    rresult.append(com)

result.write("梧棲區社會增加率分數\n")  # "Wuqi District social-increase-rate scores"
result.write("\n")
for com in rresult:
    # the original snippet breaks off here; the output format below is an
    # assumption (district name followed by its score)
    result.write("%s %s\n" % (com[0], com[1]))

"""Plot target variable as time series.""" import get_data from ggplot import aes, geom_line, facet_wrap, ggplot if __name__ == "__main__": df = get_data.get_all_data() p = ggplot(df, aes('datetime', 'cap', group='date')) + \ geom_line(alpha=0.2) + \ facet_wrap('name') p.save('../output/time_series.pdf')
import get_data as gd

d_humcom = gd.get_all_data("人口結構.json")
print(d_humcom)

from sklearn.compose import make_column_transformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

from get_data import get_all_data

# scikit-learn tip #1: combine several preprocessing steps in one transformer
df = get_all_data()
df.head()

ohe = OneHotEncoder()
imp = SimpleImputer()
ct = make_column_transformer((ohe, ['imS']),
                             (imp, ['imL']),
                             remainder='passthrough')
new_matrix = ct.fit_transform(df)

# scikit-learn tip #2: a transformer can also encode just a single column
ct2 = make_column_transformer((ohe, ['imS']))
new_matrix_2 = ct2.fit_transform(df)
new_matrix_2.shape

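# To inspect the encoded column names produced by tip #2 (get_feature_names_out
# requires scikit-learn >= 1.0; the version requirement is an assumption here):
print(ct2.get_feature_names_out())
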
import get_data as gd

d_square = gd.get_all_data("面積.json")
list_dist = ["下寮里", "大村里", "大庄里", "中正里", "中和里", "文化里",
             "永安里", "永寧里", "安仁里", "南簡里", "草湳里", "頂寮里",
             "福德里", "興農里"]
list_square = []
for dist in list_dist:
    # append a (district, area) tuple; the original passed two arguments
    # to append(), which raises a TypeError
    list_square.append((dist, d_square[dist][1]))

people_com = input()
result = input()
people_com = open(people_com, "r", encoding="utf8")
result = open(result, "a", encoding="utf8")

Funder18 = dict()
F19to35 = dict()
F36to45 = dict()
Fotherage = dict()
Munder18 = dict()
M19to35 = dict()
M36to45 = dict()
Motherage = dict()
list_1part = []
list_allpart = []

for line in people_com:
    # strip the JSON punctuation line by line
    line = line.strip(" ")
    line = line.replace('"', "")
    line = line.strip("\n")
    line = line.strip(",")
    if line != "{" and line != "[" and line != "]" and line != "}":
        if "總計" not in line:  # skip the aggregate "total" rows
            cut = line.find(":")
            line = line[cut + 2:]

import get_data as gd

result = input()
result = open(result, "w", encoding="utf8")
d_gender = gd.get_all_data("性別.json")
d_square = gd.get_all_data("面積.json")
list_dist = ["下寮里", "大村里", "大庄里", "中正里", "中和里", "文化里",
             "永安里", "永寧里", "安仁里", "南簡里", "草湳里", "頂寮里",
             "福德里", "興農里"]
list_male = []
list_female = []
print(d_square)
for dist in list_dist:
    male = d_gender[dist][1]
    female = d_gender[dist][2]
    square = float(d_square[dist])  # area in km^2
    list_male.append((dist, male / square))
    list_female.append((dist, female / square))
list_male = sorted(list_male, key=lambda x: (x[1], x[0]), reverse=True)
list_female = sorted(list_female, key=lambda x: (x[1], x[0]), reverse=True)
print(list_male)
male_low = float(list_male[13][1])
female_low = float(list_female[13][1])
male_adj = 10 / (float(list_male[0][1]) - male_low)
female_adj = 10 / (float(list_female[0][1]) - female_low)
print(male_adj, "male_adj")
mresult = []
fresult = []

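# A hedged sketch of the scoring loop the snippet breaks off before, mirroring
# the min-max scaling used in the social-increase script above; the exact
# tie-handling and output format are assumptions:
for i, (dist, density) in enumerate(list_male):
    if i == 0:
        mresult.append([dist, 10])                 # densest district
    elif i == len(list_male) - 1:
        mresult.append([dist, 0])                  # sparsest district
    else:
        mresult.append([dist, (float(density) - male_low) * male_adj])
# the female list would be scored the same way with female_low / female_adj
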
import get_data as gd

result = input()
result = open(result, "w", encoding="utf8")
d_educ = gd.get_all_data("教育程度.json")
# hard-coded area (km^2) per district
d_square = {"下寮里": 0.3275, "大村里": 1.3775, "大庄里": 1.4125,
            "中正里": 0.2100, "中和里": 0.0954, "文化里": 0.1925,
            "永安里": 1.6575, "永寧里": 1.6275, "安仁里": 0.1325,
            "南簡里": 1.8750, "草湳里": 3.3175, "頂寮里": 0.8675,
            "福德里": 1.4995, "興農里": 2.0125}
list_dist = ["下寮里", "大村里", "大庄里", "中正里", "中和里", "文化里",
             "永安里", "永寧里", "安仁里", "南簡里", "草湳里", "頂寮里",
             "福德里", "興農里"]
d_a = gd.get_data_by_neighborhood("面積.json", "面積")
list_gra = []
list_college = []
list_high = []
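
# A hedged sketch of how the truncated script presumably continues: per-km^2
# density for each education level. The column positions inside d_educ[dist]
# are hypothetical (the JSON layout is not shown in this snippet):
GRA_IDX, COLLEGE_IDX, HIGH_IDX = 1, 2, 3  # assumed column order
for dist in list_dist:
    area = d_square[dist]
    list_gra.append((dist, d_educ[dist][GRA_IDX] / area))
    list_college.append((dist, d_educ[dist][COLLEGE_IDX] / area))
    list_high.append((dist, d_educ[dist][HIGH_IDX] / area))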