def remove_flag_if_not_updated(repo_username, repo_id, issue, issue_comments=None, needs_resubmit_content_id=cvar['NEEDS_RESUBMIT_CONTENT_ID'], remove_form_resubmit_comment_after=cvar['REMOVE_FORM_RESUBMIT_COMMENT_AFTER'], now=None, is_debug=cvar['DEBUG']): if now is None: now = datetime.now() if not issue: return False number = issue.get('number') if not number: return False if has_content_from_custom_submit_form(issue): return False comment = get_needs_resubmit_comment(repo_username, repo_id, issue, issue_comments=issue_comments, needs_resubmit_content_id=needs_resubmit_content_id) if comment is None: return False created_at = util.get_date(comment.get('created_at')) if created_at is None: return False remove_date = created_at + timedelta(days=remove_form_resubmit_comment_after) if remove_date > now: return False if not is_debug: github_api.delete_automated_issue_comments(repo_username, repo_id, number) return True
def __init__(self, data): self.name = util.get_string(data, "name") self.pid = util.get_string(data, "pid") self.start_time = util.get_date(data, "start_time") self.time_created = util.get_date(data, "time_created") self.time_started = util.get_date(data, "time_started") self.time_completed = util.get_date(data, "time_completed") self.url = util.get_string(data, "url") self.url_status = util.get_string(data, "url_status") self.url_headers = util.get_string(data, "url_headers") self.url_content = util.get_string(data, "url_content") self.status = util.get_string(data, "status") util.ensure(self.name, "name") util.ensure(self.start_time, "start_time") util.ensure(self.url, "url") util.ensure(self.status, "status")
def fit_predict_score(*, tr: pd.DataFrame, ts: pd.DataFrame, sub: pd.DataFrame, city: str, windows: list, start_date: pd.Timestamp, end_date: pd.Timestamp, impute_methods: list, n_thread: int, method: str='median', vprint=print) -> pd.DataFrame: current_date = start_date scores_df = [] while current_date < end_date: vprint(1, '# --- fitting predicting evaluating on {}'.format(get_date(current_date))) predictions = rolling_summary(sub=sub, data=tr, predict_start=current_date, windows=windows, n_thread=n_thread, method=method, impute_methods=impute_methods, vprint=vprint) truth = get_truth(city=city, data=ts, start_date=current_date + pd.Timedelta(1, unit='D')) scores = evaluate(city=city, truth=truth, predictions=predictions) scores['smape'] = pd.Series(scores).mean() scores['date'] = get_date(current_date) vprint(1, scores['smape']) current_date += pd.Timedelta(value=1, unit='D') scores_df.append(scores) scores_df = pd.DataFrame(scores_df) scores_df = scores_df[['date', 'smape'] + [col for col in scores_df.columns if col not in ['date', 'smape']]] return scores_df
def get_most_recent_datetime_creator_response(issue, comments): try: if not issue: return creator = issue.get('user') if not creator: return creator_login = creator.get('login') if not creator_login: return created_str = issue.get('created_at') if not created_str: return most_recent = util.get_date(created_str) if not comments or not len(comments): return most_recent for comment in comments: comment_user = comment.get('user') if not comment_user: continue comment_login = comment_user.get('login') if comment_login != creator_login: continue created_str = comment.get('created_at') if not created_str: continue created_at = util.get_date(created_str) if created_at > most_recent: most_recent = created_at return most_recent except Exception as ex: print('get_most_recent_datetime_creator_response error: %s' % ex)
def fit_predict_score(sub, tr, ts, start_date, end_date, city, history_length, changepoint_scale, num_changepoints, n_thread, vprint=print) -> pd.DataFrame: current_date = start_date scores_df = [] while current_date < end_date: vprint(1, '# --- fitting predicting evaluating on {}'.format(get_date(current_date))) predictions = fbprophet(sub=sub, data=tr, current_date=current_date, history_length=history_length, changepoint_scale=changepoint_scale, num_changepoints=num_changepoints, n_thread=n_thread, vprint=vprint) truth = get_truth(city=city, data=ts, start_date=current_date + pd.Timedelta(1, unit='D')) scores = evaluate(city=city, truth=truth, predictions=predictions) scores['smape'] = pd.Series(scores).mean() scores['date'] = get_date(current_date) vprint(1, scores['smape']) current_date += pd.Timedelta(value=1, unit='D') scores_df.append(scores) scores_df = pd.DataFrame(scores_df) scores_df = scores_df[['date', 'smape'] + [col for col in scores_df.columns if col not in ['date', 'smape']]] return scores_df
def create_targets(container_configs_list, bazel_version): """Creates the new docker_toolchain_autoconfig target if not exists. An example target located in configs/ubuntu16_04_clang/BUILD is: //configs/ubuntu16_04_clang:msan-ubuntu16_04-clang-1.0-bazel_0.15.0-autoconfig There is one target per container per Bazel version per config type. The script only creates new targets in the BUILD file if they do not exist, i.e. if a target for the given version of Bazel, type and config version already exists, then the script does not re-create it. Args: container_configs_list: list of ContainerConfigs, the list of ContainerConfigs to generate configs for. bazel_version: string, the version of Bazel used to generate the configs. """ container_sha_map = imp.load_source("toolchain_containers", SHA_MAP_FILE) clang_revision_map = imp.load_source("clang_revision", CLANG_REVISION_FILE) clang_revision = clang_revision_map.CLANG_REVISION for container_configs in container_configs_list: # Get the sha256 value of the container used to generate the configs. sha = container_sha_map.toolchain_container_sha256s()[ "%s_clang" % container_configs.distro] for config in container_configs.configs: # Get target basename from config definitions. target = get_autoconfig_target_name( config_type=config.config_type, distro=container_configs.distro, config_version=container_configs.version, bazel_version=bazel_version) with open(container_configs.get_target_build_path(), "a+") as build_file: # "a+" positions the stream at end of file; rewind before scanning for existing targets. build_file.seek(0) if target not in build_file.read(): tpl_file_path = os.path.join(GIT_ROOT, "release", "cc", "%s.tpl" % config.config_type) with open(tpl_file_path, "r") as tpl_file: tpl = Template(tpl_file.read()).substitute( DATE=get_date(), DISTRO=container_configs.distro, CONFIG_VERSION=container_configs.version, BAZEL_VERSION=bazel_version, NAME=container_configs.image, SHA=sha, CLANG_REVISION=clang_revision) build_file.write(tpl)
def __init__(self, number=None, data={}): self.number = number self.data = data self.score = 0 self.number_of_comments = 0 self.score_data = {} self.issue = self.data.get('issue', {}) self.title = self.issue.get('title') if not self.title: self.title = '' self.body = self.issue.get('body') if not self.body: self.body = '' self.user = self.issue.get('user', {}) self.login = self.user.get('login', '') or '' self.avatar = self.user.get('avatar_url', '') or '' self.assignee = '' assignee = self.issue.get('assignee') if assignee: self.assignee = assignee.get('login', '') or '' self.milestone = '' milestone = self.issue.get('milestone') if milestone: self.milestone = milestone.get('title', '') or '' self.references = 0 self.created_at = util.get_date(self.issue.get('created_at')) self.updated_at = util.get_date(self.issue.get('updated_at')) self.comments = self.data.get('issue_comments') if not self.comments or not isinstance(self.comments, list): self.comments = [] self.number_of_comments = len(self.comments) self.org_members = self.data.get('org_members', [])
def __init__(self, camera, brick, name): date = util.get_date() if not os.path.exists(name): os.mkdir(name) self.dir_name = os.path.join(name, date) self.pickle_name = os.path.join(name, date + "_pickle") if not os.path.isdir(self.dir_name): os.mkdir(self.dir_name) self.data_dict = {} self.count = 0 self.brick = brick self.camera = camera
def __init__(self, **kwargs): self.__dict__.update(kwargs) self.last_conquest = datetime(year=1444, month=11, day=11) try: for k, v in yield_info( ((k, v) for k, v in self.history.items() if k[0].isnumeric())): inner, tag = v.popitem() # fixme this probably breaks with tag-switching countries, see occupations as well if inner == 'owner': self.last_conquest = get_date(k) except AttributeError: # no history -> uncolonized? pass
def test_get_next_date_monthly_next_month_first(self): schedule_lines = [ '2013/06/28 lightning energy', ' ;; schedule ; monthly ; 1st', ] schedule_thing = ScheduleThing(schedule_lines) expected_next_date = util.get_date('2013/07/01') self.assertEqual( expected_next_date, schedule_thing._get_next_date(schedule_thing.thing_date) )
def test_get_next_date_monthly_next_month_eom30(self): schedule_lines = [ '2013/07/30 lightning energy', ' ;; schedule ; monthly ; eom30 ; ; auto', ] schedule_thing = ScheduleThing(schedule_lines) expected_next_date = util.get_date('2013/08/30') self.assertEqual( expected_next_date, schedule_thing._get_next_date(schedule_thing.thing_date) )
def test_get_next_date_monthly_multiple_days_this_month_again(self): schedule_lines = [ '2013/06/08 lightning energy', ' ;; schedule ; monthly ; 7th, 12th' ] schedule_thing = ScheduleThing(schedule_lines) expected_next_date = util.get_date('2013/06/12') self.assertEqual( expected_next_date, schedule_thing._get_next_date(schedule_thing.thing_date) )
def test_get_next_date_monthly_next_month_again(self): schedule_lines = [ '2013/06/12 lightning energy', ' ;; schedule ; monthly ; 12th ; ; auto', ] schedule_thing = ScheduleThing(schedule_lines) expected_next_date = util.get_date('2013/07/12') self.assertEqual( expected_next_date, schedule_thing._get_next_date(schedule_thing.thing_date) )
def test_get_next_date_monthly_multiple_days_next_month(self): schedule_lines = [ '2013/06/27 lightning energy', ' ;; schedule ; monthly ; 7th, 27th ; ; auto', ] schedule_thing = ScheduleThing(schedule_lines) expected_next_date = util.get_date('2013/07/07') self.assertEqual( expected_next_date, schedule_thing._get_next_date(schedule_thing.thing_date) )
def test_get_next_date_monthly_interval3(self): schedule_lines = [ '2013/06/15 lightning energy', ' ;; schedule ; monthly ; 15th ; 3 ; auto', ] schedule_thing = ScheduleThing(schedule_lines) expected_next_date = util.get_date('2013/09/15') self.assertEqual( expected_next_date, schedule_thing._get_next_date(schedule_thing.thing_date) )
def test_get_next_date_monthly_too_many70(self): schedule_lines = [ '2013/07/15 lightning energy', ' ;; schedule ; monthly ; 70th ; 1 ; auto', ] schedule_thing = ScheduleThing(schedule_lines) expected_next_date = util.get_date('2013/07/31') self.assertEqual( expected_next_date, schedule_thing._get_next_date(schedule_thing.thing_date) )
def test_get_next_date_monthly_interval12eom_leap_two(self): schedule_lines = [ '2012/02/29 lightning energy', ' ;; schedule ; monthly ; eom ; 12 ; auto', ] schedule_thing = ScheduleThing(schedule_lines) expected_next_date = util.get_date('2013/02/28') self.assertEqual( expected_next_date, schedule_thing._get_next_date(schedule_thing.thing_date) )
async def calc_moy(self, ctx: Context): print(f"\n{ctx.author} (moy) - Calculating average") if ctx.author.dm_channel is None: await ctx.author.create_dm() can_update = True marks = {} if self.db.users.count_documents({'id': ctx.author.id}) == 0: # Get data from user username = await util.get_dm_var(self.bot, ctx, "Nom d'utilisateur ?") password = await util.get_dm_var(self.bot, ctx, "Mot de passe ?") save = await util.get_dm_var(self.bot, ctx, "Voulez-vous sauvegarder vos données dans la base de données ? O/N") if save.lower() == "o": # Create user entry self.db.users.insert_one( {'id': ctx.author.id, 'username': username, 'password': password, 'last_updated': "", 'data': ""}) else: user = self.db.users.find_one({'id': ctx.author.id}) username = user.get('username') password = user.get('password') # update only happen daily can_update = user.get('last_updated') != util.get_date() if not can_update: marks = user.get('data') if can_update: message = await ctx.author.dm_channel.send("Attente de disponibilité du service ...") self.mc.wait_until_available() await message.edit(content="Calcul en cours ...") try: marks = self.mc.get_marks(username, password) self.db.users.update_one({'id': ctx.author.id}, {'$set': {'last_updated': util.get_date(), 'data': marks}}) except Exception as ex: print(ex) self.mc.stop() if marks != {}: embed = discord.Embed(title="Moyenne", description=f'```{self.mc.sort_data(marks)}```', colour=0xff0000) await ctx.author.dm_channel.send(embed=embed) else: await ctx.author.dm_channel.send("Une erreur s'est produite ! Réessayez plus tard.") print(f"{ctx.author} (moy) - Done")
def get_artists(location_id, location, start, end): ''' get events given a location ID ''' artists = set() latest_date = start end_date = end page_number = 1 while latest_date < end_date: response = get_response(location_id, page_number) for event in response['resultsPage']['results']['event']: if location.lower() in str(event['location']['city']).lower(): if event['start']['datetime']: date = get_date(event['start']['datetime'][:10]) if latest_date < date: latest_date = get_date(event['start']['datetime'][:10]) if event['performance']: artists.add( event['performance'][0]['artist']['displayName']) page_number += 1 return list(artists)
def __init__(self, robot, cam, name=None): date = get_date() if name is not None: if not os.path.exists(name): os.mkdir(name) self.dir_name = os.path.join(name, date) self.pickle_name = os.path.join(name, date + '_pickle') else: self.dir_name = date self.pickle_name = os.path.join(date + '_pickle') os.mkdir(self.dir_name) self.data_dict = {} self.count = 0 self.robot = robot self.camera = cam
def _parse_top_line(self, line): m = re.match(LedgerThing.TOP_LINE_REGEX, line) the_date, code, payee = m.groups() # date can be modified self.thing_date = util.get_date(the_date) # payee and transaction code are read-only if code is not None: self.transaction_code = str(code) if payee is None or payee.strip() == '': self.payee = UNSPECIFIED_PAYEE else: self.payee = payee.strip()
def crawl(): # make web request soup = http_get(BASE_URL + MENU_URL) # locate html data html = soup.body.contents[-2].table.tbody.contents[3].td.table.contents # stores food that has already been added to the table food_cache = {} # extract data for MEAL in MEALS: meal_index = MEALS[MEAL] meal_data = html[meal_index] for DINING_COMMON in DINING_COMMONS: dc_index = DINING_COMMONS[DINING_COMMON] if len(meal_data.contents) <= dc_index: break meal_dc_data = meal_data.contents[dc_index] for entry in meal_dc_data.find_all('a'): meal_name = entry.contents[0].string meal_name, gluten_free = truncate_meal_name(meal_name) # skip the "Nutritive Analysis" link if 'nutritive analysis' in meal_name.lower(): continue # create database models object if meal_name in food_cache: food_obj = food_cache[meal_name] else: # food is not located in local cache # check if food is in database food_obj = Food.query.filter_by(name=meal_name).first() # not found in database, crawl page if food_obj is None: food_obj = extract_food_info(entry) db.session.add(food_obj) # add food to the cache food_cache[meal_name] = food_obj menu_obj = Menu(date = get_date(), location = LOCATION_TO_ENUM[DINING_COMMON], \ meal = MEAL_TO_ENUM[MEAL], food = food_obj) db.session.add(menu_obj) db.session.commit()
def rolling_summary(*, sub: pd.DataFrame, data: pd.DataFrame, predict_start: pd.Timestamp, windows: list, method='median', impute_methods: list, n_thread: int=1, vprint=print): vprint(2, 'rolling median prediction starting {}'.format(get_date(predict_start))) vprint(2, data.utc_time.min()) vprint(2, data.utc_time.max()) verbose_level = getattr(vprint, 'verbose_level', 500) get_medians_thread = partial(get_summary, data=data, predict_start=predict_start, method=method, impute_methods=impute_methods, verbose_level=verbose_level) pool = Pool(n_thread) medians = pool.map(get_medians_thread, windows) pool.terminate() pool.close() predictions = medians[0].copy() for col in ['PM2.5', 'PM10', 'O3']: predictions[col] = pd.concat([median[col] for median in medians], axis=1).median(axis=1) submissions = pd.merge(left=sub[['test_id']], right=predictions[SUB_COLS], how='left') return submissions
def get_high_high_county(seven_day, seven_day_adjusted, stable_unadjusted, stable_adjusted): date_list = util.get_date(ndays=7) seven_day = util.get_high_high_county(seven_day, date_list) seven_day_adjusted = util.get_high_high_county(seven_day_adjusted, date_list) stable_unadjusted = util.get_high_high_county(stable_unadjusted, date_list) stable_adjusted = util.get_high_high_county(stable_adjusted, date_list) output = { "seven_day_average": seven_day, "seven_day_average_adjusted": seven_day_adjusted, "stable_unadjusted": stable_unadjusted, "stable_adjusted": stable_adjusted } return output
def manage_old_issue(repo_username, repo_id, issue): # disable auto closing issues return if not issue: return number = issue.get('number') if not number: return if is_closed(issue): return if is_pull_request(issue): return updated_str = issue.get('updated_at') if not updated_str: return updated_at = util.get_date(updated_str) if not is_old_issue(updated_at): return if has_labels_preventing_close(issue): return if has_comments_preventing_close(issue): return if has_assignee_preventing_close(issue): return if has_milestone_preventing_close(issue): return if github_api.is_org_member(repo_username, issue['user']['login']): return if cvar['DO_NOT_CLOSE_WHEN_REFERENCED'] is False: issue_events = github_api.fetch_issue_events(repo_username, repo_id, number) if has_events_preventing_close(issue_events): return return close_old_issue(repo_username, repo_id, number, issue)
def cdf_hitMissThroughput(): data = np.loadtxt("../data/throughputRelation/data/2/train/sample.csv", skiprows=1) # size hThroughput mThroughput hTime mTime rtt maxMT reqCount totalMT avgMT data_hThroughput = data[:, 1].flatten() data_mThroughput = data[:, 2].flatten() print(np.average(data_hThroughput)) print(np.average(data_mThroughput)) # fig = plt.figure(figsize=(4, 3)) util.drawCDF(data=data_hThroughput/1024/1024, label="hit throughput", marker=util.markerL[0], color=util.colorL[0]) util.drawCDF(data=data_mThroughput/1024/1024, label="miss throughput", marker=util.markerL[1], color=util.colorL[1]) plt.legend(loc='best', fontsize=20) plt.tick_params(labelsize=23) plt.xlabel("Throughput(Mbps)", fontsize=25) plt.ylabel("CDF", fontsize=25) dt = util.get_date(timeFlag=False) plt.grid(ls='--') plt.savefig("./plot/cdf_hitMissThroughput_"+dt+".pdf", bbox_inches = 'tight') plt.show()
def predict(*, pred_date: str, bj_windows: str='golden_8', ld_windows: str='golden_8', bj_method: str='median', ld_method: str='median', bj_lgbm: bool=True, ld_lgbm: bool=True, bj_fwbw: bool=True, ld_fwbw: bool=True, n_thread: int=8, save: bool=True, dosubmit: bool=False, suffix: str='dummy', verbose: int=2): vprint = get_verbose_print(verbose_level=verbose) pred_date = pd.to_datetime(pred_date) get_new_data = pred_date > pd.to_datetime('2018-03-28') sub = pd.read_csv("../input/sample_submission.csv") OUTDIR = '../submission/sub_{}-{}-{}'.format(pred_date.year, pred_date.month, pred_date.day) os.system('mkdir -p {}'.format(OUTDIR)) predict_start_day = pred_date + pd.Timedelta(1, unit='D') predict_start = pd.to_datetime(get_date(predict_start_day)) bj_data = get_city_data(city='bj', vprint=vprint, impute_with_lgbm=bj_lgbm, get_new_data=get_new_data) ld_data = get_city_data(city='ld', vprint=vprint, impute_with_lgbm=ld_lgbm, get_new_data=get_new_data) vprint(2, bj_data.head()) vprint(2, bj_data.loc[bj_data['stationId']!= 'zhiwuyuan_aq'].tail()) vprint(2, ld_data.head()) vprint(2, ld_data.tail()) bj_fwbw_impute_methods = ['day', 'mean'] if bj_fwbw else [] ld_fwbw_impute_methods = ['day', 'mean'] if ld_fwbw else [] bj_pred = rolling_summary(sub=sub, data=bj_data, predict_start=predict_start, windows=MEDIAN_WINDOWS[bj_windows], n_thread=n_thread, method=bj_method, impute_methods=bj_fwbw_impute_methods, vprint=vprint) ld_pred = rolling_summary(sub=sub, data=ld_data, predict_start=predict_start, windows=MEDIAN_WINDOWS[ld_windows], n_thread=n_thread, method=ld_method, impute_methods=ld_fwbw_impute_methods, vprint=vprint) submissions = sub.copy() bj_cond = submissions['test_id'].map(lambda x: x.split('#')[0] in BEIJING_STATIONS) ld_cond = submissions['test_id'].map(lambda x: x.split('#')[0] in LONDON_STATIONS) submissions.loc[bj_cond] = bj_pred.loc[bj_cond].values submissions.loc[ld_cond] = ld_pred.loc[ld_cond].values submissions['PM2.5'] = submissions['PM2.5'].map(lambda x: max(0, x)) submissions['PM10'] = submissions['PM10'].map(lambda x: max(0, x)) submissions['O3'] = submissions['O3'].map(lambda x: max(0, x)) if save: if not suffix: filepath = '{}/model_{}_sub.csv'.format(OUTDIR, 3) else: filepath = '{}/model_{}_sub_{}.csv'.format(OUTDIR, 3, suffix) submissions.to_csv(filepath, index=False) if dosubmit: submit(subfile=filepath, description='model_{}_{}'.format(3, str(predict_start).split()[0]), filename='model_{}_sub_{}.csv'.format(3, str(predict_start).split()[0]) )
def __init__(self, gameinfo, player_only=False): self.gameinfo = gameinfo self.player = gameinfo["meta"]["player"] self.current_date = get_date(gameinfo["meta"]["date"]) self.countries = {} for tag, c in gameinfo["gamestate"]["countries"].items(): try: self.countries[tag] = Country(tag=tag, **c) except DummyCountryException: # fixme some real countries don't have revolutionary colors, find another key pass self.provinces = [ Province(id=int(i[1:]), **p) for i, p in gameinfo["gamestate"]["provinces"].items() ] if player_only: player_country = self.countries[self.player] player_country.analyze(self) else: for country in self.countries.values(): country.analyze(self)
def predict(*, pred_date: str, bj_his_length: int, ld_his_length: int, bj_npoints: int, ld_npoints: int, bj_scale:float, ld_scale: float, n_thread: int=8, save: bool=True, dosubmit: bool=False, suffix: str='dummy', verbose: int=2): vprint = get_verbose_print(verbose_level=verbose) pred_date = pd.to_datetime(pred_date) get_new_data = pred_date > pd.to_datetime('2018-03-28') sub = pd.read_csv("../input/sample_submission.csv") OUTDIR = '../submission/sub_{}-{}-{}'.format(pred_date.year, pred_date.month, pred_date.day) os.system('mkdir -p {}'.format(OUTDIR)) predict_start_day = pred_date + pd.Timedelta(1, unit='D') predict_start = pd.to_datetime(get_date(predict_start_day)) bj_data = get_city_data(city='bj', vprint=vprint, impute_with_lgbm=True, partial_data=True, get_new_data=get_new_data) ld_data = get_city_data(city='ld', vprint=vprint, impute_with_lgbm=True, partial_data=True, get_new_data=get_new_data) vprint(2, bj_data.head()) vprint(2, bj_data.loc[bj_data['stationId']!= 'zhiwuyuan_aq'].tail()) vprint(2, ld_data.head()) vprint(2, ld_data.tail()) bj_pred = fbprophet(sub=sub, data=bj_data, current_date=predict_start, history_length=bj_his_length, changepoint_scale=bj_scale, num_changepoints=bj_npoints, n_thread=n_thread, vprint=vprint) ld_pred = fbprophet(sub=sub, data=ld_data, current_date=predict_start, history_length=ld_his_length, changepoint_scale=ld_scale, num_changepoints=ld_npoints, n_thread=n_thread, vprint=vprint) submissions = sub.copy() bj_cond = submissions['test_id'].map(lambda x: x.split('#')[0] in BEIJING_STATIONS) ld_cond = submissions['test_id'].map(lambda x: x.split('#')[0] in LONDON_STATIONS) submissions.loc[bj_cond, ['PM2.5', 'PM10', 'O3']] = bj_pred.loc[bj_cond, ['PM2.5', 'PM10', 'O3']].values submissions.loc[ld_cond, ['PM2.5', 'PM10']] = ld_pred.loc[ld_cond, ['PM2.5', 'PM10']].values submissions['PM2.5'] = submissions['PM2.5'].map(lambda x: max(0, x)) submissions['PM10'] = submissions['PM10'].map(lambda x: max(0, x)) submissions['O3'] = submissions['O3'].map(lambda x: max(0, x)) submissions = submissions[['test_id', 'PM2.5', 'PM10', 'O3']] if save: if not suffix: filepath = '{}/model_{}_sub.csv'.format(OUTDIR, 4) else: filepath = '{}/model_{}_sub_{}.csv'.format(OUTDIR, 4, suffix) submissions.to_csv(filepath, index=False) if dosubmit: submit(subfile=filepath, description='model_{}_{}'.format(4, str(predict_start).split()[0]), filename='model_{}_sub_{}.csv'.format(4, str(predict_start).split()[0]))
def get_ruler_history(self, current_date): total_months = calculate_months_diff(current_date, START_DATE) last_crowning = last_ruler = None for date, history in yield_info( ((k, v) for k, v in self.history.items() if k[0].isnumeric())): if any(x in history for x in ('monarch', 'monarch_heir')): new_crowning = get_date(date) try: last_ruler = Ruler(**history['monarch']) except KeyError: last_ruler = Ruler(**history['monarch_heir']) if new_crowning > START_DATE: self.add_ruler(last_ruler, new_crowning, last_crowning) last_crowning = new_crowning else: last_crowning = START_DATE self.add_ruler(last_ruler, current_date, last_crowning) # rulers stats self.avg_ruler_life = np.average( [r.months for r in self.rulers if not r.is_regency_council]) self.avg_ruler_stats = sum([r.mana_generated for r in self.rulers]) / total_months
def draw_heatmap(HIT_FLAG=True): df = pd.read_csv('../../data/throughputRelation/data/3/sample.csv', delimiter=' ') # df.drop(['maxMT', 'avgMT', 'totalMT'], axis=1, inplace=True) # saleprice correlation matrix k = 7 # number of variables for heatmap corrmat = df.corr() if HIT_FLAG: cols = corrmat.nlargest(k, 'hThroughput')['hThroughput'].index else: cols = corrmat.nlargest(k, 'mThroughput')['mThroughput'].index cm = np.corrcoef(df[cols].values.T) fifthColor = '#7f0523' forthColor = '#df8064' thirdColor = '#ffffff' secondColor = '#70b0d1' firstColor = '#073a70' cmap = col.LinearSegmentedColormap.from_list( 'own2', [firstColor, secondColor, thirdColor, forthColor, fifthColor]) sns.set(font_scale=1.25) hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10}, yticklabels=cols.values, xticklabels=cols.values, cmap=cmap, vmin=-1, vmax=1) dt = util.get_date(timeFlag=False) if HIT_FLAG: plt.savefig("../plot/heatmap_h2m_" + dt + ".pdf", bbox_inches='tight') else: plt.savefig("../plot/heatmap_m2h_" + dt + ".pdf", bbox_inches='tight') plt.show()
def get_dates(file_types, people, data_dir, param_dir, override_min_date=-180, calc_min_date=False): max_date = 0 min_date = 0 for file_type in file_types: ##Get the data matrix and the field_names data, fields = util.read_clinical_data( data_dir + file_type.upper() + ".csv", param_dir) for i in range(len(fields)): fields[i] = fields[i].lower() date_fields = [] with open(param_dir + file_type + "_fields.csv") as fin: for line in fin: line = line.strip().split(",") date_fields.append(line[2]) date_fields = list(set(date_fields)) for person in people: if person in data: for visit in data[person]: for field in date_fields: date = util.get_date(visit, fields, field, file_type) if date != "no date": min_date = min(date, min_date) max_date = max(date, max_date) ##You could change this to only record data after the start of the study or a date before the start of the study if not calc_min_date: min_date = override_min_date return min_date, max_date
def get_adj_matrix(start, number_of_papers, end_date): edge_list = None for i in range(start, number_of_papers): mydict = util.get_idx_info(INFO,i) edge_list = util.to_edgelist_co_area_ins(edge_list, mydict, end_date) return edge_list numbers= [40, 77,97,226,177] end_date = "2016-05-27" end_date = util.get_date(end_date) ins_list = get_instituitions(441,617) area_list= get_areas(441,617) area_list= area_list[1:] edge_list = get_adj_matrix(441,617, end_date) try: journal_volume="PRX_5" os.mkdir("C:/Users/hexie/Documents/APS_result/"+str(journal_volume)) except: journal_volume="PRX_5" print("already here") for area in area_list:
def test_dates(self): self.assertEqual("1999/12/03", util.get_date_string(date(1999, 12, 3))) self.assertEqual(date(1999, 12, 3), util.get_date("1999/12/03")) self.assertTrue(util.is_valid_date("2016/10/26")) self.assertTrue(util.is_valid_date("2016/1/5")) self.assertFalse(util.is_valid_date("2016/5/5 10:23"))
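The util date helpers exercised by the test above are not included in this collection. As a point of reference, a minimal sketch consistent with those assertions might look like the following; this is an assumption for illustration, not the project's actual util module (which may differ in naming, formats, and error handling).

# Hypothetical sketch of util date helpers, inferred only from the test_dates assertions above.
from datetime import datetime

DATE_FORMAT = '%Y/%m/%d'  # assumed format, e.g. "1999/12/03"

def get_date(date_str):
    """Parse a 'YYYY/MM/DD' string into a datetime.date."""
    return datetime.strptime(date_str, DATE_FORMAT).date()

def get_date_string(the_date):
    """Format a date back into zero-padded 'YYYY/MM/DD'."""
    return the_date.strftime(DATE_FORMAT)

def is_valid_date(date_str):
    """True only if the whole string parses as a date (a trailing time part fails)."""
    try:
        get_date(date_str)
        return True
    except ValueError:
        return False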
def calculate_seven_day_lisa(): month_day = util.get_month_day() gdf = geopandas.read_file( "../download/usafacts_confirmed_{}.geojson".format(month_day)) gdf.columns = util.rename_column_usafacts(gdf.columns.tolist()) # Select informational columns and calculate 7-day average for last 7 days seven_day = gdf.iloc[:, 1:13] seven_day_adjusted = gdf.iloc[:, 1:13] stable_adjusted = gdf.iloc[:, 1:13] stable_unadjusted = gdf.iloc[:, 1:13] for i in range(-8, -1): seven_day[gdf.columns[i]] = (gdf.iloc[:, i] - gdf.iloc[:, i - 7]) / 7 seven_day_adjusted[gdf.columns[i]] = ( (gdf.iloc[:, i] - gdf.iloc[:, i - 7]) / 7) * 100000 / gdf['population'] stable_unadjusted[gdf.columns[i]] = gdf.iloc[:, i] stable_adjusted[ gdf.columns[i]] = gdf.iloc[:, i] * 100000 / gdf['population'] seven_day["average"] = seven_day.iloc[:, -7:].mean(axis=1) seven_day_adjusted["average"] = seven_day_adjusted.iloc[:, -7:].mean(axis=1) stable_unadjusted["average"] = stable_unadjusted.iloc[:, -7:].mean(axis=1) stable_adjusted["average"] = stable_adjusted.iloc[:, -7:].mean(axis=1) seven_day["geometry"] = gdf["geometry"] seven_day_adjusted["geometry"] = gdf["geometry"] stable_unadjusted["geometry"] = gdf["geometry"] stable_adjusted["geometry"] = gdf["geometry"] # Weight parameters for LISA counties = pygeoda.geopandas_to_geoda(seven_day) w = pygeoda.weights.queen(counties) # Unadjusted Seven Day select_col = util.get_date(ndays=7) int_data = [seven_day[c].tolist() for c in select_col] lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999) for i, col in enumerate(select_col): seven_day[col] = lisa.GetClusterIndicators(i) seven_day = seven_day.to_dict(orient="records") seven_day = { "type": "7 day average unadjusted", "source": "USAFacts", "features": seven_day } print("7 day average unadjusted") # Adjusted Seven Day int_data = [seven_day_adjusted[c].tolist() for c in select_col] lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999) for i, col in enumerate(select_col): seven_day_adjusted[col] = lisa.GetClusterIndicators(i) seven_day_adjusted = seven_day_adjusted.to_dict(orient="records") seven_day_adjusted = { "type": "7 day average adjusted", "source": "USAFacts", "features": seven_day_adjusted } print("7 day average adjusted") # Unadjusted Stable int_data = [stable_unadjusted[c].tolist() for c in select_col] lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999) for i, col in enumerate(select_col): stable_unadjusted[col] = lisa.GetClusterIndicators(i) stable_unadjusted = stable_unadjusted.to_dict(orient="records") stable_unadjusted = { "type": "stable unadjusted", "source": "USAFacts", "features": stable_unadjusted } print("stable unadjusted") # Adjusted Stable int_data = [stable_adjusted[c].tolist() for c in select_col] lisa = pygeoda.batch_local_moran(w, int_data, nCPUs=1, perm=999) for i, col in enumerate(select_col): stable_adjusted[col] = lisa.GetClusterIndicators(i) stable_adjusted = stable_adjusted.to_dict(orient="records") stable_adjusted = { "type": "stable adjusted", "source": "USAFacts", "features": stable_adjusted } print("stable adjusted") return seven_day, seven_day_adjusted, stable_unadjusted, stable_adjusted
parser.add_argument("env_name", type=str, help='available env_name:') parser.add_argument("random_seed", type=int) parser.add_argument("num_of_agents", type=int) parser.add_argument("temperature", type=float) parser.add_argument("batch_size", type=int, default=5000) args = parser.parse_args() env_name = env_map[args.env_name] prefix = prefix_map[args.env_name] n_epochs = n_epochs_map[args.env_name] random_seed = int(args.random_seed) run_function = function_map[args.algo] n_itr = n_epochs_map[args.env_name] num_of_agents = int(args.num_of_agents) temperature = float(args.temperature) learning_rate = learning_rate_map[args.env_name] batch_size = int(args.batch_size) if args.algo == "multi_REINFORCE_stein" or args.algo == "multi_REINFORCE_stein_anneal" or args.algo == 'multi_REINOFRCE_stein_reg' or args.algo == "multi_REINFORCE_stein_no_critic" or args.algo == 'multi_REINFORCE_baseline_no_critic' or args.algo == 'multi_REINFORCE_stein_evolution': args.algo = "{:}#{:}_temp={:}".format(args.algo, num_of_agents, args.temperature) run_experiment_lite( run_function, n_parallel=4, snapshot_mode="last", seed=random_seed, log_dir="./../exp_log/{:}_seed={:}_iter=500_env={:}_{:}".format( args.algo, random_seed, prefix, get_date()), )
def predict(*, pred_date: str, bj_his_length=360, ld_his_length=420, bj_windows='golden_8', ld_windows='fib_8', bj_dropout=0.6, ld_dropout=0.2, bj_units=(48, 48, 48, 48), ld_units=(24, 24, 24, 24), bj_batchsize=84, ld_batchsize=22, verbose: int=2, save=True, dosubmit=False, suffix='alt_lgb_split'): vprint = get_verbose_print(verbose_level=verbose) pred_date = pd.to_datetime(pred_date) get_new_data = pred_date > pd.to_datetime('2018-03-28') sub = pd.read_csv("../input/sample_submission.csv") OUTDIR = '../submission/sub_{}-{}-{}'.format(pred_date.year, pred_date.month, pred_date.day) os.system('mkdir -p {}'.format(OUTDIR)) predict_start_day = pred_date + pd.Timedelta(1, unit='D') predict_start = pd.to_datetime(get_date(predict_start_day)) bj_data = get_city_data(city='bj', vprint=vprint, impute_with_lgbm=False, partial_data=False, get_new_data=get_new_data) ld_data = get_city_data(city='ld', vprint=vprint, impute_with_lgbm=False, partial_data=False, get_new_data=get_new_data) vprint(2, bj_data.head()) vprint(2, bj_data.loc[bj_data['stationId']!= 'zhiwuyuan_aq'].tail()) vprint(2, ld_data.head()) vprint(2, ld_data.tail()) bj_data = impute(bj_data, lgbm=True, hour=True, mean=True) ld_data = impute(ld_data, lgbm=True, hour=True, mean=True) vprint(2, bj_data.head()) vprint(2, bj_data.loc[bj_data['stationId']!= 'zhiwuyuan_aq'].tail()) vprint(2, ld_data.head()) vprint(2, ld_data.tail()) bj_w_train_data = long_to_wide(bj_data) ld_w_train_data = long_to_wide(ld_data) train_split_date = pred_date - pd.Timedelta(3, unit='D') bj_pred = fit_predict(city='bj', sub=sub, w_train_data=bj_w_train_data, train_data=bj_data, train_split_date=train_split_date, history_length=bj_his_length, pred_date=pred_date, windows=MEDIAN_WINDOWS[bj_windows], dropout_rate=bj_dropout, units=bj_units, batch_size=bj_batchsize, l2_strength=0.0001, n_folds=5, vprint=vprint ) ld_pred = fit_predict(city='ld', sub=sub, w_train_data=ld_w_train_data, train_data=ld_data, train_split_date=train_split_date, history_length=ld_his_length, pred_date=pred_date, windows=MEDIAN_WINDOWS[ld_windows], dropout_rate=ld_dropout, units=ld_units, batch_size=ld_batchsize, l2_strength=0.0001, n_folds=5, vprint=vprint ) submissions = sub.copy() bj_cond = submissions['test_id'].map(lambda x: x.split('#')[0] in BEIJING_STATIONS) ld_cond = submissions['test_id'].map(lambda x: x.split('#')[0] in LONDON_STATIONS) submissions.loc[bj_cond, ['PM2.5', 'PM10', 'O3']] = bj_pred.loc[bj_cond, ['PM2.5', 'PM10', 'O3']].values submissions.loc[ld_cond, ['PM2.5', 'PM10']] = ld_pred.loc[ld_cond, ['PM2.5', 'PM10']].values submissions['PM2.5'] = submissions['PM2.5'].map(lambda x: max(0, x)) submissions['PM10'] = submissions['PM10'].map(lambda x: max(0, x)) submissions['O3'] = submissions['O3'].map(lambda x: max(0, x)) submissions = submissions[['test_id', 'PM2.5', 'PM10', 'O3']] if save: if not suffix: filepath = '{}/model_{}_sub.csv'.format(OUTDIR, 6) else: filepath = '{}/model_{}_sub_{}.csv'.format(OUTDIR, 6, suffix) submissions.to_csv(filepath, index=False) if dosubmit: submit(subfile=filepath, description='model_{}_{}'.format(6, str(predict_start).split()[0]), filename='model_{}_sub_{}.csv'.format(6, str(predict_start).split()[0]))
def get_statement_info_from_cache(self): key, cache = self.get_key_and_cache() if key in cache: self.ending_date = util.get_date(cache[key]['ending_date']) self.ending_balance = cache[key]['ending_balance']
def run_setting(setting, start_date, end_date, verbose_level=2, n_folds=5, skip=1): num, city, history_length, num_shifts, median_shifts, median_windows, dropout_rate, l2_strength, units, batch_size = setting median_windows = MEDIAN_WINDOWS[median_windows] vprint = get_verbose_print(verbose_level) sub = pd.read_csv('../input/sample_submission.csv') get_new_data = end_date > pd.to_datetime('2018-03-28') test_data = get_city_data(city=city, vprint=vprint, impute_with_lgbm=False, get_new_data=get_new_data) train_data = test_data.copy() # type: pd.DataFrame vprint(2, train_data.head()) vprint(2, train_data.loc[train_data['stationId'] != 'zhiwuyuan_aq'].tail()) train_data = impute(train_data, lgbm=True, hour=True, mean=True) vprint(2, train_data.head()) vprint(2, train_data.loc[train_data['stationId'] != 'zhiwuyuan_aq'].tail()) w_train_data = long_to_wide(train_data) current_date = start_date scores_df = [] STATIONS = BEIJING_STATIONS if city == 'bj' else LONDON_STATIONS while current_date < end_date: vprint(1, "running experiment for {} at {}".format(current_date, datetime.now())) train_split_date = current_date - pd.Timedelta(3, unit='D') x_train, y_train = wide_make_fw_x_y( wdata=w_train_data, ldata=train_data, split_date=train_split_date, history_length=history_length, num_shifts=num_shifts, use_medians=True, median_shifts=median_shifts, window_sizes=median_windows, for_prediction=False, n_thread=8, vprint=vprint, save_feature=True, use_cache=True, window_name=setting.median_windows) x_test, _ = wide_make_fw_x_y( wdata=w_train_data, ldata=train_data, split_date=current_date, history_length=history_length, num_shifts=1, use_medians=True, median_shifts=median_shifts, window_sizes=median_windows, for_prediction=False, n_thread=8, vprint=vprint, save_feature=True, use_cache=True, window_name=setting.median_windows) x_train = x_train.loc[x_train['stationId'].map(lambda x: x.split('#')[0] in [s for s in STATIONS if s != 'zhiwuyuan_aq'])] y_train = y_train.loc[y_train['stationId'].map(lambda x: x.split('#')[0] in [s for s in STATIONS if s != 'zhiwuyuan_aq'])] x_test = x_test.loc[x_test['stationId'].map(lambda x: x.split('#')[0] in STATIONS)] subs = [] min_valid_smape = [] groups = x_train['stationId'].map(lambda x: x.split('#')[0]) group_kfold = GroupKFold(n_splits=n_folds) splits = list(group_kfold.split(X=x_train, groups=groups)) for it, (train_idx, val_idx) in enumerate(splits): vprint(2, '# ---- fold {} ----'.format(it + 1)) model = get_keras_model(input_dim=x_train.shape[1] - 1, dropout_rate=dropout_rate, l2_strength=l2_strength, units=units) if it == 0: vprint(1, model.summary()) history = model.fit( x=x_train.iloc[train_idx, 1:].values, y=y_train.iloc[train_idx, 1:].values, validation_data=(x_train.iloc[val_idx, 1:].values, y_train.iloc[val_idx, 1:].values), batch_size=batch_size, epochs=65535, verbose=0, callbacks=[ ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=15, verbose=verbose_level), EarlyStopping(monitor='val_loss', patience=30, verbose=verbose_level), ModelCheckpoint(filepath='./model_checkpoint_{}.hdf5'.format(num), monitor='val_loss', save_best_only=True, save_weights_only=True, mode='min') ]) min_valid_smape.append(np.min(history.history['val_loss'])) predictions = model.predict(x_test.iloc[:, 1:], verbose=verbose_level) predictions = pd.DataFrame(predictions) predictions['stationId'] = x_test['stationId'].map(lambda x: x.split('#')[0]).tolist() predictions['measure'] = x_test['stationId'].map(lambda x: x.split('#')[1]).tolist() vprint(2, '# ---- formatting submission df ----') for idx, row in tqdm(predictions.iterrows()): values = row[:48].values sub.loc[sub.test_id.isin([row['stationId'] + '#' + str(i) for i in range(48)]), row['measure']] = values subs.append(sub[SUB_COLS]) vprint(2, 'mean {}, std {}'.format(np.mean(min_valid_smape), np.std(min_valid_smape))) submissions = subs[0] for sub in subs[1:]: submissions[['PM2.5', 'PM10', 'O3']] += sub[['PM2.5', 'PM10', 'O3']] submissions[['PM2.5', 'PM10', 'O3']] /= n_folds truth = get_truth(city=city, data=test_data, start_date=current_date + pd.Timedelta(1, unit='D')) scores = evaluate(city=city, truth=truth, predictions=submissions) if 'zhiwuyuan_aq-O3' in scores: scores['zhiwuyuan_aq-O3'] = np.nan if 'zhiwuyuan_aq-PM2.5' in scores: scores['zhiwuyuan_aq-PM2.5'] = np.nan if 'zhiwuyuan_aq-PM10' in scores: scores['zhiwuyuan_aq-PM10'] = np.nan scores['smape'] = pd.Series(scores).mean() scores['date'] = get_date(current_date) vprint(1, scores['smape']) current_date += pd.Timedelta(value=skip, unit='D') scores_df.append(scores) scores_df = pd.DataFrame(scores_df) scores_df = scores_df[['date', 'smape'] + [col for col in scores_df.columns if col not in ['date', 'smape']]] outfile_name = 'shortcut_mlp_experiment_{}_{}.csv'.format(num, city) outfile_path = '../summaries/{}'.format(outfile_name) if os.path.exists(outfile_path): df = pd.read_csv(outfile_path) scores_df = pd.concat([scores_df, df], axis=0) scores_df.drop_duplicates(inplace=True) scores_df.sort_values(by='date', inplace=True) scores_df.to_csv('../summaries/{}'.format(outfile_name), index=False) with open('../summaries/shortcut_mlp_settings.txt', 'a') as f: f.write(str(setting) + ' Summary File: ' + outfile_name) vprint(1, '# ---- mean {} '.format(scores_df['smape'].mean()))
if __name__ == '__main__': parser = argparse.ArgumentParser(description='Experiment or predicting with rolling summaries model') parser.add_argument('--mode', type=str, default='exp', help='whether to do experiment or make prediction') parser.add_argument('--pred_date', type=str, default=None, help='to generate prediction for this date') parser.add_argument('--pred_bj_windows', type=str, default='golden_8', help='what windows to use for bj') parser.add_argument('--pred_ld_windows', type=str, default='golden_8', help='what windows to use for ld') parser.add_argument('--pred_bj_method', type=str, default='median', help='what method to use for bj') parser.add_argument('--pred_ld_method', type=str, default='median', help='what method to use for ld') parser.add_argument('--pred_bj_lgbm', type=str2bool, default='True', help='whether to impute with lgbm for bj') parser.add_argument('--pred_ld_lgbm', type=str2bool, default='True', help='whether to impute with lgbm for ld') parser.add_argument('--pred_bj_fwbw', type=str2bool, default='True', help='whether to impute with fwbw for bj') parser.add_argument('--pred_ld_fwbw', type=str2bool, default='True', help='whether to impute with fwbw for ld') parser.add_argument('--save', type=str2bool, default='True', help='whether to save submission file') parser.add_argument('--save_suffix', type=str, default='alt_lgb_roll', help='suffix appended to submission filename') parser.add_argument('--submit', type=str2bool, default='False', help='whether to submit submission file') parser.add_argument('--exp_start_date', type=str, default=None, help='date to start experiment, inclusive') parser.add_argument('--exp_end_date', type=str, default=None, help='date to end experiment, exclusive') parser.add_argument('--n_thread', type=int, default=8, help='number of threads to run experiment') parser.add_argument('--verbose', type=int, default=2, help='verbose level') args = parser.parse_args() if args.mode == 'exp': experiment(list(range(1, 81)), args.exp_start_date, args.exp_end_date, args.n_thread, args.verbose) else: if not args.pred_date: args.pred_date = get_date(pd.to_datetime(datetime.now())) predict(pred_date=args.pred_date, bj_windows=args.pred_bj_windows, ld_windows=args.pred_ld_windows, bj_method=args.pred_bj_method, ld_method=args.pred_ld_method, bj_lgbm=args.pred_bj_lgbm, ld_lgbm=args.pred_ld_lgbm, bj_fwbw=args.pred_bj_fwbw, ld_fwbw=args.pred_ld_fwbw, n_thread=args.n_thread, save=args.save, dosubmit=args.submit, suffix=args.save_suffix, verbose=int(args.verbose))
help='available algorithms') parser.add_argument("env_name", type=str, help='available env_name:') parser.add_argument("random_seed", type=int) parser.add_argument("num_of_agents", type=int) parser.add_argument("temperature", type=float) parser.add_argument("batch_size", type=int, default = 5000) args = parser.parse_args() env_name = env_map[args.env_name] prefix = prefix_map[args.env_name] n_epochs = n_epochs_map[args.env_name] random_seed = int(args.random_seed) run_function = function_map[args.algo] n_itr = n_epochs_map[args.env_name] num_of_agents = int(args.num_of_agents) temperature = float(args.temperature) learning_rate = learning_rate_map[args.env_name] batch_size = int(args.batch_size) if args.algo == "multi_REINFORCE_stein" or args.algo == "multi_REINFORCE_stein_anneal" or args.algo == 'multi_REINOFRCE_stein_reg' or args.algo == "multi_REINFORCE_stein_no_critic" or args.algo == 'multi_REINFORCE_baseline_no_critic' or args.algo == 'multi_REINFORCE_stein_evolution': args.algo = "{:}#{:}_temp={:}".format(args.algo, num_of_agents, args.temperature) run_experiment_lite( run_function, n_parallel=8, snapshot_mode="last", seed=random_seed, log_dir="./../exp_log/25-16/{:}_seed={:}_iter=500_env={:}_{:}".format(args.algo, random_seed, prefix, get_date()), )
def set_statement_info(self): if self.ending_balance is not None: print( "(Enter 'cancel' to remove ending balance and set " "ending date to today.)" ) old_ending_date = self.ending_date while True: date_str = util.get_date_string(self.ending_date) new_date = self.get_response( prompt='Ending Date (YYYY/MM/DD)', old_value=date_str ) if self.cancel_statement(new_date): return try: self.ending_date = util.get_date(new_date) break except ValueError: print('*** Invalid date') new_ending_balance = None if self.ending_balance is None: old_ending_balance = None else: old_ending_balance = util.get_amount_str( self.ending_balance ) while True: new_ending_balance = self.get_response( prompt='Ending Balance', old_value=old_ending_balance ) if new_ending_balance is None: break if self.cancel_statement(new_ending_balance): return try: self.ending_balance = float( new_ending_balance.replace('$', '') ) break except ValueError: print('*** Invalid number') if new_ending_balance is not None: new_ending_balance = util.get_amount_str( self.ending_balance ) # only list and save to cache if values have changed... if old_ending_date != self.ending_date \ or old_ending_balance != new_ending_balance: self.save_statement_info_to_cache() self.list_transactions()
def main(mode: str = 'train'): if mode == 'train': os.system('mkdir -p ../models') assert os.path.exists('../input/bj_api_his.csv' ), 'run download_data.sh to get data first' assert os.path.exists('../input/ld_api_his.csv' ), 'run download_data.sh to get data first' bj_his = pd.read_csv(filepath_or_buffer='../input/bj_api_his.csv', parse_dates=['utc_time']) ld_his = pd.read_csv(filepath_or_buffer='../input/ld_api_his.csv', parse_dates=['utc_time']) bj_lgb_models = { '{}-{}'.format(stationId, measure): lgb_cv(bj_his, stationId, measure) for stationId in BEIJING_STATIONS for measure in ['PM2.5', 'PM10', 'O3'] } with open("../models/bj_lgbm.pkl", 'wb') as f: pickle.dump(bj_lgb_models, f) ld_lgb_models = { '{}-{}'.format(stationId, measure): lgb_cv(ld_his, stationId, measure) for stationId in LONDON_STATIONS for measure in ['PM2.5', 'PM10'] } with open("../models/ld_lgbm.pkl", 'wb') as f: pickle.dump(ld_lgb_models, f) print('# ---- DONE ---- #') if mode == 'impute': assert os.path.exists('../models/bj_lgbm.pkl'), 'model not trained yet' assert os.path.exists('../models/ld_lgbm.pkl'), 'model not trained yet' bj_his = pd.read_csv(filepath_or_buffer='../input/bj_api_his.csv', parse_dates=['utc_time']) ld_his = pd.read_csv(filepath_or_buffer='../input/ld_api_his.csv', parse_dates=['utc_time']) end_date = get_date( pd.to_datetime(datetime.now()) + pd.Timedelta(1, 'D')) bj_new = download_aq_data(city='bj', start_date='2018-04-01', start_hour='00', end_date=end_date, end_hour='23', save=False, partial_data=False, data_source='alternative') ld_new = download_aq_data(city='ld', start_date='2018-04-01', start_hour='00', end_date=end_date, end_hour='23', save=False, partial_data=False, data_source='alternative') bj_new = bj_new.loc[bj_new.utc_time < pd.to_datetime(today) - pd.Timedelta(1, 'D')] ld_new = ld_new.loc[ld_new.utc_time < pd.to_datetime(today) - pd.Timedelta(1, 'D')] bj_data = pd.concat([bj_his, bj_new], axis=0) ld_data = pd.concat([ld_his, ld_new], axis=0) ld_data = ld_data.loc[ld_data.stationId.isin(LONDON_STATIONS)] bj_data = bj_data.loc[bj_data.stationId.isin(BEIJING_STATIONS)] bj_data = bj_data[AQ_COL_NAMES] ld_data = ld_data[AQ_COL_NAMES] bj_data = fix_nat(bj_data) ld_data = fix_nat(ld_data) bj_data = lgbm_impute(data=bj_data, city='bj') ld_data = lgbm_impute(data=ld_data, city='ld') data = pd.concat([bj_data, ld_data], axis=0) data = fix_nat(data) data.to_csv( '../input/lgb_imputed_new_source_2014-03-31-_{}.csv'.format(today), index=False)
def draw_conquest_heat_map(self, country=None, crop_margin=50, resize_ratio=1.0, start_date=None, end_date=None): # todo add support for multiplayer by drawing multiple countries in a single map # fixme some provinces in the bharat file don't work properly, find why country = self.campaign.get_country(country) start_date = get_date(start_date) if start_date else START_DATE end_date = get_date( end_date ) if end_date else self.campaign.current_date # todo raise exception wrong date provinces = [ p for p in country.owned_provinces if p.last_conquest <= end_date ] spectrum = country.calculate_color_spectrum(n=end_date.year - start_date.year + 1) id_to_color = { str(p.id): spectrum[max(p.last_conquest.year - start_date.year, 0)] for p in provinces } width, height = self.map_img.size pixels = self.map_img_pixels.copy() e, n, w, s = 0, height, width, 0 for province_id, color in id_to_color.items(): for x, bands in self.province_coordinates[province_id].items(): x = int(x) for y1, y2 in bands: pixels[x, y1:y2] = color e = max(e, min(y2 + crop_margin, width)) w = min(w, max(y1 - crop_margin, 0)) n = min(n, max(x - crop_margin, 0)) s = max(s, min(x + crop_margin, height)) out = Image.fromarray(pixels) if out.size[0] > 50 and crop_margin >= 0: out = out.crop((w, n, e, s)) w, h = out.size # draw legend draw = ImageDraw.Draw(out) draw.fontmode = '1' margin, height = 10, 30 box_width = (w - 2 * margin) # fixme test functioning for conquest after a single year on 3-color countries if start_date.year != end_date.year: spectrum = country.calculate_color_spectrum(n=box_width) for i, c in enumerate(spectrum): draw.rectangle([(margin + i, h - margin - height), (margin + i + 1, h - margin)], fill=c) draw.rectangle([(margin, h - margin - height), (w - margin, h - margin)], outline="black", width=1) else: draw.rectangle([(margin, h - margin - height), (w - margin, h - margin)], outline="black", width=1, fill=country.colors[0]) font_margin = 8 draw.text( (margin + font_margin, h - margin - height + font_margin), str(start_date.year), font=self.font, fill='white') draw.text((w - margin - 42, h - margin - height + font_margin), str(end_date.year), font=self.font, fill='white') out = out.resize(np.array(out.size) * resize_ratio, Image.BILINEAR) out.save(f"{ASSETS_DIR}/heatmap.png")
def commit_ctrl_insert(conn,ctrl): basesql = "insert into data_ctrl_mst(maker_id,data_type,data_date) values(%s,%s,%s);" with conn: with conn.cursor() as cur: cur.execute(basesql,(ctrl.maker_id,ctrl.data_type,util.get_date(ctrl.data_date))) return conn
def testGetDate(self): etree = fromstring(ENTRY_DATE) self.assertEqual((2006, 8, 4, 20, 52, 0), time.gmtime(util.get_date(etree, 'updated'))[0:6]) self.assertEqual((2006, 8, 4, 15, 52, 1), time.gmtime(util.get_date(etree, 'published'))[0:6]) self.assertEqual((2006, 8, 5, 15, 55, 11), time.gmtime(util.get_date(etree, APP('edited')))[0:6])
def get_trajectory(item): try: try: user_id = get_uid(item) date = get_date(item) ts_0_0_0 = time.mktime(time.strptime(date, DATEFORMAT)) lines_list_0 = [] lines_list_1 = [] lines_list_2 = [] lines_list_3 = [] ts_traj_map = {} sorted_map = {} traj_list = [] inside_tup = ('1') AP_list = [ '0C8268F90E64', '0C8268C7D504', '14E6E4E1C510', '0C8268C7DD6C' ] time_index_nums = int(24 * 60 / 5) come_cnt_list = [None] * time_index_nums go_cnt_list = [None] * time_index_nums stay_cnt_list = [None] * time_index_nums except Exception as e: raise e try: df = pd.read_csv(item, sep='|', names=['user_id', 'ts', 'rssi', 'AP']) with open(item, 'r') as fr: for line in fr: line_list = line.split("|") AP = line_list[-1].strip() # print("line_list: %s, filename: %s" % (line_list, item)) line_list = [line_list[0], line_list[1], line_list[2], AP] if AP == AP_list[0]: lines_list_0.append(line_list) elif AP == AP_list[1]: lines_list_1.append(line_list) elif AP == AP_list[2]: lines_list_2.append(line_list) elif AP == AP_list[3]: lines_list_3.append(line_list) except Exception as e: raise e try: i = 1 while i < (len(lines_list_0) - 1): rssi = int(lines_list_0[i][2]) if rssi > int(lines_list_0[i - 1][2]) and rssi > int( lines_list_0[i + 1][2]): ts = time.strftime(DATETIMEFORMAT, time.localtime(int(lines_list_0[i][1]))) if ts in ts_traj_map: traj = ts_traj_map[ts] ts_traj_map[ts] = [traj, '0'] # ts_traj_map[ts] = [traj, ['0', rssi]] # print("outside AP: 0. dup ts. ts_traj_map[ts]: %s, ts: %s, user_id: %s" % (ts_traj_map[ts], ts, user_id)) # ts_traj_map[ts] = '1' else: ts_traj_map[ts] = '0' # ts_traj_map[ts] = ['0', rssi] i += 1 i = 1 while i < (len(lines_list_1) - 1): rssi = int(lines_list_1[i][2]) if rssi > int(lines_list_1[i - 1][2]) and rssi > int( lines_list_1[i + 1][2]): ts = time.strftime(DATETIMEFORMAT, time.localtime(int(lines_list_1[i][1]))) if ts in ts_traj_map: traj = ts_traj_map[ts] ts_traj_map[ts] = [traj, '1'] # ts_traj_map[ts] = [traj, ['1', rssi]] # print("inside AP: 1. dup ts. ts_traj_map[ts]: %s, ts: %s, user_id: %s" % (ts_traj_map[ts], ts, user_id)) # continue # ts_traj_map[ts] = '1' else: ts_traj_map[ts] = '1' # ts_traj_map[ts] = ['1', rssi] i += 1 i = 1 while i < (len(lines_list_2) - 1): rssi = int(lines_list_2[i][2]) if rssi > int(lines_list_2[i - 1][2]) and rssi > int( lines_list_2[i + 1][2]): ts = time.strftime(DATETIMEFORMAT, time.localtime(int(lines_list_2[i][1]))) if ts in ts_traj_map: traj = ts_traj_map[ts] ts_traj_map[ts] = [traj, '2'] # ts_traj_map[ts] = [traj, ['2', rssi]] # print("outside AP: 2. dup ts. ts_traj_map[ts]: %s, ts: %s, user_id: %s" % (ts_traj_map[ts], ts, user_id)) # ts_traj_map[ts] = '1' else: ts_traj_map[ts] = '2' # ts_traj_map[ts] = ['2', rssi] i += 1 i = 1 while i < (len(lines_list_3) - 1): rssi = int(lines_list_3[i][2]) if rssi > int(lines_list_3[i - 1][2]) and rssi > int( lines_list_3[i + 1][2]): ts = time.strftime(DATETIMEFORMAT, time.localtime(int(lines_list_3[i][1]))) if ts in ts_traj_map: traj = ts_traj_map[ts] ts_traj_map[ts] = [traj, '3'] # ts_traj_map[ts] = [traj, ['3', rssi]] # print("inside AP: 3. dup ts. ts_traj_map[ts]: %s, ts: %s, user_id: %s" % (ts_traj_map[ts], ts, user_id)) # ts_traj_map[ts] = '1' else: ts_traj_map[ts] = '3' # ts_traj_map[ts] = ['3', rssi] i += 1 length = len(ts_traj_map) if length > 1: sorted_map = sorted(ts_traj_map.items()) for tup in sorted_map: if isinstance(tup[1], list): print("sorted_map: %s, user_id: %s" % (sorted_map, user_id)) continue # print("sorted_map: %s, user_id: %s" % (sorted_map, user_id)) except Exception as e: raise e except Exception as e: raise e
def get_trajectory(item): try: try: user_id = get_uid(item) date = get_date(item) ts_0_0_0 = time.mktime(time.strptime(date, DATEFORMAT)) lines_list_0 = [] lines_list_1 = [] ts_traj_map = {} sorted_map = {} traj_list = [] inside_tup = ('1') come_cnt_list = [None] * TIME_INDEX_NUMS go_cnt_list = [None] * TIME_INDEX_NUMS stay_cnt_list = [None] * TIME_INDEX_NUMS except Exception as e: raise e try: with open(item, 'r') as fr: for line in fr: line_list = line.split("|") AP = line_list[-1].strip() line_list = [line_list[0], line_list[1], line_list[2], AP] if AP == '14E4E6E186A4': lines_list_0.append(line_list) elif AP == 'EC172FE3B340': lines_list_1.append(line_list) except Exception as e: raise e try: i = 1 while i < (len(lines_list_0) - 1): rssi = int(lines_list_0[i][2]) if rssi > int(lines_list_0[i-1][2]) and rssi > int(lines_list_0[i+1][2]): ts = time.strftime(DATETIMEFORMAT, time.localtime(int(lines_list_0[i][1]))) if ts in ts_traj_map: # traj = ts_traj_map[ts] # ts_traj_map[ts] = [traj, '0'] # ts_traj_map[ts] = [traj, ['0', rssi]] print("outside AP. dup ts. ts_traj_map: %s, ts: %s, user_id: %s" % (ts_traj_map, ts, user_id)) ts_traj_map[ts] = '1' else: ts_traj_map[ts] = '0' # ts_traj_map[ts] = ['0', rssi] i += 1 i = 1 while i < (len(lines_list_1) - 1): rssi = int(lines_list_1[i][2]) if rssi > int(lines_list_1[i-1][2]) and rssi > int(lines_list_1[i+1][2]): ts = time.strftime(DATETIMEFORMAT, time.localtime(int(lines_list_1[i][1]))) if ts in ts_traj_map: # traj = ts_traj_map[ts] # ts_traj_map[ts] = [traj, '1'] # ts_traj_map[ts] = [traj, ['1', rssi]] print("inside AP. dup ts. ts_traj_map: %s, ts: %s, user_id: %s" % (ts_traj_map, ts, user_id)) # continue ts_traj_map[ts] = '1' else: ts_traj_map[ts] = '1' # ts_traj_map[ts] = ['1', rssi] i += 1 length = len(ts_traj_map) if length > 1: sorted_map = sorted(ts_traj_map.items()) # print("sorted_map: %s, user_id: %s" % (sorted_map, user_id)) prev_ts = convert_datetime_to_timestamp(sorted_map[0][0], DATETIMEFORMAT) traj_list.append(sorted_map[0]) i = 1 while i < length: cur_ts = convert_datetime_to_timestamp(sorted_map[i][0], DATETIMEFORMAT) if cur_ts - prev_ts <= THRESHOLD: traj_list.append(sorted_map[i]) else: come_cnt_list, go_cnt_list, stay_cnt_list = handle(traj_list, inside_tup, user_id, ts_0_0_0, come_cnt_list, go_cnt_list, stay_cnt_list) # print("traj_list: %s, user_id: %s" % (traj_list, user_id)) traj_list[:] = [] traj_list.append(sorted_map[i]) i += 1 prev_ts = cur_ts come_cnt_list, go_cnt_list, stay_cnt_list = handle(traj_list, inside_tup, user_id, ts_0_0_0, come_cnt_list, go_cnt_list, stay_cnt_list) # print("traj_list: %s, user_id: %s" % (traj_list, user_id)) with lock: global RES_LIST i = 0 while i < TIME_INDEX_NUMS: come_cnt = come_cnt_list[i] if come_cnt_list[i] else 0 go_cnt = go_cnt_list[i] if go_cnt_list[i] else 0 stay_cnt = stay_cnt_list[i] if stay_cnt_list[i] else 0 RES_LIST[i] = RES_LIST[i] + come_cnt - go_cnt + stay_cnt i += 1 # for cnt in come_cnt_list: # if cnt: # print("come_cnt_list: %s, filename: %s" % (come_cnt_list, item)) # break # print("come_cnt_list: %s, filename: %s" % (come_cnt_list, item)) # print("go_cnt_list: %s, filename: %s" % (go_cnt_list, item)) # print("stay_cnt_list: %s, filename: %s" % (stay_cnt_list, item)) except Exception as e: raise e except Exception as e: raise e
def commit_ctrl_update_with_msg(conn,ctrl): basesql = "update data_ctrl_mst set update_stamp = current_timestamp , status = %s, msg = %s where maker_id = %s and data_type = %s and data_date = %s;" with conn: with conn.cursor() as cur: cur.execute(basesql,(ctrl.status,ctrl.msg, ctrl.maker_id,ctrl.data_type,util.get_date(ctrl.data_date))) return conn