def get_future(self, args):
    print("Collecting future forecasting")
    start_point = utils.get_datetime_now()
    start_point = start_point - timedelta(days=1)
    # interval = args.interval * 86400
    cities = []
    if "," in args.city:
        cities = args.city.split(",")
    else:
        cities.append(args.city)
    while True:
        now = utils.get_datetime_now()
        if (now - start_point).total_seconds() >= 0:
            try:
                # crawl 4 days forward for each city
                for i in range(4):
                    start_point = start_point + timedelta(days=1)
                    date = "%s-%s-%s" % (start_point.year, self.format10(start_point.month),
                                         self.format10(start_point.day))
                    for c in cities:
                        html = self.craw_future(c, i)
                        data = self.mine_data(date, html, c)
                        if data:
                            output = "\n".join(data) + "\n"
                            self.write_log(output)
            except Exception as e:
                print(e)
def execute(self, args): print("start crawling aqicn") save_interval = args.save_interval start = utils.get_datetime_now() start = start - timedelta(hours=1) output = "" crawler_range = 3600 * args.interval while True: now = utils.get_datetime_now() if (now - start).total_seconds() > crawler_range: output = self.craw_data_controller(now) # move pointer for timestep self.write_log(output) start = start + timedelta(hours=1) output = ""
def get_prediction_real_time(sparkEngine, model=None, url_weight="", dim=15, prediction_weight="",
                             encoder_length=24, decoder_length=24, attention_length=24, is_close_cuda=True):
    # continuously crawl aws and aqi & weather
    end = utils.get_datetime_now()
    end = end - timedelta(hours=1)
    # end = datetime.strptime("2018-06-19 11:01:00", p.fm)
    # e_ = end.strftime(p.fm)
    start = end - timedelta(hours=23)
    start = start.replace(minute=0, second=0, microsecond=0)
    # s_ = start.strftime(p.fm)
    # 2. process normalize data
    vectors, w_pred, china_vectors, timestamp = sparkEngine.process_vectors(start, end, dim)
    v_l = len(vectors)
    if v_l:
        sp_vectors = psv.convert_data_to_grid_exe(vectors)
        if v_l < encoder_length:
            sp_vectors = np.pad(sp_vectors, ((encoder_length - v_l, 0), (0, 0), (0, 0), (0, 0)),
                                'constant', constant_values=0)
        # repeat for 25 districts
        if w_pred:
            w_pred = np.repeat(np.expand_dims(w_pred, 1), p.grid_size, 1)
            de_vectors = psv.convert_data_to_grid_exe(w_pred)
            # pad to fill top elements of decoder vectors
            de_vectors = np.pad(de_vectors, ((0, 0), (0, 0), (0, 0), (6, 0)),
                                'constant', constant_values=0)
        else:
            # know nothing about future weather forecast
            de_vectors = np.zeros((decoder_length, p.grid_size, p.grid_size, dim))
        sp_vectors = np.concatenate((sp_vectors, de_vectors), axis=0)
        c_l = len(china_vectors)
        if c_l < attention_length:
            # print(attention_length - c_l)
            china_vectors = np.pad(china_vectors, ((attention_length - c_l, 0), (0, 0)),
                                   'constant', constant_values=0)
        # 4. Feed to model
        if model is None:
            # model = BaselineModel(encoder_length=encoder_length, encode_vector_size=12, batch_size=1,
            #                       decoder_length=decoder_length, rnn_layers=1,
            #                       dtype='grid', grid_size=25, use_cnn=True)
            # model.set_data(sp_vectors, [0], None)
            # model = MaskGan(encoder_length=encoder_length, encode_vector_size=15, batch_size=1,
            #                 decode_vector_size=9, grid_size=25, use_cnn=True)
            model = APGan(encoder_length=24, decoder_length=24, encode_vector_size=15, batch_size=1,
                          decode_vector_size=9, grid_size=25, forecast_factor=0)
            # model = APNet(encoder_length=24, decoder_length=24, encode_vector_size=15, batch_size=1,
            #               decode_vector_size=9, grid_size=25, forecast_factor=0)
        model.set_data(sp_vectors, [0], None, china_vectors)
        with tf.device('/%s' % p.device):
            model.init_ops(is_train=False)
            saver = tf.train.Saver()
        tconfig = get_gpu_options(False)
        with tf.Session(config=tconfig) as session:
            model.assign_datasets(session)
            preds_pm25 = realtime_execute(model, session, saver, decoder_length, p.prediction_weight_pm25)
            model.forecast_factor = 1
            preds_pm10 = realtime_execute(model, session, saver, decoder_length, p.prediction_weight_pm10)
            china_vectors = np.array(china_vectors)
            # print("china", china_vectors.shape)
        # tf.reset_default_graph()
        # session.close()
        if is_close_cuda:
            cuda.select_device(0)
            cuda.close()
        return (preds_pm25, preds_pm10), timestamp, np.transpose(china_vectors[:, :2] * 500)
    else:
        return ([], []), [], []
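# Illustration only: the zero-padding above fills missing leading timesteps so the
# encoder always sees `encoder_length` frames. A minimal, self-contained sketch with
# hypothetical shapes (25x25 grid, 15 features), not the project's real data:
import numpy as np

encoder_length = 24
frames = np.random.rand(20, 25, 25, 15)  # suppose only 20 hourly grid frames were crawled
padded = np.pad(frames, ((encoder_length - len(frames), 0), (0, 0), (0, 0), (0, 0)),
                'constant', constant_values=0)
print(padded.shape)  # (24, 25, 25, 15): 4 all-zero frames prepended on the time axis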
def execute(self, args): print("start crawling aws") save_interval = args.save_interval start = datetime.strptime(args.start, pr.fm) start_point = utils.get_datetime_now() # output = "timestamp,PM10_VAL,PM2.5_VAL,O3(ppm),NO2(ppm),CO(ppm),SO2(ppm),PM10_AQI,PM2.5_AQI\n" output = "" counter = 0 last_save = 0 crawler_range = 86400 if not args.forward: if args.end: end = datetime.strptime(args.end, pr.fm) else: end = utils.get_datetime_now() length = (end - start).total_seconds() / crawler_range else: end = datetime.strptime("2050-12-31 00:00:00", pr.fm) while start <= end: now = utils.get_datetime_now() # at first, crawling by daily # if up to the moment, crawling by hourly # how long from last crawled date to now? if (now - start).total_seconds() > crawler_range: tmp = start st = "00" ed = "24" if crawler_range != 86400: st = self.format10(tmp.hour) ed = self.format10(tmp.hour + 1) output, counter, last_save = self.craw_data_controller( output, counter, last_save, save_interval, tmp, st, ed) # move pointer for timestep if not args.forward: utils.update_progress(counter * 1.0 / length) else: self.write_log(output) output = "" if crawler_range == 86400: start = start + timedelta(days=1) else: start = start + timedelta(hours=1) print("AWS done") else: # Approach boundary (reach end) then reduce range to hourly crawling crawler_range = 3600 self.write_log(output)
def main(self, args):
    # filename = "craw_weather_%s_%s_%s.txt" % (args.city, utils.clear_datetime(args.start), utils.clear_datetime(args.end))
    start = datetime.strptime(args.start, pr.fm)
    if args.end:
        end = datetime.strptime(args.end, pr.fm)
    else:
        end = utils.get_datetime_now()
    start_point = utils.get_datetime_now()
    # output = "timestamp,PM10_VAL,PM2.5_VAL,O3(ppm),NO2(ppm),CO(ppm),SO2(ppm),PM10_AQI,PM2.5_AQI\n"
    output = ""
    length = (end - start).total_seconds() / 86400.0
    save_interval = args.save_interval
    counter = 0
    last_save = 0
    if "," in args.city:
        cities = args.city.split(",")
    else:
        cities = [args.city]
    while start <= end:
        now = utils.get_datetime_now()
        if (now - start_point).total_seconds() >= args.interval:
            try:
                counter += 1
                date = "%s-%s-%s" % (start.year, self.format10(start.month), self.format10(start.day))
                for c in cities:
                    html = self.craw_data(c, date)
                    data = self.mine_data(date, html, c)
                    if data:
                        output += "\n".join(data) + "\n"
                if (counter - last_save) == save_interval:
                    last_save = counter
                    self.write_log(output)
                    output = ""
            except Exception as e:
                print(start.strftime(pr.fm), e)
            start = start + timedelta(days=1)
            start_point = now
            utils.update_progress(counter * 1.0 / length)
    self.write_log(output)
def execute(self, args): print("start crawling aqi seoul") save_interval = args.save_interval start = datetime.strptime(args.start, pr.fm) # start_point = utils.get_datetime_now() output = "" counter = 0 last_save = 0 # crawler_range = 3600 if not args.forward: if args.end: end = datetime.strptime(args.end, pr.fm) else: end = utils.get_datetime_now() length = (end - start).total_seconds() / 86400 else: end = datetime.strptime("2050-12-31 00:00:00", pr.fm) while start <= end: now = utils.get_datetime_now() # if (now - start_point).total_seconds() >= args.interval: # start_point = now if (now - start).total_seconds() > 3600: hour = start.hour tmp = start if tmp.hour == 0: tmp = tmp - timedelta(hours=1) hour = "24" else: hour = self.format10(tmp.hour) st_ = start.strftime(pr.fm) output, counter, last_save = self.craw_data_controller(output, counter, last_save, save_interval, tmp, hour, st_) # move pointer for timestep start = start + timedelta(hours=1) if not args.forward: utils.update_progress(counter * 1.0 / length) else: self.write_log(output) output = "" self.write_log(output)
def upload_to_mugshot(instance, filename):
    """
    Uploads a mugshot for a user to the ``USERENA_MUGSHOT_PATH`` and saves it
    under a unique hash for the image. This is for privacy reasons so others
    can't just browse through the mugshot directory.
    """
    extension = filename.split('.')[-1].lower()
    salt, hash = generate_sha1(instance.id)
    path = accounts_settings.MUGSHOT_PATH % {'username': instance.user.username,
                                             'id': instance.user.id,
                                             'date': instance.user.date_joined,
                                             'date_now': get_datetime_now().date()}
    return '%(path)s%(hash)s.%(extension)s' % {'path': path,
                                               'hash': hash[:10],
                                               'extension': extension}
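# Illustration only: the two '%'-dict formattings above compose the final upload path.
# A minimal sketch with a hypothetical MUGSHOT_PATH template and hash value (the real
# USERENA_MUGSHOT_PATH setting and generate_sha1 output may differ):
MUGSHOT_PATH = 'mugshots/%(username)s/'

path = MUGSHOT_PATH % {'username': 'alice', 'id': 42,
                       'date': '2018-06-19', 'date_now': '2018-06-20'}
print('%(path)s%(hash)s.%(extension)s' % {'path': path, 'hash': '1a2b3c4d5e', 'extension': 'png'})
# mugshots/alice/1a2b3c4d5e.png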
def predict(self):
    now = utils.get_datetime_now()
    if (not self.prediction0) or not self.last_time or (now - self.last_time).total_seconds() >= 1800:
        self.last_time = now
        preds, timestamp, china = get_prediction_real_time(sparkEngine)
        self.beijing = china[0, :].flatten().tolist()
        self.shenyang = china[1, :].flatten().tolist()
        # self.prediction0 = (np.array(preds[0]) + 15).tolist()
        # self.prediction1 = (np.array(preds[1]) + 15).tolist()
        self.prediction0 = preds[0]
        self.prediction1 = preds[1]
        self.avg0 = np.mean(self.prediction0, axis=1).tolist()
        self.avg1 = np.mean(self.prediction1, axis=1).tolist()
        self.timestamp = timestamp
def GET(self):
    date = utils.get_datetime_now()
    return json.dumps({"datetime": date.strftime(pr.fm)})
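# Illustration only: assuming pr.fm is a timestamp format such as "%Y-%m-%d %H:%M:%S"
# (an assumption consistent with the strptime example elsewhere in this code, not a
# confirmed value), the handler returns a JSON payload like the one below:
import json
from datetime import datetime

fm = "%Y-%m-%d %H:%M:%S"  # assumed format string standing in for pr.fm
print(json.dumps({"datetime": datetime(2018, 6, 19, 11, 0, 0).strftime(fm)}))
# {"datetime": "2018-06-19 11:00:00"}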
parser.add_argument("-i", "--interval", default=1, type=int) parser.add_argument("-s", "--start", default=2018, type=int) parser.add_argument("-e", "--end", default=2018, type=int) parser.add_argument("-c", "--country", default="south-korea") args = parser.parse_args() filename = "holiday_%s_%s_%s.txt" % (args.country, args.start, args.end) end = args.end start = args.start # output = "timestamp,PM10_VAL,PM2.5_VAL,O3(ppm),NO2(ppm),CO(ppm),SO2(ppm),PM10_AQI,PM2.5_AQI\n" output = "" length = end - start + 1 counter = 0 last_save = 0 start_point = utils.get_datetime_now() while start <= end: now = utils.get_datetime_now() if (now - start_point).total_seconds() >= args.interval: counter += 1 # try: year = start html = craw_data(year, args.country) data = mine_data(year, html) if data: output += ",".join(data) + "\n" # except Exception as e: # print(e) start += 1 start_point = now utils.update_progress(counter * 1.0 / length)