def get_future(self, args):
    print("Collecting future forecasting")
    start_point = utils.get_datetime_now()
    start_point = start_point - timedelta(days=1)
    # interval = args.interval * 86400
    cities = []
    if "," in args.city:
        cities = args.city.split(",")
    else:
        cities.append(args.city)
    while True:
        now = utils.get_datetime_now()
        if (now - start_point).total_seconds() >= 0:
            try:
                # crawl 4 days forward for each city
                for i in range(4):
                    start_point = start_point + timedelta(days=1)
                    date = "%s-%s-%s" % (start_point.year,
                                         self.format10(start_point.month),
                                         self.format10(start_point.day))
                    for c in cities:
                        html = self.craw_future(c, i)
                        data = self.mine_data(date, html, c)
                        if data:
                            output = "\n".join(data) + "\n"
                            self.write_log(output)
            except Exception as e:
                print(e)

def execute(self, args):
    print("start crawling aqicn")
    save_interval = args.save_interval
    start = utils.get_datetime_now()
    start = start - timedelta(hours=1)
    output = ""
    crawler_range = 3600 * args.interval
    while True:
        now = utils.get_datetime_now()
        if (now - start).total_seconds() > crawler_range:
            output = self.craw_data_controller(now)
            # move pointer for timestep
            self.write_log(output)
            start = start + timedelta(hours=1)
            output = ""
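The date strings above are assembled with a `format10` helper that is not shown in this listing; a minimal sketch, assuming it simply zero-pads month, day, and hour values below ten:

def format10(self, number):
    # zero-pad single-digit values, e.g. 3 -> "03", so dates sort and parse consistently
    return "%02d" % number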
Example #3
def get_prediction_real_time(sparkEngine, model=None, url_weight="", dim=15, prediction_weight="", encoder_length=24, decoder_length=24, attention_length=24, is_close_cuda=True):
    # continuously crawl aws and aqi & weather
    end = utils.get_datetime_now()
    end = end - timedelta(hours=1)
    # end = datetime.strptime("2018-06-19 11:01:00", p.fm)
    # e_ = end.strftime(p.fm)
    start = end - timedelta(hours=23)
    start = start.replace(minute=0, second=0, microsecond=0)
    # s_ = start.strftime(p.fm)
    # 2. process normalize data
    vectors, w_pred, china_vectors, timestamp = sparkEngine.process_vectors(start, end, dim)
    v_l = len(vectors)
    if v_l:
        sp_vectors = psv.convert_data_to_grid_exe(vectors)
        if v_l < encoder_length:
            sp_vectors = np.pad(sp_vectors, ((encoder_length - v_l,0), (0,0), (0,0), (0, 0)), 'constant', constant_values=0)
        # repeat for 25 districts
        if w_pred:
            w_pred = np.repeat(np.expand_dims(w_pred, 1), p.grid_size, 1)
            de_vectors = psv.convert_data_to_grid_exe(w_pred)
            # pad to fill top elements of decoder vectors
            de_vectors = np.pad(de_vectors, ((0, 0), (0, 0), (0, 0), (6, 0)), 'constant', constant_values=0)
        else:
            # know nothing about future weather forecast
            de_vectors = np.zeros((decoder_length, p.grid_size, p.grid_size, dim))
        sp_vectors = np.concatenate((sp_vectors, de_vectors), axis=0)

        c_l = len(china_vectors)
        if c_l < attention_length:
            # print(attention_length - c_l)
            china_vectors = np.pad(china_vectors, ((attention_length - c_l, 0), (0, 0)), 'constant', constant_values=0)

        # 4. Feed to model
        if model is None:
            # model = BaselineModel(encoder_length=encoder_length, encode_vector_size=12, batch_size=1, decoder_length=decoder_length, rnn_layers=1,
            #                 dtype='grid', grid_size=25, use_cnn=True)
            # model.set_data(sp_vectors, [0], None)
            # model = MaskGan(encoder_length=encoder_length, encode_vector_size=15, batch_size=1, decode_vector_size=9, grid_size=25, use_cnn=True)
            model = APGan(encoder_length=24, decoder_length=24, encode_vector_size=15, batch_size=1, decode_vector_size=9, grid_size=25, forecast_factor=0)
            # model = APNet(encoder_length=24, decoder_length=24, encode_vector_size=15, batch_size=1, decode_vector_size=9, grid_size=25, forecast_factor=0)
        model.set_data(sp_vectors, [0], None, china_vectors)
        with tf.device('/%s' % p.device):
            model.init_ops(is_train=False)
            saver = tf.train.Saver()
        tconfig = get_gpu_options(False)
        with tf.Session(config=tconfig) as session:
            model.assign_datasets(session)
            preds_pm25 = realtime_execute(model, session, saver, decoder_length, p.prediction_weight_pm25)
            model.forecast_factor = 1
            preds_pm10 = realtime_execute(model, session, saver, decoder_length, p.prediction_weight_pm10)
            china_vectors = np.array(china_vectors)
            # print("china", china_vectors.shape)
            # tf.reset_default_graph()
            # session.close()
            if is_close_cuda:
                cuda.select_device(0)
                cuda.close()
        return (preds_pm25, preds_pm10), timestamp, np.transpose(china_vectors[:,:2] * 500)
    else:
        return ([],[]), [], []
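The `np.pad` calls above keep the encoder input at a fixed `(encoder_length, grid_size, grid_size, dim)` shape even when fewer than 24 hourly vectors were crawled, by prepending all-zero frames on the time axis; a minimal sketch of that idea on dummy data (grid size 25 and dim 15 are taken from the call above):

import numpy as np

encoder_length, grid, dim = 24, 25, 15
v_l = 20  # suppose only 20 hourly vectors were crawled
sp_vectors = np.random.rand(v_l, grid, grid, dim)
# prepend (encoder_length - v_l) zero frames at the start of the time axis
sp_vectors = np.pad(sp_vectors, ((encoder_length - v_l, 0), (0, 0), (0, 0), (0, 0)),
                    'constant', constant_values=0)
print(sp_vectors.shape)  # (24, 25, 25, 15)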
Example #4
def execute(self, args):
    print("start crawling aws")
    save_interval = args.save_interval
    start = datetime.strptime(args.start, pr.fm)
    start_point = utils.get_datetime_now()
    # output = "timestamp,PM10_VAL,PM2.5_VAL,O3(ppm),NO2(ppm),CO(ppm),SO2(ppm),PM10_AQI,PM2.5_AQI\n"
    output = ""
    counter = 0
    last_save = 0
    crawler_range = 86400
    if not args.forward:
        if args.end:
            end = datetime.strptime(args.end, pr.fm)
        else:
            end = utils.get_datetime_now()
        length = (end - start).total_seconds() / crawler_range
    else:
        end = datetime.strptime("2050-12-31 00:00:00", pr.fm)
    while start <= end:
        now = utils.get_datetime_now()
        # crawl day by day at first; once caught up to the present, crawl hour by hour
        # how long from the last crawled date to now?
        if (now - start).total_seconds() > crawler_range:
            tmp = start
            st = "00"
            ed = "24"
            if crawler_range != 86400:
                st = self.format10(tmp.hour)
                ed = self.format10(tmp.hour + 1)
            output, counter, last_save = self.craw_data_controller(
                output, counter, last_save, save_interval, tmp, st, ed)
            # move pointer for timestep
            if not args.forward:
                utils.update_progress(counter * 1.0 / length)
            else:
                self.write_log(output)
                output = ""
            if crawler_range == 86400:
                start = start + timedelta(days=1)
            else:
                start = start + timedelta(hours=1)
            print("AWS done")
        else:
            # reached the boundary (caught up to now): reduce the range to hourly crawling
            crawler_range = 3600
    self.write_log(output)
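Each crawler buffers rows in `output` and flushes them through `self.write_log`, whose implementation is not part of this listing; a minimal sketch, assuming it simply appends the buffered lines to a crawler-specific file (`self.filename` is a hypothetical attribute):

def write_log(self, output):
    # append buffered rows to this crawler's output file; skip empty buffers
    if not output:
        return
    with open(self.filename, "a") as f:
        f.write(output)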
Example #5
def main(self, args):
    # filename = "craw_weather_%s_%s_%s.txt" % (args.city, utils.clear_datetime(args.start), utils.clear_datetime(args.end))
    start = datetime.strptime(args.start, pr.fm)
    if args.end:
        end = datetime.strptime(args.end, pr.fm)
    else:
        end = utils.get_datetime_now()
    start_point = utils.get_datetime_now()
    # output = "timestamp,PM10_VAL,PM2.5_VAL,O3(ppm),NO2(ppm),CO(ppm),SO2(ppm),PM10_AQI,PM2.5_AQI\n"
    output = ""
    length = (end - start).total_seconds() / 86400.0
    save_interval = args.save_interval
    counter = 0
    last_save = 0
    if "," in args.city:
        cities = args.city.split(",")
    else:
        cities = [args.city]
    while start <= end:
        now = utils.get_datetime_now()
        if (now - start_point).total_seconds() >= args.interval:
            try:
                counter += 1
                date = "%s-%s-%s" % (start.year, self.format10(
                    start.month), self.format10(start.day))
                for c in cities:
                    html = self.craw_data(c, date)
                    data = self.mine_data(date, html, c)
                    if data:
                        output += "\n".join(data) + "\n"
                    if (counter - last_save) == save_interval:
                        last_save = counter
                        self.write_log(output)
                        output = ""
            except Exception as e:
                print(start.strftime(pr.fm), e)
            start = start + timedelta(days=1)
            start_point = now
            utils.update_progress(counter * 1.0 / length)
    self.write_log(output)
def execute(self, args):
    print("start crawling aqi seoul")
    save_interval = args.save_interval
    start = datetime.strptime(args.start, pr.fm)
    # start_point = utils.get_datetime_now()
    output = ""
    counter = 0
    last_save = 0
    # crawler_range = 3600
    if not args.forward:
        if args.end:
            end = datetime.strptime(args.end, pr.fm)
        else:
            end = utils.get_datetime_now()
        length = (end - start).total_seconds() / 86400
    else:
        end = datetime.strptime("2050-12-31 00:00:00", pr.fm)
    while start <= end:
        now = utils.get_datetime_now()
        # if (now - start_point).total_seconds() >= args.interval:
        #     start_point = now
        if (now - start).total_seconds() > 3600:
            hour = start.hour
            tmp = start
            if tmp.hour == 0:
                # midnight is requested as hour "24" of the previous day
                tmp = tmp - timedelta(hours=1)
                hour = "24"
            else:
                hour = self.format10(tmp.hour)
            st_ = start.strftime(pr.fm)
            output, counter, last_save = self.craw_data_controller(output, counter, last_save, save_interval, tmp, hour, st_)
            # move pointer for timestep
            start = start + timedelta(hours=1)
            if not args.forward:
                utils.update_progress(counter * 1.0 / length)
            else:
                self.write_log(output)
                output = ""
    self.write_log(output)
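The midnight handling above maps a timestamp of 00:00 to hour "24" of the previous calendar day before requesting data; a standalone sketch of that mapping:

from datetime import datetime, timedelta

def to_report_hour(ts):
    # return (date to request, hour label), mirroring the branch in execute()
    if ts.hour == 0:
        prev = ts - timedelta(hours=1)
        return prev.date(), "24"
    return ts.date(), "%02d" % ts.hour

print(to_report_hour(datetime(2018, 6, 19, 0, 0)))   # (datetime.date(2018, 6, 18), '24')
print(to_report_hour(datetime(2018, 6, 19, 11, 0)))  # (datetime.date(2018, 6, 19), '11')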
Example #7
def upload_to_mugshot(instance, filename):
    """
    Uploads a mugshot for a user to the ``USERENA_MUGSHOT_PATH`` and saves it
    under a unique hash for the image. This is done for privacy reasons, so
    others can't simply browse through the mugshot directory.
    """
    extension = filename.split('.')[-1].lower()
    salt, hash = generate_sha1(instance.id)
    path = accounts_settings.MUGSHOT_PATH % {'username': instance.user.username,
                                             'id': instance.user.id,
                                             'date': instance.user.date_joined,
                                             'date_now': get_datetime_now().date()}
    return '%(path)s%(hash)s.%(extension)s' % {'path': path,
                                               'hash': hash[:10],
                                               'extension': extension}
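`upload_to_mugshot` is meant to be passed as the `upload_to` callable of an image field, so Django calls it with `(instance, filename)` whenever a file is saved; a minimal sketch of that wiring, assuming a simplified profile model (the model and field names here are illustrative, not userena's actual definitions):

from django.conf import settings
from django.db import models

class Profile(models.Model):
    user = models.OneToOneField(settings.AUTH_USER_MODEL, on_delete=models.CASCADE)
    # Django calls upload_to_mugshot(instance, filename) to build the storage path
    mugshot = models.ImageField(upload_to=upload_to_mugshot, blank=True)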
Example #8
def predict(self):
    now = utils.get_datetime_now()
    # only recompute predictions every 30 minutes; otherwise serve the cached values
    if (not self.prediction0) or not self.last_time or (
            now - self.last_time).total_seconds() >= 1800:
        self.last_time = now
        preds, timestamp, china = get_prediction_real_time(sparkEngine)
        self.beijing = china[0, :].flatten().tolist()
        self.shenyang = china[1, :].flatten().tolist()
        # self.prediction0 = (np.array(preds[0]) + 15).tolist()
        # self.prediction1 = (np.array(preds[1]) + 15).tolist()
        self.prediction0 = preds[0]
        self.prediction1 = preds[1]
        self.avg0 = np.mean(self.prediction0, axis=1).tolist()
        self.avg1 = np.mean(self.prediction1, axis=1).tolist()
        self.timestamp = timestamp
Example #9
def GET(self):
    date = utils.get_datetime_now()
    return json.dumps({"datetime": date.strftime(pr.fm)})
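The class-based `GET` handler above follows the web.py style; if that assumption holds, a minimal sketch of how such an endpoint could be mounted (the `CurrentTime` class name and `/datetime` route are illustrative; `utils` and `pr.fm` come from the surrounding project):

import json
import web  # web.py, assumed from the class-based handler style

class CurrentTime:
    def GET(self):
        # return the server's current time as JSON
        date = utils.get_datetime_now()
        return json.dumps({"datetime": date.strftime(pr.fm)})

urls = ("/datetime", "CurrentTime")
app = web.application(urls, globals())
# app.run() starts web.py's built-in development server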
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--interval", default=1, type=int)
    parser.add_argument("-s", "--start", default=2018, type=int)
    parser.add_argument("-e", "--end", default=2018, type=int)
    parser.add_argument("-c", "--country", default="south-korea")

    args = parser.parse_args()

    filename = "holiday_%s_%s_%s.txt" % (args.country, args.start, args.end)
    end = args.end
    start = args.start
    # output = "timestamp,PM10_VAL,PM2.5_VAL,O3(ppm),NO2(ppm),CO(ppm),SO2(ppm),PM10_AQI,PM2.5_AQI\n"
    output = ""
    length = end - start + 1
    counter = 0
    last_save = 0
    start_point = utils.get_datetime_now()
    while start <= end:
        now = utils.get_datetime_now()
        if (now - start_point).total_seconds() >= args.interval:
            counter += 1
            # try:
            year = start
            html = craw_data(year, args.country)
            data = mine_data(year, html)
            if data:
                output += ",".join(data) + "\n"
            # except Exception as e:
            #     print(e)
            start += 1
            start_point = now
            utils.update_progress(counter * 1.0 / length)