def exec_get_raster(self, ftp_path, convert_to_gtiff, product, layer_name,
                    save_s3=False, remove_local_files=False):
    path_local = self.download_from_ftp(ftp_path)
    if path_local is None:
        # Download failed; return an empty list so callers can iterate safely.
        return []
    dict_meta_info_raw = self.get_dict_meta_info_from_filename(
        path_local, product)
    # convert to gtiff
    if convert_to_gtiff:
        list_dict_meta_info_converted = get_single_layer_gtiff(
            path_local,
            layer_name=layer_name,
            dir_save=self.dir_parent_converted_local,
            dict_meta_info_raw=dict_meta_info_raw)
    else:
        list_dict_meta_info_converted = []
    # save_s3
    if save_s3:
        url_s3 = transfer_to_s3(path_local,
                                dir_local_parent=self.dir_parent_raw_local,
                                dir_s3_parent=self.dir_parent_raw_s3,
                                remove_local_file=remove_local_files,
                                multiprocessing=self.processes > 1,
                                s3_bucket_name=self.s3_bucket_name)
        dict_meta_info_raw['url_s3'] = url_s3
        if remove_local_files:
            dict_meta_info_raw['path_local'] = None
        list_dict_meta_info_converted_out = []
        if len(list_dict_meta_info_converted) > 0:
            for dict_meta_info_converted in list_dict_meta_info_converted:
                path_converted = dict_meta_info_converted['path_local']
                url_s3 = transfer_to_s3(
                    path_converted,
                    dir_local_parent=self.dir_parent_converted_local,
                    dir_s3_parent=self.dir_parent_converted_s3,
                    remove_local_file=remove_local_files,
                    multiprocessing=self.processes > 1,
                    s3_bucket_name=self.s3_bucket_name)
                dict_meta_info_converted['url_s3'] = url_s3
                if remove_local_files:
                    dict_meta_info_converted['path_local'] = None
                list_dict_meta_info_converted_out.append(
                    dict_meta_info_converted)
            list_dict_meta_info_converted = deepcopy(
                list_dict_meta_info_converted_out)
    return list_dict_meta_info_converted
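# Hedged usage sketch (assumptions: `downloader` is an instance of the class
# defining exec_get_raster; the FTP path, product key, and layer name below
# are hypothetical placeholders, not values taken from this repo).
def _example_exec_get_raster(downloader):
    list_meta = downloader.exec_get_raster(
        ftp_path='/pub/sample/product.h5',  # hypothetical path
        convert_to_gtiff=True,
        product='sample_product',           # hypothetical product key
        layer_name='precipitation',         # hypothetical layer name
        save_s3=False)
    # each dict carries at least 'path_local', plus 'url_s3' when save_s3=True
    for meta in list_meta:
        print(meta['path_local'], meta.get('url_s3'))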
def save_list(self, filename_list=FILENAME_LIST):
    df_meta_info = pd.DataFrame(self.list_dict_meta_info_converted)
    url_s3_head = get_s3_url_head(self.s3_bucket_name)
    url_s3 = os.path.join(url_s3_head, filename_list)
    df_record_s3 = update_data_list_df(path_org=url_s3, df_new=df_meta_info)
    # save local
    path_record_local = os.path.join(self.dir_parent_converted_local,
                                     filename_list)
    df_record_s3.to_csv(path_record_local, index=False)
    transfer_to_s3(path_local=path_record_local,
                   dir_local_parent=self.dir_parent_converted_local,
                   dir_s3_parent=self.dir_parent_converted_s3,
                   remove_local_file=False,
                   multiprocessing=False,
                   s3_bucket_name=self.s3_bucket_name)
    return df_record_s3
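# A minimal sketch of the merge that update_data_list_df is assumed to perform
# here (append the new rows to the existing record and drop duplicates); the
# real helper in this repo may behave differently.
def _update_data_list_df_sketch(path_org, df_new):
    import pandas as pd
    try:
        # pd.read_csv accepts an S3/HTTP URL when the relevant backend
        # (e.g. s3fs) is installed
        df_org = pd.read_csv(path_org)
    except Exception:
        # no existing record yet; start from an empty table
        df_org = pd.DataFrame()
    return pd.concat([df_org, df_new], ignore_index=True).drop_duplicates()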
def main():
    # the same relative path is used as the local dir and as the S3 prefix
    s3_dir_parent = 'data/JAXA_HIMAWARI/gtiff'
    if not os.path.exists(s3_dir_parent):
        os.makedirs(s3_dir_parent)
    df = get_meta_list_himawari(S3_BUCKET_NAME, s3_dir_parent)
    path_local_csv = os.path.join(s3_dir_parent, FILE_NAME_HEAD + '.csv')
    df.to_csv(path_local_csv, index=False)
    print(path_local_csv)
    url_s3 = transfer_to_s3(path_local=path_local_csv,
                            dir_local_parent=s3_dir_parent,
                            dir_s3_parent=s3_dir_parent,
                            remove_local_file=False,
                            multiprocessing=False,
                            s3_bucket_name=S3_BUCKET_NAME)
    print(url_s3)
def main():
    # the same relative path is used as the local dir and as the S3 prefix
    s3_dir_parent = 'data/GSMaP/picked/standard/hourly'
    if not os.path.exists(s3_dir_parent):
        os.makedirs(s3_dir_parent)
    df = get_meta_list_GSMaP(S3_BUCKET_NAME, s3_dir_parent)
    path_local_csv = os.path.join(s3_dir_parent, FILE_NAME_HEAD + '.csv')
    df.to_csv(path_local_csv, index=False)
    print(path_local_csv)
    url_s3 = transfer_to_s3(path_local=path_local_csv,
                            dir_local_parent=s3_dir_parent,
                            dir_s3_parent=s3_dir_parent,
                            remove_local_file=False,
                            multiprocessing=False,
                            s3_bucket_name=S3_BUCKET_NAME)
    print(url_s3)
def _exec_get_raster(self, ftp_path, convert_to_gtiff=True,
                     list_band_filter=['//Grid/monthlyPrecipRate'],
                     pick_part=True, x_min=120.61, y_min=22.29,
                     x_max=151.35, y_max=46.8, epsg_code=4326,
                     area_name='japan',
                     dir_parent_picked_local=DIR_PARENT_PICKED,
                     save_s3=False, dir_s3_parent=None,
                     remove_local_files=False, s3_bucket_name=None):
    list_out_path = []
    path_local = self.download_from_ftp(ftp_path)
    if not os.path.exists(dir_parent_picked_local):
        os.makedirs(dir_parent_picked_local)
    if convert_to_gtiff:
        list_path_converted = self.convert_from_hdf_to_gtiff(
            path_local, list_band_filter=list_band_filter)
        if pick_part:
            shapes = shapes_from_bbox(x_min=x_min, y_min=y_min,
                                      x_max=x_max, y_max=y_max,
                                      epsg_code=epsg_code)
            for path_converted in list_path_converted:
                # append the area name before the extension,
                # e.g. foo.tif -> foo_japan.tif
                filename = os.path.basename(path_converted)
                root, ext = os.path.splitext(filename)
                filename = root + '_' + area_name + ext
                path_converted_out = os.path.join(dir_parent_picked_local,
                                                  filename)
                pick_part_raster(path_converted, path_converted_out, shapes)
                list_out_path.append(path_converted_out)
    else:
        list_path_converted = []
    if save_s3:
        if dir_s3_parent is None:
            # mirror the local directory layout on S3
            dir_parent_raw_s3 = str(self.dir_parent_raw_local)
            dir_parent_converted_s3 = str(self.dir_parent_converted_local)
            dir_parent_picked_s3 = str(dir_parent_picked_local)
        else:
            dir_parent_raw_s3 = os.path.join(dir_s3_parent,
                                             self.dir_parent_raw_local)
            dir_parent_converted_s3 = os.path.join(
                dir_s3_parent, self.dir_parent_converted_local)
            dir_parent_picked_s3 = os.path.join(dir_s3_parent,
                                                dir_parent_picked_local)
        transfer_to_s3(path_local,
                       dir_local_parent=self.dir_parent_raw_local,
                       dir_s3_parent=dir_parent_raw_s3,
                       remove_local_file=remove_local_files,
                       multiprocessing=self.processes > 1,
                       s3_bucket_name=s3_bucket_name)
        if len(list_path_converted) > 0:
            for path_converted in list_path_converted:
                transfer_to_s3(
                    path_converted,
                    dir_local_parent=self.dir_parent_converted_local,
                    dir_s3_parent=dir_parent_converted_s3,
                    remove_local_file=remove_local_files,
                    multiprocessing=self.processes > 1,
                    s3_bucket_name=s3_bucket_name)
        if len(list_out_path) > 0:
            for path in list_out_path:
                transfer_to_s3(path,
                               dir_local_parent=dir_parent_picked_local,
                               dir_s3_parent=dir_parent_picked_s3,
                               remove_local_file=remove_local_files,
                               multiprocessing=self.processes > 1,
                               s3_bucket_name=s3_bucket_name)
    # this variant builds no metadata records; it always returns an empty list
    return []
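# Hedged sketch of the bbox clipping used above, assuming shapes_from_bbox
# yields GeoJSON-like geometries and pick_part_raster crops a GeoTIFF to
# them; written with rasterio, which may or may not be what the repo's
# helpers actually use.
def _pick_part_raster_sketch(path_in, path_out, shapes):
    import rasterio
    from rasterio.mask import mask
    with rasterio.open(path_in) as src:
        # crop=True shrinks the output window to the bounds of `shapes`
        out_image, out_transform = mask(src, shapes, crop=True)
        out_meta = src.meta.copy()
        out_meta.update(height=out_image.shape[1],
                        width=out_image.shape[2],
                        transform=out_transform)
    with rasterio.open(path_out, 'w', **out_meta) as dst:
        dst.write(out_image)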
def get_data(job_id, api_token, url_historical=URL_HISTORICAL,
             max_wait_time=60, random_wait=True, dir_save=DIR_SAVE,
             filename='sample', out_format='CSV', save_s3=False,
             dir_s3_parent=DIR_S3_PARENT, remove_local_file=False,
             processes=1, s3_bucket_name=S3_BUCKET_NAME):
    """
    get data from spire

    Args:
        job_id (str): job id
        api_token (str): spire api token
        url_historical (str): URL of historical API
        max_wait_time (int): maximum waiting interval (sec)
        random_wait (bool): add a few seconds of random jitter to each wait
        dir_save (str): dir path for saving data
        filename (str): filename without ext
        out_format (str): Specifies the format of the downloadable files.
            Must be one of these options:
            'CSV' (encoded as UTF-8, and separated by a comma)
            'JSON' (encoded as UTF-8 and new line delimited)
        save_s3 (bool): if True, upload the downloaded file to S3
        dir_s3_parent (str): parent dir on S3
        remove_local_file (bool): remove the local file after the S3 upload
        processes (int): number of processes (multiprocessing if > 1)
        s3_bucket_name (str): S3 bucket name

    Returns:
        path to the downloaded data (local path, or S3 URL when save_s3
        is True)
    """
    # Poll the job status, backing off by 15 s per attempt up to
    # max_wait_time.
    wait_time = 0
    data = {'job_state': 'RUNNING'}
    while data['job_state'] != 'DONE':
        if (max_wait_time is not None) and (max_wait_time <= wait_time):
            wait_time = int(max_wait_time)
        else:
            wait_time += 15
        if random_wait:
            time.sleep(wait_time + random.randrange(-5, 5))
        else:
            time.sleep(wait_time)
        data = check_status(job_id, api_token, url_historical=url_historical)
        print('Job ID: ', job_id, ' Job State: ', data['job_state'])
    if data['job_state'] == 'DONE':
        dl_url = data['download_urls'][0]
        # GET request to download the data and write it to a file in dir_save.
        r = requests.get(dl_url, allow_redirects=True)
        path_temp = os.path.join(dir_save, filename)
        if out_format == 'CSV':
            path = path_temp + '.csv'
        elif out_format == 'JSON':
            path = path_temp + '.json'
        else:
            print('out_format should be CSV or JSON')
            return None
        if not os.path.exists(dir_save):
            os.makedirs(dir_save)
        with open(path, 'wb') as f:
            f.write(r.content)
        # TODO: thin out the data here if needed
        # (read the CSV and keep one record per second).
        if save_s3:
            path = transfer_to_s3(path,
                                  dir_local_parent=dir_save,
                                  dir_s3_parent=dir_s3_parent,
                                  remove_local_file=remove_local_file,
                                  multiprocessing=processes > 1,
                                  s3_bucket_name=s3_bucket_name)
        return path
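# Hedged usage sketch (assumptions: the API token is read from a
# SPIRE_API_TOKEN environment variable, and the job id comes from a prior
# job-creation call not shown here; both values are placeholders).
def _example_get_data():
    import os
    api_token = os.environ.get('SPIRE_API_TOKEN', '')
    path = get_data(job_id='0123456789abcdef',  # hypothetical job id
                    api_token=api_token,
                    filename='ais_sample',      # hypothetical filename
                    out_format='CSV',
                    save_s3=False)
    print(path)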