Example #1
    def exec_get_raster(self,
                        ftp_path,
                        convert_to_gtiff,
                        product,
                        layer_name,
                        save_s3=False,
                        remove_local_files=False):
        path_local = self.download_from_ftp(ftp_path)
        if path_local is None:
            # download failed; nothing to convert or upload
            return []

        dict_meta_info_raw = self.get_dict_meta_info_from_filename(
            path_local, product)

        # convert to gtiff
        if convert_to_gtiff:
            list_dict_meta_info_converted = get_single_layer_gtiff(
                path_local,
                layer_name=layer_name,
                dir_save=self.dir_parent_converted_local,
                dict_meta_info_raw=dict_meta_info_raw)
        else:
            list_dict_meta_info_converted = []

        # transfer the raw file (and any converted files) to S3
        if save_s3:
            url_s3 = transfer_to_s3(path_local,
                                    dir_local_parent=self.dir_parent_raw_local,
                                    dir_s3_parent=self.dir_parent_raw_s3,
                                    remove_local_file=remove_local_files,
                                    multiprocessing=self.processes > 1,
                                    s3_bucket_name=self.s3_bucket_name)
            dict_meta_info_raw['url_s3'] = url_s3
            if remove_local_files:
                dict_meta_info_raw['path_local'] = None

            list_dict_meta_info_converted_out = []
            if len(list_dict_meta_info_converted) > 0:
                for dict_meta_info_converted in list_dict_meta_info_converted:
                    path_converted = dict_meta_info_converted['path_local']
                    url_s3 = transfer_to_s3(
                        path_converted,
                        dir_local_parent=self.dir_parent_converted_local,
                        dir_s3_parent=self.dir_parent_converted_s3,
                        remove_local_file=remove_local_files,
                        multiprocessing=self.processes > 1,
                        s3_bucket_name=self.s3_bucket_name)
                    dict_meta_info_converted['url_s3'] = url_s3
                    if remove_local_files:
                        dict_meta_info_converted['path_local'] = None
                    list_dict_meta_info_converted_out.append(
                        dict_meta_info_converted)
                list_dict_meta_info_converted = deepcopy(
                    list_dict_meta_info_converted_out)
        return list_dict_meta_info_converted
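All of these examples push files to S3 through the same transfer_to_s3 helper, whose implementation is not part of this excerpt. Below is a minimal sketch of what it might look like, inferred only from its call sites (the keyword arguments above and the fact that callers treat the return value as an S3 URL); every detail here is an assumption, not the actual implementation.

import os
import boto3


def transfer_to_s3(path_local,
                   dir_local_parent,
                   dir_s3_parent,
                   remove_local_file=False,
                   multiprocessing=False,
                   s3_bucket_name=None):
    # Sketch only: derive the S3 key by re-rooting the local path under
    # dir_s3_parent, keeping the layout below dir_local_parent.
    rel_path = os.path.relpath(path_local, dir_local_parent)
    key = os.path.join(dir_s3_parent, rel_path).replace(os.sep, '/')

    # Assumption: a fresh client per call, since boto3 clients are not
    # fork-safe and the callers pass a `multiprocessing` flag.
    s3_client = boto3.client('s3')
    s3_client.upload_file(path_local, s3_bucket_name, key)

    if remove_local_file:
        os.remove(path_local)

    # Callers such as exec_get_raster store this as dict_meta_info['url_s3'].
    return 's3://{}/{}'.format(s3_bucket_name, key)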
Example #2
    def save_list(self, filename_list=FILENAME_LIST):
        df_meta_info = pd.DataFrame(self.list_dict_meta_info_converted)

        url_s3_head = get_s3_url_head(self.s3_bucket_name)
        url_s3 = os.path.join(url_s3_head, filename_list)
        df_record_s3 = update_data_list_df(path_org=url_s3,
                                           df_new=df_meta_info)

        # save the updated record locally, then mirror it to S3
        path_record_local = os.path.join(self.dir_parent_converted_local,
                                         filename_list)

        df_record_s3.to_csv(path_record_local, index=False)
        transfer_to_s3(path_local=path_record_local,
                       dir_local_parent=self.dir_parent_converted_local,
                       dir_s3_parent=self.dir_parent_converted_s3,
                       remove_local_file=False,
                       multiprocessing=False,
                       s3_bucket_name=self.s3_bucket_name)

        return df_record_s3
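update_data_list_df and get_s3_url_head are likewise external helpers. A hedged sketch of update_data_list_df, assuming it appends the new rows to the existing list CSV and de-duplicates (pandas can read s3:// URLs directly when s3fs is installed); the actual helper may behave differently.

import pandas as pd


def update_data_list_df(path_org, df_new):
    # Sketch only: append df_new to the existing record and drop duplicates.
    try:
        df_org = pd.read_csv(path_org)
    except OSError:
        # no existing list yet (missing key or unreachable URL)
        df_org = pd.DataFrame()
    df = pd.concat([df_org, df_new], ignore_index=True)
    return df.drop_duplicates().reset_index(drop=True)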
Example #3
def main():
    # the same relative directory layout is used both locally and on S3
    s3_dir_parent = 'data/JAXA_HIMAWARI/gtiff'

    if not os.path.exists(s3_dir_parent):
        os.makedirs(s3_dir_parent)

    df = get_meta_list_himawari(S3_BUCKET_NAME, s3_dir_parent)
    path_local_csv = os.path.join(s3_dir_parent, FILE_NAME_HEAD + '.csv')
    df.to_csv(path_local_csv, index=False)
    print(path_local_csv)
    url_s3 = transfer_to_s3(path_local=path_local_csv,
                            dir_local_parent=s3_dir_parent,
                            dir_s3_parent=s3_dir_parent,
                            remove_local_file=False,
                            multiprocessing=False,
                            s3_bucket_name=S3_BUCKET_NAME)
    print(url_s3)
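get_meta_list_himawari (and get_meta_list_GSMaP in the next example) are not shown either. A plausible sketch, under the assumption that they simply inventory the objects already sitting under the prefix on S3; the column names here are made up for illustration.

import boto3
import pandas as pd


def get_meta_list_himawari(s3_bucket_name, s3_dir_parent):
    # Sketch only: walk every object under the prefix and collect basic
    # metadata into a DataFrame.
    s3_client = boto3.client('s3')
    paginator = s3_client.get_paginator('list_objects_v2')
    rows = []
    for page in paginator.paginate(Bucket=s3_bucket_name,
                                   Prefix=s3_dir_parent):
        for obj in page.get('Contents', []):
            rows.append({'key': obj['Key'],
                         'size': obj['Size'],
                         'last_modified': obj['LastModified']})
    return pd.DataFrame(rows)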
Example #4
def main():
    # the same relative directory layout is used both locally and on S3
    s3_dir_parent = 'data/GSMaP/picked/standard/hourly'

    if not os.path.exists(s3_dir_parent):
        os.makedirs(s3_dir_parent)

    df = get_meta_list_GSMaP(S3_BUCKET_NAME, s3_dir_parent)
    path_local_csv = os.path.join(s3_dir_parent, FILE_NAME_HEAD + '.csv')
    df.to_csv(path_local_csv, index=False)
    print(path_local_csv)
    url_s3 = transfer_to_s3(path_local=path_local_csv,
                            dir_local_parent=s3_dir_parent,
                            dir_s3_parent=s3_dir_parent,
                            remove_local_file=False,
                            multiprocessing=False,
                            s3_bucket_name=S3_BUCKET_NAME)
    print(url_s3)
Example #5
    def _exec_get_raster(self,
                         ftp_path,
                         convert_to_gtiff=True,
                         list_band_filter=['//Grid/monthlyPrecipRate'],
                         pick_part=True,
                         x_min=120.61,
                         y_min=22.29,
                         x_max=151.35,
                         y_max=46.8,
                         epsg_code=4326,
                         area_name='japan',
                         dir_parent_picked_local=DIR_PARENT_PICKED,
                         save_s3=False,
                         dir_s3_parent=None,
                         remove_local_files=False,
                         s3_bucket_name=None):

        list_out_path = []
        path_local = self.download_from_ftp(ftp_path)

        if not os.path.exists(dir_parent_picked_local):
            os.makedirs(dir_parent_picked_local)

        if convert_to_gtiff:
            list_path_converted = self.convert_from_hdf_to_gtiff(
                path_local, list_band_filter=list_band_filter)
            if pick_part:
                shapes = shapes_from_bbox(x_min=x_min,
                                          y_min=y_min,
                                          x_max=x_max,
                                          y_max=y_max,
                                          epsg_code=epsg_code)
                for path_converted in list_path_converted:
                    filename = os.path.basename(path_converted)
                    root, ext = os.path.splitext(filename)
                    filename = root + '_' + area_name + ext
                    path_converted_out = os.path.join(dir_parent_picked_local,
                                                      filename)
                    pick_part_raster(path_converted, path_converted_out,
                                     shapes)
                    list_out_path.append(path_converted_out)
        else:
            list_path_converted = []

        if save_s3:
            if dir_s3_parent is None:
                dir_parent_raw_s3 = str(self.dir_parent_raw_local)
                dir_parent_converted_s3 = str(self.dir_parent_converted_local)
                dir_parent_picked_s3 = str(dir_parent_picked_local)
            else:
                dir_parent_raw_s3 = os.path.join(dir_s3_parent,
                                                 self.dir_parent_raw_local)
                dir_parent_converted_s3 = os.path.join(
                    dir_s3_parent, self.dir_parent_converted_local)
                dir_parent_picked_s3 = os.path.join(dir_s3_parent,
                                                    dir_parent_picked_local)

            transfer_to_s3(path_local,
                           dir_local_parent=self.dir_parent_raw_local,
                           dir_s3_parent=dir_parent_raw_s3,
                           remove_local_file=remove_local_files,
                           multiprocessing=self.processes > 1,
                           s3_bucket_name=s3_bucket_name)
            if len(list_path_converted) > 0:
                for path_converted in list_path_converted:
                    transfer_to_s3(
                        path_converted,
                        dir_local_parent=self.dir_parent_converted_local,
                        dir_s3_parent=dir_parent_converted_s3,
                        remove_local_file=remove_local_files,
                        multiprocessing=self.processes > 1,
                        s3_bucket_name=s3_bucket_name)
            if len(list_out_path) > 0:
                for path in list_out_path:
                    transfer_to_s3(path,
                                   dir_local_parent=dir_parent_picked_local,
                                   dir_s3_parent=dir_parent_picked_s3,
                                   remove_local_file=remove_local_files,
                                   multiprocessing=self.processes > 1,
                                   s3_bucket_name=s3_bucket_name)

        # return the picked raster paths collected above
        return list_out_path
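The cropping step relies on shapes_from_bbox and pick_part_raster, which are defined elsewhere. A minimal sketch assuming rasterio and shapely, with names and behavior inferred from the call sites above, not from the actual implementation.

import rasterio
from rasterio.mask import mask
from shapely.geometry import box, mapping


def shapes_from_bbox(x_min, y_min, x_max, y_max, epsg_code=4326):
    # Sketch only: return the bbox as a list of GeoJSON-like geometries,
    # the form rasterio.mask.mask expects. The CRS (epsg_code) is assumed
    # to match the raster being cropped.
    return [mapping(box(x_min, y_min, x_max, y_max))]


def pick_part_raster(path_in, path_out, shapes):
    # Sketch only: crop the raster to the given shapes and write the result.
    with rasterio.open(path_in) as src:
        out_image, out_transform = mask(src, shapes, crop=True)
        out_meta = src.meta.copy()
    out_meta.update(height=out_image.shape[1],
                    width=out_image.shape[2],
                    transform=out_transform)
    with rasterio.open(path_out, 'w', **out_meta) as dst:
        dst.write(out_image)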
Example #6
import os
import random
import time

import requests


def get_data(job_id,
             api_token,
             url_historical=URL_HISTORICAL,
             max_wait_time=60,
             random_wait=True,
             dir_save=DIR_SAVE,
             filename='sample',
             out_format='CSV',
             save_s3=False,
             dir_s3_parent=DIR_S3_PARENT,
             remove_local_file=False,
             processes=1,
             s3_bucket_name=S3_BUCKET_NAME):
    """ get data from spire

    Args:
        job_id (str): job id
        api_token (str): spire api token
        url_historical (str): URL of historical API
        max_wait_time (int): maximum waiting interval (sec)
        dir_save (str): dir path for saving data
        filename (str): filename without ext
        out_format (str): Specifies the format of the downloadable files. Must be one of these options:
            “CSV” (encoded as UTF-8, and separated by a comma)
            “JSON” (encoded as UTF-8 and new line delimited)

    Returns: path to the download data

    """
    # data = check_status(job_id, api_token, url_historical=url_historical)
    wait_time = 0
    data = {'job_state': 'RUNNING'}

    while data['job_state'] != 'DONE':
        if (max_wait_time is not None) and (max_wait_time <= wait_time):
            wait_time = int(max_wait_time)
        else:
            wait_time += 15
        if random_wait:
            time.sleep(wait_time + random.randrange(-5, 5))
        else:
            time.sleep(wait_time)
        data = check_status(job_id, api_token, url_historical=url_historical)
        print('Job ID: ', job_id, '  Job State: ', data['job_state'])
        if data['job_state'] == 'DONE':
            dataurl = data['download_urls']
            dl_url = dataurl[0]
            # GET request to download the data; the file is written under dir_save below.
            r = requests.get(dl_url, allow_redirects=True)
            path_temp = os.path.join(dir_save, filename)
            if out_format == 'CSV':
                path = path_temp + '.csv'
            elif out_format == 'JSON':
                path = path_temp + '.json'
            else:
                print('out_format should be CSV or JSON')
                return

            if not os.path.exists(dir_save):
                os.makedirs(dir_save)
            with open(path, 'wb') as f:
                f.write(r.content)

            # TODO: thin the data out here if needed (read the CSV and keep one record per second)

            if save_s3:
                path = transfer_to_s3(path,
                                      dir_local_parent=dir_save,
                                      dir_s3_parent=dir_s3_parent,
                                      remove_local_file=remove_local_file,
                                      multiprocessing=processes > 1,
                                      s3_bucket_name=s3_bucket_name)

    return path
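Hypothetical usage of get_data; my_job_id would come from a prior job-submission request against the same historical API, and the environment-variable name is made up for illustration. Neither appears in this excerpt.

path = get_data(job_id=my_job_id,
                api_token=os.environ['SPIRE_API_TOKEN'],
                filename='ais_sample',
                out_format='CSV',
                save_s3=True,
                remove_local_file=True)
print(path)  # local path, or the S3 URL when save_s3 is True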