def _load_ctdpy_files(self):
    """Open a ctdpy session on the text files under ``self.temp_dir``.

    The ``.txt`` files returned by ``generate_filepaths`` (called with
    ``only_from_dir=False``) are handed to a ``Session`` that uses the
    ``ctd_stdfmt`` reader. The session is stored in ``self.ctdpy_session``
    and the result of its ``read()`` call in ``self.ctdpy_datasets``.
    """
    txt_paths = generate_filepaths(
        self.temp_dir,
        only_from_dir=False,
        endswith='.txt',  # presumably files in CTD standard format
    )
    self.ctdpy_session = Session(reader='ctd_stdfmt', filepaths=txt_paths)
    self.ctdpy_datasets = self.ctdpy_session.read()
def load_data(self, return_session=False, filters=None, file_name_elements=None):
    """Load CTD standard-format data using ctdpy.

    Collects the ``.txt`` files from ``self.data_directory`` (passing the
    exclusion patterns below as ``not_pattern_list``), optionally narrows
    the selection with ``filters``, reads the remaining files with the
    ``ctd_stdfmt`` reader and passes every key/value pair of the first
    returned dataset to ``self.append_item``.

    Args:
        return_session (bool): If True, return the ctdpy session object.
        filters (dict | None): Filter out files to load based on month,
            serno or ship. Forwarded as keyword arguments to
            ``Filter.add_filter``.
        file_name_elements: Forwarded to ``Filter`` as its second argument;
            only used when ``filters`` is given.

    Returns:
        The ``Session`` instance when ``return_session`` is true,
        otherwise None.
    """
    excluded_patterns = ['delivery_note', 'information', 'metadata', 'sensorinfo']
    paths = list(generate_filepaths(
        self.data_directory,
        not_pattern_list=excluded_patterns,
        endswith='.txt',
        only_from_dir=True,
    ))

    if filters:
        # The filter works on bare file names, not on full paths.
        base_names = [os.path.basename(path) for path in paths]
        name_filter = Filter(base_names, file_name_elements)
        name_filter.add_filter(**filters)
        paths = [
            path for path in paths
            if os.path.basename(path) in name_filter.valid_file_names
        ]

    session = Session(filepaths=paths, reader='ctd_stdfmt')
    loaded = session.read()
    for name, dataset in loaded[0].items():
        self.append_item(name, dataset)

    return session if return_session else None
@author: a002028 """ from ctdpy.core.session import Session from ctdpy.core.utils import generate_filepaths import time from pprint import pprint # GET FILES # base_dir = '...\\Svea_v16 april\\CTD\\data' # tar längre tid att läsa ifrån filtjänst base_dir = r'C:\Utveckling\ctdpy\ctdpy\tests\test_data\exprapp_april_2020' files = generate_filepaths( base_dir, # pattern_list=['.cnv', '.xlsx'], # Both cnv- and metadata-files endswith='.cnv', # Only cnv-files # endswith='.txt', # Presumably CTD-standard format only_from_dir= True, # we exclude search of files from folders under "base_dir" ) # Create SESSION object s = Session( filepaths=files, reader='smhi', ) # READ DELIVERY DATA, CNV, XLSX start_time = time.time() datasets = s.read() print("Datasets loaded--%.3f sec" % (time.time() - start_time)) print('Files loaded:')
Created on 2020-07-10 14:15 @author: a002028 """ from ctdpy.core.session import Session from ctdpy.core.utils import generate_filepaths, get_reversed_dictionary from profileqc.qc import SessionQC import time # GET FILES base_dir = r'C:\Arbetsmapp\datasets\Profile\2020\SHARK_Profile_2020_COD_SMHI\processed_data' files = generate_filepaths( base_dir, endswith='.txt', # Presumably CTD-standard format only_from_dir=False, ) # Create SESSION object s = Session( filepaths=files, reader='ctd_stdfmt', ) # READ DELIVERY DATA, CNV, XLSX start_time = time.time() datasets = s.read() print("Datasets loaded--%.3f sec" % (time.time() - start_time)) # print('Files loaded:') # pprint(list(datasets[0]))
@author: johannes """ from ctdpy.core.session import Session from ctdpy.core.utils import generate_filepaths import time from pprint import pprint # GET FILES base_dir = r'C:\Temp\ctdpy_temp\mw_testdata' # Note the time difference (~ x10) when loading data from fileserver (EXPRAPP) compared to reading from local disc.. files = generate_filepaths( base_dir, endswith='.cnv', # Only cnv-files only_from_dir= True, # we exclude search of files from folders under "base_dir" ) # Create SESSION object s = Session( filepaths=files, reader='smhi', ) # READ DELIVERY DATA, CNV, XLSX start_time = time.time() datasets = s.read() print("Datasets loaded--%.3f sec" % (time.time() - start_time)) print('Files loaded:') pprint(list(datasets[0].keys()))
#!/usr/bin/env python3 """ Created on 2021-11-26 13:51 @author: johannes """ from ctdpy.core.session import Session from ctdpy.core.utils import generate_filepaths # GET FILES base_dir = r'C:\Temp\CTD_DV\test_txt_meta_fmt' files = generate_filepaths(base_dir, pattern_list=['.cnv', '.txt']) # Create SESSION object s = Session( filepaths=files, reader='smhi', ) datasets = s.read() print(list(datasets[0])) print(list(datasets[1])) # SAVE DATA ACCORDING TO CTD STANDARD FORMAT (TXT), but "keep_original_file_names" data_path = s.save_data( datasets, writer='ctd_standard_template', keep_original_file_names=True, return_data_path=True,
# Copyright (c) 2020 SMHI, Swedish Meteorological and Hydrological Institute
# License: MIT License (see LICENSE.txt or http://opensource.org/licenses/mit).
"""
Created on 2021-04-19 10:22

@author: johannes
"""
from ctdpy.core.session import Session
from ctdpy.core.utils import generate_filepaths

# Hard-coded local folder that is searched for input files.
base_dir = r'C:\Temp\CTD_DV\SGU\SGU_upp20_profile_rapportering_20210401'

# Select the '.vp2' files together with the '.xlsx' files
# (presumably the xlsx files carry the metadata -- confirm).
files = generate_filepaths(base_dir, pattern_list=['.vp2', '.xlsx'])

# Read everything with the 'sgus' reader.
s = Session(
    reader='sgus',
    filepaths=files,
)
datasets = s.read()

# Write the datasets with the standard-template writer and keep the
# returned output location so it can be archived.
data_path = s.save_data(
    datasets,
    return_data_path=True,
    writer='ctd_standard_template',
)

s.create_archive(data_path=data_path)
# Copyright (c) 2020 SMHI, Swedish Meteorological and Hydrological Institute.
# License: MIT License (see LICENSE.txt or http://opensource.org/licenses/mit).
"""
Created on 2020-07-10 13:59

@author: a002028
"""
from ctdpy.core.session import Session
from ctdpy.core.utils import generate_filepaths

# Hard-coded local folder that is searched for input files.
base_dir = r'C:\Temp\CTD_DV\qc_NOS_2015\data'

# Select files using the '.cnv' and '.xlsx' patterns.
files = generate_filepaths(base_dir, pattern_list=['.cnv', '.xlsx'])

# Read everything with the 'nos' reader.
s = Session(reader='nos', filepaths=files)
datasets = s.read()

# Saving is currently disabled; the calls are kept for reference.
# s.save_data(
#     datasets[0],
#     writer='metadata_template',
# )
# data_path = s.save_data(
#     datasets,
#     writer='ctd_standard_template',
#     return_data_path=True,
# -*- coding: utf-8 -*-
"""
Created on 2020-02-18 13:42

@author: a002028
"""
import time

from ctdpy.core.session import Session
from ctdpy.core.utils import generate_filepaths

# Hard-coded local folder that is searched for '.cnv' files.
base_dir = r'C:\Temp\CTD_DV\mvp_data'
files = generate_filepaths(base_dir, endswith='.cnv')

# Time only the construction of the session; reading is disabled below.
start_time = time.time()
s = Session(reader='smhi', filepaths=files)
# datasets = s.read()
print("Session--%.3f sec" % (time.time() - start_time))

# TEST PRINTS
# print('SHIPmapping test', s.settings.smap.map_cntry_and_shipc(cntry='34', shipc='AR'))
# print('SHIPmapping test', s.settings.smap.map_shipc('3401'))
# print('SHIPmapping test', s.settings.smap.map_shipc('Aranda'))
# print('SHIPmapping test', s.settings.smap.map_shipc('ARANDA'))
@author: a002028 """ from ctdpy.core.session import Session from ctdpy.core.utils import generate_filepaths import time from pprint import pprint base_dir = r'C:\Utveckling\ctdpy\ctdpy\tests\test_data\exprapp_april_2020' # only_from_dir=False: We accept filesearch from folders within base_dir, # only_from_dir=True: We only accept filesearch base_dir, not from folders within base_dir files = generate_filepaths( base_dir, pattern_list=['.cnv', '.xlsx'], # Both cnv- and metadata-files only_from_dir=False, ) # Create SESSION object start_time = time.time() s = Session( filepaths=files, reader='smhi', ) print("Session--%.3f sec" % (time.time() - start_time)) # READ DELIVERY DATA, CNV, XLSX start_time = time.time() datasets = s.read() print("Datasets loaded--%.3f sec" % (time.time() - start_time))
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 05 08:22:21 2018

@author: a002028
"""
from ctdpy.core.session import Session
from ctdpy.core.utils import generate_filepaths

# Hard-coded local folder that is searched for input files.
base_dir = r'C:\Utveckling\TESTING\data2sqlite\test_data'

# Select files using the '.cnv' and '.xlsx' patterns; only_from_dir is
# switched off for this search.
files = generate_filepaths(
    base_dir,
    only_from_dir=False,
    pattern_list=['.cnv', '.xlsx'],
)

# Read everything with the 'smhi' reader.
s = Session(reader='smhi', filepaths=files)
datasets = s.read()
def _create_standard_format_files(self):
    """Create standard-format files and copy them next to the working data.

    Steps performed:

    1. Fetch the working directory and stop (with an info dialog and a log
       entry) if it is not valid or has no files to process.
    2. Prepare ``<working_directory>/standard_format_files``: create it if
       missing; if it already holds files, ask the user whether the old
       ones should be deleted.
    3. Read the ``.cnv``/``.xlsx`` files of the working directory with the
       ctdpy ``smhi`` reader and save them with the
       ``ctd_standard_template`` writer.
    4. Copy every file from the path returned by ``save_data`` into the
       save directory and inform the user.

    Any exception raised along the way is logged and shown in an error
    dialog instead of propagating.

    Returns:
        None
    """
    dialog_title = 'Create standard files'
    try:
        working_directory = self._get_working_directory()
        if not self._is_validate_working_directory(working_directory):
            messagebox.showinfo(dialog_title, 'Not a valid working directory')
            self.logger.info('Not a valid working directory')
            return
        if not self._working_directory_has_files_for_creating_standard_fromat():
            messagebox.showinfo(
                dialog_title, 'No files to process in working directory')
            self.logger.info(
                f'No files in working directory: {working_directory}')
            return

        save_directory = Path(working_directory, 'standard_format_files')
        if not save_directory.exists():
            os.makedirs(save_directory)
        elif os.listdir(save_directory):
            delete_old = messagebox.askyesno(
                dialog_title,
                'Output files already exist. Do you want to delete the old ones?'
            )
            if not delete_old:
                messagebox.showinfo(dialog_title, 'Aborted by user')
                # Fix: actually abort. Previously execution continued after
                # this message and new files were copied over the old ones
                # although the user had declined.
                return
            self._delete_files_in_directory(save_directory)

        files = generate_filepaths(working_directory,
                                   pattern_list=['.cnv', '.xlsx'],
                                   only_from_dir=True)
        session = ctdpy_session.Session(filepaths=files, reader='smhi')

        start_time = time.time()
        datasets = session.read()
        self.logger.debug("Datasets loaded--%.3f sec" %
                          (time.time() - start_time))

        start_time = time.time()
        # NOTE: no save_path is passed; the files are written to the path
        # that save_data returns and are copied to save_directory afterwards.
        data_path = session.save_data(
            datasets,
            writer='ctd_standard_template',
            return_data_path=True,
        )
        for file_name in os.listdir(data_path):
            source_path = Path(data_path, file_name)
            target_path = Path(save_directory, file_name)
            shutil.copy2(source_path, target_path)
        self.logger.debug(
            f"Datasets saved in {time.time() - start_time} sec at location: {data_path}. Files copied to: {save_directory}"
        )
        messagebox.showinfo(
            dialog_title,
            f'Standard format files created in directory: {save_directory}'
        )
    except Exception as e:
        # Top-level GUI boundary: report the failure instead of crashing.
        self.logger.error(e)
        messagebox.showerror('Internal error', e)