def concat(self, output_txt=False, out_file='', file_filter='', filter2='', progress_bar=True):
    """Combine exported rwd files (in txt format) into a single dataframe.

    Reads every ``*.txt`` file in ``self.txt_dir`` that matches both text
    filters, concatenates the data, and stores the de-duplicated result on
    ``self.data``.

    Parameters
    ----------
    output_txt : bool
        set to True to save a concatenated text file
    out_file : str
        filepath, absolute or relative; defaults to
        "<data_type>_<YYYY-mm-dd>.txt" when output_txt is True and blank
    file_filter : str
        text filter applied to file names (e.g. site number)
    filter2 : str
        secondary text filter applied to file names
    progress_bar : bool
        show progress bar [True] or per-file listing [False]
    """
    # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
    # Local import keeps this fix self-contained (harmless if pandas is
    # already imported at module level).
    import pandas as pd

    self.file_filter = file_filter
    if self.filter2 == '':
        self.filter2 = filter2
    # normalize the directory path separator for the current platform
    if check_platform() == 'win32':
        self.txt_dir = windows_folder_path(self.txt_dir)
    else:
        self.txt_dir = linux_folder_path(self.txt_dir)
    first_file = True
    files = sorted(glob(self.txt_dir + '*.txt'))
    self.file_count = len(files)
    self.pad = len(str(self.file_count)) + 1
    self.counter = 1
    self.start_time = datetime.now()
    for f in files:
        if self.file_filter in f and self.filter2 in f:
            if progress_bar:
                draw_progress_bar(self.counter, self.file_count, self.start_time)
            else:
                print("Adding {0}/{1} {2} ... ".format(
                    str(self.counter).rjust(self.pad),
                    str(self.file_count).ljust(self.pad), f),
                    end="", flush=True)
            if first_file:
                first_file = False
                try:
                    base = read_text_data(
                        filename=f, data_type=self.data_type,
                        file_filter=self.file_filter,
                        file_ext=self.file_ext, sep=self.sep)
                    if not progress_bar:
                        print("[OK]")
                except IndexError:
                    print('Only standard headertypes accepted')
                    break
            else:
                file_path = f
                try:
                    s = read_text_data(
                        filename=f, data_type=self.data_type,
                        file_filter=self.file_filter,
                        file_ext=self.file_ext, sep=self.sep)
                    base.data = pd.concat([base.data, s.data], sort=False)
                    if not progress_bar:
                        print("[OK]")
                except Exception:
                    # best-effort concat: report and keep going with the
                    # remaining files rather than aborting the whole run
                    if not progress_bar:
                        print("[FAILED]")
                    print("could not concat {0}".format(file_path))
        self.counter += 1
    if output_txt:
        if out_file == "":
            out_file = f"{self.data_type}_" + datetime.today().strftime("%Y-%m-%d") + ".txt"
        base.data.to_csv(out_file, sep=',', index=False)
    self.out_file = out_file
    try:
        # 's' (and 'base') are unbound when no files matched the filters
        self.ch_info = s.ch_info
        self.ch_list = s.ch_list
        self.data = base.data.drop_duplicates(
            subset=[self.header_sections['data_header']], keep='first')
        self.head = s.head
        self.site_info = s.site_info
        self.filename = s.filename
        self.site_number = self.filename.split("\\")[-1][:4]
        self.format_rwd_site_data()
    except UnboundLocalError:
        print("No files match to contatenate.")
        return None
def single_file(self, rld):
    """Convert a single rld file via the cloud convert service and save the txt export.

    POSTs the base64-encoded file bytes (plus optional nec file) to
    ``convert_url`` and writes the unzipped response into ``self.out_dir``
    with a ``.txt`` extension, then rewrites the file with Windows
    (``\\r\\n``) line endings.

    Parameters
    ----------
    rld : str
        path to the rld file to convert
    """
    try:
        if self.progress_bar:
            draw_progress_bar(self.counter, self.raw_count, self.start_time)
        else:
            print("Processing {0}/{1} ... {2} ... ".format(
                str(self.counter).rjust(self.pad),
                str(self.raw_count).ljust(self.pad),
                os.path.basename(rld)), end="", flush=True)
        self.encoded_rld_bytes = self.prepare_file_bytes(rld)
        if self.nec_file:
            self.encoded_nec_bytes = self.prepare_file_bytes(self.nec_file)
        else:
            self.encoded_nec_bytes = ''
        # refresh the bearer token if the current session has expired
        if not self.token_valid():
            self.session_token, self.session_start_time = self.request_session_token()
        headers = {"Authorization": "Bearer {}".format(self.session_token)}
        self.data = {
            'filebytes': self.encoded_rld_bytes,
            'necfilebytes': self.encoded_nec_bytes,
            'headertype': self.header_type,      # standard | columnonly | none
            'exporttype': self.export_type,      # measurements (default) | samples
            'exportformat': self.export_format,  # csv_zipped (default) | parquet
            'encryptionkey': self.encryption_pass,
            'columnheaderformat': '',            # not implemented yet
        }
        self.resp = requests.post(data=self.data, url=convert_url, headers=headers)
        # the service returns a zip containing a single export file
        zipped_data_file = zipfile.ZipFile(io.BytesIO(self.resp.content))
        name = zipped_data_file.infolist().pop()
        out_filename = os.path.basename(rld).split('.rld')[0] + '.txt'
        filename = os.path.join(self.out_dir, out_filename)
        with open(filename, 'wb') as outputfile:
            outputfile.write(zipped_data_file.read(name))
        try:
            # rewrite with explicit \r\n newlines; context managers ensure
            # the handles are closed (the old code leaked the read handle)
            with open(filename, "r") as txt_in:
                file_contents = txt_in.read()
            with open(filename, "w", newline="\r\n") as txt_out:
                txt_out.write(file_contents)
        except Exception:
            print("Could not convert Windows newline characters properly; file may be unstable")
        if self.progress_bar is False:
            print("[DONE]")
    except Exception as e:
        if self.progress_bar is False:
            print("[FAILED]")
        print('unable to process file: {0}'.format(rld))
        print(e)
        # self.resp does not exist if the failure happened before the POST;
        # guard so we report the original error, not an AttributeError
        if getattr(self, 'resp', None) is not None:
            print(str(self.resp.status_code) + " " + self.resp.reason + "\n")
def concat_txt(self, txt_dir='', file_type='meas', file_filter='',
               filter2='', start_date='1970-01-01', end_date='2150-12-31',
               ch_details=False, output_txt=False, out_file='',
               progress_bar=True, **kwargs):
    """Concatenate all matching text files in txt_dir into one dataframe.

    Files must match file_filter, filter2, file_type, and the date range.
    Note the text filters are blank by default.

    Parameters
    ----------
    txt_dir : str
        directory holding txt files
    file_type : str
        type of export (meas, event, comm, sample, etc...)
    file_filter : str
        text filter for txt files, like site number, etc.
    filter2 : str
        secondary text filter
    start_date : str
        for filtering files to concat based on date "YYYY-mm-dd"
    end_date : str
        for filtering files to concat based on date "YYYY-mm-dd"
    ch_details : bool
        show additional info in ch_info dataframe
    output_txt : bool
        create a txt output of data df
    out_file : str
        filename to write data dataframe to if output_txt = True
    progress_bar : bool
        show bar on concat [True] or list of files [False]

    Returns
    -------
    ch_info : obj
        pandas dataframe of ch_list (below) pulled out of file with
        sympro_txt_read.arrange_ch_info()
    ch_list : list
        list of channel info; can be converted to json w/ import json ...
        json.dumps(fut.ch_info)
    data : obj
        pandas dataframe of all data
    head : obj
        lines at the top of the txt file..., used when rebuilding
        timeshifted files
    site_info : obj
        pandas dataframe of site information
    logger_sn : str
    ipack_sn : str
    logger_type : str
    ipack_type : str
    latitude : float
    longitude : float
    elevation : int
    site_number : str
    site_description : str
    start_date : str
    txt_file_names : list
        list of files included in concatenation

    Examples
    --------
    Read files into nrgpy reader object

    >>> import nrgpy
    >>> reader = nrgpy.sympro_txt_read()
    >>> reader.concat_txt(
            txt_dir='/path/to/txt/files/',
            file_filter='123456',  # site 123456
            start_date='2020-01-01',
            end_date='2020-01-31',
        )
    Time elapsed: 2 s | 33 / 33 [===========================] 100%
    Queue processed
    >>> reader.logger_sn
    '820600019'
    >>> reader.ch_info
        Bearing:  Channel:  Description:  ... Type:       Units:
    0   50.00     1         NRG S1        ... Anemometer  m/s
    1   230.00    2         NRG S1        ... Anemometer  m/s
    ...
    """
    # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
    # Local import keeps this fix self-contained (harmless if pandas is
    # already imported at module level).
    import pandas as pd

    # 'site_filter' is the deprecated name for file_filter; honor it for
    # backward compatibility when file_filter is not given
    if 'site_filter' in kwargs and file_filter == '':
        self.file_filter = kwargs.get('site_filter')
    else:
        self.file_filter = file_filter
    self.ch_details = ch_details
    self.start_date = start_date
    self.end_date = end_date
    self.filter2 = filter2
    self.file_type = file_type
    self.txt_file_names = []
    if check_platform() == 'win32':
        self.txt_dir = windows_folder_path(txt_dir)
    else:
        self.txt_dir = linux_folder_path(txt_dir)
    first_file = True
    files = [
        f for f in sorted(glob(self.txt_dir + '*.txt'))
        if self.file_filter in f and self.filter2 in f
        and date_check(self.start_date, self.end_date, f)
    ]
    self.file_count = len(files)
    self.pad = len(str(self.file_count))
    self.counter = 1
    self.start_time = datetime.now()
    for f in files:
        if self.file_filter in f and self.file_type in f and self.filter2 in f:
            if progress_bar:
                draw_progress_bar(self.counter, self.file_count, self.start_time)
            else:
                print("Adding {0}/{1} ... {2} ... ".format(
                    str(self.counter).rjust(self.pad),
                    str(self.file_count).ljust(self.pad),
                    os.path.basename(f)), end="", flush=True)
            if first_file:
                first_file = False
                try:
                    base = sympro_txt_read(f)
                    if not progress_bar:
                        print("[OK]")
                    self.txt_file_names.append(os.path.basename(f))
                except IndexError:
                    print('Only standard SymPRO headertypes accepted')
                    break
            else:
                file_path = f
                try:
                    s = sympro_txt_read(file_path, ch_details=self.ch_details)
                    base.data = pd.concat([base.data, s.data], sort=False)
                    if not progress_bar:
                        print("[OK]")
                    self.txt_file_names.append(os.path.basename(f))
                except Exception:
                    # best-effort concat: report and continue with the
                    # remaining files rather than aborting the whole run
                    if not progress_bar:
                        print("[FAILED]")
                    print("could not concat {0}".format(os.path.basename(file_path)))
        self.counter += 1
    if out_file != "":
        self.out_file = out_file
    if output_txt:
        if out_file == "":
            # mirror concat(): default to a dated filename instead of
            # attempting to write to the bare directory path
            out_file = f"{self.file_type}_" + datetime.today().strftime("%Y-%m-%d") + ".txt"
            self.out_file = out_file
        base.data.to_csv(os.path.join(txt_dir, out_file), sep=',', index=False)
    try:
        # 's' (and 'base') are unbound when no files matched the filters
        self.ch_info = s.ch_info
        self.ch_list = s.ch_list
        self.array = s.array
        self.data = base.data.drop_duplicates(subset=['Timestamp'], keep='first')
        self.data.reset_index(drop=True, inplace=True)
        self.head = s.head
        self.site_info = s.site_info
        self.format_site_data()
        print("\n")
    except UnboundLocalError:
        print("No files match to contatenate.")
        return None
def shift_timestamps(txt_folder="", out_folder="", file_filter="",
                     start_date="1970-01-01", end_date="2150-12-31",
                     seconds=3600):
    """Takes as input a folder of exported standard text files and time to
    shift in seconds.

    Parameters
    ----------
    txt_folder : str
        path to folder with txt files to shift
    out_folder : str
        where to put the shifted files (in subfolder by default)
    file_filter : str
        filter for restricting file set
    start_date : str
        date filter "YYYY-mm-dd"
    end_date : str
        date filter "YYYY-mm-dd"
    seconds : int
        time in seconds to shift timestamps (default 3600)

    Returns
    -------
    obj
        text files with shifted timestamps; new file names include
        shifted timestamp.
    """
    if out_folder:
        out_dir = out_folder
    else:
        out_dir = os.path.join(txt_folder, "shifted_timestamps")
    os.makedirs(out_dir, exist_ok=True)
    files = [
        f for f in sorted(glob(os.path.join(txt_folder, '*.txt')))
        if file_filter in f and date_check(start_date, end_date, f)
    ]
    file_count = len(files)
    counter = 1
    start_time = datetime.now()
    for f in files:
        try:
            draw_progress_bar(counter, file_count, start_time)
            # NOTE: glob() already returns paths rooted at txt_folder;
            # re-joining with txt_folder (as the old code did) duplicated
            # the folder prefix for relative paths
            fut = sympro_txt_read(filename=f)
            fut.format_site_data()
            fut.data['Timestamp'] = pd.to_datetime(
                fut.data['Timestamp']) + timedelta(seconds=seconds)
            fut.output_txt_file(shift_timestamps=True, standard=False,
                                out_dir=out_dir, out_file=f)
        except pd.errors.EmptyDataError:
            # empty export files are skipped silently
            pass
        except Exception:
            # report the full traceback but keep processing remaining files
            print(traceback.format_exc())
        counter += 1
def single_file(self, rld):
    """Convert a single rld file via the NRG convert service and save the txt export.

    Selects the CONVERT 2.0 service (token-authenticated) when
    ``self.convert_url`` matches ``ConvertServiceUrl``; otherwise falls
    back to the BETA convert endpoint. The zipped response is unpacked
    into ``self.out_dir`` and rewritten with Windows (``\\r\\n``) newlines.

    Parameters
    ----------
    rld : str
        path to the rld file to convert
    """
    try:
        if self.progress_bar:
            draw_progress_bar(self.counter, self.raw_count, self.start_time)
        else:
            print("Processing {0}/{1} ... {2} ... ".format(
                str(self.counter).rjust(self.pad),
                str(self.raw_count).ljust(self.pad),
                os.path.basename(rld)), end="", flush=True)
        # context manager closes the handle (the old code leaked it)
        with open(rld, 'rb') as rld_handle:
            encoded_file_bytes = base64.encodebytes(rld_handle.read())
        if self.convert_url == ConvertServiceUrl:
            # CONVERT 2.0
            if self.nec_file:
                with open(self.nec_file, 'rb') as nec_handle:
                    nec_file_bytes = base64.encodebytes(nec_handle.read())
            else:
                nec_file_bytes = ''
            # refresh the bearer token if the current session has expired
            if not token_valid(self.session_start_time):
                self.session_token, self.session_start_time = request_session_token(
                    self.client_id, self.client_secret)
            headers = {"Authorization": "Bearer {}".format(self.session_token)}
            self.Data = {
                'filebytes': encoded_file_bytes,
                'necfilebytes': nec_file_bytes,
                'headertype': self.header_type,      # standard | columnonly | none
                'exporttype': self.export_type,      # measurements (default) | samples
                'exportformat': self.export_format,  # csv_zipped (default) | parquet
                'encryptionkey': self.encryption_pass,
                'columnheaderformat': '',            # not implemented yet
            }
            self.resp = requests.post(data=self.Data, url=self.convert_url,
                                      headers=headers)
        else:
            # BETA CONVERT
            beta_data = {
                'apitoken': self.token,
                'encryptionpassword': self.encryption_pass,
                'headertype': self.header_type,      # standard | columnonly | none
                'exportformat': self.export_format,  # csv_zipped (default) | parquet
                'filebytearray': encoded_file_bytes,
            }
            self.resp = requests.post(data=beta_data, url=self.convert_url)
        # the service returns a zip containing a single export file
        zipped_data_file = zipfile.ZipFile(io.BytesIO(self.resp.content))
        name = zipped_data_file.infolist().pop()
        out_file_name = "".join(rld.split(self.folder_split)[-1:])[:-4] \
            + '_' + self.export_type + '.txt'
        out_path = os.path.join(self.out_dir, out_file_name)
        with open(out_path, 'wb') as outputfile:
            outputfile.write(zipped_data_file.read(name))
        try:
            # rewrite with explicit \r\n newlines; context managers ensure
            # both handles are closed
            with open(out_path, "r") as txt_in:
                file_contents = txt_in.read()
            with open(out_path, "w", newline="\r\n") as txt_out:
                txt_out.write(file_contents)
        except Exception:
            print("Could not convert Windows newline characters properly; file may be unstable")
        if self.progress_bar == False:
            print("[DONE]")
    except Exception as e:
        if self.progress_bar == False:
            print("[FAILED]")
        print('unable to process file: {0}'.format(rld))
        print(e)
        # self.resp does not exist if the failure happened before the POST;
        # guard so we report the original error, not an AttributeError
        if getattr(self, 'resp', None) is not None:
            print(str(self.resp.status_code) + " " + self.resp.reason + "\n")