def loader(self, file_name, *args, **kwargs):
    """Loads data into a DataSet object and returns it"""
    new_tests = []
    test_no = 1
    channel_index = 1
    channel_number = 1
    creator = "no name"
    item_ID = 1
    schedule_file_name = "no name"
    start_datetime = "2020.02.24 14:58:00"
    test_ID = 1
    test_name = "no name"

    if not os.path.isfile(file_name):
        self.logger.info("Missing file_\n %s" % file_name)
        return None

    self.logger.debug("in loader")
    self.logger.debug("filename: %s" % file_name)

    filesize = os.path.getsize(file_name)
    hfilesize = humanize_bytes(filesize)
    txt = "Filesize: %i (%s)" % (filesize, hfilesize)
    self.logger.debug(txt)

    data = Cell()
    data.cell_no = test_no
    data.loaded_from = file_name
    fid = FileID(file_name)
    data.channel_index = channel_index
    data.channel_number = channel_number
    data.creator = creator
    data.item_ID = item_ID
    data.schedule_file_name = schedule_file_name
    data.start_datetime = start_datetime
    data.test_ID = test_ID
    data.test_name = test_name
    data.raw_data_files.append(fid)

    length_of_test, normal_df = load_nda()

    # no summary statistics are generated here; use an empty frame as a placeholder
    empty_df = pd.DataFrame()
    data.summary = empty_df
    data.raw = normal_df
    data.raw_data_files_length.append(length_of_test)

    data = self._post_process(data)
    data = self.identify_last_data_point(data)

    new_tests.append(data)
    return new_tests
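
# Hedged usage sketch: the enclosing class name "NdaLoader" and the file path
# below are illustrative assumptions only; they are not defined in this file.
#
#     nda_loader = NdaLoader()
#     cells = nda_loader.loader("20200224_cell_01.nda")
#     if cells:
#         cell = cells[0]
#         print(cell.raw.head())              # raw (normal) data as a pandas DataFrame
#         print(cell.raw_data_files_length)   # number of data points loaded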
def _init_data(self, file_name, global_data_df, test_no):
    data = Cell()
    data.cell_no = test_no
    data.loaded_from = file_name
    fid = FileID(file_name)
    # name of the .res file it is loaded from:
    # data.parent_filename = os.path.basename(file_name)
    data.channel_index = int(
        global_data_df[self.headers_global["channel_index_txt"]][test_no]
    )
    data.channel_number = int(
        global_data_df[self.headers_global["channel_number_txt"]][test_no]
    )
    data.creator = global_data_df[self.headers_global["creator_txt"]][test_no]
    data.item_ID = global_data_df[self.headers_global["item_id_txt"]][test_no]
    data.schedule_file_name = global_data_df[
        self.headers_global["schedule_file_name_txt"]
    ][test_no]
    data.start_datetime = global_data_df[
        self.headers_global["start_datetime_txt"]
    ][test_no]
    data.test_ID = int(global_data_df[self.headers_normal.test_id_txt][test_no])
    data.test_name = global_data_df[self.headers_global["test_name_txt"]][test_no]
    data.raw_data_files.append(fid)
    return data
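
# Hedged usage sketch: inside a loader, each test found in the global table can be
# initialised with this helper (the surrounding variable names mirror the arbin
# loader further down and are assumptions, not part of this helper):
#
#     for test_no in range(number_of_sets):
#         data = self._init_data(file_name, global_data_df, test_no)
#         data.raw = normal_df
#         new_tests.append(data)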
def loader(self, file_name, bad_steps=None, **kwargs):
    """Loads data from biologics .mpr files.

    Args:
        file_name (str): path to .mpr file.
        bad_steps (list of tuples): (c, s) tuples of steps s
            (in cycle c) to skip loading.

    Returns:
        new_tests (list of data objects)
    """
    new_tests = []
    if not os.path.isfile(file_name):
        self.logger.info("Missing file_\n %s" % file_name)
        return None

    filesize = os.path.getsize(file_name)
    hfilesize = humanize_bytes(filesize)
    txt = "Filesize: %i (%s)" % (filesize, hfilesize)
    self.logger.debug(txt)

    # creating temporary file and connection
    temp_dir = tempfile.gettempdir()
    temp_filename = os.path.join(temp_dir, os.path.basename(file_name))
    shutil.copy2(file_name, temp_dir)
    self.logger.debug("tmp file: %s" % temp_filename)
    self.logger.debug("HERE WE LOAD THE DATA")

    data = Cell()
    fid = FileID(file_name)

    # div parameters and information (probably load this last)
    test_no = 1
    data.cell_no = test_no
    data.loaded_from = file_name

    # some overall prms
    data.channel_index = None
    data.channel_number = None
    data.creator = None
    data.item_ID = None
    data.schedule_file_name = None
    data.start_datetime = None
    data.test_ID = None
    data.test_name = None
    data.raw_data_files.append(fid)

    # --------- read raw-data (normal-data) -------------------------
    self.logger.debug("reading raw-data")
    self.mpr_data = None
    self.mpr_log = None
    self.mpr_settings = None

    self._load_mpr_data(temp_filename, bad_steps)
    length_of_test = self.mpr_data.shape[0]
    self.logger.debug(f"length of test: {length_of_test}")

    self.logger.debug("renaming columns")
    self._rename_headers()

    # --------- stats-data (summary-data) -------------------------
    summary_df = self._create_summary_data()

    if summary_df.empty:
        txt = "\nCould not find any summary (stats-file)!"
        txt += " (summary_df.empty = True)"
        txt += "\n -> issue make_summary(use_cellpy_stat_file=False)"
        warnings.warn(txt)

    data.summary = summary_df
    data.raw = self.mpr_data
    data.raw_data_files_length.append(length_of_test)
    new_tests.append(data)

    self._clean_up(temp_filename)
    return new_tests
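
# Hedged usage sketch (the class name "MprLoader" is an assumption): bad_steps
# takes (cycle, step) tuples to skip while loading, as described in the docstring.
#
#     mpr_loader = MprLoader()
#     cells = mpr_loader.loader("cell_01.mpr", bad_steps=[(1, 2), (3, 4)])
#     if cells:
#         print(cells[0].raw.head())
#         print(cells[0].summary.head())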
def loader(self, file_name, bad_steps=None, **kwargs):
    """Loads data from arbin .res files.

    Args:
        file_name (str): path to .res file.
        bad_steps (list of tuples): (c, s) tuples of steps s
            (in cycle c) to skip loading.

    Returns:
        new_tests (list of data objects)
    """
    # TODO: @jepe - insert kwargs - current chunk, only normal data, etc
    if DEBUG_MODE:
        time_0 = time.time()

    new_tests = []
    if not os.path.isfile(file_name):
        self.logger.info("Missing file_\n %s" % file_name)
        return None

    self.logger.debug("in loader")
    self.logger.debug("filename: %s" % file_name)

    filesize = os.path.getsize(file_name)
    hfilesize = humanize_bytes(filesize)
    txt = "Filesize: %i (%s)" % (filesize, hfilesize)
    self.logger.debug(txt)

    if (
        filesize > prms.Instruments.Arbin.max_res_filesize
        and not prms.Reader.load_only_summary
    ):
        error_message = "\nERROR (loader):\n"
        error_message += "%s > %s - File is too big!\n" % (
            hfilesize,
            humanize_bytes(prms.Instruments.Arbin.max_res_filesize),
        )
        error_message += "(edit prms.Instruments.Arbin['max_res_filesize'])\n"
        print(error_message)
        return None

    table_name_global = TABLE_NAMES["global"]
    table_name_stats = TABLE_NAMES["statistic"]
    table_name_normal = TABLE_NAMES["normal"]

    # creating temporary file and connection
    temp_dir = tempfile.gettempdir()
    temp_filename = os.path.join(temp_dir, os.path.basename(file_name))
    shutil.copy2(file_name, temp_dir)
    self.logger.debug("tmp file: %s" % temp_filename)

    use_mdbtools = False
    if use_subprocess:
        use_mdbtools = True
    if is_posix:
        use_mdbtools = True

    # windows with same python bit as windows bit (the ideal case)
    if not use_mdbtools:
        constr = self.__get_res_connector(temp_filename)

        if use_ado:
            conn = dbloader.connect(constr)
        else:
            conn = dbloader.connect(constr, autocommit=True)
        self.logger.debug("constr str: %s" % constr)

        self.logger.debug("reading global data table")
        sql = "select * from %s" % table_name_global
        self.logger.debug("sql statement: %s" % sql)
        global_data_df = pd.read_sql_query(sql, conn)
        # col_names = list(global_data_df.columns.values)

    else:
        import subprocess

        if is_posix:
            if is_macos:
                self.logger.debug("\nMAC OSX USING MDBTOOLS")
            else:
                self.logger.debug("\nPOSIX USING MDBTOOLS")
        else:
            self.logger.debug("\nWINDOWS USING MDBTOOLS-WIN")

        # creating tmp-filenames
        temp_csv_filename_global = os.path.join(temp_dir, "global_tmp.csv")
        temp_csv_filename_normal = os.path.join(temp_dir, "normal_tmp.csv")
        temp_csv_filename_stats = os.path.join(temp_dir, "stats_tmp.csv")

        # making the cmds
        mdb_prms = [
            (table_name_global, temp_csv_filename_global),
            (table_name_normal, temp_csv_filename_normal),
            (table_name_stats, temp_csv_filename_stats),
        ]

        # executing cmds
        for table_name, tmp_file in mdb_prms:
            with open(tmp_file, "w") as f:
                subprocess.call(
                    [sub_process_path, temp_filename, table_name], stdout=f
                )
                self.logger.debug(f"ran mdb-export {str(f)} {table_name}")

        # use pandas to load in the data
        global_data_df = pd.read_csv(temp_csv_filename_global)

    tests = global_data_df[self.headers_normal.test_id_txt]
    number_of_sets = len(tests)
    self.logger.debug("number of datasets: %i" % number_of_sets)

    for counter, test_no in enumerate(range(number_of_sets)):
        if counter > 0:
            self.logger.warning("***MULTITEST-FILE (not recommended)")
            if not ALLOW_MULTI_TEST_FILE:
                break

        data = Cell()
        data.cell_no = test_no
        data.loaded_from = file_name
        fid = FileID(file_name)
        # name of the .res file it is loaded from:
        # data.parent_filename = os.path.basename(file_name)
        data.channel_index = int(
            global_data_df[self.headers_global["channel_index_txt"]][test_no]
        )
        data.channel_number = int(
            global_data_df[self.headers_global["channel_number_txt"]][test_no]
        )
        data.creator = global_data_df[self.headers_global["creator_txt"]][test_no]
        data.item_ID = global_data_df[self.headers_global["item_id_txt"]][test_no]
        data.schedule_file_name = global_data_df[
            self.headers_global["schedule_file_name_txt"]
        ][test_no]
        data.start_datetime = global_data_df[
            self.headers_global["start_datetime_txt"]
        ][test_no]
        data.test_ID = int(
            global_data_df[self.headers_normal.test_id_txt][test_no]
        )
        data.test_name = global_data_df[self.headers_global["test_name_txt"]][test_no]
        data.raw_data_files.append(fid)

        self.logger.debug("reading raw-data")

        if not use_mdbtools:
            # --------- read raw-data (normal-data) ------------------------
            length_of_test, normal_df = self._load_res_normal_table(
                conn, data.test_ID, bad_steps
            )
            # --------- read stats-data (summary-data) ---------------------
            sql = "select * from %s where %s=%s order by %s" % (
                table_name_stats,
                self.headers_normal.test_id_txt,
                data.test_ID,
                self.headers_normal.data_point_txt,
            )
            summary_df = pd.read_sql_query(sql, conn)

            if counter > number_of_sets:
                self._clean_up_loadres(None, conn, temp_filename)

        else:
            normal_df = pd.read_csv(temp_csv_filename_normal)
            # filter on test ID
            normal_df = normal_df[
                normal_df[self.headers_normal.test_id_txt] == data.test_ID
            ]
            # sort on data point
            if prms._sort_if_subprocess:
                normal_df = normal_df.sort_values(self.headers_normal.data_point_txt)
            length_of_test = normal_df.shape[0]

            summary_df = pd.read_csv(temp_csv_filename_stats)

            # clean up
            for f in [
                temp_filename,
                temp_csv_filename_stats,
                temp_csv_filename_normal,
                temp_csv_filename_global,
            ]:
                if os.path.isfile(f):
                    try:
                        os.remove(f)
                    except OSError as e:
                        # OSError covers WindowsError (undefined on posix)
                        self.logger.warning(f"could not remove tmp-file\n{f} {e}")

        if summary_df.empty and prms.Reader.use_cellpy_stat_file:
            txt = "\nCould not find any summary (stats-file)!"
            txt += "\n -> issue make_summary(use_cellpy_stat_file=False)"
            logging.debug(txt)
        # normal_df = normal_df.set_index("Data_Point")

        data.summary = summary_df

        if DEBUG_MODE:
            mem_usage = normal_df.memory_usage()
            logging.debug(
                f"memory usage for "
                f"loaded data: \n{mem_usage}"
                f"\ntotal: {humanize_bytes(mem_usage.sum())}"
            )
            logging.debug(f"time used: {(time.time() - time_0):2.4f} s")

        data.raw = normal_df
        data.raw_data_files_length.append(length_of_test)

        data = self._post_process(data)
        new_tests.append(data)

    new_tests = self._inspect(new_tests)
    return new_tests
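
# The subprocess branch above shells out to mdbtools: it is equivalent to running
# mdb-export on the command line and redirecting stdout to a csv file, e.g.
# (the file and table names below are placeholders; the real ones come from
# TABLE_NAMES and the temporary copy of the .res file):
#
#     mdb-export /tmp/my_test.res <table_name> > /tmp/normal_tmp.csv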
def loader(self, file_name, bad_steps=None, **kwargs):
    """Loads data from a PEC export file and returns a list of data objects."""
    new_tests = []
    if not os.path.isfile(file_name):
        self.logger.info("Missing file_\n %s" % file_name)
        return None

    filesize = os.path.getsize(file_name)
    hfilesize = humanize_bytes(filesize)
    txt = "Filesize: %i (%s)" % (filesize, hfilesize)
    logging.debug(txt)

    data = Cell()
    fid = FileID(file_name)

    # div parameters and information (probably load this last)
    test_no = 1
    data.cell_no = test_no
    data.loaded_from = file_name

    # some overall prms
    data.channel_index = None
    data.channel_number = None
    data.creator = None
    data.item_ID = None
    data.schedule_file_name = None
    data.test_ID = None
    data.test_name = None
    data.raw_data_files.append(fid)

    # --------- read raw-data (normal-data) -------------------------
    self._load_pec_data(file_name, bad_steps)
    data.start_datetime = self.pec_settings["start_time"]
    length_of_test = self.pec_data.shape[0]
    logging.debug(f"length of test: {length_of_test}")

    logging.debug("renaming columns")
    self._rename_headers()
    self._convert_units()

    data.raw = self.pec_data
    data.raw_data_files_length.append(length_of_test)
    new_tests.append(data)

    return new_tests
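
# Hedged usage sketch (the class name "PECLoader" and the file path are assumptions):
# the loader delegates parsing to self._load_pec_data(), which is expected to
# populate self.pec_data (a DataFrame) and self.pec_settings (with "start_time").
#
#     pec_loader = PECLoader()
#     cells = pec_loader.loader("cell_01_pec_export.csv", bad_steps=None)
#     if cells:
#         print(cells[0].start_datetime)
#         print(cells[0].raw.head())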
def loader(self, file_name, **kwargs):
    """Loads a custom (csv) data file and returns a list of data objects."""
    new_tests = []
    if not os.path.isfile(file_name):
        self.logger.info("Missing file_\n %s" % file_name)
        return

    # find out strategy (based on structure)
    if self.structure["format"] != "csv":
        raise NotImplementedError

    sep = self.structure.get("sep", prms.Reader.sep)
    if sep is None:
        sep = prms.Reader.sep

    locate_vars_by = self.structure.get("locate_vars_by", "key_value_pairs")
    comment_chars = self.structure.get("comment_chars", ["#", "!"])
    header_row = self.structure.get("start_data", None)
    if header_row is None:
        header_row = self._find_data_start(file_name, sep)

    # parse variables
    var_lines = []
    with open(file_name, "rb") as fp:
        for i, line in enumerate(fp):
            if i < header_row:
                line = line.strip()
                try:
                    line = line.decode()
                except UnicodeDecodeError:
                    logging.debug(f"UnicodeDecodeError: skipping this line: {line}")
                else:
                    # str.startswith needs a tuple (not a list) of prefixes
                    if line.startswith(tuple(comment_chars)):
                        logging.debug(f"Comment: {line}")
                    else:
                        var_lines.append(line)
            else:
                break

    var_dict = dict()
    if locate_vars_by == "key_value_pairs":
        for line in var_lines:
            parts = line.split(sep)
            try:
                var_dict[parts[0]] = parts[1]
            except IndexError as e:
                logging.debug(f"{e}\ncould not split var-value\n{line}")
    else:
        raise NotImplementedError

    data = Cell()
    data.loaded_from = file_name
    fid = self._generate_fid(file_name, var_dict)

    # parsing cellpydata attributes
    for attribute in ATTRS_CELLPYFILE:
        key = self.variables.get(attribute, None)
        # print(f"{attribute} -> {key}")
        if key:
            val = var_dict.pop(key, None)
            if key in ["mass"]:
                val = float(val)
            # print(f"{attribute}: {val}")
            setattr(data, attribute, val)

    data.raw_data_files.append(fid)

    # setting optional attributes (will be implemented later I hope)
    key = self.variables.get("total_mass", None)
    if key:
        total_mass = var_dict.pop(key, None)
        logging.debug("total_mass is given, but not propagated")

    logging.debug(f"unused vars: {var_dict}")

    raw = self._parse_csv_data(file_name, sep, header_row)
    raw = self._rename_cols(raw)
    raw = self._check_cycleno_stepno(raw)
    data.raw_data_files_length.append(raw.shape[0])
    data.summary = None
    data.raw = raw
    new_tests.append(data)

    return new_tests
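
# Hedged configuration sketch: the loader above reads its parsing strategy from
# self.structure and its attribute mapping from self.variables. The dicts below
# are illustrative only (the keys follow what the code looks up; the values are
# made-up examples):
#
#     structure = {
#         "format": "csv",
#         "sep": ";",
#         "locate_vars_by": "key_value_pairs",
#         "comment_chars": ["#", "!"],
#         "start_data": 12,           # header row; found automatically if omitted
#     }
#     variables = {
#         "mass": "mass",             # cellpy attribute -> key in the file header
#         "total_mass": "total_mass",
#     }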