Example #1
def test_only_fid():
    from cellpy.readers.core import FileID
    my_fid_one = FileID()
    # fdv: test-fixture module (defined elsewhere) providing paths to test data
    my_file = fdv.cellpy_file_path
    my_fid_one.populate(my_file)
    my_fid_two = FileID(my_file)
    assert my_fid_one.get_raw()[0] == my_fid_two.get_raw()[0]
    assert my_fid_one.get_size() == my_fid_two.get_size()
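
The test above checks that populating a FileID after construction and populating it via the constructor give the same result. For orientation, here is a minimal sketch of what such a class plausibly looks like, inferred only from the attributes and methods these examples touch; the real cellpy.readers.core.FileID may differ in details such as the get_raw() ordering:

import os


class MinimalFileID:
    """Sketch of a FileID-like container (hypothetical, for illustration)."""

    def __init__(self, filename=None):
        self.name = None
        self.full_name = None
        self.location = None
        self.size = None
        self.last_modified = None
        self.last_accessed = None
        self.last_info_changed = None
        if filename is not None:
            self.populate(filename)

    def populate(self, filename):
        """Fill the attributes from the file's stat information."""
        st = os.stat(filename)
        self.name = os.path.abspath(filename)
        self.full_name = filename
        self.location = os.path.dirname(filename)
        self.size = st.st_size
        self.last_modified = st.st_mtime
        self.last_accessed = st.st_atime
        self.last_info_changed = st.st_ctime  # platform-dependent "info changed" time

    def get_raw(self):
        # ordering is an assumption; the test only relies on element 0
        return [self.name, self.full_name, self.size, self.last_modified,
                self.last_accessed, self.last_info_changed, self.location]

    def get_size(self):
        return self.size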
Example #2
    def _init_data(self, file_name, global_data_df, test_no):
        data = Cell()
        data.cell_no = test_no
        data.loaded_from = file_name
        fid = FileID(file_name)
        # name of the .res file it is loaded from:
        # data.parent_filename = os.path.basename(file_name)
        data.channel_index = int(
            global_data_df[self.headers_global["channel_index_txt"]][test_no])
        data.channel_number = int(
            global_data_df[self.headers_global["channel_number_txt"]][test_no])
        data.creator = global_data_df[
            self.headers_global["creator_txt"]][test_no]
        data.item_ID = global_data_df[
            self.headers_global["item_id_txt"]][test_no]
        data.schedule_file_name = global_data_df[
            self.headers_global["schedule_file_name_txt"]][test_no]
        data.start_datetime = global_data_df[
            self.headers_global["start_datetime_txt"]][test_no]
        data.test_ID = int(
            global_data_df[self.headers_normal.test_id_txt][test_no])
        data.test_name = global_data_df[
            self.headers_global["test_name_txt"]][test_no]
        data.raw_data_files.append(fid)
        return data
Example #3
    def loader(self, file_name, *args, **kwargs):
        """Loads data into a DataSet object and returns it"""

        new_tests = []

        # hard-coded placeholder metadata (not read from the file)
        test_no = 1
        channel_index = 1
        channel_number = 1
        creator = "no name"
        item_ID = 1
        schedule_file_name = "no name"
        start_datetime = "2020.02.24 14:58:00"
        test_ID = 1
        test_name = "no name"

        if not os.path.isfile(file_name):
            self.logger.info("Missing file_\n   %s" % file_name)
            return None

        self.logger.debug("in loader")
        self.logger.debug("filename: %s" % file_name)

        filesize = os.path.getsize(file_name)
        hfilesize = humanize_bytes(filesize)
        txt = "Filesize: %i (%s)" % (filesize, hfilesize)
        self.logger.debug(txt)

        data = Cell()
        data.cell_no = test_no
        data.loaded_from = file_name
        fid = FileID(file_name)
        data.channel_index = channel_index
        data.channel_number = channel_number
        data.creator = creator
        data.item_ID = item_ID
        data.schedule_file_name = schedule_file_name
        data.start_datetime = start_datetime
        data.test_ID = test_ID
        data.test_name = test_name
        data.raw_data_files.append(fid)

        # load_nda and empty_df are assumed to be defined elsewhere in this module
        length_of_test, normal_df = load_nda()

        data.summary = empty_df

        data.raw = normal_df
        data.raw_data_files_length.append(length_of_test)

        data = self._post_process(data)
        data = self.identify_last_data_point(data)

        new_tests.append(data)

        return new_tests
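
Every loader in this listing logs the file size via humanize_bytes. A minimal sketch of such a helper, assuming the common divide-by-1024 convention (the actual cellpy implementation may format differently):

def humanize_bytes(num_bytes, precision=1):
    """Return a human-readable size string, e.g. 2048 -> '2.0 kB' (sketch)."""
    for unit in ("bytes", "kB", "MB", "GB", "TB"):
        if num_bytes < 1024.0 or unit == "TB":
            return "%.*f %s" % (precision, num_bytes, unit)
        num_bytes /= 1024.0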
Example #4
    def loader(self, file_name, bad_steps=None, **kwargs):
        new_tests = []
        if not os.path.isfile(file_name):
            self.logger.info("Missing file_\n   %s" % file_name)
            return None

        filesize = os.path.getsize(file_name)
        hfilesize = humanize_bytes(filesize)
        txt = "Filesize: %i (%s)" % (filesize, hfilesize)
        logging.debug(txt)

        data = DataSet()
        fid = FileID(file_name)

        # misc parameters and information (probably load this last)
        test_no = 1
        data.test_no = test_no
        data.loaded_from = file_name

        # some overall prms
        data.channel_index = None
        data.channel_number = None
        data.creator = None
        data.item_ID = None
        data.schedule_file_name = None
        data.test_ID = None
        data.test_name = None
        data.raw_data_files.append(fid)

        # --------- read raw-data (normal-data) -------------------------

        self._load_pec_data(file_name, bad_steps)
        data.start_datetime = self.pec_settings["start_time"]
        length_of_test = self.pec_data.shape[0]
        logging.debug(f"length of test: {length_of_test}")

        logging.debug("renaming columns")
        self._rename_headers()
        self._convert_units()

        data.dfdata = self.pec_data

        data.raw_data_files_length.append(length_of_test)
        new_tests.append(data)

        return new_tests
Example #5
    def loader(self, file_name, bad_steps=None, **kwargs):
        """Loads data from biologics .mpr files.

        Args:
            file_name (str): path to .mpr file.
            bad_steps (list of tuples): (c, s) tuples of steps s
                (in cycle c) to skip loading.

        Returns:
            new_tests (list of data objects)
        """
        new_tests = []
        if not os.path.isfile(file_name):
            self.logger.info("Missing file_\n   %s" % file_name)
            return None

        filesize = os.path.getsize(file_name)
        hfilesize = humanize_bytes(filesize)
        txt = "Filesize: %i (%s)" % (filesize, hfilesize)
        self.logger.debug(txt)

        # creating temporary file and connection
        temp_dir = tempfile.gettempdir()
        temp_filename = os.path.join(temp_dir, os.path.basename(file_name))
        shutil.copy2(file_name, temp_dir)

        self.logger.debug("tmp file: %s" % temp_filename)
        self.logger.debug("HERE WE LOAD THE DATA")

        data = DataSet()
        fid = FileID(file_name)

        # misc parameters and information (probably load this last)
        test_no = 1
        data.test_no = test_no
        data.loaded_from = file_name

        # some overall prms
        data.channel_index = None
        data.channel_number = None
        data.creator = None
        data.item_ID = None
        data.schedule_file_name = None
        data.start_datetime = None
        data.test_ID = None
        data.test_name = None
        data.raw_data_files.append(fid)

        # --------- read raw-data (normal-data) -------------------------
        self.logger.debug("reading raw-data")
        self.mpr_data = None
        self.mpr_log = None
        self.mpr_settings = None

        self._load_mpr_data(temp_filename, bad_steps)
        length_of_test = self.mpr_data.shape[0]
        self.logger.debug(f"length of test: {length_of_test}")

        self.logger.debug("renaming columns")
        self._rename_headers()
        # ---------  stats-data (summary-data) -------------------------
        summary_df = self._create_summary_data()

        if summary_df.empty:
            txt = "\nCould not find any summary (stats-file)!"
            txt += " (summary_df.empty = True)"
            txt += "\n -> issue make_summary(use_cellpy_stat_file=False)"
            warnings.warn(txt)

        data.dfsummary = summary_df
        data.dfdata = self.mpr_data

        data.raw_data_files_length.append(length_of_test)
        new_tests.append(data)

        self._clean_up(temp_filename)
        return new_tests
Example #6
    def _generate_fid(self, file_name, var_dict):
        fid = FileID()
        last_modified = var_dict.get(
            self.variables["fid_last_modification_time"],
            None,
        )
        size = var_dict.get(
            self.variables["fid_size"],
            None,
        )
        last_accessed = var_dict.get(
            self.variables["fid_last_accessed"],
            None,
        )

        # use the cached metadata when any of it is present; otherwise stat the file
        if any([last_modified, size, last_accessed]):
            fid.name = os.path.abspath(file_name)
            fid.full_name = file_name
            fid.location = os.path.dirname(file_name)

            fid.size = size
            fid.last_modified = last_modified
            fid.last_accessed = last_accessed
            fid.last_info_changed = last_accessed
        else:
            fid.populate(file_name)

        return fid
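
A hypothetical call illustrating both branches of the helper above. The reader instance, the variables mapping, the metadata keys, and the file path are all made up for the example; inside the real reader class, self.variables would already be configured:

# hypothetical keys mirroring the lookups in _generate_fid above
reader.variables = {
    "fid_last_modification_time": "last_modified",
    "fid_size": "size",
    "fid_last_accessed": "last_accessed",
}

# cached metadata available -> attributes are filled straight from var_dict
var_dict = {"last_modified": 1582552680.0, "size": 1024, "last_accessed": 1582552700.0}
fid = reader._generate_fid("data/cell_01.res", var_dict)

# no cached metadata -> falls back to fid.populate(), which stats the file on disk
fid = reader._generate_fid("data/cell_01.res", {})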
Example #7
    def loader(self, file_name, bad_steps=None, **kwargs):
        """Loads data from arbin .res files.

        Args:
            file_name (str): path to .res file.
            bad_steps (list of tuples): (c, s) tuples of steps s (in cycle c)
                to skip loading.

        Returns:
            new_tests (list of data objects)
        """
        # TODO: @jepe - insert kwargs - current chunk, only normal data, etc

        if DEBUG_MODE:
            time_0 = time.time()

        new_tests = []
        if not os.path.isfile(file_name):
            self.logger.info("Missing file_\n   %s" % file_name)
            return None

        self.logger.debug("in loader")
        self.logger.debug("filename: %s" % file_name)

        filesize = os.path.getsize(file_name)
        hfilesize = humanize_bytes(filesize)
        txt = "Filesize: %i (%s)" % (filesize, hfilesize)
        self.logger.debug(txt)
        if (filesize > prms.Instruments.Arbin.max_res_filesize
                and not prms.Reader.load_only_summary):
            error_message = "\nERROR (loader):\n"
            error_message += "%s > %s - File is too big!\n" % (
                hfilesize,
                humanize_bytes(prms.Instruments.Arbin.max_res_filesize),
            )
            error_message += "(edit prms.Instruments.Arbin" "['max_res_filesize'])\n"
            print(error_message)
            return None

        table_name_global = TABLE_NAMES["global"]
        table_name_stats = TABLE_NAMES["statistic"]
        table_name_normal = TABLE_NAMES["normal"]

        # creating temporary file and connection

        temp_dir = tempfile.gettempdir()
        temp_filename = os.path.join(temp_dir, os.path.basename(file_name))
        shutil.copy2(file_name, temp_dir)
        self.logger.debug("tmp file: %s" % temp_filename)

        # mdbtools is needed when subprocess loading is forced, or on POSIX
        use_mdbtools = use_subprocess or is_posix

        # windows with same python bit as windows bit (the ideal case)
        if not use_mdbtools:
            constr = self.__get_res_connector(temp_filename)

            if use_ado:
                conn = dbloader.connect(constr)
            else:
                conn = dbloader.connect(constr, autocommit=True)
            self.logger.debug("constr str: %s" % constr)

            self.logger.debug("reading global data table")
            sql = "select * from %s" % table_name_global
            self.logger.debug("sql statement: %s" % sql)
            global_data_df = pd.read_sql_query(sql, conn)
            # col_names = list(global_data_df.columns.values)

        else:
            import subprocess

            if is_posix:
                if is_macos:
                    self.logger.debug("\nMAC OSX USING MDBTOOLS")
                else:
                    self.logger.debug("\nPOSIX USING MDBTOOLS")
            else:
                self.logger.debug("\nWINDOWS USING MDBTOOLS-WIN")

            # creating tmp-filenames
            temp_csv_filename_global = os.path.join(temp_dir, "global_tmp.csv")
            temp_csv_filename_normal = os.path.join(temp_dir, "normal_tmp.csv")
            temp_csv_filename_stats = os.path.join(temp_dir, "stats_tmp.csv")

            # making the cmds
            mdb_prms = [
                (table_name_global, temp_csv_filename_global),
                (table_name_normal, temp_csv_filename_normal),
                (table_name_stats, temp_csv_filename_stats),
            ]

            # executing cmds
            for table_name, tmp_file in mdb_prms:
                with open(tmp_file, "w") as f:
                    subprocess.call(
                        [sub_process_path, temp_filename, table_name],
                        stdout=f)
                    self.logger.debug(f"ran mdb-export {str(f)} {table_name}")

            # use pandas to load in the data
            global_data_df = pd.read_csv(temp_csv_filename_global)

        tests = global_data_df[self.headers_normal.test_id_txt]
        number_of_sets = len(tests)
        self.logger.debug("number of datasets: %i" % number_of_sets)

        for counter, test_no in enumerate(range(number_of_sets)):
            if counter > 0:
                self.logger.warning("***MULTITEST-FILE (not recommended)")
                if not ALLOW_MULTI_TEST_FILE:
                    break
            data = Cell()
            data.cell_no = test_no
            data.loaded_from = file_name
            fid = FileID(file_name)
            # name of the .res file it is loaded from:
            # data.parent_filename = os.path.basename(file_name)
            data.channel_index = int(global_data_df[
                self.headers_global["channel_index_txt"]][test_no])
            data.channel_number = int(global_data_df[
                self.headers_global["channel_number_txt"]][test_no])
            data.creator = global_data_df[
                self.headers_global["creator_txt"]][test_no]
            data.item_ID = global_data_df[
                self.headers_global["item_id_txt"]][test_no]
            data.schedule_file_name = global_data_df[
                self.headers_global["schedule_file_name_txt"]][test_no]
            data.start_datetime = global_data_df[
                self.headers_global["start_datetime_txt"]][test_no]
            data.test_ID = int(
                global_data_df[self.headers_normal.test_id_txt][test_no])
            data.test_name = global_data_df[
                self.headers_global["test_name_txt"]][test_no]
            data.raw_data_files.append(fid)

            self.logger.debug("reading raw-data")
            if not use_mdbtools:
                # --------- read raw-data (normal-data) ------------------------
                length_of_test, normal_df = self._load_res_normal_table(
                    conn, data.test_ID, bad_steps)
                # --------- read stats-data (summary-data) ---------------------
                sql = "select * from %s where %s=%s order by %s" % (
                    table_name_stats,
                    self.headers_normal.test_id_txt,
                    data.test_ID,
                    self.headers_normal.data_point_txt,
                )
                summary_df = pd.read_sql_query(sql, conn)
                # last dataset: close the connection and remove the temp file
                # (the original condition `counter > number_of_sets` never held)
                if counter == number_of_sets - 1:
                    self._clean_up_loadres(None, conn, temp_filename)
            else:
                normal_df = pd.read_csv(temp_csv_filename_normal)
                # filter on test ID
                normal_df = normal_df[normal_df[
                    self.headers_normal.test_id_txt] == data.test_ID]
                # sort on data point
                if prms._sort_if_subprocess:
                    normal_df = normal_df.sort_values(
                        self.headers_normal.data_point_txt)
                length_of_test = normal_df.shape[0]
                summary_df = pd.read_csv(temp_csv_filename_stats)
                # clean up
                for f in [
                        temp_filename,
                        temp_csv_filename_stats,
                        temp_csv_filename_normal,
                        temp_csv_filename_global,
                ]:
                    if os.path.isfile(f):
                        try:
                            os.remove(f)
                        except OSError as e:
                            # WindowsError exists only on Windows; OSError covers both
                            self.logger.warning(
                                f"could not remove tmp-file\n{f} {e}")

            if summary_df.empty and prms.Reader.use_cellpy_stat_file:
                txt = "\nCould not find any summary (stats-file)!"
                txt += "\n -> issue make_summary(use_cellpy_stat_file=False)"
                logging.debug(txt)
            # normal_df = normal_df.set_index("Data_Point")

            data.summary = summary_df
            if DEBUG_MODE:
                mem_usage = normal_df.memory_usage()
                logging.debug(f"memory usage for "
                              f"loaded data: \n{mem_usage}"
                              f"\ntotal: {humanize_bytes(mem_usage.sum())}")
                logging.debug(f"time used: {(time.time() - time_0):2.4f} s")

            data.raw = normal_df
            data.raw_data_files_length.append(length_of_test)

            data = self._post_process(data)

            new_tests.append(data)

        new_tests = self._inspect(new_tests)

        return new_tests
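
All of these loaders follow the same contract: verify the file, record a FileID, read the raw table, attach summary data, and return a list of data objects (or None when the file is missing). A hedged sketch of driving such a loader directly; the class name and file path are placeholders, since cellpy is normally used through its higher-level cellreader API:

# placeholder names; the loader classes live under cellpy.readers.instruments
loader = ArbinLoader()
tests = loader.loader("data/cell_01.res")
if tests:  # None is returned when the file is missing
    data = tests[0]             # one data object per test in the file
    print(data.test_name, data.start_datetime)
    print(data.raw.head())      # raw (normal) data as a pandas DataFrame
    print(data.raw_data_files)  # the FileID objects recorded during loading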