Exemple #1
0
    def get_data_fromfile(self, data_type, filename):
        """Read a measurement workbook and return the data of the experiment.

        Parameters
        ----------
        data_type : str
            Kind of file to read: "SCADA", "GC1", "INFERNO" or "SPA".
        filename : str
            Path of the Excel workbook.

        Returns
        -------
        pandas.DataFrame, dict or None
            * "SCADA", "GC1", "INFERNO": the rows whose time column (looked up
              in ``Experiment.name_timecolumn``) lies between ``self.date_ini``
              and ``self.date_end``, both inclusive.
            * "SPA": a dict keyed like ``spa_win.sh_dates``; every value is a
              list of ``[v[0], v[1], DataFrame]`` entries, where the DataFrame
              is the sheet named ``v[2]`` of the workbook.
            * ``None`` when the SPA window did not provide ``sh_dates`` (an
              error popup is shown) or when ``data_type`` is not recognised.
        """
        # TODO: accept an explicit list of dates for the cases where a file
        # holds several measurement sets (e.g. GC) inside the experiment time.
        if data_type in ("SCADA", "GC1", "INFERNO"):
            # The SCADA workbook is read from its "_DATA" sheet; the GC1 and
            # INFERNO workbooks are read with the pandas default sheet.
            read_kwargs = {"sheet_name": "_DATA"} if data_type == "SCADA" else {}
            table = pd.read_excel(filename, **read_kwargs)
            time_column = Experiment.name_timecolumn[data_type]
            table[time_column] = pd.to_datetime(table[time_column])
            # The whole file is read and then trimmed, so the experiment dates
            # must be wide enough to contain the measurements of interest.
            not_before_start = table[time_column] >= self.date_ini
            not_after_end = table[time_column] <= self.date_end
            return table[not_before_start & not_after_end]

        if data_type == "SPA":
            # Window where the user assigns the workbook sheets to the times
            # at which the samples were collected.
            spa_win = guiSPA.Ui_MainWindow()
            spa_win.setupUi()
            spa_win.read_file(filename)
            spa_win.MainWindow.show()
            # Keep the Qt event loop alive until the window reports it is done.
            while spa_win.finish_window == False:
                QtCore.QCoreApplication.processEvents()
                time.sleep(0.01)

            try:
                # Per the original notes: dict keyed by "YYYY-MM-DD HH:MM:SS";
                # each value is a list of [GPX, FR/CR, sheet_name] entries.
                sheets_dates = spa_win.sh_dates
            except AttributeError:
                # Only a missing attribute is expected here; anything else
                # (including KeyboardInterrupt) must not be swallowed.
                guiSPA.Message_popup("Error", "Error reading SPA table",
                                     "The sheets were not read from SPA file")
                return None

            print("done with the SPA file!")
            spa_tables = {}
            for sample_time, sheet_entries in sheets_dates.items():
                spa_tables[sample_time] = [[
                    entry[0], entry[1],
                    pd.read_excel(filename, sheet_name=entry[2])
                ] for entry in sheet_entries]
            print("SPA directory created")
            return spa_tables

        # Unknown data_type: nothing to read.
        return None
    def set_point_data(self, collect_data, time_type, date_ini, date_end,
                       delay, db_experiment):
        """Define the time window of the point and collect its per-minute data.

        Sets ``self.delay``, ``self.date_ini`` and ``self.date_end``, rebuilds
        ``self.time_db_pnt`` (one list per field of ``Point.time_db_fields``,
        one entry per minute of the point), fills ``self.data_added`` and
        finally calls ``self.update_db_global``.

        Parameters
        ----------
        collect_data : str
            "AUTOMATIC" collects rows from every database in ``db_experiment``;
            with any other value only the "DATE" and "POINT_ROUTE" lists are
            built and the remaining fields are filled with "No Data".
        time_type : str
            Clock in which ``date_ini``/``date_end`` are expressed: "SCADA" or
            "GC". The "DATE" list is always expressed in SCADA time.
        date_ini, date_end : str
            Start and end of the point, formatted "%Y-%m-%d %H:%M:%S".
        delay : str or float
            Delay in minutes with respect to the SCADA time.
        db_experiment : dict
            Maps a database name to the list of datasets loaded for it
            (DataFrames for SCADA/GC1/INFERNO, dicts keyed by time for SPA).

        Raises
        ------
        ValueError
            If a date does not match the expected format or ``time_type`` is
            neither "GC" nor "SCADA".
        """
        date_ini = datetime.strptime(date_ini, "%Y-%m-%d %H:%M:%S")
        date_end = datetime.strptime(date_end, "%Y-%m-%d %H:%M:%S")
        delay = float(delay)
        if time_type not in ("GC", "SCADA"):
            # Previously this fell through and failed later with an
            # UnboundLocalError on date_i.
            raise ValueError(
                f"time_type must be 'GC' or 'SCADA', got {time_type!r}")

        self.delay = delay
        self.date_ini = date_ini
        self.date_end = date_end

        # delay_db[k]: minutes to add to self.date_ini/self.date_end to obtain
        # the window in the clock of database k.
        delay_db = {k: 0 for k in ["SCADA", "GC1", "INFERNO", "SPA"]}
        if time_type == "GC":
            # The dates are GC times, so the SCADA file is read at time-delay
            # (negative because the SCADA is the real time).
            delay_db["SCADA"] = -delay
            # The point date is always the SCADA time. A datetime cannot be
            # shifted by a bare float, so the delay is turned into a timedelta.
            slot_shift = timedelta(minutes=delay)
        else:
            # The dates are SCADA times: GC, INFERNO and SPA are read at
            # time+delay.
            delay_db["GC1"] = delay_db["INFERNO"] = delay_db["SPA"] = delay
            slot_shift = timedelta(0)
        date_i = self.date_ini - slot_shift
        date_e = self.date_end - slot_shift

        # One (initially empty) list per field; later turned into a pandas
        # dataframe and added to the global time_db.
        self.time_db_pnt = {fd: [] for fd in Point.time_db_fields}

        # "DATE": one slot per minute from date_i to date_e, each floored to
        # the minute.
        while date_i <= date_e:
            t_rounded = datetime(date_i.year, date_i.month, date_i.day,
                                 date_i.hour, date_i.minute, 0)
            self.time_db_pnt["DATE"].append(t_rounded)
            self.time_db_pnt["POINT_ROUTE"].append(self.point_route)
            date_i += timedelta(minutes=1)

        if collect_data == "AUTOMATIC":
            Nentries = {k: 0 for k in Experiment.db_names}
            for k in db_experiment.keys():
                if len(db_experiment[k]) == 0:
                    guiSPA.Message_popup(
                        "Error", "Missed Database",
                        f"the database {k} is missing, please add it to the experiment"
                    )
                    continue

                if k in ["SCADA", "GC1", "INFERNO"]:
                    Nentries[k] = 0
                    time_column = Experiment.name_timecolumn[k]
                    window_start = self.date_ini + timedelta(
                        minutes=delay_db[k])
                    window_end = self.date_end + timedelta(
                        minutes=delay_db[k])
                    # The slots are in SCADA time while delay_db is relative
                    # to the dates given by the user, so the slots are shifted
                    # back before the delay is applied (no-op in SCADA mode).
                    slot_offset = slot_shift + timedelta(minutes=delay_db[k])
                    found_dataset = False
                    # Several files of the same kind may have been loaded;
                    # only those with rows inside the window are used.
                    for db in db_experiment[k]:
                        in_window = (db[time_column] >= window_start) & (
                            db[time_column] <= window_end)
                        if not in_window.any():
                            continue
                        found_dataset = True
                        # One dataframe per slot, so each list entry matches
                        # one row of the final dataframe.
                        for t_i in self.time_db_pnt["DATE"]:
                            slot_start = t_i + slot_offset
                            slot_end = slot_start + timedelta(seconds=59.999)
                            slot_rows = db[(db[time_column] >= slot_start)
                                           & (db[time_column] <= slot_end)]
                            self.time_db_pnt[k].append(slot_rows)
                            if len(slot_rows) > 0:
                                Nentries[k] += 1
                    # NOTE(review): if two datasets both have rows in the
                    # window, time_db_pnt[k] gets more entries than "DATE".
                    if not found_dataset:
                        guiSPA.Message_popup(
                            "Warning", "time error",
                            f"the times defined are not within the database {k}, please add the data within the timeframe or check the time intervals defined"
                        )

                elif k == "SPA":
                    Nentries[k] = 0
                    slot_offset = slot_shift + timedelta(minutes=delay_db[k])
                    for t_i in self.time_db_pnt["DATE"]:
                        t0 = t_i + slot_offset
                        t1 = t0 + timedelta(seconds=59.999)
                        SPA_samples = []
                        # Several SPA files may have been added to the
                        # experiment; the keys of each one are sample times.
                        for db in db_experiment[k]:
                            for t_spa, v_list in db.items():
                                time_SPA = datetime.strptime(
                                    t_spa, "%Y-%m-%d %H:%M:%S")
                                if t0 <= time_SPA <= t1:
                                    SPA_samples.append(
                                        [[t_spa, v[0], v[1], v[2]]
                                         for v in v_list])
                                    # Only the first matching time of each
                                    # file is taken; then move to next file.
                                    break
                        # One list per slot, holding one sub-list per SPA file
                        # that had a sample in the slot.
                        self.time_db_pnt[k].append(SPA_samples)
                        if len(SPA_samples) > 0:
                            Nentries[k] += 1

                # Log shown in the table of databases of the Add_point window:
                # [window start, window end, delay relative to SCADA, entries]
                self.data_added[k] = [
                    self.date_ini + timedelta(minutes=delay_db[k]),
                    self.date_end + timedelta(minutes=delay_db[k]),
                    delay_db[k] + delay * (delay_db["SCADA"] != 0), Nentries[k]
                ]

        # Fields that received nothing get one "No Data" marker per slot.
        for k, v in self.time_db_pnt.items():
            if len(v) == 0:
                self.time_db_pnt[k] = [
                    "No Data" for i in self.time_db_pnt["DATE"]
                ]

        # NOTE(review): self is passed explicitly on top of the bound call;
        # confirm against the signature of update_db_global.
        self.update_db_global(self)
    def add_data(self, data_type, filename, delay, comments=""):
        """Load a data file into the experiment and register its time range.

        The table returned by ``self.get_data_fromfile`` is appended to
        ``self.data_experiment[data_type]``. When it holds data, a tuple
        ``(name, d_min, d_max, delay, comments)`` is appended to
        ``self.data_experiment_info[data_type]``, where ``name`` is
        ``"<data_type>_<index of the table>"`` and ``d_min``/``d_max`` come
        from ``self.get_dates_db``. If the new range intersects one already
        registered the user is asked whether to keep it.

        Parameters
        ----------
        data_type : str
            "SCADA", "GC1", "INFERNO" or "SPA".
        filename : str
            Path of the file to read.
        delay : str
            Delay with respect to the SCADA time; stored unchanged in the
            info tuple.
        comments : str, optional
            Free text stored in the info tuple.

        Raises
        ------
        ValueError
            If ``data_type`` is not one of the supported kinds.
        """
        if data_type not in ("SCADA", "GC1", "INFERNO", "SPA"):
            # Without this guard nothing was appended, yet the "no data"
            # branch below still deleted the last dataset already loaded.
            raise ValueError(f"unsupported data_type: {data_type!r}")

        new_table = self.get_data_fromfile(data_type, filename)
        if new_table is None:
            # get_data_fromfile returns None when the SPA sheets could not be
            # read (it shows its own error popup); there is nothing to add.
            return
        self.data_experiment[data_type].append(new_table)

        # SPA data is a dict keyed by time; the other kinds are dataframes.
        if data_type == "SPA":
            has_data = len(new_table) > 0
        else:
            has_data = len(new_table.index) > 0

        if not has_data:
            # The file has nothing inside the experiment dates: drop it.
            del self.data_experiment[data_type][-1]
            guiSPA.Message_popup(
                "Error", "Time interval error",
                "The time interval of the experiment does not intersect with the time interval of the data in the file selected. Please check the times"
            )
            return

        # -1: always the last table loaded for this data_type.
        d_min, d_max = self.get_dates_db(data_type, -1)
        info_entry = (data_type + "_" +
                      str(len(self.data_experiment[data_type]) - 1), d_min,
                      d_max, delay, comments)
        registered = self.data_experiment_info[data_type]

        overlapped = False
        if registered:
            time_format = "%Y-%m-%d %H:%M:%S"
            new_start = datetime.strptime(d_min, time_format)
            new_end = datetime.strptime(d_max, time_format)
            # Two closed intervals intersect when each one starts before the
            # other ends.
            overlapped = any(
                new_start <= datetime.strptime(d_info[2], time_format)
                and new_end >= datetime.strptime(d_info[1], time_format)
                for d_info in registered)

        if overlapped:
            yesorno = guiSPA.Message_popup(
                "YesorNo", "Time intervals overlapped",
                "The time interval of the new data is overlapping with one of the databases already uploaded. keep it anyway?"
            )
            if yesorno.ret != "Yes":
                del self.data_experiment[data_type][-1]
                guiSPA.Message_popup(
                    "Warning", "Data overlapped",
                    "Data times overlapped. Check the file and upload it again"
                )
                return

        # Every table that is kept gets its info entry, overlapping or not.
        registered.append(info_entry)