def get_data_fromfile(self, data_type, filename):
    """Read one database file and return its content for the experiment.

    Parameters:
        data_type (str): "SCADA", "GC1", "INFERNO" or "SPA".
        filename (str): path of the Excel workbook to read.

    Returns:
        * "SCADA", "GC1", "INFERNO": a pandas DataFrame holding only the rows
          whose time column (``Experiment.name_timecolumn[data_type]``) lies
          within ``[self.date_ini, self.date_end]`` (both ends included).
        * "SPA": a dict ``{time_key: [[entry[0], entry[1], DataFrame], ...]}``
          built from the sheet/date assignment made in the SPA window. It is
          an empty dict when the window did not provide that assignment.
        * any other ``data_type``: ``None``.

    TODO (original author): accept a list of dates for the cases where a
    file holds several measurement sets within the experiment time (e.g. GC).
    """
    if data_type in ("SCADA", "GC1", "INFERNO"):
        # The SCADA workbook is read from its "_DATA" sheet; GC1 and INFERNO
        # files are read from the default (first) sheet.
        if data_type == "SCADA":
            table = pd.read_excel(filename, sheet_name="_DATA")
        else:
            table = pd.read_excel(filename)

        time_column = Experiment.name_timecolumn[data_type]
        table[time_column] = pd.to_datetime(table[time_column])

        # The whole file is read and then trimmed, so the experiment dates
        # must be wide enough to contain the measurements of interest.
        inside_experiment = ((table[time_column] >= self.date_ini)
                             & (table[time_column] <= self.date_end))
        return table[inside_experiment]

    if data_type == "SPA":
        # Opens a window where the user assigns each sheet of the file to the
        # time at which the sample was collected.
        spa_win = guiSPA.Ui_MainWindow()
        spa_win.setupUi()
        spa_win.read_file(filename)
        spa_win.MainWindow.show()

        # Keep the Qt event loop alive until the window reports it is done.
        while not spa_win.finish_window:
            QtCore.QCoreApplication.processEvents()
            time.sleep(0.01)

        try:
            # Per the original author's notes: keys are time strings
            # ("YYYY-MM-DD HH:MM:SS") sorted by time; every value is a list of
            # entries [GPX, FR/CR, sheet_name] (G = GC repetition, P = point,
            # X = SPA sample number).
            sheets_dates = spa_win.sh_dates
        except AttributeError:
            # Only a missing attribute is handled here; any other failure
            # should surface instead of being silently swallowed.
            guiSPA.Message_popup("Error", "Error reading SPA table",
                                 "The sheets were not read from SPA file")
            # An empty mapping lets callers treat this as "no data loaded".
            return {}

        print("done with the SPA file!")
        # Replace every sheet name by the table stored in that sheet.
        tables_by_time = {
            tm: [[v[0], v[1], pd.read_excel(filename, sheet_name=v[2])]
                 for v in v_list]
            for tm, v_list in sheets_dates.items()
        }
        print("SPA directory created")
        return tables_by_time

    return None
def set_point_data(self, collect_data, time_type, date_ini, date_end, delay,
                   db_experiment):
    """Define the time window of the point and collect its data per minute.

    The method stores the point attributes (``delay``, ``date_ini``,
    ``date_end``), rebuilds ``self.time_db_pnt`` with one entry per minute of
    the point, fills ``self.data_added`` with a log of what was collected and
    finally calls ``self.update_db_global(self)``.

    Parameters:
        collect_data (str): only "AUTOMATIC" triggers the collection from
            ``db_experiment``; with any other value just the time slots are
            created and the remaining fields are filled with "No Data".
        time_type (str): clock in which ``date_ini``/``date_end`` are given,
            "SCADA" or "GC".
        date_ini (str): start of the point, "%Y-%m-%d %H:%M:%S".
        date_end (str): end of the point, "%Y-%m-%d %H:%M:%S".
        delay (str | float): delay in minutes with respect to the SCADA time.
        db_experiment (dict): ``{database_name: [loaded entries]}``, as kept
            in the ``data_experiment`` attribute of the experiment. SCADA,
            GC1 and INFERNO entries are DataFrames; SPA entries are dicts
            keyed by a "%Y-%m-%d %H:%M:%S" string.

    Raises:
        ValueError: if ``time_type`` is neither "GC" nor "SCADA", or if a
            date/delay cannot be parsed.
    """
    date_format = "%Y-%m-%d %H:%M:%S"
    date_ini = datetime.strptime(date_ini, date_format)
    date_end = datetime.strptime(date_end, date_format)
    delay = float(delay)
    if time_type not in ("GC", "SCADA"):
        # Previously this surfaced later as an UnboundLocalError.
        raise ValueError(
            f"time_type must be 'GC' or 'SCADA', got {time_type!r}")

    self.delay = delay
    self.date_ini = date_ini
    self.date_end = date_end

    # delay_db[k]: minutes to add to date_ini/date_end (given in the
    # `time_type` clock) to obtain the window to read from database k.
    delay_db = {k: 0 for k in ["SCADA", "GC1", "INFERNO", "SPA"]}
    if time_type == "GC":
        # The SCADA is the real time, so it is read `delay` minutes earlier.
        delay_db["SCADA"] = -delay
        # The point date is always the SCADA time. The delay must be wrapped
        # in a timedelta: a datetime minus a float raises TypeError.
        date_i = self.date_ini - timedelta(minutes=delay)
        date_e = self.date_end - timedelta(minutes=delay)
    else:
        # Dates are SCADA times: GC, INFERNO and SPA are read `delay` later.
        delay_db["GC1"] = delay_db["INFERNO"] = delay_db["SPA"] = delay
        date_i = self.date_ini
        date_e = self.date_end

    # Per-point time table (later merged into the global one): every field
    # ends up with one value per minute slot.
    self.time_db_pnt = {fd: [] for fd in Point.time_db_fields}
    while date_i <= date_e:
        # Slots are floor-rounded to the minute.
        self.time_db_pnt["DATE"].append(
            date_i.replace(second=0, microsecond=0))
        self.time_db_pnt["POINT_ROUTE"].append(self.point_route)
        date_i += timedelta(minutes=1)

    if collect_data == "AUTOMATIC":
        slot_length = timedelta(seconds=59.999)
        Nentries = {k: 0 for k in Experiment.db_names}
        for k in db_experiment:
            if len(db_experiment[k]) == 0:
                guiSPA.Message_popup(
                    "Error", "Missed Database",
                    f"the database {k} is missing, please add it to the experiment"
                )
                continue

            # Offset of the window of database k w.r.t. date_ini/date_end.
            window_shift = timedelta(minutes=delay_db[k])
            # Delay of database k w.r.t. the SCADA clock. The "DATE" slots
            # are SCADA times, so this (and not delay_db[k]) is the offset to
            # apply to a slot; it is also the delay logged in data_added.
            delay_from_scada = delay_db[k] + delay * (delay_db["SCADA"] != 0)
            slot_shift = timedelta(minutes=delay_from_scada)

            if k in ["SCADA", "GC1", "INFERNO"]:
                Nentries[k] = 0
                time_column = Experiment.name_timecolumn[k]
                window_found = False
                # Several files of the same type may have been added; only
                # those intersecting the point window contribute rows.
                for db in db_experiment[k]:
                    stamps = db[time_column]
                    in_window = ((stamps >= self.date_ini + window_shift)
                                 & (stamps <= self.date_end + window_shift))
                    if not in_window.any():
                        continue
                    window_found = True
                    # One (possibly empty) DataFrame per minute slot, so each
                    # list item lines up with a row of the "DATE" field.
                    for t_i in self.time_db_pnt["DATE"]:
                        slot_start = t_i + slot_shift
                        rows = db[(stamps >= slot_start)
                                  & (stamps <= slot_start + slot_length)]
                        self.time_db_pnt[k].append(rows)
                        if len(rows) > 0:
                            Nentries[k] += 1
                if not window_found:
                    # Warn once per database type instead of once per file.
                    guiSPA.Message_popup(
                        "Warning", "time error",
                        f"the times defined are not within the database {k}, please add the data within the timeframe or check the time intervals defined"
                    )
            elif k == "SPA":
                Nentries[k] = 0
                # Parse the time keys of every SPA file once, not per slot.
                spa_files = [[(datetime.strptime(t_spa, date_format), t_spa,
                               v_list) for t_spa, v_list in db.items()]
                             for db in db_experiment[k]]
                for t_i in self.time_db_pnt["DATE"]:
                    t0 = t_i + slot_shift
                    t1 = t0 + slot_length
                    SPA_samples = []
                    for spa_file in spa_files:
                        for time_SPA, t_spa, v_list in spa_file:
                            if t0 <= time_SPA <= t1:
                                SPA_samples.append(
                                    [[t_spa, v[0], v[1], v[2]]
                                     for v in v_list])
                                # Only the first matching time of each file
                                # is taken; the search then moves on to the
                                # next SPA file (two SPAs in series share
                                # the same time).
                                break
                    # TODO (original author): flatten the per-file lists
                    # into a single list.
                    self.time_db_pnt[k].append(SPA_samples)
                    if len(SPA_samples) > 0:
                        Nentries[k] += 1

            # Log shown in the table of databases added to the point:
            # [window start, window end, delay w.r.t. SCADA, slots with data]
            self.data_added[k] = [
                self.date_ini + window_shift,
                self.date_end + window_shift,
                delay_from_scada,
                Nentries[k],
            ]

    # Fields that received nothing get a placeholder per slot so that every
    # field has the same length as "DATE".
    for k, v in self.time_db_pnt.items():
        if len(v) == 0:
            self.time_db_pnt[k] = ["No Data" for _ in self.time_db_pnt["DATE"]]
    self.update_db_global(self)
def add_data(self, data_type, filename, delay, comments=""):
    """Load a database file into the experiment and register its time span.

    The file is read with ``self.get_data_fromfile`` and appended to
    ``self.data_experiment[data_type]``. If it holds data, a tuple
    ``(label, d_min, d_max, delay, comments)`` is appended to
    ``self.data_experiment_info[data_type]``, where ``label`` is
    ``"<data_type>_<index in data_experiment[data_type]>"`` and
    ``d_min``/``d_max`` come from ``self.get_dates_db(data_type, -1)``.
    If it holds no data, or the user rejects an overlapping file, the entry
    just appended is removed again and a popup explains why.

    Parameters:
        data_type (str): "SCADA", "GC1", "INFERNO" or "SPA".
        filename (str): path of the file to read (chosen by the user).
        delay (str): delay with respect to the SCADA time; stored as given.
        comments (str): free text stored with the registered entry.

    Raises:
        ValueError: if ``data_type`` is not one of the supported types
            (previously this ended up deleting an unrelated entry or raising
            a KeyError/IndexError).
    """
    if data_type not in ("SCADA", "GC1", "INFERNO", "SPA"):
        raise ValueError(f"unsupported data_type: {data_type!r}")

    date_format = "%Y-%m-%d %H:%M:%S"
    entries = self.data_experiment[data_type]
    loaded = self.get_data_fromfile(data_type, filename)
    # Appended before the checks because get_dates_db(data_type, -1) reads
    # the last loaded entry.
    entries.append(loaded)

    if loaded is None:
        # The reader gave nothing back (e.g. the SPA dialog failed).
        has_data = False
    elif data_type == "SPA":
        has_data = len(loaded) > 0  # dict keyed by sampling time
    else:
        has_data = len(loaded.index) > 0  # DataFrame trimmed to the experiment

    if not has_data:
        # Nothing of the file lies within the experiment time interval.
        del entries[-1]
        guiSPA.Message_popup(
            "Error", "Time interval error",
            "The time interval of the experiment does not intersect with the time interval of the data in the file selected. Please check the times"
        )
        return

    d_min, d_max = self.get_dates_db(data_type, -1)
    registered = self.data_experiment_info[data_type]

    overlaps = False
    if registered:
        new_start = datetime.strptime(d_min, date_format)
        new_end = datetime.strptime(d_max, date_format)
        # Two closed intervals intersect when each one starts before the
        # other ends. Comparing only against the global min/max of the
        # registered entries flags disjoint files as overlapping.
        overlaps = any(
            new_start <= datetime.strptime(d_info[2], date_format)
            and new_end >= datetime.strptime(d_info[1], date_format)
            for d_info in registered)

    if overlaps:
        yesorno = guiSPA.Message_popup(
            "YesorNo", "Time intervals overlapped",
            "The time interval of the new data is overlapping with one of the databases already uploaded. keep it anyway?"
        )
        if yesorno.ret != "Yes":
            del entries[-1]
            guiSPA.Message_popup(
                "Warning", "Data overlapped",
                "Data times overlapped. Check the file and upload it again")
            return

    # Reached for the first file, for non-overlapping files and for
    # overlapping files the user decided to keep.
    registered.append(
        (f"{data_type}_{len(entries) - 1}", d_min, d_max, delay, comments))