def load_all_presence_caches(self, detection_confidence_requirement=0.666):
    """Load every per-day presence cache available on disk for the experiment.

    Iterates over the fixed experiment period (2016-07-20 .. 2016-09-19),
    and for each day whose presence cache file exists, loads it.

    :param detection_confidence_requirement: minimum detection confidence
        encoded in the cache filename (and used when loading).
    :return: list of ``(date, presence_df)`` tuples, one per cache found.
    """
    # TODO: add cams param with default = [0, 1, 2, 3]
    experiment_start_date = datetime.date(2016, 7, 20)
    experiment_end_date = datetime.date(2016, 9, 19)
    experiment_length = (experiment_end_date - experiment_start_date).days
    presences = []
    for i in tqdm(range(experiment_length + 1)):
        date = experiment_start_date + datetime.timedelta(days=i)
        # Go through all days; collect the caches that exist locally.
        (file_name, file_path) = create_presence_cache_filename(
            date, method='counts',
            detection_confidence_requirement=detection_confidence_requirement,
            cams=[0, 1, 2, 3])
        if Path(file_path).exists():
            # BUG FIX: the confidence requirement is now forwarded to the
            # loader; previously the default (0.666) was always used even
            # when the existence check was done with a different value.
            presences.append((date, self.load_presence_for_date(
                date,
                detection_confidence_requirement=detection_confidence_requirement)))
    print("Collected " + str(len(presences)) + "/"
          + str(experiment_length + 1)
          + " presence caches (all that are currently downloaded).")
    return presences
def load_presence_for_date(self, date, detection_confidence_requirement=0.666):
    """Load the cached presence DataFrame for a single day.

    :param date: the day whose cache should be loaded.
    :param detection_confidence_requirement: confidence value encoded
        in the cache filename.
    :return: the presence DataFrame read from the CSV cache.
    """
    file_name, file_path = create_presence_cache_filename(
        date,
        method='counts',
        detection_confidence_requirement=detection_confidence_requirement,
        cams=[0, 1, 2, 3])
    print('loading ' + file_name)
    return self.load(file_name, type=CacheType.presence, format=CacheFormat.csv)
def load_multiple_day_caches(self, amount=62, type=CacheType.presence,
                             detection_confidence_requirement=0.666,
                             days_delta=0):
    """Load up to ``amount`` consecutive per-day caches, skipping missing files.

    :param amount: number of consecutive days to attempt to load.
    :param type: which cache kind to load (``CacheType.presence`` or
        ``CacheType.gaps``). Name kept for caller compatibility even
        though it shadows the builtin.
    :param detection_confidence_requirement: confidence value encoded
        in the cache filenames.
    :param days_delta: offset in days from the experiment start
        (2016-07-20) at which to begin.
    :return: list of loaded caches; for presence caches, ``(date, df)``
        tuples, for gap caches the loaded objects themselves.
    :raises ValueError: if ``type`` is not a supported cache kind
        (previously this crashed with an unhelpful ``NameError``).
    """
    # TODO: add cams param with default = [0, 1, 2, 3]
    experiment_start_date = datetime.date(2016, 7, 20)
    start_date = experiment_start_date + datetime.timedelta(days=days_delta)
    caches = []
    for i in tqdm(range(amount)):
        date = start_date + datetime.timedelta(days=i)
        # Build the expected cache filename for this day and cache kind.
        if type == CacheType.presence:
            (file_name, file_path) = create_presence_cache_filename(
                date, method='counts',
                detection_confidence_requirement=detection_confidence_requirement,
                cams=[0, 1, 2, 3])
        elif type == CacheType.gaps:
            (file_name, file_path) = create_gaps_cache_filename(
                date,
                detection_confidence_requirement=detection_confidence_requirement)
        else:
            raise ValueError("Unsupported cache type: " + str(type))
        # Only load caches that are actually present on disk.
        if Path(file_path).exists():
            if type == CacheType.presence:
                caches.append((date, self.load_presence_for_date(
                    date,
                    detection_confidence_requirement=detection_confidence_requirement)))
            else:
                caches.append(self.load(file_name, type=type))
    # BUG FIX: the denominator used to be experiment_length + 1 (a constant
    # 62) regardless of how many days were actually requested; it now
    # reflects the `amount` parameter.
    print("Collected " + str(len(caches)) + "/" + str(amount)
          + " per-day caches.")
    return caches
from tqdm import tqdm

#%
# Combine the per-day presence CSV caches into a list of DataFrames.
datetime_start = datetime(2016, 7, 20)  # TODO: set beginning date as default param in file helpers
num_days_to_process = 60
num_intervals_per_hour = 60  # TODO: if considered
num_hours = 24  # TODO: set as default param in file helpers

presence_dfs = []
for i in tqdm(range(num_days_to_process)):
    start_day = datetime_start + timedelta(days=i)
    (csv_name, csv_path) = create_presence_cache_filename(
        num_hours, start_day, num_intervals_per_hour)
    file = Path(csv_path)
    # Idiom fix: `not file.exists()` instead of `== False`.
    if not file.exists():
        print(csv_path + " doesn't exist")
    else:
        # Drop the duplicate index column added by the save/read round-trip,
        # then drop the redundant 'id' column.
        new_presence_df = pd.read_csv(csv_path).iloc[:, 1:]
        new_presence_df = new_presence_df.drop(columns='id')
        presence_dfs.append(new_presence_df)
        # print("Adding df #" + str(i) + ", " + csv_name)

# Saving and loading cache (should not normally be needed, as it seems to
# work faster to use the code above)
# presence_df.to_csv('../../caches/Presence/COMBINED_PRESENCE_59d_24h_from_07-19.csv')
# presence_df = pd.read_csv('../../caches/Presence/COMBINED_PRESENCE_59d_24h_from_07-19.csv', index_col='bee_id')
# Plot a per-bee presence histogram on each axis of the prepared grid.
for idx, axis in enumerate(ax):
    bee = rands[idx]
    axis.set_ylim([0, 125])
    axis.set_xlim([-5, 200])
    axis.set_ylabel('Amount of intervals with given presence detected')
    axis.set_xlabel('Presence (max should be 90) [bee #' + str(bee) + ']')
    pres.iloc[bee].hist(bins=100, figsize=(18, 14), ax=ax[idx])

# In[ ]:
# archive: getting other variants of data
# In[3]:

observ_period = timedelta(hours=1)
# TODO: those are dates of first and last detections;
# consider making the period smaller.
experiment_start_day = datetime(2016, 7, 20)
experiment_end_day = datetime(2016, 9, 19)

datetime_start = datetime(2016, 7, 25)
num_observ_periods = 1  # hours
num_intervals_per_hour = 120

(csv_name, csv_path) = create_presence_cache_filename(
    num_observ_periods,
    datetime(2016, 7, 20),
    num_intervals_per_hour,
    locations=True,
    cam_orientation='back')

(bee_ids_as_ferwar_format, bee_ids_as_beesbookid_format) = get_all_bee_ids()

d = detections_to_presence(
    num_observ_periods,
    datetime_start,
    num_intervals_per_hour,
    bee_ids_as_ferwar_format,
    method='counts',
    detection_confidence_requirement=0.99)
# Summarize the analysis parameters before loading the cache.
total_num_intervals = num_intervals_per_hour * num_hours
print(f"Starting from {datetime_start} with number of hours: {num_hours}")
print(f"Bin size for the trip lengths plot: {bin_size_in_hours}")
print(f"Number of intervals per hour: {num_intervals_per_hour}")
print(f"Rolling win size: {rolling_window_size}")
# (NOTE: First detections are on 20.07.2016, last are 19.09.2016 (3 months duration))

# In[9]:

# Loading the csv of intermediate result (saved from prevoius cell)
# example value: "/mnt/storage/janek/caches/Presence/PRESENCE-2016-08-23_00_num_hours_24_int_size_120.csv"
# NOTE: the presence cache does not yet know what bees it contains!
(csv_name, csv_path) = create_presence_cache_filename(
    num_hours, datetime_start, num_intervals_per_hour)
print('Loading ' + csv_path)
# NOTE: save and read csv adds a duplicate index column, which has to be
# removed with iloc. TODO: find a cleaner way to to solve that.
presence_df = pd.read_csv(csv_path).iloc[:, 1:]
presence_df.shape

# In[10]:

# TODO: Constants cannot be defined twice! (here and DB_TO_DETECTIONS)
# potential solution: google jupyter magic/jupyter constant definition
# Parameters for loading data, currently using known date of 23th, august 2016)