def test_test(model, test_data, test_examples=5):
    # Meta-test-test
    # given a meta-test-trained model, evaluate accuracy on the held-out set
    # of classes used
    x, y = test_data
    with torch.no_grad():
        logits = model(x)
    # report performance per class
    ys = list(divide_chunks(y, test_examples))
    tasks = list(divide_chunks(logits, test_examples))
    t_accs = [
        torch.eq(task.argmax(dim=1), task_ys).sum().item() / test_examples
        for task, task_ys in zip(tasks, ys)
    ]
    return t_accs
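# Every snippet in this section calls a divide_chunks helper that is not
# defined here. The sketch below is an assumption about its behaviour (a
# generator yielding successive n-sized slices of a sequence), consistent with
# the positional and keyword (n=...) calls in the surrounding functions; the
# actual project helper may differ.
def divide_chunks(seq, n):
    """Yield successive n-sized chunks from seq."""
    for i in range(0, len(seq), n):
        yield seq[i:i + n]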
def retrieve_from_nwis(site_codes, start_date, end_date, n_per_chunk=1):
    chunked_list = divide_chunks(site_codes, n_per_chunk)
    df_list = []
    for site_code_chunk in chunked_list:
        d = st.get_streamflow_data(site_code_chunk, start_date, end_date,
                                   'iv', '15T')
        df_list.append(d)
    df_comb = pd.concat(df_list, axis=1)
    return df_comb
def make_blank_weight_grid(catchment_ids, grid_ids, out_zarr):
    catchment_chunk_size = 10000
    chunked_catchments = divide_chunks(catchment_ids, catchment_chunk_size)
    i = 0
    for indices in chunked_catchments:
        print(f'doing chunk {i}', flush=True)
        blank = pd.DataFrame(0, index=indices, columns=grid_ids,
                             dtype='float32')
        col_name = 'nldas_grid_no'
        idx_name = 'nhd_comid'
        chunks = {col_name: 10000, idx_name: 30000}
        ds = convert_df_to_dataset(blank, col_name, idx_name, 'weight', chunks)
        ds.to_zarr(out_zarr, mode='a', append_dim=idx_name)
        # advance the chunk counter used in the progress message
        i += 1
def read_signals(self, max_size):
    data_array = []
    while len(data_array) < max_size:
        # 14 x 640 = 8960
        second_array = []
        while len(second_array) < 1792:
            data = self.socket.recv(
                self.buffer_size).decode('utf-8').split("\r\n")
            for data_chunk in data:
                converted_data = []
                for elem in data_chunk.split(","):
                    try:
                        converted_data.append(int(elem))
                    except ValueError:
                        # skip fragments that are not valid integers
                        pass
                second_array.extend(converted_data)
            time.sleep(0.5)
        data_array.extend(second_array)
    return list(divide_chunks(data_array[:max_size], 14))
def sample_test(self, num_tasks, train_examples=15, device="cuda"):
    assert num_tasks < len(
        self.tasks_test
    ), f"Number of tasks requested is too large: {num_tasks} >= {len(self.tasks_test)}"
    assert (
        train_examples <= 20
    ), f"Number of examples requested is too large: {train_examples} > 20"
    # choose the n tasks to use
    tasks = choice(self.tasks_test, size=num_tasks, replace=False)
    # get the 20 indices of each task (they are sequential)
    task_ids = [range(task * 20, (task + 1) * 20) for task in tasks]
    # split each group of 20 ids into (usually) 15 train and 5 test,
    # unzip to separate train and test sequences
    train_tasks, test_tasks = unzip(
        train_test_split(ids, train_size=train_examples, shuffle=True)
        for ids in task_ids)
    # assemble the train/test trajectories
    train_traj = [
        self.cifar_test[i] for train_task in train_tasks for i in train_task
    ]
    test_traj = [
        self.cifar_test[i] for test_task in test_tasks for i in test_task
    ]
    # test-train examples are divided by task and sent to device (cpu/cuda)
    chunk2device = lambda chunk: [(im.to(device), label.to(device))
                                  for im, label in chunk]
    train_tasks = [
        chunk2device(chunk)
        for chunk in divide_chunks(train_traj, n=train_examples)
    ]
    # test-test examples are collected into one large tensor for one-pass evaluation
    ims, labels = list(zip(*test_traj))
    test_data = (torch.cat(ims).to(device), torch.cat(labels).to(device))
    return train_tasks, test_data, tasks
def record_data(self, task, preprocess=True):
    samples_to_collect = task.get_run_time() * self.sample_rate
    channels = 14
    samples_per_chunk = 80
    chunks = int(samples_to_collect / samples_per_chunk)
    data_array = np.zeros((channels, chunks, samples_per_chunk))
    data = self.signal_reader.read_signals(8960)
    # (640, 14) => (14, 640)
    data = np.array(data).swapaxes(0, 1)
    if preprocess:
        for i, channel_data in enumerate(data):
            processed_data = preprocess_data(channel_data,
                                             sample_rate=128,
                                             notch=True,
                                             bp_filter=True,
                                             artifact_removal=True)
            data_array[i] = list(divide_chunks(processed_data, 80))
    else:
        data_array = data
    # (14, 8, 80) => (14, 80, 8) => (8, 80, 14)
    samples = data_array.swapaxes(1, 2).swapaxes(0, 2)
    labels = [task.get_task_type()] * 8  # all 8 labels have the same target
    # save all data for transfer learning
    if self.transfer_learning:
        self.recorded_data['samples'].append(samples)
        self.recorded_data['labels'].extend(labels)
    task_data = {"samples": samples, "labels": labels}
    return task_data
def get_all_streamflow_data(output_file, sites_file, huc2=None,
                            num_sites_per_chunk=5, start_date="1970-01-01",
                            end_date='2019-01-01', time_scale='H',
                            output_format='zarr', num_site_chunks_write=6,
                            s3=False):
    """
    Gets all streamflow data for a date range for a given huc2. Calls are
    chunked by station.
    :param output_file: [str] path to the csv file or zarr store where the
    data will be stored
    :param sites_file: [str] path to file that contains the nwis site
    information
    :param huc2: [str] zero-padded huc 2 (e.g., "02")
    :param num_sites_per_chunk: [int] the number of sites that will be pulled
    in each web service call
    :param start_date: [str] the start date of when you want the data for
    (e.g., "1980-01-01")
    :param end_date: [str] the end date of when you want the data for
    (e.g., "1990-01-01")
    :param time_scale: [str] Pandas-like time string for the time scale at
    which the data will be aggregated (e.g., 'H' for hour or 'D' for daily)
    :param output_format: [str] the format of the output file. 'csv' or 'zarr'
    :param num_site_chunks_write: [int] the number of site chunks to
    accumulate before writing out to the output file
    :param s3: [bool] whether the output file is stored on S3
    :return: None
    """
    product = get_product_from_time_scale(time_scale)
    site_codes = get_site_codes(sites_file, huc2)
    not_done_sites = get_indices_not_done(output_file, site_codes, 'site_code',
                                          output_format, is_column=False,
                                          s3=s3)
    site_codes_chunked = divide_chunks(not_done_sites, num_sites_per_chunk)

    # loop through site_code_chunks
    chunk_dfs = []
    i = 0
    for site_chunk in site_codes_chunked:
        last_chunk = False
        if site_chunk[-1] == not_done_sites[-1]:
            last_chunk = True
        streamflow_df_sites = None
        # catch if there is a problem on the server retrieving the data
        try:
            streamflow_df_sites = get_streamflow_data(site_chunk, start_date,
                                                      end_date, product,
                                                      time_scale)
        except json.decoder.JSONDecodeError:
            continue
        if streamflow_df_sites is not None:
            chunk_dfs.append(streamflow_df_sites)
            # add the number of stations for which we got data
            i += streamflow_df_sites.shape[1]

        if not i % (num_site_chunks_write * num_sites_per_chunk) or last_chunk:
            print('writing out', flush=True)
            write_out_chunks(chunk_dfs, output_file, output_format)
            chunk_dfs = []
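# A hypothetical invocation of get_all_streamflow_data, reusing the example
# values from the docstring above; the file paths are placeholders.
get_all_streamflow_data(output_file='streamflow_02.zarr',
                        sites_file='nwis_sites.csv',
                        huc2='02',
                        start_date='1980-01-01',
                        end_date='1990-01-01',
                        time_scale='H',
                        output_format='zarr')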
def get_context_data(self, *args, **kwargs):
    context = super().get_context_data(*args, **kwargs)
    context['marketing_list_chunks'] = divide_chunks(
        self.get_queryset().filter(marketing_subscription=True), 90)
    return context
def echo(update, context):
    # "📆 Get today's news immediately"
    if update.message.text == '📆 Отримати миттєві новини за добу':
        digest = get_immediately_digest()
        for item in digest:
            context.bot.send_message(chat_id=update.message.chat_id,
                                     text="{}\n{}".format(item['title'],
                                                          item['link']))
    # "◀ Back": show the main menu as a two-column keyboard
    if update.message.text == '◀ Назад':
        custom_keyboard = divide_chunks(main_menu, 2)
        reply_markup = telegram.ReplyKeyboardMarkup(custom_keyboard)
        context.bot.send_message(chat_id=update.message.chat_id,
                                 text="Головне меню",  # "Main menu"
                                 reply_markup=reply_markup)
    # "📌 Configure categories"
    if update.message.text == '📌 Налаштувати категорії':
        custom_keyboard = divide_chunks(tags, 2)
        reply_markup = telegram.ReplyKeyboardMarkup(custom_keyboard)
        context.bot.send_message(chat_id=update.message.chat_id,
                                 text="Обирайте категорії новин",  # "Choose news categories"
                                 reply_markup=reply_markup)
    # "🕓 Configure time": only offered once categories are chosen
    if update.message.text == '🕓 Налаштувати час':
        if context.user_data.get('categories'):
            custom_keyboard = divide_chunks(times, 2)
            reply_markup = telegram.ReplyKeyboardMarkup(custom_keyboard)
            context.bot.send_message(
                chat_id=update.message.chat_id,
                text="Оберіть час отримання дайджесту",  # "Choose the digest delivery time"
                reply_markup=reply_markup)
        else:
            context.bot.send_message(
                chat_id=update.message.chat_id,
                text="⚠\nСпершу оберіть категорії")  # "Choose categories first"
    # "🔧 My settings": report the stored categories and delivery time
    if update.message.text == '🔧 Мої налаштування':
        if context.user_data and context.user_data.get('categories') and \
                context.user_data.get('time') is not None:
            context.bot.send_message(
                chat_id=update.message.chat_id,
                # "You are subscribed to the categories: ...
                #  Digest delivery time: ... Change settings /reset"
                text="<b>Ви підписані на категорії:</b>\n✅ {}. \n⌚ Час отримання дайджесту: {}. \n• Змінити налаштування /reset\n"
                .format(',\n✅ '.join(context.user_data['categories']),
                        context.user_data['time']),
                parse_mode=telegram.ParseMode.HTML)
        elif load_from_db_by_chat_id(chat_id=update.message.chat_id):
            # fall back to the settings stored in the database
            record = load_from_db_by_chat_id(chat_id=update.message.chat_id)[0]
            user_data = {}
            user_data['categories'] = record[2].strip('{ }').replace(
                '"', '').replace(' ', '').split(',')
            user_data['time'] = record[3]
            context.bot.send_message(
                chat_id=update.message.chat_id,
                text="<b>Ви підписані на категорії:</b>\n✅ {}. \n⌚ Час отримання дайджесту: {}. \n• Змінити налаштування /reset\n"
                .format(',\n✅ '.join(user_data['categories']),
                        user_data['time']),
                parse_mode=telegram.ParseMode.HTML)
        else:
            context.bot.send_message(
                chat_id=update.message.chat_id,
                text="⚠\nВи ще не налаштували дайджест")  # "You have not configured the digest yet"
    # category and time buttons are handled by their own handlers
    if update.message.text in tags and update.message.text != '◀ Назад':
        categories_handler(update, context)
    if update.message.text in times and update.message.text != '◀ Назад':
        time_handler(update, context)