Example #1
def test_test(model, test_data, test_examples=5):
    # Meta-test-test
    # given a meta-test-trained model, evaluate accuracy on the held out set
    # of classes used
    x, y = test_data
    with torch.no_grad():
        logits = model(x)
        # report performance per class: each chunk of test_examples
        # predictions/labels belongs to one held-out class
        ys = list(divide_chunks(y, test_examples))
        tasks = list(divide_chunks(logits, test_examples))
        t_accs = [
            torch.eq(task.argmax(dim=1), task_ys).sum().item() / test_examples
            for task, task_ys in zip(tasks, ys)
        ]
    return t_accs
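Every example on this page calls a divide_chunks helper that the snippets themselves do not define; the usual implementation yields successive fixed-size slices of a sequence. A minimal sketch under that assumption (the real helper in each project may differ, e.g. in returning lists rather than a generator):

def divide_chunks(seq, n):
    # yield successive slices of length n from seq; the last slice may be shorter
    for i in range(0, len(seq), n):
        yield seq[i:i + n]

# e.g. list(divide_chunks([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]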
Example #2
def retrieve_from_nwis(site_codes, start_date, end_date, n_per_chunk=1):
    chunked_list = divide_chunks(site_codes, n_per_chunk)
    df_list = []
    for site_code_chunk in chunked_list:
        d = st.get_streamflow_data(site_code_chunk, start_date, end_date, 'iv',
                                   '15T')
        df_list.append(d)
    df_comb = pd.concat(df_list, axis=1)
    return df_comb
Example #3
def make_blank_weight_grid(catchment_ids, grid_ids, out_zarr):
    catchment_chunk_size = 10000
    chunked_catchments = divide_chunks(catchment_ids, catchment_chunk_size)
    for i, indices in enumerate(chunked_catchments):
        print(f'doing chunk {i}', flush=True)
        blank = pd.DataFrame(0, index=indices, columns=grid_ids, dtype='float32')
        col_name = 'nldas_grid_no'
        idx_name = 'nhd_comid'
        chunks = {col_name: 10000, idx_name: 30000}
        ds = convert_df_to_dataset(blank, col_name, idx_name, 'weight',
                                   chunks)
        ds.to_zarr(out_zarr, mode='a', append_dim=idx_name)
Example #4
    def read_signals(self, max_size):
        data_array = []
        while len(data_array) < max_size:  # 14 x 640 = 8960
            second_array = []
            while len(second_array) < 1792:
                data = self.socket.recv(
                    self.buffer_size).decode('utf-8').split("\r\n")
                for data_chunk in data:
                    converted_data = []
                    for elem in data_chunk.split(","):
                        try:
                            converted_data.append(int(elem))
                        except ValueError:
                            # skip fragments that are not valid integers
                            pass
                    second_array.extend(converted_data)
                time.sleep(0.5)
            # append the completed block once enough samples have arrived
            data_array.extend(second_array)

        return list(divide_chunks(data_array[:max_size], 14))
Example #5
    def sample_test(self, num_tasks, train_examples=15, device="cuda"):
        assert num_tasks < len(
            self.tasks_test
        ), f"Number of tasks requested is too large: {num_tasks} > {len(self.tasks_test)}"
        assert (
            train_examples <= 20
        ), f"Number of examples requested is too large: {train_examples} > 20"

        # choose the n tasks to use
        tasks = choice(self.tasks_test, size=num_tasks, replace=False)

        # get the 20 indexes of each task (they are sequential)
        task_ids = [range(task * 20, (task + 1) * 20) for task in tasks]

        # split each group of 20 ids into (usually) 15 train and 5 test, unzip to separate train and test sequences
        train_tasks, test_tasks = unzip(
            train_test_split(ids, train_size=train_examples, shuffle=True)
            for ids in task_ids)

        # assemble the train/test trajectories
        train_traj = [
            self.cifar_test[i] for train_task in train_tasks
            for i in train_task
        ]

        test_traj = [
            self.cifar_test[i] for test_task in test_tasks for i in test_task
        ]

        # test-train examples are divided by task and sent to device (cpu/cuda)
        chunk2device = lambda chunk: [(im.to(device), label.to(device))
                                      for im, label in chunk]
        train_tasks = [
            chunk2device(chunk)
            for chunk in divide_chunks(train_traj, n=train_examples)
        ]

        # test-test tasks are collected into a massive tensor for one-pass evaluation
        ims, labels = list(zip(*test_traj))
        test_data = (torch.cat(ims).to(device), torch.cat(labels).to(device))

        return train_tasks, test_data, tasks
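The outputs here line up with test_test from Example #1: with train_examples=15, each task contributes its remaining 5 images to test_data, which test_test then re-chunks with test_examples=5. A hypothetical end-to-end sketch, assuming both snippets come from the same meta-learning codebase and that sampler and model are placeholder objects:

# hypothetical: 'sampler' exposes sample_test, 'model' is a meta-test-trained network
train_tasks, test_data, tasks = sampler.sample_test(num_tasks=10, train_examples=15)
# ... meta-test-train the model on train_tasks ...
per_task_acc = test_test(model, test_data, test_examples=5)  # 20 - 15 = 5 held-out examples per task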
Example #6
    def record_data(self, task, preprocess=True):
        samples_to_collect = task.get_run_time() * self.sample_rate
        channels = 14
        samples_per_chunk = 80
        chunks = int(samples_to_collect / samples_per_chunk)
        data_array = np.zeros((channels, chunks, samples_per_chunk))

        data = self.signal_reader.read_signals(8960)
        # print(len(data))

        # (640, 14) => (14, 640)
        data = np.array(data).swapaxes(0, 1)

        if preprocess:
            for i, channel_data in enumerate(data):
                processed_data = preprocess_data(channel_data,
                                                 sample_rate=128,
                                                 notch=True,
                                                 bp_filter=True,
                                                 artifact_removal=True)
                data_array[i] = list(divide_chunks(processed_data, 80))
        else:
            data_array = data

        # (14, 8, 80) => (14, 80, 8) => (8, 80, 14)
        samples = data_array.swapaxes(1, 2).swapaxes(0, 2)
        labels = [task.get_task_type()] * 8  # all 8 labels have same target

        # save all data for transfer learning
        if self.transfer_learning:
            self.recorded_data['samples'].append(samples)
            self.recorded_data['labels'].extend(labels)

        task_data = {"samples": samples, "labels": labels}

        return task_data
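The two swapaxes calls above only reorder the (channel, chunk, sample) array into (chunk, sample, channel); a quick shape-only sanity check with dummy data (plain NumPy, no project code assumed):

import numpy as np

dummy = np.zeros((14, 8, 80))                  # (channels, chunks, samples_per_chunk)
samples = dummy.swapaxes(1, 2).swapaxes(0, 2)  # (14, 8, 80) -> (14, 80, 8) -> (8, 80, 14)
print(samples.shape)                           # (8, 80, 14): (chunks, samples, channels)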
Example #7
def get_all_streamflow_data(output_file,
                            sites_file,
                            huc2=None,
                            num_sites_per_chunk=5,
                            start_date="1970-01-01",
                            end_date='2019-01-01',
                            time_scale='H',
                            output_format='zarr',
                            num_site_chunks_write=6,
                            s3=False):
    """
    gets all streamflow data for a date range for a given huc2. Calls are
    chunked by station

    :param output_file: [str] path to the csv file or zarr store where the data
    will be stored
    :param sites_file: [str] path to file that contains the nwis site
    information
    :param huc2: [str] zero-padded huc 2 (e.g., "02")
    :param num_sites_per_chunk: [int] the number of sites that will be pulled
    at in each web service call
    :param start_date: [str] the start date of when you want the data for
    (e.g., "1980-01-01")
    :param end_date: [str] the end date of when you want the data for
    (e.g., "1990-01-01")
    :param time_scale: [str] Pandas like time string for the time scale at which
    the data will be aggregated (e.g., 'H' for hour or 'D' for daily)
    :param output_format: [str] the format of the output file. 'csv' or 'zarr'
    :param num_site_chunks_write:
    :param S3:
    :return: None
    """
    product = get_product_from_time_scale(time_scale)
    site_codes = get_site_codes(sites_file, huc2)

    not_done_sites = get_indices_not_done(output_file,
                                          site_codes,
                                          'site_code',
                                          output_format,
                                          is_column=False,
                                          s3=s3)
    site_codes_chunked = divide_chunks(not_done_sites, num_sites_per_chunk)

    # loop through site_code_chunks
    chunk_dfs = []
    i = 0
    for site_chunk in site_codes_chunked:
        last_chunk = False
        if site_chunk[-1] == not_done_sites[-1]:
            last_chunk = True
        streamflow_df_sites = None
        # catch if there is a problem on the server retrieving the data
        try:
            streamflow_df_sites = get_streamflow_data(site_chunk, start_date,
                                                      end_date, product,
                                                      time_scale)
        except json.decoder.JSONDecodeError:
            continue
        if streamflow_df_sites is not None:
            chunk_dfs.append(streamflow_df_sites)
            # add the number of stations for which we got data
            i += streamflow_df_sites.shape[1]

            if not i % (num_site_chunks_write * num_sites_per_chunk) or \
                    last_chunk:
                print('writing out', flush=True)
                write_out_chunks(chunk_dfs, output_file, output_format)
                chunk_dfs = []
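A hypothetical call to get_all_streamflow_data (the paths, HUC code, and dates below are placeholders for illustration, not values from the original project):

get_all_streamflow_data(output_file='streamflow_huc02.zarr',
                        sites_file='nwis_sites.csv',
                        huc2='02',
                        num_sites_per_chunk=5,
                        start_date='1980-01-01',
                        end_date='1990-01-01',
                        time_scale='H',
                        output_format='zarr')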
Example #8
    def get_context_data(self, *args, **kwargs):
        context = super().get_context_data(*args, **kwargs)
        context['marketing_list_chunks'] = divide_chunks(
            self.get_queryset().filter(marketing_subscription=True), 90)
        return context
Example #9
def echo(update, context):
    if update.message.text == '📆 Отримати миттєві новини за добу':  # "Get instant news for the day"
        digest = get_immediately_digest()
        for item in digest:
            context.bot.send_message(chat_id=update.message.chat_id,
                                     text="{}\n{}".format(
                                         item['title'], item['link']))

    if update.message.text == '◀ Назад':  # "Back"
        custom_keyboard = divide_chunks(main_menu, 2)
        reply_markup = telegram.ReplyKeyboardMarkup(custom_keyboard)
        context.bot.send_message(chat_id=update.message.chat_id,
                                 text="Головне меню",
                                 reply_markup=reply_markup)

    if update.message.text == '📌 Налаштувати категорії':  # "Set up categories"
        custom_keyboard = divide_chunks(tags, 2)
        reply_markup = telegram.ReplyKeyboardMarkup(custom_keyboard)
        context.bot.send_message(chat_id=update.message.chat_id,
                                 text="Обирайте категорії новин",
                                 reply_markup=reply_markup)

    if update.message.text == '🕓 Налаштувати час':  # "Set up the digest time"
        if len(context.user_data['categories']) > 0:
            custom_keyboard = divide_chunks(times, 2)
            reply_markup = telegram.ReplyKeyboardMarkup(custom_keyboard)
            context.bot.send_message(chat_id=update.message.chat_id,
                                     text="Оберіть час отримання дайджесту",
                                     reply_markup=reply_markup)
        else:
            context.bot.send_message(chat_id=update.message.chat_id,
                                     text="⚠\nСпершу оберіть категорії")

    if update.message.text == '🔧 Мої налаштування':  # "My settings"

        if context.user_data and len(
                context.user_data['categories']) > 0 and context.user_data.get(
                    'time') is not None:
            context.bot.send_message(
                chat_id=update.message.chat_id,
                text=
                "<b>Ви підписані на категорії:</b>\n✅ {}. \n⌚ Час отримання дайджесту: {}. \n• Змінити налаштування /reset\n"
                .format(',\n✅ '.join(context.user_data['categories']),
                        context.user_data['time']),
                parse_mode=telegram.ParseMode.HTML)
        elif load_from_db_by_chat_id(chat_id=update.message.chat_id):
            user_data = {}
            user_data['categories'] = load_from_db_by_chat_id(
                chat_id=update.message.chat_id)[0][2].strip('{ }').replace(
                    '"', '').replace(' ', '').split(',')
            user_data['time'] = load_from_db_by_chat_id(
                chat_id=update.message.chat_id)[0][3]
            context.bot.send_message(
                chat_id=update.message.chat_id,
                text=
                "<b>Ви підписані на категорії:</b>\n✅ {}. \n⌚ Час отримання дайджесту: {}. \n• Змінити налаштування /reset\n"
                .format(',\n✅ '.join(user_data['categories']),
                        user_data['time']),
                parse_mode=telegram.ParseMode.HTML)

        else:
            context.bot.send_message(chat_id=update.message.chat_id,
                                     text="⚠\nВи ще не налаштували дайджест")

    if update.message.text in tags and update.message.text != '◀ Назад':
        categories_handler(update, context)

    if update.message.text in times and update.message.text != '◀ Назад':
        time_handler(update, context)