def create_timestamps(self) -> pa.Table:
    """Assemble the audio metadata into a pyarrow Table sorted by timestamp.

    Real samples contribute their microphone readings; samples synthesized
    to fill the interior of each recorded gap carry NaN.  Column names are
    taken from AUDIO_DF_COLUMNS, in order.

    :return: pyarrow Table sorted ascending on the "timestamps" column
    """
    stamps_a, stamps_b, samples = [], [], []
    # real data: evenly spaced timestamps paired with the mic readings
    for meta in self.metadata:
        evenly_spaced = calc_evenly_sampled_timestamps(
            meta[0], meta[1].num_rows, self.sample_interval_micros)
        stamps_a.extend(evenly_spaced)
        stamps_b.extend(evenly_spaced)
        samples.extend(meta[1]["microphone"].to_numpy())
    # gap fill: interior timestamps only (endpoints excluded), NaN audio
    for gap_start, gap_end in self.gaps:
        interior = int((gap_end - gap_start) / self.sample_interval_micros) - 1
        evenly_spaced = calc_evenly_sampled_timestamps(
            gap_start + self.sample_interval_micros,
            interior,
            self.sample_interval_micros)
        stamps_a.extend(evenly_spaced)
        stamps_b.extend(evenly_spaced)
        samples.extend(np.full(len(evenly_spaced), np.nan))
    unsorted_table = pa.Table.from_pydict(
        dict(zip(AUDIO_DF_COLUMNS, [stamps_a, stamps_b, samples])))
    order = pc.sort_indices(
        unsorted_table, sort_keys=[("timestamps", "ascending")])
    return pc.take(unsorted_table, order)
def test_call_function_with_memory_pool():
    """All three 'take' entry points agree, with an explicit memory pool
    supplied where the API accepts one: Array.take, pc.call_function,
    and the pc.take convenience wrapper."""
    source = pa.array(["foo", "bar", "baz"])
    picks = np.array([2, 2, 1])
    expected = pa.array(["baz", "baz", "bar"])

    # plain Array.take (no pool argument)
    assert source.take(picks).equals(expected)
    # generic dispatch through call_function with an explicit pool
    via_call = pc.call_function('take', [source, picks],
                                memory_pool=pa.default_memory_pool())
    assert via_call.equals(expected)
    # convenience wrapper with an explicit pool
    via_take = pc.take(source, picks, memory_pool=pa.default_memory_pool())
    assert via_take.equals(expected)
# keep only the columns the viewer needs, in display order
table = table.select(["ix", "x", "y", "title", "first_author_name", "date", "language"])

# truncate the title after 101 characters (matching display logic)
truncated_title = pc.utf8_replace_slice(table.column("title"), start=101, stop=1000, replacement="")
table = table.set_column(table.schema.get_field_index("title"), "title", truncated_title)

# ensure all dictionaries in the file use the same key/value mappings
table = table.unify_dictionaries()

# drop rows with null dates; matches the hack in index.js:37
# NOTE(review): the mask only removes nulls — non-numeric strings like
# "1850-1853" survive unless an earlier cast already nulled them; verify.
mask = pc.invert(pc.is_null(table.column("date")))
table = table.filter(mask)

# sorting by the date improves the loading aesthetics
# comment this out to exactly match the original appearance
indices = pc.sort_indices(table, sort_keys=[("date", "ascending")])
table = pc.take(table, indices)

# after sorting, replace ix with an accurate sequential row index.
# (Previously this re-ran sort_indices on the already-sorted table — an
# O(n log n) route to the identity permutation; build 0..n-1 directly.)
row_index = pa.array(np.arange(table.num_rows, dtype=np.uint32))
table = table.set_column(table.schema.get_field_index("ix"), "ix", row_index)

temp_path.unlink()
local = fs.LocalFileSystem()
with local.open_output_stream(str(target_path)) as file:
    with pa.RecordBatchStreamWriter(file, table.schema) as writer:
        # write in batches of 10000 rows
        writer.write_table(table, 10000)