def main():
    """Load the pickled history, print the last value of each metric, then plot it."""
    with open('./results/history.pickle', 'rb') as history_file:
        history = pickle.load(history_file)

    # Same reporting order as before: accuracy first, then loss.
    for metric_name in ('acc', 'val_acc', 'loss', 'val_loss'):
        print(last(history[metric_name]))

    plot_history(history)
def test_custom_steps(tmp_dir, mocker):
    """Log one metric at explicitly set, non-contiguous steps and check what is read back.

    The ``mocker`` fixture is requested but not used in the body.
    """
    steps = [0, 62, 1000]
    metrics = [0.9, 0.8, 0.7]

    dvclive = Live("logs")
    for index, step in enumerate(steps):
        dvclive.set_step(step)
        dvclive.log("m", metrics[index])

    # The history must hold every logged point; the latest entry is the final pair.
    expected_latest = (last(steps), last(metrics))
    assert read_history("logs", "m") == (steps, metrics)
    assert read_latest("logs", "m") == expected_latest
def test_custom_steps(tmp_dir):
    """Log one metric at explicitly set, non-contiguous steps and check what is read back.

    Results are read from ``tmp_dir / dvclive.dir / Scalar.subfolder``.
    """
    steps = [0, 62, 1000]
    metrics = [0.9, 0.8, 0.7]

    dvclive = Live("logs")
    out = tmp_dir / dvclive.dir / Scalar.subfolder
    for index, step in enumerate(steps):
        dvclive.set_step(step)
        dvclive.log("m", metrics[index])

    # The history must hold every logged point; the latest entry is the final pair.
    expected_latest = (last(steps), last(metrics))
    assert read_history(out, "m") == (steps, metrics)
    assert read_latest(out, "m") == expected_latest
def create_document_indices_from_sentence_indices(span_lists, token_lists, document):
    """Convert sentence spans (each sentence starting at 0) to document spans.

    Each sentence is located in ``document`` by searching for its first token at
    or after the end of the previous sentence. Anchoring on the original text
    copes with irregular spacing between sentences (for example
    "Yuliya loves cats.   Ray loves dogs.") and with sentences that were split
    mid-word because they exceeded the maximum sentence length.

    If the first token cannot be found, the sentence is assumed to start right
    where the previous one ended (the running offset is left untouched) instead
    of being shifted back by one character. Sentences with no tokens or no
    spans are tolerated and do not advance the offset.

    Args:
        span_lists: list of lists of (start, end) int pairs representing token
            locations relative to the start of their sentence
        token_lists: list of lists of str. sentences and tokens in document.
        document: str. raw text of predicted document

    Returns:
        document_span_lists: A list of lists of two-element lists
            ``[start, end]`` giving token locations relative to the document
    """
    document_span_lists = []
    offset = 0
    for span_list, token_list in zip(span_lists, token_lists):
        if token_list:
            # Search from the running offset without copying the document tail.
            found = document.find(token_list[0], offset)
            if found != -1:
                offset = found
        # Shift the sentence-relative spans so they line up with the original text.
        document_span_lists.append(
            [[span_start + offset, span_end + offset]
             for (span_start, span_end) in span_list])
        if span_list:
            # The end of the last span is the sentence length in characters.
            offset += span_list[-1][-1]
    return document_span_lists
def generate_random_images(
        tmp_directory, video_file, size=None, aspect_ratio=(16, 9), **kwargs):
    """Generate uniformly distributed random snapshots from a video.

    The clip is cut into consecutive chunks of whole seconds and one frame is
    taken at a random time inside every chunk. Frames are written to
    ``tmp_directory / 'random' / '<index>.<ext>'``.

    Args:
        tmp_directory: base output location; joined with ``/``, so a
            path-like object is expected.
        video_file: video source handed to ``moviepy``'s ``VideoFileClip``.
        size: optional target size; when truthy each frame goes through
            ``img_resize`` together with ``aspect_ratio``.
        aspect_ratio: forwarded to ``img_resize``.
        **kwargs: ``output_ext`` selects the file extension (default "png").

    Returns:
        0 when the clip is shorter than one second, otherwise the chunk count
        chosen for the clip's duration bracket.
    """
    from moviepy import editor

    clip = editor.VideoFileClip(video_file)
    try:
        duration = clip.duration
        if int(duration) < 1:
            return 0

        # hard-coded steps of snapshot smoothness
        if duration < 30:
            num_of_chunks = 3
        elif duration < 60:
            num_of_chunks = 5
        elif duration < 60 * 5:
            num_of_chunks = 20
        elif duration < 60 * 10:
            num_of_chunks = 30
        else:
            num_of_chunks = 50

        # A clip shorter than num_of_chunks seconds would give a chunk size
        # of zero, which cannot be used to partition the timeline.
        chunk_size = max(1, int(duration // num_of_chunks))
        output_ext = kwargs.get("output_ext", "png")

        seconds = range(0, int(duration))
        for i, video_chunk in enumerate(chunks(chunk_size, seconds)):
            random_frame_time = random.uniform(first(video_chunk), last(video_chunk))
            img = Image.fromarray(clip.get_frame(random_frame_time))
            if size:
                img = img_resize(img, size=size, aspect_ratio=aspect_ratio)

            file_name = f'{i}.{output_ext}'
            img_save(img, str(tmp_directory / 'random' / file_name))

        return num_of_chunks
    finally:
        # Release the reader resources held by the clip on every exit path.
        clip.close()
def _infer_y_field(rev_data_points, x_field):
    """Return the last key of the first data point, ignoring the revision key and ``x_field``.

    Raises ValueError (from ``list.remove``) when the first data point has no
    ``PlotData.REVISION_FIELD`` key.
    """
    candidates = list(first(rev_data_points).keys())
    candidates.remove(PlotData.REVISION_FIELD)

    # The x field is only dropped when one was given and it is actually present.
    if x_field and x_field in candidates:
        candidates.remove(x_field)

    return last(candidates)
def _reverse_inputs_and_indices(encoded_sentence_forward, output_index_list_forward): """Reverse sequence of character codes and list of output indices.""" if len(encoded_sentence_forward ) >= 2: # sentence should at least have start, end characters start_sentence_value = first(encoded_sentence_forward) end_sentence_value = last(encoded_sentence_forward) encoded_sentence_length = len(encoded_sentence_forward) # Reverse all character codes in the sentence without affecting the first and last elements # (those are special start_sentence_value and end_sentence_value) encoded_sentence_back = [start_sentence_value] encoded_sentence_back.extend( encoded_sentence_forward[-2:0:-1]) # skip start and end encoded_sentence_back.append(end_sentence_value) else: encoded_sentence_back = [] # compute backward output indices if len(output_index_list_forward) == 0: locations_before_tokens = [] else: locations_before_tokens = [0] + output_index_list_forward[:-1] output_indices_back = [ encoded_sentence_length - x - 1 for x in locations_before_tokens ] return encoded_sentence_back, output_indices_back
def _prepare_item_image(self, image):
    """Describe ``image`` as a dict with the image itself, its URL and an ``image/<ext>`` type.

    The URL is quoted (with the colon restored) and passed through
    ``self.get_with_domain``; the type is derived from the lower-cased text
    after the last dot of ``image.name``.
    """
    suffix = last(image.name.split('.')).lower()
    quoted_url = urlquote(image.url).replace('%3A', ':')

    item = {'image': image}
    item['url'] = self.get_with_domain(quoted_url)
    item['type'] = 'image/{}'.format(suffix.replace('jpg', 'jpeg'))
    return item
def test_continue(tmp_dir, resume, steps, metrics):
    """Log two values, re-initialise with ``resume`` and check the combined history.

    ``steps`` and ``metrics`` are the expected history after the second run.
    """
    first_run = [0.9, 0.8]
    second_run = [0.7, 0.6]

    dvclive.init("logs")
    for value in first_run:
        dvclive.log("metric", value)
        dvclive.next_step()

    assert read_history("logs", "metric") == ([0, 1], [0.9, 0.8])
    assert read_latest("logs", "metric") == (1, 0.8)

    # Second session: what is expected afterwards is supplied by the caller.
    dvclive.init("logs", resume=resume)
    for value in second_run:
        dvclive.log("metric", value)
        dvclive.next_step()

    expected_latest = (last(steps), last(metrics))
    assert read_history("logs", "metric") == (steps, metrics)
    assert read_latest("logs", "metric") == expected_latest
def make(path="logs"):
    """Generate a summary JSON and a ``metric.tsv`` history for two fixed data points.

    The JSON file sits next to ``path`` (same name, ``.json`` suffix) and holds
    the last data point; the TSV lives inside ``path`` and holds all of them.
    """
    datapoints = [{"metric": 0.0, "step": 0}, {"metric": 0.5, "step": 1}]

    summary_file = (tmp_dir / path).with_suffix(".json")
    history_file = tmp_dir / path / "metric.tsv"

    files = {
        summary_file: json.dumps(last(datapoints)),
        history_file: _dumps_tsv(datapoints),
    }
    tmp_dir.gen(files)
def test_continue(tmp_dir, resume, steps, metrics):
    """Log two values, create a second ``Live`` with ``resume`` and check the combined history.

    ``steps`` and ``metrics`` are the expected history after the second run.
    Results are read from ``tmp_dir / dvclive.dir / Scalar.subfolder``.
    """
    first_run = [0.9, 0.8]
    second_run = [0.7, 0.6]

    dvclive = Live("logs")
    out = tmp_dir / dvclive.dir / Scalar.subfolder
    for value in first_run:
        dvclive.log("metric", value)
        dvclive.next_step()

    assert read_history(out, "metric") == ([0, 1], [0.9, 0.8])
    assert read_latest(out, "metric") == (1, 0.8)

    # Second session: what is expected afterwards is supplied by the caller.
    dvclive = Live("logs", resume=resume)
    for value in second_run:
        dvclive.log("metric", value)
        dvclive.next_step()

    expected_latest = (last(steps), last(metrics))
    assert read_history(out, "metric") == (steps, metrics)
    assert read_latest(out, "metric") == expected_latest
def _bin_midpoint(bin_key):
    """Return the midpoint of a ``pd.cut`` bin identified by its group key."""
    if hasattr(bin_key, 'left') and hasattr(bin_key, 'right'):
        # pandas yields Interval objects as group keys for binned data.
        lower_range = float(bin_key.left)
        upper_range = float(bin_key.right)
    else:
        # Legacy string label such as "[10.0, 20.0)": split out the bounds.
        bounds = str(bin_key).split(',')
        lower_range = float(bounds[0].strip('['))
        upper_range = float(bounds[-1].strip(')'))
    return np.mean([lower_range, upper_range])


def get_bins(race_data):
    """
    Group races and create bins (time ranges) of BIN_SIZE.

    Only racers faster than the race's 75th percentile of ``final_time`` are
    binned. For each bin find out pct of racers in that bin and avg time of
    that bin. Also assign bin number (starting at 1 within each race) to
    identify racers and the bin they fall into later on.

    Args:
        race_data: DataFrame with at least ``race_id`` and ``final_time``
            columns.

    Returns:
        A list of dicts with keys ``race_id``, ``bin_number``,
        ``population_pct`` and ``bin_avg_time``, one per bin per race.
    """
    bin_data = []
    for race_id, race_group in race_data.groupby('race_id'):
        cutoff = race_group.final_time.quantile(.75)
        top_75_percentile = race_group[race_group.final_time < cutoff]

        # Skip races with missing data.
        if len(top_75_percentile) == 0:
            continue

        bins = pd.cut(top_75_percentile.final_time, BIN_SIZE, right=False)
        racer_count = float(len(top_75_percentile))

        grouped = top_75_percentile.groupby(bins)
        for bin_number, (bin_key, bin_group) in enumerate(grouped, start=1):
            bin_avg_time = bin_group.final_time.mean()
            if math.isnan(bin_avg_time):
                # Empty bin: fall back to the middle of the bin's time range.
                bin_avg_time = _bin_midpoint(bin_key)

            bin_data.append({'race_id': int(race_id),
                             'bin_number': bin_number,
                             'population_pct': len(bin_group) / racer_count,
                             'bin_avg_time': bin_avg_time
                             })
    return bin_data
def plot_history():
    """Print the last 'val_acc' value of the pickled history and show loss and accuracy plots."""

    def plot_values_collection(title, values_collection):
        """Draw every series of ``values_collection`` on a cleared, titled figure and show it."""
        plot.clf()
        plot.title(title)
        for series in values_collection:
            plot.plot(series)
        plot.show()

    with open('./results/history.pickle', 'rb') as history_file:
        history = pickle.load(history_file)

    print(last(history['val_acc']))

    # One figure per metric family, each overlaying the two recorded series.
    figures = (
        ('loss', ('loss', 'val_loss')),
        ('accuracy', ('acc', 'val_acc')),
    )
    for title, keys in figures:
        plot_values_collection(title, (history[key] for key in keys))
def block_quote(self, text: str):
    """Render a block quote as HTML.

    One trailing newline and all ``<p>``/``</p>`` tags are removed first. The
    text is then split from the right on at most two newlines: the final piece
    is used as the author, and any earlier pieces are joined with spaces to
    form the quoted text inside a ``<figure>``. A single-line input is wrapped
    in a plain ``<blockquote>``.
    """
    if text.endswith("\n"):
        text = text[:-1]
    text = re.sub("</?p>", "", text)

    *body_parts, author = text.rsplit("\n", maxsplit=2)

    if body_parts:
        template = """ <figure> <blockquote class="blockquote"><p>{text}</p></blockquote> <figcaption class="blockquote-footer">{author}</figcaption> </figure> """
    else:
        template = '<blockquote class="blockquote"><p>{author}</p></blockquote>'

    rendered = template.format(text=" ".join(body_parts), author=author)
    return rendered.strip()
def _split_long_sentences(token_lists, span_lists, max_tokens, max_chars): """Split tokenized sentences to enforce max_tokens and max_chars. Adjust character spans. This function does not enforce correct token length: it assumes that any individual token is shorter than max_chars characters. Args: token_lists: List of lists of str (one str per token). Each token shorter than max_chars span_lists: List of lists of tuples of (start, end) token locations in sentence max_tokens: maximum number of tokens per sentence max_chars: maximum number of characters per sentence Returns: new_tokens: new list of lists of str, where each list conforms to max_chars and max_tokens new_spans: new list of lists of (start, end) tuples. Spans start at 0 for each sentence """ if max_tokens < 1 or max_chars < 1: raise ValueError("Expected max_tokens and max_chars to be at least 1\n" "Found max_tokens = {}, max_chars = {}".format( max_tokens, max_chars)) new_tokens = [] new_spans = [] for token_list, span_list in zip(token_lists, span_lists): # check for bad input if len(token_list) != len(span_list): raise ValueError("Bad tokenized sentence: number of " "tokens does not equal number of spans\n" "tokens: {}\ntoken spans: {}".format( token_list, span_list)) # check if empty if len(token_list) == 0: continue # check if we already fit into the limits to avoid unnecessary loops if len(token_list) <= max_tokens and last(span_list)[1] <= max_chars: new_tokens.append(token_list) new_spans.append(span_list) continue # loop over tokens until fill up a sentence chunk token_index = 0 token_shift = 0 span_shift = 0 sentence_token_chunk = [] sentence_span_chunk = [] while token_index < len(token_list): token = token_list[token_index] span = span_list[token_index] # if token fits in if span[1] - span_shift <= max_chars and token_index - token_shift < max_tokens: sentence_token_chunk.append(token) adjusted_span = (span[0] - span_shift, span[1] - span_shift) sentence_span_chunk.append(adjusted_span) # if 
token doesn't fit, need to start a new sentence chunk else: if len(sentence_token_chunk) > 0: new_tokens.append(sentence_token_chunk) new_spans.append(sentence_span_chunk) span_shift = span[0] token_shift = token_index sentence_token_chunk = [token] sentence_span_chunk = [(0, span[1] - span[0])] token_index += 1 new_tokens.append(sentence_token_chunk) new_spans.append(sentence_span_chunk) if len(new_tokens) == 0: # preserve the dimensionality of the output new_tokens = [[]] new_spans = [[]] return new_tokens, new_spans return new_tokens, new_spans
import os
from glob import glob

from funcy import last
from stable_baselines3 import SAC
from stable_baselines3.common.evaluation import evaluate_policy

from self_driving import SelfDriving

# Evaluation script: load the most recently modified checkpoint from log/,
# report its reward over one rendered episode, then run ten more episodes.
env = SelfDriving()

# Sort checkpoints by modification time and take the newest one.
model_path = last(sorted(glob('log/*.zip'), key=lambda f: os.stat(f).st_mtime))
model = SAC.load(model_path, env)

print(model_path)

reward_mean, _ = evaluate_policy(model, env, n_eval_episodes=1, render=True, warn=False)

print(f'reward: {reward_mean:.02f}')

for _ in range(10):
    env.seed(None)  # Set the random seed to None (use the current time).

    observation = env.reset()
    done = False

    while not done:
        action, _ = model.predict(observation, deterministic=True)
        # Advance the environment with the chosen action; without this step
        # `done` never changes and the episode loop cannot terminate.
        observation, _, done, _ = env.step(action)
def list_javascripts():
    """List the ``*.js`` files under ``src/static/dist`` as paths relative to ``src/static/``."""
    static_root = 'src/static/'
    scripts = []
    for script_path in glob.glob('src/static/dist/*.js'):
        # Keep only what follows the last occurrence of the static root.
        scripts.append(script_path.rpartition(static_root)[2])
    return scripts
def _merge_values(values: List[Optional[TimeSeries]]):
    """Return the last entry of ``values`` that is not None, or None if there is none."""
    merged = None
    for candidate in values:
        if candidate is not None:
            merged = candidate
    return merged
def img_save(img, file_path, codec_settings=codec_defaults):
    """Safely store Image objects to files.

    Images in a mode with an alpha channel or palette ('RGBA', 'LA', 'P') are
    converted to 'RGB' before being written to a JPEG file. The extension is
    compared case-insensitively so that names such as ``photo.JPG`` are
    handled as well.

    Args:
        img: image object providing ``mode``, ``convert`` and ``save``.
        file_path: str destination path; its extension selects the JPEG handling.
        codec_settings: keyword arguments forwarded to ``img.save``.
    """
    extension = file_path.rsplit('.', 1)[-1].lower()
    if extension in ('jpg', 'jpeg') and img.mode in ('RGBA', 'LA', 'P'):
        img = img.convert('RGB')  # jpeg does not support alpha channels
    img.save(file_path, **codec_settings)
def create_initial_story(sender, instance, **kwargs):
    """Fill in ``created`` and ``extension`` on an instance that has no id yet.

    ``extension`` is the upper-cased text after the last dot of the
    instance's ``__unicode__()`` value. Instances with an id are left alone.
    """
    if instance.id:
        return

    instance.created = datetime.datetime.today()
    name_parts = instance.__unicode__().split('.')
    instance.extension = last(name_parts).upper()
def lca(self, word1, word2) -> Word:
    """Least Common Ancestor.

    Walks the ``_sift`` traces of both words in lockstep and returns the
    ``data`` of the last position at which the two traces hold equal nodes.
    """
    deepest_shared = None
    for node1, node2 in zip(self._sift(word1), self._sift(word2)):
        if node1 == node2:
            deepest_shared = node1
    return deepest_shared.data
def sift(self, word) -> Node:
    """Return the final node produced by ``self._sift(word)``."""
    visited_nodes = self._sift(word)
    return fn.last(visited_nodes)
def create_initial_story(sender, instance, **kwargs):
    """Fill in ``created`` and ``extension`` on an instance that has no id yet.

    ``extension`` is the upper-cased text after the last dot of the
    instance's ``__unicode__()`` value. Instances with an id are left alone.
    """
    if instance.id:
        return

    instance.created = datetime.datetime.today()
    name_parts = instance.__unicode__().split(".")
    instance.extension = last(name_parts).upper()
def learn_dfa(inputs, label, find_counter_example, outputs=None) -> DFA:
    """Run ``_learn_dfa`` to completion and return the last item it produces."""
    candidates = _learn_dfa(inputs, label, find_counter_example, outputs)
    return fn.last(candidates)
def prepare_inputs_from_pretokenized(self, tokenized_sentences):
    """Build the ContextualizedEmbedding input dict from already-tokenized sentences.

    Character-level padding follows the longest sentence in the batch.
    Use at your own risk: the sentences are assumed to already fit into the
    character and token limits.

    Args:
        tokenized_sentences: List of lists of str, one str for each token

    Returns:
        a dict of inputs to the ContextualizedEmbedding layer
        'forward_input': padded array of character codes for each sentence
        'backward_input': padded array of character codes in reverse order
        'forward_index_input': padded array of token output locations in forward_input
        'backward_index_input': padded array of token output locations in backward_input
        'forward_mask_input': mask shaped like forward_index_input, 0 where
            padded and 1 where real tokens
        'backward_mask_input': mask shaped like backward_index_input, 0 where
            padded and 1 where real tokens
    """
    token_spans = get_space_joined_indices_from_token_lists(tokenized_sentences)

    # The end of the last span is the sentence length; empty sentences are
    # skipped and a 0 entry keeps max() defined when none remain.
    sentence_lengths = [last(spans)[1] for spans in token_spans if len(spans) > 0]
    pad_len = max(sentence_lengths + [0]) + 2  # 2 for extra start and end characters

    # encode sentences and get output indices (located at token edges)
    encoded = self._encode_and_index(tokenized_sentences, token_spans)
    forward_chars, backward_chars, forward_indices, backward_indices = encoded

    # Entries are evaluated top to bottom: padding, then index arrays, then masks.
    return {
        "forward_input": _pad_sentences(forward_chars, pad_len, self.char_pad_value),
        "backward_input": _pad_sentences(backward_chars, pad_len, self.char_pad_value),
        "forward_index_input": self._prepare_index_array(forward_indices),
        "backward_index_input": self._prepare_index_array(backward_indices),
        "forward_mask_input": self._prepare_mask_array(forward_indices),
        "backward_mask_input": self._prepare_mask_array(backward_indices),
    }