def processCharactor(char, table: np.ndarray, memory: deque, charset):
    """Called by the train function on every iteration to update "table"
    (numpy array) and "memory" (deque) with char.

    The value at a specific index is incremented by 1; the index is
    determined from "memory", where each combination of remembered
    characters corresponds to a unique index. This has the effect of
    counting how many times each character follows a given context.

    :param char: character to process
    :param table: n-gram count table (numpy array)
    :param memory: deque holding the indices of the previous characters
    :param charset: mapping from character to index
    :return:
    """
    charnum = charset[char]
    try:
        table[tuple(memory)][charnum] += 1
    except IndexError as error:
        print(error)
        print(memory[0])
        exit()
    memory.append(charnum)
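# Illustrative usage sketch for processCharactor: a 2-character memory and a
# 3-symbol charset (the names and sizes below are assumptions for the example).
import numpy as np
from collections import deque

charset = {'a': 0, 'b': 1, 'c': 2}
memory = deque([0, 0], maxlen=2)             # indices of the 2 previous chars
table = np.zeros((3, 3, 3), dtype=np.int64)  # counts[prev2][prev1][next]

for ch in "abcabc":
    processCharactor(ch, table, memory, charset)

# table[0, 1, 2] now counts how often 'c' followed the context ('a', 'b').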
def determine_right(side: np.ndarray, head: np.ndarray, tail: np.ndarray,
                    skeleton: np.ndarray):
    """Determine whether the given side is the right side.

    :param side: One of the sides
    :param head: Head coordinates
    :param tail: Tail coordinates
    :param skeleton: Skeleton coordinates
    :return: Whether the side is the right side or not
    """
    # Create a copy so the rotation does not modify the caller's data
    test_curve = np.copy(side)
    # Move the curve tail to (0, 0) for normalization
    test_curve = test_curve - test_curve[-1]
    # De-rotate the skeleton and the curve
    rotation_matrix = fish_rotation_matrix(head, tail)
    rotated_skeleton = np.dot(skeleton, rotation_matrix)
    rotated_curve = np.dot(test_curve, rotation_matrix)
    # If most x-coordinates are larger than those of the skeleton, it is the
    # right side. This works because the fish is de-rotated to be roughly
    # parallel to the y-axis.
    shapely_curve = shp.LineString(zip(*rotated_curve.T))
    shapely_midline = shp.LineString(zip(*rotated_skeleton.T))
    signs = []
    for p in zip(*shapely_curve.xy):
        closest_point = shapely_midline.interpolate(
            shapely_midline.project(shp.Point(p)))
        signs.append(1 if p[0] > closest_point.xy[0][0] else -1)
    return np.mean(signs) > 0
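# Illustrative sketch of the side test used above: project a point onto the
# midline and compare x-coordinates (assumes `import shapely.geometry as shp`).
midline = shp.LineString([(0, 0), (0, 10)])  # roughly vertical skeleton
point = shp.Point(2, 5)                      # a point on one of the sides
closest = midline.interpolate(midline.project(point))
print(point.x > closest.x)                   # True -> the point lies to the right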
def create_validation_dataset(
        test: np.ndarray, val_size: float,
        random_state: int) -> Tuple[np.ndarray, np.ndarray]:
    users = test[:, 0]
    items = test[:, 1]
    ratings = test[:, 2]
    val = []
    test = []
    for user in set(users):
        indices = users == user
        pos_items = items[indices]
        val_items = np.random.RandomState(random_state).choice(
            pos_items, int(val_size * len(pos_items)), replace=False)
        test_items = np.setdiff1d(pos_items, val_items)
        for val_item in val_items:
            item_indices = (items == val_item) & (users == user)
            val_rating = int(ratings[item_indices])
            val.append([user, val_item, val_rating])
        for test_item in test_items:
            item_indices = (items == test_item) & (users == user)
            test_rating = int(ratings[item_indices])
            test.append([user, test_item, test_rating])
    val = np.array(val)
    test = np.array(test)
    return test, val
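# Illustrative usage sketch for create_validation_dataset on made-up
# (user, item, rating) rows; half of each user's items go to validation.
interactions = np.array([
    [0, 10, 5], [0, 11, 3], [0, 12, 4], [0, 13, 2],
    [1, 10, 1], [1, 12, 5],
])
test_part, val_part = create_validation_dataset(
    interactions, val_size=0.5, random_state=42)
# Both splits keep the original ratings; every user appears in both.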
def augment_embeddings(
        self,
        word2idx: Dict[str, int],
        idx2word: Dict[int, str],
        embeddings: List[np.ndarray],
        token: str,
        emb: Optional[np.ndarray] = None) -> types.Embeddings:
    # embeddings is a list of vectors here (np.ndarray has no .append);
    # the new token gets the next free index.
    word2idx[token] = len(embeddings)
    idx2word[len(embeddings)] = token
    if emb is None:
        emb = np.random.uniform(low=-0.05, high=0.05, size=self.dim_)
    embeddings.append(emb)
    return word2idx, idx2word, embeddings
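# Illustrative usage sketch for augment_embeddings: adding a "<pad>" token to a
# tiny vocabulary (`loader` is an assumed instance of the class defining dim_).
word2idx = {"the": 0, "cat": 1}
idx2word = {0: "the", 1: "cat"}
embeddings = [np.zeros(50), np.ones(50)]  # one vector per known word
word2idx, idx2word, embeddings = loader.augment_embeddings(
    word2idx, idx2word, embeddings, "<pad>")
# word2idx["<pad>"] == 2 and embeddings gained a third, randomly initialized vector.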
def _make_ctxs(self, seq: np.ndarray):
    # left_ctx + word + right_ctx
    seq = list(seq)  # ndarray to list (or a copy of the list) so slices concatenate with +
    if self.add_bos and seq[0] != self.field.bos_idx:
        seq.insert(0, self.field.bos_idx)
    if self.add_eos and seq[-1] != self.field.eos_idx:
        seq.append(self.field.eos_idx)
    full_window = self.ctx_size + self.ctx_size
    for i in range(len(seq) - full_window):
        word = seq[i + self.ctx_size]
        ctx = (seq[i:i + self.ctx_size]
               + seq[i + self.ctx_size + 1:i + 2 * self.ctx_size + 1])
        yield (ctx, word)
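# Illustrative sketch of the windows produced above for ctx_size = 2 on a toy
# index sequence; this standalone loop mirrors the generator's slicing.
seq = [10, 11, 12, 13, 14, 15]
ctx_size = 2
for i in range(len(seq) - 2 * ctx_size):
    word = seq[i + ctx_size]
    ctx = seq[i:i + ctx_size] + seq[i + ctx_size + 1:i + 2 * ctx_size + 1]
    print(ctx, word)
# [10, 11, 13, 14] 12
# [11, 12, 14, 15] 13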
def _from_hp_space(self, hp: Dict[str, Any], _hp_repr: np.ndarray = None):
    # TODO: parse hp space and append as much as possible (so that the whole
    #  repr fits in 'embedding_size') a binary representation of each relative
    #  position in hp_space ranges/choices
    # TODO: refactor this code (use NNI's hp_space generated YAML file instead
    #  of hyperopt)
    # for node in hyperopt.vectorize.uniq(hyperopt.vectorize.toposort(self._hp_space)):
    #     if isinstance(node, hyperopt.tpe.pyll.rec_eval):
    #         pass
    if _hp_repr is None:
        # First call of the recursion over the hp_space dict
        _hp_repr = np.array([])
    for n, v in hp.items():
        if isinstance(v, Dict):
            # Recurse into nested search-space dicts (np.ndarray has no
            # .append; np.append returns a new array).
            _hp_repr = np.append(_hp_repr, self._from_hp_space(v, _hp_repr))
        else:
            # Leaf values (ranges/choices) are not handled yet.
            raise NotImplementedError
            # self._hp_space[n]
            # _hp_repr = np.append(_hp_repr, ...)
    return torch.from_numpy(_hp_repr)
def draw_points(img: np.ndarray, points: np.ndarray, cors: np.ndarray = None,
                r: int = 5) -> None:
    """
    :param img: Input image (BGR)
    :param points: [[x1, y1], ... [xn, yn]]
    :param cors: [(b, g, r), ... (b, g, r)] colors, one per point
    :param r: circle radius
    :return:
    """
    if cors is None:
        cors = []
        for i in range(0, len(points)):
            c = tuple(np.random.choice(range(256), size=3))
            cors.append((int(c[0]), int(c[1]), int(c[2])))
    k = 0
    for p in points:
        cv2.circle(img, (int(p[0]), int(p[1])), r, cors[k], -1)
        k = k + 1
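# Illustrative usage sketch for draw_points: three colored dots on a blank
# BGR canvas (assumes cv2 and numpy are imported as in the function above).
canvas = np.zeros((200, 200, 3), dtype=np.uint8)
pts = np.array([[50, 50], [100, 120], [150, 60]])
draw_points(canvas, pts, cors=[(255, 0, 0), (0, 255, 0), (0, 0, 255)], r=4)
# cv2.imwrite("points.png", canvas)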
def _check_branching(
    X: np.ndarray,
    Xsamples: np.ndarray,
    restart: int,
    threshold: float = 0.25,
) -> Tuple[bool, List[np.ndarray]]:
    """\
    Check whether time series branches.

    Parameters
    ----------
    X
        current time series data.
    Xsamples
        list of previous branching samples.
    restart
        counts number of restart trials.
    threshold
        sets threshold for attractor identification.

    Returns
    -------
    check
        true if branching realization
    Xsamples
        updated list
    """
    check = True
    Xsamples = list(Xsamples)
    if restart == 0:
        Xsamples.append(X)
    else:
        for Xcompare in Xsamples:
            Xtmax_diff = np.absolute(X[-1, :] - Xcompare[-1, :])
            # If the second largest element is smaller than threshold
            # set check to False, i.e. at least two elements
            # need to change in order to have a branching.
            # If we observe all parameters of the system,
            # a new attractor state must involve changes in two
            # variables.
            if np.partition(Xtmax_diff, -2)[-2] < threshold:
                check = False
        if check:
            Xsamples.append(X)
    logg.debug(f'realization {restart}: {"" if check else "no"} new branch')
    return check, Xsamples
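# Illustrative sketch of the branching test above: a realization counts as a
# new branch only if at least two variables of its final state differ from
# every stored sample by more than `threshold`; np.partition extracts the
# second-largest difference.
diff = np.array([0.6, 0.1, 0.05])            # |X[-1] - Xcompare[-1]|
second_largest = np.partition(diff, -2)[-2]  # 0.1
print(second_largest < 0.25)                 # True -> not counted as a new branch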
def __call__(self, image: np.ndarray, landmarks: np.ndarray, rgb: bool = True,
             two_steps: bool = False) -> np.ndarray:
    if landmarks.size > 0:
        # Preparation
        if rgb:
            image = image[..., ::-1]
        if landmarks.ndim == 2:
            landmarks = landmarks[np.newaxis, ...]

        # Crop the face patches
        roi_boxes = []
        face_patches = []
        for lms in landmarks:
            roi_boxes.append(parse_roi_box_from_landmark(lms[:68].T))
            face_patches.append(
                cv2.resize(
                    crop_img(image, roi_boxes[-1]),
                    (self.config.input_size, self.config.input_size)))
        face_patches = (torch.from_numpy(
            np.array(face_patches).transpose(
                (0, 3, 1, 2)).astype(np.float32)).to(self.device) - 127.5) / 128.0

        # Get 3DMM parameters
        params = self.net(face_patches).cpu().numpy()
        if two_steps:
            landmarks = []
            for param, roi_box in zip(params, roi_boxes):
                landmarks.append(predict_68pts(param, roi_box).T)
            return self.__call__(image, np.array(landmarks), rgb=False,
                                 two_steps=False)
        else:
            return np.hstack((np.array(roi_boxes, dtype=np.float32), params))
    else:
        return np.empty(shape=(0, 66), dtype=np.float32)
def encode(self, data: np.ndarray) -> np.ndarray:
    # np.ndarray has no in-place append; np.append concatenates and returns
    # a new array.
    return np.append(
        data, self.manager.modem._rng.randint(2, self.__check_block_size))
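# Illustrative sketch of why np.append is used above: unlike list.append,
# np.ndarray has no in-place append, and np.append returns a new array.
block = np.array([1, 0, 1, 1])
extended = np.append(block, [0, 1])
print(block)     # [1 0 1 1]  (unchanged)
print(extended)  # [1 0 1 1 0 1]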
def generate_quantiles_histogram(quantiles: np.ndarray,
                                 min_val: float,
                                 max_val: float,
                                 total_count: float,
                                 num_buckets: int) -> statistics_pb2.Histogram:
    """Generate quantiles histogram from the quantile boundaries.

    Args:
      quantiles: A numpy array containing the quantile boundaries.
      min_val: The minimum value among all values over which the quantiles
        are computed.
      max_val: The maximum value among all values over which the quantiles
        are computed.
      total_count: The total number of values over which the quantiles are
        computed.
      num_buckets: The required number of buckets in the quantiles histogram.

    Returns:
      A statistics_pb2.Histogram proto.
    """
    result = statistics_pb2.Histogram()
    result.type = statistics_pb2.Histogram.QUANTILES

    quantiles = list(quantiles)
    # We explicitly add the min and max to the quantiles list as the
    # quantiles combiner returns only the internal boundaries.
    quantiles.insert(0, min_val)  # Insert min_val in the beginning.
    quantiles.append(max_val)  # Append max_val to the end.

    # We assume that the number of quantiles is at least the required number
    # of buckets in the quantiles histogram.
    assert len(quantiles) - 1 >= num_buckets

    # Sample count per bucket based on the computed quantiles.
    current_sample_count = float(total_count / (len(quantiles) - 1))
    # Sample count per bucket required for the quantiles histogram.
    required_sample_count = float(total_count / num_buckets)

    # Start of the current bucket.
    bucket_start = min_val
    # Sample count of the current bucket.
    running_sample_count = 0
    # Iterate to create the first num_buckets - 1 buckets.
    for i in six.moves.range(len(quantiles) - 1):
        if (running_sample_count + current_sample_count >=
                required_sample_count):
            # Sample count needed for the current bucket.
            needed_sample_count = required_sample_count - running_sample_count
            # Compute width of the current bucket based on the needed sample
            # count. We assume the samples are uniformly distributed in an
            # interval.
            width = ((quantiles[i + 1] - quantiles[i]) * needed_sample_count /
                     current_sample_count)

            result.buckets.add(low_value=bucket_start,
                               high_value=quantiles[i] + width,
                               sample_count=required_sample_count)

            # Add any carried over sample count for the next bucket.
            running_sample_count = current_sample_count - needed_sample_count
            # Fix the start of the next bucket.
            bucket_start = quantiles[i] + width

            if len(result.buckets) == num_buckets - 1:
                break
        else:
            running_sample_count += current_sample_count

    # Add the last bucket.
    result.buckets.add(low_value=bucket_start, high_value=max_val,
                       sample_count=required_sample_count)

    return result
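# Illustrative worked example of the equal-mass bucket arithmetic above
# (plain numbers only, no protobuf): 5 quantile intervals of 20 samples each
# collapsed into 2 histogram buckets of 50 samples.
total_count, num_buckets = 100.0, 2
quantiles = [0.0, 10.0, 25.0, 45.0, 70.0, 100.0]  # after adding min and max
current = total_count / (len(quantiles) - 1)      # 20 samples per interval
required = total_count / num_buckets              # 50 samples per bucket
needed = required - 2 * current                   # 10 samples still missing
width = (quantiles[3] - quantiles[2]) * needed / current
print(quantiles[2] + width)                       # 35.0 -> end of the first bucket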
def beam_search_list_label(prior_score: np.ndarray, prior_label_list: dict,
                           now_score: np.ndarray, top_k: int = 3,
                           unique: bool = False, mask_label: int = None):
    """Beam search over label sequences.

    Not very efficient: the ranking step pulls out all candidates and sorts
    them.

    :param prior_score: scores of the previous step, shape (b,)
    :param prior_label_list: previous label paths, e.g. [[0, 1, 2]]
    :param now_score: scores of the current step, shape (b, target_size)
    :param top_k: number of candidates to keep
    :param unique: whether labels may repeat within a single path
    :param mask_label: label treated as a mask (allowed to repeat)
    :return:
    """
    if prior_score is None:
        # First beam search step.
        multi_score = now_score
        # (b, target_size) -> (b, top_k)
        top_k_indexs = np.argsort(a=-multi_score, axis=-1)[:, :top_k]
        prior_label_list = []
        prior_score = []
        for b in range(now_score.shape[0]):
            label_lists = []
            for k in range(top_k):
                label = top_k_indexs[b][k]
                label_lists.append([label])
                score = multi_score[b][label]
                prior_score.append(score)
            prior_label_list.append(label_lists)
        return prior_label_list, prior_score
    else:
        # Subsequent beam search steps.
        assert len(prior_score) == len(now_score)
        multi_score = []
        for i in range(len(prior_score)):
            multi_score.append(prior_score[i] * now_score[i])
        multi_score = np.array(multi_score)

        new_prior_label_list = []
        new_prior_score = []
        point = 0  # running index into multi_score
        for label_lists in prior_label_list:
            # Every sample keeps its own top_k candidates.
            id_label_list_score = []  # (label path, score) pairs for this sample
            for label_list in label_lists:
                id_brach_multi_score = multi_score[point]
                # Fetch extra candidates; deduplication below may drop some.
                id_brach_top_index = np.argsort(
                    a=-id_brach_multi_score)[:top_k * 3]
                for t in id_brach_top_index:
                    id_brach_multi_score_t = id_brach_multi_score[t]
                    id_label_list_score.append(
                        [label_list + [t], id_brach_multi_score_t])
                point += 1
            # Sort candidates by score, descending.
            label_list_score = list(
                sorted(id_label_list_score, key=lambda x: x[1], reverse=True))
            if unique:
                # Keep only paths without repeated labels.
                if mask_label is None:
                    # No label may repeat.
                    label_list_score = list(
                        filter(lambda x: len(set(x[0])) == len(x[0]),
                               label_list_score))
                else:
                    # mask_label may repeat; all other labels may not.
                    label_list_score_ = []
                    for label_list, score in label_list_score:
                        label_list_ = list(
                            filter(lambda x: x != mask_label, label_list))
                        if len(set(label_list_)) == len(label_list_):
                            label_list_score_.append([label_list, score])
                    label_list_score = label_list_score_
            top_k_label_list_score = label_list_score[:top_k]
            label_lists = []
            for label_list, score in top_k_label_list_score:
                label_lists.append(label_list)
                new_prior_score.append(score)
            new_prior_label_list.append(label_lists)
        return new_prior_label_list, new_prior_score
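# Illustrative usage sketch for beam_search_list_label: two steps over 4 labels
# for a single sample with top_k = 2 (the toy scores below are made up).
step1 = np.array([[0.1, 0.6, 0.2, 0.1]])
paths, scores = beam_search_list_label(None, None, step1, top_k=2)
# paths == [[[1], [2]]], scores == [0.6, 0.2]

# The next step needs one score row per kept path (2 rows here).
step2 = np.array([[0.5, 0.1, 0.3, 0.1],
                  [0.2, 0.2, 0.2, 0.4]])
paths, scores = beam_search_list_label(scores, paths, step2, top_k=2)
print(paths)   # paths extended to length 2, best joint score first
print(scores)  # corresponding joint (product) scores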