Example #1
    def build(self):
        layer_sizes = list(sliding_window(2, self.dimensions))
        if self.single_module == -1 or self.single_module == 0:
            layers = []

            for i, size in enumerate(layer_sizes):
                layers.append(("fc" + str(i), nn.Linear(size[0], size[1])))
                if i < len(self.dimensions) - 2:
                    layers.append(("act" + str(i), nn.ELU()))
                    layers.append(
                        ("drop" + str(i + 1), nn.Dropout(self.keep_prob)))
            self.encoder = nn.Sequential(OrderedDict(layers))
        else:
            self.encoder = nn.Sequential()

        if self.single_module == 0 or self.single_module == 1:
            layers = []
            for i, size in enumerate(layer_sizes[-1::-1]):
                layers.append(("fc" + str(i), nn.Linear(size[1], size[0])))
                if i < len(self.dimensions) - 2:
                    layers.append(("act" + str(i), nn.ELU()))
                    layers.append(
                        ("drop" + str(i + 1), nn.Dropout(self.keep_prob)))
            self.decoder = nn.Sequential(OrderedDict(layers))
        else:
            self.decoder = nn.Sequential()
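A minimal sketch of the pattern the example above relies on: sliding_window(2, dimensions) turns a list of layer widths into consecutive (in, out) pairs, one per nn.Linear. The widths below are hypothetical; toolz is assumed to be installed.

from toolz import sliding_window

dimensions = [784, 256, 64]  # hypothetical layer widths
layer_sizes = list(sliding_window(2, dimensions))
# layer_sizes == [(784, 256), (256, 64)] -> one nn.Linear(in, out) per pair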
Example #2
    def _construct_relationship(self, path, updated_factors):
        start_node = path[0]
        end_node = path[-1]

        computed_matrix = (self.fuser.factor(start_node)
                           if start_node.name not in updated_factors else
                           updated_factors[start_node.name])
        print(
            type(start_node),
            start_node,
            start_node.name in updated_factors,
            computed_matrix.shape,
        )

        for src, dst in sliding_window(2, path):
            relation = list(self.fuser.fusion_graph.get_relations(src, dst))[0]
            print(relation)
            computed_matrix = np.dot(computed_matrix,
                                     self.fuser.backbone(relation))

        end_factor = (self.fuser.factor(end_node)
                      if end_node.name not in updated_factors else
                      updated_factors[end_node.name])
        computed_matrix = np.dot(computed_matrix, end_factor.T)

        return computed_matrix
Example #3
    def _persist_header_chain(
        cls, db: BaseDB, headers: Iterable[BlockHeader]
    ) -> Tuple[Tuple[BlockHeader, ...], Tuple[BlockHeader, ...]]:
        try:
            first_header = first(headers)
        except StopIteration:
            return tuple(), tuple()
        else:

            for parent, child in sliding_window(2, headers):
                if parent.hash != child.parent_hash:
                    raise ValidationError(
                        "Non-contiguous chain. Expected {} to have {} as parent but was {}"
                        .format(
                            encode_hex(child.hash),
                            encode_hex(parent.hash),
                            encode_hex(child.parent_hash),
                        ))

            is_genesis = first_header.parent_hash == GENESIS_PARENT_HASH
            if not is_genesis and not cls._header_exists(
                    db, first_header.parent_hash):
                raise ParentNotFound(
                    "Cannot persist block header ({}) with unknown parent ({})"
                    .format(encode_hex(first_header.hash),
                            encode_hex(first_header.parent_hash)))

            score = 0 if is_genesis else cls._get_score(
                db, first_header.parent_hash)

        for header in headers:
            db.set(
                header.hash,
                rlp.encode(header),
            )

            score += header.difficulty

            db.set(
                SchemaV1.make_block_hash_to_score_lookup_key(header.hash),
                rlp.encode(score, sedes=rlp.sedes.big_endian_int),
            )

        try:
            previous_canonical_head = cls._get_canonical_head(db).hash
            head_score = cls._get_score(db, previous_canonical_head)
        except CanonicalHeadNotFound:
            (new_canonical_headers,
             old_canonical_headers) = cls._set_as_canonical_chain_head(
                 db, header.hash)
        else:
            if score > head_score:
                (new_canonical_headers,
                 old_canonical_headers) = cls._set_as_canonical_chain_head(
                     db, header.hash)
            else:
                new_canonical_headers = tuple()
                old_canonical_headers = tuple()

        return new_canonical_headers, old_canonical_headers
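Note that this version iterates the headers argument three times (first(), sliding_window(), and the final for loop), which only works when headers is a re-iterable sequence; Example #9 below is the variant that guards against one-shot generators with iter() and concat(). A minimal sketch of the pitfall, with plain integers standing in for headers:

from toolz import first, sliding_window

headers = (h for h in [1, 2, 3])      # a one-shot generator
first(headers)                        # consumes the first item
list(sliding_window(2, headers))      # [(2, 3)] -- the (1, 2) pair is already gone
list(headers)                         # [] -- nothing left for a third pass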
Example #4
File: lines.py Project: zorzalerrante/aves
    def prepare(self):
        if self.colormap is None:
            self.colormap = colors.LinearSegmentedColormap.from_list(
                "", [self.source_color, self.target_color])

        if self.linewidth is None:
            self.set_linewidth()

        self.prepared_data = {}

        for n_points, pairs in self.curves_per_length.items():
            segments = np.concatenate(
                [list(sliding_window(2, p[0])) for p in pairs])
            weights = np.concatenate(
                [list(repeat(p[1], n_points)) for p in pairs])
            color_values = np.concatenate(
                list(repeat(np.linspace(0, 1, num=n_points - 1), len(pairs))))

            if self.min_linewidth is not None:
                linewidth = np.squeeze(
                    self.linewidth_transform(
                        weights.reshape(-1, 1),
                        feature_range=(self.min_linewidth, self.linewidth),
                    ))
            else:
                linewidth = self.linewidth

            self.prepared_data[n_points] = {
                "segments": segments,
                "weights": weights,
                "color_values": color_values,
                "linewidth": linewidth,
            }

        self.prepared = True
Example #5
    def build(self):
        layer_sizes = list(sliding_window(2, self.dimensions))
        if self.single_module == -1 or self.single_module == 0:
            layers = []
            for i, size in enumerate(layer_sizes):
                if i == len(layer_sizes) - 1:
                    self.cluster_layer = RBF_Layer(in_features=size[0],
                                                   out_features=size[1],
                                                   basis_func=self.basis_func)
                else:
                    layers.append(("fc" + str(i), nn.Linear(size[0], size[1])))
                    if i < len(self.dimensions) - 2:
                        layers.append(("act" + str(i), self.act()))
                        layers.append(
                            ("drop" + str(i + 1), nn.Dropout(self.keep_prob)))
            self.encoder = nn.Sequential(OrderedDict(layers))
        else:
            self.encoder = nn.Sequential()

        if self.single_module == 0 or self.single_module == 1:
            layers = []
            layer_sizes[-1] = (layer_sizes[-1][0],
                               layer_sizes[-1][1] + self.extra_feature_len)
            for i, size in enumerate(layer_sizes[-1::-1]):
                layers.append(("fc" + str(i), nn.Linear(size[1], size[0])))
                if i < len(self.dimensions) - 2:
                    layers.append(("act" + str(i), self.act()))
                    layers.append(
                        ("drop" + str(i + 1), nn.Dropout(self.keep_prob)))
            self.decoder = nn.Sequential(OrderedDict(layers))
        else:
            self.decoder = nn.Sequential()
Example #6
    def compute_divided_edge_length(self, edge_idx):
        length = 0.0

        for p0, p1 in sliding_window(2, self.subdivision_points[edge_idx]):
            length += point_distance(p0, p1)

        return length
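The same pairwise pattern measures any polyline: consecutive subdivision points become segments whose lengths are summed. A small self-contained sketch, with math.dist standing in for the example's point_distance:

import math
from toolz import sliding_window

points = [(0.0, 0.0), (3.0, 4.0), (6.0, 8.0)]  # hypothetical subdivision points
length = sum(math.dist(p0, p1) for p0, p1 in sliding_window(2, points))
# length == 10.0  (5.0 + 5.0)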
Example #7
    def _decode_header_to_dict(
            cls, encoded_header: bytes) -> Iterator[Tuple[str, Any]]:
        if len(encoded_header) != cls.smc_encoded_size:
            raise ValidationError(
                "Expected encoded header to be of size: {0}. Got size {1} instead.\n- {2}"
                .format(
                    cls.smc_encoded_size,
                    len(encoded_header),
                    encode_hex(encoded_header),
                ))

        start_indices = accumulate(lambda i, field: i + field[2],
                                   cls.fields_with_sizes, 0)
        field_bounds = sliding_window(2, start_indices)
        for byte_range, field in zip(field_bounds, cls._meta.fields):
            start_index, end_index = byte_range
            field_name, field_type = field

            field_bytes = encoded_header[start_index:end_index]
            if field_type == rlp.sedes.big_endian_int:
                # remove the leading zeros, to avoid `not minimal length` error in deserialization
                formatted_field_bytes = field_bytes.lstrip(b'\x00')
            elif field_type == address:
                formatted_field_bytes = field_bytes[-20:]
            else:
                formatted_field_bytes = field_bytes
            yield field_name, field_type.deserialize(formatted_field_bytes)
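The accumulate/sliding_window combination above is a compact way to turn field sizes into byte ranges. A minimal sketch with hypothetical sizes, using operator.add in place of the example's field-aware lambda:

from operator import add
from toolz import accumulate, sliding_window

sizes = [32, 20, 8]                                    # hypothetical field widths in bytes
start_indices = accumulate(add, sizes, 0)              # 0, 32, 52, 60
field_bounds = list(sliding_window(2, start_indices))  # [(0, 32), (32, 52), (52, 60)]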
Example #8
    def _persist_block_chain(
        cls, db: BaseDB, blocks: Iterable[BaseBeaconBlock],
        block_class: Type[BaseBeaconBlock]
    ) -> Tuple[Tuple[BaseBeaconBlock, ...], Tuple[BaseBeaconBlock, ...]]:
        try:
            first_block = first(blocks)
        except StopIteration:
            return tuple(), tuple()
        else:
            for parent, child in sliding_window(2, blocks):
                if parent.root != child.parent_root:
                    raise ValidationError(
                        "Non-contiguous chain. Expected {} to have {} as parent but was {}"
                        .format(
                            encode_hex(child.root),
                            encode_hex(parent.root),
                            encode_hex(child.parent_root),
                        ))

            is_genesis = first_block.parent_root == GENESIS_PARENT_HASH
            if not is_genesis and not cls._block_exists(
                    db, first_block.parent_root):
                raise ParentNotFound(
                    "Cannot persist block ({}) with unknown parent ({})".
                    format(encode_hex(first_block.root),
                           encode_hex(first_block.parent_root)))

            if is_genesis:
                score = 0
            else:
                score = cls._get_score(db, first_block.parent_root)

        for block in blocks:
            db.set(
                block.root,
                rlp.encode(block),
            )

            # TODO: It's a stub before we implement fork choice rule
            score = block.slot

            db.set(
                SchemaV1.make_block_root_to_score_lookup_key(block.root),
                rlp.encode(score, sedes=rlp.sedes.big_endian_int),
            )

        try:
            previous_canonical_head = cls._get_canonical_head(db,
                                                              block_class).root
            head_score = cls._get_score(db, previous_canonical_head)
        except CanonicalHeadNotFound:
            return cls._set_as_canonical_chain_head(db, block.root,
                                                    block_class)

        if score > head_score:
            return cls._set_as_canonical_chain_head(db, block.root,
                                                    block_class)
        else:
            return tuple(), tuple()
Example #9
    def _persist_header_chain(
        cls, db: BaseDB, headers: Iterable[BlockHeader]
    ) -> Tuple[Tuple[BlockHeader, ...], Tuple[BlockHeader, ...]]:
        headers_iterator = iter(headers)

        try:
            first_header = first(headers_iterator)
        except StopIteration:
            return tuple(), tuple()

        is_genesis = first_header.parent_hash == GENESIS_PARENT_HASH
        if not is_genesis and not cls._header_exists(db,
                                                     first_header.parent_hash):
            raise ParentNotFound(
                "Cannot persist block header ({}) with unknown parent ({})".
                format(encode_hex(first_header.hash),
                       encode_hex(first_header.parent_hash)))

        if is_genesis:
            score = 0
        else:
            score = cls._get_score(db, first_header.parent_hash)

        curr_chain_head = first_header
        db.set(
            curr_chain_head.hash,
            rlp.encode(curr_chain_head),
        )
        score = cls._set_hash_scores_to_db(db, curr_chain_head, score)

        orig_headers_seq = concat([(first_header, ), headers_iterator])
        for parent, child in sliding_window(2, orig_headers_seq):
            if parent.hash != child.parent_hash:
                raise ValidationError(
                    "Non-contiguous chain. Expected {} to have {} as parent but was {}"
                    .format(
                        encode_hex(child.hash),
                        encode_hex(parent.hash),
                        encode_hex(child.parent_hash),
                    ))

            curr_chain_head = child
            db.set(
                curr_chain_head.hash,
                rlp.encode(curr_chain_head),
            )

            score = cls._set_hash_scores_to_db(db, curr_chain_head, score)

        try:
            previous_canonical_head = cls._get_canonical_head(db).hash
            head_score = cls._get_score(db, previous_canonical_head)
        except CanonicalHeadNotFound:
            return cls._set_as_canonical_chain_head(db, curr_chain_head.hash)

        if score > head_score:
            return cls._set_as_canonical_chain_head(db, curr_chain_head.hash)

        return tuple(), tuple()
Example #10
def itinerary_dists(positions, itinerary):
    """Return list of pairs, ['dest', 'dist']."""
    distances = [
        distance(positions[b], positions[a])
        for (a, b) in sliding_window(2, itinerary)
    ]
    labeled = list(zip(itinerary[1:], distances))
    return labeled
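A usage sketch with hypothetical inputs, assuming the function above is in scope and distance is Euclidean:

import math
from toolz import sliding_window

positions = {"a": (0, 0), "b": (3, 4), "c": (3, 0)}  # hypothetical coordinates

def distance(p, q):
    return math.dist(p, q)

print(itinerary_dists(positions, ["a", "b", "c"]))   # [('b', 5.0), ('c', 4.0)]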
Example #11
def prepare_single_babel_language(corpus_dir: Pathlike, output_dir: Optional[Pathlike] = None):
    manifests = defaultdict(dict)
    for split in ('dev', 'eval', 'training'):
        audio_dir = corpus_dir / f'conversational/{split}/audio'
        recordings = RecordingSet.from_recordings(Recording.from_sphere(p) for p in audio_dir.glob('*.sph'))
        if len(recordings) == 0:
            logging.warning(f"No SPHERE files found in {audio_dir}")
        manifests[split]['recordings'] = recordings

        supervisions = []
        text_dir = corpus_dir / f'conversational/{split}/transcription'
        for p in text_dir.glob('*'):
            # p.stem -> BABEL_BP_101_10033_20111024_205740_inLine
            # parts:
            #   0 -> BABEL
            #   1 -> BP
            #   2 -> <language-code> (101)
            #   3 -> <speaker-id> (10033)
            #   4 -> <date> (20111024)
            #   5 -> <hour> (205740)
            #   6 -> channel (inLine) ; inLine <=> A ; outLine <=> B ; "scripted" <=> A
            p0, p1, lang_code, speaker, date, hour, channel, *_ = p.stem.split('_')
            channel = {'inLine': 'A', 'outLine': 'B'}.get(channel, 'A')
            # Add a None at the end so that the last timestamp is only used as "next_timestamp"
            # and ends the iteration (otherwise we'd lose the last segment).
            lines = p.read_text().splitlines() + [None]
            for (timestamp, text), (next_timestamp, _) in sliding_window(2, zip(lines[::2], lines[1::2])):
                start = float(timestamp[1:-1])
                end = float(next_timestamp[1:-1])
                supervisions.append(
                    SupervisionSegment(
                        id=f'{lang_code}_{speaker}_{channel}_{date}_{hour}_{int(100 * start):06}',
                        recording_id=p.stem,
                        start=start,
                        duration=round(end - start, ndigits=8),
                        channel=0,
                        text=normalize_text(text),
                        language=BABELCODE2LANG[lang_code],
                        speaker=speaker,
                    )
                )
        if len(supervisions) == 0:
            logging.warning(f"No supervisions found in {text_dir}")
        manifests[split]['supervisions'] = SupervisionSet.from_segments(supervisions)

        validate_recordings_and_supervisions(
            manifests[split]['recordings'],
            manifests[split]['supervisions']
        )

        if output_dir is not None:
            language = BABELCODE2LANG[lang_code]
            if split == 'training':
                split = 'train'
            manifests[split]['recordings'].to_json(f'recordings_{language}_{split}.json')
            manifests[split]['supervisions'].to_json(f'supervisions_{language}_{split}.json')

    return manifests
Example #12
File: Candidates.py Project: webbcla/c2xg
    def get_score(self, current_candidate):

        total_score = 0.0

        for pair in ct.sliding_window(2, current_candidate):

            current_dict = self.association_dict[pair]
            current_score = max(current_dict["RL"], current_dict["LR"])
            total_score += current_score

        return total_score
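The loop above scores a candidate by looking up each adjacent pair in an association dictionary. A minimal sketch with hypothetical scores (ct is toolz/cytoolz):

from toolz import sliding_window

association_dict = {                        # hypothetical association scores
    ("the", "big"): {"LR": 0.2, "RL": 0.5},
    ("big", "dog"): {"LR": 0.7, "RL": 0.1},
}
candidate = ("the", "big", "dog")
total_score = sum(max(association_dict[pair]["RL"], association_dict[pair]["LR"])
                  for pair in sliding_window(2, candidate))
# total_score == 0.5 + 0.7 == 1.2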
Example #13
    def build_node_memberships(self):
        self.membership_per_level = defaultdict(dict)

        self.membership_per_level[0] = dict(
            zip(map(int, self.network.vertices()), self.block_levels[0]))

        for i, (l0, l1) in enumerate(sliding_window(2, self.block_levels),
                                     start=1):
            update_level = dict(zip(np.unique(l0), l1))
            self.membership_per_level[i] = valmap(
                lambda x: update_level[x], self.membership_per_level[i - 1])
Example #14
	def get_score(self, current_candidate):
	
		total_score = 0.0
		
		for pair in ct.sliding_window(2, current_candidate):
		
			current_dict = self.association_dict[pair]
			current_score = max(current_dict["RL"], current_dict["LR"])
			total_score += current_score
		
		return total_score
Example #15
File: sentence.py Project: yukota/elpod
    def _search_grammer_path(self, pos_via_point):
        pos_via_and_end = (self.START_NODE,) + pos_via_point + (self.END_NODE,)
        paths = []
        cost = 0
        for network_start_end in sliding_window(2, pos_via_and_end):
            path = networkx.bidirectional_dijkstra(self._grammer_graph, network_start_end[0], network_start_end[1])
            cost += path[0]
            node_path = path[1][1:]
            paths += node_path
        paths.pop()
        return cost, paths
Example #16
    def fhs(self, n_scenarios=250, start_date=None, end_date=None):
        x = sliding_window(n_scenarios + 1, range(len(self.ts.index)))
        scenarios = np.zeros((len(self.ts.index), n_scenarios + 1))
        for i, el in enumerate(x):
            l = list(el)
            cur_idx, hist_idx = l[-1], l[:-1]
            neutral = self.ts.Value.values[cur_idx]
            ret = self.ts.DevolLogReturns.values[hist_idx]
            vol = self.ts.Vola.values[cur_idx]
            scenarios[cur_idx, 1:] = self.scenario_values(ret, neutral, vol)
            scenarios[cur_idx, 0] = neutral
        return scenarios
Example #17
    def prepare_segments(self, level=None):
        self.segments_per_pair = defaultdict(list)

        if level is None:
            level = self.community_level

        for edge_data in self.edges:
            segments = list(sliding_window(2, edge_data['spline']))
            values = np.linspace(0, 1, num=self.n_points - 1)
            pair = (self.membership_per_level[level][edge_data['source']],
                    self.membership_per_level[level][edge_data['target']])
            #print(pair)
            #break

            self.segments_per_pair[pair].append(
                (segments, values, edge_data['weight']))
Example #18
File: Candidates.py Project: webbcla/c2xg
    def get_pairwise_lists(self, candidate):

        lr_list = []  #Initiate list of LR association values
        rl_list = []  #Initiate list of RL association values

        #Populate the pairwise value lists
        for current_pair in ct.sliding_window(2, candidate):

            lr_list.append(self.association_dict[current_pair]["LR"])
            rl_list.append(self.association_dict[current_pair]["RL"])

        #Send lists to class-external jitted function for processing
        return_list = calculate_measures(np.array(lr_list), np.array(rl_list))

        #Check for end-point
        try:
            endpoint_lr = self.association_dict[(candidate[0],
                                                 candidate[-1])]["LR"]
            endpoint_rl = self.association_dict[(candidate[0],
                                                 candidate[-1])]["RL"]

        except Exception as e:
            endpoint_lr = 0.0
            endpoint_rl = 0.0

        #Add Endpoint to return_list
        return_list.append(endpoint_lr)
        return_list.append(endpoint_rl)

        #return_list contains the following items:
        #--- candidate (representation, index) tuples
        #--- mean_lr
        #--- mean_rl
        #--- min_lr
        #--- min_rl
        #--- directional_scalar
        #--- directional_categorical
        #--- reduced_beginning_lr
        #--- reduced_beginning_rl
        #--- reduced_end_lr
        #--- reduced_end_rl
        #--- endpoint_lr
        #--- endpoint_rl

        return return_list

    #----------------------------------------------------------------------------------------------#
Example #19
def make_ngrams(s, n, joiner=None):
    """ Make n-grams
    
    For character ngrams, s should be a string
    For token/word ngrams, s should be a sequence of tokens

    joiner='' is recommended for characters, and joiner='_' for words.
    """
    try:
        ngrams = tz.sliding_window(n, s)
    except StopIteration:
        # bug in toolz/cytoolz?
        yield from ()
        return

    if joiner is not None:
        ngrams = (joiner.join(grams) for grams in ngrams)

    yield from ngrams
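Usage sketches for both modes the docstring mentions, assuming the function above is in scope:

print(list(make_ngrams("toolz", 2, joiner="")))
# ['to', 'oo', 'ol', 'lz']
print(list(make_ngrams(["new", "york", "city"], 2, joiner="_")))
# ['new_york', 'york_city']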
Example #20
	def get_pairwise_lists(self, candidate):

		lr_list = []	#Initiate list of LR association values
		rl_list = []	#Initiate list of RL association values
		
		#Populate the pairwise value lists
		for current_pair in ct.sliding_window(2, candidate):

			lr_list.append(self.association_dict[current_pair]["LR"])
			rl_list.append(self.association_dict[current_pair]["RL"])

		#Send lists to class-external jitted function for processing
		return_list = calculate_measures(np.array(lr_list), np.array(rl_list))
		
		#Check for end-point
		try:
			endpoint_lr = self.association_dict[(candidate[0], candidate[-1])]["LR"]
			endpoint_rl = self.association_dict[(candidate[0], candidate[-1])]["RL"]
			
		except Exception as e:
			endpoint_lr = 0.0
			endpoint_rl = 0.0
			
		#Add Endpoint to return_list
		return_list.append(endpoint_lr)
		return_list.append(endpoint_rl)
		
		#return_list contains the following items:
		#--- candidate (representation, index) tuples
		#--- mean_lr 
		#--- mean_rl
		#--- min_lr
		#--- min_rl
		#--- directional_scalar
		#--- directional_categorical
		#--- reduced_beginning_lr
		#--- reduced_beginning_rl
		#--- reduced_end_lr
		#--- reduced_end_rl
		#--- endpoint_lr
		#--- endpoint_rl
		
		return return_list
	#----------------------------------------------------------------------------------------------#
Example #21
def _compute_gas_price(probabilities, desired_probability):
    """
    Given a sorted range of ``Probability`` named-tuples returns a gas price
    computed based on where the ``desired_probability`` would fall within the
    range.

    :param probabilities: An iterable of `Probability` named-tuples sorted in reverse order.
    :param desired_probability: A floating point representation of the desired
        probability. (e.g. ``85% -> 0.85``)
    """
    first = probabilities[0]
    last = probabilities[-1]

    if desired_probability >= first.prob:
        return first.gas_price
    elif desired_probability <= last.prob:
        return last.gas_price

    for left, right in sliding_window(2, probabilities):
        if desired_probability < right.prob:
            continue
        elif desired_probability > left.prob:
            # This code block should never be reachable as it would indicate
            # that we already passed by the probability window in which our
            # `desired_probability` is located.
            raise Exception('Invariant')

        adj_prob = desired_probability - right.prob
        window_size = left.prob - right.prob
        position = adj_prob / window_size
        gas_window_size = left.gas_price - right.gas_price
        gas_price = int(math.ceil(right.gas_price +
                                  gas_window_size * position))
        return gas_price
    else:
        # The initial `if/else` clause in this function handles the case where
        # the `desired_probability` is either above or below the min/max
        # probability found in the `probabilities`.
        #
        # With these two cases handled, the only way this code block should be
        # reachable would be if the `probabilities` were not sorted correctly.
        # Otherwise, the `desired_probability` **must** fall between two of the
        # values in the ``probabilities``.
        raise Exception('Invariant')
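A small sketch of the interpolation step, with a hypothetical Probability named-tuple sorted by descending probability and the function above in scope:

from collections import namedtuple

Probability = namedtuple("Probability", ["gas_price", "prob"])  # hypothetical shape
probabilities = [
    Probability(gas_price=100, prob=0.9),
    Probability(gas_price=50, prob=0.5),
    Probability(gas_price=10, prob=0.1),
]
print(_compute_gas_price(probabilities, 0.7))  # 75: halfway between the 0.5 and 0.9 rows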
Example #22
def _compute_gas_price(probabilities, desired_probability):
    """
    Given a sorted range of ``Probability`` named-tuples returns a gas price
    computed based on where the ``desired_probability`` would fall within the
    range.

    :param probabilities: An iterable of `Probability` named-tuples sorted in reverse order.
    :param desired_probability: A floating point representation of the desired
        probability. (e.g. ``85% -> 0.85``)
    """
    first = probabilities[0]
    last = probabilities[-1]

    if desired_probability >= first.prob:
        return first.gas_price
    elif desired_probability <= last.prob:
        return last.gas_price

    for left, right in sliding_window(2, probabilities):
        if desired_probability < right.prob:
            continue
        elif desired_probability > left.prob:
            # This code block should never be reachable as it would indicate
            # that we already passed by the probability window in which our
            # `desired_probability` is located.
            raise Exception('Invariant')

        adj_prob = desired_probability - right.prob
        window_size = left.prob - right.prob
        position = adj_prob / window_size
        gas_window_size = left.gas_price - right.gas_price
        gas_price = int(math.ceil(right.gas_price + gas_window_size * position))
        return gas_price
    else:
        # The initial `if/else` clause in this function handles the case where
        # the `desired_probability` is either above or below the min/max
        # probability found in the `probabilities`.
        #
        # With these two cases handled, the only way this code block should be
        # reachable would be if the `probabilities` were not sorted correctly.
        # Otherwise, the `desired_probability` **must** fall between two of the
        # values in the ``probabilities``.
        raise Exception('Invariant')
Example #23
File: sentence.py Project: yukota/elpod
    def _create_grammer_network(self, posid_list):
        # count nodes and edges
        node = set()
        for posid in chain.from_iterable(posid_list):
            node.add(posid)

        edge_weight_dict = {}
        for sentence in posid_list:
            for one_edge in sliding_window(2, sentence):
                edge_weight_dict[one_edge] = edge_weight_dict.get(one_edge, 1) + 1

        max_weight = max(edge_weight_dict.values())
        # In NetworkX the weight acts as a cost, so map higher-frequency edges to lower costs

        # create direct network
        graph = networkx.DiGraph()
        graph.add_nodes_from(node)
        for edge, weight in edge_weight_dict.items():
            # use 1 as the minimum cost
            cost = max_weight - weight + 1
            graph.add_edge(edge[0], edge[1], weight=cost)
        return graph
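A sketch of the frequency-to-cost conversion used above: the most frequent edge gets cost 1 and rarer edges get higher costs (the counts below are hypothetical):

edge_weight_dict = {("DET", "NOUN"): 5, ("NOUN", "VERB"): 2}  # hypothetical counts
max_weight = max(edge_weight_dict.values())
costs = {edge: max_weight - weight + 1 for edge, weight in edge_weight_dict.items()}
# costs == {('DET', 'NOUN'): 1, ('NOUN', 'VERB'): 4}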
Example #24
def compute_gas_price(probabilities, desired_probability):
    first = probabilities[0]
    last = probabilities[-1]

    if desired_probability >= first.prob:
        return first.gas_price
    elif desired_probability <= last.prob:
        return last.gas_price

    for left, right in sliding_window(2, probabilities):
        if desired_probability < right.prob:
            continue
        elif desired_probability > left.prob:
            raise Exception('Invariant')

        adj_prob = desired_probability - right.prob
        window_size = left.prob - right.prob
        position = adj_prob / window_size
        gas_window_size = left.gas_price - right.gas_price
        gas_price = int(math.ceil(right.gas_price +
                                  gas_window_size * position))
        return gas_price
    else:
        raise Exception('Invariant')
Example #25
    def _persist_block_chain(
        cls,
        db: DatabaseAPI,
        blocks: Iterable[BaseBeaconBlock],
        block_class: Type[BaseBeaconBlock],
        fork_choice_scorings: Iterable[ForkChoiceScoringFn],
    ) -> Tuple[Tuple[BaseBeaconBlock, ...], Tuple[BaseBeaconBlock, ...]]:
        blocks_iterator = iter(blocks)
        scorings_iterator = iter(fork_choice_scorings)

        try:
            first_block = first(blocks_iterator)
            first_scoring = first(scorings_iterator)
        except StopIteration:
            return tuple(), tuple()

        try:
            previous_canonical_head = cls._get_canonical_head(
                db, block_class).signing_root
            head_score = cls._get_score(db, previous_canonical_head)
        except CanonicalHeadNotFound:
            no_canonical_head = True
        else:
            no_canonical_head = False

        is_genesis = first_block.is_genesis
        if not is_genesis and not cls._block_exists(db,
                                                    first_block.parent_root):
            raise ParentNotFound(
                "Cannot persist block ({}) with unknown parent ({})".format(
                    encode_hex(first_block.signing_root),
                    encode_hex(first_block.parent_root),
                ))

        score = first_scoring(first_block)

        curr_block_head = first_block
        db.set(curr_block_head.signing_root, ssz.encode(curr_block_head))
        cls._add_block_root_to_slot_lookup(db, curr_block_head)
        cls._set_block_score_to_db(db, curr_block_head, score)
        cls._add_attestations_root_to_block_lookup(db, curr_block_head)

        orig_blocks_seq = concat([(first_block, ), blocks_iterator])

        for parent, child in sliding_window(2, orig_blocks_seq):
            if parent.signing_root != child.parent_root:
                raise ValidationError(
                    "Non-contiguous chain. Expected {} to have {} as parent but was {}"
                    .format(
                        encode_hex(child.signing_root),
                        encode_hex(parent.signing_root),
                        encode_hex(child.parent_root),
                    ))

            curr_block_head = child
            db.set(curr_block_head.signing_root, ssz.encode(curr_block_head))
            cls._add_block_root_to_slot_lookup(db, curr_block_head)
            cls._add_attestations_root_to_block_lookup(db, curr_block_head)

            # NOTE: len(scorings_iterator) should equal len(blocks_iterator)
            try:
                next_scoring = next(scorings_iterator)
            except StopIteration:
                raise MissingForkChoiceScoringFns

            score = next_scoring(curr_block_head)
            cls._set_block_score_to_db(db, curr_block_head, score)

        if no_canonical_head:
            return cls._set_as_canonical_chain_head(
                db, curr_block_head.signing_root, block_class)

        if score > head_score:
            return cls._set_as_canonical_chain_head(
                db, curr_block_head.signing_root, block_class)
        else:
            return tuple(), tuple()
Example #26
    def sliding_window(self, n):
        ''' Should always return a generator - otherwise it's going to get huge '''
        return fgenerator(self.__class__(sw) for sw in cytoolz.sliding_window(n, self))
Example #27
def header_pairs(VM, headers, valid):
    for pair in sliding_window(2, headers):
        yield VM, pair[1], pair[0], valid
Example #28
def slw2(n, seq):
	for i in toolz.sliding_window(n, ([None] * (n - 1)) + seq):
		yield tuple(filter(None, i))
Example #29
    def _count_of_exact(seq):
        subs = ["".join(entry) for entry in sliding_window(len(sub), seq)]
        return subs.count(sub)
Example #30
def slw(n, seq):
	yield from toolz.sliding_window(n, ([None] * (n - 1)) + seq)
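A usage sketch for the padded variants slw2 (Example #28) and slw (Example #30), assuming both are in scope; seq must be a list, since it is concatenated with the None padding, and note that filter(None, ...) in slw2 would also drop other falsy items such as 0:

print(list(slw(3, [1, 2, 3])))
# [(None, None, 1), (None, 1, 2), (1, 2, 3)]
print(list(slw2(3, [1, 2, 3])))
# [(1,), (1, 2), (1, 2, 3)]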
Example #31
    def persist_header_chain(
        self, headers: Iterable[BlockHeader]
    ) -> Tuple[Tuple[BlockHeader, ...], Tuple[BlockHeader, ...]]:
        """
        Return two iterables of headers: the first containing the new canonical headers,
        the second containing the old canonical headers.
        """

        try:
            first_header = first(headers)
        except StopIteration:
            return tuple(), tuple()
        else:

            for parent, child in sliding_window(2, headers):
                if parent.hash != child.parent_hash:
                    raise ValidationError(
                        "Non-contiguous chain. Expected {} to have {} as parent but was {}"
                        .format(
                            encode_hex(child.hash),
                            encode_hex(parent.hash),
                            encode_hex(child.parent_hash),
                        ))

            is_genesis = first_header.parent_hash == GENESIS_PARENT_HASH
            if not is_genesis and not self.header_exists(
                    first_header.parent_hash):
                raise ParentNotFound(
                    "Cannot persist block header ({}) with unknown parent ({})"
                    .format(encode_hex(first_header.hash),
                            encode_hex(first_header.parent_hash)))

            score = 0 if is_genesis else self.get_score(
                first_header.parent_hash)

        for header in headers:
            self.db.set(
                header.hash,
                rlp.encode(header),
            )

            score += header.difficulty

            self.db.set(
                SchemaV1.make_block_hash_to_score_lookup_key(header.hash),
                rlp.encode(score, sedes=rlp.sedes.big_endian_int),
            )

        try:
            head_score = self.get_score(self.get_canonical_head().hash)
        except CanonicalHeadNotFound:
            (new_canonical_headers,
             old_canonical_headers) = self._set_as_canonical_chain_head(
                 header.hash)
        else:
            if score > head_score:
                (new_canonical_headers,
                 old_canonical_headers) = self._set_as_canonical_chain_head(
                     header.hash)
            else:
                new_canonical_headers = tuple()
                old_canonical_headers = tuple()

        return new_canonical_headers, old_canonical_headers
Example #32
def make_supervisions(
        sgml_path: Pathlike,
        recording: Recording) -> Dict[str, List[SupervisionSegment]]:
    """Create supervisions for sections and segments for a given HUB4 recording."""
    doc = try_parse(sgml_path)
    episode = doc.find("episode")
    section_supervisions = []
    text_supervisions = []
    text_idx = 0
    for sec_idx, section in enumerate(doc.find("episode").find_all("section")):
        # Create a "section" supervision segment that informs what's the program and
        # type/topic of a given section.
        # It spans multiple regular segments with spoken content.
        sec_start = float(section.attrs["starttime"])
        section_supervisions.append(
            SupervisionSegment(
                id=f"{recording.id}_section{sec_idx:03d}",
                recording_id=recording.id,
                start=sec_start,
                duration=round(float(section.attrs["endtime"]) - sec_start,
                               ndigits=3),
                channel=0,
                language=episode.attrs["language"],
                custom={
                    "section": section.attrs["type"],
                    "program": episode.attrs["program"],
                },
            ))
        for turn in section.find_all("turn"):
            # An example of the format in each turn:
            #
            # <turn speaker=Peter_Jennings spkrtype=male startTime=336.704 endTime=338.229>
            # <overlap startTime=336.704 endTime=337.575>
            # <time sec=336.704>
            #  time served up until
            # </overlap>
            # <time sec=337.575>
            #  this point?
            # </turn>
            for child in turn.children:
                # Here, we switch to custom parsing code as explained at the top of this script.
                lines = [
                    l for l in str(child).split("\n") if len(l) and not any(
                        l.startswith(b) for b in EXCLUDE_BEGINNINGS)
                ]
                if not lines:
                    continue
                times = []
                texts = []
                for time_marker, text in group_lines_in_time_marker(lines):
                    match = re.search(r'sec="?(\d+\.?\d*)"?', time_marker)
                    times.append(float(match.group(1)))
                    texts.append(text)
                times.append(float(turn.attrs["endtime"]))
                # Having parsed the current section into start/end times and text
                # for individual speech segments, create a SupervisionSegment for each one.
                for (start, end), text in zip(sliding_window(2, times), texts):
                    text_supervisions.append(
                        SupervisionSegment(
                            id=f"{recording.id}_segment{text_idx:04d}",
                            recording_id=recording.id,
                            start=start,
                            duration=round(end - start, ndigits=8),
                            channel=0,
                            language=episode.attrs["language"],
                            text=text.strip(),
                            speaker=turn.attrs["speaker"],
                            gender=turn.attrs["spkrtype"],
                        ))
                    text_idx += 1
    return {"sections": section_supervisions, "segments": text_supervisions}
Example #33
File: babel.py Project: glynpu/lhotse
def prepare_single_babel_language(
    corpus_dir: Pathlike,
    output_dir: Optional[Pathlike] = None,
    no_eval_ok: bool = False,
) -> Dict[str, Dict[str, Union[RecordingSet, SupervisionSet]]]:
    """
    Prepares manifests using a single BABEL LDC package.

    This function works like the following:

        - first, it will scan `corpus_dir` for a directory named `conversational`;
            if there is more than one, it picks the first one (and emits a warning)
        - then, it will try to find `dev`, `eval`, and `training` splits inside
            (if any of them is not present, it will skip it with a warning)
        - finally, it scans the selected location for SPHERE audio files and transcripts.

    :param corpus_dir: Path to the root of the LDC package with a BABEL language.
    :param output_dir: Path where the manifests are stored.
    :param no_eval_ok: When set to True, this function won't emit a warning
        that the eval set was not found.
    :return:
    """
    manifests = defaultdict(dict)

    # Auto-detect the location of the "conversational" directory
    orig_corpus_dir = corpus_dir
    corpus_dir = Path(corpus_dir)
    corpus_dir = [d for d in corpus_dir.rglob("conversational") if d.is_dir()]
    if not corpus_dir:
        raise ValueError(
            f"Could not find 'conversational' directory anywhere inside '{orig_corpus_dir}' "
            f"- please check your path.")
    if len(corpus_dir) > 1:
        # People have very messy data distributions, the best we can do is warn them.
        logging.warning(
            f"It seems there are multiple 'conversational' directories in '{orig_corpus_dir}' - "
            f"we are selecting the first one only ({corpus_dir[0]}). Please ensure that you provided "
            f"the path to a single language's dir, and the root dir for all BABEL languages."
        )
    corpus_dir = corpus_dir[0].parent

    for split in ("dev", "eval", "training"):
        audio_dir = corpus_dir / f"conversational/{split}/audio"
        sph_recordings = RecordingSet.from_recordings(
            Recording.from_file(p) for p in audio_dir.glob("*.sph"))
        wav_recordings = RecordingSet.from_recordings(
            Recording.from_file(p) for p in audio_dir.glob("*.wav"))
        recordings = combine(sph_recordings, wav_recordings)
        if len(recordings) == 0:
            if split == "eval" and no_eval_ok:
                continue
            logging.warning(f"No SPHERE or WAV files found in {audio_dir}")

        supervisions = []
        text_dir = corpus_dir / f"conversational/{split}/transcription"
        for p in tqdm.tqdm(text_dir.glob("*")):
            # p.stem -> BABEL_BP_101_10033_20111024_205740_inLine
            # parts:
            #   0 -> BABEL
            #   1 -> BP
            #   2 -> <language-code> (101)
            #   3 -> <speaker-id> (10033)
            #   4 -> <date> (20111024)
            #   5 -> <hour> (205740)
            #   6 -> channel (inLine) ; inLine <=> A ; outLine <=> B ; "scripted" <=> A
            p0, p1, lang_code, speaker, date, hour, channel, *_ = p.stem.split(
                "_")
            channel = {"inLine": "A", "outLine": "B"}.get(channel, "A")
            # Fix problematic segments that have two consecutive timestamp lines with no transcript in between
            lines = p.read_text().splitlines() + [""]
            lines = [
                prev_l for prev_l, l in sliding_window(2, lines)
                if not (prev_l.startswith("[") and l.startswith("["))
            ]
            # Add a None at the end so that the last timestamp is only used as "next_timestamp"
            # and ends the iteration (otherwise we'd lose the last segment).
            lines += [None]
            for (timestamp,
                 text), (next_timestamp,
                         _) in sliding_window(2, zip(lines[::2], lines[1::2])):
                try:
                    start = float(timestamp[1:-1])
                    end = float(next_timestamp[1:-1])
                    # Create supervision
                    supervisions.append(
                        SupervisionSegment(
                            id=
                            f"{lang_code}_{speaker}_{channel}_{date}_{hour}_{int(100 * start):06}",
                            recording_id=p.stem,
                            start=start,
                            duration=round(end - start, ndigits=8),
                            channel=0,
                            text=normalize_text(text),
                            language=BABELCODE2LANG[lang_code],
                            speaker=f"{lang_code}_{speaker}_{channel}",
                        ))
                except Exception as e:
                    logging.warning(
                        f"Error while parsing segment. Message: {str(e)}")
                    raise ValueError(
                        f"Too many errors while parsing segments (file: '{p}'). "
                        f"Please check your data or increase the threshold.")
        supervisions = deduplicate_supervisions(supervisions)

        if len(supervisions) == 0:
            logging.warning(f"No supervisions found in {text_dir}")
        supervisions = SupervisionSet.from_segments(supervisions)

        # Fixing and validation of manifests
        if split == "eval" and len(supervisions) == 0:
            # We won't remove missing recordings for the "eval" split in cases where
            # the user does not have its corresponding transcripts (very likely).
            pass
        else:
            recordings, supervisions = remove_missing_recordings_and_supervisions(
                recordings, supervisions)
            supervisions = trim_supervisions_to_recordings(
                recordings, supervisions)
        validate_recordings_and_supervisions(recordings, supervisions)

        manifests[split] = {
            "recordings": recordings,
            "supervisions": supervisions
        }

        if output_dir is not None:
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)
            language = BABELCODE2LANG[lang_code]
            save_split = "train" if split == "training" else split
            recordings.to_file(output_dir /
                               f"recordings_{language}_{save_split}.json")
            supervisions.to_file(output_dir /
                                 f"supervisions_{language}_{save_split}.json")

    return dict(manifests)
Example #34
    def pos_grams(self, n):
        grams = cytoolz.sliding_window(n, self.words)
        for bg in cytoolz.remove(
                lambda x: any(t.like_num or t.is_stop for t in x), grams):
            yield " ".join(g.pos_ for g in bg)
Example #35
    def _persist_block_chain(
        cls, db: BaseDB, blocks: Iterable[BaseBeaconBlock],
        block_class: Type[BaseBeaconBlock]
    ) -> Tuple[Tuple[BaseBeaconBlock, ...], Tuple[BaseBeaconBlock, ...]]:
        blocks_iterator = iter(blocks)

        try:
            first_block = first(blocks_iterator)
        except StopIteration:
            return tuple(), tuple()

        try:
            previous_canonical_head = cls._get_canonical_head(
                db, block_class).signed_root
            head_score = cls._get_score(db, previous_canonical_head)
        except CanonicalHeadNotFound:
            no_canonical_head = True
        else:
            no_canonical_head = False

        is_genesis = first_block.previous_block_root == GENESIS_PARENT_HASH
        if not is_genesis and not cls._block_exists(
                db, first_block.previous_block_root):
            raise ParentNotFound(
                "Cannot persist block ({}) with unknown parent ({})".format(
                    encode_hex(first_block.signed_root),
                    encode_hex(first_block.previous_block_root),
                ))

        if is_genesis:
            score = 0
            # TODO: this should probably be done as part of the fork choice rule processing
            db.set(
                SchemaV1.make_finalized_head_root_lookup_key(),
                first_block.signed_root,
            )
        else:
            score = first_block.slot

        curr_block_head = first_block
        db.set(
            curr_block_head.signed_root,
            ssz.encode(curr_block_head),
        )
        cls._add_block_root_to_slot_lookup(db, curr_block_head)
        cls._set_block_scores_to_db(db, curr_block_head)

        orig_blocks_seq = concat([(first_block, ), blocks_iterator])

        for parent, child in sliding_window(2, orig_blocks_seq):
            if parent.signed_root != child.previous_block_root:
                raise ValidationError(
                    "Non-contiguous chain. Expected {} to have {} as parent but was {}"
                    .format(
                        encode_hex(child.signed_root),
                        encode_hex(parent.signed_root),
                        encode_hex(child.previous_block_root),
                    ))

            curr_block_head = child
            db.set(
                curr_block_head.signed_root,
                ssz.encode(curr_block_head),
            )
            cls._add_block_root_to_slot_lookup(db, curr_block_head)
            score = cls._set_block_scores_to_db(db, curr_block_head)

        if no_canonical_head:
            return cls._set_as_canonical_chain_head(
                db, curr_block_head.signed_root, block_class)

        if score > head_score:
            return cls._set_as_canonical_chain_head(
                db, curr_block_head.signed_root, block_class)
        else:
            return tuple(), tuple()
Example #36
def polygon_edges(polygon):
    return sliding_window(2, polygon)
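Note that this yields the open edge list: for vertices [v0, v1, ..., vn] it produces (v0, v1) through (vn-1, vn) but not the closing edge back to v0. A sketch of a closed variant, under the assumption that the caller wants the ring closed:

from toolz import sliding_window

def polygon_edges_closed(polygon):
    vertices = list(polygon)
    # repeat the first vertex so the final pair closes the ring
    return sliding_window(2, vertices + vertices[:1])

print(list(polygon_edges_closed([(0, 0), (1, 0), (1, 1)])))
# [((0, 0), (1, 0)), ((1, 0), (1, 1)), ((1, 1), (0, 0))]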