Code Example #1
File: api.py Project: cornell-zhang/heterocl
def init(init_dtype="int32", raise_assert_exception=True):
    """Initialize a HeteroCL environment with configurations.

    This API must be called each time the user writes an application.
    Within the same HeteroCL environment, users can try different
    combinations of customization primitives.

    Parameters
    ----------
    init_dtype : Type, optional
        The default data type for each variable

    raise_assert_exception : bool, optional
        Whether a failing assertion raises an exception

    Examples
    --------
    .. code-block:: python

        # app 1
        hcl.init()
        A = hcl.placeholder(...)
        B = hcl.placeholder(...)
        def app1(A, B):
            # define the algorithm for app 1
        s = hcl.create_scheme([A, B], app1)
        # apply customization primitives
        f1 = hcl.build(s)
        # execute f1

        # app 2 - initialize again with a different data type
        hcl.init(hcl.Float())
        A = hcl.placeholder(...)
        B = hcl.placeholder(...)
        C = hcl.placeholder(...)
        def app2(A, B, C):
            # define the algorithm for app 2
        s = hcl.create_scheme([A, B, C], app2)
        f2 = hcl.build(s)
        # execute f2
    """
    # set the configurations
    config.init_dtype = init_dtype
    config.raise_assert_exception = raise_assert_exception
    # initialize global variables
    Schedule.stage_ops = []
    Schedule.stage_names = set()
    Schedule.mod_calls = dict()
    Schedule.last_stages = OrderedSet([])
    Scheme.current = None
Code Example #2
    def __init__(
        self,
        models: Union[List[Union[Model, Dict, str]], Model, Dict] = None,
        _filepath: str = None,
        _path_to_readme: str = None,
    ):
        """
        Args:
            models (list, Model, dict): Either a list of models, an individual model, or a dict representing a model
            _filepath (str): The path of the file from which the list is initialized
            _path_to_readme (str): Path to README if loaded from there
        """

        check_errors = OrderedSet()

        if models is None:
            models = []

        if isinstance(models, Model) or isinstance(models, dict):
            models = [models]

        models_parsed = []
        for m in models:
            if isinstance(m, str):
                # link to model file - support wildcards
                for model_file in expand_wildcard_path(m, _filepath):
                    try:
                        model = Model.from_file(model_file, _filepath)
                        if isinstance(model, Model):
                            models_parsed.append(model)
                        elif isinstance(model, ModelList):
                            models_parsed.extend(model)
                    except (IOError, ValueError) as e:
                        check_errors.add(str(e))
            elif isinstance(m, Model):
                # model object
                models_parsed.append(m)
            else:
                # dict
                models_parsed.append(
                    Model.from_dict(m, _filepath, _path_to_readme))

        super().__init__(
            data=models_parsed,
            filepath=_filepath,
            check_errors=check_errors,
        )
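A construction sketch, assuming this snippet is the ModelList constructor (the class name is not shown above). The dict keys, file name, and wildcard are hypothetical, chosen only to show the accepted input shapes:

# Hypothetical inputs: a dict parsed via Model.from_dict, plus a wildcard
# path expanded relative to _filepath by expand_wildcard_path.
ml = ModelList(
    models=[{"name": "baseline"}, "models/*.yaml"],
    _filepath="repo/modellist.yaml",
)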
Code Example #3
File: api.py Project: cornell-zhang/heterocl
def create_scheme(inputs, func):
    """Create a quantization scheme.

    The first argument is a list of inputs to the second argument, which is
    a function that defines the algorithm. The number of arguments should
    match.
    The function will be set with attributes for later optimizations. This
    API returns an object that has two methods: `quantize` and `downsize`.

    Parameters
    ----------
    inputs : Tensor or list of Tensor
        A list of placeholders that are inputs to the algorithm. It can be a
        single tensor

    func : callable
        A function that defines the algorithm

    Returns
    -------
    Scheme

    See Also
    --------
    scheme.Scheme.downsize, scheme.Scheme.quantize

    Examples
    --------
    .. code-block:: python

        A = hcl.placeholder((10,))
        def algo(A):
            return hcl.compute(A.shape, lambda x: A[x]+1, "B")
        s = hcl.create_scheme(A, algo)
        s.downsize(algo.B, hcl.Int(8))
    """
    if not isinstance(inputs, list):
        inputs = [inputs]
    # reset the global variables
    Schedule.stage_ops = []
    Schedule.mod_calls = dict()
    Schedule.stage_names = set()
    Schedule.last_stages = OrderedSet([])
    with Stage("_top") as top:
        func(*inputs)
    for op in top.substages:
        func.__setattr__(op.name, op)
    return Scheme(inputs, func)
Code Example #4
File: engine.py Project: KanHarI/units-calculator
def parse_pure_units(units_str: str) -> list[tuple[UnitsMeta, int]]:
    """Parse units string to list of units and exponents"""
    def _parse_pure_unit_exp(unit_exp_str: str) -> tuple[UnitsMeta, int]:
        unit_exp_parts = unit_exp_str.split("^")
        if len(unit_exp_parts) > 2:
            raise ValueError(f"Malformed unit: {unit_exp_str}")
        symbol = unit_exp_parts[0]
        unit = parse_symbol(symbol)
        exp = 1
        if len(unit_exp_parts) > 1:
            try:
                exp = int(unit_exp_parts[1].strip("()"))
            except ValueError as e:
                raise ValueError(
                    f"Cannot parse exponent {unit_exp_parts[1]} in unit {unit_exp_str}"
                ) from e
        return unit, exp

    def _parse_nodiv_unit_string(
            nodiv_str: str) -> list[tuple[UnitsMeta, int]]:
        _result: list[tuple[UnitsMeta, int]] = list()
        if len(nodiv_str) == 0:
            return _result
        units_exps = nodiv_str.split("*")
        for unit_exp in units_exps:
            _result.append(_parse_pure_unit_exp(unit_exp))
        return _result

    div_parts = units_str.split("/")
    if len(div_parts) > 2:
        raise ValueError("Can't have multiple '/' characters in unit string!")
    result_parts: list[tuple[UnitsMeta, int]] = _parse_nodiv_unit_string(
        div_parts[0])
    if len(div_parts) > 1:
        # everything after the '/' contributes negated exponents
        result_parts += [(unit, -exp)
                         for unit, exp in _parse_nodiv_unit_string(div_parts[1])]
    units = OrderedSet(unit for unit, _ in result_parts)
    result: list[tuple[UnitsMeta, int]] = list()
    for unit in units:
        exp = 0
        for res_unit, res_exp in result_parts:
            if res_unit is unit:
                exp += res_exp
        if exp != 0:
            result.append((unit, exp))
    return result
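A usage sketch with hypothetical unit symbols; it assumes parse_symbol can resolve 'kg', 'm' and 's'. It shows the '*' separator, the single allowed '/', '^' exponents (optionally parenthesized), and that repeated symbols have their exponents summed:

# Assuming 'kg', 'm' and 's' are symbols known to parse_symbol:
parse_pure_units("kg*m/s^(2)")  # -> [(kg, 1), (m, 1), (s, -2)]
parse_pure_units("m*s/s")       # -> [(m, 1)]; the 's' exponents sum to 0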
Code Example #5
    def write_wide_format_otu_table(**kwargs):
        output_table_io = kwargs.pop('output_table_io')
        table_collection = kwargs.pop('table_collection')
        if len(kwargs) > 0:
            raise Exception("Unexpected arguments detected: %s" % kwargs)

        if hasattr(output_table_io, 'name'):
            logging.info("Writing %s" % output_table_io.name)
        else:
            logging.info("Writing an OTU table")

        # Collect a nested mapping of gene -> sequence -> sample -> count
        gene_to_seq_to_sample_to_count = OrderedDict()
        sequence_to_taxonomy = {}
        samples = OrderedSet()
        for otu in table_collection:
            if otu.marker not in gene_to_seq_to_sample_to_count:
                gene_to_seq_to_sample_to_count[otu.marker] = {}
            if otu.sequence not in gene_to_seq_to_sample_to_count[otu.marker]:
                gene_to_seq_to_sample_to_count[otu.marker][otu.sequence] = {}
            if otu.sample_name in gene_to_seq_to_sample_to_count[otu.marker][
                    otu.sequence]:
                raise Exception(
                    "Unexpectedly found 2 of the same sequences for the same sample and marker"
                )
            gene_to_seq_to_sample_to_count[otu.marker][otu.sequence][
                otu.sample_name] = otu.count
            samples.add(otu.sample_name)
            # This isn't perfect, because the same sequence might have
            # different taxonomies in different samples. But the taxonomy
            # string can take several forms (e.g. regular or diamond-style),
            # so this is close enough.
            sequence_to_taxonomy[otu.sequence] = otu.taxonomy

        output_table_io.write("\t".join(
            itertools.chain(  # header
                ['marker', 'sequence'], samples, ['taxonomy\n'])))
        for gene, seq_to_sample_to_count in gene_to_seq_to_sample_to_count.items():
            for seq, sample_to_count in seq_to_sample_to_count.items():
                row = [gene, seq]
                for sample in samples:
                    try:
                        row.append(str(sample_to_count[sample]))
                    except KeyError:
                        row.append('0')
                row.append(sequence_to_taxonomy[seq])
                output_table_io.write("\t".join(row) + "\n")
Code Example #6
File: test.py Project: wimglenn/ordered-set
def test_unordered_inequality():
    assert OrderedSet([1, 2]) != set([])
    assert OrderedSet([1, 2]) != frozenset([2, 1, 3])

    assert OrderedSet([1, 2]) != {2: 'b'}
    assert OrderedSet([1, 2]) != {1: 1, 4: 2}.keys()
    assert OrderedSet([1, 2]) != {1: 1, 2: 3}.values()

    # Corner case: OrderedDict is not a Sequence, so we don't check for order,
    # even though it does have the concept of order.
    assert OrderedSet([1, 2]) != collections.OrderedDict([(2, 2), (3, 1)])
Code Example #7
def clean_text(dir_path, files):
    """ Remove punctuations, isolate digits, remove poornaviram.
    """
    digits = ['०', '१', '२', '३', '४', '५', '६', '७', '८', '९']
    #files = [f for f in listdir(dir_path)]

    #lines = set()
    for filename in files:
        lines = OrderedSet()
        print("FILE: ", filename)
        print(dir_path + '/' + filename)
        with open(dir_path + '/' + filename,
                  'r',
                  encoding='utf-8',
                  errors='ignore') as fp:
            for line in fp:
                line = line.replace('?', '\n')
                line = line.replace('.', '\n')
                line = line.replace('!', '\n')
                line = line.replace('।', '\n')
                line = line.replace('©', ' ')
                line = line.replace('“', ' ')
                line = line.replace('”', ' ')
                line = line.replace('…', ' ')
                #replace English characters and digits
                line = re.sub(r'[a-zA-Z0-9]', ' ', line)
                temp = line
                for character in temp:
                    if character in string.punctuation:
                        line = line.replace(character, ' ')
                for digit in digits:
                    line = line.replace(digit, ' ' + digit + ' ')
                line = line.replace('’', '')
                line = line.replace('‘', '')
                #replace multiple spaces with a single space
                line = ' '.join(line.split())
                #line = line.replace('\n',' .\n')
                #print(line.strip())
                if line != '' and line != '\n':
                    if line.strip() not in lines:
                        lines.add(line.strip())
        # rewrite the file with the de-duplicated lines
        with open(dir_path + '/' + filename, 'w', encoding='utf-8') as out:
            for line in lines:
                out.write(line + '\n')
Code Example #8
 def __init__(self,
              name,
              value_fn,
              dependents,
              dtype=ztypes.float,
              **kwargs):  # TODO: automatize dependents
     dependents = convert_to_container(dependents)
     if dependents is None:
         params = OrderedSet()
     else:
         params = self._extract_dependents(dependents)
     params = {p.name: p for p in params}
     super().__init__(params=params,
                      value_fn=value_fn,
                      name=name,
                      dtype=dtype,
                      **kwargs)
Code Example #9
File: vivado.py Project: Yummot/enzi
def inc_dir_filter(files):
    """inc_dir_filter for vivado"""
    if not files:
        return ''

    dedup_files = OrderedSet()
    if isinstance(files, Mapping):
        for value in files.values():
            dedup_files.update(value)
    elif isinstance(files, list):
        for item in files:
            dedup_files.add(item)
    else:
        fmt = 'unreachable files type shouldn\'t be {}'
        msg = fmt.format(files.__class__.__name__)
        logger.error(msg)
        raise RuntimeError(msg)
    return ' '.join(dedup_files)
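A usage sketch with made-up directory names: a list is deduplicated in insertion order, while a Mapping contributes every entry of each of its values.

inc_dir_filter(['rtl/include', 'tb/include', 'rtl/include'])
# -> 'rtl/include tb/include'
inc_dir_filter({'core': ['rtl/include'], 'tb': ['tb/include']})
# -> 'rtl/include tb/include'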
Code Example #10
def test_ugraph_init():
    nodes = (1, 2, 3)
    edges = ((1, 3), (1, 2), (2, 1))
    A = None

    exp_nodes, exp_edges, exp_A = (
        OrderedSet((1, 2, 3)),
        {(1, 2), (1, 3)},
        csr_matrix(((1, 1, 1, 1), ((0, 1, 0, 2), (1, 0, 2, 0))),
                   shape=(3, 3),
                   dtype=bool),
    )

    G = UGraph(nodes, edges, A)
    assert G.nodes == exp_nodes
    assert G.edges == exp_edges
    assert (G.A[G.A > 0].A == exp_A[exp_A > 0].A).all()
Code Example #11
def main():
    pairs_filename = sys.argv[1]
    selections_filename = sys.argv[2]
    pairs = read_pairs(pairs_filename)
    selections = read_selections(selections_filename)
    dg = nx.DiGraph()
    dg.add_weighted_edges_from(pairs)
    connectivity = nx.all_pairs_node_connectivity(dg)
    results = OrderedSet()
    for k, v in connectivity.items():
        if k in selections:
            results.add(MAPPING[k])
            for k2, v2 in v.items():
                if v2:
                    results.add(MAPPING[k2])
    for r in results:
        print(r)
Code Example #12
    def _filter_key_fields(self, query_response) -> str:
        results = OrderedSet()
        for obj in query_response:
            result = ''
            rpsl_object_class = OBJECT_CLASS_MAPPING[obj['object_class']]
            fields_included = rpsl_object_class.pk_fields + ['members', 'mp-members']

            for field_name in fields_included:
                field_data = obj['parsed_data'].get(field_name)
                if field_data:
                    if isinstance(field_data, list):
                        for item in field_data:
                            result += f'{field_name}: {item}\n'
                    else:
                        result += f'{field_name}: {field_data}\n'
            results.add(result)
        return '\n'.join(results)
Code Example #13
 def _censor_word_part(self, language: Language, word: spacy.tokens.Token) -> Tuple[Word, bool]:
     """
     :return: Tuple of censored word and flag of no profanity inside
     """
     lemmas = self._lemmas(word=word, language=language)
     if AnalysisType.DEEP in self.analyses:
         lemmas_only_letters = OrderedSet([
             self._keep_only_letters_or_dictionary_word(language=language, word=lemma) for lemma in lemmas])
         if lemmas_only_letters != lemmas:
             lemmas_only_letters = [
                 *chain(*(self._lemmas(word=lemma, language=language) for lemma in lemmas_only_letters))]
             lemmas.update(lemmas_only_letters)
     if self._has_no_profanity(lemmas):
         return Word(uncensored=word.text, censored=word.text), True
     censored_word = self._get_censored_word(word)
     if censored_word is not None:
         return censored_word, False
     for lemma in lemmas:
         if self._is_profane_word(language=language, word=lemma):
             if self.censor_whole_words:
                 censored = self._generate_fully_censored_word(word=word)
             else:
                 censored = self._generate_partly_censored_word(word=word, profane_word=lemma)
             censored_word = Word(uncensored=word.text, censored=censored, original_profane_word=lemma)
             self._save_censored_word(censored_word)
             return censored_word, False
     if AnalysisType.DEEP in self.analyses:
         for lemma in lemmas:
             if self._is_dictionary_word(language=language, word=lemma):
                 return Word(uncensored=word.text, censored=word.text), True
             automaton = LevenshteinAutomaton(tolerance=self._get_max_distance(len(lemma)),
                                              query_word=lemma,
                                              alphabet=self._alphabet)
             matching_bad_words = trie_automaton_intersection(automaton=automaton,
                                                              trie=self._get_trie(language=language),
                                                              include_error=False)
             if matching_bad_words:
                 bad_word = matching_bad_words[0]
                 if self.censor_whole_words:
                     censored = self._generate_fully_censored_word(word=word)
                 else:
                     censored = self._generate_partly_censored_word(word=word, profane_word=bad_word)
                 censored_word = Word(uncensored=word.text, censored=censored, original_profane_word=bad_word)
                 self._save_censored_word(censored_word)
                 return censored_word, False
     return Word(uncensored=word.text, censored=word.text), False
Code Example #14
    def _get_params(
        self,
        floating: bool | None = True,
        is_yield: bool | None = None,
        extract_independent: bool | None = True,
    ) -> set[ZfitParameter]:

        if is_yield is True:
            # we want exclusively yields; we don't have them by default
            params = OrderedSet()
        else:
            params = self.params.values()
            params = extract_filter_params(
                params,
                floating=floating,
                extract_independent=extract_independent)
        return params
Code Example #15
File: rlp.py Project: iJasonne/pslplay
    def __init__(self, name, sense, grounder, lpsolver):
        """
        Instantiates the model

        :param name: The name of the problem, describing it
        :param sense: LpMaximize or LpMinimize
        :param grounder: The grounder used to ground the relational program
        :param lpsolver: The LP solver used to solve the grounded program
        """

        self.sense = sense
        self.grounder = grounder
        self.lpsolver = lpsolver
        self.name = name
        self._reloop_variables = OrderedSet([])
        self._constraints = []
        self.objective = None
Code Example #16
    def __init__(self, name, components=None, **kwargs):

        BondGraphBase.__init__(self, name, **kwargs)
        LabeledPortManager.__init__(self)
        self.components = OrderedSet()
        """The components, instances of :obj:`BondGraphBase`,
        that make up this model"""

        if components:
            for component in components:
                self.add(component)

        self.bonds = BondSet()
        """The list of connections between internal components"""

        self._port_map = dict()
        self._model_changed = True
Code Example #17
    def question_category_recognizer(self, dbTable_name):
        if dbTable_name == "interaction":
            self.dataframe = self.df_interactions
        elif dbTable_name == "assessment":
            self.dataframe = self.df_assessment

        self.users_categories_list = []
        user_ids = OrderedSet(self.dataframe["user_id"])
        for user_id in user_ids:
            if user_id % 4 == 0:
                self.users_categories_list.append("controlledLanguage")
            elif user_id % 4 == 1:
                self.users_categories_list.append("sparqlQuery")
            elif user_id % 4 == 2:
                self.users_categories_list.append("knowledge_graph")
            elif user_id % 4 == 3:
                self.users_categories_list.append("verbalized_answer")
Code Example #18
def get_greedy(data: OrderedDictType[_T1, Set[_T2]]) -> OrderedSet[_T1]:
    """The parameter ngrams needs to be ordered to be able to produce reproductable results."""
    assert isinstance(data, OrderedDict)
    all_ngrams = {e for s in data.values() for e in s}
    available_entries = data.copy()
    covered: Set[_T2] = set()
    result: OrderedSet[_T1] = OrderedSet()

    while covered != all_ngrams:
        selected_key, selected_value = max(
            available_entries.items(),
            key=lambda x: get_new_units_count(x[1], covered))
        result.add(selected_key)
        available_entries.pop(selected_key)
        covered |= selected_value

    return result
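A minimal runnable sketch. get_new_units_count is not shown in this snippet, so the helper below is an assumed stand-in that counts the not-yet-covered units a candidate contributes:

from collections import OrderedDict

from ordered_set import OrderedSet  # assuming the ordered-set package


def get_new_units_count(units, covered):
    # assumed helper: units the candidate adds beyond what is covered
    return len(units - covered)


data = OrderedDict([
    ("a", {1, 2, 3}),
    ("b", {3, 4}),
    ("c", {4, 5, 6}),
])
print(get_greedy(data))  # OrderedSet(['a', 'c']); "b" is never selected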
Code Example #19
    def __init__(self, name, if_use_pad, if_use_unk):

        self.__name = name
        self.__if_use_pad = if_use_pad
        self.__if_use_unk = if_use_unk

        self.__index2instance = OrderedSet()
        self.__instance2index = OrderedDict()

        self.__counter = Counter()

        if if_use_pad:
            self.__sign_pad = "<PAD>"
            self.add_instance(self.__sign_pad)
        if if_use_unk:
            self.__sign_unk = "<UNK>"
            self.add_instance(self.__sign_unk)
Code Example #20
 def test_conditional_multiple_statement(self):
     cc = CCompiler("10.10.10.1_java:C:S1")
     cc.compile(TestCCompiler.get_composition())
     composition2 = "10.10.10.1_java:C:S1;" +\
                    "if (10.10.10.1_java:C:S1 == \"FOO\") { " +\
                    "  10.10.10.1_java:C:S1+10.10.10.1_java:C:S2;" +\
                    "  10.10.10.1_java:C:S1+10.10.10.1_java:C:S7;" +\
                    "  10.10.10.1_java:C:S1+10.10.10.1_java:C:S9;" +\
                    "}"
     cc.compile(composition2)
     owner_ss = ServiceState("10.10.10.1_java:C:S1", "\"FOO\"")
     input_ss = ServiceState("WHATEVER", "DON'T CARE")
     self.assertEqual(
         OrderedSet([
             "10.10.10.1_java:C:S2", "10.10.10.1_java:C:S7",
             "10.10.10.1_java:C:S9"
         ]), cc.get_links(owner_ss, input_ss))
Code Example #21
    def get_list(self, **filter_kwargs):
        """
        Returns a list of filtered, limited and flattened election results.
        """
        filters = self.build_filters(**filter_kwargs)
        fields = self.build_fields(**filter_kwargs)
        exclude_fields = self.build_exclude_fields(**filter_kwargs)
        self.apply_filters(**filters)
        self.apply_field_limits(fields, exclude_fields)
        # A list of encountered fields to accommodate dynamic document fields.
        # Start off with the list of known fields built in the constructor.
        self._fields = OrderedSet(self._output_fields)

        # It's slow to follow the referenced fields at the MongoEngine level
        # so just build our own map of related items in memory.
        #
        # We use as_pymongo() here, and below, because it's silly and expensive
        # to construct a bunch of model instances from the dictionary
        # representation returned by pymongo, only to convert them back to
        # dictionaries for serialization.
        related_map = {}
        for related_field, related_collection in list(
                self._relationships.items()):
            related_map[related_field] = {
                str(c['_id']): c
                for c in self._querysets[related_collection].as_pymongo()
            }

        # We'll save the flattened items as an attribute to support a
        # chainable interface.
        self._items = []
        primary_qs = self._querysets[self.primary_collection_name].as_pymongo()
        try:
            for primary in primary_qs:
                related = {}
                for fname, coll in list(self._relationships.items()):
                    related[fname] = related_map[coll][str(primary[fname])]

                flat = self.flatten(primary, **related)
                self._fields |= list(flat.keys())
                self._items.append(flat)
        except KeyError:
            # a referenced document was missing; stop and return what we have
            pass

        return self._items
Code Example #22
File: interconnect.py Project: mfkiwl/ee272_cgra
 def __lift_ports(self):
     # we assume it's a rectangular grid
     # we only care about the perimeter
     x_range = {self.x_min, self.x_max}
     y_range = {self.y_min, self.y_max}
     coordinates = OrderedSet()
     for (x, y) in self.tile_circuits:
         if x in x_range or y in y_range:
             coordinates.append((x, y))
     for x, y in coordinates:
         tile = self.tile_circuits[(x, y)]
         # we only lift sb ports
         sbs = tile.sbs
         for bit_width, switchbox in sbs.items():
             all_sbs = switchbox.switchbox.get_all_sbs()
             working_set = []
             if x == self.x_min:
                 # we lift west/left ports
                 for sb_node in all_sbs:
                     if sb_node.side != SwitchBoxSide.WEST:
                         continue
                     working_set.append(sb_node)
             elif x == self.x_max:
                 # we lift east/right ports
                 for sb_node in all_sbs:
                     if sb_node.side != SwitchBoxSide.EAST:
                         continue
                     working_set.append(sb_node)
             if y == self.y_min:
                 # we lift north/top ports
                 for sb_node in all_sbs:
                     if sb_node.side != SwitchBoxSide.NORTH:
                         continue
                     working_set.append(sb_node)
             elif y == self.y_max:
                 # we lift south/bottom ports
                 for sb_node in all_sbs:
                     if sb_node.side != SwitchBoxSide.SOUTH:
                         continue
                     working_set.append(sb_node)
             for sb_node in working_set:
                 sb_name = create_name(str(sb_node))
                 sb_port = tile.ports[sb_name]
                 self.add_port(sb_name, sb_port.base_type())
                 self.wire(self.ports[sb_name], sb_port)
Code Example #23
class StartForm(SpaceTraderForm):
    title = 'Start Game'
    template_file = 'start.html'
    allocated_skill_points = 0
    max_skill_points = 8
    difficulty_setting = 'Medium'
    error_message_set = OrderedSet()

    name = StringField('Name', validators=[DataRequired("Must input a name")])
    difficulty = RadioField('Difficulty',
                            default='1',
                            choices=[('0', 'Easy'), ('1', 'Medium'),
                                     ('2', 'Hard')])
    pilot_skill = IntegerField(
        'Pilot Skill',
        validators=[
            DataRequired("Must input a pilot skill level"), skill_check
        ])
    fighter_skill = IntegerField(
        'Fighter Skill',
        validators=[
            DataRequired("Must input a fighter skill level"), skill_check
        ])
    merchant_skill = IntegerField(
        'Merchant Skill',
        validators=[
            DataRequired("Must input a merchant skill level"), skill_check
        ])
    engineer_skill = IntegerField(
        'Engineer Skill',
        validators=[
            DataRequired("Must input an engineer skill level"), skill_check
        ])
    done = SubmitField('Start New Game')

    def validate(self):
        self.difficulty_setting = self.difficulty.choices[int(
            self.difficulty.data)][1]
        self.max_skill_points = 16 - (4 * int(self.difficulty.data))
        super_return = super().validate()
        self.error_message_set = OrderedSet()
        for error in self.errors:
            self.error_message_set.add(self.errors[error][0])

        return super_return
Code Example #24
    def init(self, domain: Graph, initial_sets: list, unlabeled_elements: set):
        # np.int was removed from NumPy; the builtin int is equivalent here
        self.distances = np.zeros(domain.size, dtype=int)
        self.distances.fill(-1)
        self.nearest_set = np.zeros(domain.size, dtype=int)
        self.nearest_set.fill(-1)
        for i, _set in enumerate(initial_sets):
            for x in _set:
                self.distances[x] = 0
                sp = nx.single_source_shortest_path(domain.data_object, x)
                for node, shortest_path in sp.items():
                    if node in unlabeled_elements:
                        distance = len(shortest_path) - 1
                        if self.distances[node] == -1:
                            self.distances[node] = distance
                            self.nearest_set[node] = i
                        elif self.distances[node] >= distance and self.random_extend:
                            if self.distances[node] > distance:
                                self.distances[node] = distance
                                self.nearest_set[node] = i
                            else:
                                # break distance ties between sets at random
                                self.nearest_set[node] = random.choice(
                                    [self.nearest_set[node], i])

        self.distances_sort_dict = dict(
            enumerate(np.sort(np.unique(self.distances))[::-1]))

        # define partial order
        self.partial_order.argmin_val = 0
        for elem in unlabeled_elements:
            try:
                self.partial_order.order[self.distances_sort_dict[
                    self.distances[elem]]].add(elem)
            except KeyError:
                self.partial_order.order[self.distances_sort_dict[
                    self.distances[elem]]] = {elem}
            self.v[elem] = OrderedSet()
            self.v[elem].add(self.nearest_set[elem])
            sample_set = set(range(len(initial_sets)))
            sample_set.remove(self.nearest_set[elem])
            # random.sample requires a sequence, so materialize the set
            samples = random.sample(list(sample_set), len(initial_sets) - 1)
            for i in samples:
                self.v[elem].add(i)
Code Example #25
def load_word2vec_bin(filename):
    label_list = []
    vec_list = []
    with gzip.open(filename, 'rb') as infile:
        header = infile.readline().rstrip()
        nrows_str, ncols_str = header.split()
        nrows = int(nrows_str)
        ncols = int(ncols_str)
        for row in range(nrows):
            label = _read_until_space(infile)
            if label == '</s>':
                label = 'WORD2VEC_SENTENCE_BOUNDARY'
            vec = _read_vec(infile, ncols)
            label_list.append(label)
            vec_list.append(vec)
    labels = OrderedSet(label_list)
    mat = np.array(vec_list)
    return WordVectors(labels, mat, standardizer=lambda x: x)
Code Example #26
File: nmd.py Project: jessicalettes/nmd-exons
    def _get_all_transcripts_overlapping_exon(self, exon):
        """Makes set of all transcript ids in gene containing possible NMD exon

        Parameters
        ----------
        exon : gffutils feature exon
            The exon of interest that causes inclusion, exclusion or no NMD

        Returns
        -------
        all_transcripts : set
            The set of transcript ids from gene containing the exon of interest
        """
        all_transcripts = OrderedSet()
        for trans in self.db.region(region=exon, featuretype=TRANSCRIPT):
            if self._is_valid_transcript(trans):
                all_transcripts.add(trans[TRANSCRIPT_ID][0])
        return all_transcripts  # call transcripts_from_gene_containing_exon
Code Example #27
File: nmd.py Project: jessicalettes/nmd-exons
    def _get_transcripts_with_exon(self, exon):
        """Create set of transcript ids that contain possible NMD exon

        Parameters
        ----------
        exon : gffutils feature exon
            The exon of interest that causes inclusion, exclusion or no NMD

        Returns
        -------
        transcripts_with_exon : set
            The set of transcript ids containing the exon of interest
        """
        transcripts_with_exon = OrderedSet()
        for exon_trans in self.db.parents(exon, featuretype=TRANSCRIPT):
            if self._is_valid_transcript(exon_trans):
                transcripts_with_exon.add(exon_trans[TRANSCRIPT_ID][0])
        return transcripts_with_exon  # parent_transcripts_of_exon
Code Example #28
def sort_greedy_epochs(data: OrderedDictType[_T1, Set[_T2]],
                       epochs: int) -> OrderedSet[_T1]:
    assert isinstance(data, OrderedDict)
    assert epochs >= 0
    result: OrderedSet[_T1] = OrderedSet()
    available_entries = data.copy()
    epochs_done = 0
    epochs_goal = min(epochs, len(available_entries))
    progress_bar = tqdm(total=epochs_goal, initial=0)
    while len(available_entries) > 0 and epochs_done != epochs_goal:
        selection = get_greedy(available_entries)
        result.update(selection)
        for selected_key in selection:
            available_entries.pop(selected_key)
        epochs_done += 1
        progress_bar.update(1)
    progress_bar.close()
    return result
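A usage sketch under the same assumed get_new_units_count helper as in code example #18; each epoch performs one complete greedy cover over the entries that remain:

data = OrderedDict([("a", {1, 2}), ("b", {2, 3}), ("c", {1, 3})])
# One epoch is one full greedy cover of all units: OrderedSet(['a', 'b'])
print(sort_greedy_epochs(data, epochs=1))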
Code Example #29
 def test_no_overwrite_on_merge(self):
     a = OrderedSet()
     a.add('A')
     a.add('Z')
     a.add('B')
     print(a)
     crdt1 = GSet()
     crdt2 = GSet()
     crdt1.add('A')
     crdt2.add('B')
     crdt1 = crdt1.merge(crdt2)
     print(crdt1._payload)
     crdt1.add('AA')
     crdt2.add('BB')
     crdt1 = crdt1.merge(crdt2)
     print(crdt1._payload)
     self.assertEqual(crdt1._payload, ['A', 'B', 'AA', 'BB'])
     self.assertEqual(crdt2._payload, ['B', 'BB'])
Code Example #30
def get_type_definition(types: List[Type]) -> str:
    t = OrderedSet(types)
    if Type('None') in t:
        if len(t) == 1:
            ImportRegistry.register_type('Any')
            return 'Any'
        t = t - {Type('None')}
        optional = True
    else:
        optional = False
    d = ','.join([v.type_definition for v in t])
    if len(t) > 1:
        ImportRegistry.register_type('Union')
        d = f'Union[{d}]'
    if optional:
        ImportRegistry.register_type('Optional')
        d = f'Optional[{d}]'
    return d
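A behavior sketch, assuming Type wraps a type name exposed via .type_definition and that Type('None') marks an optional value:

# get_type_definition([Type('int')])                -> 'int'
# get_type_definition([Type('int'), Type('str')])   -> 'Union[int,str]'
# get_type_definition([Type('int'), Type('None')])  -> 'Optional[int]'
# get_type_definition([Type('None')])               -> 'Any'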