Example no. 1
    def get_data(self, *, catalog: CatalogName, entity_type: str,
                 file_url_func: FileUrlFunc, item_id: Optional[str],
                 filters: MutableFilters,
                 pagination: Optional[Pagination]) -> JSON:
        """
        Returns data for a particular entity type, or for a single item.
        :param catalog: The name of the catalog to query
        :param entity_type: Which index to search (i.e. 'projects', 'specimens', etc.)
        :param pagination: A dictionary with pagination information as returned by `_get_pagination()`
        :param filters: parsed JSON filters from the request
        :param item_id: If item_id is specified, only a single item is searched for
        :param file_url_func: A function that is used only when getting a *list* of files data.
        It creates the files URL based on info from the request. It should have the type
        signature `(uuid: str, **params) -> str`
        :return: The Elasticsearch JSON response
        """
        if item_id is not None:
            validate_uuid(item_id)
            filters.explicit['entryId'] = {'is': [item_id]}
        response = self.transform_request(catalog=catalog,
                                          filters=filters,
                                          pagination=pagination,
                                          entity_type=entity_type)

        for hit in response['hits']:
            entity = one(hit[entity_type])
            source_id = one(hit['sources'])['sourceId']
            entity['accessible'] = source_id in filters.source_ids

        def inject_file_urls(node: AnyMutableJSON, *path: str) -> None:
            if node is None:
                pass
            elif isinstance(node, (str, int, float, bool)):
                pass
            elif isinstance(node, list):
                for child in node:
                    inject_file_urls(child, *path)
            elif isinstance(node, dict):
                if path:
                    try:
                        next_node = node[path[0]]
                    except KeyError:
                        # Not all node trees will match the given path (e.g. a
                        # response from the 'files' index won't have a
                        # 'matrices' entry in its 'hits[].projects' inner
                        # entities).
                        pass
                    else:
                        inject_file_urls(next_node, *path[1:])
                else:
                    try:
                        url = node['url']
                        version = node['version']
                        uuid = node['uuid']
                    except KeyError:
                        for child in node.values():
                            inject_file_urls(child, *path)
                    else:
                        if url is None:
                            node['url'] = file_url_func(catalog=catalog,
                                                        fetch=False,
                                                        file_uuid=uuid,
                                                        version=version)
            else:
                assert False

        inject_file_urls(response['hits'], 'projects', 'contributedAnalyses')
        inject_file_urls(response['hits'], 'projects', 'matrices')
        inject_file_urls(response['hits'], 'files')

        if item_id is not None:
            response = one(response['hits'],
                           too_short=EntityNotFoundError(entity_type, item_id))
        return response
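
The lookup at the end of this method relies on the `too_short` argument of `more_itertools.one`, which swaps the default ValueError raised for an empty iterable for a domain-specific exception. A minimal sketch of that pattern, using hypothetical names (`RowNotFound`, `single_row`) rather than the Azul types above:

from more_itertools import one

class RowNotFound(Exception):
    pass

def single_row(rows):
    # Returns the sole element of `rows`. Raises RowNotFound if `rows` is
    # empty, and the default ValueError if it contains more than one element,
    # mirroring the `item_id` branch above.
    return one(rows, too_short=RowNotFound('no matching row'))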
Example no. 2
from more_itertools import one

from .._utils import names_and_abbrevs
from ..Unit import CURRENT, Dimension, LENGTH, MASS, TEMPERATURE, TIME, Unit

base_unit_map = {(names, abbrevs): Unit(dim, name=one(abbrevs))
                 for (names, abbrevs), dim in {
                     names_and_abbrevs(item): {
                         val: 1
                     } if isinstance(val, Dimension) else val
                     for item, val in {
                         (('meter', 'metre'), 'm'): LENGTH,
                         'second': TIME,
                         ('kilogram', 'kg'): MASS,
                         'Ampere': CURRENT,
                         'Kelvin': TEMPERATURE,
                     }.items()
                 }.items()}
Example no. 3
 def _get_cute_score_for_action(observation: Observation,
                                legal_move_action: Action) -> int:
     next_state = observation.state. \
                         get_next_state_from_actions({observation.letter: legal_move_action})
     return more_itertools.one(next_state.player_id_to_observation.values()).cute_score
Example no. 4
 def get_next_state_from_actions(self, player_id_to_action: Mapping[PlayerId, Action]) \
                                                                            -> SinglePlayerState:
     return self.get_next_state_from_action(more_itertools.one(player_id_to_action.values()))
Example no. 5
    def __init__(self, pattern_ll, cell_vect, nb_cells, offset, attractors,
                 soft_mat, name):
        """
        Contract: create a mesh for 2D, planar RVEs made of at most 2 constitutive materials and possibly containing several cells.

        cell_vect : Periodicity vectors, stored as columns; also defines the parallelogram that contains the cell.
        #! The cell must be a parallelogram.

        Parameters
        ----------
        pattern_ll : list
            Instances of LineLoop that define the contours of the microstructure.
        cell_vect : 2D array
            dimensions of the unit cell and directions of periodicity.
            (given in a 2D cartesian coordinate system)
        nb_cells : 1D array
            Numbers of cells in each direction of repetition/periodicity.
        offset : 1D array
            Relative position inside a cell of the point that will coincide with the origin of the global domain.
        attractors : list
            Instances of Point.
            Can also be None or empty.
            They represent the points that will be used as attractors in the definition of the element characteristic length fields.
            Attractors are geometrical elements of the cell around which mesh refinement constraints will be set.
        name : string or Path
        """

        self.name = name.stem if isinstance(name, PurePath) else name
        model.add(self.name)
        model.setCurrent(self.name)

        if offset.any():
            nb_pattern = [
                math.ceil(val + 1) if offset[i] != 0 else math.ceil(val)
                for i, val in enumerate(nb_cells)
            ]
            nb_pattern = np.array(nb_pattern, dtype=np.int8)
            pattern_ll = offset_pattern(pattern_ll, offset, cell_vect)
        else:
            nb_pattern = np.int8(np.ceil(nb_cells))

        if not np.equal(nb_pattern, 1).all():
            duplicate_pattern(pattern_ll, nb_pattern, cell_vect)

        rve_vect = cell_vect * np.asarray(nb_cells)
        pt_o = np.zeros((3, ))
        macro_vtcs = [
            pt_o,
            rve_vect[:, 0],
            rve_vect[:, 0] + rve_vect[:, 1],
            rve_vect[:, 1],
        ]
        macro_ll = geo.LineLoop([geo.Point(c) for c in macro_vtcs])
        macro_s = geo.PlaneSurface(macro_ll)

        if attractors:
            if not all(isinstance(e, geo.Point) for e in attractors):
                _err_msg = """Use of curves as attractors for the refinement of the mesh
                is not yet fully supported in our python library for gmsh."""
                raise TypeError(_err_msg)
                # TODO: still relevant?
            if offset.any():
                attractors = offset_pattern(attractors, offset, cell_vect)
            if not np.equal(nb_pattern, 1).all():
                duplicate_pattern(attractors, nb_pattern, cell_vect)

        logger.info("Start boolean operations on surfaces")
        _msg_ok = "The main material domain of the RVE is connected (topological property)."  # noqa
        _msg_no_surface = "The boolean operation for creating the main material domain of the RVE return 0 surfaces."
        _msg_not_connected = "The main material domain of the RVE obtained by a boolean operation is disconnected (topological property)."

        phy_surf = list()
        pattern_s = [geo.PlaneSurface(ll) for ll in pattern_ll]
        rve_s = geo.surface_bool_cut(macro_s, pattern_s)
        if len(rve_s) == 1:
            logger.info(_msg_ok)
        elif len(rve_s) == 0:
            logger.warning(_msg_no_surface)
        else:
            logger.warning(_msg_not_connected)

        rve_s_phy = geo.PhysicalGroup(rve_s, 2, "microstruct_domain")
        phy_surf.append(rve_s_phy)
        if soft_mat:
            soft_s = geo.surface_bool_cut(macro_s, rve_s)
            soft_s_phy = geo.PhysicalGroup(soft_s, 2, "soft_domain")
            phy_surf.append(soft_s_phy)
        logger.info("Done boolean operations on surfaces")

        if attractors:
            need_sync = False
            for entity in attractors:
                if not entity.tag:
                    entity.add_gmsh()
                    need_sync = True
            if need_sync:
                factory.synchronize()  # ? Could this be removed?

        for gp in phy_surf:
            gp.add_gmsh()
        factory.synchronize()

        data = model.getPhysicalGroups()
        details = [
            f"Physical group id : {dimtag[1]}, " +
            f"dimension : {dimtag[0]}, " +
            f"name : {model.getPhysicalName(*dimtag)}, " +
            f"nb of entitities {len(model.getEntitiesForPhysicalGroup(*dimtag))} \n"
            for dimtag in data
        ]
        logger.debug(f"All physical groups in the model : {data}")
        logger.debug(f"Physical groups details : \n {details}")
        logger.info("Done generating the gmsh geometrical model")
        if isinstance(name, PurePath):
            gmsh.write(str(name.with_suffix(".brep")))
            logger.info(f"Saving brep at {str(name.with_suffix('.brep'))}")
        else:
            gmsh.write(f"{name}.brep")
            logger.info(f"Saving brep at {name}.brep")
        macro_bndry = macro_ll.sides
        if soft_mat:
            boundary = geo.AbstractSurface.get_surfs_boundary(rve_s + soft_s)
        else:
            try:
                s = one(rve_s)
                boundary = geo.AbstractSurface.get_surfs_boundary(
                    s, recursive=False)
            except ValueError:
                boundary = geo.AbstractSurface.get_surfs_boundary(
                    rve_s, recursive=False)
        for b in boundary:
            b.get_boundary(get_coords=True)
        # factory.synchronize()
        micro_bndry = list()
        for macro_line in macro_bndry:
            fragments = geo.macro_line_fragments(boundary, macro_line)
            for c in fragments:
                c.gmsh_type = model.getType(1, c.tag)
            lines_only = [c for c in fragments if c.gmsh_type == "Line"]
            micro_bndry.append(lines_only)
        macro_dir = list()
        for i in range(len(macro_bndry) // 2):
            macro_line = macro_bndry[i]
            direction = macro_line.def_pts[-1].coord - macro_line.def_pts[0].coord
            macro_dir.append(direction)
        for i, crvs in enumerate(micro_bndry):
            msh.order_curves(crvs, macro_dir[i % 2], orientation=True)
        msh.set_periodicity_pairs(micro_bndry[0], micro_bndry[2])
        msh.set_periodicity_pairs(micro_bndry[1], micro_bndry[3])
        logger.info("Done defining a mesh periodicity constraint")

        tags = [
            f"per_pair_{k}" for k in ("1_slave", "2_slave", "1_mast", "2_mast")
        ]
        per_pair_phy = list()
        for crvs, tag in zip(micro_bndry, tags):
            per_pair_phy.append(geo.PhysicalGroup(crvs, 1, tag))
        for gp in per_pair_phy:
            gp.add_gmsh()

        self.gen_vect = rve_vect
        self.nb_cells = nb_cells
        self.attractors = attractors if attractors else []
        self.phy_surf = phy_surf
        self.mesh_fields = []
        self.mesh_abs_path = ""
Example no. 6
class State(_BaseGrid, gamey.State):

    Observation = Observation
    Action = Action
    is_end = False

    def __init__(self, culture: Culture, *, board_size: int,
                 player_id_to_observation: ImmutableDict[str, Observation],
                 food_positions: FrozenSet[Position],
                 bullets: ImmutableDict[Position, FrozenSet[Bullet]] = ImmutableDict()) -> None:
        self.culture = culture
        assert len(self.culture.core_strategies) == N_CORE_STRATEGIES
        self.player_id_to_observation = player_id_to_observation
        self.bullets = bullets
        assert all(bullets.values()) # No empty sets in this bad boy.
        self.all_bullets = frozenset(itertools.chain.from_iterable(bullets.values()))
        self.food_positions = food_positions
        self.board_size = board_size
        self.position_to_observation = ImmutableDict(
            {observation.position: observation for observation in player_id_to_observation.values()}
        )

    def _reduce(self) -> tuple:
        return (
            type(self),
            frozenset(
                (letter, observation.position, observation.score, observation.reward,
                 observation.last_action) for letter, observation in
                self.player_id_to_observation.items()
            ),
            self.bullets, self.food_positions, self.board_size,
        )

    def __eq__(self, other: Any) -> bool:
        return isinstance(other, State) and self._reduce() == other._reduce()

    def __hash__(self) -> int:
        return hash(self._reduce())


    @staticmethod
    def make_initial(culture: Culture, *, board_size: int, starting_score: int = 0,
                     concurrent_food_tiles: int = 40) -> State:

        n_players = len(culture.strategies)
        random_positions_firehose = utils.iterate_deduplicated(
                                     State.iterate_random_positions(board_size=board_size))
        random_positions = tuple(
            more_itertools.islice_extended(
                random_positions_firehose)[:(n_players + concurrent_food_tiles)]
        )

        player_positions = random_positions[:n_players]
        food_positions = frozenset(random_positions[n_players:])
        assert len(food_positions) == concurrent_food_tiles

        player_id_to_observation = {}
        for letter, player_position in zip(LETTERS, player_positions):
            player_id_to_observation[letter] = Observation(state=None, position=player_position,
                                                        score=starting_score, letter=letter,
                                                        last_action=None)

        state = State(
            culture=culture,
            board_size=board_size,
            player_id_to_observation=ImmutableDict(player_id_to_observation),
            food_positions=food_positions,
        )

        for observation in player_id_to_observation.values():
            observation.state = state

        return state


    def get_next_state_from_actions(self, player_id_to_action: Mapping[str, Action]) -> State:
        new_player_position_to_olds = collections.defaultdict(set)
        for letter, action in player_id_to_action.items():
            action: Action
            old_observation = self.player_id_to_observation[letter]
            assert action in old_observation.legal_actions
            old_player_position = old_observation.position
            if action.move is not None:
                new_player_position_to_olds[old_player_position +
                                                               action.move].add(old_player_position)
            else:
                new_player_position_to_olds[old_player_position].add(old_player_position)

        ############################################################################################
        ### Figuring out which players collided into each other: ###################################
        #                                                                                          #
        # There are three types of collisions:
        # 1. Two or more players that try to move into the same position.
        # 2. Two players that are trying to move into each other's positions.
        # 3. Any players that are trying to move into the old position of a player that had one
        #    of the two collisions above, and is therefore still occupying that position.

        collided_player_positions = set()
        while True:
            for new_player_position, old_player_positions in new_player_position_to_olds.items():
                if len(old_player_positions) >= 2:
                    # Yeehaw, we have a collision! This is either type 1 or type 3. Let's punish
                    # everyone!
                    collided_player_positions |= old_player_positions
                    del new_player_position_to_olds[new_player_position]
                    for old_player_position in old_player_positions:
                        new_player_position_to_olds[old_player_position].add(old_player_position)

                    # We modified the dict while iterating, let's restart the loop:
                    break

                elif (len(old_player_positions) == 1 and
                    ((old_player_position := more_itertools.one(old_player_positions)) !=
                      new_player_position) and new_player_position_to_olds.get(
                                               old_player_position, None) == {new_player_position}):
                    collided_player_positions |= {old_player_position, new_player_position}
                    new_player_position_to_olds[new_player_position] = {new_player_position}
                    new_player_position_to_olds[old_player_position] = {old_player_position}

                    # We modified the dict while iterating, let's restart the loop:
                    break

            else:
                # We already found all collisions, if any.
                break
        #                                                                                          #
        ### Finished figuring out which players collided into each other. ##########################
        ############################################################################################

        new_player_position_to_old = {
            new_player_position: more_itertools.one(old_player_positions) for
            new_player_position, old_player_positions in new_player_position_to_olds.items()
        }
        del new_player_position_to_olds # Prevent confusion

        ############################################################################################
        ### Figuring out bullets: ##################################################################
        #                                                                                          #

        # Todo: This section needs a lot of tests!

        wip_bullets = collections.defaultdict(set)

        # Continuing trajectory for existing bullets:
        for bullet in self.all_bullets:
            new_bullet = bullet.get_next_bullet()
            wip_bullets[new_bullet.position].add(new_bullet)

        # Processing new bullets that were just fired:
        for letter, action in player_id_to_action.items():
            if action.shoot is not None:
                player_position = self.player_id_to_observation[letter].position
                new_bullet = Bullet(player_position + action.shoot, action.shoot)
                wip_bullets[new_bullet.position].add(new_bullet)

        # Clearing bullets out of board:
        for position in [position for position, bullets in wip_bullets.items()
                         if (position not in self)]:
            del wip_bullets[position]

        # Figuring out which players were shot, removing these bullets:
        new_player_positions_that_were_shot = set()
        for new_player_position, old_player_position in new_player_position_to_old.items():
            if wip_bullets.get(new_player_position, None):
                # Common shooting case.
                del wip_bullets[new_player_position]
                new_player_positions_that_were_shot.add(new_player_position)
            elif translation := new_player_position - old_player_position: # Player moved
                oncoming_bullet_direction = - Step(*translation)
                oncoming_bullets = {bullet for bullet in wip_bullets.get(old_player_position, ()) if
                                    bullet.direction == oncoming_bullet_direction}
                if oncoming_bullets:
                    # Less-common shooting case: The player walked towards an oncoming bullet,
                    # switching places with it.
                    (oncoming_bullet,) = oncoming_bullets
                    wip_bullets[old_player_position].remove(oncoming_bullet)
                    new_player_positions_that_were_shot.add(new_player_position)
Example no. 7
def get_riff_by_id(conn, id):
    result = conn.execute(sa.text("SELECT * FROM riffs WHERE id=:id"), id=id)
    return one(result.fetchall())
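
For reference, `one(result.fetchall())` is what enforces the "exactly one row" contract here: `more_itertools.one` returns the sole element of an iterable and raises ValueError (or the exceptions passed as `too_short`/`too_long`) when the iterable is empty or has extra elements. A minimal, database-free illustration:

from more_itertools import one

assert one([42]) == 42
# one([]) raises ValueError (the iterable is empty)
# one([1, 2]) raises ValueError (the iterable has more than one element)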
Example no. 8
    async def get_presents_stat(self, import_id: str) -> Dict[str, Any]:
        aggregation = PresentsAggregation(import_id)
        cursor = self.client[self.db_name][self.collection_name].aggregate(
            aggregation.get())

        return one([presents_stat async for presents_stat in cursor])
Example no. 9
    def test_readDbEntitiesOfClsToMerge(self) -> None:
        person_1 = schema.StatePerson(person_id=1, state_code=_STATE_CODE)
        sentence_group_1 = schema.StateSentenceGroup(
            sentence_group_id=1,
            external_id=_EXTERNAL_ID,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
            state_code=_STATE_CODE,
            person=person_1,
        )
        person_1.sentence_groups = [sentence_group_1]

        person_2 = schema.StatePerson(person_id=2, state_code=_STATE_CODE)
        sentence_group_1_dup = schema.StateSentenceGroup(
            sentence_group_id=2,
            external_id=_EXTERNAL_ID,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
            state_code=_STATE_CODE,
            person=person_2,
        )
        sentence_group_2 = schema.StateSentenceGroup(
            sentence_group_id=3,
            external_id=_EXTERNAL_ID_2,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
            state_code=_STATE_CODE,
            person=person_2,
        )
        placeholder_sentence_group = schema.StateSentenceGroup(
            sentence_group_id=4,
            status=StateSentenceStatus.PRESENT_WITHOUT_INFO.value,
            state_code=_STATE_CODE,
            person=person_2,
        )

        person_2.sentence_groups = [
            sentence_group_1_dup,
            sentence_group_2,
            placeholder_sentence_group,
        ]

        with SessionFactory.using_database(self.database_key,
                                           autocommit=False) as session:
            session.add(person_1)
            session.add(person_2)
            session.flush()

            # Act
            trees_to_merge = read_db_entity_trees_of_cls_to_merge(
                session, _STATE_CODE, schema.StateSentenceGroup)

            sentence_group_trees_to_merge = one(trees_to_merge)
            self.assertEqual(len(sentence_group_trees_to_merge), 2)

            sentence_group_ids = set()

            for entity_tree in sentence_group_trees_to_merge:
                self.assertIsInstance(entity_tree, EntityTree)
                entity = entity_tree.entity
                if not isinstance(entity, schema.StateSentenceGroup):
                    self.fail(f"Expected StateSentenceGroup. Found {entity}")
                self.assertEqual(entity.external_id, _EXTERNAL_ID)
                sentence_group_ids.add(entity.sentence_group_id)

            self.assertEqual(sentence_group_ids, {1, 2})
Example no. 10
    def test_get_ingest_view_metadata_for_most_recent_valid_job(self) -> None:
        with freeze_time("2015-01-02T03:05:05"):
            self.metadata_manager.register_ingest_file_export_job(
                GcsfsIngestViewExportArgs(
                    ingest_view_name="file_tag",
                    upper_bound_datetime_prev=None,
                    upper_bound_datetime_to_export=datetime.datetime(
                        2015, 1, 2, 2, 2, 2, 2),
                ))

        with freeze_time("2015-01-02T03:06:06"):
            self.metadata_manager.register_ingest_file_export_job(
                GcsfsIngestViewExportArgs(
                    ingest_view_name="file_tag",
                    upper_bound_datetime_prev=datetime.datetime(
                        2015, 1, 2, 2, 2, 2, 2),
                    upper_bound_datetime_to_export=datetime.datetime(
                        2015, 1, 2, 3, 3, 3, 3),
                ))

        with freeze_time("2015-01-02T03:07:07"):
            self.metadata_manager.register_ingest_file_export_job(
                GcsfsIngestViewExportArgs(
                    ingest_view_name="another_tag",
                    upper_bound_datetime_prev=datetime.datetime(
                        2015, 1, 2, 3, 3, 3, 3),
                    upper_bound_datetime_to_export=datetime.datetime(
                        2015, 1, 2, 3, 4, 4, 4),
                ))

        most_recent_valid_job = (
            self.metadata_manager.
            get_ingest_view_metadata_for_most_recent_valid_job("file_tag"))

        self.assertIsNotNone(most_recent_valid_job)
        if most_recent_valid_job is None:
            self.fail("most_recent_valid_job is unexpectedly None")

        self.assertEqual("file_tag", most_recent_valid_job.file_tag)
        self.assertEqual(
            datetime.datetime(2015, 1, 2, 2, 2, 2, 2),
            most_recent_valid_job.datetimes_contained_lower_bound_exclusive,
        )
        self.assertEqual(
            datetime.datetime(2015, 1, 2, 3, 3, 3, 3),
            most_recent_valid_job.datetimes_contained_upper_bound_inclusive,
        )

        # Invalidate the row that was just returned
        session = SessionFactory.for_schema_base(OperationsBase)
        results = (session.query(
            schema.DirectIngestIngestFileMetadata).filter_by(
                file_id=most_recent_valid_job.file_id).all())
        result = one(results)
        result.is_invalidated = True
        session.commit()

        most_recent_valid_job = (
            self.metadata_manager.
            get_ingest_view_metadata_for_most_recent_valid_job("file_tag"))
        if most_recent_valid_job is None:
            self.fail("most_recent_valid_job is unexpectedly None")
        self.assertEqual("file_tag", most_recent_valid_job.file_tag)
        self.assertEqual(
            None,
            most_recent_valid_job.datetimes_contained_lower_bound_exclusive)
        self.assertEqual(
            datetime.datetime(2015, 1, 2, 2, 2, 2, 2),
            most_recent_valid_job.datetimes_contained_upper_bound_inclusive,
        )
Example no. 11
 def _init_objects_to_attributes(
     self
 ) -> ImmutableSetMultiDict[ObjectSemanticNode, AttributeSemanticNode]:
     return immutablesetmultidict(
         (one(attribute.slot_fillings.values()), attribute)
         for attribute in self.attributes)
Example no. 12
 def project_id(self):
     path = '{namespace}/{project}'.format(**self.options)
     return one(x for x in self.projects
                if x['path_with_namespace'] == path)['id']
Example no. 13
 def site_config(self, site_id):
     site_root = self._site_dir(site_id)
     config = list(site_root.glob('*.lektorproject'))
     if config:
         return inifile.IniFile(one(config))
     return collections.defaultdict(type(None))
Example no. 14
def simulate_timecourse_objective(
    parent_petab_problem: petab.Problem,
    timecourse_id: str,
    problem_parameters: Dict[str, float],
    parameter_mapping,
    sensi_orders: Tuple[int, ...] = (0, ),
    return_all: bool = False,
    max_abs_grad: float = None,
    **kwargs,
):
    #unscaled_problem_parameters = unscale_parameters(
    #    scaled_parameters=problem_parameters,
    #    petab_problem=parent_petab_problem,
    #)
    unscaled_problem_parameters = parent_petab_problem.unscale_parameters(
        scaled_parameters=problem_parameters, )

    #print('in')
    results = simulate_timecourse(
        parent_petab_problem,
        timecourse_id,
        problem_parameters=unscaled_problem_parameters,
        parameter_mapping=parameter_mapping,
        sensi_orders=sensi_orders,
        **kwargs,
    )
    #print('out')
    if kwargs.get('initial_states', None) is not None:
        pass
        #breakpoint()

    #sensitivity_parameter_ids = results[0]['sllh'].keys()
    sensitivity_parameter_ids = problem_parameters.keys()
    #print('l1')
    #print(problem_parameters)
    #breakpoint()
    #sensitivity_parameter_ids = parent_petab_problem.x_ids
    for result in results:
        if result['sllh'].keys() != sensitivity_parameter_ids:
            # FIXME reimplement so this still holds?
            #raise NotImplementedError(
            #    'All conditions must provide sensitivities for the same set '
            #    'of parameters.'
            #)
            pass
    #print('l2')
    accumulated_result = {
        FVAL:
        sum(-result['llh'] for result in results),
        GRAD: [
            sum(-result['sllh'][k] for result in results
                if k in result['sllh']) for k in sensitivity_parameter_ids
            #if k in result['sllh']
        ]
        #GRAD: {
        #    k: sum(-result['sllh'][k] for result in results)
        #    for k in sensitivity_parameter_ids
        #}
    }
    #print('l3')
    if return_all:
        # TODO magic constant
        accumulated_result[FINAL_STATES] = \
            one(results[-1]['rdatas']).x[-1].flatten()
        #accumulated_result[RESULTS] = results
    #print('l4')
    #print(accumulated_result)
    #print(accumulated_result)
    #breakpoint()
    #print(accumulated_result)

    if max_abs_grad is not None:
        pass
        #accumulated_result[GRAD] = list(np.nan_to_num(
        #    np.array(accumulated_result[GRAD]),
        #    nan=np.nan,
        #    posinf=max_abs_grad,
        #    neginf=-max_abs_grad,
        #))
    #print(accumulated_result)

    return accumulated_result
Example no. 15
    def test_no_overlap(self):
        """ Tests that decoders do not overlap.

        If a byte of the response is processed by a bit decoder, then it can only be processed
        by bit decoders, and for different bits (multiple decoders may not test the same bits).

        If a byte of the response is processed by another decoder (byte, word or longer) then
        it may only be tested by a single decoder.
        """
        for prefix, decoders in decoding.serial_page_prefix_to_decoders.items():
            non_bit_tested_byte_to_decoder = {}
            bit_tested_bytes_to_bit_to_decoders = defaultdict(dict)

            for decoder in decoders:
                # Find out which, if any, of the decode_bits canned decoders are used,
                # i.e. which bits are tested by this decoder. This only works if we only
                # use the canned decoders!
                tested_bits = {
                    i
                    for i, decode_fn in decoding.decode_bits.items()
                    if decode_fn is decoder.decode_fn
                }

                # We assume that if no decode_bits decoders are used, the decoder must process
                # entire bytes, and thus conflict with any other decoder that processes the
                # same bytes:
                self.assertIn(len(tested_bits), [0, 1])

                if len(tested_bits) == 1:
                    # Which bits is it testing? Ensure that it doesn't overlap any decoder
                    # that we've already checked, and allocate those bits to this one:
                    tested_byte = decoder.start_position
                    overlapping_decoder = non_bit_tested_byte_to_decoder.get(
                        tested_byte)
                    self.assertIsNone(
                        overlapping_decoder,
                        f"{decoder} and {overlapping_decoder} overlap on {prefix} "
                        f"byte {tested_byte}")

                    tested_bit = one(tested_bits)
                    overlapping_decoder = (
                        bit_tested_bytes_to_bit_to_decoders[tested_byte].get(
                            tested_bit))
                    self.assertIsNone(
                        overlapping_decoder,
                        f"{decoder} overlaps on {prefix} byte {tested_byte} "
                        f"bit {tested_bit}")

                    bit_tested_bytes_to_bit_to_decoders[tested_byte][
                        tested_bit] = decoder
                else:
                    # Which bytes is it testing? Ensure that it doesn't overlap any decoder
                    # that we've already checked, and allocate those bytes to this one:
                    overlapping_decoders = {
                        other_decoder
                        for position, other_decoder in
                        bit_tested_bytes_to_bit_to_decoders.items()
                        if (position < decoder.end_position
                            and position >= decoder.start_position)
                    }
                    self.assertEqual(
                        set(), overlapping_decoders,
                        f"{decoder} and {overlapping_decoders} overlap on {prefix} "
                        f"bytes {decoder.start_position}-{decoder.end_position - 1}"
                    )

                    overlapping_decoders = {
                        other_decoder
                        for position, other_decoder in
                        non_bit_tested_byte_to_decoder.items()
                        if (position < decoder.end_position
                            and position >= decoder.start_position)
                    }
                    self.assertEqual(
                        set(), overlapping_decoders,
                        f"{decoder} overlaps on {prefix} bytes "
                        f"{decoder.start_position}-{decoder.end_position - 1}")

                    for i in range(decoder.start_position,
                                   decoder.end_position):
                        non_bit_tested_byte_to_decoder[i] = decoder
Example no. 16
 def is_folder(self) -> bool:
     return one(unique_everseen(vp.is_folder for vp in self.generating_vps))
Example no. 17
 def get_qs_for_observation(
         self, observation: Observation) -> Mapping[Action, numbers.Real]:
     return more_itertools.one(self.get_qs_for_observations(
         (observation, )))
Example no. 18
    def process_form_submission(self, form, editor):
        people_allowed_to_sign_up = list(
            filter(
                lambda person: self.can_signup(editor, person),
                gefolge_web.login.Mensch if self.event.location is not None
                and self.event.location.is_online else self.event.signups))
        if len(people_allowed_to_sign_up) > 0:
            if len(people_allowed_to_sign_up) == 1:
                person_to_signup = more_itertools.one(
                    people_allowed_to_sign_up)
            else:
                person_to_signup = form.person_to_signup.data
            if not self.can_signup(editor, person_to_signup):
                flask.flash(
                    jinja2.Markup(
                        'Du bist nicht berechtigt, {} für diesen Programmpunkt anzumelden.'
                        .format(person_to_signup.__html__())), 'error')
                return flask.redirect(
                    flask.url_for('event_programmpunkt',
                                  event=self.event.event_id,
                                  programmpunkt=self.url_part))
            if 'challonge' in self.data:
                try:
                    if form.challonge_username.data:
                        challonge.participants.create(
                            self.data['challonge'],
                            name=person_to_signup.name,
                            challonge_username=form.challonge_username.data,
                            misc='id{}'.format(person_to_signup.snowflake))
                    else:
                        challonge.participants.create(
                            self.data['challonge'],
                            name=person_to_signup.name,
                            misc='id{}'.format(person_to_signup.snowflake))
                except challonge.api.ChallongeException as e:
                    flask.flash(
                        jinja2.Markup(
                            'Bei der Anmeldung auf Challonge ist ein Fehler aufgetreten. Bitte versuche es nochmal. Falls du Hilfe brauchst, wende dich bitte an {}. Fehlermeldung: {}'
                            .format(
                                gefolge_web.login.Mensch.admin().__html__(),
                                jinja2.escape(e))), 'error')
                    return flask.redirect(
                        flask.url_for('event_programmpunkt',
                                      event=self.event.event_id,
                                      programmpunkt=self.url_part))
            if 'smashgg' in self.data:
                if form.smashgg_slug.data:
                    #TODO validate that the user exists
                    if 'smashggSlugs' not in self.data:
                        self.data['smashggSlugs'] = {}
                    self.data['smashggSlugs'][str(
                        person_to_signup.snowflake)] = form.smashgg_slug.data
                    if self.orga is not None:
                        peter.msg(
                            self.orga,
                            '<@{}> ({}) hat sich für {} auf {} angemeldet. (smash.gg-Profil: {})'
                            .format(person_to_signup.snowflake,
                                    person_to_signup, self, self.event,
                                    form.smashgg_slug.data)
                        )  #TODO fix recipient if guest, fix formatting for EventGuests (dm_mention)
                else:
                    if self.orga is not None:
                        peter.msg(
                            self.orga,
                            '<@{}> ({}) hat sich für {} auf {} angemeldet. (kein smash.gg-Profil)'
                            .format(person_to_signup.snowflake,
                                    person_to_signup, self, self.event)
                        )  #TODO fix recipient if guest, fix formatting for EventGuests (dm_mention)
            self.signup(person_to_signup)

        self.process_form_details(form, editor)
Example no. 19
    def main(self):
        self.sources = self.get_source()
        self.targets = self.get_target()
        moved_ok = 0
        moved_err = 0
        deleted_ok = 0
        deleted_err = 0
        linked_ok = 0
        linked_err = 0

        logger.trace(
            json.dumps(self,
                       default=lambda o: o.__dict__,
                       sort_keys=True,
                       indent=2))

        for item in self.sources:
            matching_targets = [t for t in self.targets if t["path"] == item["path"]]
            if matching_targets and one(matching_targets)["inode"] != item["inode"]:
                if self.recycle_path:
                    if self.move(os.path.join(self.target_path, item["path"]),
                                 os.path.join(self.recycle_path,
                                              item["path"])):
                        moved_ok += 1
                    else:
                        moved_err += 1
                else:
                    if self.delete(os.path.join(self.target_path,
                                                item["path"])):
                        deleted_ok += 1
                    else:
                        deleted_err += 1

            if not matching_targets:
                if self.link(item["src"],
                             os.path.join(self.target_path, item["path"])):
                    linked_ok += 1
                else:
                    linked_err += 1

        for item in self.targets:
            if not [s for s in self.sources if s["path"] == item["path"]]:
                if self.recycle_path:
                    if self.move(os.path.join(self.target_path, item["path"]),
                                 os.path.join(self.recycle_path,
                                              item["path"])):
                        moved_ok += 1
                    else:
                        moved_err += 1
                else:
                    if self.delete(os.path.join(self.target_path,
                                                item["path"])):
                        deleted_ok += 1
                    else:
                        deleted_err += 1

        if sum([
                moved_ok, moved_err, deleted_ok, deleted_err, linked_ok,
                linked_err
        ]) > 0:
            logger.success("Run completed:")
            if sum([moved_ok, moved_err]) > 0:
                logger.success(
                    f"  Moved {moved_ok} successfully, {moved_err} failed.")
            if sum([deleted_ok, deleted_err]) > 0:
                logger.success(
                    f"  Deleted {deleted_ok} successfully, {deleted_err} failed."
                )
            if sum([linked_ok, linked_err]) > 0:
                logger.success(
                    f"  Linked {linked_ok} successfully, {linked_err} failed.")
Example no. 20
    setup_logging(filename=args.logfile)
    log = get_logger('import_container_data')
    log.info('start_ingest')

    aspace = ASpace()

    def get_jsons(repo_id, id_set):
        '''Get chunk of Archival Object JSONModelObjects'''
        return aspace.client.get(f'repositories/{repo_id}/archival_objects',
                                 params={
                                     'id_set': ",".join(map(str, id_set))
                                 }).json()

    repo_id_key_fn = lambda x: x['Repo ID']

    rows = list(dictify_sheet(one(args.excel)))
    ao_ids_by_repo_id = groupby(sorted(rows, key=repo_id_key_fn),
                                key=repo_id_key_fn)

    def get_id_num(uri):
        return int(uri[uri.rindex('/') + 1:])

    def get_uri_prefix(uri):
        return uri[:uri.rindex('/') + 1]

    ao_jsons_by_id = {}
    for repo_id, aos in ao_ids_by_repo_id:
        for chunk in chunked((el['Archival Object'] for el in aos), 250):
            ao_jsons_by_id.update({
                get_id_num(ao['uri']): ao
                for ao in get_jsons(repo_id, chunk)
            })
Example no. 21
def pluralize(**thing):
    name, value = one(thing.items())
    if name.endswith('y') and name[-2] not in 'aeiou':
        name = f'{name[:-1]}ies' if value != 1 else name
        return f'{value} {name}'
    return f'{value} {name}{"s" * (value != 1)}'
Example no. 22
    def get_neurons_of_sample_states_and_best_actions(self) -> Tuple[np.ndarray,
                                                                     Tuple[Action, ...]]:

        culture = Culture(n_players=1, core_strategies=(self,) * N_CORE_STRATEGIES)
        def make_state(player_position: Position,
                       food_positions: Iterable[Position]) -> State:
            observation = Observation(None, player_position, letter=LETTERS[0],
                                      score=0, reward=0, last_action=None)
            player_id_to_observation = ImmutableDict({observation.letter: observation})
            state = State(
                culture, board_size=self.culture.board_size,
                player_id_to_observation=player_id_to_observation,
                food_positions=frozenset(food_positions)
            )
            observation.state = state
            return state

        def make_states(player_position: Position,
                        food_positions: Iterable[Position]) -> Tuple[State]:
            return tuple(
                make_state(rotated_player_position, rotated_food_positions) for
                rotated_player_position, *rotated_food_positions in zip(
                    *map(lambda position: position.iterate_rotations_in_board(
                                                                board_size=self.culture.board_size),
                         itertools.chain((player_position,), food_positions))
                )
            )

        player_positions = [Position(x, y) for x, y in itertools.product((5, 11, 16), repeat=2)]
        states = []
        for player_position in player_positions:
            distances_lists = tuple(
                itertools.chain.from_iterable(
                    itertools.combinations(range(1, 4 + 1), i) for i in range(1, 3 + 1)
                )
            )
            for distances, step in itertools.product(distances_lists, Step.all_steps):
                states.extend(
                    make_states(player_position,
                                [player_position + distance * step for distance in distances])
                )

        observations = [more_itertools.one(state.player_id_to_observation.values())
                        for state in states]

        def _get_cute_score_for_action(observation: Observation,
                                       legal_move_action: Action) -> int:
            next_state = observation.state. \
                                get_next_state_from_actions({observation.letter: legal_move_action})
            return more_itertools.one(next_state.player_id_to_observation.values()).cute_score

        return tuple((
            np.concatenate([observation.to_neurons()[np.newaxis, :] for
                            observation in observations]),
            tuple(
                max(
                    observation.legal_move_actions,
                    key=lambda legal_action: _get_cute_score_for_action(observation, legal_action)
                )
                for observation in observations
            )
        ))
Example no. 23
def enum(*items: PrimitiveJSON, type_: TYPE = None) -> JSON:
    """
    Returns an `enum` schema for the given items. By default, the schema type of
    the items is inferred, but a type may be passed explicitly to override that.
    However, the current implementation cannot detect some cases in which the
    types of the enum values contradict the explicit type.

    >>> from azul.doctests import assert_json
    >>> assert_json(enum('foo', 'bar', type_=str))
    {
        "type": "string",
        "enum": [
            "foo",
            "bar"
        ]
    }

    >>> assert_json(enum(2, 5, 7))
    {
        "type": "integer",
        "format": "int64",
        "enum": [
            2,
            5,
            7
        ]
    }

    >>> assert_json(enum('x', type_={'type': 'string'}))
    {
        "type": "string",
        "enum": [
            "x"
        ]
    }

    >>> enum('foo', 1.0)
    Traceback (most recent call last):
    ...
    ValueError: too many items in iterable (expected 1)

    >>> enum('foo', 'bar', type_=int)
    Traceback (most recent call last):
    ...
    AssertionError

    >>> assert_json(enum('foo', 'bar', type_="integer"))
    {
        "type": "integer",
        "enum": [
            "foo",
            "bar"
        ]
    }
    """

    if isinstance(type_, type):
        assert all(isinstance(item, type_) for item in items)
    else:
        inferred_type = one(set(map(type, items)))
        if type_ is None:
            type_ = inferred_type
        else:
            # Can't easily verify type when passed as string or mapping
            pass

    return {**make_type(type_), 'enum': items}
Example no. 24
def get_ancestor_class_sequence(
        class_name: 'AncestorClassName',
        ancestor_chain: Dict['AncestorClassName', str] = None,
        enforced_ancestor_choices:
        Dict['AncestorChoiceKey', 'AncestorClassName'] = None) \
        -> Sequence['AncestorClassName']:
    """Returns the sequence of ancestor classes leading from the root of the
    object graph, a Person type, all the way to the given |class_name|.

    This handles graphs with multiple inbound edges to a particular type. For
    example, StateCharge can be a child of IncarcerationSentence,
    SupervisionSentence, and Fine. To determine exactly which sequence to
    return, context is derived from the other parameters.

    |ancestor_chain| is the set of ids of known ancestor instances for which we
    are trying to find the ancestor sequence. If |class_name| is state_charge
    and the |ancestor_chain| is a dict containing a key of
    'state_incarceration_sentence', then the returned sequence will be
    ('state_person', 'state_sentence_group', 'state_incarceration_sentence').

    |enforced_ancestor_choices| is a mapping of ancestor classes to specifically
    choose where there are multiple options. If |class_name| is state_charge and
    |enforced_ancestor_choices| is
    {'state_sentence': 'state_supervision_sentence'}, then the returned sequence
    will be
    ('state_person', 'state_sentence_group', 'state_supervision_sentence').

    If there is a valid key in the |ancestor_chain| and there are also
    |enforced_ancestor_choices|, then the key in |ancestor_chain| wins.

    Args:
        class_name: The name of the class to find the sequence for
        ancestor_chain: A dictionary with keys of ancestor class types and
            values of specific ancestor instance ids
        enforced_ancestor_choices: A dictionary with keys of kinds of ancestor
            choices, e.g. `state_sentence` for any of the state sentence types,
            and values of the specific choice for that kind of choice, e.g.
            `state_incarceration_sentence`
    """
    if not ancestor_chain:
        ancestor_chain = {}

    if not enforced_ancestor_choices:
        enforced_ancestor_choices = {}

    hierarchy_sequence = []
    for step in _HIERARCHY_MAP[class_name]:
        if isinstance(step, str):
            hierarchy_sequence.append(step)
        elif isinstance(step, AncestorTypeChoices):
            if step.key not in VALID_ANCESTOR_CHOICE_KEYS:
                raise ValueError(
                    f"Invalid ancestor choice key of [{step.key}], must be one "
                    f"of [{VALID_ANCESTOR_CHOICE_KEYS}]")

            # First, pick if we've selected one of the choices via the
            # ancestor chain
            choices_ancestor_chain_overlap = \
                step.ancestor_choices.intersection(ancestor_chain.keys())
            if choices_ancestor_chain_overlap:
                if len(choices_ancestor_chain_overlap) > 1:
                    raise ValueError(
                        "There are multiple valid ancestor choices in the "
                        "given ancestor chain. Valid choices are: "
                        f"[{step.ancestor_choices}]. Ancestor chain includes: "
                        f"[{ancestor_chain.keys()}]")
                hierarchy_sequence.append(one(choices_ancestor_chain_overlap))
                continue

            # Next, check the enforced_ancestor_choices
            if not enforced_ancestor_choices:
                raise ValueError(
                    "For possible ancestor choices of "
                    f"[{step.ancestor_choices}], there is neither overlap with "
                    f"the ancestor chain [{ancestor_chain.keys()}] nor a "
                    f"declared choice. We don't have enough information to "
                    f"construct the ancestor hierarchy for this object.")

            if step.key not in enforced_ancestor_choices:
                raise ValueError(
                    f"The enforced choices [{enforced_ancestor_choices}] don't "
                    f"contain a mapping for [{step.key}]. We don't have enough "
                    "information to construct the ancestor hierarchy for this "
                    "object.")

            choice = enforced_ancestor_choices[step.key]
            if choice not in _HIERARCHY_MAP:
                raise ValueError(
                    f"Invalid ancestor choice value of [{choice}], must be a "
                    "valid type listed in the hierarchy map.")
            hierarchy_sequence.append(choice)
        else:
            raise ValueError(f"Unknown type [{type(step)}] in hierarchy map.")
    return tuple(hierarchy_sequence)
Example no. 25
async def validator():
    cache_data = validator_TTCache.get(VALIDATOR_CACHE_KEY)
    if cache_data:
        resp: ValidatorsResponse = cache_data
    else:
        async with lock.lock:
            cache_data = validator_TTCache.get(VALIDATOR_CACHE_KEY)
            if cache_data:
                return cache_data
            else:
                latest_block_number_tasks = []
                for validator in setting.validator_list:
                    latest_block_number_tasks.append(
                        get_latest_block(validator))
                latest_infos = await asyncio.gather(*latest_block_number_tasks,
                                                    return_exceptions=True)
                latest_infos_no_exception = list(
                    filter(lambda x: x.block_number != NO_LATEST_BLOCK,
                           latest_infos))
                latest_num_dict: Dict[str, LatestInfo] = {
                    i.validator.host: i
                    for i in latest_infos
                }
                # If getting the latest block from all the validators failed, pick the `nextToPropose` randomly
                if len(latest_infos_no_exception) == 0:
                    best = random.choice(setting.validator_list)
                    max_block_numbers = NO_LATEST_BLOCK
                else:
                    max_block_numbers = max(
                        [i.block_number for i in latest_infos_no_exception])
                    latest = first_true(
                        latest_infos_no_exception,
                        pred=lambda x: x.block_number == max_block_numbers)
                    index = one(
                        locate(setting.validator_list,
                               lambda x: x.pub_key == latest.sender))

                    # Why +2?
                    # The validator at `index` should be the one that proposed the
                    # latest block, but it is possible that at this moment the next
                    # validator is already trying to propose a new block. So choosing
                    # the +2 validator is more reliable.
                    best = nth(ncycles(setting.validator_list, 2), index + 2)
                split_validators = list(
                    split_before(setting.validator_list,
                                 lambda x: x.host == best.host))
                if len(split_validators) == 1:
                    sorted_validators = one(split_validators)
                else:
                    sorted_validators = last(split_validators) + first(
                        split_validators)

                validators = list(
                    map(
                        lambda x: Validator(host=x.host,
                                            grpc_port=x.grpc_port,
                                            http_port=x.http_port,
                                            latestBlockNumber=latest_num_dict.
                                            get(x.host).block_number,
                                            timestamp=latest_num_dict.get(
                                                x.host).timestamp),
                        sorted_validators))

                nextToPropose = NextToPropose(
                    host=best.host,
                    grpcPort=best.grpc_port,
                    httpPort=best.http_port,
                    latestBlockNumber=max_block_numbers)
                resp = ValidatorsResponse(nextToPropose=nextToPropose,
                                          validators=validators)
                validator_TTCache[VALIDATOR_CACHE_KEY] = resp
    return resp.dict()
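
The `ncycles`/`nth` call above just walks two positions past `index` on a circular view of the validator list, and `split_before` then rotates the list so the chosen validator comes first. A small self-contained sketch of the same selection on plain strings (hypothetical hosts, not the real `setting.validator_list`):

from more_itertools import first, last, ncycles, nth, one, split_before

validators = ['val-a', 'val-b', 'val-c', 'val-d']
index = 3  # pretend the validator at index 3 proposed the latest block

# Step two places past `index` on a doubled (circular) view of the list:
best = nth(ncycles(validators, 2), index + 2)   # -> 'val-b'

# Rotate the list so that `best` comes first:
parts = list(split_before(validators, lambda v: v == best))
rotated = one(parts) if len(parts) == 1 else last(parts) + first(parts)
assert rotated == ['val-b', 'val-c', 'val-d', 'val-a']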
Example no. 26
 def section(self):
     return more_itertools.one(section
                               for section in self.location.data['rooms']
                               if self.name in section).key
Example no. 27
 def open_file(self, flags):
     return one(self.generating_vps).open_file(flags)
Example no. 28
 def _data(self):
     response = self._vk.api.messages.getById(message_ids=self.id)
     return mit.one(response["items"])
Example no. 29
 def path(self) -> str:
     return one(
         unique_everseen(vp.partial_path for vp in self.generating_vps))
Example no. 30
def consensus(values):
    from itertools import groupby
    from more_itertools import one
    return one((x[0] for x in groupby(values)))
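
Since `groupby` collapses runs of equal consecutive values into a single group, `one` succeeds only when every value in the sequence is the same. A quick illustration:

assert consensus(['a', 'a', 'a']) == 'a'
# consensus(['a', 'b', 'a']) raises ValueError: groupby yields three groups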
Example no. 31
def fix_sam_read_groups(sam_input, out_path, progress_bar=False):
    """
    This was written to deal with the result of read group tagging by BWA.
    The produced SAM has a single read group in the header, under a line like:

        @RG  ID:PLTF.RUN.FLOWCELL.LANE1  LB:LB  PL:ILLUMINA  PU:PU  SM:SM

    But then reads might come from different lanes than LANE1, which can be
    seen in the Illumina read IDs:

        PLTF:RUN:FLOWCELL:LANE1:X:Y:FOO ... RG:Z:PLTF:RUN:FLOWCELL:LANE1
        PLTF:RUN:FLOWCELL:LANE1:X:Y:FOO ... RG:Z:PLTF:RUN:FLOWCELL:LANE1
        PLTF:RUN:FLOWCELL:LANE2:X:Y:FOO ... RG:Z:PLTF:RUN:FLOWCELL:LANE1 <- :(
        PLTF:RUN:FLOWCELL:LANE2:X:Y:FOO ... RG:Z:PLTF:RUN:FLOWCELL:LANE1 <- :(

    The idea is to produce a new SAM fixing this situation in two ways:

    - The header of the new SAM must have a new @RG line for each different
        read group inferred from the read IDs. In this case:

        @RG  ID:PLTF.RUN.FLOWCELL.LANE1  LB:LB  PL:ILLUMINA  PU:PU  SM:SM
        @RG  ID:PLTF.RUN.FLOWCELL.LANE2  LB:LB  PL:ILLUMINA  PU:PU  SM:SM

    - The read group assigned to each read in the RG:Z: tag should match
      its true read group, inferred from the first fields of the read ID:

        PLTF:RUN:FLOWCELL:LANE1:X:Y:FOO ... RG:Z:PLTF:RUN:FLOWCELL:LANE1
        PLTF:RUN:FLOWCELL:LANE1:X:Y:FOO ... RG:Z:PLTF:RUN:FLOWCELL:LANE1
        PLTF:RUN:FLOWCELL:LANE2:X:Y:FOO ... RG:Z:PLTF:RUN:FLOWCELL:LANE2 <- ok
        PLTF:RUN:FLOWCELL:LANE2:X:Y:FOO ... RG:Z:PLTF:RUN:FLOWCELL:LANE2 <- ok

    Returns the path to the new SAM produced with those fixes.
    """
    # NOTE: the SAM files will be HEAVY, especially for exome data (say, 20 GB,
    # 50 million lines).

    #  if os.path.isfile(out_path):
        #  os.remove(out_path)

    assert '.sam' in out_path # accepts files like "myfile.sam-luigi-tmp-1234"

    # Write the header of the new SAM, with one @RG entry for each lane

    headers = []
    with open(sam_input) as f_in:
        for line in f_in:
            if not line.startswith("@"): # When the header is over
                break
            headers.append(line)

    headers_not_RG = [l for l in headers if not l.startswith('@RG')]
    headers_RG = [l for l in headers if l.startswith('@RG')]

    # This entire function assumes the following structure for read group IDs:
    # INSTRUMENT.RUN.FLOWCELL.1-2-3
    # where 1-2-3 are the merged lane numbers
    lane_numbers = []
    for RG_line in headers_RG:
        RG_header_parts = RG_line.split('\t')
        # [@RG, ID:INSTRUMENT.RUN.FLOWCELL.1-2-3, LB:LIB, PL:ILLUMINA, ...]

        RG_ID = one(chunk for chunk in RG_header_parts if 'ID:' in chunk)
        # ID:INSTRUMENT.RUN.FLOWCELL.1-2-3

        lane_numbers_merged = RG_ID.split('.')[-1] # 1-2-3

        for lane_number in lane_numbers_merged.split('-'): # [1, 2, 3]
            lane_numbers.append(lane_number)

    # One RG line for each lane number seen:
    new_RG_headers = []
    template_RG_line = headers_RG[0]
    for lane_number in sorted(unique_everseen(lane_numbers)):
        new_RG_header = re.sub(r'(ID:)(.+?\.)(.+?\.)(.+?\.)(.+?)(\s)',
                               rf'\1\2\3\g<4>{lane_number}\6',
                               template_RG_line)
        new_RG_headers.append(new_RG_header)

    with open(out_path, 'w') as f_out:
        for header in headers_not_RG + new_RG_headers:
            f_out.write(header)

    # Write the body of the SAM, fixing read groups read by read:

    regex_id = re.compile(r'^(.+?:.+?:.+?:.+?):.+?:.+?:.+?\s')
    regex_rg = re.compile(r'\sRG:Z:(.+)\s')

    inferred_read_groups = set()

    with open(sam_input) as f_in, open(out_path, 'a') as f_out:
        input_iterable = tqdm(f_in) if progress_bar else f_in

        # NOTE: this will loop over ~50M lines for exome data!
        for line in input_iterable:
            if line.startswith('@'):
                continue
            fixed_line = line
            match_id = regex_id.match(line)
            match_rg = regex_rg.search(line)
            inferred_read_group = match_id.group(1).replace(':', '.')
            seen_read_group = match_rg.group(1)
            if seen_read_group != inferred_read_group:
                fixed_line = fixed_line.replace(seen_read_group,
                                                inferred_read_group)
            inferred_read_groups.add(inferred_read_group)
            f_out.write(fixed_line)

    return out_path
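
The read-group inference above hinges on `regex_id`: the first four colon-separated fields of an Illumina read ID (instrument, run, flowcell, lane) are captured and dotted together to form the read group ID. A small illustration of that step in isolation (the read line below is made up):

import re

regex_id = re.compile(r'^(.+?:.+?:.+?:.+?):.+?:.+?:.+?\s')

line = 'PLTF:RUN:FLOWCELL:LANE2:101:2204:7893\t99\tchr1\tRG:Z:PLTF.RUN.FLOWCELL.LANE1\n'
inferred_read_group = regex_id.match(line).group(1).replace(':', '.')
assert inferred_read_group == 'PLTF.RUN.FLOWCELL.LANE2'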