Ejemplo n.º 1
0
 def plot(self):
     """Return the distribution of this node's outcomes as a ``Counter``.

     Each component's own ``plot()`` distribution is read through
     ``most_common()`` as ``(value, count)`` pairs. Every way of picking
     one pair per component is enumerated; the picked values are handed
     to ``self.evaluate`` to obtain the resulting outcome, and the product
     of the picked counts is added to that outcome's tally.

     Returns:
         Counter mapping each evaluated outcome to its accumulated weight.
     """
     # One list of (value, count) pairs per component.
     distributions = [c.plot().most_common() for c in self.components]

     counter = Counter()
     # The distributions must be unpacked: passing the collection itself
     # makes product() iterate over the distributions rather than combine
     # one entry from each of them.
     for combination in itertools_product(*distributions):
         outcome = self.evaluate(c[0] for c in combination)
         weight = functools_reduce(operator_mul,
                                   [c[1] for c in combination])
         counter[outcome] += weight
     return counter
Ejemplo n.º 2
0
 def _variants(self):
     options = self.tree_html.cssselect('ul.saleAttr')
     variants_initial = {}
     for option in options:
         option_name = option.xpath('../preceding-sibling::'
                                    'div[@class="dt"]/text()')[0]
         values = option.xpath('li/a/@title | li[not(a)]/text()')
         variants_initial[option_name] = values
     variant_names = variants_initial.keys()
     variant_values = list(
         itertools_product(*variants_initial.itervalues()))
     variants = []
     for variant_value in variant_values:
         properties = {}
         for i, v in enumerate(variant_value):
             properties[variant_names[i]] = v
         data = dict(price=None, selected=None, properties=properties)
         variants.append(data)
     return variants
Ejemplo n.º 3
0
 def fit_gp(self, num_samples=1, hp_tune_criterion=None):
     """ Fits a GP (or samples hyper-parameters) according to the tuning criterion.

     When hp_tune_criterion is None, self.options.hp_tune_criterion is used.

     Returns, depending on the criterion and on self.ml_hp_tune_opt_method:
       - 'ml' with 'direct'/'rand'/'pdoo':
           ('fitted_gp', gp, (cts_hps, dscr_hps)) for the best scoring
           combination of discrete hyper-parameters.
       - 'ml' with 'rand_exp_sampling':
           ('sample_hps_with_probs', cts_hps, dscr_hps, other_params, probs).
       - 'post_sampling' with num_samples == 1:
           ('post_fitted_gp', gp, (cts_hps, dscr_hps)).
       - 'post_sampling' otherwise:
           ('post_sample_hps_with_probs', cts_hps, dscr_hps, other_params).
       - None for any other criterion/method combination.
     """
     criterion = hp_tune_criterion
     if criterion is None:
         criterion = self.options.hp_tune_criterion

     if criterion == 'ml':
         if self.ml_hp_tune_opt_method in ('direct', 'rand', 'pdoo'):
             # Exhaustive search over the discrete hyper-parameters; the
             # continuous ones are optimised for each discrete candidate.
             top_val = -np.inf
             top_cts = None
             top_dscr = None
             top_other = None
             for candidate in itertools_product(*self.dscr_hp_vals):
                 outcome = self._optimise_cts_hps_for_given_dscr_hps(candidate)
                 cand_val, cand_cts, cand_other = outcome
                 # Strict comparison: the first candidate wins ties.
                 if cand_val > top_val:
                     top_val = cand_val
                     top_cts = list(cand_cts)
                     top_dscr = list(candidate)
                     top_other = cand_other
             fitted = self.build_gp(top_cts, top_dscr,
                                    other_gp_params=top_other)
             return 'fitted_gp', fitted, (top_cts, top_dscr)

         if self.ml_hp_tune_opt_method == 'rand_exp_sampling':
             cts, dscr, other, probs = \
                 self._sample_cts_dscr_hps_for_rand_exp_sampling()
             return ('sample_hps_with_probs', cts, dscr, other, probs)

         # Unrecognised optimisation method: nothing to return.
         return None

     if criterion == 'post_sampling':
         cts, dscr, other = \
             self._sample_cts_dscr_hps_for_post_sampling(num_samples)
         if num_samples != 1:
             return ('post_sample_hps_with_probs', cts, dscr, other)
         fitted = self.build_gp(cts[0], dscr[0], other_gp_params=other[0])
         # The full sample lists (not their first element) are returned.
         return 'post_fitted_gp', fitted, (cts, dscr)

     return None
Ejemplo n.º 4
0
def constrained_permutations(init_golf_course,
                             list_holes,
                             list_balls,
                             list_dist_balls,
                             nb_of_balls=None):
    """
    Yield every golf course on which each ball could be routed to a distinct hole.

    :param init_golf_course: the golf course input (never modified: each
                             candidate works on its own deep copy)
    :param list_holes: the list of holes on the golf course
    :param list_balls: the list of balls to put in the holes on the golf course
    :param list_dist_balls: the list of the distances to respect for each ball
    :param nb_of_balls: the number of elements we need to permute in the list
                        (must be equal to the number of balls); defaults to
                        the number of holes
    :return: this function yields for each possible permutation, a golf course with the "ball-to-hole" paths set and
             the sum of the distances of those paths

    permutation example:
    holes: [A, B, C]
    balls: [1, 2]
    possible hole permutations with r = 2: [A, B], [A, C], [B, A], [B, C], [C, A], [C, B]
    possible "ball-to-hole" paths: [1-A, 2-B], [1-A, 2-C], [1-B, 2-A], [1-B, 2-C], [1-C, 2-A], [1-C, 2-B]
    """
    # Local import: the surrounding module is only known to expose the
    # `itertools_product` alias.
    from itertools import permutations

    nb_of_holes = len(list_holes)
    nb_of_permuted_elt = nb_of_holes if nb_of_balls is None else nb_of_balls

    # permutations() yields exactly the duplicate-free tuples of
    # product(range(n), repeat=r), in the same lexicographic order, without
    # enumerating the n**r tuples that would be filtered out.
    for indices in permutations(range(nb_of_holes), nb_of_permuted_elt):

        # check that this permutation is possible ("ball-to-hole" length VS distance to be respected)
        if not is_distance_ok(list_balls, list_holes, list_dist_balls,
                              indices):
            continue

        # NOTE(review): an earlier version recorded failing (hole, ball)
        # couples and scanned for them here, but its `break` only left the
        # scan loop, so no permutation was ever skipped. The scan is removed
        # rather than activated: get_path receives the course as modified by
        # the previous balls of the same permutation, so a couple failing in
        # one permutation presumably does not imply it fails in all of them.
        # TODO confirm before reintroducing any pruning.

        # for each new possible permutation, we deep-copy the golf course
        golf_course = copy_deepcopy(init_golf_course)
        list_dist_ball_hole = []
        # for each (ball, hole) couple in the permutation, we check if there is a possible path
        for i in range(nb_of_permuted_elt):
            # get_path hands back the course object to keep using for the
            # next ball (it may differ from the one passed in)
            path_found, golf_course = get_path(
                list_balls[i], list_holes[indices[i]],
                list_dist_balls[i], list_dist_ball_hole, golf_course)
            # if for one of the (ball, hole) couple, we don't find a "ball-to-hole" path
            # then we go to the next possible permutation
            if not path_found:
                break

        # if we found a path for each of the (ball, hole) couples of this permutation
        # (presumably get_path appends one distance per routed ball):
        if len(list_dist_ball_hole) == nb_of_permuted_elt:
            # we yield the golf course and the sum of the distances of each path
            yield golf_course, sum(list_dist_ball_hole)
Ejemplo n.º 5
0
def product(list_of_lists):
    """Return the cartesian product of the given iterables as a list of tuples.

    Each element of ``list_of_lists`` contributes one position of every
    resulting tuple. An empty ``list_of_lists`` gives ``[()]``.
    """
    combos = []
    combos.extend(itertools_product(*list_of_lists))
    return combos
Ejemplo n.º 6
0
def __build_all_pathways(
    pathways: Dict,
    transfos: Dict,
    sink_molecules: List,
    rr_reactions: Dict,
    compounds_cache: Dict,
    max_subpaths_filter: int,
    lower_flux_bound: float,
    upper_flux_bound: float,
    logger: Logger = getLogger(__name__)
) -> List:
    """Builds pathways based on all combinations over
    reaction rules and template reactions (see
    `build_pathway_combinatorics` documentation).

    Parameters
    ----------
    pathways: Dict
        Metabolic pathways as list of chemical
        reactions where each reaction is defined by:
            - transformation ID,
            - reaction rule ID, and
            - template reaction ID
    transfos: Dict
        Full chemical transformations
    sink_molecules: List
        Sink chemical species IDs
    rr_reactions: Dict
        Reaction rules cache
    compounds_cache: Dict
        Compounds cache
    max_subpaths_filter: int
        Number of pathways (best) kept over all master pathways.
        Note that 0 keeps every pathway, since `lst[-0:]` is the
        whole list.
    lower_flux_bound: float
        Lower flux bound for all new reactions created
    upper_flux_bound: float
        Upper flux bound for all new reactions created
    logger: Logger, optional

    Returns
    -------
    List of the `object` attribute of the selected items, in
    ascending sort order (presumably rpPathway objects wrapped
    by `__keep_unique_pathways` -- confirm against that helper).
    """

    res_pathways = {}

    nb_pathways = 0
    nb_unique_pathways = 0

    ## PATHWAYS
    for path_idx, transfos_lst in pathways.items():

        ## SUB-PATHWAYS
        # One sub-pathway per combination over multiple template reactions
        res_pathways[path_idx] = []
        for sub_path_idx, sub_pathway in enumerate(itertools_product(*transfos_lst)):

            pathway = rpPathway(
                id=str(path_idx).zfill(3)+'_'+str(sub_path_idx+1).zfill(4),
                logger=logger
            )
            logger.debug(pathway.get_id())

            ## ITERATE OVER REACTIONS
            nb_reactions = len(sub_pathway)
            for rxn_idx, rxn_infos in enumerate(sub_pathway):

                transfo_id = rxn_infos['rp2_transfo_id']
                transfo = transfos[transfo_id]
                rule_ids = rxn_infos['rule_ids']
                tmpl_rxn_id = rxn_infos['tmpl_rxn_ids']

                ## COMPOUNDS
                # Template reaction compounds
                added_cmpds = transfo['complement'][rule_ids][tmpl_rxn_id]['added_cmpds']
                # Add missing compounds to the cache
                for side_cmpds in added_cmpds.values():
                    for spe_id in side_cmpds.keys():
                        logger.debug(f'Add missing compound {spe_id}')
                        if spe_id not in Cache.get_objects():
                            try:
                                rpCompound(
                                    id=spe_id,
                                    smiles=compounds_cache[spe_id]['smiles'],
                                    inchi=compounds_cache[spe_id]['inchi'],
                                    inchikey=compounds_cache[spe_id]['inchikey'],
                                    formula=compounds_cache[spe_id]['formula'],
                                    name=compounds_cache[spe_id]['name']
                                )
                            except KeyError:
                                # Compound (or one of its fields) unknown
                                # to the cache: create it from its ID only
                                rpCompound(
                                    id=spe_id
                                )

                ## REACTION
                # Compounds from original transformation
                # (copied so that the transformation itself is left untouched)
                core_species = {
                    'right': deepcopy(transfo['right']),
                    'left': deepcopy(transfo['left'])
                }
                compounds = __add_compounds(core_species, added_cmpds)
                # revert reaction index (forward)
                rxn_idx_forward = nb_reactions - rxn_idx
                rxn = rpReaction(
                    id='rxn_'+str(rxn_idx_forward),
                    ec_numbers=transfo['ec'],
                    reactants=dict(compounds['left']),
                    products=dict(compounds['right']),
                    lower_flux_bound=lower_flux_bound,
                    upper_flux_bound=upper_flux_bound
                )
                # write infos: each key of the reaction description is
                # forwarded to the matching `set_<key>` method
                for info_id, info in rxn_infos.items():
                    getattr(rxn, 'set_'+info_id)(info)
                rxn.set_rule_score(rr_reactions[rule_ids][tmpl_rxn_id]['rule_score'])
                rxn.set_idx_in_path(rxn_idx_forward)

                # Search for the target in the current reaction
                target_ids = [spe_id for spe_id in rxn.get_products_ids() if 'TARGET' in spe_id]
                target_id = target_ids[0] if target_ids else None
                logger.debug(f'rxn: {rxn._to_dict()}')
                pathway.add_reaction(
                    rxn=rxn,
                    target_id=target_id
                )

                ## TRUNK SPECIES
                pathway.add_species_group(
                    'trunk',
                    [
                        spe_id
                        for value
                        in core_species.values()
                        for spe_id in value.keys()
                    ]
                )

                ## COMPLETED SPECIES
                pathway.add_species_group(
                    'completed',
                    [
                        spe_id
                        for value
                        in added_cmpds.values()
                        for spe_id in value.keys()
                    ]
                )

            ## SINK
            pathway.set_sink_species(
                list(
                    set(pathway.get_species_ids()) & set(sink_molecules)
                )
            )

            nb_pathways += 1

            ## RANK AMONG ALL SUB-PATHWAYS OF THE CURRENT MASTER PATHWAY
            res_pathways[path_idx] = __keep_unique_pathways(
                res_pathways[path_idx],
                pathway,
                logger
            )

        nb_unique_pathways += len(res_pathways[path_idx])

    # Flatten lists of pathways (single pass instead of the quadratic
    # `sum(lists, [])`), sort them globally and keep the topX last ones
    selected = sorted(
        item
        for items in res_pathways.values()
        for item in items
    )[-max_subpaths_filter:]

    logger.info('Pathways statistics')
    logger.info('-------------------')
    logger.info(f'   pathways: {nb_pathways}')
    logger.info(f'   unique pathways: {nb_unique_pathways}')
    logger.info(f'   selected pathways: {len(selected)} (topX filter = {max_subpaths_filter})')

    # Return topX pathway objects
    return [
        item.object
        for item in selected
    ]
Ejemplo n.º 7
0
 def expandgrid(*itrs):
     """Return the cartesian product of ``itrs`` in column form.

     The result maps each argument position ``i`` to the list of the
     ``i``-th elements of every combination, in product order. With no
     argument the result is an empty dict; if any argument is empty,
     every column is an empty list.
     """
     rows = list(itertools_product(*itrs))
     grid = {}
     for column in range(len(itrs)):
         grid[column] = [row[column] for row in rows]
     return grid
Ejemplo n.º 8
0
def main(image_urls,
         access_id,
         secret_key,
         regions,
         volume_types=None,
         volume_via_s3=True,
         ex_virt_types=None,
         push_notifications=False,
         compose_id=None):
    """
    The `ec2.ec2initiate.main` function iterates over the image urls and
    uploads each image to the specified regions, once per combination of
    region, virtualization type and volume type. `image_urls`, `access_id`
    and `secret_key` are required. `regions` falls back to the base region
    and `volume_types` to the configured volume types when passed as `None`;
    when `ex_virt_types` is `None` the virtualization types are derived from
    each image url.
    `volume_via_s3`, `push_notifications`, and `compose_id` are optional params
    with default values.

    A failure while processing one image is logged at debug level and the
    remaining images are still processed.

    Args:
        image_urls (list): List of the image urls to create AMIs. (required)
        access_id (str): AWS EC2 access id
        secret_key (str): AWS_EC2 secret key
        regions (list): List of AWS regions the AMI to be uploaded.
        volume_types (list): List of supported volumes for the AMIs to
            be created.
        volume_via_s3 (bool): If `True`, the images are uploaded via s3 method
            else using creating builder instances.
        ex_virt_types (list): List of the supported virts for the AMIs to
            be created.
        push_notifications (bool): If `True` the messages will be pushed to
            fedmsg, else skipped.
        compose_id: id of the current compose in process.

    Returns:
        list: everything returned by the publisher for the images that were
        successfully created.
    """

    root_volume_size = AWS_ROOT_VOLUME_SIZE
    published_images = []
    # Pre-bound so that the final clean up cannot hit an unbound name when
    # `image_urls` is empty or the very first image fails early.
    source = None

    if volume_types is None:
        volume_types = AWS_VOLUME_TYPES

    if regions is None:
        regions = [AWS_BASE_REGION]

    for image_url in image_urls:

        # If the virt types is not provided then select the supported virt
        # types from the image.
        if ex_virt_types is None:
            virt_types = get_virt_types_from_url(image_url)
        else:
            virt_types = ex_virt_types

        try:
            source = get_source_from_image(image_url)
            if not source:
                raise ValueError(
                    'No source found for image: %r' % (image_url,))

            image_architecture = get_file_arch(image_url)

            uploader = EC2ImageUploader(compose_id=compose_id,
                                        access_key=access_id,
                                        secret_key=secret_key,
                                        root_volume_size=root_volume_size,
                                        image_architecture=image_architecture,
                                        volume_via_s3=volume_via_s3,
                                        push_notifications=push_notifications,
                                        image_url=image_url)

            publisher = EC2ImagePublisher(
                compose_id=compose_id,
                access_key=access_id,
                secret_key=secret_key,
                push_notifications=push_notifications,
                image_url=image_url)

            combinations = itertools_product(
                regions, virt_types, volume_types)
            for region, virt_type, volume_type in combinations:
                uploader.set_region(region)
                _log.debug('(uploader) Region is set to: %r', region)

                uploader.set_image_virt_type(virt_type)
                _log.debug('(uploader) Virtualization type '
                           'is set to: %r', virt_type)

                image_name = get_image_name_from_image(image_url=image_url,
                                                       virt_type=virt_type,
                                                       region=region,
                                                       volume_type=volume_type)
                uploader.set_image_name(image_name)

                uploader.set_image_volume_type(volume_type)
                _log.debug('(uploader) Volume type is set to: %r',
                           volume_type)

                uploader.set_availability_zone_for_region()

                if push_notifications:
                    fedimg.messenger.notify(
                        topic='image.upload',
                        msg=dict(
                            image_url=image_url,
                            image_name=get_image_name_from_ami_name_for_fedmsg(
                                image_name),
                            destination=region,
                            service='EC2',
                            status='started',
                            compose=compose_id,
                            extra=dict(virt_type=virt_type,
                                       vol_type=volume_type)))
                image = uploader.create_image(source)

                published_images.extend(
                    publisher.publish_images(
                        region_image_mapping=[(region, image.id)]))
        except Exception as e:
            # `e.message` only exists on Python 2 exceptions; formatting the
            # exception itself works everywhere.
            _log.debug('%s', e)
            #TODO: Implement the clean up of the images if failed.
            # uploader.clean_up(image_id=image.id, delete_snapshot=True)

    # NOTE(review): only the directory of the last obtained source is
    # removed, as before; confirm whether every image's source should be
    # cleaned up.
    if source:
        shutil.rmtree(os.path.dirname(source))
    return published_images