Example #1
def generate_from_arrow_type(pa_type: pa.DataType) -> FeatureType:
    """
    generate_from_arrow_type accepts an arrow DataType and returns a datasets FeatureType to be used as the type for
        a single field.

    This is the high-level arrow->datasets type conversion and is inverted by get_nested_type().

    This operates at the individual *field* level, whereas Features.from_arrow_schema() operates at the
        full schema level and holds the methods that represent the bijection from Features<->pyarrow.Schema.
    """
    if isinstance(pa_type, pa.StructType):
        return {
            field.name: generate_from_arrow_type(field.type)
            for field in pa_type
        }
    elif isinstance(pa_type, pa.FixedSizeListType):
        return Sequence(feature=generate_from_arrow_type(pa_type.value_type),
                        length=pa_type.list_size)
    elif isinstance(pa_type, pa.ListType):
        feature = generate_from_arrow_type(pa_type.value_type)
        if isinstance(feature, (dict, tuple, list)):
            return [feature]
        return Sequence(feature=feature)
    elif isinstance(pa_type, _ArrayXDExtensionType):
        array_feature = [None, None, Array2D, Array3D, Array4D,
                         Array5D][pa_type.ndims]
        return array_feature(shape=pa_type.shape, dtype=pa_type.value_type)
    elif isinstance(pa_type, pa.DictionaryType):
        raise NotImplementedError  # TODO(thom) this will need access to the dictionary as well (for labels). I.e. to the py_table
    elif isinstance(pa_type, pa.DataType):
        return Value(dtype=_arrow_to_datasets_dtype(pa_type))
    else:
        raise ValueError(f"Cannot convert {pa_type} to a Feature type.")
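
For illustration, a minimal usage sketch (assuming pyarrow is installed and this module's Value/Sequence classes are in scope; the schema is hypothetical):

import pyarrow as pa

pa_type = pa.struct({"text": pa.string(), "ids": pa.list_(pa.int64())})
features = generate_from_arrow_type(pa_type)
# per the branches above:
# {"text": Value(dtype="string"), "ids": Sequence(feature=Value(dtype="int64"))}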
Example #2
 def scan(
     self,
     filter_expression=None,
     attributes: Sequence[str] = None,
     limit: int = None,
     consistent_read: bool = False,
     total_segments: int = None,
     segment: int = None,
     exclusive_start_key: dict = None,
     _index: str = None,
 ) -> Tuple[List[dict], dict]:
     request = {"ConsistentRead": consistent_read}
     if filter_expression:
         request["FilterExpression"] = filter_expression
     if attributes:
         request.update(self.serialize_attributes(attributes))
     if limit:
         request["Limit"] = limit
     if exclusive_start_key:
         request["ExclusiveStartKey"] = exclusive_start_key
     if total_segments:
         request["TotalSegments"] = total_segments
     if segment is not None:
         request["Segment"] = segment
     if _index:
         request["IndexName"] = _index
     try:
         response = self.get_table().scan(**request)
         return (response["Items"], response.get("LastEvaluatedKey"))
     except ClientError as e:
         handle_client_error(e)
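
Because scan returns the page of items together with LastEvaluatedKey, a caller can paginate by feeding that key back in; a sketch, with `table` as a hypothetical instance exposing this method:

items, last_key = table.scan(limit=100)
while last_key is not None:
    page, last_key = table.scan(limit=100, exclusive_start_key=last_key)
    items.extend(page)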
Example #3
 def query(
     self,
     key_condition,
     filter_expression=None,
     attributes: Sequence[str] = None,
     limit: int = None,
     consistent_read: bool = False,
     scan_index_forward: bool = True,
     exclusive_start_key: dict = None,
     _index: str = None,
 ) -> Tuple[List[dict], dict]:
     request = {
         "KeyConditionExpression": key_condition,
         "ConsistentRead": consistent_read,
         "ScanIndexForward": scan_index_forward,
     }
     if filter_expression:
         request["FilterExpression"] = filter_expression
     if attributes:
         request.update(self.serialize_attributes(attributes))
     if limit:
         request["Limit"] = limit
     if exclusive_start_key:
         request["ExclusiveStartKey"] = exclusive_start_key
     if _index:
         request["IndexName"] = _index
     try:
         response = self.get_table().query(**request)
         return (response["Items"], response.get("LastEvaluatedKey"))
     except ClientError as e:
         handle_client_error(e)
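
A usage sketch with a boto3 key condition (the `table` instance and index name are assumed for illustration):

from boto3.dynamodb.conditions import Key

# newest ten items for one partition key, read from a secondary index
items, _ = table.query(
    Key("pk").eq("user#123"),
    limit=10,
    scan_index_forward=False,  # descending key order
    _index="by-created-at",    # hypothetical index name
)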
Example #4
def generate_from_dict(obj: Any):
    """Regenerate the nested feature object from a serialized dict.
    We use the '_type' fields to get the dataclass name to load.

    generate_from_dict is the recursive helper for Features.from_dict, and allows for a convenient constructor syntax
        to define features from JSON dictionaries. It is used in particular when deserializing a DatasetInfo that was
        dumped to a JSON dictionary. It acts as an analogue to Features.from_arrow_schema, handling the recursive
        field-by-field instantiation, but requires no mapping to or from pyarrow, other than relying on the pyarrow
        primitive dtype mapping that Value() performs automatically.
    """
    # Nested structures: we allow dict, list/tuples, sequences
    if isinstance(obj, list):
        return [generate_from_dict(value) for value in obj]
    # Otherwise we have a dict or a dataclass
    if "_type" not in obj:
        return {key: generate_from_dict(value) for key, value in obj.items()}
    class_type = globals()[obj.pop("_type")]

    if class_type == Sequence:
        return Sequence(feature=generate_from_dict(obj["feature"]),
                        length=obj["length"])

    field_names = set(f.name for f in fields(class_type))
    return class_type(**{k: v for k, v in obj.items() if k in field_names})
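
A round-trip sketch, assuming the serialized layout this module produces (each node tagged with a '_type' key):

serialized = {
    "text": {"dtype": "string", "_type": "Value"},
    "scores": {"feature": {"dtype": "float32", "_type": "Value"},
               "length": -1, "_type": "Sequence"},
}
features = generate_from_dict(serialized)
# -> {"text": Value(dtype="string"),
#     "scores": Sequence(feature=Value(dtype="float32"), length=-1)}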
Example #5
def _batch_verify(ec: Curve, hf: Callable[[Any], Any], ms: Sequence[bytes],
                  P: Sequence[Point], sig: Sequence[ECSS]) -> bool:

    # the bitcoin proposed standard is only valid for curves
    # whose field prime satisfies p ≡ 3 (mod 4)
    if not ec.pIsThreeModFour:
        errmsg = 'curve prime p must be equal to 3 (mod 4)'
        raise ValueError(errmsg)

    batch_size = len(P)
    if len(ms) != batch_size:
        errMsg = f"mismatch between number of pubkeys ({batch_size}) "
        errMsg += f"and number of messages ({len(ms)})"
        raise ValueError(errMsg)
    if len(sig) != batch_size:
        errMsg = f"mismatch between number of pubkeys ({batch_size}) "
        errMsg += f"and number of signatures ({len(sig)})"
        raise ValueError(errMsg)

    if batch_size == 1:
        return _verify(ec, hf, ms[0], P[0], sig[0])

    t = 0
    scalars: List[int] = []
    points: List[Point] = []
    for i in range(batch_size):
        r, s = _to_sig(ec, sig[i])
        _ensure_msg_size(hf, ms[i])
        ec.require_on_curve(P[i])
        e = _e(ec, hf, r, P[i], ms[i])
        # raises an error if y does not exist
        # no need to check for quadratic residue
        y = ec.y(r)

        # a in [1, n-1]
        # deterministically generated using a CSPRNG seeded by a
        # cryptographic hash (e.g., SHA256) of all inputs of the
        # algorithm, or randomly generated independently for each
        # run of the batch verification algorithm
        a = (1 if i == 0 else (1 + random.getrandbits(ec.nlen)) % ec.n)
        scalars.append(a)
        points.append(_jac_from_aff((r, y)))
        scalars.append(a * e % ec.n)
        points.append(_jac_from_aff(P[i]))
        t += a * s

    TJ = _mult_jac(ec, t, ec.GJ)
    RHSJ = _multi_mult(ec, scalars, points)

    # return T == RHS, checked in Jacobian coordinates
    RHSZ2 = RHSJ[2] * RHSJ[2]
    TZ2 = TJ[2] * TJ[2]
    if (TJ[0] * RHSZ2) % ec._p != (RHSJ[0] * TZ2) % ec._p:
        return False

    return (TJ[1] * RHSZ2 * RHSJ[2]) % ec._p == (RHSJ[1] * TZ2 * TJ[2]) % ec._p
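
The final comparison checks, in Jacobian coordinates, the aggregated Schnorr equation (sum_i a_i*s_i)*G == sum_i a_i*R_i + sum_i (a_i*e_i)*P_i; with independently random a_i this equation holds only with negligible probability unless every individual signature verifies.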
Example #6
    def _normalize_domain(self, domain: typing.Sequence) -> tuple:
        if isinstance(domain, str):
            host_name, _, tld = domain.partition('.')
        elif isinstance(domain, collections.abc.Sequence):
            host_name, tld = domain
        else:
            raise TypeError('Argument "domain" must either be a string or '
                            'a sequence of two strings (domain and TLD).')

        return host_name, tld
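
Both accepted shapes normalize to the same tuple (obj being a hypothetical instance):

>>> obj._normalize_domain('example.com')
('example', 'com')
>>> obj._normalize_domain(('example', 'com'))
('example', 'com')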
Example #7
def generate_from_arrow_type(pa_type: pa.DataType):
    if isinstance(pa_type, pa.StructType):
        return {field.name: generate_from_arrow_type(field.type) for field in pa_type}
    elif isinstance(pa_type, pa.FixedSizeListType):
        return Sequence(feature=generate_from_arrow_type(pa_type.value_type), length=pa_type.list_size)
    elif isinstance(pa_type, pa.ListType):
        feature = generate_from_arrow_type(pa_type.value_type)
        if isinstance(feature, (dict, tuple, list)):
            return [feature]
        return Sequence(feature=feature)
    elif isinstance(pa_type, _ArrayXDExtensionType):
        array_feature = [None, None, Array2D, Array3D, Array4D, Array5D][pa_type.ndims]
        return array_feature(shape=pa_type.shape, dtype=pa_type.value_type)
    elif isinstance(pa_type, pa.DictionaryType):
        raise NotImplementedError  # TODO(thom) this will need access to the dictionary as well (for labels). I.e. to the py_table
    elif isinstance(pa_type, pa.DataType):
        return Value(dtype=str(pa_type))
    else:
        raise ValueError(f"Cannot convert {pa_type} to a Feature type.")
Example #8
def silentindex(a: typing.Sequence,
                b: typing.Any,
                multiple: bool = True) -> typing.Union[tuple, int, None]:
    """Alternative to list.index(), such that a missing value returns None
    instead of raising an Exception
    """
    if b in a:
        if multiple:
            # match by equality, consistent with the `in` check above
            return tuple(k for k, v in enumerate(a) if v == b)
        return a.index(b)  # index of the first occurrence of b in a
    return None
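
Behavior sketch:

>>> silentindex(['a', 'b', 'a'], 'a')
(0, 2)
>>> silentindex(['a', 'b', 'a'], 'a', multiple=False)
0
>>> silentindex(['a', 'b', 'a'], 'z') is None
True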
Example #9
def __make_pydantic(cls):
    """
    Temporary wrapper function to convert an MSONable class into a PyDantic
    Model for the sake of building schemas
    """

    if any(cls == T for T in built_in_primitives):
        return cls

    if cls in prim_to_type_hint:
        return prim_to_type_hint[cls]

    if cls == Any:
        return Any

    if isinstance(cls, TypeVar):
        return cls

    if hasattr(cls, "__origin__") and hasattr(cls, "__args__"):

        args = tuple(__make_pydantic(arg) for arg in cls.__args__)
        if cls.__origin__ == Union:
            return Union.__getitem__(args)

        if cls.__origin__ == Optional and len(args) == 1:
            return Optional.__getitem__(args)

        if cls._name == "List":
            return List.__getitem__(args)

        if cls._name == "Tuple":
            return Tuple.__getitem__(args)

        if cls._name == "Set":
            return Set.__getitem__(args)

        if cls._name == "Sequence":
            return Sequence.__getitem__(args)

    if issubclass(cls, MSONable):
        if cls not in STUBS:
            STUBS[cls] = MSONable_to_pydantic(cls)
        return STUBS[cls]

    if cls == ndarray:
        return List[Any]

    return cls
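
Note that Optional[X] normalizes to Union[X, None] at construction time, so subscripted optionals reach the Union branch and the explicit `cls.__origin__ == Optional` check can never match; a quick illustration:

from typing import Optional, Union

assert Optional[int].__origin__ is Union  # Optional disappears after subscription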
Example #10
def generate_from_dict(obj: Any):
    """ Regenerate the nested feature object from a serialized dict.
        We use the '_type' fields to get the dataclass name to load.
    """
    # Nested structures: we allow dict, list/tuples, sequences
    if isinstance(obj, list):
        return [generate_from_dict(value) for value in obj]
    # Otherwise we have a dict or a dataclass
    if "_type" not in obj:
        return {key: generate_from_dict(value) for key, value in obj.items()}
    class_type = globals()[obj.pop("_type")]

    if class_type == Sequence:
        return Sequence(feature=generate_from_dict(obj["feature"]), length=obj["length"])

    field_names = set(f.name for f in fields(class_type))
    return class_type(**{k: v for k, v in obj.items() if k in field_names})
Example #11
    def run_sequence_phase(self, behavior: Behavior):
        # Check if apis exist in the same call graph
        trees = [self.get_invoke_tree(api)
                 for api in behavior.api_objects]  # tree list

        # Test each combination of trees, accumulating results across pairs
        passing_3_list = []
        passing_4_list = []
        for first_index in range(len(trees)):
            for second_index in range(first_index + 1, len(trees)):
                first_tree = trees[first_index]
                second_tree = trees[second_index]

                first_all_methods = {
                    node.identifier
                    for node in first_tree.all_nodes()
                }
                second_all_methods = {
                    node.identifier
                    for node in second_tree.all_nodes()
                }
                common_parents = first_all_methods.intersection(
                    second_all_methods)

                # Stage 3 - Check combination
                # Stage 4 - Check sequence
                # Check invoke address
                for parent in common_parents:
                    # Test sequence of invoke addresses from two methods
                    first_bytecode_for_first_method = min(
                        first_tree.get_node(parent).data)
                    last_bytecode_for_second_method = max(
                        second_tree.get_node(parent).data)

                    cloned_behavior = copy(behavior)
                    cloned_behavior.sequence = Sequence(
                        parent, (trees[first_index], trees[second_index]))
                    if first_bytecode_for_first_method < last_bytecode_for_second_method:
                        passing_4_list.append(cloned_behavior)
                    else:
                        passing_3_list.append(cloned_behavior)

        return passing_3_list, passing_4_list
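
In other words, a behavior passes stage 4 when some common caller reaches the first API at a lower bytecode offset than the highest offset at which it reaches the second API, i.e. the required invocation order is realizable; otherwise it only passes stage 3 (the APIs co-occur under a common parent but not in sequence).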
Example #12
"""Define the array class, hide implementation"""
from typing import Any, Callable, Dict, Tuple, List, TypeVar, Union, Optional, Iterator, Sequence
from pathlib import Path
from functools import reduce
import numpy as np
from .array_mixin import OpDelegatorMixin
from ._main import search_ar_int

T = TypeVar("T", bound="DataFrame")
NpzFile = Dict[str, np.ndarray]
Sequence.register(np.ndarray)  # register ndarray as a virtual subclass of Sequence


def _is_1d(array: np.ndarray) -> bool:
    if array.ndim == 1:
        return True
    return len(np.flatnonzero(np.asarray(array.shape) > 1)) <= 1


def _name_axes(axes: List[np.ndarray],
               copy: bool = False) -> Dict[str, np.ndarray]:
    """generate axis name in the sequence of x, y, z, xx, yy, zz, xxx, ..."""
    order = np.arange(len(axes))
    return {
        chr(x) * y: (axis.copy() if copy else axis)
        for x, y, axis in zip(order % 3 + 120, order // 3 + 1, axes)
    }
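
The naming cycles through x, y, z and doubles the letter every three axes; a quick check with hypothetical axes:

axes = [np.arange(3) for _ in range(4)]
assert list(_name_axes(axes)) == ["x", "y", "z", "xx"]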


def _order_axes(named_axes: Dict[str, np.ndarray],
                copy: bool = True) -> List[np.ndarray]:
Example #13
@contextmanager
def constraints_plot(
    m_arr: T.Sequence,
    sigmin: float = 1e-15,
    sigmax: float = 1e25,
    *,
    savefig: T.Optional[str] = None,
    constr_labels: bool = False,
    all_constrs: bool = False,
    # individual constraints
    mica_constr: bool = False,
    CMB_constr: bool = False,
    WD_constr: bool = False,
    superbursts_constr: bool = False,
    humandeath_constr: bool = False,
    dfn_constr: bool = False,
    lensing_constr: bool = False,
    bh_constr: bool = False,
):
    """Make standard constraint plot, with custom constraints in context.

    Custom constraints are created in the `with` statement

    Parameters
    ----------
    m_arr : Sequence
        mass array
    sigmin : float
        minimum plotted sigma
    sigmax : float
        maximum plotted sigma
    constr_labels : bool
        whether to add labels to all the `Other Parameters`

    Yields
    ------
    fig : :class:`~matplotlib.Figure`
    ax : :class:`~matplotlib.Axes`
    m_arr : Sequence
        mass array
    sigmin : float
        minimum plotted sigma
    sigmax : float
        maximum plotted sigma

    Other Parameters
    ----------------
    mica_constr: bool
        Whether to label the mica constraints (default False)

        References : [1]_, [2]_, [3]_

        See :func:`~macro_lightning.plot.plot_mica_constraints`
    CMB_constr: bool
        Whether to label the CMB constraints (default False).

        References : [5]_

        See :func:`~macro_lightning.plot.plot_cmb_constraints`
    WD_constr: bool
        Whether to label the White Dwarf constraints (default False).

        References : [4]_

        See :func:`~macro_lightning.plot.plot_white_dwarf_constraints`
    superbursts_constr: bool
        Whether to label the superbursts constraints (default False).

        References : [4]_

        See :func:`~macro_lightning.plot.plot_superbursts_constraints`
    humandeath_constr: bool
        Whether to label the human-death constraints (default False).

        References : [6]_

        See :func:`~macro_lightning.plot.plot_humandeath_constraints`
    dfn_constr: bool
        Whether to label the DFN constraints (default False).

        References : [7]_

        See :func:`~macro_lightning.plot.plot_dfn_constraints`
    lensing_constr: bool
        Whether to label the micro-lensing constraints (default False).

        References : [8]_, [9]_, [10]_, [11]_, [12]_

        See :func:`~macro_lightning.plot.plot_lensing_constraints`
    bh_constr: bool
        Whether to label the black hole constraints (default False).

        See :func:`~macro_lightning.plot.plot_black_hole_constraints`

    Examples
    --------
    In this example we make an empty constraint plot.
        >>> M = np.logspace(1, 25)
        >>> with constraints_plot(M, sigmin=1e-15):
        ...     pass

    References
    ----------
    .. [1] Price, P. (1988). Limits on Contribution of Cosmic Nuclearites
        to Galactic Dark Matter. Physical Review D, 38, 3813-3814.
    .. [2] De Rujula, A., & Glashow, S. (1984).
        Nuclearites: A Novel Form of Cosmic Radiation. Nature, 312, 734-737.
    .. [3] David M. Jacobs, Glenn D. Starkman, & Bryan W. Lynn (2014).
        Macro Dark Matter. MNRAS.
    .. [4] R. J. Wilkinson, J. Lesgourgues, and C. Bœhm,
        Journal of Cosmology and Astroparticle Physics 2014, 026 (2014).
    .. [5] J. S. Sidhu and G. D. Starkman, Physical Review D 101 (2020),
        10.1103/physrevd.101.083503.
    .. [6] J. S. Sidhu, R. Scherrer, and G. Starkman,
        Physics Letters B 803, 135300 (2020).
    .. [7] J. S. Sidhu and G. Starkman, Physical Review D 100 (2019),
        10.1103/physrevd.100.123008.
    .. [8] C. Alcock et al., The Astrophysical Journal 550, L169 (2001).
    .. [9] K. Griest, A. M. Cieplak, and M. J. Lehner, Physical
        Review Letters 111, 181302 (2013).
    .. [10] P. Tisserand et al., Astronomy & Astrophysics 469, 387 (2007).
    .. [11] B. J. Carr, K. Kohri, Y. Sendouda, and J. Yokoyama,
        Physical Review D 81, 104019 (2010).
    .. [12] H. Niikura et al., Nature Astronomy 3, 524 (2019)

    """
    fig, ax = pyplot.subplots(figsize=(8, 5.5))
    ax.grid(True, alpha=0.7)

    ax.set_xlabel(r"$M_{X}$ [g]", fontsize=18)
    ax.set_xlim([m_arr.min(), m_arr.max()])
    for tick in ax.xaxis.get_major_ticks():
        tick.label.set_fontsize(14)

    ax.set_ylim(sigmin, sigmax)  # min/max of nuclear_density(M1)
    ax.set_ylabel(r"$\sigma_{X}$ [cm$^{2}$]", fontsize=18)
    for tick in ax.yaxis.get_major_ticks():
        tick.label.set_fontsize(14)

    plot_reference_densities(m_arr)

    # previous constraints

    if mica_constr or all_constrs:
        plot_mica_constraints(label=constr_labels)
    if WD_constr or all_constrs:
        plot_white_dwarf_constraints(label=constr_labels)
    if CMB_constr or all_constrs:
        plot_cmb_constraints(m_arr, sigmax=sigmax, label=constr_labels)
    if superbursts_constr or all_constrs:
        plot_superbursts_constraints(label=constr_labels)
    if humandeath_constr or all_constrs:
        plot_humandeath_constraints(label=constr_labels)
    if dfn_constr or all_constrs:
        plot_dfn_constraints(label=constr_labels)
    if lensing_constr or all_constrs:
        plot_lensing_constraints(Mmicro=None, label=constr_labels)
    if bh_constr or all_constrs:
        plot_black_hole_constraints(m_arr, sigmin=sigmin, label=constr_labels)

    try:
        yield fig, ax, m_arr, sigmin, sigmax

    finally:

        ax.legend(loc="upper left", shadow=True, fontsize=12, ncol=2)
        pyplot.tight_layout()

        if savefig is not None:
            fig.savefig(savefig)
Example #14
 def __init__(self, *args, return_value: typing.Sequence = (), **kwargs):
     super().__init__(*args, **kwargs)
     self.iter = iter(return_value)
     self.return_value = self
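
This is the usual shape for an iterable test double: the instance is its own return_value (so calling it hands back the same object), while iteration state lives in self.iter, presumably consumed by an __iter__ or __next__ override elsewhere in the class.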
Example #15
def are_all_items_equal(items: ty.Sequence) -> bool:
    return bool(items) and items.count(items[0]) == len(items)
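
Behavior sketch:

>>> are_all_items_equal([1, 1, 1])
True
>>> are_all_items_equal([1, 2])
False
>>> are_all_items_equal([])
False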