Example #1
def fake_amr_ds(fields=("Density", ), geometry="cartesian", particles=0):
    from yt.frontends.stream.api import load_amr_grids
    prng = RandomState(0x4d3d3d3)
    LE, RE = _geom_transforms[geometry]
    LE = np.array(LE)
    RE = np.array(RE)
    data = []
    for gspec in _amr_grid_index:
        level, left_edge, right_edge, dims = gspec
        left_edge = left_edge * (RE - LE) + LE
        right_edge = right_edge * (RE - LE) + LE
        gdata = dict(level=level,
                     left_edge=left_edge,
                     right_edge=right_edge,
                     dimensions=dims)
        for f in fields:
            gdata[f] = prng.random_sample(dims)
        if particles:
            for i, f in enumerate('particle_position_%s' % ax for ax in 'xyz'):
                pdata = prng.random_sample(particles)
                pdata /= (right_edge[i] - left_edge[i])
                pdata += left_edge[i]
                gdata['io', f] = (pdata, 'code_length')
            for f in ('particle_velocity_%s' % ax for ax in 'xyz'):
                gdata['io', f] = (prng.random_sample(particles) - 0.5, 'cm/s')
            gdata['io', 'particle_mass'] = (prng.random_sample(particles), 'g')
        data.append(gdata)
    bbox = np.array([LE, RE]).T
    return load_amr_grids(data, [32, 32, 32], geometry=geometry, bbox=bbox)
Example #2
class TestExtMathUtils(unittest.TestCase):
    """Test utils.extmath"""

    def setUp(self):
        self.rand = RandomState(0)

    def test_row_normalize_exp(self):
        arr = self.rand.random_sample((400, 200))
        arr2 = arr.copy()
        # in-place update
        row_log_normalize_exp(arr)
        arr2 -= logsumexp(arr2, axis=1)[:, np.newaxis]
        assert_almost_equal(arr, arr2)

    def test_mean_change_2d(self):
        arr1 = self.rand.random_sample((1000, 200))
        arr2 = self.rand.random_sample((1000, 200))
        ret1 = mean_change_2d(arr1, arr2)
        ret2 = np.abs(arr1 - arr2).mean()
        assert_almost_equal(ret1, ret2)

    def test_beta_param_update(self):
        alpha = self.rand.rand() * 10
        n_cols = self.rand.randint(100, 200)
        row_stats = self.rand.random_sample(n_cols)

        # in-place update
        arr1 = np.empty((2, n_cols - 1))
        beta_param_update(alpha, row_stats, arr1)
        # expect output
        arr2 = np.empty((2, n_cols - 1))
        arr2[0] = 1.0 + row_stats[:n_cols-1]
        arr2[1] = alpha + np.flipud(np.cumsum(np.flipud(row_stats[1:])))
        assert_almost_equal(arr1, arr2)
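For reference, the log-space row normalization that the first test compares against can be sketched standalone (scipy.special.logsumexp is assumed here; the test module may import logsumexp from elsewhere):

import numpy as np
from scipy.special import logsumexp

arr = np.log(np.array([[0.2, 0.3, 0.5]]))
normalized = arr - logsumexp(arr, axis=1)[:, np.newaxis]
# after the normalization, the exponentials of each row sum to 1
assert np.allclose(np.exp(normalized).sum(axis=1), 1.0)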
Example #3
File: testing.py Project: tukss/yt
def fake_amr_ds(
        fields=("Density", ), geometry="cartesian", particles=0,
        length_unit=None):
    from yt.loaders import load_amr_grids

    prng = RandomState(0x4D3D3D3)
    LE, RE = _geom_transforms[geometry]
    LE = np.array(LE)
    RE = np.array(RE)
    data = []
    for gspec in _amr_grid_index:
        level, left_edge, right_edge, dims = gspec
        left_edge = left_edge * (RE - LE) + LE
        right_edge = right_edge * (RE - LE) + LE
        gdata = dict(level=level,
                     left_edge=left_edge,
                     right_edge=right_edge,
                     dimensions=dims)
        for f in fields:
            gdata[f] = prng.random_sample(dims)
        if particles:
            for i, f in enumerate(f"particle_position_{ax}" for ax in "xyz"):
                pdata = prng.random_sample(particles)
                pdata /= right_edge[i] - left_edge[i]
                pdata += left_edge[i]
                gdata["io", f] = (pdata, "code_length")
            for f in (f"particle_velocity_{ax}" for ax in "xyz"):
                gdata["io", f] = (prng.random_sample(particles) - 0.5, "cm/s")
            gdata["io", "particle_mass"] = (prng.random_sample(particles), "g")
        data.append(gdata)
    bbox = np.array([LE, RE]).T
    return load_amr_grids(data, [32, 32, 32],
                          geometry=geometry,
                          bbox=bbox,
                          length_unit=length_unit)
Example #4
    def _read(self, file_path):
        rs = RandomState(seed=1000)
        with open(cached_path(file_path), "r") as data_file:
            for _, line in enumerate(data_file.readlines()):
                items = json.loads(line)
                metadata = items["metadata"]
                tokens = metadata["tokens"]
                query = None if metadata["query"] == "None" else metadata[
                    "query"]
                label = metadata["label"]
                rationale = [x["span"] for x in items["rationale"]["spans"]]
                document = metadata["document"]

                if "annotation_id" in items:
                    annotation_id = items["annotation_id"]
                else:
                    annotation_id = hashlib.sha1(
                        document.encode("utf-8") +
                        (query.encode("utf-8") if query is not None else "".
                         encode("utf-8"))).hexdigest()

                if rs.random_sample() < self._keep_prob:
                    instance = self.text_to_instance(
                        annotation_id=annotation_id,
                        document=document,
                        query=query,
                        label=label,
                        rationale=rationale,
                        tokens_existing=tokens,
                    )
                    if instance is not None:
                        yield instance
Example #5
    def _read(self, file_path):
        rs = RandomState(seed=1000)
        with open(cached_path(file_path), "r") as data_file:
            for _, line in enumerate(data_file.readlines()):
                items = json.loads(line)
                document = items["original_document"]
                annotation_id = items["annotation_id"]
                query = items.get("query", None)
                label = items.get("label", None)
                if rs.random_sample() < self._human_prob:
                    rationale = items.get("human_rationale")
                else:
                    rationale = items.get("predicted_rationale")["spans"]
                    rationale = [span["span"] for span in rationale]

                if label is not None:
                    label = str(label).replace(" ", "_")

                instance = self.text_to_instance(
                    annotation_id=annotation_id,
                    document=document,
                    query=query,
                    label=label,
                    rationale=rationale,
                )
                yield instance
Example #6
def sample_transformed(
    *, rng: RandomState, lo: float, hi: float,
) -> float:
    assert 0.0 <= lo <= hi <= 1.0, \
        f'bounds [{lo},{hi}] must be within [0,1]'
    size = hi - lo
    return rng.random_sample() * size + lo
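A minimal usage sketch for the helper above (the seed and bounds are arbitrary choices for illustration):

from numpy.random import RandomState

rng = RandomState(42)  # arbitrary seed
# random_sample() is scaled by (hi - lo) and shifted by lo, so the result lands in [0.25, 0.75)
value = sample_transformed(rng=rng, lo=0.25, hi=0.75)
assert 0.25 <= value < 0.75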
Example #7
def fake_particle_ds(
        fields=("particle_position_x", "particle_position_y",
                "particle_position_z", "particle_mass", "particle_velocity_x",
                "particle_velocity_y", "particle_velocity_z"),
        units=('cm', 'cm', 'cm', 'g', 'cm/s', 'cm/s', 'cm/s'),
        negative=(False, False, False, False, True, True, True),
        npart=16**3,
        length_unit=1.0,
        data=None):
    from yt.frontends.stream.api import load_particles

    prng = RandomState(0x4d3d3d3)
    if not iterable(negative):
        negative = [negative for f in fields]
    assert (len(fields) == len(negative))
    offsets = []
    for n in negative:
        if n:
            offsets.append(0.5)
        else:
            offsets.append(0.0)
    data = {}
    for field, offset, u in zip(fields, offsets, units):
        if field in data:
            v = data[field]
            continue
        if "position" in field:
            v = prng.normal(loc=0.5, scale=0.25, size=npart)
            np.clip(v, 0.0, 1.0, v)
        v = (prng.random_sample(npart) - offset)
        data[field] = (v, u)
    bbox = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]])
    ds = load_particles(data, 1.0, bbox=bbox)
    return ds
Example #8
def sample_list_item(x: List[Any], probs: Optional[np.ndarray],
                     random_state: RandomState) -> Any:
    """
    Sample a list item according to the items' probabilities.

    :param x: Items to sample.
    :param probs: Probabilities (must have same length as `x` and sum to 1), or None for uniform distribution.
    :param random_state: Random state.
    :return: Sampled list item.
    """

    if probs is None:
        probs = np.repeat(1 / len(x), len(x))

    cdf_y_rand = random_state.random_sample()

    cum_probs = probs.cumsum()
    final_cum_prob = cum_probs[-1]

    if abs(1.0 - final_cum_prob) > 0.00001:
        raise ValueError(
            f'Expected cumulative probabilities to sum to 1, but got {final_cum_prob} instead.'
        )

    x_i = next(i for i, cum_prob in enumerate(cum_probs)
               if cdf_y_rand < cum_prob)

    return x[x_i]
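A brief usage sketch for sample_list_item (the items, probabilities, and seed below are illustrative assumptions):

import numpy as np
from numpy.random import RandomState

items = ['a', 'b', 'c']
probs = np.array([0.2, 0.5, 0.3])  # same length as items, sums to 1
rs = RandomState(0)
# inverse-CDF sampling: one uniform draw is compared against the cumulative probabilities
picked = sample_list_item(items, probs, rs)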
Example #9
class ArbitraryDriver(object):
    def __init__(self,
                 seed,
                 item_range_max,
                 permutation_seed=100,
                 name=None,
                 zipf_param=1.0001,
                 d_second=-1):
        self.rand = RandomState(seed)
        self.max_item = item_range_max
        self.permute_seed = permutation_seed

        if name is None:
            self.name = self.__class__.__name__
        else:
            self.name = name

    @classmethod
    def get_item(self, r_float):
        pass

    def get_cost(self, r_float, item_num):
        pass

    def permute_float(self, r_float):
        return r_float

    def sample_item_w_cost(self):
        r_float = self.rand.random_sample()
        cost_float = self.permute_float(r_float)
        item = self.get_item(r_float)
        cost = self.get_cost(cost_float, item)

        return (item, cost)
Example #10
def test_precision():

    rng_reg = RandomState(2)
    rng_clf = RandomState(8)
    for X, y, clf in zip(
        (rng_reg.random_sample((5, 2)), rng_clf.random_sample((1000, 4))),
        (rng_reg.random_sample((5, )), rng_clf.randint(2, size=(1000, ))),
        (
            DecisionTreeRegressor(
                criterion="friedman_mse", random_state=0, max_depth=1),
            DecisionTreeClassifier(max_depth=1, random_state=0),
        ),
    ):

        clf.fit(X, y)
        for precision in (4, 3):
            dot_data = export_graphviz(clf,
                                       out_file=None,
                                       precision=precision,
                                       proportion=True)

            # With the current random state, the impurity and the threshold
            # are formatted with the number of decimal places requested via
            # the precision argument of export_graphviz, so they are checked
            # with strict equality. The reported value has only 2 decimal
            # places, so only a less-than-or-equal comparison is done for it.

            # check value
            for finding in finditer(r"value = \d+\.\d+", dot_data):
                assert len(search(r"\.\d+",
                                  finding.group()).group()) <= precision + 1
            # check impurity
            if is_classifier(clf):
                pattern = r"gini = \d+\.\d+"
            else:
                pattern = r"friedman_mse = \d+\.\d+"

            # check impurity
            for finding in finditer(pattern, dot_data):
                assert len(search(r"\.\d+",
                                  finding.group()).group()) == precision + 1
            # check threshold
            for finding in finditer(r"<= \d+\.\d+", dot_data):
                assert len(search(r"\.\d+",
                                  finding.group()).group()) == precision + 1
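To make the regex bookkeeping in the assertions above concrete, here is a standalone sketch with a hand-written dot_data string (purely illustrative, not produced by export_graphviz):

from re import finditer, search

dot_data = "gini = 0.480\nvalue = [0.25, 0.75]\nX[0] <= 2.345"
precision = 3
for finding in finditer(r"<= \d+\.\d+", dot_data):
    # ".345" is precision + 1 = 4 characters long (the dot plus 3 digits)
    assert len(search(r"\.\d+", finding.group()).group()) == precision + 1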
Example #11
def test_precision():

    rng_reg = RandomState(2)
    rng_clf = RandomState(8)
    for X, y, clf in zip(
            (rng_reg.random_sample((5, 2)),
             rng_clf.random_sample((1000, 4))),
            (rng_reg.random_sample((5, )),
             rng_clf.randint(2, size=(1000, ))),
            (DecisionTreeRegressor(criterion="friedman_mse", random_state=0,
                                   max_depth=1),
             DecisionTreeClassifier(max_depth=1, random_state=0))):

        clf.fit(X, y)
        for precision in (4, 3):
            dot_data = export_graphviz(clf, out_file=None, precision=precision,
                                       proportion=True)

            # With the current random state, the impurity and the threshold
            # are formatted with the number of decimal places requested via
            # the precision argument of export_graphviz, so they are checked
            # with strict equality. The reported value has only 2 decimal
            # places, so only a less-than-or-equal comparison is done for it.

            # check value
            for finding in finditer(r"value = \d+\.\d+", dot_data):
                assert_less_equal(
                    len(search(r"\.\d+", finding.group()).group()),
                    precision + 1)
            # check impurity
            if is_classifier(clf):
                pattern = r"gini = \d+\.\d+"
            else:
                pattern = r"friedman_mse = \d+\.\d+"

            # check impurity
            for finding in finditer(pattern, dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
            # check threshold
            for finding in finditer(r"<= \d+\.\d+", dot_data):
                assert_equal(len(search(r"\.\d+", finding.group()).group()),
                             precision + 1)
Example #12
class RandomIntVal:
	
	seed = 1012810
	nState = RandomState(seed)
	cState = random.StrongRandom()
	
	def __init__(self, seed=1012810):
		self.nState = RandomState(seed)
		self.cState = random.StrongRandom()

		# Sampler warmup
		print "Starting Sampler Warm-up"
		junk = self.nState.random_sample(10000)
		print "Warm-up Complete"
	
	def getValue(self):
		maxsize = sys.maxint-1
		rn = float(self.cState.randint(0,maxsize))/maxsize
		return rn
	def getValueTwister(self):
		return self.nState.random_sample()
Example #13
    def get_cost(self, r_float, item):
        cost = (item**self.cost_power) / self.inner_max_cost

        if self.perturbate > 0:
            if len(self.perturbed) == 0:
                r = RandomState(self.permute_seed)
                self.perturbed = (
                    1.0 - (r.random_sample(self.max_item) * self.perturbate))
            # item = float(self.perturbed[item] * item)
            cost *= float(self.perturbed[item])
        return cost
Example #14
def fake_random_ds(ndims,
                   peak_value=1.0,
                   fields=("density", "velocity_x", "velocity_y",
                           "velocity_z"),
                   units=('g/cm**3', 'cm/s', 'cm/s', 'cm/s'),
                   particle_fields=None,
                   particle_field_units=None,
                   negative=False,
                   nprocs=1,
                   particles=0,
                   length_unit=1.0,
                   unit_system="cgs",
                   bbox=None):
    from yt.frontends.stream.api import load_uniform_grid
    prng = RandomState(0x4d3d3d3)
    if not iterable(ndims):
        ndims = [ndims, ndims, ndims]
    else:
        assert (len(ndims) == 3)
    if not iterable(negative):
        negative = [negative for f in fields]
    assert (len(fields) == len(negative))
    offsets = []
    for n in negative:
        if n:
            offsets.append(0.5)
        else:
            offsets.append(0.0)
    data = {}
    for field, offset, u in zip(fields, offsets, units):
        v = (prng.random_sample(ndims) - offset) * peak_value
        if field[0] == "all":
            data['number_of_particles'] = v.size
            v = v.ravel()
        data[field] = (v, u)
    if particles:
        if particle_fields is not None:
            for field, unit in zip(particle_fields, particle_field_units):
                if field in ('particle_position', 'particle_velocity'):
                    data['io', field] = (prng.random_sample(
                        (particles, 3)), unit)
                else:
                    data['io',
                         field] = (prng.random_sample(size=particles), unit)
        else:
            for f in ('particle_position_%s' % ax for ax in 'xyz'):
                data['io',
                     f] = (prng.random_sample(size=particles), 'code_length')
            for f in ('particle_velocity_%s' % ax for ax in 'xyz'):
                data['io',
                     f] = (prng.random_sample(size=particles) - 0.5, 'cm/s')
            data['io', 'particle_mass'] = (prng.random_sample(particles), 'g')
        data['number_of_particles'] = particles
    ug = load_uniform_grid(data,
                           ndims,
                           length_unit=length_unit,
                           nprocs=nprocs,
                           unit_system=unit_system,
                           bbox=bbox)
    return ug
Example #15
def fake_random_ds(
    ndims,
    peak_value=1.0,
    fields=("density", "velocity_x", "velocity_y", "velocity_z"),
    units=("g/cm**3", "cm/s", "cm/s", "cm/s"),
    particle_fields=None,
    particle_field_units=None,
    negative=False,
    nprocs=1,
    particles=0,
    length_unit=1.0,
    unit_system="cgs",
    bbox=None,
):
    from yt.loaders import load_uniform_grid

    prng = RandomState(0x4D3D3D3)
    if not is_sequence(ndims):
        ndims = [ndims, ndims, ndims]
    else:
        assert len(ndims) == 3
    if not is_sequence(negative):
        negative = [negative for f in fields]
    assert len(fields) == len(negative)
    offsets = []
    for n in negative:
        if n:
            offsets.append(0.5)
        else:
            offsets.append(0.0)
    data = {}
    for field, offset, u in zip(fields, offsets, units):
        v = (prng.random_sample(ndims) - offset) * peak_value
        if field[0] == "all":
            v = v.ravel()
        data[field] = (v, u)
    if particles:
        if particle_fields is not None:
            for field, unit in zip(particle_fields, particle_field_units):
                if field in ("particle_position", "particle_velocity"):
                    data["io", field] = (prng.random_sample((int(particles), 3)), unit)
                else:
                    data["io", field] = (prng.random_sample(size=int(particles)), unit)
        else:
            for f in (f"particle_position_{ax}" for ax in "xyz"):
                data["io", f] = (prng.random_sample(size=particles), "code_length")
            for f in (f"particle_velocity_{ax}" for ax in "xyz"):
                data["io", f] = (prng.random_sample(size=particles) - 0.5, "cm/s")
            data["io", "particle_mass"] = (prng.random_sample(particles), "g")
    ug = load_uniform_grid(
        data,
        ndims,
        length_unit=length_unit,
        nprocs=nprocs,
        unit_system=unit_system,
        bbox=bbox,
    )
    return ug
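One possible way to call the helper above (the field and unit choices here are assumptions, not defaults):

# builds a 32**3 uniform-grid dataset with two random fields and 100 random particles
ds = fake_random_ds(
    32,
    fields=("density", "temperature"),
    units=("g/cm**3", "K"),
    particles=100,
)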
Example #16
def samples_generator(fn, shape, rng, seed):
    u'''
    Generate random samples for the model:
    @fn - function to be applied on the input features to get the output
    @shape - shape of the features matrix (num_samples, num_features)
    @rng - range of the input features to be generated within (a,b)
    Outputs a tuple of input and output features matrix
    '''
    prng = RandomState(int(seed))
    x = (rng[1] - rng[0]) * prng.random_sample(shape) + rng[0]
    y = np.apply_along_axis(fn, 1, x).reshape((shape[0], -1))
    z = np.zeros((shape[0], shape[1] - y.shape[1]))
    y = np.concatenate((y, z), axis=1)

    return x, y
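A minimal usage sketch for samples_generator (the target function, shape, range, and seed are illustrative assumptions):

import numpy as np

# fit targets y = sum(x) over 100 samples of 3 features drawn uniformly from [-1, 1)
x, y = samples_generator(np.sum, shape=(100, 3), rng=(-1.0, 1.0), seed=7)
# y is zero-padded on the right to match the feature width
assert x.shape == (100, 3) and y.shape == (100, 3)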
Example #17
def fake_particle_ds(
    fields=None,
    units=None,
    negative=None,
    npart=16 ** 3,
    length_unit=1.0,
    data=None,
):
    from yt.loaders import load_particles

    prng = RandomState(0x4D3D3D3)
    if negative is not None and not is_sequence(negative):
        negative = [negative for f in fields]

    fields, units, negative = _check_field_unit_args_helper(
        {
            "fields": fields,
            "units": units,
            "negative": negative,
        },
        {
            "fields": _fake_particle_ds_default_fields,
            "units": _fake_particle_ds_default_units,
            "negative": _fake_particle_ds_default_negative,
        },
    )

    offsets = []
    for n in negative:
        if n:
            offsets.append(0.5)
        else:
            offsets.append(0.0)
    data = data if data else {}
    for field, offset, u in zip(fields, offsets, units):
        if field in data:
            v = data[field]
            continue
        if "position" in field:
            v = prng.normal(loc=0.5, scale=0.25, size=npart)
            np.clip(v, 0.0, 1.0, v)
        v = prng.random_sample(npart) - offset
        data[field] = (v, u)
    bbox = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]])
    ds = load_particles(data, 1.0, bbox=bbox)
    return ds
Example #18
def fake_particle_ds(
    fields=(
        "particle_position_x",
        "particle_position_y",
        "particle_position_z",
        "particle_mass",
        "particle_velocity_x",
        "particle_velocity_y",
        "particle_velocity_z",
    ),
    units=("cm", "cm", "cm", "g", "cm/s", "cm/s", "cm/s"),
    negative=(False, False, False, False, True, True, True),
    npart=16 ** 3,
    length_unit=1.0,
    data=None,
):
    from yt.loaders import load_particles

    prng = RandomState(0x4D3D3D3)
    if not is_sequence(negative):
        negative = [negative for f in fields]
    assert len(fields) == len(negative)
    offsets = []
    for n in negative:
        if n:
            offsets.append(0.5)
        else:
            offsets.append(0.0)
    data = data if data else {}
    for field, offset, u in zip(fields, offsets, units):
        if field in data:
            v = data[field]
            continue
        if "position" in field:
            v = prng.normal(loc=0.5, scale=0.25, size=npart)
            np.clip(v, 0.0, 1.0, v)
        v = prng.random_sample(npart) - offset
        data[field] = (v, u)
    bbox = np.array([[0.0, 1.0], [0.0, 1.0], [0.0, 1.0]])
    ds = load_particles(data, 1.0, bbox=bbox)
    return ds
Example #19
def test(cores=None):
    """
    """
    #Test data
    w = ps.lat2W(10, 10)
    random_int = RandomState(123456789)
    attribute = random_int.random_sample((w.n, 2))

    #mp Boilerplate
    if cores is None:
        cores = mp.cpu_count()
    numifs = 20

    #Locking solution space
    solution_lock = mp.Lock()
    csoln_space = mp.Array(ctypes.c_int32,
                           numifs * (w.n + 1),
                           lock=solution_lock)
    soln_space = np.frombuffer(csoln_space.get_obj(), dtype=np.int32)
    soln_space[:] = 0
    soln_space.shape = (-1, w.n + 1)
    initshared_soln(csoln_space)

    jobs = []
    for i in xrange(cores):
        p = IFS(attribute, w, lock=solution_lock, pid=i)
        jobs.append(p)
        p.start()
    for j in jobs:
        j.join()

    for i in range(numifs):
        checkcontiguity(soln_space[i], w)
    """
    for i in range(numifs):
        print soln_space[i][1:].reshape(-1,10)
        print
    """
    print "Generated solution space with {} regions per solution".format(
        soln_space[:, 0])
Example #20
    def _read(self, file_path):
        gold_path, predicted_path = file_path.split(";")
        rs = RandomState(seed=1000)
        with open(cached_path(gold_path), "r") as gold_file, open(cached_path(predicted_path), "r") as predicted_file:
            for _, (gold_line, predicted_line) in enumerate(
                zip_longest(gold_file.readlines(), predicted_file.readlines())
            ):
                gold_items = json.loads(gold_line)
                predicted_items = json.loads(predicted_line)

                assert gold_items["document"] == predicted_items["metadata"]["document"], breakpoint()
                assert gold_items["annotation_id"] == predicted_items["metadata"]["annotation_id"], breakpoint()

                metadata = predicted_items["metadata"]
                tokens = metadata["tokens"]

                predicted_rationale = [x["span"] for x in predicted_items["rationale"]["spans"]]
                predicted_token_rationale = [0] * len(metadata["tokens"])
                for s, e in predicted_rationale:
                    for i in range(s, e):
                        predicted_token_rationale[i] = 1

                gold_token_rationale = self.map_rationale_to_gold_document(gold_items, tokens)

                if rs.random_sample() < self._human_prob:
                    rationale = gold_token_rationale
                else:
                    rationale = predicted_token_rationale

                instance = self.text_to_instance(
                    annotation_id=gold_items["annotation_id"],
                    document=gold_items["document"],
                    query=gold_items.get("query", None),
                    label=gold_items["label"],
                    rationale=rationale,
                    tokens_existing=tokens,
                )
                if instance is not None:
                    yield instance
Example #21
def test(cores=None):
    """
    """
    #Test data
    w = ps.lat2W(10, 10)
    random_int = RandomState(123456789)
    attribute = random_int.random_sample((w.n, 2))

    #mp Boilerplate
    if cores is None:
        cores = mp.cpu_count()
    numifs = 20

    #Locking solution space
    solution_lock = mp.Lock()
    csoln_space = mp.Array(ctypes.c_int32, numifs * (w.n + 1), lock=solution_lock)
    soln_space = np.frombuffer(csoln_space.get_obj(), dtype=np.int32)
    soln_space[:] = 0
    soln_space.shape = (-1, w.n + 1)
    initshared_soln(csoln_space)

    jobs = []
    for i in xrange(cores):
        p = IFS(attribute, w, lock=solution_lock, pid=i)
        jobs.append(p)
        p.start()
    for j in jobs:
        j.join()

    for i in range(numifs):
        checkcontiguity(soln_space[i], w)

    """
    for i in range(numifs):
        print soln_space[i][1:].reshape(-1,10)
        print
    """
    print "Generated solution space with {} regions per solution".format(soln_space[:,0])
Example #22
class Randomizer(object):
    def __init__(self, size, seed=None):
        self.size = size
        self.local_index = 0
        self.total_count = 0
        self.Seed = seed
        self.Rstate = RandomState(seed)
        self.np_random = self.Rstate.random_sample(size)

    def next_element(self, array, index=0):
        """
            Get the next random element, and index from given array starting from index to end
        """
        i = self.next_random(index, len(array))
        return array[i], i

    def sample(self, population, k):
        # An n-length list is smaller than a k-length set
        n = len(population)
        result = [None] * k
        pool = list(population)
        for i in range(k):  # invariant:  non-selected at [0,n-i)
            j = self.next_random(0, n - i)
            result[i] = pool[j]
            pool[j] = pool[n - i - 1]  # move non-selected item into vacancy
        return result

    def random(self):
        self.local_index += 1
        self.total_count += 1
        if self.local_index >= self.size:
            #  print("Run out of random, reseting")
            self.local_index = 0

        return self.np_random[self.local_index]

    def next_random(self, low, high):
        return int(self.random() * (high - low) + low)
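A short usage sketch for the Randomizer class above (the sizes and seed are arbitrary):

r = Randomizer(size=1000, seed=3)                        # pre-draws 1000 uniform samples
chosen = r.sample(range(50), 5)                          # 5 distinct items via partial Fisher-Yates
element, idx = r.next_element(list("abcdef"), index=2)   # random pick from positions 2..5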
Example #23
    def _read(self, file_path):
        rs = RandomState(seed=1000)
        with open(cached_path(file_path), "r") as data_file:
            for _, line in enumerate(data_file.readlines()):
                items = json.loads(line)
                document = items["document"]
                annotation_id = items["annotation_id"]
                query = items.get("query", None)
                label = items.get("label", None)
                rationale = items.get("rationale", [])

                if label is not None:
                    label = str(label).replace(" ", "_")

                if rs.random_sample() < self._keep_prob:
                    instance = self.text_to_instance(
                        annotation_id=annotation_id,
                        document=document,
                        query=query,
                        label=label,
                        rationale=rationale)
                    if instance is not None:
                        yield instance
Example #24
class TestAnalyzer:

    def setUp(self):

        self.prng = RandomState(133)

        self.df_features = pd.DataFrame({'sc1': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2],
                                         'f1': self.prng.normal(0, 1, 10),
                                         'f2': self.prng.normal(1, 0.1, 10),
                                         'f3': self.prng.normal(2, 0.1, 10),
                                         'group': ['group1'] * 10},
                                        index=range(0, 10))

        self.df_features_same_score = self.df_features.copy()
        self.df_features_same_score[['sc1']] = [3] * 10

        self.df_features_with_groups = self.df_features.copy()
        self.df_features_with_groups['group'] = ['group1']*5 + ['group2']*5

        self.df_features_with_groups_and_length = self.df_features_with_groups.copy()
        self.df_features_with_groups_and_length['length'] = self.prng.normal(50, 250, 10)

        self.human_scores = pd.Series(self.prng.randint(1, 5, size=10))
        self.system_scores = pd.Series(self.prng.random_sample(10) * 5)
        self.same_human_scores = pd.Series([3] * 10)

        # get the directory containing the tests
        self.test_dir = dirname(__file__)

    def test_correlation_helper(self):

        # test that there are no nans for data frame with 10 values
        retval = Analyzer.correlation_helper(self.df_features, 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 0)

    def test_correlation_helper_for_data_with_one_row(self):
        # this should return two data frames with nans
        retval = Analyzer.correlation_helper(self.df_features[:1], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_two_rows(self):
        # this should return 1/-1 for marginal correlations and nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:2], 'sc1', 'group')
        assert_equal(abs(retval[0].values).sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_three_rows(self):
        # this should compute marginal correlations but return Nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:3], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_correlation_helper_for_data_with_four_rows(self):
        # this should compute marginal correlations and return a unity
        # matrix for partial correlations
        # it should also raise a UserWarning
        with warnings.catch_warnings(record=True) as warning_list:
            retval = Analyzer.correlation_helper(self.df_features[:4], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_almost_equal(np.abs(retval[1].values).sum(), 0.9244288637889855)
        assert issubclass(warning_list[-1].category, UserWarning)

    def test_correlation_helper_for_data_with_groups(self):
        retval = Analyzer.correlation_helper(self.df_features_with_groups, 'sc1', 'group')
        assert_equal(len(retval[0]), 2)
        assert_equal(len(retval[1]), 2)


    def test_correlation_helper_for_one_group_with_one_row(self):
        # this should return a data frames with nans for group with 1 row
        retval = Analyzer.correlation_helper(self.df_features_with_groups[:6], 'sc1', 'group')
        assert_equal(len(retval[0]), 2)
        assert_equal(len(retval[1]), 2)
        assert_equal(retval[0].isnull().values.sum(), 3)

    def test_correlation_helper_for_groups_and_length(self):
        retval = Analyzer.correlation_helper(self.df_features_with_groups_and_length,
                                             'sc1', 'group', include_length=True)
        for df in retval:
            assert_equal(len(df), 2)
            assert_equal(len(df.columns), 3)


    def test_correlation_helper_for_group_with_one_row_and_length(self):
        # this should return a data frames with nans for group with 1 row
        retval = Analyzer.correlation_helper(self.df_features_with_groups_and_length[:6],
                                             'sc1', 'group', include_length=True)
        for df in retval:
            assert_equal(len(df), 2)
            assert_equal(len(df.columns), 3)

    def test_that_correlation_helper_works_for_data_with_the_same_human_score(self):
        # this test should raise UserWarning because the determinant is very close to
        # zero. It also raises Runtime warning because
        # variance of human scores is 0.
        with warnings.catch_warnings(record=True) as warning_list:
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            retval = Analyzer.correlation_helper(self.df_features_same_score, 'sc1', 'group')
            assert_equal(retval[0].isnull().values.sum(), 3)
            assert_equal(retval[1].isnull().values.sum(), 3)
            assert issubclass(warning_list[-1].category, UserWarning)


    def test_that_metrics_helper_works_for_data_with_one_row(self):
        # There should be NaNs for SMD, correlations and both sds
        # note that we will get a value for QWK since we are
        # dividing by N and not N-1
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.human_scores[0:1],
                                            self.system_scores[0:1])
            assert_equal(evals.isnull().values.sum(), 5)

    def test_that_metrics_helper_works_for_data_with_the_same_label(self):
        # There should be NaNs for correlation and SMD.
        # Note that for a dataset with a single response
        # kappas will be 0 or 1
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.same_human_scores,
                                            self.system_scores)
            assert_equal(evals.isnull().values.sum(), 2)

    def test_metrics_helper_population_sds(self):
        df_new_features = pd.read_csv(join(self.test_dir, 'data', 'files', 'train.csv'))
        # compute the metrics when not specifying the population SDs
        computed_metrics1 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'])
        expected_metrics1 = pd.Series({'N': 500.0,
                                       'R2': 0.65340566606389394,
                                       'RMSE': 0.47958315233127197,
                                       'SMD': 0.03679030063229779,
                                       'adj_agr': 100.0,
                                       'corr': 0.82789026370069529,
                                       'exact_agr': 77.0,
                                       'h_max': 6.0,
                                       'h_mean': 3.4199999999999999,
                                       'h_min': 1.0,
                                       'h_sd': 0.81543231461565147,
                                       'kappa': 0.6273493195074531,
                                       'sys_max': 6.0,
                                       'sys_mean': 3.4500000000000002,
                                       'sys_min': 1.0,
                                       'sys_sd': 0.81782496620652367,
                                       'wtkappa': 0.8273273273273274})

        # and now compute them specifying the population SDs
        computed_metrics2 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'],
                                                    population_human_score_sd=0.5,
                                                    population_system_score_sd=0.4,
                                                    smd_method='williamson')
        # the only number that should change is the SMD
        expected_metrics2 = expected_metrics1.copy()
        expected_metrics2['SMD'] = 0.066259

        assert_series_equal(computed_metrics1.sort_index(), expected_metrics1.sort_index())
        assert_series_equal(computed_metrics2.sort_index(), expected_metrics2.sort_index())


    def test_metrics_helper_zero_system_sd(self):
        human_scores = [1, 3, 4, 2, 3, 1, 3, 4, 2, 1]
        system_score = [2.54] * 10
        computed_metrics1 = Analyzer.metrics_helper(human_scores,
                                                    system_score)
        expected_metrics1 = pd.Series({'N': 10,
                                       'R2': -0.015806451612903283,
                                       'RMSE': 1.122319027727856,
                                       'SMD': 0.11927198519188371,
                                       'adj_agr': 50.0,
                                       'corr': None,
                                       'exact_agr': 0,
                                       'h_max': 4,
                                       'h_mean': 2.4,
                                       'h_min': 1.0,
                                       'h_sd': 1.1737877907772674,
                                       'kappa': 0,
                                       'sys_max': 2.54,
                                       'sys_mean': 2.54,
                                       'sys_min': 2.54,
                                       'sys_sd': 0,
                                       'wtkappa': 0})
        # now compute DSM
        computed_metrics2 = Analyzer.metrics_helper(human_scores,
                                                    system_score,
                                                    use_diff_std_means=True)

        # the only number that should change is the SMD
        expected_metrics2 = expected_metrics1.copy()
        expected_metrics2.drop("SMD", inplace=True)
        expected_metrics2['DSM'] = None
        assert_series_equal(computed_metrics1.sort_index(),
                            expected_metrics1.sort_index(),
                            check_dtype=False)
        assert_series_equal(computed_metrics2.sort_index(),
                            expected_metrics2.sort_index(),
                            check_dtype=False)


    def test_compute_pca_less_samples_than_features(self):
        # test pca when we have less samples than
        # features. In this case the number of components
        # equals to the number of samples.
        df = pd.DataFrame({'a': range(50)})
        for i in range(100):
            df[i] = df['a'] * i
        (components, variance) = Analyzer.compute_pca(df, df.columns)
        assert_equal(len(components.columns), 50)
        assert_equal(len(variance.columns), 50)

    def test_compute_disattenuated_correlations_single_human(self):
        hm_corr = pd.Series([0.9, 0.8, 0.6],
                            index=['raw', 'raw_trim', 'raw_trim_round'])
        hh_corr = pd.Series([0.81], index=[''])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_equal(df_dis_corr.loc['raw', 'corr_disattenuated'], 1.0)

    def test_compute_disattenuated_correlations_matching_human(self):
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64, 0.36],
                            index=['All data', 'GROUP1', 'GROUP2'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, 0.5, 1.0])

    def test_compute_disattenuated_correlations_single_matching_human(self):
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81],
                            index=['All data'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_mismatched_indices(self):
        hm_corr = pd.Series([0.9, 0.6],
                            index=['All data', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64],
                            index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_negative_human(self):
        hm_corr = pd.Series([0.9, 0.8],
                            index=['All data', 'GROUP1'])
        hh_corr = pd.Series([-0.03, 0.64],
                            index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 2)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [np.nan, 1.0])
Example #25
prng = RandomState(133)
df_features = pd.DataFrame(
    {
        'sc1': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2],
        'f1': prng.normal(0, 1, 10),
        'f2': prng.normal(1, 0.1, 10),
        'f3': prng.normal(2, 0.1, 10),
        'group': ['group1'] * 10
    },
    index=range(0, 10))

df_features_same_score = df_features.copy()
df_features_same_score[['sc1']] = [3] * 10

human_scores = pd.Series(prng.randint(1, 5, size=10))
system_scores = pd.Series(prng.random_sample(10) * 5)
same_human_scores = pd.Series([3] * 10)


def test_correlation_helper():
    # test that there are no nans for data frame with 10 values
    retval = correlation_helper(df_features, 'sc1', 'group')
    assert_equal(retval[0].isnull().values.sum(), 0)
    assert_equal(retval[1].isnull().values.sum(), 0)


def test_that_correlation_helper_works_for_data_with_one_row():
    # this should return two data frames with nans
    retval = correlation_helper(df_features[:1], 'sc1', 'group')
    assert_equal(retval[0].isnull().values.sum(), 3)
    assert_equal(retval[1].isnull().values.sum(), 3)
Example #26
α_EXACT = 2.4
β_EXACT = 0.6


def my_exp_func(x):
    return α_EXACT * np.exp(β_EXACT * x)


xmin = 0
xmax = 5.0
Nsamples = 20

#x_sample = np.linspace(xmin, xmax, Nsamples)
# OR: (randomly spaced data)
#
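# (`rs` is assumed to be a numpy.random.RandomState instance defined earlier in the source file)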
x_rand = (xmax - xmin) * rs.random_sample(Nsamples) + xmin
x_sample = np.sort(x_rand)

y_exact = my_exp_func(x_sample)

NOISE_AMPLITUDE = 1.0
y_noisy = y_exact + rs.randn(Nsamples) * NOISE_AMPLITUDE

#plt.clf()
#plt.plot(x_sample, y_exact, label="exact", marker="o")
#plt.plot(x_sample, y_noisy, label="noisy", marker="o", linewidth=0)
#plt.legend()
#plt.savefig("IMG_DATA_v2.png", dpi=150)

# Do linear regression here ...
Example #27
        replace = np.argmax(sharedSoln[0])
        sharedSoln[:,replace] = sharedSoln[:,current_best]
        replace_list.append(replace)
    return replace_list
def tabulength(numP):
    '''Talliard(1990)'''
    smin = (numP-1) * 0.9
    smax = (numP-1) * 1.1
    tabu_length = 6 + (randint(0,int(smax - smin)))
    return int(tabu_length)        

'''Test Data Generation a la PySAL tests.'''                                      
#Setup the test data:
w = pysal.lat2W(10, 10)
random_init = RandomState(123456789)
z = random_init.random_sample((w.n, 2))
#print z.max(), z.min(), z.std() #Comment out to verify that the 'random' seed is identical over tests
p = np.ones((w.n, 1), float) 
floor_variable = p
floor = 3

'''START TIMING HERE - AFTER TEST DATA GENERATED'''
time0 = time.time()

#Multiprocessing setup
cores = mp.cpu_count()
cores = cores * 2
numP = len(p)+1
#Shared memory solution space
lockSoln = mp.Lock()
cSoln = Array(ctypes.c_double, numP*cores, lock=lockSoln)
Example #28
import os
import struct
import timeit

import numpy as np
import pandas as pd
from numpy.random import RandomState

rs = RandomState()

SETUP = '''
import numpy as np
import {mod}.{rng}
rs = {mod}.{rng}.RandomState()
rs.random_sample()
'''

scale_32 = scale_64 = 1
if struct.calcsize('P') == 8 and os.name != 'nt':
    # 64 bit
    scale_32 = 0.5
else:
    scale_64 = 2

# RNGS = ['mlfg_1279_861', 'mrg32k3a', 'pcg64', 'pcg32', 'mt19937', 'xorshift128', 'xorshift1024',
# 'xoroshiro128plus', 'dsfmt', 'random']
RNGS = ['mt19937']


def timer(code, setup):
    return 1000 * min(timeit.Timer(code, setup=setup).repeat(10, 10)) / 10.0
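A sketch of how the SETUP template and timer above might be combined (formatting SETUP with mod='numpy' and rng='random' is just one valid instantiation; the benchmark loop itself is an assumption):

# time a call that draws one million uniforms with the stock NumPy generator
code = 'rs.random_sample(1000000)'
setup = SETUP.format(mod='numpy', rng='random')  # imports numpy.random and builds rs = numpy.random.RandomState()
print('numpy.random (mt19937):', timer(code, setup), 'ms per call')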
Example #29
    def get_cost(self, r_float, item_num):
        if len(self.costs) == 0:
            r = RandomState(self.permute_seed)
            self.costs = r.random_sample(self.max_item)
        return float(self.costs[item_num])
Example #30
class SampleConsensus(metaclass=abc.ABCMeta):
    '''
    SampleConsensus represents the base class.
    All sample consensus methods must inherit from this class.
    '''
    def __init__(self,
                 model,
                 random=False,
                 probability=.99,
                 threshold=float('inf'),
                 max_iterations=1000):
        self._sac_model = model
        self.probability = probability
        self.distance_threshold = threshold
        self.max_iterations = max_iterations
        self._model = []
        self._inliers = []
        self._model_coefficients = None
        if random:
            self._rng = RandomState()
        else:
            self._rng = RandomState(12345)

    @abc.abstractmethod
    def compute_model(self):
        '''
        Compute the actual model. Pure virtual.
        '''
        pass

    @property
    def model(self):
        '''
        Return indices of the points that build the best model found so far.
        '''
        return self._model

    @property
    def inliers(self):
        '''
        Return the best set of inliers found so far for this model.
        '''
        return self._inliers

    @property
    def model_coefficients(self):
        '''
        Return the model coefficients of the best model found so far.
        '''
        return self._model_coefficients

    @property
    def sample_consensus_model(self):
        '''
        Get the Sample Consensus model used
        '''
        return self._sac_model

    @sample_consensus_model.setter
    def sample_consensus_model(self, value):
        '''
        Set the Sample Consensus model to use
        '''
        self._sac_model = value

    def refine_model(self, sigma=3., max_iterations=1000):
        '''
        Refine the model found.

        This loops over the model coefficients and optimizes them together
        with the set of inliers, until the change in the set of inliers is
        minimal.

        # Parameters
        sigma : float
            standard deviation multiplier for considering a sample as inlier (Mahalanobis distance)
        max_iterations : int
            the maximum number of iterations to try to refine in case the inliers keep on changing
        '''
        if self._sac_model is None:
            raise ValueError('null model!')
        logger = logging.getLogger('pcl.sac.SampleConsensus.refine_model')

        inlier_distance_threshold_sqr = self.distance_threshold * self.distance_threshold
        error_threshold = self.distance_threshold
        sigma_sqr = sigma * sigma
        refine_iterations = 0
        inlier_changed, oscillating = False, False
        inliers_sizes = []
        new_inliers = prev_inliers = self._inliers
        new_model_coefficients = self._model_coefficients
        while True:
            # Optimize the model coefficients
            new_model_coefficients = self._sac_model\
                                         .optimize_model_coefficients(prev_inliers,
                                                                      new_model_coefficients)
            inliers_sizes.append(len(prev_inliers))

            # Select the new inliers based on the optimized coefficients and new threshold
            new_inliers = self._sac_model.select_within_distance(
                new_model_coefficients, error_threshold)
            logger.debug(
                'Number of inliers found (before/after): %lu/%lu, ' +
                'with an error threshold of %g.', len(prev_inliers),
                len(new_inliers), error_threshold)

            if len(new_inliers) == 0:
                refine_iterations += 1
                if refine_iterations >= max_iterations:
                    break
                continue

            # Estimate the variance and the new threshold
            variance = self._sac_model.compute_variance()
            error_threshold = math.sqrt(
                min(inlier_distance_threshold_sqr, sigma_sqr * variance))

            logger.debug(
                'New estimated error threshold: %g on iteration %d out of %d.',
                error_threshold, refine_iterations, max_iterations)
            inlier_changed = False
            prev_inliers, new_inliers = new_inliers, prev_inliers
            # If the number of inliers changed, then we are still optimizing
            if len(new_inliers) != len(prev_inliers):
                # Check if the number of inliers is oscillating in between two values
                if len(inliers_sizes) >= 4:
                    if inliers_sizes[-1] == inliers_sizes[-3] and \
                       inliers_sizes[-2] == inliers_sizes[-4]:
                        oscillating = True
                        break
                inlier_changed = True
                continue

            # Check the values of the inlier set
            for idx, val in enumerate(prev_inliers):
                # If the value of the inliers changed, then we are still optimizing
                if val != new_inliers[idx]:
                    inlier_changed = True
                    break

            refine_iterations += 1
            if inlier_changed and refine_iterations < max_iterations:
                continue

            # If the new set of inliers is empty, we didn't do a good job refining
            if len(new_inliers) == 0:
                logger.error('Refinement failed: got an empty set of inliers!')
                return False

            if oscillating:
                logger.debug('Detected oscillations in the model refinement.')
                return True

            if not inlier_changed:
                self._inliers, new_inliers = new_inliers, self._inliers
                self._model_coefficients = new_model_coefficients
                return True

            # If no inliers have been changed anymore, then the refinement was successful
            return False

    def get_random_samples(self, indices, nr_samples):
        '''
        Get a set of randomly selected indices.

        # Parameters
            indices : list or array
                The input indices vector
            nr_samples : int
                The desired number of point indices to randomly select
        '''
        sample = self._rng.random_sample(nr_samples) * len(indices)
        return np.array(indices, copy=False)[sample.astype(int)]
Example #31
from rsmtool.analysis import (correlation_helper,
                              metrics_helper)

prng = RandomState(133)
df_features = pd.DataFrame({'sc1': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2],
                            'f1': prng.normal(0, 1, 10),
                            'f2': prng.normal(1, 0.1, 10),
                            'f3': prng.normal(2, 0.1, 10),
                            'group': ['group1']*10},
                            index=range(0, 10))

df_features_same_score = df_features.copy()
df_features_same_score[['sc1']] = [3]*10

human_scores = pd.Series(prng.randint(1, 5, size=10))
system_scores = pd.Series(prng.random_sample(10)*5)
same_human_scores = pd.Series([3]*10)

def test_correlation_helper():
    # test that there are no nans for data frame with 10 values
    retval = correlation_helper(df_features, 'sc1', 'group')
    assert_equal(retval[0].isnull().values.sum(), 0)
    assert_equal(retval[1].isnull().values.sum(), 0)


def test_that_correlation_helper_works_for_data_with_one_row():
    # this should return two data frames with nans
    retval = correlation_helper(df_features[:1], 'sc1', 'group')
    assert_equal(retval[0].isnull().values.sum(), 3)
    assert_equal(retval[1].isnull().values.sum(), 3)
Example #32
    def anytime_explain(self, instance, callback=None, update_func=None, update_prediction=None):
        data_rows, no_atr = self.data.X.shape
        class_value = self.model(instance)[0]
        prng = RandomState(self.seed)

        self.init_arrays(no_atr)
        attr_values = self.get_atr_column(instance)

        batch_mx_size = self.batch_size * no_atr
        z_sq = abs(st.norm.ppf(self.p_val/2))**2

        tiled_inst = self.tile_instance(instance)
        inst1 = copy.deepcopy(tiled_inst)
        inst2 = copy.deepcopy(tiled_inst)

        worst_case = self.max_iter*no_atr
        time_point = time.time()
        update_table = False

        domain = Domain([ContinuousVariable("Score"),
                         ContinuousVariable("Error")],
                        metas=[StringVariable(name="Feature"), StringVariable(name="Value")])

        if update_prediction is not None:
            update_prediction(class_value)

        def create_res_table():
            nonzero = self.steps != 0
            expl_scaled = (self.expl[nonzero] /
                           self.steps[nonzero]).reshape(1, -1)
            # creating return array
            ips = np.hstack((expl_scaled.T, np.sqrt(
                z_sq * self.var[nonzero] / self.steps[nonzero]).reshape(-1, 1)))
            table = Table.from_numpy(domain, ips,
                                     metas=np.hstack((np.asarray(self.atr_names)[nonzero[0]].reshape(-1, 1),
                                                      attr_values[nonzero[0]].reshape(-1, 1))))
            return table

        while not(all(self.iterations_reached[0, :] > self.max_iter)):
            prog = 1 - np.sum(self.max_iter -
                              self.iterations_reached)/worst_case
            if (callback(int(prog*100))):
                break
            if not(any(self.iterations_reached[0, :] > self.max_iter)):
                a = np.argmax(prng.multinomial(
                    1, pvals=(self.var[0, :]/(np.sum(self.var[0, :])))))
            else:
                a = np.argmin(self.iterations_reached[0, :])

            perm = (prng.random_sample(batch_mx_size).reshape(
                self.batch_size, no_atr)) > 0.5
            rand_data = self.data.X[prng.randint(0,
                                                 data_rows, size=self.batch_size), :]
            inst1.X = np.copy(tiled_inst.X)
            inst1.X[perm] = rand_data[perm]
            inst2.X = np.copy(inst1.X)

            inst1.X[:, a] = tiled_inst.X[:, a]
            inst2.X[:, a] = rand_data[:, a]
            f1 = self._get_predictions(inst1, class_value)
            f2 = self._get_predictions(inst2, class_value)

            diff = np.sum(f1 - f2)
            self.expl[0, a] += diff

            # update variance
            self.steps[0, a] += self.batch_size
            self.iterations_reached[0, a] += self.batch_size
            d = diff - self.mu[0, a]
            self.mu[0, a] += d / self.steps[0, a]
            self.M2[0, a] += d * (diff - self.mu[0, a])
            self.var[0, a] = self.M2[0, a] / (self.steps[0, a] - 1)

            if time.time() - time_point > 1:
                update_table = True
                time_point = time.time()

            if update_table:
                update_table = False
                update_func(create_res_table())

            # exclude from sampling if necessary
            needed_iter = z_sq * self.var[0, a] / (self.error**2)
            if (needed_iter <= self.steps[0, a]) and (self.steps[0, a] >= self.min_iter) or (self.steps[0, a] > self.max_iter):
                self.iterations_reached[0, a] = self.max_iter + 1

        return class_value, create_res_table()
import os
import struct
import timeit

import pandas as pd
import numpy as np
from numpy.random import RandomState

rs = RandomState()

SETUP = '''
import numpy as np
import {mod}.{rng}
rs = {mod}.{rng}.RandomState()
rs.random_sample()
'''

scale_32 = scale_64 = 1
if struct.calcsize('P') == 8 and os.name != 'nt':
    # 64 bit
    scale_32 = 0.5
else:
    scale_64 = 2

RNGS = ['mlfg_1279_861', 'mrg32k3a', 'pcg64', 'pcg32', 'mt19937', 'xorshift128', 'xorshift1024',
        'xoroshiro128plus', 'dsfmt', 'random']


def timer(code, setup):
    return 1000 * min(timeit.Timer(code, setup=setup).repeat(10, 10)) / 10.0
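# A minimal usage sketch (not part of the original file): assumes the PRNGs in
# RNGS live in a package named "randomstate" (as in the historical randomstate
# project) and that 'random' maps to numpy.random.
def run_benchmarks(draw='rs.random_sample(1000000)'):
    results = {}
    for rng in RNGS:
        mod = 'numpy' if rng == 'random' else 'randomstate'
        results[rng] = timer(draw, SETUP.format(mod=mod, rng=rng))
    # milliseconds per one million draws, fastest first
    return pd.Series(results).sort_values()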
Example #34
0
    host = MPI.Get_processor_name()
    info = MPI.INFO_NULL

    nlocalcores = mp.cpu_count()  # one core is the manager

    if rank == 0:
        """
        The rank 0 process is the master manager.  This process:

        1. Reads the data from the shapefile or DB
        2. Generates the W Object
        3. Sends the W object and attribute vector to all children
        """
        w = ps.lat2W(8,8)
        random_int = RandomState(123456789)
        attribute = random_int.random_sample((w.n, 1))
        numifs = 8

        data = {'w':w,
                'numifs':numifs}

        print("I have {} cores in a shared memory space".format(nlocalcores))
    else:
        data = None

    #Broadcast 2 sets of data, a list of Python objects and an array of attribute information
    data = comm.bcast(data, root=0) #Inefficient Python object, better to get full, pass and reform?
    if rank != 0:
        w = data['w']
        numifs = data['numifs']
        attribute = np.empty((w.n, 1), dtype=float)
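    # Sketch (assumption, not in the original snippet): the comment above says the
    # attribute array is broadcast as well; with mpi4py this would use the
    # buffer-based Bcast so every rank fills its preallocated array in place.
    comm.Bcast(attribute, root=0)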
Example #35
0
    def anytime_explain(self, instance, callback=None, update_func=None, update_prediction=None):
        data_rows, no_atr = self.data.X.shape
        class_value = self.model(instance)[0]
        prng = RandomState(self.seed)

        self.init_arrays(no_atr)
        attr_values = self.get_atr_column(instance)

        batch_mx_size = self.batch_size * no_atr
        z_sq = abs(st.norm.ppf(self.p_val/2))**2

        tiled_inst = self.tile_instance(instance)
        inst1 = copy.deepcopy(tiled_inst)
        inst2 = copy.deepcopy(tiled_inst)

        worst_case = self.max_iter*no_atr
        time_point = time.time()
        update_table = False

        domain = Domain([ContinuousVariable("Score"),
                         ContinuousVariable("Error")],
                        metas=[StringVariable(name="Feature"), StringVariable(name="Value")])

        if update_prediction is not None:
            update_prediction(class_value)

        def create_res_table():
            nonzero = self.steps != 0
            expl_scaled = (self.expl[nonzero] / self.steps[nonzero]).reshape(1, -1)
            # creating return array
            ips = np.hstack((expl_scaled.T, np.sqrt(
                z_sq * self.var[nonzero] / self.steps[nonzero]).reshape(-1, 1)))
            table = Table.from_numpy(
                domain, ips,
                metas=np.hstack((np.asarray(self.atr_names)[nonzero[0]].reshape(-1, 1),
                                 attr_values[nonzero[0]].reshape(-1, 1))))
            return table

        while not all(self.iterations_reached[0, :] > self.max_iter):
            prog = 1 - np.sum(self.max_iter - self.iterations_reached) / worst_case
            if callback(int(prog * 100)):
                break
            if not any(self.iterations_reached[0, :] > self.max_iter):
                a = np.argmax(prng.multinomial(
                    1, pvals=self.var[0, :] / np.sum(self.var[0, :])))
            else:
                a = np.argmin(self.iterations_reached[0, :])

            perm = (prng.random_sample(batch_mx_size).reshape(
                self.batch_size, no_atr)) > 0.5
            rand_data = self.data.X[prng.randint(0,
                                                 data_rows, size=self.batch_size), :]
            inst1.X = np.copy(tiled_inst.X)
            inst1.X[perm] = rand_data[perm]
            inst2.X = np.copy(inst1.X)

            inst1.X[:, a] = tiled_inst.X[:, a]
            inst2.X[:, a] = rand_data[:, a]
            f1 = self._get_predictions(inst1, class_value)
            f2 = self._get_predictions(inst2, class_value)

            diff = np.sum(f1 - f2)
            self.expl[0, a] += diff

            # update variance
            self.steps[0, a] += self.batch_size
            self.iterations_reached[0, a] += self.batch_size
            d = diff - self.mu[0, a]
            self.mu[0, a] += d / self.steps[0, a]
            self.M2[0, a] += d * (diff - self.mu[0, a])
            self.var[0, a] = self.M2[0, a] / (self.steps[0, a] - 1)
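            # (Welford-style online update: mu tracks the running mean of the
            # summed batch differences and M2 the sum of squared deviations,
            # so var is the running sample variance used for the CI below.)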

            if time.time() - time_point > 1:
                update_table = True
                time_point = time.time()

            if update_table:
                update_table = False
                update_func(create_res_table())

            # exclude from sampling if necessary
            needed_iter = z_sq * self.var[0, a] / (self.error**2)
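            # needed_iter is the usual sample-size bound n >= z^2 * var / error^2
            # for a confidence interval of half-width self.error; once it is met
            # (after at least min_iter steps) or max_iter is exceeded, the
            # attribute is retired by pushing its counter past max_iter.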
            if (needed_iter <= self.steps[0, a]) and (self.steps[0, a] >= self.min_iter) or (self.steps[0, a] > self.max_iter):
                self.iterations_reached[0, a] = self.max_iter + 1

        return class_value, create_res_table()
class TestAnalyzer:

    def setUp(self):

        self.prng = RandomState(133)

        self.df_features = pd.DataFrame({'sc1': [1, 2, 3, 4, 1, 2, 3, 4, 1, 2],
                                         'f1': self.prng.normal(0, 1, 10),
                                         'f2': self.prng.normal(1, 0.1, 10),
                                         'f3': self.prng.normal(2, 0.1, 10),
                                         'group': ['group1'] * 10},
                                        index=range(0, 10))

        self.df_features_same_score = self.df_features.copy()
        self.df_features_same_score[['sc1']] = [3] * 10

        self.human_scores = pd.Series(self.prng.randint(1, 5, size=10))
        self.system_scores = pd.Series(self.prng.random_sample(10) * 5)
        self.same_human_scores = pd.Series([3] * 10)

        # get the directory containing the tests
        self.test_dir = dirname(__file__)

    def test_correlation_helper(self):

        # test that there are no nans for data frame with 10 values
        retval = Analyzer.correlation_helper(self.df_features, 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 0)

    def test_that_correlation_helper_works_for_data_with_one_row(self):
        # this should return two data frames with nans
        # we expect a runtime warning here so let's suppress it
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            retval = Analyzer.correlation_helper(self.df_features[:1], 'sc1', 'group')
            assert_equal(retval[0].isnull().values.sum(), 3)
            assert_equal(retval[1].isnull().values.sum(), 3)

    def test_that_correlation_helper_works_for_data_with_two_rows(self):
        # this should return 1/-1 for marginal correlations and nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:2], 'sc1', 'group')
        assert_equal(abs(retval[0].values).sum(), 3)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_that_correlation_helper_works_for_data_with_three_rows(self):
        # this should compute marginal correlations but return Nans for
        # partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:3], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_equal(retval[1].isnull().values.sum(), 3)

    def test_that_correlation_helper_works_for_data_with_four_rows(self):
        # this should compute marginal correlations and return a unity
        # matrix for partial correlations
        retval = Analyzer.correlation_helper(self.df_features[:4], 'sc1', 'group')
        assert_equal(retval[0].isnull().values.sum(), 0)
        assert_almost_equal(abs(retval[1].values).sum(), 3)

    def test_that_correlation_helper_works_for_data_with_the_same_label(self):

        # this should return two data frames with nans
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            retval = Analyzer.correlation_helper(self.df_features_same_score, 'sc1', 'group')
            assert_equal(retval[0].isnull().values.sum(), 3)
            assert_equal(retval[1].isnull().values.sum(), 3)

    def test_that_metrics_helper_works_for_data_with_one_row(self):
        # There should be NaNs for SMD, correlations and both sds
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.human_scores[0:1],
                                            self.system_scores[0:1])
            assert_equal(evals.isnull().values.sum(), 4)

    def test_that_metrics_helper_works_for_data_with_the_same_label(self):
        # There should be NaNs for correlation.
        # Note that for a dataset with a single response
        # kappas will be 0 or 1
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            evals = Analyzer.metrics_helper(self.same_human_scores,
                                            self.system_scores)
            assert_equal(evals.isnull().values.sum(), 1)

    def test_metrics_helper_population_sds(self):
        df_new_features = pd.read_csv(join(self.test_dir, 'data', 'files', 'train.csv'))
        # compute the metrics when not specifying the population SDs
        computed_metrics1 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'])
        expected_metrics1 = pd.Series({'N': 500.0,
                                       'R2': 0.65340566606389394,
                                       'RMSE': 0.47958315233127197,
                                       'SMD': 0.036736365006090885,
                                       'adj_agr': 100.0,
                                       'corr': 0.82789026370069529,
                                       'exact_agr': 77.0,
                                       'h_max': 6.0,
                                       'h_mean': 3.4199999999999999,
                                       'h_min': 1.0,
                                       'h_sd': 0.81543231461565147,
                                       'kappa': 0.6273493195074531,
                                       'sys_max': 6.0,
                                       'sys_mean': 3.4500000000000002,
                                       'sys_min': 1.0,
                                       'sys_sd': 0.81782496620652367,
                                       'wtkappa': 0.82732732732732728})

        # and now compute them specifying the population SDs
        computed_metrics2 = Analyzer.metrics_helper(df_new_features['score'],
                                                    df_new_features['score2'],
                                                    population_human_score_sd=0.5,
                                                    population_system_score_sd=0.4)
        # the only number that should change is the SMD
        expected_metrics2 = expected_metrics1.copy()
        expected_metrics2['SMD'] = 0.066259
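        # Consistent with SMD = (sys_mean - h_mean) / sqrt((sd_h**2 + sd_sys**2) / 2):
        # 0.03 / sqrt((0.5**2 + 0.4**2) / 2) ~= 0.066259 (assumed formula, inferred
        # from the expected values, not checked against rsmtool internals).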

        assert_series_equal(computed_metrics1.sort_index(), expected_metrics1.sort_index())
        assert_series_equal(computed_metrics2.sort_index(), expected_metrics2.sort_index())

    def test_compute_pca_less_components_than_features(self):
        # test pca when we have less components than features
        df = pd.DataFrame({'a': range(100)})
        for i in range(100):
            df[i] = df['a'] * i
        (components, variance) = Analyzer.compute_pca(df, df.columns)
        assert_equal(len(components.columns), 100)
        assert_equal(len(variance.columns), 100)

    def test_compute_disattenuated_correlations_single_human(self):
        hm_corr = pd.Series([0.9, 0.8, 0.6],
                            index=['raw', 'raw_trim', 'raw_trim_round'])
        hh_corr = pd.Series([0.81], index=[''])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_equal(df_dis_corr.loc['raw', 'corr_disattenuated'], 1.0)

    def test_compute_disattenuated_correlations_matching_human(self):
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64, 0.36],
                            index=['All data', 'GROUP1', 'GROUP2'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, 0.5, 1.0])
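        # Consistent with corr_disattenuated = hm_corr / sqrt(hh_corr), e.g.
        # 0.4 / sqrt(0.64) = 0.5 (assumed relationship, inferred from the data).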

    def test_compute_disattenuated_correlations_single_matching_human(self):
        hm_corr = pd.Series([0.9, 0.4, 0.6],
                            index=['All data', 'GROUP1', 'GROUP2'])
        hh_corr = pd.Series([0.81],
                            index=['All data'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_mismatched_indices(self):
        hm_corr = pd.Series([0.9, 0.6],
                            index=['All data', 'GROUP2'])
        hh_corr = pd.Series([0.81, 0.64],
                            index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 3)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [1.0, np.nan, np.nan])

    def test_compute_disattenuated_correlations_negative_human(self):
        hm_corr = pd.Series([0.9, 0.8],
                            index=['All data', 'GROUP1'])
        hh_corr = pd.Series([-0.03, 0.64],
                            index=['All data', 'GROUP1'])
        df_dis_corr = Analyzer.compute_disattenuated_correlations(hm_corr,
                                                                  hh_corr)
        assert_equal(len(df_dis_corr), 2)
        assert_array_equal(df_dis_corr['corr_disattenuated'], [np.nan, 1.0])
class MobilityGenerator(object):
    """ Generates intermodal mobility for SUMO starting from a synthetic population. """

    _conf = None
    _profiling = None

    _random_generator = None

    _sumo_network = None
    _sumo_parkings = collections.defaultdict(list)
    _parking_cache = dict()
    _parking_position = dict()
    _taz_weights = dict()
    _buildings_by_taz = dict()
    _edges_by_taz = dict()

    _blacklisted_edges = set()

    _all_trips = collections.defaultdict(dict)

    def __init__(self, conf, profiling=False):
        """
         Initialize the synthetic population.
            :param conf: dictionary with the configurations
            :param profiling=False: enable cProfile
        """

        self._conf = conf
        self._profiling = profiling

        self._random_generator = RandomState(seed=self._conf['seed'])

        logging.info('Starting TraCI with file %s.', conf['sumocfg'])
        sumocfg = '{}/{}'.format(BASE_DIR, conf['sumocfg'])
        traci.start(['sumo', '-c', sumocfg])

        logging.info('Loading SUMO net file %s%s', BASE_DIR,
                     conf['SUMOnetFile'])
        self._sumo_network = sumolib.net.readNet('{}/{}'.format(
            BASE_DIR, conf['SUMOnetFile']))

        logging.info('Loading SUMO parking lots from file %s%s', BASE_DIR,
                     conf['SUMOadditionals']['parkings'])
        self._load_parkings('{}/{}'.format(
            BASE_DIR, conf['SUMOadditionals']['parkings']))

        logging.info('Loading TAZ weights from %s%s', BASE_DIR,
                     conf['population']['tazWeights'])
        self._load_weights_from_csv('{}/{}'.format(
            BASE_DIR, conf['population']['tazWeights']))

        logging.info('Loading buildings weights from %s%s', BASE_DIR,
                     conf['population']['buildingsWeight'])
        self._load_buildings_weight_from_csv_dir('{}/{}'.format(
            BASE_DIR, conf['population']['buildingsWeight']))

        logging.info('Loading edges in each TAZ from %s%s', BASE_DIR,
                     conf['population']['tazDefinition'])
        self._load_edges_from_taz('{}/{}'.format(
            BASE_DIR, conf['population']['tazDefinition']))

        logging.info('Computing the number of entities for each vType..')
        self._compute_vehicles_per_type()

    def mobility_generation(self):
        """ Generate the mobility for the synthetic population. """
        logging.info('Generating trips for each vType..')
        self._compute_trips_per_type()

    def save_mobility(self):
        """ Save the generated trips to files. """
        logging.info('Saving trips files..')
        self._saving_trips_to_files()

    @staticmethod
    def close_traci():
        """ Artefact to close TraCI properly. """
        logging.info('Closing TraCI.')
        traci.close()

    ## ---------------------------------------------------------------------------------------- ##
    ##                                          Loaders                                         ##
    ## ---------------------------------------------------------------------------------------- ##

    def _load_parkings(self, filename):
        """ Load parkings ids from XML file. """
        xml_tree = xml.etree.ElementTree.parse(filename).getroot()
        for child in xml_tree:
            if (child.tag == 'parkingArea' and child.attrib['id'] in
                    self._conf['intermodalOptions']['parkingAreaWhitelist']):
                edge = child.attrib['lane'].split('_')[0]
                position = float(child.attrib['startPos']) + 2.5
                self._sumo_parkings[edge].append(child.attrib['id'])
                self._parking_position[child.attrib['id']] = position

    def _load_weights_from_csv(self, filename):
        """ Load the TAZ weight from a CSV file. """
        with open(filename, 'r') as csvfile:
            weightreader = csv.reader(csvfile)
            header = None
            for row in weightreader:
                if not header:
                    header = row
                else:
                    self._taz_weights[int(row[0])] = {
                        header[0]: int(row[0]),
                        header[1]: row[1],
                        header[2]: int(row[2]),
                        header[3]: float(row[3]),
                        'weight': (int(row[2]) / float(row[3])),
                    }

    def _load_buildings_weight_from_csv_dir(self, directory):
        """ Load the buildings weight from multiple CSV files. """

        allfiles = [
            os.path.join(directory, f) for f in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, f))
        ]
        for filename in sorted(allfiles):
            logging.debug('Loading %s', filename)
            with open(filename, 'r') as csvfile:
                weightreader = csv.reader(csvfile)
                header = None
                taz = None
                buildings = []
                for row in weightreader:
                    if not header:
                        header = row
                    else:
                        taz = row[0]
                        buildings.append((
                            float(row[3]),  # weight
                            row[4],  # generic edge
                            row[5]))  # pedestrian edge

                if len(buildings) < 10:
                    logging.debug('Dropping %s, only %d buildings found.',
                                  filename, len(buildings))
                    continue

                weighted_buildings = []
                cum_sum = 0.0
                for weight, g_edge, p_edge in sorted(buildings):
                    cum_sum += weight
                    weighted_buildings.append(
                        (cum_sum, g_edge, p_edge, weight))
                self._buildings_by_taz[taz] = weighted_buildings

    def _load_edges_from_taz(self, filename):
        """ Load edges from the TAZ file. """
        xml_tree = xml.etree.ElementTree.parse(filename).getroot()
        for child in xml_tree:
            if child.tag == 'taz':
                self._edges_by_taz[
                    child.attrib['id']] = child.attrib['edges'].split(' ')

    ## ---------------------------------------------------------------------------------------- ##
    ##                                Mobility Generation                                       ##
    ## ---------------------------------------------------------------------------------------- ##

    def _compute_vehicles_per_type(self):
        """
        Compute the absolute number of trips that are going to be created
        for each vehicle type, given a population.
        """
        logging.info('Population: %d', self._conf['population']['entities'])

        for v_type in self._conf['distribution'].keys():
            self._conf['distribution'][v_type]['tot'] = int(
                self._conf['population']['entities'] *
                self._conf['distribution'][v_type]['perc'])
            logging.info('\t %s: %d', v_type,
                         self._conf['distribution'][v_type]['tot'])

    def _normal_departure_time(self):
        """ Return the departure time, computed using a normal distribution. """
        departure = int(
            numpy.random.normal(loc=self._conf['peak']['mean'],
                                scale=self._conf['peak']['std'],
                                size=1))
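        # Rejection sampling: redraw from the normal distribution until the
        # departure time falls inside the configured simulation interval.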
        while (departure < self._conf['interval']['begin']
               or departure > self._conf['interval']['end']):
            departure = int(
                numpy.random.normal(loc=self._conf['peak']['mean'],
                                    scale=self._conf['peak']['std'],
                                    size=1))
        return departure

    def _compute_trips_per_type(self):
        """ Compute the trips for the synthetic population for each vType. """

        for v_type in self._conf['distribution'].keys():
            total = 0
            for key, area in self._conf['distribution'][v_type][
                    'composition'].items():
                vehicles = int(self._conf['distribution'][v_type]['tot'] *
                               area['perc'])
                logging.info('[%s] Computing %d trips from %s to %s ... ',
                             v_type, vehicles, area['from'], area['to'])

                if self._profiling:
                    _pr = cProfile.Profile()
                    _pr.enable()

                for veh_id in tqdm(range(vehicles)):
                    ## Generating departure time
                    _depart = self._normal_departure_time()
                    if _depart not in self._all_trips[v_type].keys():
                        self._all_trips[v_type][_depart] = []

                    ## Trip generation

                    # Parking lot at the end of the trip.
                    with_parking = 'withParking' in area.keys(
                    ) and area['withParking']

                    # Modes for intermodal trips.
                    modes = None
                    if 'modes' in area.keys() and area['modes']:
                        modes = area['modes']

                    _from = None
                    _to = None

                    # (Intermodal) trip
                    _from, _to, _mode, _stages = self._find_allowed_pair_traci(
                        v_type, modes, _depart,
                        self._conf['taz'][area['from']],
                        self._conf['taz'][area['to']], with_parking)
                    modes = _mode

                    # Fixing the parking lots stops from the configuration.
                    parking_id = None
                    if with_parking:
                        parking_id = self._has_parking_lot(_to)
                    if not parking_id:
                        with_parking = False

                    # Trip creation
                    complete_trip = {
                        'id': '{}_{}_{}'.format(v_type, key, veh_id),
                        'depart': _depart,
                        'from': _from,
                        'to': _to,
                        'type': v_type,
                        'mode': modes,
                        'withParking': with_parking,
                        'PLid': parking_id,
                        'stages': _stages,
                    }
                    complete_trip['sumoTrip'] = self._generate_sumo_trip(
                        complete_trip)

                    self._all_trips[v_type][_depart].append(complete_trip)
                    total += 1

                if self._profiling:
                    _pr.disable()
                    _s = io.StringIO()
                    _ps = pstats.Stats(_pr, stream=_s).sort_stats('cumulative')
                    _ps.print_stats(10)
                    print(_s.getvalue())
                    input("Press any key to continue..")

            logging.info('Generated %d trips for %s.', total, v_type)

    ## ---- PARKING AREAS: location and selection ---- ##

    def _get_parking_id(self, edge):
        """ Randomly select one of the parkings. """
        if not self._sumo_parkings[edge]:
            return None
        pos = self._random_generator.randint(0, len(self._sumo_parkings[edge]))
        return self._sumo_parkings[edge][pos]

    def _has_parking_lot(self, edge):
        """ Retrieve the parking area ID. """
        parking_id = None
        if edge in self._sumo_parkings.keys():
            parking_id = self._get_parking_id(edge)
        return parking_id

    def _check_parkings_cache(self, edge):
        """ Check among the previously computed results of _find_closest_parking """
        if edge in self._parking_cache.keys():
            return self._parking_cache[edge]
        return None

    def _find_closest_parking(self, edge):
        """ Given an edge, find the closest parking area. """
        distance = sys.float_info.max

        ret = self._check_parkings_cache(edge)
        if ret:
            return ret

        for p_edge, parkings in self._sumo_parkings.items():
            _is_allowed = False
            for parking in parkings:
                if parking in self._conf['intermodalOptions'][
                        'parkingAreaWhitelist']:
                    _is_allowed = True
                    break
            if not _is_allowed:
                continue

            try:
                route = traci.simulation.findIntermodalRoute(
                    p_edge, edge, walkFactor=.9, pType="pedestrian")
            except traci.exceptions.TraCIException:
                logging.error(
                    '_find_closest_parking: findIntermodalRoute %s -> %s failed.',
                    p_edge, edge)
                route = None

            if route:
                cost = self._cost_from_route(route)
                if distance > cost:
                    distance = cost
                    ret = p_edge, route

        if ret:
            self._parking_cache[edge] = ret
            return ret

        logging.fatal('Edge %s is not reachable from any parking lot.', edge)
        self._blacklisted_edges.add(edge)
        return None, None

    ## ----     Functions for _compute_trips_per_type: _find_allowed_pair_traci            ---- ##

    def _find_allowed_pair_traci(self, v_type, modes, departure, from_area,
                                 to_area, with_parking):
        """ Return an origin and an allowed destination, with mode and route stages.

            findRoute(self, fromEdge, toEdge, vType="", depart=-1., routingMode=0)

            findIntermodalRoute(
                self, fromEdge, toEdge, modes="", depart=-1., routingMode=0, speed=-1.,
                walkFactor=-1., departPos=-1., arrivalPos=-1., departPosLat=-1.,
                pType="", vType="", destStop=""):
        """

        counter = 0
        _is_intermodal = False
        selected_mode = None
        selected_route = None
        if modes:
            _is_intermodal = True

        if _is_intermodal:
            od_found = False
            while not od_found:
                ## Origin and Destination Selection
                from_edge, to_edge = self._select_pair(from_area, to_area,
                                                       True)

                ## Evaluate all the possible (intermodal) routes
                solutions = self._find_intermodal_route(
                    from_edge, to_edge, modes, departure, with_parking)
                if solutions:
                    winner = sorted(solutions)[0]  # lowest weighted cost wins
                    selected_mode = winner[1]
                    selected_route = winner[2]
                    od_found = True

                counter += 1
                if counter % 10 == 0:
                    logging.debug(
                        '%d pairs done, still looking for the good one..',
                        counter)

        else:
            route = None
            while not self._is_valid_route(None, route):
                ## Origin and Destination Selection
                from_edge, to_edge = self._select_pair(from_area, to_area)
                try:
                    route = traci.simulation.findRoute(from_edge,
                                                       to_edge,
                                                       vType=v_type)
                except traci.exceptions.TraCIException:
                    logging.debug(
                        '_find_allowed_pair_traci: findRoute FAILED.')
                    route = None

                counter += 1
                if counter % 10 == 0:
                    logging.debug(
                        '%d pairs done, still looking for the good one..',
                        counter)

            selected_mode = v_type
            selected_route = route

        if counter >= 10:
            logging.debug('It required %d iterations to find a valid pair.',
                          counter)
        return from_edge, to_edge, selected_mode, selected_route

    def _find_intermodal_route(self, from_edge, to_edge, modes, departure,
                               with_parking):
        """ Evaluate all the possible (intermodal) routes. """
        solutions = list()
        for mode, weight in modes:
            _last_mile = None
            _modes, _ptype, _vtype = self._get_mode_parameters(mode)

            if with_parking and _vtype in self._conf['intermodalOptions'][
                    'vehicleAllowedParking']:
                ## Find the closest parking area
                p_edge, _last_mile = self._find_closest_parking(to_edge)
                if _last_mile:
                    try:
                        route = traci.simulation.findIntermodalRoute(
                            from_edge,
                            p_edge,
                            depart=departure,
                            walkFactor=.9,  # speed=1.0
                            modes=_modes,
                            pType=_ptype,
                            vType=_vtype)
                    except traci.exceptions.TraCIException:
                        logging.error(
                            '_find_intermodal_route: findIntermodalRoute w parking FAILED.'
                        )
                        route = None
                    if (self._is_valid_route(_modes, route)
                            and route[-1].type == tc.STAGE_DRIVING):
                        route[-1].destStop = self._get_parking_id(p_edge)
                        route.extend(_last_mile)
                        solutions.append(
                            (self._cost_from_route(route) * weight, mode,
                             route))
            else:
                try:
                    route = traci.simulation.findIntermodalRoute(
                        from_edge,
                        to_edge,
                        depart=departure,
                        walkFactor=.9,  # speed=1.0
                        modes=_modes,
                        pType=_ptype,
                        vType=_vtype)
                except traci.exceptions.TraCIException:
                    logging.error(
                        '_find_intermodal_route: findIntermodalRoute wout parking FAILED.'
                    )
                    route = None

                if self._is_valid_route(_modes, route):
                    solutions.append(
                        (self._cost_from_route(route) * weight, mode, route))

        return solutions

    ## ---- PAIR SELECTION: origin - destination - mode ---- ##

    def _select_pair(self, from_area, to_area, pedestrian=False):
        """ Randomly select one pair, choosing between buildings and TAZ. """
        from_taz = str(self._select_taz_from_weighted_area(from_area))
        to_taz = str(self._select_taz_from_weighted_area(to_area))

        if from_taz in self._buildings_by_taz.keys(
        ) and to_taz in self._buildings_by_taz.keys():
            return self._select_pair_from_taz_wbuildings(
                self._buildings_by_taz[from_taz][:],
                self._buildings_by_taz[to_taz][:], pedestrian)
        return self._select_pair_from_taz(self._edges_by_taz[from_taz][:],
                                          self._edges_by_taz[to_taz][:])

    def _select_taz_from_weighted_area(self, area):
        """ Select a TAZ from an area using its weight. """
        selection = self._random_generator.uniform(0, 1)
        total_weight = sum([self._taz_weights[taz]['weight'] for taz in area])
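        # Roulette-wheel selection: walk the normalized cumulative weights until
        # the uniform draw in [0, 1) is exceeded.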
        cumulative = 0.0
        for taz in area:
            cumulative += self._taz_weights[taz]['weight'] / total_weight
            if selection <= cumulative:
                return taz
        return None  # this is mathematically impossible;
        # if it happens, there is a mistake in the weights.

    def _valid_pair(self, from_edge, to_edge):
        """ This is just to avoid a HUGE while condition.
            sumolib.net.edge.is_fringe()
        """
        from_edge_sumo = self._sumo_network.getEdge(from_edge)
        to_edge_sumo = self._sumo_network.getEdge(to_edge)

        if from_edge_sumo.is_fringe(from_edge_sumo.getOutgoing()):
            return False
        if to_edge_sumo.is_fringe(to_edge_sumo.getIncoming()):
            return False
        if from_edge == to_edge:
            return False
        if to_edge in self._blacklisted_edges:
            return False
        if not to_edge_sumo.allows('pedestrian'):
            return False
        return True

    def _select_pair_from_taz(self, from_taz, to_taz):
        """ Randomly select one pair from a TAZ.
            Important: from_taz and to_taz MUST be passed by copy.
            Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions.
        """

        from_edge = from_taz.pop(
            self._random_generator.randint(0, len(from_taz)))
        to_edge = to_taz.pop(self._random_generator.randint(0, len(to_taz)))

        _to = False
        while not self._valid_pair(from_edge, to_edge) and from_taz and to_taz:
            if not self._sumo_network.getEdge(to_edge).allows(
                    'pedestrian') or _to:
                to_edge = to_taz.pop(
                    self._random_generator.randint(0, len(to_taz)))
                _to = False
            else:
                from_edge = from_taz.pop(
                    self._random_generator.randint(0, len(from_taz)))
                _to = True

        return from_edge, to_edge

    def _select_pair_from_taz_wbuildings(self, from_buildings, to_buildings,
                                         pedestrian):
        """ Randomly select one pair from a TAZ.
            Important: from_buildings and to_buildings MUST be passed by copy.
            Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions.
        """

        from_edge, _index = self._get_weighted_edge(
            from_buildings, self._random_generator.random_sample(), False)
        del from_buildings[_index]
        to_edge, _index = self._get_weighted_edge(
            to_buildings, self._random_generator.random_sample(), pedestrian)
        del to_buildings[_index]

        _to = True
        while not self._valid_pair(
                from_edge, to_edge) and from_buildings and to_buildings:
            if not self._sumo_network.getEdge(to_edge).allows(
                    'pedestrian') or _to:
                to_edge, _index = self._get_weighted_edge(
                    to_buildings, self._random_generator.random_sample(),
                    pedestrian)
                del to_buildings[_index]
                _to = False
            else:
                from_edge, _index = self._get_weighted_edge(
                    from_buildings, self._random_generator.random_sample(),
                    False)
                del from_buildings[_index]
                _to = True

        return from_edge, to_edge

    @staticmethod
    def _get_weighted_edge(edges, double, pedestrian):
        """ Return an edge and its position using the cumulative sum of the weights in the area. """
        pos = -1
        ret = None
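        # `edges` holds (cumulative_weight, generic_edge, pedestrian_edge, weight)
        # tuples; `double` is a uniform draw, and the scan returns the entry just
        # before the first cumulative sum that exceeds it (or the last entry if
        # none does).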
        for cum_sum, g_edge, p_edge, _ in edges:
            if ret and cum_sum > double:
                return ret, pos
            if pedestrian and p_edge:
                ret = p_edge
            elif not pedestrian and g_edge:
                ret = g_edge
            elif g_edge:
                ret = g_edge
            else:
                ret = p_edge
            pos += 1

        return edges[-1][1], len(edges) - 1

    ## ---- INTERMODAL: modes and route validity ---- ##

    @staticmethod
    def _get_mode_parameters(mode):
        """ Return the correct TraCI parameters for the requested mode.
            Parameters: _modes, _ptype, _vtype
        """
        if mode == 'public':
            return 'public', '', ''
        elif mode == 'bicycle':
            return 'bicycle', '', 'bicycle'
        elif mode == 'walk':
            return '', 'pedestrian', ''
        return 'car', '', mode

    @staticmethod
    def _is_valid_route(mode, route):
        """ Handle simultaneously findRoute and findIntermodalRoute results. """
        if route is None:
            # traci failed
            return False
        elif mode is None:
            # only for findRoute
            if route.edges:
                return True
        elif mode == 'public':
            for stage in route:
                if stage.line:
                    return True
        elif mode == 'car':
            for stage in route:
                if stage.type == tc.STAGE_DRIVING and stage.edges:
                    return True
        else:
            for stage in route:
                if stage.edges:
                    return True
        return False

    @staticmethod
    def _cost_from_route(route):
        """ Compute the route cost. """
        cost = 0.0
        for stage in route:
            cost += stage.cost
        return cost

    ## ---------------------------------------------------------------------------------------- ##
    ##                                Saving trips to files                                     ##
    ## ---------------------------------------------------------------------------------------- ##

    ROUTES_TPL = """<?xml version="1.0" encoding="UTF-8"?>

<!--
    Monaco SUMO Traffic (MoST) Scenario
    Copyright (C) 2019
    Lara CODECA

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
-->

<routes xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://sumo.dlr.de/xsd/routes_file.xsd"> {trips}
</routes>"""

    VEHICLE = """
    <vehicle id="{id}" type="{v_type}" depart="{depart}" departLane="best" arrivalPos="{arrival}">{route}{stop}
    </vehicle>"""

    ROUTE = """
        <route edges="{edges}"/>"""

    STOP_PARKING = """
        <stop parkingArea="{id}" until="{until}"/>"""

    PERSON = """
    <person id="{id}" type="pedestrian" depart="{depart}">{stages}
    </person>"""

    WALK = """
        <walk edges="{edges}"/>"""

    WALK_BUS = """
        <walk edges="{edges}" busStop="{busStop}"/>"""

    RIDE_BUS = """
        <ride busStop="{busStop}" lines="{lines}" intended="{intended}" depart="{depart}"/>"""

    RIDE_TRIGGERED = """
        <ride from="{from_edge}" to="{to_edge}" lines="{vehicle_id}"/>"""

    VEHICLE_TRIGGERED = """
    <vehicle id="{id}" type="{v_type}" depart="triggered" departLane="best" arrivalPos="{arrival}">{route}{stop}
    </vehicle>"""

    def _generate_sumo_trip(self, vehicle):
        """ Generate the SUMO tag version of the trip. """

        all_trips = ''

        _begin = self._conf['stopUntil']['begin']
        _end = self._conf['stopUntil']['end']

        if vehicle['type'] == 'pedestrian':
            triggered = ''
            stages = ''
            for stage in vehicle['stages']:
                if stage.type == tc.STAGE_WALKING:
                    if stage.destStop:
                        stages += self.WALK_BUS.format(edges=' '.join(
                            stage.edges),
                                                       busStop=stage.destStop)
                    else:
                        stages += self.WALK.format(edges=' '.join(stage.edges))
                elif stage.type == tc.STAGE_DRIVING:
                    if stage.line != stage.intended:
                        # intended is the transport id, so it must be different
                        stages += self.RIDE_BUS.format(busStop=stage.destStop,
                                                       lines=stage.line,
                                                       intended=stage.intended,
                                                       depart=stage.depart)
                    else:
                        # triggered vehicle (line == intended), per SUMO convention.
                        _tr_id = '{}_tr'.format(vehicle['id'])
                        _route = self.ROUTE.format(edges=' '.join(stage.edges))
                        _stop = ''
                        if stage.destStop:
                            _stop = self.STOP_PARKING.format(
                                id=stage.destStop,
                                until=self._random_generator.randint(
                                    _begin, _end))
                        _arrival = 'random'
                        if _stop:
                            _arrival = self._parking_position[stage.destStop]
                        triggered += self.VEHICLE_TRIGGERED.format(
                            id=_tr_id,
                            v_type=vehicle['mode'],
                            route=_route,
                            stop=_stop,
                            arrival=_arrival)
                        stages += self.RIDE_TRIGGERED.format(
                            from_edge=stage.edges[0],
                            to_edge=stage.edges[-1],
                            vehicle_id=_tr_id)
            all_trips += triggered
            all_trips += self.PERSON.format(id=vehicle['id'],
                                            depart=vehicle['depart'],
                                            stages=stages)
        else:
            _route = self.ROUTE.format(edges=' '.join(vehicle['stages'].edges))
            _stop = ''
            if vehicle['withParking']:
                _stop = self.STOP_PARKING.format(
                    id=vehicle['PLid'],
                    until=self._random_generator.randint(_begin, _end))
            _arrival = 'random'
            if _stop:
                _arrival = self._parking_position[vehicle['PLid']]
            all_trips += self.VEHICLE.format(id=vehicle['id'],
                                             v_type=vehicle['type'],
                                             depart=vehicle['depart'],
                                             route=_route,
                                             stop=_stop,
                                             arrival=_arrival)

        return all_trips

    def _saving_trips_to_files(self):
        """ Save all the trips to files, divided by vType. """

        _begin = self._conf['stopUntil']['begin']
        _end = self._conf['stopUntil']['end']

        for v_type, dict_trips in self._all_trips.items():
            filename = '{}/{}{}.rou.xml'.format(BASE_DIR,
                                                self._conf['outputPrefix'],
                                                v_type)
            with open(filename, 'w') as tripfile:
                all_trips = ''
                for time in sorted(dict_trips.keys()):
                    for vehicle in dict_trips[time]:
                        all_trips += vehicle['sumoTrip']
                tripfile.write(self.ROUTES_TPL.format(trips=all_trips))
            logging.info('Saved %s', filename)
class Environment():
    """ Loads, stores, and interacts with the SAGA environment required for the mobility generation. """
    def __init__(self, conf, sumo, logger, profiling=False):
        """
        Initialize the synthetic population.
            :param conf: dictionary with the configurations
            :param sumo: already initialized SUMO simulation (TraCI or LibSUMO)
            :param profiling=False: enable cProfile
        """
        self._conf = conf
        self._sumo = sumo
        self.logger = logger

        self._max_retry_number = 1000
        if 'maxNumTry' in conf:
            self._max_retry_number = conf['maxNumTry']

        self._profiling = profiling

        self._random_generator = RandomState(seed=self._conf['seed'])

        self.logger.info('Loading SUMO net file %s', conf['SUMOnetFile'])
        self.sumo_network = sumolib.net.readNet(conf['SUMOnetFile'])

        self.logger.info('Loading SUMO parking lots from file %s',
                         conf['SUMOadditionals']['parkings'])
        self._blacklisted_edges = set()
        self._sumo_parkings = collections.defaultdict(list)
        self._parking_cache = dict()
        self._parking_position = dict()
        self._load_parkings(conf['SUMOadditionals']['parkings'])

        self.logger.info('Loading SUMO taxi stands from file %s',
                         conf['intermodalOptions']['taxiStands'])
        self._sumo_taxi_stands = collections.defaultdict(list)
        self._taxi_stand_cache = dict()
        self._taxi_stand_position = dict()
        self._load_taxi_stands(conf['intermodalOptions']['taxiStands'])

        self.logger.info('Loading TAZ weights from %s',
                         conf['population']['tazWeights'])
        self._taz_weights = dict()
        self._load_weights_from_csv(conf['population']['tazWeights'])

        self.logger.info('Loading buildings weights from %s',
                         conf['population']['buildingsWeight'])
        self._buildings_by_taz = dict()
        self._load_buildings_weight_from_csv_dir(
            conf['population']['buildingsWeight'])

        self.logger.info('Loading edges in each TAZ from %s',
                         conf['population']['tazDefinition'])
        self._edges_by_taz = dict()
        self._load_edges_from_taz(conf['population']['tazDefinition'])

    # LOADERS

    def _load_parkings(self, filename):
        """ Load parkings ids from XML file. """
        if not os.path.isfile(filename):
            return
        xml_tree = xml.etree.ElementTree.parse(filename).getroot()
        for child in xml_tree:
            if child.tag != 'parkingArea':
                continue
            if child.attrib['id'] not in self._conf['intermodalOptions'][
                    'parkingAreaBlacklist']:
                edge = child.attrib['lane'].split('_')[0]
                position = float(child.attrib['startPos']) + 2.5
                self._sumo_parkings[edge].append(child.attrib['id'])
                self._parking_position[child.attrib['id']] = position

    def _load_taxi_stands(self, filename):
        """ Taxi stands ids from XML file. """
        if not os.path.isfile(filename):
            return
        xml_tree = xml.etree.ElementTree.parse(filename).getroot()
        for child in xml_tree:
            if child.tag != 'parkingArea':
                continue
            if child.attrib['id'] not in self._conf['intermodalOptions'][
                    'taxiStandsBlacklist']:
                edge = child.attrib['lane'].split('_')[0]
                position = float(child.attrib['startPos']) + 2.5
                self._sumo_taxi_stands[edge].append(child.attrib['id'])
                self._taxi_stand_position[child.attrib['id']] = position

    def _load_weights_from_csv(self, filename):
        """ Load the TAZ weight from a CSV file. """
        with open(filename, 'r') as csvfile:
            weightreader = csv.reader(csvfile)
            header = []
            for row in weightreader:
                if not row:
                    continue  # empty line
                if not header:
                    header = row
                elif row:  # ignoring empty lines
                    self._taz_weights[row[0]] = {
                        header[0]: row[0],
                        header[1]: row[1],
                        header[2]: int(row[2]),
                        header[3]: float(row[3]),
                        'weight': (int(row[2]) / float(row[3])),
                    }

    def _load_buildings_weight_from_csv_dir(self, directory):
        """ Load the buildings weight from multiple CSV files. """

        allfiles = [
            os.path.join(directory, f) for f in os.listdir(directory)
            if os.path.isfile(os.path.join(directory, f))
        ]
        for filename in sorted(allfiles):
            self.logger.debug('Loading %s', filename)
            with open(filename, 'r') as csvfile:
                weightreader = csv.reader(csvfile)
                header = None
                taz = None
                buildings = []
                for row in weightreader:
                    if not row:
                        continue  # empty line
                    if header is None:
                        header = row
                    else:
                        taz = row[0]
                        buildings.append((
                            float(row[3]),  # weight
                            row[4],  # generic edge
                            row[5]))  # pedestrian edge

                if len(buildings) < 10:
                    self.logger.debug('Dropping %s, only %d buildings found.',
                                      filename, len(buildings))
                    continue

                weighted_buildings = []
                cum_sum = 0.0
                for weight, g_edge, p_edge in sorted(buildings):
                    cum_sum += weight
                    weighted_buildings.append(
                        (cum_sum, g_edge, p_edge, weight))
                self._buildings_by_taz[taz] = weighted_buildings

    def _load_edges_from_taz(self, filename):
        """ Load edges from the TAZ file. """
        xml_tree = xml.etree.ElementTree.parse(filename).getroot()
        for child in xml_tree:
            if child.tag == 'taz':
                self._edges_by_taz[
                    child.attrib['id']] = child.attrib['edges'].split(' ')

    # LANES & EDGES

    def get_random_lane_from_tazs(self):
        """
        Retrieve a random edge usable by a taxi based on the option
            "intermodalOptions":"taxiFleetInitialTAZs": ['taz', ...]
        """
        _locations = self._conf['intermodalOptions']['taxiFleetInitialTAZs']
        _lane = None
        _retry_counter = 0
        while not _lane and _retry_counter < self._max_retry_number * 100:
            try:
                if _locations:
                    _taz = self._random_generator.choice(_locations)
                    _edges = self._edges_by_taz[_taz]
                    _edge = self._random_generator.choice(_edges)
                else:
                    _edge = self._random_generator.choice(
                        self.sumo_network.getEdges()).getID()
                _lane = self.get_stopping_lane(_edge, ['taxi', 'passenger'])
            except sagaexceptions.TripGenerationGenericError:
                _retry_counter += 1
                _lane = None
        if _lane is None:
            self.logger.critical(
                '_get_random_lane_from_TAZs with "%s" generated %d errors, '
                'taxi generation aborted..', pformat(_locations),
                _retry_counter)
        return _lane

    def get_all_neigh_edges(self, origin, distance):
        """ Returns all the edges reachable from the origin within the given radius. """
        _edge_shape = self.sumo_network.getEdge(origin).getShape()
        x_coord = _edge_shape[-1][0]
        y_coord = _edge_shape[-1][1]
        edges = self.sumo_network.getNeighboringEdges(x_coord,
                                                      y_coord,
                                                      r=distance)
        edges = [edge.getID() for edge, _ in edges]
        return edges

    def get_arrival_pos_from_edge(self, edge, position):
        """
        If the position is too close to the end, it may generate an error with
        findIntermodalRoute.
        """
        length = self.sumo_network.getEdge(edge).getLength()
        if length < self._conf['minEdgeAllowed']:
            return None
        if position > length - 1.0:
            return length - 1.0
        if position < 1.0:
            return 1.0
        return position

    def get_random_pos_from_edge(self, edge):
        """ Return a random position in the given edge. """
        length = self.sumo_network.getEdge(edge).getLength()
        if length < self._conf['stopBufferDistance']:
            # edge shorter than the stop buffer: stop in the middle
            return length / 2.0

        # avoid the proximity of the intersection
        begin = self._conf['stopBufferDistance'] / 2.0
        end = length - begin
        position = (end -
                    begin) * self._random_generator.random_sample() + begin
        self.logger.debug('get_random_pos_from_edge: [%s] %f (%f)', edge,
                          position, length)
        return position

    ## ---- PAIR SELECTION: origin - destination - mode ---- ##

    def _select_pair(self, from_area, to_area, pedestrian=False):
        """ Randomly select one pair, choosing between buildings and TAZ. """
        from_taz = str(self._select_taz_from_weighted_area(from_area))
        to_taz = str(self._select_taz_from_weighted_area(to_area))

        if from_taz in self._buildings_by_taz.keys(
        ) and to_taz in self._buildings_by_taz.keys():
            return self._select_pair_from_taz_wbuildings(
                self._buildings_by_taz[from_taz][:],
                self._buildings_by_taz[to_taz][:], pedestrian)
        return self._select_pair_from_taz(self._edges_by_taz[from_taz][:],
                                          self._edges_by_taz[to_taz][:])

    def _select_taz_from_weighted_area(self, area):
        """ Select a TAZ from an area using its weight. """
        selection = self._random_generator.uniform(0, 1)
        total_weight = sum([self._taz_weights[taz]['weight'] for taz in area])
        if total_weight <= 0:
            error_msg = 'Error with area {}, total sum of weights is {}. '.format(
                area, total_weight)
            error_msg += 'It must be strictly positive.'
            raise Exception(error_msg, [(taz, self._taz_weights[taz]['weight'])
                                        for taz in area])
        cumulative = 0.0
        for taz in area:
            cumulative += self._taz_weights[taz]['weight'] / total_weight
            if selection <= cumulative:
                return taz
        return None  # this is mathematically impossible,
        # if this happens, there is a mistake in the weights.

    def _valid_pair(self, from_edge, to_edge):
        """ This is just to avoid a HUGE while condition.
            sumolib.net.edge.is_fringe()
        """
        from_edge_sumo = self.sumo_network.getEdge(from_edge)
        to_edge_sumo = self.sumo_network.getEdge(to_edge)

        if from_edge_sumo.is_fringe(from_edge_sumo.getOutgoing()):
            return False
        if to_edge_sumo.is_fringe(to_edge_sumo.getIncoming()):
            return False
        if from_edge == to_edge:
            return False
        if to_edge in self._blacklisted_edges:
            return False
        if not to_edge_sumo.allows('pedestrian'):
            return False
        return True

    def _select_pair_from_taz(self, from_taz, to_taz):
        """ Randomly select one pair from a TAZ.
            Important: from_taz and to_taz MUST be passed by copy.
            Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions.
        """

        from_edge = from_taz.pop(
            self._random_generator.randint(0, len(from_taz)))
        to_edge = to_taz.pop(self._random_generator.randint(0, len(to_taz)))

        _to = False
        while not self._valid_pair(from_edge, to_edge) and from_taz and to_taz:
            if not self.sumo_network.getEdge(to_edge).allows(
                    'pedestrian') or _to:
                to_edge = to_taz.pop(
                    self._random_generator.randint(0, len(to_taz)))
                _to = False
            else:
                from_edge = from_taz.pop(
                    self._random_generator.randint(0, len(from_taz)))
                _to = True

        return from_edge, to_edge

    def _select_pair_from_taz_wbuildings(self, from_buildings, to_buildings,
                                         pedestrian):
        """ Randomly select one pair from a TAZ.
            Important: from_buildings and to_buildings MUST be passed by copy.
            Note: sumonet.getEdge(from_edge).allows(v_type) does not support distributions.
        """

        from_edge, _index = self._get_weighted_edge(
            from_buildings, self._random_generator.random_sample(), False)
        del from_buildings[_index]
        to_edge, _index = self._get_weighted_edge(
            to_buildings, self._random_generator.random_sample(), pedestrian)
        del to_buildings[_index]

        _to = True
        while not self._valid_pair(
                from_edge, to_edge) and from_buildings and to_buildings:
            if not self.sumo_network.getEdge(to_edge).allows(
                    'pedestrian') or _to:
                to_edge, _index = self._get_weighted_edge(
                    to_buildings, self._random_generator.random_sample(),
                    pedestrian)
                del to_buildings[_index]
                _to = False
            else:
                from_edge, _index = self._get_weighted_edge(
                    from_buildings, self._random_generator.random_sample(),
                    False)
                del from_buildings[_index]
                _to = True

        return from_edge, to_edge

    @staticmethod
    def _get_weighted_edge(edges, double, pedestrian):
        """ Return an edge and its position using the cumulative sum of the weigths in the area. """
        pos = -1
        ret = None
        for cum_sum, g_edge, p_edge, _ in edges:
            if ret and cum_sum > double:
                return ret, pos
            if pedestrian and p_edge:
                ret = p_edge
            elif not pedestrian and g_edge:
                ret = g_edge
            elif g_edge:
                ret = g_edge
            else:
                ret = p_edge
            pos += 1
        return edges[-1][1], len(edges) - 1

    def get_stopping_lane(self, edge, vtypes=['passenger']):
        """
        Returns the vehicle-friendly stopping lane closest to the sidewalk that
        respects the configuration parameter 'minEdgeAllowed'.
        """
        for lane in self.sumo_network.getEdge(edge).getLanes():
            if lane.getLength() >= self._conf['minEdgeAllowed']:
                for vtype in vtypes:
                    if lane.allows(vtype):
                        return lane.getID()
        raise sagaexceptions.TripGenerationGenericError(
            '"{}" cannot stop on edge {}'.format(vtypes, edge))

    ## PARKING AREAS: location and selection

    def get_parking_position(self, parking_id):
        """ Returns the position for a given parking. """
        return self._parking_position[parking_id]

    def find_closest_parking(self, edge):
        """ Given and edge, find the closest parking area. """
        distance = sys.float_info.max

        ret = self._check_parkings_cache(edge)
        if ret:
            return ret

        p_id = None

        for p_edge, parkings in self._sumo_parkings.items():
            for parking in parkings:
                if parking not in self._conf['intermodalOptions'][
                        'parkingAreaBlacklist']:
                    p_id = parking
                    break
            if p_id:
                try:
                    route = self._sumo.simulation.findIntermodalRoute(
                        p_edge, edge, pType="pedestrian")
                except TraCIException:
                    route = None
                if route and not isinstance(route, list):
                    # findIntermodalRoute returned a list up to SUMO 1.4.0
                    # (included) and a tuple afterwards
                    route = list(route)
                if route:
                    cost = sumoutils.cost_from_route(route)
                    if distance > cost:
                        distance = cost
                        ret = p_id, p_edge, route

        if ret:
            self._parking_cache[edge] = ret
            return ret

        self.logger.fatal('Edge %s is not reachable from any parking lot.',
                          edge)
        self._blacklisted_edges.add(edge)
        return None, None, None

    def _check_parkings_cache(self, edge):
        """ Check among the previously computed results of _find_closest_parking """
        if edge in self._parking_cache.keys():
            return self._parking_cache[edge]
        return None
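
# Illustration (not part of the original example): a minimal, self-contained
# sketch of the cumulative-weight draw that _select_taz_from_weighted_area and
# _get_weighted_edge above rely on. The TAZ ids and weights below are invented
# for the demonstration, and RandomState stands in for self._random_generator.
from numpy.random import RandomState

def select_taz_from_weighted_area(area, taz_weights, rng):
    """Draw one TAZ id from `area` with probability proportional to its weight."""
    selection = rng.uniform(0, 1)
    total_weight = sum(taz_weights[taz] for taz in area)
    if total_weight <= 0:
        raise ValueError(
            'Total weight for area {} must be strictly positive.'.format(area))
    cumulative = 0.0
    for taz in area:
        cumulative += taz_weights[taz] / total_weight
        if selection <= cumulative:
            return taz
    return area[-1]  # only reachable through floating-point rounding

rng = RandomState(42)
weights = {'taz_a': 3.0, 'taz_b': 1.0, 'taz_c': 6.0}
draws = [select_taz_from_weighted_area(['taz_a', 'taz_b', 'taz_c'], weights, rng)
         for _ in range(1000)]
print({taz: draws.count(taz) for taz in weights})  # roughly 300 / 100 / 600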


Example #39
0
class Generator():
    seed = None
    random = None
    def __init__(self, seed=1):
        super(Generator, self).__init__()
        self.random = RandomState(seed)
        self.seed = seed
        
    def reseed(self):
        self.random = RandomState(self.seed)
        
    def randSyllable(self):
        c1_dice = ( self.random.random_sample() < 0.91 ) #Chance that a regular consonant will start the syllable
        s1_dice = ( self.random.random_sample() < 0.05 ) #Chance that a special conjunction consonant is used
        v1_dice = ( self.random.random_sample() < 0.85 ) #Chance that a regular vowel will be used
        c2_add_dice = ( self.random.random_sample() < 0.28 ) #Chance that it has an ending consonant
        c2_dice = ( self.random.random_sample() < 0.91 ) #Chance that a regular consonant will end the syllable
        s2_dice = ( self.random.random_sample() < 0.03 ) #Chance that the ending has an addon consonant
        
        c1 = self.random.choice(REGULAR_CONSONANTS) if c1_dice else self.random.choice(COMPOSITE_CONSONANTS)
        s1 = self.random.choice(SPECIAL_CONSONANTS) if s1_dice else ''
        v1 = self.random.choice(REGULAR_VOWELS) if v1_dice else self.random.choice(COMPOSITE_VOWELS)
        c2 = ( self.random.choice(REGULAR_CONSONANTS) if c2_dice else self.random.choice(ENDING_CONSONANTS) ) if c2_add_dice else ''
        s2 = self.random.choice(ADDON_ENDING_CONSONANTS) if s2_dice else ''
        syllable = c1+s1+v1+c2+s2
#         print(syllable)
        return syllable
    
    def randWord(self, s=2):
        """ s = number of syllables in int """
        word = ''
        for syllable in range(0, s):
            word += self.randSyllable()
        return word
    
    def randSentence(self, meter=[2, 2, 1, 2, 3, 2, 1, 2, 2]):
        sentence = []
        for syllable in meter:
            sentence.append(self.randWord(syllable))
        return ' '.join(sentence)
    
    def randParagraph(self):
        paragraph = []
        rand_wordcount = [ self.random.randint(3, 6) for i in range(0, self.random.randint( 4, 5 )) ]
        for words in rand_wordcount:
            rand_meter = [ self.random.randint(1, 4) for i in range(0, words) ]
            sentence = self.randSentence(rand_meter)
            paragraph.append(sentence)
        return '. '.join(paragraph)
    
    def randDictionary(self, word_list=['apple', 'banana', 'cake', 'dog', 'elephant', 'fruit', 'guava', 'human', 'island', 'joke', 'king', 'love', 'mother', 'nature', 'ocean', 'pie', 'queen', 'random', 'start', 'tree', 'up', 'vine', 'wisdom', 'yellow', 'zoo' ]):
        rand_dict_e2r = { word: self.randWord() for word in word_list }
        rand_dict_r2e = { v: k for k, v in rand_dict_e2r.items() }
        ordered_e2r = OrderedDict()
        print("English to Random Language")
        for key in sorted(rand_dict_e2r.keys()):
            print(key+ ' : '+rand_dict_e2r[key])
            ordered_e2r[key] = rand_dict_e2r[key]
        ordered_r2e = OrderedDict()
        print("\n\nRandom Language to English")
        for key in sorted(rand_dict_r2e.keys()):
            print(key+ ' : '+rand_dict_r2e[key])
            ordered_r2e[key] = rand_dict_r2e[key]
        return ( ordered_e2r, ordered_r2e )
    
    def convertWord(self, word):
        word = word.lower()
        saved_state = self.random.get_state()
        
        # Word mapping method : md5
        # To make it more natural, this mapping should be updated
        # to reflect natural language patterns
        md5 = hashlib.md5(bytes(word, encoding='utf-8'))
        wordseed = ( self.seed + int.from_bytes(md5.digest(), 'little') ) % (2**31)
#         print(wordseed)
        self.random.seed( wordseed )
        randword = self.randWord( math.ceil( abs( self.random.normal(2, 1) ) ) )
        self.random.set_state(saved_state)
        return randword
     
    def convertSentence(self, sentence):
        words = sentence.split()
        converted = [self.convertWord(word) for word in words]
        return ' '.join(converted)
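
# Usage sketch (not part of the original example). It assumes the original
# module's imports (hashlib, math, OrderedDict, numpy's RandomState) and the
# consonant/vowel constant lists (REGULAR_CONSONANTS, COMPOSITE_CONSONANTS,
# SPECIAL_CONSONANTS, REGULAR_VOWELS, COMPOSITE_VOWELS, ENDING_CONSONANTS,
# ADDON_ENDING_CONSONANTS) are defined as in the source project.
gen = Generator(seed=7)
print(gen.randWord(3))       # a pseudo-word built from three random syllables
print(gen.randSentence())    # sentence following the default meter
gen.reseed()                 # rewind the random stream to the stored seed
word_a = gen.convertWord('apple')
word_b = gen.convertWord('apple')
print(word_a == word_b)      # True: the md5-based mapping is deterministic per word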