Example #1
def test_norm():
    passed_count = 0

    print("Test 1: letter frequencies in English text")
    temp_dict = letters_unscaled.copy()
    d = Distribution(temp_dict)
    d.normalize()
    passed = True
    for key in d.d:
        if not math.isclose(letters[key], d.d[key]):
            passed = False
            print("Probability of", key, d.d[key],
                  "does not match expected probability", letters[key])
    if passed:
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")
    print("Test 2: drawing from an urn")
    temp_dict = rgb_unscaled.copy()
    d = Distribution(temp_dict)
    d.normalize()
    passed = True
    for key in rgb:
        if not math.isclose(rgb[key], d.d[key]):
            passed = False
            print("Probability of", key, d.d[key],
                  "does not match expected probability", letters[key])
    if passed:
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")
    return passed_count
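The tests above only exercise the Distribution interface; the class itself is not shown. A minimal sketch consistent with these tests, assuming the constructor stores a dict of outcome weights in d and normalize() rescales them in place:

class Distribution:
    def __init__(self, d):
        self.d = dict(d)  # outcome -> weight (raw counts or probabilities)

    def normalize(self):
        # rescale the weights in place so they sum to 1
        total = sum(self.d.values())
        for key in self.d:
            self.d[key] /= total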
Example #2
    def __init__(self, env, node, resource, properties):
        """
        Creates a link and automatically assign a uniqueid to the link
        It requires a simpy environment where to operate.
        It also require a simpy resource to operate correctly and
        reserve the channel for a message.

        :param env: Simpy environment
        :param node: Node which the link is refered to
        :param resource: unitary resource used to lock the link
        :param properties: properties of the link in the graphml that
            needs to be evaluated
        """
        self._id = Link.__link_counter
        Link.__link_counter += 1
        self._env = env
        self._node = node
        self._res = resource
        self._delay = None
        if Link.DELAY in properties:
            self._delay = Distribution(json.loads(properties[Link.DELAY]))
        if Link.POLICY_FUNCTION in properties:
            self._policy_function = PolicyFunction(properties[Link.POLICY_FUNCTION])
        else:
            self._policy_function = PolicyFunction(PolicyFunction.PASS_EVERYTHING)
        self._mrai = 30.0
        if Link.MRAI in properties:
            self._mrai = float(properties[Link.MRAI])
        self._mrai_active = False
        self._jitter = Distribution(json.loads('{"distribution": "unif", \
                       "min": 0, "max": ' + str(self._mrai*0.25)  + ', "int": 0.01}'))
Example #3
File: models.py Project: isuraed/bluechip
    def __init__(self):
        super(PlayerManager, self).__init__()
        self.first_name_dist = Distribution(self.create_frequency_dist("FirstName"))
        self.last_name_dist = Distribution(self.create_frequency_dist("LastName"))
        self.state_dist = Distribution(self.create_frequency_dist("State"))
        self.position_dist = Distribution(self.create_frequency_dist("Position"))
        self.talent_dist = Distribution(self.create_frequency_dist("ProfilePoint"))
Example #4
    def __init__(self, value):
        """Parses the settings in the value parameter.

        Arguments:
            value {str|dict} -- If a string, it is a pointer to a JSON-encoded file containing the settings. If a dict, then it is the settings.
        """

        if type(value) is str:
            # Treat the value as a file locator.
            with open(value, 'r') as settingsFile:
                data = json.load(settingsFile)
        else:
            data = value

        self.topology_type = TopologyType[data['type']]
        if self.topology_type == TopologyType.STATIC_UNIFORM_DELAY:
            self.network_delay = Distribution(data['networkDelay'])
            self.static_file = data['file']
            self.static_graph = None
        else:
            self.number_of_miners = data['numberOfMiners']

            if self.topology_type == TopologyType.GEOMETRIC_UNIFORM_DELAY or self.topology_type == TopologyType.LOBSTER_UNIFORM_DELAY:
                # Graphs with uniform delays for message transmission.
                self.network_delay = Distribution(data['networkDelay'])

                if self.topology_type == TopologyType.GEOMETRIC_UNIFORM_DELAY:
                    self.radius = data['radius']
                elif self.topology_type == TopologyType.LOBSTER_UNIFORM_DELAY:
                    self.p1 = data['p1']
                    self.p2 = data['p2']
            else:
                raise NotImplementedError(
                    "Selected topology type is not implemented.")
Example #5
File: tree.py Project: milj/random-forest
def build_tree(columns, target_column, rows, score_type):
    """Recursively build a decision tree by finding the best column, value and operation to split on"""

    if not rows:
        return Node(distribution=Distribution({}))

    best_partition = {}
    score = Distribution(value_counts(rows, target_column)).score(score_type)

    for column in [column for column in columns if column != target_column]:
        column_value_set = {row[column] for row in rows} - {''}
        assert (
            # all values have the same operations, or no values
            len({operations_for(value)
                 for value in column_value_set}) <= 1)
        if len(column_value_set) == 0:
            continue

        for operation in operations_for(next(iter(column_value_set))):
            for pivot in column_value_set:

                positive_rows, negative_rows = partition(
                    rows, column, operation, pivot)

                score_gain = (
                    score - (len(positive_rows) / len(rows)) *
                    Distribution(value_counts(
                        positive_rows, target_column)).score(score_type) -
                    (len(negative_rows) / len(rows)) *
                    Distribution(value_counts(
                        negative_rows, target_column)).score(score_type))

                if score_gain > best_partition.get('score_gain', 0.0):
                    best_partition = {
                        'score_gain': score_gain,
                        'column': column,
                        'operation': operation,
                        'pivot': pivot,
                        'positive_rows': positive_rows,
                        'negative_rows': negative_rows,
                    }

    if best_partition.get('score_gain', 0) > 0:
        return Node(
            column=best_partition['column'],
            operation=best_partition['operation'],
            pivot=best_partition['pivot'],
            positive_branch=build_tree(columns=columns,
                                       target_column=target_column,
                                       rows=best_partition['positive_rows'],
                                       score_type=score_type),
            negative_branch=build_tree(columns=columns,
                                       target_column=target_column,
                                       rows=best_partition['negative_rows'],
                                       score_type=score_type),
        )
    else:
        return Node(
            distribution=Distribution(value_counts(rows, target_column)))
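build_tree relies on helpers (value_counts, operations_for, partition, Node) that are not shown. A minimal sketch of value_counts, assuming it returns a mapping from target value to frequency, which is the shape Distribution is constructed from here:

from collections import Counter

def value_counts(rows, target_column):
    # assumed helper: frequency of each target value among the rows
    return Counter(row[target_column] for row in rows)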
Example #6
def main():
    dist1 = Distribution(id=0, vals=[2], probs=[1])
    dist2 = Distribution(id=1, vals=[5], probs=[1])
    dist3 = Distribution(id=2, vals=[2, 8], probs=[0.5, 0.5])

    env = Environment(total_bandwidth = 10,\
        distribution_list=[dist1,dist2,dist3], \
        mu_list=[1,2,3], lambda_list=[3,2,1],\
        num_of_each_type_distribution_list=[300,300,300])

    action_dim = 2
    state_dim = 6
    rpm = DQNPytorchReplayMemory(MEMORY_SIZE)  # DQN experience replay pool

    # build the agent following the parl framework
    model = DQNPtorchModel(state_dim=state_dim, act_dim=action_dim)
    algorithm = DQNPytorchAlg(model,
                              act_dim=action_dim,
                              gamma=GAMMA,
                              lr=LEARNING_RATE)
    agent = DQNPytorchAgent(
        algorithm,
        obs_dim=state_dim,
        act_dim=action_dim,
        e_greed=0.1,  # probability of picking a random action, for exploration
        e_greed_decrement=1e-6)  # gradually reduce exploration as training converges

    # load the model
    # save_path = './dqn_model.ckpt'
    # agent.restore(save_path)

    # pre-fill the replay memory so early training has enough sample diversity
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(env, agent, rpm)

    max_episode = 2000

    # start training
    episode = 0
    while episode < max_episode:  # train for max_episode episodes; test episodes do not count toward the total
        # train part
        for i in range(0, 50):
            total_reward = run_episode(env, agent, rpm)
            episode += 1

        # test part
        eval_reward, num_accept = evaluate(env, agent)  # render=True to watch the run
        print(
            f'episode {episode}: evaluate reward {eval_reward}, num accepted: {num_accept}'
        )

    # training finished, save the model
    save_path = './dqn_pytorch_model.ckpt'
    agent.save(save_path)
Example #7
    def initialize(self, config):
        self.run_number = config.run_number
        self.application = config.get_param(Cluster.APPLICATION)
        self.size = Distribution(config.get_param(Cluster.SIZE))
        self.offer = Distribution(config.get_param(Cluster.OFFER))
        self.resource = config.get_param(Cluster.RESOURCE)
        self.resource_scale = int(config.get_param(Cluster.RESOURCE_SCALE))
        Configuration().host = config.get_param(Cluster.ENDPOINT)
        self.api = swagger_client.DeploymentsApi()
        self.nodes_api = swagger_client.NodesApi()

        self.index = 0
Example #8
    def __init__(self, config, channel, x, y):
        """
        Constructor.
        :param config: the set of configs loaded by the simulator
        :param channel: the channel to which frames are sent
        :param x: x position
        :param y: y position
        """
        Module.__init__(self)

        # number of slots in the contention window
        self.window_slots_count = config.get_param(Node.WINDOW_SIZE)
        # duration in seconds of the channel listening period
        self.listening_duration = config.get_param(Node.LISTENING_TIME)
        # duration in seconds of each slot
        self.slot_duration = self.listening_duration

        # load configuration parameters
        self.datarate = config.get_param(Node.DATARATE)
        self.queue_size = config.get_param(Node.QUEUE)
        self.interarrival = Distribution(config.get_param(Node.INTERARRIVAL))
        self.size = Distribution(config.get_param(Node.SIZE))

        self.proc_time = Distribution(config.get_param(Node.PROC_TIME))
        self.maxsize = config.get_param(Node.MAXSIZE)
        # queue of packets to be sent
        self.queue = []

        # current state
        self.state = None
        self.switch_state(Node.IDLE)

        # save position
        self.x = x
        self.y = y
        # save channel
        self.channel = channel

        # number of packets being received
        self.packets_in_air = 0

        # number of window slots we still have to wait before transmitting
        self.slot_countdown = 0

        # first packet in the current sequence of receiving packets
        self.rx_sequence_first_packet = None

        # hooks to events in the queue for future manipulation
        self.end_listenting_event_hook = None
        self.end_slot_event_hook = None
Example #9
def test_prob():
    unif_set = {1, 5, 7, 8}
    rgb_set = {"red", "green"}
    passed_count = 0

    print(
        "Test 1: Uniform distribution on integers 1-10.\nTest set: {1, 5, 7, 8}\nExpected result: 0.4"
    )
    d = Distribution(unif_dist)
    result = d.prob(unif_set)
    print("Result:", result)
    if math.isclose(0.4, result):
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")

    print(
        "Test 2: Drawing from an urn with 3 red, 7 green, 8 blue balls.\nTest set: {\"red\", \"green\"}\nExpected result: 10/18"
    )
    d = Distribution(rgb)
    result = d.prob(rgb_set)
    print("Result:", result)
    if math.isclose(10 / 18, result):
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")

    print(
        "Test 3: Benford's law on digits 1-9.\nTest set: {2, 8, 9}\nExpected result: 0.273"
    )
    d = Distribution(benford)
    result = d.prob(benford_set_a)
    print("Result:", result)
    if math.isclose(0.273, result):
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")
    '''
    if setbonus:
        print("Test 4: Union of two sets using Benford's law.\nTest sets: {2, 8, 9}, {1, 2, 7, 8}\nExpected result: .632")
        result = probfunc(benford, benford_set_a, benford_set_b)
        print("Result:", result)
        if math.isclose(0.632, result):
            print("Test PASSED")
        else:
            print("Test FAILED")
    '''
    return passed_count
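The expected values above (0.4 and 10/18) imply that prob accepts raw weights and divides the mass of the query set by the total mass. A minimal method sketch for the Distribution class under that assumption:

def prob(self, outcomes):
    # total probability mass of the given set, normalizing raw weights
    total = sum(self.d.values())
    return sum(self.d.get(o, 0.0) for o in outcomes) / total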
Example #10
def _convolution_in_point(t_val,
                          f,
                          g,
                          n_integral=100,
                          inverse_time=None,
                          return_log=False):
    '''
    Evaluates int_tau f(t+tau)*g(tau), or int_tau f(t-tau)*g(tau) if
    inverse_time is True.
    '''
    if inverse_time is None:
        raise Exception("Inverse time argument must be set!")

    # determine integration boundaries:
    if inverse_time:
        ## tau>g.xmin and t-tau<f.xmax
        tau_min = max(t_val - f.xmax, g.xmin)
        ## tau<g.xmax and t-tau>f.xmin
        tau_max = min(t_val - f.xmin, g.xmax)
    else:
        ## tau>g.xmin and t+tau>f.xmin
        tau_min = max(f.xmin - t_val, g.xmin)
        ## tau<g.xmax and t+tau<f.xmax
        tau_max = min(f.xmax - t_val, g.xmax)
        #print(tau_min, tau_max)

    if tau_max <= tau_min + ttconf.TINY_NUMBER:
        if return_log:
            return ttconf.BIG_NUMBER
        else:
            return 0.0  #  functions do not overlap

    else:
        # create the tau-grid for the interpolation object in the overlap region
        if inverse_time:
            tau = np.unique(
                np.concatenate((g.x, t_val - f.x, [tau_min, tau_max])))
        else:
            tau = np.unique(
                np.concatenate((g.x, f.x - t_val, [tau_min, tau_max])))
        tau = tau[(tau > tau_min - ttconf.TINY_NUMBER)
                  & (tau < tau_max + ttconf.TINY_NUMBER)]
        if len(tau) < 10:
            tau = np.linspace(tau_min, tau_max, 10)

        if inverse_time:  # add negative logarithms
            fg = f(t_val - tau) + g(tau)
        else:
            fg = f(t_val + tau) + g(tau)

        # create the interpolation object on this grid
        FG = Distribution(tau, fg, is_log=True, kind='linear')
        #integrate the interpolation object, return log, make neg_log
        #print('FG:',FG.xmin, FG.xmax, FG(FG.xmin), FG(FG.xmax))
        res = -FG.integrate(
            a=FG.xmin, b=FG.xmax, n=n_integral, return_log=True)

        if return_log:
            return res
        else:
            return np.exp(-res)
Example #11
def get_incident_duration(random_state=None):
    shape = 0.9689235428381716
    loc = -2.005873343967834
    scale = 30.310979782335075
    duration_dist = Distribution(stats.lognorm(shape, loc, scale),
                                 random_state=random_state)
    return duration_dist
Example #12
def get_incident_interarrival(random_state=None):
    alpha = 0.7949678079328055  # interarrival based on monday - friday gamma dist
    loc = 0
    scale = 294.3450468550495
    interarrival_dist = Distribution(stats.gamma(alpha, loc, scale),
                                     random_state=random_state)
    return interarrival_dist
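Both wrappers above freeze a scipy.stats distribution and hand it to Distribution together with a random_state. Assuming Distribution delegates sampling to the frozen object, the underlying scipy behavior is sketched by:

import numpy as np
from scipy import stats

rng = np.random.RandomState(7)
# the frozen lognormal wrapped by get_incident_duration, same parameters as above
frozen = stats.lognorm(0.9689235428381716, -2.005873343967834, 30.310979782335075)
samples = frozen.rvs(size=5, random_state=rng)  # five simulated incident durations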
Example #13
    def __init__(self, fname):
        """
        Arguments:
            fname {str} -- Filename to load settings from.
        """

        with open(fname, 'r') as settingsFile:
            data = json.load(settingsFile)

        # Load settings.
        self.thread_workers = data['threadWorkers']
        self.number_of_executions = data['numberOfExecutions']
        self.topology_selection = TopologySelection[data['topologySelection']]
        self.termination_condition = TerminationCondition[
            data['terminationCondition']]
        self.termination_value = data['terminationValue']
        self.miner_power_distribution = Distribution(data['minerPower'])
        self.top_miner_power = None

        # Percent power share of the top N miners, currently drawn from https://btc.com/stats/pool?pool_mode=week on July 11, 2018.
        # Each element of the list is a float between 0 and 100; the list must sum to < 100.
        if 'topMinerPower' in data:
            self.top_miner_power = data['topMinerPower']

        self.target_termination_ticks = -1

        # Parameterize in JSON later?
        self.allow_termination_cooldown = True
        self.hard_limit_ticks = 1000  # Should this be a function of the number of miners?

        # Load the other settings objects.
        self.topology = TopologySettings(data['topology'])
        self.protocol = ProtocolSettings(data['protocol'])
Example #14
def main():
    # create environment
    dist1 = Distribution(id=0, vals=[2], probs=[1])
    dist2 = Distribution(id=1, vals=[5], probs=[1])
    dist3 = Distribution(id=2, vals=[2,8], probs=[0.5,0.5])

    env = Environment(total_bandwidth = 10,\
        distribution_list=[dist1,dist2,dist3], \
        mu_list=[1,2,3], lambda_list=[3,2,1],\
        num_of_each_type_distribution_list=[300,300,300])
    # env = gym.make('CartPole-v0')
    # env = env.unwrapped # Cancel the minimum score limit
    # obs_dim = env.observation_space.shape[0]
    # act_dim = env.action_space.n
    obs_dim = 6
    act_dim = 2
    logger.info('obs_dim {}, act_dim {}'.format(obs_dim, act_dim))

    # build the agent following the parl framework
    model = Model(act_dim=act_dim)
    alg = PolicyGradient(model, lr=LEARNING_RATE)
    agent = Agent(alg, obs_dim=obs_dim, act_dim=act_dim)

    # load the model
    if os.path.exists('./policy_grad_model.ckpt'):
        agent.restore('./policy_grad_model.ckpt')
        # run_episode(env, agent, train_or_test='test', render=True)
        # exit()

    for i in range(1000):
        obs_list, action_list, reward_list = run_episode(env, agent)
        if i % 10 == 0:
            logger.info("Episode {}, Reward Sum {}.".format(
                i, sum(reward_list)))

        batch_obs = np.array(obs_list)
        batch_action = np.array(action_list)
        batch_reward = calc_reward_to_go(reward_list, gamma=0.9)

        agent.learn(batch_obs, batch_action, batch_reward)
        if (i + 1) % 100 == 0:
            total_reward = evaluate(env, agent, render=True)
            logger.info('Test reward: {}'.format(total_reward))

    # save the parameters to ./policy_grad_model.ckpt
    agent.save('./policy_grad_model.ckpt')
Example #15
    def __init__(self, config, channel, x, y):
        """
        Constructor.
        :param config: the set of configs loaded by the simulator
        :param channel: the channel to which frames are sent
        :param x: x position
        :param y: y position
        """
        Module.__init__(self)
        # load configuration parameters
        self.datarate = config.get_param(Node.DATARATE)
        self.queue_size = config.get_param(Node.QUEUE)
        self.interarrival = Distribution(config.get_param(Node.INTERARRIVAL))
        self.size = Distribution(config.get_param(Node.SIZE))
        self.proc_time = Distribution(config.get_param(Node.PROC_TIME))
        self.maxsize = config.get_param(Node.MAXSIZE)
        # queue of packets to be sent
        self.queue = []
        # current state
        self.state = Node.IDLE
        self.logger.log_state(self, Node.IDLE)
        # save position
        self.x = x
        self.y = y
        # save channel
        self.channel = channel
        # current packet being either sent or received
        self.current_pkt = None
        # count the number of frames currently under reception
        self.receiving_count = 0
        # timeout event used to avoid being stuck in the RX state
        self.timeout_rx_event = None
        # timeout used for the p-persistence
        self.timeout_wt_event = None
        # time needed to transmit a packet with the maximum size
        self.packet_max_tx_time = self.maxsize * 8.0 / self.datarate
        # p-persistence probability [simple carrier sensing]
        self.p_persistence = float(config.get_param(Node.PERSISTENCE))
        # timeout time for the rx timeout event, set as the time needed to
        # transmit a packet of the maximum size plus a small amount of 10
        # microseconds
        self.timeout_time = self.packet_max_tx_time + 10e-6
        # determine the type of propagation
        self.realistic_propagation = config.get_param(
            Node.PROPAGATION) == "realistic"
Example #16
    def show_distribution(self):
        try:
            filename = f'{self.fraud_target.username}_dataframe.csv'
            dist = Distribution(filename)
            dist.get_distribution()
        except AttributeError:
            print('No dataframe .csv found. Could not retrieve distribution.')
        except:
            print('An error occurred while building the distribution.')
Example #17
def arrival_rate_distribution(random_state=None):
    # car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1 / 36), random_state=random_state)
    # car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1 / 24), random_state=random_state)
    car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1 / 12),
                                              random_state=random_state)
    # car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1 / 4), random_state=random_state)
    # car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1 / 2), random_state=random_state)
    # car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1), random_state=random_state)
    return car_interarrival_rate_dist
Example #18
def main():
    """Main function for calling others"""
    parsing = ArgumentParser()
    rooms_file, students_file = parsing.args_info()
    example = FileReader()
    rooms = example.file_reader(rooms_file)
    students = example.file_reader(students_file)
    new_info = Distribution(rooms, students).student_distribution()
    result = JsonExporter(new_info).unloading()
    print(result)
Example #19
def assign_preferences(n=config.NUM_AGENTS,
                       num_items_assigned=config.NUM_AGENTS):

    # create means
    item_means = [
        Distribution(config.ITEM_MEAN, config.ITEM_VAR).sample()
        for _ in xrange(n)
    ]
    logging.debug("Item means: " + str(item_means))

    # create agents and shuffle their order
    agents = [Agent(i) for i in xrange(n)]
    random.shuffle(agents)

    # assign preferences to agents
    for agent in agents:
        agent.cardinal_prefs = [
            Distribution(item_mean, config.PREFERENCE_VAR).sample()
            for item_mean in item_means
        ]

    logging.debug("Cardinal Prefs: " + str(agents[0].cardinal_prefs))

    logging.debug("correlation between means and preferences is " +
                  str(np.corrcoef(item_means, agents[0].cardinal_prefs)[0, 1]))

    # sort agents' preferences
    for agent in agents:
        agent.ordinal_prefs = [
            sorted(agent.cardinal_prefs).index(x) for x in agent.cardinal_prefs
        ]

    logging.debug(agents[0].cardinal_prefs)
    logging.debug(agents[0].ordinal_prefs)

    # for TTC, every agent owns a house a priori
    for i in xrange(num_items_assigned):
        agents[i].item = i

    return agents
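The ordinal preferences above are ranks of the cardinal values; sorted(...).index(x) is quadratic and maps ties to the same rank. A small worked example of the assumed semantics:

prefs = [0.3, 0.1, 0.7]
ranks = [sorted(prefs).index(x) for x in prefs]
# ranks == [1, 0, 2]: the smallest preference gets rank 0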
Example #20
    def __init__(self, config, channel, x, y):
        """
        :param initialState: The state the FSM has to start from
        :param transitions: A dictionary that maps pairs (State, Event) to a
        transition function. The transition function is defined as t(E) -> E
        where E is the event to handle.
        The FSM will move to the state returned by the function.
        If the function returns None, the FSM remains in the same state.
        """
        Module.__init__(self)

        # load configuration parameters
        self.datarate = config.get_param(FSMNode.DATARATE)
        self.queue_size = config.get_param(FSMNode.QUEUE)
        self.interarrival = Distribution(config.get_param(FSMNode.INTERARRIVAL))
        self.size = Distribution(config.get_param(FSMNode.SIZE))
        self.proc_time = Distribution(config.get_param(FSMNode.PROC_TIME))

        # a slot lasts the maximum time a packet would take to be transmitted
        max_pkt_time = (config.get_param(FSMNode.SIZE)[Distribution.MAX] * 8) / self.datarate
        prop_delay = (config.get_param(Channel.PAR_RANGE)) / Channel.SOL

        self.slot_duration = max_pkt_time + prop_delay

        # the slots distribution for a node
        self.slots = Distribution({"distribution" : "unif",
                                   "int" : True, "min" : 0,
                                   "max" : config.get_param(FSMNode.MAXSLOTS) })

        # save position
        self.x = x
        self.y = y

        # save channel
        self.channel = channel

        # queue of packets to be sent
        self.queue = []
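The slot duration above is the worst-case transmission time plus the propagation delay across the node's range. A worked example with assumed parameters (1024-byte maximum packet, 1 Mb/s datarate, 100 m range, speed of light 3e8 m/s):

max_pkt_time = 1024 * 8 / 1e6   # 8.192 ms to transmit the largest packet
prop_delay = 100 / 3e8          # ~0.33 microseconds to cross the range
slot_duration = max_pkt_time + prop_delay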
Example #21
    def get(self):
        key = self.get_argument('key')
        try:
            dist = Distribution(key).get_dist()
        except KeyError:
            return self.finish({
                "status_code": 404,
                "data": [],
                "error_message": "Could not find distribution in Forget Table"
            })
        return self.finish({
            "status_code": 200,
            "data": [{
                "bin": key,
                "probability": value
            } for key, value in dist.iteritems()]
        })
Example #22
    def data_lines(cls, scenario):
        data_string = ""
        data_items = len(scenario.dataList)
        dist = Distribution()
        uniform_distribution = dist.percentage_distribution(
            UniformDistribution(), data_items)
        binomial_distribution = dist.percentage_distribution(
            BinomialDistribution(), data_items)

        data_string += SaveAnalysis.leading_line(binomial_distribution,
                                                 scenario,
                                                 uniform_distribution)
        data_string += SaveAnalysis.measurement_lines(binomial_distribution,
                                                      scenario,
                                                      uniform_distribution)
        data_string += SaveAnalysis.trailing_line(scenario)

        return data_string
Example #23
def test_condition():
    passed_count = 0
    print(
        "Testing conditioning.\nComputing conditional distribution of letters, conditional on vowels."
    )

    d = Distribution(letters)
    d.condition(vowel_set)
    print("Expected distribution:\n", vowels)
    print("Result:\n", d.d)
    passed = True
    for key in vowels:
        if not math.isclose(vowels[key], d.d[key]):
            passed = False
            print("Probability of", key, d.d[key],
                  "does not match expected probability", vowels[key])
    if passed:
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")
    return passed_count
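The test assumes that condition restricts the distribution to the given event and renormalizes. A minimal method sketch of that behavior for the Distribution class:

def condition(self, event):
    # drop outcomes outside the event, then renormalize in place
    self.d = {k: v for k, v in self.d.items() if k in event}
    total = sum(self.d.values())
    for k in self.d:
        self.d[k] /= total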
Example #24
    def get(self):
        key = self.get_argument('key')
        bin = self.get_argument('bin')
        try:
            self.finish({
                "status_code": 200,
                "data": [{
                    "bin": bin,
                    "probability": Distribution(key).get_bin(bin)
                }]
            })
        except ValueError:
            self.finish({
                "status_code": 404,
                "data": [],
                "error_message": "Could not find bin in distribution"
            })
        except KeyError:
            self.finish({
                "status_code": 404,
                "data": [],
                "error_message": "Could not find distribution in Forget Table"
            })
Example #25
    def most_probable_adjacent(self,
                               count: int,
                               max_distance: int = 2) -> dict:
        """
		for each element el, we will edit it by 1 and 2 (in the edit distance
		model--see Models in models.py) and assign them value prob/2 and prob/3
		respectively. Then, sum up every single one of those transitions (or
		adjacencies). Then, create a distribution out of the "histogram" created

		:param count:
			how many to include for each index in the set of most probable digits
		:param max_distance:
			the maximum distance is how far to get the adjacency. So, if it's 3,
			then for each element el, we will look at all codes distant by 1
			from el, distant by 2, then distant by 3.
		"""
        histogram = {}
        # calibrate the maximum distance to be limited
        # to 9 and ensure it's an integer
        max_distance = int(max(min(max_distance, 9), 1))
        for el in range(10**self._digit_count):
            s_el = Models.extend_integer(el, self._digit_count)
            prob = self.prob(el)
            histogram[el] = histogram.get(el, 0) + prob
            # add histogram for changes for each valid edit_distance
            for edit_distance in range(1, max_distance + 1):
                for el_with_edit in CombinationLockCracker.generate_edits(
                        s_el, distance=edit_distance):
                    i_el = int(el_with_edit)
                    histogram[i_el] = histogram.get(i_el,
                                                    0) + (prob / edit_distance)
        # finally, return the most probable from a new
        # distribution created through the histogram
        dist = Distribution(histogram)
        most_probable = dist.most_probable(count)
        return {el: dist.prob(el) for el in most_probable}
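CombinationLockCracker.generate_edits is not shown. A hypothetical sketch matching how it is used above, assuming an edit is a digit substitution and distance counts substituted positions (the real helper may differ):

from itertools import combinations, product

def generate_edits(code, distance=1):
    # hypothetical: yield all digit strings differing from `code`
    # in exactly `distance` positions
    for positions in combinations(range(len(code)), distance):
        choices = [[d for d in "0123456789" if d != code[i]] for i in positions]
        for replacement in product(*choices):
            edited = list(code)
            for i, d in zip(positions, replacement):
                edited[i] = d
            yield "".join(edited)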
Example #26
    def test_distribution_count(self):
        instance = BinomialDistribution
        exp = Distribution()
        distribution = exp.frequency_distribution(instance, 1)
        self.assertEqual(distribution[0], 1)
        self.assertEqual(distribution[1], 1)

        distribution = exp.frequency_distribution(instance, 2)
        self.assertEqual(distribution[0], 1)
        self.assertEqual(distribution[1], 2)
        self.assertEqual(distribution[2], 1)

        distribution = exp.frequency_distribution(instance, 3)
        self.assertEqual(distribution[0], 1)
        self.assertEqual(distribution[1], 3)
        self.assertEqual(distribution[2], 3)
        self.assertEqual(distribution[3], 1)

        distribution = exp.frequency_distribution(instance, 4)
        self.assertEqual(distribution[0], 1)
        self.assertEqual(distribution[1], 4)
        self.assertEqual(distribution[2], 6)
        self.assertEqual(distribution[3], 4)
        self.assertEqual(distribution[4], 1)
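The expected counts in this test are rows of Pascal's triangle, i.e. binomial coefficients C(n, k); the last case can be checked directly:

from math import comb

assert [comb(4, k) for k in range(5)] == [1, 4, 6, 4, 1]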
Example #27
    def convolve(cls,
                 node_interp,
                 branch_interp,
                 max_or_integral='integral',
                 n_integral=100,
                 inverse_time=True,
                 rel_tol=0.05,
                 yc=10):
        '''
        calculate H(t) = \int_tau f(t-tau)g(tau) if inverse_time=True
                  H(t) = \int_tau f(t+tau)g(tau) if inverse_time=False

        This function determines the time points of the grid of the result to
        ensure an accurate approximation.
        '''

        if max_or_integral not in ['max', 'integral']:
            raise Exception(
                "Max_or_integral expected to be 'max' or 'integral', got " +
                str(max_or_integral) + " instead.")

        def conv_in_point(time_point):

            if max_or_integral == 'integral':  # compute integral of the convolution
                return _evaluate_convolution(time_point,
                                             node_interp,
                                             branch_interp,
                                             n_integral=n_integral,
                                             return_log=True,
                                             inverse_time=inverse_time)

            else:  # compute max of the convolution
                return _max_of_integrand(time_point,
                                         node_interp,
                                         branch_interp,
                                         return_log=True,
                                         inverse_time=inverse_time)

        # estimate peak and width
        joint_fwhm = (node_interp.fwhm + branch_interp.fwhm)
        min_fwhm = min(node_interp.fwhm, branch_interp.fwhm)
        # determine the support of the resulting convolution:
        # to be positive, the flipped support of f, shifted by t, needs to overlap the support of g
        if inverse_time:
            new_peak_pos = node_interp.peak_pos + branch_interp.peak_pos
            tmin = node_interp.xmin + branch_interp.xmin
            tmax = node_interp.xmax + branch_interp.xmax
        else:
            new_peak_pos = node_interp.peak_pos - branch_interp.peak_pos
            tmin = node_interp.xmin - branch_interp.xmax
            tmax = node_interp.xmax - branch_interp.xmin

        # make initial node grid consisting of linearly spaced points around
        # the center and quadratically spaced points at either end
        n_grid_points = ttconf.NODE_GRID_SIZE
        n = n_grid_points // 3
        center_width = 3 * joint_fwhm
        grid_center = new_peak_pos + np.linspace(-1, 1, n) * center_width

        # add the right and left grid if it is needed
        right_range = (tmax - grid_center[-1])
        if right_range > 4 * center_width:
            grid_right = grid_center[-1] + right_range * (np.linspace(0, 1, n)
                                                          **2.0)
        elif right_range > 0:  # use a linear grid when right_range is comparable to center_width
            grid_right = grid_center[-1] + right_range * np.linspace(
                0, 1, int(min(n, 1 + 0.5 * n * right_range / center_width)))
        else:
            grid_right = []

        left_range = grid_center[0] - tmin
        if left_range > 4 * center_width:
            grid_left = tmin + left_range * (np.linspace(0, 1, n)**2.0)
        elif left_range > 0:
            grid_left = tmin + left_range * np.linspace(
                0, 1, int(min(n, 1 + 0.5 * n * left_range / center_width)))
        else:
            grid_left = []

        if tmin > -1:
            grid_zero_left = tmin + (tmax - tmin) * np.linspace(0, 0.01, 11)**2
        else:
            grid_zero_left = [tmin]
        if tmax < 1:
            grid_zero_right = tmax - (tmax - tmin) * np.linspace(0, 0.01,
                                                                 11)**2
        else:
            grid_zero_right = [tmax]

        # make grid and calculate convolution
        t_grid_0 = np.unique(
            np.concatenate([
                grid_zero_left, grid_left[:-1], grid_center, grid_right[1:],
                grid_zero_right
            ]))
        t_grid_0 = t_grid_0[(t_grid_0 > tmin - ttconf.TINY_NUMBER)
                            & (t_grid_0 < tmax + ttconf.TINY_NUMBER)]

        # res0 - the values of the convolution (integral or max)
        # t_0  - the value, at which the res0 achieves maximum
        #        (when determining the maximum of the integrand, otherwise meaningless)
        res_0, t_0 = np.array([conv_in_point(t_val) for t_val in t_grid_0]).T

        # refine grid as necessary and add new points
        # calculate interpolation error at all internal points [2:-2] because end points are sometimes off scale
        interp_error = np.abs(res_0[3:-1] + res_0[1:-3] - 2 * res_0[2:-2])
        # determine the number of extra points needed, criterion depends on distance from peak dy
        dy = (res_0[2:-2] - res_0.min())
        dx = np.diff(t_grid_0)
        refine_factor = np.minimum(
            np.minimum(
                np.array(np.floor(
                    np.sqrt(interp_error / (rel_tol * (1 + (dy / yc)**4)))),
                         dtype=int),
                np.array(100 * (dx[1:-2] + dx[2:-1]) / min_fwhm, dtype=int)),
            10)

        insert_point_idx = np.zeros(interp_error.shape[0] + 1, dtype=int)
        insert_point_idx[1:] = refine_factor
        insert_point_idx[:-1] += refine_factor
        # add additional points if there are any to add

        if np.sum(insert_point_idx):
            add_x = np.concatenate([
                np.linspace(t1, t2, n + 2)[1:-1] for t1, t2, n in zip(
                    t_grid_0[1:-2], t_grid_0[2:-1], insert_point_idx) if n > 0
            ])
            # calculate convolution at these points
            add_y, add_t = np.array([conv_in_point(t_val)
                                     for t_val in add_x]).T

            t_grid_0 = np.concatenate((t_grid_0, add_x))
            res_0 = np.concatenate((res_0, add_y))
            t_0 = np.concatenate((t_0, add_t))

        # instantiate the new interpolation object and return
        res_y = cls(t_grid_0, res_0, is_log=True, kind='linear')

        # the interpolation object, which is used to store the value of the
        # grid, which maximizes the convolution (for 'max' option),
        # or flat -1 distribution (for 'integral' option)
        # this grid is the optimal branch length
        res_t = Distribution(t_grid_0, t_0, is_log=True, kind='linear')

        return res_y, res_t
Example #28
    def _ml_t_marginal(self, assign_dates=False):
        """
        Compute the marginal probability distribution of the internal nodes positions by
        propagating from the tree leaves towards the root. The result of
        this operation are the probability distributions of each internal node,
        conditional on the constraints on all leaves of the tree, which have sampling dates.
        The probability distributions are set as marginal_pos_LH attributes to the nodes.

        Parameters
        ----------

         assign_dates : bool, default False
            If True, the inferred dates will be assigned to the nodes as
            :code:`time_before_present` attributes, and their branch lengths
            will be corrected accordingly.
            .. Note::
                Normally, the dates are assigned by running joint reconstruction.

        Returns
        -------

         None
            Every internal node is assigned the probability distribution in form
            of an interpolation object and sends this distribution further towards the
            root.

        """

        def _cleanup():
            for node in self.tree.find_clades():
                try:
                    del node.marginal_pos_Lx
                    del node.subtree_distribution
                    del node.msg_from_parent
                    #del node.marginal_pos_LH
                except:
                    pass


        self.logger("ClockTree - Marginal reconstruction:  Propagating leaves -> root...", 2)
        # go through the nodes from leaves towards the root:
        for node in self.tree.find_clades(order='postorder'):  # children first, msg to parents
            if node.bad_branch:
                # no information
                node.marginal_pos_Lx = None
            else: # all other nodes
                if node.date_constraint is not None and node.date_constraint.is_delta: # there is a time constraint
                    # initialize the Lx for nodes with precise date constraint:
                    # subtree probability given the position of the parent node
                    # position of the parent node is given by the branch length
                    # distribution attached to the child node position
                    node.subtree_distribution = node.date_constraint
                    bl = node.branch_length_interpolator.x
                    x = bl + node.date_constraint.peak_pos
                    node.marginal_pos_Lx = Distribution(x, node.branch_length_interpolator(bl),
                                                        min_width=self.min_width, is_log=True)

                else: # all nodes without precise constraint but positional information
                      # subtree likelihood given the node's constraint and child msg:
                    msgs_to_multiply = [node.date_constraint] if node.date_constraint is not None else []
                    msgs_to_multiply.extend([child.marginal_pos_Lx for child in node.clades
                                             if child.marginal_pos_Lx is not None])

                    # combine the different msgs and constraints
                    if len(msgs_to_multiply)==0:
                        # no information
                        node.marginal_pos_Lx = None
                        continue
                    elif len(msgs_to_multiply)==1:
                        node.subtree_distribution = msgs_to_multiply[0]
                    else: # combine the different msgs and constraints
                        node.subtree_distribution = Distribution.multiply(msgs_to_multiply)

                    if node.up is None: # this is the root, set dates
                        node.subtree_distribution._adjust_grid(rel_tol=self.rel_tol_prune)
                        node.marginal_pos_Lx = node.subtree_distribution
                        node.marginal_pos_LH = node.subtree_distribution
                        self.tree.positional_marginal_LH = -node.subtree_distribution.peak_val
                    else: # otherwise propagate to parent
                        res, res_t = NodeInterpolator.convolve(node.subtree_distribution,
                                        node.branch_length_interpolator,
                                        max_or_integral='integral',
                                        n_grid_points = self.node_grid_points,
                                        n_integral=self.n_integral,
                                        rel_tol=self.rel_tol_refine)
                        res._adjust_grid(rel_tol=self.rel_tol_prune)
                        node.marginal_pos_Lx = res

        self.logger("ClockTree - Marginal reconstruction:  Propagating root -> leaves...", 2)
        from scipy.interpolate import interp1d
        for node in self.tree.find_clades(order='preorder'):

            ## The root node
            if node.up is None:
                node.msg_from_parent = None # nothing beyond the root
            # all other cases (All internal nodes + unconstrained terminals)
            else:
                parent = node.up
                # messages from the complementary subtree (iterate over all sister nodes)
                complementary_msgs = [sister.marginal_pos_Lx for sister in parent.clades
                                            if (sister != node) and (sister.marginal_pos_Lx is not None)]

                # if the parent itself got something from the root node, include it
                if parent.msg_from_parent is not None:
                    complementary_msgs.append(parent.msg_from_parent)
                elif parent.marginal_pos_Lx is not None:
                    complementary_msgs.append(parent.marginal_pos_LH)

                if len(complementary_msgs):
                    msg_parent_to_node = NodeInterpolator.multiply(complementary_msgs)
                    msg_parent_to_node._adjust_grid(rel_tol=self.rel_tol_prune)
                else:
                    from utils import numeric_date
                    x = [parent.numdate, numeric_date()]
                    msg_parent_to_node = NodeInterpolator(x, [1.0, 1.0],min_width=self.min_width)

                # integral message, which delivers to the node the positional information
                # from the complementary subtree
                res, res_t = NodeInterpolator.convolve(msg_parent_to_node, node.branch_length_interpolator,
                                                    max_or_integral='integral',
                                                    inverse_time=False,
                                                    n_grid_points = self.node_grid_points,
                                                    n_integral=self.n_integral,
                                                    rel_tol=self.rel_tol_refine)

                node.msg_from_parent = res
                if node.marginal_pos_Lx is None:
                    node.marginal_pos_LH = node.msg_from_parent
                else:
                    node.marginal_pos_LH = NodeInterpolator.multiply((node.msg_from_parent, node.subtree_distribution))

                self.logger('ClockTree._ml_t_root_to_leaves: computed convolution'
                                ' with %d points at node %s'%(len(res.x),node.name),4)

                if self.debug:
                    tmp = np.diff(res.y-res.peak_val)
                    nsign_changed = np.sum((tmp[1:]*tmp[:-1]<0)&(res.y[1:-1]-res.peak_val<500))
                    if nsign_changed>1:
                        import matplotlib.pyplot as plt
                        plt.ion()
                        plt.plot(res.x, res.y-res.peak_val, '-o')
                        plt.plot(res.peak_pos - node.branch_length_interpolator.x,
                                 node.branch_length_interpolator(node.branch_length_interpolator.x)-node.branch_length_interpolator.peak_val, '-o')
                        plt.plot(msg_parent_to_node.x,msg_parent_to_node.y-msg_parent_to_node.peak_val, '-o')
                        plt.ylim(0,100)
                        plt.xlim(-0.05, 0.05)
                        import ipdb; ipdb.set_trace()

            # assign positions of nodes and branch length only when desired
            # since marginal reconstruction can result in negative branch length
            if assign_dates:
                node.time_before_present = node.marginal_pos_LH.peak_pos
                if node.up:
                    node.clock_length = node.up.time_before_present - node.time_before_present
                    node.branch_length = node.clock_length

            # construct the inverse cumulant distribution to evaluate confidence intervals
            if node.marginal_pos_LH.is_delta:
                node.marginal_inverse_cdf=interp1d([0,1], node.marginal_pos_LH.peak_pos*np.ones(2), kind="linear")
            else:
                dt = np.diff(node.marginal_pos_LH.x)
                y = node.marginal_pos_LH.prob_relative(node.marginal_pos_LH.x)
                int_y = np.concatenate(([0], np.cumsum(dt*(y[1:]+y[:-1])/2.0)))
                int_y/=int_y[-1]
                node.marginal_inverse_cdf = interp1d(int_y, node.marginal_pos_LH.x, kind="linear")
                node.marginal_cdf = interp1d(node.marginal_pos_LH.x, int_y, kind="linear")

        if not self.debug:
            _cleanup()

        return
Example #29
def _convolution_integrand(t_val, f, g, inverse_time=None, return_log=False):
    '''
    Computes the integrand f(t+tau)*g(tau), or f(t-tau)*g(tau) if inverse_time
    is True, on the region where f and g overlap.

    Parameters
    -----------

     t_val : double
        Time point

     f : Interpolation object
        First multiplier in convolution

     g : Interpolation object
        Second multiplier in convolution

     inverse_time : bool, None
        Time direction. If True, f(t-tau)*g(tau) is calculated; otherwise,
        f(t+tau)*g(tau)

     return_log : bool
        If True, the logarithm will be returned


    Returns
    -------

     FG : Distribution
        The function to be integrated as Distribution object (interpolator)

    '''

    if inverse_time is None:
        raise Exception("Inverse time argument must be set!")

    # determine integration boundaries:
    if inverse_time:
        ## tau>g.xmin and t-tau<f.xmax
        tau_min = max(t_val - f.xmax, g.xmin)
        ## tau<g.xmax and t-tau>f.xmin
        tau_max = min(t_val - f.xmin, g.xmax)
    else:
        ## tau>g.xmin and t+tau>f.xmin
        tau_min = max(f.xmin - t_val, g.xmin)
        ## tau<g.xmax and t+tau<f.xmax
        tau_max = min(f.xmax - t_val, g.xmax)
        #print(tau_min, tau_max)

    if tau_max <= tau_min:
        if return_log:
            return ttconf.BIG_NUMBER
        else:
            return 0.0  #  functions do not overlap

    else:
        # create the tau-grid for the interpolation object in the overlap region
        if inverse_time:
            tau = np.unique(
                np.concatenate((g.x, t_val - f.x, [tau_min, tau_max])))
        else:
            tau = np.unique(
                np.concatenate((g.x, f.x - t_val, [tau_min, tau_max])))
        tau = tau[(tau > tau_min - ttconf.TINY_NUMBER)
                  & (tau < tau_max + ttconf.TINY_NUMBER)]
        if len(tau) < 10:
            tau = np.linspace(tau_min, tau_max, 10)

        if inverse_time:  # add negative logarithms
            tnode = t_val - tau
            fg = f(tnode) + g(tau, tnode=tnode)
        else:
            fg = f(t_val + tau) + g(tau, tnode=t_val)

        # create the interpolation object on this grid
        FG = Distribution(tau, fg, is_log=True, kind='linear')
        return FG
Example #30
    def _ml_t_joint(self):
        """
        Compute the joint maximum likelihood assignment of the internal nodes positions by
        propagating from the tree leaves towards the root. Given the assignment of parent nodes,
        reconstruct the maximum-likelihood positions of the child nodes by propagating
        from the root to the leaves. The result of this operation is the time_before_present
        value, which is the position of the node expressed in the units of the
        branch length and measured backwards from the present day. The value is assigned
        to the corresponding attribute of each node of the tree.

        Returns
        -------

         None
            Every internal node is assigned the probability distribution in form
            of an interpolation object and sends this distribution further towards the
            root.

        """

        def _cleanup():
            for node in self.tree.find_clades():
                del node.joint_pos_Lx
                del node.joint_pos_Cx


        self.logger("ClockTree - Joint reconstruction:  Propagating leaves -> root...", 2)
        # go through the nodes from leaves towards the root:
        for node in self.tree.find_clades(order='postorder'):  # children first, msg to parents
            # Lx is the maximal likelihood of a subtree given the parent position
            # Cx is the branch length corresponding to the maximally likely subtree
            if node.bad_branch:
                # no information at the node
                node.joint_pos_Lx = None
                node.joint_pos_Cx = None
            else: # all other nodes
                if node.date_constraint is not None and node.date_constraint.is_delta: # there is a time constraint
                    # subtree probability given the position of the parent node
                    # Lx.x is the position of the parent node
                    # Lx.y is the probability of the subtree (consisting of one terminal node in this case)
                    # Cx.y is the branch length corresponding to the optimal subtree
                    bl = node.branch_length_interpolator.x
                    x = bl + node.date_constraint.peak_pos
                    node.joint_pos_Lx = Distribution(x, node.branch_length_interpolator(bl),
                                                     min_width=self.min_width, is_log=True)
                    node.joint_pos_Cx = Distribution(x, bl, min_width=self.min_width) # map back to the branch length
                else: # all nodes without precise constraint but positional information
                    msgs_to_multiply = [node.date_constraint] if node.date_constraint is not None else []
                    msgs_to_multiply.extend([child.joint_pos_Lx for child in node.clades
                                             if child.joint_pos_Lx is not None])

                    # subtree likelihood given the node's constraint and child messages
                    if len(msgs_to_multiply) == 0: # there are no constraints
                        node.joint_pos_Lx = None
                        node.joint_pos_Cx = None
                        continue
                    elif len(msgs_to_multiply)>1: # combine the different msgs and constraints
                        subtree_distribution = Distribution.multiply(msgs_to_multiply)
                    else: # there is exactly one constraint.
                        subtree_distribution = msgs_to_multiply[0]
                    if node.up is None: # this is the root, set dates
                        subtree_distribution._adjust_grid(rel_tol=self.rel_tol_prune)

                        # set root position and joint likelihood of the tree
                        node.time_before_present = subtree_distribution.peak_pos
                        node.joint_pos_Lx = subtree_distribution
                        node.joint_pos_Cx = None
                        node.clock_length = node.branch_length
                    else: # otherwise propagate to parent
                        res, res_t = NodeInterpolator.convolve(subtree_distribution,
                                        node.branch_length_interpolator,
                                        max_or_integral='max',
                                        inverse_time=True,
                                        n_grid_points = self.node_grid_points,
                                        n_integral=self.n_integral,
                                        rel_tol=self.rel_tol_refine)

                        res._adjust_grid(rel_tol=self.rel_tol_prune)

                        node.joint_pos_Lx = res
                        node.joint_pos_Cx = res_t


        # go through the nodes from root towards the leaves and assign joint ML positions:
        self.logger("ClockTree - Joint reconstruction:  Propagating root -> leaves...", 2)
        for node in self.tree.find_clades(order='preorder'):  # root first, msgs to children

            if node.up is None: # root node
                continue # the position was already set on the previous step

            if node.joint_pos_Cx is None: # no constraints or branch is bad - reconstruct from the branch len interpolator
                node.branch_length = node.branch_length_interpolator.peak_pos

            elif isinstance(node.joint_pos_Cx, Distribution):
                # NOTE the Lx distribution is the likelihood, given the position of the parent
                # (Lx.x = parent position, Lx.y = LH of the node_pos given Lx.x,
                # the length of the branch corresponding to the most likely
                # subtree is node.Cx(node.time_before_present))
                subtree_LH = node.joint_pos_Lx(node.up.time_before_present)
                node.branch_length = node.joint_pos_Cx(max(node.joint_pos_Cx.xmin,
                                            node.up.time_before_present)+ttconf.TINY_NUMBER)

            node.time_before_present = node.up.time_before_present - node.branch_length
            node.clock_length = node.branch_length

            # just sanity check, should never happen:
            if node.branch_length < 0 or node.time_before_present < 0:
                if node.branch_length<0 and node.branch_length>-ttconf.TINY_NUMBER:
                    self.logger("ClockTree - Joint reconstruction: correcting rounding error of %s"%node.name, 4)
                    node.branch_length = 0

        self.tree.positional_joint_LH = self.timetree_likelihood()
        # cleanup, if required
        if not self.debug:
            _cleanup()