def test_norm():
    passed_count = 0
    print("Test 1: letter frequencies in English text")
    temp_dict = letters_unscaled.copy()
    d = Distribution(temp_dict)
    d.normalize()
    passed = True
    for key in d.d:
        if not math.isclose(letters[key], d.d[key]):
            passed = False
            print("Probability of", key, d.d[key],
                  "does not match expected probability", letters[key])
    if passed:
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")
    print("Test 2: drawing from an urn")
    temp_dict = rgb_unscaled.copy()
    d = Distribution(temp_dict)
    d.normalize()
    passed = True
    for key in rgb:
        if not math.isclose(rgb[key], d.d[key]):
            passed = False
            print("Probability of", key, d.d[key],
                  "does not match expected probability", rgb[key])
    if passed:
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")
    return passed_count
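# Both tests above assume that normalize() rescales raw counts so the values
# sum to 1. A minimal sketch of that behavior (an assumption: the weights are
# stored in the instance dict `self.d`, as the tests read them via d.d):
class Distribution:
    def __init__(self, d):
        self.d = d  # mapping: outcome -> raw count or probability mass

    def normalize(self):
        # rescale all weights in place so that they sum to 1
        total = sum(self.d.values())
        for key in self.d:
            self.d[key] /= total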
def __init__(self, env, node, resource, properties):
    """
    Creates a link and automatically assigns a unique id to it.
    It requires a simpy environment in which to operate, and a simpy
    resource to reserve the channel for a message.

    :param env: Simpy environment
    :param node: Node which the link is referred to
    :param resource: unitary resource used to lock the link
    :param properties: properties of the link in the graphml that need
                       to be evaluated
    """
    self._id = Link.__link_counter
    Link.__link_counter += 1
    self._env = env
    self._node = node
    self._res = resource
    self._delay = None
    if Link.DELAY in properties:
        self._delay = Distribution(json.loads(properties[Link.DELAY]))
    if Link.POLICY_FUNCTION in properties:
        self._policy_function = PolicyFunction(properties[Link.POLICY_FUNCTION])
    else:
        self._policy_function = PolicyFunction(PolicyFunction.PASS_EVERYTHING)
    self._mrai = 30.0
    if Link.MRAI in properties:
        self._mrai = float(properties[Link.MRAI])
    self._mrai_active = False
    # build the jitter spec as a dict instead of concatenating a JSON string
    self._jitter = Distribution({"distribution": "unif",
                                 "min": 0,
                                 "max": self._mrai * 0.25,
                                 "int": 0.01})
def __init__(self):
    super(PlayerManager, self).__init__()
    self.first_name_dist = Distribution(self.create_frequency_dist("FirstName"))
    self.last_name_dist = Distribution(self.create_frequency_dist("LastName"))
    self.state_dist = Distribution(self.create_frequency_dist("State"))
    self.position_dist = Distribution(self.create_frequency_dist("Position"))
    self.talent_dist = Distribution(self.create_frequency_dist("ProfilePoint"))
def __init__(self, value):
    """Parses the settings in the value parameter.

    Arguments:
        value {str|dict} -- If a string, it is a pointer to a JSON-encoded
            file containing the settings. If a dict, then it is the settings.
    """
    if isinstance(value, str):
        # Treat the value as a file locator.
        with open(value, 'r') as settingsFile:
            data = json.load(settingsFile)
    else:
        data = value

    self.topology_type = TopologyType[data['type']]
    if self.topology_type == TopologyType.STATIC_UNIFORM_DELAY:
        self.network_delay = Distribution(data['networkDelay'])
        self.static_file = data['file']
        self.static_graph = None
    else:
        self.number_of_miners = data['numberOfMiners']
        if self.topology_type in (TopologyType.GEOMETRIC_UNIFORM_DELAY,
                                  TopologyType.LOBSTER_UNIFORM_DELAY):
            # Graphs with uniform delays for message transmission.
            self.network_delay = Distribution(data['networkDelay'])
            if self.topology_type == TopologyType.GEOMETRIC_UNIFORM_DELAY:
                self.radius = data['radius']
            elif self.topology_type == TopologyType.LOBSTER_UNIFORM_DELAY:
                self.p1 = data['p1']
                self.p2 = data['p2']
        else:
            raise NotImplementedError(
                "Selected topology type is not implemented.")
def build_tree(columns, target_column, rows, score_type):
    """Recursively build a decision tree by finding the best column, value
    and operation to split on"""
    if not rows:
        return Node(distribution=Distribution({}))
    best_partition = {}
    score = Distribution(value_counts(rows, target_column)).score(score_type)
    for column in [column for column in columns if column != target_column]:
        column_value_set = {row[column] for row in rows} - {''}
        # all values have the same operations, or no values
        assert len({operations_for(value) for value in column_value_set}) <= 1
        if len(column_value_set) == 0:
            continue
        for operation in operations_for(next(iter(column_value_set))):
            for pivot in column_value_set:
                positive_rows, negative_rows = partition(
                    rows, column, operation, pivot)
                score_gain = (
                    score
                    - (len(positive_rows) / len(rows)) * Distribution(
                        value_counts(positive_rows, target_column)).score(score_type)
                    - (len(negative_rows) / len(rows)) * Distribution(
                        value_counts(negative_rows, target_column)).score(score_type))
                if score_gain > best_partition.get('score_gain', 0.0):
                    best_partition = {
                        'score_gain': score_gain,
                        'column': column,
                        'operation': operation,
                        'pivot': pivot,
                        'positive_rows': positive_rows,
                        'negative_rows': negative_rows,
                    }
    if best_partition.get('score_gain', 0) > 0:
        return Node(
            column=best_partition['column'],
            operation=best_partition['operation'],
            pivot=best_partition['pivot'],
            positive_branch=build_tree(columns=columns,
                                       target_column=target_column,
                                       rows=best_partition['positive_rows'],
                                       score_type=score_type),
            negative_branch=build_tree(columns=columns,
                                       target_column=target_column,
                                       rows=best_partition['negative_rows'],
                                       score_type=score_type),
        )
    else:
        return Node(
            distribution=Distribution(value_counts(rows, target_column)))
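# build_tree maximizes the drop in impurity, weighted by partition size.
# A minimal sketch of the impurity scores a Distribution.score(score_type)
# of this kind typically computes (entropy and Gini here are assumptions;
# the real implementation may differ):
import math

def impurity(counts, score_type="entropy"):
    # counts: mapping target value -> number of rows with that value
    total = sum(counts.values())
    if total == 0:
        return 0.0
    probs = [c / total for c in counts.values()]
    if score_type == "entropy":
        return -sum(p * math.log2(p) for p in probs if p > 0)
    # Gini impurity: chance that two random rows disagree on the target
    return 1.0 - sum(p * p for p in probs)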
def main():
    dist1 = Distribution(id=0, vals=[2], probs=[1])
    dist2 = Distribution(id=1, vals=[5], probs=[1])
    dist3 = Distribution(id=2, vals=[2, 8], probs=[0.5, 0.5])
    env = Environment(total_bandwidth=10,
                      distribution_list=[dist1, dist2, dist3],
                      mu_list=[1, 2, 3], lambda_list=[3, 2, 1],
                      num_of_each_type_distribution_list=[300, 300, 300])
    action_dim = 2
    state_dim = 6

    rpm = DQNPytorchReplayMemory(MEMORY_SIZE)  # DQN experience replay pool

    # build the agent following the PARL framework
    model = DQNPtorchModel(state_dim=state_dim, act_dim=action_dim)
    algorithm = DQNPytorchAlg(model, act_dim=action_dim, gamma=GAMMA, lr=LEARNING_RATE)
    agent = DQNPytorchAgent(
        algorithm,
        obs_dim=state_dim,
        act_dim=action_dim,
        e_greed=0.1,  # pick a random action with some probability (exploration)
        e_greed_decrement=1e-6)  # gradually reduce exploration as training converges

    # load a previously saved model
    # save_path = './dqn_model.ckpt'
    # agent.restore(save_path)

    # pre-fill the replay memory so the first training batches have enough sample diversity
    while len(rpm) < MEMORY_WARMUP_SIZE:
        run_episode(env, agent, rpm)

    max_episode = 2000

    # start training
    episode = 0
    while episode < max_episode:  # train for max_episode episodes; test runs do not count
        # train part
        for i in range(0, 50):
            total_reward = run_episode(env, agent, rpm)
            episode += 1

        # test part
        eval_reward, num_accept = evaluate(env, agent)  # render=True to visualize
        print(
            f'episode {episode}: evaluate reward {eval_reward}, num of accept: {num_accept}'
        )

    # training finished, save the model
    save_path = './dqn_pytorch_model.ckpt'
    agent.save(save_path)
def initialize(self, config):
    self.run_number = config.run_number
    self.application = config.get_param(Cluster.APPLICATION)
    self.size = Distribution(config.get_param(Cluster.SIZE))
    self.offer = Distribution(config.get_param(Cluster.OFFER))
    self.resource = config.get_param(Cluster.RESOURCE)
    self.resource_scale = int(config.get_param(Cluster.RESOURCE_SCALE))
    Configuration().host = config.get_param(Cluster.ENDPOINT)
    self.api = swagger_client.DeploymentsApi()
    self.nodes_api = swagger_client.NodesApi()
    self.index = 0
def __init__(self, config, channel, x, y):
    """
    Constructor.
    :param config: the set of configs loaded by the simulator
    :param channel: the channel to which frames are sent
    :param x: x position
    :param y: y position
    """
    Module.__init__(self)
    # number of slots in the contention window
    self.window_slots_count = config.get_param(Node.WINDOW_SIZE)
    # duration in seconds of the channel listening period
    self.listening_duration = config.get_param(Node.LISTENING_TIME)
    # duration in seconds of each slot
    self.slot_duration = self.listening_duration
    # load configuration parameters
    self.datarate = config.get_param(Node.DATARATE)
    self.queue_size = config.get_param(Node.QUEUE)
    self.interarrival = Distribution(config.get_param(Node.INTERARRIVAL))
    self.size = Distribution(config.get_param(Node.SIZE))
    self.proc_time = Distribution(config.get_param(Node.PROC_TIME))
    self.maxsize = config.get_param(Node.MAXSIZE)
    # queue of packets to be sent
    self.queue = []
    # current state
    self.state = None
    self.switch_state(Node.IDLE)
    # save position
    self.x = x
    self.y = y
    # save channel
    self.channel = channel
    # number of packets being received
    self.packets_in_air = 0
    # number of window slots we still have to wait before transmitting
    self.slot_countdown = 0
    # first packet in the current sequence of receiving packets
    self.rx_sequence_first_packet = None
    # hooks to events in the queue for future manipulation
    self.end_listenting_event_hook = None
    self.end_slot_event_hook = None
def test_prob():
    unif_set = {1, 5, 7, 8}
    rgb_set = {"red", "green"}
    passed_count = 0
    print("Test 1: Uniform distribution on integers 1-10.\n"
          "Test set: {1, 5, 7, 8}\nExpected result: 0.4")
    d = Distribution(unif_dist)
    result = d.prob(unif_set)
    print("Result:", result)
    if math.isclose(0.4, result):
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")
    print("Test 2: Drawing from an urn with 3 red, 7 green, 8 blue balls.\n"
          "Test set: {\"red\", \"green\"}\nExpected result: 10/18")
    d = Distribution(rgb)
    result = d.prob(rgb_set)
    print("Result:", result)
    if math.isclose(10 / 18, result):
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")
    print("Test 3: Benford's law on digits 1-9.\n"
          "Test set: {2, 8, 9}\nExpected result: 0.273")
    d = Distribution(benford)
    result = d.prob(benford_set_a)
    print("Result:", result)
    if math.isclose(0.273, result):
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")
    '''
    if setbonus:
        print("Test 4: Union of two sets using Benford's law.\n"
              "Test sets: {2, 8, 9}, {1, 2, 7, 8}\nExpected result: .632")
        result = probfunc(benford, benford_set_a, benford_set_b)
        print("Result:", result)
        if math.isclose(0.632, result):
            print("Test PASSED")
        else:
            print("Test FAILED")
    '''
    return passed_count
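# The tests treat prob() as the total probability mass of a set of outcomes.
# A minimal sketch of that contract, on the same `self.d` mapping assumed in
# the normalize() sketch above:
def prob(self, outcomes):
    # probability of an event = sum of the probabilities of its outcomes;
    # outcomes absent from the distribution contribute zero mass
    return sum(self.d.get(outcome, 0) for outcome in outcomes)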
def _convolution_in_point(t_val, f, g, n_integral=100,
                          inverse_time=None, return_log=False):
    '''
    evaluates int_tau f(t+tau)g(tau) or
    int_tau f(t-tau)g(tau) if inverse_time is True
    '''
    if inverse_time is None:
        raise Exception("Inverse time argument must be set!")

    # determine integration boundaries:
    if inverse_time:
        # tau > g.xmin and t - tau < f.xmax
        tau_min = max(t_val - f.xmax, g.xmin)
        # tau < g.xmax and t - tau > f.xmin
        tau_max = min(t_val - f.xmin, g.xmax)
    else:
        # tau > g.xmin and t + tau > f.xmin
        tau_min = max(f.xmin - t_val, g.xmin)
        # tau < g.xmax and t + tau < f.xmax
        tau_max = min(f.xmax - t_val, g.xmax)

    if tau_max <= tau_min + ttconf.TINY_NUMBER:
        # functions do not overlap
        if return_log:
            return ttconf.BIG_NUMBER
        else:
            return 0.0
    else:
        # create the tau-grid for the interpolation object in the overlap region
        if inverse_time:
            tau = np.unique(np.concatenate((g.x, t_val - f.x, [tau_min, tau_max])))
        else:
            tau = np.unique(np.concatenate((g.x, f.x - t_val, [tau_min, tau_max])))
        tau = tau[(tau > tau_min - ttconf.TINY_NUMBER) &
                  (tau < tau_max + ttconf.TINY_NUMBER)]
        if len(tau) < 10:
            tau = np.linspace(tau_min, tau_max, 10)

        if inverse_time:
            # add negative logarithms
            fg = f(t_val - tau) + g(tau)
        else:
            fg = f(t_val + tau) + g(tau)

        # create the interpolation object on this grid
        FG = Distribution(tau, fg, is_log=True, kind='linear')
        # integrate the interpolation object, return the negative log
        res = -FG.integrate(a=FG.xmin, b=FG.xmax, n=n_integral, return_log=True)
        if return_log:
            return res
        else:
            return np.exp(-res)
def get_incident_duration(random_state=None):
    shape = 0.9689235428381716
    loc = -2.005873343967834
    scale = 30.310979782335075
    duration_dist = Distribution(stats.lognorm(shape, loc, scale),
                                 random_state=random_state)
    return duration_dist
def get_incident_interarrival(random_state=None):
    # interarrival based on Monday - Friday gamma dist
    alpha = 0.7949678079328055
    loc = 0
    scale = 294.3450468550495
    interarrival_dist = Distribution(stats.gamma(alpha, loc, scale),
                                     random_state=random_state)
    return interarrival_dist
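# Both factories wrap frozen scipy.stats distributions. A short usage sketch
# (the rvs()-style sampling call on Distribution is an assumption; adapt it
# to whatever sampling method the class actually exposes):
duration_dist = get_incident_duration(random_state=42)
interarrival_dist = get_incident_interarrival(random_state=42)
duration = duration_dist.rvs()      # assumed sampling method
gap = interarrival_dist.rvs()       # assumed sampling method
print(f"incident duration {duration:.1f}, next arrival in {gap:.1f}")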
def __init__(self, fname):
    """
    Arguments:
        fname {str} -- Filename to load settings from.
    """
    with open(fname, 'r') as settingsFile:
        data = json.load(settingsFile)

    # Load settings.
    self.thread_workers = data['threadWorkers']
    self.number_of_executions = data['numberOfExecutions']
    self.topology_selection = TopologySelection[data['topologySelection']]
    self.termination_condition = TerminationCondition[data['terminationCondition']]
    self.termination_value = data['terminationValue']
    self.miner_power_distribution = Distribution(data['minerPower'])

    # Percent power share of the top N miners, currently drawn from
    # https://btc.com/stats/pool?pool_mode=week on July 11, 2018.
    # Each element of the list is a float between 0 and 100; the list must sum to < 100.
    self.top_miner_power = None
    if 'topMinerPower' in data:
        self.top_miner_power = data['topMinerPower']

    self.target_termination_ticks = -1  # Parameterize in JSON later?
    self.allow_termination_cooldown = True
    self.hard_limit_ticks = 1000  # Should this be a function of the number of miners?

    # Load the other settings objects.
    self.topology = TopologySettings(data['topology'])
    self.protocol = ProtocolSettings(data['protocol'])
def main():
    # create environment
    dist1 = Distribution(id=0, vals=[2], probs=[1])
    dist2 = Distribution(id=1, vals=[5], probs=[1])
    dist3 = Distribution(id=2, vals=[2, 8], probs=[0.5, 0.5])
    env = Environment(total_bandwidth=10,
                      distribution_list=[dist1, dist2, dist3],
                      mu_list=[1, 2, 3], lambda_list=[3, 2, 1],
                      num_of_each_type_distribution_list=[300, 300, 300])
    # env = gym.make('CartPole-v0')
    # env = env.unwrapped  # cancel the minimum score limit
    # obs_dim = env.observation_space.shape[0]
    # act_dim = env.action_space.n
    obs_dim = 6
    act_dim = 2
    logger.info('obs_dim {}, act_dim {}'.format(obs_dim, act_dim))

    # build the agent following the PARL framework
    model = Model(act_dim=act_dim)
    alg = PolicyGradient(model, lr=LEARNING_RATE)
    agent = Agent(alg, obs_dim=obs_dim, act_dim=act_dim)

    # load a previously saved model
    if os.path.exists('./policy_grad_model.ckpt'):
        agent.restore('./policy_grad_model.ckpt')
        # run_episode(env, agent, train_or_test='test', render=True)
        # exit()

    for i in range(1000):
        obs_list, action_list, reward_list = run_episode(env, agent)
        if i % 10 == 0:
            logger.info("Episode {}, Reward Sum {}.".format(i, sum(reward_list)))

        batch_obs = np.array(obs_list)
        batch_action = np.array(action_list)
        batch_reward = calc_reward_to_go(reward_list, gamma=0.9)

        agent.learn(batch_obs, batch_action, batch_reward)
        if (i + 1) % 100 == 0:
            total_reward = evaluate(env, agent, render=True)
            logger.info('Test reward: {}'.format(total_reward))

    # save the parameters to ./policy_grad_model.ckpt
    agent.save('./policy_grad_model.ckpt')
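# calc_reward_to_go is referenced above but not shown; a minimal sketch of the
# discounted reward-to-go it presumably computes (an assumption; some
# implementations also normalize the result):
import numpy as np

def calc_reward_to_go(reward_list, gamma=0.9):
    # G_t = r_t + gamma * G_{t+1}, accumulated backwards over the episode
    rewards = np.array(reward_list, dtype=np.float64)
    for i in range(len(rewards) - 2, -1, -1):
        rewards[i] += gamma * rewards[i + 1]
    return rewards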
def __init__(self, config, channel, x, y):
    """
    Constructor.
    :param config: the set of configs loaded by the simulator
    :param channel: the channel to which frames are sent
    :param x: x position
    :param y: y position
    """
    Module.__init__(self)
    # load configuration parameters
    self.datarate = config.get_param(Node.DATARATE)
    self.queue_size = config.get_param(Node.QUEUE)
    self.interarrival = Distribution(config.get_param(Node.INTERARRIVAL))
    self.size = Distribution(config.get_param(Node.SIZE))
    self.proc_time = Distribution(config.get_param(Node.PROC_TIME))
    self.maxsize = config.get_param(Node.MAXSIZE)
    # queue of packets to be sent
    self.queue = []
    # current state
    self.state = Node.IDLE
    self.logger.log_state(self, Node.IDLE)
    # save position
    self.x = x
    self.y = y
    # save channel
    self.channel = channel
    # current packet being either sent or received
    self.current_pkt = None
    # count the number of frames currently under reception
    self.receiving_count = 0
    # timeout event used to avoid being stuck in the RX state
    self.timeout_rx_event = None
    # timeout used for the p-persistence
    self.timeout_wt_event = None
    # time needed to transmit a packet with the maximum size
    self.packet_max_tx_time = self.maxsize * 8.0 / self.datarate
    # p-persistence probability [simple carrier sensing]
    self.p_persistence = float(config.get_param(Node.PERSISTENCE))
    # timeout time for the rx timeout event: the time needed to transmit a
    # packet of the maximum size plus a small margin of 10 microseconds
    self.timeout_time = self.packet_max_tx_time + 10e-6
    # determine the type of propagation
    self.realistic_propagation = config.get_param(Node.PROPAGATION) == "realistic"
def show_distribution(self):
    try:
        filename = f'{self.fraud_target.username}_dataframe.csv'
        dist = Distribution(filename)
        dist.get_distribution()
    except AttributeError:
        print('No dataframe .csv found. Could not retrieve distribution.')
    except Exception:
        print('An error occurred while building the distribution.')
def arrival_rate_distribution(random_state=None):
    # alternative exponential interarrival scales kept for experimentation:
    # car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1 / 36), random_state=random_state)
    # car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1 / 24), random_state=random_state)
    car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1 / 12),
                                              random_state=random_state)
    # car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1 / 4), random_state=random_state)
    # car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1 / 2), random_state=random_state)
    # car_interarrival_rate_dist = Distribution(stats.expon(loc=0, scale=1), random_state=random_state)
    return car_interarrival_rate_dist
def main(): """Main function for calling others""" parsing = ArgumentParser() rooms_file, students_file = parsing.args_info() example = FileReader() rooms = example.file_reader(rooms_file) students = example.file_reader(students_file) new_info = Distribution(rooms, students).student_distribution() result = JsonExporter(new_info).unloading() print(result)
def assign_preferences(n=config.NUM_AGENTS, num_items_assigned=config.NUM_AGENTS):
    # create means
    item_means = [
        Distribution(config.ITEM_MEAN, config.ITEM_VAR).sample()
        for _ in range(n)
    ]
    logging.debug("Item means: " + str(item_means))

    # create agents and shuffle their order
    agents = [Agent(i) for i in range(n)]
    random.shuffle(agents)

    # assign agents preferences
    for agent in agents:
        agent.cardinal_prefs = [
            Distribution(item_mean, config.PREFERENCE_VAR).sample()
            for item_mean in item_means
        ]
    logging.debug("Cardinal Prefs: " + str(agents[0].cardinal_prefs))
    logging.debug("correlation between means and preferences is " +
                  str(np.corrcoef(item_means, agents[0].cardinal_prefs)[0, 1]))

    # sort agents' preferences
    for agent in agents:
        agent.ordinal_prefs = [
            sorted(agent.cardinal_prefs).index(x) for x in agent.cardinal_prefs
        ]
    logging.debug(agents[0].cardinal_prefs)
    logging.debug(agents[0].ordinal_prefs)

    # for TTC, every agent owns a house a priori
    for i in range(num_items_assigned):
        agents[i].item = i

    return agents
def __init__(self, config, channel, x, y):
    """
    Constructor.

    The node is implemented as a finite state machine: a dictionary maps
    pairs (State, Event) to a transition function t(E) -> E, where E is the
    event to handle. The FSM moves to the state returned by the function;
    if the function returns None, the FSM remains in the same state.

    :param config: the set of configs loaded by the simulator
    :param channel: the channel to which frames are sent
    :param x: x position
    :param y: y position
    """
    Module.__init__(self)
    # load configuration parameters
    self.datarate = config.get_param(FSMNode.DATARATE)
    self.queue_size = config.get_param(FSMNode.QUEUE)
    self.interarrival = Distribution(config.get_param(FSMNode.INTERARRIVAL))
    self.size = Distribution(config.get_param(FSMNode.SIZE))
    self.proc_time = Distribution(config.get_param(FSMNode.PROC_TIME))
    # a slot lasts the maximum time a packet would take to be transmitted
    max_pkt_time = (config.get_param(FSMNode.SIZE)[Distribution.MAX] * 8) / self.datarate
    prop_delay = config.get_param(Channel.PAR_RANGE) / Channel.SOL
    self.slot_duration = max_pkt_time + prop_delay
    # the slots distribution for a node
    self.slots = Distribution({"distribution": "unif",
                               "int": True,
                               "min": 0,
                               "max": config.get_param(FSMNode.MAXSLOTS)})
    # save position
    self.x = x
    self.y = y
    # save channel
    self.channel = channel
    # queue of packets to be sent
    self.queue = []
def get(self):
    key = self.get_argument('key')
    try:
        dist = Distribution(key).get_dist()
    except KeyError:
        return self.finish({
            "status_code": 404,
            "data": [],
            "error_message": "Could not find distribution in Forget Table"
        })
    return self.finish({
        "status_code": 200,
        "data": [{
            "bin": bin_name,
            "probability": probability
        } for bin_name, probability in dist.items()]
    })
def data_lines(cls, scenario):
    data_string = ""
    data_items = len(scenario.dataList)
    dist = Distribution()
    uniform_distribution = dist.percentage_distribution(
        UniformDistribution(), data_items)
    binomial_distribution = dist.percentage_distribution(
        BinomialDistribution(), data_items)
    data_string += SaveAnalysis.leading_line(binomial_distribution, scenario,
                                             uniform_distribution)
    data_string += SaveAnalysis.measurement_lines(binomial_distribution,
                                                  scenario,
                                                  uniform_distribution)
    data_string += SaveAnalysis.trailing_line(scenario)
    return data_string
def test_condition():
    passed_count = 0
    print("Testing conditioning.\n"
          "Computing conditional distribution of letters, conditional on vowels.")
    d = Distribution(letters)
    d.condition(vowel_set)
    print("Expected distribution:\n", vowels)
    print("Result:\n", d.d)
    passed = True
    for key in vowels:
        if not math.isclose(vowels[key], d.d[key]):
            passed = False
            print("Probability of", key, d.d[key],
                  "does not match expected probability", vowels[key])
    if passed:
        print("Test PASSED")
        passed_count += 1
    else:
        print("Test FAILED")
    return passed_count
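# The test expects condition() to restrict the distribution to the given event
# and renormalize: P(x | A) = P(x) / P(A) for x in A. A minimal sketch under
# the same `self.d` assumption as the earlier sketches:
def condition(self, event):
    # drop outcomes outside the event, then renormalize the remainder
    self.d = {key: value for key, value in self.d.items() if key in event}
    total = sum(self.d.values())
    for key in self.d:
        self.d[key] /= total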
def get(self):
    key = self.get_argument('key')
    bin_name = self.get_argument('bin')
    try:
        self.finish({
            "status_code": 200,
            "data": [{
                "bin": bin_name,
                "probability": Distribution(key).get_bin(bin_name)
            }]
        })
    except ValueError:
        self.finish({
            "status_code": 404,
            "data": [],
            "error_message": "Could not find bin in distribution"
        })
    except KeyError:
        self.finish({
            "status_code": 404,
            "data": [],
            "error_message": "Could not find distribution in Forget Table"
        })
def most_probable_adjacent(self, count: int, max_distance: int = 2) -> dict:
    """
    For each element el, edit it by 1, 2, ... (in the edit-distance model --
    see Models in models.py) and credit each edited code with prob/d, where
    d is the edit distance. Then sum up every one of those transitions (or
    adjacencies) and create a distribution out of the resulting "histogram".

    :param count: how many to include for each index in the set of most
        probable digits
    :param max_distance: how far to take the adjacency; if 3, then for each
        element el we look at all codes at distance 1 from el, at distance 2,
        then at distance 3
    """
    histogram = {}
    # clamp the maximum distance to [1, 9] and ensure it is an integer
    max_distance = int(max(min(max_distance, 9), 1))
    for el in range(10 ** self._digit_count):
        s_el = Models.extend_integer(el, self._digit_count)
        prob = self.prob(el)
        histogram[el] = histogram.get(el, 0) + prob
        # add histogram contributions for each valid edit distance
        for edit_distance in range(1, max_distance + 1):
            for el_with_edit in CombinationLockCracker.generate_edits(
                    s_el, distance=edit_distance):
                i_el = int(el_with_edit)
                histogram[i_el] = histogram.get(i_el, 0) + (prob / edit_distance)
    # finally, return the most probable elements from a new
    # distribution created from the histogram
    dist = Distribution(histogram)
    most_probable = dist.most_probable(count)
    return {el: dist.prob(el) for el in most_probable}
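# A short usage sketch (the constructor argument is hypothetical; only the
# most_probable_adjacent signature comes from the method above):
cracker = CombinationLockCracker(digit_count=4)  # assumed constructor
top_guesses = cracker.most_probable_adjacent(count=5, max_distance=2)
for code, prob in sorted(top_guesses.items(), key=lambda kv: -kv[1]):
    print(f"{code:04d}: {prob:.4f}")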
def test_distribution_count(self):
    instance = BinomialDistribution
    exp = Distribution()

    distribution = exp.frequency_distribution(instance, 1)
    self.assertEqual(distribution[0], 1)
    self.assertEqual(distribution[1], 1)

    distribution = exp.frequency_distribution(instance, 2)
    self.assertEqual(distribution[0], 1)
    self.assertEqual(distribution[1], 2)
    self.assertEqual(distribution[2], 1)

    distribution = exp.frequency_distribution(instance, 3)
    self.assertEqual(distribution[0], 1)
    self.assertEqual(distribution[1], 3)
    self.assertEqual(distribution[2], 3)
    self.assertEqual(distribution[3], 1)

    distribution = exp.frequency_distribution(instance, 4)
    self.assertEqual(distribution[0], 1)
    self.assertEqual(distribution[1], 4)
    self.assertEqual(distribution[2], 6)
    self.assertEqual(distribution[3], 4)
    self.assertEqual(distribution[4], 1)
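# The expected values are rows of Pascal's triangle, i.e. binomial
# coefficients C(n, k). A minimal sketch of a frequency distribution that
# would satisfy these assertions (an assumption; the real implementation
# may build the row differently):
import math

def binomial_frequency_row(n):
    # n-th row of Pascal's triangle: C(n, 0), C(n, 1), ..., C(n, n)
    return [math.comb(n, k) for k in range(n + 1)]

assert binomial_frequency_row(4) == [1, 4, 6, 4, 1]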
@classmethod
def convolve(cls, node_interp, branch_interp, max_or_integral='integral',
             n_integral=100, inverse_time=True, rel_tol=0.05, yc=10):
    r'''
    calculate H(t) = \int_tau f(t-tau)g(tau) if inverse_time=True
              H(t) = \int_tau f(t+tau)g(tau) if inverse_time=False

    This function determines the time points of the grid of the result
    to ensure an accurate approximation.
    '''
    if max_or_integral not in ['max', 'integral']:
        raise Exception("Max_or_integral expected to be 'max' or 'integral', got "
                        + str(max_or_integral) + " instead.")

    def conv_in_point(time_point):
        if max_or_integral == 'integral':
            # compute integral of the convolution
            return _evaluate_convolution(time_point, node_interp, branch_interp,
                                         n_integral=n_integral,
                                         return_log=True,
                                         inverse_time=inverse_time)
        else:
            # compute max of the convolution
            return _max_of_integrand(time_point, node_interp, branch_interp,
                                     return_log=True,
                                     inverse_time=inverse_time)

    # estimate peak and width
    joint_fwhm = node_interp.fwhm + branch_interp.fwhm
    min_fwhm = min(node_interp.fwhm, branch_interp.fwhm)
    # determine support of the resulting convolution:
    # in order to be positive, the flipped support of f, shifted by t, and g need to overlap
    if inverse_time:
        new_peak_pos = node_interp.peak_pos + branch_interp.peak_pos
        tmin = node_interp.xmin + branch_interp.xmin
        tmax = node_interp.xmax + branch_interp.xmax
    else:
        new_peak_pos = node_interp.peak_pos - branch_interp.peak_pos
        tmin = node_interp.xmin - branch_interp.xmax
        tmax = node_interp.xmax - branch_interp.xmin

    # make initial node grid consisting of linearly spaced points around
    # the center and quadratically spaced points at either end
    n_grid_points = ttconf.NODE_GRID_SIZE
    n = n_grid_points // 3  # integer point count for np.linspace
    center_width = 3 * joint_fwhm
    grid_center = new_peak_pos + np.linspace(-1, 1, n) * center_width

    # add the right and left grid if needed
    right_range = tmax - grid_center[-1]
    if right_range > 4 * center_width:
        grid_right = grid_center[-1] + right_range * (np.linspace(0, 1, n) ** 2.0)
    elif right_range > 0:
        # use a linear grid when right_range is comparable to center_width
        grid_right = grid_center[-1] + right_range * np.linspace(
            0, 1, int(min(n, 1 + 0.5 * n * right_range / center_width)))
    else:
        grid_right = []

    left_range = grid_center[0] - tmin
    if left_range > 4 * center_width:
        grid_left = tmin + left_range * (np.linspace(0, 1, n) ** 2.0)
    elif left_range > 0:
        grid_left = tmin + left_range * np.linspace(
            0, 1, int(min(n, 1 + 0.5 * n * left_range / center_width)))
    else:
        grid_left = []

    if tmin > -1:
        grid_zero_left = tmin + (tmax - tmin) * np.linspace(0, 0.01, 11) ** 2
    else:
        grid_zero_left = [tmin]
    if tmax < 1:
        grid_zero_right = tmax - (tmax - tmin) * np.linspace(0, 0.01, 11) ** 2
    else:
        grid_zero_right = [tmax]

    # make grid and calculate convolution
    t_grid_0 = np.unique(np.concatenate([
        grid_zero_left, grid_left[:-1], grid_center, grid_right[1:], grid_zero_right
    ]))
    t_grid_0 = t_grid_0[(t_grid_0 > tmin - ttconf.TINY_NUMBER) &
                        (t_grid_0 < tmax + ttconf.TINY_NUMBER)]

    # res_0 - the values of the convolution (integral or max)
    # t_0 - the value at which res_0 achieves its maximum
    # (when determining the maximum of the integrand; otherwise meaningless)
    res_0, t_0 = np.array([conv_in_point(t_val) for t_val in t_grid_0]).T

    # refine grid as necessary and add new points;
    # calculate interpolation error at all internal points [2:-2],
    # because the end points are sometimes off scale
    interp_error = np.abs(res_0[3:-1] + res_0[1:-3] - 2 * res_0[2:-2])
    # determine the number of extra points needed; the criterion depends
    # on the distance dy from the peak
    dy = res_0[2:-2] - res_0.min()
    dx = np.diff(t_grid_0)
    refine_factor = np.minimum(
        np.minimum(
            np.array(np.floor(np.sqrt(interp_error / (rel_tol * (1 + (dy / yc) ** 4)))),
                     dtype=int),
            np.array(100 * (dx[1:-2] + dx[2:-1]) / min_fwhm, dtype=int)),
        10)

    insert_point_idx = np.zeros(interp_error.shape[0] + 1, dtype=int)
    insert_point_idx[1:] = refine_factor
    insert_point_idx[:-1] += refine_factor

    # add additional points if there are any to add
    if np.sum(insert_point_idx):
        add_x = np.concatenate([
            np.linspace(t1, t2, n + 2)[1:-1]
            for t1, t2, n in zip(t_grid_0[1:-2], t_grid_0[2:-1], insert_point_idx)
            if n > 0
        ])
        # calculate convolution at these points
        add_y, add_t = np.array([conv_in_point(t_val) for t_val in add_x]).T
        t_grid_0 = np.concatenate((t_grid_0, add_x))
        res_0 = np.concatenate((res_0, add_y))
        t_0 = np.concatenate((t_0, add_t))

    # instantiate the new interpolation object and return
    res_y = cls(t_grid_0, res_0, is_log=True, kind='linear')
    # the interpolation object which stores the value of the grid that
    # maximizes the convolution (for the 'max' option), or a flat -1
    # distribution (for the 'integral' option); this grid is the optimal
    # branch length
    res_t = Distribution(t_grid_0, t_0, is_log=True, kind='linear')
    return res_y, res_t
def _ml_t_marginal(self, assign_dates=False):
    """
    Compute the marginal probability distribution of the internal nodes positions by
    propagating from the tree leaves towards the root. The result of
    this operation are the probability distributions of each internal node,
    conditional on the constraints on all leaves of the tree, which have sampling dates.
    The probability distributions are set as marginal_pos_LH attributes to the nodes.

    Parameters
    ----------
    assign_dates : bool, default False
        If True, the inferred dates will be assigned to the nodes as
        :code:`time_before_present` attributes, and their branch lengths
        will be corrected accordingly.

        .. Note::
            Normally, the dates are assigned by running joint reconstruction.

    Returns
    -------
    None
        Every internal node is assigned the probability distribution in form
        of an interpolation object and sends this distribution further towards the root.
    """

    def _cleanup():
        for node in self.tree.find_clades():
            try:
                del node.marginal_pos_Lx
                del node.subtree_distribution
                del node.msg_from_parent
                # del node.marginal_pos_LH
            except AttributeError:
                pass

    self.logger("ClockTree - Marginal reconstruction: Propagating leaves -> root...", 2)
    # go through the nodes from leaves towards the root:
    for node in self.tree.find_clades(order='postorder'):  # children first, msg to parents
        if node.bad_branch:
            # no information
            node.marginal_pos_Lx = None
        else:  # all other nodes
            if node.date_constraint is not None and node.date_constraint.is_delta:
                # there is a time constraint
                # initialize the Lx for nodes with precise date constraint:
                # subtree probability given the position of the parent node;
                # position of the parent node is given by the branch length
                # distribution attached to the child node position
                node.subtree_distribution = node.date_constraint
                bl = node.branch_length_interpolator.x
                x = bl + node.date_constraint.peak_pos
                node.marginal_pos_Lx = Distribution(x, node.branch_length_interpolator(bl),
                                                    min_width=self.min_width, is_log=True)
            else:
                # all nodes without precise constraint but positional information:
                # subtree likelihood given the node's constraint and child msgs
                msgs_to_multiply = [node.date_constraint] if node.date_constraint is not None else []
                msgs_to_multiply.extend([child.marginal_pos_Lx for child in node.clades
                                         if child.marginal_pos_Lx is not None])

                # combine the different msgs and constraints
                if len(msgs_to_multiply) == 0:
                    # no information
                    node.marginal_pos_Lx = None
                    continue
                elif len(msgs_to_multiply) == 1:
                    node.subtree_distribution = msgs_to_multiply[0]
                else:
                    # combine the different msgs and constraints
                    node.subtree_distribution = Distribution.multiply(msgs_to_multiply)

                if node.up is None:
                    # this is the root, set dates
                    node.subtree_distribution._adjust_grid(rel_tol=self.rel_tol_prune)
                    node.marginal_pos_Lx = node.subtree_distribution
                    node.marginal_pos_LH = node.subtree_distribution
                    self.tree.positional_marginal_LH = -node.subtree_distribution.peak_val
                else:
                    # otherwise propagate to parent
                    res, res_t = NodeInterpolator.convolve(node.subtree_distribution,
                                                           node.branch_length_interpolator,
                                                           max_or_integral='integral',
                                                           n_grid_points=self.node_grid_points,
                                                           n_integral=self.n_integral,
                                                           rel_tol=self.rel_tol_refine)
                    res._adjust_grid(rel_tol=self.rel_tol_prune)
                    node.marginal_pos_Lx = res

    self.logger("ClockTree - Marginal reconstruction: Propagating root -> leaves...", 2)
    from scipy.interpolate import interp1d
    for node in self.tree.find_clades(order='preorder'):
        # the root node
        if node.up is None:
            node.msg_from_parent = None  # nothing beyond the root
        # all other cases (all internal nodes + unconstrained terminals)
        else:
            parent = node.up
            # messages from the complementary subtree (iterate over all sister nodes)
            complementary_msgs = [sister.marginal_pos_Lx for sister in parent.clades
                                  if (sister != node) and (sister.marginal_pos_Lx is not None)]

            # if the parent itself got something from the root node, include it
            if parent.msg_from_parent is not None:
                complementary_msgs.append(parent.msg_from_parent)
            elif parent.marginal_pos_Lx is not None:
                complementary_msgs.append(parent.marginal_pos_LH)

            if len(complementary_msgs):
                msg_parent_to_node = NodeInterpolator.multiply(complementary_msgs)
                msg_parent_to_node._adjust_grid(rel_tol=self.rel_tol_prune)
            else:
                from utils import numeric_date
                x = [parent.numdate, numeric_date()]
                msg_parent_to_node = NodeInterpolator(x, [1.0, 1.0], min_width=self.min_width)

            # integral message, which delivers to the node the positional information
            # from the complementary subtree
            res, res_t = NodeInterpolator.convolve(msg_parent_to_node,
                                                   node.branch_length_interpolator,
                                                   max_or_integral='integral',
                                                   inverse_time=False,
                                                   n_grid_points=self.node_grid_points,
                                                   n_integral=self.n_integral,
                                                   rel_tol=self.rel_tol_refine)

            node.msg_from_parent = res
            if node.marginal_pos_Lx is None:
                node.marginal_pos_LH = node.msg_from_parent
            else:
                node.marginal_pos_LH = NodeInterpolator.multiply(
                    (node.msg_from_parent, node.subtree_distribution))

            self.logger('ClockTree._ml_t_root_to_leaves: computed convolution'
                        ' with %d points at node %s' % (len(res.x), node.name), 4)

            if self.debug:
                tmp = np.diff(res.y - res.peak_val)
                nsign_changed = np.sum((tmp[1:] * tmp[:-1] < 0) &
                                       (res.y[1:-1] - res.peak_val < 500))
                if nsign_changed > 1:
                    import matplotlib.pyplot as plt
                    plt.ion()
                    plt.plot(res.x, res.y - res.peak_val, '-o')
                    plt.plot(res.peak_pos - node.branch_length_interpolator.x,
                             node.branch_length_interpolator(node.branch_length_interpolator.x)
                             - node.branch_length_interpolator.peak_val, '-o')
                    plt.plot(msg_parent_to_node.x,
                             msg_parent_to_node.y - msg_parent_to_node.peak_val, '-o')
                    plt.ylim(0, 100)
                    plt.xlim(-0.05, 0.05)
                    import ipdb; ipdb.set_trace()

        # assign positions of nodes and branch lengths only when desired,
        # since marginal reconstruction can result in negative branch lengths
        if assign_dates:
            node.time_before_present = node.marginal_pos_LH.peak_pos
            if node.up:
                node.clock_length = node.up.time_before_present - node.time_before_present
                node.branch_length = node.clock_length

        # construct the inverse cumulative distribution to evaluate confidence intervals
        if node.marginal_pos_LH.is_delta:
            node.marginal_inverse_cdf = interp1d([0, 1],
                                                 node.marginal_pos_LH.peak_pos * np.ones(2),
                                                 kind="linear")
        else:
            dt = np.diff(node.marginal_pos_LH.x)
            y = node.marginal_pos_LH.prob_relative(node.marginal_pos_LH.x)
            int_y = np.concatenate(([0], np.cumsum(dt * (y[1:] + y[:-1]) / 2.0)))
            int_y /= int_y[-1]
            node.marginal_inverse_cdf = interp1d(int_y, node.marginal_pos_LH.x, kind="linear")
            node.marginal_cdf = interp1d(node.marginal_pos_LH.x, int_y, kind="linear")

    if not self.debug:
        _cleanup()

    return
def _convolution_integrand(t_val, f, g, inverse_time=None, return_log=False):
    '''
    Evaluates int_tau f(t+tau)*g(tau) or
    int_tau f(t-tau)*g(tau) if inverse_time is True

    Parameters
    -----------
    t_val : double
        Time point

    f : Interpolation object
        First multiplier in convolution

    g : Interpolation object
        Second multiplier in convolution

    inverse_time : bool, None
        Time direction. If True, f(t-tau)*g(tau) is calculated,
        otherwise f(t+tau)*g(tau)

    return_log : bool
        If True, the logarithm will be returned

    Returns
    -------
    FG : Distribution
        The function to be integrated as a Distribution object (interpolator)
    '''
    if inverse_time is None:
        raise Exception("Inverse time argument must be set!")

    # determine integration boundaries:
    if inverse_time:
        # tau > g.xmin and t - tau < f.xmax
        tau_min = max(t_val - f.xmax, g.xmin)
        # tau < g.xmax and t - tau > f.xmin
        tau_max = min(t_val - f.xmin, g.xmax)
    else:
        # tau > g.xmin and t + tau > f.xmin
        tau_min = max(f.xmin - t_val, g.xmin)
        # tau < g.xmax and t + tau < f.xmax
        tau_max = min(f.xmax - t_val, g.xmax)

    if tau_max <= tau_min:
        # functions do not overlap
        if return_log:
            return ttconf.BIG_NUMBER
        else:
            return 0.0
    else:
        # create the tau-grid for the interpolation object in the overlap region
        if inverse_time:
            tau = np.unique(np.concatenate((g.x, t_val - f.x, [tau_min, tau_max])))
        else:
            tau = np.unique(np.concatenate((g.x, f.x - t_val, [tau_min, tau_max])))

        tau = tau[(tau > tau_min - ttconf.TINY_NUMBER) &
                  (tau < tau_max + ttconf.TINY_NUMBER)]
        if len(tau) < 10:
            tau = np.linspace(tau_min, tau_max, 10)

        if inverse_time:
            # add negative logarithms
            tnode = t_val - tau
            fg = f(tnode) + g(tau, tnode=tnode)
        else:
            fg = f(t_val + tau) + g(tau, tnode=t_val)

        # create the interpolation object on this grid
        FG = Distribution(tau, fg, is_log=True, kind='linear')
        return FG
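# The boundary logic above follows from requiring both factors to lie inside
# their supports. For the inverse-time integrand f(t - tau) * g(tau):
#   tau in [g.xmin, g.xmax]  and  t - tau in [f.xmin, f.xmax],
#   the latter being equivalent to tau in [t - f.xmax, t - f.xmin],
# hence tau_min = max(t - f.xmax, g.xmin) and tau_max = min(t - f.xmin, g.xmax);
# the direct-time case replaces t - tau with t + tau.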
def _ml_t_joint(self):
    """
    Compute the joint maximum likelihood assignment of the internal nodes positions by
    propagating from the tree leaves towards the root. Given the assignment of parent nodes,
    reconstruct the maximum-likelihood positions of the child nodes by propagating
    from the root to the leaves.
    The result of this operation is the time_before_present value, which is the position
    of the node, expressed in the units of the branch length, and scaled from the present-day.
    The value is assigned to the corresponding attribute of each node of the tree.

    Returns
    -------
    None
        Every internal node is assigned the probability distribution in form
        of an interpolation object and sends this distribution further towards the root.
    """

    def _cleanup():
        for node in self.tree.find_clades():
            del node.joint_pos_Lx
            del node.joint_pos_Cx

    self.logger("ClockTree - Joint reconstruction: Propagating leaves -> root...", 2)
    # go through the nodes from leaves towards the root:
    for node in self.tree.find_clades(order='postorder'):  # children first, msg to parents
        # Lx is the maximal likelihood of a subtree given the parent position
        # Cx is the branch length corresponding to the maximally likely subtree
        if node.bad_branch:
            # no information at the node
            node.joint_pos_Lx = None
            node.joint_pos_Cx = None
        else:  # all other nodes
            if node.date_constraint is not None and node.date_constraint.is_delta:
                # there is a time constraint:
                # subtree probability given the position of the parent node;
                # Lx.x is the position of the parent node
                # Lx.y is the probability of the subtree (consisting of one
                # terminal node in this case)
                # Cx.y is the branch length corresponding to the optimal subtree
                bl = node.branch_length_interpolator.x
                x = bl + node.date_constraint.peak_pos
                node.joint_pos_Lx = Distribution(x, node.branch_length_interpolator(bl),
                                                 min_width=self.min_width, is_log=True)
                # map back to the branch length
                node.joint_pos_Cx = Distribution(x, bl, min_width=self.min_width)
            else:
                # all nodes without precise constraint but positional information
                msgs_to_multiply = [node.date_constraint] if node.date_constraint is not None else []
                msgs_to_multiply.extend([child.joint_pos_Lx for child in node.clades
                                         if child.joint_pos_Lx is not None])

                # subtree likelihood given the node's constraint and child messages
                if len(msgs_to_multiply) == 0:
                    # there are no constraints
                    node.joint_pos_Lx = None
                    node.joint_pos_Cx = None
                    continue
                elif len(msgs_to_multiply) > 1:
                    # combine the different msgs and constraints
                    subtree_distribution = Distribution.multiply(msgs_to_multiply)
                else:
                    # there is exactly one constraint
                    subtree_distribution = msgs_to_multiply[0]

                if node.up is None:
                    # this is the root, set dates
                    subtree_distribution._adjust_grid(rel_tol=self.rel_tol_prune)
                    # set root position and joint likelihood of the tree
                    node.time_before_present = subtree_distribution.peak_pos
                    node.joint_pos_Lx = subtree_distribution
                    node.joint_pos_Cx = None
                    node.clock_length = node.branch_length
                else:
                    # otherwise propagate to parent
                    res, res_t = NodeInterpolator.convolve(subtree_distribution,
                                                           node.branch_length_interpolator,
                                                           max_or_integral='max',
                                                           inverse_time=True,
                                                           n_grid_points=self.node_grid_points,
                                                           n_integral=self.n_integral,
                                                           rel_tol=self.rel_tol_refine)
                    res._adjust_grid(rel_tol=self.rel_tol_prune)
                    node.joint_pos_Lx = res
                    node.joint_pos_Cx = res_t

    # go through the nodes from root towards the leaves and assign joint ML positions:
    self.logger("ClockTree - Joint reconstruction: Propagating root -> leaves...", 2)
    for node in self.tree.find_clades(order='preorder'):  # root first, msgs to children
        if node.up is None:
            # root node: the position was already set on the previous step
            continue

        if node.joint_pos_Cx is None:
            # no constraints or branch is bad - reconstruct from the branch length interpolator
            node.branch_length = node.branch_length_interpolator.peak_pos
        elif isinstance(node.joint_pos_Cx, Distribution):
            # NOTE the Lx distribution is the likelihood, given the position of the parent
            # (Lx.x = parent position, Lx.y = LH of the node_pos given Lx.x,
            # the length of the branch corresponding to the most likely
            # subtree is node.Cx(node.time_before_present))
            subtree_LH = node.joint_pos_Lx(node.up.time_before_present)
            node.branch_length = node.joint_pos_Cx(max(node.joint_pos_Cx.xmin,
                                                       node.up.time_before_present)
                                                   + ttconf.TINY_NUMBER)

        node.time_before_present = node.up.time_before_present - node.branch_length
        node.clock_length = node.branch_length

        # just a sanity check, should never happen:
        if node.branch_length < 0 or node.time_before_present < 0:
            if node.branch_length < 0 and node.branch_length > -ttconf.TINY_NUMBER:
                self.logger("ClockTree - Joint reconstruction: "
                            "correcting rounding error of %s" % node.name, 4)
                node.branch_length = 0

    self.tree.positional_joint_LH = self.timetree_likelihood()
    # cleanup, if required
    if not self.debug:
        _cleanup()