def __init__(self, src_id, seed, t_min, Wm, Wh, Wr, Wt, Bh, sim_opts,
             wt, vt, bt, init_h, src_embed_map, algo_feed=False,
             algo_feed_args=None, algo_c=1.0):
    """Broadcaster driven by a trained RNN, for multi-process simulation.

    The Wm/Wh/Wr/Wt/Bh matrices are the trained recurrent weights, while
    (wt, vt, bt, init_h) parameterize the exponential intensity sampler.
    """
    super(ExpRecurrentBroadcasterMP, self).__init__(src_id, seed)

    self.sink_ids = sim_opts.sink_ids
    self.end_time = sim_opts.end_time
    self.init = False

    # Trained RNN weights; used to compute h_next after each event.
    self.Wm, self.Wh, self.Wr, self.Wt, self.Bh = Wm, Wh, Wr, Wt, Bh
    self.cur_h = init_h
    self.src_embed_map = src_embed_map

    # Algorithmic-feed configuration and bookkeeping.
    self.algo_feed = algo_feed
    self.algo_feed_args = algo_feed_args
    self.algo_ranks = []
    self.c_is = []
    self.time_deltas = []
    self.algo_c = algo_c

    # Parameters needed by the intensity sampler.
    self.params = Deco.Options(wt=wt, vt=vt, bt=bt, init_h=init_h)
    self.exp_sampler = ExpCDFSampler(_opts=self.params,
                                     t_min=t_min,
                                     seed=seed + 1)
def __init__(self, src_id, seed, trainer, t_min=0):
    """Broadcaster that reads its sampler parameters from a live trainer.

    The exponential-sampler parameters are fetched once from the trainer's
    TensorFlow session at construction time.
    """
    super(ExpRecurrentBroadcaster, self).__init__(src_id, seed)
    self.init = False
    self.trainer = trainer

    # Pull the current values of the sampler parameters out of the session.
    fetched = self.trainer.sess.run({
        'wt': trainer.tf_wt,
        'vt': trainer.tf_vt,
        'bt': trainer.tf_bt,
        'init_h': trainer.tf_h,
    })
    self.params = Deco.Options(**fetched)

    self.cur_h = self.params.init_h
    self.exp_sampler = ExpCDFSampler(_opts=self.params,
                                     t_min=t_min,
                                     seed=seed + 1)
# Default hyper-parameters and initial weight values for the RMTPP model.
# Weights that depend on the (runtime-known) number of categories are given
# as lambdas taking `num_categories`.
def_opts = Deco.Options(
    batch_size=64,            # 16, 32, 64
    learning_rate=0.1,        # 0.1, 0.01, 0.001
    momentum=0.9,
    decay_steps=100,
    decay_rate=0.001,
    l2_penalty=0.001,         # Unused

    float_type=tf.float32,

    seed=42,
    scope='RMTPP',
    save_dir='./save.rmtpp/',
    summary_dir='./summary.rmtpp/',

    device_gpu='/gpu:0',
    device_cpu='/cpu:0',

    bptt=20,                  # Truncated-BPTT window length
    cpu_only=False,

    embed_size=__EMBED_SIZE,
    # Category embedding matrix; fixed RandomState(42) keeps the
    # initialization reproducible across runs.
    Wem=lambda num_categories: np.random.RandomState(42).randn(
        num_categories, __EMBED_SIZE) * 0.01,

    # Initial values for the recurrent / output weights.
    Wt=np.ones((1, __HIDDEN_LAYER_SIZE)) * 1e-3,
    Wh=np.eye(__HIDDEN_LAYER_SIZE),
    bh=np.ones((1, __HIDDEN_LAYER_SIZE)),
    wt=1.0,
    Wy=np.ones((__EMBED_SIZE, __HIDDEN_LAYER_SIZE)) * 0.0,
    Vy=lambda num_categories: np.ones(
        (__HIDDEN_LAYER_SIZE, num_categories)) * 0.001,
    Vt=np.ones((__HIDDEN_LAYER_SIZE, 1)) * 0.001,
    bt=np.log(1.0),  # bt is provided by the base_rate
    bk=lambda num_categories: np.ones((1, num_categories)) * 0.0)
def find_significance(user_id, user_repository, num_segments=24,
                      segment_length=60 * 60, return_tweet_times=False):
    """Compute time-of-day tweet 'significance' profiles for a user's followers.

    Steps:
      1. Find all the followers of `user_id`.
      2. Collect the tweet times of each follower (only tweets before the
         experiment start time).
      3. Bin them into `num_segments` segments of `segment_length` seconds,
         folding times modulo one period (defaults: 24 one-hour bins/day).

    Args:
        user_id: id of the user whose followers are profiled.
        user_repository: provides get_user_followers / get_user_followees /
            get_user_tweets (tweet times as a NumPy array of timestamps).
        num_segments: number of bins per period.
        segment_length: bin width in seconds.
        return_tweet_times: if True, also return the raw per-follower times.

    Returns:
        Deco.Options with `raw_significance` (counts), `significance`
        (row-normalized), `total_followers`, and `all_tweet_times` when
        requested; or None if the user has too many followers.
    """
    experiment_times = get_start_end_time()
    start_time = experiment_times.start_time  # GMT: Wed, 01 Jul 2009 00:00:00 GMT

    followee_threshold = 500

    user_followers = user_repository.get_user_followers(user_id)
    if len(user_followers) > followee_threshold:
        logging.error('Number of followers is more than %d.',
                      followee_threshold)
        return None

    follower_significance = []
    all_tweet_times = []
    time_period = num_segments * segment_length

    # Not sorting the users here to keep the same order as was recorded
    # for the user initially.
    for follower_id in user_followers:
        num_followees_of_follower = len(
            user_repository.get_user_followees(follower_id))
        if num_followees_of_follower < followee_threshold:
            follower_tweet_times = user_repository.get_user_tweets(follower_id)
            # Only use times before start_time.
            follower_tweet_times = follower_tweet_times[
                follower_tweet_times < start_time]

            follower_tweet_bins = [0] * num_segments

            if return_tweet_times:
                all_tweet_times.append(follower_tweet_times)

            for tweet_time in follower_tweet_times:
                # Fold the time into one period and find its bin. Python's
                # `%` returns a non-negative result even though
                # tweet_time < start_time makes the difference negative.
                bin_idx = int(num_segments *
                              ((tweet_time - start_time) % time_period) /
                              time_period)
                follower_tweet_bins[bin_idx] += 1

            follower_significance.append(follower_tweet_bins)

    # reshape keeps a (0, num_segments) shape when no follower qualified,
    # so the axis-1 reduction below does not raise.
    raw_significance = np.asarray(follower_significance).reshape(
        -1, num_segments)
    total_raw_significance = raw_significance.sum(1)

    # Rows with zero tweets deliberately produce NaNs here; they are filled
    # in below, so silence the expected divide/invalid warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        significance = raw_significance / total_raw_significance[:, None]
    avg_for_others = np.nanmean(significance, axis=0)

    # Fill in the NaNs with the average for the other followers, who have
    # at least one tweet.
    significance[total_raw_significance == 0, :] = avg_for_others

    # Now if there are any NaNs still left (i.e. if nobody tweeted anything)
    # fall back to a uniform profile.
    significance[np.isnan(significance)] = 1.0 / num_segments

    ret = Deco.Options(raw_significance=raw_significance,
                       significance=significance,
                       total_followers=len(user_followers))

    if return_tweet_times:
        ret = ret.set_new(all_tweet_times=all_tweet_times)

    return ret
def get_start_end_time():
    """Return the fixed experiment window as a Deco.Options.

    The window covers 01 Jul 2009 00:00:00 GMT through
    01 Sep 2009 00:00:00 GMT, as Unix timestamps.
    """
    window = {
        'start_time': 1246406400,  # GMT: Wed, 01 Jul 2009 00:00:00 GMT
        'end_time': 1251763200,    # GMT: Tue, 01 Sep 2009 00:00:00 GMT
    }
    return Deco.Options(**window)
# Output headers: # - TraceId # - VoterId # - ParentCommentId , ChildCommentId # - ParentVote , ChildVote # - ArticleId , ArticleTopic # - ParentCommenterId , ChildCommenterId # - ParentCommentTime , ChildCommentTime # - ParentVoteTime , ChildVoteTime DEF_OPTS = Deco.Options(N=10, M=100, A=100, K=1, P=1000, verbose=True, voting=COS_SIM, force=False, seed=42) def choose_idx_item(random_state, arr): """Returns a random item and an index.""" idx = random_state.randint(low=0, high=len(arr)) return idx, arr[idx] @click.command() @click.argument('output_path') @click.argument('truth_path')