예제 #1
0
    def __init__(self, src_id, seed, t_min,
                 Wm, Wh, Wr, Wt, Bh, sim_opts,
                 wt, vt, bt, init_h, src_embed_map,
                 algo_feed=False, algo_feed_args=None, algo_c=1.0):
        """Store the recurrent weights and build the sampler for this source.

        Args:
            src_id: Forwarded to the parent constructor.
            seed: Forwarded to the parent constructor; `seed + 1` is handed
                to the sampler.
            t_min: Passed through to `ExpCDFSampler`.
            Wm, Wh, Wr, Wt, Bh: Weights kept on the instance; they are used
                to create the next hidden state.
            sim_opts: Object providing `sink_ids` and `end_time`.
            wt, vt, bt: Parameters packed into `self.params` for the sampler.
            init_h: Initial hidden state; stored both as `self.cur_h` and in
                `self.params`.
            src_embed_map: Stored unchanged on the instance.
            algo_feed: Stored unchanged on the instance.
            algo_feed_args: Stored unchanged on the instance.
            algo_c: Stored unchanged on the instance.
        """
        super(ExpRecurrentBroadcasterMP, self).__init__(src_id, seed)

        # Simulation-wide settings.
        self.sink_ids = sim_opts.sink_ids
        self.end_time = sim_opts.end_time
        self.init = False

        # Weights and state used to create h_next.
        self.Wm, self.Wh, self.Wr, self.Wt, self.Bh = Wm, Wh, Wr, Wt, Bh
        self.cur_h = init_h
        self.src_embed_map = src_embed_map

        # Algorithmic-feed configuration and the per-instance bookkeeping
        # lists, which all start out empty.
        self.algo_feed = algo_feed
        self.algo_feed_args = algo_feed_args
        self.algo_ranks, self.c_is, self.time_deltas = [], [], []
        self.algo_c = algo_c

        # Parameters the sampler needs.
        self.params = Deco.Options(wt=wt, vt=vt, bt=bt, init_h=init_h)

        # The sampler gets its own seed, offset by one from the one given
        # to the parent constructor.
        sampler_seed = seed + 1
        self.exp_sampler = ExpCDFSampler(_opts=self.params,
                                         t_min=t_min,
                                         seed=sampler_seed)
예제 #2
0
    def __init__(self, src_id, seed, trainer, t_min=0):
        """Build the broadcaster from the current values held by `trainer`.

        Args:
            src_id: Forwarded to the parent constructor.
            seed: Forwarded to the parent constructor; `seed + 1` is handed
                to the sampler.
            trainer: Object exposing `sess` (presumably a TensorFlow session;
                verify against the caller) and the tensors `tf_wt`, `tf_vt`,
                `tf_bt` and `tf_h`.
            t_min: Passed through to `ExpCDFSampler`; defaults to 0.
        """
        super(ExpRecurrentBroadcaster, self).__init__(src_id, seed)
        self.init = False
        self.trainer = trainer

        # Only the values the sampler needs are evaluated here; the
        # recurrent weights (Wm, Wh, Bh, Wt, Wr) are not fetched.
        sampler_tensors = {
            'wt': trainer.tf_wt,
            'vt': trainer.tf_vt,
            'bt': trainer.tf_bt,
            'init_h': trainer.tf_h,
        }
        fetched_values = self.trainer.sess.run(sampler_tensors)
        self.params = Deco.Options(**fetched_values)

        # The hidden state starts at the fetched initial value.
        self.cur_h = self.params.init_h

        sampler_seed = seed + 1
        self.exp_sampler = ExpCDFSampler(_opts=self.params,
                                         t_min=t_min,
                                         seed=sampler_seed)
예제 #3
0
# Default options: training hyper-parameters, output locations, devices and
# initial weight values. Entries given as lambdas take `num_categories` and
# build the array once that number is known.
def_opts = Deco.Options(
    # Optimisation settings.
    batch_size=64,  # 16, 32, 64
    learning_rate=0.1,  # 0.1, 0.01, 0.001
    momentum=0.9,
    decay_steps=100,
    decay_rate=0.001,
    l2_penalty=0.001,  # Unused
    float_type=tf.float32,
    seed=42,
    # Naming and output locations.
    scope='RMTPP',
    save_dir='./save.rmtpp/',
    summary_dir='./summary.rmtpp/',
    # Device placement.
    device_gpu='/gpu:0',
    device_cpu='/cpu:0',
    bptt=20,
    cpu_only=False,
    # Initial weights.
    embed_size=__EMBED_SIZE,
    Wem=lambda num_categories: 0.01 * np.random.RandomState(42).randn(
        num_categories, __EMBED_SIZE),
    Wt=np.full((1, __HIDDEN_LAYER_SIZE), 1e-3),
    Wh=np.eye(__HIDDEN_LAYER_SIZE),
    bh=np.ones((1, __HIDDEN_LAYER_SIZE)),
    wt=1.0,
    Wy=np.zeros((__EMBED_SIZE, __HIDDEN_LAYER_SIZE)),
    Vy=lambda num_categories: np.full(
        (__HIDDEN_LAYER_SIZE, num_categories), 0.001),
    Vt=np.full((__HIDDEN_LAYER_SIZE, 1), 0.001),
    bt=np.log(1.0),  # bt is provided by the base_rate
    bk=lambda num_categories: np.zeros((1, num_categories)))
예제 #4
0
def find_significance(user_id,
                      user_repository,
                      num_segments=24,
                      segment_length=60 * 60,
                      return_tweet_times=False):
    """Compute how each follower's past tweets are spread over a time cycle.

    For every follower of `user_id` who follows fewer than 500 accounts, the
    follower's tweets posted before the experiment start time are counted
    into `num_segments` bins of `segment_length` each. The bins wrap around
    with period `num_segments * segment_length`, so the defaults give 24
    one-hour bins per day. Each follower's counts are then normalised to sum
    to one.

    Followers with no tweets before the start time receive the column-wise
    average of the followers who did tweet. If none of them tweeted, they
    receive the uniform distribution `1 / num_segments`.

    Args:
        user_id: User whose followers are analysed.
        user_repository: Object providing `get_user_followers`,
            `get_user_followees` and `get_user_tweets`. Tweet times are
            filtered with a boolean mask, so they are presumably returned as
            a NumPy array of timestamps (verify against the repository).
        num_segments: Number of bins per cycle.
        segment_length: Length of one bin, in the same unit as the tweet
            times.
        return_tweet_times: When true, the result also carries the filtered
            tweet times of every follower that was kept.

    Returns:
        `None` when the user has more than 500 followers. Otherwise a
        `Deco.Options` with:
          - `raw_significance`: integer counts, one row per kept follower
            and one column per bin.
          - `significance`: the row-normalised counts, with the fallbacks
            described above.
          - `total_followers`: number of followers of `user_id`, including
            the ones that were skipped.
          - `all_tweet_times`: only when `return_tweet_times` is true.
    """
    experiment_times = get_start_end_time()
    start_time = experiment_times.start_time  # GMT: Wed, 01 Jul 2009 00:00:00 GMT

    followee_threshold = 500

    user_followers = user_repository.get_user_followers(user_id)

    if len(user_followers) > followee_threshold:
        logging.error('Number of followers is more than 500.')
        return None

    follower_significance = []
    all_tweet_times = []

    time_period = num_segments * segment_length

    # Not sorting the users here to keep the same order as was recorded for
    # the user initially.
    for follower_id in user_followers:
        num_followees_of_follower = len(
            user_repository.get_user_followees(follower_id))

        if num_followees_of_follower >= followee_threshold:
            continue

        follower_tweet_times = user_repository.get_user_tweets(follower_id)
        # Only use times before start_time.
        follower_tweet_times = follower_tweet_times[
            follower_tweet_times < start_time]

        if return_tweet_times:
            all_tweet_times.append(follower_tweet_times)

        follower_tweet_bins = [0] * num_segments
        for tweet_time in follower_tweet_times:
            # The offset is negative (tweets precede start_time); the modulo
            # wraps it into [0, time_period) before scaling to a bin index.
            segment = int(num_segments *
                          ((tweet_time - start_time) % time_period) /
                          time_period)
            follower_tweet_bins[segment] += 1

        follower_significance.append(follower_tweet_bins)

    # The explicit shape keeps the array two-dimensional even when no
    # follower qualified; a bare `np.asarray([])` would be 1-D and make the
    # row-wise sum below fail.
    raw_significance = np.asarray(follower_significance, dtype=int).reshape(
        len(follower_significance), num_segments)
    total_raw_significance = raw_significance.sum(axis=1)

    # Rows without tweets divide 0 by 0; the resulting NaNs are expected and
    # are filled in below, so the floating-point warnings are silenced.
    # `true_divide` guarantees float division for integer counts.
    with np.errstate(divide='ignore', invalid='ignore'):
        significance = np.true_divide(raw_significance,
                                      total_raw_significance[:, None])

    has_tweets = total_raw_significance > 0
    if has_tweets.any():
        # Fill in the NaNs with the average for the other followers, who
        # have at least one tweet.
        significance[~has_tweets, :] = np.nanmean(significance, axis=0)

    # Now if there are any NaNs still left (i.e. if nobody tweeted anything)
    significance[np.isnan(significance)] = 1.0 / num_segments

    ret = Deco.Options(raw_significance=raw_significance,
                       significance=significance,
                       total_followers=len(user_followers))

    if return_tweet_times:
        ret = ret.set_new(all_tweet_times=all_tweet_times)

    return ret
예제 #5
0
def get_start_end_time():
    """Return the experiment window as `Deco.Options(start_time, end_time)`.

    Both values are Unix timestamps in seconds.
    """
    experiment_start = 1246406400  # GMT: Wed, 01 Jul 2009 00:00:00 GMT
    experiment_end = 1251763200  # GMT: Tue, 01 Sep 2009 00:00:00 GMT
    return Deco.Options(start_time=experiment_start, end_time=experiment_end)
예제 #6
0
# Output headers:
#  - TraceId
#  - VoterId
#  - ParentCommentId   , ChildCommentId
#  - ParentVote        , ChildVote
#  - ArticleId         , ArticleTopic
#  - ParentCommenterId , ChildCommenterId
#  - ParentCommentTime , ChildCommentTime
#  - ParentVoteTime    , ChildVoteTime

# Default option values, bundled into a single `Deco.Options` object.
DEF_OPTS = Deco.Options(
    N=10,
    M=100,
    A=100,
    K=1,
    P=1000,
    verbose=True,
    voting=COS_SIM,
    force=False,
    seed=42,
)


def choose_idx_item(random_state, arr):
    """Pick a random position in `arr` and return `(index, item)`.

    The index is drawn with `random_state.randint(low=0, high=len(arr))`,
    and the item is the element of `arr` at that index.
    """
    position = random_state.randint(low=0, high=len(arr))
    chosen = arr[position]
    return position, chosen


@click.command()
@click.argument('output_path')
@click.argument('truth_path')