from tideh import training_cross_validation number_of_files = 100 # number of files to train on file_name_prefix = 'data/training/RT' # file names prefix of files used for training iterations = 5 # number of cross validation iterations pred_time = 168 # prediction time (hours) # get file paths of files to use for training file_names = [ file_name_prefix + str(i) + '.txt' for i in range(1, number_of_files + 1) ] # load events for optimized training events_data = [] for file in file_names: events_data.append(load_events_vec( file, 1 / 3600, 24)) # convert event_times and start_time to hours # initial simplex matrix passed to simplex algorithm # ordering is r0, phi, taum; taum is trained by 1/taum simplex = [ [0.2, 0, 0.25], [0, -0.1, 0.25], [0.2, 0.1, 0.15], [0.4, 0.0, 0.15], ] start_values = [0.2, 0, 0.25] (mean_err, median_err, param), _ = training_cross_validation(events_data, iterations, start_values,
# resolve the command line options; the month falls back to 'nov' and is
# always used upper-cased
args = parser.parse_args()
month = (args.m or 'nov').upper()

# input and output folders for the selected month
input_path = os.path.join("data", "reddit_data", "{}_INPUT".format(month))
output_path = os.path.join("data", "reddit_data", "{}_OUTPUT".format(month))

# the event file lives at <input_path>/<subreddit number>/<file name>.txt
subreddit_number = args.srd
filename = os.path.join(input_path, subreddit_number, args.fl + ".txt")

# observation and prediction time, with defaults when the option is missing
obs_time = int(args.ot or 1)
pred_time = int(args.pt or 720)  # 720 = 24 * 30

# the number of retweets is not necessary for the further steps
# make sure that all times are loaded in the correct time unit (hours)
# here it is important that there is one nd-array for event times and one
# for the follower counts
(_, start_time), (event_times, follower) = load_events_vec(filename)

# additional parameters passed to infectious rate function
add_params = dict(t0=start_time, bounds=[(-1, 0.5), (1, 20.)])

params, err, _ = estimate_parameters_optimized(
    event_times=event_times,
    follower=follower,
    obs_time=obs_time,
    **add_params
)

# collect the estimated parameters; the fit error is stored multiplied by 100
results = {
    'p0': params[0],
    'r0': params[1],
    'phi0': params[2],
    'tm': params[3],
    'avg_fit_error': err * 100,
}
1) Model parameters of TiDeH (p_0, r_0, phi_0, t_m). 2) Observation time (= obs_time). Outputs are 1) Estimate of model parameters of TiDeH (p_0, r_0, phi_0, t_m). This code is developed by Sylvain Gauthier and Sebastian Rühl under the supervision of Ryota Kobayashi. """ from tideh.simulate import simulate_time_rescaling from tideh.functions import infectious_rate_tweets from tideh import load_events_vec from tideh import estimate_parameters_optimized # load pool of follower counts used for simulation from file file_path = 'data/example/sample_file.txt' _, (_, follower_pool) = load_events_vec(file_path) runtime = 72 # simulate for 3 days # parameters of infectious rate p0 = 0.001 r0 = 0.424 phi0 = 0.125 taum = 2. # simulate event_times, follower = simulate_time_rescaling( runtime=runtime, p=lambda t: infectious_rate_tweets(t, p0, r0, phi0, taum), follower_pool=follower_pool[1:], int_fol_cnt=follower_pool[0])