# Exit immediately when G_DIFF is off while D_DIFF is on.
if G_DIFF == False and D_DIFF == True:
    sys.exit()

# Seed the TensorFlow and NumPy random generators with the shared SEED.
tf.set_random_seed(SEED)
np.random.seed(SEED)

##############################################################################
# prepare data
#
# Real and fake sequences are cached together in one pickle file per DATA
# name. If the cache is missing it is built and written; otherwise it is read.
FILE_NAME = 'pickled_data_ppgan_{}'.format(DATA)
if not os.path.isfile(FILE_NAME):
    # The trailing `and False` means this simulation branch never runs, so
    # the sequences are always read with file2sequence(DATA).
    # Original author's note: the QQ plot for gaussian is not as good as for
    # hawkes / selfcorrecting, perhaps because the simulation is not good.
    if DATA == 'gaussian' and False:
        intensityGaussian = IntensitySumGaussianKernel(3, [3, 7, 11], [1, 1, 1], [2, 3, 2])
        real_sequences = generate_sample(intensityGaussian, T, 20000)
        sequence2file(real_sequences, 'gaussian')
    else:
        real_sequences = file2sequence(DATA)

    # Mean sequence length divided by T, passed to IntensityHomogenuosPoisson
    # to generate the 20000 fake sequences.
    lambda0 = np.mean([len(item) for item in real_sequences]) / T
    intensityPoisson = IntensityHomogenuosPoisson(lambda0)
    fake_sequences = generate_sample(intensityPoisson, T, 20000)

    # Use a context manager so the cache file is flushed and closed.
    with open(FILE_NAME, 'wb') as cache_file:
        pickle.dump([real_sequences, fake_sequences], cache_file)
else:
    # NOTE(review): pickle.load can execute arbitrary code; only load cache
    # files that this script itself produced.
    with open(FILE_NAME, 'rb') as cache_file:
        real_sequences, fake_sequences = pickle.load(cache_file)

# Report the mean number of events per unit of T for the real and fake data.
print(np.mean([len(item) for item in real_sequences]) / T,
      np.mean([len(item) for item in fake_sequences]) / T)

# When REAL_DATA is false, keep only the first SEQ_NUM real sequences.
if not REAL_DATA:
    real_sequences = real_sequences[:SEQ_NUM]
else: REAL_DATA = False tf.set_random_seed(SEED) np.random.seed(SEED) ############################################################################## # prepare data FILE_NAME = 'pickled_data_{}'.format(DATA) if not os.path.isfile(FILE_NAME): if DATA == 'gaussian': #QQ_plot for gaussian is not good as hawkes,selfcorrecting, perhaps that simulating is not good. intensityGaussian = IntensitySumGaussianKernel(3, [3, 7, 11], [1, 1, 1], [2, 3, 2]) real_sequences = generate_sample(intensityGaussian, T, 20000) sequence2file(real_sequences, 'gaussian') else: real_sequences = file2sequence(DATA) #lambda0 = np.mean([len(item) for item in real_sequences])/T #intensityPoisson = IntensityHomogenuosPoisson(lambda0) #fake_sequences = generate_sample(intensityPoisson, T, 20000) pickle.dump(real_sequences, open(FILE_NAME, 'wb')) else: real_sequences = pickle.load(open(FILE_NAME, 'rb')) print((np.mean([len(item) for item in real_sequences]) / T)) if not REAL_DATA: real_sequences = real_sequences[:SEQ_NUM] real_iterator = PaddedDataIterator(real_sequences, T, MARK, D_DIFF)
# Evaluation and early-stopping fragment from inside a training loop (note the
# trailing `break`; the loop header is outside this fragment).
# NOTE(review): the line breaks and indentation of this fragment were lost, so
# the statements below sit on one physical line; restore the layout before running.
#
# What the statements do, in order:
#   * Close the current matplotlib figure.
#   * When REAL_DATA is false and DATA != "rmtpp": compute
#     get_integral(sequences_generator, DATA), draw a QQ plot of it against
#     stats.expon with a 45-degree reference line, save the figure to
#     'out/<saved_file>/<it>.png', and set stop_indicator when
#     |1 - slope_intercept[0]| < 1e-1 and deviation < 1e-1.
#     slope_intercept[0] is the fitted slope returned by stats.probplot.
#   * An `elif` sets stop_indicator when deviation < 1e-2.
#     NOTE(review): the flattened text does not show which `if` this `elif`
#     pairs with -- confirm against the original layout.
#   * On the last iteration (it == ITERS-1) or when stop_indicator is set:
#     run fake_data int(20000/BATCH_SIZE) times, reshape each result to its
#     first two dimensions, take a cumulative sum along axis 1 when D_DIFF is
#     set, pass it through sequence_filter with fake_batch[1], accumulate the
#     results, write them out with sequence2file, and leave the loop.
#     NOTE(review): fake_batch is not refreshed inside the visible generation
#     loop, so every sess.run call is fed the same Z / fake_seqlen -- confirm
#     this is intended.
plt.close() if not REAL_DATA and DATA!="rmtpp": integral_intensity = get_integral(sequences_generator, DATA) integral_intensity = np.asarray(integral_intensity) fig = sm.qqplot(integral_intensity, stats.expon, distargs=(), loc=0, scale=1,line='45') res,slope_intercept = stats.probplot(integral_intensity, dist=stats.expon) plt.grid() fig.savefig('out/{}/{}.png'.format(saved_file,it)) plt.close() if np.abs(1-slope_intercept[0])<1e-1 and deviation<1e-1: stop_indicator = True elif deviation<1e-2: stop_indicator = True if it == ITERS-1 or stop_indicator: sequences_generator = [] for _ in range(int(20000/BATCH_SIZE)): sequences_gen = sess.run(fake_data,feed_dict={Z:fake_batch[0], fake_seqlen:fake_batch[1]}) shape_gen = sequences_gen.shape sequences_gen = np.reshape(sequences_gen,(shape_gen[0],shape_gen[1])) if D_DIFF: sequences_gen = np.cumsum(sequences_gen,axis=1) sequences_gen = sequence_filter(sequences_gen,fake_batch[1]) # remove padding tokens sequences_generator +=sequences_gen sequence2file(sequences_generator, 'wgan_{}_{}_{}_{}'.format(DATA,SEQ_NUM,ITERATION,LAMBDA_LP)) break
else: REAL_DATA = False tf.set_random_seed(SEED) np.random.seed(SEED) ############################################################################## # prepare data FILE_NAME = 'pickled_data_ppgan_{}'.format(DATA) if not os.path.isfile(FILE_NAME): if DATA == 'gaussian': #QQ_plot for gaussian is not good as hawkes,selfcorrecting, perhaps that simulating is not good. intensityGaussian = IntensitySumGaussianKernel(3, [3, 7, 11], [1, 1, 1], [2, 3, 2]) real_sequences = generate_sample(intensityGaussian, T, 20000) sequence2file(real_sequences, 'gaussian') else: real_sequences = file2sequence(DATA) lambda0 = np.mean([len(item) for item in real_sequences]) / T intensityPoisson = IntensityHomogenuosPoisson(lambda0) fake_sequences = generate_sample(intensityPoisson, T, 2000) pickle.dump([real_sequences, fake_sequences], open(FILE_NAME, 'wb')) else: real_sequences, fake_sequences = pickle.load(open(FILE_NAME, 'rb')) real_sequences, _ = pickle.load(open(FILE_NAME, 'rb')) print((np.mean([len(item) for item in real_sequences]) / T), ((np.mean([len(item) for item in fake_sequences]) / T))) if not REAL_DATA: real_sequences = real_sequences[:SEQ_NUM]