Ejemplo n.º 1
0
Archivo: ppwgan.py Proyecto: zhh0998/pp
# The combination "G_DIFF off, D_DIFF on" is rejected: stop before doing any work.
# NOTE(review): sys.exit() without an argument exits silently with status 0.
if not G_DIFF and D_DIFF:
    sys.exit()
# Seed both TensorFlow and NumPy with the same SEED.
tf.set_random_seed(SEED)
np.random.seed(SEED)

##############################################################################
# prepare data

# Real and baseline ("fake") sequences are cached together in one pickle file
# named after DATA; when that file already exists it is loaded instead of
# rebuilding the data.
FILE_NAME = 'pickled_data_ppgan_{}'.format(DATA)
if not os.path.isfile(FILE_NAME):
    # The Gaussian simulation branch is deliberately disabled by `and False`,
    # so 'gaussian' is read via file2sequence like every other data set.
    # Original author's note: the QQ plot for gaussian is not as good as for
    # hawkes / selfcorrecting, perhaps because the simulation is not good.
    if DATA == 'gaussian' and False:
        intensityGaussian = IntensitySumGaussianKernel(3, [3, 7, 11],
                                                       [1, 1, 1], [2, 3, 2])
        real_sequences = generate_sample(intensityGaussian, T, 20000)
        sequence2file(real_sequences, 'gaussian')
    else:
        real_sequences = file2sequence(DATA)

    # Baseline sequences: sampled from an intensity whose parameter is the
    # mean real-sequence length divided by T.
    lambda0 = np.mean([len(item) for item in real_sequences]) / T
    intensityPoisson = IntensityHomogenuosPoisson(lambda0)
    fake_sequences = generate_sample(intensityPoisson, T, 20000)
    # Use a context manager so the cache file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(FILE_NAME, 'wb') as cache_file:
        pickle.dump([real_sequences, fake_sequences], cache_file)
else:
    # NOTE: pickle.load can execute arbitrary code; only load cache files
    # that were produced by this script.
    with open(FILE_NAME, 'rb') as cache_file:
        real_sequences, fake_sequences = pickle.load(cache_file)

# Report mean sequence length per unit of T for the real and baseline data.
print(
    np.mean([len(item) for item in real_sequences]) / T,
    np.mean([len(item) for item in fake_sequences]) / T)
if not REAL_DATA:
    # Only the first SEQ_NUM real sequences are kept when REAL_DATA is false.
    real_sequences = real_sequences[:SEQ_NUM]
Ejemplo n.º 2
0
Archivo: rmtpp.py Proyecto: zhh0998/pp
else:
    REAL_DATA = False

# Seed both TensorFlow and NumPy with the same SEED.
tf.set_random_seed(SEED)
np.random.seed(SEED)

##############################################################################
# prepare data

# The real sequences are cached in a pickle file named after DATA; when that
# file already exists it is loaded instead of rebuilding the data.
FILE_NAME = 'pickled_data_{}'.format(DATA)
if not os.path.isfile(FILE_NAME):
    # Original author's note: the QQ plot for gaussian is not as good as for
    # hawkes / selfcorrecting, perhaps because the simulation is not good.
    if DATA == 'gaussian':
        intensityGaussian = IntensitySumGaussianKernel(3, [3, 7, 11],
                                                       [1, 1, 1], [2, 3, 2])
        real_sequences = generate_sample(intensityGaussian, T, 20000)
        sequence2file(real_sequences, 'gaussian')
    else:
        real_sequences = file2sequence(DATA)

    # No Poisson baseline ("fake") sequences are generated or cached here;
    # only the real sequences are pickled.
    # Use a context manager so the cache file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(FILE_NAME, 'wb') as cache_file:
        pickle.dump(real_sequences, cache_file)
else:
    # NOTE: pickle.load can execute arbitrary code; only load cache files
    # that were produced by this script.
    with open(FILE_NAME, 'rb') as cache_file:
        real_sequences = pickle.load(cache_file)

# Report mean sequence length per unit of T.
print((np.mean([len(item) for item in real_sequences]) / T))
if not REAL_DATA:
    # Only the first SEQ_NUM real sequences are kept when REAL_DATA is false.
    real_sequences = real_sequences[:SEQ_NUM]
real_iterator = PaddedDataIterator(real_sequences, T, MARK, D_DIFF)
Ejemplo n.º 3
0
        plt.close()
        
        # Stopping test. When REAL_DATA is false and DATA is not "rmtpp", the
        # values returned by get_integral are compared against a unit
        # exponential (QQ plot saved under out/<saved_file>/<it>.png) and the
        # fit is combined with `deviation`; otherwise only `deviation` is used.
        if not REAL_DATA and DATA!="rmtpp":
            integral_intensity = get_integral(sequences_generator, DATA)
            integral_intensity = np.asarray(integral_intensity)
            # QQ plot against stats.expon with loc=0, scale=1 and line='45'
            # (sm is presumably statsmodels.api -- confirm against the imports).
            fig = sm.qqplot(integral_intensity, stats.expon, distargs=(), loc=0, scale=1,line='45')
            # scipy's probplot returns ((osm, osr), (slope, intercept, r)), so
            # slope_intercept[0] is the slope of the least-squares fit.
            res,slope_intercept = stats.probplot(integral_intensity, dist=stats.expon)
            # plt.grid() acts on matplotlib's current axes, so it must run
            # before the figure is saved and closed.
            plt.grid()
            fig.savefig('out/{}/{}.png'.format(saved_file,it))
            plt.close()
            
            # Stop when the fitted slope is within 0.1 of 1 and deviation < 0.1.
            if np.abs(1-slope_intercept[0])<1e-1 and deviation<1e-1:
                stop_indicator = True
        # Without the QQ check, a tighter deviation threshold (1e-2) is required.
        elif deviation<1e-2:
            stop_indicator = True
         
    if it  == ITERS-1 or stop_indicator: 
        sequences_generator = []
        for _ in range(int(20000/BATCH_SIZE)):
            sequences_gen = sess.run(fake_data,feed_dict={Z:fake_batch[0], fake_seqlen:fake_batch[1]})
            shape_gen = sequences_gen.shape
            sequences_gen = np.reshape(sequences_gen,(shape_gen[0],shape_gen[1]))
            if D_DIFF:
                sequences_gen = np.cumsum(sequences_gen,axis=1)
            sequences_gen = sequence_filter(sequences_gen,fake_batch[1]) # remove padding tokens
            sequences_generator +=sequences_gen
        sequence2file(sequences_generator, 'wgan_{}_{}_{}_{}'.format(DATA,SEQ_NUM,ITERATION,LAMBDA_LP))
        break


Ejemplo n.º 4
0
Archivo: MLE.py Proyecto: zhh0998/pp
else:
    REAL_DATA = False

# Seed both TensorFlow and NumPy with the same SEED.
tf.set_random_seed(SEED)
np.random.seed(SEED)

##############################################################################
# prepare data

# Real and baseline ("fake") sequences are cached together in one pickle file
# named after DATA; when that file already exists it is loaded instead of
# rebuilding the data.
FILE_NAME = 'pickled_data_ppgan_{}'.format(DATA)
if not os.path.isfile(FILE_NAME):
    # Original author's note: the QQ plot for gaussian is not as good as for
    # hawkes / selfcorrecting, perhaps because the simulation is not good.
    if DATA == 'gaussian':
        intensityGaussian = IntensitySumGaussianKernel(3, [3, 7, 11],
                                                       [1, 1, 1], [2, 3, 2])
        real_sequences = generate_sample(intensityGaussian, T, 20000)
        sequence2file(real_sequences, 'gaussian')
    else:
        real_sequences = file2sequence(DATA)

    # Baseline sequences: sampled from an intensity whose parameter is the
    # mean real-sequence length divided by T.
    # NOTE(review): 2000 baseline sequences are drawn here, while the Gaussian
    # sample above uses 20000 -- confirm the smaller count is intended.
    lambda0 = np.mean([len(item) for item in real_sequences]) / T
    intensityPoisson = IntensityHomogenuosPoisson(lambda0)
    fake_sequences = generate_sample(intensityPoisson, T, 2000)
    # Use a context manager so the cache file is flushed and closed
    # deterministically instead of relying on garbage collection.
    with open(FILE_NAME, 'wb') as cache_file:
        pickle.dump([real_sequences, fake_sequences], cache_file)
else:
    # NOTE: pickle.load can execute arbitrary code; only load cache files
    # that were produced by this script.
    with open(FILE_NAME, 'rb') as cache_file:
        real_sequences, fake_sequences = pickle.load(cache_file)

# Both branches above already bind real_sequences and fake_sequences, so the
# cache file is not read a second time.
# Report mean sequence length per unit of T for the real and baseline data.
print((np.mean([len(item) for item in real_sequences]) / T),
      ((np.mean([len(item) for item in fake_sequences]) / T)))
if not REAL_DATA:
    # Only the first SEQ_NUM real sequences are kept when REAL_DATA is false.
    real_sequences = real_sequences[:SEQ_NUM]