def GenerateOutcomes(x, z, num_cont, num_bin, *, plot=True):
    """Sample four outcome variants per observation (Madras, Algorithm 2).

    For every row of the horizontally concatenated ``[x, z]`` matrix a fresh
    coefficient vector ``beta`` is drawn, four means are computed from it and
    one unit-variance normal sample is drawn around each mean.

    Parameters
    ----------
    x : 2-D array
        Observed covariates. Continuous columns are expected to come first,
        followed by the binary ones (this is how ``beta`` is laid out).
    z : 2-D array
        Additional columns appended to ``x``; must have the same number of
        rows as ``x``.
    num_cont : int
        Number of coefficients drawn for the continuous columns of ``x``.
    num_bin : int
        Number of coefficients drawn for the binary columns of ``x``.
        ``num_cont + num_bin`` has to equal ``x.shape[1]``, otherwise the
        matrix product below fails with a shape error.
    plot : bool, keyword-only, default True
        When true (the historical behaviour) a histogram of the four sampled
        outcome columns is opened with ``plt.show()``. Pass ``False`` to
        generate data without touching matplotlib.

    Returns
    -------
    y_all : ndarray, shape (n_rows, 4)
        Sampled outcomes, columns ordered as named in ``y_column``.
    mu_all : ndarray, shape (n_rows, 4)
        Means the outcomes were sampled around, ordered as in ``mu_column``.
    y_column : str
        Comma-separated column names for ``y_all``.
    mu_column : str
        Comma-separated column names for ``mu_all``.

    Notes
    -----
    Randomness comes from ``choice`` and ``np.random.normal``; seeding is left
    to the caller (presumably ``choice`` is ``numpy.random.choice`` - the
    ``p=`` keyword matches its signature).
    """
    # Constants as defined by Madras
    num_z = z.shape[1]
    w = -11
    beta_a = 6

    # Algorithm 2
    # horizontal concatenation; W is a constant 0.5 offset per column
    xz = np.concatenate((x, z), 1)
    W = np.ones(xz.shape[1]) * .5

    # lists to store generated values
    y_t0_a0, y_t1_a0, y_t0_a1, y_t1_a1 = [], [], [], []
    mu_t0_a0, mu_t1_a0, mu_t0_a1, mu_t1_a1 = [], [], [], []

    # loop over observations because each one needs its own beta sample
    for obs in xz:
        # sample new beta (draw order is kept fixed so seeded runs reproduce)
        beta_cont = choice([0, .1, .2, .3, .4], num_cont, p=[.5, .125, .125, .125, .125])
        beta_bin = choice([0, .1, .2, .3, .4], num_bin, p=[.6, .1, .1, .1, .1])
        beta_z = choice([.4, .6], num_z, p=[.5, .5])
        # in x, continuous variables come first
        beta = np.concatenate((beta_cont, beta_bin, beta_z), 0)

        # The two base terms are shared by the a0/a1 variants, so compute
        # each only once.
        # NOTE(review): exp is applied element-wise *before* the weighted sum,
        # i.e. sum(exp(obs + W) * beta) - confirm against the reference
        # algorithm whether exp of the weighted sum was intended.
        exp_term = np.matmul(np.exp(obs + W), beta)
        lin_term = np.matmul(obs, beta) - w

        mu1 = exp_term
        mu2 = lin_term
        mu3 = exp_term + beta_a
        mu4 = lin_term + beta_a
        mu_t0_a0.append(mu1)
        mu_t1_a0.append(mu2)
        mu_t0_a1.append(mu3)
        mu_t1_a1.append(mu4)

        # sample new y: one unit-variance normal draw around each mean
        y_t0_a0.append(np.random.normal(mu1, 1, 1)[0])
        y_t1_a0.append(np.random.normal(mu2, 1, 1)[0])
        y_t0_a1.append(np.random.normal(mu3, 1, 1)[0])
        y_t1_a1.append(np.random.normal(mu4, 1, 1)[0])

    if plot:
        # Overlayed histograms of the four sampled outcome columns.
        plt_entries = {'y_t0_a0': y_t0_a0, 'y_t1_a0': y_t1_a0, 'y_t0_a1': y_t0_a1, 'y_t1_a1': y_t1_a1}
        plt.figure()
        plt.title('Generated data')

        for label, entry in plt_entries.items():
            plt.hist(entry, label=label, alpha=0.5, bins=20)
        plt.legend()
        plt.show()

    # Stack as rows, then transpose so each observation is one row.
    y_all = np.transpose(np.vstack((y_t0_a0, y_t1_a0, y_t0_a1, y_t1_a1)))
    mu_all = np.transpose(np.vstack((mu_t0_a0, mu_t1_a0, mu_t0_a1, mu_t1_a1)))

    # column names should be consistent with above vstack
    y_column = 'y_t0_a0, y_t1_a0, y_t0_a1, y_t1_a1'
    mu_column = 'mu_t0_a0, mu_t1_a0, mu_t0_a1, mu_t1_a1'
    return y_all, mu_all, y_column, mu_column
def draw_hist(heights):
    """Display a histogram of ``heights`` split into 100 bins.

    ``heights`` is the raw quantitative data to plot; unlike qualitative
    data, no frequency counts are computed beforehand - ``plt.hist`` does the
    binning itself. The figure is shown immediately via ``plt.show()``.
    """
    # Second positional argument of plt.hist is the number of bins.
    bin_count = 100
    plt.hist(heights, bin_count)

    # Axis labels and title.
    plt.title('Heights Of Male Students')
    plt.ylabel('Frequency')
    plt.xlabel('Heights')

    plt.show()
def draw_cumulative_hist(heights):
    """Display a normalised cumulative step curve of ``heights``.

    The data is split into 20 bins and drawn as a step outline
    (``histtype='step'``) with ``cumulative=True``, so each step shows the
    running total up to that bin. The figure is shown immediately via
    ``plt.show()``.
    """
    # `density=True` replaces the former `normed=True` keyword, which was
    # removed from plt.hist in Matplotlib 3.1 and now raises an error.
    plt.hist(heights, 20, density=True, histtype='step', cumulative=True)
    plt.xlabel('Heights')
    plt.ylabel('Frequency')
    plt.title('Heights Of Male Students')
    plt.show()
'''

import json
from textblob import TextBlob
from wordcloud import WordCloud
import matplotlib.pylot as plt

# Get the JSON data
tweetFile = open("tweets.json", "r")
tweetData = json.load(tweetFile)
tweetFile.close()

polarity_values = []

for tweet in tweetData:
    tweets.append(tweet["text"])
giant_string = " ".join(tweets)    


    tb = TextBlob(tweet_text)
    print("{}: {}".format(tweet_text, tb.polarity))
    polarity_values.append(tb.polarity)

# bins = [-1, -0.5, 0, 0.5, 1]

plt.hist(polarity_values, bins)
plt.title("tweet polarity")
plt.ylabel("Count of tweets")
plt.xlabel("Polarity")
plt.show()
X_add, y_add = mydat['features'], mydat['labels']

with open('./mydata/train.p', mode='rb') as f:
    mytrain = pickle.load(f)
X_mytrain, y_mytrain = mytrain['features'], mytrain['labels']

with open('./mydata/test.p', mode='rb') as f:
    mytest = pickle.load(f)
X_mytest, y_mytest = mytest['features'], mytest['labels']

X_train = np.append(X_train_, X_mytrain, axis = 0)
y_train = np.append(y_train_, y_mytrain)
X_test = np.append(X_test_, X_mytest, axis = 0)
y_test = np.append(y_test_, y_mytest)

plt.hist(y_train, bins=50, color='#FF69B4')



#==============================================================================
# Failed attempt to train the model by incrementally increase the number of 
# near-zero training examples. 
#==============================================================================

#def limit(X, y, s = 700):
#    bad = [k for k,v in enumerate(y) if v in [0, -.25, .25]]
#    good = list(set(range(0, len(y)))-set(bad))
#    new = good + [bad[i] for i in np.random.randint(0,len(bad),s)]
#    X,y = X[new,], y[new]
#    return X, y
#