Example #1
def main():
    # assumes module-level imports: random, numpy as np, matplotlib.pyplot as plt,
    # plus the project-local svm, pu (plotting utilities), func and func2
    m = 350
    random.seed(2)
    X = np.empty([m, 2])
    X[:, 0] = np.array(random.sample(range(-10000, 10000), m)) / 1000.0
    X[:, 1] = np.array(random.sample(range(-10000, 10000), m)) / 1000.0

    #not separable
    y = np.empty([m, 1])
    for i in range(X.shape[0]):
        y[i] = func2(X[i, :])

    #plot data and decision surface
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    # train the SVM
    # c controls the margin: a large c (as here) approximates a hard margin,
    # while a small c allows a soft margin
    w, w0, support_vectors_idx = svm.train(X, y, c=99999, eps=0.1)

    #plot result
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))

    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
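
# Note: svm.classify_all is project code not shown in these snippets; for a linear
# SVM it presumably applies the decision rule sign(w.x + w0). A minimal stand-in
# sketch under that assumption (the name classify_all_sketch is hypothetical):
def classify_all_sketch(X, w, w0):
    return np.sign(X.dot(w) + w0)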
Example #2
def main():
    print("Loading data...")
    X = np.loadtxt('X.txt')
    y = np.loadtxt('y.txt')

    print("Plotting data...")
    plot_data(X, y)
Example #4
def main():
    m = 100
    X = np.empty([m, 2])
    X[:, 0] = np.array(random.sample(range(-10000, 10000), m)) / 1000.0
    X[:, 1] = np.array(random.sample(range(-10000, 10000), m)) / 1000.0

    # preprocessing.scale(X)

    # linearly separable
    y = np.empty([m, 1])
    for i in range(m):
        y[i] = func(X[i, :])

    # plot data and decision surface
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    # train svm

    w, w0, support_vectors_idx = svm.train(X, y, c=999999999999999, eps=10, type='gaussian')
    # w, w0, support_vectors_idx = svm.train(X, y, c=999999999999999, eps=10, type='polynomial')

    # plot result
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))

    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
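
# "gaussian" above refers to an RBF kernel; the project's kernel code is not shown
# here, but the standard form is exp(-||x - z||^2 / (2 * sigma^2)). A small sketch
# (gaussian_kernel_sketch and its sigma parameter are assumptions, not project API):
def gaussian_kernel_sketch(x, z, sigma=1.0):
    return np.exp(-np.linalg.norm(x - z) ** 2 / (2.0 * sigma ** 2))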
def main():
    m = 150
    random.seed(2)
    X = np.empty([m, 2])
    X[:, 0] = np.array(random.sample(range(-10000, 10000), m)) / 1000.0
    X[:, 1] = np.array(random.sample(range(-10000, 10000), m)) / 1000.0

    X = preprocessing.scale(X)  # scale() returns a new array, so assign it back

    # linearly separable
    y = np.empty([m, 1])
    for i in range(m):
        y[i] = func(X[i, :])

    # shuffle
    p = np.random.permutation(len(X))
    X = X[p]
    y = y[p]

    #plot data and decision surface
    ax = pu.plot_data(X, y)
    pu.plot_surface(X, y, X[:, 0], X[:, 1], disc_func=func, ax=ax)
    plt.show()

    #train svm
    w, w0, support_vectors_idx = svm.train(X, y, c=9999, eps=0.000001)

    #plot result
    predicted_labels = svm.classify_all(X, w, w0)
    print("Accuracy: {}".format(svm.getAccuracy(y, predicted_labels)))

    kfold = svm.kfoldCrossValidation(X, y, 10, 1, c=999999999, eps=0.000001)
    print(kfold)

    ax = pu.plot_data(X, y, support_vectors_idx)
    pu.plot_surfaceSVM(X[:, 0], X[:, 1], w, w0, ax=ax)
    plt.show()
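
# svm.kfoldCrossValidation is project code not shown here; a generic sketch of
# k-fold cross-validation built on the svm.train / svm.classify_all / svm.getAccuracy
# calls used above (kfold_accuracy_sketch and its default arguments are assumptions):
def kfold_accuracy_sketch(X, y, k=10, c=9999, eps=1e-6):
    idx = np.arange(len(X))
    accuracies = []
    for fold in np.array_split(idx, k):
        train_idx = np.setdiff1d(idx, fold)
        w, w0, _ = svm.train(X[train_idx], y[train_idx], c=c, eps=eps)
        preds = svm.classify_all(X[fold], w, w0)
        accuracies.append(svm.getAccuracy(y[fold], preds))
    return float(np.mean(accuracies))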
            print("Num dead: {}".format(len(DEAD)))
            print("Num recovered: {}".format(len(RECOVERED)))

        if len(DEAD) == num_all_people:
            if output:
                print(f"All died in {i} days")

            return [inf, dead, rec]

        if len(RECOVERED) == num_all_people or \
           len(INFECTED) == 0:

            if output:
                print(f"All revovered. Took {i} days")
            return [inf, dead, rec]

        if output:
            print("-" * 35)

        i += 1


inf, dead, rec = run_simulation(3, output=False)
BOARD_HISTORY.append(BOARD)

if PLOT_RESULTS:
    print("Plotting data")
    plot_data(inf, dead, rec, START_EMERGENCY, START_DEEPENING,
              START_RECOVERING, vir_sev, inf_chance)
    print("Saved plots")
X_norm, mu, sigma = utils.feature_normalize(X)

# add intercept term to X_norm

XX = np.vstack([np.ones((X.shape[0],)), X_norm.T]).T

print('Running gradient descent ...')

# set up model and train 

linear_reg3 = LinearReg_SquaredLoss()
J_history3 = linear_reg3.train(XX, y, learning_rate=0.01, num_iters=5000, verbose=False)

# Plot the convergence graph and save it in fig5.pdf

plot_utils.plot_data(range(len(J_history3)), J_history3, 'Number of iterations', 'Cost J')
plt.savefig('fig5.pdf')

# Display the computed theta

print('Theta computed by gradient descent: ', linear_reg3.theta)


########################################################################
# =============== Part 3: Predict on unseen data with model ===========#
########################################################################

########################################################################
# TODO:                                                                #
# Predict values for the average home                                  #
# remember to multiply prediction by 10000 using linear_reg3           #
########################################################################
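
# A hedged sketch of one way to complete the TODO above (not the official solution):
# if feature_normalize is mean/std standardization, the "average" home has all-zero
# normalized features, so its prediction is just the intercept entry of theta
# (assumes X_norm is 2-D and linear_reg3.theta is the fitted parameter vector).
avg_home = np.hstack([1.0, np.zeros(X_norm.shape[1])])
price_pred = float(avg_home.dot(linear_reg3.theta)) * 10000
print('Predicted price of the average home: {:.2f}'.format(price_pred))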
Example #9
# add intercept term to X_norm

XX = np.vstack([np.ones((X.shape[0],)), X_norm.T]).T
print(X_norm)

print('Running gradient descent ...')

# set up model and train 

linear_reg3 = LinearReg_SquaredLoss()

J_history3 = linear_reg3.train(XX, y, learning_rate=0.01, num_iters=5000, verbose=False)

# Plot the convergence graph and save it in fig5.pdf

plot_utils.plot_data(range(len(J_history3)), J_history3, 'Number of iterations', 'Cost J')
plt.savefig('fig5a.pdf')

# Display the computed theta

print('Theta computed by gradient descent: ', linear_reg3.theta)


########################################################################
# =============== Part 3: Predict on unseen data with model ===========#
########################################################################

########################################################################
# TODO:                                                                #
# Predict values for the average home                                  #
# remember to multiply prediction by 10000 using linear_reg3           #
########################################################################
Example #10
def russian(x, y):
    # Russian peasant multiplication: shift-and-add
    z = 0
    while x > 0:
        if x % 2 == 1: z = z + y
        y = y << 1
        x = x >> 1
    return z
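
# The timing loop below also calls naive(x, y), which is not part of this snippet;
# a plausible stand-in is multiplication by repeated addition (an assumption, not
# the original author's code):
def naive(x, y):
    z = 0
    for _ in range(x):
        z = z + y
    return z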

if __name__ == "__main__":
    # first arg is max value, second arg is step
    numbers = list(range(1, int(sys.argv[1]), int(sys.argv[2])))
    print("Starting to test timings for function:")
    times_naive = []
    times_russian = []
    tot_numbers = len(numbers)
    s0 = time.time()
    for idx, val in enumerate(numbers):
        if idx % 100 == 0:
            print(" -- Working on %i/%i: current time: %.3f seconds" % (idx + 1, tot_numbers, time.time() - s0))
        s1 = time.time()
        naive(val, val)
        times_naive.append(time.time() - s1)
        s2 = time.time()
        russian(val, val)
        times_russian.append(time.time() - s2)
    e0 = time.time()
    print "Time taken: %.3f seconds"%(e0 - s0)
    times = numpy.array([times_naive,times_russian]).transpose()
    numbers = numpy.array([numbers,numbers]).transpose()
    print "opening plot..."
    plot_utils.plot_data(numbers,times)
# Group the data by week, and take the count for each week
week_counts = df.resample('W').count()
week_counts.columns = ['adoptions']
commit_week = df_c.resample('W').count()
commit_week.columns = ['commits']
#merge for multi-axis plotting
week_merged = week_counts.merge(commit_week,
                                how='outer',
                                left_index=True,
                                right_index=True)
week_merged.fillna(0, inplace=True)
# plot adoptions per week
plot_utils.plot_data(week_counts.index,
                     week_counts['adoptions'],
                     "time",
                     "number of adoption events per week",
                     "Adoption Events Per Week",
                     filename="results/adop_over_time_week.png")
#double axis plot
plot_utils.plot_two_axes(week_merged.index,
                         week_merged['adoptions'],
                         week_merged['commits'],
                         "time",
                         "number of adoption events per week",
                         "number of import commits per week",
                         "Adoption Events and Import Commits Per Week",
                         filename="results/adop_commit_over_time_week.png")

# Group the data by month, and take the count for each month
month_counts = df.resample('M').count()
month_counts.columns = ['adoptions']
                # if a spike is detected, store the link in reddit.events
                if (tree_score > adjusted_max/5) or (tree_score < adjusted_min):
                    log_event(date, post, tree_score, TOPIC)
                
        # slide window to the right
        window = window[1:] + [daily_max]

        # find max and adjusted max
        window_max = max(window)
        adjusted_max = (window_max + 6*adjusted_max + global_max)/10
        # write to file in case of failure
        f = open(AMAX_TEMP, "w+")
        f.write(str(adjusted_max) + "\n" + str(global_max))
        f.close()

        # find min and adjusted min
        window_min = min(window)
        adjusted_min = (window_min + 6*adjusted_min + global_min)/10
        # write to file in case of failure
        f = open(AMIN_TEMP, "w+")
        f.write(str(adjusted_min) + "\n" + str(global_min))
        f.close()

        # write score to file
        append_to_file(date, score, FILENAME)

    os.remove(AMAX_TEMP)
    os.remove(AMIN_TEMP)
    plot_data(TOPIC)

Example #13
# assumes: import pandas as pd; from sklearn.datasets import load_boston
bdata = load_boston()
df = pd.DataFrame(data=bdata.data, columns=bdata.feature_names)

#  X is the percentage of the population in a census tract that is of
#  lower economic status. X is a vector of length 506.
#  y is the median home value in $10000s. y is a vector of length 506.

X = df.LSTAT
y = bdata.target

# Scatter plot LSTAT vs median home value, saved in fig1.pdf

import numpy as np
import plot_utils
import matplotlib.pyplot as plt

print('Plotting data ...')
plot_utils.plot_data(X, y, 'Percentage of population with lower economic status', 'Median home value in $10000s')
plt.savefig('fig1.pdf')

########################################################################
##============= Part 1: Training a univariate model ===================#
########################################################################

# Predict median home value from percentage of lower economic status in a census tract

# add the column of ones to X to represent the intercept term

XX = np.vstack([np.ones((X.shape[0],)),X]).T

from linear_regressor import LinearRegressor, LinearReg_SquaredLoss

# set up a linear regression model
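
# The snippet is cut off here; a hedged continuation using the same train/theta API
# as the other examples in this collection (hyperparameter values are assumptions):
linear_reg1 = LinearReg_SquaredLoss()
J_history1 = linear_reg1.train(XX, y, learning_rate=0.005, num_iters=5000, verbose=False)
print('Theta computed by gradient descent: ', linear_reg1.theta)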
                            key=len,
                            reverse=True)
        repo_comp_data.append(len(repo_comps))
        times.append(time)
        print "   finished", adoption_index, "events"

#get final component counts
n = len(events_list)
user_comps = sorted(nx.connected_component_subgraphs(U), key=len, reverse=True)
user_comp_data.append(len(user_comps))
repo_comps = sorted(nx.connected_component_subgraphs(R), key=len, reverse=True)
repo_comp_data.append(len(repo_comps))
times.append(events_list[n - 1]["target"]["time"])

#plot number of components over time
plot_utils.plot_data(times,
                     user_comp_data,
                     "Time (UNIX)",
                     "Number of Components in User Graph",
                     "Number of Components in User Graph over Time",
                     filename="results/time_components_user.png",
                     log_scale=False)
print "user component plot saved to results/time_components_user.png"
plot_utils.plot_data(times,
                     repo_comp_data,
                     "Time (UNIX)",
                     "Number of Components in Repo Graph",
                     "Number of Components in Repo Graph over Time",
                     filename="results/time_components_repo.png",
                     log_scale=False)
print "repo component plot saved to results/time_components_repo.png"
#plots use usage counts, adoption counts, and average delta t:

use_counts = []
adop_counts = []
avg_delta = []
for lib in usage_counts:
    if lib in lib_adop_counts:
        use_counts.append(usage_counts[lib])
        adop_counts.append(lib_adop_counts[lib])
        avg_delta.append(lib_delta[lib])
#total # of usages for library on x, total # of adoptions for lib on y
plot_utils.plot_data(use_counts,
                     adop_counts,
                     "Number of uses",
                     "Number of adoptions",
                     "Uses vs. Adoptions per Library",
                     filename="results/uses_vs_adoptions.png",
                     scatter=True,
                     log_scale=True)
print "Uses vs. adoptions plot saved to results/uses_vs_adoptions.png"

#frequency distribution of # of adoptions per library
lib_adop_freq, min_lib_adop, max_lib_adop = plot_utils.count_freq(
    lib_adop_counts)
plot_utils.plot_freq(lib_adop_freq,
                     "library adoption count",
                     "freq",
                     "Frequency of library adoption counts",
                     filename="results/lib_adop_freq.jpg",
                     log_scale=True)
print "lib adop counts: min =", min_lib_adop, ", max =", max_lib_adop
Example #16
########################################################################
#  We start the exercise by first loading and visualizing the dataset. #
#  The following code will load the dataset into your environment and  #
#  plot the data.                                                      #
########################################################################


# Load Training Data

print('Loading and Visualizing Data ...')

X, y, Xtest, ytest, Xval, yval = utils.load_mat('ex2data1.mat')

# Plot training data

plot_utils.plot_data(X,y,'Change in water level (x)','Water flowing out of the dam (y)')
plt.savefig('fig6.pdf')

########################################################################
## =========== Part 2: Regularized Linear Regression ==================#
########################################################################
#  You should now implement the loss function and its gradient for
#  regularized linear regression in reg_linear_regression_multi.py

# append a column of ones to matrix X

XX = np.vstack([np.ones((X.shape[0],)),X]).T

#  Train linear regression with lambda = 0

reglinear_reg1 = RegularizedLinearReg_SquaredLoss()
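
# A hedged sketch of the regularized squared loss and its gradient (the graded code
# belongs in reg_linear_regression_multi.py and may differ in names and conventions;
# the intercept term theta[0] is conventionally left unregularized):
def reg_squared_loss_sketch(theta, X, y, reg):
    m = X.shape[0]
    residual = X.dot(theta) - y
    J = residual.dot(residual) / (2.0 * m) + (reg / (2.0 * m)) * theta[1:].dot(theta[1:])
    grad = X.T.dot(residual) / m
    grad[1:] = grad[1:] + (reg / m) * theta[1:]
    return J, grad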
        G.add_edge(user, repo)

    commit_index = commit_index + 1
    if commit_index % 100 == 0:
        #get components again, add to plot data
        comps = sorted(nx.connected_component_subgraphs(G),
                       key=len,
                       reverse=True)
        comp_data[key] = len(comps)  #add to plot
        print "finished", commit_index, "commits"

#plot # of components over time
plot_utils.plot_data(comp_data,
                     "Time (UNIX)",
                     "Number of Components",
                     "%s: Number of Components over Time" % lib,
                     filename="results/%s_time_components_APPROX.png" % lib,
                     x_max=0,
                     x_min=0,
                     log_scale=False)
print "plot saved to results/%s_time_components_APPROX.png" % lib
'''  SLOW WAY
for user_id in users_list:
	user = str(user_id)
	print user
	for repo in user_to_repos[user]:
		print repo
		#read repo file
		repo_commits = utils.load_json("imports_data/%s.log" % repo)
		print len(repo_commits)
		#loop all commits in this repo
		for commit in commits:		#each commit is user, time, dictionary of imports