def bonus_clustering():
    df1_clean = rd.read_data()
    features = list(df1_clean.columns.values)
    df1_clean = rd.clean_df(df1_clean, False)
    labels = df1_clean.loc[:, ['target']].values
    features = features[2:-1]
    data = df1_clean.loc[:, features].astype(float).astype(int)

    # Run KMeans to obtain new labels (these replace the target labels above)
    points = data.values
    kmeans = KMeans(n_clusters=2)
    km = kmeans.fit(points)
    labels = pd.DataFrame(km.labels_)
    labels.columns = ['labels']

    # Since we use 11 of the 13 features, drawing an 11-D graph is impossible,
    # so PCA is used to reduce dimensionality to 2 before drawing the graph.
    points_std = StandardScaler().fit_transform(points)
    pca = PCA(n_components=2)
    principalComponents = pca.fit_transform(points_std)
    principalDf = pd.DataFrame(data=principalComponents,
                               columns=['principal component 1', 'principal component 2'])
    finalDf = pd.concat([principalDf, labels], axis=1)

    fig, ax = plt.subplots()
    colors = {1: 'red', 0: 'blue'}
    finalDf.plot.scatter(x='principal component 1', y='principal component 2',
                         c=finalDf.labels.map(colors), ax=ax)
    red_patch = mpatches.Patch(color='red', label='Cluster 1')
    blue_patch = mpatches.Patch(color='blue', label='Cluster 0')
    plt.legend(handles=[red_patch, blue_patch])
    return get_graph_url(plt)
def produce_chart(chart_no):
    df = rd.read_data()
    chart_no = int(chart_no)
    chart_produce = {
        1: ['rd.draw_chest_pain_type(df)', 'Chest Pain Type'],
        2: ['rd.draw_resting_blood_pressure(df)', 'Resting Blood Pressure'],
        3: ['rd.draw_serum_cholestoral(df)', 'Serum Cholestoral'],
        4: ['rd.draw_fasting_blood_sugar(df)', 'Fasting Blood Sugar'],
        5: ['rd.draw_RER(df)', 'Resting Electrocardiographic Results'],
        6: ['rd.draw_Mhra(df)', 'Maximum Heart Rate Achieved'],
        7: ['rd.draw_exercise_induced_angina(df)', 'Exercise Induced Angina'],
        8: ['rd.draw_ST_Depression(df)', 'ST Depression'],
        9: ['rd.draw_slope_exercise_ST_segment(df)',
            'Slope of the Peak Exercise ST Segment'],
        10: ['rd.draw_major_vessels(df)', 'Number of Major Vessels'],
        11: ['rd.draw_thal(df)', 'Thal (Thalassemia)']
    }
    title = chart_produce[chart_no][1]
    graph = eval(chart_produce[chart_no][0])
    return render_template('specific_graph.html', graph=graph,
                           graph_no=chart_no, title=title)
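# A minimal alternative sketch for the dispatch above: storing the drawing
# functions themselves instead of strings avoids eval entirely. The name
# produce_chart_no_eval is hypothetical; the rd.draw_* functions and the
# template are the same ones used above, and only two table entries are shown.
def produce_chart_no_eval(chart_no):
    df = rd.read_data()
    chart_no = int(chart_no)
    chart_produce = {
        1: (rd.draw_chest_pain_type, 'Chest Pain Type'),
        2: (rd.draw_resting_blood_pressure, 'Resting Blood Pressure'),
        # ... the remaining entries mirror the table above
    }
    draw_fn, title = chart_produce[chart_no]
    graph = draw_fn(df)
    return render_template('specific_graph.html', graph=graph,
                           graph_no=chart_no, title=title)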
def clean_data(usePCA=False):
    """Clean the raw data, optionally apply PCA, and cache the results."""
    logging.info('begin to clean the data')
    if os.path.exists(ROOT + '/data/cleandata.csv'):
        # We do not need to clean the data each time.
        # To re-clean the data, delete the '../data/cleandata.csv' file.
        logging.info('the clean data already exists')
        data = pd.read_csv(ROOT + '/data/cleandata.csv')
        train_number, val_number, test_number, unlabel_number, label, uid = io.grab(ROOT + '/data/datadescribe')
    else:
        data, train_number, val_number, test_number, unlabel_number, label, uid = read.read_data()
        data = feature_handler(data)
        # Store the result
        data.to_csv(ROOT + '/data/cleandata.csv')
        io.store([train_number, val_number, test_number, unlabel_number, label, uid],
                 ROOT + '/data/datadescribe')
    logging.info('finished cleaning the data')
    if usePCA:
        # Dimensionality reduction
        if not os.path.exists(ROOT + '/data/datapca'):
            # We do not need to rerun this step each time.
            # If you change the parameters and want to relearn it, delete the '../data/datapca' file.
            data_values = decomposition.pca_solver(data)
            io.store(data_values, ROOT + '/data/datapca')
        data_values = io.grab(ROOT + '/data/datapca')
    else:
        data_values = data.values[:, 1:]
    return data_values, train_number, val_number, test_number, unlabel_number, label, uid
def main():
    train = read_data("train")
    print(train.columns)
    count_missing(train)
    df_train_Y = train[["isFraud"]]
    df_train_X = train.drop(["isFraud"], axis=1)
    train_Y = df_train_Y.to_numpy()
    train_X = df_train_X.to_numpy()
    train_model_dnn(train_X, train_Y)
def load_data(subset):
    df1_clean = rd.read_data()
    df1_clean = rd.clean_df(df1_clean, False)
    labels = df1_clean.loc[:, ['target']].values
    df1_clean = df1_clean.loc[:, subset].astype(float).astype(int)
    data = df1_clean.loc[:, subset].astype(float).astype(int)
    # Stack the selected columns into a single array; this is equivalent to the
    # original eval-built np.stack call, without eval.
    data = np.stack([data[col].values for col in subset], axis=-1)
    return data, labels, df1_clean
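# Hypothetical usage sketch for load_data above. The column names are assumed
# examples of heart-disease feature columns, not taken from the source.
data, labels, df_subset = load_data(['age', 'chol', 'thalach'])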
def viewall():
    df = rd.read_data()
    q3 = rd.draw_chest_pain_type(df)
    q4 = rd.draw_resting_blood_pressure(df)
    q5 = rd.draw_serum_cholestoral(df)
    q6 = rd.draw_fasting_blood_sugar(df)
    q7 = rd.draw_RER(df)
    q8 = rd.draw_Mhra(df)
    q9 = rd.draw_exercise_induced_angina(df)
    q10 = rd.draw_ST_Depression(df)
    q11 = rd.draw_slope_exercise_ST_segment(df)
    q12 = rd.draw_major_vessels(df)
    q13 = rd.draw_thal(df)
    return render_template('part1_viewall.html', q3=q3, q4=q4, q5=q5, q6=q6,
                           q7=q7, q8=q8, q9=q9, q10=q10, q11=q11, q12=q12, q13=q13)
def main():
    # time.perf_counter() replaces time.clock(), which was removed in Python 3.8
    start = time.perf_counter()
    docs, stopwords = read_data()
    docs = [doc for i, doc in list(docs.items())]
    indexer = kmeans(docs, stopwords)
    print("Time to index: ", round(time.perf_counter() - start, 3), "Seconds")

    ### PART A
    print("PART A")
    print("\nEnter number of clusters: ")
    indexer.clustering(int(input()))
    print("\nTime to cluster: ", round(time.perf_counter() - start, 3), "Seconds")

    ### PART B
    print("\n\nPART B")
    start = time.perf_counter()
    for i in range(2, 31):
        print("\nNumber of clusters:", i)
        indexer.clustering(i)
        print("\nTime to cluster: ", round(time.perf_counter() - start, 3), "Seconds")
def relevance_feedback(isPseudoFeedback):
    queries, relevances, docs, stopwords = read_data()
    docs = [docs[i] for i in range(17, max(list(docs.keys())))]
    indexer = Index(docs, stopwords)
    rel = {
        1: {0: [374, 398, 304, 380], 1: [374, 304, 380], 2: [304, 268, 326],
            3: [304, 268, 326], 4: [304, 268, 326]},
        6: {0: [304, 398, 402, 380, 374], 1: [304, 398, 402], 2: [304, 398, 402],
            3: [304, 398, 402], 4: [304, 398, 402]},
        31: {0: [47, 72, 100], 1: [47, 72, 100, 10], 2: [47, 72, 100, 10],
             3: [47, 72, 100, 10], 4: [47, 72, 100, 10]}
    }
    for key in [1, 6, 31]:
        query = queries[key]
        if len(relevances[key]) > 5:
            indexer.run_relevance_feedback(key, query, relevances[key],
                                           isPseudoFeedback, rel[key])
# Defaults
input_filename = 'real.csv'
output_filename = 'model.txt'
feature_list = []

if len(sys.argv) >= 2:
    if sys.argv[1][0] != '-':
        input_filename = sys.argv[1]
        n = -int(sys.argv[2])  # the count is passed with a leading '-' (e.g. "-3"), so negation makes it positive
        start = 3
    else:
        n = -int(sys.argv[1])
        start = 2
    for i in range(start, len(sys.argv)):
        feature_list.append(int(sys.argv[i]))

output_filepath = '../data/' + output_filename
sample_num, feature_num, features, NULL = read_data(input_filename)
convs = analyze(sample_num, feature_num, features, n, feature_list)

# Print out to a file
output = open(output_filepath, "w")
buf = ""
buf += str(sample_num) + ' ' + str(feature_num) + '\n'
buf += str(n) + '\n'
for xi in range(0, n):
    buf += str(feature_list[xi])
    if xi < n - 1:
        buf += ' '
buf += '\n'
output.write(buf)
for xi in range(0, n):
    buf = ""
def train(config):
    """Trains the neural network with the provided configuration.

    Parameters
    ----------
    config : dict
        Configuration of the training procedure.

    Returns
    -------
    None
    """
    # Latent space dimension
    k = config['k']

    # Read training data
    user_ids, movie_ids, ratings = read_data(training=True)
    user_ids = map_ids(user_ids, users=True)
    movie_ids = map_ids(movie_ids, users=False)

    # Input data
    users = torch.Tensor(user_ids).int()
    movies = torch.Tensor(movie_ids).int()
    ratings = torch.Tensor(ratings)
    config['n_users'] = np.unique(user_ids).size
    config['n_items'] = np.unique(movie_ids).size

    # The input size of the first fc layer differs if we one-hot encode the input
    if config['one_hot_encoding']:
        config['layers'][0] = (config['n_users'] + config['n_items']) * config['k']
    else:
        # 2 * k, because we concatenate the users and items; k is the output
        # size of the embedding layers
        config['layers'][0] = 2 * config['k']

    print("Configurations")
    print(config)

    # Save the configs as a dictionary
    with open(configs.CONFIGS_PATH, "wb") as f:
        pickle.dump(config, f, pickle.HIGHEST_PROTOCOL)

    # Try to use the GPU
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Model
    model = NCA(config).to(device)
    print("-" * 50)
    print("Our Model")
    print(model)

    learning_rate = config['lr']
    criterion = config['criterion']
    batch_size = config['batch_size']
    epochs = range(config['epochs'])
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # optimizer = torch.optim.Adam(model.parameters())

    # Create a data loader from the training data
    data_loader = DataLoader(TensorDataset(users, movies, ratings),
                             batch_size=batch_size)

    # Accumulates the loss across epochs
    losses = []
    print(sum(p.numel() for p in model.parameters() if p.requires_grad))
    print("-" * 50)

    # Iterate over epochs
    for epoch in epochs:
        epoch_loss = []
        # Iterate over batches
        for batch_users, batch_movies, batch_ratings in data_loader:
            # Do one-hot encoding
            if config['one_hot_encoding']:
                batch_users = torch.nn.functional.one_hot(
                    batch_users.long(), config['n_users'])
                batch_movies = torch.nn.functional.one_hot(
                    batch_movies.long(), config['n_items'])
            users = batch_users.int().to(device)
            movies = batch_movies.int().to(device)
            ratings = batch_ratings.to(device)
            optimizer.zero_grad()
            output = model(users, movies)[:, 0]
            loss = criterion(output, ratings)
            loss.backward()
            optimizer.step()
            epoch_loss.append(loss.item())
        avg_epoch_loss = np.mean(epoch_loss)
        losses.append(avg_epoch_loss)
        print(f"epoch {epoch}, loss = {avg_epoch_loss}")

    # Save the trained model; different files depending on whether the input
    # features are one-hot encoded
    if config['one_hot_encoding']:
        torch.save(model.state_dict(), configs.NCF_MODEL_ONE_HOT_PATH)
    else:
        torch.save(model.state_dict(), configs.NCF_MODEL_PATH)
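# A minimal sketch of a config dict that train() above expects. The keys match
# those read inside train(); every value here is an assumed example, not taken
# from the source.
example_config = {
    'k': 8,                            # latent space dimension
    'one_hot_encoding': False,         # selects the input size of the first fc layer
    'layers': [None, 64, 32, 1],       # layers[0] is overwritten inside train()
    'lr': 1e-3,
    'criterion': torch.nn.MSELoss(),   # assumed loss; ratings are real-valued
    'batch_size': 256,
    'epochs': 10,
}
# train(example_config)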
ivar = str(args.var)
main_path = '/srv/ccrc/data25/z5166746/CMIP5/'
cmip1 = main_path + str(ivar) + '/a10_cmip5_r01_' + str(ivar) + '_r.nc'
cmip2 = main_path + str(ivar) + '/a10_cmip5_r02_' + str(ivar) + '_r.nc'
cmip3 = main_path + str(ivar) + '/a10_cmip5_r03_' + str(ivar) + '_r.nc'
cmip_all = [cmip1, cmip2, cmip3]

# SST
if ivar == 'sst':
    ds_sst = []
    for i in range(len(cmip_all)):
        lon, lat, lev, sst, time, basin_mask = read_data(cmip_all[i], 'thetao', imask=None)
        sst = sst - 273.15  # Convert SST from Kelvin to degrees Celsius
        ds_sst.append(sst)
    klepto_atm_xr = klepto.archives.dir_archive('klepto_atm_xr',
                                                serialized=True, cached=False)
    klepto_atm_xr['cmip5_sst'] = ds_sst
elif ivar == 'pot_temp':
    ds_pot_temp = []
    for i in range(len(cmip_all)):
        lon, lat, lev, pot_temp, time, basin_mask = read_data(cmip_all[i], 'thetao', imask=None)
        pot_temp = pot_temp.sel(lev=slice(0, 600)) - 273.15  # Convert to degrees Celsius
        lev = lev.sel(lev=slice(0, 600))  # Select the upper ocean
        ds_pot_temp.append(pot_temp)
    klepto_atm_xr = klepto.archives.dir_archive('klepto_atm_xr',
                                                serialized=True, cached=False)
    klepto_atm_xr['cmip5_pot_temp'] = ds_pot_temp
import PIL.Image
import numpy as np
import os
import tensorflow as tf  # needed for the placeholder/resize graph below
from read import read_data
import scipy.misc


def split_pic(data):
    print(data.shape)
    res = np.split(data, 2, axis=2)
    print(res[0].shape)
    return res[0], res[1]


if __name__ == "__main__":
    data = read_data('DATA/', 200)
    data = data * 255
    data = data.astype(dtype='uint8')
    svg, pxl = split_pic(data)
    out = tf.placeholder('float32', [256, 256, 3])
    out1 = tf.image.resize_images(out, [64, 64], 0)
    out2 = tf.image.resize_images(out1, [256, 256], 0)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        for i in range(200):
            pass  # loop body was left commented out in the source:
            #out_pxl = sess.run(out2, {out : pxl[i]})
            #out = scipy.misc.imresize(pxl[i], (64, 64), 'bilinear')
            #out = scipy.misc.imresize(out, (256, 256), 'bilinear')
            #scipy.misc.imsave('test_input/%d.png' %i, out_pxl)
import os
import cv2
from read import read_data
from segment import segment_leukocytes
from visualize import compare_images, mark_cancerous_lymphocytes

if __name__ == "__main__":
    IMG_PATH = os.path.join("ALL_IDB1", "im")
    XYC_PATH = os.path.join("ALL_IDB1", "xyc")
    # This dataframe stores id, image path, a bool for the presence of blasts,
    # and coordinates of blasts if any: id, img_path, has_blasts, blast_xy
    df = read_data(IMG_PATH, XYC_PATH)
    for i in range(108):
        img = cv2.imread(df.loc[i].img_path)
        segmented_image = segment_leukocytes(img)
        mark_cancerous_lymphocytes(img, df.loc[i].blast_xy)
        compare_images(img, segmented_image)
import matplotlib.pyplot as plt  # plotting routine
import sys
sys.path.append('../.')  # search for modules in the parent directory if not found in the current one
from sub import fcc_points, kline, sort4  # user-defined functions
from plot import plot_bands  # user-defined function for plotting
from read import read_data  # user-defined function for reading data

"""Code pseudo.py, written by Lucio Andreani, [email protected]
It calculates the energy bands of tetrahedral semiconductors by the empirical
pseudopotential method.
Reference: Yu-Cardona, Fundamentals of Semiconductors, Springer.
Atomic units are used (Bohr radius for length, Hartree for energy)."""

# Read data from file pseudo.dat
f = open('pseudo.dat', 'r')
aret, v3s, v8s, v11s, v3a, v4a, v11a, nk, gmax, nmax, upper, ymin, ymax, ny, jplot = read_data(f)
print(aret, v3s, v8s, v11s, v3a, v4a, v11a, nk, gmax, nmax, upper, ymin, ymax, ny, jplot)
f.close()  # the original `f.close` was missing the call parentheses and never closed the file

#aret=5.43  # lattice constant in Angstrom
#v3s,v8s,v11s= -0.211, 0.04, 0.08  # symmetric pseudopotential form factors in Rydberg
#v3a,v4a,v11a=0. , 0. , 0.  # antisymmetric pseudopotential form factors in Rydberg
#nk=50  # number of k-points along each line in the BZ
#gmax=5.  # maximum modulus of the reciprocal lattice vector, in units of 2*pi/a
#nmax=5  # max index for reciprocal lattice vectors; choose high enough to get all bands up to the max energy of the plot
#upper=1e-12  # numerical tolerance parameter: upper limit for evaluating zero

# Fundamental constants
abohr = 0.529177210903  # Bohr radius in Angstrom
hartree = 27.211386245988  # Hartree energy in eV
def evaluate(p):
    cmd = ["./cycle.sh"]
    cmd.append("-" + str(p.num))
    for k in range(0, p.num):
        cmd.append(str(p.feature_list[k]))
    output = subprocess.check_output(cmd).decode("utf-8")
    spos = output.find("Testing Accuracy")
    output = output[spos:len(output)]
    # Strip everything except digits and the decimal point
    for c in output:
        if not c in '.0123456789':
            output = output.replace(c, '')
    return float(output)


input_filename = 'real.csv'
NULL, feature_num, NULL, NULL = read_data(input_filename)
init(feature_num)
for i in range(0, MAX):
    # Crossover
    for j in range(0, cross_over_num // 2):
        random.seed(a=None, version=2)
        num1, crossed_list1, num2, crossed_list2 = crossover(
            generation[random.randint(0, population - 1)],
            generation[random.randint(0, population - 1)])
        generation[population + j * 2].modify(num1, crossed_list1)
        generation[population + j * 2 + 1].modify(num2, crossed_list2)
    # Mutate
    for j in range(0, population):
        num, mutation_list = mutate(generation[j])
        generation[j].modify(num, mutation_list)
    # Evaluate
    for j in range(0, population + cross_over_num):
clmt1 = main_path + 'a10_' + iexp + 'T_r01/a10_' + iexp + 'T_r01.pa_1951-2016_var.nc'
clmt2 = main_path + 'a10_' + iexp + 'T_r02/a10_' + iexp + 'T_r02.pa_1951-2016_var.nc'
clmt3 = main_path + 'a10_' + iexp + 'T_r03/a10_' + iexp + 'T_r03.pa_1951-2016_var.nc'
clmt4 = main_path + 'a10_' + iexp + 'T_r04/a10_' + iexp + 'T_r04.pa_1951-2016_var.nc'
clmt5 = main_path + 'a10_' + iexp + 'T_r05/a10_' + iexp + 'T_r05.pa_1951-2016_var.nc'
clmt6 = main_path + 'a10_' + iexp + 'T_r06/a10_' + iexp + 'T_r06.pa_1951-2016_var.nc'
atm_all = [clm1, clm2, clm3, clm4, clm5, clm6,
           clmt1, clmt2, clmt3, clmt4, clmt5, clmt6]

# U1000
if ivar == 'u1000':
    ds = []
    for i in range(len(atm_all)):
        lon, lat, lev, ua, time, basin_mask = read_data(atm_all[i], 'ua_plev', imask=None)
        ua = ua.sel(lev=1000, method='nearest')
        ds.append(ua)
    klepto_atm_xr = klepto.archives.dir_archive('klepto_atm_xr',
                                                serialized=True, cached=False)
    klepto_atm_xr[str(iexp) + '_u1000'] = ds
# V1000
elif ivar == 'v1000':
    ds = []
    for i in range(len(atm_all)):
        lon, lat, lev, va, time, basin_mask = read_data(atm_all[i], 'va_plev',
def return_clean_df():
    df1_clean = rd.read_data()
    df1_clean = rd.clean_df(df1_clean, False)
    return df1_clean
    clmt1 = main_path + 'a10_' + iexp + 'T_r01/a10_' + iexp + 'T_r01.mocn_1951-2016_w_r.nc'
    clmt2 = main_path + 'a10_' + iexp + 'T_r02/a10_' + iexp + 'T_r02.mocn_1951-2016_w_r.nc'
    clmt3 = main_path + 'a10_' + iexp + 'T_r03/a10_' + iexp + 'T_r03.mocn_1951-2016_w_r.nc'
    clmt4 = main_path + 'a10_' + iexp + 'T_r04/a10_' + iexp + 'T_r04.mocn_1951-2016_w_r.nc'
    clmt5 = main_path + 'a10_' + iexp + 'T_r05/a10_' + iexp + 'T_r05.mocn_1951-2016_w_r.nc'
    clmt6 = main_path + 'a10_' + iexp + 'T_r06/a10_' + iexp + 'T_r06.mocn_1951-2016_w_r.nc'
    w_all = [clm1, clm2, clm3, clm4, clm5, clm6,
             clmt1, clmt2, clmt3, clmt4, clmt5, clmt6]

    # Vertical current
    ds = []
    for i in range(len(w_all)):
        lon, lat, lev, w, time, basin_mask = read_data(w_all[i], ivar, imask=None)
        w = w.sel(lev=slice(0, 600))
        lev = lev.sel(lev=slice(0, 600))  # Select the upper ocean
        ds.append(w)
    # #Assign correct lat/lon to MclmT r02 and r03
    # ds[7]['latitude'] = ds[0].latitude
    # ds[8]['latitude'] = ds[0].latitude
elif ivar == 'ssh':
    clm1 = main_path + 'a10_' + iexp + '_r01/a10_' + iexp + '_r01.mocn_1951-2016_sshr.nc'
    clm2 = main_path + 'a10_' + iexp + '_r02/a10_' + iexp + '_r02.mocn_1951-2016_sshr.nc'
    clm3 = main_path + 'a10_' + iexp + '_r03/a10_' + iexp + '_r03.mocn_1951-2016_sshr.nc'
    clm4 = main_path + 'a10_' + iexp + '_r04/a10_' + iexp + '_r04.mocn_1951-2016_sshr.nc'
    clm5 = main_path + 'a10_' + iexp + '_r05/a10_' + iexp + '_r05.mocn_1951-2016_sshr.nc'
    clm6 = main_path + 'a10_' + iexp + '_r06/a10_' + iexp + '_r06.mocn_1951-2016_sshr.nc'
#===============================================================
### Execute script
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('ivar_obs')
    parser.add_argument('time_ifile')
    parser.add_argument('fname')
    args = parser.parse_args()

    # Extract lon, lat, sst, time from the HadISST observations (1950-2017)
    ipath_had = '/srv/ccrc/data25/z5166746/Obs_data/sst/HadISST/HadISST_all_clean.nc'
    lon_had, lat_had, lev_had, sst_had, time_had, basin_mask = read_data(ipath_had, 'sst',
                                                                         imask=None)

    # Select time
    start_time = 1950
    end_time = 2017
    sst_t, time_t = seltime(sst_had, time_had, start_time, end_time)

    # #Calculate area weighed ssts
    # wgtfac = areawgtvar_3D(lon_had,lat_had)
    # sst_had_aw = np.multiply(sst_had,wgtfac[np.newaxis,...])

    # Mask other oceans
    sst_io = mask_oceans('./../../grids/basinmask_01.msk', sst_had, lon_had, lat_had)

    # Calculate the trend in IO SST from 1950 to 2017
def test_model():
    """Runs the persisted neural network model on test data and prints the
    test loss.

    Returns
    -------
    None
    """
    user_ids, movie_ids, ratings = read_data(training=False)

    # Reset the ids of users and movies
    user_ids = map_ids(user_ids, users=True)
    movie_ids = map_ids(movie_ids, users=False)

    # Create the tensors
    users = torch.Tensor(user_ids).int()
    movies = torch.Tensor(movie_ids).int()
    ratings = torch.Tensor(ratings)

    # Read the training settings to be fed into the model
    config = {}
    with open(configs.CONFIGS_PATH, "rb") as f:
        config = pickle.load(f)
    model = NCA(config)

    # Different models for different preprocessing steps
    if config['one_hot_encoding']:
        model.load_state_dict(torch.load(configs.NCF_MODEL_ONE_HOT_PATH, 'cpu'))
    else:
        model.load_state_dict(torch.load(configs.NCF_MODEL_PATH, 'cpu'))
    model.eval()

    # Batch size for test data
    batch_size = 200
    # The same criterion used in the training stage
    criterion = config['criterion']

    # Create a loader for the test data
    data_loader = DataLoader(TensorDataset(users, movies, ratings),
                             batch_size=batch_size)
    losses = []

    # Iterate over batches; the test loss is computed in batches because of
    # the large dataset size
    for batch_users, batch_movies, batch_ratings in data_loader:
        # Whether we have to do one-hot encoding or not
        if config['one_hot_encoding']:
            batch_users = torch.nn.functional.one_hot(batch_users.long(),
                                                      config['n_users'])
            batch_movies = torch.nn.functional.one_hot(batch_movies.long(),
                                                       config['n_items'])
        users = batch_users.int()
        movies = batch_movies.int()
        ratings = batch_ratings
        output = model(users, movies)[:, 0]
        loss = criterion(output, ratings)
        losses.append(loss.item())
    print(f"Loss for test data is {np.mean(losses)}")
from read import read_data

#=================================================================
### Execute script
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('var')  # thetao,
    args = parser.parse_args()
    ivar = str(args.var)
    main_path = '/srv/ccrc/data25/z5166746/Obs_data/obs_' + str(ivar) + '_r.nc'
    if ivar == 'sst':
        lon, lat, lev, sst, time, basin_mask = read_data(main_path, 'sst', imask=None)
        klepto_atm_xr = klepto.archives.dir_archive('klepto_atm_xr',
                                                    serialized=True, cached=False)
        klepto_atm_xr['obs_sst'] = sst
    elif ivar == 'pot_temp':
        lon, lat, lev, pot_temp, time, basin_mask = read_data(
            main_path, 'temp', imask=None, decode_times=False)
        klepto_atm_xr = klepto.archives.dir_archive('klepto_atm_xr',
                                                    serialized=True, cached=False)
        klepto_atm_xr['obs_pot_temp'] = pot_temp
def get_df_val():
    df_new = read_data()
    df_val = df_new[20000:]
    df_val.reset_index(inplace=True)
    return df_val
def train_gan(activity):
    logdir = '1'
    # Directory of the training data used for visualization
    data_path = 'C:/Users/STUDENT/Desktop/Ibrahim/GAN_tot/o/' + activity + '/train/'
    # Directory for visualizing the GAN output
    outputdata = 'C:/Users/STUDENT/Desktop/Ibrahim/GAN_tot/o/'
    gan_input, _ = read_data(data_path, [[activity]])
    train(logdir, 64, gan_input, outputdata, activity)
# Filename defaults
train_filename = "synthetic.csv"
test_filename = "real.csv"
if len(sys.argv) > 1:
    test_filename = sys.argv[1]
if len(sys.argv) > 2:
    train_filename = sys.argv[2]

# Parameters
learning_rate = 0.001
training_iters = 100
batch_size = 10
display_step = 10

# Network parameters
n_sample, n_input, features, labels = read_data(train_filename)  # features * 4 (img size: n_input * 4)
n_classes = 2  # 0 and 1
keep_rate = 0.75  # Dropout: probability to keep units
filter_width = 5  # filter size
p1_width = 10  # pooling rate for the first pooling
p2_width = 10  # pooling rate for the second pooling
remain = math.ceil(float(n_input) / float(p1_width))  # items remaining after pooling
remain = math.ceil(float(remain) / float(p2_width))
remain = remain * 4

# tf Graph input
x = tf.placeholder(tf.float32, [None, n_input, 4])
y = tf.placeholder(tf.float32, [None, n_classes])
keep_prob = tf.placeholder(tf.float32)  # keep probability for dropout
    clmt5 = main_path + 'a10_' + iexp + 'T_r05/a10_' + iexp + 'T_r05.mocn_1951-2016_w_r_detrend_' + str(itype) + '.nc'
    clmt6 = main_path + 'a10_' + iexp + 'T_r06/a10_' + iexp + 'T_r06.mocn_1951-2016_w_r_detrend_' + str(itype) + '.nc'
    atm_all = [clm1, clm2, clm3, clm4, clm5, clm6,
               clmt1, clmt2, clmt3, clmt4, clmt5, clmt6]

    # Vertical current
    iw = 'W'
    ds = []
    for i in range(len(atm_all)):
        lon, lat, lev, w, time, basin_mask = read_data(atm_all[i], iw, imask=None)
        ds.append(w)
    # #Assign correct lat/lon to MclmT r02 and r03
    # w_d[7]['latitude'] = w_d[0].latitude
    # w_d[8]['latitude'] = w_d[0].latitude
else:
    # u
    clm1 = main_path + 'a10_' + iexp + '_r01/a10_' + iexp + '_r01.mocn_1951-2016_uvpot_temprho_detrend_' + str(itype) + '.nc'
    clm2 = main_path + 'a10_' + iexp + '_r02/a10_' + iexp + '_r02.mocn_1951-2016_uvpot_temprho_detrend_' + str(itype) + '.nc'
    clm3 = main_path + 'a10_' + iexp + '_r03/a10_' + iexp + '_r03.mocn_1951-2016_uvpot_temprho_detrend_' + str(itype) + '.nc'
    clm4 = main_path + 'a10_' + iexp + '_r04/a10_' + iexp + '_r04.mocn_1951-2016_uvpot_temprho_detrend_' + str(
# Add a layer
def add_layer(inputs, in_size, out_size, activation_function=None):
    # Add one more layer and return the output of this layer
    weights = tf.Variable(tf.truncated_normal([in_size, out_size], stddev=0.1))
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)
    wx_plus_b = tf.matmul(inputs, weights) + biases
    if activation_function is None:
        outputs = wx_plus_b
    else:
        outputs = activation_function(wx_plus_b)
    return outputs


# 1. Training data
# Make up some real data
x_train, y_train, x_test, y_test = read_data("data_train_4.csv")
x_test_cnn = read_test("predict.csv")

# 2. Define placeholders to receive the data
# define placeholder for inputs to network
xs = tf.placeholder(tf.float32, [None, 10])
ys = tf.placeholder(tf.float32, [None, 3])

# 3. Define the network layers: hidden layers and a prediction layer
# add hidden layers; the input is xs
l1 = add_layer(xs, 10, 90, activation_function=tf.nn.sigmoid)
l2 = add_layer(l1, 90, 90, activation_function=tf.nn.sigmoid)
# l3 = add_layer(l2, 20, 20, activation_function=tf.nn.tanh)
# l4 = add_layer(l3, 24, 22, activation_function=tf.nn.tanh)
# l5 = add_layer(l4, 22, 20, activation_function=tf.nn.tanh)
# l6 = add_layer(l5, 20, 18, activation_function=tf.nn.tanh)
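# A hedged sketch of how the layers above could be trained: an output layer on
# top of l2, a cross-entropy loss against the 3-class ys placeholder, and a
# gradient-descent step. The names prediction/loss/train_step and the learning
# rate are assumptions; the source snippet stops before defining them.
prediction = add_layer(l2, 90, 3, activation_function=None)
loss = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=ys, logits=prediction))
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)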
if len(sys.argv) < 5:
    print("Go Away")
    exit(1)

part = sys.argv[1]
trainFile = sys.argv[2]
testFile = sys.argv[3]
valFile = sys.argv[4]

# Preprocess
preprocess_data(trainFile, trainFile + '.processed')
preprocess_data(testFile, testFile + '.processed')
preprocess_data(valFile, valFile + '.processed')

if part == '1' or part == '2':
    data = read_data(trainFile + '.processed')
    valData = read_data(valFile + '.processed')
    testData = read_data(testFile + '.processed')
    features = set()
    for x in range(1, 24):
        features.add(x)
    trainAccuracies = []
    valAccuracies = []
    testAccuracies = []
    numNodes = []
    remainders = [r for r in range(23, -1, -1)]  # [23, 20, 15, 10, 5, 0]
    pruning = False if part == '1' else True
    fileName = "Q1/plots/accuracies.png" if part == '1' else "Q1/plots/pruning.png"
def hello_world():
    df = rd.read_data()  # note: df is read but not used by the template
    return render_template('index.html')
    recents = []
    while True:
        results = get_group(data)
        if results.tolist() in recents:
            break
        new_centroids = get_new_centroids(data, results)
        centroids = new_centroids.copy()
        recents.append(results.tolist())
    return centroids


def get_class_from_centroids(x, centroids):
    return np.argmin(np.abs(centroids - x))


not_tech, tech = read_data()


def get_delta(hitobjects):
    current = -1
    delta = []
    for hitobject in hitobjects:
        if current == -1:
            delta.append(0)
            current = hitobject.offset
            continue
        delta.append(hitobject.offset - current)
        current = hitobject.offset
    return delta
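# Hypothetical usage sketch for get_class_from_centroids above: classify a
# single 1-D value against two centroids. The numbers are made-up illustrations,
# not taken from the source.
example_centroids = np.array([1.2, 5.8])
print(get_class_from_centroids(3.0, example_centroids))  # -> 0, the nearest centroid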