def main():
    arguments = docopt(__doc__)
    if arguments['lorem']:
        print(generate(arguments['<iterations>'], arguments['--text-size']))
    elif arguments['data']:
        get_data(arguments['<data-url>'], arguments['<data-location>'])
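# A minimal, hypothetical sketch of the docopt usage docstring that the main()
# above parses; the program name and option wording are assumptions, not taken
# from the source snippet.
"""Usage:
  tool.py lorem <iterations> [--text-size=<n>]
  tool.py data <data-url> <data-location>
"""
from docopt import docopt

if __name__ == '__main__':
    args = docopt(__doc__)
    print(args)  # e.g. {'lorem': True, '<iterations>': '3', '--text-size': None, ...}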
def main():
    os.mkdir("./Data")
    for i in range(2010, 2018):
        year_0 = i
        try:
            # Call the function that fetches the movie release dates
            get_data(year_0)
            # Create a folder to hold the data
            os.mkdir("./Data/" + str(year_0))
            # Split each line to get the movie release date (year, month, day)
            with open("./movie/" + str(year_0) + "_data.txt", "r") as r:
                lines = r.readlines()
                for line in lines:
                    try:
                        line = line.replace('\n', '')
                        movie_name = line.split(' ')[0]
                        movie_id = line.split(' ')[1]
                        year = line.split(' ')[2]
                        month = line.split(' ')[3]
                        day = line.split(' ')[4]
                        # Call deal() to fetch the data
                        try:
                            data = deal(movie_name, year, month, day)
                            print(data)
                        except:
                            print("login again." + '\n')
                            data = deal(movie_name, year, month, day)
                            while data is None:
                                account = random.choice(AccountList)
                                login_again(account)
                                data = deal(movie_name, year, month, day)
                        # Save the data
                        with open("./Data/" + str(year) + "/" + str(movie_id) + ".txt", "a+") as w:
                            w.writelines(str(data))
                    # Log any error that occurs
                    except Exception as e:
                        with open("./Wrong in second try.txt", "a+") as w:
                            line = line.replace('\n', '')
                            movie_name = line.split(' ')[0]
                            movie_id = line.split(' ')[1]
                            year = line.split(' ')[2]
                            month = line.split(' ')[3]
                            day = line.split(' ')[4]
                            w.writelines(str(movie_name) + ' ' + str(movie_id) + ' ' + str(year) + ' ' + str(month) + ' ' + str(day) + '\n')
                            w.writelines(str(e) + '\n')
        # Log any error that occurs
        except Exception as e:
            with open("./Wrong in first try.txt", "a+") as w:
                w.writelines("wrong in " + str(year_0))
                w.writelines(str(e) + '\n')
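# Illustrative sketch of the space-separated record format the parser above
# assumes for each line of ./movie/<year>_data.txt; the sample values are made
# up, only the field order (name, id, year, month, day) comes from the code.
#   <movie_name> <movie_id> <year> <month> <day>
#   e.g. "Inception 1234567 2010 07 16"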
def prompt_data_refresh():
    while True:
        response = input("Obtain fresh data? Y/N")
        if response.lower() == 'y':
            get_data()
            break
        elif response.lower() == 'n':
            break
        else:
            print("Please enter 'Y' or 'N'")
def train(self, batch_size=128, nb_epoch=10, data_augmentation=False):
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    adam = Adam(lr=0.01, beta_1=0.9, beta_2=0.999, decay=0.9)
    self.model.compile(optimizer=adam,
                       loss='categorical_crossentropy',
                       metrics=['accuracy'])
    datas, labels = get_data(train=True, test=False)
    k = 7281 // batch_size
    if not data_augmentation:
        self.model.fit(datas, labels, batch_size=batch_size, epochs=nb_epoch, verbose=1)
        # self.model.fit(data_images, data_labels, steps_per_epoch=k, epochs=nb_epoch, verbose=1)
    else:
        # Perform data augmentation
        datagen = ImageDataGenerator(featurewise_center=False,
                                     samplewise_center=False,
                                     featurewise_std_normalization=False,
                                     samplewise_std_normalization=False,
                                     zca_whitening=False,
                                     rotation_range=20,
                                     width_shift_range=0.2,
                                     height_shift_range=0.2,
                                     horizontal_flip=True,
                                     vertical_flip=False)
        datagen.fit(datas)
        # fit_generator expects a generator, not raw arrays; feed it the augmenting flow
        self.model.fit_generator(datagen.flow(datas, labels, batch_size=batch_size),
                                 steps_per_epoch=k, epochs=nb_epoch, verbose=1)
    self.save_model()
def model_train(self, path, train=True):
    X, y = get_data(path, self.im_height, self.im_width, train)
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, test_size=0.06, random_state=2019)
    input_img = Input((self.im_height, self.im_width, 1), name='img')
    model = get_unet(input_img, n_filters=16, dropout=0.05, batchnorm=True)
    model.compile(optimizer=Adam(), loss="binary_crossentropy",
                  metrics=[my_iou_metric])
    model.summary()
    callbacks = [
        ReduceLROnPlateau(factor=0.1, patience=3, min_lr=0.00000001, verbose=1),
        ModelCheckpoint('model-unet.h5', verbose=1, save_best_only=True,
                        save_weights_only=True)
    ]
    # if 'model-unet.ht':
    #     model.load_weights('model-unet.h5')
    results = model.fit(X_train, y_train, batch_size=4, epochs=300,
                        callbacks=callbacks,
                        validation_data=(X_valid, y_valid))
    return results
def execute(filters=None):
    if not filters:
        filters = {}
    data = []
    conditions = get_columns(filters, "Loan Installment")
    data = get_data(filters, conditions)
    return conditions["columns"], data
def main():
    login(13612339624, 348673210)
    for year_0 in range(2010, 2017):
        get_data(year_0)
        with open("./movie/" + str(year_0) + "_data.txt", "r") as r:
            lines = r.readlines()
            for line in lines:
                line = line.replace('\n', '')
                movie_name = line.split(' ')[0]
                movie_id = line.split(' ')[1]
                year = line.split(' ')[2]
                month = line.split(' ')[3]
                day = line.split(' ')[4]
                data = deal(movie_name, year, month, day)
                print(type(data))
                print(data)
def __init__(self, raw_path, others_path, **kwargs):
    self.features_dim = kwargs.get('features_dim', 10000)
    self.data_dict = get_data(raw_path, others_path)
    self.all_words_list = []
    self.train_data_dict = dict()
    self.train_feature_dict = dict()
    # self.train_class_list = ["1", "0"]
    self.text_processing()
def experiment_loc_disturb(n_ts, gamma, alpha, evo_time, supeop_size, controls_nb, train_set_size, test_set_size, size_of_lrs, noise_name, model_dim, eps, accept_err): ########################################### # PLACEHOLDERS ########################################### # input placeholder x_ = tf.placeholder(tf.float32, [None, n_ts, controls_nb]) # output placeholder y_ = tf.placeholder(tf.complex128, [None, supeop_size, supeop_size]) # dropout placeholder keep_prob = tf.placeholder(tf.float32) # creating the graph network = my_lstm(x_, controls_nb, size_of_lrs, keep_prob) # instance for saving the model saver = tf.train.Saver() # loading the data (_, _, test_input, test_target) = get_data(train_set_size, test_set_size, model_dim) # maintaining the memory config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: # essential function which executes the experiment result = variation_acc2_local_disturb(sess, network, x_, keep_prob, saver, noise_name, gamma, alpha, controls_nb, test_input, test_target, n_ts, evo_time, eps, accept_err) sess.close() tf.reset_default_graph() return result
def data(args):
    # TODO: add better error handling
    validate_args(args)
    api_key = args["api_key"]
    is_authorized(api_key)
    # 'params' is expected to be defined elsewhere in the module; it is not built here
    return json.dumps({"data": get_data(params)})
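# A hedged sketch of the "better error handling" the TODO above asks for. The
# helpers are passed in as stand-ins for the snippet's own validate_args,
# is_authorized, and get_data; the error payload shape is an assumption, not a
# documented API of the project.
import json

def data_with_errors(args, validate_args, is_authorized, get_data):
    try:
        validate_args(args)
        is_authorized(args["api_key"])
        return json.dumps({"data": get_data(args)})
    except KeyError as exc:
        return json.dumps({"error": "missing field: %s" % exc})
    except Exception as exc:  # deliberately broad for this sketch
        return json.dumps({"error": str(exc)})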
def setUp(self):
    comps = [['5ht7', 'ExtFP']]
    loader = ["get_splitted_data",
              {"seed": 666, "valid_size": 0.1, "n_folds": 1}]
    preprocess_fncs = []
    data = get_data(comps, loader, preprocess_fncs).values()[0][0][0]
    self.X = data['X_train']['data']
    self.y = data['Y_train']['data']
def split_data(self, adapt_method):
    to_exclude = feature_sets.get_general_keyword_features()
    if adapt_method == "baseline":
        X, y, labels = get_data(exclude_features=to_exclude, with_spatial=False)
    elif adapt_method == "halves":
        X, y, labels = get_data(spacial_db="dbank_spatial_halves", exclude_features=to_exclude)
    elif adapt_method == "strips":
        X, y, labels = get_data(spacial_db="dbank_spatial_strips", exclude_features=to_exclude)
    elif adapt_method == "quadrants":
        X, y, labels = get_data(spacial_db="dbank_spatial_quadrants", exclude_features=to_exclude)
    else:
        raise KeyError('adapt_method not one of: %s' % self.models)
    group_kfold = GroupKFold(n_splits=10).split(X, y, groups=labels)
    data = []
    for train_index, test_index in group_kfold:
        fold = {}
        fold["X_train"] = X.values[train_index]
        fold["y_train"] = y.values[train_index]
        fold["X_test"] = X.values[test_index]
        fold["y_test"] = y.values[test_index]
        data.append(fold)
    return data
def main():
    parser = argparse.ArgumentParser(description="plot data from stdin")
    parser.add_argument('--out_file_name', type=str,
                        help='name of output file', required=True)
    parser.add_argument('--plot_type', type=str,
                        help='take "histogram" or "boxplot" or "combo"',
                        required=True)
    parser.add_argument('--col_num', type=int,
                        help='column num in stdin to get data from',
                        required=True)
    args = parser.parse_args()

    try:
        data = get_data(args.col_num)
    except Exception:
        print("something went wrong in get_data")
        sys.exit(1)

    if args.plot_type == "boxplot":
        try:
            data_viz.boxplot(data, args.out_file_name)
        except Exception:
            print("something went wrong in data_viz.boxplot")
            sys.exit(1)

    if args.plot_type == "histogram":
        try:
            data_viz.histogram(data, args.out_file_name)
        except Exception:
            print("something went wrong in data_viz.histogram")
            sys.exit(1)

    if args.plot_type == "combo":
        try:
            data_viz.combo(data, args.out_file_name)
        except Exception:
            print("something went wrong in data_viz.combo")
            sys.exit(1)
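# Example invocation of the corrected script above; the script and data file
# names are hypothetical, only the flags come from the argparse definition.
#   cat values.tsv | python plotter.py --out_file_name out.png --plot_type histogram --col_num 2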
def predict_accuracy(self, path, train=True):
    X_test, y_test = get_data(path, self.im_height, self.im_width, train)
    input_img = Input((self.im_height, self.im_width, 1), name='img')
    model = get_unet(input_img, n_filters=16, batchnorm=True)
    model.load_weights('model-unet.h5')
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    size = 20
    total_accuracy = 0
    for i in range(0, len(X_test), size):
        print('i=', i)
        label = y_test[i:i + size]
        preds_test = model.predict(X_test[i:i + size])
        accuracy = self.get_iou_metric(label, preds_test)
        total_accuracy += accuracy
    ave_accuracy = sess.run(total_accuracy / len(X_test))
    return ave_accuracy
def train_and_predict(params, file_name): ########################################### # PLACEHOLDERS ########################################### # input placeholder x_ = tf.placeholder(tf.float32, [None, params.n_ts, params.controls_nb]) # output placeholder y_ = tf.placeholder(tf.complex128, [None, params.supeop_size, params.supeop_size]) # dropout placeholder keep_prob = tf.placeholder(tf.float32) # creating the graph network = my_lstm(x_, keep_prob, params) # instance for saving the model saver = tf.train.Saver() # loading the data (train_input, train_target, test_input, test_target) = get_data(params.train_set_size, params.test_set_size, params.model_dim, params.data_type) # maintaining the memory config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: # training the network (acc, train_table, test_table) = fit(sess, network, x_, y_, keep_prob, train_input, train_target, test_input, test_target, params) # making prediction by trained model pred = get_prediction(sess, network, x_, keep_prob, test_input) # saving trained model saver.save(sess, "weights/weights_from_{}.ckpt".format(file_name)) sess.close() tf.reset_default_graph() return (pred, acc, train_table, test_table)
def test():
    # Load the data
    data_A, _ = get_data(data_dir, "/test", "/trainB")
    with tf.variable_scope('Model'):
        # Placeholder for the input data
        input_A = tf.placeholder(
            tf.float32, [batch_size, image_height, image_width, image_channel],
            name="input_A")
        # Build the generator: feed A in, get the translated A' out
        fake_B = build_generator_resnet_9blocks(input_A, "g_A")
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # do not grab all GPU memory up front
    with tf.Session() as sess:  # create the session
        # Saver used to restore the trained generator weights
        model_vars = tf.trainable_variables()
        g_A_vars = [var for var in model_vars if 'g_A' in var.name]
        # print(g_A_vars)
        saver = tf.train.Saver(var_list=g_A_vars)
        checkpoint = tf.train.latest_checkpoint(check_dir)
        # print(checkpoint)
        saver.restore(sess, checkpoint)
        if not os.path.exists(output_path):
            os.makedirs(output_path)
        for i in range(len(data_A)):
            print("Processing image %d" % (i))
            fake = sess.run(
                [fake_B],
                feed_dict={input_A: np.reshape(data_A[i], [-1, 256, 256, 3])})
            # print(fake[0].shape)
            if (save_training_images):
                # Check that the output directory exists
                if not os.path.exists("./output/res"):
                    os.makedirs("./output/res")
                # Save the generated image
                skimage.io.imsave(
                    "./output/res/fake_" + str(i) + ".jpg",
                    np.reshape(((fake[0] + 1) * 127.5).astype(np.uint8),
                               [256, 256, 3]))
def train(self, get_data, steps=1000, display_step=10, path_to_load_variables='', path_to_save_variables=''): saver = tf.train.Saver() self.sess = tf.Session() if path_to_load_variables == '': self.sess.run(tf.global_variables_initializer()) else: #Load variables saver.restore(self.sess, path_to_load_variables) print 'loaded variables ' + path_to_load_variables # Training cycle for step in range(steps): batch = [] while len(batch) != self.batch_size: sequence=get_data() batch.append(sequence) _ = self.sess.run(self.optimizer, feed_dict={self.x: batch}) # Display if step % display_step == 0: el,p1,p2,p3,reg = self.sess.run([self.elbo, self.log_px, self.log_pz, self.log_qz,self.l2_regularization()], feed_dict={self.x: batch}) # cost = -el #because I want to see the NLL print "Step:", '%04d' % (step), "elbo=", "{:.5f}".format(el), 'logprobs', p1, '+', p2, '-', p3, 'reg', reg*self.learning_rate if path_to_save_variables != '': saver.save(self.sess, path_to_save_variables) print 'Saved variables to ' + path_to_save_variables print 'Done training' return self.sess
indicies = np.argsort(data) sdata = data[indicies] sweights = np.ones_like(sdata)/sdata.size cumweight = np.cumsum(sweights) pos = np.where(cumweight<=percentile)[0] if len(pos)<1: pos = 0 else: pos = np.max(pos) return sdata[pos] options = get_options_main() cursor = mysql_connect('catalog','pymorph','pymorph','') data1 = get_data(cursor, 'r_band_serexp', 'r_lackner_nb1', flags = options['use_flags'], flagmodel = options['flagmodel'], add_tables = ", r_lackner_fit as lfit " , conditions = " and lfit.galcount = a.galcount and lfit.model = 'nb1' and (x.flag&pow(2,11)>1 or x.flag&pow(2,12)>1) and a.n_bulge < 7.95 ") data4 = get_data(cursor, 'r_band_serexp', 'r_lackner_nb4', flags = options['use_flags'], flagmodel = options['flagmodel'], add_tables = ", r_lackner_fit as lfit " , conditions = " and lfit.galcount = a.galcount and lfit.model = 'nb4' and (x.flag&pow(2,11)>1 or x.flag&pow(2,12)>1) and a.n_bulge < 7.95 ") print 'num_objects: ', len(data1['galcount']) print 'num_objects: ', len(data4['galcount']) pl.ylim(0,0.4) pl.xlim(-0.5, 8.5) nbins = np.arange(-0.25,8.51, 0.5) pl.hist(data1['nbulge_1'], bins=nbins, normed=True, histtype='step', color='b', linestyle = 'dashed', label='nb1') pl.errorbar([get_val(data1['nbulge_1'], 0.5)], [0.1], xerr=[[get_val(data1['nbulge_1'], 0.5)-get_val(data1['nbulge_1'], 0.16)],[get_val(data1['nbulge_1'], 0.84)-get_val(data1['nbulge_1'], 0.5)]], color = 'b', ms=5, marker = 'o') print get_val(data1['nbulge_1'], 0.5), get_val(data1['nbulge_1'], 0.16),get_val(data1['nbulge_1'], 0.84) pl.hist(data4['nbulge_1'], bins=nbins, normed=True, histtype='step', color='g', linestyle = 'solid', label='nb4') pl.errorbar([get_val(data4['nbulge_1'], 0.5)], [0.05], xerr=[[get_val(data4['nbulge_1'], 0.5)-get_val(data4['nbulge_1'], 0.16)],[get_val(data4['nbulge_1'], 0.84)-get_val(data4['nbulge_1'], 0.5)]], color = 'g', ms=5, marker = 's') print get_val(data4['nbulge_1'], 0.5), get_val(data4['nbulge_1'], 0.16),get_val(data4['nbulge_1'], 0.84)
def firefly_single(parameters): """ The routine for a single run of FIREFLY. It is called from firefly_job, test_firefly, or can be run interactively for a custom single SED. In the interactive case, one sets the 'custom' and 'interactive' parameters in the parameter file, then one enters at an interative prompt: > from firefly_single import firefly_single > firefly_single('[locationpathofdata]','custom/[outputdirname]','./parameters.dat') One is then able to view the output plots at the native X window. This routine retrieves the options from the parameters file, including the locations of the data file and models to be used. It then opens the data file, model files, matches their resolutions (downgrading to instrumental+velocity disp resolution if necessary) fits, then produces output files and plots. INPUTS: - options_file: location of the parameter file (default: ./parameters.dat) No outputs. """ # data_file, output_dir data = get_data(parameters) # restrict_ages can be default (allow age<+1 Gyr age uni), off (allow all ages), strict (age<age uni only) if parameters["restrict_ages"] == "default": age_universe = Planck13.age(data["redshift"]) parameters["age_limits"][1] = np.log10(age_universe.value + 1.0) + 9.0 # log(yr units) elif parameters["restrict_ages"] == "strict": age_universe = Planck13.age(data["redshift"]) parameters["age_limits"][1] = np.log10(age_universe.value) + 9.0 # Get the models with observation information needed for downgrading. for mi, mm in enumerate(parameters["model_libs"]): for ii in parameters["imfs"]: deltal = parameters["deltal_libs"][mi] model_wave_int, model_flux_int, age, metal = get_model( parameters, mm, ii, deltal, data["vdisp"], data["wavelength"], data["r_instrument"], data["ebv_mw"] ) print "Matching data to models..." wave, data_flux, error_flux, model_flux_raw = match_data_models( data["wavelength"], data["flux"], data["flags"], data["error"], model_wave_int, model_flux_int, parameters["wave_limits"][0], parameters["wave_limits"][1], ) print "Normalising all spectra." model_flux, mass_factors = normalise_spec(data_flux, model_flux_raw) # Get filtered values IF dust is on! if parameters["hpf_mode"] == "on": print "Determining attenuation curve through HPF fitting" best_ebv, attenuation_curve = determine_attenuation( wave, data_flux, error_flux, model_flux, parameters, age, metal ) if parameters["plot_diagnostics"]: print "Best ebv is " + str(best_ebv) plt.plot(attenuation_curve) plt.title("Attenuation curve") plt.show() # Apply curve to models and renormalise: print "Curve found! Applying to models..." model_flux_atten = np.zeros(np.shape(model_flux_raw)) for m in range(len(model_flux_raw)): model_flux_atten[m] = attenuation_curve * model_flux_raw[m] model_flux, mass_factors = normalise_spec(data_flux, model_flux_atten) print "Fitting with attenuated models..." light_weights_int, chis_int, branch = fitter(wave, data_flux, error_flux, model_flux, parameters) elif parameters["hpf_mode"] == "hpf_only": print "Using filtered values to determing SP properties only." 
smoothing_length = parameters["dust_smoothing_length"] hpf_data = hpf(data_flux) hpf_models = np.zeros(np.shape(model_flux)) for m in range(len(model_flux)): hpf_models[m] = hpf(model_flux[m]) zero_dat = np.where((np.isnan(hpf_data)) & (np.isinf(hpf_data))) hpf_data[zero_dat] = 0.0 for m in range(len(model_flux)): hpf_models[m, zero_dat] = 0.0 hpf_error = np.zeros(len(error_flux)) hpf_error[:] = np.median(error_flux) / np.median(data_flux) * np.median(hpf_data) hpf_error[zero_dat] = np.max(hpf_error) * 999999.9 best_ebv = -99 hpf_models, mass_factors = normalise_spec(hpf_data, hpf_models) light_weights_int, chis_int, branch = fitter(wave, hpf_data, hpf_error, hpf_models, parameters) elif parameters["hpf_mode"] == "off": raise NotImplementedError("Not using a HPF and fitting using model curves not implemented yet") # use loop over dust curve, but this will take a while! print "Fitting complete! Calculating average properties and outputting." # Convert chis into probs # Degrees of freedom approximately = number of wavelength points dof = len(wave) probs_int = convert_chis_to_probs(chis_int, dof) # Remove zero-prob solutions nonzero_prob = np.where(probs_int > 0.00001) probs = probs_int[nonzero_prob] light_weights = light_weights_int[nonzero_prob] chis = chis_int[nonzero_prob] # Get mass-weighted SSP contributions using saved M/L ratio. (raw and normalised) unnorm_mass, mass_weights = light_weights_to_mass(light_weights, mass_factors) # Calculate all average properties and errors averages = calculate_averages_pdf( probs, light_weights, mass_weights, unnorm_mass, age, metal, parameters["pdf_sampling"], data["redshift"], ) unique_ages = np.unique(age) marginalised_age_weights = np.zeros(np.shape(unique_ages)) marginalised_age_weights_int = np.sum(mass_weights.T, 1) for ua in range(len(unique_ages)): marginalised_age_weights[ua] = np.sum(marginalised_age_weights_int[np.where(age == unique_ages[ua])]) # sfr_int,sfr_error_int = star_formation_rate(np.log10(unique_ages)+9.0,marginalised_age_weights) # sfr = sfr_int * 10**averages['stellar_mass'] / (10.0**7) # sfr_error = sfr_error_int * 10**averages['stellar_mass'] / (10.0**7) # print "Star formation rate is (in M / yr) "+str(sfr)+" plus.minus "+str(sfr_error) # Tracer()() best_fit_index = [np.argmin(chis)] best_fit = np.dot(light_weights[best_fit_index], model_flux)[0] if parameters["plot_fits"]: plt.plot(wave, data_flux, "k") plt.plot(wave, best_fit, "r", linewidth=1.0) out_plot_string = "plots/fit.eps" plt.savefig(out_plot_string, format="eps", transparent=True) plt.close() if parameters["plot_diagnostics"]: plt.plot(wave, data_flux, "k") plt.plot(wave, best_fit, "r", linewidth=1.0) out_plot_string = "plots/fit.eps" plt.show() plt.close() import plotting fits = np.dot(light_weights, model_flux) # Tracer()() # plotting.plot_fits(wave,data_flux,fits,probs) # plotting.plot_sfh_contours(age,metal,light_weights,probs,title="Light-weighted properties") # plotting.plot_sfh_contours(age,metal,mass_weights,probs,title="Mass-weighted properties") # Tracer()() # Calculate the weighted average of SSPs for the secondary outputs and contour plots. 
if parameters["observation_type"] == "ifu": file1 = parameters["output_dir_prefix"] + parameters["file_in"].split("/")[-1] + "/" file2 = parameters["output_dir_prefix"] + parameters["file_in"].split("/")[-1] + "/" + mm + "/" file3 = ( parameters["output_dir_prefix"] + parameters["file_in"].split("/")[-1] + "/" + mm + "/" + ii + "/" ) else: file1 = parameters["output_dir_prefix"] file2 = parameters["output_dir_prefix"] + mm + "/" file3 = parameters["output_dir_prefix"] + mm + "/" + ii + "/" if not os.path.exists(file1): os.makedirs(file1) if not os.path.exists(file2): os.makedirs(file2) if not os.path.exists(file3): os.makedirs(file3) parameters["output_file"] = ( parameters["output_dir_prefix"] + parameters["file_in"].split("/")[-1] + "/" + mm + "/" + ii + "/" ) if parameters["observation_type"] == "ifu": f = open(parameters["output_file"] + "bin" + str(int(parameters["bin_number"])) + "_single.txt", "wb") f.write( "# x, y, bin_number, Light_age / log(Gyrs) [value, +error, -error] light [Z/H] [value, +error,-error]," + "mass age / log(Gyrs) [value, +error, -error]," + "mass [Z/H][value, +error, -error], E(B-V), stellar mass [value, +error,-error]\n" ) f.write( str(data["xpos"]) + "\t" + str(data["ypos"]) + "\t" + str(parameters["bin_number"]) + "\t" + str(averages["light_age"]) + "\t" + str(averages["light_age_1_sig_plus"]) + "\t" + str(averages["light_age_1_sig_minus"]) + "\t" + str(averages["light_metal"]) + "\t" + str(averages["light_metal_1_sig_plus"]) + "\t" + str(averages["light_metal_1_sig_minus"]) + "\t" + str(averages["mass_age"]) + "\t" + str(averages["mass_age_1_sig_plus"]) + "\t" + str(averages["mass_age_1_sig_minus"]) + "\t" + str(averages["mass_metal"]) + "\t" + str(averages["mass_metal_1_sig_plus"]) + "\t" + str(averages["mass_metal_1_sig_minus"]) + "\t" + str(best_ebv) + "\t" + str(averages["stellar_mass"]) + "\t" + str(averages["stellar_mass_1_sig_plus"]) + "\t" + str(averages["stellar_mass_1_sig_minus"]) + "\n" ) f.close() print "Combining ascii fits files..." 
files_present = os.listdir(parameters["output_file"]) string = str(parameters["file_in"]).replace("./data/manga/", "") name = string.replace("-LOGCUBE_BIN-RADIAL-015.fits", "") if np.size(files_present) > 0: combine_files = open(parameters["output_file"] + "/" + name + "-combined.txt", "wb") combine_files.write( "# x, y, bin_number, Light_age / log(Gyrs) [value, +error, -error] light [Z/H] [value, +error,-error]," + "mass age / log(Gyrs) [value, +error, -error]," + "mass [Z/H][value, +error, -error], E(B-V), stellar mass [value, +error,-error]\n" ) for o in files_present: try: a = o.split("_")[-1] except IndexError: continue if o.split("_")[-1] == "single.txt": fits = np.loadtxt(parameters["output_file"] + o, skiprows=1, unpack=True) combine_files.write( str(fits[0]) + "\t" + str(fits[1]) + "\t" + str(fits[2]) + "\t" + str(fits[3]) + "\t" + str(fits[4]) + "\t" + str(fits[5]) + "\t" + str(fits[6]) + "\t" + str(fits[7]) + "\t" + str(fits[8]) + "\t" + str(fits[9]) + "\t" + str(fits[10]) + "\t" + str(fits[11]) + "\t" + str(fits[12]) + "\t" + str(fits[13]) + "\t" + str(fits[14]) + "\t" + str(fits[15]) + "\t" + str(fits[16]) + "\t" + str(fits[17]) + "\t" + str(fits[18]) + "\n" ) combine_files.close() else: f = open( parameters["output_dir_prefix"] + mm + "/" + ii + "/" + parameters["file_in"].split("/")[-1] + ".txt", "wb", ) f.write( "# Light_age / log(Gyrs) [value, +error, -error] light [Z/H] [value, +error,-error]," + "mass age / log(Gyrs) [value, +error, -error]," + "mass [Z/H][value, +error, -errpr], E(B-V), stellar mass [value, +error,-error]\n" ) f.write( str(averages["light_age"]) + "\t" + str(averages["light_age_1_sig_plus"]) + "\t" + str(averages["light_age_1_sig_minus"]) + "\t" + str(averages["light_metal"]) + "\t" + str(averages["light_metal_1_sig_plus"]) + "\t" + str(averages["light_metal_1_sig_minus"]) + "\t" + str(averages["mass_age"]) + "\t" + str(averages["mass_age_1_sig_plus"]) + "\t" + str(averages["mass_age_1_sig_minus"]) + "\t" + str(averages["mass_metal"]) + "\t" + str(averages["mass_metal_1_sig_plus"]) + "\t" + str(averages["mass_metal_1_sig_minus"]) + "\t" + str(best_ebv) + "\t" + str(averages["stellar_mass"]) + "\t" + str(averages["stellar_mass_1_sig_plus"]) + "\t" + str(averages["stellar_mass_1_sig_minus"]) + "\n" ) f.close() print "Wrote ASCII output to " + parameters["output_file"]
def train_and_predict(n_ts, gamma, alpha, evo_time, batch_size, supeop_size, controls_nb, nb_epochs, learning_rate, train_set_size, test_set_size, size_of_lrs, dim, noise_name, model_dim): ########################################### # PLACEHOLDERS ########################################### # input placeholder x_ = tf.placeholder(tf.float32, [None, n_ts, controls_nb]) # output placeholder y_ = tf.placeholder(tf.complex128, [None, supeop_size, supeop_size]) # dropout placeholder keep_prob = tf.placeholder(tf.float32) # creating the graph network = my_lstm(x_,controls_nb, size_of_lrs, keep_prob) # instance for saving the model saver = tf.train.Saver() # loading the data (train_input, train_target, test_input, test_target) = get_data(train_set_size, test_set_size, model_dim) # maintaining the memory config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: # training the network acc = fit(sess, network, x_, y_, keep_prob, train_input, train_target, test_input, test_target, nb_epochs, batch_size, train_set_size, learning_rate, gamma, alpha, n_ts, evo_time, dim, noise_name) # making prediction by trained model pred = get_prediction(sess, network, x_, keep_prob, test_input) # saving trained model saver.save(sess, "weights/dim_{}/{}/gam_{}_alfa_{}.ckpt".format(model_dim, noise_name, gamma, alpha)) sess.close() tf.reset_default_graph() return (pred,acc)
from matplotlib import pyplot as plt
import sys
from get_data import *

data = get_data(sys.argv[1])
years = first(data)
anomalies = to_float(second(data))

plt.plot(years, anomalies, color='red', marker='', linestyle='solid')
plt.title("Temperature anomaly")
plt.ylabel("Degrees C")
plt.show()
# -*- coding: utf-8 -*-
"""
Created on Tue Aug 25 10:11:13 2020

@author: "*****@*****.**"
"""
from kafka import KafkaProducer
# Hyphens are not valid in a Python import; assuming the module file is named
# weather_report_api.py (originally written as weather-report-api).
from weather_report_api import get_data
import json
import time


def json_serializer(data):
    return json.dumps(data).encode('utf-8')


producer = KafkaProducer(bootstrap_servers=['localhost:9092'],
                         value_serializer=json_serializer)

if __name__ == "__main__":
    while True:
        weather_data = get_data()
        print(weather_data)
        producer.send('weather-report', weather_data)
        time.sleep(3)
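# A minimal consumer sketch for the 'weather-report' topic produced above,
# assuming the same local broker; uses kafka-python's KafkaConsumer. This is an
# illustration, not part of the original project.
from kafka import KafkaConsumer
import json

consumer = KafkaConsumer(
    'weather-report',
    bootstrap_servers=['localhost:9092'],
    value_deserializer=lambda m: json.loads(m.decode('utf-8')),
    auto_offset_reset='earliest')

for message in consumer:
    print(message.value)  # the weather_data dict sent by the producer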
__author__ = 'kalpit'

import tensorflow as tf
from get_data import *

if __name__ == '__main__':
    ##### GET DATA #####
    X, y, word_counts = get_data('../lastfm_train_mappings.txt',
                                 '../lyrics/data/lyrics/train/',
                                 threshold=100)
    keys, vals = word_counts.keys(), word_counts.values()
    keys, vals = [list(x) for x in zip(*sorted(zip(keys, vals),
                                               key=lambda x: x[1],
                                               reverse=True))]
    print len(keys)

    ##### RUN RNN #####
    # def on_epoch_end(self, epoch, logs={}):
    #     if epoch % self.interval == 0:
    #         pred_y = self.model.predict(self.x_val, verbose=0)
    #         true_label = np.argmax(self.y_val, axis=1)
    #         pred_label = np.argmax(pred_y, axis=1)
    #         classes = gd.get_classes()
    #         confusion_matx(true_label, pred_label, classes)
    #
    # X_test, y_test = gd.get_data(train=False, test=True)
    # Conf = Confusion(validation_data=(X_test, y_test))

    def CM(self, x_val, y_val):
        pred_y = self.model.predict(x_val, verbose=0)
        true_label = np.argmax(y_val, axis=1)
        pred_label = np.argmax(pred_y, axis=1)
        classes = get_classes()
        confusion_matx(true_label, pred_label, classes)


if __name__ == '__main__':
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.8
    set_session(tf.Session(config=config))

    md = ModelFace()
    md.train()
    tests, test_labels = get_data(train=False, test=True)
    md.CM(tests, test_labels)
import os from matplotlib.backends.backend_pdf import PdfPages # my personal code imports from mysql_class import * from cmp_functions import * from MatplotRc import * from plot_info import * from get_data import * tablestem, model, band, xchoice, ychoice, key_x, key_y, use_flags, flagmodel = get_options() cursor = mysql_connect('catalog','pymorph','pymorph','') data = get_data(cursor, '%s_band_%s' %(band, model), '%s_%s_%s' %(band, tablestem, model), flags = use_flags, flagmodel = flagmodel) #, conditions = " and (z.flag>-1 and x.flag >-1 and y.flag >-1)".format(band = band, model = model) print 'num_objects: ', len(data['galcount']) # we want radial differences in percents # this sets up the calculation so that the plotting below works for name in ['hrad', 'rbulge', 'rdisk']: data[name+'_2'] = 1.0- (data[name+'_2']/data[name+'_1']) +data[name+'_1'] if model == 'dev' and tablestem=='lackner': data['mtot_2'] = data['mtot_2']-0.09 data['sky_1'] =100.0*( 10.0**(-0.4*(data['sky_1']-data['sky_2']))-1)+data['sky_2'] print 'keys' print key_x, key_y #do plot
def test1(): if not 'demo' in os.listdir('.'): os.mkdir('demo') companies = get_companies()[0:120] to_exclude = [ 'ICT', 'ABK', 'BHL', 'BMC', 'CFL', 'AMO', 'ATN', 'ACS', 'CBJ' ] companies = [x for x in companies if not x in to_exclude] data = get_data(companies, start="2017-01-01", end="2018-12-31", use_stored=False) number_of_days = 30 days_in_advance = 0 sell_window = 1 actual_buy_prices, actual_sell_prices, last_sell_prices, dependent_variables = make_arrays( data, companies, number_of_days=number_of_days, days_in_advance=days_in_advance, sell_window=sell_window) if not 'weights.npy' in os.listdir('.'): model = NewModel_latest(actual_buy_prices, actual_sell_prices, last_sell_prices, dependent_variables, positive=False, load_previous=False, weights=None) model.train() weights = model.sess.run(model.W_buy) bias = model.sess.run(model.bias) np.save('weights.npy', weights) np.save('bias.npy', bias) else: print('Loading previous weights') weights = np.load('weights.npy') bias = np.load('bias.npy') model = NewModel_latest(actual_buy_prices, actual_sell_prices, last_sell_prices, dependent_variables, positive=False, load_previous=True, weights=weights, bias=bias) companies = get_companies()[0:120] # companies = companies + ['FB'] to_exclude = [ 'ICT', 'ABK', 'BHL', 'BMC', 'CFL', 'AMO', 'ATN', 'ACS', 'CBJ' ] companies = [x for x in companies if not x in to_exclude] test_data = get_data(companies, start="2019-01-01", end="2019-12-31", use_stored=False) test_actual_buy_prices, test_actual_sell_prices, test_last_sell_prices, test_dependent_variables = make_arrays( test_data, companies, number_of_days=number_of_days, days_in_advance=days_in_advance, sell_window=sell_window) predicted_sell_prices, returns = model.test(test_actual_buy_prices, test_actual_sell_prices, test_last_sell_prices, test_dependent_variables) save_dataframe_in_demo_directory(companies, test_data, predicted_sell_prices, returns, number_of_days, days_in_advance, sell_window, test_actual_buy_prices)
from sklearn.metrics import mean_squared_error
import math
import tensorflow as tf
import numpy as np
from keras import regularizers


def NMSE(pred, actual):
    NMSE = 10 * math.log10(
        np.sum(np.power((pred.reshape(-1, 1) - actual.reshape(-1, 1)), 2)) /
        np.sum(np.power(actual.reshape(-1, 1), 2)))
    return NMSE


path = 'Data'
data, batch_size, n_batch = get_data(path)
print('length of data', len(data))

# split into train and test sets
values = data.values
train = values[:n_batch, :]
validate = values[n_batch:2 * n_batch, :]
test = values[-n_batch:, :]

# split into input and outputs
train_X, train_y = np.split(train, [3, ], axis=1)
validate_X, validate_y = np.split(validate, [3, ], axis=1)
test_X, test_y = np.split(test, [3, ], axis=1)

# reshape input to be 3D [samples, timesteps, features]
# train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
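# Toy check of the NMSE helper defined above; the arrays are illustrative values,
# not taken from the dataset loaded by get_data.
example_pred = np.array([1.0, 2.0, 3.0])
example_actual = np.array([1.1, 1.9, 3.2])
print('NMSE (dB):', NMSE(example_pred, example_actual))  # more negative means a closer fit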
tablestem, model, band, xchoice, ychoice, key_x, key_y, use_flags, flagmodel, use_twocom = get_options() cursor = mysql_connect('catalog','pymorph','pymorph','') if use_twocom: #conditions = " and x.flag&pow(2,10)>0 and z.galcount = a.galcount " #conditions = " and z.galcount = a.galcount and mmas.galcount = x.galcount and sfit.galcount = x.galcount and sfit.Prob_pS between 0 and 0.32 " #conditions = " and z.galcount = a.galcount and mmas.galcount = x.galcount and sfit.galcount = x.galcount and mmas.ProfType =3 " conditions = " and z.galcount = a.galcount and mmas.galcount = x.galcount and sfit.galcount = x.galcount and lfit.galcount = a.galcount and lfit.model ='dvc' " add_tables = ', r_simard_fit as sfit, simard.Mendel_masses as mmas, %s_lackner_%s as z, r_lackner_fit as lfit ' %(band, model) else: conditions = ' and z.galcount = a.galcount ' add_tables = ', %s_lackner_%s as z' %(band, model) data = get_data(cursor, '%s_band_%s' %(band, model), '%s_%s_%s' %(band, tablestem, model), flags = use_flags, flagmodel = flagmodel, add_tables=add_tables, conditions = conditions) print 'num_objects: ', len(data['galcount']) # we want radial differences in percents # this sets up the calculation so that the plotting below works for name in ['hrad', 'rbulge', 'rdisk']: data[name+'_2'] = 1.0- (data[name+'_2']/data[name+'_1']) +data[name+'_1'] if model == 'dev' and tablestem=='lackner': data['mtot_2'] = data['mtot_2']-0.09 data['sky_1'] =100.0*( 10.0**(-0.4*(data['sky_1']-data['sky_2']))-1) print 'keys' print key_x, key_y #do plot
from cmp_functions import *
from MatplotRc import *
from plot_info import *
from get_data import *

tablestem = 'dev'
model = 'dev'
band = 'r'
xchoice = 'mtot'
ychoice = 'sky'
key_x = 'mag'

cursor = mysql_connect('catalog', 'pymorph', 'pymorph', '')

data = get_data(cursor, '%s_band_%s' % (band, model),
                '%s_sdss_%s' % (band, tablestem))

print 'num_objects: ', len(data['galcount'])

# we want radial differences in percents
# this sets up the calculation so that the plotting below works
for name in ['hrad', 'rbulge', 'rdisk']:
    data[name+'_2'] = (data[name+'_2']/data[name+'_1']) - 1.0 + data[name+'_1']

data['sky_1'] = 100.0*(10.0**(-0.4*(data['sky_1']-data['sky_2']))-1)

# do plot
oplot = outlier_fig()
oplot.set_ticks(ticksx[key_x][0], ticksx[key_x][1], ticksx[key_x][2],
                .5, .05, '%2.1f')
oplot.makeplot(data[xchoice+'_2'], data['sky_1'], xlims[xchoice], (-2.0, 2.0))
elif(params['dataset'] == 'CelebA'):
    from models.celeba_model import Generator, Discriminator, DHead, QHead
elif(params['dataset'] == 'FashionMNIST'):
    from models.mnist_model import Generator, Discriminator, DHead, QHead

# Set random seed for reproducibility.
seed = 1123
random.seed(seed)
torch.manual_seed(seed)
print("Random Seed: ", seed)

# Use GPU if available.
device = torch.device("cuda:0" if(torch.cuda.is_available()) else "cpu")
print(device, " will be used.\n")

dataloader = get_data(params['dataset'], params['batch_size'])

# Set appropriate hyperparameters depending on the dataset used.
# The values given in the InfoGAN paper are used.
# num_z : dimension of incompressible noise.
# num_dis_c : number of discrete latent code used.
# dis_c_dim : dimension of discrete latent code.
# num_con_c : number of continuous latent code used.
if(params['dataset'] == 'MNIST'):
    params['num_z'] = 62
    params['num_dis_c'] = 1
    params['dis_c_dim'] = 10
    params['num_con_c'] = 2
elif(params['dataset'] == 'SVHN'):
    params['num_z'] = 124
    params['num_dis_c'] = 4
import os from matplotlib.backends.backend_pdf import PdfPages # my personal code imports from mysql_class import * from cmp_functions import * from MatplotRc import * from plot_info import * from get_data import * tablestem, model, band, xchoice, ychoice, key_x, key_y, use_flags, flagmodel = get_options() cursor = mysql_connect('catalog','pymorph','pymorph','') data = get_data(cursor, '%s_lackner_%s' %(band, model), '%s_sdss_%s' %(band, tablestem), flags = use_flags, flagmodel = flagmodel, add_tables = ', %s_lackner_%s as z' %(band, model), conditions = ' and z.galcount = a.galcount ') print 'num_objects: ', len(data['galcount']) # we want radial differences in percents # this sets up the calculation so that the plotting below works for name in ['hrad', 'rbulge', 'rdisk']: data[name+'_2'] = (data[name+'_2']/data[name+'_1']) - 1.0 +data[name+'_1'] data['sky_1'] =100.0*( 10.0**(-0.4*(data['sky_1']-data['sky_2']))-1) #do plot oplot = outlier_fig() oplot.set_ticks(ticksx[key_x][0], ticksx[key_x][1], ticksx[key_x][2], ticksy[key_y][0], ticksy[key_y][1], ticksy[key_y][2]) oplot.setdenselims(1,100 )
    html = response.read()
    # mystr = html.decode("utf-8")
    response.close()
    # return the fetched result
    return html


def save(data, i):
    f = open("视频ts爬取-18-8-17/result/" + str(i) + ".ts", "wb")
    f.write(data)
    f.close()


for i in range(1, 1331, 1):
    if i < 10:
        url = head_url + "00" + str(i) + end_url
    elif i < 100:
        url = head_url + "0" + str(i) + end_url
    else:
        url = head_url + str(i) + end_url
    data = get_data(url, User_Agent, Cookie)
    save(data, i)
    # print(data)
    # print(url)
    # jj = 0
    torch.save(model.state_dict(), "weight/student.pth")
    torch.save(ema_model.state_dict(), "weight/teacher.pth")
    return step_counter


if __name__ == '__main__':
    model = UNet(n_channels=1, n_classes=1)
    # model.load_state_dict(torch.load('weight/student.pth'))
    model.cuda()
    ema_model = UNet(n_channels=1, n_classes=1)
    # ema_model.load_state_dict(torch.load('weight/teacher.pth'))
    ema_model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9,
                          weight_decay=0.0005)
    dataset = get_data()
    dataset = get_unlabel(dataset)
    for param in ema_model.parameters():
        param.detach_()
    random.shuffle(dataset)
    for epoch in range(10):
        train_mt(model, ema_model, dataset, optimizer, epoch, 0)
def main(): #Retrieve command line arguments from user as input from the user running the program from a terminal window in_arg = get_train_input_args() #Get and transform data data_dir = in_arg.data_dir train_data, trainloader, validloader, testloader = get_data(data_dir) #Build and train network #Get pretrained model and attach appropriate classifier if in_arg.arch.lower().strip() == 'vgg16': model = models.vgg16(pretrained=True) if in_arg.hidden_units: classifier = nn.Sequential(nn.Linear(25088,in_arg.hidden_units), nn.ReLU(), nn.Dropout(p=0.2), nn.Linear(in_arg.hidden_units,102), nn.LogSoftmax(dim=1)) else: classifier = nn.Sequential(nn.Linear(25088,4096), nn.ReLU(), nn.Dropout(p=0.2), nn.Linear(4096,4096), nn.Dropout(p=0.2), nn.Linear(4096,4096), nn.ReLU(), nn.Dropout(p=0.2), nn.Linear(4096,102), nn.LogSoftmax(dim=1)) elif in_arg.arch.lower().strip() == 'alexnet': model = models.alexnet(pretrained=True) if in_arg.hidden_units: classifier = nn.Sequential(nn.Dropout(p=0.5), nn.Linear(9216,in_arg.hidden_units), nn.ReLU(), nn.Dropout(p=0.5), nn.Linear(in_arg.hidden_units,102), nn.LogSoftmax(dim=1)) else: classifier = nn.Sequential(nn.Dropout(p=0.5), nn.Linear(9216,4096), nn.ReLU(), nn.Dropout(p=0.5), nn.Linear(4096,4096), nn.ReLU(), nn.Linear(4096,102), nn.ReLU(), nn.LogSoftmax(dim=1)) else: print("Please choose model vgg16 or alexnet") #Set up classifier for chosen pretrained model for param in model.parameters(): param.requires_grad = False model.classifier = classifier criterion = nn.NLLLoss() optimizer = optim.Adam(model.classifier.parameters(), lr = in_arg.learning_rate) #Train model and print loss using the train and validation datasets check = do_deep_learning(model, trainloader, in_arg.epochs, 25, criterion, optimizer, in_arg.gpu) checkval = do_deep_learning(model, validloader, in_arg.epochs, 25, criterion, optimizer, in_arg.gpu) #Test and print network's accuracy: check_accuracy_on_test(model, testloader, in_arg.gpu) #Save checkpoint if in_arg.save_dir: filename = in_arg.save_dir + 'checkpoint.pth' else: filename = 'checkpoint.pth' model.class_to_idx = train_data.class_to_idx save_checkpoint(in_arg.arch.lower().strip(), model, optimizer, in_arg.epochs, filename)
from error_function import *
from ANN import *
from plot import *

""" get data """
#X, Y = get_data("Data/movies.csv", header=True)
#print 'Number of input feature: ', X.shape[1]

""" create training set, validation set and testing set with ratio: 0.6:0.2:0.2"""
# X_train = X[0: int(0.6 * len(X))]
# X_val = X[int(0.6 * len(X)) : int(0.8 * len(X))]
# X_test = X[int(0.8 * len(X)): len(X)]
# Y_train = Y[0: int(0.6 * len(Y))]
# Y_val = Y[int(0.6 * len(Y)) : int(0.8 * len(Y))]
# Y_test = Y[int(0.8 * len(Y)) : len(Y)]

X_train, Y_train = get_data("Data/hec_train.csv", header=True)
X_train = X_train[:, 1:]
X_val, Y_val = get_data("Data/hec_validation.csv", header=True)
X_val = X_val[:, 1:]
X_test, Y_test = get_data("Data/hec_test.csv", header=True)
X_test = X_test[:, 1:]

print 'Size of train data: %d, Size of validation data: %d, Size of test data: %d' % (
    len(X_train), len(X_val), len(X_test))

""" normalize data - column 0 1 - DAY-MONTH """
## Normalize columns 0, -1, -2
list_norm = [0, -1, -2]
for i in list_norm:
    X_train[:, i], X_val[:, i], X_test[:, i] = scale(X_train[:, i], X_val[:, i], X_test[:, i])
def main(): '''Example: python fit_model_to_generated_dataset.py --modelname "11" --exp 1 --steps 1000 --steps_tune 100 --seed 3 ''' parser = argparse.ArgumentParser() parser.add_argument('--seed', '-se', type=int, default=3) parser.add_argument('--modelname', '-m', type=str, default=None) parser.add_argument('--steps', '-st', type=int, default=1000) parser.add_argument('--steps_tune', '-stt', type=int, default=100) parser.add_argument('--task', '-tt', type=str, default='both') parser.add_argument('--exp', '-e', type=int, default=1) args = parser.parse_args() print(args.steps) print(args.steps_tune) print(args.seed) print(type(args.seed)) print(args.exp) # load behavioral data if args.exp == 1: dftmp = pd.read_csv('../data/participant_table_exp1.csv') data = get_data(dftmp) else: dftmp = pd.read_csv('../data/participant_table_exp2.csv') data = get_data_online(dftmp) # set up data for model fitting (extract relevant behavioral data) X = {} Y = {} C = {} subj_indices = slice(0, 157) subj_indices_86 = slice(0, 86) X['NN'] = data['outcomes_c_flipped'].shape[1] X['Nboth'] = data['Nboth'] X['Nrewonly'] = data['Nrewonly'] X['Npainonly'] = data['Npainonly'] subj_indices_both = slice(0, X['Nboth']) subj_indices_rew_only = slice(0, X['Nrewonly']) subj_indices_pain_only = slice(0, X['Npainonly']) Y['participants_choice'] = data['participants_choice'][:, subj_indices] for var in [ 'outcomes_c_flipped', 'mag_1_c', 'mag_0_c', 'stabvol', 'rewpain' ]: X[var] = data[var][:, subj_indices] C['Bi1item_w_j_scaled_both'] = data['Bi1item_w_j_scaled_both'][ subj_indices_both] C['Bi2item_w_j_scaled_both'] = data['Bi2item_w_j_scaled_both'][ subj_indices_both] C['Bi3item_w_j_scaled_both'] = data['Bi3item_w_j_scaled_both'][ subj_indices_both] C['Bi1item_w_j_scaled_rew_only'] = data['Bi1item_w_j_scaled_rew_only'][ subj_indices_rew_only] C['Bi2item_w_j_scaled_rew_only'] = data['Bi2item_w_j_scaled_rew_only'][ subj_indices_rew_only] C['Bi3item_w_j_scaled_rew_only'] = data['Bi3item_w_j_scaled_rew_only'][ subj_indices_rew_only] C['Bi1item_w_j_scaled_pain_only'] = data['Bi1item_w_j_scaled_pain_only'][ subj_indices_pain_only] C['Bi2item_w_j_scaled_pain_only'] = data['Bi2item_w_j_scaled_pain_only'][ subj_indices_pain_only] C['Bi3item_w_j_scaled_pain_only'] = data['Bi3item_w_j_scaled_pain_only'][ subj_indices_pain_only] # load estimated parameters from actual dataset if args.modelname == '11': # some specifications for fitting covariate = 'Bi3itemCDM' hierarchical = True B_max = 10 import models_2thr9_11 as model_specific # load previous fit / parameters # this file path might need to be changed, depending on how the main model was run. 
model_name = 'model=11_covariate=Bi3itemCDM_date=2021_1_5_samples=1000_seed=3_exp=1.pkl' with open('../fitting_behavioral_model/model_fits/' + model_name, "rb") as buff: model_output = pickle.load(buff) trace = model_output['trace'] ppc = model_output['ppc'] model = model_output['model'] params = model.params # extract previous parameters, these are the ground truth parameters that we want to recover Theta_est = trace['Theta'].mean(axis=0) #subset participants (not implemented right now) Theta_est = Theta_est[subj_indices, :] if args.modelname == '11trip': covariate = 'Bi3itemCDM' hierarchical = True B_max = 10 import models_2thr9_11 as model_specific # load previous fit / parameters model_name = 'model=11trip_covariate=Bi3itemCDM_date=2021_1_5_samples=1000_seed=3_exp=1.pkl' with open('../fitting_behavioral_model/model_fits/' + model_name, "rb") as buff: model_output = pickle.load(buff) trace = model_output['trace'] ppc = model_output['ppc'] model = model_output['model'] params = model.params # extract previous parameters, these are the ground truth parameters that we want to recover Theta_est = Theta_est[subj_indices, :] # specify generative model f = model_specific.create_gen_choice_model(X, Y, param_names=params, B_max=B_max, seed=int(args.seed)) # generate new data using ground truth parameters gen_choice, gen_outcome_valence, *_ = f(Theta_est) # replace participants choices with generative choices Y_gen = {} Y_gen['participants_choice'] = gen_choice X_gen = copy.deepcopy(X) X_gen[ 'outcome_valence'] = gen_outcome_valence # only used for visualization idx_first_reward_pain = np.min( [pi for (pi, p) in enumerate(params) if 'rew' in p]) # compile base model model = create_model_base( X_gen, Y_gen, C, params=params, K=len(params), Konetask=idx_first_reward_pain, rew_slice=slice(0, idx_first_reward_pain), pain_slice=slice(0, idx_first_reward_pain), split_by_reward=True, includes_subjs_with_one_task=True, covariate=covariate, hierarchical=hierarchical, covv='diag', coding='deviance', ) # compile specific model model = model_specific.combined_prior_model_to_choice_model( X_gen, Y_gen, param_names=params, model=model, save_state_variables=False, B_max=B_max) # save name now = datetime.datetime.now() filename='model='+args.modelname+'_date='+str(now.year)+\ '_'+str(now.month)+'_'+str(now.day)+'_samples='+str(args.steps)+'_seed='+str(args.seed)+'_exp='+str(args.exp) # save empty placeholder with open('./model_fits/' + filename + '.pkl', "wb") as buff: print('saving placeholder') pickle.dump({}, buff) # sample (fit) with model: print('sampling from posterior') MAP = {} step = pm.HamiltonianMC(target_accept=.95) trace = pm.sample(args.steps, step=step, chains=4, tune=args.steps_tune, random_seed=args.seed) ppc = pm.sample_ppc(trace, 500) if hierarchical: hier = 'hier' else: hier = 'nonhier' # save completed results with open('./model_fits/' + filename + '.pkl', "wb") as buff: pickle.dump( { 'model': model, 'trace': trace, 'ppc': ppc, 'MAP': MAP, 'Theta_est': Theta_est, 'X_gen': X_gen, 'Y_gen': Y_gen, 'C': C, 'subj_indices': subj_indices, 'subj_indices_both': subj_indices_both, 'subj_indices_rew_only': subj_indices_rew_only, 'subj_indices_pain_only': subj_indices_pain_only }, buff)
def test_clusterwise(self): compound = "5ht6" fingerprint = "ExtFP" seed = 777 # all_combinations = [p for p in list(product(proteins, fingerprints))] preprocess_fncs = [["to_binary", {"all_below": True}]] loader = ["get_splitted_data_clusterwise", { "seed": seed, "valid_size": 0.15, "n_folds": 4}] folds, _, _ = get_data([[compound, fingerprint]], loader, preprocess_fncs).values()[0] plt.figure(figsize=(20,20)) X_2 = folds[0]["X_valid"] X = folds[0]["X_train"] Y = folds[0]["Y_train"]["data"] # Note: this test might fail if you change get_data preprocess to_binary. Just change it appropr. then assert X["data"].shape[1] == 2012 # Check interestigness index d1 = calculate_jaccard_kernel(X["data"][X["cluster_A"]], X["data"][X["cluster_A"]])[1].mean() d2 = calculate_jaccard_kernel(X["data"][X["cluster_B"]], X["data"][X["cluster_B"]])[1].mean() d3 = calculate_jaccard_kernel(X["data"][X["cluster_A"]], X["data"][X["cluster_B"]])[1].mean() assert d3/(0.5*(d1+d2)) >= 1.1 ids = X["cluster_A"] + X["cluster_B"] c = Y.copy()[ids] c[:] = 1 c[0:len(X["cluster_A"])] = 2 X_proj = RandomizedPCA(n_components=3, iterated_power=10).fit_transform(X["data"].toarray()) plt.figure(figsize=(30,30)) plt.scatter(X_proj[ids,0], X_proj[ids,1], c=c, s=250) plt.show() compound = "5ht6" fingerprint = "ExtFP" seed = 777 # all_combinations = [p for p in list(product(proteins, fingerprints))] preprocess_fncs = [["to_binary", {"all_below": True}]] loader = ["get_splitted_data_clusterwise", { "seed": seed, "valid_size": 0.15, "n_folds": 4}] twelm_uncertain_1 = run_experiment("fit_grid", n_jobs=4, experiment_detailed_name="test_fit_TWELM_uncertain_%s_%s" % (compound, fingerprint), base_experiment="fit_active_learning", seed=777, base_experiment_kwargs={"strategy": "uncertainty_sampling", "preprocess_fncs": preprocess_fncs, "batch_size": 20, "protein": compound, "fingerprint": fingerprint, "warm_start_percentage": 0.03, "base_model": "TWELM", "loader_function": loader[0], "loader_args": loader[1], "param_grid": {'h': [100], \ 'C': list(np.logspace(-3,4,7))}}) assert "wac_score_cluster_B_valid" in twelm_uncertain_1.experiments[0].monitors[0].keys() # This is rather magic, but seems quite reasonable assert np.array([m["wac_score_cluster_B_valid"][-1] for m in twelm_uncertain_1.experiments[0].monitors]).mean() > 0.7
# my personal code imports from astro_image_processing.mysql import * from cmp_functions import * from astro_image_processing.MatplotRc import * from plot_info import * from get_data import * options = get_options_main() if options['model2']==None: options['model2']=options['model1'] cursor = mysql_connect('catalog','pymorph','pymorph','') data = get_data(cursor, '%s_%s_%s' %(options['band1'], options['table1'],options['model1']), '%s_%s_%s' %(options['band2'], options['table2'],options['model2']), options['band1'],options['band2'],flags = options['use_flags'], flagmodel = options['flagmodel'], add_tables = options['add_tables'], conditions = options['conditions']) print 'num_objects: ', len(data['galcount']) print data[options['xchoice']+'_1'],data[options['ychoice']+'_1'],data[options['ychoice']+'_2'] # we want radial differences in percents # this sets up the calculation so that the plotting below works for name in ['hrad', 'rbulge', 'rdisk', 'petrorad']: data[name+'_2'] = (data[name+'_2']/data[name+'_1'])-1.0 +data[name+'_1'] # data[name+'_2'] = 1.0-(data[name+'_1']/data[name+'_2'])+data[name+'_1'] for posnum in ['1','2']: if options['model%s' %posnum] == 'dev': if options['table%s' %posnum] in ['sdss', 'lackner']: data['mtot_%s' %posnum] = data['mtot_%s' %posnum]-0.071648 #corrects for mag offset due to truncation data['mtot_abs_%s' %posnum] = data['mtot_abs_%s' %posnum]-0.071648
def test_clusterwise(self): compound = "5ht6" fingerprint = "ExtFP" seed = 777 # all_combinations = [p for p in list(product(proteins, fingerprints))] preprocess_fncs = [["to_binary", {"all_below": True}]] loader = [ "get_splitted_data_clusterwise", { "seed": seed, "valid_size": 0.15, "n_folds": 4 } ] folds, _, _ = get_data([[compound, fingerprint]], loader, preprocess_fncs).values()[0] plt.figure(figsize=(20, 20)) X_2 = folds[0]["X_valid"] X = folds[0]["X_train"] Y = folds[0]["Y_train"]["data"] # Note: this test might fail if you change get_data preprocess to_binary. Just change it appropr. then assert X["data"].shape[1] == 2012 # Check interestigness index d1 = calculate_jaccard_kernel(X["data"][X["cluster_A"]], X["data"][X["cluster_A"]])[1].mean() d2 = calculate_jaccard_kernel(X["data"][X["cluster_B"]], X["data"][X["cluster_B"]])[1].mean() d3 = calculate_jaccard_kernel(X["data"][X["cluster_A"]], X["data"][X["cluster_B"]])[1].mean() assert d3 / (0.5 * (d1 + d2)) >= 1.1 ids = X["cluster_A"] + X["cluster_B"] c = Y.copy()[ids] c[:] = 1 c[0:len(X["cluster_A"])] = 2 X_proj = RandomizedPCA(n_components=3, iterated_power=10).fit_transform( X["data"].toarray()) plt.figure(figsize=(30, 30)) plt.scatter(X_proj[ids, 0], X_proj[ids, 1], c=c, s=250) plt.show() compound = "5ht6" fingerprint = "ExtFP" seed = 777 # all_combinations = [p for p in list(product(proteins, fingerprints))] preprocess_fncs = [["to_binary", {"all_below": True}]] loader = [ "get_splitted_data_clusterwise", { "seed": seed, "valid_size": 0.15, "n_folds": 4 } ] twelm_uncertain_1 = run_experiment("fit_grid", n_jobs=4, experiment_detailed_name="test_fit_TWELM_uncertain_%s_%s" % (compound, fingerprint), base_experiment="fit_active_learning", seed=777, base_experiment_kwargs={"strategy": "uncertainty_sampling", "preprocess_fncs": preprocess_fncs, "batch_size": 20, "protein": compound, "fingerprint": fingerprint, "warm_start_percentage": 0.03, "base_model": "TWELM", "loader_function": loader[0], "loader_args": loader[1], "param_grid": {'h': [100], \ 'C': list(np.logspace(-3,4,7))}}) assert "wac_score_cluster_B_valid" in twelm_uncertain_1.experiments[ 0].monitors[0].keys() # This is rather magic, but seems quite reasonable assert np.array([ m["wac_score_cluster_B_valid"][-1] for m in twelm_uncertain_1.experiments[0].monitors ]).mean() > 0.7
import os from matplotlib.backends.backend_pdf import PdfPages # my personal code imports from mysql_class import * from cmp_functions import * from MatplotRc import * from plot_info import * from get_data import * tablestem, model, band, xchoice, ychoice, key_x, key_y = get_options() cursor = mysql_connect('catalog','pymorph','pymorph','') data = get_data(cursor, '%s_band_%s' %(band, model), '%s_%s_%s' %(band, tablestem, model), add_tables=', r_lackner_ser as c', conditions=' and c.galcount = a.galcount') print 'num_objects: ', len(data['galcount']) # we want radial differences in percents # this sets up the calculation so that the plotting below works for name in ['hrad', 'rbulge', 'rdisk']: data[name+'_2'] = (data[name+'_2']/data[name+'_1']) - 1.0 +data[name+'_1'] #do plot oplot = outlier_fig() oplot.set_ticks(ticksx[key_x][0], ticksx[key_x][1], ticksx[key_x][2], ticksy[key_y][0], ticksy[key_y][1], ticksy[key_y][2]) oplot.setdenselims(0,100 ) oplot.makeplot(data[xchoice+'_1'],data[ychoice+'_1']-data[ychoice+'_2'], xlims[xchoice], ylims[ychoice])
nofovmodel = tf.keras.models.load_model( '/content/gdrive/My Drive/my_model_nofov.h5', ) model_input = tf.keras.layers.Input(shape=(64, 64, 3)) model_output = nofovmodel( model_input ) ###This line indicates that the perturbations are based on the regular model model = tf.keras.models.Model(inputs=model_input, outputs=model_output) model.compile(optimizer=tf.keras.optimizers.SGD(), loss='categorical_crossentropy', metrics=['accuracy']) fmodel = KerasModel2(model, bounds=(0.0, 255.0)) attack = GradientSignAttack(model=fmodel, criterion=TopKMisclassification(5)) train_data, train_labels, val_data, val_labels, test_data, test_labels = get_data( get_id_dictionary()) totalattacks = 0 correct = 0 for idx in range(1000): img = val_data[idx] realLabel = np.argmax(val_labels[idx]) print('input: ' + str(realLabel)) adversarial = attack((img), label=realLabel, epsilons=[0.04]) if adversarial is not None: advpredict = np.argmax(nofovmodel.predict(adversarial[np.newaxis, ...])) print('adverserial: ' + str(advpredict)) fovtop = top_predictor( adversarial, fovmodel) ###using the foveated model to evaluate predictions
def firefly_single(parameters): """ The routine for a single run of FIREFLY. It is called from firefly_job, test_firefly, or can be run interactively for a custom single SED. In the interactive case, one sets the 'custom' and 'interactive' parameters in the parameter file, then one enters at an interative prompt: > from firefly_single import firefly_single > firefly_single('[locationpathofdata]','custom/[outputdirname]','./parameters.dat') One is then able to view the output plots at the native X window. This routine retrieves the options from the parameters file, including the locations of the data file and models to be used. It then opens the data file, model files, matches their resolutions (downgrading to instrumental+velocity disp resolution if necessary) fits, then produces output files and plots. INPUTS: - options_file: location of the parameter file (default: ./parameters.dat) No outputs. """ # data_file, output_dir data = get_data(parameters) # restrict_ages can be default (allow age<+1 Gyr age uni), off (allow all ages), strict (age<age uni only) if parameters['restrict_ages'] == 'default': age_universe = Planck13.age(data['redshift']) parameters['age_limits'][1] = np.log10(age_universe.value+1.0)+9.0 # log(yr units) elif parameters['restrict_ages'] == 'strict': age_universe = Planck13.age(data['redshift']) parameters['age_limits'][1] = np.log10(age_universe.value)+9.0 # Get the models with observation information needed for downgrading. for mi,mm in enumerate(parameters['model_libs']): for ii in parameters['imfs']: deltal = parameters['deltal_libs'][mi] model_wave_int,model_flux_int, age,metal = \ get_model(parameters,mm,ii,deltal,data['vdisp'],data['wavelength'],data['r_instrument'],data['ebv_mw']) print "Matching data to models..." wave,data_flux,error_flux,model_flux_raw = \ match_data_models(data['wavelength'],data['flux'],data['flags'],data['error'],model_wave_int,model_flux_int,\ parameters['wave_limits'][0],parameters['wave_limits'][1]) print "Normalising all spectra." model_flux,mass_factors = normalise_spec(data_flux,model_flux_raw) # Get filtered values IF dust is on! if parameters['hpf_mode']=='on': print "Determining attenuation curve through HPF fitting" best_ebv,attenuation_curve = determine_attenuation(wave,data_flux,error_flux,model_flux,parameters,age,metal) if parameters['plot_diagnostics']: print "Best ebv is "+str(best_ebv) plt.plot(attenuation_curve) plt.title("Attenuation curve") plt.show() # Apply curve to models and renormalise: print "Curve found! Applying to models..." model_flux_atten = np.zeros(np.shape(model_flux_raw)) for m in range(len(model_flux_raw)): model_flux_atten[m] = attenuation_curve * model_flux_raw[m] model_flux,mass_factors = normalise_spec(data_flux,model_flux_atten) print "Fitting with attenuated models..." light_weights,chis,branch = fitter(wave,data_flux,error_flux,model_flux,parameters) elif parameters['hpf_mode'] == 'hpf_only': print "Using filtered values to determing SP properties only." 
                smoothing_length = parameters['dust_smoothing_length']
                hpf_data = hpf(data_flux)
                hpf_models = np.zeros(np.shape(model_flux))
                for m in range(len(model_flux)):
                    hpf_models[m] = hpf(model_flux[m])

                # Zero out pixels where the filtered data are NaN or Inf.
                zero_dat = np.where((np.isnan(hpf_data)) | (np.isinf(hpf_data)))
                hpf_data[zero_dat] = 0.0
                for m in range(len(model_flux)):
                    hpf_models[m, zero_dat] = 0.0

                hpf_error = np.zeros(len(error_flux))
                hpf_error[:] = np.median(error_flux) / np.median(data_flux) * np.median(hpf_data)
                hpf_error[zero_dat] = np.max(hpf_error) * 999999.9

                best_ebv = -99
                hpf_models, mass_factors = normalise_spec(hpf_data, hpf_models)
                light_weights, chis, branch = fitter(wave, hpf_data, hpf_error, hpf_models, parameters)

            elif parameters['hpf_mode'] == 'off':
                raise NotImplementedError(
                    "Not using a HPF and fitting using model curves is not implemented yet")
                # use loop over dust curve, but this will take a while!

            # Get mass-weighted SSP contributions using saved M/L ratio (raw and normalised).
            unnorm_mass, mass_weights = light_weights_to_mass(light_weights, mass_factors)
            print("Fitting complete! Calculating average properties and outputting.")

            # Convert chis into probabilities.
            # Degrees of freedom approximately = number of wavelength points.
            dof = len(wave)
            probs = convert_chis_to_probs(chis, dof)

            # Calculate all average properties and errors.
            averages = calculate_averages_pdf(probs, light_weights, mass_weights, unnorm_mass,
                                              age, metal, parameters['pdf_sampling'], data['redshift'])

            unique_ages = np.unique(age)
            marginalised_age_weights = np.zeros(np.shape(unique_ages))
            marginalised_age_weights_int = np.sum(mass_weights.T, 1)
            for ua in range(len(unique_ages)):
                marginalised_age_weights[ua] = \
                    np.sum(marginalised_age_weights_int[np.where(age == unique_ages[ua])])

            # sfr_int, sfr_error_int = star_formation_rate(np.log10(unique_ages)+9.0, marginalised_age_weights)
            # sfr = sfr_int * 10**averages['stellar_mass'] / (10.0**7)
            # sfr_error = sfr_error_int * 10**averages['stellar_mass'] / (10.0**7)
            # print("Star formation rate is (in M / yr) "+str(sfr)+" plus/minus "+str(sfr_error))
            # Tracer()()

            best_fit_index = [np.argmin(chis)]
            best_fit = np.dot(light_weights[best_fit_index], model_flux)[0]

            if parameters['plot_fits']:
                plt.plot(wave, data_flux, 'k')
                plt.plot(wave, best_fit, 'r', linewidth=1.0)
                out_plot_string = 'plots/fit.eps'
                plt.savefig(out_plot_string, format='eps', transparent=True)
                plt.close()
            if parameters['plot_diagnostics']:
                plt.plot(wave, data_flux, 'k')
                plt.plot(wave, best_fit, 'r', linewidth=1.0)
                out_plot_string = 'plots/fit.eps'
                plt.show()
                plt.close()

            # Calculate the weighted average of SSPs for the secondary outputs and contour plots.
            # Write results. IFU data are organised per spatial bin; other data per input file.
            if parameters['observation_type'] == 'ifu':
                file1 = parameters['output_dir_prefix'] + parameters['file_in'].split('/')[-1] + '/'
                file2 = parameters['output_dir_prefix'] + parameters['file_in'].split('/')[-1] + '/' + mm + '/'
                file3 = parameters['output_dir_prefix'] + parameters['file_in'].split('/')[-1] + '/' + mm + '/' + ii + '/'
            else:
                file1 = parameters['output_dir_prefix']
                file2 = parameters['output_dir_prefix'] + mm + '/'
                file3 = parameters['output_dir_prefix'] + mm + '/' + ii + '/'

            if not os.path.exists(file1):
                os.makedirs(file1)
            if not os.path.exists(file2):
                os.makedirs(file2)
            if not os.path.exists(file3):
                os.makedirs(file3)

            parameters['output_file'] = parameters['output_dir_prefix'] + \
                parameters['file_in'].split('/')[-1] + '/' + mm + '/' + ii + '/'

            if parameters['observation_type'] == 'ifu':
                f = open(parameters['output_file'] + 'bin' + str(int(parameters['bin_number'])) + '_single.txt', 'w')
                f.write("# x, y, bin_number, Light_age / log(Gyrs) [value, +error, -error], light [Z/H] [value, +error, -error], " +
                        "mass age / log(Gyrs) [value, +error, -error], " +
                        "mass [Z/H] [value, +error, -error], E(B-V), stellar mass [value, +error, -error]\n")
                f.write(str(data['xpos']) + '\t' + str(data['ypos']) + '\t' + str(parameters['bin_number']) + '\t' +
                        str(averages['light_age']) + '\t' + str(averages['light_age_1_sig_plus']) + '\t' + str(averages['light_age_1_sig_minus']) + '\t' +
                        str(averages['light_metal']) + '\t' + str(averages['light_metal_1_sig_plus']) + '\t' + str(averages['light_metal_1_sig_minus']) + '\t' +
                        str(averages['mass_age']) + '\t' + str(averages['mass_age_1_sig_plus']) + '\t' + str(averages['mass_age_1_sig_minus']) + '\t' +
                        str(averages['mass_metal']) + '\t' + str(averages['mass_metal_1_sig_plus']) + '\t' + str(averages['mass_metal_1_sig_minus']) + '\t' +
                        str(best_ebv) + '\t' +
                        str(averages['stellar_mass']) + '\t' + str(averages['stellar_mass_1_sig_plus']) + '\t' + str(averages['stellar_mass_1_sig_minus']) + '\n')
                f.close()

                print("Combining ascii fits files...")
                files_present = os.listdir(parameters['output_file'])
                if np.size(files_present) > 0:
                    combine_files = open(parameters['output_file'] + '/combined.txt', 'w')
                    combine_files.write("# x, y, bin_number, Light_age / log(Gyrs) [value, +error, -error], light [Z/H] [value, +error, -error], " +
                                        "mass age / log(Gyrs) [value, +error, -error], " +
                                        "mass [Z/H] [value, +error, -error], E(B-V), stellar mass [value, +error, -error]\n")
                    for o in files_present:
                        try:
                            a = o.split('_')[-1]
                        except IndexError:
                            continue
                        if o.split('_')[-1] == 'single.txt':
                            fits = np.loadtxt(parameters['output_file'] + o, skiprows=1, unpack=True)
                            combine_files.write(str(fits[0]) + '\t' + str(fits[1]) + '\t' + str(fits[2]) +
                                                '\t' + str(fits[3]) + '\t' + str(fits[4]) + '\t' + str(fits[5]) +
                                                '\t' + str(fits[6]) + '\t' + str(fits[7]) + '\t' + str(fits[8]) +
                                                '\t' + str(fits[9]) + '\t' + str(fits[10]) + '\t' + str(fits[11]) +
                                                '\t' + str(fits[12]) + '\t' + str(fits[13]) + '\t' + str(fits[14]) +
                                                '\t' + str(fits[15]) + '\t' + str(fits[16]) + '\t' + str(fits[17]) + '\t' + str(fits[18]) + '\n')
                    combine_files.close()

            else:
                f = open(parameters['output_dir_prefix'] + mm + '/' + ii + '/' +
                         parameters['file_in'].split('/')[-1] + '.txt', 'w')
                f.write("# Light_age / log(Gyrs) [value, +error, -error], light [Z/H] [value, +error, -error], " +
                        "mass age / log(Gyrs) [value, +error, -error], " +
                        "mass [Z/H] [value, +error, -error], E(B-V), stellar mass [value, +error, -error]\n")
                f.write(str(averages['light_age']) + '\t' + str(averages['light_age_1_sig_plus']) + '\t' + str(averages['light_age_1_sig_minus']) + '\t' +
                        str(averages['light_metal']) + '\t' + str(averages['light_metal_1_sig_plus']) + '\t' + str(averages['light_metal_1_sig_minus']) + '\t' +
                        str(averages['mass_age']) + '\t' + str(averages['mass_age_1_sig_plus']) + '\t' + str(averages['mass_age_1_sig_minus']) + '\t' +
                        str(averages['mass_metal']) + '\t' + str(averages['mass_metal_1_sig_plus']) + '\t' + str(averages['mass_metal_1_sig_minus']) + '\t' +
                        str(best_ebv) + '\t' +
                        str(averages['stellar_mass']) + '\t' + str(averages['stellar_mass_1_sig_plus']) + '\t' + str(averages['stellar_mass_1_sig_minus']) + '\n')
                f.close()

            print("Wrote ASCII output to " + parameters['output_file'])
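The routine above reads everything from a single `parameters` mapping. As a reference for which keys it actually uses, here is a minimal driver sketch: every value shown is a placeholder invented for illustration, not a default from the original parameter file, and extra keys (whatever `get_data` and `get_model` additionally require) would also be needed in practice.

# Hypothetical driver: the keys below are the ones firefly_single reads in the code above;
# all values are illustrative placeholders, not defaults of the original code.
parameters = {
    'file_in': 'spectra/example_sed.fits',   # hypothetical input SED
    'output_dir_prefix': 'output/',
    'observation_type': 'single',            # anything other than 'ifu' writes one summary file
    'restrict_ages': 'default',              # 'default', 'strict', or 'off'
    'age_limits': [7.0, 10.2],               # log(yr); upper limit is overwritten above
    'model_libs': ['MILES'],
    'imfs': ['kr'],
    'deltal_libs': [2.5],
    'wave_limits': [3500.0, 7500.0],
    'hpf_mode': 'on',                        # 'on', 'hpf_only', or 'off' (not implemented)
    'dust_smoothing_length': 200,
    'pdf_sampling': 300,
    'plot_fits': False,
    'plot_diagnostics': False,
}

firefly_single(parameters)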