def convert_to_video(frame_dir, imu_file):
    """Rescale the frames in frame_dir into a temporary directory, encode them into out.avi, then clean up."""
    parent_dir = os.path.dirname(frame_dir)
    tmp_dir = os.path.join(parent_dir, 'tmp')
    rescale(frame_dir, tmp_dir, horizon_file=imu_file)
    video_file = os.path.join(parent_dir, 'out.avi')
    vid.convert_to_video(tmp_dir, video_file)
    shutil.rmtree(tmp_dir)
def extract_data(modeldir, threshold, architecture="trans", rnn_cell_type="lstm"):
    '''
    Extract useful data from model training log files.

    :param modeldir: the path to models
    :param threshold: remove samples with bad performance (BLEU).
    :param architecture: keep only models with this architecture ('rnn' or 'trans').
    :param rnn_cell_type: RNN cell type to keep when architecture is 'rnn'.
    :return: rescaled domain array x, dev BLEU array y1, negated dev GPU time array y2.
    '''
    domain_eval_lst = get_all_domain_eval(modeldir)
    if architecture == 'rnn':
        domain_eval_lst_arch = [
            de for de in domain_eval_lst
            if (de[0]['architecture'] == architecture) and (
                de[0]['rnn_cell_type'] == rnn_cell_type)
        ]
    else:
        domain_eval_lst_arch = [
            de for de in domain_eval_lst
            if (de[0]['architecture'] == architecture)
        ]
    domain_dict_lst, eval_dict_lst = (list(t) for t in zip(*domain_eval_lst_arch))

    # Rescale values to the range [0, 1] and turn dictionary into list
    if architecture == 'rnn':
        rescaled_domain_lst, domain_name_lst = rescale.rescale(
            domain_dict_lst, rescale.rnn_rescale_dict)
    elif architecture == 'trans':
        rescaled_domain_lst, domain_name_lst = rescale.rescale(
            domain_dict_lst, rescale.trans_rescale_dict)

    # Transform eval_dict_lst: [{model1_eval_dict}, {model2_eval_dict}] to
    # eval_dict: {metric1: [value1, value2, ...], metric2: [...], ...}
    eval_dict = defaultdict(list)
    for model_eval_dict in eval_dict_lst:
        for key in eval_dict_lst[0].keys():
            eval_dict[key].append(model_eval_dict[key])

    # Shuffle the data (re-seeding with 37 keeps the domains and every metric list aligned)
    random_sampling.Random(37).shuffle(rescaled_domain_lst)
    for metric in eval_dict.keys():
        random_sampling.Random(37).shuffle(eval_dict[metric])

    # Filter out samples whose dev BLEU falls below the threshold
    x = rescaled_domain_lst
    y1 = eval_dict["dev_bleu"]
    y2 = eval_dict["dev_gpu_time"]
    remove_lst = [i for i in range(len(y1)) if y1[i] < threshold]
    x = np.array([x[i] for i in range(len(x)) if i not in remove_lst])
    y1 = np.array([y1[i] for i in range(len(y1)) if i not in remove_lst])
    y2 = np.array([-y2[i] for i in range(len(y2)) if i not in remove_lst])
    return x, y1, y2
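# Hypothetical usage sketch (the model directory and threshold below are illustrative,
# not taken from the original code):
# x, y1, y2 = extract_data("path/to/models", threshold=10.0, architecture="trans")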
def setup_fig():
    """Build a 3-D scatter figure of every 100th (time, frequency, amplitude) point."""
    trace = go.Scatter3d(x=points[::100, 0],
                         y=points[::100, 1],
                         z=points[::100, 2],
                         mode='markers',
                         marker=dict(size=2,
                                     opacity=0.8,
                                     color=rescale(np.log(points[::100, 2]),
                                                   range=(-255, 255))))
    data = [trace]
    layout = go.Layout(title='Partial Data',
                       hovermode='closest',
                       height=900,
                       margin=dict(l=0, r=0, b=0, t=0),
                       scene=dict(xaxis=dict(title='Time',
                                             ticklen=5,
                                             gridwidth=2),
                                  yaxis=dict(title='Frequency',
                                             type='log',
                                             ticklen=5,
                                             gridwidth=2),
                                  zaxis=dict(title='Amplitude',
                                             ticklen=5,
                                             gridwidth=2)))
    return go.Figure(data, layout)
def compose_nc(nc_template, *filepaths):
    from awips_netcdf import fill, UTC
    from rescale import rescale, K_REFLECTANCE
    from datetime import datetime

    new_comp = compose(*filepaths)
    new_comp = rescale(new_comp, data_kind=K_REFLECTANCE)
    fill("./composite.nc", new_comp, nc_template,
         datetime.utcnow().replace(tzinfo=UTC))
def extract_data(modeldir, architecture, rnn_cell_type, metric, best):
    '''
    Collect rescaled domain vectors and the chosen objective for one architecture.

    :param metric: name of the evaluation metric to optimize.
    :param best: 'min' if smaller metric values are better; otherwise the metric is
                 negated so that the objective is always minimized.
    '''
    domain_eval_lst = get_all_domain_eval(modeldir)
    if architecture == 'rnn':
        domain_eval_lst_arch = [
            de for de in domain_eval_lst
            if (de[0]['architecture'] == architecture) and (
                de[0]['rnn_cell_type'] == rnn_cell_type)
        ]
    else:
        domain_eval_lst_arch = [
            de for de in domain_eval_lst
            if (de[0]['architecture'] == architecture)
        ]
    domain_dict_lst, eval_dict_lst = (list(t) for t in zip(*domain_eval_lst_arch))

    # Rescale values to the range [0, 1] and turn dictionary into list
    if architecture == 'rnn':
        rescaled_domain_lst, domain_name_lst = rescale.rescale(
            domain_dict_lst, rescale.rnn_rescale_dict)
    elif architecture == 'cnn':
        rescaled_domain_lst, domain_name_lst = rescale.rescale(
            domain_dict_lst, rescale.cnn_rescale_dict)
    elif architecture == 'trans':
        rescaled_domain_lst, domain_name_lst = rescale.rescale(
            domain_dict_lst, rescale.trans_rescale_dict)

    # The objective we want to optimize (always minimized, so 'max' metrics are negated)
    if best == 'min':
        eval_lst = [e[metric] for e in eval_dict_lst]
        WORST = 100000
    else:
        eval_lst = [-e[metric] for e in eval_dict_lst]
        WORST = 0
    BEST = min(eval_lst)

    # Shuffle the data (the same seed keeps domains and objectives aligned)
    random.Random(37).shuffle(rescaled_domain_lst)
    random.Random(37).shuffle(eval_lst)
    return rescaled_domain_lst, domain_name_lst, eval_lst, BEST, WORST
def run_eos(self):
    """Run calculations for equation of state."""
    # Create basic structure and attach it as an output
    structure = self.inputs.structure

    calculations = {}

    # `labels` and `scale_facs` are assumed to be defined elsewhere (e.g. at module level)
    for label, factor in zip(labels, scale_facs):
        rescaled_structure = rescale(structure, Float(factor))
        inputs = generate_scf_input_params(rescaled_structure, self.inputs.code,
                                           self.inputs.pseudo_family)

        self.report(
            'Running an SCF calculation for {} with scale factor {}'.format(
                structure.get_formula(), factor))
        future = self.submit(PwCalculation, **inputs)
        calculations[label] = future

    # Ask the workflow to continue when the results are ready and store them in the context
    return ToContext(**calculations)
def run_eos_wf(code, pseudo_family, structure):
    """Run an equation of state of a bulk crystal structure for the given element."""
    # This will print the pk of the work function
    print('Running run_eos_wf<{}>'.format(Process.current().pid))

    scale_factors = (0.96, 0.98, 1.0, 1.02, 1.04)
    labels = ['c1', 'c2', 'c3', 'c4', 'c5']

    calculations = {}

    # Loop over the label and scale_factor pairs
    for label, factor in list(zip(labels, scale_factors)):
        # Generate the scaled structure from the initial structure
        rescaled_structure = rescale(structure, Float(factor))

        # Generate the inputs for the `PwCalculation`
        inputs = generate_scf_input_params(rescaled_structure, code, pseudo_family)

        # Launch a `PwCalculation` for each scaled structure
        print('Running a scf for {} with scale factor {}'.format(
            structure.get_formula(), factor))
        calculations[label] = run(PwCalculation, **inputs)

    # Bundle the individual results from each `PwCalculation` in a single dictionary node.
    # Note: since we are 'creating' new data from existing data, we *have* to go through a
    # `calcfunction`, otherwise the provenance would be lost!
    inputs = {
        label: result['output_parameters']
        for label, result in calculations.items()
    }
    eos = create_eos_dictionary(**inputs)

    # Finally, return the eos Dict node
    return eos
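# Hypothetical invocation sketch (assumes a configured AiiDA profile; the code label, pseudo
# family name, and structure node pk are illustrative, not from the original):
# from aiida.orm import load_code, load_node
# eos = run_eos_wf(load_code('pw@localhost'), 'SSSP', load_node(1234))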
import hopfion, plot, rescale
import matplotlib.pyplot as plt  # assumed import: plt.savefig/plt.show are called below

# Decide kind of torus knot and file name
hopfion.select_torusknot()
file_name = hopfion.gamma + '_' + hopfion.delta + '_tk.csv'

# Solve ansatz
Im_func, Re_func = hopfion.MakeAnsatz(hopfion.gamma, hopfion.delta)
plt_data = hopfion.solve(Im_func, Re_func)

# Rescale data
rescale_data = rescale.rescale(plt_data[0], plt_data[1], plt_data[2])

# Output
hopfion.output(rescale_data[3], file_name)

# Plot and save
plot.data_plot(rescale_data[0], rescale_data[1], rescale_data[2])
data_name = file_name[:-len(".csv")]  # drop the ".csv" suffix (str.strip would remove characters, not the suffix)
plt.savefig(data_name + ".png")
plt.show()
# Assumed imports: np, scipy.io, plt and rescale are used below but were missing from this excerpt
import numpy as np
import scipy.io
import matplotlib.pyplot as plt
from rescale import rescale

from social_unmixing import social_unmixing
from FCLSU import FCLSU
import h5py
import time
from bundle2global import bundle2global
from pca_viz import pca_viz

plt.close("all")

data = scipy.io.loadmat("real_data_1.mat")
hyper = data['data']
rgb = hyper[:, :, [56, 29, 19]]
uint8_rgb = rescale(rgb)

plt.imshow(uint8_rgb, interpolation='nearest')

endmembers = {}
f = h5py.File('bundles.mat', 'r')
for k, v in f.items():
    endmembers[k] = np.array(v)

bundle = endmembers['bundle'].T
groups = endmembers['groups'][0].astype(int)

plt.figure()
plt.plot(bundle)
domain_eval_lst = preprocess.get_all_domain_eval(args.modeldir)
if args.architecture == 'rnn':
    domain_eval_lst_arch = [
        de for de in domain_eval_lst
        if (de[0]['architecture'] == args.architecture) and (
            de[0]['rnn_cell_type'] == args.rnn_cell_type)
    ]
else:
    domain_eval_lst_arch = [
        de for de in domain_eval_lst
        if (de[0]['architecture'] == args.architecture)
    ]
domain_dict_lst, eval_dict_lst = (list(t) for t in zip(*domain_eval_lst_arch))

rescaled_domain_lst, domain_name_lst = rescale.rescale(
    domain_dict_lst, rescale.trans_rescale_dict)
eval_lst = [e[args.metric] for e in eval_dict_lst]

x = np.array(rescaled_domain_lst)
y = np.array(eval_lst)

# Fit a linear model and map the absolute coefficients into [0.1, 0.9] (largest maps to 0.1)
regr = linear_model.LinearRegression()
regr.fit(x, y)
c = regr.coef_
cscaled = [abs(i) for i in c]
minc = min(cscaled)
maxc = max(cscaled)
cscaled = [0.9 - (i - minc) / (maxc - minc) * 0.8 for i in cscaled]

print("Domain names: ", domain_name_lst)
print("Coefficients: ", c)
print("Scaled coefficients [0.1, 0.9]: ", cscaled)
    return go.Figure(data, layout)


py.plot(setup_fig(), filename='./plotly/partials.html')

# %% Crop by frequency range
freq_range = dict(
    low=1200,
    high=np.inf,
)
condition = np.logical_and(points[:, 1] > freq_range['low'],
                           points[:, 1] < freq_range['high'])
points = points[condition]

# %%
points[:, 2] = rescale(np.log(points[:, 2]))

# In[7]:
# threshold = -50  # in dB
# isSignal = points[:, 2] > 10 ** (threshold / 20)
isSignal = points[:, 2] > 0.55
points = points[isSignal]

# %% MiniBatchKMeans Clustering (fastest method)
n_clusters = 15
weight = dict(
    times=0,
    freqs=8,
    amps=8,
)
import matplotlib.pyplot as plt
import scipy.io
import scipy.signal
import time

from rescale import rescale
from SCLSU import SCLSU
from pca_viz import pca_viz
from ELMM import ELMM
from RELMM import RELMM

plt.close("all")

data = scipy.io.loadmat("real_data_1.mat")
im = rescale(data['data'])
rgb = im[:, :, [56, 29, 19]]
uint8_rgb = rescale(rgb)

plt.imshow(uint8_rgb, interpolation='nearest')

endmembers = scipy.io.loadmat("endmembers_houston.mat")
S0 = endmembers['S0']
P = S0.shape[1]

plt.figure(1)
plt.plot(S0)
def main(args):
    path_to_files = sys.argv[1]
    min = int(sys.argv[2])    # 550
    max = int(sys.argv[3])    # 650
    step = int(sys.argv[4])   # 10
    do_balance = int(sys.argv[5])
    if len(sys.argv) > 6:
        debug = 1
    else:
        debug = 0

    # Get the original files -- we do *not* want to modify these.
    original_files = glob.glob(path_to_files + '/*.h5')

    # First, pT-sort the files.
    for file in original_files:
        sorted_file = pt_sort(file, debug)

    # Record the new filenames.
    sort_files = glob.glob(path_to_files + '/*.h5')
    sort_files = list(set(sort_files) - set(original_files))

    # Next, perform the pT splicing. Delete the sorted files when no longer needed.
    for file in sort_files:
        for i in range(min, max, step):
            pt_slice(file, str(i), str(i + step))
        sub.check_call(['rm', file])

    # Get the pT-split files. By definition it's everything in the directory except for the "original files".
    pt_files = glob.glob(path_to_files + '/*.h5')
    pt_files = list(set(pt_files) - set(original_files))

    if do_balance == 0:
        return

    # Now our goal is to remove events from training pt slices,
    # such that each has the same number of signal and background events, with a 50/50 split.
    # (We also want to "balance" the validation and testing pt slices to have a 50/50 split,
    # but each slice need not have the same number of events.)

    # First handle validation & testing.
    val_test_pt_files = [x for x in pt_files if ('test' in x or 'valid' in x)]
    for file in val_test_pt_files:
        balanced_file = balance(file)
        sub.check_call(['rm', file])
        sub.check_call(['mv', balanced_file, file])

    # Now handle training -- slightly more complicated since in addition to balancing signal and background,
    # we are also ensuring that each training pt slice has the same number of total events.
    pt_files = [x for x in pt_files if ('train' in x)]

    # Now we seek the file with the smallest number of signal *or* background events.
    files_h5 = [h5.File(x, 'r') for x in pt_files]
    ns = [np.sum(x['is_signal'][:]) for x in files_h5]
    nb = [files_h5[x]['is_signal'].shape[0] - ns[x] for x in range(len(files_h5))]
    for file_h5 in files_h5:
        file_h5.close()  # can safely close the files (will be overwritten later)

    min_s = (np.min(ns), np.argmin(ns))
    min_b = (np.min(nb), np.argmin(nb))
    min_index = -1
    if min_s[0] < min_b[0]:
        min_index = min_s[1]
    else:
        min_index = min_b[1]
    min_file = pt_files[min_index]  # filename of file with the smallest set of signal or background events

    # Now "balance" this file -- make a copy with 50% signal, 50% background.
    balanced_min_file = balance(min_file)

    # Now delete min_file, and rename balanced_min_file -> min_file.
    sub.check_call(['rm', min_file])
    sub.check_call(['mv', balanced_min_file, min_file])

    # Now the min file has 50% signal and 50% background, and all the other files can be rescaled to match
    # its number of signal and background events. (Thus they will be "balanced" too by definition.)
    for pt_file in pt_files:
        if pt_file == min_file:
            continue
        rescaled_file = rescale(pt_file, min_file)
        sub.check_call(['rm', pt_file])
        sub.check_call(['mv', rescaled_file, pt_file])

    # Done. Now we do an (optional) sanity check.
    if debug != 0:
        files_h5 = [h5.File(x, 'r') for x in pt_files]
        ns = [np.sum(x['is_signal'][:]) for x in files_h5]
        nb = [files_h5[x]['is_signal'].shape[0] - ns[x] for x in range(len(files_h5))]
        for i in range(len(files_h5)):
            print(pt_files[i], ns[i], nb[i])
    return