def write(cls,
          processList,
          lutPath,
          lutStyle=IO_CAPABILITY_WRITE_1D,
          lutResolution1d=1024,
          lutResolution3d=33,
          lutResolution1d3d1d=[1024, 33, 2],
          inputMin=0.0,
          inputMax=1.0,
          shaperIn=['linear', 0.0, 1.0],
          shaperOut=['linear', 0.0, 1.0]):
    # Expand the scalar 3D LUT resolutions into per-channel triples
    expandedlutResolution3d = [lutResolution3d] * 3
    expandedlutResolution1d3d1d = list(lutResolution1d3d1d)
    expandedlutResolution1d3d1d[1] = [lutResolution1d3d1d[1]] * 3

    if lutStyle == IO_CAPABILITY_WRITE_1D:
        # Sample the CLF ProcessList into a 1D LUT
        samples = Sampling.sample1D(processList, lutResolution1d, inputMin, inputMax)
        return cls.write1D(lutPath, samples, lutResolution1d, inputMin, inputMax)

    elif lutStyle == IO_CAPABILITY_WRITE_3D:
        # Sample the CLF ProcessList into a 3D LUT
        samples = Sampling.sample3D(processList, expandedlutResolution3d, inputMin, inputMax)
        return cls.write3D(lutPath, samples, expandedlutResolution3d, inputMin, inputMax)

    elif lutStyle == IO_CAPABILITY_WRITE_1D3D1D:
        # Sample the CLF ProcessList into a 1D/3D/1D LUT chain
        (samples1dIn, inputMin, inputMax,
         samples3d,
         samples1dOut, outputMin, outputMax) = Sampling.sample1D3D1D(
            processList, expandedlutResolution1d3d1d, shaperIn, shaperOut)

        # Unpack the resolutions and shaper descriptions before writing
        (lutResolution1dIn, lutResolution3d, lutResolution1dOut) = expandedlutResolution1d3d1d
        (shaperInType, shaperInMin, shaperInMax) = shaperIn
        (shaperOutType, shaperOutMin, shaperOutMax) = shaperOut

        return cls.write1D3D1D(lutPath,
                               samples1dIn, lutResolution1dIn, inputMin, inputMax,
                               samples3d, lutResolution3d,
                               samples1dOut, lutResolution1dOut, outputMin, outputMax)

    else:
        return False
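# An illustrative sketch (not part of the original module) of how the
# resolution arguments above are expanded before sampling; the values here
# are hypothetical.
lutResolution3d = 33
lutResolution1d3d1d = [1024, 33, 2]
expandedlutResolution3d = [lutResolution3d] * 3
expandedlutResolution1d3d1d = list(lutResolution1d3d1d)
expandedlutResolution1d3d1d[1] = [lutResolution1d3d1d[1]] * 3
assert expandedlutResolution3d == [33, 33, 33]
assert expandedlutResolution1d3d1d == [1024, [33, 33, 33], 2]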
def update(self, rotation, distance, measure):
    # Move every particle with multiplicative motion noise
    for particle in self.particles:
        particle.move(rotation * random.gauss(1.0, self.turn_noise),
                      distance * random.gauss(1.0, self.forward_noise))
    # Weight particles by their (noisy) agreement with the measurement
    w = [abs(random.gauss(0.0, self.sense_noise) + measure(particle))
         for particle in self.particles]
    # Resample proportionally to weight (roulette-wheel selection)
    self.particles = [copy(sample) for sample in Sampling.roulette(self.particles, w)]
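# A minimal sketch of the fitness-proportionate (roulette-wheel) resampling
# that Sampling.roulette above appears to perform. This is an assumption about
# its behaviour, not the original implementation; in Python 3.6+ the same
# effect can be had with random.choices(population, weights=w, k=len(population)).
import random

def roulette(population, weights):
    # Draw len(population) items, each with probability proportional
    # to its weight
    total = sum(weights)
    picks = []
    for _ in population:
        r = random.uniform(0.0, total)
        acc = 0.0
        for item, w in zip(population, weights):
            acc += w
            if r <= acc:
                picks.append(item)
                break
    return picks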
def get_results_from_scratch(sample_size, nr_samples, sample_type, xi,
                             rejection_prob, save_hill_estimation=False):
    start_time = time.time()

    # Sampling
    print(f'Started sampling {nr_samples} samples of size {sample_size} ({sample_type})')
    sample_file = Sampling.Sampler(xi, sample_type).sample_to_file(sample_size, nr_samples)
    end_sampling = time.time()
    print(f'Finished sampling in {round(end_sampling - start_time, 2)} seconds')

    if save_hill_estimation:
        # Hill estimation
        print('Started writing Hill estimator to file')
        hills_file = Hill.hills_from_sample_to_file(sample_file)
        end_hill = time.time()
        print(f'Finished writing Hill estimator to file in {round(end_hill - end_sampling, 2)} seconds')

    # Measurement execution
    print('Started measuring')
    start_measuring = time.time()
    Measuring.Measuring(sample_file, rejection_prob).writing_results_to_files()
    end_measuring = time.time()
    print(f'Finished measuring in {round(end_measuring - start_measuring, 2)} seconds')

    print(f'Total run time {round(end_measuring - start_time, 2)} seconds, '
          f'which is {round((end_measuring - start_time) / 60)} minutes')
    return sample_file
Date: 16.04.2015
'''
import mushrooms_bfgs, randomForest, Sampling, PGCM, WilcoxonTest, datetime
from copy import deepcopy

if __name__ == '__main__':
    timeStart = datetime.datetime.now()
    (train_X, train_y, test_X, test_y) = mushrooms_bfgs.initialize(float(80) / 100)
    N = 22  # Number of iterations; at most equal to the number of features - 1
    NumberOfFeaturesRemoved = 0
    FeaturesRemoved = []
    for i in range(1, N + 1):  # At most, all features can be removed
        importanceOfRandomForest = randomForest.netFeatureImportance(train_X, train_y, 15)
        importanceOfSymbioticAlgorithm = Sampling.test_initialize()
        #~ print("Feature Importance by Random Forest ", importanceOfRandomForest)
        #~ print("Feature Importance by Symbiotic Algorithm ", importanceOfSymbioticAlgorithm)
        if len(FeaturesRemoved) == 0:
            (train_X_Sym, test_X_Sym) = mushrooms_bfgs.featureRemoval(
                train_X, test_X, [importanceOfSymbioticAlgorithm[i - 1][0]])
            (train_X_For, test_X_For) = mushrooms_bfgs.featureRemoval(
                train_X, test_X, [importanceOfRandomForest[i - 1][0]])
        else:
            trialFeatureSym = deepcopy(FeaturesRemoved)
            trialFeatureFor = deepcopy(FeaturesRemoved)
            trialFeatureSym.append(importanceOfSymbioticAlgorithm[i - 1][0])
            trialFeatureFor.append(importanceOfRandomForest[i - 1][0])
            print(trialFeatureFor, trialFeatureSym)
            (train_X_Sym, test_X_Sym) = mushrooms_bfgs.featureRemoval(
                train_X, test_X, trialFeatureSym)
            (train_X_For, test_X_For) = mushrooms_bfgs.featureRemoval(
                train_X, test_X, trialFeatureFor)
        wilcoxonOut = WilcoxonTest.WilcoxonTest(train_X.ravel(),
                                                train_X_Sym.ravel(),
                                                train_X_For.ravel())
        if wilcoxonOut == 1:
def single_loc_evaluation(location, perf_plot=False, hpar_plot=False):
    metric_list = []

    coord_list = sa.random_location_generator(location)
    n = len(coord_list)

    for i in tqdm(range(n)):
        try:
            xtrain, xval, _, ytrain, yval, _ = dp.point_model(
                coords=list(coord_list[i]))
            m = gpm.multi_gp(xtrain, xval, ytrain, yval)

            training_R2 = me.R2(m, xtrain, ytrain)
            training_RMSE = me.RMSE(m, xtrain, ytrain)
            val_R2 = me.R2(m, xval, yval)
            val_RMSE = me.RMSE(m, xval, yval)

            # Hyperparameters of the periodic time kernel
            # (lengthscale and variance were swapped in the original)
            time_kernel_lengthscale = float(
                m.kernel.kernels[0].base_kernel.lengthscales.value())
            time_kernel_variance = float(
                m.kernel.kernels[0].base_kernel.variance.value())
            time_kernel_periodicity = float(m.kernel.kernels[0].period.value())

            # Per-dimension lengthscales of the RBF kernel
            N34_lengthscale = np.array(
                m.kernel.kernels[1].lengthscales.value())[2]
            d2m_lengthscale = np.array(
                m.kernel.kernels[1].lengthscales.value())[0]
            tcwv_lengthscale = np.array(
                m.kernel.kernels[1].lengthscales.value())[1]
            rbf_kernel_variance = float(m.kernel.kernels[1].variance.value())

            metric_list.append([
                coord_list[i, 0], coord_list[i, 1],
                training_R2, training_RMSE, val_R2, val_RMSE,
                time_kernel_lengthscale, time_kernel_variance,
                time_kernel_periodicity, N34_lengthscale,
                d2m_lengthscale, tcwv_lengthscale, rbf_kernel_variance
            ])
        except Exception:
            # Skip locations where the model fails to fit
            pass

    df = pd.DataFrame(
        metric_list,
        columns=[
            "latitude", "longitude",
            "training_R2", "training_RMSE", "val_R2", "val_RMSE",
            "time_kernel_lengthscale", "time_kernel_variance",
            "time_kernel_periodicity", "N34_lengthscale",
            "d2m_lengthscale", "tcwv_lengthscale", "rbf_kernel_variance"
        ],
    )

    now = datetime.datetime.now()
    df.to_csv("Data/single-locations-eval-" + now.strftime("%Y-%m-%d") + ".csv")

    print(df.mean(axis=0))

    df_prep = df.set_index(["latitude", "longitude"])
    da = df_prep.to_xarray()

    if perf_plot:
        slm_perf_plots(da)
    if hpar_plot:
        slm_hpar_plots(da)
def run_dimensionality_reductions(lib='dlibHOG',
                                  dataset='distances_all_px_eu',
                                  reduction='None',
                                  filtro=0.0,
                                  amostragem=None,
                                  split_synthetic=False,
                                  min_max=False,
                                  verbose=True):
    synthetic_X, synthetic_y = None, None
    X, y = load(lib, dataset, filtro, min_max, verbose)

    n_classes = len(unique_labels(y))
    X = X.values
    instances, features = X.shape

    if verbose:
        log.info('Data has {0} classes, {1} instances and {2} features'.format(
            n_classes, instances, features))

    n_features_to_keep = int(np.sqrt(features))

    if reduction == 'None':
        log.info("Not applying any dimensionality reduction")
        red_dim = None
    elif reduction == 'PCA':
        red_dim = PCA(n_components=n_features_to_keep, whiten=True)
    elif reduction == 'mRMR':
        red_dim = mRMRProxy(n_features_to_select=n_features_to_keep, verbose=False)
    elif reduction == 'FCBF':
        red_dim = FCBFProxy(n_features_to_select=n_features_to_keep, verbose=False)
    elif reduction == 'CFS':
        red_dim = CFSProxy(n_features_to_select=n_features_to_keep, verbose=False)
    elif reduction == 'RFS':
        red_dim = RFSProxy(n_features_to_select=n_features_to_keep, verbose=False)
    elif reduction == 'ReliefF':
        red_dim = ReliefF(n_features_to_select=n_features_to_keep,
                          n_neighbors=100, n_jobs=-1)
    elif reduction == 'RFSelect':
        red_dim = RFSelect()
    else:
        raise IOError(
            "Dimensionality Reduction not found for parameter {0}".format(reduction))

    X = __dimensionality_reduction(red_dim, X, y, verbose)

    if amostragem is not None:
        if amostragem == 'Random':
            X, y = sampling.runRandomUnderSampling(X, y, verbose)
        else:
            X, y, synthetic_X, synthetic_y = sampling.runSmote(
                X, y, amostragem, split_synthetic, verbose)

    return X, y, synthetic_X, synthetic_y
if args.every == -1:
    sample_indices = [args.iters - 1]
else:
    first = args.every if args.first == -1 else args.first
    # Materialize the range so the final iteration can be appended below
    sample_indices = list(range(first, args.iters, args.every))
    if sample_indices[-1] != args.iters - 1:
        sample_indices.append(args.iters - 1)

if args.columns:
    cb = horizontal_cb
elif args.dedupe:
    cb = dedupe_cb
else:
    cb = Sampling.print_sample_callback

kwargs = dict(start_temp=args.start_temp,
              final_temp=args.end_temp,
              sample_energy=args.energy,
              callback=cb)
if args.sil:
    kwargs['init_method'] = Sampling.VisInit.silhouettes
    kwargs['training_examples'] = args.sil

vis = Sampling.sample_model(model, args.n_samples, args.iters,
                            sample_indices, **kwargs)

if args.columns:
    print_columns(model.codec.maxlen)
if args.energy:
    fe = model._free_energy(vis)
    sys.stderr.write('Final energy: {:.2f} (stdev={:.2f})\n'.format(fe.mean(), fe.std()))
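# A quick check of the sampling-index logic above with hypothetical argument
# values: iters=100, every=25, first=-1 yields [25, 50, 75] from the range,
# plus the final iteration appended at the end.
iters, every, first = 100, 25, -1
first = every if first == -1 else first
indices = list(range(first, iters, every))
if indices[-1] != iters - 1:
    indices.append(iters - 1)
assert indices == [25, 50, 75, 99]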
def areal_model(location, number=None, EDA_average=False, length=3000, seed=42):
    """
    Outputs test, validation and training data for total precipitation
    as a function of time, 2m dewpoint temperature, angle of sub-gridscale
    orography, orography, slope of sub-gridscale orography, total column
    water vapour and Nino 3.4 index, for a given number of randomly sampled
    data points within a given basin.

    Inputs
        location: specify area to train model
        number, optional: specify desired ensemble run, integer
        EDA_average, optional: use average of low-resolution ensemble runs, boolean
        length, optional: specify number of points to sample, integer
        seed, optional: specify seed, integer

    Outputs
        x_train: training feature vector, numpy array
        y_train: training output vector, numpy array
        x_test: testing feature vector, numpy array
        y_test: testing output vector, numpy array
    """
    # elif prevents the non-ensemble download from clobbering the
    # ensemble selection (the original used a second bare `if`)
    if number is not None:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.sel(number=number).drop("number")
    elif EDA_average:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.mean(dim="number")
    else:
        da = dd.download_data(location, xarray=True)

    # Apply mask
    mask_filepath = find_mask(location)
    masked_da = dd.apply_mask(da, mask_filepath)

    multiindex_df = masked_da.to_dataframe()
    df_clean = multiindex_df.dropna().reset_index()
    df = sa.random_location_and_time_sampler(df_clean, length=length, seed=seed)

    df["time"] = df["time"] - 1970
    df["tp"] = log_transform(df["tp"])
    df = df[["time", "lat", "lon", "slor", "anor", "z",
             "d2m", "tcwv", "N34", "tp"]]

    # Keep the last 10% of the time range for testing
    test_df = df[df["time"] > df["time"].max() * 0.9]
    xtest = test_df.drop(columns=["tp"]).values
    ytest = test_df["tp"].values

    # Training and validation data
    tr_df = df[df["time"] < df["time"].max() * 0.9]
    xtr = tr_df.drop(columns=["tp"]).values
    ytr = tr_df["tp"].values
    xtrain, xval, ytrain, yval = train_test_split(
        xtr, ytr, test_size=0.30, shuffle=False)

    """
    # Alternative split: keep the first 70% for training
    train_df = df[df['time'] < df['time'].max() * 0.7]
    xtrain = train_df.drop(columns=['tp']).values
    ytrain = train_df['tp'].values

    # Last 30% for evaluation
    eval_df = df[df['time'] > df['time'].max() * 0.7]
    x_eval = eval_df.drop(columns=['tp']).values
    y_eval = eval_df['tp'].values

    # Training and validation data
    xval, xtest, yval, ytest = train_test_split(x_eval, y_eval,
                                                test_size=0.3333, shuffle=True)
    """

    return xtrain, xval, xtest, ytrain, yval, ytest
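# log_transform is not defined in this snippet. Precipitation data contains
# exact zeros, so a shifted log is a common choice; a minimal sketch, with the
# offset value being an assumption:
import numpy as np

def log_transform(x, offset=0.001):
    # Shift away from zero before taking the logarithm
    return np.log(x + offset)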
def readSPI1D(lutPath,
              inverse=False,
              interpolation='linear',
              inversesUseIndexMaps=True,
              inversesUseHalfDomain=True):
    with open(lutPath) as f:
        lines = f.read().splitlines()

    #
    # Read LUT data
    #
    resolution = [0, 0]
    samples = []
    indexMap = []
    minInputValue = 0.0
    maxInputValue = 1.0

    for line in lines:
        tokens = line.split()
        if not tokens:
            # Skip blank lines
            continue

        if tokens[0] == "Version":
            version = int(tokens[1])
            if version != 1:
                break
        elif tokens[0] == "From":
            minInputValue = float(tokens[1])
            maxInputValue = float(tokens[2])
        elif tokens[0] == "Length":
            resolution[0] = int(tokens[1])
        elif tokens[0] == "Components":
            resolution[1] = int(tokens[1])
        elif tokens[0] in ["{", "}"]:
            continue
        else:
            samples.extend(map(float, tokens))

    #
    # Create ProcessNodes
    #
    lutpns = []

    # Forward transform, pretty straightforward
    if not inverse:
        # Remap input range
        if minInputValue != 0.0 or maxInputValue != 1.0:
            rangepn = clf.Range(clf.bitDepths["FLOAT16"],
                                clf.bitDepths["FLOAT16"], "range", "range")
            rangepn.setMinInValue(minInputValue)
            rangepn.setMaxInValue(maxInputValue)
            rangepn.setMinOutValue(0.0)
            rangepn.setMaxOutValue(1.0)
            lutpns.append(rangepn)

        # LUT node
        lutpn = clf.LUT1D(clf.bitDepths["FLOAT16"],
                          clf.bitDepths["FLOAT16"], "lut1d", "lut1d",
                          interpolation=interpolation)
        lutpn.setArray(resolution[1], samples)
        lutpns.append(lutpn)

    # Inverse transform, LUT has to be resampled
    else:
        if inversesUseIndexMaps:
            print("Generating inverse of 1D LUT using Index Maps")
            lutpnInverses = Sampling.generateLUT1DInverseIndexMap(
                resolution, samples, minInputValue, maxInputValue)
        elif inversesUseHalfDomain:
            print("Generating full half-domain inverse of 1D LUT")
            lutpnInverses = Sampling.generateLUT1DInverseHalfDomain(
                resolution, samples, minInputValue, maxInputValue, rawHalfs=True)
        else:
            print("Generating resampled inverse of 1D LUT")
            lutpnInverses = Sampling.generateLUT1DInverseResampled(
                resolution, samples, minInputValue, maxInputValue)
        lutpns.extend(lutpnInverses)

    return lutpns
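# For reference, the reader above expects an SPI1D file of roughly this shape;
# a minimal hypothetical example consistent with the token handling in the
# parsing loop:
EXAMPLE_SPI1D = """\
Version 1
From 0.0 1.0
Length 4
Components 1
{
0.0
0.3333
0.6667
1.0
}
"""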
def writeCLF1D3D1D(lutPath,
                   samples1dIn, lutResolution1dIn, inputMin, inputMax,
                   samples3d, lutResolution3d,
                   samples1dOut, lutResolution1dOut, outputMin, outputMax,
                   inversesUseIndexMaps=True,
                   inversesUseHalfDomain=True):
    lutpns = []

    # Create the input shaper
    if samples1dIn:
        if inversesUseIndexMaps:
            # Generate inverse of 1D LUT using Index Maps
            lutpnInverses = Sampling.generateLUT1DInverseIndexMap(
                [lutResolution1dIn, 3], samples1dIn, inputMin, inputMax)
        elif inversesUseHalfDomain:
            # Generate full half-domain inverse of 1D LUT
            lutpnInverses = Sampling.generateLUT1DInverseHalfDomain(
                [lutResolution1dIn, 3], samples1dIn, inputMin, inputMax,
                rawHalfs=True)
        else:
            # Generate resampled inverse of 1D LUT
            lutpnInverses = Sampling.generateLUT1DInverseResampled(
                [lutResolution1dIn, 3], samples1dIn, inputMin, inputMax)
        lutpns.extend(lutpnInverses)

    # Create the 3D LUT
    clfSamples = [0.0] * (lutResolution3d[0] * lutResolution3d[1] *
                          lutResolution3d[2]) * 3
    index = 0
    for r in range(lutResolution3d[0]):
        for g in range(lutResolution3d[1]):
            for b in range(lutResolution3d[2]):
                for c in range(3):
                    clfSamples[index] = samples3d[r][g][b][c]
                    index += 1

    interpolation = 'trilinear'
    lut3dpn = clf.LUT3D(clf.bitDepths["FLOAT16"],
                        clf.bitDepths["FLOAT16"], "lut3d", "lut3d",
                        interpolation=interpolation)
    lut3dpn.setArray([lutResolution3d[0], lutResolution3d[1], lutResolution3d[2]],
                     clfSamples)
    lutpns.append(lut3dpn)

    # Create the output shaper
    if samples1dOut:
        interpolation = 'linear'
        lutpn = clf.LUT1D(clf.bitDepths["FLOAT16"],
                          clf.bitDepths["FLOAT16"], "lut1d", "lut1d",
                          interpolation=interpolation)
        lutpn.setArray(3, samples1dOut)
        lutpns.append(lutpn)

    # Wrap in a ProcessList and write to disk
    pl = clf.ProcessList()

    # Populate
    pl.setID('Converted lut')
    pl.setCompCLFversion(1.0)
    pl.setName('Converted lut')

    for lutpn in lutpns:
        pl.addProcess(lutpn)

    # Write CLF to disk
    pl.writeFile(lutPath)

    return True
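# The nested r/g/b/c loop above fills clfSamples in red-major order. The same
# flat index can be written in closed form; a small sketch verifying the
# equivalence on a hypothetical 2x2x2 LUT:
R = G = B = 2
index = 0
for r in range(R):
    for g in range(G):
        for b in range(B):
            for c in range(3):
                assert ((r * G + g) * B + b) * 3 + c == index
                index += 1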
# Redirect all prints to Logfile.txt
sys.stdout = conf.Logger("Logfile.txt")

#rna = FF.Parsefile(conf.rnafile)[1]
#Indexe = conf.rnafile.split('.')[0]

# Loop over RNA files
m = 2.6 / 2
b = -0.8 / 2
path_Fasta = 'fasta_files'
Alignementfolder = 'Alignement'
FileExtensionFasta = 'fa'

print("Sampling Process for %s Structures" % (conf.numberofsruct))
OutputSamples = SP.StructSampling(
    [conf.PathConstrainteFile, conf.PathConstrainteFileShape],
    Alignementfolder, conf.numberofsruct, conf.Temperature,
    conf.Fastaextenstion, m, b)

for filz in GetListFile(path_Fasta, FileExtensionFasta):
    print(filz, "Treatment")
    startimebig = time.time()
    rna = FF.Parsefile(
        os.path.join(path_Fasta, filz + '.' + FileExtensionFasta))[1]
    Indexe = filz
    SVMlFile = "DissimilarityMatrix" + conf.numberofsruct
    listfiles = [filz + state for state in ["NMIA", "1M7", "MSA"]]
    OutputSamples = 'OutputSamples' + conf.numberofsruct
    MFESnbrstruct = len(listfiles)  # 1 for the case where no constraint is given
    FF.MergeFiles(OutputSamples, os.path.join(OutputSamples, 'Samples.txt'),
                  listfiles, 1)
def point_model(location, number=None, EDA_average=False):
    """
    Outputs test, validation and training data for total precipitation
    as a function of time, 2m dewpoint temperature, angle of sub-gridscale
    orography, orography, slope of sub-gridscale orography, total column
    water vapour, Nino 3.4, Nino 4 and NAO index for a single point.

    Inputs
        location: name of a location, string, or coordinates
            [latitude, longitude], list of floats
        number, optional: specify desired ensemble run, integer
        EDA_average, optional: use average of low-resolution ensemble runs, boolean

    Outputs
        x_train: training feature vector, numpy array
        y_train: training output vector, numpy array
        x_test: testing feature vector, numpy array
        y_test: testing output vector, numpy array
    """
    # elif prevents the non-ensemble download from clobbering the
    # ensemble selection (the original used a second bare `if`)
    if number is not None:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.sel(number=number).drop("number")
    elif EDA_average:
        da_ensemble = dd.download_data(location, xarray=True, ensemble=True)
        da = da_ensemble.mean(dim="number")
    else:
        da = dd.download_data(location, xarray=True)

    if isinstance(location, str):
        multiindex_df = da.to_dataframe()
        df_clean = multiindex_df.dropna().reset_index()
        df_location = sa.random_location_sampler(df_clean)
        df = df_location.drop(columns=["lat", "lon", "slor", "anor", "z"])
    else:
        da_location = da.interp(coords={"lat": location[0], "lon": location[1]},
                                method="nearest")
        multiindex_df = da_location.to_dataframe()
        df_clean = multiindex_df.dropna().reset_index()
        df = df_clean.drop(columns=["lat", "lon", "slor", "anor", "z"])

    df["time"] = df["time"] - 1970  # to years
    df["tp"] = log_transform(df["tp"])
    df = df[["time", "d2m", "tcwv", "N34", "tp"]]  # format order

    # Keep the first 70% of the time range for training
    train_df = df[df["time"] < df["time"].max() * 0.7]
    xtrain = train_df.drop(columns=["tp"]).values
    ytrain = train_df["tp"].values

    # Last 30% for evaluation
    eval_df = df[df["time"] > df["time"].max() * 0.7]
    x_eval = eval_df.drop(columns=["tp"]).values
    y_eval = eval_df["tp"].values

    # Split the evaluation data into validation and test sets
    xval, xtest, yval, ytest = train_test_split(x_eval, y_eval,
                                                test_size=0.3333, shuffle=False)

    return xtrain, xval, xtest, ytrain, yval, ytest
if __name__ == "__main__":
    image1 = cv2.imread("Lena.png")
    img = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
    image = cv2.normalize(img, None, alpha=0, beta=1,
                          norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    print("Shape of grayscale image before sampling", image.shape)
    plt.imshow(image, cmap='gray')
    plt.title("Original Grayscale Image")
    plt.show()

    D1 = S.downSample(2, image)
    D2 = S.downSample(2, D1)
    # plt.imshow(D2, cmap='gray')
    # plt.title("D2")
    # plt.show()

    U1 = S.UpSampled(D2)
    plt.imshow(U1, cmap='gray')
    plt.title("Upsampled once by inserting empty pixels")
    plt.show()

    U1_G = G.gaussian_blur(U1, 11, 1)
    plt.imshow(U1_G, cmap='gray')
    plt.title("Gaussian smoothing on once-upsampled image")
    plt.show()

    U2 = S.UpSampled(U1)
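# S.downSample and S.UpSampled are external helpers not shown here. A minimal
# NumPy sketch of the behaviour the calls above suggest (factor-n decimation,
# and upsampling by inserting empty pixels); an assumption, not the original
# module:
import numpy as np

def downSample(factor, image):
    # Keep every factor-th pixel along both axes
    return image[::factor, ::factor]

def UpSampled(image):
    # Double both dimensions, placing the original pixels on the even grid
    # and leaving the inserted pixels empty (zero)
    up = np.zeros((image.shape[0] * 2, image.shape[1] * 2), dtype=image.dtype)
    up[::2, ::2] = image
    return up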
# Self-emp-not-inc the value of 1, Self-emp-inc the value of 2, and so forth. This
# is wrong, as there is no reason why Private should be 'closer' to any other
# class than to the rest. Such distances introduce errors in any kind of
# regression model, since the variable weights are affected by them. We could
# use sklearn's label_binarize to encode the classes as a one-hot encoding, but
# that would make the logistic regression very inefficient. Instead, we extract
# the different classes of each column as dummy columns (variables) in the
# original data frame. We also want to remove the mean from all of our columns
# and scale the values to unit variance, using sklearn's StandardScaler.

''' The part below should be substituted by the Utilities preprocess method '''
dummy_data = pd.get_dummies(data_frame).astype('float64')
dummy_data['income'] = dummy_data['income_ >50K']
del dummy_data['income_ <=50K']
del dummy_data['income_ >50K']

indices = np.array(dummy_data.index.values).astype(np.int64)
train = Sampling.k_folds(indices, samples=10, dir_p=dir_p, save_sample=False)

# Create training/testing data sets and training/testing target sets.
# Here we validate with only one fold, not the full set; in general,
# k-fold cross-validation iterates through all folds, using each in turn
# as the validation/testing set and the rest as the training set.
train_data = dummy_data.loc[train.iloc[:, :-1].values.flatten(), :]
test_data = dummy_data.loc[train.iloc[:, -1].values.flatten(), :]
train_target = train_data.income
test_target = test_data.income
train_data = train_data.drop(columns=['income'])
test_data = test_data.drop(columns=['income'])

# Remove the mean and scale the data to unit variance using a scaler.
scaler = preprocessing.StandardScaler()
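# A small illustration of the encoding point made in the comment above:
# integer label encoding imposes a spurious ordering on a nominal column,
# while dummy/one-hot columns do not. Toy data, not the actual census set.
import pandas as pd

toy = pd.DataFrame({'workclass': ['Private', 'Self-emp-inc', 'State-gov']})
# Label encoding: Private=0, Self-emp-inc=1, State-gov=2 implies distances
codes = toy['workclass'].astype('category').cat.codes
# One-hot/dummy encoding: one indicator column per class, no implied ordering
dummies = pd.get_dummies(toy['workclass'])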
# Get the RNA sequence
RNASequence = FF.Parsefile(conf.RNA)[1].strip()

# Get probing conditions for the treated RNA
ProbingConditions = [RNAName + state for state in conf.Conditions]

# Decide whether to generate a new sample or use a previously generated one
OutputSamples = os.path.join(conf.OutputFolder, "tmp",
                             'OutputSamples') + conf.SampleSize
if conf.Sampling.lower() == "true" or not os.path.isdir(OutputSamples):
    progress.StartTask("Sampling %s structures for each condition"
                       % (conf.SampleSize))
    OutputSamples = SP.StructSampling(
        [conf.PathConstraintsFile, conf.PathConstraintsFileShape],
        ProbingConditions, int(conf.SampleSize), conf.Temperature,
        conf.m, conf.b, conf.RNA)
    progress.EndTask()
else:
    progress.Print("Using existing sample")

progress.Print("Probing conditions: %s" % (ProbingConditions))

# Create a global file that contains the structures sampled from the list
# of probing conditions
FF.MergeFiles(OutputSamples, os.path.join(OutputSamples, 'Samples.txt'),
              ProbingConditions, SP.NUM_HEADER_LINES)

# Create a distance matrix file
progress.StartTask("Computing dissimilarity matrix")
SVMlFile = "DissimilarityMatrix" + conf.SampleSize

# Calculate distance and identify redundant structures within the same condition
headers = [
    'age', 'workclass', 'fnlwgt', 'education', 'education-num',
    'marital-status', 'occupation', 'relationship', 'race', 'sex',
    'capital-gain', 'capital-loss', 'hours-per-week', 'native-country',
    'income'
]
data_frame = Import_.import_data(name, dir_p=dir_p, headers=headers, save=False)
data_frame = data_frame.replace({' Husband': 'Spouse', ' Wife': 'Spouse'})
del data_frame['education']
data_frame = data_frame.dropna(axis=0)

indices = np.array(data_frame.index.values).astype(np.int64)
samples = sp.k_folds(indices, samples=10, dir_p=dir_p, save_sample=False)

train_data = data_frame.loc[samples.iloc[:, :-1].values.flatten(), :]
test_data = data_frame.loc[samples.iloc[:, -1].values.flatten(), :]
train_data = train_data.drop(columns=['income'])
test_data = test_data.drop(columns=['income'])

to_encode = data_frame['income'].copy()
labels, features = encode_categorical_S(to_encode)
train_target = pd.DataFrame(labels.loc[train_data.index.values], columns=['income'])
test_target = pd.DataFrame(labels.loc[test_data.index.values], columns=['income'])

#%%
# Use FAMD (Factor Analysis of Mixed Data) to reduce the dimensions of the data set
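# k_folds itself is not shown in this snippet. From its use above
# (samples.iloc[:, :-1] as the training folds and the last column as the
# held-out fold), it appears to return the shuffled indices with one column
# per fold; a hypothetical sketch under that assumption:
import numpy as np
import pandas as pd

def k_folds(indices, samples=10, dir_p=None, save_sample=False):
    rng = np.random.default_rng()
    shuffled = rng.permutation(indices)
    usable = len(shuffled) - len(shuffled) % samples  # drop the remainder
    folds = shuffled[:usable].reshape(samples, -1).T  # one column per fold
    return pd.DataFrame(folds)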
k = 1  # Sample index
i = 1  # Experiment section counter

# Initialize and calibrate board
MagnetoShield.begin()        # Lock I2C bus
MagnetoShield.calibration()  # Calibrate device
fallbackSettings()  # Only active when CPU speed is 48 MHz; comment out to use the settings above

# Set the PID settings
PIDAbs.Settings.setKp(KP)  # Proportional
PIDAbs.Settings.setTi(TI)  # Integral
PIDAbs.Settings.setTd(TD)  # Derivative
PIDAbs.Settings.setTs(Ts)  # Sampling (use Ts in microseconds)

Sampling.begin(Ts)  # Initialize sampling subsystem (based on time.monotonic_ns())


# Algorithm step - everything necessary for control
def step():
    global r, R, i, k  # Access these global variables
    if k > (len(R) * T) - 1:  # If the experiment is over
        Sampling.Settings.realTimeViolation = False  # Clear the real-time violation flag
        MagnetoShield.actuatorWrite(0.0)  # Turn off the magnet
        if PLOTTING_POST:  # In case plotting in post is enabled
            for j in enumerate(Ylog):  # For every element in the log vector of outputs
                print((Ulog[j[0]],
                       Ylog[j[0]],
                       Ilog[j[0]],
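# Sampling.begin(Ts) above sets up fixed-period execution based on
# time.monotonic_ns(). A minimal sketch of that kind of scheduler, as an
# illustration of the idea rather than the AutomationShield implementation:
import time

def run_periodically(step, Ts_us, total_steps):
    period_ns = Ts_us * 1000  # microseconds to nanoseconds
    next_deadline = time.monotonic_ns() + period_ns
    for _ in range(total_steps):
        step()
        # Busy-wait until the next sampling instant
        while time.monotonic_ns() < next_deadline:
            pass
        next_deadline += period_ns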