def utf_uc3(this, utc, it):
    retn = ""
    this = this.encode()
    for i in range(len(this)):
        if i % 2 != 0:
            retn += scale(10, 15, this[i] + utc - i) + ","
        else:
            retn += scale(10, 12, this[i] - it + i) + ","
    return retn[0:-1]
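# Note on the scheme above: bytes at odd indices are encoded with
# scale(10, 15, ...) offset by +utc - i, bytes at even indices with
# scale(10, 12, ...) offset by -it + i, so a decoder has to undo the two
# bases alternately, position by position, on the comma-separated units.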
def get_next_input():
    '''
    Reads an entire event file and its reference. When the file reaches the
    end, it picks another one at random and repeats the same reading process.
    Returns the next event input of the current file, in a sliding window.
    '''
    if (basecallTraining.current_file == "" or
            (basecallTraining.current_file_length - basecallTraining.current_index_input) <= utils.batch_size):
        basecallTraining.resetValues()
        train_files = basecallTraining.getFiles(basecallTraining.train_dir)
        num = random.randrange(0, len(train_files))
        target_file = train_files[num]
        with open(basecallTraining.train_dir + target_file) as f:
            lines = f.readlines()
        basecallTraining.current_lines = lines
        basecallTraining.current_file = target_file
        basecallTraining.current_file_length = len(lines)
        fast5file = basecallTraining.current_lines[0].split()[4]
        basecallTraining.current_scale, basecallTraining.current_scale_sd, basecallTraining.current_shift = \
            scale.get_scale_and_shift(fast5file, 1, "template")
        input = []
        for x in range(1, len(basecallTraining.current_lines)):
            input.append(basecallTraining.current_lines[x].split())
            # rescale the mean and stdv of the row just appended
            mean = input[len(input) - 1][0]
            stdv = input[len(input) - 1][1]
            mean, stdv = scale.scale(mean, stdv,
                                     basecallTraining.current_scale,
                                     basecallTraining.current_scale_sd,
                                     basecallTraining.current_shift)
            input[len(input) - 1][0] = mean
            input[len(input) - 1][1] = stdv
        input.pop(0)
        basecallTraining.current_input = input
        basecallTraining.current_bases_per_event_ratio = \
            float(float(len(basecallTraining.current_lines[0].split()[3])) / float(len(input)))
    ret = np.asarray(basecallTraining.current_input)[
        basecallTraining.current_index_input:(basecallTraining.current_index_input + utils.batch_size)]
    basecallTraining.current_index_input += 2
    return ret
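# Worked example of the sliding window above: with utils.batch_size = 4 and
# current_index_input starting at 0, successive calls return event rows
# [0:4], [2:6], [4:8], ... -- the index advances by 2 per call, so consecutive
# batches overlap by batch_size - 2 events. A new file is drawn at random once
# fewer than batch_size rows remain in the current one.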
def weight_matrix(training_filename, get_scale=False, delim=','):
    """
    Returns the weight matrix built from the data in the file at the filename
    given as the first argument, scaled according to the values in the file.
    """
    training_scale = scale(training_filename, delim)
    with open(training_filename, 'r') as training_file:
        for line in training_file:
            x_i, y_i = parse_vectors(line, training_scale, delim)
            try:
                sum_xi += x_i * x_i.T
                sum_yi += x_i * y_i.T
            except NameError:
                sum_xi = x_i * x_i.T
                sum_yi = x_i * y_i.T
            except ValueError:
                # row has differing number of attributes
                if sum_xi.shape[0] < x_i.shape[0]:
                    # more attributes, use this as the standard
                    sum_xi = x_i * x_i.T
                    sum_yi = x_i * y_i.T
                else:
                    # fewer attributes, ignore it
                    pass
    try:
        W = sum_xi.I * sum_yi  # will raise exception if no inverse
    except linalg.LinAlgError:
        W = (sum_xi + 0.00001 * identity(sum_xi.shape[0])).I * sum_yi
    if get_scale:
        return W, training_scale
    return W
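# A self-contained sketch of the accumulation weight_matrix performs, assuming
# parse_vectors (not shown) yields column matrices x_i, y_i: it solves the
# normal equations W = (sum x_i x_i^T)^-1 (sum x_i y_i^T), falling back to a
# small ridge term when the Gram matrix has no inverse.
import numpy as np

rng = np.random.default_rng(0)
X = np.matrix(rng.normal(size=(50, 3)))      # 50 rows, 3 attributes
Y = X * np.matrix([[1.0], [-2.0], [0.5]])    # targets with known weights
sum_xi = sum(X[i].T * X[i] for i in range(50))   # accumulates x_i * x_i.T
sum_yi = sum(X[i].T * Y[i] for i in range(50))   # accumulates x_i * y_i.T
try:
    W = sum_xi.I * sum_yi
except np.linalg.LinAlgError:
    W = (sum_xi + 0.00001 * np.identity(3)).I * sum_yi
print(W.T)   # recovers ~[[1.0, -2.0, 0.5]]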
def test_scale(filename, result_dir=None):
    input_img = Image.open(filename)
    cases = []
    cases.append([(192, 128), (96, 64), (48, 32), (24, 16), (12, 8)])
    cases.append([(300, 200), ])
    cases.append([(450, 300), ])
    cases.append([(500, 200), ])
    count = 1
    for case in cases:
        for size in case:
            print "Scaling Case %d" % (count, )
            result = scale(input_img, size)
            result_size = result.size
            comparison = "expected size %s, actual size %s" % (
                str(size), str(result_size))
            count += 1
            expect(result_size == size,
                   "[PASS] Scaling: " + comparison,
                   "[FAIL] Scaling: " + comparison)
            if result_dir:
                result_name = 'scale-%d-%d.png' % size
                result_path = os.path.join(result_dir, result_name)
                result.save(result_path)
                print case_message('[Saved] ' + result_path)
def encrypt(string, _set, extra):
    if _set.encode == "UTF":
        uc3 = utf_uc3
    else:
        uc3 = uni_uc3
    _time = str(int(time() * 1000))
    part1 = int(_time[10:13])
    part2 = string
    part3 = round(random() * 100)
    part4 = int(_time[0:5])
    part5 = round(random() * 10)
    if part5 == 0:
        part5 = 10
    part6 = int(_time[5:10])
    ectpart1 = scale(10, 15, str(part1)[::-1])
    ectpart2 = uc3(part2, part1, part4)
    ectpart3 = scale(10, 3, part3)
    ectpart4 = scale(10, 36, part4 + 15 - int(ectpart3))[::-1]
    ectpart5 = scale(10, 9, part5)
    ectpart6 = scale(10, 35, part6 - 15 * part5)[::-1]
    retn = "~" + ectpart1 + "!" + ectpart2 + "!"
    retn += ",".join((ectpart3, ectpart4, ectpart5, ectpart6))
    return retn
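# Worked example of the timestamp split encrypt() relies on, using only the
# standard library: a millisecond epoch timestamp is currently 13 digits, so
# part4/part6/part1 are simply its leading 5, middle 5, and trailing 3 digits.
from time import time

_time = str(int(time() * 1000))   # e.g. '1700000000000'
part1 = int(_time[10:13])         # trailing 3 digits
part4 = int(_time[0:5])           # leading 5 digits
part6 = int(_time[5:10])          # middle 5 digits
assert _time == '%05d%05d%03d' % (part4, part6, part1)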
def build(self, X, targetA, autoscale=False):
    nobj, nvar = np.shape(X)
    self.nobj = nobj
    self.nvar = nvar
    self.X = X
    X, mu = center(X)
    X, wg = scale(X, autoscale)
    self.mu = mu
    self.wg = wg
    self.autoscale = autoscale
    SSXac = 0.0
    for a in range(targetA):
        # extracts LV
        t, p = self.extractPC(X)
        self.t.append(t)
        self.p.append(p)
        # deflates X
        X, SSX, SSXex = self.deflatePC(X, t, p)
        SSXac += SSXex
        self.SSXex.append(SSXex)
        self.SSXac.append(SSXac)
        if a == 0:
            self.SSX = SSX
    self.A = targetA
def validateLOO(self, A, gui=False):
    """ Validates A dimensions of an already built PLS model, using
        Leave-One-Out cross-validation.

        Returns the per-object predictions. The results of the cv
        (SSY, SDEP and Q2) are stored internally.
    """
    if self.X is None or self.Y is None:
        return
    X = self.X
    Y = self.Y
    nobj, nvarx = np.shape(X)
    SSY0 = 0.0
    for i in range(nobj):
        SSY0 += np.square(Y[i] - np.mean(Y))
    SSY = np.zeros(A, dtype=np.float64)
    YP = np.zeros((nobj, A + 1), dtype=np.float64)
    if gui:
        updateProgress(0.0)
    for i in range(nobj):
        # build reduced X and Y matrices removing the i object
        Xr = np.delete(X, i, axis=0)
        Yr = np.delete(Y, i)
        Xr, muxr = center(Xr)
        Xr, wgxr = scale(Xr, self.autoscale)
        Yr, muyr = center(Yr)
        xp = np.copy(X[i, :])
        xp -= muxr
        xp *= wgxr
        # predicts y for the i object, using A LV
        yp = self.getLOO(Xr, Yr, xp, A)
        yp += muyr
        # updates SSY with the object i errors
        YP[i, 0] = Y[i]
        for a in range(A):
            SSY[a] += np.square(yp[a] - Y[i])
            YP[i, a + 1] = yp[a]
        if gui:
            updateProgress(float(i) / float(nobj))
    if gui:
        print
    self.SSY = SSY
    self.SDEP = [np.sqrt(i / nobj) for i in SSY]
    self.Q2 = [1.00 - (i / SSY0) for i in SSY]
    self.Av = A
    return YP
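# Note on the stored results: for each dimensionality a,
#   SDEP[a] = sqrt(SSY[a] / nobj)  (standard deviation of error of prediction)
#   Q2[a]   = 1 - SSY[a] / SSY0    (cross-validated explained variance)
# so a Q2 close to 1 means the model with a+1 LVs predicts left-out objects
# well, while Q2 <= 0 means it predicts no better than the mean of Y.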
from scale import scale
from users.models import User
from django.contrib.sessions.models import Session
from django.utils import timezone

s = scale()

# example data - no need for separate file - will eventually just query docker
# to see what values are available.
challengeID = 3
registeredUsers = 10
activeSessions = 10
activeContainers = 10
minimumContainers = 2

try:
    buf = s.calculateBuffer(registeredUsers, activeSessions,
                            minimumContainers, activeContainers, challengeID)
except Exception as ex:
    print(ex)

if buf == activeContainers:
    print("buffer ({0}) is exactly the # of active containers ({1})".format(
        buf, activeContainers))
elif buf < activeContainers:
    print("calculated buffer ({0}) is less than the # of active containers ({1}) \n\r"
          "{2} containers required (-1 to account for rounding calculation)"
          .format(buf, activeContainers, buf - activeContainers + 1))
    # +1 to account for rounding up on the buffer calculation while still
    # erring on the side of caution when removing containers
def build(self, X, Y, targetA=0, targetSSX=0.0, autoscale=False):
    """Build a new PLS model with the X and Y numpy matrices provided, using
       the NIPALS algorithm.

       The dimensionality of the model can be defined by providing either
       1. the number of LV to extract directly (targetA)
       2. the fraction of SSX that the model will explain (targetSSX)

       The X and Y matrices are centered but no other scaling transform is
       applied.

       Does not return anything, but updates internal vectors and variables.
    """
    nobj, nvarx = np.shape(X)
    self.nobj = nobj
    self.nvarx = nvarx
    self.X = X.copy()
    self.Y = Y.copy()
    self.X, self.mux = center(self.X)
    self.Y, self.muy = center(self.Y)
    self.X, self.wgx = scale(self.X, autoscale)
    self.autoscale = autoscale

    SSXac = 0.0
    SSYac = 0.0
    SSX0, SSY0, null = self.computeSS(self.X, self.Y)
    SSXold = SSX0
    SSYold = SSY0

    a = 0
    while True:
        t, p, w, c = self.extractLV(self.X, self.Y)
        self.t.append(t)
        self.p.append(p)
        self.w.append(w)
        self.c.append(c)
        self.X, self.Y = self.deflateLV(self.X, self.Y, t, p, c)
        SSXnew, SSYnew, dmodx = self.computeSS(self.X, self.Y)
        SSXex = (SSXold - SSXnew) / SSX0
        SSXac += SSXex
        SSYex = (SSYold - SSYnew) / SSY0
        SSYac += SSYex
        SDEC = np.sqrt(SSYnew / nobj)
        dof = nvarx - a
        if dof <= 0:
            dof = 1
        dmodx = [np.sqrt(d / dof) for d in dmodx]
        SSXold = SSXnew
        SSYold = SSYnew
        self.SSXex.append(SSXex)
        self.SSXac.append(SSXac)
        self.SSYex.append(SSYex)
        self.SSYac.append(SSYac)
        self.SDEC.append(SDEC)
        self.dmodx.append(dmodx)
        a += 1
        if targetA > 0:
            if a == targetA:
                break
        if targetSSX > 0.0:
            if SSXac > targetSSX:
                break
            # prevents extracting a meaningless number of LV
            if a > min(20, nobj / 5):
                break
    self.Am = a

    # NIPALS is destructive, so we must retrieve X and Y from the original
    # data for validation
    self.X = X.copy()
    self.Y = Y.copy()

    self.cutoff = np.zeros(self.Am, dtype=np.float64)
    self.TP = np.zeros(self.Am)
    self.TN = np.zeros(self.Am)
    self.FP = np.zeros(self.Am)
    self.FN = np.zeros(self.Am)
    self.TPpred = np.zeros(self.Am)
    self.TNpred = np.zeros(self.Am)
    self.FPpred = np.zeros(self.Am)
    self.FNpred = np.zeros(self.Am)
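# Hedged usage sketch (the enclosing class name is not shown; `pls` below is
# an assumption, and only methods appearing elsewhere in this section are used):
#   model = pls()
#   model.build(X, Y, targetA=3)        # or: model.build(X, Y, targetSSX=0.8)
#   YP = model.validateLOO(3)           # leave-one-out predictions per LV
#   print model.SSXac, model.SDEC       # accumulated SSX and fit error per LV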
type_assign = dict((k, []) for k in TET)
for k, m in zip(TET, ea):
    type_assign[k] = m

for e in TG.edges(data=True):
    ty = e[2]['type']
    for k in type_assign:
        if ty == k or (ty[1], ty[0]) == k:
            e[2]['cifname'] = type_assign[k]

ea_dict = assign_node_vecs2edges(TG, unit_cell, SYMMETRY_TOL)
all_SBU_coords = SBU_coords(TG, ea_dict, CONNECTION_SITE_BOND_LENGTH)
sc_a, sc_b, sc_c, sc_alpha, sc_beta, sc_gamma, sc_covar, Bstar_inv, max_length, callbackresults, ncra, ncca = scale(
    all_SBU_coords, a, b, c, ang_alpha, ang_beta, ang_gamma, max_le,
    num_vertices, Bstar, alpha, num_edges, FIX_UC, SCALING_ITERATIONS,
    PRE_SCALE, SCALING_CONVERGENCE_TOLERANCE, SCALING_STEP_SIZE)

print '*******************************************'
print 'The scaled unit cell parameters are : '
print '*******************************************'
print 'a    :', np.round(sc_a, 5)
print 'b    :', np.round(sc_b, 5)
print 'c    :', np.round(sc_c, 5)
print 'alpha:', np.round(sc_alpha, 5)
print 'beta :', np.round(sc_beta, 5)
print 'gamma:', np.round(sc_gamma, 5)
print ''

for sc, name in zip((sc_a, sc_b, sc_c), ('a', 'b', 'c')):
except AssertionError:
    ordinals = ["st", "nd", "rd"] + ["th"] * 7
    formatted_num = f"{len(inputs) + 1}{ordinals[len(inputs) % 10]}"
    total_score = int(total_score) if round(total_score) == total_score else total_score
    print(f"Error: the {formatted_num} input is not in range (0 - {total_score}).")
    sys.exit(1)

display.display_info([x * total_score for x in inputs],
                     header="input data statistics",
                     total_score=total_score)
print()

scale_mean = display.input_float(
    prompt="What is the target average (mean) percentage (0 - 100)> ",
    qualifier=lambda x: 0 <= x <= 100,
    qualifier_err="Error: please input a number between 0 and 100.") / 100

outputs = scale.scale(inputs, scale_mean)

print()
print("Below are the scaled scores (in the order they were entered):")
print("\n".join("{:.2f}".format(total_score * n) for n in outputs))
print()

display.display_info([x * total_score for x in outputs],
                     header="output data statistics",
                     total_score=total_score)
def __init__(self, tgt_name, test_size=0.15):
    self.test_size = test_size
    self.over_sampler = 'None'
    self.resampled = False
    self.tgt_name = tgt_name
    self.pat_frame = gbl.pat_frame.copy()
    self.con_frame = gbl.con_frame.copy()
    self.pat_frame_stats = gbl.pat_frame_stats.copy()
    self.n_rand_feat = 0  # copy(gbl.n_rand_feat)
    self.num_pats = self.pat_frame.shape[0]
    self.pat_names = self.pat_frame.index.tolist()
    self.num_cons = self.con_frame.shape[0]

    # read in target vector according to tgt variable
    self.y_tgt = pd.DataFrame({tgt_name: self.pat_frame_stats.loc[:, tgt_name]})
    if 'class' in tgt_name:
        self.tgt_task = gbl.clf
        self.y_strat = self.y_tgt.copy()
    else:
        self.tgt_task = gbl.reg
        y_strat_name = 'YBOCS_class3'
        self.y_strat = pd.DataFrame({y_strat_name: self.pat_frame_stats.loc[:, y_strat_name]})

    # extract train and test set names
    # pat_names_train, pat_names_test = train_test_split(pat_names,
    #                                                    test_size=self.test_size,
    #                                                    stratify=y_clf)
    #                                                    # random_state=random.randint(1, 101)
    self.pat_names_bins = {}
    self.pat_names_test_bins = {}
    self.pat_names_train_bins = {}

    self.bin_keys = np.unique(self.y_strat.iloc[:, 0])
    self.num_bins = len(self.bin_keys)
    self.multiclass = False
    if self.tgt_task == 'clf' and self.num_bins > 2:
        self.multiclass = True

    self.pat_names_test, self.pat_names_train = self._split_test_train_names(
        self.y_strat, self.test_size, self.num_bins)

    # check that test and train names are mutually exclusive and add up to
    # the total number of observations
    result = any(elem in self.pat_names_train for elem in self.pat_names_test)
    print('%s: test/train %d/%d from %d' % (self.tgt_name,
                                            len(self.pat_names_test),
                                            len(self.pat_names_train),
                                            self.y_strat.shape[0]))
    if not set(self.pat_names) == set(self.pat_names_test + self.pat_names_train) or result:
        print('%s: error separating train test' % (tgt_name))
        exit()

    self.cv_folds = 10

    # assign train and test
    self.pat_frame_train = self.pat_frame.loc[self.pat_names_train, :]
    self.pat_frame_train_y = self.y_tgt.loc[self.pat_names_train, :]
    self.pat_frame_test = self.pat_frame.loc[self.pat_names_test, :]
    self.pat_frame_test_y = self.y_tgt.loc[self.pat_names_test, :]

    # save base copies of train for different oversampling
    self._pat_frame_train_base = self.pat_frame_train.copy(deep=True)
    self._pat_frame_train_y_base = self.pat_frame_train_y.copy(deep=True)

    # scale data
    self.pat_frame_train_norm, self.pat_train_scaler = scale(self.pat_frame_train)
    self.pat_frame_test_norm = test_set_scale(self.pat_frame_test, self.pat_train_scaler)

    # if self.tgt_task == 'reg' leave it, else check for imbalanced classes
    self.imbalanced_classes = False
    if self.tgt_task == 'clf':
        # check whether any class is more than 1 std away from the mean
        # number of observations per class
        counts = self.y_tgt.iloc[:, 0].value_counts()
        if any(abs(counts - np.mean(counts)) > np.std(counts)):
            self.imbalanced_classes = True

    # con
    self.con_frame_norms, self.con_scalers = scale(self.con_frame)
print("Created {0}".format(args.outdir)) # Read data from csv and filter only the required columns and remove empty or NaN fields filter_names = ['Distance', 'ArrDelay', 'CRSDepTime', 'DayOfWeek', 'DepTime'] data = pd.read_csv(args.filename, delimiter=',', usecols=filter_names).dropna(axis=0) covariate = pd.DataFrame({'Late': data.ArrDelay.apply(cov.calculate_late), 'Night': data.CRSDepTime.apply(cov.calculate_night), 'Weekend': data.DayOfWeek.apply(cov.calculate_weekend), 'DepHour': data.DepTime.apply(cov.calculate_dephour), 'Distance': data.Distance.apply(cov.calculate_distance) }).values Y = covariate[:,0] X = covariate[:,1:covariate.shape[1]] X_tr, X_ts, Y_tr, Y_ts = train_test_split(X, Y, test_size=(1-args.ratio), random_state=4) # Scale datasets scaling = scale.scale(args.scaler, X_tr) Xtr_scale = scaling.transform(X_tr) Xts_scale = scaling.transform(X_ts) # Add bias Xtr_scale = np.insert(Xtr_scale, 0, 1.0, axis=1) Xts_scale = np.insert(Xts_scale, 0, 1.0, axis=1) Y_tr = Y_tr.reshape((Y_tr.shape[0], 1)) Y_ts =Y_ts.reshape((Y_ts.shape[0], 1)) util.write_csv(args.outdir + '/X_train.csv', 'x', Xtr_scale, args.precision) util.write_csv(args.outdir + '/Y_train.csv', 'y', Y_tr, args.precision) util.write_csv(args.outdir + '/X_test.csv', 'x', Xts_scale, args.precision) util.write_csv(args.outdir + '/Y_test.csv', 'y', Y_ts, args.precision)
import pyaudio
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from scale import scale

FORMAT = pyaudio.paInt16
CHANNELS = 1
CHUNK = 2000
RATE = 44100

a = scale()

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_xscale('log')
ax.set_xlim((a.freqs[0], a.freqs[-1]))
ax.set_ylim((0, 1500))
ax.set_xticks(a.freqs[2:-2])
ax.set_xticklabels(a.notes[2:-2])
line, = ax.plot([], [], c='k', lw=1)


def init():
    line.set_data([], [])
    return line,


def animate(i):
    data = np.frombuffer(stream.read(CHUNK), dtype=np.int16)
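    # NOTE: the rest of this script is not shown; the continuation below is an
    # assumed sketch consistent with the axes configured above (FFT magnitude
    # plotted against frequency, driven by FuncAnimation).
    spec = np.abs(np.fft.rfft(data))
    freq = np.fft.rfftfreq(CHUNK, 1.0 / RATE)
    line.set_data(freq, spec / CHUNK)
    return line,


p = pyaudio.PyAudio()
stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                input=True, frames_per_buffer=CHUNK)
anim = animation.FuncAnimation(fig, animate, init_func=init,
                               interval=30, blit=True)
plt.show()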
def run_template(template):

    print()
    print('=========================================================================================================')
    print('template :', template)
    print('=========================================================================================================')
    print()

    cat_count = 0
    for net in ct2g(template):

        cat_count += 1
        TG, start, unit_cell, TVT, TET, TNAME, a, b, c, ang_alpha, ang_beta, ang_gamma, max_le, catenation = net

        node_cns = [(cncalc(node, 'nodes', ONE_ATOM_NODE_CN), node)
                    for node in os.listdir('nodes')]

        print('Number of vertices = ', len(TG.nodes()))
        print('Number of edges = ', len(TG.edges()))
        print()

        if PRINT:

            print('There are', len(TG.nodes()), 'vertices in the voltage graph:')
            print()
            v = 0

            for node in TG.nodes():
                v += 1
                print(v, ':', node)
                node_dict = TG.node[node]
                print('type : ', node_dict['type'])
                print('cartesian coords : ', node_dict['ccoords'])
                print('fractional coords : ', node_dict['fcoords'])
                print('degree : ', node_dict['cn'][0])
                print()

            print('There are', len(TG.edges()), 'edges in the voltage graph:')
            print()

            for edge in TG.edges(data=True, keys=True):
                edge_dict = edge[3]
                ind = edge[2]
                print(ind, ':', edge[0], edge[1])
                print('length : ', edge_dict['length'])
                print('type : ', edge_dict['type'])
                print('label : ', edge_dict['label'])
                print('positive direction :', edge_dict['pd'])
                print('cartesian coords : ', edge_dict['ccoords'])
                print('fractional coords : ', edge_dict['fcoords'])
                print()

        vas = vertex_assign(TG, TVT, node_cns, unit_cell, ONE_ATOM_NODE_CN,
                            USER_SPECIFIED_NODE_ASSIGNMENT, SYMMETRY_TOL,
                            ALL_NODE_COMBINATIONS)
        CB, CO = cycle_cocyle(TG)

        for va in vas:
            if len(va) == 0:
                print('At least one vertex does not have a building block with the correct number of connection sites.')
                print('Moving to the next template...')
                print()
                continue

        if len(CB) != (len(TG.edges()) - len(TG.nodes()) + 1):
            print('The cycle basis is incorrect.')
            print('The number of cycles in the cycle basis does not equal the rank of the cycle space.')
            print('Moving to the next template...')
            continue

        num_edges = len(TG.edges())
        Bstar, alpha = Bstar_alpha(CB, CO, TG, num_edges)

        if PRINT:
            print('B* (top) and alpha (bottom) for the barycentric embedding are:')
            print()
            for i in Bstar:
                print(i)
            print()
            for i in alpha:
                print(i)
            print()

        num_vertices = len(TG.nodes())

        if COMBINATORIAL_EDGE_ASSIGNMENT:
            eas = list(itertools.product([e for e in os.listdir('edges')],
                                         repeat=len(TET)))
        else:
            edge_files = sorted([e for e in os.listdir('edges')])
            eas = []
            i = 0
            while len(eas) < len(TET):
                eas.append(edge_files[i])
                i += 1
                if i == len(edge_files):
                    i = 0
            eas = [eas]

        g = 0

        for va in vas:

            node_elems = [bbelems(i[1], 'nodes') for i in va]
            metals = [[i for i in j if i in metal_elements] for j in node_elems]
            metals = list(set([i for j in metals for i in j]))

            v_set = [('v' + str(vname_dict[re.sub('[0-9]', '', i[0])]), i[1]) for i in va]
            v_set = sorted(list(set(v_set)), key=lambda x: x[0])
            v_set = [v[0] + '-' + v[1] for v in v_set]

            print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
            print('vertex assignment : ', v_set)
            print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
            print()

            if SINGLE_METAL_MOFS_ONLY and len(metals) != 1:
                print(v_set, 'contains no metals or multiple metal elements, no cif will be written')
                print()
                continue

            for v in va:
                for n in TG.nodes(data=True):
                    if v[0] == n[0]:
                        n[1]['cifname'] = v[1]

            for ea in eas:

                g += 1

                print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
                print('edge assignment : ', ea)
                print('++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++')
                print()

                type_assign = dict((k, []) for k in TET)
                for k, m in zip(TET, ea):
                    type_assign[k] = m

                for e in TG.edges(data=True):
                    ty = e[2]['type']
                    for k in type_assign:
                        if ty == k or (ty[1], ty[0]) == k:
                            e[2]['cifname'] = type_assign[k]

                ea_dict = assign_node_vecs2edges(TG, unit_cell, SYMMETRY_TOL)
                all_SBU_coords = SBU_coords(TG, ea_dict, CONNECTION_SITE_BOND_LENGTH)
                sc_a, sc_b, sc_c, sc_alpha, sc_beta, sc_gamma, sc_covar, Bstar_inv, max_length, callbackresults, ncra, ncca, scaling_data = scale(
                    all_SBU_coords, a, b, c, ang_alpha, ang_beta, ang_gamma,
                    max_le, num_vertices, Bstar, alpha, num_edges, FIX_UC,
                    SCALING_ITERATIONS, PRE_SCALE,
                    SCALING_CONVERGENCE_TOLERANCE, SCALING_STEP_SIZE)

                print('*******************************************')
                print('The scaled unit cell parameters are : ')
                print('*******************************************')
                print('a    :', np.round(sc_a, 5))
                print('b    :', np.round(sc_b, 5))
                print('c    :', np.round(sc_c, 5))
                print('alpha:', np.round(sc_alpha, 5))
                print('beta :', np.round(sc_beta, 5))
                print('gamma:', np.round(sc_gamma, 5))
                print()

                for sc, name in zip((sc_a, sc_b, sc_c), ('a', 'b', 'c')):
                    cflag = False
                    if sc < 1.0:
                        print('unit cell parameter', name, 'has collapsed during scaling!')
                        print('try re-running with', name, 'fixed, with a larger value for PRE_SCALE, or with a higher SCALING_CONVERGENCE_TOLERANCE')
                        print('no cif will be written')
                        cflag = True

                if cflag:
                    continue

                scaled_params = [sc_a, sc_b, sc_c, sc_alpha, sc_beta, sc_gamma]

                sc_Alpha = np.r_[alpha[0:num_edges - num_vertices + 1, :], sc_covar]
                sc_omega_plus = np.dot(Bstar_inv, sc_Alpha)

                ax = sc_a
                ay = 0.0
                az = 0.0
                bx = sc_b * np.cos(sc_gamma * pi / 180.0)
                by = sc_b * np.sin(sc_gamma * pi / 180.0)
                bz = 0.0
                cx = sc_c * np.cos(sc_beta * pi / 180.0)
                cy = (sc_c * sc_b * np.cos(sc_alpha * pi / 180.0) - bx * cx) / by
                cz = (sc_c ** 2.0 - cx ** 2.0 - cy ** 2.0) ** 0.5
                sc_unit_cell = np.asarray([[ax, ay, az], [bx, by, bz], [cx, cy, cz]]).T

                scaled_coords = omega2coords(
                    start, TG, sc_omega_plus,
                    (sc_a, sc_b, sc_c, sc_alpha, sc_beta, sc_gamma),
                    num_vertices, template, g, WRITE_CHECK_FILES)
                nvecs, evecs = scaled_node_and_edge_vectors(
                    scaled_coords, sc_omega_plus, sc_unit_cell, ea_dict)
                placed_nodes, node_bonds = place_nodes(
                    nvecs, CHARGES, ORIENTATION_DEPENDENT_NODES)
                placed_edges, edge_bonds = place_edges(
                    evecs, CHARGES, len(placed_nodes))

                if RECORD_CALLBACK:

                    vnames = '_'.join([v.split('.')[0] for v in v_set])

                    if len(ea) <= 5:
                        enames = '_'.join([e[0:-4] for e in ea])
                    else:
                        enames = str(len(ea)) + '_edges'

                    prefix = template[0:-4] + '_' + vnames + '_' + enames

                    frames = scaling_callback_animation(
                        callbackresults, alpha, Bstar_inv, ncra, ncca,
                        num_vertices, num_edges, TG, template, g, False)
                    write_scaling_callback_animation(frames, prefix)
                    animate_objective_minimization(callbackresults, prefix)

                if PLACE_EDGES_BETWEEN_CONNECTION_POINTS:
                    placed_edges = adjust_edges(placed_edges, placed_nodes,
                                                sc_unit_cell)

                placed_all = placed_nodes + placed_edges
                bonds_all = node_bonds + edge_bonds

                if WRITE_CHECK_FILES:
                    write_check_cif(template, placed_nodes, placed_edges, g,
                                    scaled_params, sc_unit_cell)

                if SINGLE_ATOM_NODE or NODE_TO_NODE:
                    placed_all, bonds_all = remove_Fr(placed_all, bonds_all)

                print('computing X-X bonds...')
                print()
                print('*******************************************')
                print('Bond formation : ')
                print('*******************************************')

                fixed_bonds, nbcount, bond_check = bond_connected_components(
                    placed_all, bonds_all, sc_unit_cell, max_length, BOND_TOL,
                    TRACE_BOND_MAKING, NODE_TO_NODE, EXPANSIVE_BOND_SEARCH,
                    ONE_ATOM_NODE_CN)

                print('there were ', nbcount, ' X-X bonds formed')

                if bond_check:
                    print('bond check passed')
                    bond_check_code = ''
                else:
                    print('bond check failed, attempting distance search bonding...')
                    fixed_bonds, nbcount = distance_search_bond(
                        placed_all, bonds_all, sc_unit_cell, 2.5,
                        TRACE_BOND_MAKING)
                    bond_check_code = '_BOND_CHECK'
                    print('there were', nbcount, 'X-X bonds formed')
                print()

                if CHARGES:
                    fc_placed_all, netcharge, onetcharge, rcb = fix_charges(placed_all)
                else:
                    fc_placed_all = placed_all

                fixed_bonds = fix_bond_sym(fixed_bonds, placed_all, sc_unit_cell)

                if CHARGES:
                    print('*******************************************')
                    print('Charge information : ')
                    print('*******************************************')
                    print('old net charge :', np.round(onetcharge, 5))
                    print('rescaling magnitude :', np.round(rcb, 5))
                    remove_net = choice(range(len(fc_placed_all)))
                    fc_placed_all[remove_net][4] -= np.round(netcharge, 4)
                    print('new net charge (after rescaling):',
                          np.sum([li[4] for li in fc_placed_all]))
                    print()

                vnames = '_'.join([v.split('.')[0] for v in v_set])

                if len(ea) <= 5:
                    enames = []
                    for e in [e[0:-4] for e in ea]:
                        if e not in enames:
                            enames.append(e)
                    enames = '_'.join(enames)
                else:
                    enames = str(len(ea)) + '_edges'

                if catenation:
                    cifname = (template[0:-4] + '_' + vnames + '_' + enames +
                               bond_check_code + '_' + 'CAT' + str(cat_count) + '.cif')
                else:
                    cifname = (template[0:-4] + '_' + vnames + '_' + enames +
                               bond_check_code + '.cif')

                if WRITE_CIF:
                    print('writing cif...')
                    print()
                    if len(cifname) > 255:
                        cifname = cifname[0:241] + '_truncated.cif'
                    write_cif(fc_placed_all, fixed_bonds, scaled_params,
                              sc_unit_cell, cifname, CHARGES)

    if catenation and MERGE_CATENATED_NETS:

        print('merging catenated cifs...')
        cat_cifs = glob.glob('output_cifs/*_CAT*.cif')

        for comb in itertools.combinations(cat_cifs, cat_count):

            builds = [name[0:-9] for name in comb]
            print(set(builds))

            if len(set(builds)) == 1:
                pass
            else:
                continue

            merge_catenated_cifs(comb, CHARGES)

        for cif in cat_cifs:
            os.remove(cif)
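# Hedged driver sketch: run_template is presumably called over every file in
# a 'templates' directory; the actual entry point is not shown here, so the
# loop below is an assumption.
# for template in sorted(os.listdir('templates')):
#     run_template(template)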
def uni_uc3(this, utc, it):
    retn = ""
    for i in range(len(this)):
        if i % 2 != 0:
            retn += scale(10, 15, ord(this[i]) + utc - i) + ","
        else:
            retn += scale(10, 12, ord(this[i]) - it + i) + ","
    return retn[0:-1]
    'does not contain many links. Try to add some.',
    'lacks images. Visual representation can be better than a lot of text.',
    'has short or no usage/examples section. This section is really important.',
    'has short or no documentation section. Documentation is almost more important than code itself.',
    'does not tell a user how to install your package. It\'s quite an obstacle, isn\'t it?',
    'lacks some support links such as additional resources or community links. Some guiding can help a user to use your package.',
    'lacks informative badges (you know, for example from build status services). They can tell something about your package and users appreciate that.',
    'xxx',
    'does not contain all additional important sections. They are "license", "authors" and "troubleshooting".'
])

SCRIPT_PATH = os.path.dirname(os.path.abspath(__file__))

all_features = np.array(sys.argv[-1].split(','), dtype=int)
x = np.array(scale(all_features)).reshape(1, -1)

scaler = joblib.load(SCRIPT_PATH + '/serialized/scaler.pkl')
clf = joblib.load(SCRIPT_PATH + '/serialized/classifier.pkl')
statistics = pd.read_csv(SCRIPT_PATH + '/../data/statistics-normalized.csv')

diffs = list()
for idx, value in enumerate(x[0]):
    diffs.append(statistics.values[1][idx] - value)
diffs[10] = -1  # don't deal with deprecation status

advice_idx = argmax(diffs, 3)

x = scaler.transform(x)
def build(self, X, Y, quantitative=False, autoscale=False,
          nestimators=0, features='', random=False, tune=False,
          class_weight="balanced", cv='loo', n=2, p=1, lc=True, vpath=''):
    """Build a new RF model with the X and Y numpy matrices
    """
    nobj, nvarx = np.shape(X)

    self.nobj = nobj
    self.nvarx = nvarx
    self.quantitative = quantitative
    self.autoscale = autoscale
    self.estimators = nestimators
    self.features = features
    self.random = random
    self.class_weight = class_weight
    self.learning_curve = lc
    self.n = n
    self.p = p
    self.cv = cv

    self.X = X.copy()
    self.Y = Y.copy()
    self.vpath = vpath

    if autoscale:
        self.X, self.mux = center(self.X)
        self.X, self.wgx = scale(self.X, autoscale)

    if random:
        RANDOM_STATE = None
    else:
        RANDOM_STATE = 1226  # no reason to pick this number

    if self.cv:
        self.cv = getCrossVal(self.cv, RANDOM_STATE, self.n, self.p)

    if tune:
        self.estimators, self.features = self.optimize(self.X, self.Y)
        if self.features == 'none':
            self.features = None

    if self.quantitative:
        print "Building Quantitative RF model"
        self.clf = RandomForestRegressor(n_estimators=int(self.estimators),
                                         warm_start=False,
                                         max_features=self.features,
                                         oob_score=True,
                                         random_state=RANDOM_STATE)
    else:
        print "Building Qualitative RF model"
        self.clf = RandomForestClassifier(n_estimators=int(self.estimators),
                                          warm_start=False,
                                          max_features=self.features,
                                          oob_score=True,
                                          random_state=RANDOM_STATE,
                                          class_weight=self.class_weight)

    self.clf.fit(self.X, self.Y)

    if self.learning_curve:
        print 'Building Learning Curves'
        title = "Learning Curves (RF)"
        # SVC is more expensive so we do a lower number of CV iterations:
        cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=0)
        estimator = self.clf
        plot = plot_learning_curve(estimator, title, self.X, self.Y,
                                   (0.0, 1.01), cv=cv)
        plot.savefig(self.vpath + "/RF-learning_curves.png", format='png')
        plot.savefig("./RF-learning_curves.png", format='png')

    # Regenerate the X and Y, since they might have been centered/scaled
    self.X = X.copy()
    self.Y = Y.copy()
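# Hedged usage sketch (the enclosing class name is not shown; `RF` below is
# an assumption, and only attributes set in build() are relied on):
#   model = RF()
#   model.build(X, Y, quantitative=True, autoscale=True, tune=True)
#   print model.clf.oob_score_   # out-of-bag estimate from the fitted forest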
def main():
    root = tk.Tk()
    root.withdraw()

    confirm = tk.messagebox.askokcancel(
        'Confirmation',
        'You need to select the file you want to compare.',
        icon='question')
    if confirm == True:
        file_path = filedialog.askopenfilename()
        confirm = tk.messagebox.askokcancel(
            'Confirmation',
            'You need to select the directory in which to look for similar videos or images.',
            icon='question')
        if confirm == True:
            folder_selected = filedialog.askdirectory()
            files = os.listdir(folder_selected)

            similaridadeMinima = float(
                input('Enter the minimum similarity percentage: '))
            comparacoesMaximas = int(
                input('Enter the maximum number of comparisons: '))

            templateType = mimetypes.MimeTypes().guess_type(file_path)[0].split("/")[0]

            listaItemsComparacao = []
            listaSimilaridadeComparacao = []
            comparacoes = 0

            for itemFile in files:
                similaridade = 0
                itemType = mimetypes.MimeTypes().guess_type(
                    folder_selected + "/" + itemFile)[0]
                if itemType != None and comparacoes < comparacoesMaximas:
                    comparacoes = comparacoes + 1
                    itemType = itemType.split("/")[0]
                    if itemType == "image":
                        img = cv2.imread(folder_selected + "/" + itemFile,
                                         cv2.IMREAD_COLOR)
                        scaledImage = scale(max_height, max_width, img)
                        if templateType == "image":
                            template = cv2.imread(file_path, cv2.IMREAD_COLOR)
                            template = scale(max_height, max_width, template)
                            similaridade = compareImages(template, scaledImage)
                        elif templateType == "video":
                            similaridade = compareImageVideo(scaledImage, file_path)
                    elif itemType == "video":
                        video = folder_selected + "/" + itemFile
                        if templateType == "image":
                            img = cv2.imread(file_path, cv2.IMREAD_COLOR)
                            scaledImage = scale(max_height, max_width, img)
                            similaridade = compareImageVideo(scaledImage, video)
                        elif templateType == "video":
                            templateCap = cv2.VideoCapture(file_path)
                            similaridade = compareVideos(templateCap, video)

                    similaridade = similaridade * 100
                    if similaridade > similaridadeMinima:
                        listaItemsComparacao.append(itemFile)
                        listaSimilaridadeComparacao.append(
                            float("{0:.2f}".format(similaridade))
                            if similaridade > 0 else 0.0)

            data = {
                "Item": listaItemsComparacao,
                "Similaridade": listaSimilaridadeComparacao
            }
            df = pd.DataFrame(data)
            df.sort_values(by="Similaridade", ascending=False, inplace=True)
            print(df)
def process(self):
    '''Main routine, chain together all of the steps imported from
    autoindex, integrate, pointgroup, scale and merge.'''

    try:
        hostname = os.environ['HOSTNAME'].split('.')[0]
        write('Running on: {}'.format(hostname))
    except Exception:
        pass

    # check input frame limits
    if self._first_image is not None:
        if self._metadata['start'] < self._first_image:
            start = self._metadata['start']
            self._metadata['start'] = self._first_image
            self._metadata['phi_start'] += self._metadata['phi_width'] * \
                (self._first_image - start)

    if self._last_image is not None:
        if self._metadata['end'] > self._last_image:
            self._metadata['end'] = self._last_image

    # first, if the number of jobs was set to 0, decide something sensible:
    # jobs of a minimum of 5 degrees / 10 frames.
    if self._n_jobs == 0:
        phi = self._metadata['oscillation'][1]
        if phi == 0.0:
            raise RuntimeError('grid scan data')
        wedge = max(10, int(round(5.0 / phi)))
        frames = self._metadata['end'] - self._metadata['start'] + 1
        n_jobs = int(round(frames / wedge))
        if self._max_n_jobs > 0:
            if n_jobs > self._max_n_jobs:
                n_jobs = self._max_n_jobs
        self.set_n_jobs(n_jobs)

    write('Number of jobs: {}'.format(self._n_jobs))
    write('Number of cores: {}'.format(self._n_cores))

    step_time = time.time()

    write('Processing images: {} -> {}'.format(self._metadata['start'],
                                               self._metadata['end']))

    phi_end = self._metadata['phi_start'] + self._metadata['phi_width'] * \
        (self._metadata['end'] - self._metadata['start'] + 1)

    write('Phi range: {:.2f} -> {:.2f}'.format(self._metadata['phi_start'],
                                               phi_end))
    write('Template: {}'.format(self._metadata['template']))
    write('Wavelength: {:.5f}'.format(self._metadata['wavelength']))
    write('Working in: {}'.format(os.getcwd()))

    if self._plugin_library != " " and self._plugin_library not in ("None", "none"):
        oet = self._metadata['extra_text']
        et = None
        for line in oet.split('\n'):
            if line[0:4] != "LIB=":
                if et is None:
                    et = line + "\n"
                else:
                    et = et + line + "\n"
        if et is None:
            self._metadata['extra_text'] = "LIB=" + self._plugin_library + "\n"
        else:
            self._metadata['extra_text'] = et + "LIB=" + self._plugin_library + "\n"
    elif self._plugin_library in ("None", "none"):
        oet = self._metadata['extra_text']
        et = None
        for line in oet.split('\n'):
            if line[0:4] != "LIB=":
                if et is None:
                    et = line + "\n"
                else:
                    et = et + line + "\n"
        self._metadata['extra_text'] = et

    write('Extra commands: {}'.format(self._metadata['extra_text']))

    try:
        self._p1_unit_cell = autoindex(self._metadata,
                                       input_cell=self._input_cell_p1)
    except Exception as e:
        traceback.print_exc(file=open('fast_dp.error', 'w'))
        write('Autoindexing error: {}'.format(e))
        fdpelogpath = get_afilepath()
        fdpelogprefix = get_afileprefix()
        if fdpelogpath:
            try:
                shutil.copyfile('fast_dp.error',
                                os.path.join(fdpelogpath,
                                             fdpelogprefix + 'fast_dp.error'))
                write('Archived fast_dp.error to {}'.format(
                    os.path.join(fdpelogpath, fdpelogprefix + 'fast_dp.error')))
            except Exception:
                write('fast_dp.error not archived to {}'.format(
                    os.path.join(fdpelogpath, fdpelogprefix + 'fast_dp.error')))
        return

    try:
        mosaics = integrate(self._metadata, self._p1_unit_cell,
                            self._resolution_low, self._n_jobs, self._n_cores)
        write('Mosaic spread: {0[0]:.2f} < {0[1]:.2f} < {0[2]:.2f}'.format(
            tuple(mosaics)))
    except RuntimeError as e:
        traceback.print_exc(file=open('fast_dp.error', 'w'))
        write('Integration error: {}'.format(e))
        fdpelogpath = get_afilepath()
        fdpelogprefix = get_afileprefix()
        if fdpelogpath:
            try:
                shutil.copyfile('fast_dp.error',
                                os.path.join(fdpelogpath,
                                             fdpelogprefix + 'fast_dp.error'))
                write('Archived fast_dp.error to {}'.format(
                    os.path.join(fdpelogpath, fdpelogprefix + 'fast_dp.error')))
            except Exception:
                write('fast_dp.error not archived to {}'.format(
                    os.path.join(fdpelogpath, fdpelogprefix + 'fast_dp.error')))
        return

    try:
        # FIXME in here will need a mechanism to take the input
        # spacegroup, determine the corresponding pointgroup
        # and then apply this (or verify that it is allowed then
        # select)
        metadata = copy.deepcopy(self._metadata)
        cell, sg_num, resol = decide_pointgroup(
            self._p1_unit_cell, metadata,
            input_spacegroup=self._input_spacegroup)
        self._unit_cell = cell
        self._space_group_number = sg_num
        if not self._resolution_high:
            self._resolution_high = resol
    except RuntimeError as e:
        write('Pointgroup error: {}'.format(e))
        return

    try:
        self._unit_cell, self._space_group, self._nref, beam_pixels = \
            scale(self._unit_cell, self._metadata, self._space_group_number,
                  self._resolution_high, self._resolution_low,
                  self._n_jobs, self._n_cores)
        self._refined_beam = (self._metadata['pixel'][1] * beam_pixels[1],
                              self._metadata['pixel'][0] * beam_pixels[0])
    except RuntimeError as e:
        write('Scaling error: {}'.format(e))
        return

    try:
        n_images = self._metadata['end'] - self._metadata['start'] + 1
        self._xml_results = merge()
        mtzlogpath = get_afilepath()
        mtzlogprefix = get_afileprefix()
        if mtzlogpath:
            try:
                shutil.copyfile('fast_dp.mtz',
                                os.path.join(mtzlogpath,
                                             mtzlogprefix + 'fast_dp.mtz'))
                write('Archived fast_dp.mtz to {}'.format(
                    os.path.join(mtzlogpath, mtzlogprefix + 'fast_dp.mtz')))
            except Exception:
                write('fast_dp.mtz not archived to {}'.format(
                    os.path.join(mtzlogpath, mtzlogprefix + 'fast_dp.mtz')))
    except RuntimeError as e:
        write('Merging error: {}'.format(e))
        return

    write('Merging point group: {}'.format(self._space_group))
    write('Unit cell: {0[0]:6.2f} {0[1]:6.2f} {0[2]:6.2f} '
          '{0[3]:6.2f} {0[4]:6.2f} {0[5]:6.2f}'.format(self._unit_cell))

    duration = time.time() - step_time
    write('Processing took {} ({:d} s) [{:d} reflections]'.format(
        time.strftime('%Hh %Mm %Ss', time.gmtime(duration)),
        int(duration), self._nref))
    write('RPS: {:.1f}'.format(float(self._nref) / duration))

    # write out json and xml
    for func in (output.write_json, output.write_ispyb_xml):
        func(self._commandline, self._space_group, self._unit_cell,
             self._xml_results, self._start_image, self._refined_beam)
def reprocess(self):
    '''Main routine, chain together the last few steps of processing,
    i.e. pointgroup, scale and merge.'''

    try:
        hostname = os.environ['HOSTNAME'].split('.')[0]
        write('Running on: {}'.format(hostname))
    except Exception:
        pass

    # check input frame limits
    if self._first_image is not None:
        if self._metadata['start'] < self._first_image:
            start = self._metadata['start']
            self._metadata['start'] = self._first_image
            self._metadata['phi_start'] += self._metadata['phi_width'] * \
                (self._first_image - start)

    if self._last_image is not None:
        if self._metadata['end'] > self._last_image:
            self._metadata['end'] = self._last_image

    step_time = time.time()

    write('Processing images: {} -> {}'.format(self._metadata['start'],
                                               self._metadata['end']))

    phi_end = self._metadata['phi_start'] + self._metadata['phi_width'] * \
        (self._metadata['end'] - self._metadata['start'] + 1)

    write('Phi range: {:.2f} -> {:.2f}'.format(self._metadata['phi_start'],
                                               phi_end))
    write('Template: {}'.format(self._metadata['template']))
    write('Wavelength: {:.5f}'.format(self._metadata['wavelength']))
    write('Working in: {}'.format(os.getcwd()))

    # just for information for the user, print all options for indexing
    # FIXME should be able to run the same from CORRECT.LP which would
    # work better....
    from xds_reader import read_xds_idxref_lp
    from cell_spacegroup import spacegroup_to_lattice

    results = read_xds_idxref_lp('IDXREF.LP')

    write('For reference, all indexing results:')
    write('{:7s} {:6s} {:6s} {:6s} {:6s} {:6s} {:6s}'.format(
        'Lattice', 'a', 'b', 'c', 'alpha', 'beta', 'gamma'))
    for r in reversed(sorted(results)):
        if not type(r) == type(1):
            continue
        cell = results[r][1]
        write('{:7s} {:6.2f} {:6.2f} {:6.2f} {:6.2f} {:6.2f} {:6.2f}'.format(
            spacegroup_to_lattice(r), cell[0], cell[1], cell[2],
            cell[3], cell[4], cell[5]))

    try:
        # FIXME in here will need a mechanism to take the input
        # spacegroup, determine the corresponding pointgroup
        # and then apply this (or verify that it is allowed then
        # select)
        metadata = copy.deepcopy(self._metadata)
        cell, sg_num, resol = decide_pointgroup(
            self._p1_unit_cell, metadata,
            input_spacegroup=self._input_spacegroup)
        self._unit_cell = cell
        self._space_group_number = sg_num
        if not self._resolution_high:
            self._resolution_high = resol
    except RuntimeError as e:
        write('Pointgroup error: {}'.format(e))
        return

    try:
        self._unit_cell, self._space_group, self._nref, beam_pixels = \
            scale(self._unit_cell, self._metadata, self._space_group_number,
                  self._resolution_high, self._resolution_low,
                  self._n_jobs, self._n_cores)
        self._refined_beam = (self._metadata['pixel'][1] * beam_pixels[1],
                              self._metadata['pixel'][0] * beam_pixels[0])
    except RuntimeError as e:
        write('Scaling error: {}'.format(e))
        return

    try:
        n_images = self._metadata['end'] - self._metadata['start'] + 1
        self._xml_results = merge(hklout='fast_rdp.mtz',
                                  aimless_log='aimless_rerun.log')
    except RuntimeError as e:
        write('Merging error: {}'.format(e))
        return

    write('Merging point group: {}'.format(self._space_group))
    write('Unit cell: {0[0]:6.2f} {0[1]:6.2f} {0[2]:6.2f} '
          '{0[3]:6.2f} {0[4]:6.2f} {0[5]:6.2f}'.format(self._unit_cell))

    duration = time.time() - step_time
    write('Reprocessing took {} ({:d} s) [{:d} reflections]'.format(
        time.strftime('%Hh %Mm %Ss', time.gmtime(duration)),
        int(duration), self._nref))

    # write out json and xml
    for func, filename in [(output.write_json, 'fast_rdp.json'),
                           (output.write_ispyb_xml, 'fast_rdp.xml')]:
        func(self._commandline, self._space_group, self._unit_cell,
             self._xml_results, self._start_image, self._refined_beam,
             filename=filename)