def show_progress_bar(scan_job):
    import time
    from ipywidgets import FloatProgress, Label, VBox, HBox
    from IPython.display import display

    scan_progress = FloatProgress(value=0.0, min=0.0, max=1.0, step=0.01, bar_style='info')
    scan_label = Label('Scan user repositories')
    box = VBox([
        Label('GitHub user "{}"'.format(scan_job.args[-1])),
        HBox([scan_progress, scan_label]),
    ])
    display(box)
    bar_styles = {'queued': 'info', 'started': 'info', 'deferred': 'warning',
                  'failed': 'danger', 'finished': 'success'}
    while True:
        scan_job.refresh()
        if 'finished' in scan_job.meta:
            percentage_complete = (sum(scan_job.meta['finished'].values())
                                   / max(sum(scan_job.meta['steps'].values()), 1))
            # The metadata is bogus once the job is finished, so pin the bar at 1.0.
            scan_progress.value = 1.0 if scan_job.status == 'finished' else max(0.01, percentage_complete)
        scan_progress.bar_style = bar_styles[scan_job.status]
        if scan_job.status == 'finished':
            scan_progress.value = 1.0
            scan_progress.bar_style = bar_styles[scan_job.status]
            break
        elif scan_job.status == 'failed':
            scan_progress.value = max(0.01, scan_progress.value)
            break
        else:
            time.sleep(2)
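
# A minimal usage sketch, assuming an RQ-style job object: `scan_job` must expose
# refresh()/status and a meta dict that the worker fills with 'steps' and
# 'finished' dicts. The worker function name below is hypothetical.
from redis import Redis
from rq import Queue

queue = Queue(connection=Redis())
job = queue.enqueue(scan_user_repositories, 'octocat')  # hypothetical worker
show_progress_bar(job)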
def radial_pcf_out_of_core(hdftwo, hdfout, u, pairs, **kwargs):
    """
    Out-of-core radial pair correlation function (PCF) calculation.

    Atomic two-body data is expected to have been computed (see
    :func:`~exatomic.core.two.compute_atom_two_out_of_core`). An example is
    given below. Note the importance of the definition of ``pairs`` and the
    presence of additional keyword arguments.

    .. code-block:: python

        radial_pcf_out_of_core("in.hdf", "out.hdf", uni, {"O_H": ([0], "H")},
                               length="Angstrom", dr=0.01)

    Args:
        hdftwo (str): HDF filepath containing atomic two-body data
        hdfout (str): HDF filepath to which radial PCF data will be written (see Note)
        u (:class:`~exatomic.core.universe.Universe`): Universe
        pairs (dict): Dictionary of string name keys, values of ``a``, ``b`` arguments (see Note)
        kwargs: Additional keyword arguments to be passed (see Note)

    Note:
        Results are stored in the ``hdfout`` HDF file under keys of the form
        ``radial_pcf_key``. The keys of ``pairs`` are used to name the output
        while the values are used to perform the pair correlation itself.
    """
    f = u.atom['frame'].unique()
    n = len(f)
    fp = FloatProgress(description="Computing:")
    display(fp)
    # Compute the PCFs for the first frame to initialize the accumulators.
    fdx = f[0]
    twokey = "frame_" + str(fdx) + "/atom_two"
    atom = u.atom[u.atom['frame'] == fdx].copy()
    uu = Universe(atom=atom, frame=u.frame.loc[[fdx]],
                  atom_two=pd.read_hdf(hdftwo, twokey))
    pcfs = {}
    for key, ab in pairs.items():
        pcfs[key] = radial_pair_correlation(uu, ab[0], ab[1], **kwargs).reset_index()
    fp.value = 1 / n * 100
    # Accumulate the PCFs of the remaining frames.
    for i, fdx in enumerate(f[1:]):
        twokey = "frame_" + str(fdx) + "/atom_two"
        atom = u.atom[u.atom['frame'] == fdx].copy()
        uu = Universe(atom=atom, frame=u.frame.loc[[fdx]],
                      atom_two=pd.read_hdf(hdftwo, twokey))
        for key, ab in pairs.items():
            pcfs[key] += radial_pair_correlation(uu, ab[0], ab[1], **kwargs).reset_index()
        fp.value = (i + 2) / n * 100   # the first frame was already counted above
    # Average over frames and write each PCF to the output HDF file.
    store = pd.HDFStore(hdfout)
    for key in pairs.keys():
        pcfs[key] /= n
        store.put("radial_pcf_" + key, pcfs[key])
    store.close()
    fp.close()
def noaa_spider(url, word, maxPages):
    """Crawl pages starting from ``url``, downloading any ``.txt`` files whose
    URL contains ``word``. Returns the set of matching file URLs found."""
    if not os.path.isdir('data'):
        os.mkdir('data')
    pagesToVisit = [url]
    textfiles = []
    numberVisited = 0
    foundWord = False
    urlsVisited = set()
    foundFiles = set()
    progressBar = FloatProgress(min=0, max=maxPages)
    display(progressBar)
    progressBar.value = 0
    # The main loop. Create a LinkParser and get all the links on the page.
    # getLinks returns the web page text (useful for searching for the word)
    # and a set of links from that page (useful for deciding where to go next).
    while numberVisited < maxPages and pagesToVisit != [] and not foundWord:
        # Start from the beginning of our collection of pages to visit:
        url = pagesToVisit[0]
        pagesToVisit = pagesToVisit[1:]
        parser = LinkParser()
        if url not in urlsVisited:
            urlsVisited.add(url)
            if '.txt' in url:
                if word in url:
                    textfiles = textfiles + [url]
                    foundFiles.add(url)
                    print("FOUND ", url)
                    name = './data/' + url.split('/')[-1]
                    if not os.path.isfile(name):
                        print('downloading...', name)
                        urlretrieve(url, name)
                    else:
                        print('file exists...', name)
            else:
                numberVisited = numberVisited + 1
                progressBar.value = numberVisited
                data, links = parser.getLinks(url)
                # Add the pages that we visited to the end of our collection
                # of pages to visit:
                pagesToVisit = pagesToVisit + links
    return foundFiles
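
# A usage sketch (crawl parameters hypothetical): search NOAA pages for text
# files whose URL contains "stdmet", visiting at most 50 pages.
found = noaa_spider('https://www.ndbc.noaa.gov/', 'stdmet', 50)
print(len(found), 'matching files')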
def transect(sources, X, Y, Nmc, t_e, pmap, v_x=0.1, clock_drift=False,
             e_dt=0.01, x0=None, new_method=False):
    RMS_t = np.zeros((len(X)))
    BiasX_t = np.zeros((len(X)))
    Success_t = np.zeros((Nmc, len(X)))
    r = receiver(X[0], Y, e_dt=e_dt)
    r_dt = r.dt
    f = FloatProgress(value=0., min=0., max=100., step=1.,
                      orientation='horizontal', description='Loading :')
    display(f)
    for i in range(len(X)):
        f.value = i / len(X) * 100.
        # init a receiver at this position along the transect
        r = receiver(X[i], Y, e_dt=e_dt, v_x=v_x)
        # r.dt = r_dt   # unchanged variable during simulations
        d_rms, bias_x, su = simu(r, sources, Nmc, t_e=t_e, t_drift=clock_drift,
                                 pmap=pmap, x0=x0, new_method=new_method)
        RMS_t[i] = d_rms
        BiasX_t[i] = bias_x
        Success_t[:, i] = su
    f.value = 100.
    return RMS_t, BiasX_t, Success_t
def get_airtemperature_from_files():
    # Read all text data files in the ./data directory.
    files = glob('./data/*.txt')
    files.sort()
    progressBar = FloatProgress(min=0, max=len(files))
    display(progressBar)
    progressBar.value = 0
    air_temperature = []
    for filename in files:
        progressBar.value = progressBar.value + 1
        print('reading...', filename)
        air_temperature = air_temperature + read_data_column(filename)
    return air_temperature
def get_gaussian_labels_probabilities(digits, hidden_markov_models, n_observation_classes,
                                      n_hidden_states, n_iter, tol, display_progress,
                                      use_pickle, filename):
    labels_probabilities = []
    directory = (settings.LABELS_PROBABILITIES_DIRECTORY
                 + "centroids_" + str(n_observation_classes - 3))
    directory += ("/hidden_states_" + str(n_hidden_states)
                  + "/n_iter_" + str(n_iter) + "/tol_" + str(tol))
    path = directory + "/" + filename
    if use_pickle and os.path.isfile(path):
        labels_probabilities = pickle.load(open(path, 'rb'))
    else:
        f = FloatProgress(min=0, max=100)
        if display_progress:
            display(f)
        i = 0
        for dig in digits:
            probabilities = get_gaussian_label_probabilites(dig, hidden_markov_models)
            labels_probabilities.append(probabilities)
            f.value = (float(i) * 100.0) / float(len(digits))
            i += 1
        f.close()
        if use_pickle:
            if not os.path.exists(directory):
                os.makedirs(directory)
            with open(path, 'wb') as fh:   # don't shadow the progress bar `f`
                pickle.dump(labels_probabilities, fh)
    return labels_probabilities
def solve(self, r0):
    """Trace rays through the turbulent grid.

    Args:
        r0 (4xN float): array of N rays, in their initial configuration
    """
    f = FloatProgress(min=0, max=self.ne_grid.shape[0], description='Progress:')
    display(f)
    self.r0 = r0   # keep the original
    dz = self.z[1] - self.z[0]
    DZ = Z1(dz)    # matrix to push rays by dz
    rt = r0.copy()   # iterate to save memory, starting at r0
    for i, ne_slice in enumerate(self.ne_grid):
        f.value = i
        gx, gy = gradient_interpolator(ne_slice, self.x, self.y)
        rr1 = deflect_rays(rt, gx, gy, dz=dz)
        rt = transform(DZ, rr1)
    self.rt = rt
def plot_digit(digit, display_progress=False):
    fig = plt.figure()
    ax = fig.add_subplot(111)
    f = FloatProgress(min=0, max=100)
    if display_progress:
        display(f)
    n_points = 0
    for curve in digit.curves:
        n_points += len(curve)
    i = 0
    for curve in digit.curves:
        x_points = []
        y_points = []
        for point in curve:
            x_points.append(point[0])
            y_points.append(point[1])
        f.value = 100.0 * (float(i) / float(n_points))
        i += len(curve)   # count points, not curves, so the bar can reach 100%
        plt.plot(x_points, y_points, linewidth=2.0)
    f.close()
    plt.axis([settings.IMAGE_PLOT_X_MIN, settings.IMAGE_PLOT_X_MAX,
              settings.IMAGE_PLOT_Y_MIN, settings.IMAGE_PLOT_Y_MAX])
    plt.show()
def download_file(osm_url, dest):
    if config.get("general", "proxy_https") != "":
        urllib2.install_opener(
            urllib2.build_opener(
                urllib2.ProxyHandler({'https': config.get("general", "proxy_https")})
            )
        )
    file_name = "tempdata/" + osm_url.split('/')[-1]
    print "downloading: " + osm_url
    print "in progress"
    u = urllib2.urlopen(osm_url)
    f = open(file_name, 'wb')
    meta = u.info()
    file_size = int(meta.getheaders("Content-Length")[0])
    print "Downloading: %s Bytes: %s" % (file_name, file_size)
    file_size_dl = 0
    block_sz = 8192
    progressbar = FloatProgress(min=0, max=100)   # instantiate the bar
    display(progressbar)                          # display the bar
    while True:
        buffer = u.read(block_sz)
        if not buffer:
            break
        file_size_dl += len(buffer)
        f.write(buffer)
        progressbar.value = file_size_dl * 100. / file_size
    f.close()
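
# A usage sketch (URL hypothetical). Note that `dest` is currently unused;
# files land in tempdata/ under the URL's basename.
download_file('https://download.geofabrik.de/europe/monaco-latest.osm.bz2', 'tempdata/')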
def create_features_as_matrix(self, samples, show_progress_bar=False):
    '''
    Creates features for all the given sample objects.
    @return: The created features, as a float numpy matrix (shape: n_samples X n_features).
    '''
    if show_progress_bar:
        from IPython.display import display
        from ipywidgets import FloatProgress
        progress_bar = FloatProgress(min=0, max=len(samples) - 1)
        display(progress_bar)
    feature_matrix = np.empty((len(samples), self.n_features()), dtype=np.float64)
    for i, sample in enumerate(samples):
        self.create_features_into_array(feature_matrix[i, :], sample)
        if show_progress_bar:
            progress_bar.value = i
    return feature_matrix
def predict(self):
    '''
    Iteratively predict values based on available neighbor data.
    On each iteration, the predictdf attribute is revised.
    '''
    self.predictdf = self.targetdf.copy()
    dftest = self.predictdf.copy()
    dftest = dftest[pd.isnull(dftest[self.label])]
    # Set up progress bar
    nullcount = pd.isnull(self.predictdf[self.label]).sum()
    if self.progressbar:
        maxnullcount = nullcount
        f = FloatProgress(min=0, max=maxnullcount)
        display(f)
    while nullcount > 0:
        if ((~pd.isnull(dftest.lead)) & (~pd.isnull(dftest.lag))).sum() > 0:
            dftest = self.predict_once(dftest, lead=True, lag=True)
        if (~pd.isnull(dftest.lead)).sum() > 0:
            dftest = self.predict_once(dftest, lead=True)
        if (~pd.isnull(dftest.lag)).sum() > 0:
            dftest = self.predict_once(dftest, lag=True)
        nullcount = pd.isnull(self.predictdf[self.label]).sum()
        print nullcount
        if self.progressbar:
            f.value = maxnullcount - nullcount
def _compute_current_density(bvs, gvx, gvy, gvz, cmatr, cmati, occvec, verbose=True):
    """Compute the current density in each cartesian direction."""
    nbas, npts = bvs.shape
    curx = np.zeros(npts, dtype=np.float64)
    cury = np.zeros(npts, dtype=np.float64)
    curz = np.zeros(npts, dtype=np.float64)
    cval = np.zeros(nbas, dtype=np.float64)
    if verbose:
        fp = FloatProgress(description='Computing:')
        display(fp)
    for mu in range(nbas):
        if verbose:
            fp.value = mu / nbas * 100
        crmu = cmatr[mu]
        cimu = cmati[mu]
        bvmu = bvs[mu]
        gvxmu = gvx[mu]
        gvymu = gvy[mu]
        gvzmu = gvz[mu]
        for nu in range(nbas):
            crnu = cmatr[nu]
            cinu = cmati[nu]
            bvnu = bvs[nu]
            gvxnu = gvx[nu]
            gvynu = gvy[nu]
            gvznu = gvz[nu]
            cval = evaluate('-0.5 * (occvec * (crmu * cinu - cimu * crnu))', out=cval)
            csum = cval.sum()
            evaluate('curx + csum * (bvmu * gvxnu - gvxmu * bvnu)', out=curx)
            evaluate('cury + csum * (bvmu * gvynu - gvymu * bvnu)', out=cury)
            evaluate('curz + csum * (bvmu * gvznu - gvzmu * bvnu)', out=curz)
    if verbose:
        fp.close()
    return curx, cury, curz
def read_datalines(self, NSTEPS=0, start_step=-1, select_ckeys=None, max_vector_dim=None, even_NSTEPS=True):
    """Read NSTEPS steps of file, starting from start_step, and store only
    the selected ckeys.

    INPUT:
      NSTEPS         -> number of steps to read (default: 0 -> reads all the file)
      start_step     -> -1: continue from current step (default)
                         0: go to the start step
                         N: go to the N-th step
      select_ckeys   -> an array with the column keys you want to read (see all_ckeys for a list)
      max_vector_dim -> when reading vectors, read only this number of components (None = read all components)
      even_NSTEPS    -> round the number of steps to an even number (default: True)

    OUTPUT:
      data -> a dictionary with the selected-column steps
    """
    if self._GUI:
        progbar = FloatProgress(min=0, max=100)
        display(progbar)
    start_time = time()
    if (NSTEPS == 0):
        NSTEPS = self.MAX_NSTEPS
    self._set_ckey(select_ckeys, max_vector_dim)   # set the ckeys to read
    self._initialize_dic(NSTEPS)                   # allocate dictionary
    self.gotostep(start_step)                      # jump to the starting step
    # read NSTEPS of the file
    progbar_step = max(100000, int(0.005 * NSTEPS))
    for step in range(NSTEPS):
        line = self.file.readline()
        if len(line) == 0:   # EOF
            print "Warning: reached EOF."
            break
        values = np.array(line.split())
        for key, idx in self.ckey.iteritems():   # save the selected columns
            self.data[key][step, :] = np.array(map(float, values[idx]))
        if ((step + 1) % progbar_step == 0):
            if self._GUI:
                progbar.value = float(step + 1) / NSTEPS * 100.
                progbar.description = "{:6.2f}%".format(progbar.value)
            else:
                print "  step = {:9d} - {:6.2f}% completed".format(step + 1, float(step + 1) / NSTEPS * 100.)
    if self._GUI:
        progbar.close()
    # check number of steps read, keep an even number of steps
    if (step + 1 < self.NSTEPS):
        if (step == 0):
            print "WARNING: no step read."
            return
        else:
            print "Warning: less steps read."
            self.NSTEPS = step + 1
    if even_NSTEPS:
        if (NSTEPS % 2 == 1):
            NSTEPS = NSTEPS - 1
    for key, idx in self.ckey.iteritems():   # free memory not used
        self.data[key] = self.data[key][:NSTEPS, :]
    print "  ( %d ) steps read." % (NSTEPS)
    self.NSTEPS = NSTEPS
    print "DONE. Elapsed time: ", time() - start_time, "seconds"
    return self.data
def cepstral_analysis(self, aic_type='aic', Kmin_corrfactor=1.0, bayes_p=False, density_grid=None):
    """Perform the Cepstral Analysis on all blocks."""
    if self.GUI:
        progbar = FloatProgress(min=0, max=100)
        progbar.description = "0 %"
        display(progbar)
    self.BLOCK_NFREQS = self.BLOCK_SIZE / 2 + 1
    if self.MULTI_COMPONENT:
        print ' N_COMPONENTS = {:10d}'.format(self.N_COMPONENTS)
        self.ck_THEORY_var, self.psd_THEORY_mean = multicomp_cepstral_parameters(
            self.BLOCK_NFREQS, self.N_COMPONENTS)
    self.bayes_p = bayes_p
    if (self.N_BLOCKS == 1):
        raise NotImplementedError('One block.')
    for L in range(self.N_BLOCKS):
        if self.MULTI_COMPONENT:
            self.block[L].compute_psd(DT=self.TSKIP, DT_FS=self.DT_FS, average_components=True)
            self.block[L].dct = ta.CosFilter(self.block[L].logpsd,
                                             ck_theory_var=self.ck_THEORY_var,
                                             psd_theory_mean=self.psd_THEORY_mean,
                                             aic_type=aic_type,
                                             Kmin_corrfactor=Kmin_corrfactor,
                                             normalization=self.BLOCK_SIZE)
        else:
            self.block[L].compute_psd(DT=self.TSKIP, DT_FS=self.DT_FS)
            self.block[L].dct = ta.CosFilter(self.block[L].logpsd,
                                             aic_type=aic_type,
                                             Kmin_corrfactor=Kmin_corrfactor,
                                             normalization=self.BLOCK_SIZE)   # theory_var=None
        self.block[L].dct.scan_filter_tau()
        if self.bayes_p:
            self.block[L].dct.compute_p_aic(method='ba')
            if density_grid is not None:
                self.density_grid = density_grid
                self.block[L].dct.compute_logtau_density(method='ba', only_stats=False,
                                                         density_grid=density_grid)
            else:
                self.block[L].dct.compute_logtau_density(method='ba', only_stats=True)
        if self.GUI:
            progbar.value = float(L + 1) / self.N_BLOCKS * 100.
            progbar.description = "%5.2f %%" % progbar.value
    if self.GUI:
        progbar.close()
    self.freqs = self.block[0].freqs
    return
def paint_in(contours, image):
    # Create an empty array the size of the original image
    painted_in = np.zeros_like(image)
    # Create the progress bar
    f = FloatProgress(min=0, max=len(contours) - 1)
    display(f)
    # Make a list of lists of painted-in points for every column of the image.
    # Those function as limits - we paint from a certain position up to the
    # closest one of these.
    limits = [[] for i in image[0]]
    # Paint in every contour
    for n, contour in enumerate(contours):
        # Go through each point of the contour
        for i in range(len(contour)):
            # If colour is -1 by the end of the forthcoming ifs, that means the
            # direction in which the contour is going is too ambiguous to use.
            colour = -1
            # Determine if the contour is going left or right. This uses a very
            # convenient aspect of skimage's contour-finding function - contours
            # are either clockwise or anticlockwise depending on the colour they
            # enclose. Note that we usually compare the point before and the
            # point after, to get a general trend at that position.
            direction = contour[(i + 1) % len(contour), 1] - contour[i - 1, 1]
            if direction > 0:
                colour = 0
            elif direction < 0:
                colour = 1
            else:
                # If the x coordinate doesn't change, perform other checks:
                # this calculates the clockwise or anticlockwise direction.
                direction = ((contour[i, 1] - contour[i - 1, 1]) * (contour[i, 0] + contour[i - 1, 0])
                             + (contour[(i + 1) % len(contour), 1] - contour[i, 1])
                             * (contour[(i + 1) % len(contour), 0] + contour[i, 0]))
                # Check that the y coordinate changes
                if contour[(i + 1) % len(contour), 0] - contour[i - 1, 0]:
                    if direction > 0:
                        colour = 1
                    elif direction <= 0:
                        colour = 0
            # If we have established what colour we want, paint the pixels
            # above this one
            if colour != -1:
                # Establish the painting limit, which is the highest value in
                # the limits for this column that is below the current pixel
                paint_limit = 0
                for limit in limits[contour[i, 1]]:
                    if limit < contour[i, 0] and paint_limit < limit:
                        paint_limit = limit
                # Paint in
                painted_in[paint_limit + 1:contour[i, 0], contour[i, 1]] = colour
                # Add this pixel to the limit list
                limits[contour[i, 1]].append(contour[i, 0])
            # Paint this pixel white, so that the contours are always white
            painted_in[contour[i, 0], contour[i, 1]] = 1
        f.value = n
    # Return the finished image
    return painted_in
def evaluate(self, df, is_training, batch_size, sess, dropout_prob=0.2):
    X = get_feature_X(df, maxlen)
    Y = pd.get_dummies(df.is_duplicate)
    sess = self.sess
    start_index = 0
    final_loss = 0
    final_acc = 0
    current_total_trained = 0
    p_bar = FloatProgress(min=0, max=1.0)   # value below is a 0-1 fraction
    display(p_bar)
    start_time = time.time()
    while start_index < X[0].shape[0]:
        temp_x1 = X[0][start_index:start_index + batch_size]
        temp_x2 = X[1][start_index:start_index + batch_size]
        temp_seq_len1 = X[2][start_index:start_index + batch_size]
        temp_seq_len2 = X[3][start_index:start_index + batch_size]
        test_y = Y[start_index:start_index + batch_size]
        feed_dict = {
            self.min_mask1: get_init_min_mask_value(temp_seq_len1),
            self.min_mask2: get_init_min_mask_value(temp_seq_len2),
            self.seq_length1: temp_seq_len1,
            self.seq_length2: temp_seq_len2,
            self.input: temp_x1,
            self.input2: temp_x2,
            self.y: test_y
        }
        if is_training:
            feed_dict[self.prob] = 1 - dropout_prob
        current_total_trained += temp_x1.shape[0]
        if is_training:
            _, c, ac = sess.run([self.optimizer, self.loss, self.acc], feed_dict=feed_dict)
            final_loss += c * temp_x1.shape[0]
            final_acc += ac * temp_x1.shape[0]
            duration = time.time() - start_time
            speed = duration / current_total_trained
            eta = (X[0].shape[0] - current_total_trained) * speed
            p_bar.value = current_total_trained / X[0].shape[0]
            p_bar.description = "%s/%s, eta %s sec" % (current_total_trained, X[0].shape[0], eta)
        else:
            c, ac, pred, real = sess.run([self.loss, self.acc, self.output, self.y], feed_dict=feed_dict)
            final_loss += c * temp_x1.shape[0]
            final_acc += ac * temp_x1.shape[0]
            print(sum(np.argmax(real, axis=1) == np.argmax(pred, axis=1)))
        start_index += batch_size
    final_loss = final_loss / X[0].shape[0]
    final_acc = final_acc / X[0].shape[0]
    return final_loss, final_acc
def read_timesteps(self, selection, start_step=-1, select_ckeys=None, fast_check=True):
    """
    Read selected keys of file, within the provided range.

    Examples:
        read_timesteps(10, start_step=0, select_ckeys=['id', 'xu', 'yu', 'vu'])
            --> read the first 10 timesteps, only the specified columns
        read_timesteps(10, select_ckeys=['id', 'xu', 'yu', 'vu'])
            --> read the next 10 timesteps, only the specified columns (DELTA_TIMESTEP is assumed)
        read_timesteps((10, 30))
            --> read from TIMESTEP 10 to 30
        read_timesteps((10, 30, 2))
            --> read every 2 steps from TIMESTEP 10 to 30
    """
    if self._GUI:
        progbar = FloatProgress(min=0, max=100)
        display(progbar)
    start_time = time()
    self._set_ckey(select_ckeys)                 # set the ckeys to read --> ckey
    self._set_timesteps(selection, start_step)   # set the timesteps to read --> timestep
    self._initialize_dic()                       # allocate dictionary --> data
    # extract the steps from the file
    progbar_step = max(1000, int(0.005 * self.nsteps))
    atomid_col = self.all_ckeys['id'][0]
    for istep, step in enumerate(self.timestep):
        self._gototimestep(step, fast_check)   # jump to the desired step
        self.data[istep]['TIMESTEP'] = step
        for nat in range(self.NATOMS):   # read data (may be unsorted)
            line = self.file.readline()
            if len(line) == 0:   # EOF
                raise EOFError('Warning: reached EOF.')
            values = np.array(line.split())
            for key, idx in self.ckey.items():   # save the selected columns
                atomid = int(values[atomid_col]) - 1   # current atom index (in LAMMPS it starts from 1)
                if (key == 'element'):   # this should be improved
                    self.data[istep][key][atomid, :] = np.array(list(map(str, values[idx])))
                else:
                    self.data[istep][key][atomid, :] = np.array(list(map(float, values[idx])))
        if ((istep + 1) % progbar_step == 0):
            if self._GUI:
                progbar.value = float(istep + 1) / self.nsteps * 100.
                progbar.description = '%g %%' % progbar.value
            else:
                log.write_log('    step = {:9d} - {:6.2f}% completed'.format(
                    istep + 1, float(istep + 1) / self.nsteps * 100.))
    if self._GUI:
        progbar.close()
    # check the number of steps read
    if (istep + 1 < self.nsteps):   # (should never happen)
        if (istep == 0):
            log.write_log('WARNING: no step read.')
            return
        else:
            log.write_log('Warning: less steps read.')
            self.nsteps = istep + 1
    if not self._quiet:
        log.write_log('  ( %d ) steps read.' % (self.nsteps))
        log.write_log('DONE. Elapsed time: ', time() - start_time, 'seconds')
    self._compute_current_step = False   # next time do not compute the current_step
    return self.data
def slice_mrc_stack(mrc, scratch, scanshape, optx, opty, startframe=0, wx=500, wy=500):
    """
    Slice the *.mrc movie into all of its subframes.

    Accepts:
        mrc        (MrcMemmap) memory map into the mrc file (such as opened by
                   py4DSTEM.file.io.read(..., load='relativity'))
        scratch    (str) path to a scratch file where a numpy memmap containing the
                   re-sliced stack will be buffered.
                   NOTE! This will overwrite whatever file is at this path - be careful!
                   ALSO NOTE! This file is where the data in the DataCube will actually
                   live! Either save the DataCube as a py4DSTEM *.h5 or use separate
                   scratches for different data!
        scanshape  (numpy array) 2-element array containing the scan shape (Rx, Ry)
        optx, opty (numpy meshgrids) the optimized centers of the subframes from
                   subframeAlign(...)
        wx, wy     (ints) subframe sizes x and y

    Returns:
        dc         (DataCube) a py4DSTEM DataCube containing the sliced-up stack,
                   in the correct order
    """
    nframe = scanshape.prod() // optx.size
    dshape = (int(nframe), int(optx.size), wx, wy)
    vstack = np.memmap(scratch, mode='w+', dtype='<i2', shape=dshape)
    f = FloatProgress(min=0, max=nframe - 1)
    display(f)
    t0 = time()
    for i in np.arange(startframe, startframe + nframe):
        f.value = i - startframe
        frame = mrc.data[int(i), :, :]
        stack = slice_subframes(frame, optx, opty, wx, wy)
        vstack[int(i - startframe), :, :, :] = np.transpose(stack, (2, 0, 1))
    t = time() - t0
    print("Sliced {} diffraction patterns in {}h {}m {}s".format(
        scanshape.prod(), int(t / 3600), int((t % 3600) / 60), int(t % 60)))
    mrc.close()
    dc = DataCube(vstack)
    dc.set_scan_shape(scanshape[0], scanshape[1])
    return dc
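
# A usage sketch following the docstring (paths hypothetical): open the movie
# with py4DSTEM's relativity loader, get subframe centers from subframeAlign,
# then slice into a DataCube backed by a scratch memmap.
mrc = py4DSTEM.file.io.read("movie.mrc", load='relativity')
optx, opty = subframeAlign(mrc)   # hypothetical call signature
scanshape = np.array([256, 256])
dc = slice_mrc_stack(mrc, "scratch.dat", scanshape, optx, opty)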
def run(self, duration, obs=None):
    """Run the simulation.

    Parameters
    ----------
    duration : Real
        a duration for running a simulation. A simulation is expected to be
        stopped at t() + duration.
    obs : list of Observers, optional
        observers

    """
    from ecell4_base.core import TimeoutObserver
    from collections.abc import Iterable   # collections.Iterable was removed in Python 3.10

    timeout = TimeoutObserver(self.__timeout)
    if obs is None:
        obs = (timeout, )
    elif isinstance(obs, Iterable):
        obs = tuple(obs) + (timeout, )
    else:
        obs = (obs, timeout)
    from ipywidgets import FloatProgress, HBox, HTML
    from IPython.display import display
    from time import sleep

    fp = FloatProgress(min=0, max=100)
    ptext = HTML()
    display(HBox(children=[fp, ptext]))
    tstart = self.__sim.t()
    upto = tstart + duration
    while self.__sim.t() < upto:
        self.__sim.run(upto - self.__sim.t(), obs)
        value = (self.__sim.t() - tstart) / duration
        fp.value = value * 100
        ptext.value = self.get_text(value, timeout.accumulation())
        sleep(self.__wait)
    fp.value = 100
    ptext.value = self.get_text(1, timeout.accumulation())
def smooth_contours(contours, range_len=10, limit_len=500):
    # Create a progress bar
    f = FloatProgress(min=0, max=len(contours) - 1)
    display(f)
    smoothed_contours = []
    # Iterate over all contours
    for n, contour in enumerate(contours):
        smoothed_contour = []
        length = len(contour)
        # Reject contours that are too short
        if limit_len < length:
            # Go over each point in the contour
            for i in range(length):
                # Calculate the new position of the point as the mean of the
                # positions of its neighbours. np.take is needed for
                # wrap-around indexing.
                proposed = np.mean(np.take(contour, range(i - range_len, i + range_len),
                                           mode='wrap', axis=0), axis=0).astype('int')
                # Check whether this point is not the same as the last one
                # (when averaging and rounding, points tend to overlap).
                if len(smoothed_contour) != 0 and (proposed[0] != smoothed_contour[-1][0]
                                                   or proposed[1] != smoothed_contour[-1][1]):
                    # Also check the distance between the new point and the
                    # previous one. We want them to be neighbours.
                    if (proposed[0] - smoothed_contour[-1][0])**2 + (proposed[1] - smoothed_contour[-1][1])**2 > 2:
                        # This is a naive fix, but works for most cases
                        smoothed_contour.append([int(np.mean((proposed[0], smoothed_contour[-1][0]))),
                                                 int(np.mean((proposed[1], smoothed_contour[-1][1])))])
                    smoothed_contour.append(proposed)
                elif len(smoothed_contour) == 0:
                    smoothed_contour.append(proposed)
            smoothed_contour = np.array(smoothed_contour)
            smoothed_contours.append(smoothed_contour)
            # Update the progress bar
            f.value = n
    # The progress bar is updated only when processing long enough contours
    # (for performance reasons). Set it so that it's full, to indicate
    # completion of the process.
    f.value = len(contours) - 1
    return np.array(smoothed_contours)
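
# A minimal end-to-end sketch (assumed workflow): find contours with
# scikit-image, smooth them, then paint in the enclosed region.
import numpy as np
from skimage import measure

image = np.zeros((100, 100))
image[30:70, 30:70] = 1.0
contours = measure.find_contours(image, 0.5)
smoothed = smooth_contours(contours, range_len=5, limit_len=50)
filled = paint_in(smoothed, image)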
def get_airtemperature_from_files():
    # Read all text data files in the ./data directory.
    from sys import platform
    files = glob('./data/*.txt')
    files.sort()
    progressBar = FloatProgress(min=0, max=len(files))
    display(progressBar)
    progressBar.value = 0
    air_temperature = []
    for file in files:
        progressBar.value = progressBar.value + 1
        if platform == "win32":
            name = '.\\data' + file.split('data')[-1]
        else:
            name = './data' + file.split('data')[-1]
        filename = name
        print('reading...', name)
        air_temperature = air_temperature + read_data_column(filename)
    return air_temperature
def abel_invert(self, y_lim, x_range, parameters=None, model=None):
    if model is None:
        # Create the lmfit model
        model = GaussianModel()
        model += ConstantModel()
    params = model.make_params()
    params['c'].set(0.45)
    params['center'].set(0, vary=False)
    params['sigma'].set(min=0.001)
    if parameters is not None:
        for key, value in parameters.items():
            params[key].set(**value)
    # Track progress across the scanned rows (was hardcoded to 0.3-4.5).
    f = FloatProgress(min=y_lim[0], max=y_lim[1])
    display(f)
    fit_data = []
    abel_data = []
    xx = x_range
    self.abel_extent = [-xx, xx, y_lim[0], y_lim[1]]
    for yy in np.arange(y_lim[0], y_lim[1], 1 / self.scale):
        f.value = yy
        self.create_lineout(start=(yy, -xx), end=(yy, xx), lineout_width_mm=1 / self.scale)
        # The data obtained by the lineout
        y = self.lo
        x = self.mm
        out = model.fit(y, params, x=x)
        fit_data.append(out.best_fit)
        abel_data.append(self.abel_gauss(x, out.best_values['sigma'],
                                         out.best_values['amplitude']) * 10)   # *10 converts from mm^-1 to cm^-1
    # Change the lists to numpy arrays and flip them
    fit_data = np.array(fit_data)[::-1]
    abel_data = np.array(abel_data)[::-1]
    extent = [-x_range, x_range, y_lim[0], y_lim[1]]
    origin = [int(len(fit_data) + y_lim[0] * self.scale), int(len(fit_data[0]) / 2)]
    self.fit = DMFromArray(fit_data, self.scale, extent=extent, origin=origin)
    self.abel = DMFromArray(abel_data, self.scale, extent=extent, origin=origin)
    return self.fit, self.abel
def compute_atom_two_out_of_core(hdfname, uni, a, **kwargs):
    """
    Perform an out-of-core periodic two-body calculation for a simple cubic
    unit cell with dimension a.

    All data will be saved to an HDF5 file with the given filename. Keys are
    structured per frame, i.e. ``frame_fdx/atom_two``.

    Args:
        hdfname (str): HDF file name
        uni (:class:`~exatomic.core.universe.Universe`): Universe
        a (float): Simple cubic unit cell dimension
        kwargs: Keyword arguments for bond computation (i.e. covalent radii)

    See Also:
        :func:`~exatomic.core.two._compute_bonds`
    """
    store = pd.HDFStore(hdfname, mode="a")
    unit_atom = uni.atom[['symbol', 'x', 'y', 'z', 'frame']].copy()
    unit_atom['symbol'] = unit_atom['symbol'].astype(str)
    unit_atom['frame'] = unit_atom['frame'].astype(int)
    unit_atom.update(uni.unit_atom)
    grps = unit_atom.groupby("frame")
    n = len(grps)
    fp = FloatProgress(description="AtomTwo to HDF:")
    display(fp)
    for i, (fdx, atom) in enumerate(grps):
        v = pdist_ortho(atom['x'].values, atom['y'].values, atom['z'].values,
                        a, a, a, atom.index.values, a)
        tdf = pd.DataFrame.from_dict({
            'frame': np.array([fdx] * len(v[0]), dtype=int),
            'dx': v[0], 'dy': v[1], 'dz': v[2], 'dr': v[3],
            'atom0': v[4], 'atom1': v[5], 'projection': v[6]})
        _compute_bonds(uni.atom[uni.atom['frame'] == fdx], tdf, **kwargs)
        store.put("frame_" + str(fdx) + "/atom_two", tdf)
        fp.value = (i + 1) / n * 100   # count this frame as done
    store.close()
    fp.close()
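
# A minimal pipeline sketch (filenames and cell dimension assumed): compute the
# two-body data out of core, then accumulate radial PCFs from the same HDF file.
compute_atom_two_out_of_core("two.hdf", uni, a=20.0)
radial_pcf_out_of_core("two.hdf", "pcf.hdf", uni, {"O_H": ([0], "H")},
                       length="Angstrom", dr=0.01)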
def _counter_nb(items, tot=None):
    from ipywidgets import FloatProgress, FloatText
    from IPython.display import display
    if tot is not None:
        # Bounded: show a progress bar.
        f = FloatProgress(min=0, max=tot)
    else:
        # Unbounded: show a running count instead.
        f = FloatText()
    f.value = 0
    display(f)
    for ii, item in enumerate(items):
        f.value += 1
        yield item
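
# A small usage sketch: wrap any iterable to get a live counter or progress bar
# in a notebook; passing `tot` enables the bounded-bar variant.
for item in _counter_nb(range(1000), tot=1000):
    pass  # do work per item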
def torus_dat(kp, kq, refine=300, segm=40, tR=1.6, tr=0.6):
    spt, spp, spq, spr, spR = sp.symbols("t p q r R", real=True)
    c = sp.Matrix([(spR + spr * sp.cos(2 * sp.pi * spq * spt)) * sp.cos(2 * sp.pi * spp * spt),
                   (spR + spr * sp.cos(2 * sp.pi * spq * spt)) * sp.sin(2 * sp.pi * spp * spt),
                   spr * sp.sin(2 * sp.pi * spq * spt)])
    dc = sp.Matrix([sp.diff(x, spt) for x in c])          # derivative
    ldc = sp.sqrt(sum([x**2 for x in dc])).simplify()     # speed
    udc = dc / ldc                                        # unit tangent
    ## 2nd order
    kc = sp.Matrix([sp.diff(x, spt) for x in udc])        # curvature vector
    ks = sp.sqrt(sum([x**2 for x in kc]))                 # curvature scalar
    ukc = kc / ks                                         # unit curvature vector
    ## bi-normal: cross of unit tangent and unit curvature
    bnc = udc.cross(ukc)
    ## the parametrization of the boundary of the width-w tubular neighbourhood
    spw, spu = sp.symbols("w, u", real=True)   # width of torus knot, and meridional parameter
    tSurf = c + spw * sp.cos(2 * sp.pi * (spu + kp * kq * spt)) * ukc \
              + spw * sp.sin(2 * sp.pi * (spu + kp * kq * spt)) * bnc
    ## (b) ufuncify
    from sympy.utilities.autowrap import ufuncify
    knotSuf = [ufuncify([spt, spp, spq, spr, spR, spw, spu], tSurf[i]) for i in range(3)]
    knotSnp = sp.lambdify((spt, spp, spq, spr, spR, spw, spu), tSurf, "numpy")
    ## Knot radial thickness: 2*pi*tr is the circumference and kp strands pass
    ## through it, so 2*kp*kt of about 2*pi*tr lets the knot fill the surface,
    ## i.e. kt = pi*tr / (4*kp). Make it bigger or smaller depending on how
    ## much empty space one wants to see.
    kt = (np.pi * tr) / (4 * kp)
    seg = kp * refine   ## segments along the length of the (p,q) torus knot; kp*120 gives a fairly smooth image

    def surf(i, j):   ## evaluate the lambdified surface
        return np.array(knotSnp(float(i) / seg, kp, kq, tr, tR, kt, float(j) / segm)).ravel()

    fp = FloatProgress(min=0, max=100, description="Knot data")
    display(fp)   ## progress indicator
    xyz = np.ndarray((seg + 1, segm + 1, 3))
    for i, j in it.product(range(seg + 1), range(segm + 1)):
        ## put the affine reparametrization here
        xyz[i, j] = surf(i, j)
        fp.value = int(100 * i / (seg + 1))
    fp.close()
    return xyz
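
# A plotting sketch (matplotlib assumed): the returned array is a
# (seg+1, segm+1, 3) grid of surface points, ready for plot_surface.
import matplotlib.pyplot as plt

xyz = torus_dat(2, 3, refine=60, segm=24)   # trefoil-like (2,3) torus knot
ax = plt.figure().add_subplot(projection='3d')
ax.plot_surface(xyz[..., 0], xyz[..., 1], xyz[..., 2])
plt.show()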
def invoke_in_process_pool(num_workers, *funcs):
    if FLAGS.plot:
        progress = FloatProgress(min=0, max=1)
        display(progress)
    done = 0.0
    futures = []
    res = [None] * len(funcs)
    capacity = max(num_workers, 1)   # the same-process executor still needs one slot
    with (SameProcessExecutor() if num_workers <= 0 else
          concurrent.futures.ProcessPoolExecutor(num_workers)) as executor:
        for i, fun in enumerate(funcs):
            inserted = False
            while not inserted:
                if len(futures) < capacity:
                    futures.append((i, executor.submit(fun)))
                    inserted = True
                # Harvest any futures that have already completed.
                for fut in list(futures):
                    try:
                        res[fut[0]] = fut[1].result(0)
                        done += 1
                        if FLAGS.plot:
                            progress.value = done / len(funcs)
                        futures.remove(fut)
                    except concurrent.futures.TimeoutError:
                        pass
                if len(futures) == capacity:
                    time.sleep(1)
        # Wait for the remaining futures to finish.
        for fut in list(futures):
            res[fut[0]] = fut[1].result()
            done += 1
            if FLAGS.plot:
                progress.value = done / len(funcs)
    return res
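
# A minimal usage sketch, assuming FLAGS.plot is defined by the surrounding
# module. The callables must be picklable for the process pool, so use
# functools.partial over a module-level function rather than a lambda.
import functools

def _square(x):
    return x * x

results = invoke_in_process_pool(4, *[functools.partial(_square, i) for i in range(8)])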
def get_depth_data(track_files, track_names, chrom, start, stop, strand, track_type):
    from ipywidgets import FloatProgress
    from IPython.display import display
    from IPython.display import clear_output
    printmd("Loading...")
    f = FloatProgress(min=0, max=100)
    display(f)
    f.value = 0
    if len(track_files) > 1:
        raise NameError("Pick one .bam alignment!")
    for n, track_file in enumerate(track_files):
        for m, track_name in enumerate(track_names):   # renamed from n to avoid shadowing
            if 'My Data: ' not in track_file:
                f.value = f.value + 10
                download_file = ('https://s3-us-west-1.amazonaws.com/graphy101/'
                                 + track_file.split('/')[1] + '/' + track_file.split('/')[2])
                f.value = f.value + 10
                download_file2 = ('https://s3-us-west-1.amazonaws.com/graphy101/'
                                  + track_file.split('/')[1] + '/' + track_file.split('/')[2] + '.bai')
                f.value = f.value + 10
                data1 = urllib.request.urlretrieve(download_file,
                                                   filename='Data/' + track_file.split('/')[2])
                data2 = urllib.request.urlretrieve(download_file2,
                                                   filename='Data/' + track_file.split('/')[2] + '.bai')
                f.value = f.value + 20
                bamfile = pysam.AlignmentFile('Data/' + track_file.split('/')[2],
                                              index_filename='Data/' + track_file.split('/')[2] + '.bai')
            if 'My Data: ' in track_file:
                track_file = track_file.split('/')[2].replace('My Data: ', '')
                bamfile = pysam.AlignmentFile('Data/' + track_file,
                                              index_filename='Data/' + track_file + '.bai')
            f.value = f.value + 25
            depths_data = bamfile.count_coverage(chrom, start, stop)
            depths_data = [a + b + c + d
                           for a, b, c, d in zip(depths_data[0].tolist(), depths_data[1].tolist(),
                                                 depths_data[2].tolist(), depths_data[3].tolist())]
            df = {'pos': np.arange(start, stop), track_name: depths_data}
            df = pd.DataFrame(data=df)
            df = df.set_index('pos')
            del df.index.name
            f.value = f.value + 25
            clear_output()
            return df
def gradients(self, df, batch_size, sess):
    X = get_feature_X(df, maxlen)
    Y = pd.get_dummies(df.is_duplicate)
    sess = self.sess
    start_index = 0
    final_loss = 0
    current_total_trained = 0
    p_bar = FloatProgress(min=0, max=1.0)   # value below is a 0-1 fraction
    display(p_bar)
    start_time = time.time()
    while start_index < X[0].shape[0]:
        temp_x1 = X[0][start_index:start_index + batch_size]
        temp_x2 = X[1][start_index:start_index + batch_size]
        temp_seq_len1 = X[2][start_index:start_index + batch_size]
        temp_seq_len2 = X[3][start_index:start_index + batch_size]
        test_y = Y[start_index:start_index + batch_size]
        feed_dict = {
            self.min_mask1: get_init_min_mask_value(temp_seq_len1),
            self.min_mask2: get_init_min_mask_value(temp_seq_len2),
            self.seq_length1: temp_seq_len1,
            self.seq_length2: temp_seq_len2,
            self.input: temp_x1,
            self.input2: temp_x2,
            self.y: test_y
        }
        current_total_trained += temp_x1.shape[0]
        var_grad = tf.gradients(self.loss, [self.output])[0]
        g = sess.run([var_grad, self.concat_output], feed_dict=feed_dict)
        print("gradient %s" % (g))
        duration = time.time() - start_time
        speed = duration / current_total_trained
        eta = (X[0].shape[0] - current_total_trained) * speed
        p_bar.value = current_total_trained / X[0].shape[0]
        p_bar.description = "%s/%s, eta %s sec" % (current_total_trained, X[0].shape[0], eta)
        start_index += batch_size
        break   # inspect only the first batch
    final_loss = final_loss / X[0].shape[0]
    return final_loss
def progress_iterator(orig_iterator, description):
    """Wrap an iterator so that a progress bar is displayed.

    Parameters
    ----------
    orig_iterator: iterator
        The original iterator. It must implement the __len__ operation so
        that its length can be calculated in advance.
    description: string
        Description will give a text label for the bar.
    """
    progress_widget = FloatProgress(min=0, max=len(orig_iterator) - 1)
    widget = HBox([Label(description), progress_widget])
    display(widget)
    for count, val in enumerate(orig_iterator):
        yield val
        progress_widget.value = count
def progress_iterator(orig_iterator, **kwargs):
    """Wrap an iterator so that a progress bar is displayed.

    Parameters
    ----------
    orig_iterator: iterator
        The original iterator. It must implement the __len__ operation so
        that its length can be calculated in advance.
    kwargs: additional arguments
        Any additional arguments will be passed to the float widget. In
        particular, description will give a text label for the bar.
    """
    widget = FloatProgress(min=0, max=len(orig_iterator) - 1, **kwargs)
    display(widget)
    for count, val in enumerate(orig_iterator):
        yield val
        widget.value = count
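
# A usage sketch for the kwargs variant: extra arguments are forwarded to
# FloatProgress, so a label can be attached via `description`.
for item in progress_iterator(range(100), description='Processing:'):
    pass  # do work per item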
def compile():
    global template
    global modpath
    os.chdir(modpath)
    if os.path.exists("pas.mod"):
        os.remove("pas.mod")
    # universal_newlines=True makes stdout a text stream, so the "" sentinel
    # below actually terminates the readline loop at EOF.
    p = Popen('compile.bat', stdout=PIPE, stderr=STDOUT, shell=True, universal_newlines=True)
    f = FloatProgress(min=0, max=100, description='Compiling Mod files...')
    display(f)
    path, dirs, files = next(os.walk(modpath))
    increment = 100 / len(files)
    for line in iter(p.stdout.readline, ""):
        print(line)
        if line.split(' ', 1)[0] == 'Translating':
            f.value = f.value + increment
    # Wait for the compiled mechanism library to appear.
    while True:
        if os.path.isfile(modpath + '/nrnmech.dll'):
            print("Compiling Successful")
            break
def plot_digit_observations(digit, centroids, n_observation_classes, display_progress=False):
    pen_down_label = n_observation_classes - settings.PEN_DOWN_LABEL_DELTA
    pen_up_label = n_observation_classes - settings.PEN_UP_LABEL_DELTA
    stop_label = n_observation_classes - settings.STOP_LABEL_DELTA
    fig = plt.figure()
    ax = fig.add_subplot(111)
    f = FloatProgress(min=0, max=100)
    if display_progress:
        display(f)
    # Reconstruct the pen curves from the observation sequence.
    curves = []
    current_curve = []
    for observation in digit.observations:
        if observation < pen_down_label:
            point = centroids[observation]
            current_curve.append(point)
        elif observation == pen_up_label:
            if len(current_curve) > 0:
                curves.append(current_curve)
                current_curve = []
    n_points = 0
    for curve in curves:
        n_points += len(curve)
    i = 0
    for curve in curves:
        x_points = []
        y_points = []
        for point in curve:
            x_points.append(point[0])
            y_points.append(point[1])
        f.value = 100.0 * (float(i) / float(n_points))
        i += len(curve)   # count points, not curves, so the bar can reach 100%
        plt.plot(x_points, y_points, linewidth=2.0)
    f.close()
    plt.axis([settings.IMAGE_PLOT_X_MIN, settings.IMAGE_PLOT_X_MAX,
              settings.IMAGE_PLOT_Y_MIN, settings.IMAGE_PLOT_Y_MAX])
    plt.show()
def cepstral_analysis_kappa(self, other, aic_type='aic', Kmin_corrfactor=1.0, bayes_p=False, density_grid=None):
    """Perform the Cepstral Analysis on all blocks.
    Requires ``other``, a class instance carrying the charge current."""
    if self.GUI:
        progbar = FloatProgress(min=0, max=100)
        progbar.description = "0 %"
        display(progbar)
    self.BLOCK_NFREQS = self.BLOCK_SIZE / 2 + 1
    if self.MULTI_COMPONENT:
        print ' N_COMPONENTS = {:10d}'.format(self.N_COMPONENTS)
        # Different number of degrees of freedom!
        self.ck_THEORY_var, self.psd_THEORY_mean = tc.md.cepstral.multicomp_cepstral_parameters(
            self.BLOCK_NFREQS, self.N_COMPONENTS - 1)
    self.bayes_p = bayes_p
    if (self.N_BLOCKS == 1):
        raise NotImplementedError('One block.')
    for L in range(self.N_BLOCKS):
        if self.MULTI_COMPONENT:
            # Different method call!
            self.block[L].compute_kappa(other=other.block[L], DT=self.TSKIP, DT_FS=self.DT_FS,
                                        average_components=True)
            self.block[L].dct = tc.md.CosFilter(self.block[L].logpsd,
                                                ck_theory_var=self.ck_THEORY_var,
                                                psd_theory_mean=self.psd_THEORY_mean,
                                                aic_type=aic_type,
                                                Kmin_corrfactor=Kmin_corrfactor)
            # normalization=self.BLOCK_SIZE removed (personal communication with Loris)
        else:
            # Different method call!
            self.block[L].compute_kappa(other=other.block[L], DT=self.TSKIP, DT_FS=self.DT_FS)
            self.block[L].dct = tc.md.CosFilter(self.block[L].logpsd, aic_type=aic_type,
                                                Kmin_corrfactor=Kmin_corrfactor)   # theory_var=None
        self.block[L].dct.scan_filter_tau()
        if self.bayes_p:
            self.block[L].dct.compute_p_aic(method='ba')
            if density_grid is not None:
                self.density_grid = density_grid
                self.block[L].dct.compute_logtau_density(method='ba', only_stats=False,
                                                         density_grid=density_grid)
            else:
                self.block[L].dct.compute_logtau_density(method='ba', only_stats=True)
        if self.GUI:
            progbar.value = float(L + 1) / self.N_BLOCKS * 100.
            progbar.description = "%5.2f %%" % progbar.value
    if self.GUI:
        progbar.close()
    self.freqs = self.block[0].freqs
    return
def compute_angles_out_of_core(hdfname, uni, bond=True):
    """
    Given an HDF of atom two-body properties, compute angles.

    Atomic two-body data is expected to have been computed (see
    :func:`~exatomic.core.two.compute_atom_two_out_of_core`).

    Args:
        hdfname (str): Path to HDF file containing two-body data
        uni (:class:`~exatomic.core.universe.Universe`): Universe
        bond (bool): Restrict to bond angles (default True)

    Warning:
        If bond is set to False, this process may take a very long time.
    """
    store = pd.HDFStore(hdfname, mode="a")
    f = uni.atom['frame'].unique()   # fixed: the parameter is uni, not u
    n = len(f)
    fp = FloatProgress(description="Computing:")
    display(fp)
    for i, fdx in enumerate(f):
        tdf = store.get("frame_" + str(fdx) + "/atom_two")
        if bond:   # honor the documented bond flag
            tdf = tdf[tdf['bond'] == True]
        indexes = []
        radians = []
        for atom0, group in tdf.groupby("atom0"):
            dx = group['dx'].values.astype(float)
            dy = group['dy'].values.astype(float)
            dz = group['dz'].values.astype(float)
            dr = group['dr'].values.astype(float)
            atom1 = group['atom1'].values.astype(int)
            rad, adx = angles(dx, dy, dz, dr, atom0, atom1)
            indexes.append(adx)
            radians.append(rad)
        indexes = np.concatenate(indexes)
        radians = np.concatenate(radians)
        adf = pd.DataFrame(indexes, columns=("atom0", "atom1", "atom2"))
        adf['angle'] = radians
        store.put("frame_" + str(fdx) + "/atom_angle", adf)
        fp.value = (i + 1) / n * 100
    store.close()
    fp.close()
def float_progress(min_, max_):
    prog = FloatProgress(min=min_, max=max_)
    display(prog)
    for i in linspace(min_, max_, 100):
        time.sleep(0.1)
        prog.value = i
def periodic_nearest_neighbors_by_atom(uni, source, a, sizes, **kwargs):
    """
    Determine nearest neighbor molecules to a given source (or sources) and
    return the data as a dataframe.

    For a simple cubic periodic system with unit cell dimension ``a``,
    clusters can be generated as follows. In the example below, additional
    keyword arguments have been included, as they are almost always required
    in order to correctly identify molecular units semi-empirically.

    .. code-block:: python

        periodic_nearest_neighbors_by_atom(u, [0], 40.0, [0, 5, 10, 50],
                                           dmax=40.0, C=1.6, O=1.6)

    Argument descriptions can be found below. The additional keyword
    arguments, ``dmax``, ``C``, ``O``, are passed directly to the two-body
    computation used to determine (semi-empirically) molecular units. Note
    that although molecules are computed, neighboring molecular units are
    determined by an atom-to-atom criterion.

    Args:
        uni (:class:`~exatomic.core.universe.Universe`): Universe
        source (int, str, list): Integer label or string symbol of source atom
        a (float): Cubic unit cell dimension
        sizes (list): List of slices to create
        kwargs: Additional keyword arguments to be passed to the atom two-body calculation

    Returns:
        dct (dict): Dictionary of sliced universes and nearest neighbor table

    See Also:
        Sliced universe construction can be facilitated by
        :func:`~exatomic.algorithms.neighbors.construct`.
    """
    def sorter(group, source_atom_idxs):
        s = group[['atom0', 'atom1']].stack()
        return s[~s.isin(source_atom_idxs)].reset_index()

    if "label" not in uni.atom.columns:
        uni.atom['label'] = uni.atom.get_atom_labels()
    dct = defaultdict(list)
    grps = uni.atom.groupby("frame")
    ntot = len(grps)
    fp = FloatProgress(description="Slicing:")
    display(fp)
    for i, (fdx, atom) in enumerate(grps):
        if len(atom) > 0:
            uu = _create_super_universe(Universe(atom=atom.copy()), a)
            uu.compute_atom_two(**kwargs)
            uu.compute_molecule()
            if isinstance(source, (int, np.int32, np.int64)):
                source_atom_idxs = uu.atom[(uu.atom.index.isin([source]))
                                           & (uu.atom['prj'] == 13)].index.values
            elif isinstance(source, (list, tuple)):
                source_atom_idxs = uu.atom[uu.atom['label'].isin(source)
                                           & (uu.atom['prj'] == 13)].index.values
            else:
                source_atom_idxs = uu.atom[(uu.atom['symbol'] == source)
                                           & (uu.atom['prj'] == 13)].index.values
            source_molecule_idxs = uu.atom.loc[source_atom_idxs, 'molecule'].unique().astype(int)
            uu.atom_two['frame'] = uu.atom_two['atom0'].map(uu.atom['frame'])
            nearest_atoms = uu.atom_two[(uu.atom_two['atom0'].isin(source_atom_idxs))
                                        | (uu.atom_two['atom1'].isin(source_atom_idxs))
                                        ].sort_values("dr")[['frame', 'atom0', 'atom1']]
            nearest = nearest_atoms.groupby("frame").apply(sorter, source_atom_idxs=source_atom_idxs)
            del nearest['level_1']
            nearest.index.names = ['frame', 'idx']
            nearest.columns = ['two', 'atom']
            nearest['molecule'] = nearest['atom'].map(uu.atom['molecule'])
            nearest = nearest[~nearest['molecule'].isin(source_molecule_idxs)]
            nearest = nearest.drop_duplicates('molecule', keep='first')
            nearest.reset_index(inplace=True)
            nearest['frame'] = nearest['frame'].astype(int)
            nearest['molecule'] = nearest['molecule'].astype(int)
            dct['nearest'].append(nearest)
            for nn in sizes:
                atm = []
                for j, fdx in enumerate(nearest['frame'].unique()):
                    mdxs = nearest.loc[nearest['frame'] == fdx, 'molecule'].tolist()[:nn]
                    mdxs.append(source_molecule_idxs[j])
                    atm.append(uu.atom[uu.atom['molecule'].isin(mdxs)][['symbol', 'x', 'y', 'z', 'frame']].copy())
                dct[nn].append(pd.concat(atm, ignore_index=True))
        fp.value = i / ntot * 100
    dct['nearest'] = pd.concat(dct['nearest'], ignore_index=True)
    for nn in sizes:
        dct[nn] = Universe(atom=pd.concat(dct[nn], ignore_index=True))
    fp.close()
    return dct
sample_pathway_df = pd.DataFrame(np.zeros((number_of_samples, number_of_pathways), dtype=int),
                                 index=mutation_df.index, columns=pathways)

# Now populate this data frame. This is a slow Python loop, hence the progress bar;
# it takes a few minutes on my laptop. The idea is to loop over all gene-pathway
# interactions in the hetnet query. If the gene is in the Cognoma dataset, we grab
# the pathway id in that gene-pathway interaction. We look at Cognoma samples where
# that gene is labeled 1, i.e., at Cognoma samples that have a mutation in that gene,
# and grab the corresponding indices. Then, in the pathway matrix, all those samples
# get the associated pathway tagged as a 1, since they have a mutated gene that
# participates in that pathway.

# In[9]:

i = 0
progress_bar = FloatProgress(min=0, max=len(hetnet_results))
display(progress_bar)
for _, row in hetnet_results.iterrows():
    gene_id = row['gene_id']
    if gene_id in genes_in_both:
        pathway_id = row['pathway_id']
        affected_samples = mutation_df.loc[:, str(gene_id)] == 1
        sample_pathway_df.loc[affected_samples, pathway_id] = 1
    i += 1
    progress_bar.value = i

sample_pathway_df.head()

# Finally, we write to disk. The raw file is about 26MB, so we use bz2 compression.
# The file is no longer tracked due to data/.gitignore.

# In[10]:

path = os.path.join('data', 'pathways.tsv.bz2')
sample_pathway_df.to_csv(path, sep='\t', compression='bz2')
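
# An optional sanity check: the compressed TSV round-trips through pandas,
# which infers bz2 decompression from the file extension.

# In[11]:

pd.read_csv(path, sep='\t', index_col=0).head()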