def print_progress(self):
    """
    Print a progress bar representing the current state of the tasks.
    """
    tasks_number = len(self.tasks_status)
    done = sum(map(lambda x: x == 2, self.tasks_status))
    working = sum(map(lambda x: x == 1, self.tasks_status))
    utils.print_progress(self.print_control, tasks_number, done, working)
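# NOTE: `utils.print_progress` itself is not shown in these snippets, and its
# signature clearly differs between the projects below. As a point of reference
# only, here is a minimal sketch of the bar-style variant several snippets assume
# (print_progress(iteration, total, prefix, suffix, bar_length)); the names and
# formatting in this sketch are assumptions, not the actual helper these projects use.
import sys


def print_progress(iteration, total, prefix='', suffix='', bar_length=50):
    """Hypothetical terminal progress bar (sketch, not the projects' real utility)."""
    fraction = float(iteration) / float(total) if total else 0.0
    filled = int(round(bar_length * fraction))
    bar = '#' * filled + '-' * (bar_length - filled)
    sys.stdout.write('\r%s |%s| %5.1f%% %s' % (prefix, bar, 100.0 * fraction, suffix))
    if iteration >= total:
        sys.stdout.write('\n')
    sys.stdout.flush()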
def import_shapefile_to_db(filepath):
    connection = psycopg2.connect(host=DB_HOSTNAME, database="census_uk",
                                  user="******", password="******")
    cursor = connection.cursor(cursor_factory=psycopg2.extras.DictCursor)
    count = 0
    shp = fiona.open(filepath)
    nb_tracts = len(shp)
    for feature in shp:
        type = feature['type']
        id = feature['id']
        geom = shape(feature['geometry'])
        if geom.geom_type == "Polygon":
            geom = MultiPolygon([geom])
        prop = feature['properties']
        geo_code = prop['geo_code']
        label = prop['label']
        query_string = """INSERT INTO tracts (geo_code, geom)
                          VALUES (%s, ST_GeomFromText(%s, %s))
                          ON CONFLICT DO NOTHING;"""
        data = (geo_code, geom.wkt, EPSG)
        cursor.execute(query_string, data)
        count += 1
        if count % 100 == 0:
            utils.print_progress("Done: %.2f" % (100.0 * count / nb_tracts))
        # if count == 10:
        #     break
    connection.commit()
def calc_rama(grof, xtcf, btime, etime):
    u = Universe(grof, xtcf)
    resname_query = 'resname GLY or resname VAL or resname PRO'
    atoms = u.selectAtoms(resname_query)
    resname = atoms.resnames()[0]  # [0] because .resnames() returns a list of one element
    resid = atoms.resids()[0]      # [0] because .resids() returns a list of one element
    phi_query = ('(resname ACE and name C) or '
                 '(resname GLY or resname VAL or resname PRO and '
                 '(name N or name CA or name C))')
    psi_query = ('(resname GLY or resname VAL or resname PRO and (name N or name CA or name C or name NT)) or '
                 '(resname NH2 and name N)')
    # MDAnalysis will convert the unit of length to angstrom, though in Gromacs the unit is nm
    phi = u.selectAtoms(phi_query)
    psi = u.selectAtoms(psi_query)

    for _ in phi.atoms:
        print _
    for _ in psi.atoms:
        print _

    for ts in u.trajectory:
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break
        yield '{0:.3f} {1:.3f} {2}-{3}\n'.format(
            phi.dihedral(), psi.dihedral(), resname, resid)
        utils.print_progress(ts)
def __gen_hist__(plot_details, verbose=False):
    '''
    generate single histogram
    only runs on samples with an input file defined,
    ie. does not loop through daughters
    not really designed to be called by analysers
    '''
    log.debug('in get_hist for %s' % plot_details.sample.name)

    ## add the details from the sample
    #plot_details += plot_details.sample.plot_details

    s = plot_details.sample
    vd = plot_details.var_details
    sel = plot_details.get_selector()
    weights = plot_details.get_weights()
    target_lumi = plot_details.target_lumi

    if not s.is_active():
        log.warn('failed to gen hist %s for sample: %s' % (vd.name, s.name))
        return None

    ## create hist
    h = plot_details.new_hist(s)
    h.samples = s

    ## initialise sample
    s.initialise()

    ## return empty hist if no events
    if not s.tree.GetEntries():
        return h
    s.switch_on_branches()

    ## selection
    event_list = sel.select(s) if sel else None

    ## initialise variables
    s.prepare(plot_details)

    ## process events
    entries = event_list.GetN() if sel else s.tree.GetEntries()
    for i in xrange(entries):
        ientry = event_list.GetEntry(i) if sel else i
        s.tree.GetEntry(ientry)

        ## calculate event weight
        weight = weights.weight() if weights else 1.

        ## fill hists
        val = vd.var.calc_vals()[0]
        h.Fill(val, weight)

        frac = float(i) / float(entries) if entries else 0.0
        if verbose:
            utils.print_progress(frac, title='%s: ' % s.name)
    if verbose:
        utils.clear_progress()

    #s.style_hist(h)
    if target_lumi:
        s.scale_hist(h, target_lumi)
    return h
def extend_balancing_classes(X, y, aug_intensity=0.5, counts=None):
    num_classes = 43
    _, class_counts = np.unique(y, return_counts=True)
    max_c = max(class_counts)
    total = max_c * num_classes if counts is None else np.sum(counts)
    X_extended = np.empty([0, X.shape[1], X.shape[2], X.shape[3]], dtype=X.dtype)
    y_extended = np.empty([0], dtype=y.dtype)
    print("Extending dataset using augmented data (intensity = {})".format(
        aug_intensity))

    for c, c_count in zip(range(num_classes), class_counts):
        max_c = max_c if counts is None else counts[c]
        X_source = (X[y == c] / 255.).astype(np.float32)
        y_source = y[y == c]
        X_extended = np.append(X_extended, X_source, axis=0)
        for i in range((max_c // c_count) - 1):
            batch_iterator = AugmentedSignsBatchIterator(
                batch_size=X_source.shape[0], p=1.0, intensity=aug_intensity)
            for x_batch, _ in batch_iterator(X_source, y_source):
                X_extended = np.append(X_extended, x_batch, axis=0)
                utils.print_progress(X_extended.shape[0], total)
        batch_iterator = AugmentedSignsBatchIterator(
            batch_size=max_c % c_count, p=1.0, intensity=aug_intensity)
        for x_batch, _ in batch_iterator(X_source, y_source):
            X_extended = np.append(X_extended, x_batch, axis=0)
            utils.print_progress(X_extended.shape[0], total)
            break
        added = X_extended.shape[0] - y_extended.shape[0]
        y_extended = np.append(y_extended, np.full((added), c, dtype=int))

    return ((X_extended * 255.).astype(np.uint8), y_extended)
def load_features(feature_store, is_blind):
    def _make_id(scanId, viewpointId):
        return scanId + '_' + viewpointId

    # if the tsv file for image features is provided
    if feature_store:
        tsv_fieldnames = ['scanId', 'viewpointId', 'image_w', 'image_h', 'vfov', 'features']
        features = {}
        with open(feature_store, "r") as tsv_in_file:
            print('Reading image features file %s' % feature_store)
            reader = list(csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=tsv_fieldnames))
            total_length = len(reader)
            print('Loading image features ..')
            for i, item in enumerate(reader):
                image_h = int(item['image_h'])
                image_w = int(item['image_w'])
                vfov = int(item['vfov'])
                long_id = _make_id(item['scanId'], item['viewpointId'])
                features[long_id] = np.frombuffer(base64.b64decode(item['features']),
                                                  dtype=np.float32).reshape((36, 2048))
                if is_blind:
                    features[long_id] = np.random.rand(36, 2048)
                print_progress(i + 1, total_length, prefix='Progress:',
                               suffix='Complete', bar_length=50)
    else:
        print('Image features not provided')
        features = None
        image_w = 640
        image_h = 480
        vfov = 60

    return features, (image_w, image_h, vfov)
def image_curation(num_threads=24):
    subsets = ['train', 'val']
    for subset in subsets:
        subdir = join(base_path, subset)
        with open(join(base_path, subset, 'list.txt')) as f:
            videos = f.readlines()
        if debug:
            videos = videos[:20]
        n_videos = len(videos)
        """
        for video in videos:
            crop_video(subdir, subset, video.replace("\n", ""))
        """
        with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
            fs = [
                executor.submit(crop_video, subdir, subset, video.replace("\n", ""))
                for video in videos
            ]
            for i, f in enumerate(futures.as_completed(fs)):
                utils.print_progress(i, n_videos, prefix=subset,
                                     suffix='Done ', barLength=40)
def count_interactions(A):
    logger.debug('loading {0}'.format(A.grof))
    univ = Universe(A.grof)
    logger.debug('loaded {0}'.format(A.grof))

    pro_atoms = univ.selectAtoms(
        'protein and not resname ACE and not resname NH2')
    pl = pro_atoms.residues.numberOfResidues()
    # +1: for missing resname ACE, such that it's easier to proceed in the next
    # step

    logger.debug('loading {0}, {1}'.format(A.grof, A.xtcf))
    u = Universe(A.grof, A.xtcf)
    logger.debug('loaded {0}, {1}'.format(A.grof, A.xtcf))

    # Just for reference to the content of query when the code was first
    # written and used
    # query = ('(resname PRO and (name CB or name CG or name CD)) or'
    #          '(resname VAL and (name CG1 or name CG2)) or'
    #          '(resname GLY and name CA) or'
    #          '(resname ALA and name CB)')
    query = A.query
    atoms = u.selectAtoms(query)
    logger.info('Number of atoms selected: {0}'.format(atoms.numberOfAtoms()))

    # MDAnalysis will convert the unit of length to angstrom, though in Gromacs
    # the unit is nm
    cutoff = A.cutoff * 10
    nres_away = A.nres_away
    btime = A.btime
    etime = A.etime

    nframe = 0
    unun_map = None
    for ts in u.trajectory:
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break
        nframe += 1
        map_ = np.zeros((pl + 1, pl + 1))  # map for a single frame
        for i, ai in enumerate(atoms):
            ai_resid = ai.resid
            for j, aj in enumerate(atoms):
                aj_resid = aj.resid
                # to avoid counting the same pair twice,
                # the 2 resids cannot be neighbors
                if i < j and aj_resid - ai_resid >= nres_away:
                    d = np.linalg.norm(ai.pos - aj.pos)
                    if d <= cutoff:
                        # -1: resid in MDAnalysis starts from 1
                        map_[ai_resid - 1][aj_resid - 1] += 1
        if unun_map is None:
            unun_map = map_
        else:
            unun_map = unun_map + map_
        utils.print_progress(ts)
    sys.stdout.write("\n")
    return unun_map / float(nframe)
def image_curation(num_threads=24):
    subsets = os.listdir(train_base_path)
    for subset in subsets:
        subdir = join(train_base_path, subset)
        videos = os.listdir(subdir)
        if debug:
            videos = videos[:1]
        n_videos = len(videos)
        """
        for video in videos:
            crop_video(subdir, subset, video)
        """
        with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
            fs = [
                executor.submit(crop_video, subdir, subset, video)
                for video in videos
            ]
            for i, f in enumerate(futures.as_completed(fs)):
                utils.print_progress(i, n_videos, prefix=subset,
                                     suffix='Done ', barLength=40)
def count_interactions(grof, xtcf, btime, etime, cutoff):
    cutoff = cutoff * 10  # * 10: convert from nm to angstrom to work with MDAnalysis
    u = Universe(grof, xtcf)
    query = ('(resname PRO and (name CB or name CG or name CD)) or'
             '(resname VAL and (name CG1 or name CG2)) or'
             '(resname GLY and name CA) or'
             '(resname ALA and name CB)')
    # MDAnalysis will convert the unit of length to angstrom, though in Gromacs the unit is nm
    atoms = u.selectAtoms(query)
    for ts in u.trajectory:
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break
        numcount = 0
        for i, ai in enumerate(atoms):
            for j, aj in enumerate(atoms):
                # to avoid counting the same pair twice,
                # the 2 resids cannot be neighbors
                if i < j and abs(ai.resid - aj.resid) >= 2:
                    d = np.linalg.norm(ai.pos - aj.pos)
                    if d <= cutoff:
                        numcount += 1
        yield '{0:10.0f}{1:8d}\n'.format(ts.time, numcount)
        utils.print_progress(ts)
def calc_dihedral(grof, xtcf, btime, etime):
    # xtcf=None, so if only a gro file is passed, it still works
    univer = Universe(grof, xtcf)
    tets = select_dihedrals(univer)  # there should be a better name for tet

    # Write headers, hdr: header
    hdrs = []
    for k, tet in enumerate(tets):
        # ca1 + ca2 + (NO. peptide-bond)
        hdr = (utils.swap_aa_name(tet[0].resname) +
               utils.swap_aa_name(tet[-1].resname) +
               "{0:02d}".format(k + 1))
        # hdrs.append("{0:8s}".format(hdr))
        hdrs.append("{0:4s}".format(hdr))
    yield '#{0:8s}{1}\n'.format('t(ps)', ' '.join(hdrs))

    if not xtcf:
        yield calc_dih(tets)
    else:
        for ts in univer.trajectory:
            if btime > ts.time:
                continue
            if etime > 0 and etime < ts.time:
                break
            res = calc_dih(tets, ts.time)
            yield res
            utils.print_progress(ts)
def ISTA(fx, gx, gradf, proxg, params):
    method_name = 'ISTA'
    print_start_message(method_name)

    # Parameter setup
    alpha = 1 / params['Lips']
    info = []
    x = params['x0']

    for k in range(params['maxit']):
        # Perform proximal gradient step
        x = proxg(x - alpha * gradf(x), alpha)

        # Record convergence
        F_x = fx(x) + gx(x)
        info.append(F_x)

        # Early stopping
        if stop({'F_x': F_x}, params):
            print('Early stop')
            return x, info

        if k % params['iter_print'] == 0:
            print_progress(k, params['maxit'], F_x, fx(x), gx(x))

    return x, info
def image_curation(num_threads=24):
    subset_dirs = sorted(
        glob.glob(join(base_path, TRAIN_SUBSET_PREFIX + '*') + "[!zip]"))
    #subset_dirs.append(join(base_path, TEST_SUBSET_PREFIX))
    for subdir in subset_dirs:
        videos = sorted(os.listdir((join(subdir, 'videos'))))
        if debug:
            videos = videos[:1]
        n_videos = len(videos)
        subset = subdir.split('/')[-1]
        #for video in videos:
        #    crop_video(subdir, subset, video)
        with futures.ProcessPoolExecutor(max_workers=num_threads) as executor:
            fs = [
                executor.submit(crop_video, subdir, subset, video)
                for video in videos
            ]
            for i, f in enumerate(futures.as_completed(fs)):
                utils.print_progress(i, n_videos, prefix=subset,
                                     suffix='Done ', barLength=40)
def main(args):
    """Main function for the testing pipeline

    :args: commandline arguments
    :returns: None
    """
    ##########################################################################
    #                            Basic settings                              #
    ##########################################################################
    exp_dir = 'experiments'
    model_dir = os.path.join(exp_dir, 'models')
    model_file = os.path.join(model_dir, 'best.pth')

    val_dataset = dataset.NCovDataset('data/', stage='val')
    val_loader = torch.utils.data.DataLoader(
        val_dataset, batch_size=1, shuffle=False,
        num_workers=11, drop_last=False)

    cov_net = model.COVNet(n_classes=args.n_classes)
    if torch.cuda.is_available():
        cov_net.cuda()
    state = torch.load(model_file)
    cov_net.load_state_dict(state.state_dict())

    with torch.no_grad():
        val_loss, metric_collects = evaluate_model(cov_net, val_loader)

    prefix = '******Evaluate******'
    utils.print_progress(mean_loss=val_loss, metric_collects=metric_collects,
                         prefix=prefix)
def get_atom_and_bond_features(mols, mol_ids, dist_matrices):
    atom_features, bond_features = [], []
    bond_idx, atom_to_m_id, bond_to_m_id = [], [], []

    print('Get atom and bond features.')
    for it, m_name in enumerate(mols):
        print_progress(it, C.N_MOLS)
        m_id, mol = mol_ids[m_name], mols[m_name]
        dist_matrix = dist_matrices[m_name]
        n_atoms, n_bonds = mol.GetNumAtoms(), mol.GetNumBonds()

        atom_features.append(get_atom_features(mol, dist_matrix))
        e_feats, b_idx = get_bond_features(mol, dist_matrix)
        bond_features.append(e_feats)
        bond_idx.append(b_idx)

        atom_to_m_id.append(np.repeat(m_id, n_atoms))
        bond_to_m_id.append(np.repeat(m_id, n_bonds))

    atom_features = pd.DataFrame(np.concatenate(atom_features),
                                 columns=C.ATOM_FEATS)
    bond_features = pd.DataFrame(np.concatenate(bond_features),
                                 columns=C.BOND_FEATS)
    bond_idx = np.concatenate(bond_idx)
    bond_features['idx_0'] = bond_idx[:, 0]
    bond_features['idx_1'] = bond_idx[:, 1]
    atom_features['molecule_id'] = np.concatenate(atom_to_m_id)
    bond_features['molecule_id'] = np.concatenate(bond_to_m_id)
    return atom_features, bond_features
def load_features(feature_store, whether_img_feat):
    def _make_id(scanId, viewpointId):
        return scanId + '_' + viewpointId

    # if the tsv file for image features is provided
    if feature_store and whether_img_feat:
        tsv_fieldnames = ['scanId', 'viewpointId', 'image_w', 'image_h', 'vfov', 'features']
        features = {}
        with open(feature_store, "r") as tsv_in_file:
            print('Reading image features file %s' % feature_store)
            reader = list(csv.DictReader(tsv_in_file, delimiter='\t', fieldnames=tsv_fieldnames))
            #reader = reader[1:]
            total_length = len(reader)
            print('Loading image features ..')
            for i, item in enumerate(reader):
                image_h = int(item['image_h'])
                image_w = int(item['image_w'])
                vfov = int(item['vfov'])
                long_id = _make_id(item['scanId'], item['viewpointId'])
                features[long_id] = np.frombuffer(base64.b64decode(item['features']),
                                                  dtype=np.float32).reshape((36, 2048))
                print_progress(i + 1, total_length, prefix='Progress:',
                               suffix='Complete', bar_length=50)
        #features['17DRP5sb8fy_10c252c90fa24ef3b698c6f54d984c5c'] = np.zeros((36, 152)
    else:
        print('Image features not provided')
        features = None
        image_w = 640
        image_h = 480
        vfov = 60

    return features, (image_w, image_h, vfov)
def main():
    raw_list = csv_to_list(csv_file)[:100]
    total_len = len(raw_list)
    counter = 0
    result_dict = dict()
    print "Commencing Web Scraping..."
    start_time = time.time()
    for raw_link in raw_list:
        try:
            raw_link = raw_link[0]
            whois_link = "http://www.whois.com/whois/" + raw_link
            ipaddress_link = "http://" + raw_link + ".ipaddress.com/"
            whois_soup = link_to_lxmlsoup(whois_link)
            ipaddress_soup = link_to_lxmlsoup(ipaddress_link)
            result_dict.setdefault('Raw Link', []).append(str(raw_link))
            result_dict = whois_parser(whois_soup, result_dict)
            result_dict = ipaddress_parser(ipaddress_soup, result_dict)
            counter, total_len = print_counter(counter, total_len)
            if counter % 400 == 0:
                print "Commencing 30 Second Sleep after 400 iterations"
                time.sleep(30)
            time_elapsed = time.time() - start_time
            print_progress(time_elapsed, counter, total_len)
        except:
            dict_to_json(result_dict, 'output.json')
            dict_to_csv(result_dict, 'output.csv')
            print "Unexpected Error", sys.exc_info()[0]
            raise
    dict_to_json(result_dict, 'output.json')
    dict_to_csv(result_dict, 'output.csv')
def read_data(data_dir, max_files=MAX_NUM_FILES):
    # prepare
    data = []

    # sort files for reproducibility
    files = os.listdir(data_dir)
    files = sorted(files)
    random.Random(SHUFFLE_SEED).shuffle(files)

    if max_files > 0:
        files = files[:int(max_files)]

    print('Reading files...')
    for index, file_name in enumerate(files):
        if not index % PROGRESS_ITER:
            print_progress(index, files)
        file_path = os.path.join(data_dir, file_name)
        if os.path.isfile(file_path):
            # read array and parse all elements to float, keep NaNs as nan value
            contents = pd.read_csv(file_path, delimiter='|').values
            patient = PatientData(sequence_x=contents[:, :-1],
                                  sequence_y=contents[:, -1:])
            data.append(patient)
    print_progress(len(files), files)

    return data
def pretrain(self, env):
    test_s, test_q_c, test_w = self._get_batch(env, config.PRETRAIN_BATCH)

    last_loss = 9999.
    for i in range(config.PRETRAIN_EPOCHS):
        utils.print_progress(i, config.PRETRAIN_EPOCHS, step=100)

        # print loss
        if utils.is_time(i, 100):
            lr = config.PRETRAIN_LR / (1 + i / 100)
            self.model.set_lr(lr)

            q = self.model(test_s)
            loss_c = self.model.get_loss_c(q, test_q_c, test_w)
            print("\nLoss: {:.4e} LR: {:.2e}".format(loss_c.data.item(), lr))

            if last_loss <= loss_c:  # stop early
                break
            last_loss = loss_c

        s, q_c, w = self._get_batch(env, config.PRETRAIN_BATCH)
        self.model.train_c(s, q_c, w)

    self.model_.copy_weights(self.model, rho=1.0)
def main(args_dict):
    # Extract configuration
    MK = args_dict['MK']

    # Construct Robust Bayesian regression model
    sigma = 2 * np.ones(1)
    bounder = robustbayesregr.Bounder()
    splitter = robustbayesregr.Splitter()
    proposal = robustbayesregr.IsotropicGaussian(1, sigma)
    np.random.seed(0)
    x, y = robustbayesregr.generate_data(1000)
    target = robustbayesregr.CauchyRegression(x, y, sigma)

    # Obtain MK samples (and their corresponding MAP values) using A* sampling implementation
    samples = np.empty((MK)).squeeze()
    MAPs = []
    for i in range(MK):
        stream = astar.astar_sampling_iterator(target, proposal, bounder, splitter)
        X, G = stream.next()
        samples[i] = X
        MAPs.append(G[0] - EULER)
        if i % 1 == 0:
            print_progress('Sampled %d / %d' % (i + 1, MK))
    print('')
    lnZ = float(np.log(target.z()))

    # Dump true ln(Z) and MAP values to JSON file
    data = {'lnZ': lnZ, 'MAPs': MAPs}
    savepath = 'data/astar_rbr_MK%d.json' % (MK)
    json_dump(data, savepath, indent=None)
    print('Saved %d samples to %s' % (len(MAPs), savepath))
def estimate_MSE_vs_alpha(transform, Ms, alphas, K):
    # Without loss of generality
    Z = 1
    tZ = transform(Z)

    # Estimate MSEs by constructing estimators K times
    MSEs = np.empty((len(Ms), len(alphas)))
    MSEs_stdev = np.empty((len(Ms), len(alphas)))
    for Mi, M in enumerate(Ms):
        # Compute means (K x alphas) in a loop, as otherwise
        # this runs out of memory with K = 100,000.
        means = np.empty((K, len(alphas)))
        for ai, alpha in enumerate(alphas):
            Ws = np.power(np.random.exponential(1.0, size=(K, M)), alpha)  # (K, M)
            means[:, ai] = np.mean(Ws, axis=1)
            print_progress('M = %d: done %.0f%%' % (M, 100.0 * (ai + 1) / len(alphas)))
        print('')

        g = np.power(gamma(1.0 + alphas), 1.0 / alphas)           # (alphas)
        tZ_hats = transform(g * np.power(means, -1.0 / alphas))   # (K, alphas)
        SEs = (tZ_hats - tZ) ** 2                                 # (K, alphas)
        MSEs[Mi] = np.mean(SEs, axis=0)                           # (alphas)
        MSEs_stdev[Mi] = np.std(SEs, axis=0) / np.sqrt(K)         # (alphas)

    return MSEs, MSEs_stdev
def run(self, content_urls):
    responses = []
    total = len(content_urls)
    count = 0
    start = time.time()
    for url in content_urls:
        rrl = utils.RequestRateLimiterFactory(self.request_rate)()
        if count == 0:
            print(f'[WARNING] rate_limited _call_api(): max_per_Second:{self.request_rate}')
        try:
            request_url = f'{url}{self.query_params}' if self.query_params is not None else url
            res = rrl.make_rate_limited_request(request_url, self.request_verb, headers=self.headers)
            parsed_res = self.res_callback(res=res, url=url, remaining_count=total - count, rrl=rrl)
        except Exception as e:
            # TODO: could provide 'skip' vs 'abandon' option for exceptions
            print(f'[WARNING] Exception thrown - skipping {url}')
            print(e)
            parsed_res = []
        count += 1
        responses.append(parsed_res)
        utils.print_progress(count, total, urlparse(url).hostname)
    end = time.time()
    print(f'{sum([len(res) for res in responses])} items scraped from {count} urls in {end-start:.2f}s.')
    return responses
def ista(fx, gx, gradf, proxg, params):
    method_name = 'ISTA'
    print_start_message(method_name)
    tic = time.time()

    # Parameter setup
    lmbd = params['lambda']
    alpha = 1 / params['Lips']
    X = params['x0']

    run_details = {'X_final': None, 'conv': np.zeros(params['maxit'] + 1)}
    run_details['conv'][0] = fx(params['x0']) + lmbd * gx(params['x0'])

    for k in range(1, params['maxit'] + 1):
        # Perform proximal gradient step
        X = proxg(X - alpha * gradf(X), alpha * lmbd)

        # Record convergence
        run_details['conv'][k] = fx(X) + lmbd * gx(X)

        if k % params['iter_print'] == 0:
            print_progress(k, params['maxit'], run_details['conv'][k], fx(X), gx(X))

    run_details['X_final'] = X
    print_end_message(method_name, time.time() - tic)
    return run_details
def get_atom_and_bond_features(mols, mol_ids, dist_matrices):
    # mol_ids: {}
    atom_features, bond_features = [], []
    bond_idx, atom_to_m_id, bond_to_m_id = [], [], []

    print('Get atom and bond features.')
    for it, m_name in enumerate(mols):
        print_progress(it, C.N_MOLS)
        m_id, mol = mol_ids[m_name], mols[m_name]  # m_id: int
        dist_matrix = dist_matrices[m_name]  # distance matrix of the molecule
        # number of atoms and bonds taken from the mol structure:
        # mol.GetNumAtoms(), mol.GetNumBonds()
        n_atoms, n_bonds = mol.GetNumAtoms(), mol.GetNumBonds()

        atom_features.append(get_atom_features(mol, dist_matrix))  # atom features for each molecule
        e_feats, b_idx = get_bond_features(mol, dist_matrix)  # bond features for each molecule
        bond_features.append(e_feats)
        bond_idx.append(b_idx)  # b_idx: array(n_bonds, 2)

        atom_to_m_id.append(np.repeat(m_id, n_atoms))  # repeat the molecule id once per atom, [array, array, ...]
        bond_to_m_id.append(np.repeat(m_id, n_bonds))  # repeat the molecule id once per bond

    atom_features = pd.DataFrame(
        np.concatenate(atom_features), columns=C.ATOM_FEATS)  # atom_features: [np.array(n_atoms, atom_feats), ...]
    bond_features = pd.DataFrame(
        np.concatenate(bond_features), columns=C.BOND_FEATS)  # bond_features: [np.array(n_bonds, bond_feats), ...]
    bond_idx = np.concatenate(bond_idx)  # bond_idx: [np.array(n_bonds, 2), ...]
    bond_features['idx_0'] = bond_idx[:, 0]  # add the indices of the bonded atoms to the bond_features DataFrame
    bond_features['idx_1'] = bond_idx[:, 1]
    atom_features['molecule_id'] = np.concatenate(atom_to_m_id)  # add the molecule_id column to atom_features
    bond_features['molecule_id'] = np.concatenate(bond_to_m_id)  # add the molecule_id column to bond_features
    return atom_features, bond_features  # return the atom_features and bond_features DataFrames
def __init__(
    self,
    opts,
    features,
    img_spec,
    batch_size=64,
    seed=10,
    splits=["train"],
    tokenizer=None,
):
    self.env = PanoEnvBatch(features, img_spec, batch_size=batch_size)
    self.data = []
    self.scans = []
    self.opts = opts

    print("Loading {} dataset".format(splits[0]))

    json_data = load_datasets(splits)
    total_length = len(json_data)

    # iteratively load data into system memory
    for i, item in enumerate(json_data):
        if not is_experiment() and i >= 20:
            break  # if this is in developing mode, load only a small amount of data

        # Split multiple instructions into separate entries
        for j, instr in enumerate(item["instructions"]):
            self.scans.append(item["scan"])
            new_item = dict(item)
            new_item["instr_id"] = "%s_%d" % (item["path_id"], j)
            new_item["instructions"] = instr
            if tokenizer:
                if (
                    "instr_encoding" not in item
                ):  # we may already include 'instr_encoding' when generating synthetic instructions
                    new_item["instr_encoding"] = tokenizer.encode_sentence(instr)
                else:
                    new_item["instr_encoding"] = item["instr_encoding"]
            self.data.append(new_item)
        print_progress(
            i + 1,
            total_length,
            prefix="Progress:",
            suffix="Complete",
            bar_length=50,
        )

    self.scans = set(self.scans)
    self.splits = splits
    self.seed = seed
    random.seed(self.seed)
    random.shuffle(self.data)
    self.ix = 0
    self.batch_size = batch_size
    self._load_nav_graphs()
    print("R2RBatch loaded with %d instructions, using splits: %s"
          % (len(self.data), ",".join(splits)))
def generate_svg_stack(dataset_name, X, classIds, n, data_size):
    outfile = '../data/imgs/{}.svg'.format(dataset_name)
    with open(outfile, "w") as svgFile:
        svgFile.write(svgMetaData)
        for i in range(n):
            utils.print_progress(i, n)
            figData = generate_figure_data(X[i], classIds[i], data_size)
            svgFile.write(svgImgTag.format(i, i, figData))
        svgFile.write("</svg>")
def train(self, inputs, targets, lr=1, batch_size=30, epochs=100, plot=False, kernel='linear'):
    self.batch_size = batch_size

    # init the kernel
    self.set_kernel(kernel)

    # set optimization method (Gradient Descent)
    self.optimization = tf.train.GradientDescentOptimizer(lr)
    self.training_step = self.optimization.minimize(self.loss)
    self.init = tf.global_variables_initializer()
    self.session.run(self.init)

    # set training data
    train_inputs, train_target = inputs, targets

    # performance tracking
    train_loss_result, train_accuracy_result = [], []

    # for each epoch
    for i in range(epochs):
        # generate random indexes for each batch
        batch_index = np.random.choice(len(train_inputs), size=batch_size)

        self.session.run(self.training_step,
                         feed_dict={self.inputs: train_inputs[batch_index],
                                    self.target: train_target[:, batch_index]})

        # if plotting, record every epoch
        if plot:
            # record accuracy
            train_accuracy, train_loss = self.generate_step_tracking_data(
                train_inputs[batch_index], train_target[:, batch_index])
            train_accuracy_result.append(train_accuracy)
            train_loss_result.append(train_loss)

        if (i + 1) % (epochs / 5) == 0:
            # if not plotting, get intermittent accuracy and loss
            if not plot:
                # record accuracy
                train_accuracy, train_loss = self.generate_step_tracking_data(
                    train_inputs[batch_index], train_target[:, batch_index])
            utl.print_progress(i, epochs, train_loss, train_accuracy)

    # plot results
    if plot:
        if not self.features == 2:
            print('Plotting only supported for 2 feature data sets... skipping output')
        else:
            utl.plot_loss(train_loss_result)
            utl.plot_accuracy(train_accuracy_result)

            grid = utl.generate_grid(train_inputs)
            grid_predictions = self.session.run(self.prediction,
                                                feed_dict={self.inputs: train_inputs[batch_index],
                                                           self.target: train_target[:, batch_index],
                                                           self.grid: grid})
            # plot the result grid
            utl.plot_result(grid_predictions, inputs, targets)

    # commit data points for the last support vectors used
    self.support_vector_data = [train_inputs[batch_index], train_target[:, batch_index]]
def evaluate_checkpoint(P, postfix, data, word2id, checkpoint_fname, epoch, device):
    """ """
    # Check for checkpoints
    _ = load_checkpoint(P, model, opt, device, checkpoint_fname)

    response_filename = "{}response_str_{}.txt".format(P.EVAL_DIR, postfix)
    if os.path.isfile(response_filename):
        print("Skipping {}, it already exists.".format(response_filename))
        return

    saver_response_str = sentences_saver(response_filename)

    print()
    total_decoder_loss = 0
    total_saliency_loss = 0

    start_time = time()
    for batch_num, batch in enumerate(data):
        # Get batch
        input, target, templates, target_saliencies = unpack_batch(batch, device)

        saliency, response = model(input, target, templates)

        decoder_target = [t[1:] for t in target]  # Cut <BOS> from target
        decoder_loss = torch.stack([
            decoder_loss_fn(res, tar)
            for res, tar in zip(response, decoder_target)
        ]).mean()

        if P.USE_BILINEAR:
            # Only when the bilinear is used, there is a saliency loss.
            saliency_loss = torch.stack([
                saliency_loss_fn(sal, true_sal)
                for sal, true_sal in zip(saliency, target_saliencies)
            ]).mean()
            total_saliency_loss += saliency_loss.item()

        total_decoder_loss += decoder_loss.item()

        for inp, templ, targ in zip(input, templates, target):
            response, _ = model.respond(device, word2id, [inp], [templ], max_length=50)
            # Write the results to txt files
            saver_response_str.store_sentence(word2id.id2string(response))

        print_progress("Evaluating: ", P, epoch - 1, batch_num, len(data),
                       total_saliency_loss / (batch_num + 1),
                       total_decoder_loss / (batch_num + 1), start_time)
    print()

    saver_response_str.write_to_file()
def run(self, filename):
    firmwarefilename = filename
    # strip the ".new.dat" suffix to derive the base name
    # (str has no .remove(); .replace() is the intended call)
    name = firmwarefilename.replace(".new.dat", "")
    transferfilename = name + ".transfer.list"
    if not os.path.exists(transferfilename):
        print(f"Couldn't find needed {transferfilename}.")
        exit(0)
    with open(transferfilename, 'r') as tfr:
        with open(firmwarefilename, 'rb') as qr:
            version = int(tfr.readline().replace("\n", ""))
            if version > 3:
                print(f"Error, version {str(version)} not supported.")
                exit(0)
            with open(name + ".bin", "wb") as qw:
                totalblocks = int(tfr.readline().replace("\n", ""))
                blocksize = 4096
                buffersize = 0x200000
                ip = tfr.readline()
                command = ip.split(" ")[0]
                ip = ip.split(" ")[1]
                values = ip.split(",")
                print_progress(0, 100, prefix='Progress:', suffix='Complete', bar_length=50)
                if command == "new":
                    count = int(values[0])
                    old = 0
                    # integer division: the values come in (start, end) pairs
                    for i in range(0, count // 2):
                        start = int(values[1 + (i * 2)])
                        end = int(values[2 + (i * 2)])
                        length = (end - start) * blocksize
                        for pos in range(0, (blocksize * start), 4096):
                            qw.write(b"\x00" * 4096)
                        total = length
                        while length > 0:
                            size = buffersize
                            if size > length:
                                size = length
                            buffer = qr.read(size)
                            qw.write(buffer)
                            length -= size
                            prog = int(float(i) / float(total) * float(100))
                            if (prog > old):
                                print_progress(prog, 100, prefix='Progress:',
                                               suffix='Complete', bar_length=50)
                                old = prog
                elif command == "erase":
                    pass
def main():
    # get all image names
    images = glob.glob('{}/*.jpg'.format(PATH_TO_IMAGES))

    for i, image in enumerate(images):
        rotate_image(image, 'right')
        rotate_image(image, 'left')
        rotate_image(image)
        print_progress((i + 1) / len(images))

    print('\nRotated {} images'.format(len(images)))
def clear_table(db, name_arr=[]):
    print('Cleaning Tables')
    c = db.cursor()
    total = len(name_arr)
    count = 0
    for n in name_arr:
        count += 1
        c.execute("DELETE FROM " + n)
        c.execute("ALTER TABLE " + n + " AUTO_INCREMENT = 1")
        print_progress(count, total)
    db.commit()
def sequence_spacing(grof, xtcf, btime, etime, peptide_length, atom_sel):
    u = Universe(grof, xtcf)

    # this selection part should be better customized
    # here, only backbone atoms are used; u.selectAtoms doesn't
    # include hydrogen atoms
    # REMEMBER: ARGS verification should be done in main ONLY!

    # range works like this:
    # in MDAnalysis, resid starts from 1; in sequence_spacing.py, we don't count
    # the C- and N-termini, so it's from 2 to peptide_length + 2
    residues = [u.selectAtoms(atom_sel.format(i))
                for i in range(2, peptide_length + 2)]

    ijdist_dict = {}
    for ts in u.trajectory:
        # btime, etime default to 0; if etime is 0, loop till the end of the
        # trajectory
        if btime > ts.time:
            continue
        if etime > 0 and etime < ts.time:
            break

        # the good stuff
        for i, resi in enumerate(residues):
            for j, resj in enumerate(residues):
                # to remove duplicates since resi & resj are within the same peptide
                if i < j:
                    dij = abs(i - j)
                    d_atomi_atomj = []
                    # loop through every atom in both residues
                    for atomi in resi:
                        for atomj in resj:
                            d_atomi_atomj.append(
                                np.linalg.norm(atomi.pos - atomj.pos))
                    # add the result to the dictionary
                    ij_dist = np.average(d_atomi_atomj)  # distance between i and j
                    if dij not in ijdist_dict.keys():
                        ijdist_dict[dij] = [ij_dist]
                    else:
                        ijdist_dict[dij].append(ij_dist)
        utils.print_progress(ts)
    return ijdist_dict
if search_term:
    params["jql"] = params["jql"] + " AND text~'%s'" % search_term
if ticket_id:
    params["jql"] = params["jql"] + " AND issue='%s'" % ticket_id
params["maxResults"] = config._MAX_SEARCH_RESULTS
params["fields"] = ["summary", ]

search_result = utils.get_request_auth(config._SEARCH, params)

if "issues" in search_result and len(search_result["issues"]) != 0:
    print("\nTicket found:")
    issues = search_result["issues"]
    for issue in issues:
        print("%s -> %s" % (issue["key"], issue["fields"]["summary"]))

    print("\nWould you like to tag/label the above ticket(s) with '%s'?" % search_term)
    prompt = input("Proceed? [y/n] ")
    if prompt == "n":
        pass
    elif prompt == "y":
        for index, issue in enumerate(issues, start=1):
            payload = utils.add_label_payload(search_term)
            if utils.put_request_auth(config._ISSUE_BY_KEY % issue["key"], payload):
                utils.print_progress(index, len(issues))
else:
    print("\nNo tickets")

print("\n\nSee you next time!")
print("Have a good day!")