def _check_corresponding_files(self, pdb1, pdb2, parm1, parm2, tree=True):
    self.assertEqual(len(pdb1.atoms), len(pdb2.atoms))
    self.assertEqual(len(parm1.atoms), len(parm2.atoms))
    self.assertEqual(len(parm1.bonds), len(parm2.bonds))
    for a1, a2 in zip(pdb1.atoms, pdb2.atoms):
        self.assertEqual(a1.name, a2.name)
        self.assertEqual(a1.atomic_number, a2.atomic_number)
    for a1, a2 in zip(parm1.atoms, parm2.atoms):
        # Check EVERYTHING
        self.assertIsNot(a1, a2)
        self.assertEqual(a1.name, a2.name)
        self.assertEqual(a1.type, a2.type)
        self.assertEqual(a1.nb_idx, a2.nb_idx)
        self.assertEqual(a1.atomic_number, a2.atomic_number)
        self.assertEqual(a1.atom_type.rmin, a2.atom_type.rmin)
        self.assertEqual(a1.atom_type.epsilon, a2.atom_type.epsilon)
        self.assertEqual(a1.radii, a2.radii)
        self.assertEqual(a1.screen, a2.screen)
        # Ugh. OFF libs are inconsistent
        if tree:
            self.assertEqual(a1.tree, a2.tree)
        self.assertEqual(len(a1.bonds), len(a2.bonds))
        self.assertEqual(len(a1.angles), len(a2.angles))
        self.assertEqual(len(a1.dihedrals), len(a2.dihedrals))
        set1 = set([a.name for a in a1.bond_partners])
        set2 = set([a.name for a in a2.bond_partners])
        self.assertEqual(set1, set2)
        set1 = set([a.name for a in a1.angle_partners])
        set2 = set([a.name for a in a2.angle_partners])
        self.assertEqual(set1, set2)
        set1 = set([a.name for a in a1.dihedral_partners])
        set2 = set([a.name for a in a2.dihedral_partners])
        self.assertEqual(set1, set2)
        # Check residue properties
        self.assertEqual(a1.residue.name, a2.residue.name)
def hierarchical(keys):
    """
    Iterates over dimension values in keys, taking two sets
    of dimension values at a time to determine whether two
    consecutive dimensions have a one-to-many relationship.
    If they do a mapping between the first and second dimension
    values is returned. Returns a list of n-1 mappings, between
    consecutive dimensions.
    """
    ndims = len(keys[0])
    if ndims <= 1:
        return True
    dim_vals = list(zip(*keys))
    combinations = (zip(*dim_vals[i:i + 2]) for i in range(ndims - 1))
    hierarchies = []
    for combination in combinations:
        hierarchy = True
        store1 = defaultdict(list)
        store2 = defaultdict(list)
        for v1, v2 in combination:
            if v2 not in store2[v1]:
                store2[v1].append(v2)
            previous = store1[v2]
            if previous and previous[0] != v1:
                hierarchy = False
                break
            if v1 not in store1[v2]:
                store1[v2].append(v1)
        hierarchies.append(store2 if hierarchy else {})
    return hierarchies
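# Hypothetical usage sketch (not part of the original module): assumes
# `hierarchical` above is in scope together with `collections.defaultdict`.
# Every 'month' key belongs to exactly one 'year', so the single mapping for
# the consecutive dimension pair is returned intact.
keys = [(2020, 'Jan'), (2020, 'Feb'), (2021, 'Mar')]
mappings = hierarchical(keys)
# mappings[0] == {2020: ['Jan', 'Feb'], 2021: ['Mar']}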
def get_regstr(regtype, var, val):
    regtype_map = {
        'REG_EXPAND_SZ': 'hex(2):',
        'REG_DWORD': 'dword:',
        'REG_BINARY': None,
        'REG_MULTI_SZ': None,
        'REG_SZ': '',
    }
    # It is not a good idea to write these variables...
    EXCLUDE = ['USERPROFILE', 'USERNAME', 'SYSTEM32']
    if var in EXCLUDE:
        return ''

    def quotes(str_):
        return '"' + str_.replace('"', r'\"') + '"'

    sanitized_var = quotes(var)
    if regtype == 'REG_EXPAND_SZ':
        # Weird encoding: REG_EXPAND_SZ values are written as comma-separated
        # hex byte pairs, each followed by '00' padding.
        hex_ = val.encode('ascii').hex()
        hex2zip = list(zip(hex_[0::2], hex_[1::2]))
        spacezip = [('0', '0')] * len(hex2zip)
        hex3zip = zip(hex2zip, spacezip)
        sanitized_val = ','.join([''.join(hex2) + ',' + ''.join(space)
                                  for hex2, space in hex3zip])
    elif regtype == 'REG_DWORD':
        sanitized_val = '%08d' % int(val)
    else:
        sanitized_val = quotes(val)
    comment = '; ' + var + '=' + val
    regstr = sanitized_var + '=' + regtype_map[regtype] + sanitized_val
    return comment + '\n' + regstr
def decode_rows(self, stream, conversors):
    data, rows, cols = [], [], []
    for i, row in enumerate(stream):
        values = _parse_values(row)

        if not isinstance(values, dict):
            raise BadLayout()
        if not values:
            continue
        row_cols, values = zip(*sorted(values.items()))
        try:
            values = [value if value is None else conversors[key](value)
                      for key, value in zip(row_cols, values)]
        except ValueError as exc:
            if 'float: ' in str(exc):
                raise BadNumericalValue()
            raise
        except IndexError:
            # conversor out of range
            raise BadDataFormat(row)

        data.extend(values)
        rows.extend([i] * len(values))
        cols.extend(row_cols)

    return data, rows, cols
def groupby(cls, dataset, dim_names, container_type, group_type, **kwargs): # Get dimensions information dimensions = [dataset.get_dimension(d) for d in dim_names] kdims = [kdim for kdim in dataset.kdims if kdim not in dimensions] # Update the kwargs appropriately for Element group types group_kwargs = {} group_type = dict if group_type == 'raw' else group_type if issubclass(group_type, Element): group_kwargs.update(util.get_param_values(dataset)) group_kwargs['kdims'] = kdims group_kwargs.update(kwargs) # Find all the keys along supplied dimensions keys = [dataset.data[d.name] for d in dimensions] # Iterate over the unique entries applying selection masks grouped_data = [] for unique_key in zip(*util.cartesian_product(keys)): group_data = cls.select(dataset, **dict(zip(dim_names, unique_key))) if np.isscalar(group_data): group_data = {dataset.vdims[0].name: np.atleast_1d(group_data)} for dim, v in zip(dim_names, unique_key): group_data[dim] = np.atleast_1d(v) else: for vdim in dataset.vdims: group_data[vdim.name] = np.squeeze(group_data[vdim.name]) group_data = group_type(group_data, **group_kwargs) grouped_data.append((tuple(unique_key), group_data)) if issubclass(container_type, NdMapping): with item_check(False): return container_type(grouped_data, kdims=dimensions) else: return container_type(grouped_data)
def fromfile(cls, file, key=None): """Load Space from HDF5 file. file filename string or h5py.Group instance key sliced (subset) loading, should be an n-tuple of slice()s in data coordinates""" try: with util.open_h5py(file, 'r') as fp: if 'type' in fp.attrs.keys(): if fp.attrs['type'] == 'Empty': return EmptySpace() axes = Axes.fromfile(fp) config = util.ConfigFile.fromfile(fp) metadata = util.MetaData.fromfile(fp) if key: if len(axes) != len(key): raise ValueError("dimensionality of 'key' does not match dimensionality of Space in HDF5 file {0}".format(file)) key = tuple(ax.get_index(k) for k, ax in zip(key, axes)) for index, sl in enumerate(key): if sl.start == sl.stop and sl.start is not None: raise KeyError('key results in empty space') axes = tuple(ax[k] for k, ax in zip(key, axes) if isinstance(k, slice)) else: key = Ellipsis space = cls(axes, config, metadata) try: fp['counts'].read_direct(space.photons, key) fp['contributions'].read_direct(space.contributions, key) except (KeyError, TypeError) as e: raise errors.HDF5FileError('unable to load Space from HDF5 file {0}, is it a valid BINoculars file? (original error: {1!r})'.format(file, e)) except IOError as e: raise errors.HDF5FileError("unable to open '{0}' as HDF5 file (original error: {1!r})".format(file, e)) return space
def process_predictor_args(predictors, params=None, sds=None):
    """Returns a predictor data structure for the given lists of predictors,
    params, and standard deviations.

    When no parameters or deviations are provided, the predictors are a simple
    list. When parameters and deviations are provided, the predictors are a
    dictionary indexed by predictor name with values corresponding to each
    predictor's param and global standard deviation.

    >>> process_predictor_args(None, None, None)
    >>> process_predictor_args(['ep'])
    ['ep']
    >>> process_predictor_args(['ep'], None, None)
    ['ep']
    >>> process_predictor_args(['ep'], [1], [5])
    {'ep': [1, 5]}
    """
    if predictors is None:
        processed_predictors = None
    elif params is None or sds is None:
        processed_predictors = predictors
    else:
        merged_params = map(list, zip(params, sds))
        processed_predictors = dict(zip(predictors, merged_params))

    return processed_predictors
def sample(cls, dataset, samples=[]): """ Samples the gridded data into dataset of samples. """ ndims = dataset.ndims dimensions = dataset.dimensions(label=True) arrays = [dataset.data[vdim.name] for vdim in dataset.vdims] data = defaultdict(list) first_sample = util.wrap_tuple(samples[0]) if any(len(util.wrap_tuple(s)) != len(first_sample) for s in samples): raise IndexError('Sample coordinates must all be of the same length.') for sample in samples: if np.isscalar(sample): sample = [sample] if len(sample) != ndims: sample = [sample[i] if i < len(sample) else None for i in range(ndims)] sampled, int_inds = [], [] for d, ind in zip(dimensions, sample): cdata = dataset.data[d] mask = cls.key_select_mask(dataset, cdata, ind) inds = np.arange(len(cdata)) if mask is None else np.argwhere(mask) int_inds.append(inds) sampled.append(cdata[mask]) for d, arr in zip(dimensions, np.meshgrid(*sampled)): data[d].append(arr) for vdim, array in zip(dataset.vdims, arrays): flat_index = np.ravel_multi_index(tuple(int_inds)[::-1], array.shape) data[vdim.name].append(array.flat[flat_index]) concatenated = {d: np.concatenate(arrays).flatten() for d, arrays in data.items()} return concatenated
def extrudeVertices(self, extrusion_factor, z0=0):
    """Extrudes the vertices of a shape and returns the three dimensional values
    """
    a = self.triangles3()

    vertices = []
    # bottom face triangles at z0
    for coord in a:
        for dec in coord:
            vertices.append(dec[0])  # x-axis
            vertices.append(dec[1])  # y-axis
            vertices.append(z0)      # z-axis
    # top face triangles at z0 + extrusion_factor (reversed winding)
    for coord in a:
        for dec in reversed(coord):
            vertices.append(dec[0])  # x-axis
            vertices.append(dec[1])  # y-axis
            vertices.append(z0 + extrusion_factor)  # z-axis

    top_edges = self.exteriorpoints_3d(z0=z0)
    bottom_edges = self.exteriorpoints_3d(z0=z0 + extrusion_factor)
    # build the side walls from two triangles per edge segment
    sideTriangles = list(zip(top_edges, top_edges[1:] + top_edges[:1], bottom_edges))
    sideTriangles2 = list(zip(bottom_edges[1:] + bottom_edges[:1], bottom_edges,
                              top_edges[1:] + top_edges[:1]))
    sideTriangles.extend(sideTriangles2)
    sideTriangles = [list(triangle) for triangle in sideTriangles]

    import itertools
    sideTriangles = list(itertools.chain.from_iterable(sideTriangles))
    sideTriangles = [list(point) for point in sideTriangles]
    sideTriangles = list(itertools.chain.from_iterable(sideTriangles))
    vertices.extend(sideTriangles)
    return vertices
def mulpower(fgen, ggen):
    'From: http://en.wikipedia.org/wiki/Power_series#Multiplication_and_division'
    a, b = [], []
    for f, g in zip(fgen, ggen):
        a.append(f)
        b.append(g)
        yield sum(f * g for f, g in zip(a, reversed(b)))
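# Hypothetical usage sketch (not in the original source): multiply the power
# series 1/(1-x) by itself with `mulpower` above. Both inputs have all
# coefficients equal to 1, so the Cauchy product yields 1, 2, 3, 4, ...
from itertools import count, islice

ones = (1 for _ in count())
more_ones = (1 for _ in count())
print(list(islice(mulpower(ones, more_ones), 5)))  # [1, 2, 3, 4, 5]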
def test_iter_acgt_geno_marker(self):
    """Tests the 'iter_acgt_geno_marker' function."""
    # Getting a subset of indexes
    indexes = random.sample(range(len(self.markers)), 4)

    # Getting the markers and genotypes
    markers = [self.markers[i] for i in indexes]
    genotypes = [self.acgt_genotypes[i] for i in indexes]

    # Zipping and comparing
    zipped = zip(
        [i for i in zip(markers, genotypes)],
        self.pedfile.iter_acgt_geno_marker(markers),
    )
    for (e_marker, e_geno), (marker, geno) in zipped:
        self.assertEqual(e_marker, marker)
        np.testing.assert_array_equal(e_geno, geno)

    # Testing a single marker
    index = random.randint(0, len(self.markers) - 1)
    e_marker = self.markers[index]
    e_geno = self.acgt_genotypes[index]
    for marker, geno in self.pedfile.iter_acgt_geno_marker(e_marker):
        self.assertEqual(e_marker, marker)
        np.testing.assert_array_equal(e_geno, geno)

    # Adding a marker that doesn't exist
    markers.extend(["unknown_3", "unknown_4"])
    with self.assertRaises(ValueError) as cm:
        [i for i in self.pedfile.iter_acgt_geno_marker(markers)]
    self.assertEqual("unknown_3: marker not in BIM", str(cm.exception))
def render_readme():
    s = """\
This file details the CSV files contained within this archive, and the
structure of their rows and columns.
"""
    for table in results:
        s += '\n'
        s += '%s.csv:\n' % table.name
        s += ''.join(
            ' %s\n' % l
            for l in textwrap.wrap(table.title, width=70)
        )
        s += ' Rows:\n'
        for title, unit in zip(table.row_titles, table.row_units):
            if unit:
                s += ' %s [%s]\n' % (title, unit)
            else:
                s += ' %s\n' % title
        s += ' Cols:\n'
        for title, unit in zip(table.col_titles, table.col_units):
            if unit:
                s += ' %s [%s]\n' % (title, unit)
            else:
                s += ' %s\n' % title
    stream = io.BytesIO()
    stream.write(s.encode('utf-8'))
    stream.seek(0)
    return stream
def sample(cls, dataset, samples=[]): """ Samples the gridded data into dataset of samples. """ ndims = dataset.ndims dimensions = dataset.dimensions(label='name') arrays = [dataset.data[vdim.name] for vdim in dataset.vdims] data = defaultdict(list) for sample in samples: if np.isscalar(sample): sample = [sample] if len(sample) != ndims: sample = [sample[i] if i < len(sample) else None for i in range(ndims)] sampled, int_inds = [], [] for d, ind in zip(dimensions, sample): cdata = dataset.data[d] mask = cls.key_select_mask(dataset, cdata, ind) inds = np.arange(len(cdata)) if mask is None else np.argwhere(mask) int_inds.append(inds) sampled.append(cdata[mask]) for d, arr in zip(dimensions, np.meshgrid(*sampled)): data[d].append(arr) for vdim, array in zip(dataset.vdims, arrays): da = dask_array_module() flat_index = np.ravel_multi_index(tuple(int_inds)[::-1], array.shape) if da and isinstance(array, da.Array): data[vdim.name].append(array.flatten().vindex[tuple(flat_index)]) else: data[vdim.name].append(array.flat[flat_index]) concatenated = {d: np.concatenate(arrays).flatten() for d, arrays in data.items()} return concatenated
def groupby(cls, dataset, dim_names, container_type, group_type, **kwargs): # Get dimensions information dimensions = [dataset.get_dimension(d, strict=True) for d in dim_names] if 'kdims' in kwargs: kdims = kwargs['kdims'] else: kdims = [kdim for kdim in dataset.kdims if kdim not in dimensions] kwargs['kdims'] = kdims invalid = [d for d in dimensions if dataset.data[d.name].ndim > 1] if invalid: if len(invalid) == 1: invalid = "'%s'" % invalid[0] raise ValueError("Cannot groupby irregularly sampled dimension(s) %s." % invalid) # Update the kwargs appropriately for Element group types group_kwargs = {} group_type = dict if group_type == 'raw' else group_type if issubclass(group_type, Element): group_kwargs.update(util.get_param_values(dataset)) else: kwargs.pop('kdims') group_kwargs.update(kwargs) drop_dim = any(d not in group_kwargs['kdims'] for d in kdims) # Find all the keys along supplied dimensions keys = [cls.coords(dataset, d.name) for d in dimensions] transpose = [dataset.ndims-dataset.kdims.index(kd)-1 for kd in kdims] transpose += [i for i in range(dataset.ndims) if i not in transpose] # Iterate over the unique entries applying selection masks grouped_data = [] for unique_key in zip(*util.cartesian_product(keys)): select = dict(zip(dim_names, unique_key)) if drop_dim: group_data = dataset.select(**select) group_data = group_data if np.isscalar(group_data) else group_data.columns() else: group_data = cls.select(dataset, **select) if np.isscalar(group_data) or (isinstance(group_data, get_array_types()) and group_data.shape == ()): group_data = {dataset.vdims[0].name: np.atleast_1d(group_data)} for dim, v in zip(dim_names, unique_key): group_data[dim] = np.atleast_1d(v) elif not drop_dim: if isinstance(group_data, get_array_types()): group_data = {dataset.vdims[0].name: group_data} for vdim in dataset.vdims: data = group_data[vdim.name] data = data.transpose(transpose[::-1]) group_data[vdim.name] = np.squeeze(data) group_data = group_type(group_data, **group_kwargs) grouped_data.append((tuple(unique_key), group_data)) if issubclass(container_type, NdMapping): with item_check(False): return container_type(grouped_data, kdims=dimensions) else: return container_type(grouped_data)
def test_Array3Diterator():
    a = FloatArray3D(IndexRange3D(Int3BasicCoordinate((1, 3, -1)),
                                  Int3BasicCoordinate((3, 9, 5))))
    for i1, i2 in zip(a.flat(), range(a.size_all())):
        i1 = i2
    np = stirextra.to_numpy(a)
    for i1, i2 in zip(a.flat(), np.flat):
        assert abs(i1 - i2) < .01
def _compareInputOutputPDBs(self, pdbfile, pdbfile2, reordered=False, altloc_option='all'): # Now go through all atoms and compare their attributes for a1, a2 in zip(pdbfile.atoms, pdbfile2.atoms): if altloc_option in ('first', 'all'): self.assertEqual(a1.occupancy, a2.occupancy) a1idx = a1.idx elif altloc_option == 'occupancy': a, occ = a1, a1.occupancy for key, oa in iteritems(a1.other_locations): if oa.occupancy > occ: occ = oa.occupancy a = oa a1idx = a1.idx a1 = a # This is the atom we want to compare with self.assertEqual(a1.atomic_number, a2.atomic_number) self.assertEqual(a1.name, a2.name) self.assertEqual(a1.type, a2.type) self.assertEqual(a1.mass, a2.mass) self.assertEqual(a1.charge, a2.charge) self.assertEqual(a1.bfactor, a2.bfactor) self.assertEqual(a1.altloc, a2.altloc) self.assertEqual(a1idx, a2.idx) if altloc_option == 'all': self.assertEqual(set(a1.other_locations.keys()), set(a2.other_locations.keys())) self.assertEqual(a1.xx, a2.xx) self.assertEqual(a1.xy, a2.xy) self.assertEqual(a1.xz, a2.xz) if altloc_option != 'all': # There should be no alternate locations unless we keep them all self.assertEqual(len(a2.other_locations), 0) if not reordered: self.assertEqual(a1.number, a2.number) # Search all alternate locations as well for k1, k2 in zip(sorted(a1.other_locations.keys()), sorted(a2.other_locations.keys())): self.assertEqual(k1, k2) oa1 = a1.other_locations[k1] oa2 = a2.other_locations[k2] self.assertEqual(oa1.atomic_number, oa2.atomic_number) self.assertEqual(oa1.name, oa2.name) self.assertEqual(oa1.type, oa2.type) self.assertEqual(oa1.mass, oa2.mass) self.assertEqual(oa1.charge, oa2.charge) self.assertEqual(oa1.occupancy, oa2.occupancy) self.assertEqual(oa1.bfactor, oa2.bfactor) self.assertEqual(oa1.altloc, oa2.altloc) self.assertEqual(oa1.idx, oa2.idx) if not reordered: self.assertEqual(oa1.number, oa2.number) # Now compare all residues for r1, r2 in zip(pdbfile.residues, pdbfile2.residues): self.assertEqual(r1.name, r2.name) self.assertEqual(r1.idx, r2.idx) self.assertEqual(r1.ter, r2.ter) self.assertEqual(len(r1), len(r2)) self.assertEqual(r1.insertion_code, r2.insertion_code) if not reordered: self.assertEqual(r1.number, r2.number)
def _align_columns(rows):
    """Format a list of rows as CSV with aligned columns.
    """
    col_widths = []
    for col in zip(*rows):
        col_widths.append(max(len(value) for value in col))
    for row_num, row in enumerate(rows):
        line = []
        last_col = len(row) - 1
        for col_num, (width, value) in enumerate(zip(col_widths, row)):
            space = ' ' * (width - len(value))
            if row_num == 0:
                if col_num == last_col:
                    line.append(value)
                else:
                    line.append('%s, %s' % (value, space))
            elif col_num == last_col:
                if col_num == 0:
                    line.append(value)
                else:
                    line.append('%s%s' % (space, value))
            else:
                if col_num == 0:
                    line.append('%s, %s' % (value, space))
                else:
                    line.append('%s%s, ' % (space, value))
        yield ''.join(line)
def __call__(self, *args, **kwargs):
    argvalues = [arg.value if isinstance(arg, DiffObject) else arg for arg in args]
    kwargvalues = kwargs
    # TODO: for now can not diff wrt kwargs
    # ? should I check if all derivatives are provided?
    # ? provide option for numerically computed derivative if not defined?
    f = self.fun(*argvalues, **kwargvalues)
    if not any([isinstance(arg, DiffObject) for arg in args]):
        return f

    if self.dfun:
        # compute df_args
        df = [self.dfun[i](*argvalues, **kwargvalues)
              if isinstance(arg, DiffObject) else None
              for i, arg in enumerate(args)]
    else:
        # if self.dfun is empty assume fun returns a tuple of nominal
        # value and derivative list
        f, df = f

    # try to make DiffObject
    if type(f) in DiffObject._types:
        dlist = [arg.chain(dfi) for arg, dfi in zip(args, df)
                 if isinstance(arg, DiffObject)]
        d = sum_dicts(*dlist)
        return DiffObject(f, d)
    elif isinstance(f, Iterable):
        dlist = [[arg.chain(dfij) for dfij in dfi]
                 for arg, dfi in zip(args, df) if isinstance(arg, DiffObject)]
        d = [sum_dicts(*d) for d in zip(*dlist)]
        return type(f)(map(DiffObject, f, d))
    raise TypeError('DiffFunction output not implemented as a DiffObject')
def testLanguageModel(self):
    for n_future in [0, 1, 10, 13]:
        dataset = TextDataset(path=self.shakespeare, level="char",
                              target_n_future=n_future)
        i = 0
        chars, labels = dataset.train_inputs, dataset.train_targets
        for char, label in zip(chars, labels):
            char = dataset.vocab_inverse[np.argmax(char, 0)]
            label = dataset.label_vocab_inverse[np.argmax(label, 0)]
            if i >= self.n_chars - n_future:
                break
            assert char == self.first_n_chars[i], \
                "Expected %s at index %d, found %s" % (self.first_n_chars[i], i, char)
            assert label == self.first_n_chars[i + n_future], \
                "Expected label %s at index %d, found %s" % (self.first_n_chars[i + n_future], i, label)
            i += 1
        del dataset

    # use seq_len rather than shadowing the builtin `len`
    for seq_len in [2, 5]:
        dataset = TextDataset(path=self.shakespeare, level="char",
                              target_n_future=1, sequence_length=seq_len)
        chars, labels = dataset.train_inputs, dataset.train_targets
        for i, (char_seq, label_seq) in enumerate(zip(chars, labels)):
            char_s = [dataset.vocab_inverse[np.argmax(char, 0)] for char in char_seq]
            label_s = [dataset.label_vocab_inverse[np.argmax(label, 0)] for label in label_seq]
        del dataset
def createVoxelList(self):
    # create dictionary of voxels with atom numbers as keys
    self.startTimer()
    self.printStepNumber()
    self.lgwrite(ln='Combining voxel density and atom values...')
    self.success()
    vxlDic = {atm: [] for atm in self.atmmap.vxls_val}
    xyzDic = {atm: [] for atm in self.atmmap.vxls_val}

    self.densmap.reshape1dTo3d()
    self.densmap.abs2xyz_params()
    for atm, dens in zip(self.atmmap.vxls_val, self.densmap.vxls_val):
        vxlDic[atm].append(dens)

    xyz_list = self.densmap.getVoxXYZ(self.atomIndices, coordType='fractional')
    for atm, xyz in zip(self.atmmap.vxls_val, xyz_list):
        xyzDic[atm].append(xyz)

    self.vxlsPerAtom = vxlDic
    self.xyzsPerAtom = xyzDic  # not essential for run

    if self.calcFCmap:
        vxlDic2 = {atm: [] for atm in self.atmmap.vxls_val}
        for atm, dens in zip(self.atmmap.vxls_val, self.FCmap.vxls_val):
            vxlDic2[atm].append(dens)
        self.FCperAtom = vxlDic2

    self.deleteMapsAttributes()
    self.stopTimer()
def contingent(intervals, domain_name, nodoms_only=False):
    """
    intervals should be all intervals in all genes that contain the domain
    """
    import fisher
    n_domain_variants = sum(len(i.mafs.split(",")) for i in intervals
                            if i.domain == domain_name)
    if nodoms_only:
        n_gene_variants = sum(len(i.mafs.split(",")) for i in intervals
                              if i.domain == ".")
    else:
        n_gene_variants = sum(len(i.mafs.split(",")) for i in intervals
                              if i.domain != domain_name)
    gene = set()
    n_domain_bases, n_gene_bases = 0, 0
    for iv in intervals:
        gene.add(iv.gene)
        # materialize the map objects so they can be iterated more than once
        starts = list(map(int, iv.starts.split(",")))
        ends = list(map(int, iv.ends.split(",")))
        l = sum(e - s for s, e in zip(starts, ends))
        assert all(e > s for s, e in zip(starts, ends)), domain_name
        if iv.domain == domain_name:
            n_domain_bases += l
        elif nodoms_only and iv.domain == ".":
            n_gene_bases += l
        elif not nodoms_only and iv.domain != domain_name:
            n_gene_bases += l

    tbl = "gene:%d/%d,dom:%d/%d" % (n_gene_variants, n_gene_bases,
                                    n_domain_variants, n_domain_bases)
    p = fisher.pvalue(n_gene_bases, n_gene_variants, n_domain_bases, n_domain_variants)
    denom = float(n_gene_variants) / (n_gene_bases or 1) or 1
    return p.two_tail, (float(n_domain_variants) / (n_domain_bases or 1)) / denom, tbl, gene
def _check_arbitrary_res(self, struct, res): orig_indices = [a.idx for a in res] templ = ResidueTemplate.from_residue(res) # Make sure we didn't clobber any of the atoms in res for i, atom in zip(orig_indices, res.atoms): self.assertIs(atom.list, struct.atoms) self.assertEqual(atom.idx, i) # Make sure that we have the same number of atoms in the residue as the # source self.assertEqual(len(res), len(templ)) for a1, a2 in zip(res, templ): self.assertIsInstance(a1, Atom) self.assertIsInstance(a2, Atom) self.assertEqual(a1.name, a2.name) self.assertEqual(a1.type, a2.type) self.assertEqual(a1.atomic_number, a2.atomic_number) self.assertEqual(a1.xx, a2.xx) self.assertEqual(a1.xy, a2.xy) self.assertEqual(a1.xz, a2.xz) # Make sure we have the correct number of bonds in the residue bondset = set() for atom in res: for bond in atom.bonds: if bond.atom1 in res and bond.atom2 in res: bondset.add(bond) self.assertGreater(len(bondset), 0) self.assertEqual(len(bondset), len(templ.bonds)) # Make sure that each atom has the correct number of bonds for i, atom in enumerate(res): for bond in atom.bonds: try: id1 = res.atoms.index(bond.atom1) id2 = res.atoms.index(bond.atom2) except ValueError: if bond.atom1 in res: oatom = bond.atom2 idx = res.atoms.index(bond.atom1) else: oatom = bond.atom1 idx = res.atoms.index(bond.atom2) if oatom.residue.idx == res.idx - 1: self.assertIs(templ.head, templ[idx]) elif oatom.residue.idx == res.idx + 1: self.assertIs(templ.tail, templ[idx]) elif oatom.residue.idx == res.idx: self.assertTrue(False) # Should never hit else: # Should only happen with CYX for amber prmtop... self.assertEqual(res.name, 'CYX') self.assertEqual(atom.name, 'SG') if bond.atom1 in res: self.assertIn(templ[idx], templ.connections) else: self.assertIn(templ[id1], templ[id2].bond_partners) self.assertIn(templ[id2], templ[id1].bond_partners) # Make sure that our coordinates come as a numpy array if utils.has_numpy(): self.assertIsInstance(templ.coordinates, utils.numpy.ndarray) self.assertEqual(templ.coordinates.shape, (len(templ)*3,))
def compute_samples(channels, nsamples=None):
    '''
    create a generator which computes the samples.

    essentially it creates a sequence of the sum of each function in the
    channel at each sample in the file for each channel.
    '''
    return islice(zip(*(map(sum, zip(*channel)) for channel in channels)), nsamples)
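# Hypothetical usage sketch (not in the original source): assumes
# `compute_samples` above and `itertools.islice` are in scope. Each channel is
# an iterable of sample streams; the streams within a channel are summed
# element-wise, and the channels are then zipped together frame by frame.
left = [[0.1, 0.2, 0.3], [1.0, 1.0, 1.0]]   # two streams mixed into one channel
right = [[0.5, 0.5, 0.5]]
print(list(compute_samples([left, right], nsamples=2)))
# [(1.1, 0.5), (1.2, 0.5)]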
def rotate(self, rotation=Rotate.clockwise):
    if rotation == Rotate.clockwise:
        self._set_lines(''.join(reversed(chars)) for chars in zip(*self.lines))
    elif rotation == Rotate.counterclockwise:
        self._set_lines(''.join(chars)
                        for chars in zip(*(reversed(line) for line in self.lines)))
    else:
        pass
    return self
def fastq2shuffled(fnames, out=sys.stdout, stripNames=1, limit=0):
    """Process FastQ files"""
    i = 0
    for fn1, fn2 in zip(fnames[::2], fnames[1::2]):
        for i, (fq1, fq2) in enumerate(zip(fqparser(fn1, stripNames, '/1', i),
                                           fqparser(fn2, stripNames, '/2', i)), i):
            if limit and i > limit:
                break
            out.write(fq1 + fq2)
def test_Array2Diterator():
    a = FloatArray2D(IndexRange2D(Int2BasicCoordinate((1, 3)),
                                  Int2BasicCoordinate((3, 9))))
    for i1, i2 in zip(a.flat(), range(a.size_all())):
        i1 = i2
    np = stirextra.to_numpy(a)
    for i1, i2 in zip(a.flat(), np.flat):
        assert abs(i1 - i2) < .01
def assertImagesAlmostEqual(self, a, b, epsilon=0.1, *args):
    self.assertEqual(a.size, b.size, 'sizes dont match')
    a = a.filter(ImageFilter.BLUR).getdata()
    b = b.filter(ImageFilter.BLUR).getdata()
    for i, ax, bx in zip(range(len(a)), a, b):
        diff = sum(abs(ac / 256 - bc / 256) for ac, bc in zip(ax, bx)) / 3
        if diff > epsilon:
            self.fail('images differed by %s at index %d; %s %s'
                      % (diff, i, ax, bx))
def find_random_duplicates(input):
    # input = [10,11,12,13,14,13,12,10,11,11,12,13,14,10,14]
    # (indices[ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14])
    # return: {10: [0, 7, 13], 11: [1, 8, 9], 12: [2, 6, 10], 13: [3, 5, 11], 14: [4, 12, 14]}
    # http://stackoverflow.com/questions/176918/finding-the-index-of-an-item-given-a-list-containing-it-in-python
    # http://stackoverflow.com/questions/479897/how-do-you-remove-duplicates-from-a-list-in-python-if-the-item-order-is-not-impo
    deduplicated = sorted(set(input))
    indexlist = [[i for i, j in zip(count(), input) if j == k] for k in deduplicated]
    return dict((e0, e1) for e0, e1 in zip(deduplicated, indexlist))
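# Usage sketch (not in the original source): runs the documented example above.
# Assumes `find_random_duplicates` and `itertools.count` are in scope.
print(find_random_duplicates([10, 11, 12, 13, 14, 13, 12, 10, 11, 11, 12, 13, 14, 10, 14]))
# {10: [0, 7, 13], 11: [1, 8, 9], 12: [2, 6, 10], 13: [3, 5, 11], 14: [4, 12, 14]}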
def __next__(self):
    for currentReads, currentBars in zip(zip(*self.fastqs), zip(*self.indexs)):
        bars = tuple([x[1] for x in currentBars])
        if bars in self.barcodes:
            self.nGood += 1
            return (currentReads, bars)
        else:
            self.nBad += 1
    raise StopIteration()
def test_iter_acgt_geno(self):
    """Tests the 'iter_acgt_geno' function."""
    zipped = zip(
        [i for i in zip(self.markers, self.acgt_genotypes)],
        self.pedfile.iter_acgt_geno(),
    )
    for (e_marker, e_geno), (marker, geno) in zipped:
        self.assertEqual(e_marker, marker)
        np.testing.assert_array_equal(e_geno, geno)
def amap(self, f, *args, **kwds):  # register a callback ?
    AbstractWorkerPool._AbstractWorkerPool__map(self, f, *args, **kwds)
    _pool = self._serve()
    return _pool.map_async(star(f), zip(*args))  # chunksize
def window(iterations, size):
    n = tee(iterations, size)
    for i in range(1, size):
        for each in n[i:]:
            next(each, None)
    return zip(*n)
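# Hypothetical usage sketch (not in the original source): assumes `window`
# above and `itertools.tee` are in scope. Yields a sliding window of the
# requested size over any iterable.
print(list(window([1, 2, 3, 4, 5], 3)))
# [(1, 2, 3), (2, 3, 4), (3, 4, 5)]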
def _main(args): table = read_hdf(args.tablefile, 'ldtable') table_size = sum(map(int, table.index.values[0].split())) if table_size < args.samplesize: raise IOError('Lookup table was constructed for {} haploids, ' 'but --samplesize is {} haploids. Either build ' 'a larger lookup table or simulate fewer ' 'individuals.'.format(table_size, args.samplesize)) max_rho = table.columns[-1] table.columns *= 100. / max_rho block_penalties = list(map(float, args.blockpenalty.split(','))) window_sizes = list(map(float, args.windowsize.split(','))) logging.info('Searching over Windowsizes %s, and Block Penalties %s', window_sizes, block_penalties) if args.msmc_file: if args.smcpp_file or args.epochtimes or args.popsizes: raise IOError('Can only specify one of msmc_file, smcpp_file, or ' 'popsizes') pop_sizes, times = read_msmc(args.msmc_file, args.mu) elif args.smcpp_file: if args.msmc_file or args.epochtimes or args.popsizes: raise IOError('Can only specify one of msmc_file, smcpp_file, or ' 'popsizes') pop_sizes, times = read_smcpp(args.smcpp_file) else: pop_sizes = list(map(float, args.popsizes.split(','))) times = [] if args.epochtimes: times = list(map(float, args.epochtimes.split(','))) if len(pop_sizes) != len(times) + 1: raise IOError('Number of population sizes must ' 'match number of epochs.') pop_sizes, times = decimate_sizes(pop_sizes, times, args.decimate_rel_tol, args.decimate_anc_size) pop_config = [ msprime.PopulationConfiguration(sample_size=args.samplesize, initial_size=pop_sizes[0])] demography = [] if times: for pop_size, time in zip(pop_sizes[1:], times): demography.append( msprime.PopulationParametersChange(time=time * 2, initial_size=pop_size, population_id=0)) reco_maps = _load_hapmap() pool = Pool(args.numthreads, maxtasksperchild=100) logging.info('Simulating data...') simulation_args = [((pop_config, args.mu, demography, args.ploidy), reco_maps) for k in range(args.num_sims)] test_set = list(pool.imap(_simulate_data, simulation_args, chunksize=10)) logging.info('\tdone simulating') scores = {} for block_penalty in block_penalties: for window_size in window_sizes: estimates = list(pool.imap(partial(_call_optimize, metawindow=args.metawindow, windowsize=window_size, table=table, ploidy=args.ploidy, bpen=block_penalty, overlap=args.overlap, max_rho=max_rho), test_set, chunksize=10)) scores[(block_penalty, window_size)] = _score(estimates, [ts[1] for ts in test_set], [ts[2] for ts in test_set], pool) ofile = open(args.outfile, 'w') if args.outfile else sys.stdout ofile.write('\t'.join(['Block_Penalty', 'Window_Size', 'Pearson_Corr_1bp', 'Pearson_Corr_10kb', 'Pearson_Corr_100kb', 'Log_Pearson_Corr_1bp', 'Log_Pearson_Corr_10kb', 'Log_Pearson_Corr_100kb', 'Spearman_Corr_1bp', 'Spearman_Corr_10kb', 'Spearman_Corr_100kb', 'L2', 'Log_L2']) + '\n') for block_penalty, window_size in sorted(scores): line = ([block_penalty, window_size] + scores[block_penalty, window_size]) ofile.write('\t'.join(map(str, line)) + '\n') if args.outfile: ofile.close()
def _create_batch(self, series): """ Create an Arrow record batch from the given pandas.Series or list of Series, with optional type. :param series: A single pandas.Series, list of Series, or list of (series, arrow_type) :return: Arrow RecordBatch """ import pandas as pd import pyarrow as pa from pyspark.sql.types import _check_series_convert_timestamps_internal # Make input conform to [(series1, type1), (series2, type2), ...] if not isinstance(series, (list, tuple)) or \ (len(series) == 2 and isinstance(series[1], pa.DataType)): series = [series] series = ((s, None) if not isinstance(s, (list, tuple)) else s for s in series) def create_array(s, t): mask = s.isnull() # Ensure timestamp series are in expected form for Spark internal representation if t is not None and pa.types.is_timestamp(t): s = _check_series_convert_timestamps_internal( s.fillna(0), self._timezone) # TODO: need cast after Arrow conversion, ns values cause error with pandas 0.19.2 return pa.Array.from_pandas(s, mask=mask).cast(t, safe=False) try: array = pa.Array.from_pandas(s, mask=mask, type=t, safe=self._safecheck) except pa.ArrowException as e: error_msg = "Exception thrown when converting pandas.Series (%s) to Arrow " + \ "Array (%s). It can be caused by overflows or other unsafe " + \ "conversions warned by Arrow. Arrow safe type check can be " + \ "disabled by using SQL config " + \ "`spark.sql.execution.pandas.arrowSafeTypeConversion`." raise RuntimeError(error_msg % (s.dtype, t), e) return array arrs = [] for s, t in series: if t is not None and pa.types.is_struct(t): if not isinstance(s, pd.DataFrame): raise ValueError( "A field of type StructType expects a pandas.DataFrame, " "but got: %s" % str(type(s))) # Input partition and result pandas.DataFrame empty, make empty Arrays with struct if len(s) == 0 and len(s.columns) == 0: arrs_names = [(pa.array([], type=field.type), field.name) for field in t] # Assign result columns by schema name if user labeled with strings elif self._assign_cols_by_name and any( isinstance(name, basestring) for name in s.columns): arrs_names = [(create_array(s[field.name], field.type), field.name) for field in t] # Assign result columns by position else: arrs_names = [(create_array(s[s.columns[i]], field.type), field.name) for i, field in enumerate(t)] struct_arrs, struct_names = zip(*arrs_names) arrs.append( pa.StructArray.from_arrays(struct_arrs, struct_names)) else: arrs.append(create_array(s, t)) return pa.RecordBatch.from_arrays( arrs, ["_%d" % i for i in xrange(len(arrs))])
def run(schematization, engine, max_iterations, mmi): """Console script for sandbox_fm keys: - 1, 2, 3 -> switch views - f -> fullscreen - c -> toggle currents - p -> make picture - r -> reset bathymethry - b -> set bed level """ schematization_name = pathlib.Path(schematization.name) # keep absolute path so model can change directory calibration_name = schematization_name.with_name( 'calibration.json').absolute() config_name = schematization_name.with_name('config.json').absolute() anomaly_name = pathlib.Path('anomaly.npy').absolute() # calibration info data = {} data['schematization'] = schematization_name with open(str(calibration_name)) as f: calibration = json.load(f) data.update(calibration) data.update(compute_transforms(data)) # if we have a configuration file if config_name.exists(): # open it with open(str(config_name)) as f: configuration_read = json.load(f) # combine with defaults configuration = default_config.copy() configuration.update(configuration_read) else: # default empty configuration = default_config with open(str(config_name), 'w') as f: json.dump(configuration, f, indent=2) data.update(configuration) # model if not mmi: model = bmi.wrapper.BMIWrapper(engine) else: model = MMIClient(mmi) model.engine = engine logger.info('Connected to MMI: {}'.format(mmi)) # initialize model schematization, changes directory # search for a background or overlay image for layer in ['background', 'overlay', 'background_mask']: known_background_paths = [ pathlib.Path(schematization.name).with_name(layer + '.jpg'), pathlib.Path(schematization.name).with_name(layer + '.png') ] for path in known_background_paths: if path.exists(): data[layer + '_name'] = str(path.absolute()) break else: data[layer + '_name'] = None # mmi model is already initialized if not mmi: model.initialize(str(schematization_name.absolute())) update_initial_vars(data, model) # compute the model bounding box that is shown on the screen model_bbox = matplotlib.path.Path(data['model_points']) # create an index to see which points/cells are visualized data['node_in_box'] = model_bbox.contains_points( np.c_[data['X_NODES'].ravel(), data['Y_NODES'].ravel()]) data['cell_in_box'] = model_bbox.contains_points( np.c_[data['X_CELLS'].ravel(), data['Y_CELLS'].ravel()]) img_bbox = matplotlib.path.Path([(00, 00), (00, HEIGHT - KINECTBUFFER), (WIDTH - KINECTBUFFER, HEIGHT - KINECTBUFFER), (WIDTH - KINECTBUFFER, 00)]) x_nodes_box, y_nodes_box = transform(data['X_NODES'].ravel(), data['Y_NODES'].ravel(), data['model2box']) x_cells_box, y_cells_box = transform(data['X_CELLS'].ravel(), data['X_CELLS'].ravel(), data['model2box']) # for transformed coordinates see if they are on the screen data['node_in_img_bbox'] = img_bbox.contains_points(np.c_[x_nodes_box, y_nodes_box]) data['cell_in_img_bbox'] = img_bbox.contains_points(np.c_[x_cells_box, y_cells_box]) # images kinect_heights = calibrated_height_images(calibration["z_values"], calibration["z"], anomaly_name=anomaly_name) kinect_images = video_images() # load model library kinect_height = next(kinect_heights) kinect_image = next(kinect_images) data['kinect_height'] = kinect_height data['kinect_image'] = kinect_image data['height_cells_original'] = data['HEIGHT_CELLS'].copy() data['kinect_height_original'] = data['kinect_height'].copy() vis = Visualization() update_vars(data, model) vis.initialize(data) vis.subscribers.append( # fill in the data parameter and subscribe to events functools.partial(process_events, data=data, model=model, vis=vis)) iterator = enumerate((zip(kinect_images, 
kinect_heights))) tics = {} last_bed_update = 0 # Time since last automatic bed level update if mmi: sub_poller = model.subscribe() # synchronize data when received model.remote('play') for i, (kinect_image, kinect_height) in iterator: tics['t0'] = time.time() # Get data from model if not mmi: update_vars(data, model) else: # listen for at most 10 miliseconds for incomming data (flush the queue) for sock, n in sub_poller.poll(10): for i in range(n): message = recv_array(sock) update_with_message(data, model, message) # update kinect data['kinect_height'] = kinect_height data['kinect_image'] = kinect_image tics['update_vars'] = time.time() gestures = recognize_gestures(data['kinect_height']) data['gestures'] = gestures tics['gestures'] = time.time() # update visualization vis.update(data) # visualization can trigger an exit if vis.quitting: break tics['vis'] = time.time() if not mmi: dt = model.get_time_step() # HACK: fix unstable timestep in xbeach if model.engine == 'xbeach': dt = 60 model.update(dt) tics['model'] = time.time() if data['auto_bedlevel_update_interval']: time_since_bed_update = (time.time() - last_bed_update) if time_since_bed_update > data['auto_bedlevel_update_interval']: run_update_bedlevel(data, model) last_bed_update = time.time() tics['automate_bed_update'] = time.time() logger.info("tics: %s", tic_report(tics)) if max_iterations and i > max_iterations: break
def random_walk_tacticity(m, nmon, s_=None, **kwargs): """pysimm.apps.random_walk.random_walk_tacticity Builds homopolymer with controllable tacticity from capped monomer structure Args: m: reference monomer :class:`~pysimm.system.System`. Must be a capped monomer, with headCap and tail_cap as the first and last atoms in the .mol file. nmon: total number of monomers to add to chain s_: :class:`~pysimm.system.System` in which to build polymer chain (None) extra_bonds: EXPERMINTAL, True if making ladder backbone polymer settings: dictionary of simulation settings density: density at which to build polymer (0.3) forcefield: :class:`~pysimm.forcefield.Forcefield` object to acquire new force field parameters unwrap: True to unwrap final system debug: Boolean; print extra-output (False) traj: True to build xyz trajectory of polymer growth (True) limit: during MD, limit atomic displacement by this max value (LAMMPS ONLY) sim: :class:`~pysimm.lmps.Simulation` object for relaxation between polymer growth tacticity: float between 0 and 1. 1 = 100% isotactic insertions 0 = 100% syndiotactic insertions 0.5 = equal changes of isotactic or syndiotactic insertions (i.e. atactic) rotation: degrees to rotate monomer per insertion md_spacing: how many monomer insertion steps to perform between MD relaxation steps (1) error_check: True/False for if monomers should be checked for hardcore overlaps after insertion Returns: new polymer :class:`~pysimm.system.System` """ m = m.copy() extra_bonds = kwargs.get('extra_bonds', False) settings = kwargs.get('settings', {}) density = kwargs.get('density', 0.3) f = kwargs.get('forcefield') unwrap = kwargs.get('unwrap') traj = kwargs.get('traj', True) debug = kwargs.get('debug', False) limit = kwargs.get('limit', 0.1) sim = kwargs.get('sim') tacticity = kwargs.get('tacticity', 0.5) if tacticity == 'atactic': tacticity = 0.5 elif tacticity == 'isotactic': tacticity = 1 elif tacticity == 'syndiotactic': tacticity = 0 elif not (0 <= tacticity <= 1): sys.exit( "tacticity must be a number between 0 and 1, or 'atactic' (0.5), " "'isotactic' (1), or 'syndiotactic' (0)") tact_seq = [False] * round( (nmon - 1) * tacticity) + [True] * ((nmon - 1) - round( (nmon - 1) * tacticity)) random.shuffle(tact_seq) rotation = kwargs.get('rotation', 0) md_spacing = kwargs.get('md_spacing', 1) error_check = kwargs.get('error_check', False) m.add_particle_bonding() if error_check: lmps.quick_min(m, min_style='fire') # Automatically redefine linkers if they have specially defined names for p in m.particles: if p.type.name.find('@') >= 0 and p.type.name.split('@')[0].find('H'): p.linker = 'head' elif p.type.name.find('@') >= 0 and p.type.name.split('@')[0].find( 'T'): p.linker = 'tail' m.remove_linker_types() # Check whether the monomer is decorated correctly if not __check_tags__(m.particles): sys.exit( "random_walk:random_walk_tacticity() requires a **monomer capped with a single atom** as an input" " (i.e. to model polyethylene, ethane as a monomer is required). 
\n" "\tIn addition to 'head' and 'tail', 3 other tags should be defined: \n" "\t\t(i) p.linker = 'mirror' for a particle that defines plane for iso- syndio- tactic reflection \n" "\t\t(ii) p.rnd_wlk_tag = 'head_cap' and p.rnd_wlk_tag = 'tail_cap' for particles that capping head " "and tail linkers correspondingly \n \t\t(see the example #13 of this distribution for details)" ) # Remove tail-cap if it exists for p in m.particles: if p.linker == 'tail': for p_ in p.bonded_to: if p_.rnd_wlk_tag == 'tail_cap': p.charge += p_.charge # unite charge of tailcap into head m.particles.remove(p_.tag) # remove tailcap of monomer m.remove_spare_bonding() break # Add first monomer to the output system if s_ is None: s = system.replicate(m, 1, density=density / nmon) else: s = system.replicate(m, 1, s_=s_, density=None) print('%s: %s/%s monomers added' % (strftime('%H:%M:%S'), 1, nmon)) if traj: s.write_xyz('random_walk.xyz') s.add_particle_bonding() # Main polymerisation loop for insertion in range(nmon - 1): n = m.copy() head = None tail = None mirror_atom = None for p in n.particles: if p.linker == 'head': head = p elif p.linker == 'tail': tail = p elif p.linker == 'mirror': mirror_atom = p backbone_vector = np.array(find_last_backbone_vector(s, m)) tail_vector = np.array( find_last_tail_vector(s.particles[-n.particles.count:])) for p, p_ in zip(s.particles[-1 * n.particles.count:], n.particles): # translate monomer a = 1.1 # coefficient of displacement of a new monomer along the head--tail direction b = 2.4 # coefficient of displacement of a new monomer along the head--headcap direction p_.x = p.x + a * backbone_vector[0] + b * tail_vector[0] p_.y = p.y + a * backbone_vector[1] + b * tail_vector[1] p_.z = p.z + a * backbone_vector[2] + b * tail_vector[2] if tact_seq[insertion]: # if syndiotactic insertion, reflect monomer print("syndiotactic insertion...") mirrorPlane = define_plane(head, tail, mirror_atom) for p in n.particles: p.x, p.y, p.z = reflect_coords_thru_plane([p.x, p.y, p.z], mirrorPlane) else: # else isotatic insertion, rotate monomer if necessary print("isotatic insertion...") if rotation != 0: # rotate monomer, if necessary rot_mat = rot_mat_about_axis(backbone_vector, rotation) n.rotate(around=head, rot_matrix=rot_mat) for p_ in s.particles[-n.particles.count:]: if p_.rnd_wlk_tag == 'head_cap': head.charge += p_.charge # unite charge of head_cap into tail atom s.particles.remove( p_.tag) # Removing head_cap atom from growing chain s.remove_spare_bonding() break if extra_bonds: heads = [] for p in s.particles[-n.particles.count:]: if p.linker == 'head': heads.append(p) else: for p in s.particles[-n.particles.count:]: if p.linker == 'head': head = p s.add(n, change_dim=False) s.add_particle_bonding() if extra_bonds: tails = [] for p in s.particles[-n.particles.count:]: if p.linker == 'tail': tails.append(p) else: for p in s.particles[-n.particles.count:]: if p.linker == 'tail': tail = p if debug: for p in s.particles: if not p.bonded_to: print(p.tag) if head and tail: s.make_new_bonds(head, tail, f) print('%s: %s/%s monomers added' % (strftime('%H:%M:%S'), insertion + 2, nmon)) elif extra_bonds and len(heads) == len(tails): for h, t in zip(heads, tails): s.make_new_bonds(h, t, f) print('%s: %s/%s monomers added' % (strftime('%H:%M:%S'), insertion + 2, nmon)) else: print('cannot find head and tail') if sim is None: sim = lmps.Simulation(s, name='relax_%03d' % (insertion + 2), log='relax.log', **settings) if (insertion + 2) % md_spacing == 0: sim.add_md(ensemble='nve', limit=limit, 
**settings) # sim.add_min(**settings) if isinstance(sim, lmps.Simulation): s_ = s.copy() sim.system = s sim.name = 'relax_%03d' % (insertion + 2) sim.run(np=settings.get('np')) energy = lmps.energy(s) print("LAMMPS Energy = " + str(energy)) print("LAMMPS Energy/#ofAtoms = " + str(energy / s.particles.count)) if error_check == True: # check for hardcore overlap print("checking for hardcore overlap") if s.quality(tolerance=0.3) > 0: print( "Found bad quality monomer insertion. Redoing last insertion..." ) s.unwrap() s.write_xyz('bad_insertion_' + str(insertion + 2) + '.xyz') s.wrap() redo_monomer_insertion(s_, n, insertion + 2) s = s_.copy() if traj: s.unwrap() s.write_xyz('random_walk.xyz', append=True) # Removing the very last 'head_cap' at the end of the chain for p_ in s.particles[-n.particles.count:]: if p_.rnd_wlk_tag == 'head_cap': head.charge += p_.charge # unite charge of head_cap into tail atom s.particles.remove( p_.tag) # Removing head_cap atom from growing chain s.remove_spare_bonding() # Syncronizing molecule representation with particles ItemContainer representation for the chain s.objectify() if debug: s.write_lammps('polymer.lmps') s.write_xyz('polymer.xyz') s.unwrap() return s
def sample(self, samples=[], bounds=None, closest=True, **kwargs): """Samples values at supplied coordinates. Allows sampling of element with a list of coordinates matching the key dimensions, returning a new object containing just the selected samples. Supports multiple signatures: Sampling with a list of coordinates, e.g.: ds.sample([(0, 0), (0.1, 0.2), ...]) Sampling a range or grid of coordinates, e.g.: 1D: ds.sample(3) 2D: ds.sample((3, 3)) Sampling by keyword, e.g.: ds.sample(x=0) Args: samples: List of nd-coordinates to sample bounds: Bounds of the region to sample Defined as two-tuple for 1D sampling and four-tuple for 2D sampling. closest: Whether to snap to closest coordinates **kwargs: Coordinates specified as keyword pairs Keywords of dimensions and scalar coordinates Returns: Element containing the sampled coordinates """ if kwargs and samples != []: raise Exception( 'Supply explicit list of samples or kwargs, not both.') elif kwargs: sample = [slice(None) for _ in range(self.ndims)] for dim, val in kwargs.items(): sample[self.get_dimension_index(dim)] = val samples = [tuple(sample)] elif isinstance(samples, tuple) or util.isscalar(samples): if self.ndims == 1: xlim = self.range(0) lower, upper = (xlim[0], xlim[1]) if bounds is None else bounds edges = np.linspace(lower, upper, samples + 1) linsamples = [(l + u) / 2.0 for l, u in zip(edges[:-1], edges[1:])] elif self.ndims == 2: (rows, cols) = samples if bounds: (l, b, r, t) = bounds else: l, r = self.range(0) b, t = self.range(1) xedges = np.linspace(l, r, cols + 1) yedges = np.linspace(b, t, rows + 1) xsamples = [(lx + ux) / 2.0 for lx, ux in zip(xedges[:-1], xedges[1:])] ysamples = [(ly + uy) / 2.0 for ly, uy in zip(yedges[:-1], yedges[1:])] Y, X = np.meshgrid(ysamples, xsamples) linsamples = list(zip(X.flat, Y.flat)) else: raise NotImplementedError( "Regular sampling not implemented " "for elements with more than two dimensions.") samples = list(util.unique_iterator(self.closest(linsamples))) # Note: Special handling sampling of gridded 2D data as Curve # may be replaced with more general handling # see https://github.com/ioam/holoviews/issues/1173 from ...element import Table, Curve if len(samples) == 1: sel = {kd.name: s for kd, s in zip(self.kdims, samples[0])} dims = [kd for kd, v in sel.items() if not np.isscalar(v)] selection = self.select(**sel) # If a 1D cross-section of 2D space return Curve if self.interface.gridded and self.ndims == 2 and len(dims) == 1: new_type = Curve kdims = [self.get_dimension(kd) for kd in dims] else: new_type = Table kdims = self.kdims if np.isscalar(selection): selection = [samples[0] + (selection, )] else: reindexed = selection.clone(new_type=Dataset).reindex(kdims) selection = tuple( reindexed.columns(kdims + self.vdims).values()) datatype = list( util.unique_iterator(self.datatype + ['dataframe', 'dict'])) return self.clone(selection, kdims=kdims, new_type=new_type, datatype=datatype) lens = set(len(util.wrap_tuple(s)) for s in samples) if len(lens) > 1: raise IndexError( 'Sample coordinates must all be of the same length.') if closest: try: samples = self.closest(samples) except NotImplementedError: pass samples = [util.wrap_tuple(s) for s in samples] return self.clone(self.interface.sample(self, samples), new_type=Table)
""" def __repr__(self): return "Lightness(%s)" % self clamp_float = lambda v: max(0.0, min(1.0, v)) clamp_bytes = lambda v: max(0, min(255, v)) to_srgb = lambda c: 12.92 * c if c <= 0.0031308 else (1.055 * c ** (1/2.4) - 0.055) from_srgb = lambda c: c / 12.92 if c <= 0.04045 else ((c + 0.055) / 1.055) ** 2.4 D65 = (0.95047, 1.0, 1.08883) U = lambda x, y, z: 4 * x / (x + 15 * y + 3 * z) V = lambda x, y, z: 9 * y / (x + 15 * y + 3 * z) matrix_mult = lambda m, n: ( sum(mval * nval for mval, nval in zip(mrow, n)) for mrow in m ) class Color(namedtuple('Color', ('red', 'green', 'blue'))): """ The Color class is a tuple which represents a color as red, green, and blue components. The class has a flexible constructor which allows you to create an instance from a variety of color systems including `RGB`_, `Y'UV`_, `Y'IQ`_, `HLS`_, and `HSV`_. There are also explicit constructors for each of these systems to allow you to force the use of a system in your code. For example, an instance of :class:`Color` can be constructed in any of the following ways::
def stress_reversed(slt):
    itr = reversed(list(reversed(slt)))
    assert all(tup[0] == tup[1] for tup in zip(slt, itr))
def set_values_detail_account(self, kpi, col_key, account_id, vals, drilldown_args, tooltips=True): """ Set values for a kpi and a column and a detail account. Invoke this after declaring the kpi and the column. """ if not account_id: row = self._kpi_rows[kpi] else: kpi_row = self._kpi_rows[kpi] if account_id in self._detail_rows[kpi]: row = self._detail_rows[kpi][account_id] else: row = KpiMatrixRow(self, kpi, account_id, parent_row=kpi_row) self._detail_rows[kpi][account_id] = row col = self._cols[col_key] cell_tuple = [] assert len(vals) == col.colspan assert len(drilldown_args) == col.colspan for val, drilldown_arg, subcol in zip(vals, drilldown_args, col.iter_subcols()): if isinstance(val, DataError): val_rendered = val.name val_comment = val.msg else: val_rendered = self._style_model.render( self.lang, row.style_props, kpi.type, val) if row.kpi.multi and subcol.subkpi: val_comment = u"{}.{} = {}".format( row.kpi.name, subcol.subkpi.name, row.kpi._get_expression_str_for_subkpi(subcol.subkpi), ) else: val_comment = u"{} = {}".format(row.kpi.name, row.kpi.expression) cell_style_props = row.style_props if row.kpi.style_expression: # evaluate style expression try: style_name = mis_safe_eval(row.kpi.style_expression, col.locals_dict) except Exception: _logger.error( "Error evaluating style expression <%s>", row.kpi.style_expression, exc_info=True, ) if style_name: style = self._style_model.search([("name", "=", style_name) ]) if style: cell_style_props = self._style_model.merge( [row.style_props, style[0]]) else: _logger.error("Style '%s' not found.", style_name) cell = KpiMatrixCell( row, subcol, val, val_rendered, tooltips and val_comment or None, cell_style_props, drilldown_arg, kpi.type, ) cell_tuple.append(cell) assert len(cell_tuple) == col.colspan col._set_cell_tuple(row, cell_tuple)
def compute_comparisons(self): """ Compute comparisons. Invoke this after setting all values. """ for ( cmpcol_key, (col_key, base_col_key, label, description), ) in self._comparison_todo.items(): col = self._cols[col_key] base_col = self._cols[base_col_key] common_subkpis = self._common_subkpis([col, base_col]) if (col.subkpis or base_col.subkpis) and not common_subkpis: raise UserError( _("Columns {} and {} are not comparable").format( col.description, base_col.description)) if not label: label = u"{} vs {}".format(col.label, base_col.label) comparison_col = KpiMatrixCol( cmpcol_key, label, description, {}, sorted(common_subkpis, key=lambda s: s.sequence), ) self._cols[cmpcol_key] = comparison_col for row in self.iter_rows(): cell_tuple = col.get_cell_tuple_for_row(row) base_cell_tuple = base_col.get_cell_tuple_for_row(row) if cell_tuple is None and base_cell_tuple is None: continue if cell_tuple is None: vals = [AccountingNone] * (len(common_subkpis) or 1) else: vals = [ cell.val for cell in cell_tuple if not common_subkpis or cell.subcol.subkpi in common_subkpis ] if base_cell_tuple is None: base_vals = [AccountingNone] * (len(common_subkpis) or 1) else: base_vals = [ cell.val for cell in base_cell_tuple if not common_subkpis or cell.subcol.subkpi in common_subkpis ] comparison_cell_tuple = [] for val, base_val, comparison_subcol in zip( vals, base_vals, comparison_col.iter_subcols()): # TODO FIXME average factors comparison = self._style_model.compare_and_render( self.lang, row.style_props, row.kpi.type, row.kpi.compare_method, val, base_val, 1, 1, ) delta, delta_r, delta_style, delta_type = comparison comparison_cell_tuple.append( KpiMatrixCell( row, comparison_subcol, delta, delta_r, None, delta_style, None, delta_type, )) comparison_col._set_cell_tuple(row, comparison_cell_tuple)
def init(cls, eltype, data, kdims, vdims): if kdims is None: kdims = eltype.kdims if vdims is None: vdims = eltype.vdims if not vdims: raise ValueError('GridInterface interface requires at least ' 'one value dimension.') ndims = len(kdims) dimensions = [dimension_name(d) for d in kdims + vdims] vdim_tuple = tuple(dimension_name(vd) for vd in vdims) if isinstance(data, tuple): if (len(data) != len(dimensions) and len(data) == (ndims + 1) and len(data[-1].shape) == (ndims + 1)): value_array = data[-1] data = {d: v for d, v in zip(dimensions, data[:-1])} data[vdim_tuple] = value_array else: data = {d: v for d, v in zip(dimensions, data)} elif (isinstance(data, list) and data == []): if len(kdims) == 1: data = OrderedDict([(d, []) for d in dimensions]) else: data = OrderedDict([(d.name, np.array([])) for d in kdims]) if len(vdims) == 1: data[vdims[0].name] = np.zeros((0, 0)) else: data[vdim_tuple] = np.zeros((0, 0, len(vdims))) elif not any( isinstance(data, tuple(t for t in interface.types if t is not None)) for interface in cls.interfaces.values()): data = {k: v for k, v in zip(dimensions, zip(*data))} elif isinstance(data, np.ndarray): if data.shape == (0, 0) and len(vdims) == 1: array = data data = OrderedDict([(d.name, np.array([])) for d in kdims]) data[vdims[0].name] = array elif data.shape == (0, 0, len(vdims)): array = data data = OrderedDict([(d.name, np.array([])) for d in kdims]) data[vdim_tuple] = array else: if data.ndim == 1: if eltype._auto_indexable_1d and len(kdims) + len( vdims) > 1: data = np.column_stack([np.arange(len(data)), data]) else: data = np.atleast_2d(data).T data = {k: data[:, i] for i, k in enumerate(dimensions)} elif isinstance(data, list) and data == []: data = {d: np.array([]) for d in dimensions[:ndims]} data.update( {d: np.empty((0, ) * ndims) for d in dimensions[ndims:]}) elif not isinstance(data, dict): raise TypeError('GridInterface must be instantiated as a ' 'dictionary or tuple') validate_dims = list(kdims) if vdim_tuple in data: if not isinstance(data[vdim_tuple], get_array_types()): data[vdim_tuple] = np.array(data[vdim_tuple]) else: validate_dims += vdims for dim in validate_dims: name = dimension_name(dim) if name not in data: raise ValueError("Values for dimension %s not found" % dim) if not isinstance(data[name], get_array_types()): data[name] = np.array(data[name]) kdim_names = [dimension_name(d) for d in kdims] if vdim_tuple in data: vdim_names = [vdim_tuple] else: vdim_names = [dimension_name(d) for d in vdims] expected = tuple([len(data[kd]) for kd in kdim_names]) irregular_shape = data[kdim_names[0]].shape if kdim_names else () valid_shape = irregular_shape if len( irregular_shape) > 1 else expected[::-1] shapes = tuple([data[kd].shape for kd in kdim_names]) for vdim in vdim_names: shape = data[vdim].shape error = DataError if len(shape) > 1 else ValueError if vdim_tuple in data: if shape[-1] != len(vdims): raise error( 'The shape of the value array does not match the number of value dimensions.' ) shape = shape[:-1] if (not expected and shape == (1, )) or (len(set((shape, ) + shapes)) == 1 and len(shape) > 1): # If empty or an irregular mesh pass elif len(shape) != len(expected): raise error('The shape of the %s value array does not ' 'match the expected dimensionality indicated ' 'by the key dimensions. Expected %d-D array, ' 'found %d-D array.' % (vdim, len(expected), len(shape))) elif any( (s != e and (s + 1) != e) for s, e in zip(shape, valid_shape)): raise error( 'Key dimension values and value array %s ' 'shapes do not match. 
Expected shape %s, ' 'actual shape: %s' % (vdim, valid_shape, shape), cls) return data, {'kdims': kdims, 'vdims': vdims}, {}
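# Hedged illustration of the gridded layout validated above: one 1-D coordinate array
# per key dimension plus a value array whose shape is the reverse of the coordinate
# lengths (the first array axis corresponds to the last key dimension). The dimension
# names 'x', 'y' and 'z' are hypothetical.
import numpy as np

xs = np.linspace(0, 1, 4)           # key dimension 'x' -> length 4
ys = np.linspace(0, 1, 3)           # key dimension 'y' -> length 3
zs = np.random.rand(3, 4)           # value array, shape == (len(ys), len(xs))

grid_data = {'x': xs, 'y': ys, 'z': zs}
expected = (len(xs), len(ys))        # (4, 3)
assert zs.shape == expected[::-1]    # matches the `valid_shape = expected[::-1]` check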
def groupby(cls, dataset, dim_names, container_type, group_type, **kwargs): # Get dimensions information dimensions = [dataset.get_dimension(d, strict=True) for d in dim_names] if 'kdims' in kwargs: kdims = kwargs['kdims'] else: kdims = [kdim for kdim in dataset.kdims if kdim not in dimensions] kwargs['kdims'] = kdims invalid = [d for d in dimensions if dataset.data[d.name].ndim > 1] if invalid: if len(invalid) == 1: invalid = "'%s'" % invalid[0] raise ValueError( "Cannot groupby irregularly sampled dimension(s) %s." % invalid) # Update the kwargs appropriately for Element group types group_kwargs = {} group_type = dict if group_type == 'raw' else group_type if issubclass(group_type, Element): group_kwargs.update(util.get_param_values(dataset)) else: kwargs.pop('kdims') group_kwargs.update(kwargs) drop_dim = any(d not in group_kwargs['kdims'] for d in kdims) # Find all the keys along supplied dimensions keys = [cls.coords(dataset, d.name) for d in dimensions] transpose = [ dataset.ndims - dataset.kdims.index(kd) - 1 for kd in kdims ] transpose += [i for i in range(dataset.ndims) if i not in transpose] # Iterate over the unique entries applying selection masks grouped_data = [] for unique_key in zip(*util.cartesian_product(keys)): select = dict(zip(dim_names, unique_key)) if drop_dim: group_data = dataset.select(**select) group_data = group_data if np.isscalar( group_data) else group_data.columns() else: group_data = cls.select(dataset, **select) if np.isscalar(group_data) or (isinstance( group_data, get_array_types()) and group_data.shape == ()): group_data = {dataset.vdims[0].name: np.atleast_1d(group_data)} for dim, v in zip(dim_names, unique_key): group_data[dim] = np.atleast_1d(v) elif not drop_dim: if isinstance(group_data, get_array_types()): group_data = {dataset.vdims[0].name: group_data} for vdim in dataset.vdims: data = group_data[vdim.name] data = data.transpose(transpose[::-1]) group_data[vdim.name] = np.squeeze(data) group_data = group_type(group_data, **group_kwargs) grouped_data.append((tuple(unique_key), group_data)) if issubclass(container_type, NdMapping): with item_check(False): return container_type(grouped_data, kdims=dimensions) else: return container_type(grouped_data)
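# Hedged sketch of the key iteration used above: build every combination of the
# group-by coordinates and turn each one into a selection dict. itertools.product
# stands in for util.cartesian_product, and the dimension names are made up.
from itertools import product as iter_product

dim_names = ['year', 'quarter']              # hypothetical group-by dimensions
keys = [[2022, 2023], ['Q1', 'Q2']]          # coordinates per dimension
for unique_key in iter_product(*keys):
    select = dict(zip(dim_names, unique_key))
    print(select)
    # {'year': 2022, 'quarter': 'Q1'}, {'year': 2022, 'quarter': 'Q2'}, ...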
def create_ndkey(length, indexes, values):
    key = [None] * length
    for i, v in zip(indexes, values):
        key[i] = v
    return tuple(key)
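# Example usage of create_ndkey: the supplied values land at the listed index
# positions and every other slot stays None.
assert create_ndkey(4, [0, 2], ('a', 'b')) == ('a', None, 'b', None)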
def unit_cell_analysis(self): """ Calls unit cell analysis module, which uses hierarchical clustering (Zeldin, et al, Acta D, 2015) to split integration results according to detected morphological groupings (if any). Most useful with preliminary integration without target unit cell specified. """ # Will not run clustering if only one integration result found or if turned off if not self.info.categories['integrated']: util.main_log(self.info.logfile, "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True) util.main_log(self.info.logfile, '\n UNIT CELL CANNOT BE DETERMINED!', True) elif len(self.info.categories['integrated']) == 1: unit_cell = (self.info.cluster_iterable[0][:5]) point_group = self.info.cluster_iterable[0][6] util.main_log(self.info.logfile, "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True) uc_line = "{:<6} {:^4}: {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, " \ "{:<6.2f}, {:<6.2f}".format('(1)', point_group, unit_cell[0], unit_cell[1], unit_cell[2], unit_cell[3], unit_cell[4], unit_cell[5]) util.main_log(self.info.logfile, uc_line, True) self.info.best_pg = str(point_group) self.info.best_uc = unit_cell else: uc_table = [] uc_summary = [] if self.params.analysis.run_clustering: # run hierarchical clustering analysis from xfel.clustering.cluster import Cluster counter = 0 self.info.clusters = [] threshold = self.params.analysis.cluster_threshold cluster_limit = self.params.analysis.cluster_limit final_pickles = self.info.categories['integrated'][0] pickles = [] if self.params.analysis.cluster_n_images > 0: import random for i in range(len(self.params.analysis.cluster_n_images)): random_number = random.randrange(0, len(final_pickles)) if final_pickles[random_number] in pickles: while final_pickles[random_number] in pickles: random_number = random.randrange( 0, len(final_pickles)) pickles.append(final_pickles[random_number]) else: pickles = final_pickles # Cluster from files (slow, but will keep for now) ucs = Cluster.from_files(pickle_list=pickles) # Do clustering clusters, _ = ucs.ab_cluster(threshold=threshold, log=False, write_file_lists=False, schnell=False, doplot=False) uc_table.append("\n\n{:-^80}\n" \ "".format(' UNIT CELL ANALYSIS ')) # extract clustering info and add to summary output list if cluster_limit is None: if len(pickles) / 10 >= 10: cluster_limit = 10 else: cluster_limit = len(pickles) / 10 for cluster in clusters: sorted_pg_comp = sorted(cluster.pg_composition.items(), key=lambda x: -1 * x[1]) pg_nums = [pg[1] for pg in sorted_pg_comp] cons_pg = sorted_pg_comp[np.argmax(pg_nums)] if len(cluster.members) > cluster_limit: counter += 1 # Write to file cluster_filenames = [j.path for j in cluster.members] if self.params.analysis.cluster_write_files: output_file = os.path.join( self.info.int_base, "uc_cluster_{}.lst".format(counter)) for fn in cluster_filenames: with open(output_file, 'a') as scf: scf.write('{}\n'.format(fn)) mark_output = os.path.basename(output_file) else: mark_output = '*' output_file = None else: mark_output = '' output_file = None # Populate clustering info for GUI display uc_init = uctbx.unit_cell(cluster.medians) symmetry = crystal.symmetry(unit_cell=uc_init, space_group_symbol='P1') groups = metric_subgroups(input_symmetry=symmetry, max_delta=3) top_group = groups.result_groups[0] best_sg = str(groups.lattice_group_info()).split('(')[0] best_uc = top_group['best_subsym'].unit_cell().parameters() # best_sg = str(top_group['best_subsym'].space_group_info()) uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \ "{:<6.2f} {:<6.2f} {:<6.2f} " \ 
"".format(best_uc[0], best_uc[1], best_uc[2], best_uc[3], best_uc[4], best_uc[5]) cluster_info = { 'number': len(cluster.members), 'pg': best_sg, 'uc': uc_no_stdev, 'filename': mark_output } self.info.clusters.append(cluster_info) # format and record output # TODO: How to propagate stdevs after conversion from Niggli? # uc_line = "{:<6} {:^4}: {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\ # "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\ # "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}) "\ # "{}".format('({})'.format(len(cluster.members)), cons_pg[0], # cluster.medians[0], cluster.stdevs[0], # cluster.medians[1], cluster.stdevs[1], # cluster.medians[2], cluster.stdevs[2], # cluster.medians[3], cluster.stdevs[3], # cluster.medians[4], cluster.stdevs[4], # cluster.medians[5], cluster.stdevs[5], # mark_output) # uc_table.append(uc_line) uc_table.append("{:<6}: {} {}".format( len(cluster.members), uc_no_stdev, mark_output)) lattices = ', '.join( ['{} ({})'.format(i[0], i[1]) for i in sorted_pg_comp]) # uc_info = [len(cluster.members), cons_pg[0], cluster.medians, # output_file, uc_line, lattices] uc_info = [ len(cluster.members), best_sg, best_uc, output_file, uc_no_stdev, lattices ] uc_summary.append(uc_info) else: # generate average unit cell uc_table.append("\n\n{:-^80}\n" \ "".format(' UNIT CELL AVERAGING (no clustering) ')) uc_a, uc_b, uc_c, uc_alpha, \ uc_beta, uc_gamma, uc_sg = zip(*self.info.cluster_iterable) cons_pg = Counter(uc_sg).most_common(1)[0][0] all_pgs = Counter(uc_sg).most_common() unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c), np.median(uc_alpha), np.median(uc_beta), np.median(uc_gamma)) # Populate clustering info for GUI display uc_init = uctbx.unit_cell(unit_cell) symmetry = crystal.symmetry(unit_cell=uc_init, space_group_symbol='P1') groups = metric_subgroups(input_symmetry=symmetry, max_delta=3) top_group = groups.result_groups[0] best_sg = str(groups.lattice_group_info()).split('(')[0] best_uc = top_group['best_subsym'].unit_cell().parameters() # best_sg = str(top_group['best_subsym'].space_group_info()) uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \ "{:<6.2f} {:<6.2f} {:<6.2f} " \ "".format(best_uc[0], best_uc[1], best_uc[2], best_uc[3], best_uc[4], best_uc[5]) cluster_info = { 'number': len(self.info.cluster_iterable), 'pg': best_sg, 'uc': uc_no_stdev, 'filename': None } self.info.clusters.append(cluster_info) # uc_line = "{:<6} {:^4}: {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \ # "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \ # "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}) " \ # "{}".format('({})'.format(len(self.final_objects)), cons_pg, # np.median(uc_a), np.std(uc_a), # np.median(uc_b), np.std(uc_b), # np.median(uc_c), np.std(uc_c), # np.median(uc_alpha), np.std(uc_alpha), # np.median(uc_beta), np.std(uc_beta), # np.median(uc_gamma), np.std(uc_gamma), '') # # uc_table.append(uc_line) uc_table.append(uc_no_stdev) lattices = ', '.join( ['{} ({})'.format(i[0], i[1]) for i in all_pgs]) # uc_info = [len(self.final_objects), cons_pg, unit_cell, None, # uc_line, lattices] uc_info = [ len(self.info.cluster_iterable), best_sg, best_uc, None, uc_no_stdev, lattices ] uc_summary.append(uc_info) uc_table.append('\nMost common unit cell:\n') # select the most prevalent unit cell (most members in cluster) uc_freqs = [i[0] for i in uc_summary] uc_pick = uc_summary[np.argmax(uc_freqs)] uc_table.append(uc_pick[4]) uc_table.append('\nBravais Lattices in Biggest Cluster: {}' ''.format(uc_pick[5])) self.info.best_pg = str(uc_pick[1]) self.info.best_uc = uc_pick[2] if uc_pick[3] 
is not None: self.prime_data_path = uc_pick[3] for item in uc_table: util.main_log(self.info.logfile, item, False) self.info.update(uc_table=uc_table) if self.gui_mode: return self.info.clusters
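# The "no clustering" branch above unpacks the accumulated [a, b, c, alpha, beta,
# gamma, sg] rows with zip(*...) and takes per-parameter medians. Minimal standalone
# sketch with made-up unit cells:
import numpy as np

cluster_iterable = [
    [78.1, 78.3, 37.0, 90.0, 90.0, 90.0, 'P 4 2 2'],
    [78.4, 78.2, 37.2, 90.0, 90.0, 90.0, 'P 4 2 2'],
    [77.9, 78.0, 36.9, 90.0, 90.0, 90.0, 'P 4'],
]
uc_a, uc_b, uc_c, uc_al, uc_be, uc_ga, uc_sg = zip(*cluster_iterable)
print([float(np.median(p)) for p in (uc_a, uc_b, uc_c, uc_al, uc_be, uc_ga)])
# [78.1, 78.2, 37.0, 90.0, 90.0, 90.0]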
def stress_iter(slt):
    itr1 = iter(slt)
    itr2 = (slt[pos] for pos in range(len(slt)))
    assert all(tup[0] == tup[1] for tup in zip(itr1, itr2))
def get_results(self, finished_objects=None): if not finished_objects: finished_objects = self.info.get_finished_objects() if not finished_objects: return False final_objects = [] if self.gui_mode: self.info.unplotted_stats = {} for key in self.info.stats: self.info.unplotted_stats[key] = dict(lst=[]) for obj in finished_objects: if len(self.info.unprocessed) > 0: for item in self.info.unprocessed: if item[0] == obj.img_index: self.info.unprocessed.remove(item) break if len(self.info.categories['not_processed'][0]) > 0: self.info.categories['not_processed'][0].remove(obj.img_path) if obj.fail: key = obj.fail.replace(' ', '_') if key in self.info.categories: self.info.categories[key][0].append(obj.img_path) else: self.info.categories['integrated'][0].append( obj.final['final']) self.info.final_objects.append(obj.obj_file) final_objects.append(obj) if not obj.fail or 'triage' not in obj.fail: self.info.categories['have_diffraction'][0].append( obj.img_path) # Calculate processing stats from final objects if final_objects: self.info.pixel_size = final_objects[0].final['pixel_size'] # Get observations from file try: all_obs = ep.load(self.info.idx_file) except Exception: all_obs = None # Collect image processing stats for obj in final_objects: for key in self.info.stats: if key in obj.final: stat_tuple = (obj.img_index, obj.img_path, obj.final[key]) self.info.stats[key]['lst'].append(stat_tuple) if self.gui_mode: if key not in self.info.unplotted_stats: self.info.unplotted_stats[key] = dict(lst=[]) self.info.unplotted_stats[key]['lst'].append( stat_tuple) # Unit cells and space groups (i.e. cluster iterable) self.info.cluster_iterable.append([ float(obj.final['a']), float(obj.final['b']), float(obj.final['c']), float(obj.final['alpha']), float(obj.final['beta']), float(obj.final['gamma']), str(obj.final['sg']) ]) # Get observations from this image obs = None if 'observations' in obj.final: obs = obj.final['observations'].as_non_anomalous_array() else: pickle_path = obj.final['final'] if os.path.isfile(pickle_path): try: pickle = ep.load(pickle_path) obs = pickle['observations'][ 0].as_non_anomalous_array() except Exception as e: print('IMAGE_PICKLE_ERROR for {}: {}'.format( pickle_path, e)) with util.Capturing(): if obs: # Append observations to combined miller array obs = obs.expand_to_p1() if all_obs: all_obs = all_obs.concatenate( obs, assert_is_similar_symmetry=False) else: all_obs = obs # Get B-factor from this image try: mxh = mx_handler() asu_contents = mxh.get_asu_contents(500) observations_as_f = obs.as_amplitude_array() observations_as_f.setup_binner(auto_binning=True) wp = statistics.wilson_plot(observations_as_f, asu_contents, e_statistics=True) b_factor = wp.wilson_b except RuntimeError as e: b_factor = 0 print('B_FACTOR_ERROR: ', e) self.info.b_factors.append(b_factor) # Save collected observations to file if all_obs: ep.dump(self.info.idx_file, all_obs) # Calculate dataset stats for k in self.info.stats: stat_list = zip(*self.info.stats[k]['lst'])[2] stats = dict(lst=self.info.stats[k]['lst'], median=np.median(stat_list), mean=np.mean(stat_list), std=np.std(stat_list), max=np.max(stat_list), min=np.min(stat_list), cons=Counter(stat_list).most_common(1)[0][0]) self.info.stats[k].update(stats) return True else: return False
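# The dataset-stats step above uses the zip(*records) transpose idiom to pull one
# field out of (index, path, value) tuples; note that under Python 3 the zip object
# has to be materialised with list() before indexing. The records below are
# hypothetical:
records = [(0, 'img_0001.pickle', 1.8),
           (1, 'img_0002.pickle', 2.1),
           (2, 'img_0003.pickle', 1.9)]
values = list(zip(*records))[2]   # -> (1.8, 2.1, 1.9)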
if opts.compute_IS: IS = [] all_preds = [] if opts.compute_CIS: CIS = [] if opts.compute_LPIPS: LPIPS = [] dist_model = PerceptualLoss(net='alex') if opts.trainer == 'CDUNIT': with torch.no_grad(): # Start testing style_fixed = Variable( torch.randn(opts.num_style, style_dim, 1, 1).cuda()) for i, (images, names) in enumerate(zip(data_loader, image_names)): #if opts.compute_LPIPS and i > 200: # break if opts.compute_CIS: cur_preds = [] print(names[1]) images = Variable(images.cuda()) content, _, content_seg = encode(images) style = style_fixed if opts.synchronized else Variable( torch.randn(opts.num_style, style_dim, 1, 1).cuda()) for j in range(opts.num_style): s = style[j].unsqueeze(0) outputs = decode(content, s, content_seg) outputs = (outputs + 1) / 2. if opts.compute_IS or opts.compute_CIS: pred = F.softmax(inception(inception_up(outputs)),
def _conv(cls, node, input_dict, transpose=False): """ Convolution method for both conv and transposed conv For transposed conv, Attr pads is not used for input, but declares how much output is padded. Here, output means output from transposed conv which already pad output_padding if set. So the pseudo explanation for output should be: output = conv_transpose_output + output_padding - pads And conv_transpose_output shape should be: conv_transpose_output_shape[i] = strides[i] * (input_shape[i] - 1) + kernel_shape[i] """ x = input_dict[node.inputs[0]] x_rank = len(x.get_shape()) x_shape = x.get_shape().as_list() spatial_size = x_rank - 2 support_cuda = cls.supports_device("CUDA") storage_format, compute_format = cls.get_data_format(x_rank, support_cuda) compute_c_idx = compute_format.find("C") spatial_format = "".join([d for d in compute_format if d not in ["N", "C"]]) in_weights = input_dict[node.inputs[1]] weights_rank = len(in_weights.get_shape()) if transpose: # Translate weights from (C x M x KH x KW) to (KH x KW X M X C) perm = list(range(2, weights_rank)) + [1, 0] else: # Translate weights from (M x C x KH x KW) to (KH x KW X C X M) perm = list(range(2, weights_rank)) + [1, 0] if "kernel_shape" in node.attrs.keys(): kernel_shape = node.attrs["kernel_shape"] assert in_weights.get_shape().as_list()[2:] == kernel_shape, ( "kernel_shape " "attr of convolution does not match the actual weight " "passed to this operation, attr {}, actual {}").format( kernel_shape, in_weights.get_shape().as_list()) weights = tf.transpose(in_weights, perm) dilations = node.attrs.get("dilations", [1] * spatial_size) strides = node.attrs.get("strides", [1] * spatial_size) pads = node.attrs.get("pads", [0, 0] * spatial_size) if not transpose: x = cls.get_padding_as_op(x, pads) group = node.attrs.get("group", 1) weight_groups = tf.split(weights, num_or_size_splits=group, axis=-1) if support_cuda: xs = tf.split(x, num_or_size_splits=group, axis=1) else: x = tf.transpose( x, perm=cls.get_perm_from_formats(storage_format, compute_format)) xs = tf.split(x, num_or_size_splits=group, axis=-1) if transpose: if dilations != [1] * spatial_size: raise RuntimeError("Cannot set non-1 dilation for conv transpose.") convolved = [] for (x, weight) in zip(xs, weight_groups): x_spatial_shape = [ x_shape[storage_format.find(d)] for d in spatial_format ] weights_shape = weights.get_shape().as_list() # calculate output shape output_shape = node.attrs.get("output_shape", None) if output_shape is None: conv_output_shape = [x_shape[storage_format.find("N")]] + [ strides[i] * (x_spatial_shape[i] - 1) + weights_shape[i] for i in list(range(spatial_size)) ] conv_output_shape.insert(compute_c_idx, weights_shape[-2]) else: conv_output_shape = [output_shape[0]] + [ s + pads[i] + pads[spatial_size + i] for i, s in enumerate(output_shape[2:]) ] conv_output_shape.insert(compute_c_idx, output_shape[1]) # make strides to match input rank strides_full = [1] + strides strides_full.insert(compute_c_idx, 1) # get corresponding function in tf if spatial_size == 1: conv_func = tf.contrib.nn.conv1d_transpose strides_full = strides[0] elif spatial_size == 2: conv_func = tf.nn.conv2d_transpose elif spatial_size == 3: conv_func = tf.nn.conv3d_transpose else: raise NotImplementedError( "Transposed convolution for {}d is not implemented in Tensorflow". 
format(spatial_size)) # use raw input x to do transposed conv conv_rs = conv_func( x, weights, conv_output_shape, strides_full, padding="VALID", data_format=compute_format) # pad output first by output_padding attr if "output_padding" in node.attrs and output_shape is None: output_padding = [[0, 0] ] + [[0, p] for p in node.attrs["output_padding"]] output_padding.insert(compute_c_idx, [0, 0]) conv_rs = tf.pad(conv_rs, output_padding) # remove pads set in pads attr conv_rs_shape = conv_rs.get_shape().as_list() begin = [0] + pads[:spatial_size] begin.insert(compute_c_idx, 0) size = [ s if d in ["N", "C"] else s - pads[spatial_format.find(d)] - pads[spatial_format.find(d) + spatial_size] for d, s in zip(compute_format, conv_rs_shape) ] conv_rs = tf.slice(conv_rs, begin=begin, size=size) convolved.append(conv_rs) else: convolved = [ tf.nn.convolution( x, weight, "VALID", strides=strides, dilation_rate=dilations, data_format=compute_format) for (x, weight) in zip(xs, weight_groups) ] if len(node.inputs) == 2: if support_cuda: output = tf.concat(convolved, axis=1) else: output = tf.concat(convolved, axis=-1) output = tf.transpose( output, perm=cls.get_perm_from_formats(compute_format, storage_format)) else: bias = input_dict[node.inputs[2]] bias = cls._explicit_broadcast( bias, broadcast_dim=compute_c_idx, total_num_dim=x_rank) if support_cuda: output = tf.concat(convolved, axis=1) output = tf.add(output, bias) else: output = tf.concat(convolved, axis=-1) output = tf.add(output, bias) output = tf.transpose( output, perm=cls.get_perm_from_formats(compute_format, storage_format)) return [output]
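# Quick numeric check of the transposed-conv shape rule quoted in the docstring above:
# output[i] = strides[i] * (input[i] - 1) + kernel[i], before output_padding and pads
# are applied. The shapes here are made up:
input_shape = [5, 5]
strides = [2, 2]
kernel_shape = [3, 3]
out = [s * (i - 1) + k for i, s, k in zip(input_shape, strides, kernel_shape)]
print(out)   # [11, 11]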
def uimap(self, f, *args, **kwds):
    AbstractWorkerPool._AbstractWorkerPool__imap(self, f, *args, **kwds)
    _pool = self._serve()
    return _pool.imap_unordered(star(f), zip(*args))  # chunksize
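# The pool call above packs parallel argument iterables with zip(*args) and unpacks
# each tuple again via star(f). Self-contained sketch of that pairing; the star helper
# defined here is a stand-in for the one the pool module imports:
def star(f):
    return lambda args: f(*args)

def add(a, b):
    return a + b

packed = list(zip([1, 2, 3], [10, 20, 30]))   # [(1, 10), (2, 20), (3, 30)]
print(list(map(star(add), packed)))            # [11, 22, 33]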
def get_cursor_for_dict_results(self):
    conn_cursor = self.conn.cursor()
    conn_cursor.row_factory = lambda c, r: dict(list(zip([col[0] for col in c.description], r)))
    return conn_cursor
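# Hedged usage sketch assuming a sqlite3-style connection, since row_factory is the
# hook that driver exposes for customising rows (the wrapped self.conn above may be
# any connection object with the same hook):
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute('CREATE TABLE t (id INTEGER, name TEXT)')
conn.execute("INSERT INTO t VALUES (1, 'alpha')")
cur = conn.cursor()
cur.row_factory = lambda c, r: dict(zip([col[0] for col in c.description], r))
print(cur.execute('SELECT * FROM t').fetchall())   # [{'id': 1, 'name': 'alpha'}]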
def _create_video_clip(self, chunks, start_offset, stop_offset): playlist_duration = stop_offset - start_offset playlist_offset = 0 playlist_streams = [] playlist_tags = [] for chunk in chunks: chunk_url = chunk["url"] chunk_length = chunk["length"] chunk_start = playlist_offset chunk_stop = chunk_start + chunk_length chunk_stream = HTTPStream(self.session, chunk_url) if start_offset >= chunk_start and start_offset <= chunk_stop: try: headers = extract_flv_header_tags(chunk_stream) except IOError as err: raise StreamError("Error while parsing FLV: {0}", err) if not headers.metadata: raise StreamError( "Missing metadata tag in the first chunk") metadata = headers.metadata.data.value keyframes = metadata.get("keyframes") if not keyframes: if chunk["upkeep"] == "fail": raise StreamError( "Unable to seek into muted chunk, try another timestamp" ) else: raise StreamError( "Missing keyframes info in the first chunk") keyframe_offset = None keyframe_offsets = keyframes.get("filepositions") keyframe_times = [ playlist_offset + t for t in keyframes.get("times") ] for time, offset in zip(keyframe_times, keyframe_offsets): if time > start_offset: break keyframe_offset = offset if keyframe_offset is None: raise StreamError("Unable to find a keyframe to seek to " "in the first chunk") chunk_headers = dict( Range="bytes={0}-".format(int(keyframe_offset))) chunk_stream = HTTPStream(self.session, chunk_url, headers=chunk_headers) playlist_streams.append(chunk_stream) for tag in headers: playlist_tags.append(tag) elif chunk_start >= start_offset and chunk_start < stop_offset: playlist_streams.append(chunk_stream) playlist_offset += chunk_length return FLVPlaylist(self.session, playlist_streams, tags=playlist_tags, duration=playlist_duration)
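# The seek logic above walks zipped (time, offset) pairs and keeps the last offset
# whose time does not exceed the requested start. Standalone sketch with made-up
# keyframe data:
keyframe_times = [0.0, 4.0, 8.0, 12.0]
keyframe_offsets = [0, 102400, 204800, 307200]
start_offset = 9.5

seek_offset = None
for time, offset in zip(keyframe_times, keyframe_offsets):
    if time > start_offset:
        break
    seek_offset = offset
print(seek_offset)   # 204800 -> the byte-range request would start here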
def named_series(self):
    '''Generator of tuples with name and series data.'''
    return zip(self.names(), self.series())
analysis with the "LB Incident Radiation" component.
"""

ghenv.Component.Name = 'LB Real Time Incident Radiation'
ghenv.Component.NickName = 'RTrad'
ghenv.Component.Message = '1.2.0'
ghenv.Component.Category = 'Ladybug'
ghenv.Component.SubCategory = '3 :: Analyze Geometry'
ghenv.Component.AdditionalHelpFromDocStrings = '0'

try:  # python 2
    from itertools import izip as zip
except ImportError:  # python 3
    pass

try:
    from ladybug_rhino.grasshopper import all_required_inputs, de_objectify_output
except ImportError as e:
    raise ImportError('\nFailed to import ladybug_rhino:\n\t{}'.format(e))

if all_required_inputs(ghenv.Component):
    # deconstruct the matrices and get the total radiation from each patch
    int_mtx = de_objectify_output(_int_mtx)
    sky_mtx = de_objectify_output(_sky_mtx)
    total_sky_rad = [dirr + difr for dirr, difr in zip(sky_mtx[1], sky_mtx[2])]

    # compute the results
    results = []
    for pt_rel in int_mtx:
        results.append(sum(r * w for r, w in zip(pt_rel, total_sky_rad)))
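# The per-point result above is a weighted sum: each entry of pt_rel weights the
# radiation of one sky patch. Tiny standalone illustration with made-up numbers:
pt_rel = [1.0, 0.5, 0.0]              # fraction of each sky patch seen by the point
total_sky_rad = [120.0, 80.0, 60.0]   # hypothetical radiation per patch
print(sum(r * w for r, w in zip(pt_rel, total_sky_rad)))   # 160.0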
def directional_irradiance(self, altitude=90, azimuth=180,
                           ground_reflectance=0.2, isotrophic=True):
    """Returns the irradiance components facing a given altitude and azimuth.

    This method computes unobstructed solar flux facing a given altitude and
    azimuth. The default is set to return the global horizontal irradiance,
    assuming an altitude facing straight up (90 degrees).

    Args:
        altitude: A number between -90 and 90 that represents the altitude at
            which irradiance is being evaluated in degrees.
        azimuth: A number between 0 and 360 that represents the azimuth at
            which irradiance is being evaluated in degrees.
        ground_reflectance: A number between 0 and 1 that represents the
            reflectance of the ground. Default is set to 0.2. Some common
            ground reflectances are:
                urban: 0.18
                grass: 0.20
                fresh grass: 0.26
                soil: 0.17
                sand: 0.40
                snow: 0.65
                fresh_snow: 0.75
                asphalt: 0.12
                concrete: 0.30
                sea: 0.06
        isotrophic: A boolean value that sets whether an isotrophic sky is
            used (as opposed to an anisotrophic sky). An isotrophic sky
            assumes an even distribution of diffuse irradiance across the sky
            while an anisotrophic sky places more diffuse irradiance near the
            solar disc. Default is set to True for isotrophic.

    Returns:
        total_irradiance: A data collection of total solar irradiance.
        direct_irradiance: A data collection of direct solar irradiance.
        diffuse_irradiance: A data collection of diffuse sky solar irradiance.
        reflected_irradiance: A data collection of ground reflected solar
            irradiance.
    """
    # function to convert polar coordinates to xyz.
    def pol2cart(phi, theta):
        mult = math.cos(theta)
        x = math.sin(phi) * mult
        y = math.cos(phi) * mult
        z = math.sin(theta)
        return Vector3(x, y, z)

    # convert the altitude and azimuth to a normal vector
    normal = pol2cart(math.radians(azimuth), math.radians(altitude))

    # create sunpath and get altitude at every timestep of the year
    direct_irr, diffuse_irr, reflected_irr, total_irr = [], [], [], []
    sp = Sunpath.from_location(self.location)
    sp.is_leap_year = self.is_leap_year
    for dt, dnr, dhr in zip(self.datetimes, self.direct_normal_irradiance,
                            self.diffuse_horizontal_irradiance):
        sun = sp.calculate_sun_from_date_time(dt)
        sun_vec = pol2cart(math.radians(sun.azimuth),
                           math.radians(sun.altitude))
        vec_angle = sun_vec.angle(normal)

        # direct irradiance on surface
        srf_dir = 0
        if sun.altitude > 0 and vec_angle < math.pi / 2:
            srf_dir = dnr * math.cos(vec_angle)

        # diffuse irradiance on surface
        if isotrophic is True:
            srf_dif = dhr * ((math.sin(math.radians(altitude)) / 2) + 0.5)
        else:
            y = max(0.45, 0.55 + (0.437 * math.cos(vec_angle)) + 0.313 *
                    math.cos(vec_angle) * 0.313 * math.cos(vec_angle))
            srf_dif = dhr * (y * (math.sin(math.radians(abs(90 - altitude)))) +
                             math.cos(math.radians(abs(90 - altitude))))

        # reflected irradiance on surface.
        e_glob = dhr + dnr * math.cos(math.radians(90 - sun.altitude))
        srf_ref = e_glob * ground_reflectance * (
            0.5 - (math.sin(math.radians(altitude)) / 2))

        # add it all together
        direct_irr.append(srf_dir)
        diffuse_irr.append(srf_dif)
        reflected_irr.append(srf_ref)
        total_irr.append(srf_dir + srf_dif + srf_ref)

    # create the headers
    a_per = AnalysisPeriod(timestep=self.timestep,
                           is_leap_year=self.is_leap_year)
    direct_hea = diffuse_hea = reflected_hea = total_hea = \
        Header(Irradiance(), 'W/m2', a_per, self.metadata)

    # create the data collections
    direct_irradiance = HourlyContinuousCollection(direct_hea, direct_irr)
    diffuse_irradiance = HourlyContinuousCollection(diffuse_hea, diffuse_irr)
    reflected_irradiance = HourlyContinuousCollection(
        reflected_hea, reflected_irr)
    total_irradiance = HourlyContinuousCollection(total_hea, total_irr)

    return total_irradiance, direct_irradiance, \
        diffuse_irradiance, reflected_irradiance
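# Worked illustration of the direct term above: a surface whose normal is 30 degrees
# off the sun vector receives cos(30 deg) of the direct normal irradiance. The numbers
# are hypothetical, not from a weather file:
import math

dnr = 800.0                      # direct normal irradiance, W/m2
vec_angle = math.radians(30)     # angle between sun vector and surface normal
srf_dir = dnr * math.cos(vec_angle)
print(round(srf_dir, 1))         # 692.8 W/m2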
def map(self, f, *args, **kwds):
    AbstractWorkerPool._AbstractWorkerPool__map(self, f, *args, **kwds)
    _pool = self._serve()
    return _pool.map(star(f), zip(*args))  # chunksize
def _load_stream_without_unbatching(self, stream):
    key_batch_stream = self.key_ser._load_stream_without_unbatching(stream)
    val_batch_stream = self.val_ser._load_stream_without_unbatching(stream)
    for (key_batch, val_batch) in zip(key_batch_stream, val_batch_stream):
        # for correctness with repeated cartesian/zip this must be returned as one batch
        yield product(key_batch, val_batch)
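# Standalone sketch of the batch pairing above: the key and value streams are assumed
# to be batched identically, so zip() keeps corresponding batches together and
# product() rebuilds the pairs inside each batch.
from itertools import product

key_batches = [[1, 2], [3]]
val_batches = [['a', 'b'], ['c']]
pairs = [list(product(kb, vb)) for kb, vb in zip(key_batches, val_batches)]
# [[(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')], [(3, 'c')]]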
print('Note that dataloader may hang with too much nworkers.')

# if training with multiple GPUs
if num_gpu > 1:
    print('Now you are using %d gpus.' % num_gpu)
    trainer.dis_a = torch.nn.DataParallel(trainer.dis_a, gpu_ids)
    trainer.dis_b = trainer.dis_a
    trainer = torch.nn.DataParallel(trainer, gpu_ids)

# training
while True:
    # loop over training data a and b
    # each train_loader yields (image, label, pos); note that image and pos are
    # different pictures of the same ID, and label is the ID
    for it, ((images_a, labels_a, pos_a), (images_b, labels_b, pos_b)) in enumerate(
            zip(train_loader_a, train_loader_b)):
        if num_gpu > 1:
            trainer.module.update_learning_rate()
        else:
            # update the learning rate
            trainer.update_learning_rate()
        # images_a[batch_size, 3, 256, 128], images_b[batch_size, 3, 256, 128]
        images_a, images_b = images_a.cuda().detach(), images_b.cuda().detach()
        # pos_a[batch_size, 3, 256, 128], pos_b[batch_size, 3, 1024]
        pos_a, pos_b = pos_a.cuda().detach(), pos_b.cuda().detach()
        # labels_a[batch_size], labels_b[batch_size]
        labels_a, labels_b = labels_a.cuda().detach(), labels_b.cuda().detach()
        with Timer("Elapsed time in update: %f"):  # Main training code
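# zip() over two loaders stops at the shorter one, so one pass of the loop above
# covers min(len(train_loader_a), len(train_loader_b)) iterations. Tiny list-based
# sketch:
loader_a = [('img_a1', 0), ('img_a2', 1), ('img_a3', 2)]
loader_b = [('img_b1', 5), ('img_b2', 6)]
print([it for it, _ in enumerate(zip(loader_a, loader_b))])   # [0, 1]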
def init(cls, eltype, data, kdims, vdims): element_params = eltype.params() kdim_param = element_params['kdims'] vdim_param = element_params['vdims'] if util.is_series(data): data = data.to_frame() if util.is_dataframe(data): ncols = len(data.columns) index_names = data.index.names if isinstance( data, pd.DataFrame) else [data.index.name] if index_names == [None]: index_names = ['index'] if eltype._auto_indexable_1d and ncols == 1 and kdims is None: kdims = list(index_names) if isinstance(kdim_param.bounds[1], int): ndim = min([kdim_param.bounds[1], len(kdim_param.default)]) else: ndim = None nvdim = vdim_param.bounds[1] if isinstance(vdim_param.bounds[1], int) else None if kdims and vdims is None: vdims = [c for c in data.columns if c not in kdims] elif vdims and kdims is None: kdims = [c for c in data.columns if c not in vdims][:ndim] elif kdims is None: kdims = list(data.columns[:ndim]) if vdims is None: vdims = [ d for d in data.columns[ndim:(( ndim + nvdim) if nvdim else None)] if d not in kdims ] elif kdims == [] and vdims is None: vdims = list(data.columns[:nvdim if nvdim else None]) # Handle reset of index if kdims reference index by name for kd in kdims: kd = dimension_name(kd) if kd in data.columns: continue if any(kd == ('index' if name is None else name) for name in index_names): data = data.reset_index() break if any(isinstance(d, (np.int64, int)) for d in kdims + vdims): raise DataError( "pandas DataFrame column names used as dimensions " "must be strings not integers.", cls) if kdims: kdim = dimension_name(kdims[0]) if eltype._auto_indexable_1d and ncols == 1 and kdim not in data.columns: data = data.copy() data.insert(0, kdim, np.arange(len(data))) for d in kdims + vdims: d = dimension_name(d) if len([c for c in data.columns if c == d]) > 1: raise DataError( 'Dimensions may not reference duplicated DataFrame ' 'columns (found duplicate %r columns). If you want to plot ' 'a column against itself simply declare two dimensions ' 'with the same name. 
' % d, cls) else: # Check if data is of non-numeric type # Then use defined data type kdims = kdims if kdims else kdim_param.default vdims = vdims if vdims else vdim_param.default columns = [dimension_name(d) for d in kdims + vdims] if isinstance(data, dict) and all(c in data for c in columns): data = cyODict(((d, data[d]) for d in columns)) elif isinstance(data, list) and len(data) == 0: data = {c: np.array([]) for c in columns} elif isinstance(data, (list, dict)) and data in ([], {}): data = None elif (isinstance(data, dict) and not all(d in data for d in columns) and not any(isinstance(v, np.ndarray) for v in data.values())): column_data = sorted(data.items()) k, v = column_data[0] if len(util.wrap_tuple(k)) != len(kdims) or len( util.wrap_tuple(v)) != len(vdims): raise ValueError( "Dictionary data not understood, should contain a column " "per dimension or a mapping between key and value dimension " "values.") column_data = zip(*((util.wrap_tuple(k) + util.wrap_tuple(v)) for k, v in column_data)) data = cyODict( ((c, col) for c, col in zip(columns, column_data))) elif isinstance(data, np.ndarray): if data.ndim == 1: if eltype._auto_indexable_1d and len(kdims) + len( vdims) > 1: data = (np.arange(len(data)), data) else: data = np.atleast_2d(data).T else: data = tuple(data[:, i] for i in range(data.shape[1])) if isinstance(data, tuple): data = [ np.array(d) if not isinstance(d, np.ndarray) else d for d in data ] if not cls.expanded(data): raise ValueError( 'PandasInterface expects data to be of uniform shape.') data = pd.DataFrame(dict(zip(columns, data)), columns=columns) elif ((isinstance(data, dict) and any(c not in data for c in columns)) or (isinstance(data, list) and any( isinstance(d, dict) and c not in d for d in data for c in columns))): raise ValueError( 'PandasInterface could not find specified dimensions in the data.' ) else: data = pd.DataFrame(data, columns=columns) return data, {'kdims': kdims, 'vdims': vdims}, {}
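# Sketch of the tuple branch above: column arrays are paired with the declared
# dimension names and handed to pandas as a dict. The dimensions 'x' and 'y' are
# hypothetical:
import numpy as np
import pandas as pd

columns = ['x', 'y']
data = (np.arange(3), np.array([10.0, 20.0, 30.0]))
df = pd.DataFrame(dict(zip(columns, data)), columns=columns)
print(df.shape)   # (3, 2)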
def genFields(names, types):
    return list(zip(names, types))
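# genFields simply pairs field names with types, e.g. as (name, type) tuples for a
# structured record definition:
assert genFields(['x', 'y'], ['f8', 'i4']) == [('x', 'f8'), ('y', 'i4')]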