def redim(array, ndim, shape=None):
    """
    Add or remove trailing dimensions from an array by reshaping.

    Useful for turning N-dimensional data into the 2D shape required for
    matrix factorization / clustering, and for reversing this transformation.

    array : numpy array.
    ndim : int, desired number of dimensions. When contracting, the first
        ``ndim - 1`` axes are kept and all remaining axes are merged into a
        single trailing axis.
    shape : tuple, desired shape when expanding dimensions (required when
        ``ndim`` exceeds ``array.ndim``).

    Returns the result of ``array.reshape``; NumPy hands back a view of the
    input whenever the memory layout allows it.

    Raises ValueError when expanding without ``shape``, or when asked to
    contract to fewer than one dimension.
    """
    from numpy import prod

    if ndim > array.ndim:
        if shape is None:
            raise ValueError(
                "Cannot expand dimensions without supplying a shape argument")
        return array.reshape(shape)

    if ndim < array.ndim:
        # A non-positive ndim would turn the slice bounds below into negative
        # indices and silently yield an array with the wrong rank.
        if ndim < 1:
            raise ValueError(
                "Cannot contract to fewer than 1 dimension, got ndim={0}"
                .format(ndim))
        kept = array.shape[:ndim - 1]
        merged = prod(array.shape[ndim - 1:])
        return array.reshape((*kept, merged))

    # Same rank requested: reshape to the current shape.
    return array.reshape(array.shape)
def invert_scale(scaler, X, value):
    """Undo scaling for a single value appended to a row of inputs.

    The items of ``X`` followed by ``value`` are packed into a one-row array,
    passed through ``scaler.inverse_transform`` and the last column of the
    result is returned.

    :param scaler: object exposing ``inverse_transform``.
    :param X: iterable of the input values that precede ``value`` in the row.
    :param value: the scaled value to convert back.
    :return: element ``[0, -1]`` of the inverse-transformed row.
    """
    import numpy as np

    values = list(X) + [value]
    sample = np.array(values)
    n_columns = len(sample)
    # inverse_transform is given a single row, hence the (1, n) shape.
    unscaled = scaler.inverse_transform(sample.reshape(1, n_columns))
    return unscaled[0, -1]
def pad_array(array):
    """Forward-fill zero entries in place and return the data as a column.

    Every element equal to zero is overwritten with the most recent non-zero
    element that precedes it. Zeros that occur before the first non-zero
    element are overwritten with ``array[0]``, i.e. they stay zero.

    :param array: 1-D array; it is modified in place.
    :return: ``array`` reshaped to ``(len(array), 1)``.
    """
    source_position = 0
    for position, entry in enumerate(array):
        if entry != 0:
            # Remember where the latest usable value lives.
            source_position = position
            continue
        array[position] = array[source_position]
    return array.reshape((len(array), 1))
def expand_1d_to_2d_array(array, length, axis=0):
    """
    Duplicate the contents of a 1D array along a new axis of size 'length'.

    With ``axis == 0`` the values form the rows of the result; with any other
    'axis' value they form the columns. The result is produced with
    ``np.broadcast_to`` rather than by copying the data.

    Examples::

        >>> a = np.array([1, 2, 3, 4])
        >>> expand_1d_to_2d_array(a, 4, axis=0)
        [[1 2 3 4]
         [1 2 3 4]
         [1 2 3 4]
         [1 2 3 4]]

        >>> a = np.array([1, 2, 3, 4])
        >>> expand_1d_to_2d_array(a, 4, axis=1)
        [[1 1 1 1]
         [2 2 2 2]
         [3 3 3 3]
         [4 4 4 4]]

    :param array: the array to expand (masked arrays are supported).
    :param length: size of the newly created axis.
    :param axis: 0 to repeat the data as rows, otherwise as columns.
    :return: broadcast 2D array of shape ``(length, array.size)`` for
        ``axis == 0`` and ``(array.size, length)`` otherwise.
    """
    from numpy.ma import MaskedArray

    n_points = array.size
    if axis == 0:
        source, target_shape = array, (length, n_points)
    else:
        source, target_shape = array.reshape(n_points, 1), (n_points, length)

    expanded = np.broadcast_to(source, target_shape, subok=True)

    # The mask does not survive the broadcast, so broadcast it separately
    # and attach it to the result.
    if isinstance(array, MaskedArray):
        expanded.mask = np.broadcast_to(source.mask, target_shape)

    return expanded
def invert_scale(scaler, X, yhat):
    """
    Convert a scaled prediction back to its original scale.

    The input values in X and the prediction yhat are joined into a single
    row, the row is passed through ``scaler.inverse_transform`` and only the
    last column (the position of yhat) is returned.

    :param scaler: Object exposing ``inverse_transform``. It is presumably
        fitted on rows laid out as inputs followed by the output - confirm
        against the code that builds the scaler.
    :param X: Row of input values used to produce the prediction (yhat).
    :param yhat: Predicted (scaled) output.
    :return: Only the "yhat" value, rescaled.
    """
    # The prediction goes last so that it can be read back from column -1.
    features_and_prediction = list(X)
    features_and_prediction.append(yhat)
    row = numpy.array(features_and_prediction)
    # inverse_transform receives a single row, so give the data a leading
    # axis of length one.
    row = row.reshape(1, len(row))
    return scaler.inverse_transform(row)[0, -1]
def expand_1d_to_2d_array(array, length, axis=0):
    """
    Duplicate the contents of a 1D array along a new axis of size 'length'.

    With ``axis == 0`` the values form the rows of the result; with any other
    'axis' value they form the columns. The result is a read-only broadcast
    view produced by ``numpy.broadcast_to``; no data is copied.

    Examples::

        >>> a = np.array([1, 2, 3, 4])
        >>> expand_1d_to_2d_array(a, 4, axis=0)
        [[1 2 3 4]
         [1 2 3 4]
         [1 2 3 4]
         [1 2 3 4]]

        >>> a = np.array([1, 2, 3, 4])
        >>> expand_1d_to_2d_array(a, 4, axis=1)
        [[1 1 1 1]
         [2 2 2 2]
         [3 3 3 3]
         [4 4 4 4]]

    :param array: the array to expand.
    :param length: size of the newly created axis.
    :param axis: 0 to repeat the data as rows, otherwise as columns.
    :return: broadcast 2D array of shape ``(length, array.size)`` for
        ``axis == 0`` and ``(array.size, length)`` otherwise.
    """
    # Use the public top-level name: recent NumPy releases no longer
    # re-export broadcast_to from numpy.lib.stride_tricks.
    from numpy import broadcast_to

    n_values = array.size
    if axis == 0:
        return broadcast_to(array, (length, n_values))

    # Turn the data into a column so it can be repeated across the new axis.
    column = array.reshape(n_values, 1)
    return broadcast_to(column, (n_values, length))
def ensure_2d_array(array, flags, **kwargs):
    """Return *array* as an ndarray meeting *flags*, promoting 1-D to one row.

    :param array: array-like input, forwarded to ``require``.
    :param flags: passed to ``require`` as its ``requirements`` argument.
    :param kwargs: any further keyword arguments accepted by ``require``.
    :return: the converted array; a 1-D input of ``n`` items comes back with
        shape ``(1, n)``, any other dimensionality is returned unchanged.
    """
    array = require(array, requirements=flags, **kwargs)
    if array.ndim == 1:
        # Spell out the leading 1 instead of -1: NumPy cannot infer a
        # dimension when the array is empty, so reshape(-1, 0) would raise.
        array = array.reshape(1, array.size)
    return array
def parsePSF(filename, title=None, ag=None):
    """Return an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.  Note that this function does not evaluate angles, dihedrals, and
    impropers sections.

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file disagree on the atom count
    :raises IOError: when the ``!NATOM`` or ``!NBOND:`` header cannot be
        found, or when fewer atom lines or bond indices are present than the
        headers announce"""

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')

    # NOTE(review): the comparisons below use str literals, so this assumes
    # openFile yields text lines for mode 'rb' - confirm under Python 3.
    psf = openFile(filename, 'rb')
    try:
        # Scan forward to the "<count> !NATOM" header of the atom section.
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith('!NATOM'):
                n_atoms = int(line.split('!')[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError('PSF file does not contain an !NATOM section')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)

        if ag is None:
            ag = AtomGroup(title)
        else:
            if n_atoms != ag.numAtoms():
                raise ValueError('ag and PSF file must have same number of '
                                 'atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        # Size hint: about 71 bytes per atom line plus a few extra lines so
        # that the bond header is normally part of the same batch.
        lines = psf.readlines(71 * (n_atoms + 5))
        if len(lines) < n_atoms:
            raise IOError('number of lines in PSF is less than the number of '
                          'atoms')

        for i, line in enumerate(lines):
            if i == n_atoms:
                break
            if len(line) <= 71:
                # Short lines are read as fixed-width columns.
                serials[i] = line[:8]
                segnames[i] = line[9:13].strip()
                resnums[i] = line[14:19]
                resnames[i] = line[19:23].strip()
                atomnames[i] = line[24:28].strip()
                atomtypes[i] = line[29:35].strip()
                charges[i] = line[35:44]
                masses[i] = line[50:60]
            else:
                # Longer lines are read as whitespace-separated fields.
                items = line.split()
                serials[i] = items[0]
                segnames[i] = items[1]
                resnums[i] = items[2]
                resnames[i] = items[3]
                atomnames[i] = items[4]
                atomtypes[i] = items[5]
                charges[i] = items[6]
                masses[i] = items[7]

        # Locate the "<count> !NBOND: ..." header after the atom lines.
        n_bonds = None
        for i in range(n_atoms, len(lines)):
            items = lines[i].split()
            if len(items) >= 2 and items[1] == '!NBOND:':
                n_bonds = int(items[0])
                break
        if n_bonds is None:
            raise IOError('PSF file does not contain an !NBOND section')

        # Floor division keeps the byte count an integer under Python 3,
        # where "/" yields a float that read() rejects.
        lines = ''.join(lines[i + 1:]) + psf.read(n_bonds // 4 * 71)
        array = fromstring(lines, count=n_bonds * 2, dtype=int, sep=' ')
        if len(array) != n_bonds * 2:
            raise IOError('number of bonds expected and parsed do not match')
    finally:
        # Release the handle on success and on every error path alike.
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # Shift the parsed indices down by one, in place, before pairing them up.
    array = add(array, -1, array)
    ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
def ensure_2d_array(array, flags, **kwargs):
    """Convert *array* with ``require`` and make sure a 1-D result gets a
    leading axis.

    :param array: array-like input, forwarded to ``require``.
    :param flags: passed to ``require`` as its ``requirements`` argument.
    :param kwargs: any further keyword arguments accepted by ``require``.
    :return: the converted array; a 1-D input of ``n`` items is returned with
        shape ``(1, n)``, inputs of any other dimensionality are returned as
        converted.
    """
    converted = require(array, requirements=flags, **kwargs)
    if converted.ndim != 1:
        return converted
    # An explicit row count of 1 also works for empty input, where a -1
    # placeholder cannot be inferred and reshape would raise ValueError.
    return converted.reshape(1, converted.size)
def parsePSF(filename, title=None, ag=None):
    """Return an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.  Note that this function does not evaluate angles, dihedrals, and
    impropers sections.

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file disagree on the atom count
    :raises IOError: when the ``!NATOM`` or ``!NBOND:`` header cannot be
        found, or when fewer atom lines or bond indices are present than the
        headers announce"""

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError('ag must be an AtomGroup instance')

    # NOTE(review): the comparisons below use str literals, so this assumes
    # openFile yields text lines for mode 'rb' - confirm under Python 3.
    psf = openFile(filename, 'rb')
    try:
        # Find the "<count> !NATOM" header that opens the atom section.
        n_atoms = None
        for line in iter(psf.readline, type(psf.readline())()):
            line = line.strip()
            if line.endswith('!NATOM'):
                n_atoms = int(line.split('!')[0])
                break
        if n_atoms is None:
            raise IOError('PSF file does not contain an !NATOM section')

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)

        if ag is None:
            ag = AtomGroup(title)
        else:
            if n_atoms != ag.numAtoms():
                raise ValueError('ag and PSF file must have same number of '
                                 'atoms')

        serials = zeros(n_atoms, ATOMIC_FIELDS['serial'].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS['segment'].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS['resnum'].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS['resname'].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS['name'].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS['type'].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS['charge'].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS['mass'].dtype)

        # Size hint: about 71 bytes per atom line plus a few extra lines so
        # that the bond header is normally part of the same batch.
        lines = psf.readlines(71 * (n_atoms + 5))
        if len(lines) < n_atoms:
            raise IOError('number of lines in PSF is less than the number of '
                          'atoms')

        for i, line in enumerate(lines):
            if i == n_atoms:
                break
            if len(line) <= 71:
                # Short lines are read as fixed-width columns.
                serials[i] = line[:8]
                segnames[i] = line[9:13].strip()
                resnums[i] = line[14:19]
                resnames[i] = line[19:23].strip()
                atomnames[i] = line[24:28].strip()
                atomtypes[i] = line[29:35].strip()
                charges[i] = line[35:44]
                masses[i] = line[50:60]
            else:
                # Longer lines are read as whitespace-separated fields.
                items = line.split()
                serials[i] = items[0]
                segnames[i] = items[1]
                resnums[i] = items[2]
                resnames[i] = items[3]
                atomnames[i] = items[4]
                atomtypes[i] = items[5]
                charges[i] = items[6]
                masses[i] = items[7]

        # Locate the "<count> !NBOND: ..." header after the atom lines.
        n_bonds = None
        for i in range(n_atoms, len(lines)):
            items = lines[i].split()
            if len(items) >= 2 and items[1] == '!NBOND:':
                n_bonds = int(items[0])
                break
        if n_bonds is None:
            raise IOError('PSF file does not contain an !NBOND section')

        # Floor division keeps the byte count an integer under Python 3,
        # where "/" yields a float that read() rejects.
        lines = ''.join(lines[i + 1:]) + psf.read(n_bonds // 4 * 71)
        array = fromstring(lines, count=n_bonds * 2, dtype=int, sep=' ')
        if len(array) != n_bonds * 2:
            raise IOError('number of bonds expected and parsed do not match')
    finally:
        # Release the handle on success and on every error path alike.
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # Shift the parsed indices down by one, in place, before pairing them up.
    array = add(array, -1, array)
    ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
def invert_scale(scaler, X, yhat):
    """Return *yhat* mapped back through ``scaler.inverse_transform``.

    The items of *X* followed by *yhat* form a single row; the row is
    inverse-transformed and its last column is returned.

    :param scaler: object exposing ``inverse_transform``.
    :param X: iterable of the values that precede *yhat* in the row.
    :param yhat: scaled value to convert back.
    :return: element ``[0, -1]`` of the inverse-transformed row.
    """
    row = numpy.array([*X, yhat])
    # A single sample is handed over as a (1, n) array.
    restored = scaler.inverse_transform(row.reshape(1, len(row)))
    return restored[0, -1]
def parsePSF(filename, title=None, ag=None):
    """Returns an :class:`.AtomGroup` instance storing data parsed from X-PLOR
    format PSF file *filename*.  Atom and bond information is parsed from the
    file.  If *title* is not given, *filename* will be set as the title of the
    :class:`.AtomGroup` instance.  An :class:`.AtomGroup` instance may be
    provided as *ag* argument.  When provided, *ag* must have the same number
    of atoms in the same order as the file.  Data from PSF file will be added
    to the *ag*.  This may overwrite present data if it overlaps with PSF file
    content.  Note that this function does not evaluate angles, dihedrals, and
    impropers sections.

    :raises TypeError: when *ag* is not an :class:`.AtomGroup`
    :raises ValueError: when *ag* and the file disagree on the atom count
    :raises IOError: when the ``!NATOM`` header cannot be found, or when
        fewer atom lines or bond indices are present than the headers
        announce"""

    if ag is not None:
        if not isinstance(ag, AtomGroup):
            raise TypeError("ag must be an AtomGroup instance")

    # The file is read as bytes, hence the bytes literals below.
    psf = openFile(filename, "rb")
    try:
        # Scan forward to the "<count> !NATOM" header of the atom section.
        n_atoms = None
        line = psf.readline()
        while line:
            line = line.strip()
            if line.endswith(b"!NATOM"):
                n_atoms = int(line.split(b"!")[0])
                break
            line = psf.readline()
        if n_atoms is None:
            raise IOError("PSF file does not contain an !NATOM section")

        if title is None:
            title = os.path.splitext(os.path.split(filename)[1])[0]
        else:
            title = str(title)

        if ag is None:
            ag = AtomGroup(title)
        else:
            if n_atoms != ag.numAtoms():
                raise ValueError("ag and PSF file must have same number of "
                                 "atoms")

        serials = zeros(n_atoms, ATOMIC_FIELDS["serial"].dtype)
        segnames = zeros(n_atoms, ATOMIC_FIELDS["segment"].dtype)
        resnums = zeros(n_atoms, ATOMIC_FIELDS["resnum"].dtype)
        resnames = zeros(n_atoms, ATOMIC_FIELDS["resname"].dtype)
        atomnames = zeros(n_atoms, ATOMIC_FIELDS["name"].dtype)
        atomtypes = zeros(n_atoms, ATOMIC_FIELDS["type"].dtype)
        charges = zeros(n_atoms, ATOMIC_FIELDS["charge"].dtype)
        masses = zeros(n_atoms, ATOMIC_FIELDS["mass"].dtype)

        # Read atom records until the bond header shows up.  n counts the
        # atoms stored so far; n_bonds stays 0 when no bond header is found.
        n = 0
        n_bonds = 0
        for line in psf:
            if line.strip() == b"":
                continue
            if b"!NBOND:" in line.upper():
                items = line.split()
                n_bonds = int(items[0])
                break
            if n + 1 > n_atoms:
                # All announced atoms are stored; skip anything else that
                # precedes the bond header.
                continue

            if len(line) <= 71:
                # Short lines are read as fixed-width columns.
                serials[n] = line[:8]
                segnames[n] = line[9:13].strip()
                resnums[n] = line[14:19]
                resnames[n] = line[19:23].strip()
                atomnames[n] = line[24:28].strip()
                atomtypes[n] = line[29:35].strip()
                charges[n] = line[35:44]
                masses[n] = line[50:60]
            else:
                # Longer lines are read as whitespace-separated fields.
                items = line.split()
                serials[n] = items[0]
                segnames[n] = items[1]
                resnums[n] = items[2]
                resnames[n] = items[3]
                atomnames[n] = items[4]
                atomtypes[n] = items[5]
                charges[n] = items[6]
                masses[n] = items[7]
            n += 1

        if n < n_atoms:
            raise IOError("number of lines in PSF is less than the number of "
                          "atoms")

        # Collect the bond lines up to the next section header (a line that
        # contains "!") and decode them for text-mode number parsing.
        lines = []
        for line in psf:
            if line.strip() == b"":
                continue
            if b"!" in line:
                break
            lines.append(line.decode(encoding="UTF-8"))

        lines = "".join(lines)
        array = fromstring(lines, count=n_bonds * 2, dtype=int, sep=" ")
        if len(array) != n_bonds * 2:
            raise IOError("number of bonds expected and parsed do not match")
    finally:
        # Release the handle on success and on every error path alike.
        psf.close()

    ag.setSerials(serials)
    ag.setSegnames(segnames)
    ag.setResnums(resnums)
    ag.setResnames(resnames)
    ag.setNames(atomnames)
    ag.setTypes(atomtypes)
    ag.setCharges(charges)
    ag.setMasses(masses)

    # Shift the parsed indices down by one, in place, before pairing them up.
    array = add(array, -1, array)
    ag.setBonds(array.reshape((n_bonds, 2)))

    return ag
def calcCrossCorr(modes, n_cpu=1, norm=True):
    """Returns cross-correlations matrix.  For a 3-d model, cross-correlations
    matrix is an NxN matrix, where N is the number of atoms.  Each element of
    this matrix is the trace of the submatrix corresponding to a pair of atoms.
    Cross-correlations matrix may be calculated using all modes or a subset of
    modes of an NMA instance.  For large systems, calculation of
    cross-correlations matrix may be time consuming.  Optionally, multiple
    processors may be employed to perform calculations by passing ``n_cpu=2``
    or more.

    When *norm* is true, each element is divided by the square root of the
    product of the two corresponding diagonal elements.

    :raises TypeError: when *n_cpu* is not an integer or *modes* is not of a
        supported type
    :raises ValueError: when *n_cpu* is less than 1"""

    if not isinstance(n_cpu, int):
        raise TypeError('n_cpu must be an integer')
    elif n_cpu < 1:
        raise ValueError('n_cpu must be equal to or greater than 1')

    if not isinstance(modes, (Mode, Vector, NMA, ModeSet)):
        if isinstance(modes, list):
            try:
                is3d = modes[0].is3d()
            except Exception:
                # Narrower than a bare except so that KeyboardInterrupt and
                # SystemExit are no longer converted into a TypeError.
                raise TypeError(
                    'modes must be a list of Mode or Vector instances, '
                    'not {0}'.format(type(modes)))
        else:
            raise TypeError(
                'modes must be a Mode, Vector, NMA, or ModeSet instance, '
                'not {0}'.format(type(modes)))
    else:
        is3d = modes.is3d()

    if is3d:
        # Work out the owning model and which of its columns to use.
        model = modes
        if isinstance(modes, (Mode, ModeSet)):
            model = modes._model
            if isinstance(modes, (Mode)):
                indices = [modes.getIndex()]
                n_modes = 1
            else:
                indices = modes.getIndices()
                n_modes = len(modes)
        elif isinstance(modes, Vector):
            indices = [0]
            n_modes = 1
        else:
            n_modes = len(modes)
            indices = np.arange(n_modes)

        array = model._getArray()
        n_atoms = model._n_atoms
        if not isinstance(modes, Vector):
            variances = model._vars
        else:
            # A Vector is treated as a single column with unit variance.
            array = array.reshape(-1, 1)
            variances = np.ones(1)

        if n_cpu == 1:
            s = (n_modes, n_atoms, 3)
            arvar = (array[:, indices] * variances[indices]).T.reshape(s)
            array = array[:, indices].T.reshape(s)
            # Contract over the mode and coordinate axes in a single step.
            covariance = np.tensordot(array.transpose(2, 0, 1),
                                      arvar.transpose(0, 2, 1),
                                      axes=([0, 1], [1, 0]))
        else:
            import multiprocessing
            n_cpu = min(multiprocessing.cpu_count(), n_cpu)
            queue = multiprocessing.Queue()
            # Floor division: slice bounds must be integers under Python 3.
            size = n_modes // n_cpu
            # Split the index sequence computed above, i.e. the same one the
            # single-process branch uses.  The original sliced
            # ``modes.indices``, an attribute nothing else in this function
            # relies on - NOTE(review): confirm no caller depended on it.
            all_indices = indices
            for i in range(n_cpu):
                if n_cpu - i == 1:
                    # The last worker also takes the remainder.
                    chunk = all_indices[i * size:]
                else:
                    chunk = all_indices[i * size:(i + 1) * size]
                process = multiprocessing.Process(
                    target=_crossCorrelations,
                    args=(queue, n_atoms, array, variances, chunk))
                process.start()
            # Poll until every worker has posted its partial result.
            while queue.qsize() < n_cpu:
                time.sleep(0.05)
            covariance = queue.get()
            while queue.qsize() > 0:
                covariance += queue.get()
    else:
        covariance = calcCovariance(modes)

    if norm:
        diag = np.power(covariance.diagonal(), 0.5)
        D = np.outer(diag, diag)
        covariance = div0(covariance, D)
    return covariance