def endElement(self, name):
    """Handle the end of an XML element.

    Only the end of a ``molecule`` element triggers work. If at least one
    atom number was collected for it, a ``Molecule`` is built from the
    accumulated ``current_*`` attributes and appended to ``self.molecules``:

    * the collected coordinates are turned into an array and multiplied
      with ``angstrom``;
    * ``extra`` and ``atoms_extra`` are copied from the parser state;
    * bonds whose two atom names are both known are turned into edges
      (``frozenset`` of two atom indexes); bonds with a non-empty ``extra``
      are also recorded in ``molecule.bonds_extra``;
    * ``molecule.graph`` is a ``MolecularGraph`` when there is at least one
      edge and ``None`` otherwise.

    Afterwards ``self.current_atom_names`` and ``self.current_bonds`` are
    deleted. In all cases ``self.current_title`` is reset to ``None`` when
    a ``molecule`` element ends.

    Arguments:
      name -- the name of the element that is being closed
    """
    if name != 'molecule':
        return

    if len(self.current_numbers) > 0:
        self.current_coordinates = np.array(self.current_coordinates)*angstrom
        molecule = Molecule(
            self.current_numbers,
            self.current_coordinates,
            self.current_title,
        )
        molecule.extra = self.current_extra
        molecule.atoms_extra = self.current_atoms_extra

        # A dedicated loop variable is used so that the ``name`` parameter
        # is not rebound. When an atom name occurs more than once, the last
        # occurrence determines the index.
        name_to_index = {
            atom_name: index
            for index, atom_name in enumerate(self.current_atom_names)
        }

        edges = set()
        bonds_extra = {}
        for name1, name2, extra in self.current_bonds:
            i1 = name_to_index.get(name1)
            i2 = name_to_index.get(name2)
            if i1 is None or i2 is None:
                # Bonds that refer to unknown atom names are ignored.
                continue
            edge = frozenset([i1, i2])
            if len(extra) > 0:
                bonds_extra[edge] = extra
            edges.add(edge)
        molecule.bonds_extra = bonds_extra

        if edges:
            molecule.graph = MolecularGraph(edges, self.current_numbers)
        else:
            molecule.graph = None

        del self.current_atom_names
        del self.current_bonds

        self.molecules.append(molecule)

    self.current_title = None
def all_lone_pairs(molecule, singles=(7,), doubles=(8,), angle=1.910):
    """Return the lone pairs found on selected atoms of a molecule.

    The result is a list with pairs (index, lone), where index indicates
    the atom and lone is the vector returned by ``lone_pair_1`` or
    ``lone_pair_2`` for that atom. Atoms in ``singles`` contribute one
    entry, atoms in ``doubles`` contribute two entries.

    Arguments:
      molecule -- A molecule for which the lone pairs should be calculated.
                  Its ``numbers`` and ``coordinates`` attributes are used.
      singles -- Atom numbers which should be treated like nitrogen: the
                 first three neighbors are passed to ``lone_pair_1``.
      doubles -- Atom numbers which should be treated like oxygen: the
                 first two neighbors and ``angle`` are passed to
                 ``lone_pair_2``.
      angle -- The angle between two lone pairs on the same atom. (in rad)

    Returns:
      lone_pairs -- a list with pairs (index, lone)

    NOTE(review): this assumes ``mgraph.neighbors[index]`` can be indexed
    with 0, 1, (2) and holds enough neighbors for the atom type; confirm
    against ``MolecularGraph``.
    """
    result = []
    mgraph = MolecularGraph(molecule)
    coordinates = molecule.coordinates

    def bond_vectors(index, coordinate, count):
        """Return vectors from the atom to its first ``count`` neighbors."""
        neighbors = mgraph.neighbors[index]
        return [coordinates[neighbors[k]] - coordinate for k in range(count)]

    for index, (number, coordinate) in enumerate(
            zip(molecule.numbers, coordinates)):
        if number in singles:
            delta0, delta1, delta2 = bond_vectors(index, coordinate, 3)
            result.append((index, lone_pair_1(delta0, delta1, delta2)))
        elif number in doubles:
            delta0, delta1 = bond_vectors(index, coordinate, 2)
            lone1, lone2 = lone_pair_2(delta0, delta1, angle)
            result.append((index, lone1))
            result.append((index, lone2))
    return result
def get_molecular_graph(self):
    """Construct a MolecularGraph from ``self.bonds`` and ``self.numbers``.

    Returns:
      graph -- a new ``MolecularGraph`` instance
    """
    graph = MolecularGraph(self.bonds, self.numbers)
    return graph
def __next__(self):
    """Load the next molecule from the SDF file

    This method is part of the iterator protocol.

    One record is consumed from ``self.f``: the title line, two skipped
    header lines, the counts line, the atom block, the bond block, the
    properties block (only ``M CHG`` lines are interpreted) and everything
    up to and including the ``$$$$`` record separator.

    Returns:
      molecule -- a ``Molecule`` built from the atomic numbers, the
                  coordinates (multiplied with ``angstrom``) and the
                  title, with two extra attributes: ``formal_charges`` (a
                  read-only integer array) and ``graph`` (a
                  ``MolecularGraph`` made from the bonds, numbers and
                  bond orders).

    Raises:
      StopIteration -- when ``self.f`` is exhausted. This also happens
                       when the input ends in the middle of a record.
      FileFormatError -- when the counts line, an atom line, a bond line
                         or a formal charge entry cannot be interpreted.
    """
    title = next(self.f)
    if len(title) == 0:
        raise StopIteration
    title = title.strip()
    next(self.f)  # skip line
    next(self.f)  # skip empty line

    # Counts line: the first two words are the number of atoms and bonds.
    words = next(self.f).split()
    if len(words) < 2:
        raise FileFormatError(
            "Expecting at least two numbers at fourth line.")
    try:
        num_atoms = int(words[0])
        num_bonds = int(words[1])
    except ValueError as exc:
        raise FileFormatError(
            "Expecting at least two numbers at fourth line.") from exc

    # Atom block: x, y, z and the element symbol.
    numbers = np.zeros(num_atoms, int)
    coordinates = np.zeros((num_atoms, 3), float)
    for i in range(num_atoms):
        words = next(self.f).split()
        if len(words) < 4:
            raise FileFormatError(
                "Expecting at least four words on an atom line.")
        try:
            coordinates[i, 0] = float(words[0])
            coordinates[i, 1] = float(words[1])
            coordinates[i, 2] = float(words[2])
        except ValueError as exc:
            raise FileFormatError(
                "Coordinates must be floating point numbers.") from exc
        atom = periodic[words[3]]
        if atom is None:
            raise FileFormatError("Unrecognized atom symbol: %s" % words[3])
        numbers[i] = atom.number
    coordinates *= angstrom

    # Bond block: two one-based atom indexes and the bond order.
    edges = []
    orders = np.zeros(num_bonds, int)
    for i in range(num_bonds):
        words = next(self.f).split()
        if len(words) < 3:
            raise FileFormatError(
                "Expecting at least three numbers on a bond line.")
        try:
            edges.append((int(words[0]) - 1, int(words[1]) - 1))
            orders[i] = int(words[2])
        except ValueError as exc:
            raise FileFormatError(
                "Expecting at least three numbers on a bond line.") from exc

    # Properties block. Lines are compared by their whitespace-separated
    # words, such that the amount of blanks between "M" and the keyword
    # and the kind of line ending do not matter.
    formal_charges = np.zeros(len(numbers), int)
    words = next(self.f).split()
    while words[:2] != ["M", "END"]:
        if words[:2] == ["M", "CHG"]:
            # words[2] is the number of charges on this line; the
            # remainder consists of (atom index, charge) pairs. A trailing
            # unpaired word is ignored.
            entries = words[3:]
            for atom_word, charge_word in zip(entries[0::2], entries[1::2]):
                try:
                    atom_index = int(atom_word) - 1
                    charge = int(charge_word)
                except ValueError as exc:
                    raise FileFormatError(
                        "Expecting only integer formal charges.") from exc
                # Without this check, index 0 would silently address the
                # last atom and too large indexes would raise IndexError.
                if not 0 <= atom_index < len(formal_charges):
                    raise FileFormatError(
                        "Formal charge assigned to a non-existing atom.")
                formal_charges[atom_index] = charge
        words = next(self.f).split()

    # Read on to the next molecule
    for line in self.f:
        if line.rstrip() == "$$$$":
            break

    molecule = Molecule(numbers, coordinates, title)
    molecule.formal_charges = formal_charges
    molecule.formal_charges.setflags(write=False)
    molecule.graph = MolecularGraph(edges, numbers, orders)
    return molecule
def get_molecular_graph(self, labels=None):
    """Construct a MolecularGraph from ``self.bonds`` and ``self.numbers``.

    Arguments:
      labels -- accepted for interface compatibility; this implementation
                does not use it

    Returns:
      graph -- a new ``MolecularGraph`` instance
    """
    bonds, numbers = self.bonds, self.numbers
    return MolecularGraph(bonds, numbers)