Example #1
0
    def parseLines(self, lines):
        """Parse list of lines in PDB format.

        Return Structure object or raise StructureFormatError.
        """
        xcfg_Number_of_particles = None
        xcfg_A = None
        xcfg_H0 = numpy.zeros((3, 3), dtype=float)
        xcfg_H0_set = numpy.zeros((3, 3), dtype=bool)
        xcfg_NO_VELOCITY = False
        xcfg_entry_count = None
        xcfg_auxiliary = []
        p_nl = 0
        p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =")
        p_auxiliary = {}
        try:
            stru = Structure()
            # ignore trailing blank lines
            stop = len(lines)
            while stop > 0 and lines[stop - 1].strip() == "":
                stop -= 1
            ilines = iter(lines[:stop])
            # read XCFG header
            for line in ilines:
                p_nl += 1
                stripped_line = line.strip()
                # blank lines and lines starting with # are ignored
                if stripped_line == "" or line[0] == '#':
                    continue
                elif xcfg_Number_of_particles is None:
                    if line.find("Number of particles =") != 0:
                        emsg = ("%d: first line must " +
                                "contain 'Number of particles ='") % p_nl
                        raise StructureFormatError(emsg)
                    xcfg_Number_of_particles = int(line[21:].split(None, 1)[0])
                    p_natoms = xcfg_Number_of_particles
                elif line.find("A =") == 0:
                    xcfg_A = float(line[3:].split(None, 1)[0])
                elif line.find("H0(") == 0:
                    i, j = (int(line[3]) - 1, int(line[5]) - 1)
                    xcfg_H0[i, j] = float(line[10:].split(None, 1)[0])
                    xcfg_H0_set[i, j] = True
                elif line.find(".NO_VELOCITY.") == 0:
                    xcfg_NO_VELOCITY = True
                elif line.find("entry_count =") == 0:
                    xcfg_entry_count = int(line[13:].split(None, 1)[0])
                elif p_auxiliary_re.match(line):
                    m = p_auxiliary_re.match(line)
                    idx = int(m.group(1))
                    p_auxiliary[idx] = line[m.end():].split(None, 1)[0]
                else:
                    break
            # check header for consistency
            if numpy.any(xcfg_H0_set == False):
                emsg = "H0 tensor is not properly defined"
                raise StructureFormatError(emsg)
            p_auxnum = len(p_auxiliary) and max(p_auxiliary.keys()) + 1
            for i in range(p_auxnum):
                if not i in p_auxiliary:
                    p_auxiliary[i] = "aux%d" % i
            sorted_aux_keys = p_auxiliary.keys()
            sorted_aux_keys.sort()
            if p_auxnum != 0:
                stru.xcfg = {
                    'auxiliaries': [p_auxiliary[k] for k in sorted_aux_keys]
                }
            if 6 - 3 * xcfg_NO_VELOCITY + len(p_auxiliary) != xcfg_entry_count:
                emsg = ("%d: auxiliary fields " +
                        "not consistent with entry_count") % p_nl
                raise StructureFormatError(emsg)
            # define proper lattice
            stru.lattice.setLatBase(xcfg_H0)
            # build p_assign_atom function to assign entries to proper fields
            p_exprs = [
                "a.xyz[0]=fields[0]", "a.xyz[1]=fields[1]",
                "a.xyz[2]=fields[2]"
            ]
            if not xcfg_NO_VELOCITY:
                p_exprs += [
                    "a.v=numpy.zeros(3, dtype=float)", "a.v[0]=fields[3]",
                    "a.v[1]=fields[4]", "a.v[2]=fields[5]"
                ]
            for idx in sorted_aux_keys:
                prop = p_auxiliary[idx]
                col = idx + 6 - 3 * xcfg_NO_VELOCITY
                if prop == "Uiso":
                    p_exprs.append("a.U[0,0]=a.U[1,1]=a.U[2,2]=" +
                                   "fields[%d]" % col)
                elif re.match(r"^U\d\d$", prop) \
                and 1<=int(prop[1])<=3 and 1<=int(prop[2])<=3 :
                    i, j = int(prop[1]) - 1, int(prop[2]) - 1
                    if i == j:
                        p_exprs.append("a.U[%i,%i]=fields[%d]" % (i, j, col))
                    else:
                        p_exprs.append("a.U[%i,%i]=a.U[%i,%i]=fields[%d]" % \
                                (i, j, j, i, col) )
                else:
                    p_exprs.append( "a.__dict__[%r]=fields[%d]" % \
                            (prop, col) )
            p_assign_expr = "pass; " + "; ".join(p_exprs[3:])
            exec "def p_assign_atom(a, fields) : %s" % p_assign_expr
            # here we are inside data
            p_element = None
            p_nl -= 1
            for line in lines[p_nl:stop]:
                p_nl += 1
                words = line.split()
                # ignore atom mass
                if len(words) == 1 and isfloat(words[0]):
                    continue
                # parse element allowing empty symbol
                elif len(words) <= 1:
                    w = line.strip()
                    p_element = w[:1].upper() + w[1:].lower()
                elif len(words) == xcfg_entry_count and p_element is not None:
                    fields = [float(w) for w in words]
                    stru.addNewAtom(p_element, fields[:3])
                    a = stru.getLastAtom()
                    a.xyz *= xcfg_A
                    p_assign_atom(a, fields)
                else:
                    emsg = "%d: invalid record" % p_nl
                    raise StructureFormatError(emsg)
            if len(stru) != p_natoms:
                emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
                raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            emsg = "%d: file is not in XCFG format" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            raise StructureFormatError, emsg, exc_traceback
        return stru
Example #2
0
    def parseLines(self, lines):
        """Parse list of lines in PDB format.

        Return Structure instance or raise StructureFormatError.
        """
        try:
            stru = Structure()
            scale = numpy.identity(3, dtype=float)
            scaleU = numpy.zeros(3, dtype=float)
            p_nl = 0
            for line in lines:
                p_nl += 1
                # skip blank lines
                if not line.strip():    continue
                # make sure line has 80 characters
                if len(line) < 80:
                    line = "%-80s" % line
                words = line.split()
                record = words[0]
                if record == "TITLE":
                    continuation = line[8:10]
                    if continuation.strip():
                        stru.description += line[10:].rstrip()
                    else:
                        stru.description = line[10:].rstrip()
                elif record == "CRYST1":
                    a = float(line[7:15])
                    b = float(line[15:24])
                    c = float(line[24:33])
                    alpha = float(line[33:40])
                    beta = float(line[40:47])
                    gamma = float(line[47:54])
                    stru.lattice.setLatPar(a, b, c, alpha, beta, gamma)
                    scale = numpy.transpose(stru.lattice.recbase)
                elif record == "SCALE1":
                    sc = numpy.zeros((3,3), dtype=float)
                    sc[0,:] = [float(x) for x in line[10:40].split()]
                    scaleU[0] = float(line[45:55])
                elif record == "SCALE2":
                    sc[1,:] = [float(x) for x in line[10:40].split()]
                    scaleU[1] = float(line[45:55])
                elif record == "SCALE3":
                    sc[2,:] = [float(x) for x in line[10:40].split()]
                    scaleU[2] = float(line[45:55])
                    base = numpy.transpose(numpy.linalg.inv(sc))
                    abcABGcryst = numpy.array(stru.lattice.abcABG())
                    stru.lattice.setLatBase(base)
                    abcABGscale = numpy.array(stru.lattice.abcABG())
                    reldiff = numpy.fabs(1.0 - abcABGscale/abcABGcryst)
                    if not numpy.all(reldiff < self.epsilon):
                        emsg = "%d: " % p_nl + \
                                "SCALE and CRYST1 are not consistent."
                        raise StructureFormatError(emsg)
                    if numpy.any(scaleU != 0.0):
                        emsg = "Origin offset not yet implemented."
                        raise NotImplementedError(emsg)
                elif record in ("ATOM", "HETATM"):
                    name = line[12:16].strip()
                    rc = [float(x) for x in line[30:54].split()]
                    xyz = numpy.dot(scale, rc) + scaleU
                    try:
                        occupancy = float(line[54:60])
                    except ValueError:
                        occupancy = 1.0
                    try:
                        B = float(line[60:66])
                        U = numpy.identity(3)*B/(8*pi**2)
                    except ValueError:
                        U = numpy.zeros((3,3), dtype=float)
                    symbol = line[76:78].strip()
                    if symbol == "":
                        # get symbol from the first 2 characters of name
                        symbol = line[12:14].strip()
                        symbol = symbol[0].upper() + symbol[1:].lower()
                    #stru.addNewAtom(symbol, occupancy=occupancy, name=name, U=U)
                    stru.addNewAtom(symbol, occupancy=occupancy, label=name, U=U)
                    last_atom = stru.getLastAtom()
                    last_atom.xyz_cartn = rc
                elif record == "SIGATM":
                    sigrc = [float(x) for x in line[30:54].split()]
                    sigxyz = numpy.dot(scale, sigrc)
                    try:
                        sigo = float(line[54:60])
                    except ValueError:
                        sigo = 0.0
                    try:
                        sigB = float(line[60:66])
                        sigU = numpy.identity(3)*sigB/(8*pi**2)
                    except ValueError:
                        sigU = numpy.zeros((3,3), dtype=float)
                    last_atom.sigxyz = sigxyz
                    last_atom.sigo = sigo
                    last_atom.sigU = sigU
                elif record == "ANISOU":
                    Uij = [ float(x)*1.0e-4 for x in line[28:70].split() ]
                    for i in range(3):
                        last_atom.U[i,i] = Uij[i]
                    last_atom.U[0,1] = last_atom.U[1,0] = Uij[3]
                    last_atom.U[0,2] = last_atom.U[2,0] = Uij[4]
                    last_atom.U[1,2] = last_atom.U[2,1] = Uij[5]
                elif record == "SIGUIJ":
                    sigUij = [ float(x)*1.0e-4 for x in line[28:70].split() ]
                    for i in range(3):
                        last_atom.sigU[i,i] = sigUij[i]
                    last_atom.sigU[0,1] = last_atom.sigU[1,0] = sigUij[3]
                    last_atom.sigU[0,2] = last_atom.sigU[2,0] = sigUij[4]
                    last_atom.sigU[1,2] = last_atom.sigU[2,1] = sigUij[5]
                elif record in P_pdb.validRecords:
                    pass
                else:
                    emsg = "%d: invalid record name '%r'" % (p_nl, record)
                    raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            emsg = "%d: invalid PDB record" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            raise StructureFormatError, emsg, exc_traceback
        return stru
Example #3
0
    def parseLines(self, lines):
        """Parse list of lines in PDB format.

        Return Structure object or raise StructureFormatError.
        """
        xcfg_Number_of_particles = None
        xcfg_A = None
        xcfg_H0 = numpy.zeros((3,3), dtype=float)
        xcfg_H0_set = numpy.zeros((3,3), dtype=bool)
        xcfg_NO_VELOCITY = False
        xcfg_entry_count = None
        xcfg_auxiliary = []
        p_nl = 0
        p_auxiliary_re = re.compile(r"^auxiliary\[(\d+)\] =")
        p_auxiliary = {}
        try:
            stru = Structure()
            # ignore trailing blank lines
            stop = len(lines)
            while stop>0 and lines[stop-1].strip() == "":
                stop -= 1
            ilines = iter(lines[:stop])
            # read XCFG header
            for line in ilines:
                p_nl += 1
                stripped_line = line.strip()
                # blank lines and lines starting with # are ignored
                if stripped_line == "" or line[0] == '#':
                    continue
                elif xcfg_Number_of_particles is None:
                    if line.find("Number of particles =") != 0:
                        emsg = ("%d: first line must " +
                                "contain 'Number of particles ='") % p_nl
                        raise StructureFormatError(emsg)
                    xcfg_Number_of_particles = int(line[21:].split(None, 1)[0])
                    p_natoms = xcfg_Number_of_particles
                elif line.find("A =") == 0:
                    xcfg_A = float(line[3:].split(None, 1)[0])
                elif line.find("H0(") == 0:
                    i, j = ( int(line[3])-1 ,  int(line[5])-1 )
                    xcfg_H0[i,j] = float(line[10:].split(None, 1)[0])
                    xcfg_H0_set[i,j] = True
                elif line.find(".NO_VELOCITY.") == 0:
                    xcfg_NO_VELOCITY = True
                elif line.find("entry_count =") == 0:
                    xcfg_entry_count = int(line[13:].split(None, 1)[0])
                elif p_auxiliary_re.match(line):
                    m = p_auxiliary_re.match(line)
                    idx = int(m.group(1))
                    p_auxiliary[idx] = line[m.end():].split(None, 1)[0]
                else:
                    break
            # check header for consistency
            if numpy.any(xcfg_H0_set == False):
                emsg = "H0 tensor is not properly defined"
                raise StructureFormatError(emsg)
            p_auxnum = len(p_auxiliary) and max(p_auxiliary.keys())+1
            for i in range(p_auxnum):
                if not i in p_auxiliary:
                    p_auxiliary[i] = "aux%d" % i
            sorted_aux_keys = p_auxiliary.keys()
            sorted_aux_keys.sort()
            if p_auxnum != 0:
                stru.xcfg = {
                    'auxiliaries' : [ p_auxiliary[k]
                                      for k in sorted_aux_keys ]
                }
            if 6-3*xcfg_NO_VELOCITY+len(p_auxiliary) != xcfg_entry_count:
                emsg = ("%d: auxiliary fields " +
                        "not consistent with entry_count") % p_nl
                raise StructureFormatError(emsg)
            # define proper lattice
            stru.lattice.setLatBase(xcfg_H0)
            # build p_assign_atom function to assign entries to proper fields
            p_exprs = [ "a.xyz[0]=fields[0]",
                        "a.xyz[1]=fields[1]",
                        "a.xyz[2]=fields[2]" ]
            if not xcfg_NO_VELOCITY:
                p_exprs += [  "a.v=numpy.zeros(3, dtype=float)",
                              "a.v[0]=fields[3]",
                              "a.v[1]=fields[4]",
                              "a.v[2]=fields[5]" ]
            for idx in sorted_aux_keys:
                prop = p_auxiliary[idx]
                col = idx + 6 - 3*xcfg_NO_VELOCITY
                if prop == "Uiso":
                    p_exprs.append("a.U[0,0]=a.U[1,1]=a.U[2,2]=" +
                        "fields[%d]" % col)
                elif re.match(r"^U\d\d$", prop) \
                and 1<=int(prop[1])<=3 and 1<=int(prop[2])<=3 :
                    i, j = int(prop[1])-1, int(prop[2])-1
                    if i==j:
                        p_exprs.append("a.U[%i,%i]=fields[%d]" % (i, j, col) )
                    else:
                        p_exprs.append("a.U[%i,%i]=a.U[%i,%i]=fields[%d]" % \
                                (i, j, j, i, col) )
                else:
                    p_exprs.append( "a.__dict__[%r]=fields[%d]" % \
                            (prop, col) )
            p_assign_expr = "pass; " + "; ".join(p_exprs[3:])
            exec "def p_assign_atom(a, fields) : %s" % p_assign_expr
            # here we are inside data
            p_element = None
            p_nl -= 1
            for line in lines[p_nl:stop]:
                p_nl += 1
                words = line.split()
                # ignore atom mass
                if len(words) == 1 and isfloat(words[0]):
                    continue
                # parse element allowing empty symbol
                elif len(words) <= 1:
                    w = line.strip()
                    p_element = w[:1].upper() + w[1:].lower()
                elif len(words) == xcfg_entry_count and p_element is not None:
                    fields = [ float(w) for w in words ]
                    stru.addNewAtom(p_element, fields[:3])
                    a = stru.getLastAtom()
                    a.xyz *= xcfg_A
                    p_assign_atom(a, fields)
                else:
                    emsg = "%d: invalid record" % p_nl
                    raise StructureFormatError(emsg)
            if len(stru) != p_natoms:
                emsg = "expected %d atoms, read %d" % (p_natoms, len(stru))
                raise StructureFormatError(emsg)
        except (ValueError, IndexError):
            emsg = "%d: file is not in XCFG format" % p_nl
            exc_type, exc_value, exc_traceback = sys.exc_info()
            raise StructureFormatError, emsg, exc_traceback
        return stru