Example #1
0
def process(args):
    """Score PDB models against pre-computed atom-type grids.

    The grids are read from the HDF5 file ``args['Sfn']``.  For every model in
    ``args['pdb_list']`` the grid value under each atom with a known, selected
    atom type is looked up and the mean of those values becomes the model
    score.  Scores are stored in the ``score`` dataset of the HDF5 file
    ``args['output']``; a table of model names and scores (divided by the
    highest score, in descending order) is written to ``a_score_table.csv``.

    Args:
        args: mapping with the keys ``Sfn``, ``pdb_list``, ``excl``, ``incl``
            and ``output``.  The whole mapping is also forwarded to
            ``er.PepExtractor``.

    Raises:
        ValueError: if both ``excl`` and ``incl`` are given.
    """
    excl = args['excl']
    incl = args['incl']

    # Reject the contradictory option pair up front, before any file is
    # opened or truncated.
    if excl and incl:
        raise ValueError(
            'You can not use include and exclude options simultaneiously!')

    Sfn = args['Sfn']
    model_list = args['pdb_list']
    lM = len(model_list)

    # Datasets of the grid file that are metadata rather than grids.
    reserved = {'origin', 'step', 'atypes'}

    # Everything needed from the grid file is loaded into memory here so the
    # file is closed again even if scoring fails later on.
    with h5py.File(Sfn, 'r') as Sf:
        atypes_t = Sf['atypes'][:][0]
        _, rtypes = gu.choose_artypes(atypes_t)

        grid_names = set(Sf.keys()) - reserved
        GminXYZ = Sf['origin'][:]
        step = Sf['step'][0]
        gNUCS = {name: Sf[name][:] for name in grid_names}

    atypes = grid_names - {'H'}  # we do not need hydrogens!

    if excl:
        excl = set(excl)
        atypes -= excl

    if incl:
        incl = set(incl)
        atypes &= incl

    print(excl, incl, atypes)

    extractor = er.PepExtractor(**args)

    # NOTE(review): the first peptide is extracted but the result is not used
    # below; kept because the call may have side effects - confirm and drop.
    pivot_pept = extractor.extract_result(extractor.plist[0])
    pivot_pept.numCoordsets()

    scores = np.zeros((lM, ), dtype=float)

    for cm in tqdm.trange(lM):
        S = prody.parsePDB(model_list[cm])

        mscore = 0.0
        ac = 0

        for a in S.iterAtoms():
            try:
                atype = rtypes[(a.getResname(), a.getName())]
            except LookupError:
                # Atom without a known type: it does not take part in scoring.
                continue

            if atype not in atypes:
                continue

            adj = ((a.getCoords() - GminXYZ) / step).astype(int)

            # Negative indices would silently wrap around to the far side of
            # the grid, so atoms below the grid origin are skipped explicitly.
            if (adj < 0).any():
                continue

            x, y, z = adj

            try:
                cell = gNUCS[atype][x, y, z]
            except (IndexError, ValueError):
                # Atom lies beyond the upper grid boundary.
                continue

            mscore += cell
            ac += 1

        if ac > 0:
            mscore /= float(ac)

        scores[cm] = mscore

    with h5py.File(args['output'], 'w') as tSf:
        score = tSf.create_dataset('score', (lM, ), dtype=float)
        if lM:
            score[:] = scores

    table = np.zeros((lM, ), dtype=[('name', 'S128'), ('score', 'f8')])
    for n, model_name in enumerate(model_list):
        table['name'][n] = model_name
    table['score'] = scores

    table.sort(order='score')

    # Normalise by the highest score; skipped for an empty table or an
    # all-zero one, where the division would fail or produce NaN.
    if lM and table['score'][-1] != 0:
        table['score'] /= table['score'][-1]

    np.savetxt('a_score_table.csv', table[::-1], fmt="%s\t%.2f")
def process(args):
    """Build per-residue-type occupancy grids from docked peptides.

    A grid box is derived from the Ledock configuration ``args['config']``,
    the grid step ``args['step']`` and a padding based on ``gu.avdw`` and
    ``args['pad']``.  For every coordinate set of every peptide returned by
    ``er.PepExtractor`` the grid produced by ``gu.process_residue`` is added
    to the grid of the matching residue type.  Each populated grid is then
    thresholded at its median, divided by its residue count, rescaled so that
    the largest per-type maximum becomes 100, and stored as ``int8`` in the
    HDF5 file ``args['Sfn']`` together with the ``step`` and ``origin``
    datasets.  The box description is also written to ``box_coords.txt``.

    Args:
        args: mapping with the keys ``step``, ``pad``, ``config`` and ``Sfn``.
            The whole mapping is also forwarded to ``er.PepExtractor``.
    """
    step = args['step']

    padding = max(gu.avdw.values()) * 2 * \
        args['pad']  # Largest VdW radii - 1.7A

    box_config = dp.ConfigLedock(args['config'])

    GminXYZ = box_config.get_min_xyz()
    GminXYZ = gu.adjust_grid(GminXYZ, step, padding)

    box = box_config.get_box_size()
    L = box + padding

    N = np.ceil((L / step)).astype(int)

    GmaxXYZ = GminXYZ + N * step

    print('GMIN', GminXYZ, N)
    print('BOX', L)

    with open('box_coords.txt', 'w') as f:
        f.write('BOX: ' + ' '.join(GminXYZ.astype(str)) + '\n')
        f.write('STEP: %.2f\n' % step)
        f.write('NSTEPS: ' + ';'.join(N.astype(str)) + '\n')

    Sfn = args['Sfn']

    NUCS = gu.three_let
    atypes = set(NUCS) - {'H'}  # we do not need hydrogens!

    extractor = er.PepExtractor(**args)

    # Position of every residue type in the counter array.
    iNUCS = {name: idx for idx, name in enumerate(NUCS)}

    lnucs = len(NUCS)
    fNUCS = np.zeros((lnucs, ), dtype=int)

    # One accumulation grid per residue type.
    tSf = {name: np.zeros(N, dtype=np.float32) for name in NUCS}

    lM = len(extractor.plist)

    t0 = time.time()

    for cm, m in enumerate(extractor.plist):
        t1 = time.time()
        dt = t1 - t0
        t0 = t1
        print('STEP: %d PERCENT: %.2f%% TIME: %s' %
              (cm, float(cm) / lM * 100, dt))

        try:
            S = extractor.extract_result(m)
        except Exception:
            print('ERROR: BAD PEPTIDE: %s' % m)
            continue

        for S_ in range(S.numCoordsets()):

            for a in S.iterResidues():

                a.setACSIndex(S_)

                try:
                    atype = a.getResname().upper()
                except Exception:
                    continue

                if atype not in atypes:
                    continue

                AminXYZ, Ashape = gu.get_bounding(a.getCoords(), padding, step)

                adj = ((AminXYZ - GminXYZ) / step).astype(int)

                # Skip residues whose bounding box sticks out of the grid.
                if (adj < 0).any():
                    continue

                if ((GmaxXYZ - (AminXYZ + Ashape * step)) < 0).any():
                    continue

                x, y, z = adj
                Agrid, AminXYZ = gu.process_residue(a, padding, step)

                try:
                    tSf[atype][x:x + Agrid.shape[0], y:y + Agrid.shape[1],
                               z:z + Agrid.shape[2]] += Agrid

                    fNUCS[iNUCS[atype]] += 1

                except (IndexError, ValueError):
                    # Residue grid does not fit the target slice.
                    print(m, a)

    print('fNUCS ', fNUCS)

    nNUCS = np.zeros((lnucs, ), dtype=np.float32)

    for i, name in enumerate(NUCS):
        mult = fNUCS[i]

        if mult > 0:
            ttSf = tSf[name]
            nmax = np.max(ttSf)
            med = np.median(ttSf)
            ttSf[ttSf < med] = 0
            ttSf /= mult

            nNUCS[i] = nmax / mult
            print(i, name, nNUCS[i], nmax)

    nmax = np.max(nNUCS)

    with h5py.File(Sfn, 'w') as Sf:

        for i, name in enumerate(NUCS):
            # Test this type's own count; a value left over from the previous
            # loop would decide for all types at once.
            if fNUCS[i] > 0:
                ttSf = tSf[name]

                ttSf /= nmax
                ttSf *= 100.0
                tG = np.ceil(ttSf).astype(np.int8)
                Sf.create_dataset(name, data=tG)
            else:
                print('Array is empty for: ', name)

        Gstep = np.array([step, step, step], dtype=np.float32)
        Sf.create_dataset('step', data=Gstep)
        Sf.create_dataset('origin', data=GminXYZ)
        print('Totgood: ', np.sum(fNUCS))
Example #3
0
    def process(self, args=None):
        """Accumulate per-atom-type grids over this rank's peptides and save them.

        Every rank adds the grid returned by ``gu.process_atom`` for each
        non-hydrogen atom of its peptides (``self.aplist``) to the grid of the
        atom's type.  When ``self.cluster`` is ``True`` only the coordinate
        sets chosen as affinity-propagation cluster centres are used.  The
        per-rank grids are then summed on rank 0, thresholded at their median,
        divided by the atom count and by the largest per-type ratio of grid
        maximum to atom count, scaled to 100 and written to the HDF5 file
        ``args['Sfn']`` together with the ``step``, ``origin`` and ``atypes``
        datasets.  ``self.database`` is closed at the end.

        Args:
            args: mapping with the keys ``Sfn`` and ``atypes``; defaults to
                ``self.args``.  The key ``mpi`` is set on it and the mapping
                is forwarded to ``er.PepExtractor``.
        """
        if not args:
            args = self.args

        NUCS = self.atypes
        iNUCS = {name: idx for idx, name in enumerate(NUCS)}
        iNUCS = self.mpi.comm.bcast(iNUCS)

        lnucs = len(NUCS)
        fNUCS = np.zeros((lnucs, ), dtype=float)

        # One accumulation grid per atom type.
        tSf = {name: np.zeros(self.N, dtype=float) for name in NUCS}

        args['mpi'] = self.mpi
        extractor = er.PepExtractor(**args)
        lM = len(self.aplist)

        self.mpi.comm.Barrier()

        tota_ = 0   # atoms added to a grid
        totba_ = 0  # atoms that could not be added

        for cm, m in enumerate(self.aplist):

            print('Rank %d model %d of %d' % (self.mpi.rank, cm, lM))

            try:
                S = extractor.extract_result(m)
            except Exception:
                print('ERROR: BAD PEPTIDE: %s' % m)
                continue

            lS = S.numCoordsets()
            tlS = range(lS)

            if self.cluster is True:
                resc = S.select('not element H').getCoordsets()
                cl = 'NOSUP_SERIAL_CALCULATOR'

                mHandler = MatrixHandler()
                matrix = mHandler.createMatrix(resc, cl)
                mat = scipy.spatial.distance.squareform(matrix.get_data())
                smatrix = (mat**2) * (-1)
                aff = AffinityPropagation(affinity='precomputed')
                aff_cluster = aff.fit(smatrix)
                tlS = aff_cluster.cluster_centers_indices_

            if tlS is None:
                continue

            for S_ in tlS:

                S.setACSIndex(S_)

                for a in S.iterAtoms():

                    # skip hydrogens
                    if a.getElement() == 'H':
                        continue

                    try:
                        atype = self.rtypes[(a.getResname(), a.getName())]
                    except LookupError:
                        print('ATYPE not found', a.getResname(), a.getName())
                        # Without this the code below would run with an
                        # undefined or stale atom type.
                        continue

                    if atype not in self.atypes:
                        continue

                    Agrid, AminXYZ = gu.process_atom(a, self.step)

                    adj = ((AminXYZ - self.GminXYZ) / self.step).astype(int)

                    # A negative offset would make the slice below wrap around
                    # to the opposite side of the grid.
                    if (adj < 0).any():
                        totba_ += 1
                        continue

                    x, y, z = adj

                    try:
                        tSf[atype][x:x + Agrid.shape[0], y:y + Agrid.shape[1],
                                   z:z + Agrid.shape[2]] += Agrid

                        fNUCS[iNUCS[atype]] += 1
                        tota_ += 1

                    except (IndexError, ValueError):
                        # Atom grid does not fit the target slice.
                        totba_ += 1

        self.mpi.comm.Barrier()

        if self.mpi.rank == 0:
            print('Collecting grids')

        fNUCS_ = self.mpi.comm.allreduce(fNUCS)
        nNUCS = np.zeros((lnucs, ), dtype=float)

        tota = self.mpi.comm.reduce(tota_)
        totba = self.mpi.comm.reduce(totba_)

        # Sum the grids of all ranks on rank 0, one atom type at a time.
        for i in range(lnucs):
            NUC_ = NUCS[i]

            if self.mpi.rank != 0:
                self.mpi.comm.Send(tSf[NUC_], dest=0, tag=i)

            else:
                for j in range(1, self.mpi.NPROCS):
                    tG = np.empty(tSf[NUC_].shape, dtype=float)
                    self.mpi.comm.Recv(tG, source=j, tag=i)
                    tSf[NUC_] += tG
                nNUCS[i] = np.max(tSf[NUC_])

        nNUCS_ = self.mpi.comm.bcast(nNUCS)

        self.mpi.comm.Barrier()

        Sfn = args['Sfn']

        if self.mpi.rank == 0:

            print('Saving data')

            nmax = bn.nanmax(np.divide(nNUCS_, fNUCS_))

            with h5py.File(Sfn, 'w') as Sf:

                for i in range(lnucs):

                    NUC_ = NUCS[i]
                    mult = fNUCS_[iNUCS[NUC_]]

                    if mult > 0.0:
                        # tG aliases the stored grid; it is modified in place.
                        tG = tSf[NUC_]

                        med = np.median(tG)
                        tG[tG < med] = 0

                        tG /= float(mult)
                        tG /= float(nmax)
                        tG *= 100.0

                    else:
                        print('Array is empty for: ', NUC_)

                    Sf.create_dataset(NUC_, data=tSf[NUC_])

                Gstep = np.array([self.step, self.step, self.step],
                                 dtype=float)
                Sf.create_dataset('step', data=Gstep)
                Sf.create_dataset('origin', data=self.GminXYZ)
                Sf.create_dataset('atypes',
                                  data=np.array([
                                      args['atypes'],
                                  ], dtype='S20'))

                print('Total bad atoms %d of %d' % (totba, tota))

        self.mpi.comm.Barrier()

        self.database.close()
def process(args):
    """Score every pose of every docked peptide against residue-type grids.

    The grids are read from the HDF5 file ``args['Sfn']``.  For each
    coordinate set of each peptide returned by ``er.PepExtractor`` the grid
    values inside the bounding box of every selected residue are summed; the
    pose score is that total divided by the number of scored residues and by
    ``len()`` of the last residue iterated.  Scores are stored in the
    ``score`` dataset of the HDF5 file ``args['output']``; a table of pose
    names and scores (divided by the highest score, in descending order) is
    written to ``r_score_table.csv``.

    Peptides that cannot be extracted are reported and contribute no rows.

    Args:
        args: mapping with the keys ``Sfn``, ``pad``, ``excl``, ``incl`` and
            ``output``.  The whole mapping is also forwarded to
            ``er.PepExtractor``.

    Raises:
        ValueError: if both ``excl`` and ``incl`` are given, or if the grid
            file contains no grids.
    """
    excl = args['excl']
    incl = args['incl']

    # Reject the contradictory option pair up front, before any file is
    # opened or truncated.
    if excl and incl:
        raise ValueError(
            'You can not use include and exclude options simultaneiously!')

    Sfn = args['Sfn']

    padding = max(gu.avdw.values()) * 2 * \
        args['pad']  # Largest VdW radii - 1.7A

    # Datasets of the grid file that are metadata rather than grids.
    reserved = {'origin', 'step', 'atypes'}

    # Everything needed from the grid file is loaded into memory here so the
    # file is closed again even if scoring fails later on.
    with h5py.File(Sfn, 'r') as Sf:
        grid_names = set(Sf.keys()) - reserved
        GminXYZ = Sf['origin'][:]
        step = Sf['step'][0]
        gNUCS = {name: Sf[name][:] for name in grid_names}

    if not gNUCS:
        raise ValueError('No grids found in %s' % Sfn)

    atypes = grid_names - {'H'}  # we do not need hydrogens!

    if excl:
        excl = set(excl)
        atypes -= excl

    if incl:
        incl = set(incl)
        atypes &= incl

    print(excl, incl, atypes)

    extractor = er.PepExtractor(**args)
    lM = len(extractor.plist)

    # The upper grid corner is derived from the shape of any one grid.
    grid_shape = next(iter(gNUCS.values())).shape
    GmaxXYZ = GminXYZ + np.array(grid_shape) * step

    # Rows are collected as poses are scored, so the result does not depend
    # on every peptide having the same number of coordinate sets.
    names = []
    scores = []

    t0 = time.time()

    for cm, m in enumerate(extractor.plist):
        t1 = time.time()
        dt = t1 - t0
        t0 = t1
        print('STEP: %d PERCENT: %.2f%% TIME: %s' %
              (cm, float(cm) / lM * 100, dt))

        try:
            S = extractor.extract_result(m)
        except Exception:
            print('ERROR: BAD PEPTIDE: %s' % m)
            continue

        for S_ in range(S.numCoordsets()):

            mscore = 0.0
            ac = 0

            for a in S.iterResidues():
                a.setACSIndex(S_)

                try:
                    atype = a.getResname().upper()
                except Exception:
                    continue

                if atype not in atypes:
                    continue

                AminXYZ, Ashape = gu.get_bounding(a.getCoords(), padding, step)

                adj = ((AminXYZ - GminXYZ) / step).astype(int)

                # Skip residues whose bounding box sticks out of the grid.
                if (adj < 0).any():
                    continue

                if ((GmaxXYZ - (AminXYZ + Ashape * step)) < 0).any():
                    continue

                x, y, z = adj

                try:
                    cells = gNUCS[atype][x:x + Ashape[0], y:y + Ashape[1],
                                         z:z + Ashape[2]]
                except (IndexError, ValueError):
                    continue

                mscore += np.sum(cells)
                ac += 1

            if ac > 0:
                mscore /= float(ac)
                # NOTE(review): `a` is the last residue iterated, not
                # necessarily one that was scored - confirm this divisor.
                mscore /= float(len(a))

            names.append('%s_%02d.pdb' % (m, S_ + 1))
            scores.append(mscore)

    nrows = len(scores)

    with h5py.File(args['output'], 'w') as tSf:
        score = tSf.create_dataset('score', (nrows, ), dtype=float)
        if nrows:
            score[:] = scores

    table = np.zeros((nrows, ), dtype=[('name', 'S128'), ('score', 'f8')])
    for n, pose_name in enumerate(names):
        table['name'][n] = pose_name
    table['score'] = scores

    table.sort(order='score')

    # Normalise by the highest score; skipped for an empty table or an
    # all-zero one, where the division would fail or produce NaN.
    if nrows and table['score'][-1] != 0:
        table['score'] /= table['score'][-1]

    np.savetxt('r_score_table.csv', table[::-1], fmt="%s\t%.7f")
Example #5
0
    def process(self, args=None):
        """Compute geometric descriptors for each peptide carbonyl and store them.

        Rank 0 creates the ``out`` dataset in the HDF5 file ``args['out']``,
        sized from the first peptide of ``self.aplist``.  All ranks then open
        the file with the ``mpio`` driver and, for every coordinate set of
        their peptides and every ``name C`` atom except the one in residue 1,
        write one record holding: peptide name, pose number, residue number,
        the distance from C to the receptor atom selected as
        ``resnum 151 name SG``, two minimum receptor-peptide distances, the
        angles/dihedral stored as ``BD``, ``FL`` and ``AT``, a direction flag
        (``F``/``R``) and a padded sequence string.

        Args:
            args: mapping with the keys ``out`` and ``receptor``; defaults to
                ``self.args``.  The key ``mpi`` is set on it and the mapping
                is forwarded to ``er.PepExtractor``.
        """
        if not args:
            args = self.args

        args['mpi'] = self.mpi
        extractor = er.PepExtractor(**args)
        lM = len(self.aplist)

        dtype = np.dtype([
            ('name', 'S10'),
            ('pnum', '<i4'),
            ('rnum', '<i4'),
            ('dist', '<f8'),
            ('hdist1', '<f8'),
            ('hdist2', '<f8'),
            ('BD', '<f8'),
            ('FL', '<f8'),
            ('AT', '<f8'),
            ('dir', 'S3'),
            ('aln', 'S20')
        ])

        if self.mpi.rank == 0:
            # NOTE(review): the dataset is sized from the first peptide only;
            # presumably all peptides share its length and pose count.
            m = self.aplist[0]
            lm = len(m)
            S = extractor.extract_result(m)
            lS = S.numCoordsets()
            with h5py.File(args['out'], 'w') as Sf:
                Sf.create_dataset(
                    'out', (len(self.plist) * lS * lm, ), dtype=dtype)

        self.mpi.comm.Barrier()

        Sf = h5py.File(args['out'], 'r+', driver='mpio', comm=self.mpi.comm)
        out = Sf['out']

        a = prody.parsePDB(args['receptor'])

        # Receptor reference atoms used by the descriptors below.
        OG = a.select('resnum 151 name SG')
        Ob = a.select('resnum 168 and name O')
        ND1 = a.select('resnum 46 and name NE2')

        OXH = a.select("name N resnum 149 150 151")

        t0 = time.time()

        for cm, m in enumerate(self.aplist):
            lm = len(m)

            t1 = time.time()
            dt = t1 - t0
            t0 = t1
            print('STEP: %d PERCENT: %.2f%% TIME: %s' % (
                cm, float(cm) / lM * 100, dt))

            try:
                S = extractor.extract_result(m)
            except Exception:
                print('ERROR: BAD PEPTIDE: %s' % m)
                continue

            lS = S.numCoordsets()

            # Sequence with 'X' and 'Z' marking the two ends.
            s = 'X' + m + 'Z'

            for S_ in range(lS):

                S.setACSIndex(S_)

                tC = S.select('name C')

                for C in tC.iterAtoms():

                    rnum = C.getResnum()

                    if rnum == 1:
                        continue

                    O = S.select('resnum %i and name O' % rnum)
                    Nl = S.select('resnum %i and name N' % (rnum))
                    N = S.select('resnum %i and name N' % (rnum + 1))

                    C_ = C.getCoords()
                    O_ = O.getCoords()[0]
                    N_ = N.getCoords()[0]

                    dist = prody.calcDistance(C, OG)[0]
                    hdist1 = np.min(prody.calcDistance(OXH, O))
                    hdist2 = np.min(prody.calcDistance(Ob, Nl))

                    # Point one unit from C along the cross product of the
                    # N->C and C->O vectors.
                    nC_ = np.cross((C_ - N_), (O_ - C_))
                    nC_ /= np.linalg.norm(nC_)
                    nC_ = nC_ + C_
                    nC_ = nC_.reshape(1, 3)

                    nC = C.copy()
                    nC.setCoords(nC_)

                    BD = prody.calcAngle(OG, C, O)
                    FL = prody.calcDihedral(OG, nC, C, O)
                    AT = prody.calcAngle(ND1, OG, C)

                    angle_ = prody.calcDihedral(ND1, OG, C, N)

                    # The sign of the dihedral decides the direction flag and
                    # how the sequence string is oriented and padded.
                    if angle_ < 0:
                        DIR = 'F'
                        seq = s
                        pref = 6 - rnum
                    else:
                        DIR = 'R'
                        seq = s[::-1]
                        pref = rnum

                    suf = 12 - (pref + len(seq))
                    seq = '-' * pref + seq + '-' * suf

                    outdata = (m, S_ + 1,
                               rnum, dist, hdist1, hdist2, BD, FL, AT,
                               DIR, seq)

                    ind = (self.tb + cm) * lS * lm + S_ * lm + rnum - 2
                    out[ind] = outdata

        self.database.close()
        Sf.close()
Example #6
0
def process(args):
    """Write geometric descriptors of the closest peptide carbonyl per pose.

    For every coordinate set of every peptide returned by ``er.PepExtractor``
    the ``name C`` atom closest to the receptor atom selected as
    ``resnum 241 name OG`` is located.  One tab-separated line is written to
    the text file ``args['out']`` with: pose name, that distance, the angle
    ``resnum 79 NE2`` - ``resnum 241 OG`` - C, the distance from the carbonyl
    O to the mean position of the backbone N atoms of residues 239-241, a
    direction flag (``F``/``R``), a padded sequence string and the residue
    number of the chosen C atom.

    Args:
        args: mapping with the keys ``receptor`` and ``out``.  The whole
            mapping is also forwarded to ``er.PepExtractor``.
    """
    extractor = er.PepExtractor(**args)
    lM = len(extractor.plist)

    a = prody.parsePDB(args['receptor'])

    # Receptor reference atoms used by the descriptors below.
    SG = a.select('resnum 241 name OG')
    O = a.select('resnum 262 and name O')
    ND1 = a.select('resnum 79 and name NE2')

    tHN_ = a.select("name N resnum 239 240 241")
    tHN = np.average(tHN_.getCoords(), axis=0)

    t0 = time.time()

    # The context manager closes the output file even if a pose fails.
    with open(args['out'], 'w') as out:

        for cm, m in enumerate(extractor.plist):
            t1 = time.time()
            dt = t1 - t0
            t0 = t1
            print('STEP: %d PERCENT: %.2f%% TIME: %s' % (
                cm, float(cm) / lM * 100, dt))

            try:
                S = extractor.extract_result(m)
            except Exception:
                print('ERROR: BAD PEPTIDE: %s' % m)
                continue

            # Sequence with 'X' and 'Z' marking the two ends.
            s = 'X' + m + 'Z'

            for S_ in range(S.numCoordsets()):

                S.setACSIndex(S_)

                tC = S.select('name C')

                # Find the carbonyl carbon closest to the receptor atom SG.
                dist = np.inf

                for c in tC.iterAtoms():

                    dist_ = prody.calcDistance(c, SG)[0]

                    if dist_ < dist:
                        dist = dist_
                        rnum = c.getResnum()

                C = S.select('resnum %i and name C' % rnum)
                CO = S.select('resnum %i and name O' % rnum)
                N_ = S.select('resnum %i and name N' % (rnum + 1))

                angleAttack = prody.calcAngle(ND1, SG, C)
                angle_ = prody.calcDistance(O, N_)[0] \
                    - prody.calcDistance(O, C)[0]

                # The sign of the distance difference decides the direction
                # flag and how the sequence string is oriented and padded.
                if angle_ > 0:
                    DIR = 'F'
                    seq = s
                    pref = 5 - rnum
                else:
                    DIR = 'R'
                    seq = s[::-1]
                    pref = rnum

                suf = 10 - (pref + len(seq))
                seq = '-' * pref + seq + '-' * suf

                hdist = np.linalg.norm(tHN - CO.getCoords())

                outstr = "%-10s\t%6.2f\t%6.2f\t%6.2f\t%3s\t%12s\t%d\n" % (
                    '%s_%02d' % (m, S_ + 1),
                    dist, angleAttack, hdist,
                    DIR, seq, rnum)

                out.write(outstr)