Example #1
    def onFileTriggered(self, event):
        if str(event.text()) == "Save":
            Pickle.dump(self.view, "current.pkl")

        if str(event.text()) == "Open":
            self.view.scene.clear()
            self.setMode("free")
            Pickle.load(self.view, "current.pkl")
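
Pickle here is a project-specific wrapper rather than the standard library module, and its exact behavior is not shown. As a rough sketch only, a comparable save/load pair built on the standard pickle module could look like the following; note that the wrapper above also passes the view to Pickle.load, presumably so the scene can be restored into it, which this sketch does not attempt.

import pickle

# Minimal sketch (assumption): a dump/load pair over the standard pickle module.
def dump(obj, path):
    with open(path, "wb") as f:
        pickle.dump(obj, f)

def load(path):
    with open(path, "rb") as f:
        return pickle.load(f)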
Example #3
def makeIterInput(home,
                  batch_size,
                  MAX_LEN=16,
                  buffer_size=buffer_size,
                  for_prediction=False):

    CMPATH = os.path.join(home, CMNAME)
    CM = myPickle.load(CMPATH)
    CMm = max(CM.values()) + 1
    DICT_SIZE = len(CM)

    rpath = os.path.join(home, 'rules.txt')
    rules = readRULE(rpath)
    CLASS_NUM = len(rules)
    print("RULES_NUMBER: ", CLASS_NUM)

    def G(*args):
        # for each chunk
        for xpath, index_path, hits_path in zip(getAll(home, 'X.txt_*'),
                                                getAll(home, 'index_hits*'),
                                                getAll(home, 'hits*')):
            # read passwords chunk
            X = readX_strict(xpath, encoding=ENCODING)
            # parse plaintext password
            Xi = prepareXi(X, CM, MAX_LEN, CMm)

            # read index array
            index = read_index(index_path, len(Xi))

            with open(hits_path, 'rb') as f:
                # for each password in the chunk
                for x, nm in zip(Xi, index):
                    # get hitting rules and create dense label
                    tot_byte = nm * 4
                    b = f.read(tot_byte)
                    rhits = np.frombuffer(b, dtype=np.uint32)
                    y = np.zeros(CLASS_NUM, dtype=np.int32)
                    y[rhits] = 1

                    if len(x) > MAX_LEN:
                        continue

                    yield x, y

    dataset = tf.data.Dataset.from_generator(G, (tf.int32, tf.int32),
                                             ((MAX_LEN, ), (CLASS_NUM, )))

    if not for_prediction:
        dataset = dataset.shuffle(buffer_size)

    dataset = dataset.batch(batch_size, drop_remainder=True)
    dataset = dataset.prefetch(buffer_size=buffer_size)

    return dataset, CM, CLASS_NUM
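
The generator yields one (features, labels) pair per password: a length-MAX_LEN index vector and a CLASS_NUM-long multi-hot vector of the rules that hit it. A minimal consumption sketch, assuming TensorFlow 2.x eager execution and a hypothetical data directory 'data_home' laid out as this function expects:

# Hypothetical usage sketch; 'data_home' and the batch size are placeholders.
dataset, CM, CLASS_NUM = makeIterInput('data_home', batch_size=128, for_prediction=True)
for x_batch, y_batch in dataset.take(1):
    print(x_batch.shape)  # (128, MAX_LEN)
    print(y_batch.shape)  # (128, CLASS_NUM)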
Example #4
def batchExtract(pkldir, bdir, ofname):
    """
    Running the information required for all files
    """
    import glob

    flist = glob.glob(pkldir + '*.pdb.pkl')
    TT = len(flist) + 0.0
    if os.path.isfile(ofname) is False:
        fdict = {}
    else:
        fdict = myPickle.load(ofname)
    for cnt, f in enumerate(flist):
        print '% Done =', cnt / TT
        (_, k, _) = getFileParts(getFileParts(f)[1])
        #pdb.set_trace()
        k = k[:-2]
        if k not in fdict:
            print "Processing", f
            try:
                U = myPDB.loader(pkldir + k + '_u.pdb.pkl')
                B = myPDB.loader(pkldir + k + '_b.pdb.pkl')
            except:
                continue
            #pdb.set_trace()
            #rmsd,Uidx,Bidx=calcRMSD(U,B)
            try:
                rpymol = calcRMSD_pymol(bdir + k + '_u.pdb',
                                        bdir + k + '_b.pdb')
            except:
                print "Error processing", k
                cmd.reinitialize()
                time.sleep(0.1)
                continue

            #pdb.set_trace()
            #useq=''.join([three_to_one(U.R[i].get_resname()) for i in Uidx])
            #bseq=''.join([three_to_one(B.R[i].get_resname()) for i in Bidx])
            #a_useq=ProteinAnalysis(U.seq)
            #a_bseq=ProteinAnalysis(B.seq)
            #asa_u=np.sum([U.ASA[i] for i in Uidx])
            #asa_b=np.sum([B.ASA[i] for i in Bidx])
            fdict[k] = rpymol  #+(BN.nanmean(U.B),BN.nanmean(B.B),BN.nanmedian(U.B),BN.nanmedian(B.B),BN.nanmax(U.B),BN.nanmax(B.B))
            #pdb.set_trace()
            myPickle.dump(ofname, fdict)
            print k, rpymol[0]
        else:
            print "Already found", f
    return fdict
Example #5
def makeIterInput(home,
                  batch_size,
                  MAX_MASKED,
                  INCLUDE_END_SYMBOL,
                  MAX_LEN=32,
                  buffer_size=buffer_size,
                  for_prediction=False):
    XPATH = os.path.join(home, XNAME)

    CMPATH = os.path.join(home, CMNAME)
    CM = myPickle.load(CMPATH)
    vocab_size = max(CM.values()) + 1

    def G(*args):
        # for each chunk
        with open(XPATH, encoding=ENCODING, errors='ignore') as f:
            for x in f:

                x = x[:-1]
                xl = len(x)

                #if not INCLUDE_END_SYMBOL: print("NO <END>")

                if xl > MAX_LEN - int(INCLUDE_END_SYMBOL):
                    continue

                xi = string2idx(x, CM, MAX_LEN, vocab_size, INCLUDE_END_SYMBOL)

                xi_in, _, kk = mask(xi.copy(), xl, MAX_MASKED,
                                    INCLUDE_END_SYMBOL)
                prediction_mask = np.zeros(MAX_LEN, np.int32)

                for k in kk:
                    prediction_mask[k] = 1

                xi_out = xi

                yield xi_in, prediction_mask, xi_out

    dataset = tf.data.Dataset.from_generator(G, (tf.int32, tf.int32, tf.int32),
                                             ((None, ), (None, ), (None, )))

    if not for_prediction:
        dataset = dataset.shuffle(buffer_size)

    dataset = dataset.padded_batch(batch_size, drop_remainder=True)
    dataset = dataset.prefetch(buffer_size=buffer_size)

    return dataset, vocab_size + 1, CM
Example #6
 def __init__(self, mpath, dhome, ls, batch_size=4096, usegpu=-1):
     self.mpath = mpath
     self.dhome = dhome
     self.batch_size = batch_size
     self.ls = ls
     
     cm_ = os.path.join(dhome, 'char_map.pickle')
     self.cm = myPickle.load(cm_)
     self.cm_ = [x[0] for x in sorted(self.cm.items(), key=lambda x: x[1])]
     
     tf.logging.set_verbosity(tf.logging.ERROR)
     module = hub.Module(mpath)
     self.module = module
     
     if True:
         # pure sampling from latent
         z = self.samplingLatent()
         o = module(z, signature='latent', as_dict=True)
         self.latent2data = o['prediction_string']
         p = o['prediction']
         self.x_len = p.shape.as_list()[-1]
     else:
         self.x_len = 16
         print("NO LATENT")
     
     try:
         # proximity sampling, e.g. for SSPG
         self.x4PP = tf.placeholder(tf.int32, shape=(None, self.x_len))
         self.n4PP = tf.placeholder(tf.int32, shape=1)
         self.stddev4PP = tf.placeholder(tf.float32, shape=(1,))
         inputs = {'x':self.x4PP, 'n':self.n4PP, 'stddev':self.stddev4PP}
         self.latent2data4PP = module(inputs, as_dict=True, signature='sample_from_latent')['prediction_string']
     except:
         ...
     
     # simple inference
     out = module(self.x4PP, as_dict=True)
     self.infp = out['p']
     self.infx = out['x']
     self.infprediction_string = out['prediction_string']
     
     ###
     self.config = tf.ConfigProto()
     self.config.gpu_options.allow_growth = True
     
     if usegpu != -1:
         print("USE_GPU:", str(usegpu))
         self.config.gpu_options.visible_device_list = str(usegpu)
Example #7
def makeIter(home,
             epochs,
             batch_size,
             MAX_LEN,
             buffer_size,
             chunk_size=2**13,
             test=False):

    CMPATH = os.path.join(home, 'char_map.pickle')
    char_map = myPickle.load(CMPATH)
    char_num = len(char_map)

    XPATH = os.path.join(home, 'X.h5df')

    if test:
        key = 'test'
    else:
        key = 'train'

    with h5py.File(XPATH, 'r') as f:
        f = f[key]
        N = len(f)

    def G(*args):
        with h5py.File(XPATH, 'r') as f:
            f = f[key]
            bn = math.ceil(N / chunk_size)
            for i in range(bn):
                s = i * chunk_size
                e = (i + 1) * chunk_size
                Xchunk = f[s:e]
                for x in Xchunk:
                    yield x

    def batch():
        dataset = tf.data.Dataset.from_generator(G, tf.int32, (MAX_LEN, ))
        if not test:
            dataset = dataset.repeat(epochs)
        dataset = dataset.shuffle(buffer_size)
        dataset = dataset.batch(batch_size)
        dataset = dataset.prefetch(buffer_size=buffer_size)
        iterator = dataset.make_one_shot_iterator()
        x = iterator.get_next()
        return {'x': x}, x

    return batch, char_num, N
Example #8
    A = np.bmat([[2 * np.dot(pts, pts.T),
                  np.ones((rows, 1))], [np.ones((1, rows)),
                                        np.zeros((1, 1))]])

    b = np.hstack((np.sum(pts * pts, axis=1), np.ones((1))))
    x = np.linalg.solve(A, b)
    bary_coords = x[:-1]
    return np.sum(pts * np.tile(bary_coords.reshape((pts.shape[0], 1)),
                                (1, pts.shape[1])),
                  axis=0)


categs = ['All', 'Hard']
for pidx, categ in enumerate(categs):
    fname = 'Data_out/propAsabrx_nogly_' + categ + '.prp.mkl'
    (Pcnt, Ncnt, APcnt, ANcnt, TAC) = myPickle.load(fname)
    TAC = np.array(TAC)
    TAC = TAC[~np.any(TAC > 180, axis=1), :]
    if categ == 'All':
        Nc = 60
        niter = 2000
        res0, _ = kmeans2(np.vstack((TAC[:, :2], TAC[:, 2:])),
                          Nc,
                          iter=niter,
                          minit='points')

        res = np.zeros((Nc**2, 4))
        k = 0
        for i in range(Nc):
            for j in range(Nc):
                res[k, :] = np.hstack((res0[i, :], res0[j, :]))
Example #9
    ifname = os.path.join(bdir, cid + '.zd3.0.2.cg.out.rmsds')
    R = np.zeros(N)
    with open(ifname, 'r') as f:
        for n in range(N):
            R[n] = f.readline().split()[1]
    sidx = np.argsort(A[cid][0][:M])
    RS = R + 0.0

    RS[:M] = R[:M][sidx]
    return RS, R


bdir = '../zdock_data/decoys_bm4_zd3.0.2_15deg/results/'
dfname = '../ascores_2K2.scr.pkl'
d4 = parseCSVData('..\Complete Data\DBD4_data.csv')
A = mPickle.load(dfname)
RS = []
R = []
dthr = 2.5
N = 2000
ncids = 0
for i, cid in enumerate(A.keys()):
    #    if cid not in d4 or d4[cid][1]=='RB':
    #        continue
    rs, r = getSortedR(cid, bdir, A, N=N)
    #    if np.any(r[:10]<dthr):
    #        # import pdb
    #        print cid, d4[cid][1]
    #
    #        pdb.set_trace()
    R.append(np.cumsum(r < dthr) > 0)
Example #10
            x = x[:-1]
            if len(x) <= MAX_LEN and len(x) >= MIN_LEN:
                X.append(x)
    return X


def write_tsv(output, X, P, encoding=ENCODING):
    assert len(X) == len(P)
    n = len(X)
    with open(output, 'w', encoding=encoding) as f:
        for x, p in zip(X, P):
            print("%s\t%f" % (x, p), file=f)


if __name__ == '__main__':
    try:
        model_path = sys.argv[1]
        password_file = sys.argv[2]
        output_path = sys.argv[3]
    except:
        print("USAGE: model_path.h5 password_path.txt output_path.txt")
        sys.exit(1)

    X = read_passwords(password_file)
    cm = myPickle.load(CHARMAP)

    model = tf.keras.models.load_model(model_path, compile=False)
    S = Inference(model, cm, MAX_LEN, BATCH_SIZE)

    logP = S.applyBatch(X, TERMINAL_SYMBOL)
    write_tsv(output_path, X, logP)

Example #11
bdir = '../DBD4N/PDBPKL4/'
exfile = bdir + 'E_125PN_15_35_50.lbl.pkl'
categs = ['All', 'RB', 'Med', 'Hard']  #
AAcodes = [
    'Ala', 'Val', 'Leu', 'Ile', 'Cys', 'Met', 'Pro', 'Phe', 'Trp', 'Tyr',
    'Gly', 'Ser', 'Thr', 'Glu', 'Gln', 'Arg', 'Lys', 'His', 'Asn', 'Asp'
]
AAcodes = [three_to_one(a.upper()) for a in AAcodes]
#f3=['1SBB', '1JPS', '2HMI', '1GHQ', '1KTZ', '1K74', '1D6R', '2SIC', '1GPW', '1XD3', '1EAW', '1VFB', '7CEI', '1E4K', '1I4D', '1H1V', '2PCC', '1FQ1', '2HLE', '1FQJ', '1S1Q', '2OOB', '1UDI', '1KLU', '1WQ1', '1CGI', '1ATN', '1N2C', '1GP2', '1FAK', '1NW9', '1GLA', '1GRN', '2HRK', '1AZS', '1JMO', '1PXV', '1EWY', '1RLB', '1DQJ', '2BTF', '2I25', '1I2M', '1BUH', '1BGX', '1ML0', '1EFN', '1DFJ', '1Y64', '2UUY', '1MAH', '1BVK', '1BVN', '1EER', '1MLC', '1NSN', '1AK4', '1A2K', '1QFW', '2H7V', '1T6B', '1KAC', '1YVB', '1J2J', '1QA9', '1AHW', '2OT3', '2FD6', '2AJF', '1K4C', '1NCA', '1OPH', '1XQS', '1B6C', '1PPE', '2O8V', '1HIA', '1Z0K', '1R0R', '1WEJ', '1ACB', '1KXP', '1KXQ', '1R8S', '1IRA', '1GCQ', '1F51', '2B42', '2HQS', '1AKJ', '2JEL', '1KKL', '1FC2', '1E96', '1N8O', '2MTA', '2VIS', '1IB1', '1E6J', '1Z5Y', '1EZU', '1TMQ', '2C0L', '1E6E', '1IQD', '1ZHI', '1M10', '2NZ8', '1AY7', '1HE8', '1IJK', '1HE1', '1FSK', '1F34', '2SNI', '1BJ1', '2CFH', '1BKD', '1DE4', '1IBR', '1I9R', '1K5D', '1AVX']
#f4=['2A5T', '3CPH', '1ZHH', '2ABZ', '1LFD', '2OUL', '1JIW', '2B4J', '1SYX', '1FLE', '1JTG', '2AYO', '4CPA', '1CLV', '1OC0', '1XU1', '1R6Q', '2O3B', '1US7', '3D5S', '1JZD', '1HCF', '1OYV', '2OZA', '1H9D', '2A9K', '2J0T', '2Z0E', '3BP8', '2IDO', '1WDW', '1ZLI', '2VDB', '1RV6', '1FFW', '1F6M', 'BOYV', '1JWH', '2OOR', '1MQ8', '1GL1', '1PVH', '2I9B', '1OFU', '1GXD', '3SGQ', '1JK9', '1ZM4', '1FCC', '2G77', '2J7P', '2FJU']
dbd4 = parseCSVData('..\Complete Data\DBD4_data.csv')
fig, axes = plt.subplots(nrows=2, ncols=2)
rmsdfname = 'rmsd_atomic.mkl'
rmsd = myPickle.load(rmsdfname)
for subidx, categ in enumerate(categs):

    if categ != 'All':
        clist = [
            cid for cid in dbd4.keys()
            if dbd4[cid][1].upper() == categ.upper()
        ]
    else:
        clist = dbd4.keys()
    E = getExamplesDBD.loader(exfile)
    Pcnt = getCountDict()
    Ncnt = getCountDict()
    APcnt = dict(zip(AAcodes, [[0, 0, 0, 0] for _ in AAcodes]))
    ANcnt = dict(zip(AAcodes, [[0, 0, 0, 0] for _ in AAcodes]))
    TAC = []
Example #12
def getSortedR(cid,bdir,A,N=2000,M=2000):
    ifname=os.path.join(bdir,cid+'.zd3.0.2.cg.out.rmsds')
    R=np.zeros(N)
    with open(ifname, 'r') as f:
        for n in range(N):
            R[n]=f.readline().split()[1]
    sidx=np.argsort(A[cid][0][:M])
    RS=R+0.0
    
    RS[:M]=R[:M][sidx]
    return RS,R

bdir='../zdock_data/decoys_bm4_zd3.0.2_15deg/results/'
dfname='../ascores_2K2.scr.pkl'
d4=parseCSVData('..\Complete Data\DBD4_data.csv')
A=mPickle.load(dfname)
RS=[]
R=[]
dthr=2.5
N=2000
ncids=0
for i,cid in enumerate(A.keys()):
#    if cid not in d4 or d4[cid][1]=='RB':
#        continue
    rs,r=getSortedR(cid,bdir,A,N=N)
#    if np.any(r[:10]<dthr):
#        # import pdb
#        print cid, d4[cid][1]
#        
#        pdb.set_trace()
    R.append(np.cumsum(r<dthr)>0)
Example #13
            nprocs = comm.Get_size()
        except ImportError:
            print "Failure importing MPI4py: Not using MPI parallelization."
            comm = None
            myid = 0
            nprocs = 1

        A = parallelRun(N,
                        pdbpklpath,
                        pppath,
                        ofname=ofname,
                        comm=comm,
                        myid=myid,
                        nprocs=nprocs)
    else:
        A = Pickle.load(ofname)
    if A is not None:
        rmsd = getRMSD()
        sels = ['ALL', 'RB', 'MED', 'HARD']  #
        #sels=['ALL']
        mrks = {
            'ALL': 'o',
            'RB': 'o',
            'MED': 's',
            'HARD': '^'
        }
        clrs = {
            'ALL': 'b',
            'RB': 'g',
            'MED': 'b',
            'HARD': 'k'
Example #14
def setup(model_path, home, reg_type, arch_id, latent_size, epochs, batch_size, max_len, learning_rate, **conf):

    cm_ = os.path.join(home, CM)
    cm = myPickle.load(cm_)
    chars = np.array([x[0] for x in sorted(cm.items(), key=lambda x: x[1])])
    
    if reg_type == 0:
        print("MMAE")
        from AE import makeIter
        train, char_num, M = makeIter(home, epochs, batch_size, max_len, buffer_size=conf['BUFFER_SIZE'])
        test, _, _ = makeIter(home, epochs, batch_size, max_len, buffer_size=conf['BUFFER_SIZE'], test=True)

    elif reg_type == 1:
        print("Single NoisingAE")
        from denoising import makeIterNoise as makeIter

        train, char_num, M = makeIter(home, epochs, batch_size, max_len, buffer_size=conf['BUFFER_SIZE'])
        test, _, _ = makeIter(home, epochs, batch_size, max_len, buffer_size=conf['BUFFER_SIZE'], test=True)
    elif reg_type == 2:
        print("MaskedAE")
        from denoising import makeIterMask as makeIter

        mask_size = conf['mask_size']
        train, char_num, M = makeIter(home, mask_size,  epochs, batch_size, max_len, buffer_size=conf['BUFFER_SIZE'])
        test, _, _ = makeIter(home, mask_size, epochs, batch_size, max_len, buffer_size=conf['BUFFER_SIZE'], test=True)
    elif reg_type == 3:
        print("NoisingAE")
        from denoising import makeIterMNoise as makeIter
        
        holes_number = conf['holes_number']

        train, char_num, M = makeIter(home, holes_number, epochs, batch_size, max_len, buffer_size=conf['BUFFER_SIZE'])
        test, _, _ = makeIter(home, holes_number, epochs, batch_size, max_len, buffer_size=conf['BUFFER_SIZE'], test=True)
    elif reg_type == 4:
        print("Single NoisingAE WITH END CHAR")
        from denoising import makeIterNoise as makeIter

        train, char_num, M = makeIter(home, epochs, batch_size, max_len, buffer_size=conf['BUFFER_SIZE'], include_end_char=True)
        test, _, _ = makeIter(home, epochs, batch_size, max_len, buffer_size=conf['BUFFER_SIZE'], test=True, include_end_char=True)
    else:
        sys.exit(1)
    
    if arch_id == 0:
        enc = architecture.enc0_16
        dec = architecture.dec0_16
    elif arch_id == 1:
        enc = architecture.enc1_16
        dec = architecture.dec1_16
    elif arch_id == 2:
        enc, dec = architecture.ARCH_resnetBNK0
    elif arch_id == 3:
        enc, dec = architecture.ARCH_resnetBNK1
    elif arch_id == 4:
        enc, dec = architecture.ARCH_resnetBNK2
    elif arch_id == 5:
        enc, dec = architecture.ARCH_resnetBNK3
    elif arch_id == 6:
        enc, dec = architecture.ARCH_resnetBNK4
    elif arch_id == 7:
        enc, dec = architecture.ARCH_INAE
    elif arch_id == 8:
        enc, dec = architecture.ARCH_INAE2
    else:
        print('NO SUCH ARCH_ID')
        sys.exit(1)
    
    N = math.ceil((epochs * M) / batch_size)
    print('Train_iter: ', N)

    hparams = {
        'enc_arch' : enc,
        'dec_arch' : dec,
        'learning_rate' : learning_rate,
        'char_num' : char_num,
        'batch_size' : batch_size,
        
        'loss_id' : conf.get('loss_id', 0),
        
        'alpha' : conf['alpha'], # scalar loss latent space
        'beta' : conf['beta'], # scalar loss data space
        'latent_size' : latent_size,
        
        'chars' : chars,
    }

    # make estimator
    ae = MMAE.MMAE(model_path, hparams)

    run_conf = ae.setupRunConfig(conf['SAVE_SUMMARY_STEPS'], conf['SAVE_CHECKPOINT_STEP'], keep_checkpoint_max=1)
    estimator = ae(run_conf)
    
    train_spec = tf.estimator.TrainSpec(train, max_steps=N)
    eval_spec = tf.estimator.EvalSpec(test, steps=None, throttle_secs=conf['THROTTLE_SECS'])

    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    
    return ae, max_len
Example #15
def load_charmap(path):
    cm = myPickle.load(path)
    cm_ = [x[0] for x in sorted(cm.items(), key=lambda x: x[1])]
    return cm, cm_
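
Assuming the char map is a dict from character to a contiguous integer index starting at 0 (as the other examples suggest), cm_ is simply the inverse mapping, so index sequences can be decoded back to strings:

# Hypothetical usage sketch; the pickle path and the sample string are placeholders.
cm, cm_ = load_charmap('char_map.pickle')
indices = [cm[c] for c in 'password']        # encode: characters -> indices
decoded = ''.join(cm_[i] for i in indices)   # decode: indices -> characters
assert decoded == 'password'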
Example #16
def loadMIFile(ofile):
    return cPickle.load(ofile)
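
cPickle exists only under Python 2; under Python 3 the same helper can use pickle directly, which transparently uses the C implementation when available. A minimal equivalent, assuming ofile is an already-open binary file object:

import pickle

def loadMIFile(ofile):
    # ofile must be opened in binary mode ('rb')
    return pickle.load(ofile)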
Example #17
            lM = np.max(lD)
            rM = np.max(rD)
            lD = lD / lM
            rD = rD / rM
            D = []
            for k0, (l0, r0) in enumerate(pex):
                for l1, r1 in pex[k0 + 1:]:
                    d = np.max((lD[l0, l1], rD[r0, r1]))
                    D.append(d)
            C = C + np.histogram(D, bins)[0]
        except Exception as e:
            print "Error", e
            continue
    mPickle.dump(ofname, (bins, C))
else:
    (bins, C) = mPickle.load(ofname)

    bb = (bins[1:] + bins[:-1]) / 2
    idx = bb <= 1
    bb = bb[idx]
    C = C[idx]
    plt.plot(bb, C, 'b', linewidth=2)
    plt.grid()
    plt.xlabel('Normalized pairwise distance (d)')
    plt.ylabel('Number of pairs of simultaneously interacting residue pairs',
               color='b')
    ax1 = plt.gca()
    ax2 = ax1.twinx()
    ax2.plot(bb, np.cumsum(C) / np.sum(C), 'r.-', linewidth=2)
    ax2.set_ylabel(
        'Cumulative proportion of pairs of simultaneously interacting residue pairs',
Example #18
        N=20
        try:
            from mpi4py import MPI
            comm = MPI.COMM_WORLD
            myid = comm.Get_rank()
            nprocs = comm.Get_size()
        except ImportError:
            print "Failure importing MPI4py: Not using MPI parallelization."
            comm=None
            myid=0
            nprocs=1
        
        A=parallelRun(N,pdbpklpath,pppath,ofname=ofname,comm=comm,myid=myid,nprocs=nprocs)
    else:
        A=Pickle.load(ofname)
    if A is not None:        
        rmsd=getRMSD()    
        sels=['ALL','RB','MED','HARD'] #
        #sels=['ALL']
        mrks={'ALL':'o','RB':'o','MED':'s','HARD':'^'};
        clrs={'ALL':'b','RB':'g','MED':'b','HARD':'k'};    
        lbns={'ALL':'','RB':'Rigid Body','MED':'Medium','HARD':'Hard'};    
        for sel in sels:        
            
            if sel!='ALL':
                incids_sel=[cid for cid in rmsd.keys() if rmsd[cid][2]==sel]
            else:
                incids_sel=incids
            lbn=lbns[sel]
            mrk=mrks[sel]
Example #19
def main():
    # generate_interval()
    time_tag = myPickle.load('test_user_time_all.pkl')
    print time_tag
Example #20
    symm = stxt[(stxt.find('<b>') + 3):(stxt.find('</b>'))]
    return stch, symm


def kMer(stch):
    if len(stch):
        return int(stch[stch.find('-mer') - 2:][0:2])
    else:
        return 0


if __name__ == "__main__":

    dbd4file = '..\..\Complete Data\DBD4_data.csv'
    dbd4 = parseCSVData(dbd4file)
    dbd4s = myPickle.load('../../Complete Data/DBD4_data_stocihiometry.mkl')
    from getExamplesDBD_breakup import *
    E = getExamplesDBD.loader('../../DBD4CSPKL/PKL/ENS_15_35_50.lbl.pkl')
    dbd4es = {}
    for cid in E.Pex:
        scid = cid[0][:4]
        dbd4es[cid] = dbd4s[scid] + (E.Pex[cid][1], E.Pex[cid][2],
                                     E.Pex[cid][1] * E.Pex[cid][2],
                                     len(E.Pex[cid][0]), kMer(dbd4s[scid][6]))
    with open('DBD4_broken.csv', 'wb') as csvfile:
        spamwriter = csv.writer(csvfile, delimiter=',')
        for cid in dbd4es:
            spamwriter.writerow([cid] + list(dbd4es[cid]))
#    bdir='../../Complete Data/CPDB/'
#    from BISEPutils import fetchPDB
#    for c in dbd4:
Example #21
import random
import myPickle
DEBUG = False

video_info = myPickle.load('../video.pkl')
def transverse(train):
    '''
       {'1':{'class1':5, 'class2':4}}
    '''
    item_user = {}
    for user, item in train:
        print user
        print item

def process_rate(fi, fo):
    '''
        Some users rate the same class several times, so we calculate an average score
        User1, tv,  4
        User1, tv,  2
        ---->
        User1, tv,  3
    '''
    # fo = open('train_rate.csv', 'w')
    new_dict = {}
    times = {}
    for line in fi:
        user, class_name, rate = line.split(',')
        user = int(user)
        rate = float(rate.strip())

        new_dict.setdefault(user,{})
Example #22
def triangle_csc(pts):
    rows, cols = pts.shape

    A = np.bmat([[2 * np.dot(pts, pts.T), np.ones((rows, 1))],
                 [np.ones((1, rows)), np.zeros((1, 1))]])

    b = np.hstack((np.sum(pts * pts, axis=1), np.ones((1))))
    x = np.linalg.solve(A,b)
    bary_coords = x[:-1]
    return np.sum(pts * np.tile(bary_coords.reshape((pts.shape[0], 1)), (1, pts.shape[1])), axis=0)
    
    
categs=['All','Hard']
for pidx,categ in enumerate(categs):
    fname='Data_out/propAsabrx_nogly_'+categ+'.prp.mkl'
    (Pcnt,Ncnt,APcnt,ANcnt,TAC)=myPickle.load(fname)
    TAC=np.array(TAC)
    TAC=TAC[~np.any(TAC>180,axis=1),:]
    if categ=='All':
        Nc=60
        niter=2000
        res0, _ = kmeans2(np.vstack((TAC[:,:2],TAC[:,2:])),Nc,iter=niter,minit='points')
        
        res=np.zeros((Nc**2,4))
        k=0
        for i in range(Nc):
            for j in range(Nc):
                res[k,:]=np.hstack((res0[i,:],res0[j,:]))
                k=k+1
    idx = vq(TAC, res)[0]
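
triangle_csc solves for the barycentric coordinates of the circumcenter (the point equidistant from all input points) and maps them back to Cartesian coordinates. A quick sanity check, assuming the function is in scope: for the right triangle with vertices (0,0), (1,0) and (0,1), the circumcenter is the midpoint of the hypotenuse, (0.5, 0.5).

import numpy as np

# Hypothetical check of triangle_csc; not part of the original script.
pts = np.array([[0.0, 0.0], [1.0, 0.0], [0.0, 1.0]])
print(triangle_csc(pts))  # expected: approximately [0.5 0.5]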
Example #23
def makeIterInput(home,
                  batch_size,
                  MAX_MASKED,
                  INCLUDE_END_SYMBOL,
                  MAX_LEN=32,
                  buffer_size=buffer_size,
                  for_prediction=False):
    XPATH = os.path.join(home, XNAME)  # dataset path

    CMPATH = os.path.join(home, CMNAME)
    CM = myPickle.load(CMPATH)  # load the charmap
    vocab_size = max(CM.values()) + 1  # vocabulary size

    def G(*args):
        """
        生成器
        Args:
            *args:
        Returns:
        """
        # for each chunk
        with open(XPATH, encoding=ENCODING, errors='ignore') as f:
            for x in f:
                x = x[:-1]  # strip the trailing newline
                x_len = len(x)

                # if not INCLUDE_END_SYMBOL: print("NO <END>")

                # reserve room at the end for the end symbol when INCLUDE_END_SYMBOL is set
                if x_len > MAX_LEN - int(INCLUDE_END_SYMBOL):
                    # the password is too long, skip it
                    continue

                # indices of the password under the charmap, padded to length MAX_LEN
                x_index = string2idx(x, CM, MAX_LEN, vocab_size,
                                     INCLUDE_END_SYMBOL)

                # .copy() creates a new list (only the outer list is copied)
                # randomly pick some positions to mask; returns the masked sequence and the masked positions
                x_index_in, _, masked_index = mask(x_index.copy(), x_len,
                                                   MAX_MASKED,
                                                   INCLUDE_END_SYMBOL)

                # indicator vector of the masked positions, e.g. [0, 0, 0, 1, 0, 0]
                prediction_mask = np.zeros(MAX_LEN, np.int32)
                for k in masked_index:
                    prediction_mask[k] = 1

                xi_out = x_index

                yield x_index_in, prediction_mask, xi_out

    # Build the dataset (fed to the network as an input pipeline): pass the generator, the output
    # types and the output shapes; None lets each shape be inferred, and the tuple structure
    # follows the generator's output.
    dataset = tf.data.Dataset.from_generator(G, (tf.int32, tf.int32, tf.int32),
                                             ((None, ), (None, ), (None, )))

    if not for_prediction:
        # shuffle the dataset (buffer of buffer_size elements)
        dataset = dataset.shuffle(buffer_size)
    # padded_shapes = (
    #     tf.TensorShape([None]),
    #     tf.TensorShape([None]),
    #     tf.TensorShape([None])
    # )
    # dataset = dataset.padded_batch(batch_size, padded_shapes=padded_shapes, drop_remainder=True)

    # Combine consecutive elements of the dataset (which may have different shapes) into single
    # elements; the tensors in each result get an extra outer dimension and are padded to the
    # corresponding shapes in padded_shapes.
    # Original code: this call has an issue and should be replaced by the commented-out
    # padded_shapes version above.
    dataset = dataset.padded_batch(batch_size, drop_remainder=True)
    # Prefetch data: decouples the time data is produced from the time it is consumed by fetching
    # elements from the input dataset before they are requested.
    dataset = dataset.prefetch(buffer_size=buffer_size)

    return dataset, vocab_size + 1, CM
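
The prediction_mask yielded above marks the masked positions, which suggests a masked-language-model style objective. The snippet does not show the loss, but one common way such a mask is consumed downstream is to restrict the per-position cross-entropy to the masked positions; a minimal sketch, assuming TensorFlow 2.x and per-position logits of shape (batch, MAX_LEN, vocab):

import tensorflow as tf

# Hypothetical masked loss; the model producing the logits is not part of this snippet.
def masked_loss(labels, logits, prediction_mask):
    # per-position sparse cross-entropy, shape (batch, MAX_LEN)
    ce = tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
    mask = tf.cast(prediction_mask, ce.dtype)
    # average only over the masked (to-be-predicted) positions
    return tf.reduce_sum(ce * mask) / tf.maximum(tf.reduce_sum(mask), 1.0)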
Example #25
    #    bf=ddir+cid+'_b.pdb'
    #    uf=ddir+cid+'_u.pdb'
    #    print "RMSD =",calcRMSD(uf,bf)
    ofname = 'rmsd_atomic.mkl'
    loadDirect = True
    ddir = '../DBD4N/DBD4/'
    pkldir = '../DBD4N/PDBPKL4/'
    if not loadDirect or not os.path.isfile(ofname):
        import __main__
        __main__.pymol_argv = ['pymol', '-qc']  # Pymol: quiet and no GUI #
        import pymol
        pymol.finish_launching()
        from pymol import cmd
        fdict = batchExtract(pkldir, ddir, ofname)
    else:
        fdict = myPickle.load(ofname)
    #cid_l/r: rmsd, (rmsd_pymol, unbound_asa_pymol, bound_asa_pymol), unbound_mol_weight, bound_mol_weight,unbound_asa, bound_asa

    Va = []
    for v in fdict.values():
        Va.append(v[:-2])

    V = np.array(Va)  # rmsd, unbound_asa, bound_asa, unbound_mol_weight, bound_mol_weight

    #x=V[:,2]/(4.84*(V[:,4]**0.76)); y=V[:,1];  #x=x[nidx];y=y[nidx];
    x = V[:, 1] / (4.84 * (V[:, 3]**0.76))
    y = V[:, 0]
    #x=V[:,3]/(0.346*V[:,5]+2.5e+03)
    #x=V[:,5]/(4.84*(V[:,2]**0.76)); y=V[:,1];
    nidx = (y > 1e-3)
Example #26
            lD=getDistMat(getCoords(L.R))
            rD=getDistMat(getCoords(R.R))
            lM=np.max(lD)
            rM=np.max(rD)
            lD=lD/lM
            rD=rD/rM
            D=[]
            for k0,(l0,r0) in enumerate(pex):
                for l1,r1 in pex[k0+1:]:
                    d=np.max((lD[l0,l1],rD[r0,r1]))
                    D.append(d)
            C=C+np.histogram(D,bins)[0]
        except Exception as e:
            print "Error",e
            continue
    mPickle.dump(ofname,(bins,C))
else:    
    (bins,C)=mPickle.load(ofname)
    
    bb=(bins[1:]+bins[:-1])/2
    idx=bb<=1
    bb=bb[idx]
    C=C[idx]
    plt.plot(bb,C,'b',linewidth=2);plt.grid();
    plt.xlabel('Normalized pairwise distance (d)');
    plt.ylabel('Number of pairs of simultaneously interacting residue pairs',color='b');
    ax1=plt.gca()    
    ax2 = ax1.twinx()
    ax2.plot(bb, np.cumsum(C)/np.sum(C), 'r.-',linewidth=2)
    ax2.set_ylabel('Cumulative proportion of pairs of simultaneously interacting residue pairs', color='r')
    plt.show()
Example #27
        passwords: list of passwords
        log_probability: list of unnormalized log probabilities
        encoding: text encoding

    Returns:
    """
    assert len(passwords) == len(log_probability)
    n = len(passwords)
    with open(output, 'w', encoding=encoding) as f:
        for x, p in zip(passwords, log_probability):
            print("%s\t%f" % (x, p), file=f)


if __name__ == '__main__':
    try:
        model_path = sys.argv[1]
        password_file = sys.argv[2]
        output_path = sys.argv[3]
    except:
        print("USAGE: model_path.h5 password_path.txt output_path.txt")
        sys.exit(1)

    passwords = read_passwords(password_file)
    charmap = myPickle.load(CHARMAP)

    model = tf.keras.models.load_model(model_path, compile=False)
    infer = Inference(model, charmap, MAX_LEN, BATCH_SIZE)

    logP = infer.applyBatch(passwords, TERMINAL_SYMBOL)  # compute the probabilities
    write_tsv(output_path, passwords, logP)