def data237_fp_gen(refresh=False):
    """
    Generate the fingerprint files for every complex under ``data237_complex_root``.

    For each complex id, ``antigen.pdb`` and ``antibody.pdb`` are loaded from
    the complex's directory, a ``Complex`` is built from them and every
    fingerprint returned by ``gen_fps()`` is written to
    ``<data237_fp6_root>/<complex_id>/fp<N>.txt`` (N starts at 1).

    refresh: when False (the default) a complex whose output directory
             already exists is skipped; when True it is regenerated.
    """
    for complex_id in complex_ids(path=data237_complex_root):

        output_path = os.path.join(data237_fp6_root, complex_id)
        if not refresh and os.path.exists(output_path):
            print("%s already processed" % complex_id)
            continue

        print("start processing %s" % complex_id)

        data_dir = os.path.join(data237_complex_root, complex_id)
        antigen = load_pdb_struct(os.path.join(data_dir, "antigen.pdb"), residue_cls=Residue)
        antibody = load_pdb_struct(os.path.join(data_dir, "antibody.pdb"), residue_cls=Residue)
        try:
            c = Complex(complex_id, antigen, antibody)
            fps = c.gen_fps()
        except Exception:
            sys.stderr.write("complex %s encountered error.\n" % complex_id)
            # Nothing was generated for this complex: skip it.  Falling
            # through would either hit an undefined ``fps`` or write the
            # previous complex's fingerprints into this complex's directory.
            continue

        # Created only after a successful generation, so a failed complex is
        # not reported as "already processed" on the next run.
        if not os.path.exists(output_path):
            os.makedirs(output_path)

        for i, fp in enumerate(fps):
            path = os.path.join(output_path, "fp%d.txt" % (i + 1))
            print("to path %s" % path)
            fp.tofile(path)
def gen_dist_matrix(plane_type, atg_or_atb, res_or_tri):
    """
    Run the pairwise ``corr_coef`` calculation over the fingerprints stored in
    the ``<plane_type>-<atg_or_atb>-<res_or_tri>`` sub-directory of
    ``data237_fp175_padded_root``, using the module-level ``callback``.
    """
    subdir = "%s-%s-%s" % (plane_type, atg_or_atb, res_or_tri)
    fp_dir = os.path.join(data237_fp175_padded_root, subdir)

    # ``None`` is handed to the converter factory unchanged, as in every
    # whole-fingerprint loader in this file.
    converter = make_single_line_converter(None)
    load_fp = make_dataloader(fp_dir, converter)

    ids = complex_ids(fp_dir)

    complex_pairwise_calc(ids, load_fp, corr_coef, callback=callback)
def print_splitted_fp_in_csv(res_or_tri, which=0):
    """
    Print one of the three consecutive segments of every fingerprint as CSV.

    The fingerprint line of each complex is cut into three slices whose
    lengths come from the cached overall spatial distributions:

      0. 3 x (sum of the antigen distribution for "res", otherwise of the
         triangle distribution)
      1. 3 x (sum of the antibody distribution)
      2. 15 x (sum of the antigen distribution)

    res_or_tri: "res" reads from ``fp_aaindex_if_padded``; any other value
                reads from ``fp_aaindex_if_padded_tri``.
    which:      index (0, 1 or 2) of the segment to print.

    Output is a header line followed by one ``<cid>,<v1>,<v2>,...`` row per
    complex id in sorted order, values formatted with two decimals.
    """
    if res_or_tri == "res":
        subdir = "fp_aaindex_if_padded"
    else:
        subdir = "fp_aaindex_if_padded_tri"
    data_dir = os.path.join(data480_root, subdir)

    delimiter = ","
    cids = complex_ids(data_dir)

    name = ("%s 3 bits" % (res_or_tri), "atb 4 bits", "15 bits")

    from ve.fp.complex_util.padding import PaddedComplexFingerPrint, OverallSpatialDistribution

    atg_dist, atb_dist, tri_dist = OverallSpatialDistribution.from_cache()

    # Running end offsets of the three segments.
    head_dist = atg_dist if res_or_tri == "res" else tri_dist
    first_end = sum(head_dist.values()) * 3
    second_end = first_end + sum(atb_dist.values()) * 3
    third_end = second_end + sum(atg_dist.values()) * 15

    bounds = [(0, first_end), (first_end, second_end), (second_end, third_end)]
    dls = [
        make_dataloader(data_dir, make_single_line_converter(slice(lo, hi)))
        for lo, hi in bounds
    ]

    print("cid, %s" % name[which])
    load_segment = dls[which]
    for cid in sorted(cids):
        cells = ["%.2f" % d for d in load_segment(cid)]
        print("%s,%s" % (cid, delimiter.join(cells)))
def gen_pairwise_dist(res_or_tri):
    """
    Run the pairwise ``corr_coef`` calculation over the fingerprints of the
    480 data set, using the module-level ``callback``.

    res_or_tri: "res" reads from ``fp_aaindex_if_padded``; any other value
                reads from ``fp_aaindex_if_padded_tri``.
    """
    if res_or_tri == "res":
        subdir = "fp_aaindex_if_padded"
    else:
        subdir = "fp_aaindex_if_padded_tri"
    fp_dir = os.path.join(data480_root, subdir)

    converter = make_single_line_converter(None)
    load_fp = make_dataloader(fp_dir, converter)

    ids = complex_ids(fp_dir)

    complex_pairwise_calc(ids, load_fp, corr_coef, callback=callback)
def print_fp_in_csv(plane_type, atg_or_atb, res_or_tri):
    """
    Print every fingerprint in the ``<plane_type>-<atg_or_atb>-<res_or_tri>``
    sub-directory of ``data237_fp175_padded_root`` as one CSV row per complex.

    Rows are emitted in sorted complex-id order as ``<cid>,<v1>,<v2>,...,``
    (each row ends with a trailing comma), values formatted with two decimals.
    """
    subdir = "%s-%s-%s" % (plane_type, atg_or_atb, res_or_tri)
    fp_dir = os.path.join(data237_fp175_padded_root, subdir)
    load_fp = make_dataloader(fp_dir, make_single_line_converter(None))

    delimiter = ","
    for cid in sorted(complex_ids(fp_dir)):
        cells = ["%.2f" % d for d in load_fp(cid)]
        print("%s,%s," % (cid, delimiter.join(cells)))
def get_overall_residue_distribution():
    """
    Load every complex returned by ``complex_ids()`` and return the three
    overall distributions computed by
    ``OverallSpatialDistribution.overall_dist`` as an
    ``(antigen, antibody, triangle)`` tuple.
    """
    from ve.util.load_pdb import load_complexes, complex_ids
    from ve.fp.test.common import GeometryResidue

    loaded = load_complexes(
        complex_ids(),
        complex_cls=ComplexWithResidueSpatialDistribution,
        residue_cls=GeometryResidue,
    )
    # Unpack explicitly so exactly three values are required and a tuple is returned.
    atg_dist, atb_dist, tri_dist = OverallSpatialDistribution.overall_dist(loaded)

    return atg_dist, atb_dist, tri_dist
    def setUp(self):
        """Load a random sample of ten complexes into ``self.complexes``."""
        from ve.util.load_pdb import load_complexes, complex_ids
        from ve.fp.fp_80 import Residue

        from random import sample

        # Ten ids drawn without replacement from all available complexes.
        chosen_ids = sample(complex_ids(), 10)

        self.complexes = load_complexes(
            chosen_ids,
            complex_cls=ResiduePlaneBasedComplex,
            residue_cls=Residue,
        )
def main():
    """
    Recompute (``refresh=True``) the triangles of every complex returned by
    ``complex_ids()``.
    """
    from ve.util.complex import BaseComplex
    from ve.fp.fp_80 import Residue as Residue80

    # Plain complex extended with the triangle trait; no extra behaviour.
    class Complex(BaseComplex, ResidueTriangleTrait):
        pass

    from ve.util.load_pdb import load_complexes, complex_ids

    all_complexes = load_complexes(complex_ids(), complex_cls=Complex, residue_cls=Residue80)
    for cplx in all_complexes:
        cplx.get_triangles(refresh=True)
def main_480():
    """
    Run the pairwise ``corr_coef`` calculation over the 480 data set with the
    ``fp370_atb_dataloader``, using the module-level ``callback``.
    """
    import os
    from ve.util.load_pdb import complex_ids
    from ve.config import data480_root
    from data480 import fp370_atg_dataloader, fp370_atb_dataloader

    # NOTE(review): ids are listed from the "fp_370_atg" directory while the
    # atb dataloader is used below; confirm this pairing is intended.
    atg_fp_dir = os.path.join(data480_root, "fp_370_atg")

    # Keep only the part of each listed id that precedes the first comma.
    cids = [raw_id.split(",")[0] for raw_id in complex_ids(atg_fp_dir)]

    complex_pairwise_calc(cids, fp370_atb_dataloader, corr_coef, callback=callback)
# Example #10
# 0
def simmat_from_db():
    """
    Print, as CSV, the ``epi_166`` matrix read from the database after
    normalising it by its diagonal.

    The matrix is loaded for the complex ids found under ``epi166_fp`` and
    divided by the transposed diagonal before printing.
    """
    from ve.util.load_pdb import complex_ids
    from ve.config import epi166_fp
    from ve.dbconfig import db

    cids = complex_ids(epi166_fp)

    mat = lmatrix.from_db(db["epi_166"], cids)

    # The diagonal is turned into a column so the division broadcasts over
    # rows; presumably this scales row i by its own diagonal entry
    # (TODO confirm against lmatrix).
    mat = lmatrix(cids, data=mat / np.matrix(mat.diagonal()).T)

    # Call form prints identically on Python 2 and is valid on Python 3.
    print(mat.to_csv_str())
def main1():
    """
    main function
    generate the 370-bit finger prints for all complexes in 237 dataset

    Each complex id is printed before ``gen_fp_for_complex`` is called on it.
    """

    from ve.util.load_pdb import complex_ids

    c_ids = complex_ids()

    for cid in c_ids:
        # Call form prints identically on Python 2 and is valid on Python 3.
        print(cid)
        gen_fp_for_complex(cid)
def simmat_from_db():
    """
    Print, as CSV, the ``epi_166`` matrix read from the database after
    normalising it by its diagonal.

    The matrix is loaded for the complex ids found under ``epi166_fp`` and
    divided by the transposed diagonal before printing.
    """
    from ve.util.load_pdb import complex_ids
    from ve.config import epi166_fp
    from ve.dbconfig import db

    cids = complex_ids(epi166_fp)

    mat = lmatrix.from_db(db["epi_166"], cids)

    # The diagonal is turned into a column so the division broadcasts over
    # rows; presumably this scales row i by its own diagonal entry
    # (TODO confirm against lmatrix).
    mat = lmatrix(cids, data=mat / np.matrix(mat.diagonal()).T)

    # Call form prints identically on Python 2 and is valid on Python 3.
    print(mat.to_csv_str())
def test_count():
    """
    Report which complexes of the 480 data set already have a fingerprint.

    For every id under ``data480_complex_root`` prints ``<cid> exists`` when
    ``<data480_root>/fp_370_atg/<cid>.fp`` is present, otherwise just ``<cid>``.
    """
    import os
    from ve.util.load_pdb import complex_ids, load_complexes
    from ve.config import data480_root, data480_complex_root

    ids = complex_ids(data480_complex_root)
    fp_dir = os.path.join(data480_root, "fp_370_atg")

    for cid in ids:
        fp_path = os.path.join(fp_dir, "%s.fp" % cid)
        # Single-argument call form: same output on Python 2, valid on Python 3.
        if os.path.exists(fp_path):
            print("%s exists" % cid)
        else:
            print(cid)
def main(fp_dir, use_complex_plane = True, atg_as_rec = True, use_tri = True, use_cache = True):
    """
    Generate a fingerprint string for every complex and write it to
    ``<fp_dir>/<c_id>.fp``.

    fp_dir:            directory the ``.fp`` files are written into.
    use_complex_plane: True selects ``ComplexPlaneBasedComplex``, False
                       selects ``ResiduePlaneBasedComplex``.
    atg_as_rec:        forwarded to ``gen_fp_str`` as ``atg_as_receptor``.
    use_tri:           forwarded to ``gen_fp_str`` as ``use_tri``.
    use_cache:         forwarded to ``gen_fp_str`` as ``use_cache``.

    A complex that fails is reported on stdout and the loop continues.
    """
    from ve.fp.fp_80 import Residue
    from ve.util.load_pdb import complex_ids, load_complexes
    from ve.config import data237_fp175_padded_root, data480_complex_root

    complex_cls = ComplexPlaneBasedComplex if use_complex_plane else ResiduePlaneBasedComplex

    cids = complex_ids()

    cs = load_complexes(cids, complex_cls = complex_cls, residue_cls = Residue)
    for c in cs:
        try:
            fp_str = c.gen_fp_str(atg_as_receptor = atg_as_rec, use_cache = use_cache, use_tri = use_tri)
            with open(fp_dir + "/" + c.c_id + ".fp", "w") as f:
                f.write(fp_str)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # SystemExit still stop the batch.
            print("%s encountered error" % c.c_id)
def main(fp_dir, res_or_tri= "tri", atg_as_rec = True, use_cache = True):
    """
    Generate a fingerprint string for a fixed set of complexes and write each
    to ``<fp_dir>/<c_id>.fp``.

    fp_dir:     directory the ``.fp`` files are written into.
    res_or_tri: passed positionally to ``gen_fp_str``.
    atg_as_rec: forwarded to ``gen_fp_str`` as ``atg_as_receptor``.
    use_cache:  forwarded to ``gen_fp_str`` as ``use_cache``.

    Errors raised by ``gen_fp_str`` propagate; a failure while writing the
    file is reported on stdout and the loop continues.
    """
    from ve.fp.fp_80 import Residue
    from ve.util.load_pdb import complex_ids, load_complexes
    from ve.config import data237_fp175_padded_root, data480_complex_root

    cids = complex_ids()
    # NOTE(review): the full id list is immediately replaced by this
    # hard-coded subset; looks like a debugging leftover -- confirm.
    cids = ["1FJ1_F", "3BN9_B","3B9K_EF"]

    cs = load_complexes(cids, complex_cls = Complex, residue_cls = Residue)
    for c in cs:
        print(c.c_id)
        # Kept outside the ``try`` on purpose: generation errors are not swallowed.
        fp_str = c.gen_fp_str(res_or_tri, atg_as_receptor = atg_as_rec, use_cache = use_cache)

        try:
            with open(fp_dir + "/" + c.c_id + ".fp", "w") as f:
                f.write(fp_str)
        except Exception:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C and
            # SystemExit still stop the batch.
            print("%s encountered error" % c.c_id)
def main2(which_as_rec):
    """
    Generate the fingerprint of every complex in the 480 data set that does
    not have one yet.

    which_as_rec: passed to ``get_fp`` and used as the suffix of the output
                  directory ``<data480_root>/fp_370_<which_as_rec>``.

    A complex whose ``<c_id>.fp`` file already exists is skipped. A failure
    is reported on stdout with ``get_error_info`` and the loop continues;
    "processed" is printed only for complexes that succeeded.
    """
    import os
    from ve.util.load_pdb import complex_ids, load_complexes
    from ve.config import data480_root, data480_complex_root
    ids = complex_ids(data480_complex_root)

    cs = load_complexes(ids, directory = data480_complex_root,complex_cls = MyComplex, residue_cls = MyResidue)

    fp_dir = os.path.join(data480_root, "fp_370_%s" % which_as_rec)

    for c in cs:
        fp_path = os.path.join(fp_dir, "%s.fp" % c.c_id)
        if os.path.exists(fp_path):
            print("%s preexists\n" % c.c_id)
            continue

        print("processing %s" % c.c_id)

        try:
            fp = c.get_fp(which_as_rec)
            fp.tofile(fp_path)
        except Exception as e:
            from ve.util.error import get_error_info
            print("%s %s" % (c.c_id, get_error_info(e)))
        else:
            # Only reached when nothing was raised, so a failed complex is
            # no longer reported as processed.
            print("%s processed\n" % c.c_id)
def main_237():
    """
    Run the pairwise ``corr_coef`` calculation with
    ``src.fp370_atb_dataloader`` over every complex id except "1UWX_AP",
    using the module-level ``callback``.
    """
    excluded = ("1UWX_AP",)
    remaining_ids = set(complex_ids()).difference(excluded)

    # The same call with src.fp370_atg_dataloader was left disabled in the original.
    complex_pairwise_calc(remaining_ids, src.fp370_atb_dataloader, corr_coef, callback=callback)
    c = ComplexDual(complex_id, antigen, antibody)

    c.gen_fp_to_file(complex_id, fp_type="double")

def already_processed(complex_id):
    """
    Return True when every expected fingerprint file of ``complex_id`` exists.

    Checks ``atg.fp``, ``15bits.fp`` and ``atb.fp`` under both the "single"
    and the "double" sub-tree of ``data237_fps808015_root``. Each path is
    printed before it is tested; the first missing one returns False.
    """
    file_types = ["atg.fp", "15bits.fp", "atb.fp"]
    calc_types = ["single", "double"]
    for ct in calc_types:
        for ft in file_types:
            file_path = os.path.join(data237_fps808015_root, ct, complex_id, ft)
            # Call form prints identically on Python 2 and is valid on Python 3.
            print(file_path)
            if not os.path.exists(file_path):
                return False  # not processed completely
    return True  # processed completely
    
if __name__ == "__main__":
    # Script entry: run gen_fp on every complex of the 237 data set that
    # already_processed() does not report as complete.

    # NOTE(review): `ids` is not used by the loop below.
    ids = ["1SLG_D", "1N4X_L", "1JV5_A", "1STS_B"]

    for c_id in complex_ids(path = data237_complex_root):
        if already_processed(c_id):
            print("already processed %s" % c_id)
            continue
        else:
            print("processing %s" % c_id)
            try:
                gen_fp(c_id)
            except Exception:
                # ``Exception`` rather than a bare ``except`` so Ctrl-C and
                # SystemExit still stop the batch.
                import sys
                # Trailing newline keeps consecutive error messages on separate lines.
                sys.stderr.write("error encountered processing %s\n" % c_id)
                continue
    
        if only_paratope:
            self.find_epitope()
            self.write_epitope(complex_dir, paraepi_dir)
        
            
    
if __name__ == "__main__":
    # Script entry: generate paratope/epitope output for every complex in
    # <data_root>/three-groups/split-complexes/<argv[1]>, writing into
    # <data_root>/three-groups/paraepi/<argv[1]>.
    import sys
    from ve.util.load_pdb import complex_ids, load_complexes
    from ve.fp.complex_util.paraepi  import ParatopeNotFoundError, EpitopeNotFoundError
    from ve.config import data_root

    # Fail with a usage message instead of a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <split-complexes sub-directory>" % sys.argv[0])
    var = sys.argv[1]

    complex_dir = os.path.join(data_root,"three-groups/split-complexes", var)
    # Same for every complex, so computed once outside the loop.
    paraepi_dir = os.path.join(data_root,"three-groups/paraepi", var)

    #get the complex ids
    ids = complex_ids(complex_dir)

    print(ids)

    cs = load_complexes(ids, directory =  complex_dir, complex_cls = ParaEpiGen, residue_cls =  TestResidue)

    for c in cs:
        try:
            c.gen_paraepi(complex_dir, paraepi_dir, only_paratope = True)
        except ParatopeNotFoundError:
            print("paratope not found")
        except EpitopeNotFoundError:
            print("epitope not found")