Ejemplo n.º 1
0
    def test_tempy_sccc(self):
        """Check the TEMPy SCCC score of every rigid body of the 1ake files.

        Reads the map, the model and the rigid-body file from the current
        directory, scores each rigid body with ``ScoringFunctions.SCCC`` and
        compares the results with stored reference values.  Use this as a
        baseline for the second chimeraX test.
        """
        # the sigma factor determines the width of the Gaussian distribution
        # used to describe each atom
        sim_sigma_coeff = 0.187
        resolution = 10.0
        # reference SCCC value for each rigid body, in the order returned by
        # the rigid-body parser
        expected_scores = (0.954, 0.427, 0.624, 0.838, 0.971, 0.928)

        path_test = "./"
        map_path = os.path.join(path_test, '1akeA_10A.mrc')
        pdb_path = os.path.join(path_test, '1ake_mdl1.pdb')
        rb_file = os.path.join(path_test, '1ake_mdl1_rigid.txt')

        scorer = ScoringFunctions()

        # read map file
        emmap = MapParser.readMRC(map_path)

        # read PDB file
        structure_instance = PDBParser.read_PDB_file('pdbfile',
                                                     pdb_path,
                                                     hetatm=False,
                                                     water=False)

        # read rigid body file and generate structure instances for each
        # segment
        listRB = RBParser.read_FlexEM_RIBFIND_files(rb_file,
                                                    structure_instance)

        # score each rigid body segment
        listsc_sccc = [
            scorer.SCCC(emmap, resolution, sim_sigma_coeff,
                        structure_instance, RB) for RB in listRB
        ]

        # assertEqual/assertLess report the offending values on failure,
        # which assertTrue(a == b) does not.
        self.assertEqual(len(listRB), 6)
        for index, (score_sccc, expected) in enumerate(
                zip(listsc_sccc, expected_scores)):
            self.assertLess(
                abs(round(score_sccc, 4) - expected), 0.01,
                "rigid body %d: SCCC %s differs from reference %s" %
                (index, score_sccc, expected))
Ejemplo n.º 2
0
def score(session, atomic_model, map_model, rez):
    """Compute the CCC score between one atomic model and one map.

    The atoms of *atomic_model* are converted to a TEMPy structure, blurred
    with ``StructureBlurrer.gaussian_blur`` using *rez* and the converted
    *map_model* as template, and the blurred map is compared to the converted
    map with ``ScoringFunctions.CCC``.  The score is printed and returned.
    *session* is accepted but not used here.
    """
    print("Calculating CCC Score")

    # helper objects for the density simulation (blurring) and the scoring
    density_blurrer = StructureBlurrer()
    score_functions = ScoringFunctions()

    # each atom is converted together with its running index
    tempy_atoms = [
        chimera_to_tempy_atom(atom, index)
        for index, atom in enumerate(atomic_model.atoms)
    ]

    tempy_structure = BioPy_Structure(tempy_atoms)
    tempy_map = chimera_to_tempy_map(map_model)
    simulated_map = density_blurrer.gaussian_blur(tempy_structure,
                                                  rez,
                                                  densMap=tempy_map)

    ccc_value = score_functions.CCC(tempy_map, simulated_map)
    print(ccc_value)
    return ccc_value
Ejemplo n.º 3
0
    def _ccc(self, mapname, modelname, res):
        """Return ``(ccc, overlap)`` for a map file and a model file.

        Both files are looked up relative to the current directory.  The
        model is blurred in real space at resolution *res* on the grid of the
        map, contour levels are derived for both maps, and
        ``ScoringFunctions.CCC_map`` is called twice: once with mode 3, from
        which only the second return value (the overlap) is kept, and once
        with the default mode, from which only the first return value (the
        CCC) is kept.
        """
        base_dir = "./"
        target_map = MapParser.readMRC(os.path.join(base_dir, mapname))
        model = PDBParser.read_PDB_file('pdbfile',
                                        os.path.join(base_dir, modelname),
                                        hetatm=False,
                                        water=False)
        density_blurrer = StructureBlurrer()

        # map contour: peak density plus 1.5 sigma; stays None when no peak
        # is reported
        sigma_multiplier = 1.5
        zeropeak, _ave, peak_sigma = target_map._peak_density()
        if zeropeak is None:
            map_contour_level = None
        else:
            map_contour_level = zeropeak + (sigma_multiplier * peak_sigma)

        # multiplier applied to the simulated map's std, picked from the
        # first resolution limit that res exceeds
        for limit, factor in ((20.0, 2.0), (10.0, 1.0), (6.0, 0.5)):
            if res > limit:
                std_factor = factor
                break
        else:
            std_factor = 0.1

        simulated_map = density_blurrer.gaussian_blur_real_space(
            model,
            res,
            sigma_coeff=0.187,
            densMap=target_map,
            normalise=True)

        # model contour, taken from the simulated map
        model_contour_level = std_factor * simulated_map.std()

        scorer = ScoringFunctions()
        overlap = scorer.CCC_map(target_map,
                                 simulated_map,
                                 map_contour_level,
                                 model_contour_level,
                                 3,
                                 cmode=False)[1]
        ccc = scorer.CCC_map(target_map,
                             simulated_map,
                             map_contour_level,
                             model_contour_level,
                             cmode=False)[0]

        print("Printing CCC", ccc, overlap, map_contour_level,
              model_contour_level)

        return (ccc, overlap)
Ejemplo n.º 4
0
# Python 2 script (print statements).
# Create the working directory if it does not exist yet and change into it;
# every relative path used below is therefore resolved inside 'Test_Files'.
path_out = 'Test_Files'
if os.path.exists(path_out) == True:
    print "%s exists" % path_out
else:
    os.mkdir(path_out)
os.chdir(path_out)

# Read the atomic model from '1J6Z.pdb' with the hetatm and water flags
# switched off.
structure_instance = PDBParser.read_PDB_file('1J6Z',
                                             '1J6Z.pdb',
                                             hetatm=False,
                                             water=False)

blurrer = StructureBlurrer()
# NOTE(review): this rebinds the name EnsembleGeneration from the class to an
# instance of it, so the class can no longer be instantiated through this
# name later in the script.
EnsembleGeneration = EnsembleGeneration()
scorer = ScoringFunctions()

map_target = MapParser.readMRC('emd_5168_monomer.mrc')  #read target map
print map_target

# Simulate a density map from the model on the grid of the target map;
# 6.6 is presumably the map resolution in Angstroms -- TODO confirm.
map_probe = blurrer.gaussian_blur(structure_instance, 6.6, densMap=map_target)
# Generate an ensemble of randomised copies of the model.  The positional
# values 20, 10, 60 are presumably the number of structures, the maximum
# translation and the maximum rotation (in degrees, given rad=False) --
# verify against randomise_structs.
list_rotate_models = EnsembleGeneration.randomise_structs(structure_instance,
                                                          20,
                                                          10,
                                                          60,
                                                          v_grain=30,
                                                          rad=False,
                                                          write=False)

# NOTE(review): same rebinding as above -- Cluster now names an instance,
# not the class.
Cluster = Cluster()
ranked_ensemble = Cluster.cluster_fit_ensemble_top_fit(
Ejemplo n.º 5
0
# Python 2 script fragment (print statements).  The names c, m, r and p are
# defined before this excerpt; presumably the contour, map path, resolution
# and model path -- TODO confirm.
#print 'reading map'
# Without a contour (c is None) map_contour returns the three values bound to
# Name1, emmap1 and c1; otherwise the map is read directly.
if c is None: Name1, emmap1, c1 = map_contour(m, t=1.5)
else:
    # NOTE(review): c1 is not assigned on this path although it is passed to
    # CCC_map below -- presumably it is set earlier; confirm.
    Name1 = os.path.basename(m).split('.')[0]
    emmap1 = MapParser.readMRC(m)
# NOTE(review): these two input checks only run after the map has already
# been read above.
if r is None:
    sys.exit('Input a map, a model, map resolution and contours (optional)')
if p is None:
    sys.exit('Input a map, a model, map resolution and contours (optional)')
#print 'reading model'
Name2, emmap2, c2 = model_contour(p, res=r, emmap=emmap1, t=0.5)

#print 'Scoring...'
if not None in [Name1, Name2]:
    scores = {}
    sc = ScoringFunctions()
    #OVR
    # CCC_map called with 3 as fifth argument; its two return values are used
    # as the local (masked) correlation and the overlap.
    try:
        ccc_mask, ovr = sc.CCC_map(emmap1, emmap2, c1, c2, 3)
        print 'Percent overlap:', ovr
        if ovr < 0.0: ovr = 0.0
    # NOTE(review): a bare except also catches SystemExit and
    # KeyboardInterrupt.
    except:
        print 'Exception for lccc and overlap score'
        print_exc()
        ovr = 0.0
    scores['overlap'] = ovr
    # After a failed CCC_map call ovr is 0.0, so the script exits here before
    # the then-unbound ccc_mask is read below.
    if ovr < 0.02:
        sys.exit("Maps do not overlap.")
    #SCCC
    print 'Local correlation score: ', ccc_mask
    if ccc_mask < -1.0 or ccc_mask > 1.0:
Ejemplo n.º 6
0
def _smoc_scores_for_residues(reslist, dict_res_scores):
    """Return one score per entry of *reslist*.

    A residue without an entry in *dict_res_scores* borrows the score of a
    neighbouring residue in the list (the next one for the first residue,
    the previous one otherwise).  When that neighbour has no score either,
    0.0 is used.
    """
    scorelist = []
    for position, res in enumerate(reslist):
        try:
            value = dict_res_scores[res]
        except KeyError:
            neighbour = position + 1 if position == 0 else position - 1
            try:
                value = dict_res_scores[reslist[neighbour]]
            except (KeyError, IndexError):
                value = 0.0
        scorelist.append(value)
    return scorelist


def score(session,
          atomic_models,
          map_model,
          rigid_filename,
          rez,
          sim_sigma=0.187,
          window=9,
          colour_atoms=True):
    """Run the SMOC score for each atomic model against one map.

    Arguments:
        *session*
            Accepted for interface compatibility; not used here.
        *atomic_models*
            Iterable of models; each must expose an ``atoms`` iterable.
        *map_model*
            Map object, converted with ``chimera_to_tempy_map``.
        *rigid_filename*
            Passed through to ``ScoringFunctions.SMOC``.
        *rez*
            Passed through to ``ScoringFunctions.SMOC``.
        *sim_sigma*
            Passed through to ``ScoringFunctions.SMOC``.
        *window*
            Passed through to ``ScoringFunctions.SMOC``.
        *colour_atoms*
            Accepted for interface compatibility; not used here.

    Returns:
        A list with one ``(dict_chains_scores, dict_reslist)`` tuple per
        atomic model.  ``dict_chains_scores`` maps chain -> residue -> a
        one-item list holding the score formatted as a two-decimal string.
        ``dict_reslist`` maps chain -> list of residues for that chain.

    Per-chain summary values are printed as a side effect.

    Changes from the previous version:
        * The diagnostic messages referenced ``out_iter_pdb``, which is
          neither a parameter nor a local of this function, so every such
          message raised NameError.  The name is no longer used.
        * The "Residue missing" message now reports the atom's own chain
          rather than the last chain visited by the scoring loop.
        * A residue whose neighbour also lacks a score now gets 0.0 instead
          of raising KeyError.
    """
    # TODO - rigid_filename might be optional?

    sc = ScoringFunctions()
    rvals = []

    # fractional positions in the sorted score list that are reported per
    # chain: index len * slow and index len * (1 - shigh)
    slow = 0.50
    shigh = 0.25

    for atomic_model in atomic_models:
        # each atom is converted together with its running index
        atomlist = [
            chimera_to_tempy_atom(atom, index)
            for index, atom in enumerate(atomic_model.atoms)
        ]

        bio_atom_structure = BioPy_Structure(atomlist)
        bio_map_structure = chimera_to_tempy_map(map_model)
        dict_reslist = {}
        dict_chains_scores = {}
        dict_ch_scores, dict_chain_res = sc.SMOC(bio_map_structure, rez,
                                                 bio_atom_structure, window,
                                                 rigid_filename, sim_sigma)

        for ch in dict_ch_scores:
            # keep a copy of the residue list for reference
            reslist = dict_chain_res[ch][:]
            dict_reslist[ch] = reslist
            if len(reslist) == 0:
                print('Chain missing:', ch)
                continue

            scorelist = _smoc_scores_for_residues(reslist, dict_ch_scores[ch])

            # save scores for each chain, formatted to two decimals
            chain_scores = dict_chains_scores.setdefault(ch, {})
            for res, value in zip(reslist, scorelist):
                chain_scores[res] = ["{0:.2f}".format(round(value, 2))]

            # low and high reference values taken from the sorted scores
            ranked = sorted(scorelist)
            avglow = ranked[int(len(ranked) * slow)]
            if avglow == 0.0:
                # avoid a division by zero in the ratio printed below
                avglow = 0.00001
            avghigh = ranked[int(len(ranked) * (1 - shigh))]

            print(ch, 'avg-top25%, avg-low25%, avg-high/avg-low', avghigh,
                  avglow, avghigh / avglow)
            print(ch, 'avg', sum(scorelist) / len(scorelist))

        # include smoc scores as b-factor records
        # NOTE(review): bio_atom_structure is local to this loop and is not
        # returned, so these values are only visible to callers if the
        # converted atoms are shared elsewhere -- confirm.
        for atom in bio_atom_structure.atomList:
            cur_chain = atom.chain
            cur_res = atom.get_res_no()
            if cur_chain not in dict_reslist:
                continue
            if cur_chain not in dict_chains_scores:
                # chain was reported as missing above
                atom.temp_fac = 0.0
                continue
            try:
                atom.temp_fac = dict_chains_scores[cur_chain][cur_res][0]
            except (KeyError, IndexError):
                print('Residue missing: ', cur_res, cur_chain)
                atom.temp_fac = 0.0

        rvals.append((dict_chains_scores, dict_reslist))

    return rvals
Ejemplo n.º 7
0
    def transform_map(self, matR, transvec, m1, m2, c1, c2):
        """Transform the second map and score it against the first.

        Arguments:
            *matR*
                Rotation matrix; its transpose is what is applied to the
                map array.
            *transvec*
                Sequence of three translation components; each is
                multiplied by the voxel spacing of the second map.
            *m1*, *m2*
                Paths of the two MRC maps; *m2* is the one transformed.
            *c1*, *c2*
                Contour levels of the first and second map.

        Returns:
            Tuple ``(ccc, mi, env, nv, nv_s)`` holding the results of
            ``CCF_mask_zero``, ``MI``, ``map_envelope_score``,
            ``normal_vector_score`` and ``normal_vector_score`` with
            ``Filter='Sobel'``.

        Fix: the Sobel-filtered score used to be assigned to ``nv``,
        overwriting the unfiltered score, while the return statement
        referenced the undefined name ``nv_s`` and raised NameError.
        """
        mat = matR.T
        emmap1 = MapParser.readMRC(m1)
        emmap2 = MapParser.readMRC(m2)
        # geometric centre of map
        vec_centre = emmap2.centre()
        spacing = emmap2.apix
        # to work on the box transformations, get the box centre irrespective
        # of origin
        vec_centre.x = vec_centre.x - emmap2.x_origin()
        vec_centre.y = vec_centre.y - emmap2.y_origin()
        vec_centre.z = vec_centre.z - emmap2.z_origin()

        # calculate new box dimensions, after rotation
        new_centre = emmap2._box_transform(matR)

        output_shape = (int(new_centre.x / spacing),
                        int(new_centre.y / spacing),
                        int(new_centre.z / spacing))
        # halve the box dimensions to get the centre of the new box
        new_centre.x = new_centre.x / 2
        new_centre.y = new_centre.y / 2
        new_centre.z = new_centre.z / 2
        # offset for rotation
        offset = emmap2._rotation_offset(mat, vec_centre, new_centre)

        # APPLY ROTATION
        emmap2 = emmap2._matrix_transform_offset(mat, output_shape, offset.x,
                                                 offset.y, offset.z)

        # shift the origin by the difference between new and old box centre
        offset_x = new_centre.x - vec_centre.x
        offset_y = new_centre.y - vec_centre.y
        offset_z = new_centre.z - vec_centre.z
        emmap2 = emmap2.shift_origin(-offset_x, -offset_y, -offset_z)

        # TRANSLATION COMPONENT
        a14, a24, a34 = transvec[0], transvec[1], transvec[2]
        emmap_2 = emmap2.shift_origin(
            float(a14) * spacing,
            float(a24) * spacing,
            float(a34) * spacing)

        emmap_1 = emmap1.copy()
        # CROP BOX TO REDUCE ARRAY SIZE
        emmap_1._crop_box(c1, 2)
        emmap_2._crop_box(c2, 2)

        # DETERMINE A COMMON ALIGNMENT BOX (use the larger voxel spacing)
        spacing = emmap_2.apix
        if emmap_2.apix < emmap_1.apix:
            spacing = emmap_1.apix
        grid_shape, new_ori = emmap_1._alignment_box(emmap_2, spacing)

        # INTERPOLATE TO NEW GRID
        emmap_1 = emmap_1._interpolate_to_grid(grid_shape, spacing, new_ori)
        emmap_2 = emmap_2._interpolate_to_grid(grid_shape, spacing, new_ori)

        sc = ScoringFunctions()
        ccc = sc.CCF_mask_zero(emmap_1, emmap_2, c1, c2)
        mi = sc.MI(emmap_1, emmap_2)
        env = sc.map_envelope_score(emmap_1, emmap_2, c1, c2)

        # density band around the first contour, 5% of the std of the
        # unmodified first map on either side
        band = emmap1.std() * 0.05
        lower_contour = float(c1) - band
        upper_contour = float(c1) + band
        nv = sc.normal_vector_score(emmap_1, emmap_2, lower_contour,
                                    upper_contour)
        nv_s = sc.normal_vector_score(emmap_1,
                                      emmap_2,
                                      lower_contour,
                                      upper_contour,
                                      Filter='Sobel')

        return ccc, mi, env, nv, nv_s
Ejemplo n.º 8
0
import os

# Python 2 script (print statements).
# Create the working directory if it does not exist yet and change into it;
# every relative path used below is therefore resolved inside 'Test_Files'.
path_out='Test_Files'
if os.path.exists(path_out)==True:
    print "%s exists" %path_out
else:
    os.mkdir(path_out)
os.chdir(path_out)


# Read the atomic model from '1J6Z.pdb' with the hetatm and water flags
# switched off.
structure_instance=PDBParser.read_PDB_file('1J6Z','1J6Z.pdb',hetatm=False,water=False)
print structure_instance

blurrer = StructureBlurrer()
# NOTE(review): this rebinds the name EnsembleGeneration from the class to an
# instance of it, so the class can no longer be instantiated through this
# name later in the script.
EnsembleGeneration=EnsembleGeneration()
scorer = ScoringFunctions()

map_target=MapParser.readMRC('emd_5168_monomer.mrc') #read target map
map_probe = blurrer.gaussian_blur(structure_instance, 6.6,densMap=map_target)#create a simulated map from the structure instance

#Create a Random ensemble of 10 structures randomly within  5 A translation and 60 deg rotation.
# write=True is passed here; presumably the generated structures are also
# written to disk -- verify against randomise_structs.
list_rotate_models=EnsembleGeneration.randomise_structs(structure_instance, 10, 5, 60, v_grain=30, rad=False,write=True)


#CCC score from starting fit
# First output line: the model name followed by the CCC between the
# simulated map and the target map.
line='%s %s\n'%('1J6Z',scorer.CCC(map_probe,map_target))
# Counter incremented once per alternative fit in the loop that follows.
count=0
#loop to score each of the alternative fits in the ensemble
for mod in list_rotate_models:
        count+=1
        mod_name=mod[0]
Ejemplo n.º 9
0
    def rank_fit_ensemble(self,
                          ensemble_list,
                          score,
                          res_target_map,
                          sigma_coeff,
                          number_top_mod=0,
                          write=False,
                          targetMap=False,
                          cont_targetMap=None):
        """
            Score multiple "fits" with a chosen score and rank them.

            Every fit is scored against the target map and the fits are
            returned sorted by descending score.

                Arguments:
                    *ensemble_list*
                        Input list of fits; each entry is indexed as
                        [name, Structure Instance].
                    *score*
                        Scoring function to use.
                        See ScoringFunctions class for a list of the available Scoring Function.
                        E.g. set score='CCC' to use the Cross-correlation coefficient.

                        Score option are:

                        i    'CCC' - Cross-correlation coefficient;

                        ii    'LAP' - Laplacian-filtered cross-correlation coefficient:  useful for maps with resolutions worse than 10-15 A;

                        iii   'MI' - Mutual information score: a good and robust score but relatively slow to calculate;

                        iv    'ENV' - Envelope score: the fastest score to calculate due to binarisation of the map.

                        v-vii  'NV','NV_Sobel','NV_Laplace'- Normal vector score: a vector-based surface superimposition score with or without Sobel/Laplace filter.
                               The raw score is rescaled as 1 - (score / 3.14) before ranking.

                        viii 'CD' - Chamfer Distance: a score used in computer vision algorithms as a fast similarity metric.
                               Without *cont_targetMap* the reciprocal of the distance is ranked.

                    *res_target_map*
                        the resolution, in Angstroms, of the target Map.
                    *sigma_coeff*
                        the sigma value (multiplied by the resolution) that controls the width of the Gaussian.
                        Required; there is no default.

                        Values used :

                            0.187R corresponding with the Gaussian width of the Fourier transform falling to half the maximum at 1/resolution, as used in Situs (Wriggers et al, 1999);

                            0.225R which makes the Fourier transform of the distribution fall to 1/e of its maximum value at wavenumber 1/resolution, the default in Chimera (Petterson et al, 2004)

                            0.356R corresponding to the Gaussian width at 1/e maximum height equaling the resolution, an option in Chimera (Petterson et al, 2004);

                            0.425R the fullwidth half maximum being equal to the resolution, as used by FlexEM (Topf et al, 2008);

                            0.5R the distance between the two inflection points being the same length as the resolution, an option in Chimera (Petterson et al, 2004);

                            1R where the sigma value simply equal to the resolution, as used by NMFF (Tama et al, 2004).

                    *number_top_mod*
                        Number of top ranked fits to keep. Default (0) is all.
                    *write*
                        True will hand the ranked list to Cluster._print_results_cluster2 and return its result instead of the list.
                    *targetMap*
                        Target Map Instance. Required: the process exits when it is missing.
                    *cont_targetMap*
                        Optional contour level of the target map. When given it replaces
                        the thresholds otherwise derived from the target map and the
                        mass of each fit (not used by the 'LAP' and 'ENV' scores).

                Return:
                    List of [name, Structure Instance, score, 0, 0] entries
                    ('name_file','structure_instance','score','lrmsd','class'),
                    sorted by descending score, or the result of
                    Cluster._print_results_cluster2 when *write* is True.
        """
        blurrer = StructureBlurrer()
        scorer = ScoringFunctions()
        cluster = Cluster()

        valid_scores = [
            'CCC', 'LAP', 'MI', 'NV', 'NV_Sobel', 'NV_Laplace', 'ENV', 'CD'
        ]
        # Filter argument handed to normal_vector_score for each NV variant.
        nv_filters = {'NV': None, 'NV_Sobel': 'Sobel', 'NV_Laplace': 'Laplace'}

        # Identity checks: a map object is never compared with == here.
        # None is treated like the False default (no map supplied).
        if targetMap is None or targetMap is False:
            print("WARNING:Need target map")
            sys.exit()
        if score not in valid_scores:
            # The previous version passed the value as a second print
            # argument, so the '%s' placeholder was printed literally and
            # the score names were joined without a separator.
            print('Incorrect Scoring Function: %s' % (score, ))
            print(
                'Please select from one of the following scoring functions: %s'
                % ', '.join(valid_scores))
            sys.exit()

        targetMap = targetMap.copy()

        def mass_based_thresholds(mod):
            # Lower/upper density thresholds of the target map derived from
            # the mass of the fitted structure.
            min_thr = targetMap.get_primary_boundary(
                mod.get_prot_mass_from_atoms(), targetMap.min(),
                targetMap.max())
            points = targetMap.get_point_map(min_thr, percentage=0.2)
            max_thr = targetMap.get_second_boundary(min_thr,
                                                    points,
                                                    min_thr,
                                                    targetMap.max(),
                                                    err_percent=1)
            return min_thr, max_thr

        list_to_order = []
        for mod1 in ensemble_list:
            name_mod = mod1[0]
            mod = mod1[1]

            if score == 'ENV':
                # the envelope score works on the structure directly; no
                # simulated map is needed
                min_thr = targetMap.get_primary_boundary(
                    mod.get_prot_mass_from_atoms(), targetMap.min(),
                    targetMap.max())
                score_mod = scorer.envelope_score(targetMap, min_thr, mod)
            else:
                sim_map = blurrer.gaussian_blur(mod,
                                                res_target_map,
                                                densMap=targetMap,
                                                sigma_coeff=sigma_coeff)
                if score == 'CCC':
                    if cont_targetMap is not None:
                        score_mod = scorer.CCC_map(
                            sim_map, targetMap, 0.5 * sim_map.fullMap.std(),
                            cont_targetMap, 2, True)[0]
                    else:
                        score_mod = scorer.CCC_map(sim_map, targetMap, 0.0,
                                                   0.0, True)[0]
                elif score == 'LAP':
                    score_mod = scorer.laplace_CCC(sim_map, targetMap)
                elif score == 'MI':
                    if cont_targetMap is not None:
                        score_mod = scorer.MI(sim_map, targetMap,
                                              0.5 * sim_map.fullMap.std(),
                                              cont_targetMap, 1)
                    else:
                        score_mod = scorer.MI(sim_map, targetMap)
                elif score in nv_filters:
                    if cont_targetMap is not None:
                        # band of 0.1 std on either side of the contour
                        band = 0.1 * targetMap.std()
                        min_thr = cont_targetMap - band
                        max_thr = cont_targetMap + band
                    else:
                        min_thr, max_thr = mass_based_thresholds(mod)
                    score_mod = scorer.normal_vector_score(
                        targetMap,
                        sim_map,
                        min_thr,
                        max_thr,
                        Filter=nv_filters[score])
                    # rescale so that larger values rank first (3.14 is kept
                    # as-is so existing score values do not change)
                    score_mod = 1 - (score_mod / 3.14)
                else:  # 'CD'
                    if cont_targetMap is not None:
                        score_mod = scorer._surface_distance_score(
                            sim_map, targetMap, 0.5 * sim_map.fullMap.std(),
                            cont_targetMap, 'Minimum')
                    else:
                        min_thr, max_thr = mass_based_thresholds(mod)
                        score_mod = scorer.chamfer_distance(sim_map,
                                                            targetMap,
                                                            min_thr,
                                                            max_thr,
                                                            kdtree=None)
                        # invert the distance so that larger values rank
                        # first
                        score_mod = 1 / score_mod

            # 'name_file','structure_instance','score','lrmsd','class'
            list_to_order.append([name_mod, mod, score_mod, 0, 0])

        list_ordered = sorted(list_to_order, key=lambda x: x[2], reverse=True)
        if number_top_mod != 0:
            list_ordered = list_ordered[:int(number_top_mod)]
        # equality (not truthiness) is kept so that only True/1 triggers the
        # cluster output, as before
        if write == True:
            return cluster._print_results_cluster2(list_ordered, write)
        return list_ordered
Ejemplo n.º 10
0
def genmap(session, map0=None, map1=None, rez1=None, rez2=None, c1=None, c2=None):
  """Build two masked, positive difference maps and add them to the session.

  Both input maps are converted to TEMPy maps, shifted so their minimum is
  zero, amplitude-matched, resampled onto a common grid and subtracted from
  each other in both directions (map0 - map1 and map1 - map0).  Each
  difference is restricted to the contour mask of its own map, interpolated
  back to that map's original grid, clipped to positive values and added to
  ``session.models``.

  Arguments:
    *session*   session whose ``models`` collection receives the two results.
    *map0*, *map1*   the two maps to compare (converted with
                ``chimera_to_tempy_map``).
    *rez1*, *rez2*   resolutions of the two maps; both are required.
    *c1*, *c2*   contour levels; when ``None`` they are taken from
                ``map_contour(..., t=1.5)``.

  Returns ``None``.  If a resolution is missing a message is printed and
  nothing is added to the session.
  """
  # Both resolutions feed max() below; report the problem up front instead of
  # raising a TypeError after the maps have already been converted.
  if rez1 is None or rez2 is None:
    print("Please provide the resolution for both maps.")
    return

  m0 = chimera_to_tempy_map(map0)
  m1 = chimera_to_tempy_map(map1)

  # TODO - pull contours from m0,m1 when they are already available
  if c1 is None:
    c1 = map_contour(m0, t=1.5)
  if c2 is None:
    c2 = map_contour(m1, t=1.5)

  # Shift densities (and the matching contour levels) so each map starts at 0
  c1 = c1 - m0.min()
  c2 = c2 - m1.min()
  m0.fullMap = m0.fullMap - m0.min()
  m1.fullMap = m1.fullMap - m1.min()

  # Find a common box able to hold both maps
  grid_shape, new_ori = m0._alignment_box(m1, max(m0.apix, m1.apix))

  emmap_1 = m0.copy()
  emmap_2 = m1.copy()

  # The common grid is sampled at a third of the worse resolution
  max_rez = max(rez1, rez2)
  spacing = max_rez * 0.33

  # Not sure we should do scaling here?
  sc = ScoringFunctions()
  emmap_1.fullMap, emmap_2.fullMap = sc._amplitude_match(
      m0, m1, 0, 0, 0.02, 0, 0, max_rez,
      lpfiltb=True, lpfilta=False, ref=False)

  # Resample the scaled maps onto the common grid
  diff1 = emmap_1._interpolate_to_grid(grid_shape, spacing, new_ori, 1)
  diff2 = emmap_2._interpolate_to_grid(grid_shape, spacing, new_ori, 1)

  # Binary masks of the voxels above the contour in the original maps,
  # interpolated onto the common grid and re-thresholded
  emmap_1.fullMap = (m0.fullMap > c1) * 1.0
  emmap_2.fullMap = (m1.fullMap > c2) * 1.0
  mask1 = emmap_1._interpolate_to_grid(grid_shape, spacing, new_ori, 1, 'zero')
  mask2 = emmap_2._interpolate_to_grid(grid_shape, spacing, new_ori, 1, 'zero')
  mask1.fullMap = mask1.fullMap > 0.1
  mask2.fullMap = mask2.fullMap > 0.1

  # Shift both scaled maps by their common minimum so values are positive
  min_scaled_maps = min(diff1.min(), diff2.min())
  diff1.fullMap = diff1.fullMap - min_scaled_maps
  diff2.fullMap = diff2.fullMap - min_scaled_maps

  # Keep the un-subtracted first map: diff1 is overwritten before diff2 is
  # computed
  diff_map = diff1.copy()
  diff1.fullMap = diff1.fullMap - diff2.fullMap
  diff2.fullMap = diff2.fullMap - diff_map.fullMap

  # Restrict each difference to the contoured region of its own map
  diff1.fullMap = diff1.fullMap * mask1.fullMap
  diff2.fullMap = diff2.fullMap * mask2.fullMap

  # Interpolate back to the original grids
  result1 = diff1._interpolate_to_grid(m0.fullMap.shape, m0.apix, m0.origin, 1, 'zero')
  result2 = diff2._interpolate_to_grid(m1.fullMap.shape, m1.apix, m1.origin, 1, 'zero')

  # Only positive differences are reported
  result1.fullMap = result1.fullMap * (result1.fullMap > 0.)
  result2.fullMap = result2.fullMap * (result2.fullMap > 0.)

  nm0 = tempy_to_chimera_map(session, result1)
  nm1 = tempy_to_chimera_map(session, result2)

  session.models.add([nm0, nm1])
Ejemplo n.º 11
0
    def test_tempy_smoc(self):
        """Check the TEMPy SMOC score against reference values.

        Scores '1ake_mdl1.pdb' against '1akeA_10A.mrc' (both looked up in the
        current working directory) and, for every chain, asserts the median
        score, the 75th-percentile score and the mean score.  Used as a
        baseline for the second chimeraX test; adapted from the
        score_smoc.py example tutorial.
        """
        # Kept from the original tutorial code; the parsed arguments are not
        # used by this test.
        tp = TempyParser()
        tp.generate_args()

        # the sigma factor determines the width of the Gaussian distribution
        # used to describe each atom
        sim_sigma_coeff = 0.187
        # score window
        win = 9

        DATADIR = os.getcwd()
        map_file = os.path.join(DATADIR, '1akeA_10A.mrc')
        res_map = 10.0
        list_to_check = ['1ake_mdl1.pdb']

        # Positions (as fractions of the sorted score list) of the "low" and
        # "high" reference scores
        slow = 0.50
        shigh = 0.25
        # No rigid-body file is used for this test
        rfilepath = None

        sc = ScoringFunctions()
        emmap = MapParser.readMRC(map_file)

        # Residue numbers per chain, taken from the first model (reference)
        dict_reslist = {}

        for it, out_iter_pdb in enumerate(list_to_check):
            pdb_path = os.path.join(DATADIR, out_iter_pdb)
            if not os.path.isfile(pdb_path):
                # Without the model there is nothing to score; fail clearly
                # rather than with a NameError further down.
                self.fail('PDB file not found: ' + out_iter_pdb)

            structure_instance = PDBParser.read_PDB_file('pdbfile',
                                                         pdb_path,
                                                         hetatm=False,
                                                         water=False)
            dict_ch_scores, dict_chain_res = sc.SMOC(
                emmap, res_map, structure_instance, win, rfilepath,
                sim_sigma_coeff)

            dict_chains_scores = {}
            for ch in dict_ch_scores:
                dict_res_scores = dict_ch_scores[ch]
                if it == 0:
                    dict_reslist[ch] = dict_chain_res[ch][:]

                reslist = dict_reslist.get(ch)
                if reslist is None:
                    print('Chain not common:', ch, out_iter_pdb)
                    continue
                if len(reslist) == 0:
                    print('Chain missing:', out_iter_pdb, ch)
                    continue

                chain_scores = dict_chains_scores.setdefault(ch, {})
                scorelist = []
                for idx, res in enumerate(reslist):
                    try:
                        scorelist.append(dict_res_scores[res])
                    except KeyError:
                        # Residue without a score: borrow the score of the
                        # next residue for the first entry, otherwise of the
                        # previous one.
                        neighbour = reslist[idx + 1] if idx == 0 else reslist[idx - 1]
                        scorelist.append(dict_res_scores[neighbour])

                    # save the formatted score of this residue for this model
                    curscore = "{0:.2f}".format(round(scorelist[-1], 2))
                    per_model = chain_scores.setdefault(
                        res, [str(0.0)] * len(list_to_check))
                    per_model[it] = curscore

                # Reference scores picked from the sorted list: the entry at
                # fraction `slow` and the entry at fraction `1 - shigh`
                sorted_scores = sorted(scorelist)
                avglow = sorted_scores[int(len(sorted_scores) * slow)]
                avghigh = sorted_scores[int(len(sorted_scores) * (1 - shigh))]

                self.assertLess(abs(avghigh - 0.967), 0.01)
                self.assertLess(abs(avglow - 0.956), 0.01)
                self.assertLess(
                    abs(sum(scorelist) / len(scorelist) - 0.899), 0.01)

            # include smoc scores as b-factor records
            for atom in structure_instance.atomList:
                cur_chain = atom.chain
                cur_res = atom.get_res_no()
                if cur_chain not in dict_reslist:
                    continue
                if cur_chain in dict_chains_scores:
                    try:
                        atom.temp_fac = dict_chains_scores[cur_chain][cur_res][it]
                    except (KeyError, IndexError):
                        print('Residue missing: ', cur_res, cur_chain,
                              out_iter_pdb)
                        atom.temp_fac = 0.0
                else:
                    atom.temp_fac = 0.0
Ejemplo n.º 12
0
    def test_tempy_nmi(self):
        """Check the TEMPy NMI score against reference values.

        Two cases are scored: a pair of maps ('emd_5168.map' vs
        'emd_5170.map') and a map against a model blurred onto its grid
        ('emd_5168_monomer.mrc' vs '1J6Z.pdb').  Used as a baseline for the
        second chimeraX test.
        """
        path_test = "./"
        sc = ScoringFunctions()

        def checked_nmi(map_a, map_b, contour_a, contour_b):
            # Any exception from the scorer fails the test; the traceback is
            # printed first so the cause is visible in the test output.
            try:
                value = sc.MI(map_a, map_b, contour_a, contour_b, 1, None,
                              None, True)
            except Exception:
                print_exc()
                self.fail('Exception for NMI score')
            # Negative scores are reported as 0.0
            if value < 0.0:
                value = 0.0
            return value

        # --- map vs map ---
        m = os.path.join(path_test, 'emd_5168.map')
        p = os.path.join(path_test, 'emd_5170.map')
        rez1 = 6.6
        rez2 = 15.0

        _, emmap1, c1 = map_contour(m, t=1.5)
        _, emmap2, c2 = map_contour(p, t=1.5)

        print(rez1, rez2, c1, c2, emmap1.apix, emmap2.apix)

        if not sc.mapComparison(emmap1, emmap2):
            emmap1._crop_box(c1, 0.5)
            emmap2._crop_box(c2, 0.5)

            # Low-pass filter the better-resolved map when the resolutions
            # differ by more than 25%, then bring both maps onto one grid.
            if rez1 > 1.25 * rez2:
                emmap2 = lpfilter(emmap2, rez1)
            elif rez2 > 1.25 * rez1:
                emmap1 = lpfilter(emmap1, rez2)
            emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

        nmi = checked_nmi(emmap1, emmap2, c1, c2)
        self.assertLess(abs(round(nmi, 5) - 1.0492), 0.001)

        # --- map vs model ---
        p = os.path.join(path_test, '1J6Z.pdb')
        m = os.path.join(path_test, 'emd_5168_monomer.mrc')
        res = 6.6
        model_name = os.path.basename(p).split('.')[0]
        emmap1 = MapParser.readMRC(m)
        structure_instance = PDBParser.read_PDB_file(model_name,
                                                     p,
                                                     hetatm=False,
                                                     water=False)
        blurrer = StructureBlurrer()
        emmap2 = blurrer.gaussian_blur(structure_instance, res, densMap=emmap1)
        c1 = 9.7
        c2 = 1.0

        nmi = checked_nmi(emmap1, emmap2, c1, c2)
        self.assertLess(abs(round(nmi, 5) - 1.0575), 0.001)
Ejemplo n.º 13
0
def score_cmd(session, comparators, compared, rez_comparators, rez_compared, contours_comparators, contour_compared):
  """Compute the NMI score of every comparator against `compared`.

  `compared` and each entry of `comparators` may be an AtomicStructure
  (model) or a map.  Models are turned into density maps before scoring.

  Arguments:
    *session*   unused here; kept for the command interface.
    *comparators*   sequence of models/maps scored against `compared`.
    *compared*   the model or map everything is compared to.
    *rez_comparators*   sequence of resolutions, indexed like `comparators`.
    *rez_compared*   resolution of `compared`.
    *contours_comparators*   sequence of contour levels (or None), indexed
                like `comparators`.
    *contour_compared*   contour level of `compared` (or None).

  Returns a list with one score per comparator (scores below 0.0, or scores
  that raised, are reported as 0.0).  Returns None, after printing a
  message, when a resolution required for a model is missing.
  """
  sc = ScoringFunctions()
  blurrer = StructureBlurrer()

  compared_is_model = isinstance(compared, AtomicStructure)
  scores = []

  for idx, comparator in enumerate(comparators):
    emmap1 = None
    emmap2 = None

    if isinstance(comparator, AtomicStructure):
      if compared_is_model:
        # Both models
        if rez_compared is None or any(r is None for r in rez_comparators):
          print("Please provide the resolution for all models")
          return

        bms1 = chimera_to_tempy_model(compared)
        bms2 = chimera_to_tempy_model(comparator)
        emmap1 = model_contour(bms1, rez_compared, emmap=False, t=0.5)

        if contours_comparators[idx] is None:
          emmap2 = model_contour(bms2, rez_comparators[idx], emmap=False, t=0.5)
        else:
          emmap2 = blur_model(bms2, rez_comparators[idx], emmap=False)

      else:
        # compared is a map, comparator is a model
        if rez_comparators[idx] is None:
          print("Please provide the resolution for the model.")
          return

        emmap1 = chimera_to_tempy_map(compared)
        bms = chimera_to_tempy_model(comparator)
        # Blur with the resolution that was just validated for this model
        # (previously rez_compared was used here, which may be None).
        emmap2 = blurrer.gaussian_blur(bms, rez_comparators[idx], densMap=emmap1)

    else:
      if compared_is_model:
        # compared is a model, comparator is a map
        if rez_compared is None:
          print("Please provide the resolution for the model.")
          return

        emmap2 = chimera_to_tempy_map(comparator)
        bms = chimera_to_tempy_model(compared)
        emmap1 = blurrer.gaussian_blur(bms, rez_compared, densMap=emmap2)

      else:
        # Both maps
        emmap1 = chimera_to_tempy_map(compared)
        emmap2 = chimera_to_tempy_map(comparator)

    # Contouring
    c1 = contour_compared
    if c1 is None:
      c1 = map_contour(emmap1, t=1.5)

    c2 = contours_comparators[idx]
    # This kinda makes no sense and could be tricky
    if c2 is None:
      c2 = map_contour(emmap2, t=1.5)

    # If the two maps are not directly comparable, crop them, low-pass
    # filter the better-resolved one when the resolutions differ by more
    # than 25%, and bring both onto a common grid.
    if not sc.mapComparison(emmap1, emmap2):
      emmap1._crop_box(c1, 0.5)
      emmap2._crop_box(c2, 0.5)

      rez_comparator = rez_comparators[idx]
      # Resolutions are optional for map/map comparisons; without both
      # values no filtering decision can be made, so only the grids are
      # matched.
      if rez_compared is not None and rez_comparator is not None:
        if rez_compared > 1.25 * rez_comparator:
          emmap2 = lpfilter(emmap2, rez_compared)
        elif rez_comparator > 1.25 * rez_compared:
          emmap1 = lpfilter(emmap1, rez_comparator)
      emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

    try:
      nmi = sc.MI(emmap1, emmap2, c1, c2, 1, None, None, True)
      if nmi < 0.0:
        nmi = 0.0
    except Exception:
      # Best effort: a failed score is reported as 0.0 with the traceback
      print('Exception for NMI score')
      print_exc()
      nmi = 0.0
    scores.append(nmi)

  return scores
Ejemplo n.º 14
0
def score(session, atomic_model1=None, map_model1=None, atomic_model2=None, map_model2=None, rez1=None, rez2=None, c1=None, c2=None):
  """Generate the NMI score for 1 map and 1 model, 2 maps, or 2 models.

  The input combination is chosen in this order: `atomic_model1` with
  `map_model1`, then `map_model1` with `map_model2`, then `atomic_model1`
  with `atomic_model2`.

  Arguments:
    *session*   unused here; kept for the command interface.
    *rez1*, *rez2*   resolutions.  `rez1` is required when a model is scored
                against a map; both are required for two models.
    *c1*, *c2*   contour levels; when None they are taken from
                ``map_contour(..., t=1.5)``.

  Returns the NMI score (0.0 if the score is negative or the calculation
  raised).  Returns None, after printing a message, when the inputs are
  not one of the supported combinations or a required resolution is
  missing.
  """
  # We have choices - 1 map and one model, 2 maps or 2 models
  if atomic_model1 is not None and map_model1 is not None:
    # 1 map 1 model
    if rez1 is None:
      print("Please provide the resolution for the model.")
      return

    emmap1 = chimera_to_tempy_map(map_model1)
    bms = chimera_to_tempy_model(atomic_model1)
    emmap2 = StructureBlurrer().gaussian_blur(bms, rez1, densMap=emmap1)

  elif map_model1 is not None and map_model2 is not None:
    # 2 maps
    emmap1 = chimera_to_tempy_map(map_model1)
    emmap2 = chimera_to_tempy_map(map_model2)

  elif atomic_model1 is not None and atomic_model2 is not None:
    # 2 models
    if rez1 is None or rez2 is None:
      print("Please provide the resolution for both model")
      return

    bms1 = chimera_to_tempy_model(atomic_model1)
    bms2 = chimera_to_tempy_model(atomic_model2)

    emmap1 = model_contour(bms1, rez1, emmap=False, t=0.5)
    if c2 is None:
      emmap2 = model_contour(bms2, rez2, emmap=False, t=0.5)
    else:
      emmap2 = blur_model(bms2, rez2, emmap=False)

  else:
    print("Error. Must have 1 model and 1 map, 2 maps or 2 models")
    return

  # Built only once the inputs are known to be usable
  sc = ScoringFunctions()

  # Contouring
  if c1 is None:
    c1 = map_contour(emmap1, t=1.5)

  if c2 is None:
    c2 = map_contour(emmap2, t=1.5)

  # If the two maps are not directly comparable, crop them, low-pass filter
  # the better-resolved one when the resolutions differ by more than 25%,
  # and bring both onto a common grid.
  if not sc.mapComparison(emmap1, emmap2):
    emmap1._crop_box(c1, 0.5)
    emmap2._crop_box(c2, 0.5)

    # A resolution may legitimately be missing (e.g. two maps); without both
    # values no filtering decision can be made, so only the grids are
    # matched.
    if rez1 is not None and rez2 is not None:
      if rez1 > 1.25 * rez2:
        emmap2 = lpfilter(emmap2, rez1)
      elif rez2 > 1.25 * rez1:
        emmap1 = lpfilter(emmap1, rez2)
    emmap1, emmap2 = match_grid(emmap1, emmap2, c1, c2)

  try:
    nmi = sc.MI(emmap1, emmap2, c1, c2, 1, None, None, True)
    if nmi < 0.0:
      nmi = 0.0
  except Exception:
    # Best effort: a failed score is reported as 0.0 with the traceback
    print('Exception for NMI score')
    print_exc()
    nmi = 0.0
  return nmi
Ejemplo n.º 15
0
c1 = tp.args.thr
if c1 is None: c1 = tp.args.thr1
if c1 is None:
    Name1, emmap1, c1 = map_contour(m, t=1.5)
else:
    Name1 = os.path.basename(m).split('.')[0]
    emmap1 = MapParser.readMRC(m)

dict_scores_hits = {}
list_models_calc = []
for pfile in list_to_check:
    Name2, emmap2, c2 = model_contour(pfile, res=r, emmap=emmap1, t=0.5)
    if None in [Name1, Name2]:
        sys.exit('Calculation failed, check input map and model files')
    print '#Scoring...', Name2
    sc = ScoringFunctions()
    #OVR
    try:
        ccc_mask, ovr = sc.CCC_map(emmap1, emmap2, c1, c2, 3, meanDist=True)
        print 'Percent overlap:', ovr
        if ovr < 0.0: ovr = 0.0
    except:
        print 'Exception for lccc and overlap score'
        print_exc()
        ovr = 0.0
    if ovr < 0.02:
        print "Maps do not overlap: ", Name2
        continue
    #SCCC
    print 'Local correlation score: ', ccc_mask
    if ccc_mask < -1.0 or ccc_mask > 1.0:
Ejemplo n.º 16
0
        #else:
        #    neigh_points = coordtree.query_ball_point(coordtree,distpot)
    #print len(list_coord1), len(neigh_points)
    return neigh_points


start_pdb = list_to_check[0]
iter_num = len(list_to_check)
intermed_file = ""
slow = 0.50
shigh = 0.25  # fraction of structure fitted reasonably well initially
#rigid body file
rigidbody_file = None  #

blurrer = StructureBlurrer()
sc = ScoringFunctions()
#read map file
emmap = MapParser.readMRC(map_file)

#-----------------------------
#set plotting parameters
flagplot = 1
try:
    import matplotlib
except ImportError:
    flatplot = 0
if flagplot == 1:
    print 'Setting maptpltlib parameters'
    try:
        ##matplotlib.use('Agg')
        try:
Ejemplo n.º 17
0
    if r1 is None and r is None:
        sys.exit('Input a map and model, map resolution (required)')
    elif r1 is None:
        r1 = r
    if all(x is None for x in [p, p1, p2]):
        sys.exit('Input a map and model, map resolution (required)')
    elif None in [p1, p2]:
        p = tp.args.pdb
    else:
        sys.exit('Input a map and model, map resolution (required)')
    rb_file = tp.args.rigidfile
    if rb_file is None: sys.exit('Rigid body file missing')

# make class instances for density simulation (blurring), scoring and plot scores
blurrer = StructureBlurrer()
scorer = ScoringFunctions()
Plot = Plot()

# read map file
emmap = MapParser.readMRC(m)
# read PDB file
structure_instance = PDBParser.read_PDB_file('pdbfile',
                                             p,
                                             hetatm=False,
                                             water=False)
# generate atom density and blur to required resolution
#sim_map = blurrer.gaussian_blur(structure_instance, r,densMap=emmap,sigma_coeff=sim_sigma_coeff,normalise=True)

#sim_map = blurrer.gaussian_blur_real_space(structure_instance, r,densMap=emmap,sigma_coeff=sim_sigma_coeff,normalise=True)
SCCC_list_structure_instance = []
# read rigid body file and generate structure instances for each segment
Ejemplo n.º 18
0
def _sccc_colour(score_sccc):
    """Return the RGBA colour for an SCCC score: red (0.0), white (0.5), blue (1.0)."""
    # Scores outside [0, 1] (e.g. negative correlations) would produce
    # channel values outside 0-255, so clamp first.
    clamped = min(max(score_sccc, 0.0), 1.0)
    if clamped >= 0.5:
        fade = 255 - int(math.floor(255 * ((clamped - 0.5) * 2.0)))
        return [fade, fade, 255, 255]
    fade = int(math.floor(255 * (clamped * 2.0)))
    return [255, fade, fade, 255]


def score(session,
          atomic_model,
          map_model,
          rigid_filename,
          rez,
          sim_sigma=0.187,
          colour_atoms=True):
    """Perform the SCCC score for every rigid body of a model.

    Arguments:
        *session*
            unused here; kept for the command interface.
        *atomic_model*
            model whose ``atoms`` are converted to a TEMPy structure.
        *map_model*
            map converted with ``chimera_to_tempy_map``.
        *rigid_filename*
            path of the rigid body file read with
            ``RBParser.read_FlexEM_RIBFIND_files``.
        *rez*
            resolution passed to ``ScoringFunctions.SCCC``.
        *sim_sigma*
            sigma coefficient passed to ``ScoringFunctions.SCCC``.
        *colour_atoms*
            when True, atoms and ribbons of each rigid body are coloured by
            its score: red (0.0) through white (0.5) to blue (1.0).

    Returns a list of ``(rigid_body, score)`` tuples, or None (after
    printing the error) if the model, map or rigid file could not be read.
    """

    print("Calculating SCCC Score")

    try:
        atomlist = []
        for atom in atomic_model.atoms:
            atomlist.append(chimera_to_tempy_atom(atom, len(atomlist)))

        bio_atom_structure = BioPy_Structure(atomlist)
        bio_map_structure = chimera_to_tempy_map(map_model)

        # read rigid body file and generate structure instances for each segment
        listRB = RBParser.read_FlexEM_RIBFIND_files(rigid_filename,
                                                    bio_atom_structure)
    except Exception as e:
        # Deliberately broad: any failure while reading the inputs is
        # reported to the user instead of propagating.
        print(e)
        print(
            "Error in reading Model and Map. Make sure you have selected one model and one map, and the rigid file is correct."
        )
        return

    scorer = ScoringFunctions()

    # score each rigid body segment
    listsc_sccc = []
    print('calculating SCCC')

    for RB in listRB:
        score_SCCC = scorer.SCCC(bio_map_structure,
                                 rez,
                                 sim_sigma,
                                 bio_atom_structure,
                                 RB,
                                 c_mode=False)

        print('>>', score_SCCC)
        listsc_sccc.append((RB, score_SCCC))

        # TODO - maybe a faster way? Also 'all_atoms' mentioned in the API doesnt exist but atoms does! :S
        # TODO - move this to somewhere better maybe?
        if colour_atoms:
            colour = _sccc_colour(score_SCCC)

            # Unique residue numbers of this rigid body, in first-seen order
            residues = list(dict.fromkeys(a.res_no for a in RB.atomList))

            # NOTE(review): this indexes atomic_model.residues by residue
            # number; confirm that positions in that collection really
            # correspond to res_no for the models this is used with.
            for r in residues:
                cr = atomic_model.residues[r]
                for catm in cr.atoms:
                    catm.color = list(colour)
                cr.ribbon_color = list(colour)

    return listsc_sccc
Ejemplo n.º 19
0
    def cluster_fit_ensemble_top_fit(self,
                                     ensemble_list,
                                     score,
                                     rms_cutoff,
                                     res_target_map,
                                     sigma_coeff,
                                     number_top_mod=0,
                                     write=False,
                                     targetMap=False):
        """
            RMSD clustering of the multiple "fits" starting from the best scoring model accordingly with a chosen score.
            Cluster the fits based on Calpha RMSD (starting from the best scoring model)

                Arguments:
                    *ensemble_list*
                        Input list of Structure Instances.
                    *targetMap*
                        Target Map Instance. Must be supplied: a copy of it is
                        taken unconditionally, so the default (False) fails.
                    *score*
                        Scoring function to use.
                        See ScoringFunctions class for a list of the available Scoring Function.
                        E.g. set score='CCC' to use the Cross-correlation coefficient.

                        Score option are:

                        i    'CCC' - Cross-correlation coefficient;

                        ii    'LAP' - Laplacian-filtered cross-correlation coefficient:  useful for maps with resolutions worse than 10-15 A;

                        iii   'MI' - Mutual information score: a good and robust score but relatively slow to calculate;

                        iv    'ENV' - Envelope score: the fastest score to calculate due to binarisation of the map.

                        v-vii  'NV','NV_Sobel','NV_Laplace'- Normal vector score: a vector-based surface superimposition score with or without Sobel/Laplace filter.

                        viii 'CD' - Chamfer Distance: a score used in computer vision algorithms as a fast similarity metric

                    *rms_cutoff*
                        float,  the Calpha RMSD cutoff based on which you want to cluster the solutions. For example 3.5 (for 3.5 A).
                    *res_target_map*
                        the resolution, in Angstroms, of the target Map.
                    *sigma_coeff*
                        the sigma value (multiplied by the resolution) that controls the width of the Gaussian.
                        Required; this method defines no default for it.

                        Values used :

                            0.187R corresponding with the Gaussian width of the Fourier transform falling to half the maximum at 1/resolution, as used in Situs (Wriggers et al, 1999);

                            0.225R which makes the Fourier transform of the distribution fall to 1/e of its maximum value at wavenumber 1/resolution, the default in Chimera (Petterson et al, 2004)

                            0.356R corresponding to the Gaussian width at 1/e maximum height equaling the resolution, an option in Chimera (Petterson et al, 2004);

                            0.425R the fullwidth half maximum being equal to the resolution, as used by FlexEM (Topf et al, 2008);

                            0.5R the distance between the two inflection points being the same length as the resolution, an option in Chimera (Petterson et al, 2004);

                            1R where the sigma value simply equal to the resolution, as used by NMFF (Tama et al, 2004).

                    *number_top_mod*
                        Number of Fits to cluster. Default (0) is all.
                    *write*
                        True will write out a file that contains the list of the structure instances representing different fits scored and clustered.
                        note the lrms column is the Calpha RMSD of each fit from the first fit in its class

                Return:
                    whatever ``Cluster._print_results_cluster`` returns for the clustered fits.
        """
        cluster = Cluster()

        # All fits are always ranked; the top-N restriction is applied below.
        list_ordered = cluster.rank_fit_ensemble(ensemble_list,
                                                 score,
                                                 res_target_map,
                                                 sigma_coeff,
                                                 number_top_mod=0,
                                                 write=False,
                                                 targetMap=targetMap.copy())

        if number_top_mod == 0:
            candidates = list_ordered
        else:
            candidates = list_ordered[:int(number_top_mod)]

        # cluster fits by local rmsd
        # Each ranked entry is a list whose item 0 is the fit name, item 1 the
        # structure, item 3 the RMSD to its class seed and the last item the
        # class number (0 = not yet assigned).
        fit_class = 0
        for position, seed in enumerate(candidates):
            print("model num %d: %s\n" % (position + 1, seed[0]))
            print('next index ' + str(position))
            if seed[-1] != 0:
                # already assigned to the class of a better-scoring fit
                continue

            # The best unassigned fit seeds a new class
            fit_class += 1
            seed_model = seed[1]
            for member in candidates[position:]:
                if member[-1] != 0:
                    continue
                rmsd_val = float(
                    seed_model.RMSD_from_same_structure(member[1], CA=True))
                member[3] = rmsd_val
                print("rmsd of %s from best local fit (%s)= %.2f" %
                      (member[0], seed[0], rmsd_val))
                if rmsd_val < rms_cutoff:
                    member[-1] = fit_class
                print('class= ' + str(member[-1]))

        return cluster._print_results_cluster(candidates, fit_class,
                                              number_top_mod, score, write)
Ejemplo n.º 20
0
# Example script fragment: load two PDB models and a target density map,
# blur the first model into a simulated map on the target's grid, and print
# the CCC between the simulated and the target map.
# NOTE: `sim_sigma_coeff` is used below but is not defined in this fragment;
# presumably it is set earlier in the script -- confirm.
#rb_file2 ="1J6Z_sse.txt"

# First model: read with hetatm=False and water=False.
structure_instance = PDBParser.read_PDB_file('3MFP',
                                             '3MFP.pdb',
                                             hetatm=False,
                                             water=False)
print structure_instance

# Second model, read with the same flags.
# NOTE(review): the first argument here is '1J6Z.pdb' whereas the call above
# passes '3MFP' without the extension -- confirm the suffix is intended.
structure_instance2 = PDBParser.read_PDB_file('1J6Z.pdb',
                                              '1J6Z.pdb',
                                              hetatm=False,
                                              water=False)
print structure_instance2

blurrer = StructureBlurrer()
scorer = ScoringFunctions()
# NOTE(review): this rebinds the name `Plot` to the object returned by
# Plot(), so the original `Plot` callable is no longer reachable under that
# name for the rest of the script.
Plot = Plot()

emmap = MapParser.readMRC('emd_5168_monomer.mrc')  #read target map
print emmap

# Simulated map for the first model, built against the target map (densMap).
# The positional 6.6 is presumably the resolution in Angstrom -- TODO confirm.
sim_map = blurrer.gaussian_blur(structure_instance,
                                6.6,
                                densMap=emmap,
                                sigma_coeff=sim_sigma_coeff,
                                normalise=True)
# Print a label followed by the CCC of the simulated map against the target.
print 'structure_instance', scorer.CCC(sim_map, emmap)
print sim_map

sim_map2 = blurrer.gaussian_blur(structure_instance2,
                                 6.6,
Ejemplo n.º 21
0
# Example script fragment: bring two maps onto a common grid, optionally
# soft-masking and amplitude-scaling them first.
# NOTE: emmap1, emmap2, c1, c2, msk, flag_scale, refsc, sw, r1, r2, flag_filt
# and apix are not defined in this fragment; presumably they are set earlier
# in the script -- confirm.
#emmap1._crop_box(c1,2)
#emmap2._crop_box(c2,2)
#find a common box to hold both maps
# The box is computed with the larger of the two maps' `apix` values.
spacing = max(emmap1.apix,emmap2.apix)
grid_shape, new_ori = emmap1._alignment_box(emmap2,spacing)

# Working copies that are later resampled onto the common grid.
# NOTE(review): the copies are taken BEFORE the optional soft mask below, so
# (assuming copy() returns an independent map) emmap_1/emmap_2 only see the
# masked data when the scaling step overwrites their fullMap -- confirm this
# ordering is intended.
emmap_1 = emmap1.copy()
emmap_2 = emmap2.copy()
#if a soft mask has to be applied to both maps
if msk:
    print 'Applying soft mask'
    # The mask result replaces the data of the ORIGINAL maps, not the copies.
    emmap1.fullMap = emmap1._soft_mask(c1)
    emmap2.fullMap = emmap2._soft_mask(c2)
#print datetime.now().time()

sc = ScoringFunctions()
if flag_scale:
    print 'scaling'
    if refsc: print 'Using second model/map amplitudes as reference'
    # amplitude scaling independent of the grid
    # Inputs are the original (possibly masked) maps; the two returned arrays
    # are stored on the working copies.
    emmap_1.fullMap,emmap_2.fullMap = sc._amplitude_match(emmap1,emmap2,0,0,sw,0,0,max(r1,r2),lpfiltb=flag_filt,lpfilta=False,ref=refsc)


#resample scaled maps to the common grid
# `spacing` is re-derived here: 0.33 * max(r1, r2) when no apix was supplied,
# otherwise the supplied apix.
# NOTE(review): grid_shape and new_ori above were computed with the earlier
# spacing value -- verify they are still appropriate for this new spacing.
if apix is None: spacing = max(r1,r2)*0.33
else: spacing = apix
# Ratio of the working copy's apix to the new spacing (not used in the lines
# visible here).
apix_ratio = emmap_1.apix/spacing
diff1 = emmap_1._interpolate_to_grid1(grid_shape,spacing,new_ori,1)
diff2 = emmap_2._interpolate_to_grid1(grid_shape,spacing,new_ori,1)

# get mask inside contour for the initial maps
Ejemplo n.º 22
0
path_out='Test_Files'
if os.path.exists(path_out)==True:
    print "%s exists" %path_out
else:
    os.mkdir(path_out)
os.chdir(path_out)

#read PDB file and create a Structure instance.
#note hetatm and water to include
structure_instance=PDBParser.read_PDB_file('1J6Z','1J6Z.pdb',hetatm=False,water=False)
print "structure_instance:"
print structure_instance

blurrer = StructureBlurrer()
scorer = ScoringFunctions()

map_target=MapParser.readMRC('emd_5168_monomer.mrc') #read target map

map_probe = blurrer.gaussian_blur(structure_instance, 6.6,densMap=map_target)#create a simulated map from the structure instance
map_probe.write_to_MRC_file("map_probe_actin.mrc") #write simulated map to a MRC file format

##SCORING FUNCTION

print "Calculate Envelope Score (ENV):"
molecualr_weight=structure_instance.get_prot_mass_from_atoms()
#Mmolecualr_weight=structure_instance.get_prot_mass_from_res()
first_bound=map_target.get_primary_boundary(molecualr_weight, map_target.min(), map_target.max())
#print scorer.envelope_score_APJ(map_target, first_bound, structure_instance,norm=True)
print scorer.envelope_score(map_target, first_bound, structure_instance,norm=True)