Example #1
0
    def testGetRawAlignment(self):
        """Check cmd.get_raw_alignment on a three-object alignment.

        Builds three peptides, aligns m2 and m3 onto m1 into the alignment
        object 'aln', and compares the raw alignment columns against a
        hand-written expectation: first with second argument 0, then with
        second argument 1 after m2 has been disabled.
        """
        from collections import defaultdict

        for sequence, name in (
                ('ACDEGGKLMN', 'm1'),
                ('CDEFFGGK', 'm2'),
                ('ASDEKLMNFY', 'm3'),
        ):
            cmd.fab(sequence, name)
        for mobile in ('m2 & guide', 'm3 & guide'):
            cmd.align(mobile, 'm1 & guide', cycles=0, object='aln')
        cmd.disable('m2')

        # Atom indices of the guide atoms, collected per object.
        guide_index = defaultdict(list)
        cmd.iterate('guide', 'guideids[model].append(index)',
                    space={'guideids': guide_index})

        # Expected alignment, one entry per column; each entry maps an
        # object name to the position of its residue among that object's
        # guide atoms:
        # m1 ACDE--GGKLMN--
        # m2 -CDEFFGGK-----
        # m3 ASDE----KLMNFY
        expected_positions = [
            {'m1': 0, 'm3': 0},
            {'m1': 1, 'm2': 0, 'm3': 1},
            {'m1': 2, 'm2': 1, 'm3': 2},
            {'m1': 3, 'm2': 2, 'm3': 3},
            {'m1': 4, 'm2': 5},
            {'m1': 5, 'm2': 6},
            {'m1': 6, 'm2': 7, 'm3': 4},
            {'m1': 7, 'm3': 5},
            {'m1': 8, 'm3': 6},
            {'m1': 9, 'm3': 7},
        ]
        aln_expect = [
            dict((model, guide_index[model][position])
                 for model, position in column.items())
            for column in expected_positions
        ]

        def as_dicts(raw):
            # One {object name: atom index} dict per alignment column.
            return [dict(column) for column in raw]

        self.assertEqual(as_dicts(cmd.get_raw_alignment('aln', 0)), aln_expect)

        # The disabled object m2 must vanish from every column.
        for column in aln_expect:
            column.pop('m2', None)

        self.assertEqual(as_dicts(cmd.get_raw_alignment('aln', 1)), aln_expect)
Example #2
0
def get_alignment_coords(name, active_only=0, state=-1, quiet=0):
    '''
DESCRIPTION

    API only function. Returns a dictionary with items

        (object name, Nx3 coords list)

    N is the number of alignment columns without gaps.

ARGUMENTS

    name = string: name of the alignment object

    active_only = 0 or 1: passed on to cmd.get_raw_alignment {default: 0}

    state = int: state to read the coordinates from {default: -1}

    quiet = 0 or 1: converted to int but otherwise unused {default: 0}

EXAMPLE

    import numpy
    from psico.multistuff import *
    from psico.querying import *

    extra_fit('name CA', cycles=0, object='aln')
    x = get_alignment_coords('aln')
    m = numpy.array(list(x.values()))
    '''
    active_only = int(active_only)
    state = int(state)
    quiet = int(quiet)

    columns = cmd.get_raw_alignment(name, active_only)
    models = cmd.get_object_list(name)

    # (model, index) -> (x, y, z) for every atom of the alignment
    coords_by_atom = {}
    cmd.iterate_state(state, name, 'idx2coords[model,index] = (x,y,z)',
            space={'idx2coords': coords_by_atom})

    allcoords = {}
    for model in models:
        allcoords[model] = []

    # NOTE(review): columns are compared against the full object list even
    # when active_only=1 -- confirm that gap-free columns are still found
    # when some objects of the alignment are disabled.
    n_models = len(models)
    for column in columns:
        if len(column) == n_models:
            for model, index in column:
                allcoords[model].append(coords_by_atom[model, index])
    return allcoords
Example #3
0
def get_alignment_coords(name, active_only=0, state=-1, quiet=0):
    '''
DESCRIPTION

    API only function. Returns a dictionary with items

        (object name, Nx3 coords list)

    N is the number of alignment columns without gaps.

ARGUMENTS

    name = string: name of the alignment object

    active_only = 0 or 1: passed on to cmd.get_raw_alignment {default: 0}

    state = int: state to read the coordinates from {default: -1}

    quiet = 0 or 1: converted to int but otherwise unused {default: 0}

EXAMPLE

    import numpy
    from psico.multistuff import *
    from psico.querying import *

    extra_fit('name CA', cycles=0, object='aln')
    x = get_alignment_coords('aln')
    m = numpy.array(list(x.values()))
    '''
    active_only = int(active_only)
    state = int(state)
    quiet = int(quiet)

    columns = cmd.get_raw_alignment(name, active_only)
    models = cmd.get_object_list(name)

    # (model, index) -> (x, y, z) for every atom of the alignment
    coords_by_atom = {}
    cmd.iterate_state(state, name, 'idx2coords[model,index] = (x,y,z)',
            space={'idx2coords': coords_by_atom})

    allcoords = {}
    for model in models:
        allcoords[model] = []

    # NOTE(review): columns are compared against the full object list even
    # when active_only=1 -- confirm that gap-free columns are still found
    # when some objects of the alignment are disabled.
    n_models = len(models)
    for column in columns:
        if len(column) == n_models:
            for model, index in column:
                allcoords[model].append(coords_by_atom[model, index])
    return allcoords
Example #4
0
    def from_alignment(self, mobile, target, aln_obj):
        '''
        Set self.mobile and self.target from the alignment object named
        "aln_obj".

        The selections are first restricted to the alignment object; if
        self.check() accepts them, nothing else is done. Otherwise two
        temporary named selections are built from the alignment columns
        that contain exactly one atom of each selection, and their names
        are recorded in self.temporary.
        '''
        from .selecting import wait_for
        wait_for(aln_obj)

        self.mobile = '(%s) and %s' % (mobile, aln_obj)
        self.target = '(%s) and %s' % (target, aln_obj)
        if self.check():
            return

        # Harder case: the selections span only part of the alignment, or
        # the alignment object covers more than the two objects. Keep only
        # the columns that have no gap in either of the given selections.
        mobile_atoms = set(cmd.index(mobile))
        target_atoms = set(cmd.index(target))
        picked_mobile = []
        picked_target = []

        for column in cmd.get_raw_alignment(aln_obj):
            in_mobile = mobile_atoms.intersection(column)
            if len(in_mobile) != 1:
                continue
            in_target = target_atoms.intersection(column)
            if len(in_target) != 1:
                continue
            picked_mobile.extend(in_mobile)
            picked_target.extend(in_target)

        self.mobile = cmd.get_unused_name('_mobile')
        self.target = cmd.get_unused_name('_target')
        self.temporary.append(self.mobile)
        self.temporary.append(self.target)

        mobile_models = set(model for model, _ in picked_mobile)
        target_models = set(model for model, _ in picked_target)

        single_object_each = (len(mobile_models) == 1
                              and len(target_models) == 1)
        if not single_object_each:
            # Atoms from several objects (or none): spell the selection
            # out as a list of object`index identifiers.
            cmd.select(self.mobile,
                       ' '.join('%s`%d' % atom for atom in picked_mobile))
            cmd.select(self.target,
                       ' '.join('%s`%d' % atom for atom in picked_target))
            return

        # Exactly one object on each side: select by index list.
        cmd.select_list(self.mobile,
                        mobile_models.pop(),
                        [index for _, index in picked_mobile],
                        mode='index')
        cmd.select_list(self.target,
                        target_models.pop(),
                        [index for _, index in picked_target],
                        mode='index')
Example #5
0
def return_aligned_res():
    """Align "thermo" onto "meso" and return the aligned residue pairs.

    The alignment is stored in the object 'aln' (which is left in place).
    Every alignment column is translated from atom identifiers to residue
    identifiers (resi).

    Returns:
        list of (resi, resi) tuples, one per distinct pair, in the order
        of first appearance in the raw alignment. The two entries of a
        tuple follow the order of the two atoms within the column.
    """
    cmd.align("thermo", "meso", object='aln')
    raw_aln = cmd.get_raw_alignment('aln')

    # (model, index) -> resi for every atom that is part of the alignment
    idx2resi = {}
    cmd.iterate('aln',
                'idx2resi[model, index] = resi',
                space={'idx2resi': idx2resi})

    # Order-preserving de-duplication; the set makes the membership test
    # O(1) instead of rescanning the result list for every column.
    aligned_res = []
    seen = set()
    for idx1, idx2 in raw_aln:
        pair = (idx2resi[idx1], idx2resi[idx2])
        if pair not in seen:
            seen.add(pair)
            aligned_res.append(pair)
    return aligned_res
Example #6
0
def bssa(
    sel1,
    sel2,
    polymer1="polymer",
    polymer2="polymer",
    radius=4,
    method="overlap",
    verbose=1,
):
    """
    Bind site similarity analysis.

    Align the sequence of both selections and compute similarity
    coefficients between two sites.

    With n1 and n2 the atom counts of the two sites and inter the number
    of columns in the raw alignment:

        overlap         inter / min(n1, n2)
        sorensen-dice   2 * inter / (n1 + n2)

    OPTIONS
        sel1        Selection or object 1.
        sel2        Selection or object 2.
        polymer1    protein of sel1.
        polymer2    protein of sel2.
        radius      Radius to look for nearby aminoacids.
        method      'overlap' or 'sorensen-dice'
        verbose     Print the coefficient when true.

    RETURNS
        The similarity coefficient.

    RAISES
        ValueError          if method is not one of the supported names.
        ZeroDivisionError   if the denominator of the chosen formula is 0.

    EXAMPLES
        bssa *CS.000_*, *CS.002_*, radius=4
        bssa *D.001*, *D.002*, polymer1='obj1', polymer2='obj2'
        bssa 6y84.Bs.001, 6y84.B.004, method=sorensen-dice
    """
    # Validate first so that an unsupported method does not trigger an
    # alignment at all.
    if method not in ("overlap", "sorensen-dice"):
        raise ValueError("Not supported method.")

    sel1 = f"(polymer and ({polymer1})) within {radius} of ({sel1})"
    sel2 = f"(polymer and ({polymer2})) within {radius} of ({sel2})"
    pm.align(sel1, sel2, object='aln')

    # The alignment object is only a temporary; remove it even when one
    # of the queries below fails.
    try:
        n1 = pm.count_atoms(sel1)
        n2 = pm.count_atoms(sel2)
        inter = len(pm.get_raw_alignment('aln'))
    finally:
        pm.delete('aln')

    if method == "overlap":
        coef = inter / min(n1, n2)
    else:
        coef = 2 * inter / (n1 + n2)

    if verbose:
        print("Similarity coefficient =", coef)
    return coef
Example #7
0
    def from_alignment(self, mobile, target, aln_obj):
        '''
        Set self.mobile and self.target from the alignment object named
        "aln_obj".

        The selections are first restricted to the alignment object; if
        self.check() accepts them, nothing else is done. Otherwise two
        temporary named selections are built from the alignment columns
        that contain exactly one atom of each selection, and their names
        are recorded in self.temporary.
        '''
        from .selecting import wait_for
        wait_for(aln_obj)

        self.mobile = '(%s) and %s' % (mobile, aln_obj)
        self.target = '(%s) and %s' % (target, aln_obj)
        if self.check():
            return

        # Harder case: the selections span only part of the alignment, or
        # the alignment object covers more than the two objects. Keep only
        # the columns that have no gap in either of the given selections.
        mobile_atoms = set(cmd.index(mobile))
        target_atoms = set(cmd.index(target))
        picked_mobile = []
        picked_target = []

        for column in cmd.get_raw_alignment(aln_obj):
            in_mobile = mobile_atoms.intersection(column)
            if len(in_mobile) != 1:
                continue
            in_target = target_atoms.intersection(column)
            if len(in_target) != 1:
                continue
            picked_mobile.extend(in_mobile)
            picked_target.extend(in_target)

        self.mobile = cmd.get_unused_name('_mobile')
        self.target = cmd.get_unused_name('_target')
        self.temporary.append(self.mobile)
        self.temporary.append(self.target)

        mobile_models = set(model for model, _ in picked_mobile)
        target_models = set(model for model, _ in picked_target)

        single_object_each = (len(mobile_models) == 1
                              and len(target_models) == 1)
        if not single_object_each:
            # Atoms from several objects (or none): spell the selection
            # out as a list of object`index identifiers.
            cmd.select(self.mobile, ' '.join('%s`%d' % atom for atom in picked_mobile))
            cmd.select(self.target, ' '.join('%s`%d' % atom for atom in picked_target))
            return

        # Exactly one object on each side: select by index list.
        mobile_indices = [index for _, index in picked_mobile]
        target_indices = [index for _, index in picked_target]
        cmd.select_list(self.mobile, mobile_models.pop(), mobile_indices, mode='index')
        cmd.select_list(self.target, target_models.pop(), target_indices, mode='index')
Example #8
0
def color_by_conservation(aln,
                          names=(),
                          color="rainbow",
                          as_putty=0,
                          _self=cmd):
    """Write a per-column occupancy score into the b-factor of aligned atoms.

    Every atom of an alignment column gets b = (1 + column size) / M; the
    b-factor of all atoms of the involved objects is reset to 0.0 first.

    Args:
        aln: name of the alignment object.
        names: object names to consider. When left at the default ``()``
            the objects are taken from the alignment itself and M is the
            largest column size plus one; otherwise M is the number of
            distinct names plus one.
        color: palette handed to ``spectrum`` (only used with as_putty).
        as_putty: when non-zero (int or numeric string), show each object
            as putty cartoon and color it by b-factor.
        _self: PyMOL command instance to operate on.

    Returns:
        None. An error message is printed if ``aln`` does not have an
        accepted object type.
    """
    # PyMOL doesn't yet know about object:alignment
    # but we need to check that this exists or we might crash
    if _self.get_type(aln) not in ("object:", "object:alignment"):
        print("Error: Bad or incorrectly specified alignment object.")
        return None

    # Query the same PyMOL instance that is altered below.
    r = _self.get_raw_alignment(aln)

    if names == ():
        known_objs = set(atom[0] for column in r for atom in column)

        # highest number of matches seen (0 for an empty alignment)
        M = (max(len(column) for column in r) if r else 0) + 1
    else:
        known_objs = set(names)
        M = len(known_objs) + 1

    for obj in known_objs:
        _self.alter(obj, "b=0.0")

    for af in r:
        c = float(1.0 + len(af)) / float(M)
        for y in af:
            _self.alter("%s and index %s" % (y[0], y[1]),
                        "b=c",
                        space={'c': c})

    # int() so that a string such as "0" passed from the PyMOL command
    # line counts as off.
    if int(as_putty) != 0:
        for obj in known_objs:
            _self.show_as("cartoon", "%s" % obj)
            _self.cartoon("putty", "%s" % obj)
            _self.spectrum('b', color, obj)
            _self.sort()
            _self.rebuild()
    return None
def color_by_conservation(aln, names=(), color="rainbow", as_putty=0, _self=cmd):
    """Write a per-column occupancy score into the b-factor of aligned atoms.

    Every atom of an alignment column gets b = (1 + column size) / M; the
    b-factor of all atoms of the involved objects is reset to 0.0 first.

    Args:
        aln: name of the alignment object.
        names: object names to consider. When left at the default ``()``
            the objects are taken from the alignment itself and M is the
            largest column size plus one; otherwise M is the number of
            distinct names plus one.
        color: palette handed to ``spectrum`` (only used with as_putty).
        as_putty: when non-zero (int or numeric string), show each object
            as putty cartoon and color it by b-factor.
        _self: PyMOL command instance to operate on.

    Returns:
        None. An error message is printed if ``aln`` does not have an
        accepted object type.
    """
    # PyMOL doesn't yet know about object:alignment
    # but we need to check that this exists or we might crash
    if _self.get_type(aln) not in ("object:", "object:alignment"):
        print("Error: Bad or incorrectly specified alignment object.")
        return None

    # Query the same PyMOL instance that is altered below.
    r = _self.get_raw_alignment(aln)

    if names == ():
        known_objs = set(atom[0] for column in r for atom in column)

        # highest number of matches seen (0 for an empty alignment)
        M = (max(len(column) for column in r) if r else 0) + 1
    else:
        known_objs = set(names)
        M = len(known_objs) + 1

    for obj in known_objs:
        _self.alter(obj, "b=0.0")

    for af in r:
        c = float(1.0 + len(af)) / float(M)
        for y in af:
            _self.alter("%s and index %s" % (y[0], y[1]), "b=c", space={'c': c})

    # int() so that a string such as "0" passed from the PyMOL command
    # line counts as off.
    if int(as_putty) != 0:
        for obj in known_objs:
            _self.show_as("cartoon", "%s" % obj)
            _self.cartoon("putty", "%s" % obj)
            _self.spectrum('b', color, obj)
            _self.sort()
            _self.rebuild()
    return None
Example #10
0
def colorbyrmsd(mobile, target, doAlign=1, doPretty=1, guide=1, method='super', quiet=1):
    '''
DESCRIPTION

    Align two structures and show the structural deviations in color to more
    easily see variable regions.

    Colors each mobile/target atom-pair by distance (the name is a bit
    misleading).

    Modifies the B-factor columns in your original structures. Atoms of
    the two selections that are not part of the alignment get b = -1.

ARGUMENTS

    mobile = string: atom selection for mobile atoms

    target = string: atom selection for target atoms

    doAlign = 0 or 1: Superpose selections before calculating distances
    {default: 1}

    doPretty = 0 or 1: Show nice representation and colors {default: 1}

    guide = 0 or 1: Restrict both selections to guide atoms {default: 1}

    method = string: name of the alignment command to use {default: super}

    quiet = 0 or 1: Suppress the RMSD and distance statistics {default: 1}

EXAMPLE

    fetch 1ake 4ake, async=0
    remove chain B
    colorbyrmsd 1ake, 4ake
    '''
    from chempy import cpv

    doAlign, doPretty = int(doAlign), int(doPretty)
    guide, quiet = int(guide), int(quiet)
    aln, seleboth = '_aln', '_objSelBoth'

    try:
        align = cmd.keyword[method][0]
    except (LookupError, TypeError):
        print(' Error: no such method: %s' % (method,))
        raise CmdException

    if guide:
        mobile = '(%s) and guide' % mobile
        target = '(%s) and guide' % target

    # NOTE(review): the handler is kept broad on purpose; the exception
    # types raised by the alignment commands are not visible from here.
    try:
        if doAlign:
            # superpose
            zz = align(mobile, target, object=aln)
        else:
            # get alignment without superposing
            zz = align(mobile, target, cycles=0, transform=0, object=aln)
        if not quiet:
            print('RMSD =  %s' % (zz[0],))
    except:
        print(' Error: Alignment with method %s failed' % (method))
        raise CmdException

    cmd.select(seleboth, '(%s) or (%s)' % (mobile, target))

    # From here on the two temporaries exist; remove them however we leave.
    try:
        idx2coords = dict()
        cmd.iterate_state(-1, seleboth, 'idx2coords[model,index] = (x,y,z)',
                          space={'idx2coords': idx2coords})

        if cmd.count_atoms('?' + aln, 1, 1) == 0:
            # this should ensure that "aln" will be available as selectable object
            cmd.refresh()

        # (model, index) -> distance to the aligned partner atom
        b_dict = dict()
        for col in cmd.get_raw_alignment(aln):
            assert len(col) == 2
            b = cpv.distance(idx2coords[col[0]], idx2coords[col[1]])
            for idx in col:
                b_dict[idx] = b

        cmd.alter(seleboth, 'b = b_dict.get((model, index), -1)',
                  space={'b_dict': b_dict})

        if doPretty:
            cmd.orient(seleboth)
            cmd.show_as('cartoon', 'byobj ' + seleboth)
            cmd.color('gray', seleboth)
            # b > -0.5 leaves out the unaligned atoms, which were set to -1
            cmd.spectrum('b', 'blue_red', seleboth + ' and b > -0.5')

        if not quiet:
            distances = list(b_dict.values())
            if distances:
                print(" ColorByRMSD: Minimum Distance: %.2f" % (min(distances)))
                print(" ColorByRMSD: Maximum Distance: %.2f" % (max(distances)))
                print(" ColorByRMSD: Average Distance: %.2f" % (sum(distances) / len(distances)))
            else:
                # min()/max() would raise on an empty alignment
                print(" ColorByRMSD: No aligned atom pairs")
    finally:
        cmd.delete(aln)
        cmd.delete(seleboth)
Example #11
0
def morpheasy(source, target, source_state=0, target_state=0, name=None,
        refinement=5, quiet=1):
    '''
DESCRIPTION

    Morph source to target, based on sequence alignment

    Works on temporary copies of both selections; the copies and the
    temporary alignment object and selections are deleted at the end.

USAGE

    morpheasy source, target [, source_state [, target_state [, name ]]]

ARGUMENTS

    source = string: atom selection to morph from

    target = string: atom selection to morph to

    source_state = int: state of source; values < 1 are replaced by the
    result of get_selection_state(source) {default: 0}

    target_state = int: same for target {default: 0}

    name = string: name of the morph object; an unused name starting with
    "morph" is picked if not given {default: None}

    refinement = int: passed on to rigimol.morph {default: 5}

    Returns the name of the morph object, or None if epymol cannot be
    imported.

EXAMPLE

    fetch 1akeA 4akeA, async=0
    extra_fit
    morpheasy 1akeA, 4akeA
    '''
    try:
        from epymol import rigimol
    except ImportError:
        print('No epymol available, please use a "Incentive PyMOL" build')
        print('You may use "morpheasy_linear" instead')
        return

    from .editing import mse2met
    from .querying import get_selection_state

    # arguments
    source_state = int(source_state)
    target_state = int(target_state)
    refinement = int(refinement)
    quiet = int(quiet)

    if source_state < 1:
        source_state = get_selection_state(source)
    if target_state < 1:
        target_state = get_selection_state(target)

    # temporary objects
    # IMPORTANT: cmd.get_raw_alignment does not work with underscore object names!
    alnobj = cmd.get_unused_name('_aln')
    so_obj = cmd.get_unused_name('source') # see above
    ta_obj = cmd.get_unused_name('target') # see above
    so_sel = cmd.get_unused_name('_source_sel')
    ta_sel = cmd.get_unused_name('_target_sel')
    cmd.create(so_obj, source, source_state, 1)
    cmd.create(ta_obj, target, target_state, 1)
    mse2met(so_obj)
    mse2met(ta_obj)

    # align sequence
    cmd.align(ta_obj, so_obj, object=alnobj, cycles=0, transform=0,
            mobile_state=1, target_state=1)
    cmd.refresh()
    cmd.select(so_sel, '%s and %s' % (so_obj, alnobj))
    cmd.select(ta_sel, '%s and %s' % (ta_obj, alnobj))

    # two-way lookup between the two atoms of each alignment column;
    # the reversed mapping is fully built before it is merged in
    alnmap = dict(cmd.get_raw_alignment(alnobj))
    alnmap.update(dict((v, k) for (k, v) in alnmap.items()))

    # copy source atom identifiers to temporary target
    idmap = dict()
    cmd.iterate(so_sel, 'idmap[model,index] = (segi,chain,resi,resn,name)',
            space={'idmap': idmap})
    cmd.alter(ta_sel, '(segi,chain,resi,resn,name) = idmap[alnmap[model,index]]',
            space={'idmap': idmap, 'alnmap': alnmap})

    # remove unaligned
    cmd.remove('%s and not %s' % (so_obj, so_sel))
    cmd.remove('%s and not %s' % (ta_obj, ta_sel))
    assert cmd.count_atoms(so_obj) == cmd.count_atoms(ta_obj)
    cmd.sort(so_obj)
    cmd.sort(ta_obj)

    # append target to source as 2-state morph-in object
    cmd.create(so_obj, ta_obj, 1, 2)

    # morph
    if name is None:
        name = cmd.get_unused_name('morph')
    # "async" is a reserved word since Python 3.7, so the keyword argument
    # of that name has to be passed through a dict.
    rigimol.morph(so_obj, name, refinement=refinement, **{'async': 0})

    # clean up
    for obj in [alnobj, so_obj, so_sel, ta_obj, ta_sel]:
        cmd.delete(obj)

    return name
Example #12
0
def colorbyrmsd(mobile,
                target,
                doAlign=1,
                doPretty=1,
                guide=1,
                method='super',
                quiet=1):
    '''
DESCRIPTION

    Align two structures and show the structural deviations in color to more
    easily see variable regions.

    Colors each mobile/target atom-pair by distance (the name is a bit
    misleading).

    Modifies the B-factor columns in your original structures. Atoms of
    the two selections that are not part of the alignment get b = -1.

ARGUMENTS

    mobile = string: atom selection for mobile atoms

    target = string: atom selection for target atoms

    doAlign = 0 or 1: Superpose selections before calculating distances
    {default: 1}

    doPretty = 0 or 1: Show nice representation and colors {default: 1}

    guide = 0 or 1: Restrict both selections to guide atoms {default: 1}

    method = string: name of the alignment command to use {default: super}

    quiet = 0 or 1: Suppress the distance statistics {default: 1}

EXAMPLE

    fetch 1ake 4ake, async=0
    remove chain B
    colorbyrmsd 1ake, 4ake
    '''
    from chempy import cpv

    doAlign, doPretty = int(doAlign), int(doPretty)
    guide, quiet = int(guide), int(quiet)
    aln, seleboth = '_aln', '_objSelBoth'

    try:
        align = cmd.keyword[method][0]
    except (LookupError, TypeError):
        print(' Error: no such method: ' + str(method))
        raise CmdException

    if guide:
        mobile = '(%s) and guide' % mobile
        target = '(%s) and guide' % target

    # NOTE(review): the handler is kept broad on purpose; the exception
    # types raised by the alignment commands are not visible from here.
    try:
        if doAlign:
            # superpose
            align(mobile, target)

        # get alignment without superposing
        align(mobile, target, cycles=0, transform=0, object=aln)
    except:
        print(' Error: Alignment with method %s failed' % (method))
        raise CmdException

    cmd.select(seleboth, '(%s) or (%s)' % (mobile, target))

    # From here on the two temporaries exist; remove them however we leave.
    try:
        idx2coords = dict()
        cmd.iterate_state(-1,
                          seleboth,
                          'idx2coords[model,index] = (x,y,z)',
                          space={'idx2coords': idx2coords})

        if cmd.count_atoms('?' + aln, 1, 1) == 0:
            # this should ensure that "aln" will be available as selectable object
            cmd.refresh()

        # (model, index) -> distance to the aligned partner atom
        b_dict = dict()
        for col in cmd.get_raw_alignment(aln):
            assert len(col) == 2
            b = cpv.distance(idx2coords[col[0]], idx2coords[col[1]])
            for idx in col:
                b_dict[idx] = b

        cmd.alter(seleboth,
                  'b = b_dict.get((model, index), -1)',
                  space={'b_dict': b_dict})

        if doPretty:
            cmd.orient(seleboth)
            cmd.show_as('cartoon', 'byobj ' + seleboth)
            cmd.color('gray', seleboth)
            # b > -0.5 leaves out the unaligned atoms, which were set to -1
            cmd.spectrum('b', 'blue_red', seleboth + ' and b > -0.5')

        if not quiet:
            distances = list(b_dict.values())
            if distances:
                print(" ColorByRMSD: Minimum Distance: %.2f" % (min(distances)))
                print(" ColorByRMSD: Maximum Distance: %.2f" % (max(distances)))
                print(" ColorByRMSD: Average Distance: %.2f" %
                      (sum(distances) / len(distances)))
            else:
                # min()/max() would raise on an empty alignment
                print(" ColorByRMSD: No aligned atom pairs")
    finally:
        cmd.delete(aln)
        cmd.delete(seleboth)
Example #13
0
def rmsd(selection="all", chains="", doAlign=1, doPretty=1,
         algorithm=1, guide=1, method="super", quiet=1, colorstyle="blue_red", colormode=""):
    """
DESCRIPTION

    Align every object of a selection against every other object and store,
    in the B-factor of each atom, the average distance to its aligned
    partner atoms. Atoms without any aligned partner get b = -1.

ARGUMENTS

    selection = string: atoms whose objects are compared {default: all}

    chains = string: chain identifiers to align separately, one per
    character, like "ab" {default: "" = do not split by chain}

    doAlign = 0 or 1: Superpose selections before calculating distances
    {default: 1}

    doPretty = 0 or 1: Show cartoon representation colored by B-factor
    {default: 1}

    algorithm = 0 or 1: 1 additionally follows each partner atom to the
    other objects when averaging {default: 1}

    guide = 0 or 1: Restrict the alignment to guide atoms {default: 1}

    method = string: name of the alignment command to use {default: super}

    quiet = 0 or 1: Suppress the distance statistics {default: 1}

    colorstyle = string: palette for the spectrum {default: blue_red}

    colormode = "", "lowshow" or "highshow": "lowshow" uses the average
    distance as spectrum maximum, "highshow" as spectrum minimum
    {default: ""}

EXAMPLE

    fetch 1ake 4ake, async=0
    rmsd
    """
    from chempy import cpv

    # initial parameters
    doAlign, doPretty = int(doAlign), int(doPretty)
    guide, quiet = int(guide), int(quiet)
    algorithm = int(algorithm)

    # get suitable align method
    try:
        align = cmd.keyword[method][0]
    except (LookupError, TypeError):
        print("Error: no such method: %s" % (method,))
        raise CmdException

    # names of all objects touched by the selection
    objects = set()
    cmd.iterate_state(-1, selection, "objects.add(model) ", space={'objects': objects})

    # selection suffix; computed once instead of overwriting the flag
    guide_suffix = " and guide" if guide else ""

    idx2coords = dict()
    # aligned partners and distances for each ordered pair of objects, like
    # {obj: {obj1: {(model, index): [(model1, index1), distance]}}}
    rmsd_stored = dict()
    # (model, index) -> distance, accumulated over all object pairs so the
    # statistics below are also defined when fewer than two objects exist
    total_values = {}

    def record_pairs(obj, obj1, mobile_sel, target_sel):
        # Build the temporary alignment object "aln" without moving atoms
        # and store the distance of every aligned atom pair.
        align(mobile_sel, target_sel, cycles=0, transform=0, object="aln")
        # coordinates are re-read for every pair because a preceding
        # superposition may have moved atoms
        cmd.iterate_state(-1, selection, "idx2coords[model,index] = (x,y,z)",
                          space={'idx2coords': idx2coords})
        if cmd.count_atoms('?' + "aln", 1, 1) == 0:
            # this should ensure that "aln" will be available as selectable object
            cmd.refresh()
        for col in cmd.get_raw_alignment("aln"):
            assert len(col) == 2
            b = cpv.distance(idx2coords[col[0]], idx2coords[col[1]])
            for idx in col:
                total_values[idx] = b
            # key on the atom that belongs to obj, whichever side it is on
            if col[0][0] == obj:
                rmsd_stored[obj][obj1][col[0]] = [col[1], b]
            else:
                rmsd_stored[obj][obj1][col[1]] = [col[0], b]
        cmd.delete("aln")

    for obj in objects:
        rmsd_stored[obj] = {}
        for obj1 in objects:
            if obj == obj1:
                continue
            rmsd_stored[obj][obj1] = {}
            if chains:
                for eachchain in chains:
                    chain_suffix = " and chain " + eachchain
                    if doAlign:
                        align(obj1 + guide_suffix + chain_suffix,
                              obj + guide_suffix + chain_suffix)
                    # NOTE(review): unlike the branch below, the pairing
                    # here is not restricted to guide atoms -- confirm
                    # whether that is intended.
                    record_pairs(obj, obj1, obj1 + chain_suffix, obj + chain_suffix)
            else:
                if doAlign:
                    align(obj1 + guide_suffix, obj + guide_suffix)
                record_pairs(obj, obj1, obj1 + guide_suffix, obj + guide_suffix)

    def b_replace(model, index):
        # Average stored distance for atom (model, index); -1 if unaligned.
        n = 0
        bsum = 0.0
        for obj1 in objects:
            if model == obj1:
                continue
            partners = rmsd_stored[model][obj1]
            if (model, index) not in partners:
                continue
            (nextmodel, nextindex), dist = partners[model, index]
            bsum += dist
            n += 1
            if not algorithm:
                continue
            # NOTE(review): the partner atom is expected to belong to obj1,
            # in which case "nextmodel != obj1" never holds and this loop
            # adds nothing -- confirm the intended condition.
            for nextobj1 in objects:
                if nextmodel != nextobj1 and nextmodel != obj1:
                    onward = rmsd_stored[nextmodel][nextobj1]
                    if (nextmodel, nextindex) in onward:
                        bsum += onward[nextmodel, nextindex][1]
                        n += 1
        if n == 0:
            return -1
        return bsum / n

    cmd.alter(selection, 'b = b_replace(model, index)', space={'b_replace': b_replace})

    distances = list(total_values.values())

    if doPretty:
        # None lets spectrum pick the range when nothing was aligned
        mini = min(distances) if distances else None
        maxi = max(distances) if distances else None
        if colormode:
            if colormode == "lowshow":
                if distances:
                    maxi = sum(distances) / len(distances)
                print("This is lowshow")
            elif colormode == "highshow":
                if distances:
                    mini = sum(distances) / len(distances)
                print("This is highshow")
            else:
                raise CmdException

        cmd.orient(selection)
        cmd.show_as('cartoon', 'byobj ' + selection)
        cmd.color('gray', selection)
        # b > -0.5 leaves out the unaligned atoms, which were set to -1
        cmd.spectrum('b', colorstyle, selection + ' and b > -0.5', minimum=mini, maximum=maxi)

    if not quiet:
        if distances:
            print(" ColorByRMSD: Minimum Distance: %.2f" % (min(distances)))
            print(" ColorByRMSD: Maximum Distance: %.2f" % (max(distances)))
            print(" ColorByRMSD: Average Distance: %.2f" % (sum(distances) / len(distances)))
        else:
            print(" ColorByRMSD: No aligned atom pairs")
Example #14
0
def drawNetwork(path1, path2, sele=None, sele1=None, sele2=None, top1=None, top2=None,
                r=1, edge_norm=None, alpha=0.5, mutations=False, align_with=None,
                node_color=(0.6, 0.6, 0.6), edge_color1=(0, 0, 1), palette="colorblind",
                edge_color2=(1, 0, 0), labeling='0', norm_expected=False,
                threshold=0, topk=None, max_compo=None, mean_vp=None, strong_compo=None,
                around=None, keep_previous=False, compo_size=None, save_cc=None, load_cc=None,
                compos_to_excel=None, force_binary_color=False, compo_radius=None, compo_diam=None,
                label_compo='', auto_patch=True, printall=False, sum=False, n_clusters=None,
                color_by_compo=False, color_by_group=False, show_top_group=None,
                name1=None, name2=None, name_nodes='nodes', userSelection='all',
                fromstruct=None, color_by_contact_type=False, standard_and_expected=None):
    '''
    Draws a NetworkX network on the PyMol structure.

    The network is the difference ``mat2 - mat1`` of two residue-level
    contact matrices built from the atomic matrices loaded from ``path1`` and
    ``path2``.  Edges are drawn as CGO cylinders between the coordinates
    collected from ``userSelection`` and nodes as CGO spheres.

    Inputs
        path1, path2: files handed to the module-level ``load`` helper.  Their
            basename (without extension) is the default name of the edge
            objects, and ``<prefix>.topy`` is the default topology file.
        top1, top2: topology files handed to ``load``; derived from the paths
            when None.
        sele: selection used for both sides of the contact matrix.
        sele1, sele2: the two selections when ``sele`` is None.  When all
            three are None, 'protein && not hydrogen' is used for both.
        userSelection: PyMOL selection used to collect coordinates, names and
            secondary structure (restricted to CA/C atoms here).
        fromstruct: forwarded to ``create_top`` and ``get_expected_type``.
        align_with: if given, aligned onto ``userSelection`` and the raw
            alignment is used to re-index ``mat2`` before the subtraction.
        norm_expected: divide both matrices by their expected counts
            (``divide_expected``) before the subtraction.

    Edge filtering
        threshold: edges with an absolute weight below it are removed
            (0 disables the filter).
        topk: if an int, ``threshold`` becomes the absolute weight found at
            index ``topk`` of the descending-sorted edge weights.
        standard_and_expected: if an int, same cut as ``topk`` and the network
            is drawn against its expected-normalized counterpart.
        max_compo, mean_vp, compo_size, compo_diam, compo_radius,
        strong_compo: connected-component based filters; any of them turns on
            ``color_by_compo``.
        load_cc, save_cc: optional ``np.load`` / ``np.save`` paths for the
            connected-component curve.
        around: node label; only its connected component is kept.

    Drawing
        r: radius of the node spheres, and divisor of the default edge norm.
        edge_norm: divisor applied to edge weights to obtain cylinder radii.
            None -> largest edge weight divided by ``r``; True -> a global
            factor derived from the selection sizes.
        node_color, edge_color1, edge_color2: RGB tuples.  ``edge_color1`` is
            used for edges with weight <= 0 (or colour attribute 'r'),
            ``edge_color2`` otherwise.
        palette: seaborn palette used by the component and group colourings.
        color_by_contact_type, color_by_compo, color_by_group: alternative
            colouring modes, tested in that order after
            ``standard_and_expected``.
        force_binary_color: with component colouring, colour edges by sign
            instead of by component.
        n_clusters, show_top_group: Birch clustering settings for
            ``color_by_group``.
        name1, name2, name_nodes: names of the created CGO objects.
        keep_previous: keep the objects and labels of a previous call.
        alpha: accepted for compatibility; not used by this function.

    Reporting
        mutations: highlight residues whose label differs between the two
            topologies and print them.
        labeling: 1 -> label nodes with "t2o(resn)+resi", 3 -> "resn+resi".
            Ints and numeric strings are both accepted.
        label_compo: string of flags; 'h' reports hubs, 'c' draws a text label
            per component, 's' prints secondary structure spans, 'IGPS' maps
            secondary structure names through ``IGPS_mapping``.
        compos_to_excel: path of an Excel file receiving the component table.
        auto_patch: relabel the network with the structure labels when the
            topology labels do not match and the node counts agree.
        sum: print the sum of the perturbation matrix.
        printall: print every remaining edge with its attributes.

    Raises
        ValueError: when no edge is left to draw.
    '''

    #Initialization of labeling variables and retrieving residue XYZ positions
    if not keep_previous:
        cmd.delete('*nodes *edges Component* Group*')
        cmd.label(selection=userSelection, expression="")
        cmd.hide("licorice", "?mutations")
    # Building position -- name correspondence
    stored.posCA = []
    stored.names = []
    stored.ss = []
    userSelection = userSelection + " and ((n. CA) or n. C)"
    cmd.iterate_state(1, selector.process(userSelection), "stored.posCA.append([x,y,z])")
    cmd.iterate(userSelection, "stored.ss.append(ss)")
    cmd.iterate(userSelection, 'stored.names.append(resn+resi+chain)')
    stored.labels = list(map(relabel, stored.names))
    stored.resid = list(map(selection, stored.names))
    node2id = dict(zip(stored.labels, stored.resid))
    node2CA = dict(zip(stored.labels, stored.posCA))

    #Secondary Structure labels
    # Each run of identical secondary structure within a chain gets a numbered
    # key "<flag><count>:<chain>"; '' and 'L' are both flagged 'U'.
    prevSS, prevChain = None, None
    counters = {'': 0, 'H': 0, 'S': 0, 'L': 0}
    node2SS = dict(zip(stored.labels, stored.ss))
    SS2nodelist = {}
    putflag = lambda X: 'U' if X in ['', 'L'] else X
    for label in node2SS:
        ss = node2SS[label]
        chain = label[-1]
        if prevChain != chain:
            # Numbering restarts on every new chain
            for counter in counters:
                counters[counter] = 0
        if prevSS != ss:
            counters[ss] += 1
        labss = putflag(ss) + str(counters[ss]) + ':' + chain
        if labss in SS2nodelist:
            SS2nodelist[labss].append(label)
        else:
            SS2nodelist[labss] = [label]
        prevSS = ss
        prevChain = chain

    # Forward pass: rename every 'U' segment after the element preceding it
    # ('Head' for the first segment of a chain).
    prevkey, prevChain = None, None
    order = []
    keys = list(SS2nodelist.keys())

    for key in keys:
        if prevChain != key.split(':')[-1]:
            prevkey = None
        if key[0] == 'U':
            if prevkey is None:
                newkey = 'Head:' + key.split(':')[-1]
            else:
                newkey = 'U' + prevkey
            SS2nodelist[newkey] = SS2nodelist.pop(key)
            order.append(newkey)
        else:
            order.append(key)
        prevkey = key
        prevChain = key.split(':')[-1]

    # Backward pass: complete the 'U' names with the element following them
    # ('Tail' for the last segment of a chain).
    prevkey = None
    final = []
    for key in order[::-1]:
        if prevChain != key.split(':')[-1]:
            prevkey = None
        if key[0] == 'U':
            if prevkey is None:
                newkey = 'Tail:' + key.split(':')[-1]
            else:
                newkey = '{}-{}'.format(key[1:], prevkey)
            SS2nodelist[newkey] = SS2nodelist.pop(key)
            final.append(newkey)
        else:
            final.append(key)
        prevkey = key
        prevChain = key.split(':')[-1]

    # Display names of the secondary structure elements
    mapss = {}
    for key in final:
        # 'Head' contains an 'H' and must be restored after the replacement
        newkey = key.replace('S', 'β').replace('H', 'α').replace('αead', 'Head')
        if 'IGPS' in str(label_compo):
            parts = []
            for elt in newkey.split('-'):
                if elt.split(':')[1] in ['A', 'C', 'E']:
                    parts.append('𝘧{}'.format(elt.split(':')[0]))
                elif elt.split(':')[1] in ['B', 'D', 'F']:
                    parts.append('𝘩{}'.format(elt.split(':')[0]))
            newkey = '-'.join(parts)
            mapss[key] = IGPS_mapping[newkey]
        else:
            mapss[key] = newkey

    for ss in SS2nodelist:
        for node in SS2nodelist[ss]:
            node2SS[node] = mapss[ss]

    #Loading external data
    atom_mat1, atom_mat2 = list(map(load, [path1, path2]))
    top1 = load(path1.split('_')[0].split('.')[0] + '.topy') if top1 is None else load(top1)
    top2 = load(path2.split('_')[0].split('.')[0] + '.topy') if top2 is None else load(top2)

    #Handling selections
    if sele is not None:
        sele1, sele2 = [sele] * 2
    if sele is None and sele1 is None and sele2 is None:
        sele1, sele2 = ['protein && not hydrogen'] * 2
        print('Default selection protein without hydrogens')

    sels = [sele1, sele2]

    #Creating topology matrices for each selection
    # NOTE(review): presumably atom-to-residue membership matrices, one per
    # selection -- confirm against create_top.
    topg1, topd1 = [create_top(sel, top1, fromstruct) for sel in sels]
    topg2, topd2 = [create_top(sel, top2, fromstruct) for sel in sels]
    #From atomic to residual contacts and perturbation network computation
    mat1 = (atom_mat1 @ topd1).transpose() @ topg1
    mat2 = (atom_mat2 @ topd2).transpose() @ topg2
    #Apply expected norm if necessary
    if norm_expected:
        exp1 = (topd1.sum(axis=1).transpose() @ topd1).transpose() @ (topg1.sum(axis=1).transpose() @ topg1)
        exp2 = (topd2.sum(axis=1).transpose() @ topd2).transpose() @ (topg2.sum(axis=1).transpose() @ topg2)
        mat1 = divide_expected(mat1, exp1)
        mat2 = divide_expected(mat2, exp2)
        mat1, mat2 = list(map(csr_matrix, [mat1, mat2]))

    if align_with is not None:
        # Re-index mat2 through the atom pairing of a structural alignment
        cmd.align(align_with, userSelection, object='aln')
        raw_aln = cmd.get_raw_alignment('aln')
        cmd.hide('cgo', 'aln')
        order_string = [idx[0] for idx in raw_aln[-1]][::-1]
        trans_mat = dok_matrix(tuple([cmd.count_atoms(X) for X in order_string]))
        for idx1, idx2 in raw_aln:
            trans_mat[idx2[1] - 1, idx1[1] - 1] = 1
        trans_mat = csr_matrix(trans_mat)
        top_t1, top_t2 = [create_top('name CA', top) for top in [top1, top2]]
        trans_res = (trans_mat @ top_t1).transpose() @ top_t2
        mat2 = trans_res @ (mat2 @ trans_res.transpose())

    pertmat = mat2 - mat1

    pertmat.setdiag(0)
    pertmat.eliminate_zeros()

    # networkx 3.0 removed from_scipy_sparse_matrix in favour of
    # from_scipy_sparse_array; use whichever this installation provides.
    graph_from_sparse = (getattr(nx, 'from_scipy_sparse_array', None)
                         or nx.from_scipy_sparse_matrix)
    net = graph_from_sparse(pertmat)

    #Creating labeling dictionary
    # Shift residue numbers by one when the first residue's string form ends
    # with '0'.
    if str(next(top1.residues))[-1] == '0':
        offset = 1
    else:
        offset = 0

    chain_names = [chr(ord('A') + i) for i in range(26)]

    t2o = lambda X: three2one[X] if X in three2one else X[0]
    get_chain = lambda X: chain_names[(X.chain.index % len(chain_names))]
    res2str = lambda X: t2o(X.name) + str(X.resSeq + offset) + ':' + get_chain(X)
    id2label = {i: res2str(res) for i, res in enumerate(top1.residues)}
    #Relabeling network
    net = nx.relabel_nodes(net, id2label)

    label2id = {res2str(res): i for i, res in enumerate(top1.residues)}

    #Auto_patching network labels
    if not all(elem in node2CA for elem in net.nodes()):
        print('PDB structure and topology labeling not matching.')
        if auto_patch:
            print('Attempting to auto-patch residue names. (this can be disabled with auto_patch=False)')
            if len(node2CA.keys()) == len(net.nodes()):
                # Same number of nodes: pair them by position
                remap = dict(zip(net.nodes(), node2CA.keys()))
                net = nx.relabel_nodes(net, remap)
                label2id = dict(zip(node2CA.keys(), range(top1.n_residues)))
            else:
                print("Auto-patching not working, please try on different PDB file")

    #Output topK if necessary
    if type(topk) == int:
        limit_weight = np.sort([abs(net.edges[(u, v)]['weight']) for u, v in net.edges])[::-1][topk]
        threshold = limit_weight

    if type(standard_and_expected) == int:
        limit_weight = np.sort([abs(net.edges[(u, v)]['weight']) for u, v in net.edges])[::-1][standard_and_expected]
        # Remember the labels so that the expected-normalized network built
        # later can be relabeled identically.
        relabel_net2 = dict(enumerate(net.nodes()))
        threshold = limit_weight

    compo_filters = (compo_size, compo_diam, compo_radius, strong_compo)
    if max_compo or mean_vp or any(opt is not None for opt in compo_filters):
        color_by_compo = True
        if load_cc is not None:
            cc = np.load(load_cc)
        else:
            cc = get_connected_components(pertmat)
            if save_cc is not None:
                np.save(save_cc, cc)
        if max_compo:
            threshold = np.sort(np.abs(pertmat.data))[::-1][np.argmax(cc[::-1])]
        else:
            lastmax = np.sort(np.abs(pertmat.data))[::-1][np.argmax(cc[::-1])]
            print('last maximum: {}'.format(np.round(lastmax, 2)))
            net.remove_edges_from([(u, v) for u, v in net.edges() if abs(net[u][v]['weight']) < lastmax])
            net.remove_nodes_from(list(nx.isolates(net)))
            components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
            if mean_vp:
                vanishing_points = [np.max([abs(net[u][v]['weight']) for u, v in c.edges()]) for c in components_list]
                threshold = np.median(vanishing_points)
            elif compo_size is not None:
                robust = [list(c.nodes()) for c in components_list if len(c.edges()) >= float(compo_size)]
                net = net.subgraph([node for compo_nodes in robust for node in compo_nodes])
                threshold = 0
            elif compo_diam is not None:
                robust = [list(c.nodes()) for c in components_list if nx.diameter(c) >= float(compo_diam)]
                net = net.subgraph([node for compo_nodes in robust for node in compo_nodes])
                threshold = 0
            elif compo_radius is not None:
                robust = [list(c.nodes()) for c in components_list if nx.radius(c) >= float(compo_radius)]
                net = net.subgraph([node for compo_nodes in robust for node in compo_nodes])
                threshold = 0
            elif strong_compo is not None:
                # Keep the components ranked in the top `strong_compo` percent
                # both by vanishing point and by number of edges.
                vanishing_points = [np.max([abs(net[u][v]['weight']) for u, v in c.edges()]) for c in components_list]
                edges_len = [len(c.edges()) for c in components_list]
                percentile = float(strong_compo) * len(components_list) / 100
                vani_ranks = len(vanishing_points) + 1 - rankdata(vanishing_points, method='max')
                size_ranks = len(edges_len) + 1 - rankdata(edges_len, method='max')
                vani_nodes = [list(c.nodes()) for i, c in enumerate(components_list) if vani_ranks[i] < percentile]
                size_nodes = [list(c.nodes()) for i, c in enumerate(components_list) if size_ranks[i] < percentile]
                vani_nodes = [node for compo_nodes in vani_nodes for node in compo_nodes]
                size_nodes = [node for compo_nodes in size_nodes for node in compo_nodes]
                strong = list(set(vani_nodes) & set(size_nodes))
                # The threshold is not reset in this branch, so the graph may
                # still be edited below: take a copy, a subgraph view is frozen.
                net = net.subgraph(strong).copy()

    #Detect mutations
    if mutations:
        cmd.show_as(representation="cartoon", selection="?mutations")
        cmd.color(color="grey80", selection="?mutations")
        cmd.delete("?mutations")
        mutations_list = []
        labels_top2 = {j: res2str(res) for j, res in enumerate(top2.residues)}
        for resid in id2label:
            if resid in labels_top2:
                if id2label[resid] != labels_top2[resid]:
                    # "<label in top1><new one-letter code>:<chain>"
                    mutations_list.append((resid, (labels_top2[resid][0] + ':').join(id2label[resid].split(':'))))
                    cmd.select("mutations", 'resi ' + str(id2label[resid].split(':')[0][1:]) + ' and chain ' + id2label[resid][-1], merge=1)
            else:
                print('Deletion of ', id2label[resid])
        print('List of mutations: ', ', '.join([elt[1] for elt in mutations_list]))
        cmd.show_as(representation="licorice", selection="?mutations")
        cmd.color(color="magenta", selection="?mutations")

    #Apply threshold
    if threshold != 0:
        print('Applying threshold {}'.format(threshold))
        net.remove_edges_from([(u, v) for u, v in net.edges() if abs(net[u][v]['weight']) < threshold])
        net.remove_nodes_from(list(nx.isolates(net)))

    #Induced perturbation network if needed
    if around is not None:
        net = net.subgraph(nx.node_connected_component(net, around))

    #Setting Pymol parameters
    cmd.set('auto_zoom', 0)
    cmd.set("cgo_sphere_quality", 4)
    if len(net.edges()) == 0:
        raise ValueError('Computations give empty network')

    #Norm edges
    if edge_norm is None:
        edge_norm = max([net.edges()[(u, v)]['weight'] for u, v in net.edges()]) / r

    # NOTE(review): `== True` also matches the number 1, so edge_norm=1
    # selects the global normalization -- kept as in the original.
    elif edge_norm == True:
        tot_atoms_in_sel = np.sum([np.sum(elt) for elt in [topd1, topd2, topg1, topg2]])
        tot_atoms = np.sum([max(elt.shape) for elt in [topd1, topd2, topg1, topg2]])
        norm_fact = tot_atoms_in_sel**2 / tot_atoms**2
        edge_norm = norm_fact * 30
        print('Global normalization factor: {}'.format(1 / norm_fact))

    #Function to name edges
    def name_edges(name, path):
        # Default to the file name without its extension
        if name is None:
            return '.'.join(basename(path).split('.')[:-1])
        return name

    if type(standard_and_expected) == int:
        # Compare the standard network with its expected-normalized version
        exp1 = (topd1.sum(axis=1).transpose() @ topd1).transpose() @ (topg1.sum(axis=1).transpose() @ topg1)
        exp2 = (topd2.sum(axis=1).transpose() @ topd2).transpose() @ (topg2.sum(axis=1).transpose() @ topg2)
        mat1 = divide_expected(mat1, exp1)
        mat2 = divide_expected(mat2, exp2)
        mat1, mat2 = list(map(csr_matrix, [mat1, mat2]))
        net2 = graph_from_sparse(mat2 - mat1)
        net2 = nx.relabel_nodes(net2, relabel_net2)
        limit_weight = np.sort([abs(net2.edges[(u, v)]['weight']) for u, v in net2.edges])[::-1][standard_and_expected]
        net2.remove_edges_from([(u, v) for u, v in net2.edges() if abs(net2[u][v]['weight']) < limit_weight])
        net2.remove_nodes_from(list(nx.isolates(net2)))
        # In both networks / only in the standard one / only in the expected one
        colors = [(1, 1, 0), (0, 1, 1), (1, 0, 1)]
        objs_inboth = []
        objs_instd = []
        objs_inexp = []
        nodes = []
        for u, v in net.edges():
            radius = net[u][v]['weight'] / edge_norm
            # has_edge is a constant-time lookup, unlike rebuilding a list of
            # edges for every test
            if net2.has_edge(u, v):
                objs_inboth += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[0], *colors[0]]
            else:
                objs_instd += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[1], *colors[1]]
            nodes += [u, v]
        edge_norm2 = max([net2.edges()[(u, v)]['weight'] for u, v in net2.edges()]) / r
        for u, v in net2.edges():
            radius = net2[u][v]['weight'] / edge_norm2
            if not net.has_edge(u, v):
                objs_inexp += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[2], *colors[2]]
            nodes += [u, v]

        nodelist = set(nodes)
        objs_nodes = [COLOR, *node_color]
        for u in nodelist:
            x, y, z = node2CA[u]
            objs_nodes += [SPHERE, x, y, z, r]
        # The first four characters of the joined selection are dropped
        # (presumably a leading operator -- confirm against `selection`).
        selnodes = ''.join([node2id[u] for u in nodelist])[4:]
        cmd.load_cgo(objs_inboth, 'in_both_edges')
        cmd.load_cgo(objs_instd, 'in_std_edges')
        cmd.load_cgo(objs_inexp, 'in_exp_edges')
        cmd.load_cgo(objs_nodes, 'nodes')

    elif color_by_contact_type:
        expected_matrices = get_expected_type(atom_mat1, atom_mat2, top1, top2, fromstruct)
        name1, name2 = list(map(name_edges, [name1, name2], [path1, path2]))
        names = ['{0}_{1}'.format(name1, sel) for sel in ['hydro', 'polar', 'mixed']] + ['{0}_{1}'.format(name2, sel) for sel in ['hydro', 'polar', 'mixed']]
        nodes_dict = {i: [] for i in range(len(names))}
        objs_dict = {i: [] for i in range(len(names))}
        colors = [(1, 0.86, 0.73), (0.68, 0.85, 0.90), (0.60, 0.98, 0.60), (1, 0.86, 0), (0.25, 0.41, 0.88), (0, 0.50, 0)]
        for u, v in net.edges():
            radius = net[u][v]['weight'] / edge_norm
            id_u, id_v = label2id[u], label2id[v]
            # The contact type is the matrix holding the largest value for
            # this residue pair
            values = list(map(lambda _mat: _mat[id_v, id_u], expected_matrices))
            type_of_contact = np.argmax(values)
            objs_dict[type_of_contact] += [CYLINDER, *node2CA[u], *node2CA[v], radius, *colors[type_of_contact], *colors[type_of_contact]]
            nodes_dict[type_of_contact] += [u, v]
        selnodes = ''
        for toc in nodes_dict:
            nodelist = set(nodes_dict[toc])
            objs_dict[toc] += [COLOR, *node_color]
            for u in nodelist:
                x, y, z = node2CA[u]
                objs_dict[toc] += [SPHERE, x, y, z, r]
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]

        for i, name in zip(objs_dict.keys(), names):
            cmd.load_cgo(objs_dict[i], '{}_edges'.format(name))

    #Coloring by components
    elif color_by_compo:
        components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
        diameters = [nx.diameter(c) for c in components_list]
        ranking = np.argsort(diameters)[::-1]
        # One spare colour is requested so that a grey one can be discarded
        colors = sns.color_palette(palette, n_colors=len(components_list) + 1)
        for i, c in enumerate(colors):
            if c[0] == c[1] == c[2]:
                print(c)
                colors.pop(i)
                break
        selnodes = ''
        for i, rank in enumerate(ranking):
            color, compo = colors[rank], components_list[rank]
            _obj, nodelist = [], []
            for u, v in compo.edges():
                radius = net[u][v]['weight'] / edge_norm
                if abs(net[u][v]['weight']) >= threshold:
                    if not force_binary_color:
                        _obj += [CYLINDER, *node2CA[u], *node2CA[v], radius, *color, *color]
                    else:
                        if net[u][v]['weight'] <= 0:
                            _obj += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                        else:
                            _obj += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                    nodelist += [u, v]
            _obj += [COLOR, *node_color]
            nodelist = set(nodelist)
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]
            for u in nodelist:
                x, y, z = node2CA[u]
                _obj += [SPHERE, x, y, z, r]
            cmd.load_cgo(_obj, 'Component{}'.format(i + 1))

    #Color by group of relevance
    elif color_by_group:
        weights = np.array([abs(net[u][v]['weight']) for u, v in net.edges()]).reshape(-1, 1)
        birch = Birch(n_clusters=n_clusters).fit(weights)
        labels = birch.predict(weights)
        # NOTE(review): `labels` has one entry per network edge while
        # `pertmat.data` has one per stored matrix entry; confirm that the two
        # orderings are meant to correspond.
        ordered_labels = labels[np.argsort(pertmat.data)]
        _, idx = np.unique(ordered_labels, return_index=True)
        mapping = dict(zip(ordered_labels[np.sort(idx)], np.sort(np.unique(ordered_labels))))
        i2color = dict(zip(ordered_labels[np.sort(idx)], sns.color_palette(palette, len(np.unique(ordered_labels)))[::-1]))
        selnodes = ''
        if show_top_group is None:
            show_top_group = len(mapping.keys())

        for j, i in enumerate(list(mapping.keys())[:show_top_group]):
            _obj, nodelist = [], []
            _net = net.copy()
            to_remove_edges = [(u, v) for k, (u, v) in enumerate(net.edges()) if labels[k] != i]
            _net.remove_edges_from(to_remove_edges)
            _net.remove_nodes_from(list(nx.isolates(_net)))
            for u, v in _net.edges():
                radius = net[u][v]['weight'] / edge_norm
                if abs(net[u][v]['weight']) >= threshold:
                    # NOTE(review): i2color is keyed by cluster label but is
                    # indexed with the group position j -- kept as in the
                    # original, confirm the intended colour assignment.
                    _obj += [CYLINDER, *node2CA[u], *node2CA[v], radius, *i2color[j], *i2color[j]]
                    nodelist += [u, v]
            _obj += [COLOR, *node_color]
            nodelist = set(nodelist)
            selnodes += ''.join([node2id[u] for u in nodelist])[4:]
            for u in nodelist:
                x, y, z = node2CA[u]
                _obj += [SPHERE, x, y, z, r]
            cmd.load_cgo(_obj, 'Group{}'.format(j + 1))

    #Default edge coloring
    else:
        obj1, obj2, nodelist = [], [], []
        for u, v in net.edges():
            radius = net[u][v]['weight'] / edge_norm
            if abs(net[u][v]['weight']) >= threshold:
                # An explicit 'color' edge attribute wins over the weight sign
                if 'color' in net[u][v]:
                    if net[u][v]['color'] == 'r':
                        obj1 += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                    else:
                        obj2 += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                else:
                    if net[u][v]['weight'] <= 0:
                        obj1 += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color1, *edge_color1]
                    else:
                        obj2 += [CYLINDER, *node2CA[u], *node2CA[v], radius, *edge_color2, *edge_color2]
                nodelist += [u, v]
        name1, name2 = map(name_edges, [name1, name2], [path1, path2])
        cmd.load_cgo(obj1, name1 + '_edges')
        cmd.load_cgo(obj2, name2 + '_edges')

        #Drawing nodes
        obj = [COLOR, *node_color]
        nodelist = set(nodelist)
        selnodes = ''.join([node2id[u] for u in nodelist])[4:]
        for u in nodelist:
            x, y, z = node2CA[u]
            obj += [SPHERE, x, y, z, r]

        cmd.load_cgo(obj, name_nodes)

    #Creating text for labeling components
    if label_compo != '' or compos_to_excel is not None:
        if compos_to_excel is not None:
            rows_list = []
        objtxt = []
        axes = -np.array(cmd.get_view()[:9]).reshape(3, 3)
        components_list = [net.subgraph(c).copy() for c in nx.connected_components(net)]
        diameters = [nx.diameter(c) for c in components_list]
        # Components are numbered by decreasing diameter
        for i, j in enumerate(np.argsort(diameters)[::-1]):
            row_dict = {}
            c = components_list[j]
            sses = sorted(list(set([node2SS[node] for node in c])))
            if compos_to_excel is not None:
                row_dict['Secondary structure elements'] = ','.join(sses)
                row_dict['Vanishing point'] = np.max([abs(net[u][v]['weight']) for u, v in c.edges()])
                row_dict['Diameter'] = nx.diameter(c)
                row_dict['Size'] = len(c.edges())
                row_dict['Size rank'] = i + 1

            else:
                print('Component {}\n'.format(i + 1), ', '.join(sses))
                print('Size (number of edges) {}'.format(len(c.edges())))
                print('Vanishing point: {}'.format(np.max([abs(net[u][v]['weight']) for u, v in c.edges()])))
            if 'h' in str(label_compo):
                methods = ['eigenvector', 'hits_hub', 'hits_authority', 'pagerank', 'betweenness', 'katz']
                hubs = [get_hubs(c, method) for method in methods]
                if compos_to_excel is not None:
                    row_dict.update(dict(zip(methods, hubs)))
                else:
                    print(dict(zip(methods, hubs)))
            if 'c' in str(label_compo):
                # Anchor the text next to the first node of the component
                pos = np.array(node2CA[next(c.__iter__())]) + (axes[0])
                cyl_text(objtxt, plain, pos, 'Component {}'.format(i + 1), radius=0.1, color=[0, 0, 0], axes=axes)
            if compos_to_excel:
                rows_list.append(row_dict)
        if compos_to_excel:
            df = pd.DataFrame(rows_list)
            df.to_excel(compos_to_excel)
        if 's' in str(label_compo):
            for ss in SS2nodelist:
                nodelist = SS2nodelist[ss]
                print(mapss[ss], ': ', ('{}--{}'.format(nodelist[0], nodelist[-1]) if len(nodelist) > 1 else nodelist[0]))

        cmd.set("cgo_line_radius", 0.03)
        cmd.load_cgo(objtxt, 'txt')

    #labeling
    # The default is the string '0' and values may arrive as strings, so the
    # mode is normalized to an int before being compared.
    try:
        label_mode = int(labeling)
    except (TypeError, ValueError):
        label_mode = 0
    if label_mode == 1:
        # NOTE(review): `t2o` is a local helper; confirm that it is visible to
        # the namespace PyMOL evaluates label expressions in.
        cmd.label(selection=selnodes, expression="t2o(resn)+resi")
    if label_mode == 3:
        cmd.label(selection=selnodes, expression="resn+resi")

    #Summing
    if sum:
        print('Sum of contacts lost: ', np.sum(pertmat))

    if printall:
        print([(u, v, net[u][v]) for u, v in net.edges()])
Example #15
0
def pca_plot(
    aln_object,
    ref="all",
    state=0,
    maxlabels=20,
    size=20,
    invert="",
    which=(0, 1),
    alpha=0.75,
    filename=None,
    quiet=1,
    load_b=0,
):
    """
DESCRIPTION

    Principal Component Analysis of superposed conformations. The atoms and
    their mapping between objects are taken from an alignment object. Only
    alignment columns that contain every object are used. The principal
    components are computed from the reference objects; all other aligned
    objects are projected onto them. The result is a 2d scatter plot of two
    principal components.

USAGE

    pca_plot aln_object [, ref [, state [, maxlabels ]]]

ARGUMENTS

    aln_object = string: name of alignment object, defines the selection
    and the atom mapping between objects. If it is not an alignment object,
    it is used as a selection to create one with extra_fit.

    ref = string: object names for which to calculate PCA for {default: all}

    state = integer: 0 uses all states, a negative value uses the current
    state {default: 0}

    maxlabels = integer: label dots in plot if maxlabels<0 or number of models
    not more than maxlabels {default: 20}

    size = float: size of plot points in px^2 {default: 20}

    invert = string: invert plotting axes x, y or xy {default: ''}

    which = (int,int): indices of principal components to plot {default: (0,1)}

    alpha = float: opacity of plotting points {default: 0.75}

    filename = string: if given, plot to file {default: None}

    quiet = integer: if 0, print the reference and other objects {default: 1}

    load_b = integer: if non-zero, store the per-atom magnitude of the first
    plotted principal component in the b-factor and color by it {default: 0}

EXAMPLE

    fetch 1ake 4ake 1dvr 1ak2, async=0
    split_chains
    extra_fit (*_*) and name CA, reference=1ake_A, cycles=0, object=aln
    pca_plot aln, 1ake_* 4ake_*

    fetch 1ubq 2k39, async=0
    align 2k39, 1ubq and guide, cycles=0, object=aln2
    color blue, 1ubq
    color orange, 2k39
    pca_plot aln2, filename=pca-ubq.pdf
    """
    from numpy import array, dot
    from numpy.linalg import svd, LinAlgError
    from . import matplotlib_fix
    from matplotlib.pyplot import figure

    state, quiet = int(state), int(quiet)
    maxlabels = int(maxlabels)
    if cmd.is_string(which):
        which = cmd.safe_list_eval(which)

    # Not an alignment object: treat the argument as a selection and build a
    # temporary alignment from it without moving any coordinates.
    known_objects = cmd.get_names_of_type("object:")
    if aln_object not in known_objects:
        print(" Warning: first argument should be an alignment object")

        from .fitting import extra_fit

        fit_selection, aln_object = aln_object, cmd.get_unused_name("aln")
        extra_fit(fit_selection, cycles=0, transform=0, object=aln_object)

    if state > 0:
        states = [state]
    elif state < 0:
        states = [cmd.get_state()]
    else:
        states = list(range(1, cmd.count_states() + 1))

    models = cmd.get_object_list(aln_object)
    references = set(cmd.get_object_list("(" + ref + ")")).intersection(models)
    others = set(models).difference(references)
    aln = cmd.get_raw_alignment(aln_object)

    if not quiet:
        print(" PCA References:", ", ".join(references))
        print(" PCA Others:", ", ".join(others))

    if not references:
        print(" PCA Error: No reference objects")
        raise CmdException

    # Only gap-free columns give every model the same number of coordinates
    n_models = len(models)
    coords = {model: [] for model in models}
    aln = [column for column in aln if len(column) == n_models]

    # coords[model] receives one flat x,y,z,... list per state
    for current_state in states:
        xyz_by_atom = {}
        cmd.iterate_state(current_state, aln_object, "idx2xyz[model,index] = (x,y,z)", space={"idx2xyz": xyz_by_atom})

        for column in aln:
            for atom_id in column:
                xyz = xyz_by_atom.get(atom_id)
                if xyz is None:
                    # atom has no coordinates in this state
                    continue

                per_state = coords[atom_id[0]]
                if len(per_state) < current_state:
                    per_state.append([])
                per_state[-1].extend(xyz)

    def stack_coords(names):
        # one row per (model, state)
        return array([vector for name in names for vector in coords[name]])

    def state_labels(names):
        # (model, 1-based position) for every row of stack_coords(names)
        return [(name, n + 1) for name in names for n, _ in enumerate(coords[name])]

    X = stack_coords(references)
    Y = stack_coords(others)
    center = X.mean(0)
    X = X - center

    try:
        U, L, V = svd(X)
    except LinAlgError as e:
        print(" PCA Error: ", e)
        raise CmdException

    if int(load_b):
        cmd.alter("byobj " + aln_object, "b=-0.01")
        # length of the component's displacement vector for every column
        component = V[which[0]]
        b_array = (component.reshape((-1, 3)) ** 2).sum(1) ** 0.5
        b_dict = {atom_id: b for column, b in zip(aln, b_array) for atom_id in column}
        cmd.alter(aln_object, "b=b_dict.get((model,index), -0.01)", space={"b_dict": b_dict})
        cmd.color("yellow", "byobj " + aln_object)
        cmd.spectrum("b", "blue_red", aln_object + " and b > -0.01")

    x_list, y_list, colors, text_list = [], [], [], []

    def collect_points(data, labels):
        # project onto the principal axes and keep the two requested ones
        projected = dot(data, V.T)[:, which]
        show_text = maxlabels < 0 or len(projected) <= maxlabels
        for (pc_x, pc_y), (model, model_state) in zip(projected, labels):
            x_list.append(pc_x)
            y_list.append(pc_y)
            colors.append(get_model_color(model))
            text_list.append("%s(%d)" % (model, model_state) if show_text else None)

    collect_points(X, state_labels(references))
    if len(Y) > 0:
        collect_points(Y - center, state_labels(others))

    if "x" in invert:
        x_list = [-value for value in x_list]
    if "y" in invert:
        y_list = [-value for value in y_list]

    fig = figure()
    plt = fig.add_subplot(111, xlabel="PC %d" % (which[0] + 1), ylabel="PC %d" % (which[1] + 1))
    plt.scatter(x_list, y_list, float(size), colors, linewidths=0, alpha=float(alpha))

    for px, py, text in zip(x_list, y_list, text_list):
        if text is None:
            continue
        plt.text(px, py, text, horizontalalignment="left")

    _showfigure(fig, filename, quiet)
Example #16
0
def rmsd(selection="all", chains="", doAlign=1, doPretty=1,
         algorithm=1, guide=1, method="super", quiet=1,
         colorstyle="blue_red", colormode=""):
    """
DESCRIPTION

    Align every pair of objects in the selection, store the mean distance
    between aligned atoms in the b-factor of each atom (-1 for atoms that
    were not aligned) and optionally color the structures by that value.

ARGUMENTS

    selection = string: atom selection {default: all}

    chains = string: chain identifiers, one character per chain (like
    "ab"); if given, every chain is aligned separately {default: ""}

    doAlign = 0 or 1: superpose the objects before measuring distances
    {default: 1}

    doPretty = 0 or 1: orient, show as cartoon and color by b-factor
    {default: 1}

    algorithm = 0 or 1: 0 averages the distances of an atom to its aligned
    partner in every other object; 1 additionally tries to include the
    distances of those partner atoms {default: 1}

    guide = 0 or 1: restrict the superposition to guide atoms {default: 1}

    method = string: name of the alignment command to use {default: super}

    quiet = 0 or 1: if 0, print minimum, maximum and average distance
    {default: 1}

    colorstyle = string: palette passed to "spectrum" {default: blue_red}

    colormode = string: "" uses the full distance range, "lowshow" caps the
    color range at the average distance, "highshow" starts the color range
    at the average distance {default: ""}

EXAMPLE

    rmsd("1ake or 4ake", chains="A", method="super")

    """
    from chempy import cpv

    # initial parameters
    doAlign, doPretty = int(doAlign), int(doPretty)
    guide, quiet = int(guide), int(quiet)
    algorithm = int(algorithm)

    # get suitable align method
    try:
        align = cmd.keyword[method][0]
    except (KeyError, IndexError, TypeError):
        print("Error: no such method: %s" % (method,))
        raise CmdException

    # selection suffix which restricts an object to its guide atoms
    guide_sele = " and guide" if guide else ""

    # names of all objects which have atoms in the selection
    objects = set()
    cmd.iterate_state(-1, selection, "objects.add(model) ",
                      space={"objects": objects})

    # (model, index) -> (x, y, z); refreshed for every pair because the
    # superposition moves the coordinates
    idx2coords = dict()

    # (model, index) -> most recently measured distance to an aligned atom.
    # Collected across ALL object pairs so that the color range and the
    # printed statistics do not depend on which pair happened to come last.
    total_values = {}

    # compared rmsd tree for each object, like
    # {obj: {obj1: {(model, index): [(model1, index1), distance]}}}
    rmsd_stored = dict()

    def measure_pair(obj, obj1, fit_suffix, aln_suffix):
        """Align obj1 onto obj and record the distances of aligned atoms."""
        if doAlign:
            align(obj1 + fit_suffix, obj + fit_suffix)
        # second pass without transformation, only to get the atom mapping
        align(obj1 + aln_suffix,
              obj + aln_suffix,
              cycles=0,
              transform=0,
              object="aln")
        cmd.iterate_state(-1,
                          selection,
                          "idx2coords[model,index] = (x,y,z)",
                          space={"idx2coords": idx2coords})
        if cmd.count_atoms("?aln", 1, 1) == 0:
            # this should ensure that "aln" will be available as selectable object
            cmd.refresh()
        pair_stored = rmsd_stored[obj][obj1]
        for col in cmd.get_raw_alignment("aln"):
            assert len(col) == 2
            dist = cpv.distance(idx2coords[col[0]], idx2coords[col[1]])
            for idx in col:
                total_values[idx] = dist
            # key is always the atom of obj, value the partner atom in obj1
            if col[0][0] == obj:
                pair_stored[col[0]] = [col[1], dist]
            else:
                pair_stored[col[1]] = [col[0], dist]
        cmd.delete("aln")

    for obj in objects:
        rmsd_stored[obj] = {}
        for obj1 in objects:
            if obj == obj1:
                continue
            rmsd_stored[obj][obj1] = {}
            if chains:
                for eachchain in chains:
                    chain_sele = " and chain " + eachchain
                    # NOTE(review): as in the original code, the mapping
                    # alignment of the chain branch is not restricted to
                    # guide atoms -- confirm that this is intended.
                    measure_pair(obj, obj1, guide_sele + chain_sele,
                                 chain_sele)
            else:
                measure_pair(obj, obj1, guide_sele, guide_sele)

    def b_replace(model, index):
        """Return the mean stored distance of atom (model, index) or -1."""
        key = (model, index)
        n = 0
        bsum = 0.0

        for obj1 in objects:
            if model == obj1:
                continue
            pair_stored = rmsd_stored[model][obj1]
            if key not in pair_stored:
                continue
            (nextmodel, nextindex), dist = pair_stored[key]
            bsum += dist
            n += 1
            if not algorithm:
                continue
            for nextobj1 in objects:
                # NOTE(review): the partner atom is stored from obj1, so
                # "nextmodel != obj1" looks like it can never be true and
                # this branch never contributes -- condition kept unchanged,
                # please confirm the intended comparison.
                if nextmodel != nextobj1 and nextmodel != obj1:
                    partner_key = (nextmodel, nextindex)
                    partner_stored = rmsd_stored[nextmodel][nextobj1]
                    if partner_key in partner_stored:
                        bsum += partner_stored[partner_key][1]
                        n += 1

        if n == 0:
            return -1
        return bsum / n

    cmd.alter(selection, 'b = b_replace(model, index)',
              space={"b_replace": b_replace})

    if not total_values:
        # nothing was aligned (e.g. fewer than two objects in the selection),
        # so there is no distance range to color by or to report
        if doPretty or not quiet:
            print(" ColorByRMSD: no aligned atoms, nothing to color or report")
        return

    distances = list(total_values.values())
    mini = min(distances)
    maxi = max(distances)
    average = sum(distances) / len(distances)

    if doPretty:
        color_min, color_max = mini, maxi
        if colormode:
            if colormode == "lowshow":
                color_max = average
                print("This is lowshow")
            elif colormode == "highshow":
                color_min = average
                print("This is highshow")
            else:
                raise CmdException

        # parentheses keep selections which contain "or" intact
        cmd.orient(selection)
        cmd.show_as('cartoon', 'byobj (' + selection + ')')
        cmd.color('gray', selection)
        cmd.spectrum('b',
                     colorstyle,
                     '(' + selection + ') and b > -0.5',
                     minimum=color_min,
                     maximum=color_max)

    if not quiet:
        print(" ColorByRMSD: Minimum Distance: %.2f" % (mini,))
        print(" ColorByRMSD: Maximum Distance: %.2f" % (maxi,))
        print(" ColorByRMSD: Average Distance: %.2f" % (average,))
Example #17
0
def pca_plot(aln_object, ref='all', state=0, maxlabels=20, size=20, invert='',
        which=(0,1), alpha=0.75, filename=None, quiet=1, load_b=0):
    '''
DESCRIPTION

    Principal Component Analysis on a set of superposed conformations. The
    atoms and the atom mapping between objects are taken from an alignment
    object. By default all states of all objects are used. The result is
    shown as a 2d scatter plot of two principal components.

USAGE

    pca_plot aln_object [, ref [, state [, maxlabels ]]]

ARGUMENTS

    aln_object = string: name of alignment object, defines the selection
    and the atom mapping between objects

    ref = string: objects from which the principal components are
    calculated; all other aligned objects are only projected {default: all}

    state = integer: 0 for all states, negative for the current state
    {default: 0}

    maxlabels = integer: label dots in plot if maxlabels<0 or number of
    models not more than maxlabels {default: 20}

    size = float: size of plot points in px^2 {default: 20}

    invert = string: invert plotting axes x, y or xy {default: ''}

    which = (int,int): indices of principal components to plot
    {default: (0,1)}

    alpha = float: opacity of plotting points {default: 0.75}

    filename = string: if given, plot to file {default: None}

    load_b = 0/1: write the per-atom magnitude of the first plotted
    component to the b-factor and color by it {default: 0}

EXAMPLE

    fetch 1ake 4ake 1dvr 1ak2, async=0
    split_chains
    extra_fit (*_*) and name CA, reference=1ake_A, cycles=0, object=aln
    pca_plot aln, 1ake_* 4ake_*

    fetch 1ubq 2k39, async=0
    align 2k39, 1ubq and guide, cycles=0, object=aln2
    color blue, 1ubq
    color orange, 2k39
    pca_plot aln2, filename=pca-ubq.pdf
    '''
    from numpy import array, dot
    from numpy.linalg import svd, LinAlgError
    from . import matplotlib_fix
    from matplotlib.pyplot import figure

    state, quiet = int(state), int(quiet)
    maxlabels = int(maxlabels)
    if cmd.is_string(which):
        which = cmd.safe_list_eval(which)

    # fall back to building an alignment object from a plain selection
    if aln_object not in cmd.get_names_of_type('object:alignment'):
        print(' Warning: first argument should be an alignment object')

        from .fitting import extra_fit

        selection = aln_object
        aln_object = cmd.get_unused_name('aln')
        extra_fit(selection, cycles=0, transform=0, object=aln_object)

    # which states to collect coordinates from
    if state > 0:
        states = [state]
    elif state < 0:
        states = [cmd.get_state()]
    else:
        states = list(range(1, cmd.count_states()+1))

    models = cmd.get_object_list(aln_object)
    references = set(cmd.get_object_list('(' + ref + ')')).intersection(models)
    others = set(models).difference(references)
    raw_columns = cmd.get_raw_alignment(aln_object)

    if not quiet:
        print(' PCA References:', ', '.join(references))
        print(' PCA Others:', ', '.join(others))

    if not references:
        print(' PCA Error: No reference objects')
        raise CmdException

    # only alignment columns without gaps are usable
    n_models = len(models)
    full_columns = [col for col in raw_columns if len(col) == n_models]

    # model -> list (one entry per collected state) of flat xyz lists
    coords = {model: [] for model in models}

    for current_state in states:
        xyz_by_idx = {}
        cmd.iterate_state(current_state, aln_object,
                'idx2xyz[model,index] = (x,y,z)',
                space={'idx2xyz': xyz_by_idx})

        for col in full_columns:
            for idx in col:
                if idx in xyz_by_idx:
                    per_state = coords[idx[0]]
                    # first atom seen for this model in this state
                    if len(per_state) < current_state:
                        per_state.append([])
                    per_state[-1].extend(xyz_by_idx[idx])

    def iter_conformations(names):
        # yields (flat coordinates, model name, 1-based conformation number)
        for model in names:
            for number, flat_xyz in enumerate(coords[model]):
                yield flat_xyz, model, number + 1

    ref_rows = list(iter_conformations(references))
    other_rows = list(iter_conformations(others))

    X = array([row[0] for row in ref_rows])
    Y = array([row[0] for row in other_rows])
    center = X.mean(0)
    X = X - center

    try:
        U, L, V = svd(X)
    except LinAlgError as e:
        print(' PCA Error: ', e)
        raise CmdException

    if int(load_b):
        cmd.alter('byobj ' + aln_object, 'b=-0.01')
        # length of the per-atom displacement vector of the first component
        component = V[which[0]]
        magnitudes = (component.reshape((-1, 3))**2).sum(1)**0.5
        b_dict = {}
        for col, magnitude in zip(full_columns, magnitudes):
            for idx in col:
                b_dict[idx] = magnitude
        cmd.alter(aln_object, 'b=b_dict.get((model,index), -0.01)',
                space={'b_dict': b_dict})
        cmd.color('yellow', 'byobj ' + aln_object)
        cmd.spectrum('b', 'blue_red', aln_object + ' and b > -0.01')

    X_labels = [row[1:3] for row in ref_rows]
    Y_labels = [row[1:3] for row in other_rows]

    x_list, y_list, colors, text_list = [], [], [], []

    def plot_pc_2d(points, labels):
        # project onto the principal components and collect plot data
        projected = dot(points, V.T)[:,which]
        with_text = maxlabels < 0 or len(projected) <= maxlabels
        for (x, y), (model, number) in zip(projected, labels):
            x_list.append(x)
            y_list.append(y)
            colors.append(get_model_color(model))
            text_list.append('%s(%d)' % (model, number) if with_text else None)

    plot_pc_2d(X, X_labels)
    if len(Y) > 0:
        Y = Y - center
        plot_pc_2d(Y, Y_labels)

    if 'x' in invert:
        x_list = [-x for x in x_list]
    if 'y' in invert:
        y_list = [-y for y in y_list]

    fig = figure()
    axes = fig.add_subplot(111,
            xlabel='PC %d' % (which[0]+1),
            ylabel='PC %d' % (which[1]+1))
    axes.scatter(x_list, y_list, float(size), colors, linewidths=0,
            alpha=float(alpha))

    for x, y, text in zip(x_list, y_list, text_list):
        if text is None:
            continue
        axes.text(x, y, text, horizontalalignment='left')

    _showfigure(fig, filename, quiet)
Example #18
0
def morpheasy(source,
              target,
              source_state=0,
              target_state=0,
              name=None,
              refinement=5,
              quiet=1):
    '''
DESCRIPTION

    Morph source to target, based on sequence alignment

USAGE

    morpheasy source, target [, source_state [, target_state [, name ]]]

ARGUMENTS

    source = string: atom selection of the start conformation

    target = string: atom selection of the end conformation

    source_state = integer: state of source, values < 1 use the state
    reported by get_selection_state {default: 0}

    target_state = integer: state of target, values < 1 use the state
    reported by get_selection_state {default: 0}

    name = string: name of the morph object to create {default: unused
    name with prefix "morph"}

    refinement = integer: passed on to rigimol.morph {default: 5}

    quiet = 0/1: currently unused {default: 1}

RETURNS

    Name of the morph object, or None if epymol is not available.

EXAMPLE

    fetch 1akeA 4akeA, async=0
    extra_fit
    morpheasy 1akeA, 4akeA
    '''
    try:
        from epymol import rigimol
    except ImportError:
        print('No epymol available, please use a "Incentive PyMOL" build')
        print('You may use "morpheasy_linear" instead')
        return

    from .editing import mse2met
    from .querying import get_selection_state

    # arguments
    source_state = int(source_state)
    target_state = int(target_state)
    refinement = int(refinement)
    quiet = int(quiet)

    if source_state < 1:
        source_state = get_selection_state(source)
    if target_state < 1:
        target_state = get_selection_state(target)

    # temporary objects
    # IMPORTANT: cmd.get_raw_alignment does not work with underscore object names!
    alnobj = cmd.get_unused_name('_aln')
    so_obj = cmd.get_unused_name('source')  # see above
    ta_obj = cmd.get_unused_name('target')  # see above
    so_sel = cmd.get_unused_name('_source_sel')
    ta_sel = cmd.get_unused_name('_target_sel')

    try:
        cmd.create(so_obj, source, source_state, 1)
        cmd.create(ta_obj, target, target_state, 1)
        mse2met(so_obj)
        mse2met(ta_obj)

        # align sequence
        cmd.align(ta_obj,
                  so_obj,
                  object=alnobj,
                  cycles=0,
                  transform=0,
                  mobile_state=1,
                  target_state=1)
        cmd.refresh()
        cmd.select(so_sel, '%s and %s' % (so_obj, alnobj))
        cmd.select(ta_sel, '%s and %s' % (ta_obj, alnobj))

        # bidirectional mapping between aligned source and target atoms
        alnmap = dict(cmd.get_raw_alignment(alnobj))
        alnmap.update(dict((v, k) for (k, v) in alnmap.items()))

        # copy source atom identifiers to temporary target
        idmap = dict()
        cmd.iterate(so_sel,
                    'idmap[model,index] = (segi,chain,resi,resn,name)',
                    space={'idmap': idmap})
        cmd.alter(ta_sel,
                  '(segi,chain,resi,resn,name) = idmap[alnmap[model,index]]',
                  space={
                      'idmap': idmap,
                      'alnmap': alnmap
                  })

        # remove unaligned
        cmd.remove('%s and not %s' % (so_obj, so_sel))
        cmd.remove('%s and not %s' % (ta_obj, ta_sel))
        assert cmd.count_atoms(so_obj) == cmd.count_atoms(ta_obj)
        cmd.sort(so_obj)
        cmd.sort(ta_obj)

        # append target to source as 2-state morph-in object
        cmd.create(so_obj, ta_obj, 1, 2)

        # morph
        if name is None:
            name = cmd.get_unused_name('morph')

        # "async" is a reserved word since Python 3.7, so it cannot be
        # written as a literal keyword argument any more.
        # NOTE(review): assumes that current rigimol.morph follows PyMOL's
        # rename to "async_" -- confirm against the installed epymol. Older
        # builds reject the new spelling with a TypeError before doing any
        # work, in which case the legacy keyword is passed instead.
        try:
            rigimol.morph(so_obj, name, refinement=refinement, async_=0)
        except TypeError:
            rigimol.morph(so_obj, name, refinement=refinement,
                          **{'async': 0})
    finally:
        # clean up, also if any of the steps above failed
        for obj in [alnobj, so_obj, so_sel, ta_obj, ta_sel]:
            cmd.delete(obj)

    return name