Example #1
0
def alignSequencesByChain(PDBs, **kwargs):
    """
    Runs buildMSA for each chain and optionally joins the results.
    Returns either a single MSA or a dictionary containing an MSA for each chain.

    The alignment of every chain after the first is reordered so that its
    sequences follow the order of the first chain's alignment, and the
    reordered alignment is written to ``chain_<chid>.aln``.

    :arg PDBs: a list or array of :class:`Atomic` instances that all have
        the same number of chains
    :type PDBs: list or :class:`~numpy.ndarray`

    :arg join_chains: whether to join chain alignments
        default is True
    :type join_chains: bool

    :arg join_char: a character for joining chain alignments
        default is '/' as used by PIR format alignments
    :type join_char: str

    :raises TypeError: if *PDBs* is not a list or array, or if one of its
        entries is not an :class:`Atomic` instance
    :raises ValueError: if *PDBs* is empty, or if the entries do not all
        have the same number of chains
    """
    if not isinstance(PDBs, (list, ndarray)):
        raise TypeError('PDBs should be a list or array')

    # len() is valid for lists and arrays alike, whereas ``PDBs == []`` is an
    # elementwise comparison for arrays and cannot be used as a truth value.
    if len(PDBs) == 0:
        raise ValueError('PDBs should not be an empty list')

    pdbs = []
    chains = []
    for pdb in PDBs:
        if not isinstance(pdb, Atomic):
            raise TypeError(
                'each entry in PDBs must be a :class:`Atomic` instance')
        pdbs.append(pdb)

        # iterating the hierarchical view yields the chains of the structure
        chains.append(list(pdb.getHierView()))

        if len(chains[-1]) != len(chains[0]):
            raise ValueError('all pdbs should have the same number of chains')

    # label = <title up to the first underscore>_<all chain IDs concatenated>
    labels = []
    for pdb, pdb_chains in zip(pdbs, chains):
        chids = ''.join(chain.getChid() for chain in pdb_chains)
        labels.append(pdb.getTitle().split('_')[0] + '_' + chids)

    chains = array(chains)
    chain_alignments = []
    alignments = {}
    reference_labels = None
    for j in range(len(chains[0])):
        prefix = 'chain_' + chains[0, j].getChid()
        msa = buildMSA(chains[:, j], title=prefix, labels=labels)

        if j == 0:
            # the sequence order of the first alignment is the reference
            reference_labels = [sequence.getLabel() for sequence in msa]
        else:
            # make all alignments have the sequences in the same order as the 0th
            msa_rows = msa.getArray()
            msaarr = array([msa_rows[msa.getIndex(label)]
                            for label in reference_labels])
            msa = MSA(msaarr,
                      title='reordered_msa_1',
                      labels=list(reference_labels))
            writeMSA(prefix + '.aln', msa)

        chain_alignments.append(msa)

        # after reordering, create the alignments dictionary; the key is the
        # j-th character of the chain IDs stored in the first reference label
        alignments[reference_labels[0].split('_')[1][j]] = msa

    join_chains = kwargs.get('join_chains', True)
    join_char = kwargs.get('join_char', '/')
    if join_chains:
        # per_chain[i][k] is the aligned sequence of entry k in chain
        # alignment i; transposing gives, for every entry, its pieces in
        # chain order, which are then joined into one row.
        per_chain = [[str(sequence) for sequence in chain_alignment]
                     for chain_alignment in chain_alignments]
        orig_labels = [sequence.getLabel()
                       for sequence in chain_alignments[0]]

        joined_msaarr = array([list(join_char.join(pieces))
                               for pieces in zip(*per_chain)])

        result = MSA(joined_msaarr, title='joined_chains', labels=orig_labels)
        result = refineMSA(result, colocc=1e-9)  # remove gap-only cols

    else:
        result = alignments

    return result
Example #2
0
def alignSequencesByChain(PDBs, **kwargs):
    """
    Runs :func:`buildMSA` for each chain and optionally joins the results.
    Returns either a single :class:`MSA` or a dictionary containing an :class:`MSA` for each chain.

    A single :class:`MSA` is returned when the chain alignments are joined
    or when the structures have only one chain; otherwise the dictionary
    maps each chain ID of the first structure to its alignment.

    :arg PDBs: a list or other array-like of :class:`Atomic` instances that
        all have the same number of chains
    :type PDBs: list

    :arg join_chains: whether to join chain alignments
        default is True
    :type join_chains: bool

    :arg join_char: a character for joining chain alignments
        default is '/' as used by PIR format alignments
    :type join_char: str

    :raises TypeError: if *PDBs* is a scalar, or if one of its entries is
        not an :class:`Atomic` instance
    :raises ValueError: if *PDBs* is empty, or if the entries do not all
        have the same number of chains
    """

    if isscalar(PDBs):
        raise TypeError('PDBs should be array-like')

    # Materialise the input before testing for emptiness: truth-testing an
    # array with more than one element raises instead of answering.
    entries = [] if PDBs is None else list(PDBs)
    if not entries:
        raise ValueError('PDBs should not be empty')

    pdbs = []
    chains = []
    for pdb in entries:
        if not isinstance(pdb, Atomic):
            raise TypeError(
                'each entry in PDBs must be a :class:`Atomic` instance')
        pdbs.append(pdb)

        # iterating the hierarchical view yields the chains of the structure
        chains.append(list(pdb.getHierView()))

        if len(chains[-1]) != len(chains[0]):
            raise ValueError('all pdbs should have the same number of chains')

    # label = <title>_<all chain IDs concatenated>
    labels = []
    for pdb, pdb_chains in zip(pdbs, chains):
        chids = ''.join(chain.getChid() for chain in pdb_chains)
        labels.append(pdb.getTitle() + '_' + chids)

    chains = array(chains)
    chain_alignments = []
    alignments = {}
    for j in range(len(chains[0])):
        # Key the dictionary by the chain ID itself; recovering it by
        # splitting the label breaks when the title contains an underscore.
        chid = chains[0, j].getChid()
        msa = buildMSA(chains[:, j], title='chain_' + chid, labels=labels)
        msa = refineMSA(msa, colocc=1e-9)  # remove gap-only cols

        chain_alignments.append(msa)
        alignments[chid] = msa

    join_chains = kwargs.get('join_chains', True)
    join_char = kwargs.get('join_char', '/')

    # nothing to join when there is only one chain
    if len(chains[0]) == 1:
        join_chains = False

    if join_chains:
        # per_chain[i][k] is the aligned sequence of structure k in chain
        # alignment i.  Transposing gives one row per structure, made of its
        # pieces in chain order, which matches the one-label-per-structure
        # list passed below.
        # NOTE(review): assumes every chain alignment keeps the sequences in
        # the order of ``labels`` - confirm against buildMSA/refineMSA.
        per_chain = [[str(sequence) for sequence in chain_alignment]
                     for chain_alignment in chain_alignments]
        joined_msaarr = [join_char.join(pieces) for pieces in zip(*per_chain)]

        result = MSA(joined_msaarr,
                     title='joined_chains',
                     labels=[label.split('_')[0] for label in labels])

    else:
        result = alignments
        if len(result) == 1:
            # unwrap the only alignment instead of returning a 1-item dict
            result = result[list(result.keys())[0]]

    return result
Example #3
0
def alignSequencesByChain(PDBs, **kwargs):
    """
    Runs :func:`buildMSA` for each chain and optionally joins the results.
    Returns either a single :class:`MSA` or a dictionary containing an :class:`MSA` for each chain.

    A single :class:`MSA` is returned when the chain alignments are joined
    or when the structures have only one chain; otherwise the dictionary
    maps each chain ID of the first structure to its alignment.

    :arg PDBs: a list or other array-like of :class:`Atomic` instances that
        all have the same number of chains
    :type PDBs: list

    :arg join_chains: whether to join chain alignments
        default is True
    :type join_chains: bool

    :arg join_char: a character for joining chain alignments
        default is '/' as used by PIR format alignments
    :type join_char: str

    :raises TypeError: if *PDBs* is a scalar, or if one of its entries is
        not an :class:`Atomic` instance
    :raises ValueError: if *PDBs* is empty, or if the entries do not all
        have the same number of chains
    """

    if isscalar(PDBs):
        raise TypeError('PDBs should be array-like')

    # ``not PDBs`` raises for arrays holding more than one element, so the
    # emptiness test is done on a plain list copy of the input.
    structures = [] if PDBs is None else list(PDBs)
    if len(structures) == 0:
        raise ValueError('PDBs should not be empty')

    chains = []
    for structure in structures:
        if not isinstance(structure, Atomic):
            raise TypeError('each entry in PDBs must be a :class:`Atomic` instance')

        # iterating the hierarchical view yields the chains of the structure
        chains.append(list(structure.getHierView()))

        if len(chains[-1]) != len(chains[0]):
            raise ValueError('all pdbs should have the same number of chains')

    # label = <title>_<all chain IDs concatenated>
    labels = [
        structure.getTitle() + '_' + ''.join(chain.getChid() for chain in own_chains)
        for structure, own_chains in zip(structures, chains)
    ]

    chains = array(chains)
    chain_alignments = []
    alignments = {}
    for j in range(len(chains[0])):
        # The chain ID is used directly as the dictionary key; splitting the
        # label on '_' picks the wrong field for titles such as '1abc_ca'.
        chid = chains[0, j].getChid()
        msa = buildMSA(chains[:, j], title='chain_' + chid, labels=labels)
        msa = refineMSA(msa, colocc=1e-9)  # remove gap-only cols

        chain_alignments.append(msa)
        alignments[chid] = msa

    join_chains = kwargs.get('join_chains', True)
    join_char = kwargs.get('join_char', '/')

    # nothing to join when there is only one chain
    if len(chains[0]) == 1:
        join_chains = False

    if join_chains:
        # Collect the aligned sequences chain by chain, then transpose so
        # that each joined row holds one structure's pieces in chain order;
        # this gives one row per entry of the label list passed below.
        # NOTE(review): assumes every chain alignment keeps the sequences in
        # the order of ``labels`` - confirm against buildMSA/refineMSA.
        sequences_by_chain = [
            [str(sequence) for sequence in chain_alignment]
            for chain_alignment in chain_alignments
        ]
        joined_msaarr = [
            join_char.join(pieces) for pieces in zip(*sequences_by_chain)
        ]

        result = MSA(joined_msaarr, title='joined_chains',
                     labels=[label.split('_')[0] for label in labels])

    else:
        result = alignments
        if len(result) == 1:
            # unwrap the only alignment instead of returning a 1-item dict
            result = result[list(result.keys())[0]]

    return result