Example #1
0
def retrive_relevant_poses() -> (dict, dict):
    """
    Cut every cohesin and dockerin sequence down to the 1OHZ-derived positions.

    Both alignments are read with read_multi_fastas. Each position in
    ``coh_poses_1ohz`` / ``doc_poses_1ohz`` is converted by the '1OHZ' entry's
    ``non_aligned_position_at_aligned`` and the converted positions are handed
    to ``get_aligned_positions`` of every sequence in the same alignment.

    :return: seq dicts for cohs and docs, holding only the relevant positions, determined by 1OHZ
    """
    cohs_old = read_multi_fastas(
        root_path + 'cohesins_from_rachel.fasta_aln', suffix_to_remove='/')
    docs_old = read_multi_fastas(
        root_path + 'dockerins_from_rachel.fasta_aln', suffix_to_remove='/')

    # the 1OHZ entries act as the reference for converting the positions
    coh_reference = cohs_old['1OHZ']
    coh_poses = [
        coh_reference.non_aligned_position_at_aligned(pos)
        for pos in coh_poses_1ohz
    ]
    doc_reference = docs_old['1OHZ']
    doc_poses = [
        doc_reference.non_aligned_position_at_aligned(pos)
        for pos in doc_poses_1ohz
    ]

    cohs_new = {
        coh_name: AASeq(
            string=''.join(coh_seq.get_aligned_positions(coh_poses)),
            name=coh_name)
        for coh_name, coh_seq in cohs_old.items()
    }
    docs_new = {
        doc_name: AASeq(
            string=''.join(doc_seq.get_aligned_positions(doc_poses)),
            name=doc_name)
        for doc_name, doc_seq in docs_old.items()
    }
    return cohs_new, docs_new
Example #2
0
    def __init__(self,
                 name: str,
                 coh_AASeq: AASeq,
                 doc_AASeq: AASeq,
                 purples: int,
                 j=False,
                 originals=None,
                 doc_wt=False):
        """
        Store the design's sequences and derive its wild-type names and switches.

        :param name: design name; when j is false it is split on '_' and the
            tokens around the 'A' token give the cohesin / dockerin wild types
        :param coh_AASeq: cohesin sequence
        :param doc_AASeq: dockerin sequence
        :param purples: stored as-is on the instance
        :param j: when true the cohesin wild type is fixed to '1ohz'
        :param originals: mapping looked up with name[:-3] + '.pdb.gz' when j
            is true and doc_wt is falsy
        :param doc_wt: explicit dockerin wild type, used only when j is true
        """
        self.name = name
        self.coh_AASeq = coh_AASeq
        self.doc_AASeq = doc_AASeq
        self.purples = purples

        if j:
            self.coh_wt = '1ohz'
            if doc_wt:
                self.doc_wt = doc_wt
            else:
                # recover the dockerin wild type from the original file name
                original_name = originals[name[:-3] + '.pdb.gz']
                self.doc_wt = original_name.split('_A_')[1].split('_')[0]
        else:
            tokens = name.split('_')
            a_pos = tokens.index('A')
            self.coh_wt = tokens[a_pos - 1]
            self.doc_wt = tokens[a_pos + 1]

        def as_switch(residues):
            # residues missing from type_dict are encoded as 'c'
            return ''.join(
                type_dict[res] if res in type_dict else 'c' for res in residues)

        coh_residues = coh_AASeq.get_positions(positions_dict['1ohz'])
        self.coh_switch = as_switch(coh_residues)
        doc_residues = doc_AASeq.get_positions(
            list(doc21ohz[self.doc_wt].keys()))
        self.doc_switch = as_switch(doc_residues)
Example #3
0
 def __init__(self, chain_id: str = None, residues: dict = None, non_residues: dict = None):
     """
     Create a chain from optional residue and non-residue mappings.

     :param chain_id: chain identifier, also used as the name of both sequences
     :param residues: mapping whose values expose ``res_type``; defaults to {}
     :param non_residues: mapping whose values expose ``res_type``; defaults to {}
     """
     self.chain_id = chain_id
     self.residues = residues if residues is not None else {}
     self.non_residues = non_residues if non_residues is not None else {}
     # Each sequence is built from its OWN mapping. The previous code built
     # non_residues_seq from `residues`, which duplicated the residue sequence
     # and raised AttributeError when only non_residues was supplied.
     self.seq = AASeq(''.join(r.res_type for r in self.residues.values()), name=chain_id)
     self.non_residues_seq = AASeq(''.join(r.res_type for r in self.non_residues.values()),
                                   name=chain_id)
Example #4
0
def write_multi_seqs_to_file(seqs: dict,
                             out_file: str,
                             query: AASeq = None,
                             no_dups: bool = True):
    """
    Write the sequences to out_file, one ``write()`` result per entry.

    The query, when given, is written first and any sequence equal to it is
    skipped. With no_dups, a sequence equal to one already written is skipped.

    :param seqs: {name: AASeq}
    :param out_file: file to write
    :param query: query AASeq
    :param no_dups: whether to refrain from duplicates or not
    :return: write fasta file
    """
    has_query = query is not None
    emitted = []
    with open(out_file, 'w+') as handle:
        if has_query:
            handle.write('%s\n' % query.write())
            emitted.append(query)
        for candidate in seqs.values():
            if has_query and query == candidate:
                continue
            if no_dups and candidate in emitted:
                continue
            handle.write('%s\n' % candidate.write())
            emitted.append(candidate)
Example #5
0
def extract_seq(pdb: MyPDB) -> dict:
    """
    Build one AASeq per chain of pdb from the residues the chain iterates over.

    :param pdb: iterable of (chain id, chain) pairs; each chain iterates as
        (residue id, residue) pairs
    :return: {chain id: AASeq named '<pdb.name>.<chain id>'}
    """
    chain_seqs = {}
    for chain_id, chain in pdb:
        chain_seq = AASeq(name='%s.%s' % (pdb.name, chain_id))
        chain_seq.set_seq(''.join(res.res_type for _, res in chain))
        chain_seqs[chain_id] = chain_seq
    return chain_seqs
Example #6
0
def aln_identity(aln1: AASeq, aln2: AASeq) -> float:
    """
    Fraction of aln1's non-gap positions that hold the same residue in aln2.

    Only the strings returned by ``get_aligned()`` are used. Columns are
    compared pairwise, so the comparison stops at the shorter alignment. A
    column counts as an identity when both characters are equal and not '-'.

    :param aln1: alignment sequence (with gaps)
    :param aln2: alignment sequence (with gaps)
    :return: the identity calculated by: (# identities)/(aln1 length, no gaps);
        0.0 when aln1 has no non-gap positions
    >>> a = AASeq(aligned='-ABC-D')
    >>> b = AASeq(aligned='-ABCED')
    >>> aln_identity(a, b)
    1.0
    >>> b = AASeq(string='-BBCED')
    >>> aln_identity(a, b)
    0.75
    """
    # fetch each alignment once instead of once per column
    aligned1 = aln1.get_aligned()
    aligned2 = aln2.get_aligned()
    identities = sum(
        1 for x, y in zip(aligned1, aligned2) if x == y != '-')
    # the previous `len(len(aln1))` always raised TypeError
    ungapped_length = sum(1 for x in aligned1 if x != '-')
    if ungapped_length == 0:
        return 0.0
    return identities / float(ungapped_length)
Example #7
0
def aln_identity(aln1: AASeq, aln2: AASeq) -> float:
    """
    Identity of two aligned sequences relative to aln1's ungapped length.

    Works on the strings returned by ``get_aligned()``. Columns are paired
    with zip, so extra columns of the longer alignment are ignored. A column
    is an identity when both characters match and neither is a gap ('-').

    :param aln1: alignment sequence (with gaps)
    :param aln2: alignment sequence (with gaps)
    :return: the identity calculated by: (# identities)/(aln1 length, no gaps);
        0.0 when aln1 consists of gaps only (or is empty)
    >>> a = AASeq(aligned='-ABC-D')
    >>> b = AASeq(aligned='-ABCED')
    >>> aln_identity(a, b)
    1.0
    >>> b = AASeq(string='-BBCED')
    >>> aln_identity(a, b)
    0.75
    """
    first = aln1.get_aligned()
    second = aln2.get_aligned()
    # denominator: aln1 positions that are not gaps; this replaces the
    # original `len(len(aln1))`, which could never succeed
    non_gap_count = len([c for c in first if c != '-'])
    if not non_gap_count:
        return 0.0
    matches = 0
    for c1, c2 in zip(first, second):
        if c1 != '-' and c1 == c2:
            matches += 1
    return matches / float(non_gap_count)
def setup_db(args):
    """
    Create one working directory per entry of parse_rost_db() and fill it.

    For every (key, entry) pair the function logs the entry, creates and
    enters a directory named after the key, writes a PDB file holding only the
    requested chain, writes an AddMembrane xml built from the entry's spans,
    writes an 'embed.flags' file, and finally steps back to the parent
    directory.

    Each entry is read through the keys 'seq', 'pdb', 'chain' and 'ts'.

    :param args: not used by this function
    :return: None
    """
    rost_db = parse_rost_db()
    # never appended to or read in this function
    failed = []
    logger = Logger('./db_setup.log')
    for k, v in rost_db.items():
        # if k != 'q9u6b8': continue
        logger.create_header('working on %s' % k)
        logger.log('seq: %s' % v['seq'])
        logger.log('pdb: %s' % v['pdb'])
        logger.log('chain: %s' % v['chain'])
        logger.log('ts: %s' % v['ts'])
        # everything below uses relative paths inside the entry's directory;
        # os.mkdir raises if the directory already exists
        os.mkdir(k)
        os.chdir(k)

        # get pdb and extract chain
        download_pdb({'name': v['pdb'], 'path': './'})
        empty_pdb = MyPDB(name=v['pdb'])
        pdb = parse_PDB('pdb%s.ent' % v['pdb'])
        chain = pdb.chains[v['chain']]
        empty_pdb.add_chain(chain)
        write_PDB('%s_%s.pdb' % (k, v['chain']), empty_pdb)
        pdb_seq = extract_seq(empty_pdb)
        rdb_seq = AASeq(v['seq'])
        # score/start/end are not read afterwards; start and end are
        # reassigned inside the span loop below
        score, start, end = pdb_seq[v['chain']].align(rdb_seq)
        logger.log('pdb seq: %s' % pdb_seq[v['chain']].aligned)
        logger.log('rst seq: %s' % rdb_seq.aligned)

        # get spans and print xml
        spans = find_topo(v['ts'])

        new_spans = []
        for sp in spans:
            # sp is indexed as (start, end, third field); both ends are mapped
            # through the aligned pdb sequence and shifted by one
            # NOTE(review): the +1 presumably converts to 1-based numbering - confirm
            start = pdb_seq[v['chain']].aligned_position_at_non_aligned(
                sp[0]) + 1
            end = pdb_seq[v['chain']].aligned_position_at_non_aligned(
                sp[1]) + 1
            logger.log('span %i->%i %s moving to %i->%i' %
                       (sp[0], sp[1], sp[2], start, end))
            new_spans.append([start, end, sp[2]])
        create_AddMembrane_xml(new_spans, '%s_AddMembrane.xml' % v['pdb'])

        # create flags file
        with open('embed.flags', 'w+') as fout:
            fout.write(
                '-parser:protocol /home/labs/fleishman/jonathaw/elazaridis/protocols/embed_in_membrane.xml\n'
            )
            # '%' is left-associative: the file-name template is substituted
            # into the flag first, then filled with (k, chain)
            fout.write('-s %s\n' % '%s_%s.pdb' % (k, v['chain']))
            fout.write('-parser:script_vars add_memb_xml=%s\n' %
                       '%s_AddMembrane.xml' % v['pdb'])
            fout.write('-overwrite\n')
            fout.write('-score::elec_memb_sig_die\n')
            fout.write('-corrections::beta_nov15\n')
            fout.write('-score::memb_fa_sol\n')
        # back to the parent directory; skipped if anything above raised
        os.chdir('../')
Example #9
0
def translate(seq: str, name=None) -> AASeq:
    """
    Translate consecutive, non-overlapping codons of seq via genetic_code.

    Trailing nucleotides that do not fill a whole codon are ignored.

    :param seq: a nucleotide seq
    :param name: name given to the resulting AASeq
    :return: amino acid seq
    >>> translate('TTTCATAAG').get_seq()
    'FHK'
    """
    codon_starts = range(0, len(seq) - 2, 3)
    protein = ''.join(
        genetic_code[seq[start:start + 3]] for start in codon_starts)
    return AASeq(string=protein, name=name)
def highest_seq_similarity(crys_seqs: list, query: AASeq) -> (AASeq, float):
    """
    Find the crystallised sequence with the highest aligned identity to query.

    Identity comes from ``query.aligned_identity(seq)``. Only a strictly higher
    identity replaces the current best, so the first of several equal
    candidates is kept. When no identity exceeds 0.0 an empty AASeq() and 0.0
    are returned.

    :param crys_seqs: list of AASeq instances of crystalised seqs
    :param query: a query AASeq
    :return: the most sequence-similar sequence and its identity
    """
    top_seq = AASeq()
    top_identity = 0.0
    for candidate in crys_seqs:
        identity = query.aligned_identity(candidate)
        if not identity > top_identity:
            continue
        top_seq, top_identity = candidate, identity
    return top_seq, top_identity
    def __init__(self, name: str, coh_AASeq: AASeq, doc_AASeq: AASeq, purples: int, j=False, originals=None, doc_wt=False):
        """
        Keep the design's sequences and work out wild-type names and switch strings.

        :param name: design name; for non-j designs the '_'-separated tokens
            before and after the 'A' token are the cohesin / dockerin wild types
        :param coh_AASeq: cohesin sequence
        :param doc_AASeq: dockerin sequence
        :param purples: stored unchanged
        :param j: true for designs whose cohesin wild type is always '1ohz'
        :param originals: mapping queried with name[:-3] + '.pdb.gz' when j is
            true and no doc_wt was passed
        :param doc_wt: dockerin wild type to use directly (only read when j is true)
        """
        self.name = name
        self.coh_AASeq = coh_AASeq
        self.doc_AASeq = doc_AASeq
        self.purples = purples

        if not j:
            pieces = name.split('_')
            anchor = pieces.index('A')
            self.coh_wt, self.doc_wt = pieces[anchor-1], pieces[anchor+1]
        else:
            self.coh_wt = '1ohz'
            if doc_wt:
                self.doc_wt = doc_wt
            else:
                # dockerin wild type is the token following '_A_' in the original name
                source_name = originals[name[:-3]+'.pdb.gz']
                after_a = source_name.split('_A_')[1]
                self.doc_wt = after_a.split('_')[0]

        # every residue absent from type_dict is written as 'c'
        coh_letters = []
        for aa in coh_AASeq.get_positions(positions_dict['1ohz']):
            coh_letters.append(type_dict[aa] if aa in type_dict else 'c')
        self.coh_switch = ''.join(coh_letters)

        doc_positions = list(doc21ohz[self.doc_wt].keys())
        doc_letters = []
        for aa in doc_AASeq.get_positions(doc_positions):
            doc_letters.append(type_dict[aa] if aa in type_dict else 'c')
        self.doc_switch = ''.join(doc_letters)
Example #12
0
def read_multi_fastas(fastas_file: str,
                      suffix_to_remove: str = None,
                      lower=False,
                      add_aligned=False) -> dict:
    """
    Parse a multi-FASTA file into AASeq objects keyed by entry name.

    The name is the header line with whitespace runs replaced by '_'. Entries
    with an empty name or without a sequence line are skipped.

    :param fastas_file: file address
    :param suffix_to_remove: when given, the name is cut at its first occurrence
    :param lower: lowercase the name (both the dict key and the AASeq name)
    :param add_aligned: store the raw sequence as ``aligned`` even if it has
        no '-' characters
    :return: {name: AASeq}
    """
    with open(fastas_file, 'r') as handle:
        records = handle.read().split('>')

    sequences = {}
    for record in records:
        lines = record.split('\n')
        if len(lines) < 2:
            continue
        header, body = lines[0], lines[1:]
        name = '_'.join(header.rstrip().split())
        if name == '':
            continue
        if suffix_to_remove is not None:
            name = name.split(suffix_to_remove)[0]
        if lower:
            name = name.lower()

        raw = ''.join(line.rstrip() for line in body)
        if '-' in raw or add_aligned:
            # gapped input: keep the gapped form and store the ungapped one as the sequence
            sequences[name] = AASeq(string=raw.replace('-', ''),
                                    name=name,
                                    aligned=raw)
        else:
            sequences[name] = AASeq(string=raw, name=name)
    return sequences
Example #13
0
 def __init__(self,
              chain_id: str = None,
              residues: dict = None,
              non_residues: dict = None):
     """
     Build a chain from optional residue / non-residue mappings.

     :param chain_id: chain identifier; also the name of both sequences
     :param residues: mapping whose values expose ``res_type``; {} when omitted
     :param non_residues: mapping whose values expose ``res_type``; {} when omitted
     """
     self.chain_id = chain_id
     self.residues = residues if residues is not None else {}
     self.non_residues = non_residues if non_residues is not None else {}
     # seq and non_residues_seq are each derived from their own mapping; the
     # old code read `residues` for both, so non_residues_seq was wrong and the
     # constructor crashed when only non_residues was passed
     self.seq = AASeq(''.join(r.res_type for r in self.residues.values()),
                      name=chain_id)
     self.non_residues_seq = AASeq(
         ''.join(r.res_type for r in self.non_residues.values()),
         name=chain_id)
Example #14
0
def parse_input_data(in_file: str) -> OrderedDict:
    """
    Read the comma-separated input table into an ordered dict keyed by name.

    Lines with fewer than four fields, and lines whose first field is 'name'
    (the header), are skipped. Fields are, in order: name, sequence, dilution
    factor, absorbance.

    :param in_file: input table. use the template
    :return: dict of the CSV
    """
    with open(in_file, 'r') as fin:
        rows = [line.split(',') for line in fin.read().split('\n')]

    table = OrderedDict()
    for fields in rows:
        if len(fields) < 4 or fields[0] == 'name':
            continue
        name, seq, dilution, absorbance = fields[:4]
        table[name] = {
            'name': name,
            'seq': AASeq(seq, name=name),
            'dilution_factor': float(dilution),
            'absorbance': float(absorbance)
        }
    return table
Example #15
0
def write_multi_seqs_to_file(seqs: dict, out_file: str, query: AASeq=None, no_dups: bool=True):
    """
    Dump sequences to out_file, each as its ``write()`` text followed by a newline.

    A given query goes first; sequences equal to the query are never written
    again. When no_dups is set, sequences equal to an already written one are
    left out as well.

    :param seqs: {name: AASeq}
    :param out_file: file to write
    :param query: query AASeq
    :param no_dups: whether to refrain from duplicates or not
    :return: write fasta file
    """
    already_written = []
    with open(out_file, 'w+') as fout:

        def emit(entry):
            fout.write('%s\n' % entry.write())
            already_written.append(entry)

        if query is not None:
            emit(query)
        for entry in seqs.values():
            same_as_query = query is not None and query == entry
            if same_as_query:
                continue
            if no_dups and entry in already_written:
                continue
            emit(entry)
Example #16
0
def main():
    """
    Command-line entry point: compute per-protein properties and print them.

    The ``-mode`` argument selects where the input entries come from:

    * ``csv``   - parse_input_data(in_file)
    * ``line``  - one entry built from -name / -seq / -dilution_factor / -absorbance
    * ``Js``    - one entry whose sequence is J_data()[name]
    * ``excel`` - hard-coded names and absorbances, sequences from J_data(),
      dilution factor from the command line
    * ``just``  - hard-coded dockerin names, dilution factor 1, absorbance 0.0

    Any other mode prints 'no mode found' and exits.

    For every entry the extinction coefficient, isoelectric point, molar
    concentration, half-diluted concentration, molecular weight and 'g/l' are
    added to the entry, the entry is passed to print_evernote_format, and all
    entries are finally printed as a pandas DataFrame.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-mode', default='csv', type=str)
    parser.add_argument('-in_file', type=str)
    parser.add_argument('-name', type=str)
    parser.add_argument('-seq', type=str)
    parser.add_argument('-dilution_factor', type=float)
    parser.add_argument('-absorbance', type=float)

    args = vars(parser.parse_args())

    if args['mode'] == 'csv':
        input_dict = parse_input_data(args['in_file'])

    elif args['mode'] == 'line':
        input_dict = {
            args['name']: {
                'name': args['name'],
                'seq': AASeq(args['seq'], args['name']),
                'dilution_factor': args['dilution_factor'],
                'absorbance': args['absorbance']
            }
        }

    elif args['mode'] == 'Js':
        j_data = J_data()
        input_dict = {
            args['name']: {
                'name': args['name'],
                'seq': AASeq(j_data[args['name']], args['name']),
                'dilution_factor': args['dilution_factor'],
                'absorbance': args['absorbance']
            }
        }

    elif args['mode'] == 'excel':
        absorbances = [
            0, 1.2205, 0, 0, 0, 0.7145, 0, 0, 1.3835, 1.859, 0, 0, 1.83875,
            3.18925
        ]
        names = [
            'j829.A', 'j5711.A', 'j5517.A', 'j5106.A', 'j5093.A', 'j4286.A',
            'j3622.A', 'j1647.A', 'j4398.A', 'j3983.A', 'j3626.A', 'j4518.A',
            'j4653.A', '1ohz.A'
        ]
        j_data = J_data()
        input_dict = OrderedDict()
        for name, absorbance in zip(names, absorbances):
            input_dict[name] = {
                'name': name,
                'seq': AASeq(j_data[name], name),
                'dilution_factor': args['dilution_factor'],
                'absorbance': absorbance
            }

    elif args['mode'] == 'just':
        j_data = J_data()
        # coh_names is kept as the alternative to doc_names; only the list
        # assigned to all_names is processed
        coh_names = [
            'j829.A', 'j5711.A', 'j5517.A', 'j5106.A', 'j5093.A', 'j4286.A',
            'j3622.A', 'j1647.A', 'j4398.A', 'j3983.A', 'j3626.A', 'j4518.A',
            'j4653.A', '1ohz.A'
        ]
        doc_names = [
            'j829.B', 'j5711.B', 'j5517.B', 'j5106.B', 'j5093.B', 'j1526.B',
            'j3622.B', 'j1647.B', 'j4398.B', 'j3983.B', 'j3626.B', 'j4518.B',
            'j4653.B', '1ohz.B'
        ]
        all_names = doc_names
        input_dict = OrderedDict()
        for name in all_names:
            input_dict[name] = {
                'name': name,
                'seq': AASeq(j_data[name], name),
                'dilution_factor': 1,
                'absorbance': 0.0
            }

    else:
        print('no mode found')
        sys.exit()

    pd.set_option('display.float_format', '{:.2g}'.format)
    base_columns = [
        'name', 'seq', 'dilution_factor', 'absorbance', 'molecular_weight',
        'pI', 'extinction_coefficient'
    ]

    # Rows are collected and the frame is built once at the end:
    # DataFrame.append was removed in pandas 2.0, and appending row by row
    # copies the whole frame on every iteration.
    rows = []
    for k, v in input_dict.items():
        # calculate extinction coefficient
        v['extinction_coefficient'] = v['seq'].calc_extinction_coefficient(
            reduced=False)

        # calculate isoelectric point
        v['pI'] = v['seq'].calc_isoelectric_point()

        # calculate molar concentration
        v['conc'] = v['dilution_factor'] * v['absorbance'] / v[
            'extinction_coefficient']

        # calculate concentration if diluted by half
        v['glycerol_conc'] = v['conc'] / 2

        # calculate molecular weight
        v['molecular_weight'] = v['seq'].calc_molecular_weight()

        # calculate g/L
        # NOTE(review): mol/L -> g/L is normally conc * molecular_weight, but
        # this divides. Left unchanged because print_evernote_format may rely
        # on the current value - confirm and fix.
        v['g/l'] = v['conc'] / v['molecular_weight']

        print_evernote_format(v)

        # the frame stores the plain sequence string, not the AASeq object
        v['seq'] = v['seq'].get_seq()
        rows.append(v)

    df = pd.DataFrame(rows)
    # declared columns first, then any extra keys in the order they appeared,
    # which is the column layout the row-by-row append produced
    extra_columns = [c for c in df.columns if c not in base_columns]
    df = df.reindex(columns=base_columns + extra_columns)

    print(df)
    print(', '.join(["%i" % i for i in df['extinction_coefficient']]))
    # print conc row for excel
    print('conc row for excel')
    print('\t'.join('%.2f' % (a * 10**6) for a in df['conc'].values))
Example #17
0
class Chain:
    """
    One chain of a structure: residues keyed by residue number, plus sequences.

    add_residue files a residue under ``residues`` / ``seq`` when its
    three-letter type is a key of ``three_2_one``, and under
    ``non_residues`` / ``non_residues_seq`` otherwise. Iteration, ``len``,
    ``keys`` and ``values`` cover ``residues`` only.
    """

    def __init__(self,
                 chain_id: str = None,
                 residues: dict = None,
                 non_residues: dict = None):
        """
        :param chain_id: chain identifier; also the name of both sequences
        :param residues: mapping whose values expose ``res_type``; {} when omitted
        :param non_residues: mapping whose values expose ``res_type``; {} when omitted
        """
        self.chain_id = chain_id
        self.residues = residues if residues is not None else {}
        self.non_residues = non_residues if non_residues is not None else {}
        # Each sequence is built from its own mapping. The old code built
        # non_residues_seq from `residues`, which gave the wrong sequence and
        # crashed when only non_residues was passed.
        self.seq = AASeq(''.join(r.res_type for r in self.residues.values()),
                         name=chain_id)
        self.non_residues_seq = AASeq(
            ''.join(r.res_type for r in self.non_residues.values()),
            name=chain_id)

    def __repr__(self) -> str:
        return "chain %s has %i residues" % (self.chain_id, len(self.residues))

    def __getitem__(self, item: int) -> Residue:
        """
        Look item up in residues, then in non_residues.

        :raises KeyError: when item is in neither mapping
        """
        try:
            return self.residues[item]
        except KeyError:
            # only a missing key falls through; other errors are not masked
            return self.non_residues[item]

    def __iter__(self):
        """Yield (residue number, residue) pairs of ``residues``."""
        yield from self.residues.items()

    def __len__(self):
        return len(self.residues)

    def add_residue(self, residue: Residue) -> None:
        """
        Store residue under its ``res_num`` and extend the matching sequence.

        :param residue: residue exposing ``res_type_3``, ``res_type`` and ``res_num``
        """
        if residue.res_type_3 in three_2_one.keys():
            self.seq.add_aa(residue.res_type)
            self.residues[residue.res_num] = residue
        else:
            self.non_residues_seq.add_aa(residue.res_type)
            self.non_residues[residue.res_num] = residue

    def min_distance_chain(self, other: 'Chain') -> float:
        """
        :param other: object iterating as (residue number, residue) pairs,
            e.g. another Chain
        :return: the smallest ``min_distance_res`` over all residue pairs
        :raises ValueError: when there are no residue pairs to compare
        """
        return min(
            mres.min_distance_res(ores)
            for mrid, mres in self
            for orid, ores in other)

    def keys(self):
        return self.residues.keys()

    def values(self):
        return self.residues.values()

    def COM(self) -> XYZ:
        """
        :return:the Center Of Mass of the chain as calculated by the averages over Xs, Ys and Zs of all CAs
        """
        Xs = []
        Ys = []
        Zs = []
        for res in self.values():
            # residues without a 'CA' entry do not contribute
            if 'CA' in res.keys():
                Xs.append(res['CA'].xyz.x)
                Ys.append(res['CA'].xyz.y)
                Zs.append(res['CA'].xyz.z)
        return XYZ(np.mean(Xs), np.mean(Ys), np.mean(Zs))

    def change_chain_name(self, new: str) -> None:
        """Set chain_id to new and forward the change to every residue."""
        self.chain_id = new
        for rid, r in self:
            r.change_chain_name(new)

    def translate_xyz(self, xyz: XYZ) -> None:
        """
        :param xyz: an xyz point
        :return: None. translate all chain atoms by xyz
        """
        for rid, r in self:
            r.translate_xyz(xyz)
def extract_charge_configuration(seq: AASeq, positions: list):
    """
    Map the residues of seq at the given positions to their charge symbols.

    :param seq: sequence queried through ``get_positions``
    :param positions: positions forwarded to ``seq.get_positions``
    :return: list with ``res2charge[residue]`` per residue, or "c" for
        residues that are not keys of res2charge
    """
    charges = []
    for residue in seq.get_positions(positions):
        if residue in res2charge:
            charges.append(res2charge[residue])
        else:
            charges.append("c")
    return charges
def extract_charge_configuration(seq: AASeq, positions: list):
    """
    Translate the residues found at positions into charge symbols.

    :param seq: sequence queried through ``get_positions``
    :param positions: positions forwarded to ``seq.get_positions``
    :return: list of res2charge values, with 'c' standing in for every
        residue res2charge has no entry for
    """
    selected = seq.get_positions(positions)
    return [res2charge.get(aa, 'c') for aa in selected]
Example #20
0
class Chain:
    """
    A chain holding residues (by residue number) and their sequences.

    Residues whose three-letter type is a key of ``three_2_one`` live in
    ``residues`` / ``seq``; all others live in ``non_residues`` /
    ``non_residues_seq``. Iterating, ``len()``, ``keys()`` and ``values()``
    look at ``residues`` only.
    """

    def __init__(self, chain_id: str = None, residues: dict = None, non_residues: dict = None):
        """
        :param chain_id: chain identifier, used as the name of both sequences
        :param residues: mapping whose values expose ``res_type``; defaults to {}
        :param non_residues: mapping whose values expose ``res_type``; defaults to {}
        """
        self.chain_id = chain_id
        self.residues = residues if residues is not None else {}
        self.non_residues = non_residues if non_residues is not None else {}
        # Build each sequence from its own mapping. Previously both were built
        # from `residues`, so non_residues_seq duplicated the residue sequence
        # and passing only non_residues raised AttributeError.
        self.seq = AASeq(''.join(r.res_type for r in self.residues.values()), name=chain_id)
        self.non_residues_seq = AASeq(''.join(r.res_type for r in self.non_residues.values()),
                                      name=chain_id)

    def __repr__(self) -> str:
        return "chain %s has %i residues" % (self.chain_id, len(self.residues))

    def __getitem__(self, item: int) -> Residue:
        """
        Return the entry stored under item, preferring residues over non_residues.

        :raises KeyError: when neither mapping has item
        """
        try:
            return self.residues[item]
        except KeyError:
            # narrowed from a bare except so unrelated errors stay visible
            return self.non_residues[item]

    def __iter__(self):
        """Iterate over (residue number, residue) pairs of ``residues``."""
        yield from self.residues.items()

    def __len__(self):
        return len(self.residues)

    def add_residue(self, residue: Residue) -> None:
        """
        File residue by its ``res_num`` and append its type to the matching sequence.

        :param residue: residue exposing ``res_type_3``, ``res_type`` and ``res_num``
        """
        if residue.res_type_3 in three_2_one.keys():
            self.seq.add_aa(residue.res_type)
            self.residues[residue.res_num] = residue
        else:
            self.non_residues_seq.add_aa(residue.res_type)
            self.non_residues[residue.res_num] = residue

    def min_distance_chain(self, other: 'Chain') -> float:
        """
        :param other: anything that iterates as (residue number, residue)
            pairs, such as another Chain
        :return: minimum of ``min_distance_res`` over every residue pair
        :raises ValueError: when either side has no residues
        """
        return min(
            mres.min_distance_res(ores)
            for mrid, mres in self
            for orid, ores in other)

    def keys(self):
        return self.residues.keys()

    def values(self):
        return self.residues.values()

    def COM(self) -> XYZ:
        """
        :return:the Center Of Mass of the chain as calculated by the averages over Xs, Ys and Zs of all CAs
        """
        Xs = []
        Ys = []
        Zs = []
        for res in self.values():
            # only residues that have a 'CA' entry are averaged
            if 'CA' in res.keys():
                Xs.append(res['CA'].xyz.x)
                Ys.append(res['CA'].xyz.y)
                Zs.append(res['CA'].xyz.z)
        return XYZ(np.mean(Xs), np.mean(Ys), np.mean(Zs))

    def change_chain_name(self, new: str) -> None:
        """Rename the chain and pass the new name on to each residue."""
        self.chain_id = new
        for rid, r in self:
            r.change_chain_name(new)

    def translate_xyz(self, xyz: XYZ) -> None:
        """
        :param xyz: an xyz point
        :return: None. translate all chain atoms by xyz
        """
        for rid, r in self:
            r.translate_xyz(xyz)