Esempio n. 1
0
def retrive_relevant_poses() -> (dict, dict):
    """
    Reduce every cohesin and dockerin sequence to the positions selected on 1OHZ.

    Both alignments are read from ``root_path``. The position lists
    ``coh_poses_1ohz`` / ``doc_poses_1ohz`` are mapped through the '1OHZ' entry
    of the matching alignment, and the resulting positions are then extracted
    from every sequence of that alignment.

    :return: (cohesins, dockerins), each a dict {name: AASeq} whose sequence
             holds only the selected positions
    """
    def _load(file_name):
        return read_multi_fastas(root_path + file_name, suffix_to_remove='/')

    def _map_through_1ohz(seqs, poses_1ohz):
        reference = seqs['1OHZ']
        return [reference.non_aligned_position_at_aligned(p)
                for p in poses_1ohz]

    def _keep_only(seqs, positions):
        return {
            name: AASeq(string=''.join(entry.get_aligned_positions(positions)),
                        name=name)
            for name, entry in seqs.items()
        }

    cohs_old = _load('cohesins_from_rachel.fasta_aln')
    docs_old = _load('dockerins_from_rachel.fasta_aln')

    coh_poses = _map_through_1ohz(cohs_old, coh_poses_1ohz)
    doc_poses = _map_through_1ohz(docs_old, doc_poses_1ohz)

    cohs_new = _keep_only(cohs_old, coh_poses)
    docs_new = _keep_only(docs_old, doc_poses)
    return cohs_new, docs_new
Esempio n. 2
0
 def __init__(self,
              chain_id: str = None,
              residues: dict = None,
              non_residues: dict = None):
     """
     Store the chain's residues and non-residues together with their sequences.

     :param chain_id: identifier of the chain
     :param residues: dict whose values expose ``res_type``; None is stored as
                      an empty dict with an empty sequence
     :param non_residues: dict of the chain's non-residue entries; None is
                          stored as an empty dict with an empty sequence
     """
     self.chain_id = chain_id

     self.residues = residues if residues is not None else {}
     if residues is not None:
         # NOTE(review): unlike every other sequence built here, this one is
         # not given name=chain_id; left as in the original - confirm intent.
         self.seq = AASeq(''.join(a.res_type for a in residues.values()))
     else:
         self.seq = AASeq('', name=chain_id)

     self.non_residues = non_residues if non_residues is not None else {}
     if non_residues is not None:
         # Built from non_residues itself. The previous code iterated over
         # `residues` here, which duplicated the residue sequence and raised
         # AttributeError when only non_residues was supplied.
         # Assumes non-residue entries expose `res_type` like residues do.
         self.non_residues_seq = AASeq(
             ''.join(a.res_type for a in non_residues.values()),
             name=chain_id)
     else:
         self.non_residues_seq = AASeq('', name=chain_id)
Esempio n. 3
0
def extract_seq(pdb: MyPDB) -> dict:
    """
    Collect the sequence of every chain in a structure.

    :param pdb: structure that iterates as (chain id, chain) pairs, each chain
                iterating as (residue id, residue) pairs
    :return: {chain id: AASeq}, each named '<pdb.name>.<chain id>' and holding
             the chain's concatenated ``res_type`` values
    """
    chain_seqs = {}
    for chain_id, chain in pdb:
        record = AASeq(name='%s.%s' % (pdb.name, chain_id))
        record.set_seq(''.join(residue.res_type for _, residue in chain))
        chain_seqs[chain_id] = record
    return chain_seqs
def setup_db(args):
    """
    Create one working directory per entry returned by ``parse_rost_db()``.

    For every entry a directory named after its key is created in the current
    working directory and filled with:

    * the downloaded PDB file and a PDB holding only the requested chain
      ('<key>_<chain>.pdb'),
    * an AddMembrane XML ('<pdb>_AddMembrane.xml') whose spans are the entry's
      topology spans moved through the alignment of the chain sequence against
      the entry's own sequence,
    * an 'embed.flags' file referring to both.

    Progress is written to './db_setup.log'.

    :param args: not used by this function
    :raises OSError: if an entry's directory cannot be created (e.g. it
                     already exists)
    """
    rost_db = parse_rost_db()
    logger = Logger('./db_setup.log')
    for k, v in rost_db.items():
        logger.create_header('working on %s' % k)
        logger.log('seq: %s' % v['seq'])
        logger.log('pdb: %s' % v['pdb'])
        logger.log('chain: %s' % v['chain'])
        logger.log('ts: %s' % v['ts'])
        os.mkdir(k)
        os.chdir(k)
        # Always step back out of the entry's directory, even if a step below
        # fails, so an error does not leave the process in the wrong directory.
        try:
            chain_pdb_name = '%s_%s.pdb' % (k, v['chain'])
            membrane_xml_name = '%s_AddMembrane.xml' % v['pdb']

            # get pdb and extract chain
            download_pdb({'name': v['pdb'], 'path': './'})
            empty_pdb = MyPDB(name=v['pdb'])
            pdb = parse_PDB('pdb%s.ent' % v['pdb'])
            chain = pdb.chains[v['chain']]
            empty_pdb.add_chain(chain)
            write_PDB(chain_pdb_name, empty_pdb)
            chain_seq = extract_seq(empty_pdb)[v['chain']]
            rdb_seq = AASeq(v['seq'])
            # align() is called for the `aligned` attributes it fills on both
            # sequences; its return value is not needed here
            chain_seq.align(rdb_seq)
            logger.log('pdb seq: %s' % chain_seq.aligned)
            logger.log('rst seq: %s' % rdb_seq.aligned)

            # get spans and print xml
            new_spans = []
            for sp in find_topo(v['ts']):
                start = chain_seq.aligned_position_at_non_aligned(sp[0]) + 1
                end = chain_seq.aligned_position_at_non_aligned(sp[1]) + 1
                logger.log('span %i->%i %s moving to %i->%i' %
                           (sp[0], sp[1], sp[2], start, end))
                new_spans.append([start, end, sp[2]])
            create_AddMembrane_xml(new_spans, membrane_xml_name)

            # create flags file
            with open('embed.flags', 'w') as fout:
                fout.write(
                    '-parser:protocol /home/labs/fleishman/jonathaw/elazaridis/protocols/embed_in_membrane.xml\n'
                )
                fout.write('-s %s\n' % chain_pdb_name)
                fout.write('-parser:script_vars add_memb_xml=%s\n' %
                           membrane_xml_name)
                fout.write('-overwrite\n')
                fout.write('-score::elec_memb_sig_die\n')
                fout.write('-corrections::beta_nov15\n')
                fout.write('-score::memb_fa_sol\n')
        finally:
            os.chdir('../')
Esempio n. 5
0
def translate(seq: str, name=None) -> AASeq:
    """
    Translate a nucleotide sequence, codon by codon, using ``genetic_code``.

    Codons are read from the first nucleotide in steps of three; trailing
    nucleotides that do not fill a whole codon are ignored.

    :param seq: a nucleotide seq
    :param name: name given to the resulting AASeq
    :return: amino acid seq
    >>> translate('TTTCATAAG').get_seq()
    'FHK'
    """
    residues = []
    for start in range(0, len(seq) - 2, 3):
        codon = seq[start:start + 3]
        residues.append(genetic_code[codon])
    return AASeq(string=''.join(residues), name=name)
Esempio n. 6
0
def read_multi_fastas(fastas_file: str,
                      suffix_to_remove: str = None,
                      lower=False,
                      add_aligned=False) -> dict:
    """
    Read a multi-FASTA file into a dict of AASeq objects.

    The header's whitespace-separated words are joined with '_' to form the
    name. A sequence containing '-' (or any sequence when ``add_aligned`` is
    set) is stored with the dashes removed as its string and the original text
    as its ``aligned`` form.

    :param fastas_file: file address
    :param suffix_to_remove: if given, the name is cut at the first occurrence
                             of this separator
    :param lower: lower-case the name (used both as dict key and AASeq name)
    :param add_aligned: store the ``aligned`` form even without any '-'
    :return: {name: AASeq}
    """
    with open(fastas_file, 'r') as f:
        records = f.read().split('>')

    sequences = {}
    for record in records:
        header, newline, body = record.partition('\n')
        if not newline:
            # header without any sequence line (or the text before the first '>')
            continue
        name = '_'.join(header.split())
        if not name:
            continue
        if suffix_to_remove is not None:
            name = name.split(suffix_to_remove)[0]
        if lower:
            name = name.lower()

        text = ''.join(line.rstrip() for line in body.split('\n'))
        if add_aligned or '-' in text:
            seq_args = {
                'string': text.replace('-', ''),
                'name': name,
                'aligned': text
            }
        else:
            seq_args = {'string': text, 'name': name}
        sequences[name] = AASeq(**seq_args)
    return sequences
Esempio n. 7
0
def parse_input_data(in_file: str) -> OrderedDict:
    """
    Read the comma-separated input table into an ordered dict keyed by name.

    Lines with fewer than four fields, and the header line (first field equal
    to 'name'), are skipped.

    :param in_file: input table. use the template
    :return: {name: {'name', 'seq' (AASeq), 'dilution_factor', 'absorbance'}}
             in file order
    """
    with open(in_file, 'r') as fin:
        rows = [line.split(',') for line in fin.read().split('\n')]

    table = OrderedDict()
    for fields in rows:
        if len(fields) < 4 or fields[0] == 'name':
            continue
        name, seq, dilution_factor, absorbance = fields[:4]
        entry = {}
        entry['name'] = name
        entry['seq'] = AASeq(seq, name=name)
        entry['dilution_factor'] = float(dilution_factor)
        entry['absorbance'] = float(absorbance)
        table[name] = entry
    return table
Esempio n. 8
0
def main():
    """
    Command-line entry point: compute concentrations for one or more proteins.

    The ``-mode`` argument selects where the input records come from:

    * ``csv``   - read them from ``-in_file`` with ``parse_input_data``
    * ``line``  - a single record built from ``-name``, ``-seq``,
                  ``-dilution_factor`` and ``-absorbance``
    * ``Js``    - like ``line``, but the sequence is looked up in ``J_data()``
    * ``excel`` - a hard-coded list of names and absorbances, sequences from
                  ``J_data()``, dilution factor from ``-dilution_factor``
    * ``just``  - a hard-coded list of names with absorbance 0.0 and dilution
                  factor 1, sequences from ``J_data()``

    Any other mode prints 'no mode found' and exits.

    For every record the extinction coefficient, isoelectric point, molar
    concentration (dilution factor * absorbance / extinction coefficient),
    half-diluted concentration, molecular weight and g/L are computed. The
    record is printed with ``print_evernote_format``, and finally a table of
    all records plus two summary rows are printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-mode', default='csv', type=str)
    parser.add_argument('-in_file', type=str)
    parser.add_argument('-name', type=str)
    parser.add_argument('-seq', type=str)
    parser.add_argument('-dilution_factor', type=float)
    parser.add_argument('-absorbance', type=float)

    args = vars(parser.parse_args())

    if args['mode'] == 'csv':
        input_dict = parse_input_data(args['in_file'])

    elif args['mode'] == 'line':
        input_dict = {
            args['name']: {
                'name': args['name'],
                'seq': AASeq(args['seq'], args['name']),
                'dilution_factor': args['dilution_factor'],
                'absorbance': args['absorbance']
            }
        }

    elif args['mode'] == 'Js':
        j_data = J_data()
        input_dict = {
            args['name']: {
                'name': args['name'],
                'seq': AASeq(j_data[args['name']], args['name']),
                'dilution_factor': args['dilution_factor'],
                'absorbance': args['absorbance']
            }
        }

    elif args['mode'] == 'excel':
        absorbances = [
            0, 1.2205, 0, 0, 0, 0.7145, 0, 0, 1.3835, 1.859, 0, 0, 1.83875,
            3.18925
        ]
        names = [
            'j829.A', 'j5711.A', 'j5517.A', 'j5106.A', 'j5093.A', 'j4286.A',
            'j3622.A', 'j1647.A', 'j4398.A', 'j3983.A', 'j3626.A', 'j4518.A',
            'j4653.A', '1ohz.A'
        ]
        j_data = J_data()
        input_dict = OrderedDict()
        for name, absorbance in zip(names, absorbances):
            input_dict[name] = {
                'name': name,
                'seq': AASeq(j_data[name], name),
                'dilution_factor': args['dilution_factor'],
                'absorbance': absorbance
            }

    elif args['mode'] == 'just':
        j_data = J_data()
        # not used at present: only the names assigned to all_names are processed
        coh_names = [
            'j829.A', 'j5711.A', 'j5517.A', 'j5106.A', 'j5093.A', 'j4286.A',
            'j3622.A', 'j1647.A', 'j4398.A', 'j3983.A', 'j3626.A', 'j4518.A',
            'j4653.A', '1ohz.A'
        ]
        doc_names = [
            'j829.B', 'j5711.B', 'j5517.B', 'j5106.B', 'j5093.B', 'j1526.B',
            'j3622.B', 'j1647.B', 'j4398.B', 'j3983.B', 'j3626.B', 'j4518.B',
            'j4653.B', '1ohz.B'
        ]
        all_names = doc_names
        input_dict = OrderedDict()
        for name in all_names:
            input_dict[name] = {
                'name': name,
                'seq': AASeq(j_data[name], name),
                'dilution_factor': 1,
                'absorbance': 0.0
            }

    else:
        print('no mode found')
        sys.exit()

    pd.set_option('display.float_format', '{:.2g}'.format)
    # Same column order the table used to end up with: the declared columns
    # first, then the derived values in the order they are computed.
    columns = [
        'name', 'seq', 'dilution_factor', 'absorbance', 'molecular_weight',
        'pI', 'extinction_coefficient', 'conc', 'glycerol_conc', 'g/l'
    ]

    rows = []
    for k, v in input_dict.items():
        # calculate extinction coefficient
        v['extinction_coefficient'] = v['seq'].calc_extinction_coefficient(
            reduced=False)

        # calculate isoelectric point
        v['pI'] = v['seq'].calc_isoelectric_point()

        # calculate molar concentration
        v['conc'] = v['dilution_factor'] * v['absorbance'] / v[
            'extinction_coefficient']

        # calculate concentration if diluted by half
        v['glycerol_conc'] = v['conc'] / 2

        # calculate molecular weight
        v['molecular_weight'] = v['seq'].calc_molecular_weight()

        # calculate g/L: mol/L * g/mol (the previous code divided here)
        v['g/l'] = v['conc'] * v['molecular_weight']

        print_evernote_format(v)

        # the table holds the plain sequence string, not the AASeq object
        v['seq'] = v['seq'].get_seq()
        rows.append(v)

    # DataFrame.append no longer exists in pandas >= 2.0; build the frame once.
    df = pd.DataFrame(rows, columns=columns)
    print(df)
    print(', '.join(["%i" % i for i in df['extinction_coefficient']]))
    # print conc row for excel
    print('conc row for excel')
    # concentrations scaled by 10**6 (presumably micromolar - confirm units)
    print('\t'.join('%.2f' % (a * 10**6) for a in df['conc'].values))