def retrive_relevant_poses() -> (dict, dict):
    """
    Reduce every cohesin and dockerin sequence to a fixed set of positions.

    Both alignments are read from ``root_path``. For each alignment the
    module-level position list (``coh_poses_1ohz`` / ``doc_poses_1ohz``) is
    passed through the ``1OHZ`` entry's ``non_aligned_position_at_aligned``,
    and the resulting positions are pulled out of every sequence with
    ``get_aligned_positions``.

    :return: seq dicts for cohs and docs, holding only the relevant positions,
             determined by 1OHZ
    """

    def _keep_positions(alignment: dict, reference_positions) -> dict:
        # 1OHZ is the reference entry used to translate the position list.
        reference = alignment['1OHZ']
        positions = [
            reference.non_aligned_position_at_aligned(pos)
            for pos in reference_positions
        ]
        return {
            entry_name: AASeq(
                string=''.join(entry.get_aligned_positions(positions)),
                name=entry_name)
            for entry_name, entry in alignment.items()
        }

    cohs_old = read_multi_fastas(root_path + 'cohesins_from_rachel.fasta_aln',
                                 suffix_to_remove='/')
    docs_old = read_multi_fastas(root_path + 'dockerins_from_rachel.fasta_aln',
                                 suffix_to_remove='/')
    cohs_new = _keep_positions(cohs_old, coh_poses_1ohz)
    docs_new = _keep_positions(docs_old, doc_poses_1ohz)
    return cohs_new, docs_new
def __init__(self, chain_id: str = None, residues: dict = None, non_residues: dict = None):
    """
    Build a chain from its residues and non-residue entries.

    :param chain_id: chain identifier; also used to name the sequences
    :param residues: mapping whose values expose ``res_type``; defaults to
                     an empty dict
    :param non_residues: mapping of non-residue entries; defaults to an
                         empty dict. Values are assumed to expose
                         ``res_type`` like the residues do - TODO confirm
    """
    self.chain_id = chain_id
    self.residues = residues if residues is not None else {}
    # NOTE(review): the sequence is left unnamed when residues are supplied
    # but named after the chain when they are not; kept as-is because
    # callers may depend on it.
    if residues is not None:
        self.seq = AASeq(''.join(a.res_type for a in residues.values()))
    else:
        self.seq = AASeq('', name=chain_id)
    self.non_residues = non_residues if non_residues is not None else {}
    # Built from the non-residue entries themselves. The previous code
    # iterated ``residues`` here, which produced the residue sequence and
    # failed outright when ``residues`` was None.
    self.non_residues_seq = AASeq(
        ''.join(a.res_type for a in self.non_residues.values()),
        name=chain_id)
def extract_seq(pdb: MyPDB) -> dict:
    """
    Collect one sequence per chain of a parsed PDB.

    :param pdb: object that iterates as (chain id, chain) pairs, where each
                chain iterates as (residue id, residue) pairs
    :return: {chain id: AASeq named '<pdb name>.<chain id>'}
    """
    chain_seqs = {}
    for chain_id, chain in pdb:
        chain_seq = AASeq(name='%s.%s' % (pdb.name, chain_id))
        chain_seqs[chain_id] = chain_seq
        chain_seq.set_seq(''.join(residue.res_type for _, residue in chain))
    return chain_seqs
def setup_db(args):
    """
    Create one working directory per entry of the Rost database.

    For every entry this downloads the PDB, writes the requested chain to
    ``<entry>_<chain>.pdb``, aligns the chain sequence to the database
    sequence, writes the topology spans to ``<pdb>_AddMembrane.xml`` and
    writes an ``embed.flags`` file. Progress is logged to ``./db_setup.log``.

    :param args: unused; kept so the function matches its callers
    :raises OSError: if the entry directory cannot be created (for example
                     because it already exists)
    """
    rost_db = parse_rost_db()
    logger = Logger('./db_setup.log')
    for k, v in rost_db.items():
        logger.create_header('working on %s' % k)
        logger.log('seq: %s' % v['seq'])
        logger.log('pdb: %s' % v['pdb'])
        logger.log('chain: %s' % v['chain'])
        logger.log('ts: %s' % v['ts'])

        chain_id = v['chain']
        pdb_file = '%s_%s.pdb' % (k, chain_id)
        xml_file = '%s_AddMembrane.xml' % v['pdb']

        start_dir = os.getcwd()
        os.mkdir(k)
        os.chdir(k)
        try:
            # get pdb and extract chain
            download_pdb({'name': v['pdb'], 'path': './'})
            empty_pdb = MyPDB(name=v['pdb'])
            pdb = parse_PDB('pdb%s.ent' % v['pdb'])
            empty_pdb.add_chain(pdb.chains[chain_id])
            write_PDB(pdb_file, empty_pdb)

            chain_seq = extract_seq(empty_pdb)[chain_id]
            rdb_seq = AASeq(v['seq'])
            # The return value was never used; the call is kept because the
            # ``aligned`` attributes are read right after it.
            chain_seq.align(rdb_seq)
            logger.log('pdb seq: %s' % chain_seq.aligned)
            logger.log('rst seq: %s' % rdb_seq.aligned)

            # get spans and print xml
            new_spans = []
            for sp in find_topo(v['ts']):
                start = chain_seq.aligned_position_at_non_aligned(sp[0]) + 1
                end = chain_seq.aligned_position_at_non_aligned(sp[1]) + 1
                logger.log('span %i->%i %s moving to %i->%i' %
                           (sp[0], sp[1], sp[2], start, end))
                new_spans.append([start, end, sp[2]])
            create_AddMembrane_xml(new_spans, xml_file)

            # create flags file
            with open('embed.flags', 'w') as fout:
                fout.writelines([
                    '-parser:protocol /home/labs/fleishman/jonathaw/elazaridis/protocols/embed_in_membrane.xml\n',
                    '-s %s\n' % pdb_file,
                    '-parser:script_vars add_memb_xml=%s\n' % xml_file,
                    '-overwrite\n',
                    '-score::elec_memb_sig_die\n',
                    '-corrections::beta_nov15\n',
                    '-score::memb_fa_sol\n',
                ])
        finally:
            # Always return to the starting directory so a failure on one
            # entry does not leave the process inside that entry's folder.
            os.chdir(start_dir)
def translate(seq: str, name=None) -> AASeq:
    """
    Translate a nucleotide sequence codon by codon using ``genetic_code``.

    Trailing bases that do not fill a complete codon are ignored.

    :param seq: a nucleotide seq
    :param name: name passed on to the resulting AASeq
    :return: amino acid seq
    >>> translate('TTTCATAAG').get_seq()
    'FHK'
    """
    residues = []
    for codon_start in range(0, len(seq) - 2, 3):
        codon = seq[codon_start:codon_start + 3]
        residues.append(genetic_code[codon])
    return AASeq(string=''.join(residues), name=name)
def read_multi_fastas(fastas_file: str, suffix_to_remove: str = None, lower=False, add_aligned=False) -> dict:
    """
    Read a multi-record FASTA file into a dict of sequences.

    Whitespace in a header is replaced by underscores. Records with an empty
    header, or with a header line only, are skipped. When a sequence contains
    '-' (or ``add_aligned`` is set) the gapped text is passed as ``aligned``
    and the gaps are removed from the sequence itself.

    :param fastas_file: file address
    :param suffix_to_remove: if given, the name is cut at its first occurrence
    :param lower: lower-case the names (both dict keys and AASeq names)
    :param add_aligned: store the aligned form even if the sequence has no gaps
    :return: {name: AASeq}
    """
    with open(fastas_file, 'r') as handle:
        records = handle.read().split('>')

    sequences = {}
    for record in records:
        lines = record.split('\n')
        if len(lines) < 2:
            continue
        name = '_'.join(lines[0].split())
        if not name:
            continue
        if suffix_to_remove is not None:
            name = name.split(suffix_to_remove)[0]
        if lower:
            name = name.lower()

        raw = ''.join(line.rstrip() for line in lines[1:])
        if add_aligned or '-' in raw:
            seq_kwargs = {'string': raw.replace('-', ''), 'aligned': raw}
        else:
            seq_kwargs = {'string': raw}
        sequences[name] = AASeq(name=name, **seq_kwargs)
    return sequences
def parse_input_data(in_file: str) -> OrderedDict:
    """
    Load the comma-separated input table, keeping the row order.

    Rows whose first field is 'name' (the header) and rows with fewer than
    four fields are skipped.

    :param in_file: input table. use the template
    :return: dict of the CSV, keyed by the first column; each value holds
             'name', 'seq' (an AASeq), 'dilution_factor' and 'absorbance'
    """
    with open(in_file, 'r') as fin:
        rows = fin.read().split('\n')

    table = OrderedDict()
    for row in rows:
        fields = row.split(',')
        if len(fields) < 4 or fields[0] == 'name':
            continue
        entry_name = fields[0]
        table[entry_name] = {
            'name': entry_name,
            'seq': AASeq(fields[1], name=entry_name),
            'dilution_factor': float(fields[2]),
            'absorbance': float(fields[3]),
        }
    return table
def main():
    """
    Compute protein concentrations from absorbance readings and print them.

    The input records come from one of five ``-mode`` values:

    * ``csv``   - read them from ``-in_file`` with ``parse_input_data``
    * ``line``  - a single record given by ``-name``/``-seq``/... arguments
    * ``Js``    - like ``line`` but the sequence is looked up in ``J_data()``
    * ``excel`` - a hard-coded list of names and absorbances
    * ``just``  - a hard-coded list of names with zero absorbance

    For every record the extinction coefficient, isoelectric point, molecular
    weight, molar concentration (dilution_factor * absorbance /
    extinction_coefficient), half-diluted concentration and g/L are computed,
    printed with ``print_evernote_format`` and collected in a DataFrame that
    is printed at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-mode', default='csv', type=str)
    parser.add_argument('-in_file', type=str)
    parser.add_argument('-name', type=str)
    parser.add_argument('-seq', type=str)
    parser.add_argument('-dilution_factor', type=float)
    parser.add_argument('-absorbance', type=float)
    args = vars(parser.parse_args())

    def make_entry(name, seq, dilution_factor, absorbance):
        # One input record in the shape the calculation loop expects.
        return {
            'name': name,
            'seq': AASeq(seq, name),
            'dilution_factor': dilution_factor,
            'absorbance': absorbance
        }

    mode = args['mode']
    if mode == 'csv':
        input_dict = parse_input_data(args['in_file'])
    elif mode == 'line':
        input_dict = {
            args['name']: make_entry(args['name'], args['seq'],
                                     args['dilution_factor'],
                                     args['absorbance'])
        }
    elif mode == 'Js':
        j_data = J_data()
        input_dict = {
            args['name']: make_entry(args['name'], j_data[args['name']],
                                     args['dilution_factor'],
                                     args['absorbance'])
        }
    elif mode == 'excel':
        absorbances = [
            0, 1.2205, 0, 0, 0, 0.7145, 0, 0, 1.3835, 1.859, 0, 0, 1.83875,
            3.18925
        ]
        names = [
            'j829.A', 'j5711.A', 'j5517.A', 'j5106.A', 'j5093.A', 'j4286.A',
            'j3622.A', 'j1647.A', 'j4398.A', 'j3983.A', 'j3626.A', 'j4518.A',
            'j4653.A', '1ohz.A'
        ]
        j_data = J_data()
        input_dict = OrderedDict()
        for name, absorbance in zip(names, absorbances):
            input_dict[name] = make_entry(name, j_data[name],
                                          args['dilution_factor'], absorbance)
    elif mode == 'just':
        j_data = J_data()
        # NOTE(review): only the dockerin names are processed; coh_names is
        # not used and is presumably kept as a manual switch - confirm.
        coh_names = [
            'j829.A', 'j5711.A', 'j5517.A', 'j5106.A', 'j5093.A', 'j4286.A',
            'j3622.A', 'j1647.A', 'j4398.A', 'j3983.A', 'j3626.A', 'j4518.A',
            'j4653.A', '1ohz.A'
        ]
        doc_names = [
            'j829.B', 'j5711.B', 'j5517.B', 'j5106.B', 'j5093.B', 'j1526.B',
            'j3622.B', 'j1647.B', 'j4398.B', 'j3983.B', 'j3626.B', 'j4518.B',
            'j4653.B', '1ohz.B'
        ]
        all_names = doc_names
        input_dict = OrderedDict()
        for name in all_names:
            input_dict[name] = make_entry(name, j_data[name], 1, 0.0)
    else:
        print('no mode found')
        sys.exit()

    pd.set_option('display.float_format', '{:.2g}'.format)
    base_columns = [
        'name', 'seq', 'dilution_factor', 'absorbance', 'molecular_weight',
        'pI', 'extinction_coefficient'
    ]
    computed_columns = ['conc', 'glycerol_conc', 'g/l']

    records = []
    for k, v in input_dict.items():
        # calculate extinction coefficient
        v['extinction_coefficient'] = v['seq'].calc_extinction_coefficient(
            reduced=False)
        # calculate isoelectric point
        v['pI'] = v['seq'].calc_isoelectric_point()
        # calculate molar concentration
        v['conc'] = v['dilution_factor'] * v['absorbance'] / v[
            'extinction_coefficient']
        # calculate concentration if diluted by half
        v['glycerol_conc'] = v['conc'] / 2
        # calculate molecular weight
        v['molecular_weight'] = v['seq'].calc_molecular_weight()
        # calculate g/L: molar concentration times molecular weight
        # (the previous code divided, which does not yield g/L)
        v['g/l'] = v['conc'] * v['molecular_weight']
        print_evernote_format(v)
        v['seq'] = v['seq'].get_seq()
        records.append(v)

    # DataFrame.append was removed in pandas 2.0, so build the frame once
    # from all rows. Columns are ordered as before: the base columns first,
    # then the computed ones, then anything else found in the records.
    df = pd.DataFrame(records)
    known = base_columns + computed_columns
    extra_columns = [c for c in df.columns if c not in known]
    df = df.reindex(columns=known + extra_columns)

    print(df)
    print(', '.join(["%i" % i for i in df['extinction_coefficient']]))
    # print conc row for excel
    print('conc row for excel')
    print('\t'.join('%.2f' % (a * 10**6) for a in df['conc'].values))