def align_chain_sequence(sys_type):
    """Align each user-supplied atomistic chain against the CG chain sequences.

    For every chain index in ``g_var.atomistic_protein_input_raw`` the CG
    chains in ``g_var.seq_cg[sys_type]`` are tried in order (0, 1, 2, ...)
    until the first matching block reported by ``difflib.SequenceMatcher``
    spans the entire atomistic sequence.  On success the chain's residues
    are re-keyed by their offset into the CG sequence and stored under
    ``aligned[cg_chain]['start:stop']``; the matched CG region is then passed
    through ``mask_sequence`` (defined elsewhere -- presumably so the same
    region cannot be matched again; confirm against its definition).

    Args:
        sys_type: key used to index ``g_var.seq_at`` and ``g_var.seq_cg``.

    Side effects (all on ``g_var``):
        * ``seq_cg[sys_type][chain]`` is replaced by the masked sequence.
        * ``user_at_input`` is set to whether any chain was aligned.
        * ``atomistic_protein_input_aligned`` is set when at least one chain
          was aligned.
        * ``group_chains`` is replaced by the {atomistic chain: CG chain}
          mapping when it currently equals ``'chain'``.
    """
    if g_var.args.v >= 2:
        print(gen.print_sequnce_info('PROTEIN'))

    aligned = {}
    at_to_cg = {}

    for chain_at in range(len(g_var.atomistic_protein_input_raw)):
        at_sequence = g_var.seq_at[sys_type][chain_at]
        cg_index = 0
        found = True

        # Walk through the CG chains until one contains the whole
        # atomistic sequence as its first matching block.
        while True:
            matcher = difflib.SequenceMatcher(
                None,
                at_sequence,
                g_var.seq_cg[sys_type][cg_index],
                autojunk=False,
            )
            _, cg_start, match_len = matcher.get_matching_blocks()[0]
            if match_len == len(at_sequence):
                break
            if cg_index >= len(g_var.seq_cg[sys_type]) - 1:
                # Ran out of CG chains to try for this atomistic chain.
                print('\nCannot find a match for user supplied chain: '
                      + str(chain_at))
                found = False
                break
            cg_index += 1

        if not found:
            continue

        cg_stop = cg_start + match_len
        user_chain = g_var.atomistic_protein_input_raw[chain_at]
        # Re-key residues by their position within the CG chain.
        shifted_residues = {
            position + cg_start: user_chain[residue]
            for position, residue in enumerate(user_chain)
        }
        segment_key = ':'.join((str(cg_start), str(cg_stop)))
        aligned.setdefault(cg_index, {})[segment_key] = shifted_residues

        g_var.seq_cg[sys_type][cg_index] = mask_sequence(
            g_var.seq_cg[sys_type][cg_index], cg_start, cg_stop)
        at_to_cg[chain_at] = cg_index

    check_chain_alignment_coverage(aligned, sys_type)

    if len(g_var.atomistic_protein_input_raw) < len(g_var.seq_cg[sys_type]):
        print(
            '### WARNING you have supplied fewer chains than exist in the CG system ###\n'
        )

    g_var.user_at_input = bool(aligned)
    if aligned:
        g_var.atomistic_protein_input_aligned = aligned

    if g_var.group_chains == 'chain':
        g_var.group_chains = at_to_cg
g_var.args.box) else: g_var.box_vec = box_vec_initial box_shift = np.array([0, 0, 0]) read_in.real_box_vectors(g_var.box_vec) #### pbc fix and residue truncation if required read_in.fix_pbc(box_vec_initial, g_var.box_vec, box_shift) #### checks if fragment database and input files match at_mod.sanity_check() ### convert protein to atomistic representation g_var.tc['r_i_t'] = time.time() if 'PROTEIN' in g_var.cg_residues: g_var.coord_atomistic = at_mod_p.build_multi_residue_atomistic_system( g_var.cg_residues, 'PROTEIN') ## converts protein to atomistic if not g_var.user_at_input and g_var.args.v >= 1: ## prints protein sequences print(gen.print_sequnce_info('PROTEIN')) ## reads in user chain, runs a sequence alignment and finds existing disulphide bonds g_var.tc['p_d_n_t'] = time.time() if g_var.user_at_input: for file_num, file_name in enumerate(g_var.args.a): atomistic_protein_input_raw, g_var.chain_count = read_in.read_in_atomistic( g_var.input_directory + 'AT_INPUT_' + str(file_num) + '.pdb') ## reads in user structure g_var.atomistic_protein_input_raw.update( atomistic_protein_input_raw) read_in.duplicate_chain() ## duplicates user chcains at_mod_p.check_sequence() ## gets user sequence at_mod_p.align_chain_sequence('PROTEIN') ## aligns chains at_mod_p.find_disulphide_bonds_user_sup( ) ## finds user disulphide bonds at_mod_p.find_disulphide_bonds_de_novo() ## finds CG disulphide bonds