Ejemplo n.º 1
0
def align_chain_sequence(sys_type):
    """Align every user-supplied atomistic chain to a chain of the CG system.

    For each index of ``g_var.atomistic_protein_input_raw`` the sequence
    ``g_var.seq_at[sys_type][index]`` is compared, via
    ``difflib.SequenceMatcher``, against the entries of
    ``g_var.seq_cg[sys_type]`` in increasing index order. The first CG chain
    whose first matching block is as long as the whole atomistic sequence is
    accepted; if none is, a message is printed and the chain is skipped.

    Side effects:
        * the matched CG sequence is replaced by the result of
          ``mask_sequence`` over the matched span (presumably so the same
          span is not matched again -- verify against ``mask_sequence``);
        * ``check_chain_alignment_coverage`` is called with the collected
          alignments;
        * ``g_var.user_at_input`` is set, and when at least one chain was
          aligned ``g_var.atomistic_protein_input_aligned`` is set too;
        * ``g_var.group_chains`` is replaced by the atomistic -> CG chain
          index mapping when it equals ``'chain'``.

    Args:
        sys_type: key selecting the entries of ``g_var.seq_at`` and
            ``g_var.seq_cg`` to work on.
    """
    if g_var.args.v >= 2:
        print(gen.print_sequnce_info('PROTEIN'))

    aligned = {}  # CG chain index -> {'start:end': {CG residue index: residue}}
    at_to_cg = {}  # atomistic chain index -> matched CG chain index

    for chain_at in range(len(g_var.atomistic_protein_input_raw)):
        at_sequence = g_var.seq_at[sys_type][chain_at]
        chain_cg = 0
        # Walk through the CG chains until one contains the full atomistic
        # sequence, or until the last CG chain has been tried.
        while True:
            matcher = difflib.SequenceMatcher(
                None,
                at_sequence,
                g_var.seq_cg[sys_type][chain_cg],
                autojunk=False)
            first_block = matcher.get_matching_blocks()[0]
            found = first_block[2] == len(at_sequence)
            if found:
                break
            if chain_cg >= len(g_var.seq_cg[sys_type]) - 1:
                print('\nCannot find a match for user supplied chain: ' +
                      str(chain_at))
                break
            chain_cg += 1

        if not found:
            continue

        # A match block is (start in atomistic, start in CG, length).
        _, cg_start, match_len = first_block
        cg_end = cg_start + match_len

        # Re-key the residues by their position in the CG chain.
        raw_chain = g_var.atomistic_protein_input_raw[chain_at]
        if chain_cg not in aligned:
            aligned[chain_cg] = {}
        aligned[chain_cg]['{}:{}'.format(cg_start, cg_end)] = {
            offset + cg_start: raw_chain[residue]
            for offset, residue in enumerate(raw_chain)
        }

        g_var.seq_cg[sys_type][chain_cg] = mask_sequence(
            g_var.seq_cg[sys_type][chain_cg], cg_start, cg_end)
        at_to_cg[chain_at] = chain_cg

    check_chain_alignment_coverage(aligned, sys_type)

    n_user_chains = len(g_var.atomistic_protein_input_raw)
    if n_user_chains < len(g_var.seq_cg[sys_type]):
        print(
            '### WARNING you have supplied fewer chains than exist in the CG system ###\n'
        )

    if aligned:
        g_var.user_at_input = True
        g_var.atomistic_protein_input_aligned = aligned
    else:
        g_var.user_at_input = False

    if g_var.group_chains == 'chain':
        g_var.group_chains = at_to_cg
Ejemplo n.º 2
0
                                                g_var.args.box)
 else:
     g_var.box_vec = box_vec_initial
     box_shift = np.array([0, 0, 0])
 read_in.real_box_vectors(g_var.box_vec)
 #### pbc fix and residue truncation if required
 read_in.fix_pbc(box_vec_initial, g_var.box_vec, box_shift)
 #### checks if fragment database and input files match
 at_mod.sanity_check()
 ### convert protein to atomistic representation
 g_var.tc['r_i_t'] = time.time()
 if 'PROTEIN' in g_var.cg_residues:
     g_var.coord_atomistic = at_mod_p.build_multi_residue_atomistic_system(
         g_var.cg_residues, 'PROTEIN')  ## converts protein to atomistic
     if not g_var.user_at_input and g_var.args.v >= 1:  ## prints protein sequences
         print(gen.print_sequnce_info('PROTEIN'))
     ## reads in user chain, runs a sequence alignment and finds existing disulphide bonds
     g_var.tc['p_d_n_t'] = time.time()
     if g_var.user_at_input:
         for file_num, file_name in enumerate(g_var.args.a):
             atomistic_protein_input_raw, g_var.chain_count = read_in.read_in_atomistic(
                 g_var.input_directory + 'AT_INPUT_' + str(file_num) +
                 '.pdb')  ## reads in user structure
             g_var.atomistic_protein_input_raw.update(
                 atomistic_protein_input_raw)
         read_in.duplicate_chain()  ## duplicates user chcains
         at_mod_p.check_sequence()  ## gets user sequence
         at_mod_p.align_chain_sequence('PROTEIN')  ## aligns chains
         at_mod_p.find_disulphide_bonds_user_sup(
         )  ## finds user disulphide bonds
     at_mod_p.find_disulphide_bonds_de_novo()  ## finds CG disulphide bonds