Example #1
0
 def calc_rmsd_matrix_intra(self, align=False, sel='all'):
     """Build the RMSD matrix between the coordinate sets of ``self.ag``.

     Works on a copy of ``self.ag``.  Each coordinate set is made the
     active one in turn and ``prody.calcRMSD`` is evaluated on the atoms
     matched by *sel*; the per-frame results are stacked row by row.

     :arg align: when true, ``prody.alignCoordsets`` is called on the
         selection before each RMSD evaluation
     :arg sel: ProDy selection string, default ``'all'``
     :returns: array produced by concatenating one row per coordinate set
     """
     group = self.ag.copy()
     rows = []
     for frame in range(group.numCoordsets()):
         # The selection is recreated after the active index changes,
         # exactly as the RMSD call needs it.
         group.setACSIndex(frame)
         if align:
             prody.alignCoordsets(group.select(sel))
         subset = group.select(sel)
         rows.append([prody.calcRMSD(subset)])
     return np.concatenate(rows)
Example #2
0
def prody_align(opt):
    """Align models in a PDB file or a PDB file onto others.

    With a single entry in ``opt.pdb`` the coordinate sets of that file are
    superposed onto model ``opt.model`` using the atoms matched by
    ``opt.select`` and the whole structure is written to ``<prefix>.pdb``.
    With several entries the first one is the reference and every other
    structure is handed to :func:`prody.matchAlign`; structures that align
    are written to ``<title>_aligned.pdb``.

    :arg opt: parsed command line options; the attributes read here are
        ``pdb``, ``select``, ``prefix``, ``model``, ``seqid``, ``overlap``
        and ``subparser``
    """

    import prody
    LOGGER = prody.LOGGER

    args = opt.pdb
    if len(args) == 1:
        pdb = args[0]
        LOGGER.info('Aligning multiple models in: ' + pdb)
        selstr, prefix, model = opt.select, opt.prefix, opt.model
        pdb = prody.parsePDB(pdb)
        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            opt.subparser.error('Selection {0:s} do not match any atoms.'
                               .format(repr(selstr)))
        LOGGER.info('{0:d} atoms will be used for alignment.'
                    .format(len(pdbselect)))
        # Model numbers are 1-based on the command line.
        pdbselect.setACSIndex(model-1)
        prody.printRMSD(pdbselect, msg='Before alignment ')
        prody.alignCoordsets(pdbselect)
        prody.printRMSD(pdbselect, msg='After alignment  ')
        if prefix == '':
            prefix = pdb.getTitle() + '_aligned'
        outfn = prefix + '.pdb'
        LOGGER.info('Writing file: ' + outfn)
        prody.writePDB(outfn, pdb)
    else:
        # Slice rather than pop(0) so the caller's opt.pdb is not modified.
        reffn, targets = args[0], args[1:]
        seqid = opt.seqid
        overlap = opt.overlap
        LOGGER.info('Aligning structures onto: ' + reffn)
        ref = prody.parsePDB(reffn)
        for arg in targets:
            if arg == reffn:
                continue
            # Skip files that look like the output of an earlier run.
            if '_aligned.pdb' in arg:
                continue
            pdb = prody.parsePDB(arg)
            result = prody.matchAlign(pdb, ref, seqid=seqid, overlap=overlap,
                                      tarsel=opt.select, allcsets=True,
                                      cslabel='Model', csincr=1)
            if result:
                outfn = pdb.getTitle() + '_aligned.pdb'
                LOGGER.info('Writing file: ' + outfn)
                prody.writePDB(outfn, pdb)
            else:
                LOGGER.warning('Failed to align ' + arg)
Example #3
0
def prody_align(opt):
    """Align models in a PDB file or a PDB file onto others.

    With a single entry in ``opt.pdb`` the coordinate sets of that file are
    superposed onto model ``opt.model`` using ``opt.select``, the maximum
    and mean RMSD are logged and the result is written to ``<prefix>.pdb``.
    With several entries the first one is the reference and every other
    structure is handed to :func:`prody.matchAlign`; structures that align
    are written to ``<title>_aligned.pdb``.

    :arg opt: parsed command line options; the attributes read here are
        ``pdb``, ``select``, ``prefix`` and ``model``

    The process exits with status ``-1`` when the selection matches no atoms.
    """

    # sys is imported locally, like prody, so the exit path below does not
    # depend on a module-level import.
    import sys

    import prody
    LOGGER = prody.LOGGER

    args = opt.pdb
    if len(args) == 1:
        pdb = args[0]
        LOGGER.info('Aligning multiple models in: ' + pdb)
        selstr, prefix, model = opt.select, opt.prefix, opt.model
        pdb = prody.parsePDB(pdb)
        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            LOGGER.warning('Selection "{0:s}" do not match any atoms.'
                           .format(selstr))
            sys.exit(-1)
        LOGGER.info('{0:d} atoms will be used for alignment.'
                    .format(len(pdbselect)))
        # Model numbers are 1-based on the command line.
        pdb.setACSIndex(model-1)
        prody.alignCoordsets(pdb, selstr=selstr)
        rmsd = prody.calcRMSD(pdb)
        LOGGER.info('Max RMSD: {0:0.2f} Mean RMSD: {1:0.2f}'
                    .format(rmsd.max(), rmsd.mean()))
        if prefix == '':
            prefix = pdb.getTitle() + '_aligned'
        outfn = prefix + '.pdb'
        LOGGER.info('Writing file: ' + outfn)
        prody.writePDB(outfn, pdb)
    else:
        # Slice rather than pop(0) so the caller's opt.pdb is not modified.
        reffn, targets = args[0], args[1:]
        LOGGER.info('Aligning structures onto: ' + reffn)
        ref = prody.parsePDB(reffn)
        for arg in targets:
            if arg == reffn:
                continue
            # Skip files that look like the output of an earlier run.
            if '_aligned.pdb' in arg:
                continue
            pdb = prody.parsePDB(arg)
            if prody.matchAlign(pdb, ref):
                outfn = pdb.getTitle() + '_aligned.pdb'
                LOGGER.info('Writing file: ' + outfn)
                prody.writePDB(outfn, pdb)
            else:
                LOGGER.warning('Failed to align ' + arg)
Example #4
0
    def calc_rmsd_with(self, mol, align=False, sel='all'):
        """Compute RMSDs between the frames of this molecule and of *mol*.

        The coordinate sets of both selections are stacked into a single
        atom group (this molecule's frames first).  Each of this molecule's
        frames is then made the active one and ``prody.calcRMSD`` is
        evaluated; only the entries belonging to *mol*'s frames are kept.

        :arg mol: object exposing an ``ag`` atom group, like ``self``
        :arg align: when true, ``prody.alignCoordsets`` is called on the
            merged group before each RMSD evaluation
        :arg sel: ProDy selection string applied to both atom groups
        :returns: array with one row per frame of this molecule
        :raises RuntimeError: if *sel* matches nothing in either atom group
            or matches a different number of atoms in each
        """
        picked1 = self.ag.copy().select(sel)
        picked2 = mol.ag.copy().select(sel)
        # select() returns None when nothing matches, so the check has to
        # happen before .copy() is called on the result.
        if picked1 is None or picked2 is None:
            raise RuntimeError('Selection is empty')
        sel1 = picked1.copy()
        sel2 = picked2.copy()
        if sel1.numAtoms() != sel2.numAtoms():
            raise RuntimeError('Selections are different')

        merged = np.concatenate([sel1.getCoordsets(), sel2.getCoordsets()])
        n1 = sel1.numCoordsets()
        sel1.setCoords(merged)
        rmsd = []
        for i in range(n1):
            sel1.setACSIndex(i)
            if align:
                prody.alignCoordsets(sel1)
            # Entries [n1:] correspond to the coordinate sets taken from mol.
            rmsd.append([prody.calcRMSD(sel1)[n1:]])
        rmsd = np.concatenate(rmsd)
        return rmsd
Example #5
0
 def calc_rmsd_to_frame(self, frame, align=False, sel='all'):
     """Return ``prody.calcRMSD`` of the selection with *frame* active.

     Works on a copy of ``self.ag`` whose active coordinate set index is
     set to *frame*.

     :arg frame: index of the coordinate set to use as the active one
     :arg align: when true, ``prody.alignCoordsets`` is called on the
         selection before the RMSD is evaluated
     :arg sel: ProDy selection string, default ``'all'``
     """
     group = self.ag.copy()
     group.setACSIndex(frame)
     if not align:
         return prody.calcRMSD(group.select(sel))
     prody.alignCoordsets(group.select(sel))
     return prody.calcRMSD(group.select(sel))
  lines = fileLoader.load_dat(file)
  atom_indices.append((lines,indices_name))

## Distance range parameters for RMSD clustering:
start_dist, end_dist, interval = args.distance_range[0:]


## Loads precalculated fingerprint matrix.
fingerprints_matrix = fileLoader.load_nc(args.fingerprint_matrix)

from netCDF4 import Dataset
## user-defined distance cutoffs.
from prody import alignCoordsets
for atom_inds in atom_indices:
  selection = pdb_file[atom_inds[0]]
  align_coords = alignCoordsets(selection)
  aligned_coords = align_coords.getCoordsets()
  distance_matrix = calcDistMatrix(aligned_coords)
  
#  dmat_file = Dataset('%s-distanceMatrix.nc'%atom_inds[1],mode = 'w',format='NETCDF4')
#  dmat_dim = dmat_file.createDimension( 'distance_matrix')
#  dmat_var = dmat_file.createVariable('distance_matrix','f',('distance_matrix',))
#  dmat_var[:] = distance_matrix
#  continue
#  break
  if args.split_array > 1:
    if not os.path.exists('split_assignments'): os.mkdir('split_assignments')    
    split_matrices = arraySplitter(distance_matrix,args.split_array)
    for i in range(len(split_matrices)):
      cluster = clusterData(split_matrices[i])
      dist_range = distRange(start_dist,end_dist,interval)
Example #7
0
def prody_align(*pdbs, **kwargs):
    """Align models in a PDB file or multiple structures in separate PDB files.
    By default, protein chains will be matched based on selected atoms and
    alignment will be performed based on matching residues.  If non-protein
    atoms are selected and selected atoms match in multiple structures,
    they will be used for alignment.

    :arg pdbs: PDB identifier(s) or filename(s)

    :arg select: atom selection string, default is :term:`calpha`,
        see :ref:`selections`

    :arg model: for NMR files, reference model index, default is ``1``

    :arg seqid: percent sequence identity, default is ``90``

    :arg overlap: percent sequence overlap, default is ``90``

    :arg prefix: prefix for output file, default is PDB filename

    :arg suffix: output filename suffix, default is :file:`_aligned`

    An optional *subparser* keyword is used to report an empty selection in
    the single-file case; without it :exc:`ValueError` is raised.
    :exc:`ValueError` is also raised when *select* matches nothing in the
    reference structure."""

    from numpy import all
    from prody import LOGGER, writePDB, parsePDB
    from prody import alignCoordsets, printRMSD, matchAlign, superpose

    def _option(name, default):
        # Apply the documented default when the keyword is missing or None.
        value = kwargs.get(name)
        return default if value is None else value

    selstr = kwargs.get('select', 'calpha')
    suffix = kwargs.get('suffix', '_aligned')
    if len(pdbs) == 1:
        pdb = pdbs[0]
        LOGGER.info('Aligning multiple models in: ' + pdb)
        prefix = kwargs.get('prefix')
        model = _option('model', 1)
        pdb = parsePDB(pdb)
        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            subparser = kwargs.get('subparser')
            if subparser:
                subparser.error('Selection {0} do not match any atoms.'.format(
                    repr(selstr)))
            else:
                raise ValueError('select does not match any atoms')
        LOGGER.info('{0} atoms will be used for alignment.'.format(
            len(pdbselect)))
        # Model numbers are 1-based for the user.
        pdbselect.setACSIndex(model - 1)
        printRMSD(pdbselect, msg='Before alignment ')
        alignCoordsets(pdbselect)
        printRMSD(pdbselect, msg='After alignment  ')
        outfn = (prefix or pdb.getTitle()) + suffix + '.pdb'
        LOGGER.info('Writing file: ' + outfn)
        writePDB(outfn, pdb)
    else:
        pdbs = list(pdbs)
        reffn = pdbs.pop(0)
        seqid = _option('seqid', 90)
        overlap = _option('overlap', 90)
        LOGGER.info('Aligning structures onto: ' + reffn)
        ref = parsePDB(reffn)

        ref_sel = ref.select(selstr)
        if ref_sel:
            LOGGER.info('Selection {0} matched {1} atoms.'.format(
                repr(selstr), len(ref_sel)))
        else:
            raise ValueError('selection {0} did not match any atoms'.format(
                repr(selstr)))
        # Chain matching is only attempted when the reference selection
        # contains at least two atoms matching 'ca'.
        match = ref_sel.numAtoms('ca') >= 2

        for arg in pdbs:
            if arg == reffn:
                continue
            LOGGER.info('Evaluating structure: ' + arg)
            pdb = parsePDB(arg)
            if match:
                result = matchAlign(pdb,
                                    ref,
                                    seqid=seqid,
                                    overlap=overlap,
                                    tarsel=selstr,
                                    allcsets=True,
                                    cslabel='Model',
                                    csincr=1)
                if result:
                    outfn = pdb.getTitle() + suffix + '.pdb'
                    LOGGER.info('Writing file: ' + outfn)
                    writePDB(outfn, pdb)
                    continue

            # Fallback: superpose directly when the selections line up
            # atom for atom.
            pdb_sel = pdb.select(selstr)
            if pdb_sel is None:
                # Nothing selected in this structure; report it like any
                # other alignment failure instead of crashing on len(None).
                LOGGER.warn('Failed to align structure ' + arg + '.')
                continue
            LOGGER.info('Selection {0} matched {1} atoms.'.format(
                repr(selstr), len(pdb_sel)))
            if (len(pdb_sel) == len(ref_sel)
                    and all(pdb_sel.getNames() == ref_sel.getNames())):
                printRMSD(ref_sel, pdb_sel, msg='Before alignment ')
                superpose(pdb_sel, ref_sel)
                printRMSD(ref_sel, pdb_sel, msg='After alignment  ')
                outfn = pdb.getTitle() + suffix + '.pdb'
                LOGGER.info('Writing file: ' + outfn)
                writePDB(outfn, pdb)
            else:
                LOGGER.warn('Failed to align structure ' + arg + '.')
Example #8
0
def s2_values(
    model_data, calculate_on_models, s2_records, s2_type, fit, fit_range
):
    """Return a dictionary with the average S2 values:
    s2_calced[residue] = value

    For every model in *calculate_on_models* a normalised bond vector is
    collected per residue (between the atom named *s2_type* and its partner
    from the ``N``/``H`` or ``CA``/``HA`` pair).  The order parameter of a
    residue is then

        S2 = 3/2 * (<x2>^2 + <y2>^2 + <z2>^2
                    + 2<xy>^2 + 2<xz>^2 + 2<yz>^2) - 1/2

    where the averages run over the collected models.

    :arg model_data: object exposing the ProDy atom group as ``atomgroup``
    :arg calculate_on_models: coordinate set indices to use
    :arg s2_records: records whose ``resnum`` attribute selects the residues
    :arg s2_type: ``"N"`` or ``"CA"``
    :arg fit: when true, coordinate sets are aligned on the C-alpha atoms
        before the vectors are collected
    :arg fit_range: optional ``"start-end"`` string; when given together
        with *fit*, each model is additionally superposed with weights set
        on that range
    :returns: dict mapping residue number to the computed S2 value
    """
    if fit:
        reference = model_data.atomgroup[:]

        model_data.atomgroup.setACSIndex(0)
        prody.alignCoordsets(model_data.atomgroup.calpha)

        if fit_range:
            # The range does not depend on the model, so parse it once.
            # NOTE(review): the end index is exclusive here, so the last
            # residue named in fit_range gets no weight - confirm intended.
            fit_start, fit_end = fit_range.split("-")
            fit_indices = range(int(fit_start) - 1, int(fit_end) - 1)

            for model_num in calculate_on_models:
                model_data.atomgroup.setACSIndex(model_num)

                mobile = model_data.atomgroup[:]
                matches = prody.matchChains(reference, mobile)
                match = matches[0]
                ref_chain = match[0]
                mob_chain = match[1]

                # np.int was removed from NumPy; it was an alias of the
                # builtin int, so this is the same dtype.
                weights = np.zeros((len(ref_chain), 1), dtype=int)

                for i in fit_indices:
                    weights[i] = 1

                t = prody.calcTransformation(mob_chain, ref_chain, weights)
                t.apply(mobile)

    # get bond vectors from models (model_data[] -> vectors{resnum : vector})
    vector_data = []
    s2_pairs = {"N": "H", "CA": "HA"}
    h_coords = None
    n_coords = None

    for model_num in calculate_on_models:
        model_data.atomgroup.setACSIndex(model_num)
        current_resindex = 1
        has_first, has_second = False, False
        vectors = {}

        for atom in model_data.atomgroup:
            atom_res = atom.getResnum()

            # entering a new residue: forget any half-collected pair
            if atom_res != current_resindex:
                current_resindex = atom_res
                has_first, has_second = False, False

            if atom.getName() == s2_type:
                has_second = True
                n_coords = Vec3D(atom.getCoords())

            elif atom.getName() == s2_pairs[s2_type]:
                has_first = True
                h_coords = Vec3D(atom.getCoords())

            # both atoms of the pair seen: store the unit bond vector
            if has_first and has_second:
                has_first, has_second = False, False
                vectors[atom_res] = Vec3D(
                    n_coords - h_coords
                ).normalize()

        vector_data.append(vectors)

    s2_calced = {}

    # iterating over STR records
    for resnum in [int(s2rec.resnum) for s2rec in s2_records]:

        x2, y2, z2, xy, xz, yz = 0, 0, 0, 0, 0, 0

        # iterating over PDB models
        for m in vector_data:

            # coordinates in model at a given resnum
            x, y, z = m[resnum].v[0], m[resnum].v[1], m[resnum].v[2]

            x2 += x ** 2
            y2 += y ** 2
            z2 += z ** 2
            xy += x * y
            xz += x * z
            yz += y * z

        x2 /= len(vector_data)
        y2 /= len(vector_data)
        z2 /= len(vector_data)
        xy /= len(vector_data)
        xz /= len(vector_data)
        yz /= len(vector_data)

        s2 = (
            3
            / 2.0
            * (
                x2 ** 2
                + y2 ** 2
                + z2 ** 2
                + 2 * xy ** 2
                + 2 * xz ** 2
                + 2 * yz ** 2
            )
            - 0.5
        )

        s2_calced[resnum] = s2

    return s2_calced
def s2_sidechain(csv_buffer, s2_sidechain, my_path, model_data, fit=None):
    """Back-calculate order parameters from given S2 dict and PDB models

    For every record the bond vector between the measured atom and its
    partner (looked up per residue type in ``sc_lot``) is collected from
    each model, and the order parameter is stored on ``record.calced``.
    The results are added to *csv_buffer*, correlation, Q-value and RMSD
    against ``record.value`` are printed, and two SVG plots
    (``S2_sc_corr.svg`` and ``S2_sc_graph.svg``) are saved under *my_path*.

    :arg csv_buffer: object with an ``add_data`` method receiving the
        calculated and experimental values
    :arg s2_sidechain: sequence of records with ``resnum``, ``type`` and
        ``value`` attributes
    :arg my_path: directory the SVG files are written to
    :arg model_data: object exposing ``atomgroup`` and ``model_count``
    :arg fit: when true, coordinate sets are aligned on the C-alpha atoms
        first
    :returns: dict with the summary statistics and graph names, or a dict
        with a single ``"error"`` key when an atom cannot be selected
    """

    sc_lot = {
        "VAL": {"CG1": "CB", "CG2": "CB"},
        "ILE": {"CG2": "CB", "CD": "CG1", "CD1": "CG1"},
        "THR": {"CG2": "CB"},
        "LEU": {"CD1": "CG", "CD2": "CG"},
        "ALA": {"CB": "CA"},
        "MET": {"CE": "SD"},
    }

    if fit:
        model_data.atomgroup.setACSIndex(0)
        prody.alignCoordsets(model_data.atomgroup.calpha)

    for record in s2_sidechain:
        vectors = []
        resnum = record.resnum
        my_type = record.type
        my_res = model_data.atomgroup[("A", resnum)].getResname()

        # find pair for measured aa
        pair = sc_lot[my_res][my_type]

        # the selection strings are the same for every model
        atom_sel = "resnum {} name {}".format(resnum, my_type)
        pair_sel = "resnum {} name {}".format(resnum, pair)

        for model_num in range(model_data.model_count):
            model_data.atomgroup.setACSIndex(model_num)

            try:
                coords = model_data.atomgroup.select(atom_sel).getCoords()[0]
                pair_coords = (
                    model_data.atomgroup.select(pair_sel).getCoords()[0]
                )
            except AttributeError:
                # select() gave None: the atom name is not in the structure
                return {
                    "error": "Sidechain order parameter atom name not found "
                    "in PDB. Please check your atom naming."
                }

            vectors.append(Vec3D(coords - pair_coords).normalize())

        x2, y2, z2, xy, xz, yz = 0, 0, 0, 0, 0, 0

        for vector in vectors:
            x, y, z = vector.v[0], vector.v[1], vector.v[2]

            x2 += x ** 2
            y2 += y ** 2
            z2 += z ** 2
            xy += x * y
            xz += x * z
            yz += y * z

        x2 /= len(vectors)
        y2 /= len(vectors)
        z2 /= len(vectors)
        xy /= len(vectors)
        xz /= len(vectors)
        yz /= len(vectors)

        s2 = 3 / 2.0 * (x2 ** 2 + y2 ** 2 + z2 ** 2 +
                        2 * xy ** 2 + 2 * xz ** 2 + 2 * yz ** 2) - 0.5

        record.calced = s2

    # The first record of each run of equal residue numbers goes to the
    # main data set, the following ones to the "(cont)" data set.
    sidechain_exp1, sidechain_exp2 = [], []
    sidechain_calc1, sidechain_calc2 = {}, {}

    prev_resnum = -100000

    for record in s2_sidechain:
        if record.resnum != prev_resnum:
            sidechain_exp1.append(record)
            sidechain_calc1[record.resnum] = record.calced
            prev_resnum = record.resnum
        else:
            sidechain_exp2.append(record)
            sidechain_calc2[record.resnum] = record.calced

    csv_buffer.add_data(
        {
            "name": "S2_meth",
            "calced": sidechain_calc1,
            "experimental": sidechain_exp1,
        }
    )

    if sidechain_exp2:
        csv_buffer.add_data(
            {
                "name": "S2_meth (cont)",
                "calced": sidechain_calc2,
                "experimental": sidechain_exp2,
            }
        )

    exp_values = [record.value for record in s2_sidechain]
    calced_values = [record.calced for record in s2_sidechain]
    n_records = len(s2_sidechain)

    # correlation calculation
    mean_calc = sum(calced_values) / n_records
    mean_exp = sum(exp_values) / n_records
    mean_prod = (
        sum(c * e for c, e in zip(calced_values, exp_values)) / n_records
    )

    dev_calc = math.sqrt(
        sum((c - mean_calc) ** 2 for c in calced_values) / n_records
    )
    dev_exp = math.sqrt(
        sum((e - mean_exp) ** 2 for e in exp_values) / n_records
    )

    correl = (mean_prod - (mean_calc * mean_exp)) / (dev_calc * dev_exp)
    print("Corr: ", correl)

    # Q-value and RMSD share the same sum of squared differences
    d2 = sum((c - e) ** 2 for c, e in zip(calced_values, exp_values))
    e2 = sum(e ** 2 for e in exp_values)

    Q = 100 * math.sqrt(d2) / math.sqrt(e2)
    q_value = round(Q, 6)
    print("Q-value: ", q_value)

    rmsd = math.sqrt(d2 / n_records)
    print("RMSD: ", round(rmsd, 6))

    miny = min(min(calced_values), min(exp_values))  # get minimum value
    maxy = max(max(calced_values), max(exp_values))  # get maximum value

    span = abs(miny - maxy)
    margin = int(span * 0.05)

    # NOTE(review): this test used to be followed by `elif span < 2` and
    # `elif span < 1` branches that could never run because `span < 10`
    # already catches those cases.  They were dropped to keep the effective
    # behaviour; reorder the tests if smaller margins were intended.
    if span < 10:
        margin = 0.3

    maxy += margin
    miny -= margin

    diag = list(np.arange(miny, maxy * 1.42, 0.1))  # draw graph diagonal

    plt.figure(figsize=(6, 5), dpi=80)
    plt.plot(diag, diag, linewidth=2.0, color="red", alpha=0.7)
    plt.plot(exp_values, calced_values, "bo")
    plt.axis([miny, maxy, miny, maxy])
    plt.xlabel("experimental")
    plt.ylabel("calculated")
    plt.tight_layout(pad=1.08)
    plt.savefig(my_path + "/" + "S2_sc_corr.svg", format="svg")
    plt.close()

    # x positions: repeated residue numbers are shifted by 0.3
    xs = []
    prev_resnum2 = -1

    for record in s2_sidechain:
        if record.resnum != prev_resnum2:
            xs.append(record.resnum)
            prev_resnum2 = record.resnum
        else:
            xs.append(record.resnum + 0.3)

    print("XS AXIS", xs)
    print("len SC", len(s2_sidechain))
    print("len xs", len(xs))

    plt.figure(figsize=(10, 5), dpi=80)
    plt.plot(
        xs,
        exp_values,
        linewidth=2.0,
        color="red",
        marker="o",
        label="exp",
        alpha=0.7,
    )
    plt.plot(
        xs,
        calced_values,
        linewidth=2.0,
        color="blue",
        marker="o",
        label="calc",
        alpha=0.7,
    )
    plt.legend(loc="lower left")
    plt.xlabel("residue number")
    plt.ylabel("value")
    # gca() returns the axes that were just plotted on; plt.axes() would add
    # a new, empty Axes to the figure instead.
    ax = plt.gca()
    ax.yaxis.grid()
    plt.tight_layout(pad=1.08)
    plt.savefig(my_path + "/" + "S2_sc_graph.svg", format="svg")
    plt.close()

    my_id = my_path.split("/")[-2] + "/"

    print("CORR GRAPH", my_id + "S2_sc_corr.svg")
    print("GRAPH", my_id + "S2_sc_graph.svg")

    s2_sc_data = {
        "S2_model_n": len(s2_sidechain),
        "correlation": "{0:.3f}".format(correl),
        "q_value": "{0:.3f}".format(q_value),
        "rmsd": "{0:.3f}".format(rmsd),
        "corr_graph_name": my_id + "S2_sc_corr.svg",
        "graph_name": my_id + "S2_sc_graph.svg",
    }

    return s2_sc_data
Example #10
0
def prody_align(*pdbs, **kwargs):
    """Align models in a PDB file or multiple structures in separate PDB files.
    By default, protein chains will be matched based on selected atoms and
    alignment will be performed based on matching residues.  If non-protein
    atoms are selected and selected atoms match in multiple structures,
    they will be used for alignment.

    :arg pdbs: PDB identifier(s) or filename(s)

    :arg select: atom selection string, default is :term:`calpha`,
        see :ref:`selections`

    :arg model: for NMR files, reference model index, default is ``1``

    :arg seqid: percent sequence identity, default is ``90``

    :arg overlap: percent sequence overlap, default is ``90``

    :arg prefix: prefix for output file, default is PDB filename

    :arg suffix: output filename suffix, default is :file:`_aligned`

    An optional *subparser* keyword is used to report an empty selection in
    the single-file case; without it :exc:`ValueError` is raised.
    :exc:`ValueError` is also raised when *select* matches nothing in the
    reference structure."""

    from numpy import all
    from prody import LOGGER, writePDB, parsePDB
    from prody import alignCoordsets, printRMSD, matchAlign, superpose

    def _option(name, default):
        # Apply the documented default when the keyword is missing or None.
        value = kwargs.get(name)
        return default if value is None else value

    selstr = kwargs.get('select', 'calpha')
    suffix = kwargs.get('suffix', '_aligned')
    if len(pdbs) == 1:
        pdb = pdbs[0]
        LOGGER.info('Aligning multiple models in: ' + pdb)
        prefix = kwargs.get('prefix')
        model = _option('model', 1)
        pdb = parsePDB(pdb)
        pdbselect = pdb.select(selstr)
        if pdbselect is None:
            subparser = kwargs.get('subparser')
            if subparser:
                subparser.error('Selection {0} do not match any atoms.'
                                .format(repr(selstr)))
            else:
                raise ValueError('select does not match any atoms')
        LOGGER.info('{0} atoms will be used for alignment.'
                    .format(len(pdbselect)))
        # Model numbers are 1-based for the user.
        pdbselect.setACSIndex(model-1)
        printRMSD(pdbselect, msg='Before alignment ')
        alignCoordsets(pdbselect)
        printRMSD(pdbselect, msg='After alignment  ')
        outfn = (prefix or pdb.getTitle()) + suffix + '.pdb'
        LOGGER.info('Writing file: ' + outfn)
        writePDB(outfn, pdb)
    else:
        pdbs = list(pdbs)
        reffn = pdbs.pop(0)
        seqid = _option('seqid', 90)
        overlap = _option('overlap', 90)
        LOGGER.info('Aligning structures onto: ' + reffn)
        ref = parsePDB(reffn)

        ref_sel = ref.select(selstr)
        if ref_sel:
            LOGGER.info('Selection {0} matched {1} atoms.'
                        .format(repr(selstr), len(ref_sel)))
        else:
            raise ValueError('selection {0} did not match any atoms'
                             .format(repr(selstr)))
        # Chain matching is only attempted when the reference selection
        # contains at least two atoms matching 'ca'.
        match = ref_sel.numAtoms('ca') >= 2

        for arg in pdbs:
            if arg == reffn:
                continue
            LOGGER.info('Evaluating structure: ' + arg)
            pdb = parsePDB(arg)
            if match:
                result = matchAlign(pdb, ref, seqid=seqid, overlap=overlap,
                                    tarsel=selstr, allcsets=True,
                                    cslabel='Model', csincr=1)
                if result:
                    outfn = pdb.getTitle() + suffix + '.pdb'
                    LOGGER.info('Writing file: ' + outfn)
                    writePDB(outfn, pdb)
                    continue

            # Fallback: superpose directly when the selections line up
            # atom for atom.
            pdb_sel = pdb.select(selstr)
            if pdb_sel is None:
                # Nothing selected in this structure; report it like any
                # other alignment failure instead of crashing on len(None).
                LOGGER.warn('Failed to align structure ' + arg + '.')
                continue
            LOGGER.info('Selection {0} matched {1} atoms.'
                        .format(repr(selstr), len(pdb_sel)))
            if (len(pdb_sel) == len(ref_sel) and
                    all(pdb_sel.getNames() == ref_sel.getNames())):
                printRMSD(ref_sel, pdb_sel, msg='Before alignment ')
                superpose(pdb_sel, ref_sel)
                printRMSD(ref_sel, pdb_sel, msg='After alignment  ')
                outfn = pdb.getTitle() + suffix + '.pdb'
                LOGGER.info('Writing file: ' + outfn)
                writePDB(outfn, pdb)
            else:
                LOGGER.warn('Failed to align structure ' + arg + '.')
def run_selection(my_path, original_values, user_selection_JSON):
    """Run the ensemble selection and write the selected models to disk.

    The cached ``DumpedData`` flags are reset, ``selection_on`` is run with
    the user's selection, the chosen coordinate sets are aligned on their
    C-alpha atoms and written to ``raw.pdb``, and a copy annotated with
    ``REMARK ORIGINAL MODELS`` lines is written to ``selected.pdb``.
    Finally the PCA comparison images are created.

    :arg my_path: working directory; its last path component is used as the
        ``id_code`` of the database entry
    :arg original_values: not used by this function
    :arg user_selection_JSON: passed to ``getUserSel``
    :returns: tuple ``(num_coordsets, iter_data, pca_image_names)``

    ``measure``, ``min_size``, ``max_size`` and ``overdrive`` are read from
    module globals; the latter three are set to ``None`` when undefined.
    """
    DumpedData.RDC_isloaded = False
    DumpedData.S2_isloaded = False
    DumpedData.PDB_isloaded = False
    DumpedData.Jcoup_isloaded = False
    DumpedData.ChemShift_isloaded = False

    working_dir = my_path

    user_sel = getUserSel(user_selection_JSON)

    pdb_output_name = my_path + "/raw.pdb"
    selected_name = my_path + "/selected.pdb"

    # start from a clean slate: drop the output of any previous run
    if os.path.isfile(pdb_output_name):
        os.remove(pdb_output_name)

    if os.path.isfile(selected_name):
        os.remove(selected_name)

    global max_size
    global min_size
    global overdrive

    if "min_size" not in globals():
        min_size = None

    if "max_size" not in globals():
        max_size = None

    print("max_size -> ", max_size)

    if "overdrive" not in globals():
        overdrive = None

    in_selection, iter_data, iter_all = selection_on(
        working_dir, measure, user_sel,
        min_size=min_size, max_size=max_size, overdrive=overdrive
    )

    print("ITER ALL", iter_all)

    for key, val in iter_data.items():
        print("CALCED ", key, '{0:.3f}'.format(val))

    DumpedData.loadPDBData(my_path)
    PDB_data = DumpedData.PDB_model_data
    sel_ensemble = PDB_data.atomgroup.copy()

    # delete from the end so the remaining indices stay valid
    for model_num in reversed(range(sel_ensemble.numCoordsets())):
        if model_num not in in_selection:
            sel_ensemble.delCoordset(model_num)

    num_coordsets = sel_ensemble.numCoordsets()

    print("NUM_COORDSETS: ", num_coordsets)

    prody.alignCoordsets(sel_ensemble.calpha)
    prody.writePDB(pdb_output_name, sel_ensemble)

    # report 1-based model numbers
    in_selection = [str(x+1) for x in sorted(in_selection)]
    remark_prefix = "REMARK ORIGINAL MODELS: "

    # Context managers make sure selected.pdb is flushed and closed before
    # anything downstream runs.
    with open(pdb_output_name, 'r') as dummy_pdb, \
            open(selected_name, "w") as output_pdb:
        for line in dummy_pdb:
            output_pdb.write(line)
            if 'REMARK' in line:
                model_line = remark_prefix
                for model_num in in_selection:
                    if len(model_line) >= 76:
                        # Line is full: flush it and start a new one.  The
                        # current model number goes on the new line instead
                        # of being dropped.
                        output_pdb.write(model_line + "\n")
                        model_line = remark_prefix
                    model_line += model_num + " "

                output_pdb.write(model_line + "\n")

    calc_id = my_path.split('/')[-1]
    print('calcID', calc_id)
    db_entry = CSX_upload.objects.get(id_code=calc_id)
    print(db_entry.PDB_file)
    print("db_entry", db_entry)

    pca_image_names = graph.pca.create_pca_comparison(
        my_path, db_entry.PDB_file, in_selection
    )

    return num_coordsets, iter_data, pca_image_names