Exemple #1
0
 def requires(self):
     """Pick the upstream task based on whether ``soakdb_files`` exists.

     Returns ``FindSoakDBFiles()`` when ``db_functions.table_exists`` reports
     the table, otherwise ``TransferAllFedIDsAndDatafiles()``.
     """
     _, cursor = db_functions.connectDB()
     if db_functions.table_exists(cursor, 'soakdb_files'):
         return FindSoakDBFiles()
     return TransferAllFedIDsAndDatafiles()
Exemple #2
0
    def requires(self):
        """Return the tasks that must run before this one.

        When ``proasis_hits`` has no ``ligand_list`` column the column is
        added and only ``StartLigandSearches()`` is required. Otherwise the
        requirement is a tuple of the project/file checks, the ligand search
        and one ``HitTransfer`` per entry returned by ``self.get_list()``.
        If building that tuple fails, ``data_in_proasis.CleanUpHits()`` is
        returned instead.
        """
        exists = db_functions.column_exists('proasis_hits', 'ligand_list')
        if not exists:
            try:
                conn, c = db_functions.connectDB()
                c.execute(
                    'ALTER TABLE proasis_hits ADD COLUMN ligand_list text;')
                conn.commit()
                return StartLigandSearches()
            except Exception:
                # Best effort: if the column could not be added, fall through
                # to the normal requirement list. ``Exception`` (rather than a
                # bare ``except``) lets SystemExit/KeyboardInterrupt propagate.
                pass

        try:
            run_list = self.get_list()
            return database_operations.FindProjects(
            ), database_operations.CheckFiles(), StartLigandSearches(), [
                data_in_proasis.HitTransfer(bound_pdb=pdb,
                                            crystal=crystal_name,
                                            protein_name=protein_name,
                                            smiles=smiles_string,
                                            mod_date=modification_string,
                                            ligands=ligand_list)
                for (pdb, crystal_name, protein_name, smiles_string,
                     modification_string, ligand_list) in run_list
            ], database_operations.FindProjects()
        except Exception:
            # Any failure while assembling the hit transfers falls back to
            # the clean-up task, as before, without trapping interpreter
            # exit or Ctrl-C.
            return data_in_proasis.CleanUpHits()
Exemple #3
0
    def get_list(self):
        """Collect the ``proasis_hits`` rows that are ready for transfer.

        Rows whose ``exists_2fofc``, ``exists_fofc``, ``exists_pdb`` or
        ``exists_mtz`` value stringifies to ``'0'`` are skipped.

        Returns:
            A list of 6-tuples of strings: (bound_conf, crystal_name, protein,
            smiles, modification_date, ligand_list).
        """
        _, cursor = db_functions.connectDB()
        cursor.execute(
            "SELECT bound_conf, crystal_name, protein, smiles, modification_date, ligand_list, exists_2fofc, exists_fofc, exists_pdb, exists_mtz FROM proasis_hits WHERE modification_date not like '' and ligand_list not like 'None' and bound_conf not like ''"
        )

        run_list = []
        for record in cursor.fetchall():
            # Columns 6-9 are the exists_* flags selected above.
            file_flags = [str(record[idx]) for idx in (6, 7, 8, 9)]
            if '0' in file_flags:
                continue
            run_list.append(tuple(str(record[idx]) for idx in range(6)))
        return run_list
Exemple #4
0
def find_proasis_repeats(protein):
    """Find structure titles that occur more than once in a project.

    The last whitespace-separated token of each structure's ``TITLE`` is used
    as the key. For every key seen more than once, the matching strucids and
    the ``bound_conf`` values stored for them in ``proasis_hits`` are gathered.

    Returns:
        dict with parallel lists under ``'crystal'``, ``'strucids'`` and
        ``'bound_confs'``.
    """
    strucids = paf.get_strucids_from_project(protein)
    titles = []
    for strucid in strucids:
        full_title = paf.get_strucid_json(strucid)['allStrucs'][0]['TITLE']
        titles.append(full_title.split()[-1])

    repeats = {'crystal': [], 'strucids': [], 'bound_confs':[]}

    for title, occurrences in dict(Counter(titles)).items():
        if occurrences <= 1:
            continue
        repeats['crystal'].append(title)
        repeats['strucids'].append(
            [sid for sid, seen_title in zip(strucids, titles) if seen_title == title])

    _, cursor = dbf.connectDB()
    for group in repeats['strucids']:
        confs = []
        for strucid in group:
            cursor.execute('select bound_conf from proasis_hits where strucid=%s', (strucid,))
            confs.extend(str(record[0]) for record in cursor.fetchall())
        repeats['bound_confs'].append(confs)

    return repeats
Exemple #5
0
 def run(self):
     """Transfer ``self.data_file`` and set its ``soakdb_files`` status to 2."""
     db_functions.transfer_data(self.data_file)

     connection, cursor = db_functions.connectDB()
     statement = 'UPDATE soakdb_files SET status_code=2 where filename like %s;'
     cursor.execute(statement, (self.data_file, ))
     connection.commit()
Exemple #6
0
 def requires(self):
     """Require one ``RemoveADFiles`` task per ``root_dir`` in ``proasis_out``.

     The substring ``'comp_chem'`` is stripped from each directory first.
     """
     _, cursor = dbf.connectDB()
     cursor.execute('select root_dir from proasis_out')
     directories = [
         str(record[0]).replace('comp_chem', '')
         for record in cursor.fetchall()
     ]
     return [RemoveADFiles(root_dir=direc) for direc in directories]
Exemple #7
0
def update_apo_field():
    """Set ``apo_name`` in ``proasis_out`` wherever the apo pdb file exists.

    The expected file name is the second-to-last ``/``-separated component of
    ``root_dir`` followed by ``_apo.pdb``; it is looked for inside ``root_dir``.
    """
    connection, cursor = dbf.connectDB()
    cursor.execute('SELECT root_dir FROM proasis_out')
    for record in cursor.fetchall():
        root_dir = str(record[0])
        apo_file = str(root_dir.split('/')[-2] + '_apo.pdb')
        if not os.path.isfile(os.path.join(root_dir, apo_file)):
            continue
        cursor.execute('UPDATE proasis_out SET apo_name = %s WHERE root_dir = %s', (apo_file, root_dir))
        connection.commit()
def get_to_dock():
    """Return every ``root_dir`` value from ``proasis_out`` as a list of strings."""
    _, cursor = dbf.connectDB()
    cursor.execute('SELECT root_dir FROM proasis_out')
    return [str(record[0]) for record in cursor.fetchall()]
Exemple #9
0
    def run(self):
        """Fetch the structure's sdf file and store its file name in ``proasis_out``."""
        destination = self.output().path
        print(destination)
        sdf_path = paf.get_struc_file(self.strucid, destination, 'sdf')

        connection, cursor = dbf.connectDB()

        # Only the final path component is stored as mol_name.
        sdf_name = sdf_path.rsplit('/', 1)[-1]
        cursor.execute(
            'UPDATE proasis_out SET mol_name = %s WHERE strucid = %s',
            (sdf_name, self.strucid))
        connection.commit()
Exemple #10
0
    def requires(self):
        """Require a ``FindLigands`` task for each hit lacking a ligand list.

        Adds the ``ligand_list`` column to ``proasis_hits`` first when it is
        missing, then selects every non-NULL ``bound_conf`` whose
        ``ligand_list`` is NULL.
        """
        connection, cursor = db_functions.connectDB()
        if not db_functions.column_exists('proasis_hits', 'ligand_list'):
            connection, cursor = db_functions.connectDB()
            cursor.execute('ALTER TABLE proasis_hits ADD COLUMN ligand_list text;')
            connection.commit()

        cursor.execute(
            "select bound_conf from proasis_hits where ligand_list is NULL and bound_conf is not NULL"
        )

        conf_list = []
        for record in cursor.fetchall():
            bound_conf = str(record[0])
            conf_list.append(bound_conf)
            print(bound_conf)

        ligand_tasks = [
            data_in_proasis.FindLigands(bound_conf=conf) for conf in conf_list
        ]
        find_projects = database_operations.FindProjects()
        check_files = database_operations.CheckFiles()
        return find_projects, check_files, ligand_tasks
Exemple #11
0
    def run(self):
        """Fetch the structure's mtz file, move it to the output path and record its name."""
        docking_path = os.path.join(self.root_dir, self.docking_dir)
        fetched_name = paf.get_struc_mtz(self.strucid, docking_path)
        fetched_path = os.path.join(self.root_dir, self.docking_dir, fetched_name)
        shutil.move(fetched_path, self.output().path)

        connection, cursor = dbf.connectDB()

        # Only the final path component of the output is stored as mtz_name.
        mtz_name = self.output().path.rsplit('/', 1)[-1]
        cursor.execute(
            'UPDATE proasis_out SET mtz_name = %s WHERE strucid = %s',
            (mtz_name, self.strucid))
        connection.commit()
Exemple #12
0
    def run(self):
        """Fetch the structure's 2fofc map, move it to the output path and record its name."""
        docking_path = os.path.join(self.root_dir, self.docking_dir)
        fetched_name = paf.get_struc_map(self.strucid, docking_path, '2fofc')
        fetched_path = os.path.join(self.root_dir, self.docking_dir, fetched_name)
        shutil.move(fetched_path, self.output().path)

        connection, cursor = dbf.connectDB()

        # Only the final path component of the output is stored.
        map_name = self.output().path.rsplit('/', 1)[-1]
        cursor.execute(
            'UPDATE proasis_out SET twofofc_name = %s WHERE strucid = %s',
            (map_name, self.strucid))
        connection.commit()
Exemple #13
0
    def run(self):
        """Write a copy of the curated pdb with the ligand lines removed.

        Looks up ``curated_name`` for ``self.strucid`` in ``proasis_out``,
        opens that file inside the docking directory and appends every line
        that contains none of the ligand strings to ``self.output().path``.

        Raises:
            Exception: if more than one row is found for the strucid, if no
                usable row is found (the ``proasis_out`` entry and docking
                directory are removed first), or if the curated pdb cannot be
                opened.
        """
        # NOTE(review): eval() executes whatever is in self.ligands. Confirm
        # the value can never come from untrusted input, or switch to
        # ast.literal_eval if it is always a plain literal.
        self.ligands = eval(self.ligands)
        print((len(self.ligands)))

        conn, c = dbf.connectDB()
        c.execute('SELECT curated_name from proasis_out WHERE strucid=%s',
                  (self.strucid, ))
        rows = c.fetchall()
        print((len(rows)))
        if len(rows) > 1:
            raise Exception('Multiple files where found for this structure: ' +
                            str(rows))

        # No row at all, or a row without columns: reset this crystal. The
        # empty-result case used to be detected through a NameError caught
        # by a bare except; it is now checked explicitly.
        if not rows or len(rows[0]) == 0:
            c.execute('DELETE from proasis_out WHERE curated_name=%s',
                      (str(self.crystal + '_' + 'curated.pdb'), ))
            conn.commit()
            shutil.rmtree(os.path.join(self.root_dir, self.docking_dir))
            raise Exception(
                'DB problem... resetting the datasource and files for this crystal'
            )

        curated_pdb = str(rows[0][0])
        print(curated_pdb)

        ligand_string = paf.get_lig_strings(self.ligands)

        working_dir = os.getcwd()
        os.chdir(os.path.join(self.root_dir, self.docking_dir))
        try:
            try:
                pdb_file = open(curated_pdb, 'r')
            except (IOError, OSError):
                raise Exception(str(rows))

            # Close the input deterministically.
            with pdb_file:
                kept_lines = [
                    line for line in pdb_file
                    if not any(lig in line for lig in ligand_string)
                ]

            # Open the output once instead of once per line. As before,
            # nothing is created when no line survives the filter.
            if kept_lines:
                with open(self.output().path, 'a') as f:
                    f.writelines(kept_lines)
        finally:
            # Always restore the caller's working directory, even on error.
            os.chdir(working_dir)
Exemple #14
0
    def get_file_list(self, status_code):
        """Return ``(filename, id)`` string pairs from ``soakdb_files``.

        Args:
            status_code: value matched against the ``status_code`` column; it
                is passed to the query as a string.
        """
        _, cursor = db_functions.connectDB()
        cursor.execute(
            'SELECT filename, id FROM soakdb_files WHERE status_code = %s',
            (str(status_code), ))
        return [(str(record[0]), str(record[1]))
                for record in cursor.fetchall()]
Exemple #15
0
def get_comp_chem_ready():
    """Return bound conformations whose refinement outcome matches 4 or 5.

    Candidate ``bound_conf`` values are those in ``proasis_hits`` with a
    non-empty ``strucid``; they are then filtered against ``refinement`` with
    ``outcome SIMILAR TO '(%4%|%5%)'``.

    Returns:
        list of ``bound_conf`` strings (empty when there are no candidates).
    """
    conn, c = dbf.connectDB()
    c.execute("SELECT bound_conf FROM proasis_hits WHERE strucid != ''")
    bound_list = [str(row[0]) for row in c.fetchall()]

    # An empty tuple would be rendered as "IN ()", which is a SQL syntax
    # error, so there is nothing to look up in that case.
    if not bound_list:
        return []

    c.execute('SELECT bound_conf FROM refinement WHERE bound_conf IN %s AND outcome SIMILAR TO %s',
              (tuple(bound_list), '(%4%|%5%)'))
    return [str(result[0]) for result in c.fetchall() if len(result) > 0]
Exemple #16
0
 def run(self):
     """Drop this file's rows from the per-file tables, re-transfer it and mark it status 2.

     Each delete is committed on its own before the next one runs.
     """
     connection, cursor = db_functions.connectDB()

     delete_statements = (
         'delete from lab where file_id=%s',
         'delete from refinement where file_id=%s',
         'delete from dimple where file_id=%s',
         'delete from data_processing where file_id=%s',
     )
     for statement in delete_statements:
         cursor.execute(statement, (self.file_id, ))
         connection.commit()

     db_functions.transfer_data(self.data_file)

     cursor.execute(
         'UPDATE soakdb_files SET status_code=2 where filename like %s;',
         (self.data_file, ))
     connection.commit()
Exemple #17
0
    def get_list(self):
        """Return ``(pandda_path, protein, reference_pdb)`` string triples.

        Rows of ``proasis_leads`` whose ``pandda_path`` or ``reference_pdb``
        is ``''`` or ``'None'`` are excluded by the query.
        """
        _, cursor = db_functions.connectDB()
        cursor.execute(
            "SELECT pandda_path, protein, reference_pdb FROM proasis_leads WHERE pandda_path !='' and pandda_path !='None' and reference_pdb !='' and reference_pdb !='None' "
        )
        return [(str(record[0]), str(record[1]), str(record[2]))
                for record in cursor.fetchall()]
Exemple #18
0
    def requires(self):
        """Require the hit transfers plus one ``EdstatsScores`` task per uploaded hit.

        Hits are the ``proasis_hits`` rows with a non-empty ``strucid``.

        Returns:
            A tuple ``(StartHitTransfers(), [EdstatsScores(...), ...])``.
        """
        conn, c = dbf.connectDB()
        c.execute("select crystal_name, strucid from proasis_hits where strucid !=''")
        run_list = [(str(row[0]), str(row[1])) for row in c.fetchall()]

        # The original wrote an unparenthesised generator expression inside
        # the tuple, which is a SyntaxError; a list keeps the per-hit tasks
        # as one element of the requirement tuple.
        return (data_in_proasis.StartHitTransfers(),
                [EdstatsScores(crystal=crystal_name, strucid=strucid_no)
                 for (crystal_name, strucid_no) in run_list])
Exemple #19
0
def get_strucids(run_list):
    """Look up strucid, crystal and ligand data for each bound-state path.

    Args:
        run_list: iterable of ``bound_conf`` path strings.

    Returns:
        dict with parallel lists under ``'strucid'``, ``'crystal'``,
        ``'directory'`` and ``'ligands'``. ``directory`` is the path with the
        pdb part removed; the pdb part includes the parent folder when that
        folder's name contains ``'Refine'``.
    """
    out_dict = {'strucid': [], 'crystal': [], 'directory': [], 'ligands': []}
    _, cursor = dbf.connectDB()
    for struc in run_list:
        cursor.execute("SELECT strucid, crystal_name, ligand_list FROM proasis_hits WHERE bound_conf=%s AND "
                       "ligand_list != 'None'", (struc,))
        for record in cursor.fetchall():
            out_dict['strucid'].append(str(record[0]))
            out_dict['crystal'].append(str(record[1]))
            out_dict['ligands'].append(str(record[2]))

            parts = struc.split('/')
            parent, filename = parts[-2], parts[-1]
            pdb = str(parent + '/' + filename) if 'Refine' in parent else filename

            out_dict['directory'].append(struc.replace(pdb, ''))

    return out_dict
Exemple #20
0
    def run(self):
        """Populate the database from each listed file, then from each distinct proposal.

        Each line of the input target is passed to
        ``db_functions.pop_soakdb``; afterwards every distinct ``proposal``
        in ``soakdb_files`` is passed to ``db_functions.pop_proposals``.
        """
        # connect to central postgres db
        _, cursor = db_functions.connectDB()

        # use list from previous step as input to write to postgres
        with self.input().open('r') as database_list:
            for entry in database_list.readlines():
                database_file = entry.replace('\n', '')
                out, err, proposal = db_functions.pop_soakdb(database_file)

        cursor.execute('SELECT proposal FROM soakdb_files')
        proposals = [str(record[0]) for record in cursor.fetchall()]

        for proposal_number in set(proposals):
            db_functions.pop_proposals(proposal_number)

        cursor.close()

        with self.output().open('w') as marker:
            marker.write('TransferFeDIDs DONE')
def export_ligand_edstats(filename):
    """Issue a ``COPY ligand_edstats TO`` statement targeting ``filename`` under the cwd.

    Args:
        filename: file name joined onto the current working directory.
    """
    destination = os.path.join(os.getcwd(), filename)
    _, cursor = dbf.connectDB()
    cursor.execute("COPY ligand_edstats TO %s DELIMITER ',' CSV HEADER;", (destination, ))
Exemple #22
0
    def run(self):
        """Collect hit and lead data for refined crystals and push it to postgres.

        Every ``refinement`` row whose outcome matches 3-6 is joined (by
        ``crystal_id``) with ``lab`` for smiles/protein and with ``dimple``
        for pandda path/reference pdb. The collected tables are handed to
        ``self.add_to_postgres`` once per protein, for ``proasis_leads`` and
        ``proasis_hits``.
        """
        # all data necessary for uploading hits
        crystal_data_dump_dict = {
            'crystal_name': [],
            'protein': [],
            'smiles': [],
            'bound_conf': [],
            'modification_date': [],
            'strucid': []
        }

        # all data necessary for uploading leads
        project_data_dump_dict = {
            'protein': [],
            'pandda_path': [],
            'reference_pdb': [],
            'strucid': []
        }

        outcome_string = '(%3%|%4%|%5%|%6%)'

        conn, c = db_functions.connectDB()

        c.execute(
            '''SELECT crystal_id, bound_conf, pdb_latest FROM refinement WHERE outcome SIMILAR TO %s''',
            (str(outcome_string), ))

        rows = c.fetchall()

        print((str(len(rows)) +
               ' crystals were found to be in refinement or above'))

        for row in rows:
            crystal_id = str(row[0])

            # Skip implausibly short ids before spending a query on them.
            if len(crystal_id) < 3:
                continue

            c.execute(
                '''SELECT smiles, protein FROM lab WHERE crystal_id = %s''',
                (crystal_id, ))

            lab_table = c.fetchall()

            if len(lab_table) > 1:
                print(('WARNING: ' + crystal_id +
                       ' has multiple entries in the lab table'))

            # Default to the prefix of the crystal id so the name is always
            # defined for the dimple rows below. Without this, a crystal with
            # no lab rows raised NameError or silently reused the previous
            # crystal's protein.
            protein_name = crystal_id.split('-')[0]

            for entry in lab_table:
                if len(str(entry[1])) < 2 or 'None' in str(entry[1]):
                    protein_name = crystal_id.split('-')[0]
                else:
                    protein_name = str(entry[1])

                crystal_data_dump_dict['protein'].append(protein_name)
                crystal_data_dump_dict['smiles'].append(entry[0])
                crystal_data_dump_dict['crystal_name'].append(row[0])
                crystal_data_dump_dict['bound_conf'].append(row[1])
                crystal_data_dump_dict['strucid'].append('')

                try:
                    modification_date = misc_functions.get_mod_date(str(
                        row[1]))
                except Exception:
                    # Best effort: an unreadable bound_conf yields an empty
                    # date rather than aborting the whole run.
                    modification_date = ''

                crystal_data_dump_dict['modification_date'].append(
                    modification_date)

            c.execute(
                '''SELECT pandda_path, reference_pdb FROM dimple WHERE crystal_id = %s''',
                (crystal_id, ))

            pandda_info = c.fetchall()

            for pandda_entry in pandda_info:
                project_data_dump_dict['protein'].append(protein_name)
                project_data_dump_dict['pandda_path'].append(pandda_entry[0])
                project_data_dump_dict['reference_pdb'].append(pandda_entry[1])
                project_data_dump_dict['strucid'].append('')

        project_table = pandas.DataFrame.from_dict(project_data_dump_dict)
        crystal_table = pandas.DataFrame.from_dict(crystal_data_dump_dict)

        protein_list = set(project_data_dump_dict['protein'])
        print(protein_list)

        for protein in protein_list:

            self.add_to_postgres(project_table, protein, ['reference_pdb'],
                                 project_data_dump_dict, 'proasis_leads')

            self.add_to_postgres(crystal_table, protein,
                                 ['crystal_name', 'smiles', 'bound_conf'],
                                 crystal_data_dump_dict, 'proasis_hits')

        # Empty marker file signalling completion.
        with self.output().open('wb') as f:
            f.write('')
Exemple #23
0
    def run(self):
        """Refresh ``status_code`` in ``soakdb_files`` for every listed file.

        Status codes written here:
            0 = new
            1 = changed
            2 = not changed (never written here; left as-is)

        Files already in the table are marked 1 when their current
        modification date compares greater than the stored one; files not in
        the table are populated and marked 0. If the ``lab`` table does not
        exist, every file is marked 0.
        """
        connection, cursor = db_functions.connectDB()
        seen = []

        if db_functions.table_exists(cursor, 'soakdb_files'):
            with self.input().open('r') as listing:
                files = listing.readlines()

            for filename in files:
                filename_clean = filename.rstrip('\n')

                cursor.execute(
                    'select filename, modification_date, status_code from soakdb_files where filename like %s;',
                    (filename_clean, ))

                for record in cursor.fetchall():
                    if not record:
                        continue

                    data_file = str(record[0])
                    seen.append(data_file)
                    stored_date = str(record[1])
                    current_date = misc_functions.get_mod_date(data_file)

                    if current_date > stored_date:
                        cursor.execute(
                            'UPDATE soakdb_files SET status_code = 1 where filename like %s;',
                            (filename_clean, ))
                        cursor.execute(
                            'UPDATE soakdb_files SET modification_date = %s where filename like %s;',
                            (current_date, filename_clean))
                        connection.commit()

                if filename_clean in seen:
                    continue

                # Not in the table yet: populate it and flag it as new.
                out, err, proposal = db_functions.pop_soakdb(filename_clean)
                db_functions.pop_proposals(proposal)
                cursor.execute(
                    'UPDATE soakdb_files SET status_code = 0 where filename like %s;',
                    (filename_clean, ))
                connection.commit()

            cursor.execute('select filename from soakdb_files;')

        if not db_functions.table_exists(cursor, 'lab'):
            cursor.execute('UPDATE soakdb_files SET status_code = 0;')
            connection.commit()

        with self.output().open('w') as marker:
            marker.write('')
Exemple #24
0
tr:nth-of-type(odd) { 
    background: #eee; 
    }
th { 
    background: #3498db; 
    color: white; 
    font-weight: bold; 
    }
td, th { 
    padding: 10px; 
    border: 1px solid #ccc; 
    text-align: left; 
    font-size:12px;
    }'''

# Module-level setup: open a DB connection and load the crystal and protein
# names recorded in proasis_hits.
conn, c = dbf.connectDB()

c.execute('select crystal_name from proasis_hits')
rows = c.fetchall()
crystal_list = []

for row in rows:
    crystal_list.append(str(row[0]))

# De-duplicate the crystal names (the resulting order is arbitrary).
crystal_list = list(set(crystal_list))
protein_list = []

# Unlike crystal_list, protein_list is left with one entry per row.
c.execute('select protein from proasis_hits')
rows = c.fetchall()
for row in rows:
    protein_list.append(str(row[0]))
def get_project_counts():
    """Count crystals per protein at each stage of the pipeline.

    Proteins come from the ``lab`` table; empty names and names containing
    ``None``, ``test``, ``null`` or ``QC`` are skipped.

    Returns:
        pandas DataFrame with columns ``protein``, ``mounted``,
        ``pandda_hit``, ``refinement``, ``comp_chem`` and ``depo``.
    """
    excluded_tags = ('None', 'test', 'null', 'QC')

    conn, c = dbf.connectDB()
    c.execute('select protein from lab')
    # The original pre-filter (`not 'null' or 'None' or 'test' in ...`) was
    # always true because of operator precedence, so it filtered nothing; the
    # single explicit filter below does the exclusion.
    protein_list = list(set(str(row[0]) for row in c.fetchall()))

    counts_dict = {
        'protein': [],
        'mounted': [],
        'pandda_hit': [],
        'refinement': [],
        'comp_chem': [],
        'depo': []
    }
    for protein in protein_list:
        if len(protein) < 1 or any(tag in protein for tag in excluded_tags):
            continue
        counts_dict['protein'].append(protein)

        c.execute(
            'select crystal_name from lab where protein = %s and mounting_result similar to %s',
            (str(protein), '(%Mounted%|%OK%)'))
        crystal_list = list(set(str(row[0]) for row in c.fetchall()))

        # `hit` counts dimple rows flagged True; hits_list holds the distinct
        # crystal names behind those rows.
        hit = 0
        hits_list = []
        for crystal in crystal_list:
            c.execute(
                'select pandda_hit, crystal_name from dimple where crystal_name = %s',
                (crystal, ))
            for row2 in c.fetchall():
                if str(row2[0]) == 'True':
                    hit += 1
                    hits_list.append(str(row2[1]))

        hits_list = list(set(hits_list))

        refinement = set()
        comp_chem = set()
        depo = set()

        for hit_name in hits_list:
            c.execute(
                'select outcome, crystal_name from refinement where outcome similar to %s and crystal_name = %s',
                ('(%3%|%4%|%5%)', hit_name))
            for row3 in c.fetchall():
                outcome = str(row3[0])
                if '3' in outcome:
                    refinement.add(str(row3[1]))
                if '4' in outcome:
                    comp_chem.add(str(row3[1]))
                if '5' in outcome:
                    depo.add(str(row3[1]))

        counts_dict['refinement'].append(len(refinement))
        counts_dict['comp_chem'].append(len(comp_chem))
        counts_dict['depo'].append(len(depo))
        counts_dict['mounted'].append(len(crystal_list) - hit)
        counts_dict['pandda_hit'].append(hit - len(refinement) -
                                         len(comp_chem) - len(depo))

    dataframe = pd.DataFrame.from_dict(counts_dict)
    return dataframe
Exemple #26
0
    def run(self):
        """Reconcile the strucids stored in the database with those in proasis.

        Steps:
            1. Gather the strucids recorded for this protein in
               ``proasis_hits`` (per crystal) and ``proasis_leads``.
            2. Blank out database strucids that proasis does not know about.
            3. Delete proasis structures that the database does not know about.
            4. For repeated uploads of an identical bound state, keep the
               first structure and delete the rest from the database and
               from proasis.
            5. Dump the repeats found to ``test.csv``.
        """
        project_strucids = paf.get_strucids_from_project(self.protein)

        # get crystal names for protein according to db
        conn, c = dbf.connectDB()

        c.execute('select crystal_name from proasis_hits where protein=%s', (self.protein,))
        crystal_list = list(set(str(row[0]) for row in c.fetchall()))

        # A set: each strucid only needs to be reconciled once.
        db_strucids = set()

        # get info for crystals identified
        for crystal in crystal_list:
            c.execute(
                "select strucid, bound_conf, modification_date from proasis_hits "
                "where crystal_name like %s and strucid NOT LIKE ''",
                (crystal,))
            db_strucids.update(str(row[0]) for row in c.fetchall())

        # This query does not depend on the crystal, so it runs once rather
        # than once per crystal.
        # NOTE(review): as in the original, leads are only consulted when the
        # protein has at least one hit crystal - confirm that is intended,
        # since otherwise lead structures are treated as unknown below.
        if crystal_list:
            c.execute("select strucid from proasis_leads where protein=%s and strucid!=''", (self.protein,))
            db_strucids.update(str(row[0]) for row in c.fetchall())

        # clear up mismatching entries
        in_common = db_strucids & set(project_strucids)

        for strucid in sorted(db_strucids - in_common):
            print(self.protein + ': ' + strucid + ' found in database but not in proasis')
            print('removing entry from db...')
            c.execute("UPDATE proasis_hits set strucid='' where strucid=%s", (strucid,))
            conn.commit()
            c.execute("UPDATE proasis_leads set strucid='' where strucid=%s", (strucid,))
            conn.commit()
            print('\n')

        for strucid in project_strucids:
            if strucid not in in_common:
                print(self.protein + ': ' + strucid + ' found in proasis but not in db')
                print('removing entry from proasis...')
                paf.delete_structure(strucid)
                print('\n')

        # clean up repeats
        repeats = find_proasis_repeats(self.protein)
        for i, x in enumerate(repeats['crystal']):
            bound_list = repeats['bound_confs'][i]
            strucids = repeats['strucids'][i]

            # Only act when every strucid maps to the same single bound state.
            if len(bound_list) != len(strucids) or len(set(bound_list)) != 1:
                continue

            print(str('identical uploaded structures: ' + str(strucids)) + ' (' + x + ')')
            print('removing repeat structures from proasis, and updating database...')

            # Keep the first upload; remove the others.
            for struc, conf in zip(strucids[1:], bound_list[1:]):
                c.execute('DELETE FROM proasis_hits WHERE strucid=%s and bound_conf=%s', (struc, conf))
                conn.commit()
                paf.delete_structure(struc)

        pd.DataFrame.from_dict(repeats).to_csv('test.csv')