예제 #1
0
 def check_modification_date(self, filename):
     """Drop stored structures for this bound conformation if ``filename`` changed.

     When ``filename`` exists and its modification date (as reported by
     ``misc_functions.get_mod_date``) differs from ``self.mod_date``, every
     ``strucid`` recorded in ``proasis_hits`` for ``self.bound_pdb`` and
     ``self.mod_date`` is passed to ``proasis_api_funcs.delete_structure``
     and the ``strucid`` column of those rows is then set to NULL.

     :param filename: path of the file whose modification date is checked
     :return: None
     """
     if not os.path.isfile(filename):
         return

     proasis_file_date = misc_functions.get_mod_date(filename)
     modification_date = self.mod_date
     if proasis_file_date == modification_date:
         return

     conn, c = db_functions.connectDB()
     c.execute(
         'SELECT strucid FROM proasis_hits WHERE bound_conf = %s and modification_date = %s',
         (self.bound_pdb, modification_date))

     deleted_any = False
     for row in c.fetchall():
         strucid = row[0]
         # A NULL strucid comes back as None; str(None) == 'None' would pass
         # the length check and request deletion of a structure named 'None'.
         if strucid is None or len(str(strucid)) <= 1:
             continue
         proasis_api_funcs.delete_structure(str(strucid))
         deleted_any = True

     if deleted_any:
         # The UPDATE does not depend on the individual row, so run it once
         # after all deletions and commit so the change is persisted.
         c.execute(
             'UPDATE proasis_hits SET strucid = NULL WHERE bound_conf = %s and modification_date = %s',
             (self.bound_pdb, modification_date))
         conn.commit()
예제 #2
0
def pop_soakdb(database_file):
    """Record ``database_file`` in ``soakdb_files`` and look up its proposal group.

    Creates the ``soakdb_files`` table if needed, derives the proposal from
    the file path, runs ``getent group <proposal>`` and inserts a
    (filename, modification_date, proposal) row unless a row with the same
    filename and modification date already exists.

    :param database_file: '/'-separated path; the proposal is taken from the
        sixth '/'-separated component (index 5), up to its first '-'
    :return: tuple ``(out, err, proposal)`` where ``out`` is the stdout of
        ``getent`` and ``err`` is None (stderr is not captured)
    :raises IndexError: if the path has fewer than six '/'-separated parts
    """
    conn, c = connectDB()
    # create a table to hold info on sqlite files
    c.execute(
        '''CREATE TABLE IF NOT EXISTS soakdb_files (id SERIAL UNIQUE PRIMARY KEY, filename TEXT, modification_date BIGINT, proposal TEXT, status_code INT);''')
    conn.commit()
    # take proposal number from filepath (for whitelist)
    proposal = database_file.split('/')[5].split('-')[0]
    # Pass an argument list and no shell: the proposal comes from a file
    # path and must never be interpreted as shell syntax.
    try:
        proc = subprocess.Popen(['getent', 'group', str(proposal)],
                                stdout=subprocess.PIPE)
        out, err = proc.communicate()
    except OSError:
        # getent not available: same result the shell invocation produced
        # (empty stdout, stderr not captured).
        out, err = b'', None

    # need to put modification date to use in the proasis upload scripts
    modification_date = misc_functions.get_mod_date(database_file)
    c.execute(
        '''INSERT INTO soakdb_files (filename, modification_date, proposal) SELECT %s,%s,%s WHERE NOT EXISTS (SELECT filename, modification_date FROM soakdb_files WHERE filename = %s AND modification_date = %s)''',
        (database_file, int(modification_date), proposal, database_file, int(modification_date)))
    conn.commit()

    return out, err, proposal
예제 #3
0
 def output(self):
     """Return the local ``.added`` marker target for this task.

     The path is ``./leads/<name>_<mod date>.added`` where the date is
     obtained from ``misc_functions.get_mod_date`` for
     ``self.reference_structure``.
     """
     stamp = misc_functions.get_mod_date(self.reference_structure)
     marker_path = ''.join(
         ('./leads/', str(self.name), '_', stamp, '.added'))
     return luigi.LocalTarget(marker_path)
예제 #4
0
    def run(self):
        """Classify every file listed in the task input against ``soakdb_files``.

        For each non-blank line of the input target the matching rows of
        ``soakdb_files`` are given a status code:

        * 0 = new (file was not in the table; it is added via
          ``db_functions.pop_soakdb`` / ``pop_proposals``)
        * 1 = changed (current modification date is greater than the stored
          one; the stored date is updated)
        * 2 = not changed

        If the ``lab`` table does not exist, every row is reset to status 0.
        Finally an empty output target is written to mark completion.
        """
        conn, c = db_functions.connectDB()
        exists = db_functions.table_exists(c, 'soakdb_files')

        # filenames found in soakdb_files; a set keeps membership tests cheap
        checked = set()

        if exists:
            with self.input().open('r') as f:
                files = f.readlines()

            for filename in files:

                filename_clean = filename.rstrip('\n')
                if not filename_clean:
                    # a blank line would otherwise reach pop_soakdb and fail
                    # while splitting the (empty) path
                    continue

                # NOTE(review): LIKE treats '_' and '%' inside the filename
                # as wildcards; an exact '=' match may be intended - confirm.
                c.execute(
                    'select filename, modification_date from soakdb_files where filename like %s;',
                    (filename_clean, ))

                for row in c.fetchall():
                    data_file = str(row[0])
                    checked.add(data_file)
                    old_mod_date = str(row[1])
                    current_mod_date = misc_functions.get_mod_date(data_file)

                    # both values are compared as returned/stringified
                    if current_mod_date > old_mod_date:
                        c.execute(
                            'UPDATE soakdb_files SET status_code = 1 where filename like %s;',
                            (filename_clean, ))
                        c.execute(
                            'UPDATE soakdb_files SET modification_date = %s where filename like %s;',
                            (current_mod_date, filename_clean))
                    else:
                        c.execute(
                            'UPDATE soakdb_files SET status_code = 2 where filename like %s;',
                            (filename_clean, ))
                    conn.commit()

                if filename_clean not in checked:
                    out, err, proposal = db_functions.pop_soakdb(
                        filename_clean)
                    db_functions.pop_proposals(proposal)
                    c.execute(
                        'UPDATE soakdb_files SET status_code = 0 where filename like %s;',
                        (filename_clean, ))
                    # commit here: otherwise the status of a new file is lost
                    # whenever no later statement happens to commit
                    conn.commit()

            # TODO: warn about files recorded in soakdb_files that were not
            # seen in the input list / no longer exist on disk.

        exists = db_functions.table_exists(c, 'lab')
        if not exists:
            c.execute('UPDATE soakdb_files SET status_code = 0;')
            conn.commit()

        with self.output().open('w') as f:
            f.write('')
예제 #5
0
    def run(self):
        """Collect hit and lead data from the database and hand it to ``add_to_postgres``.

        Crystals whose ``refinement.outcome`` matches ``outcome_string`` are
        looked up in the ``lab`` table (smiles, protein) and the ``dimple``
        table (pandda_path, reference_pdb). The collected columns are turned
        into two pandas DataFrames and, per protein, passed to
        ``self.add_to_postgres`` for the ``proasis_leads`` and
        ``proasis_hits`` tables. An empty output target is written at the end.
        """
        # all data necessary for uploading hits
        crystal_data_dump_dict = {
            'crystal_name': [],
            'protein': [],
            'smiles': [],
            'bound_conf': [],
            'modification_date': [],
            'strucid': []
        }

        # all data necessary for uploading leads
        project_data_dump_dict = {
            'protein': [],
            'pandda_path': [],
            'reference_pdb': [],
            'strucid': []
        }

        # SIMILAR TO pattern: outcome contains a 3, 4, 5 or 6
        outcome_string = '(%3%|%4%|%5%|%6%)'

        conn, c = db_functions.connectDB()

        c.execute(
            '''SELECT crystal_id, bound_conf FROM refinement WHERE outcome SIMILAR TO %s''',
            (str(outcome_string), ))

        rows = c.fetchall()

        print(
            str(len(rows)) +
            ' crystals were found to be in refinement or above')

        for row in rows:
            crystal_id = str(row[0])

            # ids shorter than three characters are ignored; check before
            # spending a query on them
            if len(crystal_id) < 3:
                continue

            c.execute(
                '''SELECT smiles, protein FROM lab WHERE crystal_id = %s''',
                (crystal_id, ))

            lab_table = c.fetchall()

            if len(lab_table) > 1:
                print('WARNING: ' + crystal_id +
                      ' has multiple entries in the lab table')

            # Default used when the lab table has no row for this crystal, so
            # the dimple loop below never sees an undefined name or the
            # protein of the previous crystal.
            protein_name = crystal_id.split('-')[0]

            for entry in lab_table:
                if len(str(entry[1])) < 2 or 'None' in str(entry[1]):
                    # no usable protein in the lab table: fall back to the
                    # part of the crystal id before the first '-'
                    protein_name = crystal_id.split('-')[0]
                else:
                    protein_name = str(entry[1])

                if len(str(row[1])) < 5:
                    print('No bound conf for ' + crystal_id)
                    continue

                crystal_data_dump_dict['protein'].append(protein_name)
                crystal_data_dump_dict['smiles'].append(entry[0])
                crystal_data_dump_dict['crystal_name'].append(row[0])
                crystal_data_dump_dict['bound_conf'].append(row[1])
                crystal_data_dump_dict['strucid'].append('')

                # best effort: an unreadable bound conf gets an empty date
                try:
                    modification_date = misc_functions.get_mod_date(str(
                        row[1]))
                except Exception:
                    modification_date = ''

                crystal_data_dump_dict['modification_date'].append(
                    modification_date)

            c.execute(
                '''SELECT pandda_path, reference_pdb FROM dimple WHERE crystal_id = %s''',
                (crystal_id, ))

            pandda_info = c.fetchall()

            for pandda_entry in pandda_info:
                project_data_dump_dict['protein'].append(protein_name)
                project_data_dump_dict['pandda_path'].append(pandda_entry[0])
                project_data_dump_dict['reference_pdb'].append(pandda_entry[1])
                project_data_dump_dict['strucid'].append('')

        project_table = pandas.DataFrame.from_dict(project_data_dump_dict)
        crystal_table = pandas.DataFrame.from_dict(crystal_data_dump_dict)

        protein_list = set(project_data_dump_dict['protein'])
        print(protein_list)

        for protein in protein_list:

            self.add_to_postgres(project_table, protein, ['reference_pdb'],
                                 project_data_dump_dict, 'proasis_leads')

            self.add_to_postgres(crystal_table, protein,
                                 ['crystal_name', 'smiles', 'bound_conf'],
                                 crystal_data_dump_dict, 'proasis_hits')

        with self.output().open('wb') as f:
            f.write('')
예제 #6
0
    def run(self):
        """Update ``soakdb_files`` status codes for the files listed in the input.

        Status codes written to ``soakdb_files.status_code``:

        * 0 = new: the file had no row; one is created through
          ``db_functions.pop_soakdb`` and ``db_functions.pop_proposals``
        * 1 = changed: the current modification date compares greater than
          the stored one, which is then overwritten
        * 2 = not changed

        When the ``lab`` table is missing, all rows are reset to 0. The
        method ends by writing an empty output target.
        """
        conn, c = db_functions.connectDB()
        exists = db_functions.table_exists(c, 'soakdb_files')

        # filenames that already have a row; set for O(1) membership tests
        checked = set()

        if exists:
            with self.input().open('r') as f:
                files = f.readlines()

            for filename in files:

                filename_clean = filename.rstrip('\n')
                if not filename_clean:
                    # skip blank lines: pop_soakdb cannot derive a proposal
                    # from an empty path
                    continue

                # NOTE(review): with LIKE, '_' and '%' in the filename act
                # as wildcards; confirm whether '=' was intended.
                c.execute(
                    'select filename, modification_date from soakdb_files where filename like %s;',
                    (filename_clean, ))

                for row in c.fetchall():
                    data_file = str(row[0])
                    checked.add(data_file)
                    old_mod_date = str(row[1])
                    current_mod_date = misc_functions.get_mod_date(data_file)

                    if current_mod_date > old_mod_date:
                        c.execute(
                            'UPDATE soakdb_files SET status_code = 1 where filename like %s;',
                            (filename_clean, ))
                        c.execute(
                            'UPDATE soakdb_files SET modification_date = %s where filename like %s;',
                            (current_mod_date, filename_clean))
                    else:
                        c.execute(
                            'UPDATE soakdb_files SET status_code = 2 where filename like %s;',
                            (filename_clean, ))
                    conn.commit()

                if filename_clean not in checked:
                    out, err, proposal = db_functions.pop_soakdb(
                        filename_clean)
                    db_functions.pop_proposals(proposal)
                    c.execute(
                        'UPDATE soakdb_files SET status_code = 0 where filename like %s;',
                        (filename_clean, ))
                    # persist the status of the new file; without this the
                    # update is only saved if a later statement commits
                    conn.commit()

        exists = db_functions.table_exists(c, 'lab')
        if not exists:
            c.execute('UPDATE soakdb_files SET status_code = 0;')
            conn.commit()

        with self.output().open('w') as f:
            f.write('')