Beispiel #1
0
    def run(self):
        """Create or refresh ProasisHits / ProasisLeads rows for refined crystals.

        For every ``Refinement`` whose ``outcome`` is >= 4 this method:

        1. picks the pdb file to use (``bound_conf`` if it is an existing file,
           otherwise a file derived from ``pdb_latest``),
        2. requires ``refine.mtz``, ``2fofc.map`` and ``fofc.map`` to be found by
           ``db_functions.check_file_status``,
        3. collects the ``LIG`` records found in the pdb file and works out which
           of them are alternate conformations,
        4. updates the matching ``ProasisHits`` rows (or creates them), and
        5. creates or deletes the ``ProasisLeads`` row for the crystal's dimple
           reference, depending on whether the reference pdb still exists.

        The number of refinements skipped for missing files is printed, and an
        empty marker is written to ``self.output()``.

        :return: nothing
        :rtype: None
        """

        def refresh_entry(entry, pdb_path, pdb_mod_date, map_paths, ligands, altconf=None):
            """Bring one ProasisHits row up to date with the files on disk.

            Nothing happens when the row is at least as new as ``pdb_mod_date``
            and already has a ``strucid``. If a ``strucid`` exists, the
            structure is deleted from proasis together with its output
            directories and any copies stored under ``self.hit_directory``
            before the fields are overwritten.
            """
            if not (entry.modification_date < pdb_mod_date or not entry.strucid):
                return

            if entry.strucid:
                for out in ProasisOut.objects.filter(proasis=entry):
                    shutil.rmtree(os.path.join(out.root, out.start))

                proasis_api_funcs.delete_structure(entry.strucid)
                entry.strucid = None
                entry.save()

                # only files that live inside the hit directory are removed
                for stale_path in (entry.pdb_file, entry.mtz, entry.two_fofc, entry.fofc):
                    if self.hit_directory in stale_path:
                        os.remove(stale_path)

            entry.pdb_file = pdb_path
            entry.modification_date = pdb_mod_date
            entry.mtz, entry.two_fofc, entry.fofc = map_paths
            entry.ligand_list = ligands
            if altconf is not None:
                entry.altconf = altconf
            entry.save()

        fail_count = 0
        # NOTE(review): this was previously described as "'in refinement' (3) or
        # above", but the filter selects outcome >= 4 - confirm which is intended
        refinement = Refinement.objects.filter(outcome__gte=4)

        for obj in refinement:
            bound_conf = ''
            confs = []
            ligand_list = []

            # if there is a pdb file named in the bound_conf field, use it as the upload structure for proasis
            if obj.bound_conf:
                if os.path.isfile(obj.bound_conf):
                    bound_conf = obj.bound_conf
            # otherwise, use the most recent pdb file (according to soakdb)
            elif obj.pdb_latest:
                if os.path.isfile(obj.pdb_latest):
                    # if this is from a refinement folder, find the bound-state pdb file, rather than the ensemble
                    if 'Refine' in obj.pdb_latest:
                        search_path = '/'.join(obj.pdb_latest.split('/')[:-1])
                        files = glob.glob(
                            str(search_path + '/refine*split.bound*.pdb'))
                        # only an unambiguous match is used
                        if len(files) == 1:
                            bound_conf = files[0]
                    else:
                        # not a refinement folder: just use the latest pdb file
                        bound_conf = obj.pdb_latest
            else:
                # no pdb = no proasis upload
                fail_count += 1
                continue

            mtz = db_functions.check_file_status('refine.mtz', bound_conf)
            two_fofc = db_functions.check_file_status('2fofc.map', bound_conf)
            fofc = db_functions.check_file_status('fofc.map', bound_conf)

            # element 0 is used as the "found" flag, element 1 as the stored path
            if not mtz[0] or not two_fofc[0] or not fofc[0]:
                fail_count += 1
                continue

            # if a suitable pdb file is found, then search for ligands
            if bound_conf:
                try:
                    with open(bound_conf, 'r') as pdb_file:
                        for line in pdb_file:
                            # ignore LIG in link for strange phenix format
                            if 'LIG' in line and 'LINK' not in line:
                                # ligands identified by 'LIG', with preceding '.' for alt conf letter;
                                # the whole match is kept (not split) so altconfs can be told apart
                                match = re.search(r".LIG.......", line)
                                if match:
                                    ligand_list.append(match.group())
                except (OSError, ValueError):
                    # unreadable / undecodable file: treat as "no ligands"
                    ligand_list = None

            # no ligands in the pdb file = no upload to proasis
            if not ligand_list:
                continue

            # get a unique list of ligands
            unique_ligands = list(set(ligand_list))
            # remove the first letter (alt conf) from unique ligands
            lig_no_conf = [lig[1:] for lig in unique_ligands]

            for stripped in lig_no_conf:
                # more than one entry for the same lig string without its alt conf
                # letter means this is an alt conf situation
                if lig_no_conf.count(stripped) > 1:
                    confs.extend([lig for lig in unique_ligands if stripped in lig])

            # get the date the pdb file was modified
            mod_date = misc_functions.get_mod_date(bound_conf)

            if mod_date:
                map_paths = (mtz[1], two_fofc[1], fofc[1])
                existing = ProasisHits.objects.filter(
                    refinement=obj, crystal_name=obj.crystal_name)

                # if there's already an entry for that structure
                if existing.exists():
                    if not confs:
                        for entry in existing:
                            refresh_entry(entry, bound_conf, mod_date,
                                          map_paths, unique_ligands)
                    else:
                        for conf in confs:
                            # NOTE(review): .get() raises if the altconf row is
                            # missing or duplicated - unchanged from before
                            entry = ProasisHits.objects.get(
                                refinement=obj,
                                crystal_name=obj.crystal_name,
                                altconf=conf)
                            refresh_entry(entry, bound_conf, mod_date,
                                          map_paths, unique_ligands,
                                          altconf=conf)

                # if there's not already an entry for that structure
                else:
                    common_fields = dict(
                        refinement=obj,
                        crystal_name=obj.crystal_name,
                        pdb_file=bound_conf,
                        modification_date=mod_date,
                        mtz=mtz[1],
                        two_fofc=two_fofc[1],
                        fofc=fofc[1],
                        ligand_list=unique_ligands)
                    if not confs:
                        # create entry without an altconf
                        ProasisHits.objects.get_or_create(**common_fields)
                    else:
                        for conf in confs:
                            # create an entry for each altconf
                            # TODO: The pdb file will need to be edited later to pull out other altconfs of the same lig
                            ProasisHits.objects.get_or_create(
                                altconf=conf, **common_fields)

                dimple = Dimple.objects.filter(crystal_name=obj.crystal_name)
                if dimple.count() == 1:
                    reference = dimple[0].reference
                    if reference and reference.reference_pdb:
                        if os.path.isfile(reference.reference_pdb):
                            ProasisLeads.objects.get_or_create(
                                reference_pdb=reference)
                        elif ProasisLeads.objects.filter(
                                reference_pdb=reference).exists():
                            # the reference pdb has disappeared: drop the lead
                            print('removing...')
                            proasis_lead_entry = ProasisLeads.objects.get(
                                reference_pdb=reference)
                            proasis_lead_entry.delete()

        print(fail_count)

        with self.output().open('w') as f:
            f.write('')
Beispiel #2
0
def check_file_upload(filename, model, log_directory=DirectoriesConfig().log_directory):
    """Compare the rows of a soakdb file with what is stored for a django model.

    Every row returned by ``db_functions.soakdb_query(filename)`` is looked up
    in ``model`` and each translated field is compared. Mismatches are written
    as a csv to a log file; unexpected exceptions are written to the same log
    file as a traceback instead of being raised.

    :param filename: path of the soakdb file to check; components 3, 4 and 5 of
        the '/'-split path are used to build the log file name
    :type filename: str
    :param model: model class to check; must be one of ``Lab``, ``Refinement``,
        ``DataProcessing`` or ``Dimple`` (the keys of the translation table)
    :type model: django model class
    :param log_directory: directory the log file is written to
    :type log_directory: str
    :return: nothing, results are only written to the log file
    :rtype: None
    """
    path_parts = filename.split('/')
    model_label = str(model).replace("<class '", '').replace("'>", '')
    out_err_file = os.path.join(log_directory,
                                str(str(path_parts[3]) +
                                    '_' + str(path_parts[4]) +
                                    '_' + str(path_parts[5]) + '_' +
                                    str(misc_functions.get_mod_date(filename)) +
                                    model_label + '.txt'))

    print(out_err_file)

    results = db_functions.soakdb_query(filename)

    # bound up-front so the exception handlers below can always report them
    key = None
    lab_object = None
    outcome_pattern = re.compile(r'-?\d+')
    # soakdb placeholders that are never reported as a mismatch
    ignored_values = [None, 'None', '', '-', 'n/a', 'null', 'pending', 'NULL', '#NAME?', '#NOM?',
                      'None\t',
                      'Analysis Pending', 'in-situ']

    try:
        print(f"Number of rows from file = {len(results)}")
        translations = {Lab: db_functions.lab_translations(),
                        Refinement: db_functions.refinement_translations(),
                        DataProcessing: db_functions.data_processing_translations(),
                        Dimple: db_functions.dimple_translations()}
        translation = translations[model]

        # different from what is in class...
        error_dict = dict(crystal=[], soakdb_field=[], model_field=[], soakdb_value=[], model_value=[])

        for row in results:
            lab_object = model.objects.filter(crystal_name__crystal_name=row['CrystalName'],
                                              crystal_name__visit__filename=str(filename),
                                              crystal_name__compound__smiles=row['CompoundSMILES'])
            if len(lab_object) > 1:
                raise Exception('Multiple Crystals!')
            if len(lab_object) == 0:
                if model == Dimple and not row['DimplePathToPDB'] and not row['DimplePathToMTZ']:
                    # no dimple output for this crystal, so there is nothing to compare
                    continue
                raise Exception(
                    f"No entry for {row['CrystalName']}, {row['DimplePathToPDB']}, {row['DimplePathToMTZ']}")

            db_row = lab_object[0]
            for key in translation.keys():
                # NOTE: eval is only applied to field names from the project's own
                # translation tables, never to values read from the soakdb file
                test_xchem_val = eval(f"lab_object[0].{key}")
                soakdb_val = row[translation[key]]
                if key == 'outcome':
                    # soakdb stores the outcome as text; keep only the leading number
                    try:
                        soakdb_val = int(outcome_pattern.findall(str(soakdb_val))[0])
                    except IndexError:
                        continue
                if translation[key] == 'CrystalName':
                    test_xchem_val = db_row.crystal_name.crystal_name
                if translation[key] == 'DimpleReferencePDB' and soakdb_val:
                    test_xchem_val = db_row.reference
                    if test_xchem_val is not None:
                        test_xchem_val = db_row.reference.reference_pdb
                if soakdb_val == '' or soakdb_val == 'None' or not soakdb_val:
                    continue
                if isinstance(test_xchem_val, float):
                    if float(test_xchem_val) == float(soakdb_val):
                        continue
                if isinstance(test_xchem_val, int):
                    if int(soakdb_val) == int(test_xchem_val):
                        continue
                if test_xchem_val != soakdb_val:
                    if soakdb_val in ignored_values:
                        continue
                    error_dict['crystal'].append(str(db_row.crystal_name.crystal_name))
                    error_dict['soakdb_field'].append(translation[key])
                    error_dict['model_field'].append(key)
                    error_dict['soakdb_value'].append(soakdb_val)
                    error_dict['model_value'].append(test_xchem_val)

        if error_dict['crystal']:
            pd.DataFrame.from_dict(error_dict).to_csv(out_err_file)

    except IndexError:
        # a missing soakdb column is tolerated silently
        if 'No item with that key' in traceback.format_exc():
            pass
        else:
            with open(out_err_file, 'w') as f:
                f.write(traceback.format_exc())
                f.write('\n' + str(key))
    except AttributeError:
        with open(out_err_file, 'w') as f:
            f.write(traceback.format_exc())
            f.write('\n' + str(lab_object))
    except Exception:
        with open(out_err_file, 'w') as f:
            f.write(traceback.format_exc())
Beispiel #3
0
def check_files(soak_db_filepath):
    """Check whether the listed soakdb files are new or have changed since the last run.

    ``soak_db_filepath`` is read as a text file containing one soakdb filename
    per line. For each filename:

    * no ``SoakdbFiles`` entry: the file is added via ``db_functions.pop_soakdb``
      / ``db_functions.pop_proposals`` and its status is set to 0 (new);
    * exactly one entry: its status is set to 1 (changed) when the file's
      current modification date is later than the stored one;
    * more than one entry: an exception is raised.

    Status 2 (not changed) is never written here. Finally, if the ``Lab`` table
    is empty, every ``SoakdbFiles`` entry is reset to status 0.

    :param soak_db_filepath: Soakdb filepath as defined
        within :class:`transfer_soakdb.CheckFiles` as self.input()[1].path
    :type soak_db_filepath: str
    :raises Exception: if a filename has more than one entry, or if a
        modification date cannot be converted to an integer
    :return: nothing, only the database is updated
    :rtype: None
    """
    print(f'INPUT NAME: {soak_db_filepath}')

    with open(soak_db_filepath, 'r') as f:
        files = f.readlines()
        print(f'FILES: {files}')

    for filename in files:
        filename_clean = filename.rstrip('\n')
        soakdb_query = list(SoakdbFiles.objects.filter(filename=filename_clean))
        print(len(soakdb_query))

        if len(soakdb_query) > 1:
            raise Exception('More than one entry for file! Something has gone wrong!')

        if not soakdb_query:
            # File isn't in XCDB yet: add it (once) and flag it as new
            print('LEN=0')
            _, _, proposal = db_functions.pop_soakdb(filename_clean)
            db_functions.pop_proposals(proposal)
            new_entry = list(SoakdbFiles.objects.filter(filename=filename_clean))[0]
            new_entry.status = 0
            new_entry.save()
            continue

        print('LEN=1')
        entry = soakdb_query[0]
        # Last modification date as stored in the database
        old_mod_date = entry.modification_date
        # Current modification date of the file itself
        current_mod_date = misc_functions.get_mod_date(entry.filename)

        print(old_mod_date)
        if not old_mod_date:
            # no date stored yet: record the current one and treat the file as changed
            entry.modification_date = current_mod_date
            entry.save()
            old_mod_date = 0

        print(current_mod_date)

        # if the file has changed since the db was last updated for the entry, change status to indicate this
        try:
            has_changed = int(current_mod_date) > int(old_mod_date)
        except ValueError as exc:
            raise Exception(
                f"current_mod_date: {current_mod_date}, old_mod_date: {old_mod_date}") from exc

        if has_changed:
            entry.status = 1
            entry.save()

    if not Lab.objects.exists():
        # Nothing has been loaded into Lab yet: set all file statuses to 0
        for soakdb_file in SoakdbFiles.objects.all():
            soakdb_file.status = 0
            soakdb_file.save()
Beispiel #4
0
    def run(self):
        """Collect hit and lead information for refined crystals and push it to postgres.

        Crystals are taken from the ``refinement`` table where ``outcome``
        matches 3, 4, 5 or 6. For each one, the ``lab`` rows supply the smiles
        and protein name, and the ``dimple`` rows supply the pandda path and
        reference pdb. The collected tables are then handed to
        ``self.add_to_postgres`` once per protein for ``proasis_leads`` and
        ``proasis_hits``, and an empty marker is written to ``self.output()``.

        :return: nothing
        :rtype: None
        """
        # all data necessary for uploading hits
        crystal_data_dump_dict = {
            'crystal_name': [],
            'protein': [],
            'smiles': [],
            'bound_conf': [],
            'modification_date': [],
            'strucid': []
        }

        # all data necessary for uploading leads
        project_data_dump_dict = {
            'protein': [],
            'pandda_path': [],
            'reference_pdb': [],
            'strucid': []
        }

        outcome_string = '(%3%|%4%|%5%|%6%)'

        conn, c = db_functions.connectDB()

        c.execute(
            '''SELECT crystal_id, bound_conf, pdb_latest FROM refinement WHERE outcome SIMILAR TO %s''',
            (str(outcome_string), ))

        rows = c.fetchall()

        print((str(len(rows)) +
               ' crystals were found to be in refinement or above'))

        for row in rows:
            crystal_id = str(row[0])

            # ids this short are skipped before any further query is issued
            if len(crystal_id) < 3:
                continue

            c.execute(
                '''SELECT smiles, protein FROM lab WHERE crystal_id = %s''',
                (crystal_id, ))

            lab_table = c.fetchall()

            if len(lab_table) > 1:
                print(('WARNING: ' + crystal_id +
                       ' has multiple entries in the lab table'))

            # fallback used when the lab table gives no usable protein name; it is
            # set before the loop so the dimple rows below never see an unbound
            # name or the protein of a previous crystal
            fallback_protein = crystal_id.split('-')[0]
            protein_name = fallback_protein

            for entry in lab_table:
                if len(str(entry[1])) < 2 or 'None' in str(entry[1]):
                    protein_name = fallback_protein
                else:
                    protein_name = str(entry[1])

                crystal_data_dump_dict['protein'].append(protein_name)
                crystal_data_dump_dict['smiles'].append(entry[0])
                crystal_data_dump_dict['crystal_name'].append(row[0])
                crystal_data_dump_dict['bound_conf'].append(row[1])
                crystal_data_dump_dict['strucid'].append('')

                try:
                    modification_date = misc_functions.get_mod_date(str(
                        row[1]))
                except Exception:
                    # best effort: an unreadable bound_conf gets an empty date
                    modification_date = ''

                crystal_data_dump_dict['modification_date'].append(
                    modification_date)

            c.execute(
                '''SELECT pandda_path, reference_pdb FROM dimple WHERE crystal_id = %s''',
                (crystal_id, ))

            pandda_info = c.fetchall()

            for pandda_entry in pandda_info:
                project_data_dump_dict['protein'].append(protein_name)
                project_data_dump_dict['pandda_path'].append(pandda_entry[0])
                project_data_dump_dict['reference_pdb'].append(pandda_entry[1])
                project_data_dump_dict['strucid'].append('')

        project_table = pandas.DataFrame.from_dict(project_data_dump_dict)
        crystal_table = pandas.DataFrame.from_dict(crystal_data_dump_dict)

        protein_list = set(project_data_dump_dict['protein'])
        print(protein_list)

        for protein in protein_list:

            self.add_to_postgres(project_table, protein, ['reference_pdb'],
                                 project_data_dump_dict, 'proasis_leads')

            self.add_to_postgres(crystal_table, protein,
                                 ['crystal_name', 'smiles', 'bound_conf'],
                                 crystal_data_dump_dict, 'proasis_hits')

        with self.output().open('wb') as f:
            f.write('')
Beispiel #5
0
    def run(self):
        """Flag soakdb files as new (0) or changed (1) in the ``soakdb_files`` table.

        The filenames are read, one per line, from ``self.input()``. A file
        already in the table gets status 1 and a refreshed modification date
        when its current modification date compares greater than the stored
        one. A file not found in the table is added through
        ``db_functions.pop_soakdb`` / ``pop_proposals`` and gets status 0.
        Status 2 (not changed) is not written here. If the ``lab`` table does
        not exist, every file is reset to status 0. An empty marker is written
        to ``self.output()`` at the end.
        """
        conn, c = db_functions.connectDB()
        have_soakdb_table = db_functions.table_exists(c, 'soakdb_files')

        seen_files = []

        if have_soakdb_table:
            with self.input().open('r') as handle:
                listed = handle.readlines()

            for raw_name in listed:
                filename_clean = raw_name.rstrip('\n')

                c.execute(
                    'select filename, modification_date, status_code from soakdb_files where filename like %s;',
                    (filename_clean, ))

                for record in c.fetchall():
                    if len(record) <= 0:
                        continue

                    data_file = str(record[0])
                    seen_files.append(data_file)
                    stored_date = str(record[1])
                    current_mod_date = misc_functions.get_mod_date(data_file)

                    # both dates are compared as given (stored one as a string)
                    if not current_mod_date > stored_date:
                        continue

                    c.execute(
                        'UPDATE soakdb_files SET status_code = 1 where filename like %s;',
                        (filename_clean, ))
                    c.execute(
                        'UPDATE soakdb_files SET modification_date = %s where filename like %s;',
                        (current_mod_date, filename_clean))
                    conn.commit()

                if filename_clean in seen_files:
                    continue

                # not in the table yet: populate it and mark it as new
                out, err, proposal = db_functions.pop_soakdb(filename_clean)
                db_functions.pop_proposals(proposal)
                c.execute(
                    'UPDATE soakdb_files SET status_code = 0 where filename like %s;',
                    (filename_clean, ))
                conn.commit()

            # the result of this query is not read anywhere in this method
            c.execute('select filename from soakdb_files;')

        have_lab_table = db_functions.table_exists(c, 'lab')
        if not have_lab_table:
            c.execute('UPDATE soakdb_files SET status_code = 0;')
            conn.commit()

        with self.output().open('w') as marker:
            marker.write('')
Beispiel #6
0
 def output(self):
     """Return the local target ``<soakdb_filename>_<modification date>.events``."""
     stamp = misc_functions.get_mod_date(self.soakdb_filename)
     target_path = str(self.soakdb_filename + '_' + stamp + '.events')
     return luigi.LocalTarget(target_path)
Beispiel #7
0
    def run(self):
        """Symlink the crystal's bound-state pdb into the input directory and cut its maps.

        The link is created at
        ``<input_directory>/<target_name>/<crystal_name>.pdb``. When a bound
        file is found by ``RefinementObjectFiles.find_bound_file``:

        * an existing link is replaced; if the bound file is newer than the
          previously linked file, every file sharing the link's base name is
          removed and the maps are re-cut, otherwise map cutting is skipped;
        * ``fofc.map``, ``2fofc.map`` and any ``*event*native*.ccp4`` maps are
          cut around the structure with ccp4 ``mapmask``;
        * the smiles (``prod_smiles`` if set, otherwise ``smiles``) is written
          to ``<crystal_name>_smiles.txt``.

        When no bound file is found the crystal's outcome is set to 3. An empty
        marker is written to ``self.output()`` at the end.

        :raises Exception: carrying the bound file path, if anything fails
            while linking, cutting maps or writing the smiles file
        """

        outpath = os.path.join(self.input_directory, self.crystal.crystal_name.target.target_name,
                               str(self.crystal.crystal_name.crystal_name + '.pdb'))

        # remove a dangling symlink left over from a previous run
        try:
            if not os.path.exists(os.readlink(outpath)):
                os.unlink(outpath)
        except FileNotFoundError:
            pass

        out_dir = '/'.join(outpath.split('/')[:-1])
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

        file_obj = RefinementObjectFiles(refinement_object=self.crystal)
        file_obj.find_bound_file()
        cutmaps = True
        if file_obj.bound_conf:
            try:
                if os.path.exists(outpath):
                    old = get_mod_date(get_filepath_of_potential_symlink(outpath))
                    new = get_mod_date(file_obj.bound_conf)
                    # the link itself is removed first, so the glob below only
                    # finds the derived files (maps, smiles)
                    os.unlink(outpath)
                    if int(new) > int(old):
                        base = outpath.replace('.pdb', '')
                        for stale_file in glob.glob(f'{base}*'):
                            os.unlink(stale_file)
                    else:
                        cutmaps = False

                os.symlink(file_obj.bound_conf, outpath)
                if cutmaps:
                    # Try to create cut maps for the eventmap, 2fofc and fofc
                    # Get root of file_obj.bound_conf
                    bcdir = os.path.dirname(file_obj.bound_conf)
                    # Check if this is the correct directory (most likely not)
                    fofc = glob.glob(bcdir+'/fofc.map')
                    if len(fofc) < 1:
                        # go one level up
                        bcdir = os.path.dirname(bcdir)
                    # Get the files
                    fofc = glob.glob(bcdir + '/fofc.map')
                    fofc2 = glob.glob(bcdir + '/2fofc.map')
                    event_maps = glob.glob(bcdir + '/*event*native*.ccp4')  # doesn't capture all of them though
                    fofc_pth = outpath.replace('.pdb', '_fofc.map')
                    fofc2_pth = outpath.replace('.pdb', '_2fofc.map')

                    # Assumption only one file to use....
                    if len(fofc) > 0:
                        mapmask = '''module load ccp4 && mapmask mapin %s mapout %s xyzin %s << eof
                            border %s
                            end
                        eof
                        ''' % (fofc[0], fofc_pth, outpath, str(0))
                        subprocess.run(mapmask, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True,
                                       executable='/bin/bash')
                    if len(fofc2) > 0:
                        mapmask = '''module load ccp4 && mapmask mapin %s mapout %s xyzin %s << eof
                            border %s
                            end
                        eof
                        ''' % (fofc2[0], fofc2_pth, outpath, str(0))
                        subprocess.run(mapmask, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True,
                                       executable='/bin/bash')

                    if len(event_maps) > 0:
                        for event_num, event_map in enumerate(event_maps):
                            fn = outpath.replace('.pdb', f'_event_{event_num}.ccp4')
                            mapmask = '''module load ccp4 && mapmask mapin %s mapout %s xyzin %s << eof
                                border %s
                                end
                            eof
                            ''' % (event_map, fn, outpath, str(0))
                            subprocess.run(mapmask, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True,
                                           executable='/bin/bash')

                # NOTE(review): if neither prod_smiles nor smiles is set, `smi`
                # stays unbound and the failure surfaces through the handler
                # below - confirm whether that is the intended outcome
                if self.prod_smiles:
                    smi = self.prod_smiles
                elif self.smiles:
                    smi = self.smiles
                smi_pth = outpath.replace('.pdb', '_smiles.txt')
                with open(smi_pth, 'w') as f:
                    f.write(str(smi))

            except Exception as exc:
                # keep reporting the offending bound file, but retain the cause
                raise Exception(file_obj.bound_conf) from exc
        else:
            self.crystal.outcome = 3
            self.crystal.save()

        with self.output().open('w') as f:
            f.write('')
Beispiel #8
0
    def run(self):
        """Cross-check the rows of a soakdb file against ``self.model`` records.

        The rows returned by ``db_functions.soakdb_query(self.filename)`` are
        compared with the database in two steps:

        1. the number of ``Crystal`` records for the file must equal the
           number of soakdb rows;
        2. for every row, each field named in the translation table for
           ``self.model`` is compared with the value stored on the matching
           model instance.

        Mismatching fields are collected and written as CSV to a file under
        ``logs/``.  If the check itself fails, the traceback is written to
        that same file instead.  The (empty) luigi output target is written
        in every case so the task is marked as done.
        """
        path_parts = self.filename.split('/')
        out_err_file = str(
            'logs/' + str(path_parts[3]) + '_' + str(path_parts[4]) + '_' +
            str(path_parts[5]) + '_' +
            str(misc_functions.get_mod_date(self.filename)) +
            str(self.model).replace("<class '", '').replace("'>", '') + '.txt')

        print(out_err_file)

        results = db_functions.soakdb_query(self.filename)

        # soakdb placeholder values that are never reported as a mismatch
        ignored_values = (
            None, 'None', '', '-', 'n/a', 'null', 'pending', 'NULL',
            '#NAME?', '#NOM?', 'None\t', 'Analysis Pending', 'in-situ')
        # first (optionally negative) integer in the soakdb outcome text
        outcome_pattern = re.compile(r'-?\d+')

        # bound up front so the error handlers below can always report them
        key = None
        lab_object = None

        try:
            print('Number of rows from file = ' + str(len(results)))

            n_crystals = Crystal.objects.filter(
                visit__filename=self.filename).count()
            status = n_crystals == len(results)

            print('Checking same number of rows in test_xchem: ' + str(status))
            if not status:
                raise Exception(
                    'FAIL: no of entries in test_xchem = ' + str(n_crystals))

            proteins = list({row['ProteinName'] for row in results})

            print('Unique targets in soakdb file: ' + str(proteins))

            translations = {
                Lab: db_functions.lab_translations(),
                Refinement: db_functions.refinement_translations(),
                DataProcessing: db_functions.data_processing_translations(),
                Dimple: db_functions.dimple_translations()
            }

            translation = translations[self.model]

            error_dict = {
                'crystal': [],
                'soakdb_field': [],
                'model_field': [],
                'soakdb_value': [],
                'model_value': []
            }
            for row in results:
                lab_object = self.model.objects.filter(
                    crystal_name__crystal_name=row['CrystalName'],
                    crystal_name__visit__filename=str(self.filename),
                    crystal_name__compound__smiles=row['CompoundSMILES'])
                n_matches = len(lab_object)
                if n_matches > 1:
                    raise Exception('Multiple Crystals!')
                if n_matches == 0:
                    if (self.model == Dimple and not row['DimplePathToPDB']
                            and not row['DimplePathToMTZ']):
                        # a row without dimple output legitimately has no
                        # Dimple entry: skip it rather than index an empty
                        # queryset below
                        continue
                    # str() each part: the path columns may hold None
                    raise Exception('No entry for ' + ' '.join([
                        str(row['CrystalName']),
                        str(row['DimplePathToPDB']),
                        str(row['DimplePathToMTZ'])
                    ]))

                entry = lab_object[0]
                for key, soakdb_field in translation.items():
                    # key is treated as a (possibly dotted) attribute path on
                    # the model instance
                    test_xchem_val = entry
                    for attr in key.split('.'):
                        test_xchem_val = getattr(test_xchem_val, attr)
                    soakdb_val = row[soakdb_field]

                    if key == 'outcome':
                        # compare on the leading integer only; nothing to
                        # compare when the soakdb text holds no number
                        found = outcome_pattern.search(str(soakdb_val))
                        if found is None:
                            continue
                        soakdb_val = int(found.group())
                    if soakdb_field == 'CrystalName':
                        test_xchem_val = entry.crystal_name.crystal_name
                    if soakdb_field == 'DimpleReferencePDB' and soakdb_val:
                        test_xchem_val = entry.reference
                        if test_xchem_val is not None:
                            test_xchem_val = test_xchem_val.reference_pdb

                    # an empty soakdb value is not compared
                    if not soakdb_val or soakdb_val == 'None':
                        continue
                    # numeric model values are compared by value, not by type
                    if isinstance(test_xchem_val, float):
                        if float(test_xchem_val) == float(soakdb_val):
                            continue
                    if isinstance(test_xchem_val, int):
                        if int(soakdb_val) == int(test_xchem_val):
                            continue

                    if (test_xchem_val != soakdb_val
                            and soakdb_val not in ignored_values):
                        error_dict['crystal'].append(
                            str(entry.crystal_name.crystal_name))
                        error_dict['soakdb_field'].append(soakdb_field)
                        error_dict['model_field'].append(key)
                        error_dict['soakdb_value'].append(soakdb_val)
                        error_dict['model_value'].append(test_xchem_val)

            if error_dict['crystal']:
                pd.DataFrame.from_dict(error_dict).to_csv(out_err_file)

        except IndexError:
            # NOTE(review): 'No item with that key' is presumably the error
            # for a column missing from the soakdb row - confirm; it is
            # deliberately not logged
            if 'No item with that key' not in traceback.format_exc():
                with open(out_err_file, 'w') as f:
                    f.write(traceback.format_exc() + '\n' + str(key))
        except AttributeError:
            with open(out_err_file, 'w') as f:
                f.write(traceback.format_exc() + '\n' + str(lab_object))
        except Exception:
            with open(out_err_file, 'w') as f:
                f.write(traceback.format_exc())

        with self.output().open('w') as f:
            f.write('')
Beispiel #9
0
 def output(self):
     """Return the ``.checked`` marker target for this version of the file.

     The target path is ``self.filename`` followed by the value returned by
     ``misc_functions.get_mod_date`` for that file and a ``.checked``
     suffix, joined with dots.
     """
     stamp = misc_functions.get_mod_date(self.filename)
     marker_path = str(self.filename + '.' + stamp + '.checked')
     return luigi.LocalTarget(marker_path)
Beispiel #10
0
 def output(self):
     """Return the ``.transferred`` marker target for this version of the file.

     The target path is ``self.data_file`` + ``_`` + the value returned by
     ``misc_functions.get_mod_date`` for that file + ``.transferred``.
     """
     stamp = str(misc_functions.get_mod_date(self.data_file))
     marker_path = str(self.data_file + '_' + stamp + '.transferred')
     return luigi.LocalTarget(marker_path)
Beispiel #11
0
    def run(self):
        """Update the status of every soakdb file listed in the task input.

        The second input target is read as a list of filenames, one per
        line.  For each filename:

        * exactly one ``SoakdbFiles`` entry: its status is set to 1 when the
          file's current modification date is later than the stored one;
        * no entry: the file and its proposal are added through
          ``db_functions`` and the new entry gets status 0;
        * more than one entry: an ``Exception`` is raised.

        If the ``Lab`` table is empty, every ``SoakdbFiles`` entry is reset
        to status 0.  An empty output target is written at the end.

        Status codes: 0 = new, 1 = changed, 2 = not changed.

        NOTE(review): ``select_for_update()`` querysets normally have to be
        evaluated inside a transaction - confirm the caller provides one.
        """
        # filenames that already have an entry in the soakdbfiles table
        checked = []

        input_path = self.input()[1].path

        print('INPUT NAME:')
        print(input_path)

        with open(input_path, 'r') as f:
            files = f.readlines()
            print('FILES:')
            print(files)

        for filename in files:
            # remove any newline characters
            filename_clean = filename.rstrip('\n')
            # find the relevant entry in the soakdbfiles table
            soakdb_query = list(
                SoakdbFiles.objects.filter(filename=filename_clean))
            n_entries = len(soakdb_query)

            print(n_entries)

            # should never happen - the filename field is unique
            if n_entries > 1:
                raise Exception(
                    'More than one entry for file! Something has gone wrong!')

            if n_entries == 0:
                # the file is added once, by the 'not in checked' branch
                # below; adding it here as well would populate it twice
                print('LEN=0')

            # only one entry should exist per file
            if n_entries == 1:
                print('LEN=1')
                known_file = soakdb_query[0]
                # get the filename back from the query
                data_file = known_file.filename
                # add the file to the list of those that have been checked
                checked.append(data_file)
                # modification date as stored in the db
                old_mod_date = known_file.modification_date
                # current modification date of the file
                current_mod_date = misc_functions.get_mod_date(data_file)

                print(old_mod_date)
                print(current_mod_date)

                # the file has changed since the db was last updated for
                # this entry: flag it
                if int(current_mod_date) > int(old_mod_date):
                    update_status = SoakdbFiles.objects.select_for_update(
                    ).get(id=known_file.id)
                    update_status.status = 1
                    update_status.save()

            # if the file is not in the database at all
            if filename_clean not in checked:
                # add the file to soakdb
                _out, _err, proposal = db_functions.pop_soakdb(filename_clean)
                # add the proposal to proposal
                db_functions.pop_proposals(proposal)
                # retrieve the new db entry and mark it as a new file
                new_file = list(
                    SoakdbFiles.objects.select_for_update().filter(
                        filename=filename_clean))[0]
                new_file.status = 0
                new_file.save()

        # if the lab table is empty, no data has been transferred from the
        # datafiles, so set the status of everything to 0 (new file)
        if not Lab.objects.exists():
            for soakdb_file in SoakdbFiles.objects.select_for_update().all():
                soakdb_file.status = 0
                soakdb_file.save()

        # write output to signify job done
        with self.output().open('w') as f:
            f.write('')
Beispiel #12
0
def check_file(filename):
    """Work out and store the status of one soakdb file.

    Status codes: 0 = new, 1 = changed, 2 = not changed.

    * No ``SoakdbFiles`` entry for the file: the file and its proposal are
      added through ``db_functions`` and the new entry gets status 0.
    * One entry: if it has no stored modification date, the current date is
      stored first and the file counts as changed.  The entry gets status 1
      when the file's current modification date is later than the stored one.
    * If the ``Lab`` table is empty, every ``SoakdbFiles`` entry is reset to
      status 0.

    :param filename: path of the soakdb file; trailing newlines are stripped
    :return: the status code worked out for this file
    :raises Exception: if more than one entry exists for the file, or if a
        modification date cannot be converted to an integer
    """
    status = 2
    # remove any newline characters
    filename_clean = filename.rstrip('\n')
    # find the relevant entry in the soakdbfiles table
    soakdb_query = list(SoakdbFiles.objects.filter(filename=filename_clean))

    # should never happen - the filename field is unique
    if len(soakdb_query) > 1:
        raise Exception('More than one entry for file! Something has gone wrong!')

    if len(soakdb_query) == 1:
        entry = soakdb_query[0]
        # modification date as stored in the db
        old_mod_date = entry.modification_date
        # current modification date of the file
        current_mod_date = misc_functions.get_mod_date(entry.filename)

        if not old_mod_date:
            # nothing stored yet: record the current date and compare
            # against 0 so that the file is treated as changed
            entry.modification_date = current_mod_date
            entry.save()
            old_mod_date = 0

        # only the integer conversions are guarded by the try
        try:
            has_changed = int(current_mod_date) > int(old_mod_date)
        except ValueError:
            raise Exception(str('current_mod_date: ' + str(current_mod_date)
                                + ', old_mod_date: ' + str(old_mod_date)))

        # the file has changed since the db was last updated for this entry
        if has_changed:
            entry.status = 1
            entry.save()
            status = 1

        print(current_mod_date)
        print(old_mod_date)

    # if the file is not in the database at all
    if len(soakdb_query) == 0:
        print('This is a new soakDB file, just setting it up in the database!')
        # add the file to soakdb
        _out, _err, proposal = db_functions.pop_soakdb(filename_clean)
        # add the proposal to proposal
        db_functions.pop_proposals(proposal)
        # retrieve the new db entry and mark it as a new file
        new_entry = list(
            SoakdbFiles.objects.filter(filename=filename_clean))[0]
        new_entry.status = 0
        new_entry.save()
        status = 0

    # if the lab table is empty, no data has been transferred from the
    # datafiles, so set the status of everything to 0 (new file)
    # NOTE(review): the returned status is not altered by this reset -
    # confirm that callers expect that
    if not Lab.objects.exists():
        for soakdb_file in SoakdbFiles.objects.all():
            soakdb_file.status = 0
            soakdb_file.save()

    return status