def run(self):
    """Register refined crystals as proasis hits and their references as leads.

    Every ``Refinement`` row with ``outcome >= 4`` is inspected:

    1. A pdb file is chosen: ``bound_conf`` if set, otherwise the
       ``refine*split.bound*.pdb`` next to ``pdb_latest`` (or ``pdb_latest``
       itself when it is not inside a ``Refine`` folder).
    2. The matching ``refine.mtz``, ``2fofc.map`` and ``fofc.map`` are looked
       up with ``db_functions.check_file_status``; crystals missing any of
       them are counted in ``fail_count`` and skipped.
    3. The pdb is scanned for ``LIG`` records; crystals without ligands are
       skipped.
    4. A ``ProasisHits`` row is created, or an existing one refreshed when it
       is older than the pdb file or has no ``strucid`` (one row per
       alternate conformation when several are present).
    5. The crystal's dimple reference is added to ``ProasisLeads`` when its
       reference pdb exists on disk, and removed from it otherwise.

    Finally ``fail_count`` is printed and an empty luigi output is written.
    """
    # ligands are identified by 'LIG', with one preceding character for the
    # alt conf letter
    ligand_pattern = re.compile(r".LIG.......")

    def _purge_uploaded_structure(entry):
        """Remove the proasis structure and hit-directory files held by *entry*."""
        for out in ProasisOut.objects.filter(proasis=entry):
            shutil.rmtree(os.path.join(out.root, out.start))
        proasis_api_funcs.delete_structure(entry.strucid)
        entry.strucid = None
        entry.save()
        # only files that live inside the hit directory are removed
        for path in (entry.pdb_file, entry.mtz, entry.two_fofc, entry.fofc):
            if self.hit_directory in path:
                os.remove(path)

    def _refresh_hit(entry, new_values):
        """Point *entry* at *new_values* if it is stale or was never uploaded."""
        stale = (entry.modification_date < new_values['modification_date']
                 or not entry.strucid)
        if not stale:
            return
        if entry.strucid:
            _purge_uploaded_structure(entry)
        for field, value in new_values.items():
            setattr(entry, field, value)
        entry.save()

    fail_count = 0

    # NOTE(review): the original comment read "select anything 'in refinement'
    # (3) or above", but the filter starts at outcome 4 - confirm the intent.
    refinement = Refinement.objects.filter(outcome__gte=4)

    for obj in refinement:
        bound_conf = ''

        # NOTE(review): the nesting of the two ``else`` branches below was
        # reconstructed from flattened source - confirm against history.
        if obj.bound_conf:
            # a pdb file named in the bound_conf field is used as-is
            if os.path.isfile(obj.bound_conf):
                bound_conf = obj.bound_conf
        elif obj.pdb_latest:
            # otherwise, use the most recent pdb file (according to soakdb)
            if os.path.isfile(obj.pdb_latest):
                if 'Refine' in obj.pdb_latest:
                    # in a refinement folder, prefer the bound-state pdb file
                    # over the ensemble
                    search_path = '/'.join(obj.pdb_latest.split('/')[:-1])
                    files = glob.glob(
                        str(search_path + '/refine*split.bound*.pdb'))
                    if len(files) == 1:
                        bound_conf = files[0]
                else:
                    bound_conf = obj.pdb_latest
        else:
            # no pdb = no proasis upload
            fail_count += 1
            continue

        mtz = db_functions.check_file_status('refine.mtz', bound_conf)
        two_fofc = db_functions.check_file_status('2fofc.map', bound_conf)
        fofc = db_functions.check_file_status('fofc.map', bound_conf)

        if not mtz[0] or not two_fofc[0] or not fofc[0]:
            fail_count += 1
            continue

        # search the chosen pdb file for ligands
        ligand_list = []
        if bound_conf:
            try:
                with open(bound_conf, 'r') as pdb_file:
                    for line in pdb_file:
                        # ignore LIG in LINK records (phenix format)
                        if "LIG" in line and 'LINK' not in line:
                            match = ligand_pattern.search(line)
                            if match:
                                # keep the whole string so alt confs stay
                                # distinguishable
                                ligand_list.append(match.group())
            except (OSError, UnicodeDecodeError):
                # an unreadable pdb is treated the same as "no ligands"
                ligand_list = None

        # no ligands in the pdb file = no upload to proasis
        if not ligand_list:
            continue

        unique_ligands = list(set(ligand_list))

        # ligands that differ only in their first character (the alt conf
        # letter) are alternate conformations of each other
        lig_no_conf = [lig[1:] for lig in unique_ligands]
        confs = [lig for lig in unique_ligands
                 if lig_no_conf.count(lig[1:]) > 1]

        mod_date = misc_functions.get_mod_date(bound_conf)

        if mod_date:
            hit_values = {
                'pdb_file': bound_conf,
                'modification_date': mod_date,
                'mtz': mtz[1],
                'two_fofc': two_fofc[1],
                'fofc': fofc[1],
                'ligand_list': unique_ligands,
            }
            existing = ProasisHits.objects.filter(
                refinement=obj, crystal_name=obj.crystal_name)

            if existing.exists():
                if not confs:
                    for entry in existing:
                        _refresh_hit(entry, hit_values)
                else:
                    for conf in confs:
                        # TODO: ``get`` raises if an entry exists for the
                        # crystal but not for this particular altconf
                        entry = ProasisHits.objects.get(
                            refinement=obj,
                            crystal_name=obj.crystal_name,
                            altconf=conf)
                        _refresh_hit(entry, dict(hit_values, altconf=conf))
            elif not confs:
                ProasisHits.objects.get_or_create(
                    refinement=obj,
                    crystal_name=obj.crystal_name,
                    **hit_values)
            else:
                for conf in confs:
                    # TODO: The pdb file will need to be edited later to pull
                    # out other altconfs of the same lig
                    ProasisHits.objects.get_or_create(
                        refinement=obj,
                        crystal_name=obj.crystal_name,
                        altconf=conf,
                        **hit_values)

            dimple = Dimple.objects.filter(crystal_name=obj.crystal_name)
            if dimple.count() == 1:
                reference = dimple[0].reference
                if reference and reference.reference_pdb:
                    if os.path.isfile(reference.reference_pdb):
                        ProasisLeads.objects.get_or_create(
                            reference_pdb=reference)
                    elif ProasisLeads.objects.filter(
                            reference_pdb=reference).exists():
                        print('removing...')
                        ProasisLeads.objects.get(
                            reference_pdb=reference).delete()

    print(fail_count)

    with self.output().open('w') as f:
        f.write('')
def check_file_upload(filename, model, log_directory = DirectoriesConfig().log_directory):
    """Check that the rows of a soakdb file match what is stored in a django model.

    Each row returned by ``db_functions.soakdb_query(filename)`` is matched to
    a ``model`` row by crystal name, visit filename and compound smiles, and
    every translated field is compared. Mismatches are written as a csv to a
    log file in ``log_directory``; unexpected errors are written to the same
    file as a traceback.

    :param filename: soakdb file to check; path components 3, 4 and 5 (split
        on ``/``) are used to name the log file
    :type filename: str
    :param model: model class to compare against; one of ``Lab``,
        ``Refinement``, ``DataProcessing`` or ``Dimple``
    :type model: django model class
    :param log_directory: directory the log file is written to
    :type log_directory: str
    :return: nothing; results are reported through the log file
    :rtype: None
    """
    import operator  # local import: only this function needs it

    path_parts = filename.split('/')
    out_err_file = os.path.join(
        log_directory,
        str(str(path_parts[3]) + '_' + str(path_parts[4]) + '_'
            + str(path_parts[5]) + '_'
            + str(misc_functions.get_mod_date(filename))
            + str(model).replace("<class '", '').replace("'>", '') + '.txt'))
    print(out_err_file)

    results = db_functions.soakdb_query(filename)

    # soakdb placeholders that are never reported as a mismatch
    ignorable = [None, 'None', '', '-', 'n/a', 'null', 'pending', 'NULL',
                 '#NAME?', '#NOM?', 'None\t', 'Analysis Pending', 'in-situ']
    outcome_pattern = re.compile(r'-?\d+')

    # bound up front so the error handlers below can always report them
    key = None
    lab_object = None

    try:
        print(f"Number of rows from file = {len(results)}")
        translations = {Lab: db_functions.lab_translations(),
                        Refinement: db_functions.refinement_translations(),
                        DataProcessing: db_functions.data_processing_translations(),
                        Dimple: db_functions.dimple_translations()}
        translation = translations[model]

        error_dict = dict(crystal=[], soakdb_field=[], model_field=[],
                          soakdb_value=[], model_value=[])

        for row in results:
            lab_object = model.objects.filter(
                crystal_name__crystal_name=row['CrystalName'],
                crystal_name__visit__filename=str(filename),
                crystal_name__compound__smiles=row['CompoundSMILES'])
            if len(lab_object) > 1:
                raise Exception('Multiple Crystals!')
            if len(lab_object) == 0:
                if model == Dimple and not row['DimplePathToPDB'] and not row['DimplePathToMTZ']:
                    # a crystal without dimple output legitimately has no
                    # Dimple row: nothing to compare, move to the next row
                    continue
                raise Exception(
                    f"No entry for {row['CrystalName']}, {row['DimplePathToPDB']}, {row['DimplePathToMTZ']}")

            db_row = lab_object[0]
            for key in translation.keys():
                # attrgetter resolves (possibly dotted) attribute names
                # without evaluating arbitrary code
                test_xchem_val = operator.attrgetter(key)(db_row)
                soakdb_val = row[translation[key]]

                if key == 'outcome':
                    # soakdb stores the outcome as text; compare the number only
                    found = outcome_pattern.findall(str(soakdb_val))
                    if not found:
                        continue
                    soakdb_val = int(found[0])
                if translation[key] == 'CrystalName':
                    test_xchem_val = db_row.crystal_name.crystal_name
                if translation[key] == 'DimpleReferencePDB' and soakdb_val:
                    test_xchem_val = db_row.reference
                    if test_xchem_val is not None:
                        test_xchem_val = db_row.reference.reference_pdb

                if soakdb_val == '' or soakdb_val == 'None' or not soakdb_val:
                    continue
                if isinstance(test_xchem_val, float):
                    if float(test_xchem_val) == float(soakdb_val):
                        continue
                if isinstance(test_xchem_val, int):
                    if int(soakdb_val) == int(test_xchem_val):
                        continue
                if test_xchem_val != soakdb_val:
                    if soakdb_val in ignorable:
                        continue
                    error_dict['crystal'].append(str(db_row.crystal_name.crystal_name))
                    error_dict['soakdb_field'].append(translation[key])
                    error_dict['model_field'].append(key)
                    error_dict['soakdb_value'].append(soakdb_val)
                    error_dict['model_value'].append(test_xchem_val)

        if error_dict['crystal']:
            pd.DataFrame.from_dict(error_dict).to_csv(out_err_file)

    except IndexError:
        # a soakdb row lacking the requested column is tolerated silently
        if 'No item with that key' not in traceback.format_exc():
            with open(out_err_file, 'w') as f:
                f.write(traceback.format_exc())
                f.write('\n' + str(key))
    except AttributeError:
        with open(out_err_file, 'w') as f:
            f.write(traceback.format_exc())
            f.write('\n' + str(lab_object))
    except Exception:
        with open(out_err_file, 'w') as f:
            f.write(traceback.format_exc())
def check_files(soak_db_filepath):
    """Update the status of every soakdb file listed in ``soak_db_filepath``.

    The file is read line by line; each line is a soakdb filename. Status
    codes written to ``SoakdbFiles.status``:

    * 0 = new (the file was not yet in the table and has just been added)
    * 1 = changed (the file on disk is newer than the stored modification date)
    * 2 = not changed (never written here; existing rows are left alone)

    If the ``Lab`` table is empty, every ``SoakdbFiles`` row is reset to 0.

    :param soak_db_filepath: path of a text file with one soakdb filename per line
    :type soak_db_filepath: str
    :return: nothing; the ``SoakdbFiles`` table is updated in place
    :rtype: None
    :raises Exception: if a filename has more than one ``SoakdbFiles`` row, or
        if a modification date cannot be converted to ``int``
    """
    # filenames that already have a row in SoakdbFiles
    checked = []

    print(f'INPUT NAME: {soak_db_filepath}')

    with open(soak_db_filepath, 'r') as f:
        files = f.readlines()
    print(f'FILES: {files}')

    for filename in files:
        filename_clean = filename.rstrip('\n')
        soakdb_query = list(SoakdbFiles.objects.filter(filename=filename_clean))
        print(len(soakdb_query))

        if len(soakdb_query) == 0:
            # the file is added once, in the "not in checked" branch below
            print('LEN=0')

        if len(soakdb_query) == 1:
            print('LEN=1')
            data_file = soakdb_query[0].filename
            checked.append(data_file)
            # modification date as stored in the database
            old_mod_date = soakdb_query[0].modification_date
            # modification date of the file on disk
            current_mod_date = misc_functions.get_mod_date(data_file)
            id_number = soakdb_query[0].id

            print(old_mod_date)
            if not old_mod_date:
                # no date stored yet: record the current one and treat the
                # file as changed by comparing against 0
                soakdb_query[0].modification_date = current_mod_date
                soakdb_query[0].save()
                old_mod_date = 0
            print(current_mod_date)

            try:
                if int(current_mod_date) > int(old_mod_date):
                    update_status = SoakdbFiles.objects.get(id=id_number)
                    update_status.status = 1
                    update_status.save()
            except ValueError as err:
                raise Exception(
                    f"current_mod_date: {current_mod_date}, old_mod_date: {old_mod_date}") from err

        if len(soakdb_query) > 1:
            raise Exception('More than one entry for file! Something has gone wrong!')

        # the file is not in the database yet: add it and mark it as new
        if filename_clean not in checked:
            out, err, proposal = db_functions.pop_soakdb(filename_clean)
            db_functions.pop_proposals(proposal)
            soakdb_query = list(SoakdbFiles.objects.filter(filename=filename_clean))
            id_number = soakdb_query[0].id
            update_status = SoakdbFiles.objects.get(id=id_number)
            update_status.status = 0
            update_status.save()

    # an empty lab table means nothing has been transferred yet, so every
    # file has to be treated as new
    if not Lab.objects.exists():
        for soak_file in SoakdbFiles.objects.all():
            soak_file.status = 0
            soak_file.save()
def run(self):
    """Collect hit and lead data for refined crystals and add it to postgres.

    Crystals whose ``refinement.outcome`` matches ``(%3%|%4%|%5%|%6%)`` are
    selected. For each crystal one hit record is built per ``lab`` row and
    one lead record per ``dimple`` row. The two record sets are turned into
    dataframes and handed to ``self.add_to_postgres`` once per protein found
    among the leads, for the ``proasis_leads`` and ``proasis_hits`` tables.
    An empty luigi output is written at the end.
    """
    # all data necessary for uploading hits
    crystal_data_dump_dict = {
        'crystal_name': [],
        'protein': [],
        'smiles': [],
        'bound_conf': [],
        'modification_date': [],
        'strucid': []
    }

    # all data necessary for uploading leads
    project_data_dump_dict = {
        'protein': [],
        'pandda_path': [],
        'reference_pdb': [],
        'strucid': []
    }

    outcome_string = '(%3%|%4%|%5%|%6%)'

    conn, c = db_functions.connectDB()

    c.execute(
        '''SELECT crystal_id, bound_conf, pdb_latest FROM refinement WHERE outcome SIMILAR TO %s''',
        (str(outcome_string), ))

    rows = c.fetchall()

    print((str(len(rows)) + ' crystals were found to be in refinement or above'))

    for row in rows:
        crystal_id = str(row[0])

        # crystal ids shorter than three characters are skipped; checking
        # first avoids a pointless lab query
        if len(crystal_id) < 3:
            continue

        c.execute(
            '''SELECT smiles, protein FROM lab WHERE crystal_id = %s''',
            (crystal_id, ))
        lab_table = c.fetchall()

        if len(lab_table) > 1:
            print(('WARNING: ' + crystal_id + ' has multiple entries in the lab table'))

        # protein name derived from the crystal id; used when the lab table
        # has no usable protein name, and for the leads below when the
        # crystal has no lab row at all (previously that case raised a
        # NameError or reused the previous crystal's protein)
        fallback_protein = crystal_id.split('-')[0]
        protein_name = fallback_protein

        # the modification date depends only on the crystal's bound_conf, so
        # it is computed once per crystal
        try:
            modification_date = misc_functions.get_mod_date(str(row[1]))
        except Exception:
            modification_date = ''

        for entry in lab_table:
            if len(str(entry[1])) < 2 or 'None' in str(entry[1]):
                protein_name = fallback_protein
            else:
                protein_name = str(entry[1])

            crystal_data_dump_dict['protein'].append(protein_name)
            crystal_data_dump_dict['smiles'].append(entry[0])
            crystal_data_dump_dict['crystal_name'].append(row[0])
            crystal_data_dump_dict['bound_conf'].append(row[1])
            crystal_data_dump_dict['strucid'].append('')
            crystal_data_dump_dict['modification_date'].append(
                modification_date)

        c.execute(
            '''SELECT pandda_path, reference_pdb FROM dimple WHERE crystal_id = %s''',
            (crystal_id, ))
        pandda_info = c.fetchall()

        for pandda_entry in pandda_info:
            project_data_dump_dict['protein'].append(protein_name)
            project_data_dump_dict['pandda_path'].append(pandda_entry[0])
            project_data_dump_dict['reference_pdb'].append(pandda_entry[1])
            project_data_dump_dict['strucid'].append('')

    project_table = pandas.DataFrame.from_dict(project_data_dump_dict)
    crystal_table = pandas.DataFrame.from_dict(crystal_data_dump_dict)

    protein_list = set(project_data_dump_dict['protein'])
    print(protein_list)

    for protein in protein_list:
        self.add_to_postgres(project_table, protein, ['reference_pdb'],
                             project_data_dump_dict, 'proasis_leads')
        self.add_to_postgres(crystal_table, protein,
                             ['crystal_name', 'smiles', 'bound_conf'],
                             crystal_data_dump_dict, 'proasis_hits')

    with self.output().open('wb') as f:
        f.write('')
def run(self):
    """Flag soakdb files as new or changed in the ``soakdb_files`` table (raw SQL).

    Reads one filename per line from the luigi input. For every filename that
    already has a row, the stored modification date is compared with the
    file's current one and, if the file is newer, ``status_code`` is set to 1
    and the stored date is updated. Filenames without a row are added through
    ``db_functions.pop_soakdb`` / ``pop_proposals`` and get ``status_code`` 0.
    If the ``lab`` table does not exist, every row is reset to 0. An empty
    luigi output is written at the end.

    NOTE(review): this block was reconstructed from flattened source; the
    nesting of the statements after the filename loop is a best guess.
    """
    conn, c = db_functions.connectDB()
    exists = db_functions.table_exists(c, 'soakdb_files')

    # filenames that were found in soakdb_files
    checked = []

    # Status codes:-
    # 0 = new
    # 1 = changed
    # 2 = not changed

    if exists:
        with self.input().open('r') as f:
            files = f.readlines()

        for filename in files:
            filename_clean = filename.rstrip('\n')

            # NOTE(review): ``like`` is used without wildcards here, but '_'
            # and '%' inside a filename still act as wildcards - confirm that
            # plain equality is not what was intended
            c.execute(
                'select filename, modification_date, status_code from soakdb_files where filename like %s;',
                (filename_clean, ))

            for row in c.fetchall():
                if len(row) > 0:
                    data_file = str(row[0])
                    checked.append(data_file)
                    old_mod_date = str(row[1])
                    current_mod_date = misc_functions.get_mod_date(
                        data_file)

                    # both dates are compared as strings - presumably
                    # fixed-width timestamps; verify get_mod_date's format
                    if current_mod_date > old_mod_date:
                        c.execute(
                            'UPDATE soakdb_files SET status_code = 1 where filename like %s;',
                            (filename_clean, ))
                        c.execute(
                            'UPDATE soakdb_files SET modification_date = %s where filename like %s;',
                            (current_mod_date, filename_clean))
                        conn.commit()

            # no row matched: add the file and mark it as new
            if filename_clean not in checked:
                out, err, proposal = db_functions.pop_soakdb(
                    filename_clean)
                db_functions.pop_proposals(proposal)
                c.execute(
                    'UPDATE soakdb_files SET status_code = 0 where filename like %s;',
                    (filename_clean, ))
                conn.commit()

        # the result of this query is not read (its consumer is commented out)
        c.execute('select filename from soakdb_files;')

        # for row in c.fetchall():
        #     if str(row[0]) not in checked:
        #         data_file = str(row[0])

    # without a lab table every file is reset to "new"
    exists = db_functions.table_exists(c, 'lab')
    if not exists:
        c.execute('UPDATE soakdb_files SET status_code = 0;')
        conn.commit()

    # empty output file: only its existence matters
    with self.output().open('w') as f:
        f.write('')
def output(self):
    """Return the ``.events`` marker target for this version of the soakdb file.

    The target path is the soakdb filename followed by ``_``, the file's
    modification date and ``.events``.
    """
    stamp = misc_functions.get_mod_date(self.soakdb_filename)
    marker_path = str(self.soakdb_filename + '_' + stamp + '.events')
    return luigi.LocalTarget(marker_path)
def run(self):
    """Symlink the crystal's bound-state pdb into the input directory and cut maps.

    The link is created at
    ``<input_directory>/<target_name>/<crystal_name>.pdb``. When the bound
    pdb is newer than the one previously linked (or nothing was linked yet),
    every file sharing the link's base name is removed and the ``fofc.map``,
    ``2fofc.map`` and event maps found next to the bound pdb (or one
    directory above it) are cut around the structure with CCP4 ``mapmask``.
    The smiles string (``prod_smiles`` if set, else ``smiles``) is written to
    ``<crystal_name>_smiles.txt``.

    If no bound-state file is found, the crystal's outcome is set to 3.
    An empty luigi output is written at the end.

    :raises Exception: carrying the bound pdb path, if any step of the
        transfer fails (the underlying error is chained as the cause)
    """
    outpath = os.path.join(self.input_directory,
                           self.crystal.crystal_name.target.target_name,
                           str(self.crystal.crystal_name.crystal_name + '.pdb'))

    def _cut_map(map_in, map_out):
        """Cut *map_in* around the linked pdb with mapmask, writing *map_out*."""
        # the here-document needs real newlines: bash only ends it at a line
        # consisting of the delimiter ``eof``
        mapmask = (
            'module load ccp4 && mapmask mapin %s mapout %s xyzin %s << eof\n'
            '    border %s\n'
            '    end\n'
            'eof\n'
        ) % (map_in, map_out, outpath, str(0))
        subprocess.run(mapmask, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                       shell=True, executable='/bin/bash')

    # remove a dangling symlink left over from an earlier run; islink/exists
    # also copes with ``outpath`` being absent or a regular file
    if os.path.islink(outpath) and not os.path.exists(outpath):
        os.unlink(outpath)

    os.makedirs(os.path.dirname(outpath), exist_ok=True)

    file_obj = RefinementObjectFiles(refinement_object=self.crystal)
    file_obj.find_bound_file()

    if file_obj.bound_conf:
        try:
            cutmaps = True
            if os.path.exists(outpath):
                old = get_mod_date(get_filepath_of_potential_symlink(outpath))
                new = get_mod_date(file_obj.bound_conf)
                os.unlink(outpath)
                if int(new) > int(old):
                    # the structure changed: drop every derived file so it is
                    # regenerated below
                    base = outpath.replace('.pdb', '')
                    for stale_file in glob.glob(f'{base}*'):
                        os.unlink(stale_file)
                else:
                    # unchanged structure: existing maps are still valid
                    cutmaps = False

            os.symlink(file_obj.bound_conf, outpath)

            if cutmaps:
                # the maps live next to the bound pdb or one directory above
                bcdir = os.path.dirname(file_obj.bound_conf)
                fofc = glob.glob(bcdir + '/fofc.map')
                if len(fofc) < 1:
                    bcdir = os.path.dirname(bcdir)

                fofc = glob.glob(bcdir + '/fofc.map')
                fofc2 = glob.glob(bcdir + '/2fofc.map')
                event_maps = glob.glob(bcdir + '/*event*native*.ccp4')

                # only the first match of each map type is used
                if fofc:
                    _cut_map(fofc[0], outpath.replace('.pdb', '_fofc.map'))
                if fofc2:
                    _cut_map(fofc2[0], outpath.replace('.pdb', '_2fofc.map'))
                for event_num, event_map in enumerate(event_maps):
                    _cut_map(event_map,
                             outpath.replace('.pdb', f'_event_{event_num}.ccp4'))

            # NOTE(review): the smiles file is written whether or not maps
            # were cut - nesting reconstructed from flattened source, confirm
            if self.prod_smiles:
                smi = self.prod_smiles
            elif self.smiles:
                smi = self.smiles
            else:
                raise ValueError('no smiles available for ' + outpath)

            smi_pth = outpath.replace('.pdb', '_smiles.txt')
            with open(smi_pth, 'w') as f:
                f.write(str(smi))

        except Exception as err:
            raise Exception(file_obj.bound_conf) from err
    else:
        # no bound-state file: demote the crystal's outcome to 3
        self.crystal.outcome = 3
        self.crystal.save()

    with self.output().open('w') as f:
        f.write('')
def run(self):
    """Check that the rows of ``self.filename`` match what is stored in ``self.model``.

    First the number of ``Crystal`` rows for the file is compared with the
    number of soakdb rows. Then each soakdb row is matched to a model row by
    crystal name, visit filename and compound smiles, and every translated
    field is compared. Mismatches are written as a csv to a file under
    ``logs/``; unexpected errors are written to the same file as a traceback.
    An empty luigi output is written at the end, whatever the result.
    """
    import operator  # local import: only this function needs it

    path_parts = self.filename.split('/')
    out_err_file = str(
        'logs/' + str(path_parts[3]) + '_' + str(path_parts[4]) + '_'
        + str(path_parts[5]) + '_'
        + str(misc_functions.get_mod_date(self.filename))
        + str(self.model).replace("<class '", '').replace("'>", '') + '.txt')
    print(out_err_file)

    results = db_functions.soakdb_query(self.filename)

    # soakdb placeholders that are never reported as a mismatch
    ignorable = [
        None, 'None', '', '-', 'n/a', 'null', 'pending', 'NULL',
        '#NAME?', '#NOM?', 'None\t', 'Analysis Pending', 'in-situ'
    ]
    outcome_pattern = re.compile(r'-?\d+')

    # bound up front so the error handlers below can always report them
    key = None
    lab_object = None

    try:
        print('Number of rows from file = ' + str(len(results)))

        crystal_count = Crystal.objects.filter(
            visit__filename=self.filename).count()
        status = crystal_count == len(results)
        print('Checking same number of rows in test_xchem: ' + str(status))

        if not status:
            raise Exception('FAIL: no of entries in test_xchem = '
                            + str(crystal_count))

        proteins = list({row['ProteinName'] for row in results})
        print('Unique targets in soakdb file: ' + str(proteins))

        translations = {
            Lab: db_functions.lab_translations(),
            Refinement: db_functions.refinement_translations(),
            DataProcessing: db_functions.data_processing_translations(),
            Dimple: db_functions.dimple_translations()
        }
        translation = translations[self.model]

        error_dict = {
            'crystal': [],
            'soakdb_field': [],
            'model_field': [],
            'soakdb_value': [],
            'model_value': []
        }

        for row in results:
            lab_object = self.model.objects.filter(
                crystal_name__crystal_name=row['CrystalName'],
                crystal_name__visit__filename=str(self.filename),
                crystal_name__compound__smiles=row['CompoundSMILES'])

            if len(lab_object) > 1:
                raise Exception('Multiple Crystals!')

            if len(lab_object) == 0:
                if (self.model == Dimple and not row['DimplePathToPDB']
                        and not row['DimplePathToMTZ']):
                    # a crystal without dimple output legitimately has no
                    # Dimple row: nothing to compare, move to the next row
                    continue
                # str() each part: the paths may be None, which used to turn
                # this message into a TypeError
                raise Exception('No entry for ' + ' '.join(
                    str(row[column]) for column in
                    ('CrystalName', 'DimplePathToPDB', 'DimplePathToMTZ')))

            db_row = lab_object[0]
            for key in translation.keys():
                # attrgetter resolves (possibly dotted) attribute names
                # without evaluating arbitrary code
                test_xchem_val = operator.attrgetter(key)(db_row)
                soakdb_val = row[translation[key]]

                if key == 'outcome':
                    # soakdb stores the outcome as text; compare the number only
                    found = outcome_pattern.findall(str(soakdb_val))
                    if not found:
                        continue
                    soakdb_val = int(found[0])

                if translation[key] == 'CrystalName':
                    test_xchem_val = db_row.crystal_name.crystal_name

                if translation[key] == 'DimpleReferencePDB' and soakdb_val:
                    test_xchem_val = db_row.reference
                    if test_xchem_val is not None:
                        test_xchem_val = db_row.reference.reference_pdb

                if soakdb_val == '' or soakdb_val == 'None' or not soakdb_val:
                    continue
                if isinstance(test_xchem_val, float):
                    if float(test_xchem_val) == float(soakdb_val):
                        continue
                if isinstance(test_xchem_val, int):
                    if int(soakdb_val) == int(test_xchem_val):
                        continue

                if test_xchem_val != soakdb_val:
                    if soakdb_val in ignorable:
                        continue
                    error_dict['crystal'].append(
                        str(db_row.crystal_name.crystal_name))
                    error_dict['soakdb_field'].append(translation[key])
                    error_dict['model_field'].append(key)
                    error_dict['soakdb_value'].append(soakdb_val)
                    error_dict['model_value'].append(test_xchem_val)

        if error_dict['crystal']:
            pd.DataFrame.from_dict(error_dict).to_csv(out_err_file)

    except IndexError:
        # a soakdb row lacking the requested column is tolerated silently
        if 'No item with that key' not in traceback.format_exc():
            with open(out_err_file, 'w') as f:
                f.write(traceback.format_exc())
                f.write('\n' + str(key))
    except AttributeError:
        with open(out_err_file, 'w') as f:
            f.write(traceback.format_exc())
            f.write('\n' + str(lab_object))
    except Exception:
        with open(out_err_file, 'w') as f:
            f.write(traceback.format_exc())

    with self.output().open('w') as f:
        f.write('')
def output(self):
    """Return the ``.checked`` marker target for this version of the file.

    The target path is the filename followed by ``.``, the file's
    modification date and ``.checked``.
    """
    stamp = misc_functions.get_mod_date(self.filename)
    marker_path = str(self.filename + '.' + stamp + '.checked')
    return luigi.LocalTarget(marker_path)
def output(self):
    """Return the ``.transferred`` marker target for this version of the data file.

    The target path is the data file name followed by ``_``, the file's
    modification date (converted to ``str``) and ``.transferred``.
    """
    stamp = str(misc_functions.get_mod_date(self.data_file))
    marker_path = str(self.data_file + '_' + stamp + '.transferred')
    return luigi.LocalTarget(marker_path)
def run(self):
    """Update the status of every soakdb file listed in the second luigi input.

    The input file holds one soakdb filename per line. Status codes written
    to ``SoakdbFiles.status``:

    * 0 = new (the file was not yet in the table and has just been added)
    * 1 = changed (the file on disk is newer than the stored modification date)
    * 2 = not changed (never written here; existing rows are left alone)

    If the ``Lab`` table is empty, every ``SoakdbFiles`` row is reset to 0.
    An empty luigi output is written at the end.

    :raises Exception: if a filename has more than one ``SoakdbFiles`` row

    NOTE(review): ``select_for_update()`` is kept from the original; Django
    only allows it inside a transaction - confirm that callers provide one.
    """
    # filenames that already have a row in SoakdbFiles
    checked = []

    print('INPUT NAME:')
    print(self.input()[1].path)

    with open(self.input()[1].path, 'r') as f:
        files = f.readlines()
    print('FILES:')
    print(files)

    for filename in files:
        # remove any newline characters
        filename_clean = filename.rstrip('\n')

        soakdb_query = list(
            SoakdbFiles.objects.filter(filename=filename_clean))
        print(len(soakdb_query))

        if len(soakdb_query) == 0:
            # the file is added once, in the "not in checked" branch below
            print('LEN=0')

        # only one entry should exist per file
        if len(soakdb_query) == 1:
            print('LEN=1')
            data_file = soakdb_query[0].filename
            checked.append(data_file)
            # modification date as stored in the db
            old_mod_date = soakdb_query[0].modification_date
            # modification date of the file on disk
            current_mod_date = misc_functions.get_mod_date(data_file)
            id_number = soakdb_query[0].id

            print(old_mod_date)
            if not old_mod_date:
                # no date stored yet: record the current one and treat the
                # file as changed by comparing against 0 (int() of an empty
                # stored date used to crash here)
                soakdb_query[0].modification_date = current_mod_date
                soakdb_query[0].save()
                old_mod_date = 0
            print(current_mod_date)

            if int(current_mod_date) > int(old_mod_date):
                update_status = SoakdbFiles.objects.select_for_update(
                ).get(id=id_number)
                update_status.status = 1
                update_status.save()

        # should never happen - the filename field is unique
        if len(soakdb_query) > 1:
            raise Exception(
                'More than one entry for file! Something has gone wrong!')

        # the file is not in the database yet: add it and mark it as new
        if filename_clean not in checked:
            out, err, proposal = db_functions.pop_soakdb(filename_clean)
            db_functions.pop_proposals(proposal)
            soakdb_query = list(
                SoakdbFiles.objects.select_for_update().filter(
                    filename=filename_clean))
            id_number = soakdb_query[0].id
            update_status = SoakdbFiles.objects.select_for_update().get(
                id=id_number)
            update_status.status = 0
            update_status.save()

    # an empty lab table means no data has been transferred from the
    # datafiles, so every file is set back to 0 (new file)
    lab = list(Lab.objects.all())
    if not lab:
        for soak_file in SoakdbFiles.objects.select_for_update().all():
            soak_file.status = 0
            soak_file.save()

    # write output to signify job done
    with self.output().open('w') as f:
        f.write('')
def check_file(filename):
    """Work out and store the status of a single soakdb file.

    Returned status codes (also written to ``SoakdbFiles.status`` for 0 and 1):

    * 0 = new: the file had no ``SoakdbFiles`` row and has just been added
    * 1 = changed: the file on disk is newer than the stored modification date
    * 2 = not changed

    If the ``Lab`` table is empty, every ``SoakdbFiles`` row is reset to 0
    before returning.

    :param filename: soakdb filename; trailing newlines are stripped
    :type filename: str
    :return: status code of the file
    :rtype: int
    :raises Exception: if the filename has more than one ``SoakdbFiles`` row,
        or if a modification date cannot be converted to ``int``
    """
    clean_name = filename.rstrip('\n')
    matches = list(SoakdbFiles.objects.filter(filename=clean_name))
    match_count = len(matches)
    status = 2

    if match_count == 1:
        # exactly one row, as expected: compare stored and on-disk dates
        record = matches[0]
        data_file = record.filename
        old_mod_date = record.modification_date
        current_mod_date = misc_functions.get_mod_date(data_file)
        record_id = record.id

        if not old_mod_date:
            # nothing stored yet: save the current date and compare against 0
            record.modification_date = current_mod_date
            record.save()
            old_mod_date = 0

        try:
            if int(current_mod_date) > int(old_mod_date):
                changed_record = SoakdbFiles.objects.get(id=record_id)
                changed_record.status = 1
                status = 1
                changed_record.save()
        except ValueError:
            raise Exception(
                f'current_mod_date: {current_mod_date!s}, old_mod_date: {old_mod_date!s}')

        print(current_mod_date)
        print(old_mod_date)

    elif match_count > 1:
        # should never happen - the filename field is unique
        raise Exception('More than one entry for file! Something has gone wrong!')

    else:
        # no row at all: register the file and flag it as new
        print('This is a new soakDB file, just setting it up in the database!')
        out, err, proposal = db_functions.pop_soakdb(clean_name)
        db_functions.pop_proposals(proposal)
        matches = list(SoakdbFiles.objects.filter(filename=clean_name))
        record_id = matches[0].id
        new_record = SoakdbFiles.objects.get(id=record_id)
        new_record.status = 0
        new_record.save()
        status = 0

    # an empty lab table means nothing has been transferred from the
    # datafiles yet, so every file is set back to 0 (new file)
    lab_rows = list(Lab.objects.all())
    if not lab_rows:
        all_files = SoakdbFiles.objects.all()
        for soak_file in all_files:
            soak_file.status = 0
            soak_file.save()

    return status