def run(self):
    """Remove the output markers of every upstream task so they re-run on
    the next pipeline cycle, then write this task's own completion marker.
    """
    log_directory = DirectoriesConfig().log_directory
    # Marker files produced by each upstream task, plus the top-level
    # 'pipe.done' marker.
    paths = [
        TransferPandda(
            date_time=self.date_time,
            soak_db_filepath=self.soak_db_filepath).output().path,
        AnnotateAllEvents(
            date_time=self.date_time,
            soak_db_filepath=self.soak_db_filepath).output().path,
        InitDBEntries(date=self.date,
                      hit_directory=self.hit_directory).output().path,
        UploadLeads(date=self.date,
                    hit_directory=self.hit_directory).output().path,
        UploadHits(date=self.date,
                   hit_directory=self.hit_directory).output().path,
        WriteBlackLists(date=self.date,
                        hit_directory=self.hit_directory).output().path,
        os.path.join(log_directory, 'pipe.done'),
    ]
    # Timestamped markers from previous runs. os.path.join (rather than
    # plain string concatenation) keeps the pattern valid whether or not
    # log_directory carries a trailing separator.
    paths.extend(glob.glob(os.path.join(log_directory, '*pipe_run_*.done')))
    for path in paths:
        if os.path.isfile(path):
            os.remove(path)
    # Touch our own empty marker so luigi records this task as complete.
    with self.output().open('w') as f:
        f.write('')
class StartPipeline(luigi.WrapperTask):
    """Top-level wrapper scheduling one full database-upload pipeline run.

    Pulls in the transfer, annotation, DB-entry and upload tasks, then
    writes a 'pipe.done' marker once all of them have completed.
    """
    # NOTE(review): parameter defaults are evaluated once at import time,
    # so 'date' and 'date_time' are fixed for the life of the process.
    date = luigi.DateParameter(default=datetime.datetime.now())
    hit_directory = luigi.Parameter(default=DirectoriesConfig().hit_directory)
    soak_db_filepath = luigi.Parameter(default=SoakDBConfig().default_path)
    date_time = luigi.Parameter(
        default=datetime.datetime.now().strftime("%Y%m%d%H"))

    def requires(self):
        # Same task set as before, returned as a list instead of yielded
        # one at a time (equivalent for luigi scheduling).
        return [
            StartTransfers(),
            AddProjects(),
            TransferPandda(date_time=self.date_time,
                           soak_db_filepath=self.soak_db_filepath),
            AnnotateAllEvents(date_time=self.date_time,
                              soak_db_filepath=self.soak_db_filepath),
            InitDBEntries(date=self.date, hit_directory=self.hit_directory),
            UploadLeads(date=self.date, hit_directory=self.hit_directory),
            GetOutFiles(),
            WriteBlackLists(date=self.date, hit_directory=self.hit_directory),
            UpdateVerne(),
        ]

    def output(self):
        log_directory = DirectoriesConfig().log_directory
        return luigi.LocalTarget(os.path.join(log_directory, 'pipe.done'))

    def run(self):
        # Touch an empty marker file.
        self.output().open('w').close()
class PostPipeClean(luigi.Task):
    """Delete the marker files left behind by a completed pipeline run so
    the next run starts from a clean slate, then write a timestamped
    completion marker of its own.
    """
    date = luigi.DateParameter(default=datetime.datetime.now())
    hit_directory = luigi.Parameter(default=DirectoriesConfig().hit_directory)
    soak_db_filepath = luigi.Parameter(default=SoakDBConfig().default_path)
    date_time = luigi.Parameter(
        default=datetime.datetime.now().strftime("%Y%m%d%H"))
    log_directory = luigi.Parameter(default=DirectoriesConfig().log_directory)
    staging_directory = luigi.Parameter(
        default=DirectoriesConfig().staging_directory)
    input_directory = luigi.Parameter(
        default=DirectoriesConfig().input_directory)

    def requires(self):
        return StartPipeline()

    def output(self):
        # Timestamped marker kept in the log directory to avoid clogging
        # up the main dir.
        return luigi.LocalTarget(
            os.path.join(
                self.log_directory,
                f'pipe_run_{datetime.datetime.now().strftime("%Y%m%d%H%M")}.done'
            ))

    def run(self):
        """Collect and delete stale '.done' markers under the log directory."""
        # Per-task markers nested one level below the log directory.
        paths = [
            x for x in glob.glob(os.path.join(self.log_directory, '*', '*'))
            if 'done' in x
        ]
        # BUG FIX: the original called paths.extend() with a single path
        # string, which spread it into the list character-by-character;
        # append() adds the path itself.
        paths.append(os.path.join(self.log_directory, 'pipe.done'))
        # os.path.join keeps the pattern valid regardless of a trailing
        # separator on log_directory.
        paths.extend(glob.glob(os.path.join(self.log_directory,
                                            '*pipe_run_*.done')))
        # Keep the map-cutting markers so maps are not constantly re-cut.
        paths = [x for x in paths if 'cut' not in x]
        for path in paths:
            if os.path.isfile(path):
                os.remove(path)
        with self.output().open('w') as f:
            f.write('')
class StartPipeline(luigi.WrapperTask):
    """Wrapper task kicking off the fragalysis-oriented pipeline: transfer,
    symbolic-link creation, target alignment, map cutting and fragalysis
    API output translation. Writes 'pipe.done' when everything finishes.
    """
    date = luigi.DateParameter(default=datetime.datetime.now())
    hit_directory = luigi.Parameter(default=DirectoriesConfig().hit_directory)
    soak_db_filepath = luigi.Parameter(default=SoakDBConfig().default_path)
    date_time = luigi.Parameter(
        default=datetime.datetime.now().strftime("%Y%m%d%H"))
    staging_directory = luigi.Parameter(
        default=DirectoriesConfig().staging_directory)
    input_directory = luigi.Parameter(
        default=DirectoriesConfig().input_directory)

    def requires(self):
        # Transfers first, then the batch link/align/cut/translate chain.
        return [
            StartTransfers(),
            BatchCreateSymbolicLinks(),
            BatchAlignTargets(),
            BatchCutMaps(),
            BatchTranslateFragalysisAPIOutput(),
        ]

    def output(self):
        log_directory = DirectoriesConfig().log_directory
        return luigi.LocalTarget(os.path.join(log_directory, 'pipe.done'))

    def run(self):
        # Touch an empty marker file.
        self.output().open('w').close()
def transfer_changed_datafile(data_file, hit_directory,
                              log_directory=DirectoriesConfig().log_directory):
    """Transfers a changed file to XCDB by calling `transfer_file(data_file)`

    If the file's mainTable exists, all stale '.done' markers and dated log
    outputs for the visit are removed first (so downstream tasks re-run),
    and the existing soakdb/proposal rows for the file are popped before
    the re-transfer.

    :param data_file: The soakdb that we want to check if it updated,
        :class:`TransferChangedDataFile` self.data_file
    :type data_file: str
    :param hit_directory: Directory to which the soakdb corresponds to. Usually
        :class:`TransferChangedDataFile` self.hit_directory
    :type hit_directory: str
    :param log_directory: directory holding pipeline log files
    :return: None; side effects are file removals and DB updates
    :rtype: None
    """
    print(data_file)
    maint_exists = db_functions.check_table_sqlite(data_file, 'mainTable')
    if maint_exists == 1:
        soakdb_query = SoakdbFiles.objects.get(filename=data_file)
        print(soakdb_query)
        # Everything before '.../database/...' is the visit directory.
        search_path = data_file.split('database')[0]
        # Remove the pandda data-transfer marker so that step re-runs.
        pandda_done = os.path.join(search_path, 'transfer_pandda_data.done')
        if os.path.isfile(pandda_done):
            os.remove(pandda_done)
        log_files = find_log_files(search_path).rsplit()
        print(log_files)
        for log in log_files:
            print(f"{log}.run.done")
            # Clear all three per-log markers in one pass (the original
            # repeated the same if/remove block three times).
            for suffix in ('.run.done', '.sites.done', '.events.done'):
                marker = f"{log}{suffix}"
                if os.path.isfile(marker):
                    os.remove(marker)
        find_logs_out_files = glob.glob(f"{search_path}*.txt")
        for f in find_logs_out_files:
            # Only remove txt files whose basename is a date stamp.
            if is_date(f.replace(search_path, '').replace('.txt', '')):
                os.remove(f)
        out, err, proposal = db_functions.pop_soakdb(data_file)
        db_functions.pop_proposals(proposal)
    else:
        print('Main Table does not exist!')
    transfer_file(data_file)
class PostPipeClean(luigi.Task):
    """Clean up all upstream task markers after a pipeline run, then write
    a timestamped completion marker in the current working directory.
    """
    date = luigi.DateParameter(default=datetime.datetime.now())
    hit_directory = luigi.Parameter(default=DirectoriesConfig().hit_directory)
    soak_db_filepath = luigi.Parameter(default=SoakDBConfig().default_path)
    date_time = luigi.Parameter(
        default=datetime.datetime.now().strftime("%Y%m%d%H"))

    def requires(self):
        return StartPipeline()

    def output(self):
        # Timestamped marker so each run produces a distinct output file.
        stamp = datetime.datetime.now().strftime("%Y%m%d%H%M")
        return luigi.LocalTarget(
            os.path.join(os.getcwd(), f'pipe_run_{stamp}.done'))

    def run(self):
        """Delete every upstream '.done' marker so the tasks re-run next cycle."""
        log_directory = DirectoriesConfig().log_directory
        paths = [
            TransferPandda(
                date_time=self.date_time,
                soak_db_filepath=self.soak_db_filepath).output().path,
            AnnotateAllEvents(
                date_time=self.date_time,
                soak_db_filepath=self.soak_db_filepath).output().path,
            InitDBEntries(date=self.date,
                          hit_directory=self.hit_directory).output().path,
            UploadLeads(date=self.date,
                        hit_directory=self.hit_directory).output().path,
            UploadHits(date=self.date,
                       hit_directory=self.hit_directory).output().path,
            WriteBlackLists(date=self.date,
                            hit_directory=self.hit_directory).output().path,
            os.path.join(log_directory, 'pipe.done'),
        ]
        # os.path.join (rather than string concatenation) keeps the glob
        # pattern valid whether or not log_directory has a trailing slash.
        paths.extend(glob.glob(os.path.join(log_directory,
                                            '*pipe_run_*.done')))
        for path in paths:
            if os.path.isfile(path):
                os.remove(path)
        with self.output().open('w') as f:
            f.write('')
def output(self):
    """Marker file signalling that the edstats step has completed."""
    log_dir = DirectoriesConfig().log_directory
    return luigi.LocalTarget(os.path.join(log_dir, 'edstats.done'))
def output(self):
    """Per-crystal/strucid marker under the 'edstats' log subdirectory."""
    # f-string applies str() to crystal/strucid exactly as the original's
    # explicit str() calls did.
    marker = f"edstats/{self.crystal}_{self.strucid}.done"
    return luigi.LocalTarget(
        os.path.join(DirectoriesConfig().log_directory, marker))
def output(self):
    """Marker file signalling that the violin-plot HTML has been written."""
    log_dir = DirectoriesConfig().log_directory
    return luigi.LocalTarget(os.path.join(log_dir, 'violin_html.done'))
def check_file_upload(filename, model,
                      log_directory=DirectoriesConfig().log_directory):
    """Check if a soakdb file has been uploaded to a given django model

    Compares every row returned by the soakdb query against the matching
    django model entry and writes any mismatches (or the traceback of an
    unexpected failure) to a per-file error report under ``log_directory``.

    :param filename: filename to check, :class:`transfer_soakdb.CheckFileUpload` self.filename
    :type filename: str
    :param model: django model class to validate against (Lab, Refinement,
        DataProcessing or Dimple)
    :return: None; mismatches are written to the error report file
    :rtype: None
    """
    # Report path encodes visit path components, file mod-date and model name.
    # NOTE(review): filename.split('/')[3:6] assumes a fixed absolute path
    # depth — confirm against the callers' path layout.
    out_err_file = os.path.join(
        log_directory,
        str(str(filename.split('/')[3]) + '_' + str(filename.split('/')[4]) +
            '_' + str(filename.split('/')[5]) + '_' +
            str(misc_functions.get_mod_date(filename)) +
            str(model).replace("<class '", '').replace("'>", '') + '.txt'))
    print(out_err_file)
    results = db_functions.soakdb_query(filename)
    try:
        print(f"Number of rows from file = {len(results)}")
        # Map each model class to its soakdb-column translation table.
        translations = {Lab: db_functions.lab_translations(),
                        Refinement: db_functions.refinement_translations(),
                        DataProcessing: db_functions.data_processing_translations(),
                        Dimple: db_functions.dimple_translations()}
        translation = translations[model]  # different from what is in class...
        error_dict = dict(crystal=[], soakdb_field=[], model_field=[],
                          soakdb_value=[], model_value=[])
        for row in results:
            lab_object = model.objects.filter(
                crystal_name__crystal_name=row['CrystalName'],
                crystal_name__visit__filename=str(filename),
                crystal_name__compound__smiles=row['CompoundSMILES'])
            if len(lab_object) > 1:
                raise Exception('Multiple Crystals!')
            if len(lab_object) == 0:
                # Dimple rows with no PDB/MTZ paths legitimately have no entry.
                if model == Dimple and not row['DimplePathToPDB'] and not row['DimplePathToMTZ']:
                    pass
                else:
                    raise Exception(
                        f"No entry for {row['CrystalName']}, {row['DimplePathToPDB']}, {row['DimplePathToMTZ']}")
            for key in translation.keys():
                # BUG FIX: the original used eval(f"lab_objects[0].{key}"),
                # which referenced an undefined name ('lab_objects', note the
                # 's') and used eval needlessly; getattr is the safe equivalent.
                test_xchem_val = getattr(lab_object[0], key)
                soakdb_val = row[translation[key]]
                if key == 'outcome':
                    # Raw string so \d is a regex digit class, not an escape.
                    pattern = re.compile(r'-?\d+')
                    try:
                        soakdb_val = int(pattern.findall(str(soakdb_val))[0])
                    except (IndexError, ValueError):
                        # No digits found in the outcome field — skip it.
                        continue
                if translation[key] == 'CrystalName':
                    test_xchem_val = lab_object[0].crystal_name.crystal_name
                if translation[key] == 'DimpleReferencePDB' and soakdb_val:
                    test_xchem_val = lab_object[0].reference
                    if test_xchem_val is not None:
                        test_xchem_val = lab_object[0].reference.reference_pdb
                # Skip empty / placeholder soakdb values.
                if soakdb_val == '' or soakdb_val == 'None' or not soakdb_val:
                    continue
                if isinstance(test_xchem_val, float):
                    if float(test_xchem_val) == float(soakdb_val):
                        continue
                if isinstance(test_xchem_val, int):
                    if int(soakdb_val) == int(test_xchem_val):
                        continue
                if test_xchem_val != soakdb_val:
                    # Placeholder values that should not count as mismatches.
                    if soakdb_val in [None, 'None', '', '-', 'n/a', 'null',
                                      'pending', 'NULL', '#NAME?', '#NOM?',
                                      'None\t', 'Analysis Pending', 'in-situ']:
                        continue
                    else:
                        error_dict['crystal'].append(
                            str(lab_object[0].crystal_name.crystal_name))
                        error_dict['soakdb_field'].append(translation[key])
                        error_dict['model_field'].append(key)
                        error_dict['soakdb_value'].append(soakdb_val)
                        error_dict['model_value'].append(test_xchem_val)
        if error_dict['crystal']:
            pd.DataFrame.from_dict(error_dict).to_csv(out_err_file)
    except IndexError:
        # 'No item with that key' from the soakdb query is expected; anything
        # else is written to the report along with the offending field.
        if 'No item with that key' in traceback.format_exc():
            pass
        else:
            with open(out_err_file, 'w') as f:
                f.write(traceback.format_exc())
            with open(out_err_file, 'a') as f:
                f.write('\n' + str(key))
    except AttributeError:
        with open(out_err_file, 'w') as f:
            f.write(traceback.format_exc())
        with open(out_err_file, 'a') as f:
            f.write('\n' + str(lab_object))
    except Exception:
        # Last-resort catch (was a bare 'except:') so the pipeline keeps
        # running; the traceback is preserved in the report file.
        with open(out_err_file, 'w') as f:
            f.write(traceback.format_exc())