Beispiel #1
0
    def run(self):
        """Delete stale ``.done`` markers, then write this task's output file.

        The markers removed are the outputs of the tasks instantiated below,
        the ``pipe.done`` file in the log directory, and every file in the log
        directory matching ``*pipe_run_*.done``.  Paths that are not existing
        regular files are skipped.  Finally an empty file is written to
        ``self.output()``.
        """
        log_directory = DirectoriesConfig().log_directory
        soak_kwargs = {
            'date_time': self.date_time,
            'soak_db_filepath': self.soak_db_filepath,
        }
        hit_kwargs = {'date': self.date, 'hit_directory': self.hit_directory}

        paths = [
            TransferPandda(**soak_kwargs).output().path,
            AnnotateAllEvents(**soak_kwargs).output().path,
            InitDBEntries(**hit_kwargs).output().path,
            UploadLeads(**hit_kwargs).output().path,
            UploadHits(**hit_kwargs).output().path,
            WriteBlackLists(**hit_kwargs).output().path,
            os.path.join(log_directory, 'pipe.done'),
        ]

        # Build the pattern with os.path.join rather than "+": plain string
        # concatenation only stays inside the log directory when the
        # configured path happens to end with a separator.
        paths.extend(
            glob.glob(os.path.join(log_directory, '*pipe_run_*.done')))

        for path in paths:
            # isfile() also filters out directories and already-missing files.
            if os.path.isfile(path):
                os.remove(path)

        with self.output().open('w') as f:
            f.write('')
Beispiel #2
0
class StartPipeline(luigi.WrapperTask):
    """Wrapper task that requires every stage of the pipeline.

    ``requires`` yields the individual stage tasks; ``run`` writes an empty
    ``pipe.done`` file into the configured log directory.
    """

    # NOTE: all defaults below are evaluated once, when this class body runs.
    date = luigi.DateParameter(default=datetime.datetime.now())
    hit_directory = luigi.Parameter(default=DirectoriesConfig().hit_directory)
    soak_db_filepath = luigi.Parameter(default=SoakDBConfig().default_path)
    # Timestamp string with hour resolution (YYYYmmddHH).
    date_time = luigi.Parameter(
        default=datetime.datetime.now().strftime("%Y%m%d%H"))

    def requires(self):
        """Yield the stage tasks in the order they are listed here."""
        soak_kwargs = dict(date_time=self.date_time,
                           soak_db_filepath=self.soak_db_filepath)
        hit_kwargs = dict(date=self.date, hit_directory=self.hit_directory)

        yield StartTransfers()
        yield AddProjects()
        yield TransferPandda(**soak_kwargs)
        yield AnnotateAllEvents(**soak_kwargs)
        yield InitDBEntries(**hit_kwargs)
        yield UploadLeads(**hit_kwargs)
        yield GetOutFiles()
        yield WriteBlackLists(**hit_kwargs)
        yield UpdateVerne()

    def output(self):
        """Return the local target for ``pipe.done`` in the log directory."""
        marker_path = os.path.join(DirectoriesConfig().log_directory,
                                   'pipe.done')
        return luigi.LocalTarget(marker_path)

    def run(self):
        """Create the (empty) ``pipe.done`` marker file."""
        with self.output().open('w') as marker:
            marker.write('')
Beispiel #3
0
class PostPipeClean(luigi.Task):
    """Remove ``done`` marker files from the log directory after a pipeline run.

    Requires :class:`StartPipeline`.  ``run`` deletes the collected marker
    files and then writes this task's own timestamped ``pipe_run_*.done``
    file into ``log_directory``.
    """

    # NOTE: all defaults below are evaluated once, when this class body runs.
    date = luigi.DateParameter(default=datetime.datetime.now())
    hit_directory = luigi.Parameter(default=DirectoriesConfig().hit_directory)
    soak_db_filepath = luigi.Parameter(default=SoakDBConfig().default_path)
    date_time = luigi.Parameter(
        default=datetime.datetime.now().strftime("%Y%m%d%H"))
    log_directory = luigi.Parameter(default=DirectoriesConfig().log_directory)
    staging_directory = luigi.Parameter(
        default=DirectoriesConfig().staging_directory)
    input_directory = luigi.Parameter(
        default=DirectoriesConfig().input_directory)

    def requires(self):
        """Return the pipeline wrapper task this clean-up depends on."""
        return StartPipeline()

    def output(self):
        """Return the target for a ``pipe_run_<YYYYmmddHHMM>.done`` marker.

        NOTE(review): the timestamp is taken from ``datetime.now()`` on every
        call, so the returned path changes from one minute to the next.
        """
        # Changing the output to not clog up the main dir
        return luigi.LocalTarget(
            os.path.join(
                self.log_directory,
                f'pipe_run_{datetime.datetime.now().strftime("%Y%m%d%H%M")}.done'
            ))

    def run(self):
        """Delete the collected marker files, then write this task's output.

        Collected paths are: entries one directory level below
        ``log_directory`` whose path contains ``done``, the ``pipe.done``
        file, and files in ``log_directory`` matching ``*pipe_run_*.done``.
        Any path containing ``cut`` is left alone.
        """
        paths = [
            x for x in glob.glob(os.path.join(self.log_directory, '*', '*'))
            if 'done' in x
        ]
        # append, not extend: extending a list with a string adds every
        # single character as its own "path", so pipe.done was never removed.
        paths.append(os.path.join(self.log_directory, 'pipe.done'))
        # os.path.join keeps the pattern inside the log directory even when
        # the configured path has no trailing separator.
        paths.extend(
            glob.glob(os.path.join(self.log_directory, '*pipe_run_*.done')))
        paths = [
            x for x in paths if 'cut' not in x
        ]  # I don't think I want to constantly try to cut the maps... May delete later.

        for path in paths:
            # isfile() also filters out directories and already-missing files.
            if os.path.isfile(path):
                os.remove(path)

        with self.output().open('w') as f:
            f.write('')
Beispiel #4
0
class StartPipeline(luigi.WrapperTask):
    """Wrapper task that requires the currently enabled pipeline stages.

    ``requires`` yields the stage tasks; ``run`` writes an empty
    ``pipe.done`` file into the configured log directory.
    """

    # NOTE: all defaults below are evaluated once, when this class body runs.
    date = luigi.DateParameter(default=datetime.datetime.now())
    hit_directory = luigi.Parameter(default=DirectoriesConfig().hit_directory)
    soak_db_filepath = luigi.Parameter(default=SoakDBConfig().default_path)
    # Timestamp string with hour resolution (YYYYmmddHH).
    date_time = luigi.Parameter(
        default=datetime.datetime.now().strftime("%Y%m%d%H"))
    staging_directory = luigi.Parameter(
        default=DirectoriesConfig().staging_directory)
    input_directory = luigi.Parameter(
        default=DirectoriesConfig().input_directory)

    def requires(self):
        """Yield one instance of each enabled stage, in listed order.

        Previously listed stages (AddProjects, TransferPandda,
        AnnotateAllEvents, InitDBEntries, UploadLeads, GetOutFiles,
        WriteBlackLists, UpdateVerne) are currently disabled, as is the
        removal of a stale ``pipe.done`` before the stages are yielded.
        """
        enabled_stages = (
            StartTransfers,
            BatchCreateSymbolicLinks,
            BatchAlignTargets,
            BatchCutMaps,
            BatchTranslateFragalysisAPIOutput,
        )
        for stage in enabled_stages:
            yield stage()

    def output(self):
        """Return the local target for ``pipe.done`` in the log directory."""
        marker_path = os.path.join(DirectoriesConfig().log_directory,
                                   'pipe.done')
        return luigi.LocalTarget(marker_path)

    def run(self):
        """Create the (empty) ``pipe.done`` marker file."""
        with self.output().open('w') as marker:
            marker.write('')
Beispiel #5
0
def transfer_changed_datafile(data_file, hit_directory, log_directory = DirectoriesConfig().log_directory):
    """Clear the markers belonging to a changed soakdb file and transfer it again.

    When ``data_file`` contains a ``mainTable``, the ``.done`` markers and
    date-named ``.txt`` files found under the path preceding ``database`` are
    deleted and the soakdb/proposal tables are repopulated through
    ``db_functions``.  In every case ``transfer_file(data_file)`` is called
    at the end.

    :param data_file: The soakdb file that changed, :class:`TransferChangedDataFile` self.data_file
    :type data_file: str
    :param hit_directory: Directory to which the soakdb corresponds. Usually :class:`TransferChangedDataFile` self.hit_directory.
        Not read by this function.
    :type hit_directory: str
    :param log_directory: Not read by this function.
    :type log_directory: str
    :return: Nothing
    :rtype: None
    """
    print(data_file)

    if db_functions.check_table_sqlite(data_file, 'mainTable') != 1:
        print('Main Table does not exist!')
    else:
        # .get() raises if there is not exactly one matching record.
        soakdb_record = SoakdbFiles.objects.get(filename=data_file)
        print(soakdb_record)

        # Everything in front of the first 'database' in the file path.
        search_path = data_file.split('database')[0]

        # remove pandda data transfer done file
        pandda_marker = os.path.join(search_path, 'transfer_pandda_data.done')
        if os.path.isfile(pandda_marker):
            os.remove(pandda_marker)

        log_files = find_log_files(search_path).rsplit()
        print(log_files)
        for log in log_files:
            print(f"{log}.run.done")
            for stage in ('run', 'sites', 'events'):
                stage_marker = f"{log}.{stage}.done"
                if os.path.isfile(stage_marker):
                    os.remove(stage_marker)

        # Remove .txt files whose name (minus prefix and extension) is a date.
        for txt_file in glob.glob(f"{search_path}*.txt"):
            stem = txt_file.replace(search_path, '').replace('.txt', '')
            if is_date(stem):
                os.remove(txt_file)

        _, _, proposal = db_functions.pop_soakdb(data_file)
        db_functions.pop_proposals(proposal)

    transfer_file(data_file)
Beispiel #6
0
class PostPipeClean(luigi.Task):
    """Remove the pipeline's ``.done`` markers after a pipeline run.

    Requires :class:`StartPipeline`.  ``run`` deletes the collected marker
    files and then writes this task's own timestamped ``pipe_run_*.done``
    file.
    """

    # NOTE: all defaults below are evaluated once, when this class body runs.
    date = luigi.DateParameter(default=datetime.datetime.now())
    hit_directory = luigi.Parameter(default=DirectoriesConfig().hit_directory)
    soak_db_filepath = luigi.Parameter(default=SoakDBConfig().default_path)
    date_time = luigi.Parameter(
        default=datetime.datetime.now().strftime("%Y%m%d%H"))

    def requires(self):
        """Return the pipeline wrapper task this clean-up depends on."""
        return StartPipeline()

    def output(self):
        """Return the target for ``pipe_run_<YYYYmmddHHMM>.done`` in the cwd.

        NOTE(review): the timestamp is taken from ``datetime.now()`` on every
        call, so the returned path changes from one minute to the next.  The
        file lives in ``os.getcwd()``, whereas ``run`` cleans
        ``pipe_run_*.done`` files in the log directory - these only coincide
        when the process runs from the log directory.
        """
        return luigi.LocalTarget(
            os.path.join(
                os.getcwd(),
                str('pipe_run_' +
                    datetime.datetime.now().strftime("%Y%m%d%H%M") + '.done')))

    def run(self):
        """Delete stale ``.done`` markers, then write this task's output file.

        The markers removed are the outputs of the tasks instantiated below,
        the ``pipe.done`` file in the log directory, and every file in the log
        directory matching ``*pipe_run_*.done``.  Paths that are not existing
        regular files are skipped.
        """
        log_directory = DirectoriesConfig().log_directory
        soak_kwargs = {
            'date_time': self.date_time,
            'soak_db_filepath': self.soak_db_filepath,
        }
        hit_kwargs = {'date': self.date, 'hit_directory': self.hit_directory}

        paths = [
            TransferPandda(**soak_kwargs).output().path,
            AnnotateAllEvents(**soak_kwargs).output().path,
            InitDBEntries(**hit_kwargs).output().path,
            UploadLeads(**hit_kwargs).output().path,
            UploadHits(**hit_kwargs).output().path,
            WriteBlackLists(**hit_kwargs).output().path,
            os.path.join(log_directory, 'pipe.done'),
        ]

        # Build the pattern with os.path.join rather than "+": plain string
        # concatenation only stays inside the log directory when the
        # configured path happens to end with a separator.
        paths.extend(
            glob.glob(os.path.join(log_directory, '*pipe_run_*.done')))

        for path in paths:
            # isfile() also filters out directories and already-missing files.
            if os.path.isfile(path):
                os.remove(path)

        with self.output().open('w') as f:
            f.write('')
Beispiel #7
0
 def output(self):
     """Return the local target for ``edstats.done`` in the log directory."""
     marker_path = os.path.join(DirectoriesConfig().log_directory, 'edstats.done')
     return luigi.LocalTarget(marker_path)
Beispiel #8
0
 def output(self):
     """Return the local target ``edstats/<crystal>_<strucid>.done`` in the log directory."""
     log_directory = DirectoriesConfig().log_directory
     # !s forces str() on both attributes, exactly like explicit str() calls.
     marker_name = f'edstats/{self.crystal!s}_{self.strucid!s}.done'
     return luigi.LocalTarget(os.path.join(log_directory, marker_name))
Beispiel #9
0
 def output(self):
     """Return the local target for ``violin_html.done`` in the log directory."""
     marker_path = os.path.join(DirectoriesConfig().log_directory, 'violin_html.done')
     return luigi.LocalTarget(marker_path)
Beispiel #10
0
def check_file_upload(filename, model, log_directory = DirectoriesConfig().log_directory):
    """Check that the rows of a soakdb file match the entries stored for a django model.

    Every row returned by ``db_functions.soakdb_query(filename)`` is matched
    to a ``model`` object and each translated field is compared.  Mismatches
    are written as CSV to an error file inside ``log_directory``; if the
    check itself fails, the traceback is written to that file instead.

    :param filename: filename to check, :class:`transfer_soakdb.CheckFileUpload` self.filename.
        Path components 3, 4 and 5 (split on ``/``) are used to name the error file.
    :type filename: str
    :param model: model class to check the upload against (a key of the translation table
        below: ``Lab``, ``Refinement``, ``DataProcessing`` or ``Dimple``),
        :class:`transfer_soakdb.CheckFileUpload` self.model
    :type model: django model class
    :param log_directory: directory the error file is written to
    :type log_directory: str
    :return: Nothing; problems are reported through the error file
    :rtype: None
    :raises IndexError: if ``filename`` has fewer than six ``/``-separated components
    """
    path_parts = filename.split('/')
    model_name = str(model).replace("<class '", '').replace("'>", '')
    out_err_file = os.path.join(
        log_directory,
        f"{path_parts[3]}_{path_parts[4]}_{path_parts[5]}_"
        f"{misc_functions.get_mod_date(filename)!s}{model_name}.txt")

    print(out_err_file)

    results = db_functions.soakdb_query(filename)

    # Pre-bind the names the exception handlers report, so that a failure
    # before the first loop iteration cannot raise NameError in a handler.
    key = None
    lab_object = None
    # Compiled once; raw string so "\d" is not an invalid string escape.
    outcome_pattern = re.compile(r'-?\d+')

    try:
        print(f"Number of rows from file = {len(results)}")
        translations = {Lab: db_functions.lab_translations(),
                        Refinement: db_functions.refinement_translations(),
                        DataProcessing: db_functions.data_processing_translations(),
                        Dimple: db_functions.dimple_translations()}
        translation = translations[model]

        # different from what is in class...
        error_dict = dict(crystal=[], soakdb_field=[], model_field=[], soakdb_value=[], model_value=[])

        for row in results:
            lab_object = model.objects.filter(crystal_name__crystal_name=row['CrystalName'],
                                              crystal_name__visit__filename=str(filename),
                                              crystal_name__compound__smiles=row['CompoundSMILES'])
            if len(lab_object) > 1:
                raise Exception('Multiple Crystals!')
            if len(lab_object) == 0:
                if model == Dimple and not row['DimplePathToPDB'] and not row['DimplePathToMTZ']:
                    # No dimple output for this crystal, so there is nothing
                    # to compare. Skip the row instead of falling through and
                    # indexing the empty result set.
                    continue
                raise Exception(
                    f"No entry for {row['CrystalName']}, {row['DimplePathToPDB']}, {row['DimplePathToMTZ']}")

            entry = lab_object[0]
            for key in translation.keys():
                # Resolve the (possibly dotted) attribute path on the model
                # instance. This replaces an eval() that referenced the
                # undefined name ``lab_objects``.
                test_xchem_val = entry
                for attr in key.split('.'):
                    test_xchem_val = getattr(test_xchem_val, attr)
                soakdb_val = row[translation[key]]
                if key == 'outcome':
                    # Compare only the first integer found in the soakdb text.
                    try:
                        soakdb_val = int(outcome_pattern.findall(str(soakdb_val))[0])
                    except (IndexError, ValueError):
                        continue
                if translation[key] == 'CrystalName':
                    test_xchem_val = entry.crystal_name.crystal_name
                if translation[key] == 'DimpleReferencePDB' and soakdb_val:
                    test_xchem_val = entry.reference
                    if test_xchem_val is not None:
                        test_xchem_val = entry.reference.reference_pdb
                if soakdb_val == '' or soakdb_val == 'None' or not soakdb_val:
                    continue
                # NOTE(review): float()/int() raise ValueError for non-numeric
                # soakdb text, which aborts the whole check via the generic
                # handler below - confirm that is intended.
                if isinstance(test_xchem_val, float):
                    if float(test_xchem_val) == float(soakdb_val):
                        continue
                if isinstance(test_xchem_val, int):
                    if int(soakdb_val) == int(test_xchem_val):
                        continue
                if test_xchem_val != soakdb_val:
                    # Placeholder values in soakdb are not reported as mismatches.
                    if soakdb_val in [None, 'None', '', '-', 'n/a', 'null', 'pending', 'NULL', '#NAME?', '#NOM?',
                                      'None\t',
                                      'Analysis Pending', 'in-situ']:
                        continue
                    error_dict['crystal'].append(str(entry.crystal_name.crystal_name))
                    error_dict['soakdb_field'].append(translation[key])
                    error_dict['model_field'].append(key)
                    error_dict['soakdb_value'].append(soakdb_val)
                    error_dict['model_value'].append(test_xchem_val)

        if error_dict['crystal']:
            pd.DataFrame.from_dict(error_dict).to_csv(out_err_file)

    except IndexError:
        trace = traceback.format_exc()
        # An IndexError mentioning 'No item with that key' is deliberately ignored.
        if 'No item with that key' not in trace:
            with open(out_err_file, 'w') as f:
                f.write(trace + '\n' + str(key))
    except AttributeError:
        with open(out_err_file, 'w') as f:
            f.write(traceback.format_exc() + '\n' + str(lab_object))
    except Exception:
        # Any other failure is reported through the error file, not raised.
        with open(out_err_file, 'w') as f:
            f.write(traceback.format_exc())