Esempio n. 1
0
def scan_data(data_dir):
    """Scan a directory of XTC files and write the per-run status to CSV.

    Every file matching ``<data_dir>/*.xtc*`` is inspected.  The run
    identifier is taken from the second '-'-separated field of the file
    name: the leading character is dropped and the next four characters
    are kept (the XXXX of an rXXXX field).  A run is 'Ready' unless one of
    its files ends in '.inprogress' ('Copying') or '.fromtape'
    ('Restoring'); when several such files exist for one run, the last one
    in sorted order decides.

    The result is written to 'data_status.csv' in the current working
    directory with the columns 'run' and 'status'.

    Args:
        data_dir: Directory searched (non-recursively) for XTC files.
    """
    # Imported locally so the function does not rely on a module-level import
    import os

    debug = False

    # glob returns files in arbitrary order, so sort for a stable result
    files = sorted(glob.glob(data_dir + '/*.xtc*'))
    if debug:
        print(files)

    status_by_run = {}
    for filename in files:
        # Parse the file name only: a '-' inside data_dir must not shift
        # the fields and select the wrong token as the run.
        fields = os.path.basename(filename).split('-')
        if len(fields) < 2:
            # No second field, so no run identifier can be extracted
            continue
        thisrun = fields[1][1:5]

        # Default status for each run is ready
        status_by_run.setdefault(thisrun, 'Ready')
        if filename.endswith('.inprogress'):
            status_by_run[thisrun] = 'Copying'
        elif filename.endswith('.fromtape'):
            status_by_run[thisrun] = 'Restoring'

    # Unique runs in sorted order (there may be several XTC files per run)
    run_list = sorted(status_by_run)
    status = [status_by_run[run] for run in run_list]

    # Create the result
    result = {'run': run_list, 'status': status}

    if debug:
        print(result['run'])

    # Write dict to CSV file
    keys_to_save = ['run', 'status']
    cfel_file.dict_to_csv('data_status.csv', result, keys_to_save)
Esempio n. 2
0
    def relabel_dataset(self):
        """Prompt for a new dataset label and apply it to all selected runs.

        For each selected run the label (table column 1) and directory
        (table column 5) are updated, the matching entry in 'datasets.csv'
        is updated or created, and any existing output directory is renamed
        with ``mv`` inside ``self.config['hdf5dir']``.  Runs whose directory
        is '---' keep '---' and nothing is renamed.  Returns without doing
        anything when the dialog is cancelled.
        """
        # Simple dialog box: http://www.tutorialspoint.com/pyqt/pyqt_qinputdialog_widget.htm
        text, ok = PyQt4.QtGui.QInputDialog.getText(
            self, 'Change dataset label', 'New label:')
        if not ok:
            return

        newlabel = str(text)
        print('New label is: ', newlabel)

        dataset_csv = cfel_file.csv_to_dict('datasets.csv')

        # Label all selected runs
        selection = self.selected_runs()
        for idx, run in enumerate(selection['run']):
            olddir = selection['directory'][idx]
            has_directory = olddir != '---'

            # Only runs that already have a directory get a new one
            if has_directory:
                newdir = 'r{:04d}'.format(int(run)) + '-' + newlabel
            else:
                newdir = '---'

            # Update Dataset and directory columns in the table
            row = selection['row'][idx]
            self.table.setItem(row, 1, PyQt4.QtGui.QTableWidgetItem(newlabel))
            self.table.setItem(row, 5, PyQt4.QtGui.QTableWidgetItem(newdir))

            # Update the dataset file contents: overwrite or append
            known_runs = dataset_csv['Run']
            if run in known_runs:
                ds_indx = known_runs.index(run)
                dataset_csv['DatasetID'][ds_indx] = newlabel
                dataset_csv['Directory'][ds_indx] = newdir
            else:
                new_entry = (
                    ('Run', run),
                    ('DatasetID', newlabel),
                    ('Directory', newdir),
                    ('iniFile', '---'),
                )
                for column, value in new_entry:
                    dataset_csv[column].append(value)

            # Rename the directory on disk
            if has_directory:
                source = self.config['hdf5dir'] + '/' + olddir
                target = self.config['hdf5dir'] + '/' + newdir
                self.spawn_subprocess(['mv', source, target])

        # Save datasets file
        cfel_file.dict_to_csv('datasets.csv', dataset_csv,
                              ['Run', 'DatasetID', 'Directory', 'iniFile'])
Esempio n. 3
0
def scan_data(data_dir):
    """Count the .cbf files below each entry of data_dir and write them to CSV.

    Every entry returned by ``os.listdir(data_dir)`` is treated as a run.
    Its status is the number of files matching
    ``<data_dir>/<entry>/**/*.cbf`` (recursive glob).  The result is written
    to 'data_status.csv' with the columns 'run' and 'status'.

    Args:
        data_dir: Directory whose entries are the runs to report.
    """
    debug = False

    # Sorted listing of the run directories
    entries = sorted(os.listdir(data_dir))
    if debug:
        print(entries)

    # Unique run names, in sorted order
    run_list = sorted(set(entries))

    # Status tag is the number of .cbf files found for the run
    status = []
    for run_name in run_list:
        cbf_pattern = data_dir + '/' + run_name + '/**/*.cbf'
        cbf_files = glob.glob(cbf_pattern, recursive=True)
        status.append(len(cbf_files))

    # Create the result
    result = {'run': run_list, 'status': status}

    if debug:
        print(result['run'])

    # Write dict to CSV file
    cfel_file.dict_to_csv('data_status.csv', result, ['run', 'status'])
Esempio n. 4
0
def scan_data(data_dir):
    """Write the copy status of every XTC run found in data_dir to CSV.

    Files matching ``<data_dir>/*.xtc*`` are grouped by run.  The run is the
    second '-'-separated field of the file name with its first character
    dropped and the following four characters kept.  Each run is reported as
    'Ready', or as 'Copying' / 'Restoring' when one of its files ends in
    '.inprogress' / '.fromtape' (the last such file in sorted order wins).

    Output goes to 'data_status.csv' (columns 'run', 'status') in the
    current working directory.

    Args:
        data_dir: Directory searched (non-recursively) for XTC files.
    """
    # Imported locally so the function does not rely on a module-level import
    import os

    debug = False
    pattern = data_dir + '/*.xtc*'

    # Create sorted file list (glob returns files in arbitrary order)
    files = sorted(glob.glob(pattern))
    if debug:
        print(files)

    def run_of(path):
        """Return the run field of an XTC file name, or None if absent."""
        # Use the base name so that a '-' in the directory part of the path
        # cannot be mistaken for a field separator.
        fields = os.path.basename(path).split('-')
        return fields[1][1:5] if len(fields) > 1 else None

    # Map run -> status; later files of the same run may override the status
    status_by_run = {}
    for filename in files:
        thisrun = run_of(filename)
        if thisrun is None:
            continue

        if filename.endswith('.inprogress'):
            status_by_run[thisrun] = 'Copying'
        elif filename.endswith('.fromtape'):
            status_by_run[thisrun] = 'Restoring'
        else:
            # Default status is ready; never downgrade an earlier marker
            status_by_run.setdefault(thisrun, 'Ready')

    # Unique run values in sorted order (multiple XTC files per run)
    run_list = sorted(status_by_run)
    status = [status_by_run[run] for run in run_list]

    # Create the result
    result = {
        'run': run_list,
        'status': status,
    }

    if debug:
        print(result['run'])

    # Write dict to CSV file
    keys_to_save = ['run', 'status']
    cfel_file.dict_to_csv('data_status.csv', result, keys_to_save)
Esempio n. 5
0
def scan_crystfel(crystfel_dir):
    """Scan CrystFEL job directories and write their status to CSV.

    Every ``<crystfel_dir>/r*`` entry that contains a 'bsub.log' file is
    reported; entries without one are skipped.  The status is derived from
    the log contents:

    * 'Running'    - log exists, none of the markers below found
    * 'Terminated' - log contains "Exited with exit code"
    * 'Killed'     - log contains "Exited with exit code 143"
    * 'Finished'   - log contains "Final:" (takes precedence)

    The index rate is the (up to) five characters ending at the last '%' in
    the log, or '---' when the log contains no '%'.  The output always
    starts with a placeholder row (run 'r0001', every other column '---').

    The result is written to 'crystfel_status.csv' with the columns
    run, status, directory, processed, indexed, indexrate%.

    Args:
        crystfel_dir: Directory containing the per-run job directories.
    """
    debug = False

    # Placeholder first row
    run_out = ['r0001']
    status_out = ['---']
    directory_out = ['---']
    processed_out = ['---']
    indexed_out = ['---']
    indexrate_out = ['---']

    # Create sorted list, otherwise entries come in seemingly random order
    directories = sorted(glob.glob(crystfel_dir + '/r*'))
    if debug:
        print(directories)

    for path in directories:

        # Default values are blanks (these two are never filled in here)
        processed = '---'
        indexed = '---'
        indexrate = '---'

        # Extract the directory name and run number
        directory = os.path.basename(path)
        run = directory[:5]

        # Only jobs with a bsub log are reported.
        # NOTE(review): the original comments suggest a directory without a
        # log was meant to be listed as 'Submitted'; it is skipped instead.
        logfile = path + '/bsub.log'
        if not os.path.exists(logfile):
            continue
        status = 'Running'

        with open(logfile) as f:
            data = f.read()

        # Later checks deliberately override earlier ones
        if "Exited with exit code" in data:
            status = 'Terminated'
        if "Exited with exit code 143" in data:
            status = 'Killed'
        if "Final:" in data:
            status = 'Finished'

        # Take the text ending at the last '%'.  Guard against a missing
        # '%' and clamp the start so it never becomes a negative index,
        # which would silently yield an empty or wrong slice.
        position = data.rfind("%")
        if position >= 0:
            indexrate = data[max(position - 4, 0):position + 1]

        # Append to main list
        run_out.append(run)
        directory_out.append(directory)
        status_out.append(status)
        processed_out.append(processed)
        indexed_out.append(indexed)
        indexrate_out.append(indexrate)

    # Create the result
    result = {
        'run': run_out,
        'status': status_out,
        'directory': directory_out,
        'processed': processed_out,
        'indexed': indexed_out,
        'indexrate%': indexrate_out
    }

    # Write dict to CSV file
    keys_to_save = [
        'run', 'status', 'directory', 'processed', 'indexed', 'indexrate%'
    ]
    cfel_file.dict_to_csv('crystfel_status.csv', result, keys_to_save)
Esempio n. 6
0
def _runs_to_int(table, key, strip_prefix):
    """Convert the run identifiers in table[key] to int in place (best effort).

    With strip_prefix the first character is dropped first ('r0002' -> 2).
    """
    if not table:
        return
    try:
        runs = table[key]
        for i, run in enumerate(runs):
            runs[i] = int(run[1:]) if strip_prefix else int(run)
    except Exception:
        # Deliberately best effort: identifiers that are not in the expected
        # format (or a missing column) leave the remaining entries untouched.
        pass


def _find_row(table, h5dir, run):
    """Return the row of a status table for (h5dir, run), or None.

    A row whose 'directory' equals h5dir is preferred but only accepted when
    its run also matches (guards against matching '---' entries); when the
    directory is not listed at all, the first row of the run is used.
    """
    if h5dir in table['directory']:
        i = table['directory'].index(h5dir)
        return i if table['run'][i] == run else None
    if run in table['run']:
        return table['run'].index(run)
    return None


def crawler_merge(info):
    """Merge the individual crawler status files into 'crawler.txt'.

    Reads 'data_status.csv', 'cheetah_status.csv', 'crystfel_status.csv' and
    'datasets.csv' from the current working directory, collates one row per
    run identifier found in any of them ('---' where a table has no entry)
    and writes the combined table to 'crawler.txt'.  A legacy 'datasets.txt'
    is converted to 'datasets.csv' first if the latter does not exist.

    Args:
        info: Object whose ``datatype`` attribute selects the run format;
            for 'XTC' and 'exfel' run identifiers are converted to integers.
    """
    # Compare by value: 'is' tests object identity, which is not guaranteed
    # to hold for equal strings.
    XTCdirToInt = info.datatype in ('XTC', 'exfel')

    #
    #   Fix legacy issue with old datasets.txt format the first time we encounter it
    #
    if os.path.exists('datasets.txt') and not os.path.exists('datasets.csv'):
        print('Updating old datasets.txt format to new datasets.csv format')
        oldstyle = cfel_file.csv_to_dict('datasets.txt')

        oldstyle.update({'Run': oldstyle['# Run']})
        oldstyle.update({'iniFile': ['---'] * len(oldstyle['Run'])})
        del oldstyle['# Run']

        keys_to_save = ['Run', 'DatasetID', 'Directory', 'iniFile']
        cfel_file.dict_to_csv('datasets.csv', oldstyle, keys_to_save)

    #
    #   Read .csv files
    #
    # run,status
    data = cfel_file.csv_to_dict('data_status.csv')
    # run,status,directory,processed,hits,hitrate%
    cheetah = cfel_file.csv_to_dict('cheetah_status.csv')
    # run,status,directory,processed,indexed,indexrate%
    crystfel = cfel_file.csv_to_dict('crystfel_status.csv')
    # Run, DatasetID, Directory, iniFile
    datasets = cfel_file.csv_to_dict('datasets.csv')

    #
    # Update for P11:
    #   Run identifier should be a string and not a number, so this
    #   conversion is only done for the XTC/exfel data types.
    #
    # Convert r0002 (string) to 2 (integer) so that run is in the same
    # format in each dict.  datasets.csv stores the bare number already.
    #
    if XTCdirToInt:
        _runs_to_int(data, 'run', strip_prefix=True)
        _runs_to_int(cheetah, 'run', strip_prefix=True)
        _runs_to_int(crystfel, 'run', strip_prefix=True)
        _runs_to_int(datasets, 'Run', strip_prefix=False)

    # Find unique run identifiers
    # (some runs may be missing from some of the tables)
    all_runs = []
    if 'run' in data:
        all_runs += data['run']
    if 'run' in cheetah:
        all_runs += cheetah['run']
    if 'run' in crystfel:
        all_runs += crystfel['run']
    if 'Run' in datasets:
        all_runs += datasets['Run']

    uniq_runs = sorted(set(all_runs))

    run_out = []
    dataset_out = []
    datastatus_out = []
    cheetahstatus_out = []
    crystfel_out = []
    h5dir_out = []
    nprocessed_out = []
    nhits_out = []
    nindexed_out = []
    hitrate_out = []
    inifile_out = []
    calibfile_out = []

    #
    # Loop through all possible runs and collate information
    #   being sensible when data is not in one of the other files
    #
    for run in uniq_runs:

        # Raw data status (run,status)
        datastatus = '---'
        if data and run in data['run']:
            datastatus = data['status'][data['run'].index(run)]

        # Stuff contained in datasets file
        dataset = '---'
        h5dir = '---'
        inifile = '---'
        calibfile = '---'
        if datasets and run in datasets['Run']:
            i = datasets['Run'].index(run)
            dataset = datasets['DatasetID'][i].strip()
            h5dir = datasets['Directory'][i].strip()
            inifile = datasets['iniFile'][i].strip()
            # calibFile is an optional column
            if 'calibFile' in datasets:
                calibfile = datasets['calibFile'][i].strip()

        # Stuff contained in Cheetah status file
        # Match on dataset directory (to handle one run having multiple
        # output directories), falling back to the run number.
        cheetahstatus = '---'
        nprocessed = '---'
        nhits = '---'
        hitrate = '---'
        if cheetah:
            i = _find_row(cheetah, h5dir, run)
            if i is not None:
                cheetahstatus = cheetah['status'][i].strip()
                nprocessed = cheetah['processed'][i].strip()
                nhits = cheetah['hits'][i].strip()
                hitrate = cheetah['hitrate%'][i].strip()

            # Normalise plain decimal numbers to two decimals
            if hitrate.replace('.', '', 1).isnumeric():
                hitrate = '{:0.2f}'.format(float(hitrate))

        # Stuff contained in CrystFEL status file (same matching rules)
        crystfel_status = '---'
        indexrate = '---'
        if crystfel:
            i = _find_row(crystfel, h5dir, run)
            if i is not None:
                crystfel_status = crystfel['status'][i].strip()
                indexrate = crystfel['indexrate%'][i].strip()

        # Concatenate info for this run into output list
        run_out.append(run)
        datastatus_out.append(datastatus)
        dataset_out.append(dataset)
        h5dir_out.append(h5dir)
        cheetahstatus_out.append(cheetahstatus)
        nprocessed_out.append(nprocessed)
        nhits_out.append(nhits)
        hitrate_out.append(hitrate)
        crystfel_out.append(crystfel_status)
        # The 'Nindex' column carries the index rate string
        nindexed_out.append(indexrate)
        inifile_out.append(inifile)
        calibfile_out.append(calibfile)

    result = {
        'Run': run_out,
        'Dataset': dataset_out,
        'Rawdata': datastatus_out,
        'Cheetah': cheetahstatus_out,
        'CrystFEL': crystfel_out,
        'H5Directory': h5dir_out,
        'Nprocessed': nprocessed_out,
        'Nhits': nhits_out,
        'Nindex': nindexed_out,
        'Hitrate%': hitrate_out,
        'Recipe': inifile_out,
        'Calibration': calibfile_out
    }

    # Write dict to CSV file
    keys_to_save = [
        'Run', 'Dataset', 'Rawdata', 'Cheetah', 'CrystFEL', 'H5Directory',
        'Nprocessed', 'Nhits', 'Nindex', 'Hitrate%', 'Recipe', 'Calibration'
    ]
    cfel_file.dict_to_csv('crawler.txt', result, keys_to_save)
Esempio n. 7
0
    def run_cheetah(self):
        """Ask for a dataset label and .ini file, then process the selected runs.

        For each selected run the processing script ``self.config['process']``
        is spawned with (run, inifile, dataset), the table row is updated
        (dataset, output directory, status 'Submitted') and the entry in
        'datasets.csv' is updated or created.  Returns without doing
        anything when the dialog is cancelled.
        """
        # Find .ini files for dropdown list
        inifile_list = [
            os.path.basename(path) for path in glob.iglob('../process/*.ini')
        ]

        # Info needed for the dialog box
        dialog_info = {
            'inifile_list': inifile_list,
            'lastini': self.lastini,
            'lasttag': self.lasttag
        }
        # Dialog box for dataset label and ini file
        gui, ok = gui_dialogs.run_cheetah_gui.cheetah_dialog(dialog_info)

        # Exit if cancel was pressed.  This must come before the returned
        # values are read: they are only meaningful for an accepted dialog.
        if not ok:
            return

        # Extract values from return dict
        dataset = gui['dataset']
        inifile = gui['inifile']

        dataset_csv = cfel_file.csv_to_dict('datasets.csv')

        # Remember the choices as defaults for the next dialog
        self.lasttag = dataset
        self.lastini = inifile

        # Process all selected runs
        runs = self.selected_runs()
        for i, run in enumerate(runs['run']):
            print('------------ Start Cheetah process script ------------')
            cmdarr = [self.config['process'], run, inifile, dataset]
            self.spawn_subprocess(cmdarr)

            # Format directory string: rNNNN-<dataset>
            outdir = 'r{:04d}'.format(int(run)) + '-' + dataset

            # Update Dataset and Cheetah status in table
            table_row = runs['row'][i]
            self.table.setItem(table_row, 1,
                               PyQt4.QtGui.QTableWidgetItem(dataset))
            self.table.setItem(table_row, 5,
                               PyQt4.QtGui.QTableWidgetItem(outdir))
            self.table.setItem(table_row, 3,
                               PyQt4.QtGui.QTableWidgetItem('Submitted'))

            # Update dataset file
            if run in dataset_csv['Run']:
                ds_indx = dataset_csv['Run'].index(run)
                dataset_csv['DatasetID'][ds_indx] = dataset
                dataset_csv['Directory'][ds_indx] = outdir
                dataset_csv['iniFile'][ds_indx] = inifile
            else:
                dataset_csv['Run'].append(run)
                dataset_csv['DatasetID'].append(dataset)
                dataset_csv['Directory'].append(outdir)
                dataset_csv['iniFile'].append(inifile)
            print('------------ Finish Cheetah process script ------------')

        # Save datasets file
        keys_to_save = ['Run', 'DatasetID', 'Directory', 'iniFile']
        cfel_file.dict_to_csv('datasets.csv', dataset_csv, keys_to_save)
Esempio n. 8
0
def scan_data(data_dir):
    """Report for each run directory whether its AGIPD files are complete.

    Run directories are the entries matching ``<data_dir>/r*``; the run
    identifier is the directory name without its first character.  A run is
    'Ready' when ``are_agipd_files_ready`` accepts the list of files
    matching ``*AGIPD*`` inside the directory, otherwise 'Copying'.  The
    result is written to 'data_status.csv' (columns 'run', 'status').

    Args:
        data_dir: Directory containing the run directories.
    """
    debug = False
    run_pattern = data_dir + '/r*'

    found = glob.glob(run_pattern)
    if debug:
        print(run_pattern)
        print(found)

    # Keep only the directory names (strip the leading path)
    rundirs = [os.path.basename(path) for path in found]
    if debug:
        print(rundirs)

    # Run identifier: directory name without the leading character
    run_list = [name[1:] for name in rundirs]

    nruns = len(run_list)
    print('Number of unique runs: ', nruns)

    # Every run starts out as still copying
    status = ['Copying'] * nruns

    # Check directory contents for AGIPD files
    for name in rundirs:
        run = name[1:]
        agipd_files = glob.glob(data_dir + '/' + name + '/*AGIPD*')

        ready = are_agipd_files_ready(agipd_files)
        status[run_list.index(run)] = 'Ready' if ready else 'Copying'

    # Create the result
    result = {'run': run_list, 'status': status}

    if debug:
        print(result['run'])

    # Write dict to CSV file
    cfel_file.dict_to_csv('data_status.csv', result, ['run', 'status'])
Esempio n. 9
0
def scan_data(data_dir):
    """Report the AGIPD file status of every run directory in data_dir.

    Run directories are the entries matching ``<data_dir>/r*``; the run
    identifier is the directory name without its first character.  For each
    run the files matching ``*AGIPD*`` inside the directory decide the
    status:

    * 'noAGIPD'    - the directory contains no AGIPD files
    * 'Incomplete' - ``are_agipd_files_ready`` rejects the file list
    * 'Ready'      - ``are_agipd_files_ready`` accepts the file list

    The result is written to 'data_status.csv' (columns 'run', 'status'),
    with runs in sorted directory-name order.

    Args:
        data_dir: Directory containing the run directories.
    """
    pattern = data_dir + '/r*'
    debug = False

    found = glob.glob(pattern)
    if debug:
        print(pattern)
        print(found)

    # Strip the preceding path and sort: glob returns entries in arbitrary
    # order, which would make the output file order unstable.
    rundirs = sorted(os.path.basename(path) for path in found)
    if debug:
        print(rundirs)

    print('Number of unique runs: ', len(rundirs))

    # Check directory contents for AGIPD files
    run_list = []
    status = []
    for rundir in rundirs:
        files = glob.glob(data_dir + '/' + rundir + '/*AGIPD*')

        if not files:
            # Run directory has been created but holds no AGIPD files.
            # (Tested by emptiness rather than 'is 0', which compares
            # object identity.)
            state = 'noAGIPD'
        elif are_agipd_files_ready(files):
            state = 'Ready'
        else:
            state = 'Incomplete'

        # Run identifier: directory name without the leading character
        run_list.append(rundir[1:])
        status.append(state)

    # Create the result
    result = {
        'run': run_list,
        'status': status
    }

    if debug:
        print(result['run'])

    # Write dict to CSV file
    keys_to_save = ['run', 'status']
    cfel_file.dict_to_csv('data_status.csv', result, keys_to_save)
Esempio n. 10
0
    def run_cheetah(self):
        """Ask for label, .ini and calibration file, then process selected runs.

        For each selected run the processing script ``self.config['process']``
        is spawned with (run, inifile, calibfile, dataset), the table row is
        updated and the entry in 'datasets.csv' is updated or created.
        Returns without changing anything when the dialog is cancelled or
        when 'datasets.csv' cannot be read or has no entries.
        """
        # Find .ini files for dropdown list
        inifile_list = [
            os.path.basename(path) for path in glob.iglob('../process/*.ini')
        ]

        # Info needed for the dialog box
        dialog_info = {
            'inifile_list': inifile_list,
            'lastini': self.lastini,
            'lastcalib': self.lastcalib,
            'lasttag': self.lasttag
        }
        # Dialog box for dataset label and ini file
        gui, ok = gui_dialogs.run_cheetah_gui.cheetah_dialog(dialog_info)

        # Exit if cancel was pressed
        if not ok:
            return

        # Extract values from return dict and remember them as defaults
        dataset = gui['dataset']
        inifile = gui['inifile']
        calibfile = gui['calibfile']
        self.lasttag = dataset
        self.lastini = inifile
        self.lastcalib = calibfile

        try:
            dataset_csv = cfel_file.csv_to_dict('datasets.csv')
        except Exception:
            print('Error occured reading datasets.csv (blank file?)')
            print('Check file contents.  Will return and do nothing.')
            return

        # Failing to read the dataset file loses all information (bad):
        # writing it back would overwrite the file with only the new rows.
        # Tested by emptiness rather than 'is 0' (an identity comparison);
        # a missing column is treated like an empty one.
        if not dataset_csv.get('DatasetID'):
            print("Error reading datasets.csv (blank file)")
            print("Try again...")
            return

        if 'calibFile' not in dataset_csv:
            print('Adding calibFile to datasets.csv')
            # Copy, do not alias: both columns are modified independently
            # below, and sharing one list would corrupt both of them.
            dataset_csv['calibFile'] = list(dataset_csv['iniFile'])

        # Locations whose output directories are named after the run number
        numbered_sites = ('LCLS', 'max-exfl', 'max-cfel')

        # Process all selected runs
        runs = self.selected_runs()
        for i, run in enumerate(runs['run']):
            print('------------ Start Cheetah process script ------------')
            cmdarr = [self.config['process'], run, inifile, calibfile, dataset]
            cfel_file.spawn_subprocess(cmdarr, shell=True)

            # Format output directory string
            # This selects between using run numbers and directory names
            location = self.compute_location['location']
            print("Location: ", location)
            if any(site in location for site in numbered_sites):
                outdir = 'r{:04d}'.format(int(run))
            else:
                outdir = run
            outdir += '-' + dataset
            print('Output directory: ', outdir)

            # Update Dataset and Cheetah status in table
            table_row = runs['row'][i]
            self.table.setItem(table_row, 1,
                               PyQt5.QtWidgets.QTableWidgetItem(dataset))
            self.table.setItem(table_row, 3,
                               PyQt5.QtWidgets.QTableWidgetItem('Submitted'))
            self.table.setItem(table_row, 5,
                               PyQt5.QtWidgets.QTableWidgetItem(outdir))

            self.table.setItem(table_row, 10,
                               PyQt5.QtWidgets.QTableWidgetItem(inifile))
            self.table.setItem(table_row, 11,
                               PyQt5.QtWidgets.QTableWidgetItem(calibfile))

            # Highlight the freshly submitted status cell
            self.table.item(table_row,
                            3).setBackground(PyQt5.QtGui.QColor(255, 255, 100))

            # Update dataset file
            if run in dataset_csv['Run']:
                ds_indx = dataset_csv['Run'].index(run)
                dataset_csv['DatasetID'][ds_indx] = dataset
                dataset_csv['Directory'][ds_indx] = outdir
                dataset_csv['iniFile'][ds_indx] = inifile
                dataset_csv['calibFile'][ds_indx] = calibfile
            else:
                dataset_csv['Run'].append(run)
                dataset_csv['DatasetID'].append(dataset)
                dataset_csv['Directory'].append(outdir)
                dataset_csv['iniFile'].append(inifile)
                dataset_csv['calibFile'].append(calibfile)
            print('------------ Finish Cheetah process script ------------')

        # Save datasets file
        keys_to_save = [
            'Run', 'DatasetID', 'Directory', 'iniFile', 'calibFile'
        ]
        cfel_file.dict_to_csv('datasets.csv', dataset_csv, keys_to_save)
Esempio n. 11
0
def scan_hdf5(hdf5_dir):
    """Collect the Cheetah status of every output directory and write it to CSV.

    Each ``<hdf5_dir>/*/status.txt`` file is parsed for the lines
    'Status:', 'Frames processed:' and 'Number of hits:'.  The run is the
    first five characters of the directory name when it starts with 'r',
    otherwise the part of the name before the first '-'.  The hit rate is
    ``100 * hits / processed``, or '---' when it cannot be computed.

    The result is written to 'cheetah_status.csv' with the columns
    run, status, directory, processed, hits, hitrate%.

    Args:
        hdf5_dir: Directory containing the Cheetah output directories.
    """
    debug = False
    pattern = hdf5_dir + '/*/status.txt'

    run_out = []
    status_out = []
    directory_out = []
    processed_out = []
    hits_out = []
    hitrate_out = []

    # Create sorted file list or files come in seemingly random order
    files = sorted(glob.glob(pattern))
    if debug:
        print(files)

    for filename in files:

        # Default values are blanks
        status = ''
        processed = ''
        hits = ''

        # Extract the Cheetah HDF5 directory name
        directory = os.path.basename(os.path.dirname(filename))

        # Extract the run number (Warning: LCLS-specific)
        if directory.startswith('r'):
            run = directory[:5]
        else:
            run = directory.partition('-')[0]

        # 'with' guarantees the file is closed even if reading fails
        with open(filename, 'r') as f:
            for line in f:
                key, _, value = line.partition(':')
                if key == 'Status':
                    status = value.strip()
                elif key == 'Frames processed':
                    processed = value.strip()
                elif key == 'Number of hits':
                    hits = value.strip()

        # Calculate hit rate (with some error checking).  A malformed or
        # zero-valued count must not abort the scan of the other runs.
        hitrate = '---'
        if hits != '' and processed != '' and processed != '0':
            try:
                hitrate = 100 * (float(hits) / float(processed))
            except (ValueError, ZeroDivisionError):
                hitrate = '---'

        # Diagnostic
        if debug:
            print("---------------")
            print("Run: ", run)
            print(directory)
            print(status)
            print(processed)
            print(hits)
            print(hitrate)

        # Append to main list
        run_out.append(run)
        directory_out.append(directory)
        status_out.append(status)
        processed_out.append(processed)
        hits_out.append(hits)
        hitrate_out.append(str(hitrate))

    # Create the result (ordering follows the sorted file list)
    result = {
        'run': run_out,
        'status': status_out,
        'directory': directory_out,
        'processed': processed_out,
        'hits': hits_out,
        'hitrate%': hitrate_out
    }

    # Write dict to CSV file
    keys_to_save = [
        'run', 'status', 'directory', 'processed', 'hits', 'hitrate%'
    ]
    cfel_file.dict_to_csv('cheetah_status.csv', result, keys_to_save)