Example #1
def read_report_file_yaml(report_file, date_obj, utag):
    """
    Extract information from a run_log.yaml file and return it as a dictionary
    """
    try:
        with open(report_file, 'r') as rf:
            try:
                data = yaml.safe_load(rf)
            except yaml.YAMLError as exc:
                APP_LOGGER.error("YAMLError %s received" % exc)
                return None
        if not data:
            APP_LOGGER.debug("YAML file, %s, is empty." % report_file)
            return None
        data[DATETIME] = date_obj
        data[FILE_TYPE] = 'yaml'
        data[UTAG] = utag
        if USER in data and isinstance(data[USER], str):
            data[USER] = [user.strip() for user in data[USER].split(',')]

        # distinguish reports from Web UI and Client UI
        if CARTRIDGE_BC not in data:
            report_obj = RunReportWebUI.from_dict(**data)
        else:
            report_obj = RunReportClientUI.from_dict(**data)
        return report_obj.as_dict()
    except:
        APP_LOGGER.error("Error raised for report %s: %s" %
                         (report_file, traceback.format_exc()))
        return None
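
A minimal, self-contained sketch of the two parsing steps above, safe-loading the YAML and splitting a comma-separated user field; the sample document and key names are illustrative, not taken from a real run_log.yaml:

import yaml

sample = """
user: alice, bob
cartridge_sn: ABC123
"""
data = yaml.safe_load(sample)
if isinstance(data.get('user'), str):
    # same normalization read_report_file_yaml applies to data[USER]
    data['user'] = [u.strip() for u in data['user'].split(',')]
print(data)  # 'user' is now ['alice', 'bob']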
Example #2
def read_report_file_txt(report_file, date_obj, utag):
    """
    Extract information from a run_log.txt file and return it as a dictionary
    """
    try:
        with open(report_file, 'r') as rf:
            lines = rf.readlines()
        if not lines:
            APP_LOGGER.error("The log file, %s, is empty." % report_file)
            return None
        data = {FILE_TYPE: 'txt', DATETIME: date_obj, UTAG: utag}
        for i, line in enumerate(lines):
            if line.strip():
                try:
                    key, value = line.split(':')
                    key, value = key.strip(), value.strip()
                    if key == USER_TXT and value:
                        data[key] = [user.strip() for user in value.split(',')]
                    elif key in [
                            RUN_DESCRIPTION_TXT, EXIT_NOTES_TXT, TDI_STACKS_TXT
                    ]:
                        values = [value]
                        j = i + 1
                        while j < len(lines) and ':' not in lines[j]:
                            values.append(lines[j].strip())
                            j += 1
                        if key == TDI_STACKS_TXT:
                            regex = ARCHIVES_PATH + '/[^/]+'
                            data[key] = re.findall(regex, ''.join(values))
                        else:
                            data[key] = ', '.join(values)
                    elif value:
                        data[key] = value
                except:
                    continue
        report_obj = RunReportWebUI.from_dict(**data)
        return report_obj.as_dict()
    except:
        APP_LOGGER.error("Error raised for report %s: %s" %
                         (report_file, traceback.format_exc()))
        return None
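
The text parser above expects "Key: value" lines, where certain keys (run description, exit notes, TDI stacks) may continue onto following lines that contain no colon. A rough, self-contained illustration of that layout and of the continuation handling (key names and values are made up):

sample_lines = [
    'User: alice, bob\n',
    'Run Description: first line\n',
    '    second line with no colon\n',
    'Exit Notes: all good\n',
]
data = {}
for i, line in enumerate(sample_lines):
    if ':' not in line:
        continue  # continuation lines are consumed by the inner loop below
    key, value = [part.strip() for part in line.split(':')]
    values = [value]
    j = i + 1
    while j < len(sample_lines) and ':' not in sample_lines[j]:
        values.append(sample_lines[j].strip())
        j += 1
    data[key] = ', '.join(values)
print(data)  # 'Run Description' collects its continuation line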
def update_archives():
    '''
    Update the database with available primary analysis archives.  It is not
    an error if zero archives are available at this moment.

    @return True if database is successfully updated, False otherwise
    '''
    APP_LOGGER.info("Updating database with available archives...")
    exist_archives = _DB_CONNECTOR.distinct(ARCHIVES_COLLECTION, ARCHIVE)
    if os.path.isdir(ARCHIVES_PATH):
        # Remove archives named similarly (same name, different capitalization)
        archives = io_utilities.get_subfolders(ARCHIVES_PATH)

        # Check yyyy_mm/dd/HHMM_pilotX location
        run_folders = get_run_folders()
        for folder in run_folders:
            archives.extend(io_utilities.get_subfolders(folder))

        new_archives = [
            x for x in archives if os.path.basename(x) not in exist_archives
        ]
        records = [{
            ARCHIVE: os.path.basename(archive),
            ARCHIVE_PATH: remove_disk_directory(archive)
        } for archive in new_archives]

        APP_LOGGER.info("Found %d archives" % (len(records)))
        if len(records) > 0:
            # There is a possible race condition here. Ideally these operations
            # would be performed in concert atomically
            _DB_CONNECTOR.insert(ARCHIVES_COLLECTION, records)
    else:
        APP_LOGGER.error(
            "Couldn't locate archives path '%s', to update database." %
            ARCHIVES_PATH)
        return False

    APP_LOGGER.info("Database successfully updated with available archives.")
    return True
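
The new-archive filter above keys on folder basenames already present in the archives collection; a standalone illustration of that step (paths are invented, the lowercase keys stand in for the ARCHIVE/ARCHIVE_PATH constants, and remove_disk_directory is omitted):

import os

exist_archives = {'run_A', 'run_B'}
archives = ['/mnt/archives/run_A', '/mnt/archives/run_C', '/mnt/archives/2020_05/12/0930_pilot3']
new_archives = [a for a in archives if os.path.basename(a) not in exist_archives]
records = [{'archive': os.path.basename(a), 'archive_path': a} for a in new_archives]
print(records)  # records for run_C and 0930_pilot3 only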
Example #4
    def handle_request(cls, query_params, path_fields):
        '''
        Example API call: http://<hostname>:<port>/api/v1/MeltingTemperatures/<user>/IDT?name=foo&sequence=bar
        
        In the above example, query_params would be {"name": "foo", 
        "sequence": "bar"} and path_fields would be [<user>]. After collecting 
        input parameters, call process_request(). Then return the results in the 
        requested format.
        '''
        (params_dict, _) = cls._parse_query_params(query_params)
        cls._handle_path_fields(path_fields, params_dict)

        response = {}
        http_status_code = None
        try:
            response, http_status_code = cls.process_request(params_dict)
        except:
            APP_LOGGER.error("Failed to process request: %s" %
                             traceback.format_exc())
            http_status_code = 500
            response[ERROR] = str(sys.exc_info()[1])

        return (make_clean_response(response, http_status_code), None, None)
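
For the example URL in the docstring, the collected inputs would look roughly like this. The bodies of _parse_query_params and _handle_path_fields are not shown in this snippet, so the merged dictionary below is an assumption about their combined effect:

query_params = {'name': 'foo', 'sequence': 'bar'}
path_fields = ['<user>']

# assumed result of cls._parse_query_params(...) plus cls._handle_path_fields(...)
params_dict = dict(query_params)
params_dict['user'] = path_fields[0]
print(params_dict)  # {'name': 'foo', 'sequence': 'bar', 'user': '<user>'}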
Example #5
def update_run_reports(date_folders=None):
    '''
    Update the database with available run reports.  It is not an error
    if zero reports are available at this moment.

    @return True if database is successfully updated, False otherwise
    '''
    APP_LOGGER.info("Updating database with available run reports...")

    # fetch utags from run report collection
    db_utags = _DB_CONNECTOR.distinct(RUN_REPORT_COLLECTION, UTAG)

    if os.path.isdir(RUN_REPORT_PATH):
        if date_folders is None:
            try:
                latest_date = _DB_CONNECTOR.find_max(RUN_REPORT_COLLECTION,
                                                     DATETIME)[DATETIME]
            except TypeError:
                latest_date = datetime.now()

            def valid_date(folder):
                date_obj = get_date_object(folder)
                return date_obj >= latest_date - timedelta(days=6)

            date_folders = [
                folder for folder in os.listdir(RUN_REPORT_PATH)
                if re.match(r'\d{2}_\d{2}_\d{2}', folder) and valid_date(folder)
            ]

            # New file location
            new_date_folders = get_date_folders()
            date_folders.extend(f for f in new_date_folders if valid_date(f))

        date_folders = [os.path.join(RUN_REPORT_PATH, f) for f in date_folders]
        date_folders = [f for f in date_folders if os.path.isdir(f)]

        reports = list()
        for folder in date_folders:
            for sf in os.listdir(folder):
                report_file_path = get_run_info_path(folder, sf)
                if report_file_path is None: continue

                date_obj = get_date_object(folder)
                data_folder = os.path.join(RUN_REPORT_PATH, folder, sf)

                utag = set_utag(date_obj, sf)
                if utag not in db_utags:  # if not exists, need to insert to collection
                    log_data = read_report_file(report_file_path, date_obj,
                                                utag)
                    if log_data is None or all(
                            not log_data[DEVICE_NAME].lower().startswith(x)
                            for x in ['pilot', 'beta']):
                        log_data = {DATETIME: date_obj, UTAG: utag}
                    if IMAGE_STACKS in log_data:
                        # add image stacks to archive collection
                        update_image_stacks(log_data, data_folder)
                        # find HDF5 datasets and add them to HDF5 collection
                        hdf5_datasets = get_hdf5_datasets(
                            log_data, data_folder)
                        log_data[IMAGE_STACKS].extend(hdf5_datasets)
                    # add report directory path
                    log_data[DIR_PATH] = remove_disk_directory(
                        os.path.dirname(report_file_path))
                    reports.append(log_data)
                else:  # if exists, check HDF5 collection for new datasets
                    log_data = _DB_CONNECTOR.find_one(RUN_REPORT_COLLECTION,
                                                      UTAG, utag)

                    # If the run report was previously missing or had the wrong format,
                    # the mongo document only has three or four fields: _id, datetime,
                    # unique_tag, and maybe dir_path. If so, try reading the
                    # run report again.
                    if not set(log_data.keys()) - set(
                        [ID, DATETIME, UTAG, DIR_PATH]):
                        log_data = read_report_file(report_file_path, date_obj,
                                                    utag)
                        if log_data is None or all(
                                not log_data[DEVICE_NAME].lower().startswith(x)
                                for x in ['pilot', 'beta']):
                            continue
                        # add report directory path
                        log_data[DIR_PATH] = remove_disk_directory(
                            os.path.dirname(report_file_path))
                        # add image stacks to archive collection
                        update_image_stacks(log_data, data_folder)

                    if IMAGE_STACKS in log_data:
                        # find HDF5 datasets and add new records to HDF5 collection
                        new_datasets = set(
                            get_hdf5_datasets(log_data, data_folder))
                        if new_datasets:
                            # exclude uploaded HDF5 datasets
                            exist_datasets = set(
                                d for d in log_data[IMAGE_STACKS]
                                if isinstance(d, (str, unicode)))
                            new_datasets = list(new_datasets - exist_datasets)
                            if new_datasets:
                                _DB_CONNECTOR.update(
                                    RUN_REPORT_COLLECTION, {UTAG: utag}, {
                                        "$addToSet": {
                                            IMAGE_STACKS: {
                                                '$each': new_datasets
                                            }
                                        }
                                    })
                                APP_LOGGER.info(
                                    'Updated run report utag=%s with %d datasets'
                                    % (utag, len(new_datasets)))

        APP_LOGGER.info("Found %d run reports" % (len(reports)))
        if len(reports) > 0:
            # There is a possible race condition here. Ideally these operations
            # would be performed in concert atomically
            _DB_CONNECTOR.insert(RUN_REPORT_COLLECTION, reports)
    else:
        APP_LOGGER.error(
            "Couldn't locate run report path '%s', to update database." %
            RUN_REPORT_PATH)
        return False

    APP_LOGGER.info(
        "Database successfully updated with available run reports.")
    return True
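
The date_folders filter above keeps folder names matching \d{2}_\d{2}_\d{2} whose parsed date falls within six days of the latest report already in the database; a standalone sketch of that filter, assuming get_date_object parses yy_mm_dd folder names:

import re
from datetime import datetime, timedelta

def get_date_object(folder_name):
    # assumption: date folders are named yy_mm_dd
    return datetime.strptime(folder_name, '%y_%m_%d')

latest_date = datetime(2013, 1, 15)
folders = ['13_01_10', '13_01_14', '12_12_01', 'notes']
valid = [f for f in folders
         if re.match(r'\d{2}_\d{2}_\d{2}', f)
         and get_date_object(f) >= latest_date - timedelta(days=6)]
print(valid)  # ['13_01_10', '13_01_14']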
Example #6
    def process_request(cls, params_dict):
        image_stack_tgz = params_dict[cls._file_param][0]
        stack_type = params_dict[cls._stack_type_param][0]
        img_stack_name = params_dict[cls._name_param][0]
        short_desc = params_dict[cls._short_desc_param][0]
        http_status_code = 200
        uuid = str(uuid4())
        tmp_archive_path = os.path.join(TMP_PATH, uuid + '.tar.gz')
        archive_path = os.path.join(RESULTS_PATH, uuid + '.tar.gz')
        json_response = {
            FILENAME: image_stack_tgz.filename,
            UUID: uuid,
            DATESTAMP: datetime.today(),
        }

        try:
            # check tar file
            image_stack_tgz.save(tmp_archive_path)
            image_stack_tgz.close()

            tar_error, nimgs = check_mon_tar_structure(tmp_archive_path,
                                                       stack_type)

            # check for existing image stacks
            existing_stacks = cls._DB_CONNECTOR.find(IMAGES_COLLECTION, {
                NAME: img_stack_name,
                STACK_TYPE: stack_type
            }, [NAME])
            if existing_stacks:
                http_status_code = 403
                json_response[ERROR] = 'Image stack with given name already ' \
                            'exists.'
            elif tar_error:
                APP_LOGGER.error(tar_error)
                http_status_code = 415
                json_response[ERROR] = tar_error
            else:
                url = 'http://%s/results/%s/%s' % (
                    HOSTNAME, PORT, os.path.basename(archive_path))
                shutil.copy(tmp_archive_path, archive_path)
                json_response[RESULT] = archive_path
                json_response[URL] = url
                json_response[NAME] = img_stack_name
                json_response[DESCRIPTION] = short_desc
                json_response[NUM_IMAGES] = nimgs
                json_response[STACK_TYPE] = stack_type
                cls._DB_CONNECTOR.insert(IMAGES_COLLECTION, [json_response])
        except IOError:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 415
            json_response[ERROR] = str(sys.exc_info()[1])
        except:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 500
            json_response[ERROR] = str(sys.exc_info()[1])
        finally:
            if ID in json_response:
                del json_response[ID]
            silently_remove_file(tmp_archive_path)

        return make_clean_response(json_response, http_status_code)
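
check_mon_tar_structure is not shown in this snippet; purely as an illustration of the kind of (error, image_count) check being relied on above, a generic version using the standard tarfile module might look like this (the extension list and error messages are invented):

import tarfile

def count_tar_images(path, extensions=('.png', '.tif', '.tiff')):
    # illustrative only: open the gzipped tar and count image-like members
    try:
        with tarfile.open(path, 'r:gz') as tar:
            names = tar.getnames()
    except (tarfile.TarError, IOError) as exc:
        return str(exc), 0
    nimgs = sum(1 for name in names if name.lower().endswith(extensions))
    if nimgs == 0:
        return 'archive contains no image files', 0
    return None, nimgs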
def update_hdf5s():
    APP_LOGGER.info("Updating database with available HDF5 files...")

    # check if run report path exists
    if not os.path.isdir(RUN_REPORT_PATH):
        APP_LOGGER.error(
            "Couldn't locate run report path '%s', to update database." %
            RUN_REPORT_PATH)
        return False

    # find new hdf5 files using nested listdirs, which is much faster than glob, os.walk, or scandir
    # only search two directory levels below the run report folder
    # assumes each hdf5 file is in a subfolder of the run report folder
    database_paths = set(
        _DB_CONNECTOR.distinct_sorted(HDF5_COLLECTION, HDF5_PATH))
    current_paths = set()
    for par_ in os.listdir(RUN_REPORT_PATH):
        report_dir = os.path.join(RUN_REPORT_PATH, par_)
        if os.path.isdir(report_dir):
            for sub_ in os.listdir(report_dir):
                subdir = os.path.join(report_dir, sub_)
                if os.path.isdir(subdir):
                    hdf5s = [
                        f for f in os.listdir(subdir)
                        if os.path.splitext(f)[-1] in VALID_HDF5_EXTENSIONS
                    ]
                    hdf5_paths = [os.path.join(subdir, f) for f in hdf5s]
                    current_paths.update(hdf5_paths)

    # Check yyyy_mm/dd/HHMM_pilotX location
    run_folders = get_run_folders()
    for folder in run_folders:
        hdf5s = [
            f for f in os.listdir(folder)
            if os.path.splitext(f)[-1] in VALID_HDF5_EXTENSIONS
        ]
        hdf5_paths = [os.path.join(folder, f) for f in hdf5s]
        current_paths.update(hdf5_paths)

    # update database with any new files
    new_hdf5_paths = current_paths - database_paths
    new_records = list()
    for hdf5_path in new_hdf5_paths:
        try:
            with h5py.File(hdf5_path, 'r') as h5_file:
                dataset_names = list(h5_file.keys())
            for dsname in dataset_names:
                if any(
                        re.match(pat, dsname) for pat in [
                            r'^\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}',
                            r'^Pilot\d+_\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}'
                        ]):
                    new_records.append({
                        HDF5_PATH:
                        remove_disk_directory(hdf5_path),
                        HDF5_DATASET:
                        dsname,
                    })
        except:
            APP_LOGGER.exception(
                'Unable to get dataset information from HDF5 file: %s' %
                hdf5_path)

    if new_records:
        # There is a possible race condition here. Ideally these operations
        # would be performed in concert atomically
        _DB_CONNECTOR.insert(HDF5_COLLECTION, new_records)
        APP_LOGGER.info('Updated database with %s new HDF5 files' %
                        len(new_records))
    else:
        APP_LOGGER.info('Unable to find any new HDF5 files')

    return True
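
The two dataset-name patterns used above accept names such as 2019-03-01_1234.05 or Pilot7_2019-03-01_1234.05; a quick standalone check of the same regexes:

import re

patterns = [r'^\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}',
            r'^Pilot\d+_\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}']
for name in ['2019-03-01_1234.05', 'Pilot7_2019-03-01_1234.05', 'calibration_stack']:
    matched = any(re.match(p, name) for p in patterns)
    print('%s -> %s' % (name, matched))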
    def process_request(cls, params_dict):
        users            = params_dict[cls._users_param]
        date             = params_dict[cls._date_param][0]
        archive_name     = params_dict[cls._archive_param][0]
        beta             = params_dict[cls._beta_param][0]
        device           = params_dict[cls._device_param][0]
        dye_prof_metrics = params_dict[cls._dye_profile_metrics_param]
        surfactant       = params_dict[cls._surfactant_param][0]
        
        json_response = {}
        
        # Ensure archive directory is valid
        try:
            archives = get_archive_dirs(archive_name)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            return make_clean_response(json_response, 500)
        
        # Ensure only one valid archive is found
        if len(archives) != 1:
            APP_LOGGER.warning("Expected 1 archive, found %d" % len(archives))
            return make_clean_response(json_response, 404)

        response = {
                    USERS: users,
                    DATE: date,
                    ARCHIVE: archives[0],
                    BETA: beta,
                    DEVICE: device,
                    DYE_PROFILE_METRICS: dye_prof_metrics,
                    SURFACTANT: surfactant,
                    STATUS: JOB_STATUS.submitted,                # @UndefinedVariable
                    JOB_TYPE_NAME: JOB_TYPE.dye_profile_images,  # @UndefinedVariable
                    SUBMIT_DATESTAMP: datetime.today(),
                   }
        status_code = 200

        try:
            pass  # the processing body below was commented out in the original source
#             # Create helper functions
#             callable = PaProcessCallable(archive, dyes, device,
#                                              major, minor,
#                                              offset, use_iid, 
#                                              outfile_path, 
#                                              config_path,
#                                              response[UUID], 
#                                              cls._DB_CONNECTOR)
#             callback = make_process_callback(response[UUID], 
#                                              outfile_path, 
#                                              config_path,
#                                              cls._DB_CONNECTOR)
# 
#             # Add to queue and update DB
#             cls._DB_CONNECTOR.insert(PA_PROCESS_COLLECTION, [response])
#             cls._EXECUTION_MANAGER.add_job(response[UUID], 
#                                            abs_callable, callback)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            response[ERROR]  = str(sys.exc_info()[1])
            status_code = 500
        finally:
            if ID in response:
                del response[ID]

        http_status_code = 200
        uuid             = str(uuid4())
        tmp_archive_path = os.path.join(TMP_PATH, uuid + ".tar.gz")
        archive_path     = os.path.join(RESULTS_PATH, uuid + ".tar.gz")
        json_response    = { 
                            FILENAME: image_stack_tgz.filename,
                            UUID: uuid,
                            DATESTAMP: datetime.today(),
                           }

        try:
            # check tar file
            image_stack_tgz.save(tmp_archive_path)
            image_stack_tgz.close()
            tar_error, nimgs = check_ham_tar_structure(tmp_archive_path, HAM)

            # check for existing image stacks
            existing_stacks = cls._DB_CONNECTOR.find(IMAGES_COLLECTION,
                                                     {NAME: img_stack_name, STACK_TYPE: HAM},
                                                     [NAME])

            # check for exp def
            exp_defs     = ExperimentDefinitions()
            exp_def_uuid = exp_defs.get_experiment_uuid(exp_def_name)

            if existing_stacks:
                http_status_code = 403
                json_response[ERROR] = "Image stack with given name already " \
                            "exists."
            elif not exp_def_uuid:
                http_status_code = 404
                json_response[ERROR] = "Couldn't locate UUID for " \
                    "experiment definition."
            elif tar_error:
                APP_LOGGER.error(tar_error)
                http_status_code = 415
                json_response[ERROR] = tar_error
            else:
                url = "http://%s/results/%s/%s" % (HOSTNAME, PORT,
                                                   os.path.basename(archive_path))
                shutil.copy(tmp_archive_path, archive_path)
                json_response[RESULT]       = archive_path
                json_response[URL]          = url
                json_response[NAME]         = img_stack_name
                json_response[DESCRIPTION]  = short_desc
                json_response[EXP_DEF_NAME] = exp_def_name
                json_response[EXP_DEF_UUID] = exp_def_uuid
                json_response[NUM_IMAGES]   = nimgs
                json_response[STACK_TYPE]  = HAM
                cls._DB_CONNECTOR.insert(IMAGES_COLLECTION,
                                         [json_response])
        except IOError:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code     = 415
            json_response[ERROR] = str(sys.exc_info()[1])
        except:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code     = 500
            json_response[ERROR] = str(sys.exc_info()[1])
        finally:
            if ID in json_response:
                del json_response[ID]
            silently_remove_file(tmp_archive_path)
        
        return make_clean_response(json_response, http_status_code)
Example #9
    def process_request(cls, params_dict):
        image_stack_tgz = params_dict[cls._file_param][0]
        exp_def_name = params_dict[cls._exp_defs_param][0]
        img_stack_name = params_dict[cls._name_param][0]
        short_desc = params_dict[cls._short_desc_param][0]
        http_status_code = 200
        uuid = str(uuid4())
        tmp_archive_path = os.path.join(TMP_PATH, uuid + ".tar.gz")
        archive_path = os.path.join(RESULTS_PATH, uuid + ".tar.gz")
        json_response = {
            FILENAME: image_stack_tgz.filename,
            UUID: uuid,
            DATESTAMP: datetime.today(),
        }

        try:
            # check tar file
            image_stack_tgz.save(tmp_archive_path)
            image_stack_tgz.close()
            tar_error, nimgs = check_ham_tar_structure(tmp_archive_path, HAM)

            # check for existing image stacks
            existing_stacks = cls._DB_CONNECTOR.find(IMAGES_COLLECTION, {
                NAME: img_stack_name,
                STACK_TYPE: HAM
            }, [NAME])

            # check for exp def
            exp_def_fetcher = ExpDefHandler()
            exp_def_uuid = exp_def_fetcher.get_experiment_uuid(exp_def_name)

            if existing_stacks:
                http_status_code = 403
                json_response[ERROR] = "Image stack with given name already " \
                            "exists."
            elif not exp_def_uuid:
                http_status_code = 404
                json_response[ERROR] = "Couldn't locate UUID for " \
                    "experiment definition."
            elif tar_error:
                APP_LOGGER.error(tar_error)
                http_status_code = 415
                json_response[ERROR] = tar_error
            else:
                url = "http://%s/results/%s/%s" % (
                    HOSTNAME, PORT, os.path.basename(archive_path))
                shutil.copy(tmp_archive_path, archive_path)
                json_response[RESULT] = archive_path
                json_response[URL] = url
                json_response[NAME] = img_stack_name
                json_response[DESCRIPTION] = short_desc
                json_response[EXP_DEF_NAME] = exp_def_name
                json_response[EXP_DEF_UUID] = exp_def_uuid
                json_response[NUM_IMAGES] = nimgs
                json_response[STACK_TYPE] = HAM
                cls._DB_CONNECTOR.insert(IMAGES_COLLECTION, [json_response])
        except IOError:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 415
            json_response[ERROR] = str(sys.exc_info()[1])
        except:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 500
            json_response[ERROR] = str(sys.exc_info()[1])
        finally:
            if ID in json_response:
                del json_response[ID]
            silently_remove_file(tmp_archive_path)

        return make_clean_response(json_response, http_status_code)