Code example #1
def update_image_stacks(log_data, data_folder):
    """
    Check whether the image_stacks in a run report document exist in archive collection.
    If not, add them to database.

    @param log_data:            the document of run report yaml
    @param data_folder:         folder where data is located
    """
    if log_data is None or IMAGE_STACKS not in log_data: return

    new_records = list()
    for image_stack in log_data[IMAGE_STACKS]:
        exist_record = _DB_CONNECTOR.find_one(ARCHIVES_COLLECTION, ARCHIVE,
                                              image_stack)
        if not exist_record:
            for folder in [ARCHIVES_PATH, data_folder]:
                archive_path = os.path.join(folder, image_stack)
                if os.path.isdir(archive_path):
                    new_records.append({
                        ARCHIVE: image_stack,
                        ARCHIVE_PATH: remove_disk_directory(archive_path)
                    })
                    break

    if new_records:
        APP_LOGGER.info('Found %d image stacks: %s' %
                        (len(new_records), new_records))
        _DB_CONNECTOR.insert(ARCHIVES_COLLECTION, new_records)
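
A minimal usage sketch for the function above; the YAML file name, the data folder path, and the use of yaml.safe_load are assumptions for illustration, not part of the original project.

import yaml

# Load a run report document and register any image stacks that exist on
# disk but are missing from the archive collection.
with open('/mnt/runs/04_05_16/run_report.yaml', 'r') as fh:
    log_data = yaml.safe_load(fh)

update_image_stacks(log_data, '/mnt/runs/04_05_16')
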
Code example #2
    def absorption_callback(future):
        try:
            _ = future.result()
            update = {
                "$set": {
                    STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                    RESULT: outfile_path,
                    FINISH_DATESTAMP: datetime.today(),
                    URL: "http://%s/results/%s/%s" % (HOSTNAME, PORT, uuid)
                }
            }
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(ABSORPTION_COLLECTION, query, {})) > 0:
                db_connector.update(ABSORPTION_COLLECTION, query, update)
            elif os.path.isfile(outfile_path):
                os.remove(outfile_path)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            error_msg = str(sys.exc_info()[1])
            update = {
                "$set": {
                    STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                    RESULT: None,
                    FINISH_DATESTAMP: datetime.today(),
                    ERROR: error_msg
                }
            }
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(ABSORPTION_COLLECTION, query, {})) > 0:
                db_connector.update(ABSORPTION_COLLECTION, query, update)
            elif os.path.isfile(outfile_path):
                os.remove(outfile_path)
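
The names uuid, outfile_path, query, and db_connector are free variables here, closed over by an enclosing factory; code example #15 calls it as make_absorption_callback(uuid, outfile_path, db_connector). A hedged sketch of that factory follows; only the call signature appears in these examples, the body is an assumption.

def make_absorption_callback(uuid, outfile_path, db_connector):
    # Presumed structure: build the job query, define absorption_callback
    # exactly as in code example #2 (closing over these names), and return it.
    query = {UUID: uuid}

    def absorption_callback(future):
        pass  # body as shown in code example #2

    return absorption_callback
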
Code example #3
def parse_pa_data_src(pa_data_src_name):
    """
    Determine primary analysis data source type (HDF5 or image stack) and return
    a list containing the archive paths and dataset names

    @param pa_data_src_name:    String, name of data source, could be either
                                the HDF5 dataset name or a folder name containing
                                image stacks
    @return:                    A list of tuples, each tuple contains the primary analysis
                                datasource name and a bool indicating whether or not it is HDF5.
    """
    # archives is a list of tuples, each tuple contains the path and the dataset name
    archives = list()
    if is_hdf5_archive(pa_data_src_name):
        archives.append((pa_data_src_name, True))
        APP_LOGGER.info('%s is an HDF5 file.' % pa_data_src_name)
    elif is_image_archive(pa_data_src_name):
        image_archive_paths = io_utilities.get_archive_dirs(
            pa_data_src_name, min_num_images=PA_MIN_NUM_IMAGES)
        for img_src_name in image_archive_paths:
            archives.append((img_src_name, False))
        APP_LOGGER.info('%s is an image stack.' % pa_data_src_name)
    else:
        raise Exception(
            'Unable to determine if %s is an image stack or HDF5 file.' %
            pa_data_src_name)

    return archives
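
A hypothetical dispatch over the returned (name, is_hdf5) tuples; the source name and the handlers process_hdf5 and process_image_stack are placeholders, not functions from the project.

def process_hdf5(name):
    print 'HDF5 dataset:', name           # placeholder handler

def process_image_stack(name):
    print 'image stack folder:', name     # placeholder handler

for src_name, is_hdf5 in parse_pa_data_src('some_data_source'):
    if is_hdf5:
        process_hdf5(src_name)
    else:
        process_image_stack(src_name)
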
Code example #4
File: TagsPostFunction.py Project: dicara/flask-app
    def process_request(cls, params_dict):
        tags = [t for t in params_dict[cls.tags_parameter] if t]
        report_uuid = params_dict[cls.report_uuid_parameter][0]

        http_status_code = 200
        json_response = {RUN_REPORT_UUID: report_uuid, TAGS: tags}

        try:
            cls._DB_CONNECTOR.update(RUN_REPORT_COLLECTION,
                                     {UUID: report_uuid},
                                     {'$addToSet': {TAGS: {'$each': tags}}})
            APP_LOGGER.info("Updated run report uuid=%s with tags %s." %
                            (report_uuid, tags))

            json_response[STATUS] = SUCCEEDED
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[STATUS] = FAILED
            json_response[ERROR] = str(sys.exc_info()[1])
            http_status_code = 500

        return make_clean_response(json_response, http_status_code)
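
The '$addToSet' with '$each' update appends each tag only if it is not already in the array. The same update in a standalone sketch using modern pymongo directly; the connection, database, and field names are illustrative.

from pymongo import MongoClient

coll = MongoClient().test_db.run_reports  # illustrative connection
# Given {'uuid': 'r1', 'tags': ['alpha']}, this leaves the document as
# {'uuid': 'r1', 'tags': ['alpha', 'beta']} -- duplicates are skipped.
coll.update_one({'uuid': 'r1'},
                {'$addToSet': {'tags': {'$each': ['alpha', 'beta']}}})
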
Code example #5
    def process_request(cls, params_dict):
        dataset = params_dict[cls.dataset_parameter][0]
        report_uuid = params_dict[cls.report_uuid_parameter][0]

        http_status_code = 200
        json_response = {RUN_REPORT_UUID: report_uuid, HDF5_DATASET: dataset}

        try:
            cls._DB_CONNECTOR.update(
                RUN_REPORT_COLLECTION, {UUID: report_uuid},
                {'$pull': {IMAGE_STACKS: {'name': dataset, 'upload': True}}})
            cls._DB_CONNECTOR.remove(HDF5_COLLECTION, {HDF5_DATASET: dataset})
            json_response.update({"unassociate": True})
            APP_LOGGER.info("Removed dataset name=%s from run report uuid=%s" %
                            (dataset, report_uuid))
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            http_status_code = 500

        return json_response, http_status_code
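
'$pull' removes every element of the array that matches the embedded-document condition, here any image stack entry with the given name that was flagged as an upload. A pymongo sketch with illustrative names:

from pymongo import MongoClient

coll = MongoClient().test_db.run_reports  # illustrative connection
coll.update_one({'uuid': 'r1'},
                {'$pull': {'image_stacks': {'name': 'ds1', 'upload': True}}})
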
Code example #6
    def process_callback(future):
        try:
            _ = future.result()
            update = {
                "$set": {
                    STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                    RESULT: outfile_path,
                    FINISH_DATESTAMP: datetime.today(),
                    URL: get_results_url(outfile_path),
                }
            }
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(PA_CONVERT_IMAGES_COLLECTION, query, {})) > 0:
                db_connector.update(PA_CONVERT_IMAGES_COLLECTION, query, update)
            else:
                silently_remove_file(outfile_path)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            error_msg = str(sys.exc_info()[1])
            update = {
                "$set": {
                    STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                    RESULT: None,
                    FINISH_DATESTAMP: datetime.today(),
                    ERROR: error_msg
                }
            }
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(PA_CONVERT_IMAGES_COLLECTION, query, {})) > 0:
                db_connector.update(PA_CONVERT_IMAGES_COLLECTION, query, update)
            else:
                silently_remove_file(outfile_path)
Code example #7
    def process_request(cls, params_dict):
        metadata_file = params_dict[cls._file_param][0]
        application = params_dict[cls._app_param][0]
        json_response = {FILENAME: metadata_file.filename}
        http_status_code = 200
        file_uuid = str(uuid4())
        path = os.path.join(TMP_PATH, file_uuid)

        try:
            metadata_file.save(path)
            metadata_file.close()
            dialect = get_dialect(path)
            if dialect:
                probe_ids = cls._DB_CONNECTOR.distinct(
                    PROBE_METADATA_COLLECTION, PROBE_ID)
                ids_are_unique = cls.update_db(dialect, path, probe_ids,
                                               application)
                if not ids_are_unique:
                    http_status_code = 403
            else:
                http_status_code = 415
                json_response[ERROR] = "Invalid file format - file must " \
                    "be either tab or comma delimited."
        except IOError:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 415
            json_response[ERROR] = str(sys.exc_info()[1])
        except:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 500
            json_response[ERROR] = str(sys.exc_info()[1])
        finally:
            silently_remove_file(path)

        return make_clean_response(json_response, http_status_code)
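
get_dialect is not shown in these examples. A minimal implementation consistent with how it is used here (truthy for tab- or comma-delimited files, falsy otherwise) might rely on csv.Sniffer; this exact body is an assumption.

import csv

def get_dialect(path, delimiters='\t,'):
    # Sniff whether the file is tab or comma delimited; None if neither.
    try:
        with open(path, 'rb') as fh:
            return csv.Sniffer().sniff(fh.read(1024), delimiters=delimiters)
    except csv.Error:
        return None
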
Code example #8
def update_run_report(date_folders):
    """
    List of date folders in the form MM_DD_YY that you want to update.

    @param date_folders:
    """
    # fetch utags in run report collection
    db_utags = _DB_CONNECTOR.distinct(RUN_REPORT_COLLECTION, UTAG)

    if os.path.isdir(RUN_REPORT_PATH):

        reports = list()
        for folder in date_folders:
            path = os.path.join(RUN_REPORT_PATH, folder)
            if not os.path.isdir(path):
                continue

            date_obj = datetime.strptime(folder, '%m_%d_%y')

            for sf in os.listdir(path):
                report_file_path = get_run_info_path(path, sf)
                if report_file_path is None: continue

                utag = set_utag(date_obj, sf)
                if utag not in db_utags: # if it does not exist, insert it into the collection
                    log_data = read_report_file(report_file_path, date_obj, utag)
                    if log_data is None:
                        log_data = {DATETIME: date_obj, UTAG: utag}
                    if IMAGE_STACKS in log_data:
                        hdf5_datasets = get_hdf5_datasets(log_data, folder, sf)

                        log_data[IMAGE_STACKS].extend(hdf5_datasets)

                    reports.append(log_data)
                    print report_file_path
                else: # if it exists, check the HDF5 collection for new datasets
                    log_data = _DB_CONNECTOR.find_one(RUN_REPORT_COLLECTION, UTAG, utag)

                    # If the run report was previously missing or had the wrong
                    # format, the mongo document only has three fields: _id,
                    # datetime, and unique_tag. If so, try reading it again.
                    if len(log_data.keys()) == 3:
                        log_data = read_report_file(report_file_path, date_obj, utag)

                    if log_data is not None and IMAGE_STACKS in log_data:
                        hdf5_datasets = get_hdf5_datasets(log_data, folder, sf)
                        exist_datasets = log_data[IMAGE_STACKS]

                        if set(hdf5_datasets) - set(exist_datasets):
                            updated_datasets = list(set(hdf5_datasets) | set(exist_datasets))
                            _DB_CONNECTOR.update(
                                    RUN_REPORT_COLLECTION,
                                    {UTAG: utag},
                                    {"$set": {IMAGE_STACKS: updated_datasets}})

        APP_LOGGER.info("Found %d run reports" % (len(reports)))
        if len(reports) > 0:
            # There is a possible race condition here. Ideally these operations
            # would be performed in concert atomically
            _DB_CONNECTOR.insert(RUN_REPORT_COLLECTION, reports)
Code example #9
    def parse_args(self, raw_args):
        '''
        Convert input strings to their appropriate types.
        '''
        if len(raw_args) > 1 and not self.allow_multiple:
            APP_LOGGER.warning(
                "Multiple parameter values for %s are not permitted. Only using the first value: %s."
                % (self.name, raw_args[0]))
            raw_args = raw_args[:1]

        if len(raw_args) < 1:
            if self.default is not None:
                raw_args = [self.default]
            elif self.required:
                raise Exception("Required argument %s not provided." %
                                self.name)

        converted_args = self._convert_args(raw_args)
        if self.enum:
            valid_args = set(self.enum)
            if not set(converted_args).issubset(valid_args):
                invalid_args = set(converted_args).difference(valid_args)
                raise Exception(
                    "Provided arguments %s not a subset of valid arguments: %s"
                    % (invalid_args, valid_args))

        return converted_args
Code example #10
    def set_defaults(self):
        """
        There are certain parameters that the user may not have sent
        but that can come from the experiment definition, set them here.

        Set workflow based on experiment type. The first 3 stages in each workflow are
        primary analysis, identity, and assay caller. The 4th stage depends on the
        type of experiment, i.e., genotyper API for hotspot experiment, exploratory API
        for exploratory experiment, and sequencing API for sequencing experiment.
        """
        try:
            exp_def_fetcher = ExpDefHandler()
            experiment = exp_def_fetcher.get_experiment_definition(
                self.parameters[EXP_DEF])

            self.exp_type = experiment.exp_type
            self.workflow = [PROCESS, IDENTITY, ASSAY_CALLER,
                             WORKFLOW_LOOKUP[self.exp_type]]
            self.document_list = [PA_DOCUMENT, ID_DOCUMENT, AC_DOCUMENT,
                                  DOCUMENT_LOOKUP[self.exp_type]]

            if DYES not in self.parameters or \
               DYE_LEVELS not in self.parameters or \
               NUM_PROBES not in self.parameters or \
               PICO1_DYE not in self.parameters:
                # get dyes and number of levels
                dye_levels = defaultdict(int)
                for barcode in experiment.barcodes:
                    for dye_name, lvl in barcode.dye_levels.items():
                        dye_levels[dye_name] = max(dye_levels[dye_name],
                                                   int(lvl + 1))
                if DYES not in self.parameters:
                    self.parameters[DYES] = dye_levels.keys()
                if DYE_LEVELS not in self.parameters:
                    self.parameters[DYE_LEVELS] = dye_levels.items()
                if NUM_PROBES not in self.parameters:
                    self.parameters[NUM_PROBES] = len(experiment.barcodes)
                if PICO1_DYE not in self.parameters:
                    self.parameters[PICO1_DYE] = None
        except:
            APP_LOGGER.exception(traceback.format_exc())

        # set parameters for anything user might not have set
        if FILTERED_DYES not in self.parameters:
            self.parameters[FILTERED_DYES] = list()

        if IGNORED_DYES not in self.parameters:
            self.parameters[IGNORED_DYES] = list()

        if CONTINUOUS_PHASE not in self.parameters:
            self.parameters[CONTINUOUS_PHASE] = False

        if DEV_MODE not in self.parameters:
            self.parameters[DEV_MODE] = DEFAULT_DEV_MODE

        if DRIFT_COMPENSATE not in self.parameters:
            self.parameters[DRIFT_COMPENSATE] = DEFAULT_DRIFT_COMPENSATE
Code example #11
    def process_request(cls, params_dict):
        job_uuids = params_dict[cls.job_uuid_param]
        job_name = params_dict[cls.job_name_param][0]
        exp_def_name = params_dict[cls.exp_defs_param][0]
        required_drops = params_dict[cls.req_drops_param][0]

        json_response = {GENOTYPER: []}
        status_codes = list()
        for i, assay_caller_uuid in enumerate(job_uuids):
            if len(job_uuids) == 1:
                cur_job_name = job_name
            else:
                cur_job_name = "%s-%d" % (job_name, i)
            status_code = 200

            if cur_job_name in cls._DB_CONNECTOR.distinct(
                    SA_GENOTYPER_COLLECTION, JOB_NAME):
                status_code = 403
                json_response[GENOTYPER].append({ERROR: 'Job exists.'})
            else:
                try:
                    # Create helper functions
                    genotyper_callable = SaGenotyperCallable(
                        assay_caller_uuid, exp_def_name, required_drops,
                        cls._DB_CONNECTOR, cur_job_name)
                    response = copy.deepcopy(genotyper_callable.document)
                    callback = make_process_callback(
                        genotyper_callable.uuid, exp_def_name,
                        genotyper_callable.ac_result_path,
                        genotyper_callable.ignored_dyes,
                        genotyper_callable.outfile_path, cls._DB_CONNECTOR,
                        cur_job_name)

                    # Add to queue
                    cls._EXECUTION_MANAGER.add_job(response[UUID],
                                                   genotyper_callable,
                                                   callback)

                except:
                    APP_LOGGER.exception(
                        "Error processing Genotyper post request.")
                    response = {
                        JOB_NAME: cur_job_name,
                        ERROR: str(sys.exc_info()[1])
                    }
                    status_code = 500
                finally:
                    if ID in response:
                        del response[ID]
                    json_response[GENOTYPER].append(response)

            status_codes.append(status_code)

        # If all jobs submitted successfully, then 200 should be returned.
        # Otherwise, the maximum status code seems good enough.
        return make_clean_response(json_response, max(status_codes))
Code example #12
    def process_request(cls, params_dict):
        try:
            valid_files = [
                fp for fp in os.listdir(MODIFIED_ARCHIVES_PATH)
                if allowed_file(os.path.join(MODIFIED_ARCHIVES_PATH, fp))
            ]
            return (valid_files, [], None)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            return ([{ERROR: str(sys.exc_info()[1])}], [ERROR], None)
Code example #13
File: TagsGetFunction.py Project: dicara/flask-app
    def process_request(cls, params_dict):
        try:
            reports = cls._DB_CONNECTOR.find(RUN_REPORT_COLLECTION,
                                             {TAGS: {'$exists': True}})
            user_tags = set(t for r in reports for t in r[TAGS])
            return (list(user_tags), [], None)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            return ([{ERROR: str(sys.exc_info()[1])}], [ERROR], None)
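
The '$exists' query matches only documents that actually have a TAGS field, which is why r[TAGS] in the set comprehension cannot raise a KeyError. The same filter in a standalone pymongo sketch with illustrative names:

from pymongo import MongoClient

coll = MongoClient().test_db.run_reports  # illustrative connection
tagged_reports = coll.find({'tags': {'$exists': True}})
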
Code example #14
File: FullAnalysisUtils.py Project: dicara/flask-app
    def _combine_sa(self, output_path, id_report_path, gt_png_path,
                    gt_png_sum_path, gt_kde_path, gt_kde_sum_path):
        """
        Combine Identity report, Assay Caller scatter plot, and Genotyper PNG

        @param id_report_path:          pathname of identity report
        @param gt_png_path:             pathname of genotyper scatter PNG
        @param gt_png_sum_path:         pathname of genotyper scatter sum PNG
        @param gt_kde_path:             pathname of genotyper KDE PNG
        @param gt_kde_sum_path:         pathname of genotyper KDE sum PNG
        """
        try:
            path = output_path + '_png_id'
            doc = SimpleDocTemplate(path, pagesize=landscape(letter))
            story = list()

            story.append(self.get_image(gt_png_sum_path))
            story.append(PageBreak())

            story.append(self.get_image(gt_kde_sum_path))
            story.append(PageBreak())

            styles = getSampleStyleSheet()
            id_title = Paragraph('Identity Report', styles['h2'])
            story.append(id_title)
            story.append(Spacer(1, 0.2 * inch))

            with open(id_report_path, 'r') as id_report:
                lines = id_report.readlines()
                for line in lines:
                    styles = getSampleStyleSheet()
                    left_indent = (len(line) - len(line.lstrip())) * 5
                    styles.add(
                        ParagraphStyle(name='custom_style',
                                       fontName=FONT_NAME_STD,
                                       fontSize=FONT_SIZE,
                                       leftIndent=left_indent))
                    p = Paragraph(line, styles['custom_style'])
                    story.append(p)
                story.append(PageBreak())

            doc.build(story,
                      onFirstPage=self.standard_page,
                      onLaterPages=self.standard_page)

            self._merge_pdfs(output_path, gt_png_path, gt_kde_path, path)

            os.unlink(path)
            return True
        except:
            APP_LOGGER.exception(traceback.format_exc())
            return False
Code example #15
    def process_request(cls, params_dict):
        probes_file_uuid = params_dict[cls._probes_param][0]
        targets_file_uuid = params_dict[cls._targets_param][0]
        strict = params_dict[cls._strict_param][0]
        job_name = params_dict[cls._job_name_param][0]

        json_response = {
            PROBES: probes_file_uuid,
            TARGETS: targets_file_uuid,
            STRICT: strict,
            UUID: str(uuid4()),
            STATUS: JOB_STATUS.submitted,  # @UndefinedVariable
            JOB_NAME: job_name,
            JOB_TYPE_NAME: JOB_TYPE.absorption,  # @UndefinedVariable
            SUBMIT_DATESTAMP: datetime.today(),
        }
        http_status_code = 200

        if job_name in cls._DB_CONNECTOR.distinct(ABSORPTION_COLLECTION,
                                                  JOB_NAME):
            http_status_code = 403
        else:
            try:
                probes_path = cls._DB_CONNECTOR.find_one(
                    PROBES_COLLECTION, UUID, probes_file_uuid)[FILEPATH]
                targets_path = cls._DB_CONNECTOR.find_one(
                    TARGETS_COLLECTION, UUID, targets_file_uuid)[FILEPATH]
                outfile_path = os.path.join(RESULTS_PATH, json_response[UUID])

                # Create helper functions
                abs_callable = AbsorbtionCallable(targets_path, probes_path,
                                                  strict, outfile_path,
                                                  json_response[UUID],
                                                  cls._DB_CONNECTOR)
                callback = make_absorption_callback(json_response[UUID],
                                                    outfile_path,
                                                    cls._DB_CONNECTOR)
                # Add to queue and update DB
                cls._DB_CONNECTOR.insert(ABSORPTION_COLLECTION,
                                         [json_response])
                cls._EXECUTION_MANAGER.add_job(json_response[UUID],
                                               abs_callable, callback)
            except:
                APP_LOGGER.exception(traceback.format_exc())
                json_response[ERROR] = str(sys.exc_info()[1])
                http_status_code = 500
            finally:
                if ID in json_response:
                    del json_response[ID]

        return make_clean_response(json_response, http_status_code)
Code example #16
    def process_request(cls, params_dict):
        probes_file_uuid = params_dict[ParameterFactory.file_uuid(
            "probes", PROBES_COLLECTION)][0]
        targets_file_uuid = params_dict[ParameterFactory.file_uuid(
            "targets", TARGETS_COLLECTION)][0]
        absorb = params_dict[ParameterFactory.boolean(
            "absorb", "Check for absorbed probes.")][0]
        num = params_dict[ParameterFactory.integer(
            "num",
            "Minimum number of probes for a target.",
            default=3,
            minimum=1)][0]
        job_name = params_dict[ParameterFactory.lc_string(
            JOB_NAME, "Unique name to give this job.")][0]

        json_response = {
            PROBES: probes_file_uuid,
            TARGETS: targets_file_uuid,
            ABSORB: absorb,
            NUM: num,
            UUID: str(uuid4()),
            STATUS: JOB_STATUS.submitted,  # @UndefinedVariable
            JOB_NAME: job_name,
            DATESTAMP: datetime.today(),
        }
        http_status_code = 200

        if job_name in cls._DB_CONNECTOR.distinct(VALIDATION_COLLECTION,
                                                  JOB_NAME):
            http_status_code = 403
        else:
            try:
                probes_path = cls._DB_CONNECTOR.find_one(
                    PROBES_COLLECTION, UUID, probes_file_uuid)[FILEPATH]
                targets_path = cls._DB_CONNECTOR.find_one(
                    TARGETS_COLLECTION, UUID, targets_file_uuid)[FILEPATH]

                # TODO: add the validator job to the execution queue

                cls._DB_CONNECTOR.insert(VALIDATION_COLLECTION,
                                         [json_response])
                del json_response[ID]
            except:
                APP_LOGGER.exception(traceback.format_exc())
                json_response[ERROR] = str(sys.exc_info()[1])
                http_status_code = 500

        return make_clean_response(json_response, http_status_code)
Code example #17
    def process_request(cls, params_dict):
        uuid = None
        if cls.uuid_parameter in params_dict and \
            params_dict[cls.uuid_parameter][0]:
            uuid = params_dict[cls.uuid_parameter][0]

        # if uuid exists, return a single report with all fields
        if uuid is not None:
            report = cls._DB_CONNECTOR.find_one(RUN_REPORT_COLLECTION, UUID,
                                                uuid)
            if report is None:
                APP_LOGGER.debug("Run report uuid=%s does not exist." % uuid)
                return ([], [], None)

            del report[ID]
            return ([report], report.keys(), None)

        if cls.refresh_parameter in params_dict and \
           params_dict[cls.refresh_parameter][0]:
            if cls.start_date in params_dict and params_dict[cls.start_date][0]:
                start_date = params_dict[cls.start_date][0]
                if cls.end_date in params_dict and params_dict[cls.end_date][0]:
                    end_date = params_dict[cls.end_date][0]
                else:
                    end_date = datetime.now()
                # Old file location, e.g. 05_10_17
                date_folders = [d.strftime("%m_%d_%y")
                                for d in daterange(start_date, end_date)]
                # New file location, e.g. 2017_05/10
                date_folders.extend(d.strftime("%Y_%m/%d")
                                    for d in daterange(start_date, end_date))
            else:
                date_folders = None
            update_run_reports(date_folders)

        if cls.cart_sn_parameter in params_dict and \
            params_dict[cls.cart_sn_parameter][0]:
            return get_run_reports(params_dict[cls.cart_sn_parameter][0])
        else:
            return get_run_reports()
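
daterange is not defined in these examples. A minimal generator consistent with how it is used above might look like the following; the inclusive end date is an assumption.

from datetime import timedelta

def daterange(start_date, end_date):
    # Yield each day from start_date through end_date inclusive.
    for n in range((end_date - start_date).days + 1):
        yield start_date + timedelta(days=n)

# A date such as datetime(2017, 5, 10) then formats to '05_10_17' for the
# old folder layout and '2017_05/10' for the new one via strftime.
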
Code example #18
def get_hdf5_datasets(log_data, data_folder):
    """
    Fetch the HDF5 archives associated with a run report.

    @param log_data:            the document of run report yaml
    @param date_folder:         folder where data is located
    """
    if log_data is None or RUN_ID not in log_data: return set()

    run_id = log_data[RUN_ID]
    hdf5_paths = [
        os.path.join(data_folder, f + '.h5')
        for f in [run_id, run_id + '-baseline']
        if os.path.isfile(os.path.join(data_folder, f + '.h5'))
    ]
    all_datasets = set()

    for path in hdf5_paths:
        exist_records = _DB_CONNECTOR.find(
            HDF5_COLLECTION, {HDF5_PATH: remove_disk_directory(path)})
        if exist_records:
            all_datasets.update(set(r[HDF5_DATASET] for r in exist_records))
            continue

        new_records = list()
        try:
            with h5py.File(path, 'r') as h5_file:  # open read-only
                dataset_names = h5_file.keys()
            for dsname in dataset_names:
                if re.match(r'^\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}', dsname):
                    new_records.append({
                        HDF5_PATH: remove_disk_directory(path),
                        HDF5_DATASET: dsname,
                    })
        except:
            APP_LOGGER.exception(
                'Unable to get dataset information from HDF5 file: %s' % path)

        if new_records:
            APP_LOGGER.info('Found %d datasets from HDF5 file: %s' %
                            (len(new_records), path))
            _DB_CONNECTOR.insert(HDF5_COLLECTION, new_records)
            all_datasets.update(set(r[HDF5_DATASET] for r in new_records))

    return all_datasets
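
The regex above keeps only HDF5 keys that look like timestamped acquisitions. A quick check; the sample names are illustrative.

import re

pattern = re.compile(r'^\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}')
assert pattern.match('2017-05-10_1424.07')       # date_time.index form
assert pattern.match('my_run-baseline') is None  # everything else rejected
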
Code example #19
        def gen_dye_scatterplot(dyes, sys_listener_path):
            try:
                analysis_df = pandas.read_table(self.analysis_file,
                                                sep=sniff_delimiter(
                                                    self.analysis_file))
                ac_df = pandas.read_table(self.tmp_outfile_path,
                                          sep=sniff_delimiter(
                                              self.tmp_outfile_path))
                analysis_df['assay'] = False
                analysis_df.loc[analysis_df['identity'].notnull(),
                                'assay'] = ac_df['assay'].values

                # System listener inputs
                dyn_align_offsets = {}
                temps = {}
                steps = {}
                if sys_listener_path is not None:
                    sys_listener_dir = os.path.dirname(sys_listener_path)
                    clamp_temp_tp = ClampTempTopicParser()
                    old_channel_offset_tp = OldChannelOffsetTopicParser()
                    channel_offset_tp = ChannelOffsetTopicParser()
                    dyn_align_steps_tp = DynamicAlignStepsParser()
                    topic_parsers = [
                        clamp_temp_tp, old_channel_offset_tp,
                        channel_offset_tp, dyn_align_steps_tp
                    ]
                    sys_listener_parser = SystemListenerParser(
                        sys_listener_dir, topic_parsers=topic_parsers)
                    temps = sys_listener_parser.get_topic_results(
                        clamp_temp_tp.topic)
                    dyn_align_offsets = sys_listener_parser.get_topic_results(
                        channel_offset_tp.topic)
                    if len(dyn_align_offsets) < 1:
                        APP_LOGGER.info("Using old channel offset parser...")
                        dyn_align_offsets = sys_listener_parser.get_topic_results(
                            old_channel_offset_tp.topic)
                    else:
                        APP_LOGGER.info("Using new channel offset parser...")
                    steps = sys_listener_parser.get_topic_results(
                        dyn_align_steps_tp.topic)

                generate_dye_scatterplots(analysis_df,
                                          dyes,
                                          self.tmp_dyes_plot_path,
                                          self.job_name,
                                          self.pico1_dye,
                                          dyn_align_offsets=dyn_align_offsets,
                                          temps=temps,
                                          steps=steps)
                shutil.copy(self.tmp_dyes_plot_path, self.dyes_plot_path)
                APP_LOGGER.info("Dyes scatter plot generated for %s." % \
                    self.job_name)
            except:
                APP_LOGGER.exception("Dyes scatter plot generation failed.")
Code example #20
File: ExpDefUtils.py Project: dicara/flask-app
def get_experiment_defintions():
    """
    Retrieve experiment definition from EXP_DEF_COLLECTION.
    """
    columns = OrderedDict()
    columns[ID] = 0
    columns[UUID] = 1
    columns[NAME] = 1
    columns[VARIANTS] = 1
    columns[DYES] = 1
    columns[TYPE] = 1

    column_names = columns.keys()
    column_names.remove(ID)

    exp_defs = _DB_CONNECTOR.find(EXP_DEF_COLLECTION, {}, columns)
    APP_LOGGER.info('Retrieved %d experiment definitions.' \
                    % (len(exp_defs), ))
    return (exp_defs, column_names, None)
Code example #21
    def process_callback(future):
        try:
            _ = future.result()
            update = {
                '$set': {
                    STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                    RESULT: outfile_path,
                    URL: get_results_url(outfile_path),
                    SCATTER_PLOT: scatter_plot_path,
                    SCATTER_PLOT_URL: get_results_url(scatter_plot_path),
                    DYES_SCATTER_PLOT: dyes_scatter_plot_path,
                    DYES_SCATTER_PLOT_URL: get_results_url(dyes_scatter_plot_path),
                    FINISH_DATESTAMP: datetime.today(),
                }
            }
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(SA_ASSAY_CALLER_COLLECTION, query, {})) > 0:
                db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)
            else:
                silently_remove_file(outfile_path)
                silently_remove_file(scatter_plot_path)
                silently_remove_file(dyes_scatter_plot_path)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            error_msg = str(sys.exc_info()[1])
            update = {
                '$set': {
                    STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                    RESULT: None,
                    FINISH_DATESTAMP: datetime.today(),
                    ERROR: error_msg
                }
            }
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(SA_ASSAY_CALLER_COLLECTION, query, {})) > 0:
                db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)
            else:
                silently_remove_file(outfile_path)
                silently_remove_file(scatter_plot_path)
                silently_remove_file(dyes_scatter_plot_path)
Code example #22
    def process_callback(future):
        try:
            _ = future.result()
            report_errors = check_report_for_errors(report_path)
            update_data = { STATUS: JOB_STATUS.succeeded,
                            RESULT: outfile_path,
                            URL: get_results_url(outfile_path),
                            PLOT: plot_path,
                            REPORT: report_path,
                            PLOT_URL: get_results_url(plot_path),
                            REPORT_URL: get_results_url(report_path),
                            PLATE_PLOT_URL: get_results_url(plate_plot_path),
                            TEMPORAL_PLOT_URL: get_results_url(temporal_plot_path),
                            DROP_COUNT_PLOT_URL: get_results_url(drop_count_plot_path),
                            FINISH_DATESTAMP: datetime.today()}
            if report_errors:
                update_data[ERROR] = ' '.join(report_errors)

            update = {"$set": update_data}
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(SA_IDENTITY_COLLECTION, query, {})) > 0:
                db_connector.update(SA_IDENTITY_COLLECTION, query, update)
            else:
                silently_remove_file(report_path)
                silently_remove_file(outfile_path)
                silently_remove_file(plot_path)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            error_msg = str(sys.exc_info()[1])

            update    = { "$set": {STATUS: JOB_STATUS.failed, # @UndefinedVariable
                                   RESULT: None,
                                   FINISH_DATESTAMP: datetime.today(),
                                   ERROR: error_msg}}
            if os.path.isfile(report_path):
                update['$set'][REPORT_URL]
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(SA_IDENTITY_COLLECTION, query, {})) > 0:
                db_connector.update(SA_IDENTITY_COLLECTION, query, update)
            else:
                silently_remove_file(report_path)
                silently_remove_file(outfile_path)
                silently_remove_file(plot_path)
Code example #23
    def _generate(self, ndyes, nchoose=5):
        """
        @param ndyes:   1nteger, number of dyes to use per solution
        #param nchoose: Integer, maximum number of combinations that will be further optimized
        """
        # check whether the minimum and maximum dye levels can produce the requested number of barcodes
        min_nbarcodes = numpy.product(self._barcode_min_nlvls[numpy.argsort(self._barcode_min_nlvls)[:ndyes]])
        max_nbarcodes = numpy.product(self._barcode_max_nlvls[numpy.argsort(self._barcode_max_nlvls)[-ndyes:]])

        # too many dyes were selected
        if min_nbarcodes > self._requested_nbarcodes:
            APP_LOGGER.info('Cannot generate requested number of barcodes (%d).  '
                            'Smallest library would have %d barcodes.' %
                            (self._requested_nbarcodes, min_nbarcodes))
            return

        # too few dyes were selected
        if max_nbarcodes < self._requested_nbarcodes:
            APP_LOGGER.info('Cannot generate requested number of barcodes (%d).  '
                            'Largest library would have %d barcodes.' %
                            (self._requested_nbarcodes, max_nbarcodes))
            return

        # find the optimal number of levels for each dye combination
        requested_dye_idxs = set(range(len(self._requested_dye_lots)))
        optimal_nlvls = list()
        for dye_idxs in itertools.combinations(xrange(len(self._barcode_profiles)), ndyes):
            dye_idxs = numpy.array(dye_idxs)

            # ignore combinations that do not include requested dyes
            if self.need_additional_db_dyes and \
                    self._requested_dye_lots and \
                    not requested_dye_idxs.issubset(dye_idxs):
                continue

            # ignore combinations in which the peaks are too close
            peaks = numpy.concatenate((self._barcode_peaks[dye_idxs], self._non_barcode_peaks))
            if numpy.any(numpy.diff(numpy.sort(peaks)) < self._min_peak_difference):
                continue

            try:
                candidate_nlvls, candidate_lowest_peak = self._calc_optimal_nlvls(dye_idxs)
                optimal_nlvls.append((candidate_lowest_peak, dye_idxs, candidate_nlvls))
            except Exception as e:
                APP_LOGGER.exception(e)

        optimal_nlvls.sort(key=lambda x: x[0])

        for _, dye_idxs, nlvls in optimal_nlvls[: nchoose]:
            try:
                self._make_design(nlvls, dye_idxs)
            except Exception as e:
                APP_LOGGER.exception(e)
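
The feasibility bounds at the top of _generate are products over the ndyes smallest minimum level counts and the ndyes largest maximum level counts. A standalone numpy illustration; the per-dye level counts are invented.

import numpy

min_nlvls = numpy.array([2, 3, 2, 4])  # hypothetical per-dye minimum levels
max_nlvls = numpy.array([4, 5, 3, 6])  # hypothetical per-dye maximum levels
ndyes = 2

# Smallest possible library: product of the two smallest minimums (2 * 2 = 4).
min_nbarcodes = numpy.product(min_nlvls[numpy.argsort(min_nlvls)[:ndyes]])
# Largest possible library: product of the two largest maximums (5 * 6 = 30).
max_nbarcodes = numpy.product(max_nlvls[numpy.argsort(max_nlvls)[-ndyes:]])
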
Code example #24
    def process_request(cls, params_dict):

        dye_metrics = params_dict[cls._dyes_metrics]
        drop_ave = params_dict[cls._drop_ave_diameter][0]
        drop_std = params_dict[cls._drop_std_diameter][0]
        http_status_code = 200
        uuid = str(uuid4())
        json_response = {
            UUID: uuid,
            DATESTAMP: datetime.today(),
        }
        try:
            dye_names = list()
            nlvls = list()
            intensities = list()
            for dye_name, nlvl, low, high in dye_metrics:
                dye_names.append(dye_name)
                nlvls.append(nlvl)
                intensities.append((low, high))

            centroids = make_centroids(nlvls, intensities)
            clusters = make_clusters(centroids,
                                     drop_ave=drop_ave,
                                     drop_std=drop_std)
            collisions = check_collision(clusters)

            json_response[DROP_AVE_DIAMETER] = drop_ave
            json_response[DROP_STD_DIAMETER] = drop_std
            json_response[DYE_METRICS] = map(list, dye_metrics)
            json_response['collisions'] = collisions
            json_response['nclusters'] = numpy.product(nlvls)

        except IOError:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 415
            json_response[ERROR] = str(sys.exc_info()[1])
        except:
            APP_LOGGER.exception(traceback.format_exc())
            http_status_code = 500
            json_response[ERROR] = str(sys.exc_info()[1])

        return make_clean_response(json_response, http_status_code)
Code example #25
    def _calc_dye_max_intensities(self, dye_idxs, nlvls, resolution=100.0):
        """
        The ideal library will take full advantage of our intensity space, which
        peaks at 65535 intensity units.  This function attempt to optimize the
        maximum level of each dye by recomposing the profiles and testing that
        they do not saturate.

        @param dye_idxs:    1D numpy array of the indexes of the barcode dyes
        @param nlvls:       1D numpy array of the number of levels for each dye
        @param resolution:  Float, intensity unit spacing, i.e. resolution of 100.0
                            would result in intensities of: 1000.0, 1100.00, 1200.0...
        @return:            1D numpy of maximum intensities for each dye.
        """
        dye_max_intensities = None
        # test various percent cutoffs
        for percent_best in numpy.arange(2.5, 25, 2.5):
            try:
                # make a group of scalars for each dye (dimension)
                scalars = [numpy.linspace(10000.0, MAX_INTEN, resolution).reshape(-1, 1) for _ in dye_idxs]
                # create barcode profiles by summing each combination of dyes profiles
                # to find an optimal max barcode profile
                scalar_combos = scalars.pop(0)
                while scalars:
                    scalar_combos = numpy.hstack((
                        numpy.repeat(scalar_combos, resolution, axis=0),
                        numpy.tile(scalars.pop(0), (len(scalar_combos), 1))
                    ))
                    scalar_combos = self._rm_saturated(scalar_combos, dye_idxs)
                    scalar_combos = self._rm_most_variable(scalar_combos, percent_best, nlvls)

                midx = numpy.argmax(numpy.sum(scalar_combos, axis=1))

                dye_max_intensities = scalar_combos[midx]
                break
            except Exception as e:
                APP_LOGGER.exception(e)

        if dye_max_intensities is None or len(dye_max_intensities) != len(dye_idxs):
            raise Exception('A library cannot be generated from this combination of dyes.')

        return dye_max_intensities
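
The repeat/tile pattern in the loop above builds the cross-product of scale factors one dye at a time. A standalone illustration with two dyes and a resolution of 3; the values are invented.

import numpy

a = numpy.array([[1.0], [2.0], [3.0]])     # scalars for the first dye
b = numpy.array([[10.0], [20.0], [30.0]])  # scalars for the second dye

# numpy.repeat stretches the accumulated rows while numpy.tile cycles the
# new column, so hstack yields every (a, b) pairing -- a 9 x 2 grid here.
combos = numpy.hstack((numpy.repeat(a, 3, axis=0),
                       numpy.tile(b, (len(a), 1))))
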
Code example #26
def read_report_file_txt(report_file, date_obj, utag):
    """
    Extract information from a run_log.txt file, and returns a dictionary
    """
    try:
        with open(report_file, 'r') as rf:
            lines = rf.readlines()
        if not lines:
            APP_LOGGER.error("The log file, %s, is empty." % report_file)
            return None
        data = {FILE_TYPE: 'txt', DATETIME: date_obj, UTAG: utag}
        for i, line in enumerate(lines):
            if line.strip():
                try:
                    key, value = line.split(':')
                    key, value = key.strip(), value.strip()
                    if key == USER_TXT and value:
                        data[key] = [user.strip() for user in value.split(',')]
                    elif key in [
                            RUN_DESCRIPTION_TXT, EXIT_NOTES_TXT, TDI_STACKS_TXT
                    ]:
                        values = [value]
                        j = i + 1
                        while j < len(lines) and ':' not in lines[j]:
                            values.append(lines[j].strip())
                            j += 1
                        if key == TDI_STACKS_TXT:
                            # escape the literal path before embedding it in the pattern
                            regex = re.escape(ARCHIVES_PATH) + '/[^/]+'
                            data[key] = re.findall(regex, ''.join(values))
                        else:
                            data[key] = ', '.join(values)
                    elif value:
                        data[key] = value
                except:
                    continue
        report_obj = RunReportWebUI.from_dict(**data)
        return report_obj.as_dict()
    except:
        APP_LOGGER.error("Error raised for report %s: %s" %
                         (report_file, traceback.format_exc()))
        return None
Code example #27
def read_report_file(report_file, date_obj, utag):
    """
    Extract information from a run log file in txt or yaml format
    The path of a sample run_info file is:
    /mnt/runs/run_reprots/04_05_16/Tue05_1424_beta17/run_info.txt
    date_obj is based on 04_05_16
    utag is 2016_04_05_Tue05_1424_beta17
    """
    if not report_file:
        APP_LOGGER.debug("File pathname, %s, is an empty string." %
                         report_file)
        return None

    basename = os.path.basename(report_file).lower()
    if basename.endswith('txt'):
        return read_report_file_txt(report_file, date_obj, utag)
    elif basename.endswith('yaml'):
        return read_report_file_yaml(report_file, date_obj, utag)
    else:
        APP_LOGGER.debug("File extension must be txt or yaml.")
        return None
Code example #28
def read_report_file_yaml(report_file, date_obj, utag):
    """
    Extract information from a run_log.yaml file, and returns a dictionary
    """
    try:
        with open(report_file, 'r') as rf:
            try:
                data = yaml.safe_load(rf)  # safe_load avoids executing arbitrary YAML tags
            except yaml.YAMLError as exc:
                APP_LOGGER.error("YMALError %s received" % exc)
                return None
        if not data:
            APP_LOGGER.debug("YAML file, %s, is empty." % report_file)
            return None
        data[DATETIME] = date_obj
        data[FILE_TYPE] = 'yaml'
        data[UTAG] = utag
        if USER in data and isinstance(data[USER], str):
            data[USER] = [user.strip() for user in data[USER].split(',')]

        # distinguish reports from Web UI and Client UI
        if CARTRIDGE_BC not in data:
            report_obj = RunReportWebUI.from_dict(**data)
        else:
            report_obj = RunReportClientUI.from_dict(**data)
        return report_obj.as_dict()
    except:
        APP_LOGGER.error("Error raised for report %s: %s" %
                         (report_file, traceback.format_exc()))
        return None
Code example #29
    def process_request(cls, params_dict):
        targets_file = params_dict[ParameterFactory.file(
            "Targets FASTA file.")][0]
        json_response = {FILENAME: targets_file.filename}
        http_status_code = 200
        file_uuid = str(uuid4())

        path = os.path.join(TARGETS_UPLOAD_PATH, file_uuid)
        existing_filenames = cls._DB_CONNECTOR.distinct(
            TARGETS_COLLECTION, FILENAME)
        if os.path.exists(path) or targets_file.filename in existing_filenames:
            http_status_code = 403
        elif not validate_fasta(targets_file):
            http_status_code = 415
        else:
            try:
                targets_file.save(path)
                targets_file.close()
                json_response[URL] = "http://%s/uploads/%s/targets/%s" % (
                    HOSTNAME, PORT, file_uuid)
                json_response[FILEPATH] = path
                json_response[UUID] = file_uuid
                json_response[DATESTAMP] = datetime.today()
                json_response[TYPE] = "targets"
                if "." in targets_file.filename:
                    json_response[FORMAT] = targets_file.filename.split(
                        ".")[-1]
                else:
                    json_response[FORMAT] = "Unknown"

                cls._DB_CONNECTOR.insert(TARGETS_COLLECTION, [json_response])
            except:
                APP_LOGGER.exception(traceback.format_exc())
                json_response[ERROR] = str(sys.exc_info()[1])
                http_status_code = 500
            finally:
                if ID in json_response:
                    del json_response[ID]

        return make_clean_response(json_response, http_status_code)
Code example #30
    def process_callback(future):
        try:
            _ = future.result()

            update = { "$set": {
                                 STATUS: JOB_STATUS.succeeded, # @UndefinedVariable
                                 RESULT: outfile_path,
                                 URL: get_results_url(os.path.join(dirname, uuid)),
                                 PNG: os.path.join(dirname, scatter_ind_pdf_fn),
                                 PNG_URL: get_results_url(os.path.join(dirname, scatter_ind_pdf_fn)),
                                 PNG_SUM: os.path.join(dirname, scatter_png_fn),
                                 PNG_SUM_URL: get_results_url(os.path.join(dirname, scatter_png_fn)),
                                 KDE_PNG: os.path.join(dirname, kde_ind_pdf_fn),
                                 KDE_PNG_URL: get_results_url(os.path.join(dirname, kde_ind_pdf_fn)),
                                 KDE_PNG_SUM: os.path.join(dirname, kde_png_fn),
                                 KDE_PNG_SUM_URL: get_results_url(os.path.join(dirname, kde_png_fn)),
                                 FINISH_DATESTAMP: datetime.today(),
                               }
                    }
        except:
            APP_LOGGER.exception("Error in Exploratory post request process callback.")
            error_msg = str(sys.exc_info()[1])
            update    = { "$set": {STATUS: JOB_STATUS.failed, # @UndefinedVariable
                                   RESULT: None,
                                   PDF: None,
                                   PNG: None,
                                   PNG_SUM: None,
                                   FINISH_DATESTAMP: datetime.today(),
                                   ERROR: error_msg}}
        finally:
            # If job has been deleted, then delete result and don't update DB.
            if len(db_connector.find(SA_EXPLORATORY_COLLECTION, query, {})) > 0:
                db_connector.update(SA_EXPLORATORY_COLLECTION, query, update)
            else:
                silently_remove_file(outfile_path)
                silently_remove_file(os.path.join(dirname, scatter_png_fn))
                silently_remove_file(os.path.join(dirname, scatter_ind_pdf_fn))
                silently_remove_file(os.path.join(dirname, kde_png_fn))
                silently_remove_file(os.path.join(dirname, kde_ind_pdf_fn))