def update_image_stacks(log_data, data_folder):
    """
    Register image stacks named in a run report that are not yet present in
    the archive collection.

    Every name listed under IMAGE_STACKS is looked up in ARCHIVES_COLLECTION.
    Names without a record are searched for as a directory, first under
    ARCHIVES_PATH and then under data_folder; the first directory that exists
    is recorded. All new records are inserted in a single call.

    @param log_data:    the document of run report yaml
    @param data_folder: folder where data is located
    """
    if log_data is None or IMAGE_STACKS not in log_data:
        return

    discovered = list()
    for stack_name in log_data[IMAGE_STACKS]:
        if _DB_CONNECTOR.find_one(ARCHIVES_COLLECTION, ARCHIVE, stack_name):
            # already registered
            continue

        # Probe the candidate locations lazily, in priority order.
        candidates = (os.path.join(root, stack_name)
                      for root in (ARCHIVES_PATH, data_folder))
        stack_dir = next((c for c in candidates if os.path.isdir(c)), None)
        if stack_dir is not None:
            discovered.append({
                ARCHIVE: stack_name,
                ARCHIVE_PATH: remove_disk_directory(stack_dir),
            })

    if discovered:
        APP_LOGGER.info('Found %d image stacks: %s' %
                        (len(discovered), discovered))
        _DB_CONNECTOR.insert(ARCHIVES_COLLECTION, discovered)
def absorption_callback(future):
    """
    Completion callback for an absorption job.

    Records success or failure of the job in ABSORPTION_COLLECTION. The names
    outfile_path, db_connector, query and uuid are not defined in this block;
    presumably they are captured from an enclosing factory function.

    @param future: the finished future; its result() re-raises any error
                   raised by the job
    """
    def _record_or_discard(update):
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(ABSORPTION_COLLECTION, query, {})) > 0:
            db_connector.update(ABSORPTION_COLLECTION, query, update)
        elif os.path.isfile(outfile_path):
            os.remove(outfile_path)

    try:
        _ = future.result()
        succeeded = {
            STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
            RESULT: outfile_path,
            FINISH_DATESTAMP: datetime.today(),
            URL: "http://%s/results/%s/%s" % (HOSTNAME, PORT, uuid),
        }
        _record_or_discard({"$set": succeeded})
    except:
        APP_LOGGER.exception(traceback.format_exc())
        error_msg = str(sys.exc_info()[1])
        failed = {
            STATUS: JOB_STATUS.failed,  # @UndefinedVariable
            RESULT: None,
            FINISH_DATESTAMP: datetime.today(),
            ERROR: error_msg,
        }
        _record_or_discard({"$set": failed})
def parse_pa_data_src(pa_data_src_name):
    """
    Classify a primary analysis data source as HDF5 or image stack and list
    the archives it resolves to.

    @param pa_data_src_name: String, name of data source, could be either
                             the HDF5 dataset name or a folder name
                             containing image stacks
    @return: A list of tuples (data source name, is_hdf5). An HDF5 source
             yields a single tuple; an image source yields one tuple per
             directory returned by io_utilities.get_archive_dirs.
    @raise Exception: if the source is neither an HDF5 nor an image archive
    """
    if is_hdf5_archive(pa_data_src_name):
        archives = [(pa_data_src_name, True)]
        APP_LOGGER.info('%s is an HDF5 file.' % pa_data_src_name)
        return archives

    if is_image_archive(pa_data_src_name):
        stack_dirs = io_utilities.get_archive_dirs(
            pa_data_src_name, min_num_images=PA_MIN_NUM_IMAGES)
        archives = [(stack_dir, False) for stack_dir in stack_dirs]
        APP_LOGGER.info('%s is an image stack.' % pa_data_src_name)
        return archives

    raise Exception(
        'Unable to determine if %s is an image stack or HDF5 file.' %
        pa_data_src_name)
def process_request(cls, params_dict):
    """
    Add the supplied tags to a run report document.

    Empty tag values are dropped. The tags are merged into the report's TAGS
    field with '$addToSet'/'$each'.

    @param params_dict: parsed request parameters keyed by parameter object
    @return: result of make_clean_response for the JSON body and an HTTP
             status code (200 on success, 500 on any failure)
    """
    tags = list(filter(None, params_dict[cls.tags_parameter]))
    report_uuid = params_dict[cls.report_uuid_parameter][0]

    json_response = {RUN_REPORT_UUID: report_uuid, TAGS: tags}
    http_status_code = 200
    add_tags = {'$addToSet': {TAGS: {'$each': tags}}}

    try:
        cls._DB_CONNECTOR.update(RUN_REPORT_COLLECTION,
                                 {UUID: report_uuid},
                                 add_tags)
        APP_LOGGER.info("Updated run report uuid=%s with tags %s." %
                        (report_uuid, tags))
        json_response[STATUS] = SUCCEEDED
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[STATUS] = FAILED
        json_response[ERROR] = str(sys.exc_info()[1])
        http_status_code = 500

    return make_clean_response(json_response, http_status_code)
def process_request(cls, params_dict):
    """
    Detach an uploaded HDF5 dataset from a run report.

    Pulls the {'name': dataset, 'upload': True} entry out of the report's
    IMAGE_STACKS and removes the dataset's record from HDF5_COLLECTION.

    @param params_dict: parsed request parameters keyed by parameter object
    @return: tuple (json_response dict, HTTP status code); the status code is
             200 on success and 500 on any failure
    """
    dataset = params_dict[cls.dataset_parameter][0]
    report_uuid = params_dict[cls.report_uuid_parameter][0]

    json_response = {RUN_REPORT_UUID: report_uuid, HDF5_DATASET: dataset}
    http_status_code = 200
    pull_uploaded_stack = {
        '$pull': {
            IMAGE_STACKS: {'name': dataset, 'upload': True},
        },
    }

    try:
        cls._DB_CONNECTOR.update(RUN_REPORT_COLLECTION,
                                 {UUID: report_uuid},
                                 pull_uploaded_stack)
        cls._DB_CONNECTOR.remove(HDF5_COLLECTION, {HDF5_DATASET: dataset})
        json_response["unassociate"] = True
        APP_LOGGER.info("Removed dataset name=%s from run report uuid=%s" %
                        (dataset, report_uuid))
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[ERROR] = str(sys.exc_info()[1])
        http_status_code = 500

    return json_response, http_status_code
def process_callback(future):
    """
    Completion callback for an image conversion job.

    Records success or failure in PA_CONVERT_IMAGES_COLLECTION. The names
    outfile_path, db_connector and query are not defined in this block;
    presumably they are captured from an enclosing factory function.

    @param future: the finished future; its result() re-raises any error
                   raised by the job
    """
    def _record_or_discard(update):
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(PA_CONVERT_IMAGES_COLLECTION, query, {})) > 0:
            db_connector.update(PA_CONVERT_IMAGES_COLLECTION, query, update)
        else:
            silently_remove_file(outfile_path)

    try:
        _ = future.result()
        succeeded = {
            STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
            RESULT: outfile_path,
            FINISH_DATESTAMP: datetime.today(),
            URL: get_results_url(outfile_path),
        }
        _record_or_discard({"$set": succeeded})
    except:
        APP_LOGGER.exception(traceback.format_exc())
        error_msg = str(sys.exc_info()[1])
        failed = {
            STATUS: JOB_STATUS.failed,  # @UndefinedVariable
            RESULT: None,
            FINISH_DATESTAMP: datetime.today(),
            ERROR: error_msg,
        }
        _record_or_discard({"$set": failed})
def process_request(cls, params_dict):
    """
    Ingest an uploaded probe metadata file.

    The upload is saved under TMP_PATH with a fresh uuid name, its delimiter
    dialect is detected, and cls.update_db is called with the probe ids
    already present in PROBE_METADATA_COLLECTION. The temporary file is
    always removed afterwards.

    Status codes: 200 on success, 403 when update_db reports non-unique ids,
    415 when no dialect is detected or an IOError occurs, 500 otherwise.

    @param params_dict: parsed request parameters keyed by parameter object
    @return: result of make_clean_response for the JSON body and status code
    """
    metadata_file = params_dict[cls._file_param][0]
    application = params_dict[cls._app_param][0]

    json_response = {FILENAME: metadata_file.filename}
    http_status_code = 200
    path = os.path.join(TMP_PATH, str(uuid4()))

    try:
        metadata_file.save(path)
        metadata_file.close()
        dialect = get_dialect(path)
        if not dialect:
            http_status_code = 415
            json_response[ERROR] = ("Invalid file format - file must be "
                                    "either tab or comma delimited.")
        else:
            known_probe_ids = cls._DB_CONNECTOR.distinct(
                PROBE_METADATA_COLLECTION, PROBE_ID)
            if not cls.update_db(dialect, path, known_probe_ids, application):
                http_status_code = 403
    except:
        APP_LOGGER.exception(traceback.format_exc())
        raised = sys.exc_info()[1]
        # I/O problems are reported as 415, everything else as 500.
        http_status_code = 415 if isinstance(raised, IOError) else 500
        json_response[ERROR] = str(raised)
    finally:
        silently_remove_file(path)

    return make_clean_response(json_response, http_status_code)
def update_run_report(date_folders):
    """
    Synchronise RUN_REPORT_COLLECTION with the run report files found on disk.

    For every sub-folder of each date folder that contains a run info file:
      * if its unique tag is not yet in the collection, the report is read
        (falling back to a minimal {DATETIME, UTAG} document when it cannot
        be parsed) and queued for insertion;
      * otherwise the stored document is checked for HDF5 datasets that are
        not yet listed under IMAGE_STACKS and updated if any are found.

    @param date_folders: List of date folders in the form MM_DD_YY that you
                         want to update. None is treated as an empty list.
    """
    # fetch utags in run report collection; a set keeps lookups cheap
    db_utags = set(_DB_CONNECTOR.distinct(RUN_REPORT_COLLECTION, UTAG))

    if not os.path.isdir(RUN_REPORT_PATH):
        return

    reports = list()
    for folder in date_folders or ():
        path = os.path.join(RUN_REPORT_PATH, folder)
        if not os.path.isdir(path):
            continue

        date_obj = datetime.strptime(folder, '%m_%d_%y')
        for sf in os.listdir(path):
            report_file_path = get_run_info_path(path, sf)
            if report_file_path is None:
                continue

            utag = set_utag(date_obj, sf)
            if utag not in db_utags:
                # if not exists, need to insert to collection
                log_data = read_report_file(report_file_path, date_obj, utag)
                if log_data is None:
                    log_data = {DATETIME: date_obj, UTAG: utag}
                if IMAGE_STACKS in log_data:
                    # NOTE(review): called with three arguments here; confirm
                    # this matches the signature of get_hdf5_datasets.
                    hdf5_datasets = get_hdf5_datasets(log_data, folder, sf)
                    log_data[IMAGE_STACKS].extend(hdf5_datasets)
                reports.append(log_data)
                APP_LOGGER.info("New run report: %s" % report_file_path)
            else:
                # if exists, check HDF5 collection for new datasets
                log_data = _DB_CONNECTOR.find_one(RUN_REPORT_COLLECTION,
                                                  UTAG, utag)

                # If previously a run report was not there or had wrong
                # format, the mongo documents only has three fields, _id,
                # datetime, and unique_tag. If this occurs, try reading the
                # run report again. The same is done when the document has
                # disappeared since the utags were fetched.
                if log_data is None or len(log_data.keys()) == 3:
                    log_data = read_report_file(report_file_path, date_obj,
                                                utag)

                if log_data is not None and IMAGE_STACKS in log_data:
                    hdf5_datasets = get_hdf5_datasets(log_data, folder, sf)
                    exist_datasets = log_data[IMAGE_STACKS]
                    if set(hdf5_datasets) - set(exist_datasets):
                        updated_datasets = list(
                            set(hdf5_datasets) | set(exist_datasets))
                        _DB_CONNECTOR.update(
                            RUN_REPORT_COLLECTION,
                            {UTAG: utag},
                            {"$set": {IMAGE_STACKS: updated_datasets}})

    APP_LOGGER.info("Found %d run reports" % (len(reports)))
    if len(reports) > 0:
        # There is a possible race condition here. Ideally these operations
        # would be performed in concert atomically
        _DB_CONNECTOR.insert(RUN_REPORT_COLLECTION, reports)
def parse_args(self, raw_args):
    '''
    Convert input strings to their appropriate types.

    Extra values are dropped (with a warning) when multiple values are not
    allowed, a missing value is replaced by the default when one is set, and
    the converted values are validated against self.enum when it is set.

    @param raw_args: list of raw values supplied for this parameter
    @return: the values as returned by self._convert_args
    @raise Exception: if the parameter is required but absent, or if a
                      converted value is not in self.enum
    '''
    if len(raw_args) > 1 and not self.allow_multiple:
        first_value = raw_args[0]
        APP_LOGGER.warning(
            "Multiple parameter values for %s are not permitted. Only using the first value: %s."
            % (self.name, first_value))
        raw_args = raw_args[:1]

    if len(raw_args) == 0:
        if self.default is not None:
            raw_args = [self.default]
        elif self.required:
            raise Exception("Required argument %s not provided." % self.name)

    converted_args = self._convert_args(raw_args)
    if not self.enum:
        return converted_args

    valid_args = set(self.enum)
    invalid_args = set(converted_args).difference(valid_args)
    if invalid_args:
        raise Exception(
            "Provided arguments %s not a subset of valid arguments: %s" %
            (invalid_args, valid_args))
    return converted_args
def set_defaults(self):
    """
    There are certain parameters that the user may not have sent but that
    can come from the experiment definition, set them here.

    Set workflow based on experiment type. The first 3 stages in each
    workflow are primary analysis, identity, and assay caller. The 4th stage
    depends on the type of experiment, i.e., genotyper API for hotspot
    experiment, exploratory API for exploratory experiment, and sequencing
    API for sequencing experiment.

    Failures while reading the experiment definition are logged and do not
    propagate; the generic defaults at the end are applied regardless.
    """
    try:
        exp_def_fetcher = ExpDefHandler()
        experiment = exp_def_fetcher.get_experiment_definition(
            self.parameters[EXP_DEF])
        self.exp_type = experiment.exp_type

        self.workflow = [PROCESS, IDENTITY, ASSAY_CALLER] + \
                        [WORKFLOW_LOOKUP[self.exp_type]]
        self.document_list = [PA_DOCUMENT, ID_DOCUMENT, AC_DOCUMENT] + \
                             [DOCUMENT_LOOKUP[self.exp_type]]

        if DYES not in self.parameters or \
           DYE_LEVELS not in self.parameters or \
           NUM_PROBES not in self.parameters or \
           PICO1_DYE not in self.parameters:
            # get dyes and number of levels
            dye_levels = defaultdict(int)
            for barcode in experiment.barcodes:
                for dye_name, lvl in barcode.dye_levels.items():
                    dye_levels[dye_name] = max(dye_levels[dye_name],
                                               int(lvl + 1))

            # Store real lists: dict views (Python 3) are tied to the
            # temporary dict and are not serialisable.
            if DYES not in self.parameters:
                self.parameters[DYES] = list(dye_levels.keys())
            if DYE_LEVELS not in self.parameters:
                self.parameters[DYE_LEVELS] = list(dye_levels.items())
            if NUM_PROBES not in self.parameters:
                self.parameters[NUM_PROBES] = len(experiment.barcodes)
            if PICO1_DYE not in self.parameters:
                self.parameters[PICO1_DYE] = None
    except Exception:
        APP_LOGGER.exception(traceback.format_exc())

    # set parameters for anything user might not have set
    if FILTERED_DYES not in self.parameters:
        self.parameters[FILTERED_DYES] = list()
    if IGNORED_DYES not in self.parameters:
        self.parameters[IGNORED_DYES] = list()
    if CONTINUOUS_PHASE not in self.parameters:
        self.parameters[CONTINUOUS_PHASE] = False
    if DEV_MODE not in self.parameters:
        self.parameters[DEV_MODE] = DEFAULT_DEV_MODE
    if DRIFT_COMPENSATE not in self.parameters:
        self.parameters[DRIFT_COMPENSATE] = DEFAULT_DRIFT_COMPENSATE
def process_request(cls, params_dict):
    """
    Submit one genotyper job per assay caller job uuid.

    With a single uuid the job keeps the requested name; with several, each
    job is named "<job_name>-<index>". A job whose name already exists in
    SA_GENOTYPER_COLLECTION is rejected with 403; a job that fails to be
    created or queued is reported with 500.

    @param params_dict: parsed request parameters keyed by parameter object
    @return: result of make_clean_response for the JSON body and the highest
             status code among the individual jobs
    """
    job_uuids = params_dict[cls.job_uuid_param]
    job_name = params_dict[cls.job_name_param][0]
    exp_def_name = params_dict[cls.exp_defs_param][0]
    required_drops = params_dict[cls.req_drops_param][0]

    json_response = {GENOTYPER: []}
    status_codes = list()
    single_job = len(job_uuids) == 1

    for i, assay_caller_uuid in enumerate(job_uuids):
        cur_job_name = job_name if single_job else "%s-%d" % (job_name, i)

        existing_names = cls._DB_CONNECTOR.distinct(SA_GENOTYPER_COLLECTION,
                                                    JOB_NAME)
        if cur_job_name in existing_names:
            json_response[GENOTYPER].append({ERROR: 'Job exists.'})
            status_codes.append(403)
            continue

        status_code = 200
        try:
            # Create helper functions
            genotyper_callable = SaGenotyperCallable(
                assay_caller_uuid, exp_def_name, required_drops,
                cls._DB_CONNECTOR, cur_job_name)
            response = copy.deepcopy(genotyper_callable.document)
            callback = make_process_callback(
                genotyper_callable.uuid,
                exp_def_name,
                genotyper_callable.ac_result_path,
                genotyper_callable.ignored_dyes,
                genotyper_callable.outfile_path,
                cls._DB_CONNECTOR,
                cur_job_name)

            # Add to queue
            cls._EXECUTION_MANAGER.add_job(response[UUID],
                                           genotyper_callable, callback)
        except:
            APP_LOGGER.exception("Error processing Genotyper post request.")
            response = {
                JOB_NAME: cur_job_name,
                ERROR: str(sys.exc_info()[1]),
            }
            status_code = 500
        finally:
            if ID in response:
                del response[ID]
            json_response[GENOTYPER].append(response)
        status_codes.append(status_code)

    # If all jobs submitted successfully, then 200 should be returned.
    # Otherwise, the maximum status code seems good enough.
    return make_clean_response(json_response, max(status_codes))
def process_request(cls, params_dict):
    """
    List the entries of MODIFIED_ARCHIVES_PATH accepted by allowed_file.

    @param params_dict: parsed request parameters (unused)
    @return: tuple (data, column names, third element always None). On
             success data is the list of accepted file names and the column
             list is empty; on failure data holds a single {ERROR: message}
             dict and the column list is [ERROR].
    """
    try:
        valid_files = list()
        for entry in os.listdir(MODIFIED_ARCHIVES_PATH):
            if allowed_file(os.path.join(MODIFIED_ARCHIVES_PATH, entry)):
                valid_files.append(entry)
        return (valid_files, [], None)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        failure = {ERROR: str(sys.exc_info()[1])}
        return ([failure], [ERROR], None)
def process_request(cls, params_dict):
    """
    Collect the distinct tags used across all run reports that have a TAGS
    field.

    @param params_dict: parsed request parameters (unused)
    @return: tuple (data, column names, third element always None). On
             success data is the list of distinct tags and the column list
             is empty; on failure data holds a single {ERROR: message} dict
             and the column list is [ERROR].
    """
    try:
        tagged_reports = cls._DB_CONNECTOR.find(
            RUN_REPORT_COLLECTION, {TAGS: {'$exists': True}})
        user_tags = set()
        for report in tagged_reports:
            user_tags.update(report[TAGS])
        return (list(user_tags), [], None)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        failure = {ERROR: str(sys.exc_info()[1])}
        return ([failure], [ERROR], None)
def _combine_sa(self, output_path, id_report_path, gt_png_path,
                gt_png_sum_path, gt_kde_path, gt_kde_sum_path):
    """
    Combine Identity report, Assay Caller scatter plot, and Genotyper PNG

    An intermediate PDF (output_path + '_png_id') is built from the two
    summary images and the text of the identity report, merged with the
    genotyper documents via self._merge_pdfs, and then deleted. The
    intermediate file is also removed when building or merging fails.

    @param output_path:     pathname of the combined document to produce
    @param id_report_path:  pathname of identity report
    @param gt_png_path:     pathname of genotyper scatter PNG
    @param gt_png_sum_path: pathname of genotyper scatter sum PNG
    @param gt_kde_path:     pathname of genotyper KDE PNG
    @param gt_kde_sum_path: pathname of genotyper KDE sum PNG
    @return: True on success, False if any step raised (the error is logged)
    """
    path = None
    try:
        path = output_path + '_png_id'
        doc = SimpleDocTemplate(path, pagesize=landscape(letter))

        story = list()
        story.append(self.get_image(gt_png_sum_path))
        story.append(PageBreak())
        story.append(self.get_image(gt_kde_sum_path))
        story.append(PageBreak())

        styles = getSampleStyleSheet()
        story.append(Paragraph('Identity Report', styles['h2']))
        story.append(Spacer(1, 0.2 * inch))

        with open(id_report_path, 'r') as id_report:
            lines = id_report.readlines()

        for line in lines:
            # Reproduce the report's leading whitespace as a left indent.
            # The style is used directly; registering it in a fresh style
            # sheet per line is not needed to render the paragraph.
            left_indent = (len(line) - len(line.lstrip())) * 5
            line_style = ParagraphStyle(name='custom_style',
                                        fontName=FONT_NAME_STD,
                                        fontSize=FONT_SIZE,
                                        leftIndent=left_indent)
            story.append(Paragraph(line, line_style))
        story.append(PageBreak())

        doc.build(story, onFirstPage=self.standard_page,
                  onLaterPages=self.standard_page)
        self._merge_pdfs(output_path, gt_png_path, gt_kde_path, path)
        os.unlink(path)
        return True
    except Exception:
        APP_LOGGER.exception(traceback.format_exc())
        # Best effort: do not leave the intermediate PDF behind on failure.
        if path is not None and os.path.isfile(path):
            try:
                os.unlink(path)
            except OSError:
                APP_LOGGER.exception(traceback.format_exc())
        return False
def process_request(cls, params_dict):
    """
    Submit an absorption job for a probes file and a targets file.

    A job whose name already exists in ABSORPTION_COLLECTION is rejected
    with 403. Otherwise the job document is inserted and the job is queued
    on the execution manager; any failure while doing so yields 500 with the
    error message in the response.

    @param params_dict: parsed request parameters keyed by parameter object
    @return: result of make_clean_response for the job document and an HTTP
             status code
    """
    probes_file_uuid = params_dict[cls._probes_param][0]
    targets_file_uuid = params_dict[cls._targets_param][0]
    strict = params_dict[cls._strict_param][0]
    job_name = params_dict[cls._job_name_param][0]

    job_uuid = str(uuid4())
    json_response = {
        PROBES: probes_file_uuid,
        TARGETS: targets_file_uuid,
        STRICT: strict,
        UUID: job_uuid,
        STATUS: JOB_STATUS.submitted,  # @UndefinedVariable
        JOB_NAME: job_name,
        JOB_TYPE_NAME: JOB_TYPE.absorption,  # @UndefinedVariable
        SUBMIT_DATESTAMP: datetime.today(),
    }

    existing_names = cls._DB_CONNECTOR.distinct(ABSORPTION_COLLECTION,
                                                JOB_NAME)
    if job_name in existing_names:
        return make_clean_response(json_response, 403)

    http_status_code = 200
    try:
        probes_record = cls._DB_CONNECTOR.find_one(
            PROBES_COLLECTION, UUID, probes_file_uuid)
        probes_path = probes_record[FILEPATH]
        targets_record = cls._DB_CONNECTOR.find_one(
            TARGETS_COLLECTION, UUID, targets_file_uuid)
        targets_path = targets_record[FILEPATH]
        outfile_path = os.path.join(RESULTS_PATH, job_uuid)

        # Create helper functions
        abs_callable = AbsorbtionCallable(targets_path, probes_path, strict,
                                          outfile_path, job_uuid,
                                          cls._DB_CONNECTOR)
        callback = make_absorption_callback(job_uuid, outfile_path,
                                            cls._DB_CONNECTOR)

        # Add to queue and update DB
        cls._DB_CONNECTOR.insert(ABSORPTION_COLLECTION, [json_response])
        cls._EXECUTION_MANAGER.add_job(job_uuid, abs_callable, callback)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[ERROR] = str(sys.exc_info()[1])
        http_status_code = 500
    finally:
        if ID in json_response:
            del json_response[ID]

    return make_clean_response(json_response, http_status_code)
def process_request(cls, params_dict):
    """
    Record a validation job for a probes file and a targets file.

    A job whose name already exists in VALIDATION_COLLECTION is rejected
    with 403. Otherwise both uploads are looked up (a missing upload makes
    the lookup fail and yields 500) and the job document is inserted.

    @param params_dict: parsed request parameters keyed by parameter object
    @return: result of make_clean_response for the job document and an HTTP
             status code
    """
    probes_file_uuid = params_dict[ParameterFactory.file_uuid(
        "probes", PROBES_COLLECTION)][0]
    targets_file_uuid = params_dict[ParameterFactory.file_uuid(
        "targets", TARGETS_COLLECTION)][0]
    absorb = params_dict[ParameterFactory.boolean(
        "absorb", "Check for absorbed probes.")][0]
    num = params_dict[ParameterFactory.integer(
        "num", "Minimum number of probes for a target.",
        default=3, minimum=1)][0]
    job_name = params_dict[ParameterFactory.lc_string(
        JOB_NAME, "Unique name to give this job.")][0]

    json_response = {
        PROBES: probes_file_uuid,
        TARGETS: targets_file_uuid,
        ABSORB: absorb,
        NUM: num,
        UUID: str(uuid4()),
        STATUS: JOB_STATUS.submitted,  # @UndefinedVariable
        JOB_NAME: job_name,
        DATESTAMP: datetime.today(),
    }
    http_status_code = 200

    if job_name in cls._DB_CONNECTOR.distinct(VALIDATION_COLLECTION,
                                              JOB_NAME):
        http_status_code = 403
    else:
        try:
            # Not used further yet (see the note below), but the lookups
            # fail when either upload does not exist.
            probes_path = cls._DB_CONNECTOR.find_one(
                PROBES_COLLECTION, UUID, probes_file_uuid)[FILEPATH]
            targets_path = cls._DB_CONNECTOR.find_one(
                TARGETS_COLLECTION, UUID, targets_file_uuid)[FILEPATH]

            #ADD VALIDATOR JOB TO QUEUE

            cls._DB_CONNECTOR.insert(VALIDATION_COLLECTION, [json_response])
        except Exception:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            http_status_code = 500
        finally:
            # Strip the database id on both the success and the error path;
            # it is only present if the insert got far enough to add it.
            if ID in json_response:
                del json_response[ID]

    return make_clean_response(json_response, http_status_code)
def process_request(cls, params_dict):
    """
    Return run reports.

    When a report uuid is supplied, only that report is returned with all of
    its fields (minus the database id), or an empty result if it does not
    exist. Otherwise, if a refresh is requested, update_run_reports is
    called first: with the date folders between the start and end date when
    a start date is given (end date defaults to now), with None when it is
    not. Finally get_run_reports is called, restricted to a cartridge
    serial number when one is supplied.

    @param params_dict: parsed request parameters keyed by parameter object
    @return: tuple (data, column names, None) for the single report case,
             otherwise whatever get_run_reports returns
    """
    def _first_value(parameter):
        # First value of the parameter when present and truthy, else None.
        if parameter in params_dict and params_dict[parameter][0]:
            return params_dict[parameter][0]
        return None

    # if uuid exists, return a single report with all fields
    uuid = _first_value(cls.uuid_parameter)
    if uuid is not None:
        report = cls._DB_CONNECTOR.find_one(RUN_REPORT_COLLECTION, UUID, uuid)
        if report is None:
            APP_LOGGER.debug("Run report uuid=%s does not exist." % uuid)
            return ([], [], None)
        del report[ID]
        return ([report], report.keys(), None)

    if _first_value(cls.refresh_parameter):
        start_date = _first_value(cls.start_date)
        if start_date:
            end_date = _first_value(cls.end_date)
            if not end_date:
                end_date = datetime.now()

            date_folders = list()
            # Old file location 05_10_17
            for day in daterange(start_date, end_date):
                date_folders.append(day.strftime("%m_%d_%y"))
            # New file location 2017_05/10
            for day in daterange(start_date, end_date):
                date_folders.append(day.strftime("%Y_%m/%d"))
        else:
            date_folders = None
        update_run_reports(date_folders)

    cart_sn = _first_value(cls.cart_sn_parameter)
    if cart_sn:
        return get_run_reports(cart_sn)
    return get_run_reports()
def get_hdf5_datasets(log_data, data_folder):
    """
    Fetch the HDF5 archives associated with a run report.

    Looks for '<run_id>.h5' and '<run_id>-baseline.h5' in data_folder. For a
    file already known to HDF5_COLLECTION the recorded dataset names are
    used; otherwise the file is opened read-only, every top-level name
    matching the timestamp pattern is recorded in HDF5_COLLECTION, and those
    names are used. A file that cannot be read is logged and skipped.

    @param log_data:    the document of run report yaml
    @param data_folder: folder where data is located
    @return: set of dataset names; empty if log_data is None or has no
             RUN_ID
    """
    if log_data is None or RUN_ID not in log_data:
        return set()

    run_id = log_data[RUN_ID]
    candidates = [os.path.join(data_folder, stem + '.h5')
                  for stem in (run_id, run_id + '-baseline')]
    hdf5_paths = [p for p in candidates if os.path.isfile(p)]

    dataset_name_re = re.compile(r'^\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}')

    all_datasets = set()
    for path in hdf5_paths:
        db_path = remove_disk_directory(path)
        exist_records = _DB_CONNECTOR.find(HDF5_COLLECTION,
                                           {HDF5_PATH: db_path})
        if exist_records:
            all_datasets.update(r[HDF5_DATASET] for r in exist_records)
            continue

        new_records = list()
        try:
            # Open explicitly read-only: the file is only inspected, and
            # older h5py versions default to a read/write mode.
            with h5py.File(path, 'r') as h5_file:
                for dsname in h5_file.keys():
                    if dataset_name_re.match(dsname):
                        new_records.append({
                            HDF5_PATH: db_path,
                            HDF5_DATASET: dsname,
                        })
        except Exception:
            APP_LOGGER.exception(
                'Unable to get dataset information from HDF5 file: %s' % path)

        if new_records:
            APP_LOGGER.info('Found %d datasets from HDF5 file: %s' %
                            (len(new_records), path))
            _DB_CONNECTOR.insert(HDF5_COLLECTION, new_records)
            all_datasets.update(r[HDF5_DATASET] for r in new_records)

    return all_datasets
def gen_dye_scatterplot(dyes, sys_listener_path):
    """
    Generate the dyes scatter plot for the current job and copy it to its
    final location.

    The analysis table is annotated with the 'assay' column of the assay
    caller output (rows without an identity get False). When a system
    listener path is given, clamp temperatures, channel offsets (new topic
    first, old topic as fallback) and dynamic alignment steps are parsed
    from its directory and passed to the plotting routine. Any failure is
    logged and swallowed.

    `self` is not a parameter of this function; presumably it is captured
    from an enclosing method.

    @param dyes:              dyes passed through to
                              generate_dye_scatterplots
    @param sys_listener_path: pathname inside the system listener directory,
                              or None to plot without system listener data
    """
    try:
        analysis_df = pandas.read_table(
            self.analysis_file, sep=sniff_delimiter(self.analysis_file))
        ac_df = pandas.read_table(
            self.tmp_outfile_path, sep=sniff_delimiter(self.tmp_outfile_path))

        analysis_df['assay'] = False
        has_identity = analysis_df['identity'].notnull()
        analysis_df.loc[has_identity, 'assay'] = ac_df['assay'].values

        # System listener inputs
        dyn_align_offsets, temps, steps = {}, {}, {}
        if sys_listener_path is not None:
            clamp_temp_tp = ClampTempTopicParser()
            old_channel_offset_tp = OldChannelOffsetTopicParser()
            channel_offset_tp = ChannelOffsetTopicParser()
            dyn_align_steps_tp = DynamicAlignStepsParser()

            sys_listener_parser = SystemListenerParser(
                os.path.dirname(sys_listener_path),
                topic_parsers=[clamp_temp_tp, old_channel_offset_tp,
                               channel_offset_tp, dyn_align_steps_tp])

            temps = sys_listener_parser.get_topic_results(
                clamp_temp_tp.topic)
            dyn_align_offsets = sys_listener_parser.get_topic_results(
                channel_offset_tp.topic)
            if len(dyn_align_offsets) >= 1:
                APP_LOGGER.info("Using new channel offset parser...")
            else:
                APP_LOGGER.info("Using old channel offset parser...")
                dyn_align_offsets = sys_listener_parser.get_topic_results(
                    old_channel_offset_tp.topic)
            steps = sys_listener_parser.get_topic_results(
                dyn_align_steps_tp.topic)

        generate_dye_scatterplots(analysis_df, dyes,
                                  self.tmp_dyes_plot_path,
                                  self.job_name, self.pico1_dye,
                                  dyn_align_offsets=dyn_align_offsets,
                                  temps=temps, steps=steps)
        shutil.copy(self.tmp_dyes_plot_path, self.dyes_plot_path)
        APP_LOGGER.info("Dyes scatter plot generated for %s." %
                        self.job_name)
    except:
        APP_LOGGER.exception("Dyes scatter plot generation failed.")
def get_experiment_defintions():
    """
    Retrieve experiment definition from EXP_DEF_COLLECTION.

    @return: tuple (experiment definitions, column names, None). The
             projection excludes the database id and includes UUID, NAME,
             VARIANTS, DYES and TYPE; the column names are those included
             fields in that order.
    """
    columns = OrderedDict()
    columns[ID] = 0
    columns[UUID] = 1
    columns[NAME] = 1
    columns[VARIANTS] = 1
    columns[DYES] = 1
    columns[TYPE] = 1

    # Build the list explicitly rather than mutating the result of keys(),
    # which is only a list on Python 2.
    column_names = [name for name in columns if name != ID]

    exp_defs = _DB_CONNECTOR.find(EXP_DEF_COLLECTION, {}, columns)
    APP_LOGGER.info('Retrieved %d experiment definitions.' \
                    % (len(exp_defs), ))

    return (exp_defs, column_names, None)
def process_callback(future):
    """
    Completion callback for an assay caller job.

    Records success or failure in SA_ASSAY_CALLER_COLLECTION. The names
    outfile_path, scatter_plot_path, dyes_scatter_plot_path, db_connector
    and query are not defined in this block; presumably they are captured
    from an enclosing factory function.

    @param future: the finished future; its result() re-raises any error
                   raised by the job
    """
    def _record_or_discard(update):
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(SA_ASSAY_CALLER_COLLECTION, query, {})) > 0:
            db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)
        else:
            for leftover in (outfile_path, scatter_plot_path,
                             dyes_scatter_plot_path):
                silently_remove_file(leftover)

    try:
        _ = future.result()
        succeeded = {
            STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
            RESULT: outfile_path,
            URL: get_results_url(outfile_path),
            SCATTER_PLOT: scatter_plot_path,
            SCATTER_PLOT_URL: get_results_url(scatter_plot_path),
            DYES_SCATTER_PLOT: dyes_scatter_plot_path,
            DYES_SCATTER_PLOT_URL: get_results_url(dyes_scatter_plot_path),
            FINISH_DATESTAMP: datetime.today(),
        }
        _record_or_discard({'$set': succeeded})
    except:
        APP_LOGGER.exception(traceback.format_exc())
        error_msg = str(sys.exc_info()[1])
        failed = {
            STATUS: JOB_STATUS.failed,  # @UndefinedVariable
            RESULT: None,
            FINISH_DATESTAMP: datetime.today(),
            ERROR: error_msg,
        }
        _record_or_discard({'$set': failed})
def process_callback(future):
    """
    Completion callback for an identity job.

    Records success or failure in SA_IDENTITY_COLLECTION. On success any
    errors found in the report are stored under ERROR. On failure the report
    URL is still recorded when a report file was written. The names
    report_path, outfile_path, plot_path, plate_plot_path,
    temporal_plot_path, drop_count_plot_path, db_connector and query are not
    defined in this block; presumably they are captured from an enclosing
    factory function.

    @param future: the finished future; its result() re-raises any error
                   raised by the job
    """
    def _record_or_discard(update):
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(SA_IDENTITY_COLLECTION, query, {})) > 0:
            db_connector.update(SA_IDENTITY_COLLECTION, query, update)
        else:
            silently_remove_file(report_path)
            silently_remove_file(outfile_path)
            silently_remove_file(plot_path)

    try:
        _ = future.result()

        report_errors = check_report_for_errors(report_path)
        update_data = {
            STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
            RESULT: outfile_path,
            URL: get_results_url(outfile_path),
            PLOT: plot_path,
            REPORT: report_path,
            PLOT_URL: get_results_url(plot_path),
            REPORT_URL: get_results_url(report_path),
            PLATE_PLOT_URL: get_results_url(plate_plot_path),
            TEMPORAL_PLOT_URL: get_results_url(temporal_plot_path),
            DROP_COUNT_PLOT_URL: get_results_url(drop_count_plot_path),
            FINISH_DATESTAMP: datetime.today(),
        }
        if report_errors:
            update_data[ERROR] = ' '.join(report_errors)

        _record_or_discard({"$set": update_data})
    except:
        # Deliberately broad, like the sibling callbacks: whatever went
        # wrong, the job must end up marked as failed.
        APP_LOGGER.exception(traceback.format_exc())
        error_msg = str(sys.exc_info()[1])
        update = {
            "$set": {
                STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                RESULT: None,
                FINISH_DATESTAMP: datetime.today(),
                ERROR: error_msg,
            }
        }
        # A report may have been written before the failure; keep it
        # reachable. (This used to be a bare lookup that raised KeyError
        # and prevented the failure from being recorded at all.)
        if os.path.isfile(report_path):
            update['$set'][REPORT_URL] = get_results_url(report_path)

        _record_or_discard(update)
def _generate(self, ndyes, nchoose=5):
    """
    Generate library designs that use exactly ndyes barcode dyes.

    Every combination of ndyes barcode profiles is considered. Combinations
    that lack the requested dyes (when additional database dyes are needed)
    or whose peaks are closer than the minimum peak difference are skipped.
    The optimal number of levels is computed for the rest, and designs are
    made for the first nchoose candidates in ascending order of their lowest
    peak value. Failures for individual combinations are logged and skipped.

    @param ndyes:   Integer, number of dyes to use per solution
    @param nchoose: Integer, maximum number of combinations that will be
                    further optimized
    """
    # check to see if the minimum maximum levels of dyes can make the
    # requested number of dyes
    fewest_lvls = numpy.argsort(self._barcode_min_nlvls)[:ndyes]
    most_lvls = numpy.argsort(self._barcode_max_nlvls)[-ndyes:]
    min_nbarcodes = numpy.prod(self._barcode_min_nlvls[fewest_lvls])
    max_nbarcodes = numpy.prod(self._barcode_max_nlvls[most_lvls])

    # too many dyes were selected
    if min_nbarcodes > self._requested_nbarcodes:
        APP_LOGGER.info('Cannot generate requested number of barcodes (%d). '
                        'Smallest library would have %d barcodes.' %
                        (self._requested_nbarcodes, min_nbarcodes))
        return

    # too few dyes were selected
    if max_nbarcodes < self._requested_nbarcodes:
        APP_LOGGER.info('Cannot generate requested number of barcodes (%d). '
                        'Largest library would have %d barcodes.' %
                        (self._requested_nbarcodes, max_nbarcodes))
        return

    # find the optimal number of levels for each dye combination
    requested_dye_idxs = set(range(len(self._requested_dye_lots)))
    optimal_nlvls = list()
    for dye_idxs in itertools.combinations(
            range(len(self._barcode_profiles)), ndyes):
        dye_idxs = numpy.array(dye_idxs)

        # ignore combinations that do not include requested dyes
        if self.need_additional_db_dyes and \
           self._requested_dye_lots and \
           not requested_dye_idxs.issubset(dye_idxs):
            continue

        # ignore combinations in which the peaks are too close
        peaks = numpy.concatenate((self._barcode_peaks[dye_idxs],
                                   self._non_barcode_peaks))
        if numpy.any(numpy.diff(numpy.sort(peaks)) <
                     self._min_peak_difference):
            continue

        try:
            candidate_nlvls, candidate_lowest_peak = \
                self._calc_optimal_nlvls(dye_idxs)
            optimal_nlvls.append(
                (candidate_lowest_peak, dye_idxs, candidate_nlvls))
        except Exception as e:
            APP_LOGGER.exception(e)

    # Sort on the lowest peak only; the other tuple members are arrays.
    optimal_nlvls.sort(key=lambda x: x[0])

    for _, dye_idxs, nlvls in optimal_nlvls[:nchoose]:
        try:
            self._make_design(nlvls, dye_idxs)
        except Exception as e:
            APP_LOGGER.exception(e)
def process_request(cls, params_dict):
    """
    Check a proposed dye configuration for cluster collisions.

    Each dye metric is a (dye name, number of levels, low intensity, high
    intensity) record. Centroids and clusters are built from the levels and
    intensity ranges and checked with check_collision.

    Status codes: 200 on success, 415 on IOError, 500 on any other error.

    @param params_dict: parsed request parameters keyed by parameter object
    @return: result of make_clean_response for the JSON body and status code
    """
    dye_metrics = params_dict[cls._dyes_metrics]
    drop_ave = params_dict[cls._drop_ave_diameter][0]
    drop_std = params_dict[cls._drop_std_diameter][0]

    http_status_code = 200
    uuid = str(uuid4())
    json_response = {
        UUID: uuid,
        DATESTAMP: datetime.today(),
    }

    try:
        dye_names = list()
        nlvls = list()
        intensities = list()
        for dye_name, nlvl, low, high in dye_metrics:
            dye_names.append(dye_name)
            nlvls.append(nlvl)
            intensities.append((low, high))

        centroids = make_centroids(nlvls, intensities)
        clusters = make_clusters(centroids, drop_ave=drop_ave,
                                 drop_std=drop_std)
        collisions = check_collision(clusters)

        json_response[DROP_AVE_DIAMETER] = drop_ave
        json_response[DROP_STD_DIAMETER] = drop_std
        # A real list of lists; map() is a lazy iterator on Python 3.
        json_response[DYE_METRICS] = [list(metric) for metric in dye_metrics]
        json_response['collisions'] = collisions
        json_response['nclusters'] = numpy.prod(nlvls)
    except IOError:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 415
        json_response[ERROR] = str(sys.exc_info()[1])
    except Exception:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 500
        json_response[ERROR] = str(sys.exc_info()[1])

    return make_clean_response(json_response, http_status_code)
def _calc_dye_max_intensities(self, dye_idxs, nlvls, resolution=100.0):
    """
    The ideal library will take full advantage of our intensity space, which
    peaks at 65535 intensity units. This function attempt to optimize the
    maximum level of each dye by recomposing the profiles and testing that
    they do not saturate.

    For each dye, `resolution` candidate maximum intensities are sampled
    evenly between 10000.0 and MAX_INTEN. Every combination of candidates
    across the dyes is formed, saturated combinations are removed, the most
    variable ones are removed, and the remaining combination with the
    largest summed intensity is returned. Percent cutoffs from 2.5 up to
    22.5 are tried in turn until one succeeds.

    @param dye_idxs:   1D numpy array of the indexes of the barcode dyes
    @param nlvls:      1D numpy array of the number of levels for each dye
    @param resolution: Number of evenly spaced candidate intensities sampled
                       per dye (truncated to an integer). The number of
                       combinations grows as resolution ** len(dye_idxs).
    @return: 1D numpy of maximum intensities for each dye.
    @raise Exception: if no cutoff produced one maximum per dye
    """
    # numpy requires integer sample and repeat counts; the default is a
    # float for historical reasons.
    npoints = int(resolution)

    dye_max_intensities = None

    # test various percent cutoffs
    for percent_best in numpy.arange(2.5, 25, 2.5):
        try:
            # make a group of scalars for each dye (dimension)
            scalars = [
                numpy.linspace(10000.0, MAX_INTEN, npoints).reshape(-1, 1)
                for _ in dye_idxs
            ]

            # create barcode profiles by summing each combination of dyes
            # profiles to find an optimal max barcode profile
            scalar_combos = scalars.pop(0)
            while scalars:
                scalar_combos = numpy.hstack((
                    numpy.repeat(scalar_combos, npoints, axis=0),
                    numpy.tile(scalars.pop(0), (len(scalar_combos), 1)),
                ))

            scalar_combos = self._rm_saturated(scalar_combos, dye_idxs)
            scalar_combos = self._rm_most_variable(scalar_combos,
                                                   percent_best, nlvls)
            midx = numpy.argmax(numpy.sum(scalar_combos, axis=1))
            dye_max_intensities = scalar_combos[midx]
            break
        except Exception as e:
            APP_LOGGER.exception(e)

    if dye_max_intensities is None or \
       len(dye_max_intensities) != len(dye_idxs):
        raise Exception('A library cannot be generated from this '
                        'combination of dyes.')

    return dye_max_intensities
def read_report_file_txt(report_file, date_obj, utag):
    """
    Extract information from a run_log.txt file, and returns a dictionary

    Non-blank lines are expected to be "key: value". The user entry is split
    on commas into a list. Run description, exit notes and TDI stack entries
    also absorb the following lines up to the next line containing a colon;
    for TDI stacks the archive paths are extracted from the combined text,
    for the other two the lines are joined with ', '. Lines that cannot be
    processed are skipped.

    @param report_file: pathname of the txt run log
    @param date_obj:    value stored under DATETIME
    @param utag:        value stored under UTAG
    @return: report_obj.as_dict() of the RunReportWebUI built from the
             parsed data, or None if the file is empty or an error occurs
    """
    try:
        with open(report_file, 'r') as rf:
            lines = rf.readlines()

        if not lines:
            APP_LOGGER.error("The log file, %s, is empty." % report_file)
            return None

        data = {FILE_TYPE: 'txt', DATETIME: date_obj, UTAG: utag}
        for idx, line in enumerate(lines):
            if not line.strip():
                continue
            try:
                # Raises (and skips the line) unless there is exactly one
                # colon on the line.
                key, value = [part.strip() for part in line.split(':')]

                if key == USER_TXT and value:
                    data[key] = [user.strip() for user in value.split(',')]
                elif key in [RUN_DESCRIPTION_TXT, EXIT_NOTES_TXT,
                             TDI_STACKS_TXT]:
                    # Gather continuation lines up to the next keyed line.
                    values = [value]
                    for follower in lines[idx + 1:]:
                        if ':' in follower:
                            break
                        values.append(follower.strip())

                    if key == TDI_STACKS_TXT:
                        regex = ARCHIVES_PATH + '/[^/]+'
                        data[key] = re.findall(regex, ''.join(values))
                    else:
                        data[key] = ', '.join(values)
                elif value:
                    data[key] = value
            except:
                continue

        report_obj = RunReportWebUI.from_dict(**data)
        return report_obj.as_dict()
    except:
        APP_LOGGER.error("Error raised for report %s: %s" %
                         (report_file, traceback.format_exc()))
        return None
def read_report_file(report_file, date_obj, utag):
    """
    Extract information from a run log file in txt or yaml format

    The reader is chosen from the (case-insensitive) ending of the file
    name: 'txt' or 'yaml'.

    The path of a sample run_info file is:
        /mnt/runs/run_reports/04_05_16/Tue05_1424_beta17/run_info.txt
    date_obj is based on 04_05_16
    utag is 2016_04_05_Tue05_1424_beta17

    @return: the dictionary produced by the matching reader, or None when
             the pathname is empty or has neither ending
    """
    if not report_file:
        APP_LOGGER.debug("File pathname, %s, is an empty string." %
                         report_file)
        return None

    basename = os.path.basename(report_file).lower()
    readers = (
        ('txt', read_report_file_txt),
        ('yaml', read_report_file_yaml),
    )
    for ending, reader in readers:
        if basename.endswith(ending):
            return reader(report_file, date_obj, utag)

    APP_LOGGER.debug("File extension must be txt or yaml.")
    return None
def read_report_file_yaml(report_file, date_obj, utag):
    """
    Extract information from a run_log.yaml file, and returns a dictionary

    The parsed document is stamped with DATETIME, FILE_TYPE ('yaml') and
    UTAG. A USER entry given as a single string is split on commas into a
    list. Documents without CARTRIDGE_BC are built as RunReportWebUI, the
    others as RunReportClientUI.

    @param report_file: pathname of the yaml run log
    @param date_obj:    value stored under DATETIME
    @param utag:        value stored under UTAG
    @return: report_obj.as_dict(), or None if the file is empty, is not
             valid YAML, or any other error occurs
    """
    try:
        with open(report_file, 'r') as rf:
            try:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary Python objects and is rejected by
                # recent PyYAML releases. If run reports only contain plain
                # YAML, yaml.safe_load(rf) should be used instead - confirm.
                data = yaml.load(rf)
            except yaml.YAMLError as exc:
                APP_LOGGER.error("YAMLError %s received" % exc)
                return None

        if not data:
            APP_LOGGER.debug("YAML file, %s, is empty." % report_file)
            return None

        data[DATETIME] = date_obj
        data[FILE_TYPE] = 'yaml'
        data[UTAG] = utag
        if USER in data and isinstance(data[USER], str):
            data[USER] = [user.strip() for user in data[USER].split(',')]

        # distinguish reports from Web UI and Client UI
        if CARTRIDGE_BC not in data:
            report_obj = RunReportWebUI.from_dict(**data)
        else:
            report_obj = RunReportClientUI.from_dict(**data)
        return report_obj.as_dict()
    except Exception:
        APP_LOGGER.error("Error raised for report %s: %s" %
                         (report_file, traceback.format_exc()))
        return None
def process_request(cls, params_dict):
    """
    Store an uploaded targets FASTA file and register it in
    TARGETS_COLLECTION.

    Status codes: 403 when the storage path already exists or a file with
    the same name is already registered, 415 when validate_fasta rejects
    the upload, 500 when saving or registering fails, 200 otherwise.

    @param params_dict: parsed request parameters keyed by parameter object
    @return: result of make_clean_response for the JSON body and status code
    """
    targets_file = params_dict[ParameterFactory.file(
        "Targets FASTA file.")][0]
    json_response = {FILENAME: targets_file.filename}
    file_uuid = str(uuid4())
    path = os.path.join(TARGETS_UPLOAD_PATH, file_uuid)

    existing_filenames = cls._DB_CONNECTOR.distinct(TARGETS_COLLECTION,
                                                    FILENAME)
    if os.path.exists(path) or targets_file.filename in existing_filenames:
        return make_clean_response(json_response, 403)
    if validate_fasta(targets_file) == False:
        return make_clean_response(json_response, 415)

    http_status_code = 200
    try:
        targets_file.save(path)
        targets_file.close()

        filename = targets_file.filename
        # Text after the last dot, if the name has one.
        file_format = filename.split(".")[-1] if "." in filename else "Unknown"

        json_response[URL] = "http://%s/uploads/%s/targets/%s" % (
            HOSTNAME, PORT, file_uuid)
        json_response[FILEPATH] = path
        json_response[UUID] = file_uuid
        json_response[DATESTAMP] = datetime.today()
        json_response[TYPE] = "targets"
        json_response[FORMAT] = file_format

        cls._DB_CONNECTOR.insert(TARGETS_COLLECTION, [json_response])
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[ERROR] = str(sys.exc_info()[1])
        http_status_code = 500
    finally:
        if ID in json_response:
            del json_response[ID]

    return make_clean_response(json_response, http_status_code)
def process_callback(future):
    """
    Completion callback for an exploratory job.

    Builds a success or failure update and, in all cases, either applies it
    to the job's document in SA_EXPLORATORY_COLLECTION or, if the job is no
    longer there, removes the result files. The names outfile_path, dirname,
    uuid, the *_fn file names, db_connector and query are not defined in
    this block; presumably they are captured from an enclosing factory
    function.

    @param future: the finished future; its result() re-raises any error
                   raised by the job
    """
    def _in_results_dir(filename):
        return os.path.join(dirname, filename)

    try:
        _ = future.result()

        scatter_ind_pdf = _in_results_dir(scatter_ind_pdf_fn)
        scatter_png = _in_results_dir(scatter_png_fn)
        kde_ind_pdf = _in_results_dir(kde_ind_pdf_fn)
        kde_png = _in_results_dir(kde_png_fn)

        update = {
            "$set": {
                STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                RESULT: outfile_path,
                URL: get_results_url(_in_results_dir(uuid)),
                PNG: scatter_ind_pdf,
                PNG_URL: get_results_url(scatter_ind_pdf),
                PNG_SUM: scatter_png,
                PNG_SUM_URL: get_results_url(scatter_png),
                KDE_PNG: kde_ind_pdf,
                KDE_PNG_URL: get_results_url(kde_ind_pdf),
                KDE_PNG_SUM: kde_png,
                KDE_PNG_SUM_URL: get_results_url(kde_png),
                FINISH_DATESTAMP: datetime.today(),
            }
        }
    except:
        APP_LOGGER.exception(
            "Error in Exploratory post request process callback.")
        error_msg = str(sys.exc_info()[1])
        update = {
            "$set": {
                STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                RESULT: None,
                PDF: None,
                PNG: None,
                PNG_SUM: None,
                FINISH_DATESTAMP: datetime.today(),
                ERROR: error_msg,
            }
        }
    finally:
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(SA_EXPLORATORY_COLLECTION, query, {})) > 0:
            db_connector.update(SA_EXPLORATORY_COLLECTION, query, update)
        else:
            silently_remove_file(outfile_path)
            for leftover_fn in (scatter_png_fn, scatter_ind_pdf_fn,
                                kde_png_fn, kde_ind_pdf_fn):
                silently_remove_file(_in_results_dir(leftover_fn))