def absorption_callback(future):
    try:
        _ = future.result()
        update = {"$set": {
                            STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                            RESULT: outfile_path,
                            FINISH_DATESTAMP: datetime.today(),
                            URL: "http://%s/results/%s/%s" % (HOSTNAME, PORT, uuid),
                          }
                 }
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(ABSORPTION_COLLECTION, query, {})) > 0:
            db_connector.update(ABSORPTION_COLLECTION, query, update)
        elif os.path.isfile(outfile_path):
            os.remove(outfile_path)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        error_msg = str(sys.exc_info()[1])
        update = {"$set": {STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                           RESULT: None,
                           FINISH_DATESTAMP: datetime.today(),
                           ERROR: error_msg}}
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(ABSORPTION_COLLECTION, query, {})) > 0:
            db_connector.update(ABSORPTION_COLLECTION, query, update)
        elif os.path.isfile(outfile_path):
            os.remove(outfile_path)
def process_request(cls, params_dict):
    dataset = params_dict[cls.dataset_parameter][0]
    report_uuid = params_dict[cls.report_uuid_parameter][0]
    http_status_code = 200
    json_response = {RUN_REPORT_UUID: report_uuid, HDF5_DATASET: dataset}
    try:
        cls._DB_CONNECTOR.update(
            RUN_REPORT_COLLECTION,
            {UUID: report_uuid},
            {'$pull': {IMAGE_STACKS: {'name': dataset, 'upload': True}}})
        cls._DB_CONNECTOR.remove(HDF5_COLLECTION, {HDF5_DATASET: dataset})
        json_response.update({"unassociate": True})
        APP_LOGGER.info("Removed dataset name=%s from run report uuid=%s"
                        % (dataset, report_uuid))
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[ERROR] = str(sys.exc_info()[1])
        http_status_code = 500
    return json_response, http_status_code
def process_request(cls, params_dict):
    tags = [t for t in params_dict[cls.tags_parameter] if t]
    report_uuid = params_dict[cls.report_uuid_parameter][0]
    http_status_code = 200
    json_response = {RUN_REPORT_UUID: report_uuid, TAGS: tags}
    try:
        cls._DB_CONNECTOR.update(RUN_REPORT_COLLECTION,
                                 {UUID: report_uuid},
                                 {'$addToSet': {TAGS: {'$each': tags}}})
        APP_LOGGER.info("Updated run report uuid=%s with tags %s."
                        % (report_uuid, tags))
        json_response[STATUS] = SUCCEEDED
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[STATUS] = FAILED
        json_response[ERROR] = str(sys.exc_info()[1])
        http_status_code = 500
    return make_clean_response(json_response, http_status_code)
def process_callback(future):
    try:
        _ = future.result()
        update = {"$set": {
                            STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                            RESULT: outfile_path,
                            FINISH_DATESTAMP: datetime.today(),
                            URL: get_results_url(outfile_path),
                          }
                 }
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(PA_CONVERT_IMAGES_COLLECTION, query, {})) > 0:
            db_connector.update(PA_CONVERT_IMAGES_COLLECTION, query, update)
        else:
            silently_remove_file(outfile_path)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        error_msg = str(sys.exc_info()[1])
        update = {"$set": {STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                           RESULT: None,
                           FINISH_DATESTAMP: datetime.today(),
                           ERROR: error_msg}}
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(PA_CONVERT_IMAGES_COLLECTION, query, {})) > 0:
            db_connector.update(PA_CONVERT_IMAGES_COLLECTION, query, update)
        else:
            silently_remove_file(outfile_path)
def process_request(cls, params_dict):
    metadata_file = params_dict[cls._file_param][0]
    application = params_dict[cls._app_param][0]
    json_response = {FILENAME: metadata_file.filename}
    http_status_code = 200
    file_uuid = str(uuid4())
    path = os.path.join(TMP_PATH, file_uuid)
    try:
        metadata_file.save(path)
        metadata_file.close()

        dialect = get_dialect(path)
        if dialect:
            probe_ids = cls._DB_CONNECTOR.distinct(PROBE_METADATA_COLLECTION,
                                                   PROBE_ID)
            ids_are_unique = cls.update_db(dialect, path, probe_ids, application)
            if not ids_are_unique:
                http_status_code = 403
        else:
            http_status_code = 415
            json_response[ERROR] = "Invalid file format - file must " \
                                   "be either tab or comma delimited."
    except IOError:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 415
        json_response[ERROR] = str(sys.exc_info()[1])
    except:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 500
        json_response[ERROR] = str(sys.exc_info()[1])
    finally:
        silently_remove_file(path)

    return make_clean_response(json_response, http_status_code)
def set_defaults(self):
    """
    Certain parameters that the user may not have sent can come from the
    experiment definition; set them here.

    Set workflow based on experiment type. The first 3 stages in each
    workflow are primary analysis, identity, and assay caller. The 4th stage
    depends on the type of experiment, i.e., genotyper API for a hotspot
    experiment, exploratory API for an exploratory experiment, and
    sequencing API for a sequencing experiment.
    """
    try:
        exp_def_fetcher = ExpDefHandler()
        experiment = exp_def_fetcher.get_experiment_definition(
            self.parameters[EXP_DEF])
        self.exp_type = experiment.exp_type
        self.workflow = [PROCESS, IDENTITY, ASSAY_CALLER] + \
                        [WORKFLOW_LOOKUP[self.exp_type]]
        self.document_list = [PA_DOCUMENT, ID_DOCUMENT, AC_DOCUMENT] + \
                             [DOCUMENT_LOOKUP[self.exp_type]]

        if DYES not in self.parameters or \
           DYE_LEVELS not in self.parameters or \
           NUM_PROBES not in self.parameters or \
           PICO1_DYE not in self.parameters:
            # get dyes and number of levels
            dye_levels = defaultdict(int)
            for barcode in experiment.barcodes:
                for dye_name, lvl in barcode.dye_levels.items():
                    dye_levels[dye_name] = max(dye_levels[dye_name],
                                               int(lvl + 1))

            if DYES not in self.parameters:
                self.parameters[DYES] = dye_levels.keys()
            if DYE_LEVELS not in self.parameters:
                self.parameters[DYE_LEVELS] = dye_levels.items()
            if NUM_PROBES not in self.parameters:
                self.parameters[NUM_PROBES] = len(experiment.barcodes)
            if PICO1_DYE not in self.parameters:
                self.parameters[PICO1_DYE] = None
    except:
        APP_LOGGER.exception(traceback.format_exc())

    # set parameters for anything user might not have set
    if FILTERED_DYES not in self.parameters:
        self.parameters[FILTERED_DYES] = list()
    if IGNORED_DYES not in self.parameters:
        self.parameters[IGNORED_DYES] = list()
    if CONTINUOUS_PHASE not in self.parameters:
        self.parameters[CONTINUOUS_PHASE] = False
    if DEV_MODE not in self.parameters:
        self.parameters[DEV_MODE] = DEFAULT_DEV_MODE
    if DRIFT_COMPENSATE not in self.parameters:
        self.parameters[DRIFT_COMPENSATE] = DEFAULT_DRIFT_COMPENSATE
def process_request(cls, params_dict):
    job_uuids = params_dict[cls.job_uuid_param]
    job_name = params_dict[cls.job_name_param][0]
    exp_def_name = params_dict[cls.exp_defs_param][0]
    required_drops = params_dict[cls.req_drops_param][0]

    json_response = {GENOTYPER: []}
    status_codes = list()
    for i, assay_caller_uuid in enumerate(job_uuids):
        if len(job_uuids) == 1:
            cur_job_name = job_name
        else:
            cur_job_name = "%s-%d" % (job_name, i)

        status_code = 200
        if cur_job_name in cls._DB_CONNECTOR.distinct(SA_GENOTYPER_COLLECTION,
                                                      JOB_NAME):
            status_code = 403
            json_response[GENOTYPER].append({ERROR: 'Job exists.'})
        else:
            try:
                # Create helper functions
                genotyper_callable = SaGenotyperCallable(assay_caller_uuid,
                                                         exp_def_name,
                                                         required_drops,
                                                         cls._DB_CONNECTOR,
                                                         cur_job_name)
                response = copy.deepcopy(genotyper_callable.document)
                callback = make_process_callback(genotyper_callable.uuid,
                                                 exp_def_name,
                                                 genotyper_callable.ac_result_path,
                                                 genotyper_callable.ignored_dyes,
                                                 genotyper_callable.outfile_path,
                                                 cls._DB_CONNECTOR,
                                                 cur_job_name)

                # Add to queue
                cls._EXECUTION_MANAGER.add_job(response[UUID],
                                               genotyper_callable, callback)
            except:
                APP_LOGGER.exception("Error processing Genotyper post request.")
                response = {JOB_NAME: cur_job_name,
                            ERROR: str(sys.exc_info()[1])}
                status_code = 500
            finally:
                if ID in response:
                    del response[ID]
                json_response[GENOTYPER].append(response)

        status_codes.append(status_code)

    # If all jobs submitted successfully, then 200 should be returned.
    # Otherwise, the maximum status code seems good enough.
    return make_clean_response(json_response, max(status_codes))
def process_request(cls, params_dict):
    try:
        valid_files = [fp for fp in os.listdir(MODIFIED_ARCHIVES_PATH)
                       if allowed_file(os.path.join(MODIFIED_ARCHIVES_PATH, fp))]
        return (valid_files, [], None)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        return ([{ERROR: str(sys.exc_info()[1])}], [ERROR], None)
def process_request(cls, params_dict):
    try:
        reports = cls._DB_CONNECTOR.find(RUN_REPORT_COLLECTION,
                                         {TAGS: {'$exists': True}})
        user_tags = set(t for r in reports for t in r[TAGS])
        return (list(user_tags), [], None)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        return ([{ERROR: str(sys.exc_info()[1])}], [ERROR], None)
def gen_dye_scatterplot(dyes, sys_listener_path):
    try:
        analysis_df = pandas.read_table(self.analysis_file,
                                        sep=sniff_delimiter(self.analysis_file))
        ac_df = pandas.read_table(self.tmp_outfile_path,
                                  sep=sniff_delimiter(self.tmp_outfile_path))
        analysis_df['assay'] = False
        analysis_df.loc[analysis_df['identity'].notnull(), 'assay'] = \
            ac_df['assay'].values

        # System listener inputs
        dyn_align_offsets = {}
        temps = {}
        steps = {}
        if sys_listener_path is not None:
            sys_listener_dir = os.path.dirname(sys_listener_path)
            clamp_temp_tp = ClampTempTopicParser()
            old_channel_offset_tp = OldChannelOffsetTopicParser()
            channel_offset_tp = ChannelOffsetTopicParser()
            dyn_align_steps_tp = DynamicAlignStepsParser()
            topic_parsers = [clamp_temp_tp, old_channel_offset_tp,
                             channel_offset_tp, dyn_align_steps_tp]
            sys_listener_parser = SystemListenerParser(
                sys_listener_dir, topic_parsers=topic_parsers)
            temps = sys_listener_parser.get_topic_results(clamp_temp_tp.topic)
            dyn_align_offsets = sys_listener_parser.get_topic_results(
                channel_offset_tp.topic)
            if len(dyn_align_offsets) < 1:
                APP_LOGGER.info("Using old channel offset parser...")
                dyn_align_offsets = sys_listener_parser.get_topic_results(
                    old_channel_offset_tp.topic)
            else:
                APP_LOGGER.info("Using new channel offset parser...")
            steps = sys_listener_parser.get_topic_results(
                dyn_align_steps_tp.topic)

        generate_dye_scatterplots(analysis_df,
                                  dyes,
                                  self.tmp_dyes_plot_path,
                                  self.job_name,
                                  self.pico1_dye,
                                  dyn_align_offsets=dyn_align_offsets,
                                  temps=temps,
                                  steps=steps)
        shutil.copy(self.tmp_dyes_plot_path, self.dyes_plot_path)
        APP_LOGGER.info("Dyes scatter plot generated for %s." % self.job_name)
    except:
        APP_LOGGER.exception("Dyes scatter plot generation failed.")
def _generate(self, ndyes, nchoose=5):
    """
    @param ndyes:   Integer, number of dyes to use per solution
    @param nchoose: Integer, maximum number of combinations that will be
                    further optimized
    """
    # check whether the minimum/maximum levels of the dyes can make the
    # requested number of barcodes
    min_nbarcodes = numpy.product(
        self._barcode_min_nlvls[numpy.argsort(self._barcode_min_nlvls)[:ndyes]])
    max_nbarcodes = numpy.product(
        self._barcode_max_nlvls[numpy.argsort(self._barcode_max_nlvls)[-ndyes:]])

    # too many dyes were selected
    if min_nbarcodes > self._requested_nbarcodes:
        APP_LOGGER.info('Cannot generate requested number of barcodes (%d). '
                        'Smallest library would have %d barcodes.' %
                        (self._requested_nbarcodes, min_nbarcodes))
        return

    # too few dyes were selected
    if max_nbarcodes < self._requested_nbarcodes:
        APP_LOGGER.info('Cannot generate requested number of barcodes (%d). '
                        'Largest library would have %d barcodes.' %
                        (self._requested_nbarcodes, max_nbarcodes))
        return

    # find the optimal number of levels for each dye combination
    requested_dye_idxs = set(range(len(self._requested_dye_lots)))
    optimal_nlvls = list()
    for dye_idxs in itertools.combinations(xrange(len(self._barcode_profiles)),
                                           ndyes):
        dye_idxs = numpy.array(dye_idxs)
        # ignore combinations that do not include requested dyes
        if self.need_additional_db_dyes and \
           self._requested_dye_lots and \
           not requested_dye_idxs.issubset(dye_idxs):
            continue
        # ignore combinations in which the peaks are too close
        peaks = numpy.concatenate((self._barcode_peaks[dye_idxs],
                                   self._non_barcode_peaks))
        if numpy.any(numpy.diff(numpy.sort(peaks)) < self._min_peak_difference):
            continue
        try:
            candidate_nlvls, candidate_lowest_peak = \
                self._calc_optimal_nlvls(dye_idxs)
            optimal_nlvls.append((candidate_lowest_peak, dye_idxs,
                                  candidate_nlvls))
        except Exception as e:
            APP_LOGGER.exception(e)

    optimal_nlvls.sort(key=lambda x: x[0])
    for _, dye_idxs, nlvls in optimal_nlvls[:nchoose]:
        try:
            self._make_design(nlvls, dye_idxs)
        except Exception as e:
            APP_LOGGER.exception(e)
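# Illustrative sketch, not part of the original module: the feasibility test
# at the top of _generate bounds the library size by taking the product of the
# ndyes smallest (or largest) per-dye level counts. The helper below is a
# hypothetical standalone version of that check.
def _example_library_bounds(min_nlvls, max_nlvls, ndyes):
    import numpy
    min_nlvls = numpy.asarray(min_nlvls)
    max_nlvls = numpy.asarray(max_nlvls)
    # smallest library any ndyes-dye combination could produce
    smallest = numpy.prod(numpy.sort(min_nlvls)[:ndyes])
    # largest library any ndyes-dye combination could produce
    largest = numpy.prod(numpy.sort(max_nlvls)[-ndyes:])
    return smallest, largest

# e.g. _example_library_bounds([2, 2, 3], [4, 5, 6], 2) returns (4, 30):
# a requested library size outside this range cannot be generated with 2 dyes.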
def _combine_sa(self, output_path, id_report_path, gt_png_path,
                gt_png_sum_path, gt_kde_path, gt_kde_sum_path):
    """
    Combine Identity report, Assay Caller scatter plot, and Genotyper PNG

    @param output_path:     pathname of the combined output PDF
    @param id_report_path:  pathname of identity report
    @param gt_png_path:     pathname of genotyper scatter PNG
    @param gt_png_sum_path: pathname of genotyper scatter sum PNG
    @param gt_kde_path:     pathname of genotyper KDE PNG
    @param gt_kde_sum_path: pathname of genotyper KDE sum PNG
    """
    try:
        path = output_path + '_png_id'
        doc = SimpleDocTemplate(path, pagesize=landscape(letter))
        story = list()
        story.append(self.get_image(gt_png_sum_path))
        story.append(PageBreak())
        story.append(self.get_image(gt_kde_sum_path))
        story.append(PageBreak())

        styles = getSampleStyleSheet()
        id_title = Paragraph('Identity Report', styles['h2'])
        story.append(id_title)
        story.append(Spacer(1, 0.2 * inch))
        with open(id_report_path, 'r') as id_report:
            lines = id_report.readlines()
            for line in lines:
                styles = getSampleStyleSheet()
                left_indent = (len(line) - len(line.lstrip())) * 5
                styles.add(ParagraphStyle(name='custom_style',
                                          fontName=FONT_NAME_STD,
                                          fontSize=FONT_SIZE,
                                          leftIndent=left_indent))
                p = Paragraph(line, styles['custom_style'])
                story.append(p)
        story.append(PageBreak())

        doc.build(story, onFirstPage=self.standard_page,
                  onLaterPages=self.standard_page)
        self._merge_pdfs(output_path, gt_png_path, gt_kde_path, path)
        os.unlink(path)
        return True
    except:
        APP_LOGGER.exception(traceback.format_exc())
        return False
def process_request(cls, params_dict):
    probes_file_uuid = params_dict[cls._probes_param][0]
    targets_file_uuid = params_dict[cls._targets_param][0]
    strict = params_dict[cls._strict_param][0]
    job_name = params_dict[cls._job_name_param][0]

    json_response = {
        PROBES: probes_file_uuid,
        TARGETS: targets_file_uuid,
        STRICT: strict,
        UUID: str(uuid4()),
        STATUS: JOB_STATUS.submitted,        # @UndefinedVariable
        JOB_NAME: job_name,
        JOB_TYPE_NAME: JOB_TYPE.absorption,  # @UndefinedVariable
        SUBMIT_DATESTAMP: datetime.today(),
    }
    http_status_code = 200

    if job_name in cls._DB_CONNECTOR.distinct(ABSORPTION_COLLECTION, JOB_NAME):
        http_status_code = 403
    else:
        try:
            probes_path = cls._DB_CONNECTOR.find_one(
                PROBES_COLLECTION, UUID, probes_file_uuid)[FILEPATH]
            targets_path = cls._DB_CONNECTOR.find_one(
                TARGETS_COLLECTION, UUID, targets_file_uuid)[FILEPATH]
            outfile_path = os.path.join(RESULTS_PATH, json_response[UUID])

            # Create helper functions
            abs_callable = AbsorbtionCallable(targets_path, probes_path,
                                              strict, outfile_path,
                                              json_response[UUID],
                                              cls._DB_CONNECTOR)
            callback = make_absorption_callback(json_response[UUID],
                                                outfile_path,
                                                cls._DB_CONNECTOR)

            # Add to queue and update DB
            cls._DB_CONNECTOR.insert(ABSORPTION_COLLECTION, [json_response])
            cls._EXECUTION_MANAGER.add_job(json_response[UUID],
                                           abs_callable, callback)
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            http_status_code = 500
        finally:
            if ID in json_response:
                del json_response[ID]

    return make_clean_response(json_response, http_status_code)
def process_request(cls, params_dict):
    probes_file_uuid = params_dict[ParameterFactory.file_uuid(
        "probes", PROBES_COLLECTION)][0]
    targets_file_uuid = params_dict[ParameterFactory.file_uuid(
        "targets", TARGETS_COLLECTION)][0]
    absorb = params_dict[ParameterFactory.boolean(
        "absorb", "Check for absorbed probes.")][0]
    num = params_dict[ParameterFactory.integer(
        "num", "Minimum number of probes for a target.",
        default=3, minimum=1)][0]
    job_name = params_dict[ParameterFactory.lc_string(
        JOB_NAME, "Unique name to give this job.")][0]

    json_response = {
        PROBES: probes_file_uuid,
        TARGETS: targets_file_uuid,
        ABSORB: absorb,
        NUM: num,
        UUID: str(uuid4()),
        STATUS: JOB_STATUS.submitted,  # @UndefinedVariable
        JOB_NAME: job_name,
        DATESTAMP: datetime.today(),
    }
    http_status_code = 200

    if job_name in cls._DB_CONNECTOR.distinct(VALIDATION_COLLECTION, JOB_NAME):
        http_status_code = 403
    else:
        try:
            probes_path = cls._DB_CONNECTOR.find_one(
                PROBES_COLLECTION, UUID, probes_file_uuid)[FILEPATH]
            targets_path = cls._DB_CONNECTOR.find_one(
                TARGETS_COLLECTION, UUID, targets_file_uuid)[FILEPATH]

            # ADD VALIDATOR JOB TO QUEUE
            cls._DB_CONNECTOR.insert(VALIDATION_COLLECTION, [json_response])
            del json_response[ID]
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            http_status_code = 500

    return make_clean_response(json_response, http_status_code)
def get_hdf5_datasets(log_data, data_folder):
    """
    Fetch the HDF5 archives associated with a run report.

    @param log_data:    the document of the run report yaml
    @param data_folder: folder where data is located
    """
    if log_data is None or RUN_ID not in log_data:
        return set()

    run_id = log_data[RUN_ID]
    hdf5_paths = [os.path.join(data_folder, f + '.h5')
                  for f in [run_id, run_id + '-baseline']
                  if os.path.isfile(os.path.join(data_folder, f + '.h5'))]
    all_datasets = set()
    for path in hdf5_paths:
        exist_records = _DB_CONNECTOR.find(
            HDF5_COLLECTION, {HDF5_PATH: remove_disk_directory(path)})
        if exist_records:
            all_datasets.update(set(r[HDF5_DATASET] for r in exist_records))
            continue

        new_records = list()
        try:
            with h5py.File(path) as h5_file:
                dataset_names = h5_file.keys()
                for dsname in dataset_names:
                    if re.match(r'^\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}', dsname):
                        new_records.append({
                            HDF5_PATH: remove_disk_directory(path),
                            HDF5_DATASET: dsname,
                        })
        except:
            APP_LOGGER.exception(
                'Unable to get dataset information from HDF5 file: %s' % path)

        if new_records:
            APP_LOGGER.info('Found %d datasets from HDF5 file: %s'
                            % (len(new_records), path))
            _DB_CONNECTOR.insert(HDF5_COLLECTION, new_records)
            all_datasets.update(set(r[HDF5_DATASET] for r in new_records))

    return all_datasets
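# Illustrative sketch, not part of the original module: only dataset names
# matching the timestamp pattern used in get_hdf5_datasets are recorded. The
# helper and sample names below are hypothetical.
def _example_is_timestamp_dataset(dsname):
    import re
    return bool(re.match(r'^\d{4}-\d{2}-\d{2}_\d{4}\.\d{2}', dsname))

# _example_is_timestamp_dataset('2017-05-01_1322.17')  -> True
# _example_is_timestamp_dataset('laser_power_log')     -> False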
def process_callback(future):
    try:
        _ = future.result()
        update = {'$set': {
                            STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                            RESULT: outfile_path,
                            URL: get_results_url(outfile_path),
                            SCATTER_PLOT: scatter_plot_path,
                            SCATTER_PLOT_URL: get_results_url(scatter_plot_path),
                            DYES_SCATTER_PLOT: dyes_scatter_plot_path,
                            DYES_SCATTER_PLOT_URL: get_results_url(dyes_scatter_plot_path),
                            FINISH_DATESTAMP: datetime.today(),
                          }
                 }
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(SA_ASSAY_CALLER_COLLECTION, query, {})) > 0:
            db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)
        else:
            silently_remove_file(outfile_path)
            silently_remove_file(scatter_plot_path)
            silently_remove_file(dyes_scatter_plot_path)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        error_msg = str(sys.exc_info()[1])
        update = {'$set': {STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                           RESULT: None,
                           FINISH_DATESTAMP: datetime.today(),
                           ERROR: error_msg}}
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(SA_ASSAY_CALLER_COLLECTION, query, {})) > 0:
            db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)
        else:
            silently_remove_file(outfile_path)
            silently_remove_file(scatter_plot_path)
            silently_remove_file(dyes_scatter_plot_path)
def process_callback(future):
    try:
        _ = future.result()
        report_errors = check_report_for_errors(report_path)
        update_data = {STATUS: JOB_STATUS.succeeded,
                       RESULT: outfile_path,
                       URL: get_results_url(outfile_path),
                       PLOT: plot_path,
                       REPORT: report_path,
                       PLOT_URL: get_results_url(plot_path),
                       REPORT_URL: get_results_url(report_path),
                       PLATE_PLOT_URL: get_results_url(plate_plot_path),
                       TEMPORAL_PLOT_URL: get_results_url(temporal_plot_path),
                       DROP_COUNT_PLOT_URL: get_results_url(drop_count_plot_path),
                       FINISH_DATESTAMP: datetime.today()}
        if report_errors:
            update_data[ERROR] = ' '.join(report_errors)
        update = {"$set": update_data}
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(SA_IDENTITY_COLLECTION, query, {})) > 0:
            db_connector.update(SA_IDENTITY_COLLECTION, query, update)
        else:
            silently_remove_file(report_path)
            silently_remove_file(outfile_path)
            silently_remove_file(plot_path)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        error_msg = str(sys.exc_info()[1])
        update = {"$set": {STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                           RESULT: None,
                           FINISH_DATESTAMP: datetime.today(),
                           ERROR: error_msg}}
        if os.path.isfile(report_path):
            # keep the partially generated report reachable from the failure
            # record (the original statement was an incomplete expression)
            update['$set'][REPORT_URL] = get_results_url(report_path)
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(SA_IDENTITY_COLLECTION, query, {})) > 0:
            db_connector.update(SA_IDENTITY_COLLECTION, query, update)
        else:
            silently_remove_file(report_path)
            silently_remove_file(outfile_path)
            silently_remove_file(plot_path)
def process_request(cls, params_dict):
    dye_metrics = params_dict[cls._dyes_metrics]
    drop_ave = params_dict[cls._drop_ave_diameter][0]
    drop_std = params_dict[cls._drop_std_diameter][0]

    http_status_code = 200
    uuid = str(uuid4())
    json_response = {
        UUID: uuid,
        DATESTAMP: datetime.today(),
    }
    try:
        dye_names = list()
        nlvls = list()
        intensities = list()
        for dye_name, nlvl, low, high in dye_metrics:
            dye_names.append(dye_name)
            nlvls.append(nlvl)
            intensities.append((low, high))

        centroids = make_centroids(nlvls, intensities)
        clusters = make_clusters(centroids, drop_ave=drop_ave, drop_std=drop_std)
        collisions = check_collision(clusters)

        json_response[DROP_AVE_DIAMETER] = drop_ave
        json_response[DROP_STD_DIAMETER] = drop_std
        json_response[DYE_METRICS] = map(list, dye_metrics)
        json_response['collisions'] = collisions
        json_response['nclusters'] = numpy.product(nlvls)
    except IOError:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 415
        json_response[ERROR] = str(sys.exc_info()[1])
    except:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 500
        json_response[ERROR] = str(sys.exc_info()[1])

    return make_clean_response(json_response, http_status_code)
def _calc_dye_max_intensities(self, dye_idxs, nlvls, resolution=100.0):
    """
    The ideal library will take full advantage of our intensity space,
    which peaks at 65535 intensity units. This function attempts to
    optimize the maximum level of each dye by recomposing the profiles
    and testing that they do not saturate.

    @param dye_idxs:   1D numpy array of the indexes of the barcode dyes
    @param nlvls:      1D numpy array of the number of levels for each dye
    @param resolution: Float, intensity unit spacing, i.e. a resolution of
                       100.0 would result in intensities of: 1000.0,
                       1100.0, 1200.0...
    @return: 1D numpy array of maximum intensities for each dye.
    """
    dye_max_intensities = None
    # test various percent cutoffs
    for percent_best in numpy.arange(2.5, 25, 2.5):
        try:
            # make a group of scalars for each dye (dimension)
            scalars = [numpy.linspace(10000.0, MAX_INTEN, resolution).reshape(-1, 1)
                       for _ in dye_idxs]
            # create barcode profiles by summing each combination of dye
            # profiles to find an optimal max barcode profile
            scalar_combos = scalars.pop(0)
            while scalars:
                scalar_combos = numpy.hstack((
                    numpy.repeat(scalar_combos, resolution, axis=0),
                    numpy.tile(scalars.pop(0), (len(scalar_combos), 1))
                ))
            scalar_combos = self._rm_saturated(scalar_combos, dye_idxs)
            scalar_combos = self._rm_most_variable(scalar_combos, percent_best,
                                                   nlvls)
            midx = numpy.argmax(numpy.sum(scalar_combos, axis=1))
            dye_max_intensities = scalar_combos[midx]
            break
        except Exception as e:
            APP_LOGGER.exception(e)

    if dye_max_intensities is None or \
       len(dye_max_intensities) != len(dye_idxs):
        raise Exception('A library cannot be generated from this combination '
                        'of dyes.')

    return dye_max_intensities
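# Illustrative sketch, not part of the original module: the repeat/tile idiom
# in _calc_dye_max_intensities builds the Cartesian product of per-dye scalar
# candidates, one column per dye. The helper name and the 65535.0 ceiling
# (MAX_INTEN) are assumptions for illustration only.
def _example_scalar_combinations(ndyes=2, resolution=3, max_inten=65535.0):
    import numpy
    # one column vector of candidate maxima per dye
    scalars = [numpy.linspace(10000.0, max_inten, resolution).reshape(-1, 1)
               for _ in range(ndyes)]
    combos = scalars.pop(0)
    while scalars:
        combos = numpy.hstack((
            numpy.repeat(combos, resolution, axis=0),      # repeat existing rows
            numpy.tile(scalars.pop(0), (len(combos), 1)),  # cycle the next dye's values
        ))
    # combos has resolution ** ndyes rows; each row is one candidate set of
    # per-dye maximum intensities to test for saturation.
    return combos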
def process_request(cls, params_dict):
    targets_file = params_dict[ParameterFactory.file("Targets FASTA file.")][0]
    json_response = {FILENAME: targets_file.filename}
    http_status_code = 200
    file_uuid = str(uuid4())

    path = os.path.join(TARGETS_UPLOAD_PATH, file_uuid)
    existing_filenames = cls._DB_CONNECTOR.distinct(TARGETS_COLLECTION,
                                                    FILENAME)
    if os.path.exists(path) or targets_file.filename in existing_filenames:
        http_status_code = 403
    elif validate_fasta(targets_file) == False:
        http_status_code = 415
    else:
        try:
            targets_file.save(path)
            targets_file.close()
            json_response[URL] = "http://%s/uploads/%s/targets/%s" % \
                (HOSTNAME, PORT, file_uuid)
            json_response[FILEPATH] = path
            json_response[UUID] = file_uuid
            json_response[DATESTAMP] = datetime.today()
            json_response[TYPE] = "targets"
            if "." in targets_file.filename:
                json_response[FORMAT] = targets_file.filename.split(".")[-1]
            else:
                json_response[FORMAT] = "Unknown"

            cls._DB_CONNECTOR.insert(TARGETS_COLLECTION, [json_response])
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            http_status_code = 500
        finally:
            if ID in json_response:
                del json_response[ID]

    return make_clean_response(json_response, http_status_code)
def process_callback(future):
    try:
        _ = future.result()
        update = {"$set": {
                            STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                            RESULT: outfile_path,
                            URL: get_results_url(os.path.join(dirname, uuid)),
                            PNG: os.path.join(dirname, scatter_ind_pdf_fn),
                            PNG_URL: get_results_url(os.path.join(dirname, scatter_ind_pdf_fn)),
                            PNG_SUM: os.path.join(dirname, scatter_png_fn),
                            PNG_SUM_URL: get_results_url(os.path.join(dirname, scatter_png_fn)),
                            KDE_PNG: os.path.join(dirname, kde_ind_pdf_fn),
                            KDE_PNG_URL: get_results_url(os.path.join(dirname, kde_ind_pdf_fn)),
                            KDE_PNG_SUM: os.path.join(dirname, kde_png_fn),
                            KDE_PNG_SUM_URL: get_results_url(os.path.join(dirname, kde_png_fn)),
                            FINISH_DATESTAMP: datetime.today(),
                          }
                 }
    except:
        APP_LOGGER.exception("Error in Exploratory post request process callback.")
        error_msg = str(sys.exc_info()[1])
        update = {"$set": {STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                           RESULT: None,
                           PDF: None,
                           PNG: None,
                           PNG_SUM: None,
                           FINISH_DATESTAMP: datetime.today(),
                           ERROR: error_msg}}
    finally:
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(SA_EXPLORATORY_COLLECTION, query, {})) > 0:
            db_connector.update(SA_EXPLORATORY_COLLECTION, query, update)
        else:
            silently_remove_file(outfile_path)
            silently_remove_file(os.path.join(dirname, scatter_png_fn))
            silently_remove_file(os.path.join(dirname, scatter_ind_pdf_fn))
            silently_remove_file(os.path.join(dirname, kde_png_fn))
            silently_remove_file(os.path.join(dirname, kde_ind_pdf_fn))
def process_request(cls, params_dict):
    tag = params_dict[cls.tag_parameter][0]
    report_uuid = params_dict[cls.report_uuid_parameter][0]
    http_status_code = 200
    json_response = {RUN_REPORT_UUID: report_uuid, TAGS: [tag]}
    try:
        cls._DB_CONNECTOR.update(RUN_REPORT_COLLECTION,
                                 {UUID: report_uuid},
                                 {'$pull': {TAGS: tag}})
        json_response[STATUS] = SUCCEEDED
        APP_LOGGER.info("Removed tag name=%s from run report uuid=%s"
                        % (tag, report_uuid))
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[ERROR] = str(sys.exc_info()[1])
        json_response[STATUS] = FAILED
        http_status_code = 500
    return json_response, http_status_code
def get_datasets_from_files(filepaths):
    """
    Given the paths of HDF5/image stack files, return a tuple of a dictionary
    and a boolean. The dictionary has (filepath, set of datasets) as key,
    value. The boolean indicates whether any file contains dataset(s) with
    duplicate name(s).

    @param filepaths: filepaths
    """
    if not filepaths:
        return dict(), False

    all_exist_datasets = _DB_CONNECTOR.distinct(HDF5_COLLECTION, HDF5_DATASET)
    fp_to_datasets = defaultdict(set)
    duplicate = False
    for fp in filepaths:
        if fp.lower().endswith('.h5'):
            try:
                with h5py.File(fp, 'r') as h5_file:
                    dataset_names = h5_file.keys()
                    for dsname in dataset_names:
                        if not dsname.lower().startswith("laser_power"):
                            if dsname not in all_exist_datasets:
                                fp_to_datasets[fp].add(dsname)
                            else:
                                duplicate = True
            except:
                APP_LOGGER.exception(
                    'Unable to get dataset information from HDF5 file: %s' % fp)

    # check if there are duplicate datasets in fp_to_datasets
    unique_datasets = set()
    for datasets in fp_to_datasets.values():
        unique_datasets = unique_datasets | datasets
    if len(unique_datasets) < sum(len(d) for d in fp_to_datasets.values()):
        duplicate = True

    return fp_to_datasets, duplicate
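# Illustrative usage sketch, not part of the original module; the file path
# below is hypothetical.
#
#   fp_to_datasets, duplicate = get_datasets_from_files(
#       ['/mnt/runs/2017-05-01_1322.22.h5'])
#   if duplicate:
#       # at least one dataset name already exists in HDF5_COLLECTION or is
#       # repeated across the supplied files
#       pass
#   for path, datasets in fp_to_datasets.items():
#       print(path, sorted(datasets))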
def process_request(cls, params_dict):
    replay_stack_name = params_dict[cls._name_param][0]
    ham_stack_name = params_dict[cls._ham_imgs_param][0]
    mon1_stack_name = params_dict[cls._mon1_imgs_param][0]
    mon2_stack_name = params_dict[cls._mon2_imgs_param][0]
    short_desc = params_dict[cls._short_desc_param][0]

    http_status_code = 200
    uuid = str(uuid4())
    json_response = {DATESTAMP: datetime.today()}

    try:
        # check for existing stacks
        existing_replay_stacks = cls._DB_CONNECTOR.find(
            IMAGES_COLLECTION,
            {NAME: replay_stack_name, STACK_TYPE: REPLAY},
            [NAME, RESULT])
        existing_ham_stacks = cls._DB_CONNECTOR.find(
            IMAGES_COLLECTION,
            {NAME: ham_stack_name, STACK_TYPE: HAM},
            [RESULT])
        existing_mon1_stacks = cls._DB_CONNECTOR.find(
            IMAGES_COLLECTION,
            {NAME: mon1_stack_name, STACK_TYPE: MONITOR1},
            [RESULT])
        existing_mon2_stacks = cls._DB_CONNECTOR.find(
            IMAGES_COLLECTION,
            {NAME: mon2_stack_name, STACK_TYPE: MONITOR2},
            [RESULT])
        similar_replay_stacks = cls._DB_CONNECTOR.find(
            IMAGES_COLLECTION,
            {HAM_NAME: ham_stack_name,
             MON1_NAME: mon1_stack_name,
             MON2_NAME: mon2_stack_name,
             STACK_TYPE: REPLAY},
            [NAME, RESULT])

        # verify replay stack name is unique
        if existing_replay_stacks:
            http_status_code = 403
            json_response[ERROR] = 'Replay image stack with given name ' \
                                   'already exists.'
        # check if similar replay stack already exists
        elif similar_replay_stacks:
            similar_name = similar_replay_stacks[0][NAME]
            http_status_code = 403
            json_response[ERROR] = 'Similar replay stack named "%s" already ' \
                                   'exists.' % similar_name
        # if no similar stack exists enter it into the database
        else:
            tmp_path = ''
            try:
                # temporary path for taring, untaring, etc...
                tmp_path = tempfile.mkdtemp()

                # create a replay directory
                replay_dir_path = os.path.join(tmp_path, 'replay')
                os.mkdir(replay_dir_path)

                # make readme
                readme_file_name = 'README'
                readme_path = os.path.join(replay_dir_path, readme_file_name)
                readme_str = '\n'.join([replay_stack_name,
                                        ham_stack_name,
                                        mon1_stack_name,
                                        mon2_stack_name,
                                        short_desc])
                with open(readme_path, 'w') as fh:
                    fh.write(readme_str)

                # create new tar file
                new_tf_name = uuid + '.tar.gz'
                new_tf_path = os.path.join(tmp_path, new_tf_name)
                new_tf = tarfile.open(new_tf_path, 'w:gz')

                # add readme and images
                extract_imgs(existing_ham_stacks[0][RESULT], replay_dir_path)
                extract_imgs(existing_mon1_stacks[0][RESULT], replay_dir_path)
                extract_imgs(existing_mon2_stacks[0][RESULT], replay_dir_path)
                new_tf.add(replay_dir_path, 'replay',
                           filter=set_tar_permissions)
                new_tf.add(readme_path, readme_file_name,
                           filter=set_tar_permissions)
                new_tf.close()

                # move new tar file to results directory
                archive_path = os.path.join(RESULTS_PATH, new_tf_name)
                shutil.move(new_tf_path, archive_path)

                # insert into database
                url = 'http://%s/results/%s/%s' % (
                    HOSTNAME, PORT, os.path.basename(archive_path))
                json_response[FILENAME] = new_tf_name
                json_response[RESULT] = archive_path
                json_response[URL] = url
                json_response[UUID] = uuid
                json_response[HAM_NAME] = ham_stack_name
                json_response[MON1_NAME] = mon1_stack_name
                json_response[MON2_NAME] = mon2_stack_name
                json_response[NAME] = replay_stack_name
                json_response[STACK_TYPE] = REPLAY
                json_response[DESCRIPTION] = short_desc

                cls._DB_CONNECTOR.insert(IMAGES_COLLECTION, [json_response])
            except:
                APP_LOGGER.exception(traceback.format_exc())
                http_status_code = 500
                json_response[ERROR] = str(sys.exc_info()[1])
            finally:
                silently_remove_tree(tmp_path)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        http_status_code = 500
        json_response[ERROR] = str(sys.exc_info()[1])
    finally:
        if ID in json_response:
            del json_response[ID]

    return make_clean_response(json_response, http_status_code)
def process_callback(future):
    try:
        _ = future.result()
        dirname = os.path.dirname(outfile_path)
        vcf_fn = os.path.basename(outfile_path)
        basename = os.path.splitext(vcf_fn)[0]
        pdf_fn = '%s.%s' % (basename, PDF)
        scatter_png_fn = '%s_scatter.%s' % (basename, PNG)
        scatter_ind_pdf_fn = '%s_scatter_ind.%s' % (basename, PDF)
        kde_png_fn = '%s_kde.%s' % (basename, PNG)
        kde_ind_pdf_fn = '%s_kde_ind.%s' % (basename, PDF)

        generate_plots(exp_def_name,
                       ac_result_path,
                       os.path.splitext(outfile_path)[0],
                       ignored_dyes=ignored_dyes,
                       data_set_name=cur_job_name)

        update = {"$set": {
                            STATUS: JOB_STATUS.succeeded,  # @UndefinedVariable
                            RESULT: outfile_path,
                            URL: get_results_url(os.path.join(dirname, vcf_fn)),
                            PDF: os.path.join(dirname, pdf_fn),
                            PDF_URL: get_results_url(os.path.join(dirname, pdf_fn)),
                            PNG: os.path.join(dirname, scatter_ind_pdf_fn),
                            PNG_URL: get_results_url(os.path.join(dirname, scatter_ind_pdf_fn)),
                            PNG_SUM: os.path.join(dirname, scatter_png_fn),
                            PNG_SUM_URL: get_results_url(os.path.join(dirname, scatter_png_fn)),
                            KDE_PNG: os.path.join(dirname, kde_ind_pdf_fn),
                            KDE_PNG_URL: get_results_url(os.path.join(dirname, kde_ind_pdf_fn)),
                            KDE_PNG_SUM: os.path.join(dirname, kde_png_fn),
                            KDE_PNG_SUM_URL: get_results_url(os.path.join(dirname, kde_png_fn)),
                            FINISH_DATESTAMP: datetime.today(),
                          }
                 }
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(SA_GENOTYPER_COLLECTION, query, {})) > 0:
            db_connector.update(SA_GENOTYPER_COLLECTION, query, update)
        else:
            silently_remove_file(outfile_path)
            silently_remove_file(os.path.join(dirname, pdf_fn))
            silently_remove_file(os.path.join(dirname, scatter_png_fn))
            silently_remove_file(os.path.join(dirname, scatter_ind_pdf_fn))
            silently_remove_file(os.path.join(dirname, kde_png_fn))
            silently_remove_file(os.path.join(dirname, kde_ind_pdf_fn))
    except:
        APP_LOGGER.exception("Error in Genotyper post request process callback.")
        error_msg = str(sys.exc_info()[1])
        update = {"$set": {STATUS: JOB_STATUS.failed,  # @UndefinedVariable
                           RESULT: None,
                           PDF: None,
                           PNG: None,
                           PNG_SUM: None,
                           FINISH_DATESTAMP: datetime.today(),
                           ERROR: error_msg}}
        # If job has been deleted, then delete result and don't update DB.
        if len(db_connector.find(SA_GENOTYPER_COLLECTION, query, {})) > 0:
            db_connector.update(SA_GENOTYPER_COLLECTION, query, update)
        else:
            silently_remove_file(outfile_path)
            silently_remove_file(os.path.join(dirname, pdf_fn))
            silently_remove_file(os.path.join(dirname, scatter_png_fn))
            silently_remove_file(os.path.join(dirname, scatter_ind_pdf_fn))
            silently_remove_file(os.path.join(dirname, kde_png_fn))
            silently_remove_file(os.path.join(dirname, kde_ind_pdf_fn))
def process_request(cls, params_dict):
    archive_names = params_dict[cls.archives_param]
    job_name = params_dict[cls.job_name_param][0]

    json_response = {CONVERT_IMAGES: []}

    # Ensure archive directory is valid
    try:
        archives = list()
        for archive_name in archive_names:
            archives.extend(get_archive_dirs(archive_name, extensions=["bin"]))
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[ERROR] = str(sys.exc_info()[1])
        return make_clean_response(json_response, 500)

    # Ensure at least one valid archive is found
    if len(archives) < 1:
        return make_clean_response(json_response, 404)

    # Process each archive
    status_codes = []
    for i, archive in enumerate(archives):
        if len(archives) == 1:
            cur_job_name = job_name
        else:
            cur_job_name = "%s-%d" % (job_name, i)

        response = {
            ARCHIVE: archive,
            UUID: str(uuid4()),
            STATUS: JOB_STATUS.submitted,               # @UndefinedVariable
            JOB_NAME: cur_job_name,
            JOB_TYPE_NAME: JOB_TYPE.pa_convert_images,  # @UndefinedVariable
            SUBMIT_DATESTAMP: datetime.today(),
        }
        status_code = 200

        if cur_job_name in cls._DB_CONNECTOR.distinct(
                PA_CONVERT_IMAGES_COLLECTION, JOB_NAME):
            status_code = 403
        else:
            try:
                results_folder = get_results_folder()
                outfile_path = os.path.join(results_folder,
                                            response[UUID] + ".tar.gz")

                # Create helper functions
                abs_callable = PaConvertImagesCallable(archive, outfile_path,
                                                       response[UUID],
                                                       cls._DB_CONNECTOR)
                callback = make_process_callback(response[UUID], outfile_path,
                                                 cls._DB_CONNECTOR)

                # Add to queue and update DB
                cls._DB_CONNECTOR.insert(PA_CONVERT_IMAGES_COLLECTION,
                                         [response])
                cls._EXECUTION_MANAGER.add_job(response[UUID], abs_callable,
                                               callback)
                del response[ID]
            except:
                APP_LOGGER.exception(traceback.format_exc())
                response[ERROR] = str(sys.exc_info()[1])
                status_code = 500

        json_response[CONVERT_IMAGES].append(response)
        status_codes.append(status_code)

    # If all jobs submitted successfully, then 200 should be returned.
    # Otherwise, the maximum status code seems good enough.
    return make_clean_response(json_response, max(status_codes))
def process_request(cls, params_dict):
    filenames = params_dict[cls.filenames_parameter]
    report_uuid = params_dict[cls.report_uuid_parameter][0]
    http_status_code = 200
    json_response = {RUN_REPORT_UUID: report_uuid, FILENAMES: filenames}

    filepaths = [os.path.join(MODIFIED_ARCHIVES_PATH, secure_filename(fn))
                 for fn in filenames]
    if not filenames or not report_uuid or \
       not all(allowed_file(fp) for fp in filepaths):
        http_status_code = 400
    elif any(cls._DB_CONNECTOR.find_one(HDF5_COLLECTION, HDF5_PATH,
                                        {'$regex': fn + '$'}) is not None
             for fn in filenames):
        http_status_code = 403
    else:
        try:
            fp_to_datasets, duplicate = get_datasets_from_files(filepaths)
            if not fp_to_datasets or duplicate:
                http_status_code = 403
            else:
                new_hdf5_records = [{HDF5_PATH: fp,
                                     HDF5_DATASET: dsname,
                                     "upload": True}
                                    for fp in fp_to_datasets
                                    for dsname in fp_to_datasets[fp]]
                cls._DB_CONNECTOR.insert(HDF5_COLLECTION, new_hdf5_records)
                APP_LOGGER.info('Updated database with %d new HDF5 files'
                                % len(new_hdf5_records))

                run_report = cls._DB_CONNECTOR.find_one(RUN_REPORT_COLLECTION,
                                                        UUID, report_uuid)
                if run_report:
                    exist_datasets = set([d for d in run_report[IMAGE_STACKS]
                                          if isinstance(d, str) or
                                          isinstance(d, unicode)])
                    new_datasets = set()
                    for datasets in fp_to_datasets.values():
                        new_datasets = new_datasets | datasets
                    new_datasets = list(new_datasets - exist_datasets)
                    if new_datasets:
                        cls._DB_CONNECTOR.update(
                            RUN_REPORT_COLLECTION,
                            {UUID: report_uuid},
                            {'$addToSet': {
                                IMAGE_STACKS: {
                                    '$each': [{'name': d, 'upload': True}
                                              for d in new_datasets]}}})
                        APP_LOGGER.info(
                            "Updated run report uuid=%s with %d HDF5 datasets."
                            % (report_uuid, len(new_datasets)))
                    del run_report[ID]
                    json_response.update({"run_report": run_report,
                                          "uploaded": new_datasets})
                else:
                    json_response.update({
                        "error": "Run report uuid=%s does not exist."
                                 % report_uuid})
        except:
            APP_LOGGER.exception(traceback.format_exc())
            json_response[ERROR] = str(sys.exc_info()[1])
            http_status_code = 500

    return make_clean_response(json_response, http_status_code)
def process_request(cls, params_dict):
    job_uuids = params_dict[cls.job_uuid_param]
    job_name = params_dict[cls.job_name_param][0]

    pico1_dye = None
    if cls.pico1_dye_param in params_dict:
        pico1_dye = params_dict[cls.pico1_dye_param][0]
    use_pico1_filter = True
    if pico1_dye is None:
        use_pico1_filter = False

    pico2_dye = None
    if cls.pico2_dye_param in params_dict:
        pico2_dye = params_dict[cls.pico2_dye_param][0]
    use_pico2_filter = True
    if pico2_dye is None:
        use_pico2_filter = False

    assay_dye = None
    if cls.assay_dye_param in params_dict:
        assay_dye = params_dict[cls.assay_dye_param][0]

    num_probes = params_dict[cls.n_probes_param][0]
    training_factor = params_dict[cls.training_param][0]
    dye_levels = params_dict[cls.dye_levels_param]

    filtered_dyes = list()
    if cls.filtered_dyes_param in params_dict:
        filtered_dyes = params_dict[cls.filtered_dyes_param]

    ignored_dyes = list()
    if cls.ignored_dyes_param in params_dict:
        ignored_dyes = params_dict[cls.ignored_dyes_param]

    ui_threshold = params_dict[cls.ui_threshold_param][0]

    if cls.dev_mode_param in params_dict and \
       params_dict[cls.dev_mode_param][0]:
        dev_mode = params_dict[cls.dev_mode_param][0]
    else:
        dev_mode = DEFAULT_DEV_MODE

    if cls.drift_compensate_param in params_dict and \
       params_dict[cls.drift_compensate_param][0]:
        drift_compensate = params_dict[cls.drift_compensate_param][0]
    else:
        drift_compensate = DEFAULT_DRIFT_COMPENSATE

    if cls.continuous_phase_param in params_dict and \
       params_dict[cls.continuous_phase_param][0]:
        use_pico_thresh = True
    else:
        use_pico_thresh = False

    if cls.ignore_lowest_barcode in params_dict and \
       params_dict[cls.ignore_lowest_barcode][0]:
        ignore_lowest_barcode = params_dict[cls.ignore_lowest_barcode][0]
    else:
        ignore_lowest_barcode = DEFAULT_IGNORE_LOWEST_BARCODE

    max_uninj_ratio = params_dict[cls.max_ui_ratio_param][0]

    json_response = {IDENTITY: []}

    # Ensure analysis job exists
    try:
        criteria = {UUID: {"$in": job_uuids}}
        projection = {ID: 0, RESULT: 1, UUID: 1, CONFIG: 1}
        pa_process_jobs = cls._DB_CONNECTOR.find(PA_PROCESS_COLLECTION,
                                                 criteria, projection)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[ERROR] = str(sys.exc_info()[1])
        return make_clean_response(json_response, 500)

    # Ensure at least one valid analysis job exists
    if len(pa_process_jobs) < 1:
        return make_clean_response(json_response, 404)

    # Process each archive
    status_codes = []
    for i, pa_uuid in enumerate(job_uuids):
        if len(pa_process_jobs) == 1:
            cur_job_name = job_name
        else:
            cur_job_name = "%s-%d" % (job_name, i)

        status_code = 200
        if cur_job_name in cls._DB_CONNECTOR.distinct(SA_IDENTITY_COLLECTION,
                                                      JOB_NAME):
            status_code = 403
            json_response[IDENTITY].append({ERROR: 'Job exists.'})
        else:
            try:
                # Create helper functions
                sai_callable = SaIdentityCallable(pa_uuid,
                                                  num_probes,
                                                  training_factor,
                                                  assay_dye,
                                                  use_pico1_filter,
                                                  use_pico2_filter,
                                                  pico1_dye,
                                                  pico2_dye,
                                                  dye_levels,
                                                  ignored_dyes,
                                                  filtered_dyes,
                                                  ui_threshold,
                                                  max_uninj_ratio,
                                                  cls._DB_CONNECTOR,
                                                  job_name,
                                                  use_pico_thresh,
                                                  ignore_lowest_barcode,
                                                  dev_mode,
                                                  drift_compensate)
                response = copy.deepcopy(sai_callable.document)
                callback = make_process_callback(sai_callable.uuid,
                                                 sai_callable.outfile_path,
                                                 sai_callable.plot_path,
                                                 sai_callable.report_path,
                                                 sai_callable.plate_plot_path,
                                                 sai_callable.temporal_plot_path,
                                                 sai_callable.drop_count_plot_path,
                                                 cls._DB_CONNECTOR)

                # Add to queue
                cls._EXECUTION_MANAGER.add_job(sai_callable.uuid,
                                               sai_callable, callback)
            except:
                APP_LOGGER.exception(traceback.format_exc())
                response = {JOB_NAME: cur_job_name,
                            ERROR: str(sys.exc_info()[1])}
                status_code = 500
            finally:
                if ID in response:
                    del response[ID]
                json_response[IDENTITY].append(response)

        status_codes.append(status_code)

    # If all jobs submitted successfully, then 200 should be returned.
    # Otherwise, the maximum status code seems good enough.
    return make_clean_response(json_response, max(status_codes))
def __call__(self):
    update = {'$set': {STATUS: JOB_STATUS.running,  # @UndefinedVariable
                       START_DATESTAMP: datetime.today()}}
    query = {UUID: self.uuid}
    self.db_connector.update(SA_ASSAY_CALLER_COLLECTION, query, update)

    def gen_dye_scatterplot(dyes, sys_listener_path):
        try:
            analysis_df = pandas.read_table(self.analysis_file,
                                            sep=sniff_delimiter(self.analysis_file))
            ac_df = pandas.read_table(self.tmp_outfile_path,
                                      sep=sniff_delimiter(self.tmp_outfile_path))
            analysis_df['assay'] = False
            analysis_df.loc[analysis_df['identity'].notnull(), 'assay'] = \
                ac_df['assay'].values

            # System listener inputs
            dyn_align_offsets = {}
            temps = {}
            steps = {}
            if sys_listener_path is not None:
                sys_listener_dir = os.path.dirname(sys_listener_path)
                clamp_temp_tp = ClampTempTopicParser()
                old_channel_offset_tp = OldChannelOffsetTopicParser()
                channel_offset_tp = ChannelOffsetTopicParser()
                dyn_align_steps_tp = DynamicAlignStepsParser()
                topic_parsers = [clamp_temp_tp, old_channel_offset_tp,
                                 channel_offset_tp, dyn_align_steps_tp]
                sys_listener_parser = SystemListenerParser(
                    sys_listener_dir, topic_parsers=topic_parsers)
                temps = sys_listener_parser.get_topic_results(
                    clamp_temp_tp.topic)
                dyn_align_offsets = sys_listener_parser.get_topic_results(
                    channel_offset_tp.topic)
                if len(dyn_align_offsets) < 1:
                    APP_LOGGER.info("Using old channel offset parser...")
                    dyn_align_offsets = sys_listener_parser.get_topic_results(
                        old_channel_offset_tp.topic)
                else:
                    APP_LOGGER.info("Using new channel offset parser...")
                steps = sys_listener_parser.get_topic_results(
                    dyn_align_steps_tp.topic)

            generate_dye_scatterplots(analysis_df,
                                      dyes,
                                      self.tmp_dyes_plot_path,
                                      self.job_name,
                                      self.pico1_dye,
                                      dyn_align_offsets=dyn_align_offsets,
                                      temps=temps,
                                      steps=steps)
            shutil.copy(self.tmp_dyes_plot_path, self.dyes_plot_path)
            APP_LOGGER.info("Dyes scatter plot generated for %s."
                            % self.job_name)
        except:
            APP_LOGGER.exception("Dyes scatter plot generation failed.")

    try:
        safe_make_dirs(self.tmp_path)

        exp_def_fetcher = ExpDefHandler()
        experiment = exp_def_fetcher.get_experiment_definition(
            self.exp_def_name)

        model_file_dict = available_models(self.ac_method)
        if self.ac_model is None:
            classifier_file = None
        else:
            if self.ac_model in model_file_dict:
                classifier_file = model_file_dict[self.ac_model]
            else:
                APP_LOGGER.exception(
                    "Assay caller model %s is unavailable for method %s."
                    % (self.ac_model, self.ac_method))
                raise Exception(
                    "Assay caller model %s is unavailable for method %s."
                    % (self.ac_model, self.ac_method))

        AssayCallManager(self.num_probes,
                         in_file=self.analysis_file,
                         out_file=self.tmp_outfile_path,
                         scatter_plot_file=self.tmp_scatter_plot_path,
                         training_factor=self.training_factor,
                         assay=self.assay_dye,
                         fiducial=self.pico2_dye,
                         controls=experiment.negative_controls.barcodes,
                         ctrl_thresh=self.ctrl_thresh,
                         n_jobs=8,
                         controls_filtering=self.ctrl_filter,
                         assay_type=self.ac_method,
                         classifier_file=classifier_file)

        if not os.path.isfile(self.tmp_outfile_path):
            raise Exception('Secondary analysis assay caller job '
                            'failed: output file not generated.')

        shutil.copy(self.tmp_outfile_path, self.outfile_path)
        gen_dye_scatterplot(experiment.dyes, self.get_sys_listener_path())

        if os.path.isfile(self.tmp_scatter_plot_path):
            shutil.copy(self.tmp_scatter_plot_path, self.scatter_plot_path)
    finally:
        # Regardless of success or failure, remove the copied archive directory
        shutil.rmtree(self.tmp_path, ignore_errors=True)
def process_request(cls, params_dict):
    job_uuids = params_dict[cls.job_uuid_param]
    job_name = params_dict[cls.job_name_param][0]
    exp_def_name = params_dict[cls.exp_defs_param][0]
    training_factor = params_dict[cls.training_param][0]
    ctrl_thresh = params_dict[cls.ctrl_thresh][0]
    ctrl_filter = params_dict[cls.ctrl_filter][0]
    ac_method = params_dict[cls.ac_method][0]

    ac_model = None
    if cls.ac_model in params_dict and params_dict[cls.ac_model][0]:
        ac_model = params_dict[cls.ac_model][0]

    json_response = {ASSAY_CALLER: []}

    # Ensure analysis job exists
    try:
        criteria = {UUID: {'$in': job_uuids}}
        projection = {ID: 0, RESULT: 1, UUID: 1}
        sa_identity_jobs = cls._DB_CONNECTOR.find(SA_IDENTITY_COLLECTION,
                                                  criteria, projection)
    except:
        APP_LOGGER.exception(traceback.format_exc())
        json_response[ERROR] = str(sys.exc_info()[1])
        return make_clean_response(json_response, 500)

    # Process each archive
    status_codes = []
    for i, sa_identity_job in enumerate(sa_identity_jobs):
        if len(sa_identity_jobs) == 1:
            cur_job_name = job_name
        else:
            cur_job_name = '%s-%d' % (job_name, i)

        status_code = 200
        if cur_job_name in cls._DB_CONNECTOR.distinct(
                SA_ASSAY_CALLER_COLLECTION, JOB_NAME):
            status_code = 403
            json_response[ASSAY_CALLER].append({ERROR: 'Job exists.'})
        else:
            try:
                if not os.path.isfile(sa_identity_job[RESULT]):
                    raise InvalidFileError(sa_identity_job[RESULT])

                # Create helper functions
                sac_callable = SaAssayCallerCallable(sa_identity_job[UUID],
                                                     exp_def_name,
                                                     training_factor,
                                                     ctrl_thresh,
                                                     cls._DB_CONNECTOR,
                                                     cur_job_name,
                                                     ctrl_filter,
                                                     ac_method,
                                                     ac_model)
                response = copy.deepcopy(sac_callable.document)
                callback = make_process_callback(sac_callable.uuid,
                                                 sac_callable.outfile_path,
                                                 sac_callable.scatter_plot_path,
                                                 sac_callable.dyes_plot_path,
                                                 cls._DB_CONNECTOR)

                # Add to queue
                cls._EXECUTION_MANAGER.add_job(response[UUID],
                                               sac_callable, callback)
            except:
                APP_LOGGER.exception(traceback.format_exc())
                response = {JOB_NAME: cur_job_name,
                            ERROR: str(sys.exc_info()[1])}
                status_code = 500
            finally:
                if ID in response:
                    del response[ID]
                json_response[ASSAY_CALLER].append(response)

        status_codes.append(status_code)

    # If all jobs submitted successfully, then 200 should be returned.
    # Otherwise, the maximum status code seems good enough.
    return make_clean_response(json_response, max(status_codes))