def __init__(self, *args, **kwargs):
    """
    Initialise the test case and try to load the QLP fixture plates.

    Each fixture is read with get_plate() from the path produced by
    local().  If any read raises IOError, all four plate attributes
    are set to None.
    """
    super(MetricsTest, self).__init__(*args, **kwargs)
    fixtures = (('cnv_qlplate', 'cnv.qlp'),
                ('duplex_qlplate', 'duplex.qlp'),
                ('fpfn_qlplate', 'fpfn.qlp'),
                ('red_qlplate', 'red.qlp'))
    # even with unittest.skip, need to wrap potentially failing constructor
    # in try/except
    try:
        for attr_name, file_name in fixtures:
            setattr(self, attr_name, get_plate(local(file_name)))
    except IOError:
        for attr_name, _ in fixtures:
            setattr(self, attr_name, None)
def command(self):
    """
    Assign a dye set to every QLBPlate whose id is greater than a minimum.

    When more than one command-line argument is present, the first one is
    the minimum id (exclusive); otherwise 0 is used.  Plates whose path
    cannot be resolved or whose file cannot be read are reported and
    skipped.  The session is committed once, after all plates.

    Raises ValueError if the minimum id argument is not an integer.
    """
    app = self.load_wsgi_app()
    min_id = 0
    if len(self.args) > 1:
        # argv values are strings; convert so the id comparison is numeric
        min_id = int(self.args[0])
    storage = QLStorageSource(app.config)
    qlbplates = Session.query(QLBPlate).filter(QLBPlate.id > min_id).order_by('id').all()
    for qlbplate in qlbplates:
        try:
            path = storage.qlbplate_path(qlbplate)
        except Exception:
            print("Could not find plate: %s (%s)" % (qlbplate.plate.name if qlbplate.plate else 'Name unknown', qlbplate.id))
            continue

        try:
            qlplate = get_plate(path)
        except Exception:
            print("Could not read plate: %s (%s)" % (qlbplate.plate.name if qlbplate.plate else 'Name Unknown', qlbplate.id))
            continue

        if qlplate.is_fam_vic:
            qlbplate.dyeset = QLBPlate.DYESET_FAM_VIC
        elif qlplate.is_fam_hex:
            qlbplate.dyeset = QLBPlate.DYESET_FAM_HEX
        elif qlplate.is_eva_green:
            qlbplate.dyeset = QLBPlate.DYESET_EVA
        else:
            qlbplate.dyeset = QLBPlate.DYESET_UNKNOWN

        print("Assigned dye %s - %s (%s)" % (qlbplate.dyeset, qlbplate.plate.name if qlbplate.plate else 'Name Unknown', qlbplate.id))

    Session.commit()
def command(self):
    """
    Call compute_metric_foreach_qlwell with DEFAULT_CNV_CALC on each plate
    named on the command line.

    Every argument except the last is treated as an integer plate id.
    With fewer than two arguments the usage string is printed.  A plate
    whose path cannot be resolved is reported and skipped; a failure
    while computing prints the traceback, rolls the session back and
    stops the command.

    Raises ValueError if get_plate() returns a falsy value.
    """
    app = self.load_wsgi_app()
    storage = QLStorageSource(app.config)
    if len(self.args) < 2:
        print(self.__class__.usage)
        return

    # the final argument is not a plate id (presumably the config file)
    for raw_id in self.args[:-1]:
        plate_id = int(raw_id)
        db_plate = dbplate_tree(plate_id)
        try:
            plate_path = storage.plate_path(db_plate)
        except Exception:
            print("Could not read plate: %s" % plate_id)
            continue

        qlplate = get_plate(plate_path)
        if not qlplate:
            raise ValueError("Could not read plate: %s" % plate_path)
        print("Processing %s" % plate_path)

        try:
            from qtools.lib.metrics import DEFAULT_CNV_CALC, compute_metric_foreach_qlwell
            first_metrics = db_plate.metrics[0]
            compute_metric_foreach_qlwell(qlplate, first_metrics, DEFAULT_CNV_CALC)
        except Exception:
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            Session.rollback()
            return
def command(self):
    """
    Run self.process_plate() on the first metrics record of one plate.

    The first command-line argument is the integer plate id.  If
    processing fails, the traceback is printed to stdout and the session
    is rolled back.

    Raises ValueError when fewer than two arguments are given, when the
    plate id does not resolve to a plate, or when the plate file cannot
    be read.
    """
    app = self.load_wsgi_app()

    # enforce config.ini
    if len(self.args) < 2:
        raise ValueError(self.__class__.usage)

    plate_id = int(self.args[0])
    db_plate = dbplate_tree(plate_id)
    if not db_plate:
        raise ValueError("Invalid plate id: %s" % plate_id)

    # for now, no reprocessing.
    source = QLStorageSource(app.config)
    plate_path = source.plate_path(db_plate)
    qlplate = get_plate(plate_path)
    if not qlplate:
        raise ValueError("Could not read plate: %s" % plate_path)

    try:
        self.process_plate(qlplate, db_plate.metrics[0])
    except Exception:
        import sys, traceback
        traceback.print_exc(file=sys.stdout)
        Session.rollback()
        return
def __qlwell_from_threshold_form(self, id):
    """
    Return the analyzed well of the plate file that matches c.well.

    Sets up the DB context for the integer form of ``id``, reads the
    plate at self.__plate_path() and looks the well up by
    c.well.well_name.  Calls abort(404) when the well is missing.
    """
    from qtools.lib.qlb_factory import get_plate

    self.__setup_db_context(int(id))
    qlplate = get_plate(self.__plate_path())
    found = qlplate.analyzed_wells.get(c.well.well_name, None)
    if found:
        return found
    abort(404)
def add_qlp_file_record(source, path):
    """
    Attempt to create a QLP file record for the plate file at ``path``.

    NOTE(review): the previous documentation said the record is added to
    the current SQLAlchemy Session without committing; no explicit
    Session call is visible in this function -- confirm where the record
    is attached.

    :param source: Object providing path_id(path) for the file.
    :param path: The file path of the QLP.

    Returns a (qlbfile, plate, valid_file) tuple:

    * file read: (QLBFile with read_status 0, plate, True)
    * file busy (ETXTBSY/EBUSY): (None, None, False)
    * any other error: (QLBFile with read_status -10, None, False)
    """
    path_id = source.path_id(path)
    valid_file = True
    try:
        plate = get_plate(path)
        if plate.host_software:
            version = VERSION_RE.search(plate.host_software).group(0)
        else:
            plate.host_software = 'Unknown'
            version = 'Unknown'

        file_metadata = dict(version=version,
                             type='processed',
                             run_id=run_id(path_id),
                             read_status=0,
                             mtime=datetime.fromtimestamp(os.stat(path).st_mtime),
                             runtime=datetime.strptime(plate.host_datetime, '%Y:%m:%d %H:%M:%S'),
                             dirname=os.path.dirname(path_id),
                             basename=os.path.basename(path_id))
        qlbfile = QLBFile(**file_metadata)
    except Exception as e:
        # do not add file if the text file is busy
        # (exists, but does not contain valid data)
        if getattr(e, 'errno', None) in (errno.ETXTBSY, errno.EBUSY):
            print(e)
            return (None, None, False)

        print(e)
        file_metadata = {'run_id': run_id(path_id),
                         'dirname': os.path.dirname(path_id),
                         'basename': os.path.basename(path_id),
                         'read_status': -10,
                         'type': 'unknown',
                         'version': '',
                         'mtime': datetime.fromtimestamp(os.stat(path).st_mtime)}
        qlbfile = QLBFile(**file_metadata)
        valid_file = False
        plate = None

    # callers unpack three values, so both the success path and the
    # unreadable-file path must return the tuple explicitly
    return (qlbfile, plate, valid_file)
def update_reprocess_analysis_group_data(analysis_group, reprocess_config, config, logger):
    """
    Given an analysis group and a reprocess config, reload the metrics of
    each plate in the group into qtools.

    Existing plate metrics for this reprocess config are deleted and
    committed first; then each plate's reprocessed file is read and new
    metrics from get_beta_plate_metrics() are added and committed.
    Plates whose file cannot be read are reported and skipped.

    Returns the update status, which is always 0 here.
    """
    update_status = 0
    group_plates = analysis_group.plates

    # remove old metrics if present
    for plate in group_plates:
        # should only be of length 1, but just to be safe
        stale_metrics = [pm for pm in plate.metrics
                         if pm.reprocess_config_id == reprocess_config.id]
        for stale in stale_metrics:
            Session.delete(stale)

    # TODO: how to make this whole operation transactional
    Session.commit()

    plate_ids = [plate.id for plate in group_plates]
    file_source = QLPReprocessedFileSource(config['qlb.reprocess_root'], reprocess_config)

    for plate_id in plate_ids:
        dbplate = dbplate_tree(plate_id)
        # TODO: right abstraction?
        plate_path = file_source.full_path(analysis_group, dbplate)

        print("Reading/updating metrics for %s" % plate_path)
        qlplate = get_plate(plate_path)
        if not qlplate:
            print("Could not read plate: %s" % plate_path)
            continue

        Session.add(get_beta_plate_metrics(dbplate, qlplate, reprocess_config))
        del qlplate

    Session.commit()
    return update_status
def process_plates(self, app, analysis_group, reprocess_config):
    """
    Call self.backfill_plate() on each plate of an analysis group.

    :param app: The loaded application; its config selects the storage.
    :param analysis_group: The group whose plates are processed.
    :param reprocess_config: The reprocess config whose plate metrics
                             are backfilled, or a falsy value to use the
                             metrics with no reprocess config.

    Plates with no matching metrics, or whose path cannot be resolved,
    are reported and skipped.  Each plate is committed on its own; a
    failing plate is rolled back and the loop continues.

    Raises ValueError if get_plate() returns a falsy value.
    """
    # the storage source and the config id do not change per plate
    if reprocess_config:
        data_root = app.config['qlb.reprocess_root']
        storage = QLPReprocessedFileSource(data_root, reprocess_config)
        reprocess_config_id = reprocess_config.id
    else:
        storage = QLStorageSource(app.config)
        # a falsy config has no id to read; match metrics without one
        reprocess_config_id = None

    for plate in analysis_group.plates:
        pms = [pm for pm in plate.metrics if pm.reprocess_config_id == reprocess_config_id]
        if not pms:
            print("Cannot find analysis group for plate %s" % plate.id)
            # without a metrics record there is nothing to backfill
            continue

        pm = pms[0]
        dbplate = dbplate_tree(plate.id)

        try:
            if reprocess_config:
                plate_path = storage.full_path(analysis_group, dbplate)
            else:
                plate_path = storage.plate_path(dbplate)
        except Exception:
            print("Could not read plate: %s" % plate.id)
            continue

        qlplate = get_plate(plate_path)
        if not qlplate:
            raise ValueError("Could not read plate: %s" % plate_path)
        print("Processing %s" % plate_path)

        try:
            self.backfill_plate(qlplate, pm)
            Session.commit()
        except Exception:
            print("Could not process plate %s" % dbplate.id)
            import sys, traceback
            traceback.print_exc(file=sys.stdout)
            Session.rollback()
            continue
def command(self):
    """
    Run self.process_plate() on every non-reprocessed PlateMetric whose
    plate id is at or above a starting id.

    When more than one command-line argument is present, the first one is
    the starting plate id; otherwise 1 is used.  Plates whose path cannot
    be resolved or whose file cannot be read are reported and skipped.
    """
    app = self.load_wsgi_app()
    storage = QLStorageSource(app.config)

    # enforce config.ini
    ## default start....
    start_id = int(self.args[0]) if len(self.args) > 1 else 1

    metric_query = (Session.query(PlateMetric)
                           .filter(PlateMetric.plate_id >= start_id)
                           .options(joinedload(PlateMetric.plate, innerjoin=True))
                           .options(joinedload(PlateMetric.reprocess_config)))

    # TODO come up with version that takes care of reprocessed plates as well
    # (by iterating through analysis groups, most likely)
    for pm in metric_query:
        if pm.from_reprocessed:
            continue

        db_plate = dbplate_tree(pm.plate_id)
        try:
            plate_path = storage.plate_path(db_plate)
        except Exception:
            print("Could not read plate: %s" % pm.plate_id)
            continue

        try:
            qlplate = get_plate(plate_path)
        except Exception:
            print("Could not read plate: %s" % plate_path)
            continue

        if not qlplate:
            print("Could not read plate: %s" % plate_path)
            continue

        print("Processing %s: %s..." % (pm.plate_id, plate_path))
        self.process_plate(qlplate, pm)
def get_plate_objects_from_channels(storage, qlbwell_channels):
    """
    Read the plate file behind each distinct plate of the given channels.

    Returns a dict keyed by (file dirname, file basename) whose values
    are the objects returned by get_plate().
    """
    distinct_plates = {channel.well.plate for channel in qlbwell_channels}
    return {
        (plate.file.dirname, plate.file.basename): get_plate(storage.qlbplate_path(plate))
        for plate in distinct_plates
    }
def get_plate_objects(storage, qlbwells):
    """
    Read the plate file behind each distinct plate of the given wells.

    Returns a dict keyed by (file dirname, file basename) whose values
    are the objects returned by get_plate().
    """
    distinct_plates = {well.plate for well in qlbwells}
    return {
        (plate.file.dirname, plate.file.basename): get_plate(storage.qlbplate_path(plate))
        for plate in distinct_plates
    }
def command(self):
    """
    Write a 2D cluster thumbnail for every analyzed well of every plate
    in an analysis group.

    The first command-line argument is the integer analysis group id.
    With exactly three arguments, the second is a ReprocessConfig code
    and plates are read from the reprocessed file source; otherwise they
    are read from the regular storage source.  Plates that cannot be
    read are reported and skipped.

    Raises ValueError when fewer than two arguments are given or when
    the analysis group does not exist.
    """
    from qtools.lib.mplot import plot_cluster_2d, cleanup as plt_cleanup

    app = self.load_wsgi_app()
    image_root = app.config['qlb.image_store']
    image_source = QLBImageSource(image_root)

    # enforce config.ini
    if len(self.args) < 2:
        # was self.__class_ (typo), which raised AttributeError instead
        raise ValueError(self.__class__.usage)

    analysis_group_id = int(self.args[0])
    if len(self.args) == 3:
        reprocess_config = Session.query(ReprocessConfig).filter_by(code=self.args[1]).one()
    else:
        reprocess_config = None

    if reprocess_config:
        data_root = app.config['qlb.reprocess_root']
        storage = QLPReprocessedFileSource(data_root, reprocess_config)
    else:
        storage = QLStorageSource(app.config)

    analysis_group = Session.query(AnalysisGroup).get(analysis_group_id)
    if not analysis_group:
        raise ValueError("No analysis group for id %s" % analysis_group_id)

    plates = analysis_group.plates
    for plate in plates:
        # TODO: UGH THIS CODE INVARIANT SUCKS (should merge QLReprocessedFile/QLStorageSources)
        if reprocess_config:
            plate_path = storage.full_path(analysis_group, plate)
        else:
            plate_path = storage.plate_path(plate)

        print("Reading %s" % plate_path)
        qlplate = get_plate(plate_path)
        if not qlplate:
            print("Could not read plate: %s" % plate.name)
            continue

        print("Generating thumbnails for %s" % plate.name)
        for name, qlwell in sorted(qlplate.analyzed_wells.items()):
            # TODO abstract into utility image generation function (thumbnail.py?)
            threshold_fallback = qlwell.clustering_method == QLWell.CLUSTERING_TYPE_THRESHOLD
            fig = plot_cluster_2d(qlwell.peaks,
                                  width=60,
                                  height=60,
                                  thresholds=[qlwell.channels[0].statistics.threshold,
                                              qlwell.channels[1].statistics.threshold],
                                  boundaries=[0,0,12000,24000],
                                  show_axes=False,
                                  antialiased=True,
                                  unclassified_alpha=0.5,
                                  use_manual_clusters=not well_channel_automatic_classification(qlwell),
                                  highlight_thresholds=threshold_fallback)
            image_path = image_source.get_path('%s/%s_2d.png' % (plate.qlbplate.id, name))
            print(image_path)
            fig.savefig(image_path, format='png', dpi=72)
            plt_cleanup(fig)
def command(self):
    """
    Delete and recompute the plate metrics of an analysis group.

    The first command-line argument is the integer analysis group id.
    With exactly three arguments, the second is a ReprocessConfig code;
    metrics are then rebuilt from the reprocessed file source, otherwise
    from the regular storage source.  Plates that cannot be read are
    reported and skipped.

    Raises ValueError when fewer than two arguments are given or when
    the analysis group does not exist.
    """
    app = self.load_wsgi_app()

    # enforce config.ini
    if len(self.args) < 2:
        raise ValueError(self.__class__.usage)

    analysis_group_id = int(self.args[0])
    reprocess_config = None
    reprocess_config_id = None
    if len(self.args) == 3:
        reprocess_config = Session.query(ReprocessConfig).filter_by(code=self.args[1]).one()
        reprocess_config_id = reprocess_config.id

    analysis_group = Session.query(AnalysisGroup).get(analysis_group_id)
    if not analysis_group:
        raise ValueError("No analysis group for id %s" % analysis_group_id)

    group_plates = analysis_group.plates
    # todo: add in reprocess config id
    for plate in group_plates:
        # should only be of length 1, but just to be safe
        stale_metrics = [pm for pm in plate.metrics
                         if pm.reprocess_config_id == reprocess_config_id]
        for stale in stale_metrics:
            Session.delete(stale)

    # TODO: how to make this whole operation transactional
    Session.commit()

    # this is a little tricky in the ORM world.  only get the
    # ids of the analysis_group plates, so that you can load the plate
    # and all the necessary children
    plate_ids = [plate.id for plate in group_plates]

    if reprocess_config_id is None:
        storage = QLStorageSource(app.config)

        for plate_id in plate_ids:
            dbplate = dbplate_tree(plate_id)
            plate_path = storage.plate_path(dbplate)
            print("Reading/updating metrics for %s" % plate_path)
            qlplate = get_plate(plate_path)
            if not qlplate:
                print("Could not read plate: %s" % plate_path)
                continue

            Session.add(get_beta_plate_metrics(dbplate, qlplate))
            del qlplate
    else:
        file_source = QLPReprocessedFileSource(app.config['qlb.reprocess_root'], reprocess_config)

        for plate_id in plate_ids:
            dbplate = dbplate_tree(plate_id)
            # TODO: right abstraction?
            plate_path = file_source.full_path(analysis_group, dbplate)
            print("Reading/updating metrics for %s" % plate_path)
            qlplate = get_plate(plate_path)
            if not qlplate:
                print("Could not read plate: %s" % plate_path)
                continue

            Session.add(get_beta_plate_metrics(dbplate, qlplate, reprocess_config))
            del qlplate

    Session.commit()
def __scan_plate(file_source, image_source, path_id, path, mtime_dict, plate_type=None, file_lists=None):
    """
    The method responsible for taking a QLP file on disk and creating
    thumbnails and adding/updating records in the database based off
    the contents of that file.

    This is a nasty abstraction, I apologize.

    TODO: make this more natural along add/update line, do not require use of
    mtime_dict or file_list (or return file_list as files touched)

    Returns the Plate object of the added/updated plate, or None if there was
    no touch/error.

    :param file_source: The source of the QLP files (QLStorageSource)
    :param image_source: The source/sink of thumbnails (QLStorageSource)
    :param path_id: The unique identifier of the plate file.  Computed by run_id()
    :param path: The actual file path of the QLP.
    :param mtime_dict: A mapping between plates and their last updated times.  This will
                       indicate whether or not a plate is 'dirty' with respect to the DB.
                       Each value is indexed as [0] = QLBFile id, [1] = recorded mtime.
    :param plate_type: A plate type.  Supplying this will indicate that the special metrics
                       corresponding to that plate type should be computed during the scan.
    :param file_lists: A logging object used in the scan to record files that are missing,
                       poorly processed, etc.  Side-effected by this method.  A fresh
                       defaultdict(list) is used only when None is passed.
    """
    # only substitute a collector when none was supplied: an empty
    # caller-provided collector must still receive the records
    if file_lists is None:
        file_lists = defaultdict(list)

    # if the file is not being tracked, attempt to add it
    if path_id not in mtime_dict:
        print("Adding plate: %s" % path)
        qlbfile, qlplate, valid_file = add_qlp_file_record(file_source, path)
        if not valid_file:
            print("Invalid file: %s" % path)
            file_lists['invalid_plates'].append(path)
            return None
        elif path.endswith('HFE_Plate.qlp'):
            qlbfile.read_status = -7
            print("Ignoring HFE Plate: %s" % path)
            Session.commit()
            return None
        elif qlbfile.version == 'Unknown':
            # compare by value; identity with a string literal is unreliable
            qlbfile.read_status = -8
            print("Ignoring plate run with unknown QS version: %s" % path)
            Session.commit()
            return None

        if(qlbfile.version_tuple < (0,1,1,9)):
            # we don't recognize the QLP file version, ditch
            qlbfile.read_status = -2
            Session.commit()
            return None

        qlbplate, valid_plate = add_qlp_plate_record(qlplate, qlbfile)
        if not valid_plate:
            # invalid plate
            print("Could not read plate: %s" % path)
            qlbfile.read_status = -20
            Session.commit()
            file_lists['unreadable_plates'].append(path)
            return None

        # sorted() copies the items, so deleting entries below is safe
        for well_name, proc_qlwell in sorted(qlplate.analyzed_wells.items()):

            # remove empty/blank wells generated by eng group
            if (well_name is None or well_name == ''):
                del qlplate.analyzed_wells[well_name]
                continue

            raw_qlwell = None
            # TODO: abstract?
            well_loc = "%s_%s_RAW.qlb" % (path[:-4], well_name)
            # process QLP only
            if not os.path.isfile(well_loc):
                print("Could not find well file: %s" % well_loc)
                file_lists['missing_wells'].append(well_loc)
                well_file = None
                # proceed, as file may just not have been delivered
                valid_file = True
            else:
                well_file, raw_qlwell, valid_file = add_qlb_file_record(file_source, well_loc)

            if not valid_file:
                print("Invalid well file: %s" % well_loc)
                file_lists['invalid_wells'].append(well_loc)
                continue

            qlbwell, valid_well = add_qlb_well_record(well_file, well_name, proc_qlwell, raw_qlwell)
            if valid_well:
                qlbplate.wells.append(qlbwell)

        # bug 829: if there are invalid wells, do not process the plate;
        # wait for the well files to complete processing, get on next run
        #
        #
        if file_lists['invalid_wells']:
            print("Skipping plate processing (invalid well): %s" % path)
            Session.rollback()
            return None

        # continue plate
        plate_meta = plate_from_qlp(qlbplate)
        Session.add(plate_meta)

        qlbplate.plate = plate_meta

        validation_test = get_product_validation_plate(qlplate, plate_meta)
        if not validation_test:
            if not apply_setup_to_plate(qlplate, plate_meta):
                apply_template_to_plate(qlplate, plate_meta)

        # OK, try it now
        try:
            for well in qlbplate.wells:
                if well.file_id != -1:
                    well.file.read_status = 1
            qlbplate.file.read_status = 1
            Session.commit()
            write_images_stats_for_plate(qlbplate, qlplate, image_source, override_plate_type=plate_type)
            Session.commit()
            qlbplate.plate.score = Plate.compute_score(qlbplate.plate)
            Session.commit()
            if validation_test:
                validation_test.plate_id = qlbplate.plate.id
                Session.add(validation_test)
                Session.commit()
            file_lists['added_plates'].append(path)
            return plate_meta
        except Exception as e:
            # falls through to the implicit None return
            print(e)
            print("Could not process new plate: %s" % path)
            file_lists['unwritable_plates'].append(path)
            Session.rollback()

    elif time_equals(mtime_dict[path_id][1], datetime.fromtimestamp(os.stat(path).st_mtime)):
        return None
    else:
        # strategy: reprocess the plate and update.
        qlbfile = Session.query(QLBFile).get(mtime_dict[path_id][0])
        if not qlbfile:
            print("No file for path: %s" % path)
            return None
        elif path.endswith('HFE_Plate.qlp'):
            qlbfile.mtime = datetime.fromtimestamp(os.stat(path).st_mtime)
            Session.commit()
            return None

        qlbplates = (Session.query(QLBPlate).filter_by(file_id=qlbfile.id)
                            .options(joinedload_all(QLBPlate.wells, QLBWell.channels)).all())
        if not qlbplates:
            print("No plate for read file: %s" % path)
            return None

        qlbplate = qlbplates[0]
        if not qlbplate.plate_id:
            print("No plate for read file (plate deleted): %s" % path)
            qlbfile.mtime = datetime.fromtimestamp(os.stat(path).st_mtime)
            Session.commit()
            return None

        print("Updating plate %s/%s: %s" % (qlbplate.plate_id, qlbplate.id, path))
        qlplate = get_plate(path)
        updated = update_qlp_plate_record(qlbplate, qlplate)
        if not updated:
            print("Could not read updated file")
            Session.rollback()
            qlbplate.file.read_status = -30
            Session.commit()
            file_lists['unreadable_plates'].append(path)
            return None

        # this is basically the same as on add -- abstract?
        #
        # TODO (GitHub Issue 30): handle case where a previously analyzed well is switched to 'Not Used'
        for well_name, proc_qlwell in sorted(qlplate.analyzed_wells.items()):
            raw_qlwell = None

            # TODO: abstract?
            well_loc = "%s_%s_RAW.qlb" % (path[:-4], well_name)
            qlbwells = [well for well in qlbplate.wells if well.well_name == well_name]
            if not qlbwells:
                # add qlb file record
                if not os.path.isfile(well_loc):
                    print("Could not find well file: %s" % well_loc)
                    well_file = None
                    valid_file = True
                    file_lists['missing_wells'].append(well_loc)
                else:
                    well_file, raw_qlwell, valid_file = add_qlb_file_record(file_source, well_loc)

                if not valid_file:
                    print("Invalid well file: %s" % well_loc)
                    file_lists['invalid_wells'].append(well_loc)
                    continue

                qlbwell, valid_well = add_qlb_well_record(well_file, well_name, proc_qlwell, raw_qlwell)
                if valid_well:
                    qlbplate.wells.append(qlbwell)
                else:
                    file_lists['invalid_wells'].append(well_loc)
                    print("Could not add well %s: %s" % (well_name, well_loc))
            else:
                qlbwell = qlbwells[0]
                if not os.path.isfile(well_loc):
                    print("Could not find well file to update: %s" % well_loc)
                    file_lists['missing_wells'].append(well_loc)
                    update_qlb_well_record(qlbwell, well_name, proc_qlwell, None)
                else:
                    if qlbwell.file_id == -1:
                        well_file, raw_qlwell, valid_file = add_qlb_file_record(file_source, well_loc)
                        if valid_file:
                            qlbwell.file = well_file
                    update_qlb_well_record(qlbwell, well_name, proc_qlwell, raw_qlwell)

        # in lieu of updating plate meta (though it maybe should be done)
        qlbplate.plate.program_version = qlbplate.host_software

        try:
            for well in qlbplate.wells:
                if well.file_id != -1 and well.file:
                    well.file.read_status = 1
            qlbplate.file.read_status = 1
            qlbfile.mtime = datetime.fromtimestamp(os.stat(path).st_mtime)
            Session.commit()
            # this is where updating the dirty bits would come in handy
            write_images_stats_for_plate(qlbplate, qlplate, image_source, overwrite=True, override_plate_type=plate_type)
            Session.commit()
            qlbplate.plate.score = Plate.compute_score(qlbplate.plate)
            Session.commit()
            file_lists['updated_plates'].append(path)
            return qlbplate.plate
        except Exception as e:
            # falls through to the implicit None return
            print(e)
            print("Could not update plate %s/%s: %s" % (qlbplate.plate_id, qlbplate.id, path))
            file_lists['unwritable_plates'].append(path)
            Session.rollback()
from qtools.lib.nstats.peaks import *
import os, unittest
from qtools.lib.qlb_factory import get_plate


def local(path):
    """
    Return ``path`` joined onto the directory that contains this file.

    os.path.join is used so that an empty directory component (module
    run from its own directory) yields a relative path instead of one
    anchored at the filesystem root.
    """
    return os.path.join(os.path.dirname(__file__), path)


# Load the QLP fixtures once at import time; if any read raises IOError,
# every fixture is set to None.
try:
    jack = get_plate(local('6543.qlp'))
    carryover = get_plate(local('7174.qlp'))
    biggap = get_plate(local('7587.qlp'))
    events = get_plate(local('8569.qlp'))
    colorcomp = get_plate(local('8977.qlp'))
except IOError:
    jack = None
    carryover = None
    biggap = None
    events = None
    colorcomp = None


@unittest.skip("Needs QLP files")
class TestPeaks(unittest.TestCase):
    """Test case for qtools.lib.nstats.peaks; skipped because it needs QLP files."""

    def setUp(self):
        """Expose the module-level fixture plates on the test instance."""
        self.jack = jack
        self.carryover = carryover
        # loaded above alongside the others but previously not exposed here
        self.biggap = biggap
        self.events = events
        self.colorcomp = colorcomp