def test_auto_comp(self):
    """ Tests the auto compensation subroutine of comp_scale_FCS_data

    This function will provide testing of the auto_comp_tweak function called
    by comp_scale_FCS_data when the auto_comp flag is turned on.
    """
    # Maps cytometer name -> comp-table key (defined for reference; not used below)
    Convert_CytName = {'H0152': '1', 'H4710082': '3',
                       '1': '1', '2': '2', '3': '3'}

    filepath = data(test_fcs_fn)
    a = FCS(filepath=filepath, import_dataframe=True)
    a.comp_scale_FCS_data(compensation_file=comp_file,
                          gate_coords=gate_coords,
                          strict=False, comp_flag='table',
                          singlet_flag='fixed', viable_flag='fixed')

    cols = ['FSC-H', 'CD15 FITC']
    b = a.data.loc[100:105, cols]
    b_expect = pd.DataFrame(
        {'FSC-H': {105: 0.25751877, 100: 0.29451752,
                   101: 0.32627106, 102: 0.42173004},
         'CD15 FITC': {105: 0.79197961, 100: 0.79530305,
                       101: 0.44847226, 102: 0.898543}},
        dtype='float32')
    np.testing.assert_allclose(b.loc[:, cols].values,
                               b_expect.loc[:, cols].values,
                               rtol=1e-3, atol=0,
                               err_msg="Results are more different than tolerable")
def test_FCS_processing(self):
    """ Test running processing

    Looks at a small set of events (100:105) in the FSC and CD15 channels and
    makes sure the result is the same as when this function was initially set up.
    """
    filepath = data(test_fcs_fn)
    a = FCS(filepath=filepath, import_dataframe=True)
    a.comp_scale_FCS_data(compensation_file=comp_file,
                          gate_coords=gate_coords,
                          strict=False, comp_flag='table',
                          singlet_flag='fixed', viable_flag='fixed')

    if write_csv:
        a.data.to_pickle(data('fcs_data.pkl'))
        print("\nProcessed FCS data was successfully pickled\n")
    else:
        comparison_data = pd.read_pickle(data('fcs_data.pkl'))
        np.testing.assert_allclose(a.data.values, comparison_data.values,
                                   rtol=1e-3, atol=0,
                                   err_msg="FCS Data results are more different than tolerable")
def test_feature_extraction(self):
    """ Tests ND_Feature_Extraction """
    filepath = data(test_fcs_fn)
    a = FCS(filepath=filepath, import_dataframe=True)
    a.comp_scale_FCS_data(compensation_file=comp_file,
                          gate_coords=gate_coords,
                          strict=False, rescale_lim=(-0.5, 1.0),
                          comp_flag='table', singlet_flag='fixed',
                          viable_flag='fixed')
    a.feature_extraction(extraction_type='FULL', bins=10)
    binned_data = a.FCS_features
    out_coords = binned_data.Return_Coordinates([1, 2, 3, 4])

    if write_csv:
        out_coords.to_pickle(data('test_coordinates.pkl'))
        print "Test_coordinates was successfully pickled"
        f = open(data('test_histogram.pkl'), 'w')
        pickle.dump(binned_data.histogram, f)
        f.close()
        print "Test histogram was successfully pickled"
    else:
        test_coords = pd.read_pickle(data('test_coordinates.pkl'))
        f = open(data('test_histogram.pkl'), 'r')
        test_histogram = pickle.load(f)
        f.close()
        np.testing.assert_allclose(out_coords.values, test_coords.values)
        np.testing.assert_allclose(binned_data.histogram.data, test_histogram.data)
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    i = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)
            a = FCS(filepath=filepath, import_dataframe=True)
            a.comp_scale_FCS_data(compensation_file=comp_file,
                                  gate_coords=coords,
                                  strict=False, auto_comp=False)
            outfile = 'output/' + '_'.join([case, str(case_tube_idx),
                                            a.case_tube.replace(' ', '_'),
                                            a.date.strftime("%Y%m%d")]) + '.png'
            a.comp_visualize_FCS(outfile=outfile)

            i += 1
            if args.n_files is not None and i >= args.n_files:
                done = True
                break
        if done is True:
            break
def addCustomCaseData(self, file, whittle=True):
    """ Load a tab-delimited text file into database table CustomCaseData

    The file must include <case_number> and <category> columns
    NOTE: this will overwrite existing data
    """
    a = Lab_pred_table(db=self, file=file)

    # ### Handle column names ###
    a.dat.columns = [c.lower() for c in a.dat.columns.values]
    a_cols = a.dat.columns.tolist()

    # Convert 'CASE*' => 'case_number'
    case_index = next((index for index, value in enumerate(a_cols)
                       if value[:4] == 'case'), None)
    if case_index is not None:
        a_cols[case_index] = 'case_number'

    db_cols = CustomCaseData.__mapper__.columns.keys()
    cols = [c for c in a_cols if c.lower() in db_cols]

    # If only one column was captured, add a second and rename it to <category>
    if (len(db_cols) > 1) and (len(cols) == 1):
        cols.append(db_cols[1])
        a_cols[1] = cols[1]
        a.dat.columns = a_cols

    # ### If one of the columns matches, use it ###
    if len(cols) > 0:
        log.info('Adding file %s to db %s' % (file, self.db_file))
        a.dat = a.dat[cols]

        # Identify cases in the custom data but not in the meta db
        db_case_list = zip(*queryDB(self, getCases=True,
                                    not_flagged=False).results)[0]
        cases_missing_in_db = a.dat.case_number.loc[
            ~a.dat.case_number.isin(db_case_list)].tolist()

        # Write custom data
        a.dat.to_sql('CustomCaseData', con=self.engine,
                     if_exists='replace', index=False)

        # Add an empty FCS object to the db for each case missing in the meta db
        for c in cases_missing_in_db:
            log.info('Making empty FCS for {}'.format(c))
            fFCS = FCS(case_number=c, flag='CustomData_ONLY',
                       error_message='Added to db because in custom list but not in metadb')
            fFCS.meta_to_db(db=self, add_lists=True)
    else:
        raise ValueError("File %s does not have columns 'case_number' and 'category'"
                         % (file))
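# Hedged usage sketch for addCustomCaseData (not from the source). Per the
# docstring above it expects a tab-delimited text file with <case_number> and
# <category> columns; the filename and column values below are hypothetical.
#
# custom_cases.txt (tab-delimited):
#     CASE_NUMBER    category
#     12-00031       AML
#     12-00032       Normal
#
# db = FCSdatabase(db='test.db', rebuild=False)
# db.addCustomCaseData(file='custom_cases.txt')
#
# Existing rows in CustomCaseData are overwritten, and case numbers absent from
# the meta db get placeholder FCS entries flagged 'CustomData_ONLY'.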
def test_auto_singlet(self):
    """ Tests auto singlet gating """
    filepath = data(test_fcs_fn)
    # filepath = "/home/ngdavid/FCS_Data/Myeloid/12-00035/12-00035_Myeloid 1.fcs"
    a = FCS(filepath=filepath, import_dataframe=True)
    a.comp_scale_FCS_data(compensation_file=comp_file,
                          gate_coords=gate_coords,
                          strict=False, rescale_lim=(-0.5, 1.0),
                          comp_flag='table', singlet_flag='auto',
                          viable_flag='fixed', classes=5,
                          singlet_verbose=True, save_dir=self.mkoutdir())
def test_meta_to_db(self):
    """ Make sure that the push of meta data to db 'runs'

    NOTE: not explicitly checking what is in the db
    """
    root_dir = path.abspath('.')
    outfile = path.join(self.mkoutdir(), 'test.db')
    filepath = data(test_fcs_fn)
    a = FCS(filepath=filepath)

    db = FCSdatabase(db=outfile, rebuild=True)
    a.meta_to_db(db=db, dir=root_dir)
def test_query_getfiles(self):
    """ Testing querying database for getfiles dict_dict """
    root_dir = path.abspath('.')
    outfile = path.join(self.mkoutdir(), 'test.db')
    filename = "12-00031_Myeloid 1.fcs"
    filepath = path.abspath(data(filename))
    a = FCS(filepath=filepath)

    # from FlowAnal.database.FCS_database import FCSdatabase
    db = FCSdatabase(db=outfile, rebuild=True)
    a.meta_to_db(db=db, dir=root_dir)

    # Test specific positive request
    q_dict = {'tubes': ['Myeloid 1'],
              'daterange': ['2012-01-01', '2012-01-04'],
              'getfiles': True}
    self.assertEqual(db.query(**q_dict).results,
                     {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

    # Test empty query
    q_dict = {'getfiles': True}
    self.assertEqual(db.query(**q_dict).results,
                     {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

    # Test case query POS
    q_dict = {'getfiles': True, 'cases': ['12-00031']}
    self.assertEqual(db.query(**q_dict).results,
                     {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

    # Test case query NEG
    q_dict = {'getfiles': True, 'cases': ['12-00032']}
    self.assertEqual(db.query(**q_dict).results, {})

    # Test specific negative request daterange
    q_dict = {'tubes': ['Myeloid 1'],
              'daterange': ['2012-01-01', '2012-01-02'],
              'getfiles': True}
    self.assertEqual(db.query(**q_dict).results, {})

    # Test specific negative request tubes
    q_dict = {'tubes': ['Myeloid 2'],
              'daterange': ['2012-01-01', '2012-01-04'],
              'getfiles': True}
    self.assertEqual(db.query(**q_dict).results, {})
def test_comp_vis(self):
    """ Tests that the compensation visualizer subroutine in FCS successfully writes its file """
    filepath = data(test_fcs_fn)
    outfile = path.join(self.mkoutdir(), 'test_visualization.png')
    a = FCS(filepath=filepath, import_dataframe=True)
    a.comp_scale_FCS_data(compensation_file=comp_file,
                          gate_coords=gate_coords,
                          strict=False, rescale_lim=(-0.5, 1.0),
                          comp_flag='table', singlet_flag='fixed',
                          viable_flag='fixed')
    a.comp_visualize_FCS(outfile=outfile)
def test_query_getTubeInfo(self):
    """ Testing querying FCS_database for information """
    root_dir = path.abspath('.')
    outfile = path.join(self.mkoutdir(), 'test.db')
    filename = "12-00031_Myeloid 1.fcs"
    filepath = path.abspath(data(filename))
    a = FCS(filepath=filepath)

    # from FlowAnal.database.FCS_database import FCSdatabase
    db = FCSdatabase(db=outfile, rebuild=True)
    a.meta_to_db(db=db, dir=root_dir)

    # Test specific positive request
    q_dict = {'tubes': ['Myeloid 1'],
              'daterange': ['2012-01-01', '2012-01-04'],
              'getTubeInfo': True}
    self.assertEqual(db.query(**q_dict).results,
                     {u'12-00031': {u'Myeloid 1':
                                    {datetime.datetime(2012, 1, 3, 12, 0, 15):
                                     u'testfiles/12-00031_Myeloid 1.fcs'}}})

    # Test specific negative request daterange
    q_dict = {'tubes': ['Myeloid 1'],
              'daterange': ['2012-01-01', '2012-01-02'],
              'getTubeInfo': True}
    self.assertEqual(db.query(**q_dict).results, {})

    # Test specific negative request tubes
    q_dict = {'tubes': ['Myeloid 2'],
              'daterange': ['2012-01-01', '2012-01-04'],
              'getTubeInfo': True}
    self.assertEqual(db.query(**q_dict).results, {})
def action(args):
    log.info('Creating hdf5 file [%s] with features extracted by method [%s]' %
             (args.hdf5_fp, args.feature_extraction_method))

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    # Create HDF5 object
    HDF_obj = Feature_IO(filepath=args.hdf5_fp, clobber=args.clobber)

    # Initialize empty list to collect case_tube_idx's that failed feature extraction
    feature_failed_CTIx = []

    num_results = len(list(chain(*q.results.values())))
    i = 1
    log.info("Found {} case_tube_idx's".format(num_results))
    for case, case_info in q.results.items():
        # This nested loop iterates over all case_tube_idx's
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s [%s of %s]" %
                     (case, case_tube_idx, relpath, i, num_results))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx,
                       import_dataframe=True)
            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         rescale_lim=(-0.5, 1),
                                         strict=False, auto_comp=False)
                fFCS.feature_extraction(extraction_type=args.feature_extraction_method,
                                        bins=10)
                HDF_obj.push_fcs_features(case_tube_idx=case_tube_idx,
                                          FCS=fFCS, db=db)
            except ValueError as e:
                print("Skipping feature extraction for case: {} because of 'ValueError {}'".
                      format(case, str(e)))
                feature_failed_CTIx.append([case, case_tube_idx, str(e)])  # record failure
            except KeyError as e:
                print("Skipping FCS %s because of KeyError: %s" % (filepath, str(e)))
                feature_failed_CTIx.append([case, case_tube_idx, str(e)])
            except IntegrityError as e:
                print("Skipping Case: {}, Case_tube_idx: {}, filepath: {} because "
                      "of IntegrityError: {}".format(case, case_tube_idx,
                                                     filepath, str(e)))
                feature_failed_CTIx.append([case, case_tube_idx, str(e)])
            i += 1
def action(args):
    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Copy database to out database
    shutil.copyfile(args.db, args.outdb)
    out_db = FCSdatabase(db=args.outdb, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    n = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx,
                       import_dataframe=True)
            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         strict=False, auto_comp=False,
                                         **vars(args))
                fFCS.extract_FCS_histostats()
            except:
                fFCS.flag = 'stats_extraction_fail'
                fFCS.error_message = str(sys.exc_info()[0])
            fFCS.histostats_to_db(db=out_db)

            n += 1
            if args.n is not None and n >= args.n:
                done = True
                break
        if done is True:
            break
def test_2d_feature_extraction(self):
    """ Tests 2D_Feature_Extraction """
    filepath = data(test_fcs_fn)
    a = FCS(filepath=filepath, import_dataframe=True)
    a.comp_scale_FCS_data(compensation_file=comp_file,
                          gate_coords=gate_coords,
                          rescale_lim=(-0.5, 1),
                          strict=False, auto_comp=False)
    a.feature_extraction(extraction_type='2d', bins=50)
    log.debug("Feature Extraction was successful")
    binned_data = a.FCS_features
    log.debug(binned_data.histogram)

    if write_csv:
        f = open(data('2d_test_histogram.pkl'), 'w')
        pickle.dump(binned_data.histogram, f)
        f.close()
        print "Test histogram was successfully pickled"
    else:
        f = open(data('2d_test_histogram.pkl'), 'r')
        test_histogram = pickle.load(f)
        f.close()
        np.testing.assert_allclose(binned_data.histogram.data,
                                   test_histogram.data)
def test_GatingToggle(self):
    """ Tests the HistoStats information subroutines

    :return:
    """
    filepath = data(test_fcs_fn)
    a = FCS(filepath=filepath, import_dataframe=True)
    a.comp_scale_FCS_data(compensation_file=comp_file,
                          gate_coords=gate_coords,
                          strict=False, rescale_lim=(-0.5, 1.0),
                          comp_flag='table', singlet_flag=None,
                          viable_flag='fixed')
    a.extract_FCS_histostats()

    if write_csv:
        pd.Series(a.TubeStats).to_pickle(data('GatingTubeStats.pkl'))
        print("\nHistoStats successfully written\n")
    else:
        TubeStats = pd.read_pickle(data('GatingTubeStats.pkl'))
        np.testing.assert_allclose(pd.Series(a.TubeStats).values,
                                   TubeStats.values,
                                   rtol=1e-3, atol=0,
                                   err_msg="Tube Statistics results are more different than tolerable")
def worker(in_list, **kwargs):
    """ Process a single [filepath, case_tube_idx] pair and return the FCS object

    TODO: still need to work on handling of cases that did not extract correctly
    """
    filepath = in_list[0]
    case_tube_idx = in_list[1]
    fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx,
               import_dataframe=True)
    try:
        fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                 gate_coords=gate_coords,
                                 strict=False, auto_comp=False, **kwargs)
        fFCS.extract_FCS_histostats()
        fFCS.clear_FCS_cache()
    except:
        fFCS.flag = 'stats_extraction_fail'
        fFCS.error_message = str(sys.exc_info()[0])
    return fFCS
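# Hedged usage sketch (not from the source): because worker() takes a single
# [filepath, case_tube_idx] argument and returns the FCS object even on failure
# (flagged 'stats_extraction_fail'), it can be fanned out with a multiprocessing
# pool; `file_list` and `out_db` are hypothetical names.
#
# from multiprocessing import Pool
#
# pool = Pool(processes=4)
# results = pool.map(worker, [[fp, cti] for cti, fp in enumerate(file_list)])
# pool.close()
# pool.join()
# for fFCS in results:                  # collect results in the parent process
#     fFCS.histostats_to_db(db=out_db)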
def test_empty_FCS(self):
    """ Testing loading FCS filepath that does not load properly ==> empty """
    filename = "99-80923_Fake.fcs"
    filepath = data(filename)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        a = FCS(filepath=filepath)

    self.assertEqual(a.filepath, filepath)
    self.assertEqual(a.filename, filename)
    self.assertEqual(a.case_number, '99-80923')
    self.assertFalse(hasattr(a, 'num_events'))
def action(args):
    # Collect files/dirs
    Finder = Find_Clinical_FCS_Files(Filelist_Path=args.file_list)

    # Connect to database (and rebuild)
    db = FCSdatabase(db=args.db_filepath, rebuild=True)
    print "Building database %s" % db.db_file

    # Process files/dirs
    case_tube_idx = 0
    for f in Finder.filenames:
        try:
            fFCS = FCS(filepath=f, case_tube_idx=case_tube_idx)
            fFCS.meta_to_db(db=db, dir=args.dir, add_lists=True)
        except:
            print "Skipping FCS %s because of unknown error related to: %s" % \
                (f, sys.exc_info()[0])

        print("{:6d} Cases uploaded\r".format(case_tube_idx)),
        case_tube_idx += 1

        if args.n is not None and case_tube_idx >= args.n:
            break
def test_ML_push_pull(self):
    """ Tests MergedFeature_IO.push_fcs_features """
    # Initialize filepaths
    FCS_fp = data(test_fcs_fn)
    DB_fp = path.join(self.mkoutdir(), 'test.db')
    FT_HDF_fp = path.join(self.mkoutdir(), 'test_FT_HDF.hdf5')
    ML_HDF_fp = path.join(self.mkoutdir(), 'test_ML_HDF.hdf5')

    # FCS initialization
    FCS_obj = FCS(filepath=FCS_fp, import_dataframe=True)
    FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                                gate_coords=gate_coords,
                                rescale_lim=(-0.5, 1),
                                strict=False, auto_comp=False)
    FCS_obj.feature_extraction(extraction_type='Full', bins=10)
    log.debug(FCS_obj.FCS_features.histogram)

    # db initialization
    DB_obj = FCSdatabase(db=DB_fp, rebuild=True)
    FCS_obj.meta_to_db(db=DB_obj, dir=path.abspath('.'))
    log.debug(FCS_obj.case_tube_idx)

    # Feature hdf initialization
    FT_HDF_obj = Feature_IO(filepath=FT_HDF_fp)

    # Push fcs_features
    FT_HDF_obj.push_fcs_features(case_tube_idx=FCS_obj.case_tube_idx,
                                 FCS=FCS_obj, db=DB_obj)

    feature_DF, not_in_data, merge_fail = FT_HDF_obj.make_single_tube_analysis(
        [FCS_obj.case_tube_idx])

    ML_HDF_obj = MergedFeatures_IO(filepath=ML_HDF_fp, clobber=True)
    ML_HDF_obj.push_features(feature_DF)
    ML_HDF_obj.push_annotations(pd.DataFrame([[test_fcs_fn, 0]],
                                             columns=['case_num', 'annotation']))
def test_push_pull(self):
    """ Tests Feature_IO.push_fcs_features """
    # Initialize filepaths
    FCS_fp = data(test_fcs_fn)
    DB_fp = path.join(self.mkoutdir(), 'test.db')
    HDF_fp = path.join(self.mkoutdir(), 'test_Feature_HDF.hdf5')

    # FCS initialization
    FCS_obj = FCS(filepath=FCS_fp, import_dataframe=True)
    FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                                gate_coords=gate_coords,
                                rescale_lim=(-0.5, 1),
                                strict=False, auto_comp=False)
    FCS_obj.feature_extraction(extraction_type='Full', bins=10)
    log.debug(FCS_obj.FCS_features.histogram)

    # db initialization
    DB_obj = FCSdatabase(db=DB_fp, rebuild=True)
    FCS_obj.meta_to_db(db=DB_obj, dir=path.abspath('.'))
    log.debug(FCS_obj.case_tube_idx)

    # hdf initialization
    HDF_obj = Feature_IO(filepath=HDF_fp)

    # Push fcs_features
    HDF_obj.push_fcs_features(case_tube_idx=FCS_obj.case_tube_idx,
                              FCS=FCS_obj, db=DB_obj)

    # Pull fcs_features and test single-case retrieval
    output = HDF_obj.get_fcs_features(FCS_obj.case_tube_idx)
    log.debug(output)
    np.testing.assert_allclose(output.data, FCS_obj.FCS_features.histogram.data)

    cti_list = pd.DataFrame(data=np.array([['13-12345', '1', "Dummy Error"]]),
                            index=[1],
                            columns=['casenum', 'cti', 'errormessage'])
    # Push failed_cti list to "meta data"
    HDF_obj.push_failed_cti_list(cti_list)

    # Pull meta data from HDF5 file
    meta_data = HDF_obj.get_meta_data()
    log.debug("File meta data is {}".format(meta_data))
def test_loadFCS(self):
    """ Testing loading FCS from file using FCS and loadFCS modules """
    filename = "12-00031_Myeloid 1.fcs"
    filepath = data(filename)
    a = FCS(filepath=filepath, import_dataframe=True)

    if write_csv is True:
        write = {}
        write['filepath'] = a.filepath
        write['filename'] = a.filename
        write['case_number'] = a.case_number
        write['cytometer'] = a.cytometer
        write['date'] = a.date
        write['case_tube'] = a.case_tube
        write['num_events'] = a.num_events
        # write['version'] = a.version
        header_info = pd.Series(write)
        header_info.to_pickle(data('header_info.pkl'))
        a.parameters.to_pickle(data('parameter_info.pkl'))
        log.info('LoadFCS header and parameter data successfully written')
    else:
        header_info = pd.read_pickle(data('header_info.pkl'))
        self.assertFalse(a.empty)
        self.assertEqual(a.filepath, header_info['filepath'])
        self.assertEqual(a.filename, header_info['filename'])
        self.assertEqual(a.case_number, header_info['case_number'])
        self.assertEqual(a.cytometer, header_info['cytometer'])
        self.assertEqual(a.date, header_info['date'])
        self.assertEqual(a.case_tube, header_info['case_tube'])
        self.assertEqual(a.num_events, header_info['num_events'])
        # self.assertEqual(a.version, header_info['version'])
        self.assertTrue(hasattr(a, 'data'))

        parameters = pd.read_pickle(data('parameter_info.pkl'))
        assert_frame_equal(a.parameters, parameters)
def __parse_text(self):  # NOTE: original def line missing; signature reconstructed from context
    """ Parse the TEXT segment of the FCS file into a keyword: value dict """
    start = self.header['text_start']
    stop = self.header['text_stop']
    text = self.__get_block(start, stop)

    delim = text[0]
    if delim == r'|':
        delim = '\|'
    if delim == r'\a'[0]:   # test for delimiter being \
        delim = '\\\\'      # regex will require it to be \\
    if delim != text[-1]:
        warn("text in segment does not start and end with delimiter")

    tmp = text[1:-1].replace('$', '')
    # Match the delimiter character unless it is doubled (a doubled
    # delimiter is an escaped literal inside a value)
    regex = compile('(?<=[^%s])%s(?!%s)' % (delim, delim, delim))
    tmp = regex.split(tmp)
    return dict(zip([x.lower() for x in tmp[::2]], tmp[1::2]))


if __name__ == "__main__":
    filepath = "/home/ngdavid/Desktop/Ubuntu_Dropbox/Myeloid_Data/Myeloid/10-13469/10-13469_Myeloid 1.fcs"
    # filepath = "/home/ngdavid/Desktop/Ubuntu_Dropbox/Myeloid_Data/14-21257_B Cells.fcs"

    from FlowAnal.FCS import FCS
    a = FCS()
    FCSobject = loadFCS(FCS=a, filepath=filepath, version='test',
                        import_dataframe=False)
    print FCSobject.date
    print FCSobject.case_tube
    print FCSobject.parameters
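# Hedged illustration (not from the source) of the doubled-delimiter regex used
# above: in an FCS TEXT segment, a delimiter occurring inside a value is escaped
# by doubling it, so the split must skip doubled delimiters. The keywords below
# are hypothetical. (The special-casing of '|' and '\' in the method exists
# because those delimiters are regex metacharacters; '/' needs no escaping.)
from re import compile

text = '/$P1N/FSC-H/$P1S/Forward//Scatter/'   # '//' is an escaped literal '/'
delim = text[0]
tmp = text[1:-1].replace('$', '')
regex = compile('(?<=[^%s])%s(?!%s)' % (delim, delim, delim))
tmp = regex.split(tmp)
print dict(zip([x.lower() for x in tmp[::2]], tmp[1::2]))
# -> {'p1n': 'FSC-H', 'p1s': 'Forward//Scatter'}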
def data(fname):
    return path.join(datadir, fname)


coords = {'singlet': [(0.01, 0.06), (0.60, 0.75), (0.93, 0.977),
                      (0.988, 0.86), (0.456, 0.379), (0.05, 0.0), (0.0, 0.0)],
          'viable': [(0.358, 0.174), (0.609, 0.241), (0.822, 0.132),
                     (0.989, 0.298), (1.0, 1.0), (0.5, 1.0), (0.358, 0.174)]}

comp_file = {'H0152': root + '/FlowAnal/data/Spectral_Overlap_Lib_LSRA.txt',
             '2': root + '/FlowAnal/data/Spectral_Overlap_Lib_LSRB.txt'}

filename = "12-00031_Myeloid 1.fcs"
filepath = data(filename)

FCS_obj = FCS(filepath=filepath, import_dataframe=True)
FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                            gate_coords=coords,
                            strict=False)

figure()
ax = ['SSC-H', 'CD45 APC-H7']
plot(FCS_obj.data[ax[0]], FCS_obj.data[ax[1]], 'b,')
title(FCS_obj.case_tube)
xlim(0, 1)
ylim(0, 1)
xlabel(ax[0])
ylabel(ax[1])

"""
filename = "12-00005_Bone Marrow WBC.fcs"
"""
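# Hedged illustration (not from the source): the 'singlet' and 'viable' gate
# coordinates above are polygon vertices in scaled (0-1) channel space. One
# common way to apply such a fixed polygon gate is a point-in-polygon test,
# e.g. with matplotlib.path.Path; the event values below are hypothetical.
import numpy as np
from matplotlib.path import Path

singlet_gate = Path(coords['singlet'])
events = np.array([[0.5, 0.55], [0.9, 0.1]])    # scaled event coordinates
mask = singlet_gate.contains_points(events)     # True for events inside the gate
print mask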