def test_2d_feature_extraction(self):
    """ tests 2D_Feature_Extraction """
    filepath = data(test_fcs_fn)
    a = FCS(filepath=filepath, import_dataframe=True)
    a.comp_scale_FCS_data(compensation_file=comp_file,
                          gate_coords=gate_coords,
                          rescale_lim=(-0.5, 1),
                          strict=False, auto_comp=False)
    a.feature_extraction(extraction_type='2d', bins=50)
    log.debug("Feature Extraction was successful")
    binned_data = a.FCS_features
    log.debug(binned_data.histogram)
    if write_csv:
        # pickle files must be opened in binary mode
        f = open(data('2d_test_histogram.pkl'), 'wb')
        pickle.dump(binned_data.histogram, f)
        f.close()
        print "Test histogram was successfully pickled"
    else:
        f = open(data('2d_test_histogram.pkl'), 'rb')
        test_histogram = pickle.load(f)
        f.close()
        np.testing.assert_allclose(binned_data.histogram.data,
                                   test_histogram.data)
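# A minimal sketch, not part of the original suite: the write-or-compare
# pickle pattern above recurs in several of these tests, so a small helper
# could factor it out. 'data', 'write_csv', 'pickle', and 'np' are the
# module-level names already used above; the helper's name and signature are
# hypothetical.
def _check_against_pickle(obj, pkl_name):
    """Dump obj as the reference pickle when write_csv is set; otherwise
    load the reference and compare the .data arrays."""
    pkl_fp = data(pkl_name)
    if write_csv:
        with open(pkl_fp, 'wb') as f:
            pickle.dump(obj, f)
        print "Reference %s was successfully pickled" % pkl_name
    else:
        with open(pkl_fp, 'rb') as f:
            reference = pickle.load(f)
        np.testing.assert_allclose(obj.data, reference.data)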
def test_feature_extraction(self):
    """ tests ND_Feature_Extraction """
    filepath = data(test_fcs_fn)
    a = FCS(filepath=filepath, import_dataframe=True)
    a.comp_scale_FCS_data(compensation_file=comp_file,
                          gate_coords=gate_coords,
                          strict=False, rescale_lim=(-0.5, 1.0),
                          comp_flag='table', singlet_flag='fixed',
                          viable_flag='fixed')
    a.feature_extraction(extraction_type='FULL', bins=10)
    binned_data = a.FCS_features
    out_coords = binned_data.Return_Coordinates([1, 2, 3, 4])
    if write_csv:
        out_coords.to_pickle(data('test_coordinates.pkl'))
        print "Test_coordinates was successfully pickled"
        # pickle files must be opened in binary mode
        f = open(data('test_histogram.pkl'), 'wb')
        pickle.dump(binned_data.histogram, f)
        f.close()
        print "Test histogram was successfully pickled"
    else:
        test_coords = pd.read_pickle(data('test_coordinates.pkl'))
        f = open(data('test_histogram.pkl'), 'rb')
        test_histogram = pickle.load(f)
        f.close()
        np.testing.assert_allclose(out_coords.values, test_coords.values)
        np.testing.assert_allclose(binned_data.histogram.data,
                                   test_histogram.data)
def action(args):
    log.info('Creating hdf5 file [%s] with features extracted by method [%s]' %
             (args.hdf5_fp, args.feature_extraction_method))

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    # Create HDF5 object
    HDF_obj = Feature_IO(filepath=args.hdf5_fp, clobber=args.clobber)

    # Initialize empty list to collect [case, case_tube_idx, error] rows for
    # case_tube_idx's that failed feature extraction
    feature_failed_CTIx = []

    num_results = len(list(chain(*q.results.values())))
    i = 1
    log.info("Found {} case_tube_idx's".format(num_results))
    for case, case_info in q.results.items():
        # this nested loop iterates over all case_tube_idx's
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s [%s of %s]" %
                     (case, case_tube_idx, relpath, i, num_results))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx,
                       import_dataframe=True)
            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         rescale_lim=(-0.5, 1),
                                         strict=False, auto_comp=False)
                fFCS.feature_extraction(extraction_type=args.feature_extraction_method,
                                        bins=10)
                HDF_obj.push_fcs_features(case_tube_idx=case_tube_idx,
                                          FCS=fFCS, db=db)
            except ValueError as e:
                print("Skipping feature extraction for case: {} because of ValueError: {}"
                      .format(case, str(e)))
                feature_failed_CTIx.append([case, case_tube_idx, str(e)])
            except KeyError as e:
                print("Skipping FCS %s because of KeyError: %s" % (filepath, str(e)))
                feature_failed_CTIx.append([case, case_tube_idx, str(e)])
            except IntegrityError as e:
                print("Skipping Case: {}, Case_tube_idx: {}, filepath: {} because "
                      "of IntegrityError: {}".format(case, case_tube_idx,
                                                     filepath, str(e)))
            # advance the progress counter
            i += 1
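# A minimal sketch under stated assumptions: the rows collected in
# feature_failed_CTIx could be recorded in the HDF5 file's metadata via
# Feature_IO.push_failed_cti_list, which test_push_pull below exercises with
# a ['casenum', 'cti', 'errormessage'] DataFrame. Whether action() is meant
# to do this is an assumption; the helper name is hypothetical, and pandas is
# assumed importable here.
import pandas as pd  # assumed available in this script


def push_failures(HDF_obj, failed_rows):
    """failed_rows: list of [case, case_tube_idx, error_message] rows."""
    if failed_rows:
        failed_DF = pd.DataFrame(failed_rows,
                                 columns=['casenum', 'cti', 'errormessage'])
        HDF_obj.push_failed_cti_list(failed_DF)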
def test_push_pull(self):
    """ tests Feature_IO.push_fcs_features """
    # initialize filepaths
    FCS_fp = data(test_fcs_fn)
    DB_fp = path.join(self.mkoutdir(), 'test.db')
    HDF_fp = path.join(self.mkoutdir(), 'test_Feature_HDF.hdf5')

    # FCS initialization
    FCS_obj = FCS(filepath=FCS_fp, import_dataframe=True)
    FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                                gate_coords=gate_coords,
                                rescale_lim=(-0.5, 1),
                                strict=False, auto_comp=False)
    FCS_obj.feature_extraction(extraction_type='Full', bins=10)
    log.debug(FCS_obj.FCS_features.histogram)

    # db initialization
    DB_obj = FCSdatabase(db=DB_fp, rebuild=True)
    FCS_obj.meta_to_db(db=DB_obj, dir=path.abspath('.'))
    log.debug(FCS_obj.case_tube_idx)

    # hdf initialization
    HDF_obj = Feature_IO(filepath=HDF_fp)

    # push fcs_features
    HDF_obj.push_fcs_features(case_tube_idx=FCS_obj.case_tube_idx,
                              FCS=FCS_obj, db=DB_obj)

    # pull fcs_features and test single-case retrieval
    output = HDF_obj.get_fcs_features(FCS_obj.case_tube_idx)
    log.debug(output)
    np.testing.assert_allclose(output.data, FCS_obj.FCS_features.histogram.data)

    cti_list = pd.DataFrame(data=np.array([['13-12345', '1', "Dummy Error"]]),
                            index=[1],
                            columns=['casenum', 'cti', 'errormessage'])
    # push failed_cti list to "meta data"
    HDF_obj.push_failed_cti_list(cti_list)

    # pull meta data from HDF5 file
    meta_data = HDF_obj.get_meta_data()
    log.debug("File meta data is {}".format(meta_data))
def test_ML_push_pull(self):
    """ tests MergedFeatures_IO push/pull """
    # initialize filepaths
    FCS_fp = data(test_fcs_fn)
    DB_fp = path.join(self.mkoutdir(), 'test.db')
    FT_HDF_fp = path.join(self.mkoutdir(), 'test_FT_HDF.hdf5')
    ML_HDF_fp = path.join(self.mkoutdir(), 'test_ML_HDF.hdf5')

    # FCS initialization
    FCS_obj = FCS(filepath=FCS_fp, import_dataframe=True)
    FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                                gate_coords=gate_coords,
                                rescale_lim=(-0.5, 1),
                                strict=False, auto_comp=False)
    FCS_obj.feature_extraction(extraction_type='Full', bins=10)
    log.debug(FCS_obj.FCS_features.histogram)

    # db initialization
    DB_obj = FCSdatabase(db=DB_fp, rebuild=True)
    FCS_obj.meta_to_db(db=DB_obj, dir=path.abspath('.'))
    log.debug(FCS_obj.case_tube_idx)

    # feature hdf initialization
    FT_HDF_obj = Feature_IO(filepath=FT_HDF_fp)

    # push fcs_features
    FT_HDF_obj.push_fcs_features(case_tube_idx=FCS_obj.case_tube_idx,
                                 FCS=FCS_obj, db=DB_obj)

    feature_DF, not_in_data, merge_fail = FT_HDF_obj.make_single_tube_analysis(
        [FCS_obj.case_tube_idx])

    ML_HDF_obj = MergedFeatures_IO(filepath=ML_HDF_fp, clobber=True)
    ML_HDF_obj.push_features(feature_DF)
    ML_HDF_obj.push_annotations(pd.DataFrame([[test_fcs_fn, 0]],
                                             columns=['case_num', 'annotation']))
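# A possible tightening, sketched as an assumption: make_single_tube_analysis
# also returns the case_tube_idx's that were absent from the HDF5 file and
# those that failed the merge. If, as the names suggest, both unpack as empty
# containers on a clean run, the test above could assert as much:
#
#     self.assertFalse(not_in_data)
#     self.assertFalse(merge_fail)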