Example #1
    def test_auto_comp(self):
        """ Tests the auto compensation subroutine of comp_scale_FCS_data

        This function will provide testing of the auto_comp_tweak function called \
        by comp_scale_FCS_data when auto_comp flag is turned on.
        """

        Convert_CytName = {'H0152':'1', 'H4710082':'3',
                           '1':'1', '2':'2', '3':'3'}

        filepath = data(test_fcs_fn)
        
        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file, gate_coords=gate_coords,
                              strict=False, comp_flag='table', singlet_flag='fixed',
                              viable_flag='fixed')

        cols = ['FSC-H', 'CD15 FITC']
        b = a.data.loc[100:105, cols]

        b_expect = pd.DataFrame({'FSC-H': {105: 0.25751877, 100: 0.29451752,
                                           101: 0.32627106, 102: 0.42173004},
                                 'CD15 FITC': {105: 0.79197961, 100: 0.79530305,
                                               101: 0.44847226, 102: 0.898543}}, dtype='float32')
        np.testing.assert_allclose(b.loc[:, cols].values, b_expect.loc[:, cols].values,
                                   rtol=1e-3, atol=0,
                                   err_msg="Results are more different than tolerable")
Example #2
    def test_FCS_processing(self):
        """ Test running processing

        Looking at small set of events (100:105) and FSC and CD15 channel and making sure \
        that result is the same as when this function was initially setup
        """

        filepath = data(test_fcs_fn)

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file,
                              gate_coords=gate_coords,
                              strict=False,
                              comp_flag='table',
                              singlet_flag='fixed',
                              viable_flag='fixed')

        if write_csv:
            a.data.to_pickle(data('fcs_data.pkl'))
            print("\nProcessed FCS data was successfully pickled\n")
        else:
            comparison_data = pd.read_pickle(data('fcs_data.pkl'))
            np.testing.assert_allclose(a.data.values,
                                       comparison_data.values,
                                       rtol=1e-3,
                                       atol=0,
                                       err_msg="FCS Data results are more \
                                       different than tolerable")
Example #3
    def test_feature_extraction(self):
        """ tests ND_Feature_Extraction """
        filepath = data(test_fcs_fn)

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file, gate_coords=gate_coords,
                              strict=False, rescale_lim=(-0.5, 1.0),
                              comp_flag='table', singlet_flag='fixed',
                              viable_flag='fixed')
        a.feature_extraction(extraction_type='FULL', bins=10)

        binned_data = a.FCS_features
        out_coords = binned_data.Return_Coordinates([1, 2, 3, 4])

        if write_csv:
            out_coords.to_pickle(data('test_coordinates.pkl'))
            print "Test_coordinates was succefully pickled"
            f = open(data('test_histogram.pkl'),'w')
            pickle.dump(binned_data.histogram,f)
            f.close()
            print "Test histogram was succefully pickled"
        else:
            test_coords = pd.read_pickle(data('test_coordinates.pkl'))
            f = open(data('test_histogram.pkl'), 'rb')
            test_histogram = pickle.load(f)
            f.close()
            np.testing.assert_allclose(out_coords.values, test_coords.values)
            np.testing.assert_allclose(binned_data.histogram.data, test_histogram.data)
Example #4
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    i = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" % (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)

            a = FCS(filepath=filepath, import_dataframe=True)
            a.comp_scale_FCS_data(compensation_file=comp_file,
                                  gate_coords=coords,
                                  strict=False, auto_comp=False)
            outfile = 'output/' + '_'.join([case, str(case_tube_idx),
                                            a.case_tube.replace(' ', '_'),
                                            a.date.strftime("%Y%m%d")]) + '.png'
            a.comp_visualize_FCS(outfile=outfile)

            i += 1
            if args.n_files is not None and i >= args.n_files:
                done = True
                break
        if done is True:
            break
Example #5
    def addCustomCaseData(self, file, whittle=True):
        """ Method to load file (tab-text) into database table CustomCaseData

        This must include <case_number> and <category> columns
        Note: this will overwrite existing data
        """

        a = Lab_pred_table(db=self, file=file)

        # ### Handle column names ###
        a.dat.columns = [c.lower() for c in a.dat.columns.values]
        a_cols = a.dat.columns.tolist()

        # Convert 'CASE*' => 'case_number'
        case_index = next((index for index, value in enumerate(a_cols)
                           if value[:4] == 'case'), None)
        if case_index is not None:
            a_cols[case_index] = 'case_number'

        db_cols = CustomCaseData.__mapper__.columns.keys()
        cols = [c for c in a_cols if c.lower() in db_cols]

        # If only one column matched, append the second db column name and
        # rename the file's second column to it (i.e. <category>)
        if (len(db_cols) > 1) and (len(cols) == 1):
            cols.append(db_cols[1])
            a_cols[1] = cols[1]

        a.dat.columns = a_cols

        # ### If at least one of the columns matches, use it ###
        if (len(cols) > 0):
            log.info('Adding file %s to db %s' % (file, self.db_file))

            a.dat = a.dat[cols]

            # Identify cases in custom data but not in meta db
            db_case_list = zip(*queryDB(self, getCases=True, not_flagged=False).results)[0]
            cases_missing_in_db = a.dat.case_number.loc[
                ~a.dat.case_number.isin(db_case_list)].tolist()

            # Write custom data
            a.dat.to_sql('CustomCaseData', con=self.engine,
                         if_exists='replace', index=False)

            # Add empty FCS objects to db for each in cases_to_exclude
            for c in cases_missing_in_db:
                log.info('Making empty FCS for {}'.format(c))
                fFCS = FCS(case_number=c,
                           flag='CustomData_ONLY',
                           error_message='Added to db because in custom list but not in metadb')
                fFCS.meta_to_db(db=self, add_lists=True)
        else:
            raise ValueError("File %s does not have columns 'case_number' and 'category'" % (file))
Example #6
    def test_auto_singlet(self):
        """
        Tests auto singlet gating
        """
        filepath = data(test_fcs_fn)
        # filepath = "/home/ngdavid/FCS_Data/Myeloid/12-00035/12-00035_Myeloid 1.fcs"
        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file, gate_coords=gate_coords,
                              strict=False, rescale_lim=(-0.5, 1.0),
                              comp_flag='table', singlet_flag='auto',
                              viable_flag='fixed', classes=5,
                              singlet_verbose=True, save_dir=self.mkoutdir())
Example #7
    def test_auto_comp(self):
        """ Tests the auto compensation subroutine of comp_scale_FCS_data

        This function will provide testing of the auto_comp_tweak function called \
        by comp_scale_FCS_data when auto_comp flag is turned on.
        """

        Convert_CytName = {
            'H0152': '1',
            'H4710082': '3',
            '1': '1',
            '2': '2',
            '3': '3'
        }

        filepath = data(test_fcs_fn)

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file,
                              gate_coords=gate_coords,
                              strict=False,
                              comp_flag='table',
                              singlet_flag="fixed",
                              viable_flag='fixed')

        cols = ['FSC-H', 'CD15 FITC']
        b = a.data.loc[100:105, cols]

        b_expect = pd.DataFrame(
            {
                'FSC-H': {
                    105: 0.25751877,
                    100: 0.29451752,
                    101: 0.32627106,
                    102: 0.42173004
                },
                'CD15 FITC': {
                    105: 0.79197961,
                    100: 0.79530305,
                    101: 0.44847226,
                    102: 0.898543
                }
            },
            dtype='float32')
        np.testing.assert_allclose(b.loc[:, cols].values,
                                   b_expect.loc[:, cols].values,
                                   rtol=1e-3,
                                   atol=0,
                                   err_msg="Results are more different \
                                   than tolerable")
Example #8
    def test_meta_to_db(self):
        """ Make sure that the push of meta data to db 'runs'

        NOTE: not explicitly checking what is in the db
        """

        root_dir = path.abspath('.')
        outfile = path.join(self.mkoutdir(), 'test.db')

        filepath = data(test_fcs_fn)
        a = FCS(filepath=filepath)
        db = FCSdatabase(db=outfile, rebuild=True)
        a.meta_to_db(db=db, dir=root_dir)
        """
Example #9
    def test_query_getfiles(self):
        """ Testing querying database for getfiles dict_dict """

        root_dir = path.abspath('.')
        outfile = path.join(self.mkoutdir(), 'test.db')
        filename = "12-00031_Myeloid 1.fcs"
        filepath = path.abspath(data(filename))

        a = FCS(filepath=filepath)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=outfile, rebuild=True)

        a.meta_to_db(db=db, dir=root_dir)

        # Test specific positive request
        q_dict = {'tubes': ['Myeloid 1'],
                  'daterange': ['2012-01-01', '2012-01-04'],
                  'getfiles': True}
        self.assertEqual(db.query(**q_dict).results,
                         {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

        # Test empty query
        q_dict = {'getfiles': True}
        self.assertEqual(db.query(**q_dict).results,
                         {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

        # Test case query POS
        q_dict = {'getfiles': True,
                  'cases': ['12-00031']}
        self.assertEqual(db.query(**q_dict).results,
                         {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

        # Test case query NEG
        q_dict = {'getfiles': True,
                  'cases': ['12-00032']}
        self.assertEqual(db.query(**q_dict).results, {})

        # Test specific negative request daterange
        q_dict = {'tubes': ['Myeloid 1'],
                  'daterange': ['2012-01-01', '2012-01-02'],
                  'getfiles': True}
        self.assertEqual(db.query(**q_dict).results,
                         {})

        # Test specific negative request tubes
        q_dict = {'tubes': ['Myeloid 2'],
                  'daterange': ['2012-01-01', '2012-01-04'],
                  'getfiles': True}
        self.assertEqual(db.query(**q_dict).results, {})
Example #10
    def test_comp_vis(self):
        """
        Tests the compensation visualizer subroutine in FCS successfully writes file
        """

        filepath = data(test_fcs_fn)

        outfile = path.join(self.mkoutdir(), 'test_visualization.png')

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file, gate_coords=gate_coords,
                              strict=False, rescale_lim=(-0.5, 1.0),
                              comp_flag='table', singlet_flag='fixed',
                              viable_flag='fixed')
        a.comp_visualize_FCS(outfile=outfile)
Example #11
    def test_meta_to_db(self):
        """ Make sure that the push of meta data to db 'runs'

        NOTE: not explicitly checking what is in the db
        """

        root_dir = path.abspath('.')
        outfile = path.join(self.mkoutdir(), 'test.db')

        filepath = data(test_fcs_fn)
        a = FCS(filepath=filepath)
        db = FCSdatabase(db=outfile, rebuild=True)
        a.meta_to_db(db=db, dir=root_dir)

        """
Example #12
    def test_auto_singlet(self):
        """
        Tests auto singlet gating
        """
        filepath = data(test_fcs_fn)
        # filepath = "/home/ngdavid/FCS_Data/Myeloid/12-00035/12-00035_Myeloid 1.fcs"
        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file,
                              gate_coords=gate_coords,
                              strict=False,
                              rescale_lim=(-0.5, 1.0),
                              comp_flag='table',
                              singlet_flag='auto',
                              viable_flag='fixed',
                              classes=5,
                              singlet_verbose=True,
                              save_dir=self.mkoutdir())
Example #13
    def test_query_getTubeInfo(self):
        """ Testing querying FCS_database for information """

        root_dir = path.abspath('.')
        outfile = path.join(self.mkoutdir(), 'test.db')
        filename = "12-00031_Myeloid 1.fcs"
        filepath = path.abspath(data(filename))

        a = FCS(filepath=filepath)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=outfile, rebuild=True)

        a.meta_to_db(db=db, dir=root_dir)

        # Test specific positive request
        q_dict = {
            'tubes': ['Myeloid 1'],
            'daterange': ['2012-01-01', '2012-01-04'],
            'getTubeInfo': True
        }
        self.assertEqual(
            db.query(**q_dict).results, {
                u'12-00031': {
                    u'Myeloid 1': {
                        datetime.datetime(2012, 1, 3, 12, 0, 15):
                        u'testfiles/12-00031_Myeloid 1.fcs'
                    }
                }
            })

        # Test specific negative request daterange
        q_dict = {
            'tubes': ['Myeloid 1'],
            'daterange': ['2012-01-01', '2012-01-02'],
            'getTubeInfo': True
        }
        self.assertEqual(db.query(**q_dict).results, {})

        # Test specific negative request tubes
        q_dict = {
            'tubes': ['Myeloid 2'],
            'daterange': ['2012-01-01', '2012-01-04'],
            'getTubeInfo': True
        }
        self.assertEqual(db.query(**q_dict).results, {})
Example #14
def action(args):
    log.info('Creating hdf5 file [%s] with features extracted by method [%s]' %
             (args.hdf5_fp, args.feature_extraction_method))

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    # Create HDF5 object
    HDF_obj = Feature_IO(filepath=args.hdf5_fp, clobber=args.clobber)

    # initialize an empty list to collect case_tube_idx values that failed feature extraction
    feature_failed_CTIx = []

    num_results = len(list(chain(*q.results.values())))
    i = 1
    log.info("Found {} case_tube_idx's".format(num_results))
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            # this nested for loop iterates over all case_tube_idx
            log.info("Case: %s, Case_tube_idx: %s, File: %s [%s of %s]" %
                     (case, case_tube_idx, relpath, i, num_results))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx, import_dataframe=True)

            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         rescale_lim=(-0.5, 1),
                                         strict=False, auto_comp=False)
                fFCS.feature_extraction(extraction_type=args.feature_extraction_method,
                                        bins=10)
                HDF_obj.push_fcs_features(case_tube_idx=case_tube_idx,
                                          FCS=fFCS, db=db)
            except ValueError, e:
                print("Skipping feature extraction for case: {} because of 'ValueError {}'".
                      format(case, str(e)))
            except KeyError, e:
                print "Skipping FCS %s because of KeyError: %s" % (filepath, str(e))
            except IntegrityError, e:
                print("Skipping Case: {}, Case_tube_idx: {}, filepath: {} because "
                      "of IntegrityError: {}".format(case, case_tube_idx, filepath, str(e)))

            i += 1
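
The action(args) entry points in these scripts are presumably dispatched from an argparse-based CLI; the flag spellings are not shown anywhere on this page, so the wiring below is only a sketch that supplies the attribute names the feature-extraction action reads (args.db, args.dir, args.hdf5_fp, args.feature_extraction_method, args.clobber).

import argparse

parser = argparse.ArgumentParser(description='Extract FCS features into an HDF5 file')
parser.add_argument('--db', required=True, help='input FCS metadata database')
parser.add_argument('--dir', default='.', help='root directory containing the FCS files')
parser.add_argument('--hdf5-fp', dest='hdf5_fp', required=True, help='output HDF5 filepath')
parser.add_argument('--feature-extraction-method', dest='feature_extraction_method',
                    default='FULL', help='passed through to FCS.feature_extraction()')
parser.add_argument('--clobber', action='store_true', help='overwrite an existing HDF5 file')
action(parser.parse_args())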
Example #15
def action(args):

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Copy database to out database
    shutil.copyfile(args.db, args.outdb)
    out_db = FCSdatabase(db=args.outdb, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    n = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath,
                       case_tube_idx=case_tube_idx,
                       import_dataframe=True)

            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         strict=False,
                                         auto_comp=False,
                                         **vars(args))
                fFCS.extract_FCS_histostats()
            except Exception:
                fFCS.flag = 'stats_extraction_fail'
                fFCS.error_message = str(sys.exc_info()[0])

            fFCS.histostats_to_db(db=out_db)

            n += 1
            if args.n is not None and n >= args.n:
                done = True
                break
        if done is True:
            break
Example #16
    def test_2d_feature_extraction(self):
        """ tests 2D_Feature_Extraction """

        filepath = data(test_fcs_fn)

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file,
                              gate_coords=gate_coords,
                              rescale_lim=(-0.5, 1),
                              strict=False,
                              auto_comp=False)
        a.feature_extraction(extraction_type='2d', bins=50)
        log.debug("Feature Extraction was successful")
        binned_data = a.FCS_features
        log.debug(binned_data.histogram)
        if write_csv:
            f = open(data('2d_test_histogram.pkl'), 'wb')
            pickle.dump(binned_data.histogram, f)
            f.close()
            print "Test histogram was successfully pickled"
        else:
            f = open(data('2d_test_histogram.pkl'), 'rb')
            test_histogram = pickle.load(f)
            f.close()
            np.testing.assert_allclose(binned_data.histogram.data,
                                       test_histogram.data)
Example #17
    def test_feature_extraction(self):
        """ tests ND_Feature_Extraction """
        filepath = data(test_fcs_fn)

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file,
                              gate_coords=gate_coords,
                              strict=False,
                              rescale_lim=(-0.5, 1.0),
                              comp_flag='table',
                              singlet_flag='fixed',
                              viable_flag='fixed')
        a.feature_extraction(extraction_type='FULL', bins=10)

        binned_data = a.FCS_features
        out_coords = binned_data.Return_Coordinates([1, 2, 3, 4])

        if write_csv:
            out_coords.to_pickle(data('test_coordinates.pkl'))
            print "Test_coordinates was succefully pickled"
            f = open(data('test_histogram.pkl'), 'w')
            pickle.dump(binned_data.histogram, f)
            f.close()
            print "Test histogram was succefully pickled"
        else:
            test_coords = pd.read_pickle(data('test_coordinates.pkl'))
            f = open(data('test_histogram.pkl'), 'r')
            test_histogram = pickle.load(f)
            f.close()
            np.testing.assert_allclose(out_coords.values, test_coords.values)
            np.testing.assert_allclose(binned_data.histogram.data,
                                       test_histogram.data)
Example #18
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    i = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)

            a = FCS(filepath=filepath, import_dataframe=True)
            a.comp_scale_FCS_data(compensation_file=comp_file,
                                  gate_coords=coords,
                                  strict=False,
                                  auto_comp=False)
            outfile = 'output/' + '_'.join([
                case,
                str(case_tube_idx),
                a.case_tube.replace(' ', '_'),
                a.date.strftime("%Y%m%d")
            ]) + '.png'
            a.comp_visualize_FCS(outfile=outfile)

            i += 1
            if args.n_files is not None and i >= args.n_files:
                done = True
                break
        if done is True:
            break
Example #19
    def test_GatingToggle(self):
        """ Tests the HistoStats information subroutines
        :return:
        """

        filepath = data(test_fcs_fn)

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file,
                              gate_coords=gate_coords,
                              strict=False,
                              rescale_lim=(-0.5, 1.0),
                              comp_flag='table',
                              singlet_flag=None,
                              viable_flag='fixed')
        a.extract_FCS_histostats()

        if write_csv:
            pd.Series(a.TubeStats).to_pickle(data('GatingTubeStats.pkl'))
            print("\nHistoStats successfully written\n")
        else:
            TubeStats = pd.read_pickle(data('GatingTubeStats.pkl'))

            np.testing.assert_allclose(
                pd.Series(a.TubeStats).values,
                TubeStats.values,
                rtol=1e-3,
                atol=0,
                err_msg="Tube Statistics results are more \
                                       different than tolerable")
Example #20
def worker(in_list, **kwargs):
    """
    Still need to work on handling of cases that did not extract correctly
    """
    filepath = in_list[0]
    case_tube_idx = in_list[1]
    fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx, import_dataframe=True)
    try:
        fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                 gate_coords=gate_coords,
                                 strict=False, auto_comp=False, **kwargs)
        fFCS.extract_FCS_histostats()
        fFCS.clear_FCS_cache()
    except Exception:
        fFCS.flag = 'stats_extraction_fail'
        fFCS.error_message = str(sys.exc_info()[0])

    return fFCS
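
Because worker() takes a single (filepath, case_tube_idx) pair and returns the processed FCS object even on failure, it maps cleanly over a multiprocessing pool. A minimal driver sketch, assuming worker is defined at module top level (required for pickling) and using illustrative file paths:

from multiprocessing import Pool

work_items = [('testfiles/12-00031_Myeloid 1.fcs', 0),
              ('testfiles/12-00005_Bone Marrow WBC.fcs', 1)]

pool = Pool(processes=2)
fcs_objects = pool.map(worker, work_items)  # extra comp_scale_FCS_data kwargs keep their defaults
pool.close()
pool.join()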
Example #21
    def test_FCS_processing(self):
        """ Test running processing

        Looking at small set of events (100:105) and FSC and CD15 channel and making sure \
        that result is the same as when this function was initially setup
        """

        filepath = data(test_fcs_fn)

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file, gate_coords=gate_coords,
                              strict=False, comp_flag='table', singlet_flag='fixed',
                              viable_flag='fixed')

        if write_csv:
            a.data.to_pickle(data('fcs_data.pkl'))
            print("\nProcessed FCS data was successfully pickled\n")
        else:
            comparison_data = pd.read_pickle(data('fcs_data.pkl'))
            np.testing.assert_allclose(a.data.values, comparison_data.values,
                                       rtol=1e-3, atol=0,
                                       err_msg="FCS Data results are more "
                                               "different than tolerable")
Example #22
def worker(in_list, **kwargs):
    """
    Still need to work on handling of cases that did not extract correctly
    """
    filepath = in_list[0]
    case_tube_idx = in_list[1]
    fFCS = FCS(filepath=filepath,
               case_tube_idx=case_tube_idx,
               import_dataframe=True)
    try:
        fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                 gate_coords=gate_coords,
                                 strict=False,
                                 auto_comp=False,
                                 **kwargs)
        fFCS.extract_FCS_histostats()
        fFCS.clear_FCS_cache()
    except Exception:
        fFCS.flag = 'stats_extraction_fail'
        fFCS.error_message = str(sys.exc_info()[0])

    return fFCS
Example #23
    def test_empty_FCS(self):
        """ Testing loading FCS filepath that does not load properly ==> empty """

        filename = "99-80923_Fake.fcs"
        filepath = data(filename)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            a = FCS(filepath=filepath)

        self.assertEqual(a.filepath, filepath)
        self.assertEqual(a.filename, filename)
        self.assertEqual(a.case_number, '99-80923')
        self.assertFalse(hasattr(a, 'num_events'))
Example #24
def action(args):
    # Collect files/dirs
    Finder = Find_Clinical_FCS_Files(Filelist_Path=args.file_list)

    # Connect to database (and rebuild)
    db = FCSdatabase(db=args.db_filepath, rebuild=True)
    print "Building database %s" % db.db_file

    # Process files/dirs
    case_tube_idx = 0
    for f in Finder.filenames:
        try:
            fFCS = FCS(filepath=f, case_tube_idx=case_tube_idx)
            fFCS.meta_to_db(db=db, dir=args.dir, add_lists=True)
        except Exception:
            print "Skipping FCS %s because of unknown error related to: %s" % \
                (f, sys.exc_info()[0])

        print("{:6d} Cases uploaded\r".format(case_tube_idx)),
        case_tube_idx += 1

        if args.n is not None and case_tube_idx >= args.n:
            break
Example #25
def action(args):
    # Collect files/dirs
    Finder = Find_Clinical_FCS_Files(Filelist_Path=args.file_list)

    # Connect to database (and rebuild)
    db = FCSdatabase(db=args.db_filepath, rebuild=True)
    print "Building database %s" % db.db_file

    # Process files/dirs
    case_tube_idx = 0
    for f in Finder.filenames:
        try:
            fFCS = FCS(filepath=f, case_tube_idx=case_tube_idx)
            fFCS.meta_to_db(db=db, dir=args.dir, add_lists=True)
        except Exception:
            print "Skipping FCS %s because of unknown error related to: %s" % \
                (f, sys.exc_info()[0])

        print("{:6d} Cases uploaded\r".format(case_tube_idx)),
        case_tube_idx += 1

        if args.n is not None and case_tube_idx >= args.n:
            break
Example #26
    def test_GatingToggle(self):
        """ Tests the HistoStats information subroutines
        :return:
        """

        filepath = data(test_fcs_fn)

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file, gate_coords=gate_coords,
                              strict=False, rescale_lim=(-0.5, 1.0),
                              comp_flag='table', singlet_flag=None,
                              viable_flag='fixed')
        a.extract_FCS_histostats()

        if write_csv:
            pd.Series(a.TubeStats).to_pickle(data('GatingTubeStats.pkl'))
            print("\nHistoStats successfully written\n")
        else:
            TubeStats = pd.read_pickle(data('GatingTubeStats.pkl'))

            np.testing.assert_allclose(pd.Series(a.TubeStats).values, TubeStats.values,
                                       rtol=1e-3, atol=0,
                                       err_msg="Tube Statistics results are more "
                                               "different than tolerable")
Example #27
    def test_2d_feature_extraction(self):
        """ tests 2D_Feature_Extraction """

        filepath = data(test_fcs_fn)

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file,
                              gate_coords=gate_coords, rescale_lim=(-0.5, 1),
                              strict=False, auto_comp=False)
        a.feature_extraction(extraction_type='2d', bins=50)
        log.debug("Feature Extraction was successful")
        binned_data = a.FCS_features
        log.debug(binned_data.histogram)
        if write_csv:
            f = open(data('2d_test_histogram.pkl'), 'wb')
            pickle.dump(binned_data.histogram, f)
            f.close()
            print "Test histogram was successfully pickled"
        else:
            f = open(data('2d_test_histogram.pkl'), 'rb')
            test_histogram = pickle.load(f)
            f.close()
            np.testing.assert_allclose(binned_data.histogram.data, test_histogram.data)
Example #28
    def test_query_getTubeInfo(self):
        """ Testing querying FCS_database for information """

        root_dir = path.abspath('.')
        outfile = path.join(self.mkoutdir(), 'test.db')
        filename = "12-00031_Myeloid 1.fcs"
        filepath = path.abspath(data(filename))

        a = FCS(filepath=filepath)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=outfile, rebuild=True)

        a.meta_to_db(db=db, dir=root_dir)

        # Test specific positive request
        q_dict = {'tubes': ['Myeloid 1'],
                  'daterange': ['2012-01-01', '2012-01-04'],
                  'getTubeInfo': True}
        self.assertEqual(db.query(**q_dict).results,
                         {u'12-00031': {u'Myeloid 1':
                                        {datetime.datetime(2012, 1, 3, 12, 0, 15):
                                         u'testfiles/12-00031_Myeloid 1.fcs'}}})

        # Test specific negative request daterange
        q_dict = {'tubes': ['Myeloid 1'],
                  'daterange': ['2012-01-01', '2012-01-02'],
                  'getTubeInfo': True}
        self.assertEqual(db.query(**q_dict).results,
                         {})

        # Test specific negative request tubes
        q_dict = {'tubes': ['Myeloid 2'],
                  'daterange': ['2012-01-01', '2012-01-04'],
                  'getTubeInfo': True}
        self.assertEqual(db.query(**q_dict).results, {})
Example #29
    def test_ML_push_pull(self):
        """
        tests MergedFeature_IO.push_fcs_features
        """
        # initialize filepaths
        FCS_fp = data(test_fcs_fn)
        DB_fp = path.join(self.mkoutdir(), 'test.db')
        FT_HDF_fp = path.join(self.mkoutdir(), 'test_FT_HDF.hdf5')
        ML_HDF_fp = path.join(self.mkoutdir(), 'test_ML_HDF.hdf5')

        # fcs initialization
        FCS_obj = FCS(filepath=FCS_fp, import_dataframe=True)
        FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                                    gate_coords=gate_coords,
                                    rescale_lim=(-0.5, 1),
                                    strict=False,
                                    auto_comp=False)
        FCS_obj.feature_extraction(extraction_type='Full', bins=10)
        log.debug(FCS_obj.FCS_features.histogram)

        # db initialization
        DB_obj = FCSdatabase(db=DB_fp, rebuild=True)
        FCS_obj.meta_to_db(db=DB_obj, dir=path.abspath('.'))
        log.debug(FCS_obj.case_tube_idx)

        # feature hdf initialization
        FT_HDF_obj = Feature_IO(filepath=FT_HDF_fp)

        # push fcs_features
        FT_HDF_obj.push_fcs_features(case_tube_idx=FCS_obj.case_tube_idx,
                                     FCS=FCS_obj,
                                     db=DB_obj)

        feature_DF, not_in_data, merge_fail = FT_HDF_obj.make_single_tube_analysis(
            [FCS_obj.case_tube_idx])

        ML_HDF_obj = MergedFeatures_IO(filepath=ML_HDF_fp, clobber=True)

        ML_HDF_obj.push_features(feature_DF)

        ML_HDF_obj.push_annotations(
            pd.DataFrame([[test_fcs_fn, 0]],
                         columns=['case_num', 'annotation']))
Example #30
    def test_push_pull(self):
        """
        tests Feature_IO.push_fcs_features
        """
        # initialize filepaths
        FCS_fp = data(test_fcs_fn)
        DB_fp = path.join(self.mkoutdir(), 'test.db')
        HDF_fp = path.join(self.mkoutdir(), 'test_Feature_HDF.hdf5')

        # fcs initialization
        FCS_obj = FCS(filepath=FCS_fp, import_dataframe=True)
        FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                                    gate_coords=gate_coords, rescale_lim=(-0.5, 1),
                                    strict=False, auto_comp=False)
        FCS_obj.feature_extraction(extraction_type='Full', bins=10)
        log.debug(FCS_obj.FCS_features.histogram)

        # db initialization
        DB_obj = FCSdatabase(db=DB_fp, rebuild=True)
        FCS_obj.meta_to_db(db=DB_obj, dir=path.abspath('.'))
        log.debug(FCS_obj.case_tube_idx)

        # hdf initialization
        HDF_obj = Feature_IO(filepath=HDF_fp)

        # push fcs_features
        HDF_obj.push_fcs_features(case_tube_idx=FCS_obj.case_tube_idx,
                                  FCS=FCS_obj, db=DB_obj)

        # pull fcs_features
        output = HDF_obj.get_fcs_features(FCS_obj.case_tube_idx)  # test single-case retrieval
        log.debug(output)
        np.testing.assert_allclose(output.data, FCS_obj.FCS_features.histogram.data)
        
        cti_list = pd.DataFrame(data=np.array([['13-12345', '1', "Dummy Error"]]),
                                index=[1],
                                columns=['casenum', 'cti', 'errormessage'])
        # push failed_cti list to "meta data"
        HDF_obj.push_failed_cti_list(cti_list)
                
        # pull meta data from HDF5 file
        meta_data = HDF_obj.get_meta_data()
        log.debug("File meta data is {}".format(meta_data))
Example #31
def action(args):

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Copy database to out database
    shutil.copyfile(args.db, args.outdb)
    out_db = FCSdatabase(db=args.outdb, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    n = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" % (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx, import_dataframe=True)

            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         strict=False, auto_comp=False, **vars(args))
                fFCS.extract_FCS_histostats()
            except Exception:
                fFCS.flag = 'stats_extraction_fail'
                fFCS.error_message = str(sys.exc_info()[0])

            fFCS.histostats_to_db(db=out_db)

            n += 1
            if args.n is not None and n >= args.n:
                done = True
                break
        if done is True:
            break
Example #32
def action(args):
    log.info('Creating hdf5 file [%s] with features extracted by method [%s]' %
             (args.hdf5_fp, args.feature_extraction_method))

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    # Create HDF5 object
    HDF_obj = Feature_IO(filepath=args.hdf5_fp, clobber=args.clobber)

    # initialize an empty list to collect case_tube_idx values that failed feature extraction
    feature_failed_CTIx = []

    num_results = len(list(chain(*q.results.values())))
    i = 1
    log.info("Found {} case_tube_idx's".format(num_results))
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            # this nested for loop iterates over all case_tube_idx
            log.info("Case: %s, Case_tube_idx: %s, File: %s [%s of %s]" %
                     (case, case_tube_idx, relpath, i, num_results))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath,
                       case_tube_idx=case_tube_idx,
                       import_dataframe=True)

            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         rescale_lim=(-0.5, 1),
                                         strict=False,
                                         auto_comp=False)
                fFCS.feature_extraction(
                    extraction_type=args.feature_extraction_method, bins=10)
                HDF_obj.push_fcs_features(case_tube_idx=case_tube_idx,
                                          FCS=fFCS,
                                          db=db)
            except ValueError, e:
                print(
                    "Skipping feature extraction for case: {} because of 'ValueError {}'"
                    .format(case, str(e)))
            except KeyError, e:
                print "Skipping FCS %s because of KeyError: %s" % (filepath,
                                                                   str(e))
            except IntegrityError, e:
                print("Skipping Case: {}, Case_tube_idx: {}, filepath: {} because "
                      "of IntegrityError: {}".format(case, case_tube_idx, filepath,
                                                     str(e)))

            i += 1
Example #33
    def test_comp_vis(self):
        """
        Tests the compensation visualizer subroutine in FCS successfully writes file
        """

        filepath = data(test_fcs_fn)

        outfile = path.join(self.mkoutdir(), 'test_visualization.png')

        a = FCS(filepath=filepath, import_dataframe=True)
        a.comp_scale_FCS_data(compensation_file=comp_file,
                              gate_coords=gate_coords,
                              strict=False,
                              rescale_lim=(-0.5, 1.0),
                              comp_flag='table',
                              singlet_flag='fixed',
                              viable_flag='fixed')
        a.comp_visualize_FCS(outfile=outfile)
Example #34
    def test_loadFCS(self):
        """ Testing loading FCS from file using FCS and loadFCS modules """

        filename = "12-00031_Myeloid 1.fcs"
        filepath = data(filename)
        a = FCS(filepath=filepath, import_dataframe=True)

        if write_csv is True:
            write = {}
            write['filepath'] = a.filepath
            write['filename'] = a.filename
            write['case_number'] = a.case_number
            write['cytometer'] = a.cytometer
            write['date'] = a.date
            write['case_tube'] = a.case_tube
            write['num_events'] = a.num_events
            #            write['version'] = a.version
            header_info = pd.Series(write)
            header_info.to_pickle(data('header_info.pkl'))
            a.parameters.to_pickle(data('parameter_info.pkl'))
            log.info('LoadFCS header and Parameter data successfully written')
        else:
            header_info = pd.read_pickle(data('header_info.pkl'))
            self.assertFalse(a.empty)
            self.assertEqual(a.filepath, header_info['filepath'])
            self.assertEqual(a.filename, header_info['filename'])
            self.assertEqual(a.case_number, header_info['case_number'])
            self.assertEqual(a.cytometer, header_info['cytometer'])
            self.assertEqual(a.date, header_info['date'])
            self.assertEqual(a.case_tube, header_info['case_tube'])
            self.assertEqual(a.num_events, header_info['num_events'])
            #            self.assertEqual(a.version, header_info['version'])
            self.assertTrue(hasattr(a, 'data'))

            parameters = pd.read_pickle(data('parameter_info.pkl'))
            assert_frame_equal(a.parameters, parameters)
Example #35
    def test_ML_push_pull(self):
        """
        tests MergedFeature_IO.push_fcs_features
        """
        # initialize filepaths
        FCS_fp = data(test_fcs_fn)
        DB_fp = path.join(self.mkoutdir(), 'test.db')
        FT_HDF_fp = path.join(self.mkoutdir(), 'test_FT_HDF.hdf5')
        ML_HDF_fp = path.join(self.mkoutdir(), 'test_ML_HDF.hdf5')

        # fcs initialization
        FCS_obj = FCS(filepath=FCS_fp, import_dataframe=True)
        FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                                    gate_coords=gate_coords, rescale_lim=(-0.5, 1),
                                    strict=False, auto_comp=False)
        FCS_obj.feature_extraction(extraction_type='Full', bins=10)
        log.debug(FCS_obj.FCS_features.histogram)

        # db initialization
        DB_obj = FCSdatabase(db=DB_fp, rebuild=True)
        FCS_obj.meta_to_db(db=DB_obj, dir=path.abspath('.'))
        log.debug(FCS_obj.case_tube_idx)

        # feature hdf initialization
        FT_HDF_obj = Feature_IO(filepath=FT_HDF_fp)

        # push fcs_features
        FT_HDF_obj.push_fcs_features(case_tube_idx=FCS_obj.case_tube_idx,
                                     FCS=FCS_obj, db=DB_obj)
        
        feature_DF, not_in_data, merge_fail = FT_HDF_obj.make_single_tube_analysis(
            [FCS_obj.case_tube_idx])

        ML_HDF_obj = MergedFeatures_IO(filepath=ML_HDF_fp, clobber=True)

        ML_HDF_obj.push_features(feature_DF)

        ML_HDF_obj.push_annotations(pd.DataFrame([[test_fcs_fn, 0]],
                                                 columns=['case_num', 'annotation']))
Example #36
        start = self.header['text_start']
        stop = self.header['text_stop']
        text = self.__get_block(start, stop)
        delim = text[0]
        if delim == r'|':
            delim = '\|'
        if delim == r'\a'[0]:  # test for the delimiter being a literal backslash
            delim = '\\\\'  # the regex below needs it escaped as \\
        if delim != text[-1]:
            warn("text in segment does not start and end with delimiter")
        tmp = text[1:-1].replace('$', '')
        # match the delimited character unless it's doubled
        regex = compile('(?<=[^%s])%s(?!%s)' % (delim, delim, delim))
        tmp = regex.split(tmp)
        return dict(zip([x.lower() for x in tmp[::2]], tmp[1::2]))
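# The fragment above appears to be the body of an FCS TEXT-segment parser (the
# enclosing `def` was dropped in extraction). Illustration with hypothetical
# values: for a TEXT segment like '/$P1N/FSC-H/$P1R/1024/', the outer
# delimiters and the '$' prefixes are stripped, the remainder is split on
# single (non-doubled) '/' characters into alternating keyword/value tokens
# ['P1N', 'FSC-H', 'P1R', '1024'], and the returned dict is
# {'p1n': 'FSC-H', 'p1r': '1024'} (keywords lowercased).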


if __name__ == "__main__":
    filepath = "/home/ngdavid/Desktop/Ubuntu_Dropbox/Myeloid_Data/Myeloid/10-13469/10-13469_Myeloid 1.fcs"
    #filepath = "/home/ngdavid/Desktop/Ubuntu_Dropbox/Myeloid_Data/14-21257_B Cells.fcs"
    from FlowAnal.FCS import FCS
    a = FCS()
    FCSobject = loadFCS(FCS=a,
                        filepath=filepath,
                        version='test',
                        import_dataframe=False)

    print FCSobject.date
    print FCSobject.case_tube
    print FCSobject.parameters
Example #37
    coords = {
        'singlet': [(0.01, 0.06), (0.60, 0.75), (0.93, 0.977), (0.988, 0.86),
                    (0.456, 0.379), (0.05, 0.0), (0.0, 0.0)],
        'viable': [(0.358, 0.174), (0.609, 0.241), (0.822, 0.132),
                   (0.989, 0.298), (1.0, 1.0), (0.5, 1.0), (0.358, 0.174)]
    }

    comp_file = {
        'H0152': root + '/FlowAnal/data/Spectral_Overlap_Lib_LSRA.txt',
        '2': root + '/FlowAnal/data/Spectral_Overlap_Lib_LSRB.txt'
    }

    filename = "12-00031_Myeloid 1.fcs"
    filepath = data(filename)

    FCS_obj = FCS(filepath=filepath, import_dataframe=True)

    FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                                gate_coords=coords,
                                strict=False)

    figure()
    ax = ['SSC-H', 'CD45 APC-H7']
    plot(FCS_obj.data[ax[0]], FCS_obj.data[ax[1]], 'b,')
    title(FCS_obj.case_tube)
    xlim(0, 1)
    ylim(0, 1)
    xlabel(ax[0])
    ylabel(ax[1])
"""
    filename = "12-00005_Bone Marrow WBC.fcs"
    def data(fname):
        return path.join(datadir, fname)


    coords={'singlet': [ (0.01,0.06), (0.60,0.75), (0.93,0.977), (0.988,0.86),
                         (0.456,0.379),(0.05,0.0),(0.0,0.0)],
            'viable': [ (0.358,0.174), (0.609,0.241), (0.822,0.132), (0.989,0.298),
                        (1.0,1.0),(0.5,1.0),(0.358,0.174)]}

    comp_file={'H0152':root+'/FlowAnal/data/Spectral_Overlap_Lib_LSRA.txt',
               '2':root+'/FlowAnal/data/Spectral_Overlap_Lib_LSRB.txt'}

    filename = "12-00031_Myeloid 1.fcs"
    filepath = data(filename)

    FCS_obj = FCS(filepath=filepath, import_dataframe=True)

    FCS_obj.comp_scale_FCS_data(compensation_file=comp_file,
                            gate_coords=coords,
                            strict=False)

    figure()
    ax=['SSC-H','CD45 APC-H7']
    plot(FCS_obj.data[ax[0]],FCS_obj.data[ax[1]],'b,')
    title(FCS_obj.case_tube)
    xlim(0,1)
    ylim(0,1)
    xlabel(ax[0])
    ylabel(ax[1])
"""
    filename = "12-00005_Bone Marrow WBC.fcs"
Example #38
    def test_query_getfiles(self):
        """ Testing querying database for getfiles dict_dict """

        root_dir = path.abspath('.')
        outfile = path.join(self.mkoutdir(), 'test.db')
        filename = "12-00031_Myeloid 1.fcs"
        filepath = path.abspath(data(filename))

        a = FCS(filepath=filepath)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=outfile, rebuild=True)

        a.meta_to_db(db=db, dir=root_dir)

        # Test specific positive request
        q_dict = {
            'tubes': ['Myeloid 1'],
            'daterange': ['2012-01-01', '2012-01-04'],
            'getfiles': True
        }
        self.assertEqual(
            db.query(**q_dict).results,
            {u'12-00031': {
                0: u'testfiles/12-00031_Myeloid 1.fcs'
            }})

        # Test empty query
        q_dict = {'getfiles': True}
        self.assertEqual(
            db.query(**q_dict).results,
            {u'12-00031': {
                0: u'testfiles/12-00031_Myeloid 1.fcs'
            }})

        # Test case query POS
        q_dict = {'getfiles': True, 'cases': ['12-00031']}
        self.assertEqual(
            db.query(**q_dict).results,
            {u'12-00031': {
                0: u'testfiles/12-00031_Myeloid 1.fcs'
            }})

        # Test case query NEG
        q_dict = {'getfiles': True, 'cases': ['12-00032']}
        self.assertEqual(db.query(**q_dict).results, {})

        # Test specific negative request daterange
        q_dict = {
            'tubes': ['Myeloid 1'],
            'daterange': ['2012-01-01', '2012-01-02'],
            'getfiles': True
        }
        self.assertEqual(db.query(**q_dict).results, {})

        # Test specific negative request tubes
        q_dict = {
            'tubes': ['Myeloid 2'],
            'daterange': ['2012-01-01', '2012-01-04'],
            'getfiles': True
        }
        self.assertEqual(db.query(**q_dict).results, {})