def action(args):

    if args.tubes is None:
        raise ValueError('Tube types must be selected using option --tubes <>')

    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Get case_tube_idx list [in numeric order]
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))
    case_tube_list = []
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" % (case, case_tube_idx, relpath))
            case_tube_list.append(case_tube_idx)
    case_tube_list.sort()

    # Get features
    HDF_obj = HDF5_IO(filepath=args.hdf5_fp, clobber=False)
    features_df = HDF_obj.make_single_tube_analysis(case_tube_list)
    log.debug(features_df.head())

    # Get annotations [ordered by case_tube_idx]
    annotation_df = db.query(exporttype='df', getCaseAnnotations=True, **vars(args)).results
    log.debug(annotation_df.head())
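
Each of these snippets is an action(args) entry point that receives an argparse-style namespace and forwards it to the database query via vars(args). Below is a minimal sketch of how such a namespace might be constructed for the snippet above; the option names and defaults are assumptions inferred from the attribute accesses (args.db, args.tubes, args.hdf5_fp), not a confirmed CLI definition.

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--db', default='db.sqlite',
                    help='path to the FCS metadata database (assumed default)')
parser.add_argument('--tubes', nargs='+', default=None,
                    help='tube types to select; required by this action')
parser.add_argument('--hdf5_fp', default='features.hdf5',
                    help='path to the HDF5 feature file (assumed default)')

args = parser.parse_args(['--tubes', 'Myeloid 1'])
action(args)  # query the database and build the single-tube feature matrix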
def action(args):

    raise Exception("DEPRECATED METHOD")

    # Copy database
    shutil.copyfile(args.db, args.outdb)
    outdb = FCSdatabase(db=args.outdb, rebuild=False)

    # Add text table and whittle cases not in db (unless args says otherwise)
    outdb.addCustomCaseData(file=args.file, whittle=not args.no_whittle)

    if args.no_whittle is False:
        # Delete cases in db not in Custom table [do not add to exclusions table]
        outdb.query(delCasesByCustom=True)

    outdb.close()
Example 4
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    i = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)

            a = FCS(filepath=filepath, import_dataframe=True)
            a.comp_scale_FCS_data(compensation_file=comp_file,
                                  gate_coords=coords,
                                  strict=False,
                                  auto_comp=False)
            outfile = 'output/' + '_'.join([
                case,
                str(case_tube_idx),
                a.case_tube.replace(' ', '_'),
                a.date.strftime("%Y%m%d")
            ]) + '.png'
            a.comp_visualize_FCS(outfile=outfile)

            i += 1
            if args.n_files is not None and i >= args.n_files:
                done = True
                break
        if done is True:
            break
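
Note that comp_file and coords are not defined in this snippet; in the original module they are globals holding the compensation-matrix path and the gate coordinates. The stand-ins below are placeholders only, to make the snippet self-contained; the real values live elsewhere in the FlowAnal scripts.

comp_file = 'comp_matrix.txt'  # path to a compensation matrix file (placeholder)
coords = {}                    # gate coordinates (placeholder; structure assumed)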
Example 5
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    i = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" % (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)

            a = FCS(filepath=filepath, import_dataframe=True)
            a.comp_scale_FCS_data(compensation_file=comp_file,
                                  gate_coords=coords,
                                  strict=False, auto_comp=False)
            outfile = 'output/' + '_'.join([case, str(case_tube_idx),
                                            a.case_tube.replace(' ', '_'),
                                            a.date.strftime("%Y%m%d")]) + '.png'
            a.comp_visualize_FCS(outfile=outfile)

            i += 1
            if args.n_files is not None and i >= args.n_files:
                done = True
                break
        if done is True:
            break
Example 6
    def test_query_getTubeInfo(self):
        """ Testing querying FCS_database for information """

        root_dir = path.abspath('.')
        outfile = path.join(self.mkoutdir(), 'test.db')
        filename = "12-00031_Myeloid 1.fcs"
        filepath = path.abspath(data(filename))

        a = FCS(filepath=filepath)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=outfile, rebuild=True)

        a.meta_to_db(db=db, dir=root_dir)

        # Test specific positive request
        q_dict = {
            'tubes': ['Myeloid 1'],
            'daterange': ['2012-01-01', '2012-01-04'],
            'getTubeInfo': True
        }
        self.assertEqual(
            db.query(**q_dict).results, {
                u'12-00031': {
                    u'Myeloid 1': {
                        datetime.datetime(2012, 1, 3, 12, 0, 15):
                        u'testfiles/12-00031_Myeloid 1.fcs'
                    }
                }
            })

        # Test specific negative request daterange
        q_dict = {
            'tubes': ['Myeloid 1'],
            'daterange': ['2012-01-01', '2012-01-02'],
            'getTubeInfo': True
        }
        self.assertEqual(db.query(**q_dict).results, {})

        # Test specific negative request tubes
        q_dict = {
            'tubes': ['Myeloid 2'],
            'daterange': ['2012-01-01', '2012-01-04'],
            'getTubeInfo': True
        }
        self.assertEqual(db.query(**q_dict).results, {})
Example 7
def action(args):

    if args.tubes is None:
        raise ValueError('Tube types must be selected using option --tubes <>')

    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Get case_tube_idx list [in numeric order]
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))
    case_tube_list = []
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            case_tube_list.append(case_tube_idx)
    case_tube_list.sort()

    # Get features
    Feature_obj = Feature_IO(filepath=args.feature_fp, clobber=False)
    features_df, not_in_cti, merge_fail_cti = Feature_obj.make_single_tube_analysis(
        case_tube_list)
    log.debug("Feature DF: {} \n Case_tube_indices that failed: {}".format(
        features_df.head(), merge_fail_cti))

    # Get annotations [ordered by case_tube_idx]
    annotation_df = db.query(exporttype='df',
                             getCaseAnnotations=True,
                             **vars(args)).results
    log.debug(annotation_df.head())

    # This is a dummy for now; it is unclear where to generate this list from.
    # TODO: convert merge_fail_cti to merge_fail_case_nums (error codes TBD).
    # Note: the data must be a list of rows, not a flat list, to match the two columns.
    not_found_df = pd.DataFrame([['12-12345', 'Not found']],
                                columns=["case_num", "error_code"])
    # This has only been partially tested; it cannot be done without an annotation_df.
    # Open/Create MergedData
    MLinput_obj = MergedFeatures_IO(filepath=args.MLinput_fp, clobber=True)
    MLinput_obj.push_features(features_df)
    MLinput_obj.push_annotations(annotation_df)
    MLinput_obj.push_not_found(not_found_df)
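
The placeholder above leaves the real conversion unwritten. Here is a minimal sketch of one way to build the not-found table directly from merge_fail_cti; the 'Merge failure' error code and the choice to key on case_tube_idx (rather than resolving case numbers through a database query) are assumptions, not confirmed FlowAnal behavior.

import pandas as pd

def merge_fail_to_df(merge_fail_cti):
    # One row per failed case_tube_idx, with a fixed, assumed error code.
    rows = [[cti, 'Merge failure'] for cti in merge_fail_cti]
    return pd.DataFrame(rows, columns=['case_tube_idx', 'error_code'])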
Example 8
def action(args):
    log.info('Creating hdf5 file [%s] with features extracted by method [%s]' %
             (args.hdf5_fp, args.feature_extraction_method))

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    # Create HDF5 object
    HDF_obj = Feature_IO(filepath=args.hdf5_fp, clobber=args.clobber)

    # initialize an empty list to collect case_tube_idx's that fail feature extraction
    feature_failed_CTIx = []

    num_results = len(list(chain(*q.results.values())))
    i = 1
    log.info("Found {} case_tube_idx's".format(num_results))
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            # this nested for loop iterates over all case_tube_idx
            log.info("Case: %s, Case_tube_idx: %s, File: %s [%s of %s]" %
                     (case, case_tube_idx, relpath, i, num_results))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath,
                       case_tube_idx=case_tube_idx,
                       import_dataframe=True)

            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         rescale_lim=(-0.5, 1),
                                         strict=False,
                                         auto_comp=False)
                fFCS.feature_extraction(
                    extraction_type=args.feature_extraction_method, bins=10)
                HDF_obj.push_fcs_features(case_tube_idx=case_tube_idx,
                                          FCS=fFCS,
                                          db=db)
            except ValueError as e:
                feature_failed_CTIx.append(case_tube_idx)
                print(
                    "Skipping feature extraction for case: {} because of ValueError: {}"
                    .format(case, str(e)))
            except KeyError as e:
                feature_failed_CTIx.append(case_tube_idx)
                print("Skipping FCS {} because of KeyError: {}".format(
                    filepath, str(e)))
            except IntegrityError as e:
                feature_failed_CTIx.append(case_tube_idx)
                print("Skipping case: {}, case_tube_idx: {}, filepath: {} "
                      "because of IntegrityError: {}".format(
                          case, case_tube_idx, filepath, str(e)))
            i += 1
Example 9
def action(args):

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Copy database to out database
    shutil.copyfile(args.db, args.outdb)
    out_db = FCSdatabase(db=args.outdb, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    q_list = []
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            q_list.append((path.join(args.dir, relpath), case_tube_idx))

    log.info("Length of q_list is {}".format(len(q_list)))

    # Setup lists
    n = args.load  # length of sublists
    sublists = [q_list[i:i + n] for i in range(0, len(q_list), n)]
    log.info("Number of sublists to process: {}".format(len(sublists)))

    # Setup args
    vargs = {
        key: value
        for key, value in vars(args).items()
        if key in ['nosinglet', 'noviability']
    }

    i = 0
    for sublist in sublists:
        p = Pool(args.workers)
        results = [
            p.apply_async(worker, args=(case_info, ), kwds=vargs)
            for case_info in sublist
        ]
        p.close()

        for f in results:
            i += 1
            fFCS = f.get()
            fFCS.histostats_to_db(db=out_db)
            del fFCS
            print "Case_tubes: {} of {} have been processed\r".format(
                i, len(q_list)),
        del results

        if args.testing is True:
            break  # run loop once then break if testing
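
worker() is defined elsewhere in the original module. Below is a sketch of the signature the Pool calls imply: a (filepath, case_tube_idx) tuple as the positional argument, the nosinglet/noviability flags as keywords, and a processed FCS object as the return value. The body shown is an assumption about what the real worker does, not its actual implementation.

def worker(case_info, **kwargs):
    filepath, case_tube_idx = case_info
    fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx,
               import_dataframe=True)
    # ... compensate/scale and extract histogram statistics here, honoring
    # kwargs['nosinglet'] / kwargs['noviability'] ...
    return fFCS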
Example 10
def action(args):

    # Identify query option
    if args.getfiles is False and args.getTubeInfo is False:
        raise Exception("ERROR: Must select either --getfiles or --getTubeInfo")

    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(**vars(args))

    if args.out_file is None:
        pprint.pprint(q.results)
Example 11
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" % (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)
            a = FCS(filepath=filepath, case_tube_idx=case_tube_idx)

            # Do something
            print a.case_tube
Example 12
    def test_query_getTubeInfo(self):
        """ Testing querying FCS_database for information """

        root_dir = path.abspath('.')
        outfile = path.join(self.mkoutdir(), 'test.db')
        filename = "12-00031_Myeloid 1.fcs"
        filepath = path.abspath(data(filename))

        a = FCS(filepath=filepath)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=outfile, rebuild=True)

        a.meta_to_db(db=db, dir=root_dir)

        # Test specific positive request
        q_dict = {'tubes': ['Myeloid 1'],
                  'daterange': ['2012-01-01', '2012-01-04'],
                  'getTubeInfo': True}
        self.assertEqual(db.query(**q_dict).results,
                         {u'12-00031': {u'Myeloid 1':
                                        {datetime.datetime(2012, 1, 3, 12, 0, 15):
                                         u'testfiles/12-00031_Myeloid 1.fcs'}}})

        # Test specific negative request daterange
        q_dict = {'tubes': ['Myeloid 1'],
                  'daterange': ['2012-01-01', '2012-01-02'],
                  'getTubeInfo': True}
        self.assertEqual(db.query(**q_dict).results,
                         {})

        # Test specific negative request tubes
        q_dict = {'tubes': ['Myeloid 2'],
                  'daterange': ['2012-01-01', '2012-01-04'],
                  'getTubeInfo': True}
        self.assertEqual(db.query(**q_dict).results, {})
Example 13
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Get case_tube_idx list
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))
    case_tube_list = []
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" % (case, case_tube_idx, relpath))
            case_tube_list.append(case_tube_idx)

    # Get features
    HDF_obj = HDF5_IO(filepath=args.hdf, clobber=False)
    features_df = HDF_obj.make_single_tube_analysis(case_tube_list)

    print features_df
Example 14
def action(args):

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Copy database to out database
    shutil.copyfile(args.db, args.outdb)
    out_db = FCSdatabase(db=args.outdb, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    q_list = []
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            q_list.append((path.join(args.dir, relpath), case_tube_idx))

    log.info("Length of q_list is {}".format(len(q_list)))

    # Setup lists
    n = args.load  # length of sublists
    sublists = [q_list[i:i+n] for i in range(0, len(q_list), n)]
    log.info("Number of sublists to process: {}".format(len(sublists)))

    # Setup args
    vargs = {key: value for key, value in vars(args).items()
             if key in ['nosinglet', 'noviability']}

    i = 0
    for sublist in sublists:
        p = Pool(args.workers)
        results = [p.apply_async(worker, args=(case_info, ), kwds=vargs)
                   for case_info in sublist]
        p.close()

        for f in results:
            i += 1
            fFCS = f.get()
            fFCS.histostats_to_db(db=out_db)
            del fFCS
            print "Case_tubes: {} of {} have been processed\r".format(i, len(q_list)),
        del results

        if args.testing is True:
            break  # run loop once then break if testing
Example 15
def action(args):
    log.info('Creating hdf5 file [%s] with features extracted by method [%s]' %
             (args.hdf5_fp, args.feature_extraction_method))

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    # Create HDF5 object
    HDF_obj = Feature_IO(filepath=args.hdf5_fp, clobber=args.clobber)

    # initialize an empty list to collect case_tube_idx's that fail feature extraction
    feature_failed_CTIx = []

    num_results = len(list(chain(*q.results.values())))
    i = 1
    log.info("Found {} case_tube_idx's".format(num_results))
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            # this nested for loop iterates over all case_tube_idx
            log.info("Case: %s, Case_tube_idx: %s, File: %s [%s of %s]" %
                     (case, case_tube_idx, relpath, i, num_results))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx, import_dataframe=True)

            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         rescale_lim=(-0.5, 1),
                                         strict=False, auto_comp=False)
                fFCS.feature_extraction(extraction_type=args.feature_extraction_method,
                                        bins=10)
                HDF_obj.push_fcs_features(case_tube_idx=case_tube_idx,
                                          FCS=fFCS, db=db)
            except ValueError as e:
                feature_failed_CTIx.append(case_tube_idx)
                print("Skipping feature extraction for case: {} because of ValueError: {}".
                      format(case, str(e)))
            except KeyError as e:
                feature_failed_CTIx.append(case_tube_idx)
                print("Skipping FCS {} because of KeyError: {}".format(filepath, str(e)))
            except IntegrityError as e:
                feature_failed_CTIx.append(case_tube_idx)
                print("Skipping case: {}, case_tube_idx: {}, filepath: {} because "
                      "of IntegrityError: {}".format(case, case_tube_idx, filepath, str(e)))
            i += 1
Example 16
def action(args):
    # Connect to database
    db = FCSdatabase(db=args.db, rebuild=False)

    # Get case_tube_idx list
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))
    case_tube_list = []
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            case_tube_list.append(case_tube_idx)

    # Get features
    HDF_obj = HDF5_IO(filepath=args.hdf, clobber=False)
    features_df = HDF_obj.make_single_tube_analysis(case_tube_list)

    print features_df
Example 17
def action(args):

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Copy database to out database
    shutil.copyfile(args.db, args.outdb)
    out_db = FCSdatabase(db=args.outdb, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    n = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" %
                     (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath,
                       case_tube_idx=case_tube_idx,
                       import_dataframe=True)

            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         strict=False,
                                         auto_comp=False,
                                         **vars(args))
                fFCS.extract_FCS_histostats()
            except Exception:
                fFCS.flag = 'stats_extraction_fail'
                fFCS.error_message = str(sys.exc_info()[0])

            fFCS.histostats_to_db(db=out_db)

            n += 1
            if args.n is not None and n >= args.n:
                done = True
                break
        if done is True:
            break
Example 18
    def test_query_getfiles(self):
        """ Testing querying database for getfiles dict_dict """

        root_dir = path.abspath('.')
        outfile = path.join(self.mkoutdir(), 'test.db')
        filename = "12-00031_Myeloid 1.fcs"
        filepath = path.abspath(data(filename))

        a = FCS(filepath=filepath)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=outfile, rebuild=True)

        a.meta_to_db(db=db, dir=root_dir)

        # Test specific positive request
        q_dict = {'tubes': ['Myeloid 1'],
                  'daterange': ['2012-01-01', '2012-01-04'],
                  'getfiles': True}
        self.assertEqual(db.query(**q_dict).results,
                         {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

        # Test empty query
        q_dict = {'getfiles': True}
        self.assertEqual(db.query(**q_dict).results,
                         {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

        # Test case query POS
        q_dict = {'getfiles': True,
                  'cases': ['12-00031']}
        self.assertEqual(db.query(**q_dict).results,
                         {u'12-00031': {0: u'testfiles/12-00031_Myeloid 1.fcs'}})

        # Test case query POS
        q_dict = {'getfiles': True,
                  'cases': ['12-00032']}
        self.assertEqual(db.query(**q_dict).results, {})

        # Test specific negative request daterange
        q_dict = {'tubes': ['Myeloid 1'],
                  'daterange': ['2012-01-01', '2012-01-02'],
                  'getfiles': True}
        self.assertEqual(db.query(**q_dict).results,
                         {})

        # Test specific negative request tubes
        q_dict = {'tubes': ['Myeloid 2'],
                  'daterange': ['2012-01-01', '2012-01-04'],
                  'getfiles': True}
        self.assertEqual(db.query(**q_dict).results, {})
Example 19
def action(args):

    # Connect to database
    log.info("Loading database input %s" % args.db)
    db = FCSdatabase(db=args.db, rebuild=False)

    # Copy database to out database
    shutil.copyfile(args.db, args.outdb)
    out_db = FCSdatabase(db=args.outdb, rebuild=False)

    # Create query
    q = db.query(exporttype='dict_dict', getfiles=True, **vars(args))

    n = 0
    done = False
    for case, case_info in q.results.items():
        for case_tube_idx, relpath in case_info.items():
            log.info("Case: %s, Case_tube_idx: %s, File: %s" % (case, case_tube_idx, relpath))
            filepath = path.join(args.dir, relpath)
            fFCS = FCS(filepath=filepath, case_tube_idx=case_tube_idx, import_dataframe=True)

            try:
                fFCS.comp_scale_FCS_data(compensation_file=comp_file,
                                         gate_coords=gate_coords,
                                         strict=False, auto_comp=False, **vars(args))
                fFCS.extract_FCS_histostats()
            except Exception:
                fFCS.flag = 'stats_extraction_fail'
                fFCS.error_message = str(sys.exc_info()[0])

            fFCS.histostats_to_db(db=out_db)

            n += 1
            if args.n is not None and n >= args.n:
                done = True
                break
        if done is True:
            break
Example 20
def action(args):

    argd = vars(args)  # Collect options
    if argd['cases'] is not None or argd['case_tube_idxs'] is not None:
        raise ValueError('Should I be able to pass cases or cti\'s here??')

    # Connect to database
    db = FCSdatabase(db=argd['db'], rebuild=False)

    # Get features_HDF case_tube_idx's
    HDF_feature_obj = Feature_IO(filepath=argd['feature_hdf5_fp'],
                                 clobber=False)
    feature_cti = HDF_feature_obj.get_case_tube_idxs()

    # Load annotations (row-index is case_number)
    ann = CustomData(args.case_annot).dat
    ann_cases = Set(ann.index.tolist())
    log.debug("Annotation cases: {}".format(ann_cases))

    # Get/pick case, case_tube_idx list
    feature_cases = Set(
        db.query(getCases=True, aslist=True,
                 case_tube_idxs=feature_cti).results)
    log.debug("Feature cases: {}".format(feature_cases))

    # Identify annotation cases not represented in HDF5
    exclusions_dic = {}
    exclusions_dic['no_features'] = list(ann_cases - feature_cases)

    # Cases to consider (intersection of annotations and HDF5 features)
    cases_to_consider = ann_cases & feature_cases
    argd['cases'] = list(cases_to_consider)
    argd['case_tube_idxs'] = feature_cti
    q = db.query(pick_cti=True, **argd)
    case_tube_index_list = q.results.case_tube_idx.tolist()
    case_list = Set(q.results.case_number.tolist())
    log.debug('Selected case/cti: {}'.format(
        [[list(case_list)[i], case_tube_index_list[i]]
         for i in range(len(case_tube_index_list))]))

    # Keep track of cases that were excluded at the query step
    exclusions_dic['excluded_by_DB_query'] = list(cases_to_consider -
                                                  case_list)
    log.debug(exclusions_dic)

    # Get features [assuming that features are returned in order!]
    features_df, not_in_data, merge_fail = HDF_feature_obj.make_single_tube_analysis(
        case_tube_index_list)
    features_df.set_index('bin_num', drop=True, inplace=True)
    features_df.columns = case_list
    features_df = features_df.T
    log.debug(features_df.head())

    # Get annotations [ordered by case_tube_idx]
    annotation_df = ann.loc[list(case_list), :]
    log.debug(annotation_df.head())

    # Send features_df, annotation_df, and exclusions to ML_input_HDF5 (args.ml_hdf5_fp)
    Merged_ML_feat_obj = MergedFeatures_IO(filepath=args.ml_hdf5_fp,
                                           clobber=True)

    Merged_ML_feat_obj.push_features(features_df)
    Merged_ML_feat_obj.push_annotations(annotation_df)
    Merged_ML_feat_obj.push_not_found(
        exclusions_dic)  # exclusions is a dictionary
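
A toy illustration of the set bookkeeping above, with made-up case numbers: cases that are annotated but absent from the feature HDF5 are excluded up front, and only the intersection moves on to the database query.

ann_cases = set(['12-00031', '12-00032', '12-00033'])      # annotated cases
feature_cases = set(['12-00031', '12-00033', '12-00040'])  # cases with features

no_features = ann_cases - feature_cases        # set(['12-00032'])
cases_to_consider = ann_cases & feature_cases  # set(['12-00031', '12-00033'])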
Example 21
def action(args):

    argd = vars(args)  # Collect options
    if argd['cases'] is not None or argd['case_tube_idxs'] is not None:
        raise ValueError('Should I be able to pass cases or cti\'s here??')

    # Connect to database
    db = FCSdatabase(db=argd['db'], rebuild=False)

    # Get features_HDF case_tube_idx's
    HDF_feature_obj = Feature_IO(filepath=argd['feature_hdf5_fp'],
                                 clobber=False)
    feature_cti = HDF_feature_obj.get_case_tube_idxs()

    # Load annotations (row-index is case_number)
    ann = CustomData(args.case_annot).dat
    ann_cases = Set(ann.index.tolist())
    log.debug("Annotation cases: {}".format(ann_cases))

    # Get/pick case, case_tube_idx list
    feature_cases = Set(db.query(getCases=True,
                                 aslist=True,
                                 case_tube_idxs=feature_cti).results)
    log.debug("Feature cases: {}".format(feature_cases))

    # Identify annotation cases not represented in HDF5
    exclusions_dic = {}
    exclusions_dic['no_features'] = list(ann_cases - feature_cases)

    # Cases to consider (intersection of annotations and HDF5 features)
    cases_to_consider = ann_cases & feature_cases
    argd['cases'] = list(cases_to_consider)
    argd['case_tube_idxs'] = feature_cti
    q = db.query(pick_cti=True,
                 **argd)
    case_tube_index_list = q.results.case_tube_idx.tolist()
    case_list = Set(q.results.case_number.tolist())
    log.debug('Selected case/cti: {}'.
              format([[list(case_list)[i], case_tube_index_list[i]]
                      for i in range(len(case_tube_index_list))]))

    # Keep track of cases that were excluded at the query step
    exclusions_dic['excluded_by_DB_query'] = list(cases_to_consider - case_list)
    log.debug(exclusions_dic)

    # Get features [assuming that features are returned in order!]
    features_df, not_in_data, merge_fail = HDF_feature_obj.make_single_tube_analysis(case_tube_index_list)
    features_df.set_index('bin_num', drop=True, inplace=True)
    features_df.columns = case_list
    features_df = features_df.T
    log.debug(features_df.head())

    # Get annotations [ordered by case_tube_idx]
    annotation_df = ann.loc[list(case_list), :]
    log.debug(annotation_df.head())

    # Send features_df, annotation_df, and exclusions to ML_input_HDF5 (args.ml_hdf5_fp)
    Merged_ML_feat_obj = MergedFeatures_IO(filepath=args.ml_hdf5_fp,
                                           clobber=True)

    Merged_ML_feat_obj.push_features(features_df)
    Merged_ML_feat_obj.push_annotations(annotation_df)
    Merged_ML_feat_obj.push_not_found(exclusions_dic)  # exclusions is a dictionary
Example 22
    def test_query_getfiles(self):
        """ Testing querying database for getfiles dict_dict """

        root_dir = path.abspath('.')
        outfile = path.join(self.mkoutdir(), 'test.db')
        filename = "12-00031_Myeloid 1.fcs"
        filepath = path.abspath(data(filename))

        a = FCS(filepath=filepath)

        # from FlowAnal.database.FCS_database import FCSdatabase
        db = FCSdatabase(db=outfile, rebuild=True)

        a.meta_to_db(db=db, dir=root_dir)

        # Test specific positive request
        q_dict = {
            'tubes': ['Myeloid 1'],
            'daterange': ['2012-01-01', '2012-01-04'],
            'getfiles': True
        }
        self.assertEqual(
            db.query(**q_dict).results,
            {u'12-00031': {
                0: u'testfiles/12-00031_Myeloid 1.fcs'
            }})

        # Test empty query
        q_dict = {'getfiles': True}
        self.assertEqual(
            db.query(**q_dict).results,
            {u'12-00031': {
                0: u'testfiles/12-00031_Myeloid 1.fcs'
            }})

        # Test case query POS
        q_dict = {'getfiles': True, 'cases': ['12-00031']}
        self.assertEqual(
            db.query(**q_dict).results,
            {u'12-00031': {
                0: u'testfiles/12-00031_Myeloid 1.fcs'
            }})

        # Test case query POS
        q_dict = {'getfiles': True, 'cases': ['12-00032']}
        self.assertEqual(db.query(**q_dict).results, {})

        # Test specific negative request daterange
        q_dict = {
            'tubes': ['Myeloid 1'],
            'daterange': ['2012-01-01', '2012-01-02'],
            'getfiles': True
        }
        self.assertEqual(db.query(**q_dict).results, {})

        # Test specific negative request tubes
        q_dict = {
            'tubes': ['Myeloid 2'],
            'daterange': ['2012-01-01', '2012-01-04'],
            'getfiles': True
        }
        self.assertEqual(db.query(**q_dict).results, {})