コード例 #1
0
ファイル: SWAP.py プロジェクト: rgavazzi/SpaceWarps
        new_samplefile = swap.get_new_filename(tonights.parameters,'training_false_positives')
        print "SWAP: saving false positives..."
        N = swap.write_list(sample,new_samplefile,item='false_positive')
        print "SWAP: "+str(N)+" lines written to "+new_samplefile

        new_samplefile = swap.get_new_filename(tonights.parameters,'training_false_negatives')
        print "SWAP: saving false negatives..."
        N = swap.write_list(sample,new_samplefile,item='false_negative')
        print "SWAP: "+str(N)+" lines written to "+new_samplefile

        # Also write out catalogs of subjects, including the ZooID, subject ID,
        # how many classifications, and probability:

        catalog = swap.get_new_filename(tonights.parameters,'candidate_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        Nlenses,Nsubjects = swap.write_catalog(sample,catalog,thresholds,kind='test')
        print "SWAP: From "+str(Nsubjects)+" subjects classified,"
        print "SWAP: "+str(Nlenses)+" candidates (with P > rejection) written to "+catalog

        catalog = swap.get_new_filename(tonights.parameters,'sim_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        Nsims,Nsubjects = swap.write_catalog(sample,catalog,thresholds,kind='sim')
        print "SWAP: From "+str(Nsubjects)+" subjects classified,"
        print "SWAP: "+str(Nsims)+" sim 'candidates' (with P > rejection) written to "+catalog

        catalog = swap.get_new_filename(tonights.parameters,'dud_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        Nduds,Nsubjects = swap.write_catalog(sample,catalog,thresholds,kind='dud')
        print "SWAP: From "+str(Nsubjects)+" subjects classified,"
        print "SWAP: "+str(Nduds)+" dud 'candidates' (with P > rejection) written to "+catalog
コード例 #2
0
def _collect_offline_inputs(sample, bureau, flags):
    """Build the inputs of the offline EM run from the subjects in `sample`.

    Returns the tuple (bureau_offline, probabilities, training_IDs):

    bureau_offline  dict keyed by agent name; each entry holds the initial
                    'PL'/'PD' guesses, the agent's current values from
                    `bureau` ('PL_in'/'PD_in'), the prior 'Pi' and a
                    'Subjects' dict mapping subject ID to that agent's
                    'ItWas' entry for the subject.
    probabilities   dict mapping subject ID to the initial probability
                    flags['pi'].
    training_IDs    dict mapping subject ID to 1 / 0 (known truth of a
                    training subject) or -1 (subject marked as excluded).
    """
    PL0 = flags['PL0']
    PD0 = flags['PD0']
    pi = flags['pi']
    n_min_assessment = flags['n_min_assessment']
    use_training_info = flags['use_training_info']
    exclude_test_info = flags['exclude_test_info']
    exclude_training_info = flags['exclude_training_info']

    bureau_offline = {}
    probabilities = {}
    training_IDs = {}

    # Hooks for leaving subjects / agents out of the analysis.
    # Nothing is set aside at the moment.
    set_aside_subject = set()
    set_aside_agent = set()

    for ID in sample.list():
        if ID in set_aside_subject:
            continue

        subject = sample.member[ID]
        history = subject.annotationhistory

        # Only subjects with strictly more than n_min_assessment
        # assessments take part (so never-classified subjects are skipped
        # even with the default of 0).
        if len(history['ItWas']) <= n_min_assessment:
            continue

        if subject.category == 'training':
            if use_training_info:
                training_IDs[ID] = {'LENS': 1, 'NOT': 0}[subject.truth]
            if exclude_training_info:
                # when doing M step, don't use these to update parameters
                # (this deliberately wins over use_training_info)
                training_IDs[ID] = -1
        elif subject.category == 'test':
            if exclude_test_info:
                # when doing M step, don't use these to update parameters
                training_IDs[ID] = -1

        probabilities[ID] = pi

        for agent_i, name in enumerate(history['Name']):
            if name in set_aside_agent:
                continue
            xij = history['ItWas'][agent_i]
            if name not in bureau_offline:
                agent = bureau.member[name]
                bureau_offline[name] = {
                    'PD': PD0, 'PL': PL0,
                    'PL_in': agent.PL,
                    'PD_in': agent.PD,
                    'Pi': pi,
                    'Subjects': {}}
            bureau_offline[name]['Subjects'][ID] = xij

    return bureau_offline, probabilities, training_IDs


def _apply_offline_probability(subject, probability):
    """Overwrite a subject's probabilities and re-derive its status.

    Sets both mean_probability and median_probability to `probability`,
    then compares against the subject's own rejection / detection
    thresholds (logic adapted from subject.py, per the original comment).
    """
    subject.mean_probability = probability
    subject.median_probability = probability

    if subject.mean_probability < subject.rejection_threshold:
        subject.status = 'rejected'
        if subject.kind == 'test':
            subject.state = 'inactive'
            subject.retirement_time = -1  # no real timestamp offline
            subject.retirement_age = subject.exposure

    elif subject.mean_probability > subject.detection_threshold:
        # Detections are kept live: state and retirement fields untouched.
        subject.status = 'detected'

    else:
        subject.status = 'undecided'
        if subject.kind == 'test':
            subject.state = 'active'
            subject.retirement_time = 'not yet'
            subject.retirement_age = 0.0


def make_offline_reports(args):
    """
    NAME
        make_offline_reports

    PURPOSE
        Given an offline tuple as well as other bureau tuples etc,
        this script produces the reports made at the end of SWAP

    COMMENTS
        args is a dictionary. Keys matching an entry of the defaults
        below override that default; 'config', 'collection' and 'bureau'
        name the input files; any other key is reported as unrecognized
        and ignored.

        print is called with a single parenthesised string throughout so
        that the output is the same under Python 2 while the code also
        parses under Python 3.

    FLAGS (keys of args)
        output_directory    Output directory, otherwise is '.'
        do_offline          Do offline analysis? (default False)
        PL0, PD0            Initial PL / PD guesses (default 0.5)
        pi                  Initial lens probability (default 4e-2)
        n_min_assessment    A subject needs MORE than this many
                            assessments to be analysed (default 0)
        use_training_info, exclude_test_info, exclude_training_info
                            How training / test subjects enter the
                            EM analysis
        N_min, N_max        Min / max number of EM steps
        epsilon_min         EM escape condition

    INPUTS (keys of args, all required)
        config      Plain text file containing SW experiment configuration
        bureau      Pickled bureau
        collection  Pickled collection

    OUTPUTS
        If do_offline: offline.pickle, collection_offline.pickle and
        bureau_offline.pickle in the output directory.
        Always: the retirement / candidate / training lists, the three
        catalogs, the four plots and the PDF report, with filenames
        obtained from swap.get_new_filename.

    EXAMPLE

    BUGS
        If do_offline, subjects missing from the offline probabilities
        and agents missing from the offline bureau are removed from the
        sample / bureau before the reports are made.

    AUTHORS
        This file is part of the Space Warps project, and is distributed
        under the MIT license by the Space Warps Science Team.
        http://spacewarps.org/

    HISTORY
        2014-09-16  started Davis (KIPAC)
    """
    # ------------------------------------------------------------------
    # Some defaults:

    # default settings are for offline using only exact training info
    flags = {'do_offline': False,
             'output_directory': '.',
             'PL0': 0.5,  # initial PL guess
             'PD0': 0.5,  # initial PD guess
             'pi': 4e-2,  # initial lens probability
             'n_min_assessment': 0,  # minimum number of assessments before included in analysis
             'use_training_info': True,
             'exclude_test_info': True,
             'exclude_training_info': False,
             'N_min': 10,  # min number of EM steps required
             'N_max': 100,  # max number of EM steps
             'epsilon_min': 1e-6,  # escape condition
             }

    # Required input files, filled in from args below.
    inputs = {'config': None, 'collection': None, 'bureau': None}

    for arg in args:
        if arg in flags:
            flags[arg] = args[arg]
        elif arg in inputs:
            inputs[arg] = args[arg]
        else:
            # Two spaces reproduce the output of the former
            # `print "... flag ", arg` statement.
            print("make_offline_reports: unrecognized flag  " + str(arg))

    # Fail early and clearly instead of hitting an unbound local later on.
    missing = sorted(name for name in inputs if inputs[name] is None)
    if missing:
        raise ValueError("make_offline_reports: missing required input(s): "
                         + ", ".join(missing))

    configfile = inputs['config']
    collectionfile = inputs['collection']
    bureaufile = inputs['bureau']

    out_dir = flags['output_directory']

    # ------------------------------------------------------------------
    # Read in run configuration:
    tonights = swap.Configuration(configfile)
    # TODO: do this correctly
    tonights.parameters['finish'] = 'now'
    tonights.parameters['start'] = 'now'
    tonights.parameters['trunk'] = \
        tonights.parameters['survey']+'_'+tonights.parameters['finish']
    tonights.parameters['dir'] = out_dir
    # How will we make decisions based on probability?
    thresholds = {}
    thresholds['detection'] = tonights.parameters['detection_threshold']
    thresholds['rejection'] = tonights.parameters['rejection_threshold']

    t = -1  # placeholder time passed to the plotting routines

    # ------------------------------------------------------------------
    # Read in the bureau of agents who represent the volunteers:

    bureau = swap.read_pickle(bureaufile, 'bureau')

    # ------------------------------------------------------------------
    # Read in the object representing the candidate list:

    sample = swap.read_pickle(collectionfile, 'collection')

    # ------------------------------------------------------------------
    # if do_offline, run offline analysis here:

    if flags['do_offline']:
        pi = flags['pi']

        bureau_offline, probabilities, training_IDs = \
            _collect_offline_inputs(sample, bureau, flags)

        # Run EM Algorithm

        bureau_offline, pi, probabilities, information_dict = EM_algorithm(
                bureau_offline, pi, probabilities, training_IDs,
                N_min=flags['N_min'], N_max=flags['N_max'],
                epsilon_min=flags['epsilon_min'],
                return_information=True)

        tup = (bureau_offline, pi, probabilities, information_dict)
        offlinefile = out_dir + '/offline.pickle'
        swap.write_pickle(tup, offlinefile)

        # --------------------------------------------------------------
        # Now replace sample member probabilities with offline
        # probabilities. Iterate over a copy of the ID list because
        # members may be removed inside the loop.
        for ID in list(sample.list()):
            # just in case any IDs didn't get into offline somehow
            if ID not in probabilities:
                sample.member.pop(ID)
                continue
            subject = sample.member[ID]
            _apply_offline_probability(subject, probabilities[ID])
            # Kept from the original ("just in case"); redundant when
            # member is a plain dict.
            sample.member[ID] = subject

        for kind in ['sim', 'dud', 'test']:
            sample.collect_probabilities(kind)

        # Now save. Write the updated sample object itself (not the
        # temporary dict of subjects) so that the file can be read back
        # the same way as the input collection, mirroring the bureau
        # save below.
        collectionfile = out_dir + '/collection_offline.pickle'
        swap.write_pickle(sample, collectionfile)

        # now update bureau with the offline results
        for ID in list(bureau.list()):
            # just in case any IDs didn't make it to offline
            if ID not in bureau_offline:
                bureau.member.pop(ID)
                continue
            # update PL, PD, then update_skill
            agent = bureau.member[ID]
            agent.PL = bureau_offline[ID]['PL']
            agent.PD = bureau_offline[ID]['PD']
            agent.update_skill()
            # Kept from the original ("just in case").
            bureau.member[ID] = agent

        bureau.collect_probabilities()

        # now save
        bureaufile = out_dir + '/bureau_offline.pickle'
        swap.write_pickle(bureau, bureaufile)

    # ------------------------------------------------------------------
    # now we can pretend we're in SWAP.py

    # Lists: (filename stem, description for the log, write_list item).
    # Retirees first, then candidate image lists, then the training
    # images for inspection.
    list_reports = [
        ('retire_these', 'retiree subject Zooniverse IDs', 'retired_subject'),
        ('candidates', 'lens candidates', 'candidate'),
        ('training_true_positives', 'true positives', 'true_positive'),
        ('training_false_positives', 'false positives', 'false_positive'),
        ('training_false_negatives', 'false negatives', 'false_negative'),
    ]
    for stem, description, item in list_reports:
        listfile = swap.get_new_filename(tonights.parameters, stem)
        print("make_offline_reports: saving " + description + "...")
        N = swap.write_list(sample, listfile, item=item)
        print("make_offline_reports: "+str(N)+" lines written to "+listfile)

    # Also write out catalogs of subjects, including the ZooID, subject ID,
    # how many classifications, and probability:
    # (filename stem, subject kind, label used in the log message)
    catalog_reports = [
        ('candidate_catalog', 'test', "candidates"),
        ('sim_catalog', 'sim', "sim 'candidates'"),
        ('dud_catalog', 'dud', "dud 'candidates'"),
    ]
    for stem, kind, label in catalog_reports:
        catalog = swap.get_new_filename(tonights.parameters, stem)
        print("make_offline_reports: saving catalog of high probability subjects...")
        Nwritten, Nsubjects = swap.write_catalog(sample, catalog, thresholds,
                                                 kind=kind)
        print("make_offline_reports: From "+str(Nsubjects)+" subjects classified,")
        print("make_offline_reports: "+str(Nwritten)+" "+label
              + " (with P > rejection) written to "+catalog)

    # ------------------------------------------------------------------

    # Make plots! Can't plot everything - show at most 200 agents and
    # at most 500 subjects.

    # Agent histories:

    fig1 = bureau.start_history_plot()
    pngfile = swap.get_new_filename(tonights.parameters,'histories')
    Nc = np.min([200,bureau.size()])
    print("make_offline_reports: plotting "+str(Nc)+" agent histories in "+pngfile)

    for Name in bureau.shortlist(Nc):
        bureau.member[Name].plot_history(fig1)

    bureau.finish_history_plot(fig1,t,pngfile)
    tonights.parameters['historiesplot'] = pngfile

    # Agent probabilities:

    pngfile = swap.get_new_filename(tonights.parameters,'probabilities')
    print("make_offline_reports: plotting "+str(Nc)+" agent probabilities in "+pngfile)
    bureau.plot_probabilities(Nc,t,pngfile)
    tonights.parameters['probabilitiesplot'] = pngfile

    # Subject trajectories:

    fig3 = sample.start_trajectory_plot()
    pngfile = swap.get_new_filename(tonights.parameters,'trajectories')

    # At most 500, for display purposes:
    Ns = np.min([500,sample.size()])
    print("make_offline_reports: plotting "+str(Ns)+" subject trajectories in "+pngfile)

    for ID in sample.shortlist(Ns):
        sample.member[ID].plot_trajectory(fig3)

    # To plot only false negatives, or only true positives, shortlist with
    # kind='sim' and status='rejected' / status='detected' instead.

    sample.finish_trajectory_plot(fig3,pngfile,t=t)
    tonights.parameters['trajectoriesplot'] = pngfile

    # Candidates! Plot all undecideds or detections:

    fig4 = sample.start_trajectory_plot(final=True)
    pngfile = swap.get_new_filename(tonights.parameters,'sample')

    # BigN = 100000 would get them all, but we can't see them all!
    BigN = 500
    candidates = []
    candidates += sample.shortlist(BigN,kind='test',status='detected')
    candidates += sample.shortlist(BigN,kind='test',status='undecided')
    sims = []
    sims += sample.shortlist(BigN,kind='sim',status='detected')
    sims += sample.shortlist(BigN,kind='sim',status='undecided')
    duds = []
    duds += sample.shortlist(BigN,kind='dud',status='detected')
    duds += sample.shortlist(BigN,kind='dud',status='undecided')

    # Drawing order: sims, then duds, then candidates.
    for label, IDs in [('sims', sims), ('duds', duds),
                       ('candidates', candidates)]:
        print("make_offline_reports: plotting "+str(len(IDs))+" "+label+" in "+pngfile)
        for ID in IDs:
            sample.member[ID].plot_trajectory(fig4)

    # They will all show up in the histogram though:
    sample.finish_trajectory_plot(fig4,pngfile,final=True)
    tonights.parameters['candidatesplot'] = pngfile

    # ------------------------------------------------------------------
    # Finally, write a PDF report:

    swap.write_report(tonights.parameters,bureau,sample)
コード例 #3
0
ファイル: SWAP.py プロジェクト: kapadia/SpaceWarps
        print "SWAP: " + str(N) + " lines written to " + new_samplefile

        new_samplefile = swap.get_new_filename(tonights.parameters,
                                               'training_false_negatives')
        print "SWAP: saving false negatives..."
        N = swap.write_list(sample, new_samplefile, item='false_negative')
        print "SWAP: " + str(N) + " lines written to " + new_samplefile

        # Also write out catalogs of subjects, including the ZooID, subject ID,
        # how many classifications, and probability:

        catalog = swap.get_new_filename(tonights.parameters,
                                        'candidate_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        Nlenses, Nsubjects = swap.write_catalog(sample,
                                                catalog,
                                                thresholds,
                                                kind='test')
        print "SWAP: From " + str(Nsubjects) + " subjects classified,"
        print "SWAP: " + str(
            Nlenses) + " candidates (with P > rejection) written to " + catalog

        catalog = swap.get_new_filename(tonights.parameters, 'sim_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        Nsims, Nsubjects = swap.write_catalog(sample,
                                              catalog,
                                              thresholds,
                                              kind='sim')
        print "SWAP: From " + str(Nsubjects) + " subjects classified,"
        print "SWAP: " + str(
            Nsims
        ) + " sim 'candidates' (with P > rejection) written to " + catalog
コード例 #4
0
ファイル: SWAP.py プロジェクト: melaniebeck/GZExpress
        N = swap.write_list(sample,new_samplefile,item='false_positive')
        print "SWAP: "+str(N)+" lines written to "+new_samplefile

        new_samplefile = swap.get_new_filename(tonights.parameters,\
                                               'training_false_negatives')
        print "SWAP: saving false negatives..."
        N = swap.write_list(sample,new_samplefile,item='false_negative')
        print "SWAP: "+str(N)+" lines written to "+new_samplefile


        # -------------------------------------------------------------------
        #####   THESE ARE CATALOGS THAT SPACEWARPS WAS INTERESTED IN   #####

        catalog = swap.get_new_filename(tonights.parameters,'candidate_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        N, Ntot = swap.write_catalog(sample,catalog,thresholds, kind='test')
        print "SWAP: From "+str(Ntot)+" subjects classified,"
        print "SWAP: "+str(N)+" candidates (with P > rejection) written to "\
            +catalog

        catalog = swap.get_new_filename(tonights.parameters,'sim_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        N, Ntot = swap.write_catalog(sample,catalog,thresholds, kind='sim')
        print "SWAP: From "+str(Ntot)+" subjects classified,"
        print "SWAP: "+str(N)+" sim 'candidates' (with P > rejection) "\
            "written to "+catalog

        catalog = swap.get_new_filename(tonights.parameters,'dud_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        N,Ntot = swap.write_catalog(sample,catalog,thresholds,kind='dud')
        print "SWAP: From "+str(Ntot)+" subjects classified,"
コード例 #5
0
def MachineClassifier(options, args):

    try: config = options.configfile
    except: pdb.set_trace()

    tonights = swap.Configuration(config)

    #"""
    # Read the pickled random state file
    random_file = open(tonights.parameters['random_file'],"r");
    random_state = cPickle.load(random_file);
    random_file.close();
    np.random.set_state(random_state);
    #"""

    # Get the machine threshold (make retirement decisions)
    threshold = tonights.parameters['machine_threshold']

    # Get list of evaluation metrics and criteria   
    eval_metrics = tonights.parameters['evaluation_metrics']

    survey = tonights.parameters['survey']
    subdir = 'sup_run4'

    #----------------------------------------------------------------------
    # read in the metadata for all subjects (Test or Training sample?)
    subjects = swap.read_pickle(tonights.parameters['metadatafile'], 'metadata')

    #----------------------------------------------------------------------
    # read in the SWAP collection
    sample = swap.read_pickle(tonights.parameters['samplefile'],'collection')

    #----------------------------------------------------------------------
    # read in or create the ML collection
    MLsample = swap.read_pickle(tonights.parameters['MLsamplefile'],
                                'MLcollection')

    # read in or create the ML bureau for machine agents (history)
    MLbureau = swap.read_pickle(tonights.parameters['MLbureaufile'], 'MLbureau')

    #-----------------------------------------------------------------------    
    #        DETERMINE IF THERE IS A TRAINING SAMPLE TO WORK WITH 
    #-----------------------------------------------------------------------
    # TO DO: training sample should only select those which are NOT part of 
    # validation sample (Nair catalog objects) 2/22/16

    # IDENTIFY TRAINING SAMPLE
    train_sample = subjects[subjects['MLsample']=='train']
    train_meta, train_features = ml.extract_training(train_sample)
    train_labels = np.array([1 if p > 0.3 else 0 \
                             for p in train_meta['SWAP_prob']])

    # IDENTIFY VALIDATION SAMPLE (FINAL) 
    valid_sample = subjects[subjects['MLsample']=='valid']
    valid_meta, valid_features = ml.extract_training(valid_sample)
    valid_labels = valid_meta['Expert_label'].filled()

    #if len(train_sample) >= 100: 
    # TO DO: LOOP THROUGH DIFFERENT MACHINES? HOW MANY MACHINES?
    for metric in eval_metrics:
        
        # REGISTER Machine Classifier
        # Construct machine name --> Machine+Metric? For now: KNC
        machine = 'KNC'
        Name = machine+'_'+metric
        
        # register an Agent for this Machine
        try: 
            test = MLbureau.member[Name]
        except: 
            MLbureau.member[Name] = swap.Agent_ML(Name, metric)
            

        #---------------------------------------------------------------    
        #     TRAIN THE MACHINE; EVALUATE ON VALIDATION SAMPLE
        #---------------------------------------------------------------        

        # Now we run the machine -- need cross validation on whatever size 
        # training sample we have .. 
        
        # For now this will be fixed until we build in other machine options
        params = {'n_neighbors':np.arange(1, 2*(len(train_sample)-1) / 3, 2), 
                  'weights':('uniform','distance')}
        
        # Create the model 
        general_model = GridSearchCV(estimator=KNC(), param_grid=params,
                                     error_score=0, scoring=metric)        

        # Train the model -- k-fold cross validation is embedded
        trained_model = general_model.fit(train_features, train_labels)

        # Test "accuracy" (metric of choice) on validation sample
        score = trained_model.score(valid_features, valid_labels)

        MLbureau.member[Name].record_training(\
                            model_described_by=trained_model.best_estimator_, 
                            with_params=trained_model.best_params_, 
                            trained_on=len(train_features), 
                            at_time=TIME, 
                            with_train_acc=traineed_model.best_score_,
                            and_valid_acc=trained_model.score(valid_features,
                                                              valid_labels))

        # Store the trained machine
        MLbureau.member[Name].model = trained_model

        
        # Compute / store confusion matrix as a function of threshold
        # produced by this machine on the Expert Validation sample

        fps, tps, thresh = mtrx._binary_clf_curve(valid_labels,
                            trained_model.predict_proba(valid_features)[:,1])
        metric_list = mtrx.compute_binary_metrics(fps, tps)
        ACC, TPR, FPR, FNR, TNR, PPV, FDR, FOR, NPV = metric_list
        
        MLbureau.member[Name].record_evaluation(accuracy=ACC, 
                                                completeness_s=TPR,
                                                contamination_s=FDR,
                                                completeness_f=TNR,
                                                contamination_f=NPV)

        pdb.set_trace()



        
        # 3. compare the metric of choice with the evaluation criterion to
        # see if this machine has sufficiently learned? 
        # ... what if my criterion is simply "Maximize Accuracy"? 
        # ... or minimize feature contamination? these require that we 
        # compare tonight's machine with the previous night's machine 
        # But if my criterion is simply "have feature contam less than 20%"
        # then it's easy.... 
        
        # IF TRAINED MACHINE PREDICTS WELL ON VALIDATION .... 
        if MLbureau.member[Name].evaluate():
            #---------------------------------------------------------------    
            #                 APPLY MACHINE TO TEST SAMPLE
            #--------------------------------------------------------------- 
            # This requires that my runKNC function returns the Machine Object
            shitski=5
      
            #---------------------------------------------------------------    
            #                    PROCESS PREDICTIONS/PROBS
            #---------------------------------------------------------------
            for s,p,l in zip(test_meta, probas, predictions):
                ID = str(s['id'])

                descriptions = Nair_or_Not(s)
                category, kind, flavor, truth = descriptions

                # LOAD EACH TEST SUBJECT INTO MACHINE COLLECTION
                # -------------------------------------------------------------
                try: 
                    test = MLsample.member[ID]
                except: MLsample.member[ID] = swap.Subject_ML(ID,
                                            str(s['name']), category, kind,
                                            truth,threshold,s['external_ref'])
                
                tstring = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
                MLsample.member[ID].was_described(by='knn', as_being=1, 
                                                  withp=p, at_time=tstring)

                # NOTE: if subject is Nair (training) it doesn't get flagged as 
                # inactive but it can be flagged as detected/rejected


                # IF MACHINE P >= THRESHOLD, INSERT INTO SWAP COLLECTION
                # -------------------------------------------------------------
                thresholds = {'detection':0.,'rejection':0.}
                if (p >= threshold) or (1-p >= threshold):
                    print "BOOM! WE'VE GOT A MACHINE-CLASSIFIED SUBJECT:"
                    print "Probability:",p
                    # Initialize the subject in SWAP Collection
                    sample.member[ID] = swap.Subject(ID, str(s['name']), 
                                            category, kind,flavor,truth,
                                            thresholds, s['external_ref'],0.) 
                    sample.member[ID].retiredby = 'machine'
                
                    # Flag subject as 'INACTIVE' / 'DETECTED' / 'REJECTED'
                    # ----------------------------------------------------------
                    if p >= threshold:
                        sample.member[str(s['id'])].state = 'inactive'
                    elif 1-p >= threshold:
                        sample.member[str(s['id'])].status = 'rejected' 

                        
            #---------------------------------------------------------------    
            #                 SAVE MACHINE METADATA? 
            #---------------------------------------------------------------
            print "Size of SWAP sample:", sample.size()
            print "Size of ML sample:", MLsample.size()

      
            if tonights.parameters['report']:
                
                # Output list of subjects to retire, based on this batch of
                # classifications. Note that what is needed here is the ZooID,
                # not the subject ID:
            
                new_retirementfile = swap.get_new_filename(tonights.parameters,\
                                                   'retire_these', source='ML')
                print "SWAP: saving Machine-retired subject Zooniverse IDs..."
                N = swap.write_list(MLsample,new_retirementfile,
                                    item='retired_subject', source='ML')
                print "SWAP: "+str(N)+" lines written to "+new_retirementfile
            
                # write catalogs of smooth/not over MLthreshold
                # -------------------------------------------------------------
                catalog = swap.get_new_filename(tonights.parameters,
                                            'retired_catalog', source='ML')
                print "SWAP: saving catalog of Machine-retired subjects..."
                Nretired, Nsubjects = swap.write_catalog(MLsample,bureau,
                                                catalog, threshold,
                                                kind='rejected', source='ML')
                print "SWAP: From "+str(Nsubjects)+" subjects classified,"
                print "SWAP: "+str(Nretired)+" retired (with P < rejection) "\
                    "written to "+catalog
            
                catalog = swap.get_new_filename(tonights.parameters,
                                            'detected_catalog', source='ML')
                print "SWAP: saving catalog of Machine detected subjects..."
                Ndetected, Nsubjects = swap.write_catalog(MLsample, bureau,
                                                catalog, threshold, 
                                                kind='detected', source='ML')
                print "SWAP: From "+str(Nsubjects)+" subjects classified,"
                print "SWAP: %i detected (with P > MLthreshold) "\
                "written to %s"%(Ndetected, catalog)    


    

    # If it hasn't been done already, save the current directory
    # ---------------------------------------------------------------------
    tonights.parameters['dir'] = os.getcwd()+'/'+tonights.parameters['trunk']
    
    if not os.path.exists(tonights.parameters['dir']):
        os.makedirs(tonights.parameters['dir'])


    # Repickle everything (SWAP subjects, ML sample, metadata)
    # -----------------------------------------------------------------------
    if tonights.parameters['repickle']:

        new_samplefile = swap.get_new_filename(tonights.parameters,'collection')
        print "SWAP: saving SWAP subjects to "+new_samplefile
        swap.write_pickle(sample,new_samplefile)
        tonights.parameters['samplefile'] = new_samplefile
        
        new_samplefile=swap.get_new_filename(tonights.parameters,'MLcollection')
        print "SWAP: saving test sample subjects to "+new_samplefile
        swap.write_pickle(MLsample,new_samplefile)
        tonights.parameters['MLsamplefile'] = new_samplefile

        metadatafile = swap.get_new_filename(tonights.parameters,'metadata')
        print "SWAP: saving metadata to "+metadatafile
        swap.write_pickle(subjects,metadatafile)
        tonights.parameters['metadatafile'] = metadatafile
       

    # Update the time increment for SWAP's next run
    # -----------------------------------------------------------------------
    t2 = datetime.datetime.strptime(tonights.parameters['start'],
                                    '%Y-%m-%d_%H:%M:%S') + \
         datetime.timedelta(days=tonights.parameters['increment'])
    tstop = datetime.datetime.strptime(tonights.parameters['end'],
                                    '%Y-%m-%d_%H:%M:%S')
    if t2 == tstop: 
        plots = True
    else:
        tonights.parameters['start'] = t2.strftime('%Y-%m-%d_%H:%M:%S')
                

    # Update configfile to reflect Machine additions
    # -----------------------------------------------------------------------
    configfile = 'update.config'

    random_file = open(tonights.parameters['random_file'],"w");
    random_state = np.random.get_state();
    cPickle.dump(random_state,random_file);
    random_file.close();
    swap.write_config(configfile, tonights.parameters)

    pdb.set_trace()
コード例 #6
0
ファイル: SWAP.py プロジェクト: melaniebeck/SpaceWarps
        N = swap.write_list(sample,new_samplefile,item='false_positive')
        print "SWAP: "+str(N)+" lines written to "+new_samplefile

        new_samplefile = swap.get_new_filename(tonights.parameters,\
                                               'training_false_negatives')
        print "SWAP: saving false negatives..."
        N = swap.write_list(sample,new_samplefile,item='false_negative')
        print "SWAP: "+str(N)+" lines written to "+new_samplefile


        # -------------------------------------------------------------------
        #####   THESE ARE CATALOGS THAT SPACEWARPS WAS INTERESTED IN   #####

        catalog = swap.get_new_filename(tonights.parameters,'candidate_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        N, Ntot = swap.write_catalog(sample,catalog,thresholds, kind='test')
        print "SWAP: From "+str(Ntot)+" subjects classified,"
        print "SWAP: "+str(N)+" candidates (with P > rejection) written to "\
            +catalog

        catalog = swap.get_new_filename(tonights.parameters,'sim_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        N, Ntot = swap.write_catalog(sample,catalog,thresholds, kind='sim')
        print "SWAP: From "+str(Ntot)+" subjects classified,"
        print "SWAP: "+str(N)+" sim 'candidates' (with P > rejection) "\
            "written to "+catalog

        catalog = swap.get_new_filename(tonights.parameters,'dud_catalog')
        print "SWAP: saving catalog of high probability subjects..."
        N,Ntot = swap.write_catalog(sample,catalog,thresholds,kind='dud')
        print "SWAP: From "+str(Ntot)+" subjects classified,"